1
0
Fork 0
forked from suyu/suyu

Merge pull request #2423 from FernandoS27/half-correct

Corrections to half-float operations: HADD2, HMUL2, and HFMA2
This commit is contained in:
bunnei 2019-04-28 22:24:22 -04:00 committed by GitHub
commit 9a3737120d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 16 additions and 15 deletions

View file

@ -9,6 +9,7 @@
namespace VideoCommon::Shader { namespace VideoCommon::Shader {
using Tegra::Shader::HalfType;
using Tegra::Shader::Instruction; using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode; using Tegra::Shader::OpCode;
@ -22,7 +23,6 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
} }
} }
UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");
const bool negate_a = const bool negate_a =
opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
@ -32,35 +32,37 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a);
Node op_b = [&]() { auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> {
switch (opcode->get().GetId()) { switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C: case OpCode::Id::HADD2_C:
case OpCode::Id::HMUL2_C: case OpCode::Id::HMUL2_C:
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::HADD2_R: case OpCode::Id::HADD2_R:
case OpCode::Id::HMUL2_R: case OpCode::Id::HMUL2_R:
return GetRegister(instr.gpr20); return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
default: default:
UNREACHABLE(); UNREACHABLE();
return Immediate(0); return {HalfType::F32, Immediate(0)};
} }
}(); }();
op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b); op_b = UnpackHalfFloat(op_b, type_b);
op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); // redeclaration to avoid a bug in clang with reusing local bindings in lambdas
Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
Node value = [&]() { Node value = [&]() {
switch (opcode->get().GetId()) { switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C: case OpCode::Id::HADD2_C:
case OpCode::Id::HADD2_R: case OpCode::Id::HADD2_R:
return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt);
case OpCode::Id::HMUL2_C: case OpCode::Id::HMUL2_C:
case OpCode::Id::HMUL2_R: case OpCode::Id::HMUL2_R:
return Operation(OperationCode::HMul, PRECISE, op_a, op_b); return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt);
default: default:
UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
return Immediate(0); return Immediate(0);
} }
}(); }();
value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
SetRegister(bb, instr.gpr0, value); SetRegister(bb, instr.gpr0, value);
@ -68,4 +70,4 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
return pc; return pc;
} }
} // namespace VideoCommon::Shader } // namespace VideoCommon::Shader

View file

@ -34,15 +34,14 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
case OpCode::Id::HFMA2_CR: case OpCode::Id::HFMA2_CR:
neg_b = instr.hfma2.negate_b; neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c; neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_b, return {instr.hfma2.saturate, HalfType::F32,
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
case OpCode::Id::HFMA2_RC: case OpCode::Id::HFMA2_RC:
neg_b = instr.hfma2.negate_b; neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c; neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
instr.hfma2.type_b, HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::HFMA2_RR: case OpCode::Id::HFMA2_RR:
neg_b = instr.hfma2.rr.negate_b; neg_b = instr.hfma2.rr.negate_b;
neg_c = instr.hfma2.rr.negate_c; neg_c = instr.hfma2.rr.negate_c;
@ -56,13 +55,13 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
return {false, identity, Immediate(0), identity, Immediate(0)}; return {false, identity, Immediate(0), identity, Immediate(0)};
} }
}(); }();
UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
value = GetSaturatedHalfFloat(value, saturate);
value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
SetRegister(bb, instr.gpr0, value); SetRegister(bb, instr.gpr0, value);
@ -70,4 +69,4 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
return pc; return pc;
} }
} // namespace VideoCommon::Shader } // namespace VideoCommon::Shader