3
0
Fork 0
forked from suyu/suyu

shader: Implement more of XMAD and FFMA32I and fix XMAD.CBCC

This commit is contained in:
ReinUsesLisp 2021-02-23 04:46:39 -03:00 committed by ameerj
parent e44752ddc8
commit 9d6a98d950
5 changed files with 76 additions and 28 deletions

View file

@ -224,8 +224,8 @@ void EmitShiftRightArithmetic32(EmitContext& ctx);
Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b); Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b);
Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b); Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b);
Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b); Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b);
void EmitBitFieldInsert(EmitContext& ctx); Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count);
void EmitBitFieldSExtract(EmitContext& ctx); Id EmitBitFieldSExtract(EmitContext& ctx, Id base, Id offset, Id count);
Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count);
Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs);
Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs);

View file

@ -90,12 +90,12 @@ Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b) {
return ctx.OpBitwiseXor(ctx.U32[1], a, b); return ctx.OpBitwiseXor(ctx.U32[1], a, b);
} }
void EmitBitFieldInsert(EmitContext&) { Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count) {
throw NotImplementedException("SPIR-V Instruction"); return ctx.OpBitFieldInsert(ctx.U32[1], base, insert, offset, count);
} }
void EmitBitFieldSExtract(EmitContext&) { Id EmitBitFieldSExtract(EmitContext& ctx, Id base, Id offset, Id count) {
throw NotImplementedException("SPIR-V Instruction"); return ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count);
} }
Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) {

View file

@ -17,9 +17,6 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s
BitField<8, 8, IR::Reg> src_a; BitField<8, 8, IR::Reg> src_a;
} const ffma{insn}; } const ffma{insn};
if (sat) {
throw NotImplementedException("FFMA SAT");
}
if (cc) { if (cc) {
throw NotImplementedException("FFMA CC"); throw NotImplementedException("FFMA CC");
} }
@ -31,7 +28,20 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s
.rounding{CastFpRounding(fp_rounding)}, .rounding{CastFpRounding(fp_rounding)},
.fmz_mode{CastFmzMode(fmz_mode)}, .fmz_mode{CastFmzMode(fmz_mode)},
}; };
v.F(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)};
if (fmz_mode == FmzMode::FMZ && !sat) {
// Skip the explicit FMZ handling when SAT is enabled, as saturation does the logic for us.
// In D3D9 mode, anything * 0 is zero, even NaN and infinity.
const IR::F32 zero{v.ir.Imm32(0.0f)};
const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
value = IR::F32{v.ir.Select(any_zero, op_c, value)};
}
if (sat) {
value = v.ir.FPSaturate(value);
}
v.F(ffma.dest_reg, value);
} }
void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) { void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) {
@ -54,20 +64,31 @@ void TranslatorVisitor::FFMA_reg(u64 insn) {
FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn)); FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn));
} }
void TranslatorVisitor::FFMA_rc(u64) { void TranslatorVisitor::FFMA_rc(u64 insn) {
throw NotImplementedException("FFMA (rc)"); FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn));
} }
void TranslatorVisitor::FFMA_cr(u64 insn) { void TranslatorVisitor::FFMA_cr(u64 insn) {
FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn)); FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn));
} }
void TranslatorVisitor::FFMA_imm(u64) { void TranslatorVisitor::FFMA_imm(u64 insn) {
throw NotImplementedException("FFMA (imm)"); FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn));
} }
void TranslatorVisitor::FFMA32I(u64) { void TranslatorVisitor::FFMA32I(u64 insn) {
throw NotImplementedException("FFMA32I"); union {
u64 raw;
BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register
BitField<52, 1, u64> cc;
BitField<53, 2, FmzMode> fmz_mode;
BitField<55, 1, u64> sat;
BitField<56, 1, u64> neg_a;
BitField<57, 1, u64> neg_c;
} const ffma32i{insn};
FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false,
ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN);
} }
} // namespace Shader::Maxwell } // namespace Shader::Maxwell

View file

@ -94,6 +94,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
BitField<48, 1, u64> neg_b; BitField<48, 1, u64> neg_b;
BitField<50, 1, u64> sat; BitField<50, 1, u64> sat;
} const fmul{insn}; } const fmul{insn};
FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0,
fmul.neg_b != 0); fmul.neg_b != 0);
} }
@ -118,6 +119,7 @@ void TranslatorVisitor::FMUL32I(u64 insn) {
BitField<53, 2, FmzMode> fmz; BitField<53, 2, FmzMode> fmz;
BitField<55, 1, u64> sat; BitField<55, 1, u64> sat;
} const fmul32i{insn}; } const fmul32i{insn};
FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None,
fmul32i.sat != 0, fmul32i.cc != 0, false); fmul32i.sat != 0, fmul32i.cc != 0, false);
} }

View file

@ -58,7 +58,7 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
case SelectMode::CHI: case SelectMode::CHI:
return ExtractHalf(v, src_c, Half::H1, false); return ExtractHalf(v, src_c, Half::H1, false);
case SelectMode::CBCC: case SelectMode::CBCC:
return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_b); return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
case SelectMode::CSFU: case SelectMode::CSFU:
throw NotImplementedException("XMAD CSFU"); throw NotImplementedException("XMAD CSFU");
} }
@ -78,16 +78,44 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
} }
} // Anonymous namespace } // Anonymous namespace
void TranslatorVisitor::XMAD_reg(u64) { void TranslatorVisitor::XMAD_reg(u64 insn) {
throw NotImplementedException("XMAD (reg)"); union {
u64 raw;
BitField<35, 1, Half> half_b;
BitField<36, 1, u64> psl;
BitField<37, 1, u64> mrg;
BitField<38, 1, u64> x;
BitField<50, 3, SelectMode> select_mode;
} const xmad{insn};
XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
xmad.mrg != 0, xmad.x != 0);
} }
void TranslatorVisitor::XMAD_rc(u64) { void TranslatorVisitor::XMAD_rc(u64 insn) {
throw NotImplementedException("XMAD (rc)"); union {
u64 raw;
BitField<50, 2, SelectMode> select_mode;
BitField<52, 1, Half> half_b;
BitField<54, 1, u64> x;
} const xmad{insn};
XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false,
xmad.x != 0);
} }
void TranslatorVisitor::XMAD_cr(u64) { void TranslatorVisitor::XMAD_cr(u64 insn) {
throw NotImplementedException("XMAD (cr)"); union {
u64 raw;
BitField<50, 2, SelectMode> select_mode;
BitField<52, 1, Half> half_b;
BitField<54, 1, u64> x;
BitField<55, 1, u64> psl;
BitField<56, 1, u64> mrg;
} const xmad{insn};
XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
xmad.mrg != 0, xmad.x != 0);
} }
void TranslatorVisitor::XMAD_imm(u64 insn) { void TranslatorVisitor::XMAD_imm(u64 insn) {
@ -97,14 +125,11 @@ void TranslatorVisitor::XMAD_imm(u64 insn) {
BitField<36, 1, u64> psl; BitField<36, 1, u64> psl;
BitField<37, 1, u64> mrg; BitField<37, 1, u64> mrg;
BitField<38, 1, u64> x; BitField<38, 1, u64> x;
BitField<39, 8, IR::Reg> src_c;
BitField<50, 3, SelectMode> select_mode; BitField<50, 3, SelectMode> select_mode;
} const xmad{insn}; } const xmad{insn};
const IR::U32 src_b{ir.Imm32(static_cast<u32>(xmad.src_b))}; XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
const IR::U32 src_c{X(xmad.src_c)}; Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
XMAD(*this, insn, src_b, src_c, xmad.select_mode, Half::H0, xmad.psl != 0, xmad.mrg != 0,
xmad.x != 0);
} }
} // namespace Shader::Maxwell } // namespace Shader::Maxwell