diff --git a/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp b/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp
index 45b8f036..8dfb2f6e 100644
--- a/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp
@@ -10,6 +10,24 @@
 #include "dynarmic/ir/microinstruction.h"
 #include "dynarmic/ir/opcodes.h"
 
+#define FCODE(NAME) /* Expands to a lambda that forwards its arguments to code.NAME##s when esize == 32 and to code.NAME##d otherwise; `code` and a compile-time `esize` must be in scope at the point of use. */ \
+    [&code](auto... args) { \
+        if constexpr (esize == 32) { \
+            code.NAME##s(args...); \
+        } else { \
+            code.NAME##d(args...); \
+        } \
+    }
+
+#define ICODE(NAME) /* Same shape as FCODE, but selects code.NAME##d when esize == 32 and code.NAME##q otherwise. */ \
+    [&code](auto... args) { \
+        if constexpr (esize == 32) { \
+            code.NAME##d(args...); \
+        } else { \
+            code.NAME##q(args...); \
+        } \
+    }
+
 namespace Dynarmic::Backend::X64 {
 
 using namespace Xbyak::util;
@@ -65,43 +83,23 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     code.movaps(tmp, result);
 
     if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) {
-        // Do a regular unsigned operation
-        if constexpr (op == Op::Add) {
-            if constexpr (esize == 32) {
-                code.vpaddd(result, result, arg);
-            } else {
-                code.vpaddq(result, result, arg);
-            }
-        } else {
-            if constexpr (esize == 32) {
-                code.vpsubd(result, result, arg);
-            } else {
-                code.vpsubq(result, result, arg);
-            }
-        }
-
-        // Determine if an overflow/underflow happened
         if constexpr (op == Op::Add) {
+            ICODE(vpadd)(result, result, arg);  // vpaddd or vpaddq, selected by esize
             code.vpternlogd(tmp, result, arg, 0b00100100);
         } else {
+            ICODE(vpsub)(result, result, arg);  // vpsubd or vpsubq, selected by esize
             code.vpternlogd(tmp, result, arg, 0b00011000);
         }
 
-        // Masked write
         if constexpr (esize == 32) {
             code.vpmovd2m(k1, tmp);
-            code.vpsrad(result | k1, result, 31);
-            code.vpxord(result | k1, result, code.MConst(xword_b, msb_mask, msb_mask));
         } else {
             code.vpmovq2m(k1, tmp);
-            code.vpsraq(result | k1, result, 63);
-            code.vpxorq(result | k1, result, code.MConst(xword_b, msb_mask, msb_mask));
         }
+        ICODE(vpsra)(result | k1, result, u8(esize - 1));  // stands in for the former vpsrad(..., 31) / vpsraq(..., 63) pair
+        ICODE(vpxor)(result | k1, result, code.MConst(xword_b, msb_mask, msb_mask));  // vpxord or vpxorq, written under mask k1
 
-        // Set ZF if an overflow happened
         code.ktestb(k1, k1);
-
-        // Write Q if overflow/underflow occured
         code.setnz(overflow);
         code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
 
@@ -109,22 +107,12 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
         return;
     }
 
-    // TODO AVX2 implementation
-
     code.movaps(xmm0, result);
 
     if constexpr (op == Op::Add) {
-        if constexpr (esize == 32) {
-            code.paddd(result, arg);
-        } else {
-            code.paddq(result, arg);
-        }
+        ICODE(padd)(result, arg);  // paddd or paddq, selected by esize
     } else {
-        if constexpr (esize == 32) {
-            code.psubd(result, arg);
-        } else {
-            code.psubq(result, arg);
-        }
+        ICODE(psub)(result, arg);  // psubd or psubq, selected by esize
     }
 
     code.pxor(tmp, result);
@@ -145,22 +133,14 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     if (code.HasHostFeature(HostFeature::SSE41)) {
         code.ptest(xmm0, code.MConst(xword, msb_mask, msb_mask));
     } else {
-        if constexpr (esize == 32) {
-            code.movmskps(overflow.cvt32(), xmm0);
-        } else {
-            code.movmskpd(overflow.cvt32(), xmm0);
-        }
+        FCODE(movmskp)(overflow.cvt32(), xmm0);  // movmskps or movmskpd, selected by esize
         code.test(overflow.cvt32(), overflow.cvt32());
     }
     code.setnz(overflow);
     code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
 
     if (code.HasHostFeature(HostFeature::SSE41)) {
-        if constexpr (esize == 32) {
-            code.blendvps(result, tmp);
-        } else {
-            code.blendvpd(result, tmp);
-        }
+        FCODE(blendvp)(result, tmp);  // blendvps or blendvpd, selected by esize
 
         ctx.reg_alloc.DefineValue(inst, result);
     } else {