From b02292bec7187ad128689dc1e57554625aea2dd7 Mon Sep 17 00:00:00 2001
From: Wunkolo
Date: Tue, 2 Jan 2024 12:25:31 -0800
Subject: [PATCH] block_of_code: Rename `MConst` to `XmmConst`

`MConst` is refactored into `XmmConst` to clearly communicate the
addressable space of the newly allocated 16-byte memory constant.
---
 src/dynarmic/backend/x64/block_of_code.cpp    |   2 +-
 src/dynarmic/backend/x64/block_of_code.h      |   2 +-
 src/dynarmic/backend/x64/emit_x64_crc32.cpp   |   6 +-
 .../backend/x64/emit_x64_floating_point.cpp   | 144 ++++++------
 src/dynarmic/backend/x64/emit_x64_packed.cpp  |  10 +-
 src/dynarmic/backend/x64/emit_x64_vector.cpp  | 208 +++++++++---------
 .../x64/emit_x64_vector_floating_point.cpp    |  34 +--
 .../x64/emit_x64_vector_saturation.cpp        |   6 +-
 src/dynarmic/backend/x64/reg_alloc.cpp        |   2 +-
 9 files changed, 207 insertions(+), 207 deletions(-)

diff --git a/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/backend/x64/block_of_code.cpp
index e949c466..a8cb0480 100644
--- a/src/dynarmic/backend/x64/block_of_code.cpp
+++ b/src/dynarmic/backend/x64/block_of_code.cpp
@@ -500,7 +500,7 @@ void BlockOfCode::LoadRequiredFlagsForCondFromRax(IR::Cond cond) {
     }
 }
 
-Xbyak::Address BlockOfCode::MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
+Xbyak::Address BlockOfCode::XmmConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
     return constant_pool.GetConstant(frame, lower, upper);
 }
 
diff --git a/src/dynarmic/backend/x64/block_of_code.h b/src/dynarmic/backend/x64/block_of_code.h
index 2481e94c..c6223c02 100644
--- a/src/dynarmic/backend/x64/block_of_code.h
+++ b/src/dynarmic/backend/x64/block_of_code.h
@@ -122,7 +122,7 @@ public:
         }
     }
 
-    Xbyak::Address MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0);
+    Xbyak::Address XmmConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0);
 
     CodePtr GetCodeBegin() const;
     size_t GetTotalCodeSize() const;
diff --git a/src/dynarmic/backend/x64/emit_x64_crc32.cpp b/src/dynarmic/backend/x64/emit_x64_crc32.cpp
index e15b5df0..4ce7fd53 100644
--- a/src/dynarmic/backend/x64/emit_x64_crc32.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_crc32.cpp
@@ -42,7 +42,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
         const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
         const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
 
-        code.movdqa(xmm_const, code.MConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
+        code.movdqa(xmm_const, code.XmmConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
 
         code.movzx(value.cvt32(), value.changeBit(data_size));
         code.xor_(value.cvt32(), crc);
@@ -72,7 +72,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
         const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
         const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
 
-        code.movdqa(xmm_const, code.MConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
+        code.movdqa(xmm_const, code.XmmConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
 
         code.xor_(crc, value);
         code.shl(crc.cvt64(), 32);
@@ -93,7 +93,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
         const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
         const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
 
-        code.movdqa(xmm_const, code.MConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
+        code.movdqa(xmm_const, code.XmmConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
 
         code.mov(crc, crc);
         code.xor_(crc.cvt64(), value);
diff --git 
a/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 54848295..21149139 100644 --- a/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -92,7 +92,7 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list t constexpr u64 denormal_to_zero64 = mcl::bit::replicate_element(denormal_to_zero); const Xbyak::Xmm tmp = xmm16; - FCODE(vmovap)(tmp, code.MConst(xword, u64(denormal_to_zero64), u64(denormal_to_zero64))); + FCODE(vmovap)(tmp, code.XmmConst(xword, u64(denormal_to_zero64), u64(denormal_to_zero64))); for (const Xbyak::Xmm& xmm : to_daz) { FCODE(vfixupimms)(xmm, xmm, tmp, u8(0)); @@ -101,17 +101,17 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list t } for (const Xbyak::Xmm& xmm : to_daz) { - code.movaps(xmm0, code.MConst(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask)); + code.movaps(xmm0, code.XmmConst(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask)); code.andps(xmm0, xmm); if constexpr (fsize == 32) { - code.pcmpgtd(xmm0, code.MConst(xword, f32_smallest_normal - 1)); + code.pcmpgtd(xmm0, code.XmmConst(xword, f32_smallest_normal - 1)); } else if (code.HasHostFeature(HostFeature::SSE42)) { - code.pcmpgtq(xmm0, code.MConst(xword, f64_smallest_normal - 1)); + code.pcmpgtq(xmm0, code.XmmConst(xword, f64_smallest_normal - 1)); } else { - code.pcmpgtd(xmm0, code.MConst(xword, f64_smallest_normal - 1)); + code.pcmpgtd(xmm0, code.XmmConst(xword, f64_smallest_normal - 1)); code.pshufd(xmm0, xmm0, 0b11100101); } - code.orps(xmm0, code.MConst(xword, fsize == 32 ? f32_negative_zero : f64_negative_zero)); + code.orps(xmm0, code.XmmConst(xword, fsize == 32 ? f32_negative_zero : f64_negative_zero)); code.andps(xmm, xmm0); } } @@ -128,7 +128,7 @@ void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, FpFixup::PosZero); - FCODE(vfixupimms)(xmm_value, xmm_value, code.MConst(ptr, u64(nan_to_zero)), u8(0)); + FCODE(vfixupimms)(xmm_value, xmm_value, code.XmmConst(ptr, u64(nan_to_zero)), u8(0)); } else if (code.HasHostFeature(HostFeature::AVX)) { FCODE(vcmpords)(xmm_scratch, xmm_value, xmm_value); FCODE(vandp)(xmm_value, xmm_value, xmm_scratch); @@ -144,15 +144,15 @@ void ForceToDefaultNaN(BlockOfCode& code, Xbyak::Xmm result) { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { const Xbyak::Opmask nan_mask = k1; FCODE(vfpclasss)(nan_mask, result, u8(FpClass::QNaN | FpClass::SNaN)); - FCODE(vblendmp)(result | nan_mask, result, code.MConst(ptr_b, fsize == 32 ? f32_nan : f64_nan)); + FCODE(vblendmp)(result | nan_mask, result, code.XmmConst(ptr_b, fsize == 32 ? f32_nan : f64_nan)); } else if (code.HasHostFeature(HostFeature::AVX)) { FCODE(vcmpunords)(xmm0, result, result); - FCODE(blendvp)(result, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan)); + FCODE(blendvp)(result, code.XmmConst(xword, fsize == 32 ? f32_nan : f64_nan)); } else { Xbyak::Label end; FCODE(ucomis)(result, result); code.jnp(end); - code.movaps(result, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan)); + code.movaps(result, code.XmmConst(xword, fsize == 32 ? f32_nan : f64_nan)); code.L(end); } } @@ -166,7 +166,7 @@ SharedLabel ProcessNaN(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm a) { ctx.deferred_emits.emplace_back([=, &code] { code.L(*nan); - code.orps(a, code.MConst(xword, fsize == 32 ? 
0x00400000 : 0x0008'0000'0000'0000)); + code.orps(a, code.XmmConst(xword, fsize == 32 ? 0x00400000 : 0x0008'0000'0000'0000)); code.jmp(*end, code.T_NEAR); }); @@ -262,10 +262,10 @@ void EmitPostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm op1, X // Silence the SNaN as required by spec. if (code.HasHostFeature(HostFeature::AVX)) { - code.vorps(result, op2, code.MConst(xword, mantissa_msb)); + code.vorps(result, op2, code.XmmConst(xword, mantissa_msb)); } else { code.movaps(result, op2); - code.orps(result, code.MConst(xword, mantissa_msb)); + code.orps(result, code.XmmConst(xword, mantissa_msb)); } code.jmp(end, code.T_NEAR); } @@ -346,7 +346,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) FCODE(ucomis)(op1, op2); code.jp(op_are_nans); // Here we must return a positive NaN, because the indefinite value on x86 is a negative NaN! - code.movaps(result, code.MConst(xword, FP::FPInfo::DefaultNaN())); + code.movaps(result, code.XmmConst(xword, FP::FPInfo::DefaultNaN())); code.jmp(*end, code.T_NEAR); code.L(op_are_nans); EmitPostProcessNaNs(code, result, op1, op2, tmp, *end); @@ -364,7 +364,7 @@ void FPAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Address mask = code.MConst(xword, non_sign_mask); + const Xbyak::Address mask = code.XmmConst(xword, non_sign_mask); code.andps(result, mask); @@ -390,7 +390,7 @@ void FPNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Address mask = code.MConst(xword, u64(sign_mask)); + const Xbyak::Address mask = code.XmmConst(xword, u64(sign_mask)); code.xorps(result, mask); @@ -461,7 +461,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.L(nan); if (ctx.FPCR().DN()) { - code.movaps(result, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan)); + code.movaps(result, code.XmmConst(xword, fsize == 32 ? f32_nan : f64_nan)); code.jmp(*end); } else { code.movaps(tmp, result); @@ -493,7 +493,7 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i if (ctx.FPCR().DN()) { FCODE(vcmps)(k1, op2, op2, Cmp::Unordered_Q); - FCODE(vmovs)(op2 | k1, code.MConst(xword, default_nan)); + FCODE(vmovs)(op2 | k1, code.XmmConst(xword, default_nan)); } } else { Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr(); @@ -550,12 +550,12 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i code.jc(maybe_both_nan); if (ctx.FPCR().DN()) { code.L(snan); - code.movaps(op2, code.MConst(xword, default_nan)); + code.movaps(op2, code.XmmConst(xword, default_nan)); code.jmp(*end); } else { code.movaps(op2, op1); code.L(snan); - code.orps(op2, code.MConst(xword, FP::FPInfo::mantissa_msb)); + code.orps(op2, code.XmmConst(xword, FP::FPInfo::mantissa_msb)); code.jmp(*end); } @@ -661,12 +661,12 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { FCODE(vfmadd231s)(result, operand2, operand3); if (needs_rounding_correction && needs_nan_correction) { - code.vandps(xmm0, result, code.MConst(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask)); - FCODE(ucomis)(xmm0, code.MConst(xword, fsize == 32 ? f32_smallest_normal : f64_smallest_normal)); + code.vandps(xmm0, result, code.XmmConst(xword, fsize == 32 ? 
f32_non_sign_mask : f64_non_sign_mask)); + FCODE(ucomis)(xmm0, code.XmmConst(xword, fsize == 32 ? f32_smallest_normal : f64_smallest_normal)); code.jz(*fallback, code.T_NEAR); } else if (needs_rounding_correction) { - code.vandps(xmm0, result, code.MConst(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask)); - code.vxorps(xmm0, xmm0, code.MConst(xword, fsize == 32 ? f32_smallest_normal : f64_smallest_normal)); + code.vandps(xmm0, result, code.XmmConst(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask)); + code.vxorps(xmm0, xmm0, code.XmmConst(xword, fsize == 32 ? f32_smallest_normal : f64_smallest_normal)); code.ptest(xmm0, xmm0); code.jz(*fallback, code.T_NEAR); } else if (needs_nan_correction) { @@ -724,7 +724,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { Xbyak::Label has_nan, indeterminate, op1_snan, op1_done, op2_done, op3_done; - code.vmovaps(xmm0, code.MConst(xword, FP::FPInfo::mantissa_msb)); + code.vmovaps(xmm0, code.XmmConst(xword, FP::FPInfo::mantissa_msb)); FCODE(ucomis)(operand2, operand3); code.jp(has_nan); @@ -739,7 +739,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.jnp(*end); code.L(indeterminate); - code.vmovaps(result, code.MConst(xword, FP::FPInfo::DefaultNaN())); + code.vmovaps(result, code.XmmConst(xword, FP::FPInfo::DefaultNaN())); code.jmp(*end); code.L(has_nan); @@ -854,12 +854,12 @@ static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.movaps(result, op1); code.xorps(result, op2); } - code.andps(result, code.MConst(xword, FP::FPInfo::sign_mask)); - code.orps(result, code.MConst(xword, FP::FPValue())); + code.andps(result, code.XmmConst(xword, FP::FPInfo::sign_mask)); + code.orps(result, code.XmmConst(xword, FP::FPValue())); code.jmp(*end, code.T_NEAR); code.L(op_are_nans); if (do_default_nan) { - code.movaps(result, code.MConst(xword, FP::FPInfo::DefaultNaN())); + code.movaps(result, code.XmmConst(xword, FP::FPInfo::DefaultNaN())); code.jmp(*end, code.T_NEAR); } else { EmitPostProcessNaNs(code, result, op1, op2, tmp, *end); @@ -960,7 +960,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code.movaps(result, code.MConst(xword, FP::FPValue())); + code.movaps(result, code.XmmConst(xword, FP::FPValue())); FCODE(vfnmadd231s)(result, operand1, operand2); ctx.reg_alloc.DefineValue(inst, result); @@ -974,7 +974,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code.movaps(result, code.MConst(xword, FP::FPValue())); + code.movaps(result, code.XmmConst(xword, FP::FPValue())); FCODE(vfnmadd231s)(result, operand1, operand2); FCODE(ucomis)(result, result); code.jp(*fallback, code.T_NEAR); @@ -1006,7 +1006,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code.movaps(result, code.MConst(xword, FP::FPValue())); + code.movaps(result, code.XmmConst(xword, FP::FPValue())); FCODE(muls)(operand1, operand2); FCODE(subs)(result, operand1); @@ -1135,19 +1135,19 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i code.movaps(value, operand); - code.movaps(xmm0, code.MConst(xword, fsize == 
32 ? 0xFFFF8000 : 0xFFFF'F000'0000'0000)); + code.movaps(xmm0, code.XmmConst(xword, fsize == 32 ? 0xFFFF8000 : 0xFFFF'F000'0000'0000)); code.pand(value, xmm0); - code.por(value, code.MConst(xword, fsize == 32 ? 0x00008000 : 0x0000'1000'0000'0000)); + code.por(value, code.XmmConst(xword, fsize == 32 ? 0x00008000 : 0x0000'1000'0000'0000)); // Detect NaNs, negatives, zeros, denormals and infinities - FCODE(ucomis)(value, code.MConst(xword, FPT(1) << FP::FPInfo::explicit_mantissa_width)); + FCODE(ucomis)(value, code.XmmConst(xword, FPT(1) << FP::FPInfo::explicit_mantissa_width)); code.jna(*bad_values, code.T_NEAR); FCODE(sqrts)(value, value); - ICODE(mov)(result, code.MConst(xword, FP::FPValue())); + ICODE(mov)(result, code.XmmConst(xword, FP::FPValue())); FCODE(divs)(result, value); - ICODE(padd)(result, code.MConst(xword, fsize == 32 ? 0x00004000 : 0x0000'0800'0000'0000)); + ICODE(padd)(result, code.XmmConst(xword, fsize == 32 ? 0x00004000 : 0x0000'0800'0000'0000)); code.pand(result, xmm0); code.L(*end); @@ -1188,7 +1188,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i } code.L(default_nan); - code.movd(result, code.MConst(xword, 0x7FC00000)); + code.movd(result, code.XmmConst(xword, 0x7FC00000)); code.jmp(*end, code.T_NEAR); } else { Xbyak::Label nan, zero; @@ -1217,26 +1217,26 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i code.L(zero); if (code.HasHostFeature(HostFeature::AVX)) { - code.vpor(result, value, code.MConst(xword, 0x7FF0'0000'0000'0000)); + code.vpor(result, value, code.XmmConst(xword, 0x7FF0'0000'0000'0000)); } else { code.movaps(result, value); - code.por(result, code.MConst(xword, 0x7FF0'0000'0000'0000)); + code.por(result, code.XmmConst(xword, 0x7FF0'0000'0000'0000)); } code.jmp(*end, code.T_NEAR); code.L(nan); if (!ctx.FPCR().DN()) { if (code.HasHostFeature(HostFeature::AVX)) { - code.vpor(result, operand, code.MConst(xword, 0x0008'0000'0000'0000)); + code.vpor(result, operand, code.XmmConst(xword, 0x0008'0000'0000'0000)); } else { code.movaps(result, operand); - code.por(result, code.MConst(xword, 0x0008'0000'0000'0000)); + code.por(result, code.XmmConst(xword, 0x0008'0000'0000'0000)); } code.jmp(*end, code.T_NEAR); } code.L(default_nan); - code.movq(result, code.MConst(xword, 0x7FF8'0000'0000'0000)); + code.movq(result, code.XmmConst(xword, 0x7FF8'0000'0000'0000)); code.jmp(*end, code.T_NEAR); } @@ -1289,9 +1289,9 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code.vmovaps(result, code.MConst(xword, FP::FPValue())); + code.vmovaps(result, code.XmmConst(xword, FP::FPValue())); FCODE(vfnmadd231s)(result, operand1, operand2); - FCODE(vmuls)(result, result, code.MConst(xword, FP::FPValue())); + FCODE(vmuls)(result, result, code.XmmConst(xword, FP::FPValue())); ctx.reg_alloc.DefineValue(inst, result); return; @@ -1304,7 +1304,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code.vmovaps(result, code.MConst(xword, FP::FPValue())); + code.vmovaps(result, code.XmmConst(xword, FP::FPValue())); FCODE(vfnmadd231s)(result, operand1, operand2); // Detect if the intermediate result is infinity or NaN or nearly an infinity. 
@@ -1319,7 +1319,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* code.jae(*fallback, code.T_NEAR); - FCODE(vmuls)(result, result, code.MConst(xword, FP::FPValue())); + FCODE(vmuls)(result, result, code.XmmConst(xword, FP::FPValue())); code.L(*end); ctx.deferred_emits.emplace_back([=, &code, &ctx] { @@ -1348,10 +1348,10 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code.movaps(result, code.MConst(xword, FP::FPValue())); + code.movaps(result, code.XmmConst(xword, FP::FPValue())); FCODE(muls)(operand1, operand2); FCODE(subs)(result, operand1); - FCODE(muls)(result, code.MConst(xword, FP::FPValue())); + FCODE(muls)(result, code.XmmConst(xword, FP::FPValue())); ctx.reg_alloc.DefineValue(inst, operand1); return; @@ -1603,7 +1603,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { if constexpr (fsize == 64) { if (fbits != 0) { const u64 scale_factor = static_cast((fbits + 1023) << 52); - code.mulsd(src, code.MConst(xword, scale_factor)); + code.mulsd(src, code.XmmConst(xword, scale_factor)); } if (!truncating) { @@ -1612,7 +1612,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { } else { if (fbits != 0) { const u32 scale_factor = static_cast((fbits + 127) << 23); - code.mulss(src, code.MConst(xword, scale_factor)); + code.mulss(src, code.XmmConst(xword, scale_factor)); } if (!truncating) { @@ -1630,7 +1630,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { ZeroIfNaN<64>(code, src, scratch); - code.movsd(scratch, code.MConst(xword, f64_max_s64_lim)); + code.movsd(scratch, code.XmmConst(xword, f64_max_s64_lim)); code.comisd(scratch, src); code.jna(*saturate_max, code.T_NEAR); code.cvttsd2si(result, src); // 64 bit gpr @@ -1649,7 +1649,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.pxor(xmm0, xmm0); code.movaps(scratch, src); - code.subsd(scratch, code.MConst(xword, f64_max_s64_lim)); + code.subsd(scratch, code.XmmConst(xword, f64_max_s64_lim)); // these both result in zero if src/scratch are NaN code.maxsd(src, xmm0); @@ -1671,21 +1671,21 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(); ZeroIfNaN<64>(code, src, scratch); - code.minsd(src, code.MConst(xword, f64_max_s32)); + code.minsd(src, code.XmmConst(xword, f64_max_s32)); // maxsd not required as cvttsd2si results in 0x8000'0000 when out of range code.cvttsd2si(result.cvt32(), src); // 32 bit gpr } else { code.pxor(xmm0, xmm0); code.maxsd(src, xmm0); // results in a zero if src is NaN - code.minsd(src, code.MConst(xword, f64_max_u32)); + code.minsd(src, code.XmmConst(xword, f64_max_u32)); code.cvttsd2si(result, src); // 64 bit gpr } } else { const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(); ZeroIfNaN<64>(code, src, scratch); - code.maxsd(src, code.MConst(xword, unsigned_ ? f64_min_u16 : f64_min_s16)); - code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u16 : f64_max_s16)); + code.maxsd(src, code.XmmConst(xword, unsigned_ ? f64_min_u16 : f64_min_s16)); + code.minsd(src, code.XmmConst(xword, unsigned_ ? 
f64_max_u16 : f64_max_s16)); code.cvttsd2si(result, src); // 64 bit gpr } @@ -1810,7 +1810,7 @@ void EmitX64::EmitFPFixedS16ToSingle(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { const u32 scale_factor = static_cast((127 - fbits) << 23); - code.mulss(result, code.MConst(xword, scale_factor)); + code.mulss(result, code.XmmConst(xword, scale_factor)); } ctx.reg_alloc.DefineValue(inst, result); @@ -1830,7 +1830,7 @@ void EmitX64::EmitFPFixedU16ToSingle(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { const u32 scale_factor = static_cast((127 - fbits) << 23); - code.mulss(result, code.MConst(xword, scale_factor)); + code.mulss(result, code.XmmConst(xword, scale_factor)); } ctx.reg_alloc.DefineValue(inst, result); @@ -1855,7 +1855,7 @@ void EmitX64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { const u32 scale_factor = static_cast((127 - fbits) << 23); - code.mulss(result, code.MConst(xword, scale_factor)); + code.mulss(result, code.XmmConst(xword, scale_factor)); } ctx.reg_alloc.DefineValue(inst, result); @@ -1891,7 +1891,7 @@ void EmitX64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { const u32 scale_factor = static_cast((127 - fbits) << 23); - code.mulss(result, code.MConst(xword, scale_factor)); + code.mulss(result, code.XmmConst(xword, scale_factor)); } ctx.reg_alloc.DefineValue(inst, result); @@ -1911,7 +1911,7 @@ void EmitX64::EmitFPFixedS16ToDouble(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { const u64 scale_factor = static_cast((1023 - fbits) << 52); - code.mulsd(result, code.MConst(xword, scale_factor)); + code.mulsd(result, code.XmmConst(xword, scale_factor)); } ctx.reg_alloc.DefineValue(inst, result); @@ -1931,7 +1931,7 @@ void EmitX64::EmitFPFixedU16ToDouble(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { const u64 scale_factor = static_cast((1023 - fbits) << 52); - code.mulsd(result, code.MConst(xword, scale_factor)); + code.mulsd(result, code.XmmConst(xword, scale_factor)); } ctx.reg_alloc.DefineValue(inst, result); @@ -1949,7 +1949,7 @@ void EmitX64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { const u64 scale_factor = static_cast((1023 - fbits) << 52); - code.mulsd(result, code.MConst(xword, scale_factor)); + code.mulsd(result, code.XmmConst(xword, scale_factor)); } ctx.reg_alloc.DefineValue(inst, result); @@ -1976,7 +1976,7 @@ void EmitX64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { const u64 scale_factor = static_cast((1023 - fbits) << 52); - code.mulsd(to, code.MConst(xword, scale_factor)); + code.mulsd(to, code.XmmConst(xword, scale_factor)); } ctx.reg_alloc.DefineValue(inst, to); @@ -1995,7 +1995,7 @@ void EmitX64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { const u64 scale_factor = static_cast((1023 - fbits) << 52); - code.mulsd(result, code.MConst(xword, scale_factor)); + code.mulsd(result, code.XmmConst(xword, scale_factor)); } ctx.reg_alloc.DefineValue(inst, result); @@ -2014,7 +2014,7 @@ void EmitX64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { const u32 scale_factor = static_cast((127 - fbits) << 23); - code.mulss(result, code.MConst(xword, scale_factor)); + code.mulss(result, code.XmmConst(xword, scale_factor)); } ctx.reg_alloc.DefineValue(inst, result); @@ -2035,18 +2035,18 @@ void EmitX64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.movq(tmp, from); - code.punpckldq(tmp, 
code.MConst(xword, 0x4530000043300000, 0)); - code.subpd(tmp, code.MConst(xword, 0x4330000000000000, 0x4530000000000000)); + code.punpckldq(tmp, code.XmmConst(xword, 0x4530000043300000, 0)); + code.subpd(tmp, code.XmmConst(xword, 0x4330000000000000, 0x4530000000000000)); code.pshufd(result, tmp, 0b01001110); code.addpd(result, tmp); if (ctx.FPCR().RMode() == FP::RoundingMode::TowardsMinusInfinity) { - code.pand(result, code.MConst(xword, f64_non_sign_mask)); + code.pand(result, code.XmmConst(xword, f64_non_sign_mask)); } } if (fbits != 0) { const u64 scale_factor = static_cast((1023 - fbits) << 52); - code.mulsd(result, code.MConst(xword, scale_factor)); + code.mulsd(result, code.XmmConst(xword, scale_factor)); } ctx.reg_alloc.DefineValue(inst, result); @@ -2090,7 +2090,7 @@ void EmitX64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) { if (fbits != 0) { const u32 scale_factor = static_cast((127 - fbits) << 23); - code.mulss(result, code.MConst(xword, scale_factor)); + code.mulss(result, code.XmmConst(xword, scale_factor)); } ctx.reg_alloc.DefineValue(inst, result); diff --git a/src/dynarmic/backend/x64/emit_x64_packed.cpp b/src/dynarmic/backend/x64/emit_x64_packed.cpp index 4cfad5ff..a3b72235 100644 --- a/src/dynarmic/backend/x64/emit_x64_packed.cpp +++ b/src/dynarmic/backend/x64/emit_x64_packed.cpp @@ -91,8 +91,8 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) { // !(b <= a+b) == b > a+b code.movdqa(tmp_a, xmm_a); code.movdqa(tmp_b, xmm_b); - code.paddw(tmp_a, code.MConst(xword, 0x80008000)); - code.paddw(tmp_b, code.MConst(xword, 0x80008000)); + code.paddw(tmp_a, code.XmmConst(xword, 0x80008000)); + code.paddw(tmp_b, code.XmmConst(xword, 0x80008000)); code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison! ctx.reg_alloc.DefineValue(ge_inst, tmp_b); @@ -209,8 +209,8 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) { // (a >= b) == !(b > a) code.pcmpeqb(ones, ones); - code.paddw(xmm_a, code.MConst(xword, 0x80008000)); - code.paddw(xmm_b, code.MConst(xword, 0x80008000)); + code.paddw(xmm_a, code.XmmConst(xword, 0x80008000)); + code.paddw(xmm_b, code.XmmConst(xword, 0x80008000)); code.movdqa(xmm_ge, xmm_b); code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison! code.pxor(xmm_ge, ones); @@ -643,7 +643,7 @@ void EmitX64::EmitPackedAbsDiffSumU8(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); // TODO: Optimize with zero-extension detection - code.movaps(tmp, code.MConst(xword, 0x0000'0000'ffff'ffff)); + code.movaps(tmp, code.XmmConst(xword, 0x0000'0000'ffff'ffff)); code.pand(xmm_a, tmp); code.pand(xmm_b, tmp); code.psadbw(xmm_a, xmm_b); diff --git a/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/backend/x64/emit_x64_vector.cpp index 85fa8d38..8b136f0b 100644 --- a/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -486,7 +486,7 @@ static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const const u64 shift_matrix = shift_amount < 8 ? 
(0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8)) : 0x8080808080808080; - code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0); + code.gf2p8affineqb(result, code.XmmConst(xword, shift_matrix, shift_matrix), 0); return; } @@ -547,7 +547,7 @@ void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) code.pxor(tmp2, tmp2); code.psrlq(result, shift_amount); - code.movdqa(tmp1, code.MConst(xword, sign_bit, sign_bit)); + code.movdqa(tmp1, code.XmmConst(xword, sign_bit, sign_bit)); code.pand(tmp1, result); code.psubq(tmp2, tmp1); code.por(result, tmp2); @@ -599,7 +599,7 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm right_shift = xmm16; const Xbyak::Xmm tmp = xmm17; - code.vmovdqa32(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); + code.vmovdqa32(tmp, code.XmmConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); code.vpsubw(right_shift, right_shift, left_shift); @@ -634,7 +634,7 @@ void EmitX64::EmitVectorArithmeticVShift32(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code.vmovdqa(tmp, code.MConst(xword, 0x000000FF000000FF, 0x000000FF000000FF)); + code.vmovdqa(tmp, code.XmmConst(xword, 0x000000FF000000FF, 0x000000FF000000FF)); code.vpxor(right_shift, right_shift, right_shift); code.vpsubd(right_shift, right_shift, left_shift); @@ -665,7 +665,7 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm right_shift = xmm16; const Xbyak::Xmm tmp = xmm17; - code.vmovdqa32(tmp, code.MConst(xword, 0x00000000000000FF, 0x00000000000000FF)); + code.vmovdqa32(tmp, code.XmmConst(xword, 0x00000000000000FF, 0x00000000000000FF)); code.vpxorq(right_shift, right_shift, right_shift); code.vpsubq(right_shift, right_shift, left_shift); @@ -953,15 +953,15 @@ void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(); - code.movdqa(tmp1, code.MConst(xword, 0x0101010102020304, 0x0000000000000000)); + code.movdqa(tmp1, code.XmmConst(xword, 0x0101010102020304, 0x0000000000000000)); code.movdqa(tmp2, tmp1); code.pshufb(tmp2, data); code.psrlw(data, 4); - code.pand(data, code.MConst(xword, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F)); + code.pand(data, code.XmmConst(xword, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F)); code.pshufb(tmp1, data); - code.movdqa(data, code.MConst(xword, 0x0404040404040404, 0x0404040404040404)); + code.movdqa(data, code.XmmConst(xword, 0x0404040404040404, 0x0404040404040404)); code.pcmpeqb(data, tmp1); code.pand(data, tmp2); @@ -994,11 +994,11 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) { code.vpcmpeqw(zeros, zeros, zeros); code.vpcmpeqw(tmp, tmp, tmp); code.vpcmpeqw(zeros, zeros, data); - code.vpmullw(data, data, code.MConst(xword, 0xf0d3f0d3f0d3f0d3, 0xf0d3f0d3f0d3f0d3)); + code.vpmullw(data, data, code.XmmConst(xword, 0xf0d3f0d3f0d3f0d3, 0xf0d3f0d3f0d3f0d3)); code.vpsllw(tmp, tmp, 15); code.vpsllw(zeros, zeros, 7); code.vpsrlw(data, data, 12); - code.vmovdqa(result, code.MConst(xword, 0x0903060a040b0c10, 0x0f080e0207050d01)); + code.vmovdqa(result, code.XmmConst(xword, 0x0903060a040b0c10, 0x0f080e0207050d01)); code.vpor(tmp, tmp, zeros); code.vpor(data, data, tmp); code.vpshufb(result, result, 
data); @@ -1030,11 +1030,11 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) { code.pcmpeqw(zeros, zeros); code.pcmpeqw(tmp, tmp); code.pcmpeqw(zeros, data); - code.pmullw(data, code.MConst(xword, 0xf0d3f0d3f0d3f0d3, 0xf0d3f0d3f0d3f0d3)); + code.pmullw(data, code.XmmConst(xword, 0xf0d3f0d3f0d3f0d3, 0xf0d3f0d3f0d3f0d3)); code.psllw(tmp, 15); code.psllw(zeros, 7); code.psrlw(data, 12); - code.movdqa(result, code.MConst(xword, 0x0903060a040b0c10, 0x0f080e0207050d01)); + code.movdqa(result, code.XmmConst(xword, 0x0903060a040b0c10, 0x0f080e0207050d01)); code.por(tmp, zeros); code.por(data, tmp); code.pshufb(result, data); @@ -1066,7 +1066,7 @@ void EmitX64::EmitVectorDeinterleaveEven8(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code.movdqa(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); + code.movdqa(tmp, code.XmmConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.pand(lhs, tmp); code.pand(rhs, tmp); code.packuswb(lhs, rhs); @@ -1127,12 +1127,12 @@ void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst) const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); code.punpcklbw(lhs, rhs); - code.pshufb(lhs, code.MConst(xword, 0x0D'09'05'01'0C'08'04'00, 0x8080808080808080)); + code.pshufb(lhs, code.XmmConst(xword, 0x0D'09'05'01'0C'08'04'00, 0x8080808080808080)); } else { const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); - code.movdqa(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); + code.movdqa(tmp, code.XmmConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.pand(lhs, tmp); code.pand(rhs, tmp); code.packuswb(lhs, rhs); @@ -1151,7 +1151,7 @@ void EmitX64::EmitVectorDeinterleaveEvenLower16(EmitContext& ctx, IR::Inst* inst const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); code.punpcklwd(lhs, rhs); - code.pshufb(lhs, code.MConst(xword, 0x0B0A'0302'0908'0100, 0x8080'8080'8080'8080)); + code.pshufb(lhs, code.XmmConst(xword, 0x0B0A'0302'0908'0100, 0x8080'8080'8080'8080)); } else { const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); @@ -1237,7 +1237,7 @@ void EmitX64::EmitVectorDeinterleaveOddLower8(EmitContext& ctx, IR::Inst* inst) const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); code.punpcklbw(lhs, rhs); - code.pshufb(lhs, code.MConst(xword, 0x0F'0B'07'03'0E'0A'06'02, 0x8080808080808080)); + code.pshufb(lhs, code.XmmConst(xword, 0x0F'0B'07'03'0E'0A'06'02, 0x8080808080808080)); } else { const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); @@ -1259,7 +1259,7 @@ void EmitX64::EmitVectorDeinterleaveOddLower16(EmitContext& ctx, IR::Inst* inst) const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); code.punpcklwd(lhs, rhs); - code.pshufb(lhs, code.MConst(xword, 0x0F0E'0706'0D0C'0504, 0x8080'8080'8080'8080)); + code.pshufb(lhs, code.XmmConst(xword, 0x0F0E'0706'0D0C'0504, 0x8080'8080'8080'8080)); } else { const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); @@ -1488,13 +1488,13 @@ static void EmitVectorHalvingAddUnsigned(size_t esize, EmitContext& ctx, IR::Ins case 8: code.pavgb(tmp, a); code.pxor(a, b); - code.pand(a, code.MConst(xword, 0x0101010101010101, 0x0101010101010101)); + code.pand(a, code.XmmConst(xword, 0x0101010101010101, 0x0101010101010101)); code.psubb(tmp, a); break; case 16: code.pavgw(tmp, a); code.pxor(a, b); - code.pand(a, code.MConst(xword, 0x0001000100010001, 0x0001000100010001)); + 
code.pand(a, code.XmmConst(xword, 0x0001000100010001, 0x0001000100010001)); code.psubw(tmp, a); break; case 32: @@ -1529,7 +1529,7 @@ static void EmitVectorHalvingSubSigned(size_t esize, EmitContext& ctx, IR::Inst* switch (esize) { case 8: { const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code.movdqa(tmp, code.MConst(xword, 0x8080808080808080, 0x8080808080808080)); + code.movdqa(tmp, code.XmmConst(xword, 0x8080808080808080, 0x8080808080808080)); code.pxor(a, tmp); code.pxor(b, tmp); code.pavgb(b, a); @@ -1538,7 +1538,7 @@ static void EmitVectorHalvingSubSigned(size_t esize, EmitContext& ctx, IR::Inst* } case 16: { const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code.movdqa(tmp, code.MConst(xword, 0x8000800080008000, 0x8000800080008000)); + code.movdqa(tmp, code.XmmConst(xword, 0x8000800080008000, 0x8000800080008000)); code.pxor(a, tmp); code.pxor(b, tmp); code.pavgw(b, a); @@ -1700,13 +1700,13 @@ void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) { code.paddb(result, result); } else if (code.HasHostFeature(HostFeature::GFNI)) { const u64 shift_matrix = 0x0102040810204080 >> (shift_amount * 8); - code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0); + code.gf2p8affineqb(result, code.XmmConst(xword, shift_matrix, shift_matrix), 0); } else { const u64 replicand = (0xFFULL << shift_amount) & 0xFF; const u64 mask = mcl::bit::replicate_element(replicand); code.psllw(result, shift_amount); - code.pand(result, code.MConst(xword, mask, mask)); + code.pand(result, code.XmmConst(xword, mask, mask)); } ctx.reg_alloc.DefineValue(inst, result); @@ -1757,13 +1757,13 @@ void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) { code.pxor(result, result); } else if (code.HasHostFeature(HostFeature::GFNI)) { const u64 shift_matrix = 0x0102040810204080 << (shift_amount * 8); - code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0); + code.gf2p8affineqb(result, code.XmmConst(xword, shift_matrix, shift_matrix), 0); } else { const u64 replicand = 0xFEULL >> shift_amount; const u64 mask = mcl::bit::replicate_element(replicand); code.psrlw(result, shift_amount); - code.pand(result, code.MConst(xword, mask, mask)); + code.pand(result, code.XmmConst(xword, mask, mask)); } ctx.reg_alloc.DefineValue(inst, result); @@ -1817,7 +1817,7 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm right_shift = xmm16; const Xbyak::Xmm tmp = xmm17; - code.vmovdqa32(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); + code.vmovdqa32(tmp, code.XmmConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); code.vpsubw(right_shift, right_shift, left_shift); code.vpandd(left_shift, left_shift, tmp); @@ -1845,7 +1845,7 @@ void EmitX64::EmitVectorLogicalVShift32(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code.vmovdqa(tmp, code.MConst(xword, 0x000000FF000000FF, 0x000000FF000000FF)); + code.vmovdqa(tmp, code.XmmConst(xword, 0x000000FF000000FF, 0x000000FF000000FF)); code.vpxor(right_shift, right_shift, right_shift); code.vpsubd(right_shift, right_shift, left_shift); code.vpand(left_shift, left_shift, tmp); @@ -1873,7 +1873,7 @@ void EmitX64::EmitVectorLogicalVShift64(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code.vmovdqa(tmp, 
code.MConst(xword, 0x00000000000000FF, 0x00000000000000FF)); + code.vmovdqa(tmp, code.XmmConst(xword, 0x00000000000000FF, 0x00000000000000FF)); code.vpxor(right_shift, right_shift, right_shift); code.vpsubq(right_shift, right_shift, left_shift); code.vpand(left_shift, left_shift, tmp); @@ -1993,7 +1993,7 @@ void EmitX64::EmitVectorMaxU32(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code.movdqa(tmp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); + code.movdqa(tmp, code.XmmConst(xword, 0x8000000080000000, 0x8000000080000000)); const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm(); code.movdqa(tmp_b, b); @@ -2022,7 +2022,7 @@ void EmitX64::EmitVectorMaxU64(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code.vmovdqa(xmm0, code.MConst(xword, 0x8000000000000000, 0x8000000000000000)); + code.vmovdqa(xmm0, code.XmmConst(xword, 0x8000000000000000, 0x8000000000000000)); code.vpsubq(tmp, y, xmm0); code.vpsubq(xmm0, x, xmm0); code.vpcmpgtq(xmm0, tmp, xmm0); @@ -2141,7 +2141,7 @@ void EmitX64::EmitVectorMinU32(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm sint_max_plus_one = ctx.reg_alloc.ScratchXmm(); - code.movdqa(sint_max_plus_one, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); + code.movdqa(sint_max_plus_one, code.XmmConst(xword, 0x8000000080000000, 0x8000000080000000)); const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm(); code.movdqa(tmp_a, a); @@ -2172,7 +2172,7 @@ void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code.vmovdqa(xmm0, code.MConst(xword, 0x8000000000000000, 0x8000000000000000)); + code.vmovdqa(xmm0, code.XmmConst(xword, 0x8000000000000000, 0x8000000000000000)); code.vpsubq(tmp, y, xmm0); code.vpsubq(xmm0, x, xmm0); code.vpcmpgtq(xmm0, tmp, xmm0); @@ -2201,7 +2201,7 @@ void EmitX64::EmitVectorMultiply8(EmitContext& ctx, IR::Inst* inst) { code.psrlw(tmp_a, 8); code.psrlw(tmp_b, 8); code.pmullw(tmp_a, tmp_b); - code.pand(a, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); + code.pand(a, code.XmmConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.psllw(tmp_a, 8); code.por(a, tmp_a); @@ -2327,7 +2327,7 @@ void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(); code.pxor(zeros, zeros); - code.pand(a, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); + code.pand(a, code.XmmConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.packuswb(a, zeros); ctx.reg_alloc.DefineValue(inst, a); @@ -2611,7 +2611,7 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) code.movdqa(c, a); code.psllq(a, 32); - code.movdqa(tmp1, code.MConst(xword, 0x80000000'00000000, 0x80000000'00000000)); + code.movdqa(tmp1, code.XmmConst(xword, 0x80000000'00000000, 0x80000000'00000000)); code.movdqa(tmp2, tmp1); code.pand(tmp1, a); code.pand(tmp2, c); @@ -2860,7 +2860,7 @@ void EmitX64::EmitVectorPairedMaxU32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, x); } else { const Xbyak::Xmm tmp3 = ctx.reg_alloc.ScratchXmm(); - code.movdqa(tmp3, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); + code.movdqa(tmp3, code.XmmConst(xword, 0x8000000080000000, 
0x8000000080000000)); const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(); code.movdqa(tmp2, x); @@ -2948,7 +2948,7 @@ void EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, x); } else { const Xbyak::Xmm tmp3 = ctx.reg_alloc.ScratchXmm(); - code.movdqa(tmp3, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); + code.movdqa(tmp3, code.XmmConst(xword, 0x8000000080000000, 0x8000000080000000)); const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(); code.movdqa(tmp2, tmp1); @@ -3104,7 +3104,7 @@ void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) { Xbyak::Label loop; code.pxor(result, result); - code.movdqa(mask, code.MConst(xword, 0x0101010101010101, 0x0101010101010101)); + code.movdqa(mask, code.XmmConst(xword, 0x0101010101010101, 0x0101010101010101)); code.mov(counter, 8); code.L(loop); @@ -3148,7 +3148,7 @@ void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst code.pmovzxbw(xmm_a, xmm_a); code.pmovzxbw(xmm_b, xmm_b); code.pxor(result, result); - code.movdqa(mask, code.MConst(xword, 0x0001000100010001, 0x0001000100010001)); + code.movdqa(mask, code.XmmConst(xword, 0x0001000100010001, 0x0001000100010001)); code.mov(counter, 8); code.L(loop); @@ -3231,11 +3231,11 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) { code.movdqa(high_a, low_a); code.psrlw(high_a, 4); - code.movdqa(tmp1, code.MConst(xword, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F)); + code.movdqa(tmp1, code.XmmConst(xword, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F)); code.pand(high_a, tmp1); // High nibbles code.pand(low_a, tmp1); // Low nibbles - code.movdqa(tmp1, code.MConst(xword, 0x0302020102010100, 0x0403030203020201)); + code.movdqa(tmp1, code.XmmConst(xword, 0x0302020102010100, 0x0403030203020201)); code.movdqa(tmp2, tmp1); code.pshufb(tmp1, low_a); code.pshufb(tmp2, high_a); @@ -3259,10 +3259,10 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); if (code.HasHostFeature(HostFeature::GFNI)) { - code.gf2p8affineqb(data, code.MConst(xword, 0x8040201008040201, 0x8040201008040201), 0); + code.gf2p8affineqb(data, code.XmmConst(xword, 0x8040201008040201, 0x8040201008040201), 0); } else { const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm(); - code.movdqa(high_nibble_reg, code.MConst(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); + code.movdqa(high_nibble_reg, code.XmmConst(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); code.pand(high_nibble_reg, data); code.pxor(data, high_nibble_reg); code.psrld(high_nibble_reg, 4); @@ -3270,25 +3270,25 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSSE3)) { // High lookup const Xbyak::Xmm high_reversed_reg = ctx.reg_alloc.ScratchXmm(); - code.movdqa(high_reversed_reg, code.MConst(xword, 0xE060A020C0408000, 0xF070B030D0509010)); + code.movdqa(high_reversed_reg, code.XmmConst(xword, 0xE060A020C0408000, 0xF070B030D0509010)); code.pshufb(high_reversed_reg, data); // Low lookup (low nibble equivalent of the above) - code.movdqa(data, code.MConst(xword, 0x0E060A020C040800, 0x0F070B030D050901)); + code.movdqa(data, code.XmmConst(xword, 0x0E060A020C040800, 0x0F070B030D050901)); code.pshufb(data, high_nibble_reg); code.por(data, high_reversed_reg); } else { code.pslld(data, 4); code.por(data, high_nibble_reg); - code.movdqa(high_nibble_reg, code.MConst(xword, 0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC)); + 
code.movdqa(high_nibble_reg, code.XmmConst(xword, 0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC)); code.pand(high_nibble_reg, data); code.pxor(data, high_nibble_reg); code.psrld(high_nibble_reg, 2); code.pslld(data, 2); code.por(data, high_nibble_reg); - code.movdqa(high_nibble_reg, code.MConst(xword, 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA)); + code.movdqa(high_nibble_reg, code.XmmConst(xword, 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA)); code.pand(high_nibble_reg, data); code.pxor(data, high_nibble_reg); code.psrld(high_nibble_reg, 1); @@ -3421,7 +3421,7 @@ void EmitX64::EmitVectorReduceAdd16(EmitContext& ctx, IR::Inst* inst) { code.paddw(data, temp); // Add pairs of 16-bit values into 32-bit lanes - code.movdqa(temp, code.MConst(xword, 0x0001000100010001, 0x0001000100010001)); + code.movdqa(temp, code.XmmConst(xword, 0x0001000100010001, 0x0001000100010001)); code.pmaddwd(data, temp); // Sum adjacent 32-bit lanes @@ -3498,7 +3498,7 @@ static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, I switch (esize) { case 8: { const Xbyak::Xmm vec_128 = ctx.reg_alloc.ScratchXmm(); - code.movdqa(vec_128, code.MConst(xword, 0x8080808080808080, 0x8080808080808080)); + code.movdqa(vec_128, code.XmmConst(xword, 0x8080808080808080, 0x8080808080808080)); code.paddb(a, vec_128); code.paddb(b, vec_128); @@ -3508,7 +3508,7 @@ static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, I } case 16: { const Xbyak::Xmm vec_32768 = ctx.reg_alloc.ScratchXmm(); - code.movdqa(vec_32768, code.MConst(xword, 0x8000800080008000, 0x8000800080008000)); + code.movdqa(vec_32768, code.XmmConst(xword, 0x8000800080008000, 0x8000800080008000)); code.paddw(a, vec_32768); code.paddw(b, vec_32768); @@ -3891,7 +3891,7 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { code.pand(tmp, y); code.pand(sign_correction, x); code.paddd(sign_correction, tmp); - code.pand(sign_correction, code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF)); + code.pand(sign_correction, code.XmmConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF)); // calculate unsigned multiply code.movdqa(tmp, x); @@ -3930,13 +3930,13 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo const Xbyak::Address mask = [esize, &code] { switch (esize) { case 8: - return code.MConst(xword, 0x8080808080808080, 0x8080808080808080); + return code.XmmConst(xword, 0x8080808080808080, 0x8080808080808080); case 16: - return code.MConst(xword, 0x8000800080008000, 0x8000800080008000); + return code.XmmConst(xword, 0x8000800080008000, 0x8000800080008000); case 32: - return code.MConst(xword, 0x8000000080000000, 0x8000000080000000); + return code.XmmConst(xword, 0x8000000080000000, 0x8000000080000000); case 64: - return code.MConst(xword, 0x8000000000000000, 0x8000000000000000); + return code.XmmConst(xword, 0x8000000000000000, 0x8000000000000000); default: UNREACHABLE(); } @@ -4100,7 +4100,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC code.vpblendvb(xmm0, tmp, tmp2, xmm0); ctx.reg_alloc.Release(tmp2); } else { - code.pand(xmm0, code.MConst(xword, 0x8080808080808080, 0x8080808080808080)); + code.pand(xmm0, code.XmmConst(xword, 0x8080808080808080, 0x8080808080808080)); code.movdqa(tmp, xmm0); code.psrlw(tmp, 7); code.pxor(xmm0, xmm0); @@ -4201,27 +4201,27 @@ static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitC if (code.HasHostFeature(HostFeature::AVX)) { if constexpr (is_rounding) { code.vpsrlw(lower_tmp, lower_tmp, 14); - 
code.vpaddw(lower_tmp, lower_tmp, code.MConst(xword, 0x0001000100010001, 0x0001000100010001)); + code.vpaddw(lower_tmp, lower_tmp, code.XmmConst(xword, 0x0001000100010001, 0x0001000100010001)); code.vpsrlw(lower_tmp, lower_tmp, 1); } else { code.vpsrlw(lower_tmp, lower_tmp, 15); } code.vpaddw(upper_tmp, upper_tmp, upper_tmp); code.vpaddw(result, upper_tmp, lower_tmp); - code.vpcmpeqw(upper_tmp, result, code.MConst(xword, 0x8000800080008000, 0x8000800080008000)); + code.vpcmpeqw(upper_tmp, result, code.XmmConst(xword, 0x8000800080008000, 0x8000800080008000)); code.vpxor(result, result, upper_tmp); } else { code.paddw(upper_tmp, upper_tmp); if constexpr (is_rounding) { code.psrlw(lower_tmp, 14); - code.paddw(lower_tmp, code.MConst(xword, 0x0001000100010001, 0x0001000100010001)); + code.paddw(lower_tmp, code.XmmConst(xword, 0x0001000100010001, 0x0001000100010001)); code.psrlw(lower_tmp, 1); } else { code.psrlw(lower_tmp, 15); } code.movdqa(result, upper_tmp); code.paddw(result, lower_tmp); - code.movdqa(upper_tmp, code.MConst(xword, 0x8000800080008000, 0x8000800080008000)); + code.movdqa(upper_tmp, code.XmmConst(xword, 0x8000800080008000, 0x8000800080008000)); code.pcmpeqw(upper_tmp, result); code.pxor(result, upper_tmp); } @@ -4265,7 +4265,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); if constexpr (is_rounding) { - code.vmovdqa(result, code.MConst(xword, 0x0000000080000000, 0x0000000080000000)); + code.vmovdqa(result, code.XmmConst(xword, 0x0000000080000000, 0x0000000080000000)); code.vpaddq(odds, odds, result); code.vpaddq(even, even, result); } @@ -4276,7 +4276,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); - code.vpcmpeqd(mask, result, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); + code.vpcmpeqd(mask, result, code.XmmConst(xword, 0x8000000080000000, 0x8000000080000000)); code.vpxor(result, result, mask); code.pmovmskb(bit, mask); code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); @@ -4316,7 +4316,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.paddq(x, x); if constexpr (is_rounding) { - code.movdqa(result, code.MConst(xword, 0x0000000080000000, 0x0000000080000000)); + code.movdqa(result, code.XmmConst(xword, 0x0000000080000000, 0x0000000080000000)); code.paddq(tmp, result); code.paddq(x, result); } @@ -4331,7 +4331,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); - code.movdqa(tmp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); + code.movdqa(tmp, code.XmmConst(xword, 0x8000000080000000, 0x8000000080000000)); code.pcmpeqd(tmp, result); code.pxor(result, tmp); code.pmovmskb(bit, tmp); @@ -4359,10 +4359,10 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, code.pmaddwd(x, y); if (code.HasHostFeature(HostFeature::AVX)) { - code.vpcmpeqd(y, x, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); + code.vpcmpeqd(y, x, code.XmmConst(xword, 0x8000000080000000, 0x8000000080000000)); code.vpxor(x, x, y); } else { - code.movdqa(y, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); + code.movdqa(y, code.XmmConst(xword, 0x8000000080000000, 0x8000000080000000)); code.pcmpeqd(y, x); code.pxor(x, y); } @@ -4412,11 
+4412,11 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx,
     const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();

     if (code.HasHostFeature(HostFeature::AVX)) {
-        code.vpcmpeqq(y, x, code.MConst(xword, 0x8000000000000000, 0x8000000000000000));
+        code.vpcmpeqq(y, x, code.XmmConst(xword, 0x8000000000000000, 0x8000000000000000));
         code.vpxor(x, x, y);
         code.vpmovmskb(bit, y);
     } else {
-        code.movdqa(y, code.MConst(xword, 0x8000000000000000, 0x8000000000000000));
+        code.movdqa(y, code.XmmConst(xword, 0x8000000000000000, 0x8000000000000000));
         code.pcmpeqd(y, x);
         code.shufps(y, y, 0b11110101);
         code.pxor(x, y);
@@ -4565,13 +4565,13 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo
     const Xbyak::Address mask = [esize, &code] {
         switch (esize) {
         case 8:
-            return code.MConst(xword, 0x8080808080808080, 0x8080808080808080);
+            return code.XmmConst(xword, 0x8080808080808080, 0x8080808080808080);
         case 16:
-            return code.MConst(xword, 0x8000800080008000, 0x8000800080008000);
+            return code.XmmConst(xword, 0x8000800080008000, 0x8000800080008000);
         case 32:
-            return code.MConst(xword, 0x8000000080000000, 0x8000000080000000);
+            return code.XmmConst(xword, 0x8000000080000000, 0x8000000080000000);
         case 64:
-            return code.MConst(xword, 0x8000000000000000, 0x8000000000000000);
+            return code.XmmConst(xword, 0x8000000000000000, 0x8000000000000000);
         default:
             UNREACHABLE();
         }
@@ -4806,7 +4806,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
         const u64 index_count64 = mcl::bit::replicate_element(index_count);
         Xbyak::Opmask valid_indices = k1;

-        code.vpcmpb(valid_indices, indicies, code.MConst(xword, index_count64, 0), CmpInt::LessThan);
+        code.vpcmpb(valid_indices, indicies, code.XmmConst(xword, index_count64, 0), CmpInt::LessThan);

         if (is_defaults_zero) {
             defaults = defaults | valid_indices | T_z;
@@ -4868,7 +4868,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {

         code.xorps(result, result);
         code.movsd(result, xmm_table0);
-        code.paddusb(indicies, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
+        code.paddusb(indicies, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
         code.pshufb(result, indicies);

         ctx.reg_alloc.DefineValue(inst, result);
@@ -4881,7 +4881,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
         const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(table[1]);

         code.punpcklqdq(xmm_table0, xmm_table0_upper);
-        code.paddusb(indicies, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
+        code.paddusb(indicies, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
         code.pshufb(xmm_table0, indicies);

         ctx.reg_alloc.DefineValue(inst, xmm_table0);
@@ -4900,10 +4900,10 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
         }

         if (code.HasHostFeature(HostFeature::AVX)) {
-            code.vpaddusb(xmm0, indicies, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
+            code.vpaddusb(xmm0, indicies, code.XmmConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
         } else {
             code.movaps(xmm0, indicies);
-            code.paddusb(xmm0, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
+            code.paddusb(xmm0, code.XmmConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
         }
         code.pshufb(xmm_table0, indicies);
         code.pblendvb(xmm_table0, defaults);
@@ -4933,12 +4933,12 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
         }

         if (code.HasHostFeature(HostFeature::AVX)) {
-            code.vpaddusb(xmm0, indicies, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
+            code.vpaddusb(xmm0, indicies, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
         } else {
             code.movaps(xmm0, indicies);
-            code.paddusb(xmm0, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
+            code.paddusb(xmm0, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
         }
-        code.paddusb(indicies, code.MConst(xword, 0x6060606060606060, 0xFFFFFFFFFFFFFFFF));
+        code.paddusb(indicies, code.XmmConst(xword, 0x6060606060606060, 0xFFFFFFFFFFFFFFFF));
         code.pshufb(xmm_table0, xmm0);
         code.pshufb(xmm_table1, indicies);
         code.pblendvb(xmm_table0, xmm_table1);
@@ -4965,19 +4965,19 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
         }

         if (code.HasHostFeature(HostFeature::AVX)) {
-            code.vpaddusb(xmm0, indicies, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
+            code.vpaddusb(xmm0, indicies, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
         } else {
             code.movaps(xmm0, indicies);
-            code.paddusb(xmm0, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
+            code.paddusb(xmm0, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
         }
         code.pshufb(xmm_table0, indicies);
         code.pshufb(xmm_table1, indicies);
         code.pblendvb(xmm_table0, xmm_table1);
         if (code.HasHostFeature(HostFeature::AVX)) {
-            code.vpaddusb(xmm0, indicies, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
+            code.vpaddusb(xmm0, indicies, code.XmmConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
         } else {
             code.movaps(xmm0, indicies);
-            code.paddusb(xmm0, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
+            code.paddusb(xmm0, code.XmmConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
         }
         code.pblendvb(xmm_table0, defaults);

@@ -5042,7 +5042,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
         const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);
         const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]);

-        code.vptestnmb(write_mask, indicies, code.MConst(xword, 0xE0E0E0E0E0E0E0E0, 0xE0E0E0E0E0E0E0E0));
+        code.vptestnmb(write_mask, indicies, code.XmmConst(xword, 0xE0E0E0E0E0E0E0E0, 0xE0E0E0E0E0E0E0E0));
         code.vpermi2b(indicies | write_mask, xmm_table0, xmm_table1);

         ctx.reg_alloc.Release(xmm_table0);
@@ -5056,7 +5056,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {

         // Handle vector-table 2,3
         // vpcmpuble
-        code.vpcmpub(upper_mask, indicies, code.MConst(xword, 0x3F3F3F3F3F3F3F3F, 0x3F3F3F3F3F3F3F3F), CmpInt::LessEqual);
+        code.vpcmpub(upper_mask, indicies, code.XmmConst(xword, 0x3F3F3F3F3F3F3F3F, 0x3F3F3F3F3F3F3F3F), CmpInt::LessEqual);
         code.kandnw(write_mask, write_mask, upper_mask);

         const Xbyak::Xmm xmm_table2 = ctx.reg_alloc.UseScratchXmm(table[2]);
@@ -5076,7 +5076,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
         const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]);
         const Xbyak::Opmask write_mask = k1;

-        code.vptestnmb(write_mask, indicies, code.MConst(xword, 0xE0E0E0E0E0E0E0E0, 0xE0E0E0E0E0E0E0E0));
+        code.vptestnmb(write_mask, indicies, code.XmmConst(xword, 0xE0E0E0E0E0E0E0E0, 0xE0E0E0E0E0E0E0E0));
         code.vpermi2b(indicies, xmm_table0, xmm_table1);

         if (is_defaults_zero) {
@@ -5093,7 +5093,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
         const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]);
         const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);

-        code.paddusb(indicies, code.MConst(xword, 0x7070707070707070, 0x7070707070707070));
+        code.paddusb(indicies, code.XmmConst(xword, 0x7070707070707070, 0x7070707070707070));
         code.pshufb(xmm_table0, indicies);

         ctx.reg_alloc.DefineValue(inst, xmm_table0);
@@ -5106,10 +5106,10 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
         const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);

         if (code.HasHostFeature(HostFeature::AVX)) {
-            code.vpaddusb(xmm0, indicies, code.MConst(xword, 0x7070707070707070, 0x7070707070707070));
+            code.vpaddusb(xmm0, indicies, code.XmmConst(xword, 0x7070707070707070, 0x7070707070707070));
         } else {
             code.movaps(xmm0, indicies);
-            code.paddusb(xmm0, code.MConst(xword, 0x7070707070707070, 0x7070707070707070));
+            code.paddusb(xmm0, code.XmmConst(xword, 0x7070707070707070, 0x7070707070707070));
         }
         code.pshufb(xmm_table0, indicies);
         code.pblendvb(xmm_table0, defaults);
@@ -5124,12 +5124,12 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
         const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]);

         if (code.HasHostFeature(HostFeature::AVX)) {
-            code.vpaddusb(xmm0, indicies, code.MConst(xword, 0x7070707070707070, 0x7070707070707070));
+            code.vpaddusb(xmm0, indicies, code.XmmConst(xword, 0x7070707070707070, 0x7070707070707070));
         } else {
             code.movaps(xmm0, indicies);
-            code.paddusb(xmm0, code.MConst(xword, 0x7070707070707070, 0x7070707070707070));
+            code.paddusb(xmm0, code.XmmConst(xword, 0x7070707070707070, 0x7070707070707070));
         }
-        code.paddusb(indicies, code.MConst(xword, 0x6060606060606060, 0x6060606060606060));
+        code.paddusb(indicies, code.XmmConst(xword, 0x6060606060606060, 0x6060606060606060));
         code.pshufb(xmm_table0, xmm0);
         code.pshufb(xmm_table1, indicies);
         code.pblendvb(xmm_table0, xmm_table1);
@@ -5143,14 +5143,14 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
         const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
         const Xbyak::Xmm masked = xmm16;

-        code.vpandd(masked, indicies, code.MConst(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));
+        code.vpandd(masked, indicies, code.XmmConst(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));

         for (size_t i = 0; i < table_size; ++i) {
             const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(table[i]);
             const Xbyak::Opmask table_mask = k1;
             const u64 table_index = mcl::bit::replicate_element(i * 16);

-            code.vpcmpeqb(table_mask, masked, code.MConst(xword, table_index, table_index));
+            code.vpcmpeqb(table_mask, masked, code.XmmConst(xword, table_index, table_index));

             if (table_index == 0 && is_defaults_zero) {
                 code.vpshufb(result | table_mask | T_z, xmm_table, indicies);
@@ -5170,7 +5170,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
         const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
         const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm();

-        code.movaps(masked, code.MConst(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));
+        code.movaps(masked, code.XmmConst(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));
         code.pand(masked, indicies);

         for (size_t i = 0; i < table_size; ++i) {
@@ -5182,9 +5182,9 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
                 code.pxor(xmm0, xmm0);
                 code.pcmpeqb(xmm0, masked);
             } else if (code.HasHostFeature(HostFeature::AVX)) {
-                code.vpcmpeqb(xmm0, masked, code.MConst(xword, table_index, table_index));
+                code.vpcmpeqb(xmm0, masked, code.XmmConst(xword, table_index, table_index));
             } else {
-                code.movaps(xmm0, code.MConst(xword, table_index, table_index));
+                code.movaps(xmm0, code.XmmConst(xword, table_index, table_index));
                 code.pcmpeqb(xmm0, masked);
             }
             code.pshufb(xmm_table, indicies);
@@ -5242,11 +5242,11 @@ void EmitX64::EmitVectorTranspose8(EmitContext& ctx, IR::Inst* inst) {
     const bool part = args[2].GetImmediateU1();

     if (!part) {
-        code.pand(lower, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
+        code.pand(lower, code.XmmConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
         code.psllw(upper, 8);
     } else {
         code.psrlw(lower, 8);
-        code.pand(upper, code.MConst(xword, 0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00));
+        code.pand(upper, code.XmmConst(xword, 0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00));
     }

     code.por(lower, upper);
@@ -5261,11 +5261,11 @@ void EmitX64::EmitVectorTranspose16(EmitContext& ctx, IR::Inst* inst) {
     const bool part = args[2].GetImmediateU1();

     if (!part) {
-        code.pand(lower, code.MConst(xword, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF));
+        code.pand(lower, code.XmmConst(xword, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF));
         code.pslld(upper, 16);
     } else {
         code.psrld(lower, 16);
-        code.pand(upper, code.MConst(xword, 0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000));
+        code.pand(upper, code.XmmConst(xword, 0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000));
     }

     code.por(lower, upper);
@@ -5336,7 +5336,7 @@ static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx,
             const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
             const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);

-            code.movdqa(temp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
+            code.movdqa(temp, code.XmmConst(xword, 0x8000000080000000, 0x8000000080000000));
             code.pxor(x, temp);
             code.pxor(y, temp);
             code.movdqa(temp, x);
diff --git a/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
index 0120ce60..d5e01dd3 100644
--- a/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
@@ -146,24 +146,24 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, std::
 template<size_t fsize>
 Xbyak::Address GetVectorOf(BlockOfCode& code, u64 value) {
     if constexpr (fsize == 16) {
-        return code.MConst(xword, (value << 48) | (value << 32) | (value << 16) | value, (value << 48) | (value << 32) | (value << 16) | value);
+        return code.XmmConst(xword, (value << 48) | (value << 32) | (value << 16) | value, (value << 48) | (value << 32) | (value << 16) | value);
     } else if constexpr (fsize == 32) {
-        return code.MConst(xword, (value << 32) | value, (value << 32) | value);
+        return code.XmmConst(xword, (value << 32) | value, (value << 32) | value);
     } else {
         static_assert(fsize == 64);
-        return code.MConst(xword, value, value);
+        return code.XmmConst(xword, value, value);
     }
 }

 template<size_t fsize, u64 value>
 Xbyak::Address GetVectorOf(BlockOfCode& code) {
     if constexpr (fsize == 16) {
-        return code.MConst(xword, (value << 48) | (value << 32) | (value << 16) | value, (value << 48) | (value << 32) | (value << 16) | value);
+        return code.XmmConst(xword, (value << 48) | (value << 32) | (value << 16) | value, (value << 48) | (value << 32) | (value << 16) | value);
     } else if constexpr (fsize == 32) {
-        return code.MConst(xword, (value << 32) | value, (value << 32) | value);
+        return code.XmmConst(xword, (value << 32) | value, (value << 32) | value);
     } else {
         static_assert(fsize == 64);
-        return code.MConst(xword, value, value);
+        return code.XmmConst(xword, value, value);
     }
 }

@@ -227,7 +227,7 @@ void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) {

     if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
         constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, FpFixup::PosZero);
-        FCODE(vfixupimmp)(result, result, code.MConst(ptr_b, u64(nan_to_zero)), u8(0));
+        FCODE(vfixupimmp)(result, result, code.XmmConst(ptr_b, u64(nan_to_zero)), u8(0));
     } else if (code.HasHostFeature(HostFeature::AVX)) {
         FCODE(vcmpordp)(nan_mask, result, result);
         FCODE(vandp)(result, result, nan_mask);
@@ -253,7 +253,7 @@ void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list
             constexpr u64 denormal_to_zero64 = mcl::bit::replicate_element(denormal_to_zero);

-            FCODE(vmovap)(tmp, code.MConst(xword, u64(denormal_to_zero64), u64(denormal_to_zero64)));
+            FCODE(vmovap)(tmp, code.XmmConst(xword, u64(denormal_to_zero64), u64(denormal_to_zero64)));

             for (const Xbyak::Xmm& xmm : to_daz) {
                 FCODE(vfixupimmp)(xmm, xmm, tmp, u8(0));
@@ -800,9 +800,9 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
         if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
             code.vcvtudq2ps(xmm, xmm);
         } else {
-            const Xbyak::Address mem_4B000000 = code.MConst(xword, 0x4B0000004B000000, 0x4B0000004B000000);
-            const Xbyak::Address mem_53000000 = code.MConst(xword, 0x5300000053000000, 0x5300000053000000);
-            const Xbyak::Address mem_D3000080 = code.MConst(xword, 0xD3000080D3000080, 0xD3000080D3000080);
+            const Xbyak::Address mem_4B000000 = code.XmmConst(xword, 0x4B0000004B000000, 0x4B0000004B000000);
+            const Xbyak::Address mem_53000000 = code.XmmConst(xword, 0x5300000053000000, 0x5300000053000000);
+            const Xbyak::Address mem_D3000080 = code.XmmConst(xword, 0xD3000080D3000080, 0xD3000080D3000080);

             const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

@@ -813,7 +813,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
                 code.vaddps(xmm, xmm, mem_D3000080);
                 code.vaddps(xmm, tmp, xmm);
             } else {
-                const Xbyak::Address mem_0xFFFF = code.MConst(xword, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF);
+                const Xbyak::Address mem_0xFFFF = code.XmmConst(xword, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF);

                 code.movdqa(tmp, mem_0xFFFF);

@@ -831,7 +831,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
         }

         if (ctx.FPCR(fpcr_controlled).RMode() == FP::RoundingMode::TowardsMinusInfinity) {
-            code.pand(xmm, code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF));
+            code.pand(xmm, code.XmmConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF));
         }
     });

@@ -850,8 +850,8 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
         if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
             code.vcvtuqq2pd(xmm, xmm);
         } else {
-            const Xbyak::Address unpack = code.MConst(xword, 0x4530000043300000, 0);
-            const Xbyak::Address subtrahend = code.MConst(xword, 0x4330000000000000, 0x4530000000000000);
+            const Xbyak::Address unpack = code.XmmConst(xword, 0x4530000043300000, 0);
+            const Xbyak::Address subtrahend = code.XmmConst(xword, 0x4330000000000000, 0x4530000000000000);

             const Xbyak::Xmm unpack_reg = ctx.reg_alloc.ScratchXmm();
             const Xbyak::Xmm subtrahend_reg = ctx.reg_alloc.ScratchXmm();
@@ -898,7 +898,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
         }

         if (ctx.FPCR(fpcr_controlled).RMode() == FP::RoundingMode::TowardsMinusInfinity) {
-            code.pand(xmm, code.MConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF));
+            code.pand(xmm, code.XmmConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF));
         }
     });

@@ -1509,7 +1509,7 @@ void FPVectorNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {

     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Address mask = code.MConst(xword, sign_mask64, sign_mask64);
+    const Xbyak::Address mask = code.XmmConst(xword, sign_mask64, sign_mask64);

     code.xorps(a, mask);

diff --git a/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp b/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp
index 8ebf1610..9d869d72 100644
--- a/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp
@@ -97,7 +97,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
             code.vpmovq2m(k1, xmm0);
         }
         ICODE(vpsra)(result | k1, result, u8(esize - 1));
-        ICODE(vpxor)(result | k1, result, code.MConst(xword_b, msb_mask, msb_mask));
+        ICODE(vpxor)(result | k1, result, code.XmmConst(xword_b, msb_mask, msb_mask));

         code.ktestb(k1, k1);
         code.setnz(overflow);
@@ -148,10 +148,10 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     if constexpr (esize == 64) {
         code.pshufd(tmp, tmp, 0b11110101);
     }
-    code.pxor(tmp, code.MConst(xword, msb_mask, msb_mask));
+    code.pxor(tmp, code.XmmConst(xword, msb_mask, msb_mask));

     if (code.HasHostFeature(HostFeature::SSE41)) {
-        code.ptest(xmm0, code.MConst(xword, msb_mask, msb_mask));
+        code.ptest(xmm0, code.XmmConst(xword, msb_mask, msb_mask));
     } else {
         FCODE(movmskp)(overflow.cvt32(), xmm0);
         code.test(overflow.cvt32(), overflow.cvt32());
diff --git a/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/backend/x64/reg_alloc.cpp
index f50112a5..c9ded8b8 100644
--- a/src/dynarmic/backend/x64/reg_alloc.cpp
+++ b/src/dynarmic/backend/x64/reg_alloc.cpp
@@ -589,7 +589,7 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
         if (imm_value == 0) {
             MAYBE_AVX(xorps, reg, reg);
         } else {
-            MAYBE_AVX(movaps, reg, code.MConst(code.xword, imm_value));
+            MAYBE_AVX(movaps, reg, code.XmmConst(code.xword, imm_value));
         }
         return host_loc;
 }