diff --git a/src/backend_x64/block_of_code.cpp b/src/backend_x64/block_of_code.cpp index 0153035b..505998ea 100644 --- a/src/backend_x64/block_of_code.cpp +++ b/src/backend_x64/block_of_code.cpp @@ -189,8 +189,8 @@ void BlockOfCode::SwitchMxcsrOnExit() { ldmxcsr(dword[r15 + jsi.offsetof_save_host_MXCSR]); } -Xbyak::Address BlockOfCode::MConst(u64 lower, u64 upper) { - return constant_pool.GetConstant(lower, upper); +Xbyak::Address BlockOfCode::MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) { + return constant_pool.GetConstant(frame, lower, upper); } void BlockOfCode::SwitchToFarCode() { diff --git a/src/backend_x64/block_of_code.h b/src/backend_x64/block_of_code.h index e353fd1f..6b6eb6e0 100644 --- a/src/backend_x64/block_of_code.h +++ b/src/backend_x64/block_of_code.h @@ -70,7 +70,7 @@ public: } } - Xbyak::Address MConst(u64 lower, u64 upper = 0); + Xbyak::Address MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0); /// Far code sits far away from the near code. Execution remains primarily in near code. /// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary. diff --git a/src/backend_x64/constant_pool.cpp b/src/backend_x64/constant_pool.cpp index 06b5f934..80c438fd 100644 --- a/src/backend_x64/constant_pool.cpp +++ b/src/backend_x64/constant_pool.cpp @@ -20,7 +20,7 @@ ConstantPool::ConstantPool(BlockOfCode& code, size_t size) : code(code), pool_si current_pool_ptr = pool_begin; } -Xbyak::Address ConstantPool::GetConstant(u64 lower, u64 upper) { +Xbyak::Address ConstantPool::GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) { const auto constant = std::make_tuple(lower, upper); auto iter = constant_info.find(constant); if (iter == constant_info.end()) { @@ -30,7 +30,7 @@ Xbyak::Address ConstantPool::GetConstant(u64 lower, u64 upper) { iter = constant_info.emplace(constant, current_pool_ptr).first; current_pool_ptr += align_size; } - return code.xword[code.rip + iter->second]; + return frame[code.rip + iter->second]; } } // namespace Dynarmic::BackendX64 diff --git a/src/backend_x64/constant_pool.h b/src/backend_x64/constant_pool.h index b4b6eb0f..95372692 100644 --- a/src/backend_x64/constant_pool.h +++ b/src/backend_x64/constant_pool.h @@ -24,7 +24,7 @@ class ConstantPool final { public: ConstantPool(BlockOfCode& code, size_t size); - Xbyak::Address GetConstant(u64 lower, u64 upper = 0); + Xbyak::Address GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0); private: static constexpr size_t align_size = 16; // bytes diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp index 42275a9f..5b45d5a0 100644 --- a/src/backend_x64/emit_x64_floating_point.cpp +++ b/src/backend_x64/emit_x64_floating_point.cpp @@ -53,9 +53,9 @@ static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::R static void DenormalsAreZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) { Xbyak::Label end; - auto mask = code.MConst(f64_non_sign_mask); + auto mask = code.MConst(xword, f64_non_sign_mask); mask.setBit(64); - auto penult_denormal = code.MConst(f64_penultimate_positive_denormal); + auto penult_denormal = code.MConst(xword, f64_penultimate_positive_denormal); penult_denormal.setBit(64); code.movq(gpr_scratch, xmm_value); @@ -84,9 +84,9 @@ static void FlushToZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) { Xbyak::Label end; - auto mask = code.MConst(f64_non_sign_mask); + auto mask = code.MConst(xword, f64_non_sign_mask); mask.setBit(64); - auto penult_denormal = code.MConst(f64_penultimate_positive_denormal); + auto penult_denormal = code.MConst(xword, f64_penultimate_positive_denormal); penult_denormal.setBit(64); code.movq(gpr_scratch, xmm_value); @@ -142,7 +142,7 @@ static void DefaultNaN32(BlockOfCode& code, Xbyak::Xmm xmm_value) { Xbyak::Label end; code.ucomiss(xmm_value, xmm_value); code.jnp(end); - code.movaps(xmm_value, code.MConst(f32_nan)); + code.movaps(xmm_value, code.MConst(xword, f32_nan)); code.L(end); } @@ -181,7 +181,7 @@ static void DefaultNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value) { Xbyak::Label end; code.ucomisd(xmm_value, xmm_value); code.jnp(end); - code.movaps(xmm_value, code.MConst(f64_nan)); + code.movaps(xmm_value, code.MConst(xword, f64_nan)); code.L(end); } @@ -193,7 +193,7 @@ static Xbyak::Label ProcessNaN32(BlockOfCode& code, Xbyak::Xmm a) { code.SwitchToFarCode(); code.L(nan); - code.orps(a, code.MConst(0x00400000)); + code.orps(a, code.MConst(xword, 0x00400000)); code.jmp(end, code.T_NEAR); code.SwitchToNearCode(); @@ -208,7 +208,7 @@ static Xbyak::Label ProcessNaN64(BlockOfCode& code, Xbyak::Xmm a) { code.SwitchToFarCode(); code.L(nan); - code.orps(a, code.MConst(0x0008'0000'0000'0000)); + code.orps(a, code.MConst(xword, 0x0008'0000'0000'0000)); code.jmp(end, code.T_NEAR); code.SwitchToNearCode(); @@ -355,7 +355,7 @@ void EmitX64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - code.pand(result, code.MConst(f32_non_sign_mask)); + code.pand(result, code.MConst(xword, f32_non_sign_mask)); ctx.reg_alloc.DefineValue(inst, result); } @@ -364,7 +364,7 @@ void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - code.pand(result, code.MConst(f64_non_sign_mask)); + code.pand(result, code.MConst(xword, f64_non_sign_mask)); ctx.reg_alloc.DefineValue(inst, result); } @@ -373,7 +373,7 @@ void EmitX64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - code.pxor(result, code.MConst(f32_negative_zero)); + code.pxor(result, code.MConst(xword, f32_negative_zero)); ctx.reg_alloc.DefineValue(inst, result); } @@ -382,7 +382,7 @@ void EmitX64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - code.pxor(result, code.MConst(f64_negative_zero)); + code.pxor(result, code.MConst(xword, f64_negative_zero)); ctx.reg_alloc.DefineValue(inst, result); } @@ -612,8 +612,8 @@ void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) { } // Clamp to output range ZeroIfNaN64(code, from, xmm_scratch); - code.minsd(from, code.MConst(f64_max_s32)); - code.maxsd(from, code.MConst(f64_min_s32)); + code.minsd(from, code.MConst(xword, f64_max_s32)); + code.maxsd(from, code.MConst(xword, f64_min_s32)); // Second time is for real if (round_towards_zero) { code.cvttsd2si(to, from); // 32 bit gpr @@ -644,8 +644,8 @@ void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) { code.cvtss2sd(from, from); // Clamp to output range ZeroIfNaN64(code, from, xmm_scratch); - code.minsd(from, code.MConst(f64_max_u32)); - code.maxsd(from, code.MConst(f64_min_u32)); + code.minsd(from, code.MConst(xword, f64_max_u32)); + code.maxsd(from, code.MConst(xword, f64_min_u32)); if (round_towards_zero) { code.cvttsd2si(to, from); // 64 bit gpr } else { @@ -676,8 +676,8 @@ void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) { } // Clamp to output range ZeroIfNaN64(code, from, xmm_scratch); - code.minsd(from, code.MConst(f64_max_s32)); - code.maxsd(from, code.MConst(f64_min_s32)); + code.minsd(from, code.MConst(xword, f64_max_s32)); + code.maxsd(from, code.MConst(xword, f64_min_s32)); // Second time is for real if (round_towards_zero) { code.cvttsd2si(to, from); // 32 bit gpr @@ -704,8 +704,8 @@ void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) { } // Clamp to output range ZeroIfNaN64(code, from, xmm_scratch); - code.minsd(from, code.MConst(f64_max_u32)); - code.maxsd(from, code.MConst(f64_min_u32)); + code.minsd(from, code.MConst(xword, f64_max_u32)); + code.maxsd(from, code.MConst(xword, f64_min_u32)); if (round_towards_zero) { code.cvttsd2si(to, from); // 64 bit gpr } else { diff --git a/src/backend_x64/emit_x64_packed.cpp b/src/backend_x64/emit_x64_packed.cpp index 4c5015f7..a2a543cb 100644 --- a/src/backend_x64/emit_x64_packed.cpp +++ b/src/backend_x64/emit_x64_packed.cpp @@ -100,8 +100,8 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) { // !(b <= a+b) == b > a+b code.movdqa(tmp_a, xmm_a); code.movdqa(tmp_b, xmm_b); - code.paddw(tmp_a, code.MConst(0x80008000)); - code.paddw(tmp_b, code.MConst(0x80008000)); + code.paddw(tmp_a, code.MConst(xword, 0x80008000)); + code.paddw(tmp_b, code.MConst(xword, 0x80008000)); code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison! ctx.reg_alloc.DefineValue(ge_inst, tmp_b); @@ -227,8 +227,8 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) { // (a >= b) == !(b > a) code.pcmpeqb(ones, ones); - code.paddw(xmm_a, code.MConst(0x80008000)); - code.paddw(xmm_b, code.MConst(0x80008000)); + code.paddw(xmm_a, code.MConst(xword, 0x80008000)); + code.paddw(xmm_b, code.MConst(xword, 0x80008000)); code.movdqa(xmm_ge, xmm_b); code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison! code.pxor(xmm_ge, ones); diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp index c32edb08..0077cd2a 100644 --- a/src/backend_x64/emit_x64_vector.cpp +++ b/src/backend_x64/emit_x64_vector.cpp @@ -327,7 +327,7 @@ void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) code.pxor(tmp2, tmp2); code.psrlq(result, shift_amount); - code.movdqa(tmp1, code.MConst(sign_bit, sign_bit)); + code.movdqa(tmp1, code.MConst(xword, sign_bit, sign_bit)); code.pand(tmp1, result); code.psubq(tmp2, tmp1); code.por(result, tmp2); @@ -779,7 +779,7 @@ void EmitX64::EmitVectorMultiply8(EmitContext& ctx, IR::Inst* inst) { code.psrlw(tmp_a, 8); code.psrlw(tmp_b, 8); code.pmullw(tmp_a, tmp_b); - code.pand(a, code.MConst(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); + code.pand(a, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.psllw(tmp_a, 8); code.por(a, tmp_a); @@ -839,7 +839,7 @@ void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) { // TODO: AVX512F implementation code.pxor(zeros, zeros); - code.pand(a, code.MConst(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); + code.pand(a, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.packuswb(a, zeros); ctx.reg_alloc.DefineValue(inst, a); @@ -853,7 +853,7 @@ void EmitX64::EmitVectorNarrow32(EmitContext& ctx, IR::Inst* inst) { // TODO: AVX512F implementation code.pxor(zeros, zeros); - code.pand(a, code.MConst(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF)); + code.pand(a, code.MConst(xword, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF)); code.packusdw(a, zeros); ctx.reg_alloc.DefineValue(inst, a); @@ -1056,11 +1056,11 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) { code.movdqa(high_a, low_a); code.psrlw(high_a, 4); - code.movdqa(tmp1, code.MConst(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F)); + code.movdqa(tmp1, code.MConst(xword, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F)); code.pand(high_a, tmp1); // High nibbles code.pand(low_a, tmp1); // Low nibbles - code.movdqa(tmp1, code.MConst(0x0302020102010100, 0x0403030203020201)); + code.movdqa(tmp1, code.MConst(xword, 0x0302020102010100, 0x0403030203020201)); code.movdqa(tmp2, tmp1); code.pshufb(tmp1, low_a); code.pshufb(tmp2, high_a); diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp index e740f677..3067500a 100644 --- a/src/backend_x64/emit_x64_vector_floating_point.cpp +++ b/src/backend_x64/emit_x64_vector_floating_point.cpp @@ -32,7 +32,7 @@ static void EmitVectorOperation32(BlockOfCode& code, EmitContext& ctx, IR::Inst* code.cmpordps(nan_mask, nan_mask); code.andps(xmm_a, nan_mask); code.xorps(nan_mask, tmp); - code.andps(nan_mask, code.MConst(0x7fc0'0000'7fc0'0000, 0x7fc0'0000'7fc0'0000)); + code.andps(nan_mask, code.MConst(xword, 0x7fc0'0000'7fc0'0000, 0x7fc0'0000'7fc0'0000)); code.orps(xmm_a, nan_mask); } @@ -114,7 +114,7 @@ static void EmitVectorOperation64(BlockOfCode& code, EmitContext& ctx, IR::Inst* code.cmpordpd(nan_mask, nan_mask); code.andps(xmm_a, nan_mask); code.xorps(nan_mask, tmp); - code.andps(nan_mask, code.MConst(0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000)); + code.andps(nan_mask, code.MConst(xword, 0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000)); code.orps(xmm_a, nan_mask); } diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp index 055f0ea3..809e02e7 100644 --- a/src/backend_x64/reg_alloc.cpp +++ b/src/backend_x64/reg_alloc.cpp @@ -470,7 +470,7 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) { if (imm_value == 0) code.pxor(reg, reg); else - code.movdqa(reg, code.MConst(imm_value)); // TODO: movaps/movapd more appropriate sometimes + code.movdqa(reg, code.MConst(code.xword, imm_value)); // TODO: movaps/movapd more appropriate sometimes return host_loc; }