constant_pool: Add frame parameter

This commit is contained in:
MerryMage 2018-02-20 14:04:11 +00:00
parent bd2b415850
commit 1dfce0894d
9 changed files with 39 additions and 39 deletions

View file

@ -189,8 +189,8 @@ void BlockOfCode::SwitchMxcsrOnExit() {
ldmxcsr(dword[r15 + jsi.offsetof_save_host_MXCSR]); ldmxcsr(dword[r15 + jsi.offsetof_save_host_MXCSR]);
} }
Xbyak::Address BlockOfCode::MConst(u64 lower, u64 upper) { Xbyak::Address BlockOfCode::MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
return constant_pool.GetConstant(lower, upper); return constant_pool.GetConstant(frame, lower, upper);
} }
void BlockOfCode::SwitchToFarCode() { void BlockOfCode::SwitchToFarCode() {

View file

@ -70,7 +70,7 @@ public:
} }
} }
Xbyak::Address MConst(u64 lower, u64 upper = 0); Xbyak::Address MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0);
/// Far code sits far away from the near code. Execution remains primarily in near code. /// Far code sits far away from the near code. Execution remains primarily in near code.
/// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary. /// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.

View file

@ -20,7 +20,7 @@ ConstantPool::ConstantPool(BlockOfCode& code, size_t size) : code(code), pool_si
current_pool_ptr = pool_begin; current_pool_ptr = pool_begin;
} }
Xbyak::Address ConstantPool::GetConstant(u64 lower, u64 upper) { Xbyak::Address ConstantPool::GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
const auto constant = std::make_tuple(lower, upper); const auto constant = std::make_tuple(lower, upper);
auto iter = constant_info.find(constant); auto iter = constant_info.find(constant);
if (iter == constant_info.end()) { if (iter == constant_info.end()) {
@ -30,7 +30,7 @@ Xbyak::Address ConstantPool::GetConstant(u64 lower, u64 upper) {
iter = constant_info.emplace(constant, current_pool_ptr).first; iter = constant_info.emplace(constant, current_pool_ptr).first;
current_pool_ptr += align_size; current_pool_ptr += align_size;
} }
return code.xword[code.rip + iter->second]; return frame[code.rip + iter->second];
} }
} // namespace Dynarmic::BackendX64 } // namespace Dynarmic::BackendX64

View file

@ -24,7 +24,7 @@ class ConstantPool final {
public: public:
ConstantPool(BlockOfCode& code, size_t size); ConstantPool(BlockOfCode& code, size_t size);
Xbyak::Address GetConstant(u64 lower, u64 upper = 0); Xbyak::Address GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0);
private: private:
static constexpr size_t align_size = 16; // bytes static constexpr size_t align_size = 16; // bytes

View file

@ -53,9 +53,9 @@ static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::R
static void DenormalsAreZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) { static void DenormalsAreZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
Xbyak::Label end; Xbyak::Label end;
auto mask = code.MConst(f64_non_sign_mask); auto mask = code.MConst(xword, f64_non_sign_mask);
mask.setBit(64); mask.setBit(64);
auto penult_denormal = code.MConst(f64_penultimate_positive_denormal); auto penult_denormal = code.MConst(xword, f64_penultimate_positive_denormal);
penult_denormal.setBit(64); penult_denormal.setBit(64);
code.movq(gpr_scratch, xmm_value); code.movq(gpr_scratch, xmm_value);
@ -84,9 +84,9 @@ static void FlushToZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32
static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) { static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
Xbyak::Label end; Xbyak::Label end;
auto mask = code.MConst(f64_non_sign_mask); auto mask = code.MConst(xword, f64_non_sign_mask);
mask.setBit(64); mask.setBit(64);
auto penult_denormal = code.MConst(f64_penultimate_positive_denormal); auto penult_denormal = code.MConst(xword, f64_penultimate_positive_denormal);
penult_denormal.setBit(64); penult_denormal.setBit(64);
code.movq(gpr_scratch, xmm_value); code.movq(gpr_scratch, xmm_value);
@ -142,7 +142,7 @@ static void DefaultNaN32(BlockOfCode& code, Xbyak::Xmm xmm_value) {
Xbyak::Label end; Xbyak::Label end;
code.ucomiss(xmm_value, xmm_value); code.ucomiss(xmm_value, xmm_value);
code.jnp(end); code.jnp(end);
code.movaps(xmm_value, code.MConst(f32_nan)); code.movaps(xmm_value, code.MConst(xword, f32_nan));
code.L(end); code.L(end);
} }
@ -181,7 +181,7 @@ static void DefaultNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value) {
Xbyak::Label end; Xbyak::Label end;
code.ucomisd(xmm_value, xmm_value); code.ucomisd(xmm_value, xmm_value);
code.jnp(end); code.jnp(end);
code.movaps(xmm_value, code.MConst(f64_nan)); code.movaps(xmm_value, code.MConst(xword, f64_nan));
code.L(end); code.L(end);
} }
@ -193,7 +193,7 @@ static Xbyak::Label ProcessNaN32(BlockOfCode& code, Xbyak::Xmm a) {
code.SwitchToFarCode(); code.SwitchToFarCode();
code.L(nan); code.L(nan);
code.orps(a, code.MConst(0x00400000)); code.orps(a, code.MConst(xword, 0x00400000));
code.jmp(end, code.T_NEAR); code.jmp(end, code.T_NEAR);
code.SwitchToNearCode(); code.SwitchToNearCode();
@ -208,7 +208,7 @@ static Xbyak::Label ProcessNaN64(BlockOfCode& code, Xbyak::Xmm a) {
code.SwitchToFarCode(); code.SwitchToFarCode();
code.L(nan); code.L(nan);
code.orps(a, code.MConst(0x0008'0000'0000'0000)); code.orps(a, code.MConst(xword, 0x0008'0000'0000'0000));
code.jmp(end, code.T_NEAR); code.jmp(end, code.T_NEAR);
code.SwitchToNearCode(); code.SwitchToNearCode();
@ -355,7 +355,7 @@ void EmitX64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
code.pand(result, code.MConst(f32_non_sign_mask)); code.pand(result, code.MConst(xword, f32_non_sign_mask));
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -364,7 +364,7 @@ void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
code.pand(result, code.MConst(f64_non_sign_mask)); code.pand(result, code.MConst(xword, f64_non_sign_mask));
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -373,7 +373,7 @@ void EmitX64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
code.pxor(result, code.MConst(f32_negative_zero)); code.pxor(result, code.MConst(xword, f32_negative_zero));
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -382,7 +382,7 @@ void EmitX64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
code.pxor(result, code.MConst(f64_negative_zero)); code.pxor(result, code.MConst(xword, f64_negative_zero));
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -612,8 +612,8 @@ void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) {
} }
// Clamp to output range // Clamp to output range
ZeroIfNaN64(code, from, xmm_scratch); ZeroIfNaN64(code, from, xmm_scratch);
code.minsd(from, code.MConst(f64_max_s32)); code.minsd(from, code.MConst(xword, f64_max_s32));
code.maxsd(from, code.MConst(f64_min_s32)); code.maxsd(from, code.MConst(xword, f64_min_s32));
// Second time is for real // Second time is for real
if (round_towards_zero) { if (round_towards_zero) {
code.cvttsd2si(to, from); // 32 bit gpr code.cvttsd2si(to, from); // 32 bit gpr
@ -644,8 +644,8 @@ void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) {
code.cvtss2sd(from, from); code.cvtss2sd(from, from);
// Clamp to output range // Clamp to output range
ZeroIfNaN64(code, from, xmm_scratch); ZeroIfNaN64(code, from, xmm_scratch);
code.minsd(from, code.MConst(f64_max_u32)); code.minsd(from, code.MConst(xword, f64_max_u32));
code.maxsd(from, code.MConst(f64_min_u32)); code.maxsd(from, code.MConst(xword, f64_min_u32));
if (round_towards_zero) { if (round_towards_zero) {
code.cvttsd2si(to, from); // 64 bit gpr code.cvttsd2si(to, from); // 64 bit gpr
} else { } else {
@ -676,8 +676,8 @@ void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) {
} }
// Clamp to output range // Clamp to output range
ZeroIfNaN64(code, from, xmm_scratch); ZeroIfNaN64(code, from, xmm_scratch);
code.minsd(from, code.MConst(f64_max_s32)); code.minsd(from, code.MConst(xword, f64_max_s32));
code.maxsd(from, code.MConst(f64_min_s32)); code.maxsd(from, code.MConst(xword, f64_min_s32));
// Second time is for real // Second time is for real
if (round_towards_zero) { if (round_towards_zero) {
code.cvttsd2si(to, from); // 32 bit gpr code.cvttsd2si(to, from); // 32 bit gpr
@ -704,8 +704,8 @@ void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) {
} }
// Clamp to output range // Clamp to output range
ZeroIfNaN64(code, from, xmm_scratch); ZeroIfNaN64(code, from, xmm_scratch);
code.minsd(from, code.MConst(f64_max_u32)); code.minsd(from, code.MConst(xword, f64_max_u32));
code.maxsd(from, code.MConst(f64_min_u32)); code.maxsd(from, code.MConst(xword, f64_min_u32));
if (round_towards_zero) { if (round_towards_zero) {
code.cvttsd2si(to, from); // 64 bit gpr code.cvttsd2si(to, from); // 64 bit gpr
} else { } else {

View file

@ -100,8 +100,8 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
// !(b <= a+b) == b > a+b // !(b <= a+b) == b > a+b
code.movdqa(tmp_a, xmm_a); code.movdqa(tmp_a, xmm_a);
code.movdqa(tmp_b, xmm_b); code.movdqa(tmp_b, xmm_b);
code.paddw(tmp_a, code.MConst(0x80008000)); code.paddw(tmp_a, code.MConst(xword, 0x80008000));
code.paddw(tmp_b, code.MConst(0x80008000)); code.paddw(tmp_b, code.MConst(xword, 0x80008000));
code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison! code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
ctx.reg_alloc.DefineValue(ge_inst, tmp_b); ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
@ -227,8 +227,8 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
// (a >= b) == !(b > a) // (a >= b) == !(b > a)
code.pcmpeqb(ones, ones); code.pcmpeqb(ones, ones);
code.paddw(xmm_a, code.MConst(0x80008000)); code.paddw(xmm_a, code.MConst(xword, 0x80008000));
code.paddw(xmm_b, code.MConst(0x80008000)); code.paddw(xmm_b, code.MConst(xword, 0x80008000));
code.movdqa(xmm_ge, xmm_b); code.movdqa(xmm_ge, xmm_b);
code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison! code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
code.pxor(xmm_ge, ones); code.pxor(xmm_ge, ones);

View file

@ -327,7 +327,7 @@ void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst)
code.pxor(tmp2, tmp2); code.pxor(tmp2, tmp2);
code.psrlq(result, shift_amount); code.psrlq(result, shift_amount);
code.movdqa(tmp1, code.MConst(sign_bit, sign_bit)); code.movdqa(tmp1, code.MConst(xword, sign_bit, sign_bit));
code.pand(tmp1, result); code.pand(tmp1, result);
code.psubq(tmp2, tmp1); code.psubq(tmp2, tmp1);
code.por(result, tmp2); code.por(result, tmp2);
@ -779,7 +779,7 @@ void EmitX64::EmitVectorMultiply8(EmitContext& ctx, IR::Inst* inst) {
code.psrlw(tmp_a, 8); code.psrlw(tmp_a, 8);
code.psrlw(tmp_b, 8); code.psrlw(tmp_b, 8);
code.pmullw(tmp_a, tmp_b); code.pmullw(tmp_a, tmp_b);
code.pand(a, code.MConst(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.pand(a, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
code.psllw(tmp_a, 8); code.psllw(tmp_a, 8);
code.por(a, tmp_a); code.por(a, tmp_a);
@ -839,7 +839,7 @@ void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) {
// TODO: AVX512F implementation // TODO: AVX512F implementation
code.pxor(zeros, zeros); code.pxor(zeros, zeros);
code.pand(a, code.MConst(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.pand(a, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
code.packuswb(a, zeros); code.packuswb(a, zeros);
ctx.reg_alloc.DefineValue(inst, a); ctx.reg_alloc.DefineValue(inst, a);
@ -853,7 +853,7 @@ void EmitX64::EmitVectorNarrow32(EmitContext& ctx, IR::Inst* inst) {
// TODO: AVX512F implementation // TODO: AVX512F implementation
code.pxor(zeros, zeros); code.pxor(zeros, zeros);
code.pand(a, code.MConst(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF)); code.pand(a, code.MConst(xword, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF));
code.packusdw(a, zeros); code.packusdw(a, zeros);
ctx.reg_alloc.DefineValue(inst, a); ctx.reg_alloc.DefineValue(inst, a);
@ -1056,11 +1056,11 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
code.movdqa(high_a, low_a); code.movdqa(high_a, low_a);
code.psrlw(high_a, 4); code.psrlw(high_a, 4);
code.movdqa(tmp1, code.MConst(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F)); code.movdqa(tmp1, code.MConst(xword, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F));
code.pand(high_a, tmp1); // High nibbles code.pand(high_a, tmp1); // High nibbles
code.pand(low_a, tmp1); // Low nibbles code.pand(low_a, tmp1); // Low nibbles
code.movdqa(tmp1, code.MConst(0x0302020102010100, 0x0403030203020201)); code.movdqa(tmp1, code.MConst(xword, 0x0302020102010100, 0x0403030203020201));
code.movdqa(tmp2, tmp1); code.movdqa(tmp2, tmp1);
code.pshufb(tmp1, low_a); code.pshufb(tmp1, low_a);
code.pshufb(tmp2, high_a); code.pshufb(tmp2, high_a);

View file

@ -32,7 +32,7 @@ static void EmitVectorOperation32(BlockOfCode& code, EmitContext& ctx, IR::Inst*
code.cmpordps(nan_mask, nan_mask); code.cmpordps(nan_mask, nan_mask);
code.andps(xmm_a, nan_mask); code.andps(xmm_a, nan_mask);
code.xorps(nan_mask, tmp); code.xorps(nan_mask, tmp);
code.andps(nan_mask, code.MConst(0x7fc0'0000'7fc0'0000, 0x7fc0'0000'7fc0'0000)); code.andps(nan_mask, code.MConst(xword, 0x7fc0'0000'7fc0'0000, 0x7fc0'0000'7fc0'0000));
code.orps(xmm_a, nan_mask); code.orps(xmm_a, nan_mask);
} }
@ -114,7 +114,7 @@ static void EmitVectorOperation64(BlockOfCode& code, EmitContext& ctx, IR::Inst*
code.cmpordpd(nan_mask, nan_mask); code.cmpordpd(nan_mask, nan_mask);
code.andps(xmm_a, nan_mask); code.andps(xmm_a, nan_mask);
code.xorps(nan_mask, tmp); code.xorps(nan_mask, tmp);
code.andps(nan_mask, code.MConst(0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000)); code.andps(nan_mask, code.MConst(xword, 0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000));
code.orps(xmm_a, nan_mask); code.orps(xmm_a, nan_mask);
} }

View file

@ -470,7 +470,7 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
if (imm_value == 0) if (imm_value == 0)
code.pxor(reg, reg); code.pxor(reg, reg);
else else
code.movdqa(reg, code.MConst(imm_value)); // TODO: movaps/movapd more appropriate sometimes code.movdqa(reg, code.MConst(code.xword, imm_value)); // TODO: movaps/movapd more appropriate sometimes
return host_loc; return host_loc;
} }