reg_alloc: Inform RegAlloc about rsp changes

This commit is contained in:
MerryMage 2021-05-07 11:29:57 +01:00
parent 05a6b5f623
commit 5ebe11c329
8 changed files with 51 additions and 31 deletions

View file

@ -1101,7 +1101,7 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1)); code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf)); code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
code.sub(rsp, 16 + ABI_SHADOW_SPACE); ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
code.CallLambda( code.CallLambda(
[](A64::UserConfig& conf, u64 vaddr, A64::Vector& ret) { [](A64::UserConfig& conf, u64 vaddr, A64::Vector& ret) {
@ -1111,7 +1111,7 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
} }
); );
code.movups(result, xword[rsp + ABI_SHADOW_SPACE]); code.movups(result, xword[rsp + ABI_SHADOW_SPACE]);
code.add(rsp, 16 + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -1170,7 +1170,7 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) {
} }
); );
} else { } else {
code.sub(rsp, 16 + ABI_SHADOW_SPACE); ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
code.movaps(xword[code.ABI_PARAM3], xmm1); code.movaps(xword[code.ABI_PARAM3], xmm1);
code.CallLambda( code.CallLambda(
@ -1181,7 +1181,7 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) {
}) ? 0 : 1; }) ? 0 : 1;
} }
); );
code.add(rsp, 16 + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
} }
code.L(end); code.L(end);
} }

View file

@ -24,7 +24,8 @@ static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, Block
ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr); ctx.reg_alloc.HostCall(nullptr);
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + sizeof(AES::State)]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + sizeof(AES::State)]);
@ -32,7 +33,7 @@ static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, Block
code.CallFunction(fn); code.CallFunction(fn);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE]); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE]);
code.add(rsp, stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }

View file

@ -671,11 +671,11 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst, args[0], args[1], args[2]); ctx.reg_alloc.HostCall(inst, args[0], args[1], args[2]);
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
#ifdef _WIN32 #ifdef _WIN32
code.sub(rsp, 16 + ABI_SHADOW_SPACE); ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(qword[rsp + ABI_SHADOW_SPACE], rax); code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
code.CallFunction(&FP::FPMulAdd<FPT>); code.CallFunction(&FP::FPMulAdd<FPT>);
code.add(rsp, 16 + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
#else #else
code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPMulAdd<FPT>); code.CallFunction(&FP::FPMulAdd<FPT>);

View file

@ -59,7 +59,7 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr); ctx.reg_alloc.HostCall(nullptr);
code.sub(rsp, stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
@ -67,7 +67,7 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
code.CallFunction(fn); code.CallFunction(fn);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.add(rsp, stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -82,7 +82,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr); ctx.reg_alloc.HostCall(nullptr);
code.sub(rsp, stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
@ -90,7 +90,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
code.CallFunction(fn); code.CallFunction(fn);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.add(rsp, stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
@ -108,7 +108,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr); ctx.reg_alloc.HostCall(nullptr);
code.sub(rsp, stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
@ -118,7 +118,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
code.CallFunction(fn); code.CallFunction(fn);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.add(rsp, stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
@ -136,7 +136,7 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr); ctx.reg_alloc.HostCall(nullptr);
code.sub(rsp, stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
@ -146,7 +146,7 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
code.CallFunction(fn); code.CallFunction(fn);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.add(rsp, stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -4272,7 +4272,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
} }
const u32 stack_space = static_cast<u32>(6 * 8); const u32 stack_space = static_cast<u32>(6 * 8);
code.sub(rsp, stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
for (size_t i = 0; i < table_size; ++i) { for (size_t i = 0; i < table_size; ++i) {
const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[i]); const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[i]);
code.movq(qword[rsp + ABI_SHADOW_SPACE + i * 8], table_value); code.movq(qword[rsp + ABI_SHADOW_SPACE + i * 8], table_value);
@ -4304,7 +4304,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
); );
code.movq(result, qword[rsp + ABI_SHADOW_SPACE + 4 * 8]); code.movq(result, qword[rsp + ABI_SHADOW_SPACE + 4 * 8]);
code.add(rsp, stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -4402,7 +4402,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
} }
const u32 stack_space = static_cast<u32>((table_size + 2) * 16); const u32 stack_space = static_cast<u32>((table_size + 2) * 16);
code.sub(rsp, stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
for (size_t i = 0; i < table_size; ++i) { for (size_t i = 0; i < table_size; ++i) {
const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[i]); const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[i]);
code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], table_value); code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], table_value);
@ -4434,7 +4434,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
); );
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]);
code.add(rsp, stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }

View file

@ -21,14 +21,4 @@ Xbyak::Xmm HostLocToXmm(HostLoc loc) {
return Xbyak::Xmm(static_cast<int>(loc) - static_cast<int>(HostLoc::XMM0)); return Xbyak::Xmm(static_cast<int>(loc) - static_cast<int>(HostLoc::XMM0));
} }
// Translates a spill HostLoc into the xword memory operand that backs it.
//
// Asserts that `loc` is a spill location and that its zero-based index is
// below SpillCount. The returned operand is addressed relative to rsp: it
// skips ABI_SHADOW_SPACE, then the offset of the `spill` member inside
// StackLayout, then `i` slots of sizeof(u64) * 2 bytes each.
//
// NOTE(review): the offset is computed from rsp alone, so the result
// presumably only names the right slot while rsp has not been moved since
// the StackLayout area was established -- confirm against callers.
Xbyak::Address SpillToOpArg(HostLoc loc) {
ASSERT(HostLocIsSpill(loc));
// Zero-based slot index: distance of `loc` from the first spill HostLoc.
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations");
// Presumably what makes the unqualified `xword` and `rsp` below resolve.
using namespace Xbyak::util;
return xword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(u64) * 2];
}
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -110,6 +110,5 @@ const HostLocList any_xmm = {
Xbyak::Reg64 HostLocToReg64(HostLoc loc); Xbyak::Reg64 HostLocToReg64(HostLoc loc);
Xbyak::Xmm HostLocToXmm(HostLoc loc); Xbyak::Xmm HostLocToXmm(HostLoc loc);
Xbyak::Address SpillToOpArg(HostLoc loc);
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -438,6 +438,20 @@ void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_r
} }
} }
// Reserves `stack_space` bytes below the current rsp and records the amount
// in reserved_stack_space.
//
// Preconditions (asserted):
//  - `stack_space` is less than the maximum s32 value, so the narrowing cast
//    passed to code.sub below cannot truncate it.
//  - no reservation is currently recorded (reserved_stack_space == 0), i.e.
//    reservations cannot be nested.
//
// The matching ReleaseStackSpace call must be given the same size.
void RegAlloc::AllocStackSpace(size_t stack_space) {
ASSERT(stack_space < static_cast<size_t>(std::numeric_limits<s32>::max()));
ASSERT(reserved_stack_space == 0);
// Remember how far rsp is being moved before issuing the adjustment.
reserved_stack_space = stack_space;
code.sub(code.rsp, static_cast<u32>(stack_space));
}
// Undoes a reservation made by AllocStackSpace: adds `stack_space` back to
// rsp and clears the recorded reservation.
//
// Preconditions (asserted):
//  - `stack_space` is less than the maximum s32 value, so the narrowing cast
//    passed to code.add below cannot truncate it.
//  - `stack_space` equals the amount currently recorded in
//    reserved_stack_space, so a mismatched or unpaired release is caught.
void RegAlloc::ReleaseStackSpace(size_t stack_space) {
ASSERT(stack_space < static_cast<size_t>(std::numeric_limits<s32>::max()));
ASSERT(reserved_stack_space == stack_space);
// Clear the record; a new AllocStackSpace call is permitted after this.
reserved_stack_space = 0;
code.add(code.rsp, static_cast<u32>(stack_space));
}
void RegAlloc::EndOfAllocScope() { void RegAlloc::EndOfAllocScope() {
for (auto& iter : hostloc_info) { for (auto& iter : hostloc_info) {
iter.ReleaseAll(); iter.ReleaseAll();
@ -693,4 +707,14 @@ void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
} }
} }
// Translates a spill HostLoc into the xword memory operand that backs it.
//
// Asserts that `loc` is a spill location and that its zero-based index is
// below SpillCount. The returned operand is addressed relative to rsp: it
// skips reserved_stack_space (the amount recorded by AllocStackSpace, zero
// when nothing is reserved), then ABI_SHADOW_SPACE, then the offset of the
// `spill` member inside StackLayout, then `i` slots of sizeof(u64) * 2 bytes
// each. Including reserved_stack_space keeps the offset pointing at the same
// slot after AllocStackSpace has subtracted that amount from rsp.
Xbyak::Address RegAlloc::SpillToOpArg(HostLoc loc) {
ASSERT(HostLocIsSpill(loc));
// Zero-based slot index: distance of `loc` from the first spill HostLoc.
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations");
// Presumably what makes the unqualified `xword` and `rsp` below resolve.
using namespace Xbyak::util;
return xword[rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(u64) * 2];
}
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -127,6 +127,9 @@ public:
// TODO: Values in host flags // TODO: Values in host flags
void AllocStackSpace(size_t stack_space);
void ReleaseStackSpace(size_t stack_space);
void EndOfAllocScope(); void EndOfAllocScope();
void AssertNoMoreUses(); void AssertNoMoreUses();
@ -160,8 +163,11 @@ private:
const HostLocInfo& LocInfo(HostLoc loc) const; const HostLocInfo& LocInfo(HostLoc loc) const;
BlockOfCode& code; BlockOfCode& code;
size_t reserved_stack_space = 0;
void EmitMove(size_t bit_width, HostLoc to, HostLoc from); void EmitMove(size_t bit_width, HostLoc to, HostLoc from);
void EmitExchange(HostLoc a, HostLoc b); void EmitExchange(HostLoc a, HostLoc b);
Xbyak::Address SpillToOpArg(HostLoc loc);
}; };
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64