reg_alloc: Inform RegAlloc about rsp changes
This commit is contained in:
parent
05a6b5f623
commit
5ebe11c329
8 changed files with 51 additions and 31 deletions
|
@ -1101,7 +1101,7 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
|
||||
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
|
||||
code.sub(rsp, 16 + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
|
||||
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
|
||||
code.CallLambda(
|
||||
[](A64::UserConfig& conf, u64 vaddr, A64::Vector& ret) {
|
||||
|
@ -1111,7 +1111,7 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
|
|||
}
|
||||
);
|
||||
code.movups(result, xword[rsp + ABI_SHADOW_SPACE]);
|
||||
code.add(rsp, 16 + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -1170,7 +1170,7 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) {
|
|||
}
|
||||
);
|
||||
} else {
|
||||
code.sub(rsp, 16 + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
|
||||
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
|
||||
code.movaps(xword[code.ABI_PARAM3], xmm1);
|
||||
code.CallLambda(
|
||||
|
@ -1181,7 +1181,7 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) {
|
|||
}) ? 0 : 1;
|
||||
}
|
||||
);
|
||||
code.add(rsp, 16 + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
|
||||
}
|
||||
code.L(end);
|
||||
}
|
||||
|
|
|
@ -24,7 +24,8 @@ static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, Block
|
|||
ctx.reg_alloc.EndOfAllocScope();
|
||||
|
||||
ctx.reg_alloc.HostCall(nullptr);
|
||||
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
|
||||
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
|
||||
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]);
|
||||
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + sizeof(AES::State)]);
|
||||
|
@ -32,7 +33,7 @@ static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, Block
|
|||
code.CallFunction(fn);
|
||||
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE]);
|
||||
|
||||
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
|
|
@ -671,11 +671,11 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.HostCall(inst, args[0], args[1], args[2]);
|
||||
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
|
||||
#ifdef _WIN32
|
||||
code.sub(rsp, 16 + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
|
||||
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
|
||||
code.CallFunction(&FP::FPMulAdd<FPT>);
|
||||
code.add(rsp, 16 + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
|
||||
#else
|
||||
code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPMulAdd<FPT>);
|
||||
|
|
|
@ -59,7 +59,7 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
|
|||
ctx.reg_alloc.EndOfAllocScope();
|
||||
|
||||
ctx.reg_alloc.HostCall(nullptr);
|
||||
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
|
||||
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
|
||||
|
||||
|
@ -67,7 +67,7 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
|
|||
code.CallFunction(fn);
|
||||
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
|
||||
|
||||
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -82,7 +82,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
|
|||
ctx.reg_alloc.EndOfAllocScope();
|
||||
|
||||
ctx.reg_alloc.HostCall(nullptr);
|
||||
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
|
||||
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
|
||||
|
||||
|
@ -90,7 +90,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
|
|||
code.CallFunction(fn);
|
||||
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
|
||||
|
||||
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
|
||||
|
||||
|
@ -108,7 +108,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
|
|||
ctx.reg_alloc.EndOfAllocScope();
|
||||
|
||||
ctx.reg_alloc.HostCall(nullptr);
|
||||
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
|
||||
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
|
||||
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
|
||||
|
@ -118,7 +118,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
|
|||
code.CallFunction(fn);
|
||||
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
|
||||
|
||||
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
|
||||
|
||||
|
@ -136,7 +136,7 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
|
|||
ctx.reg_alloc.EndOfAllocScope();
|
||||
|
||||
ctx.reg_alloc.HostCall(nullptr);
|
||||
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
|
||||
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
|
||||
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
|
||||
|
@ -146,7 +146,7 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
|
|||
code.CallFunction(fn);
|
||||
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
|
||||
|
||||
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -4272,7 +4272,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
|
|||
}
|
||||
|
||||
const u32 stack_space = static_cast<u32>(6 * 8);
|
||||
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
for (size_t i = 0; i < table_size; ++i) {
|
||||
const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[i]);
|
||||
code.movq(qword[rsp + ABI_SHADOW_SPACE + i * 8], table_value);
|
||||
|
@ -4304,7 +4304,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
|
|||
);
|
||||
|
||||
code.movq(result, qword[rsp + ABI_SHADOW_SPACE + 4 * 8]);
|
||||
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -4402,7 +4402,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
|
|||
}
|
||||
|
||||
const u32 stack_space = static_cast<u32>((table_size + 2) * 16);
|
||||
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
for (size_t i = 0; i < table_size; ++i) {
|
||||
const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[i]);
|
||||
code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], table_value);
|
||||
|
@ -4434,7 +4434,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
|
|||
);
|
||||
|
||||
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]);
|
||||
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
|
|
@ -21,14 +21,4 @@ Xbyak::Xmm HostLocToXmm(HostLoc loc) {
|
|||
return Xbyak::Xmm(static_cast<int>(loc) - static_cast<int>(HostLoc::XMM0));
|
||||
}
|
||||
|
||||
// Translates a spill HostLoc into the rsp-relative memory operand of its slot.
//
// The slot index is the distance of `loc` from HostLoc::FirstSpill. Each slot
// is two u64s wide and lives in StackLayout::spill, which sits above the ABI
// shadow space. The address is formed from rsp plus fixed offsets only, so it
// does not compensate for any adjustment of rsp made by emitted code.
Xbyak::Address SpillToOpArg(HostLoc loc) {
    ASSERT(HostLocIsSpill(loc));

    const size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
    ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations");

    using namespace Xbyak::util;

    // Byte offset of slot `i` measured from the start of StackLayout.
    const size_t slot_offset = offsetof(StackLayout, spill) + i * sizeof(u64) * 2;
    return xword[rsp + ABI_SHADOW_SPACE + slot_offset];
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
|
|
|
@ -110,6 +110,5 @@ const HostLocList any_xmm = {
|
|||
|
||||
Xbyak::Reg64 HostLocToReg64(HostLoc loc);
|
||||
Xbyak::Xmm HostLocToXmm(HostLoc loc);
|
||||
Xbyak::Address SpillToOpArg(HostLoc loc);
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
|
|
|
@ -438,6 +438,20 @@ void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_r
|
|||
}
|
||||
}
|
||||
|
||||
void RegAlloc::AllocStackSpace(size_t stack_space) {
|
||||
ASSERT(stack_space < static_cast<size_t>(std::numeric_limits<s32>::max()));
|
||||
ASSERT(reserved_stack_space == 0);
|
||||
reserved_stack_space = stack_space;
|
||||
code.sub(code.rsp, static_cast<u32>(stack_space));
|
||||
}
|
||||
|
||||
void RegAlloc::ReleaseStackSpace(size_t stack_space) {
|
||||
ASSERT(stack_space < static_cast<size_t>(std::numeric_limits<s32>::max()));
|
||||
ASSERT(reserved_stack_space == stack_space);
|
||||
reserved_stack_space = 0;
|
||||
code.add(code.rsp, static_cast<u32>(stack_space));
|
||||
}
|
||||
|
||||
void RegAlloc::EndOfAllocScope() {
|
||||
for (auto& iter : hostloc_info) {
|
||||
iter.ReleaseAll();
|
||||
|
@ -693,4 +707,14 @@ void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
|
|||
}
|
||||
}
|
||||
|
||||
// Translates a spill HostLoc into the rsp-relative memory operand of its slot.
//
// The slot index is the distance of `loc` from HostLoc::FirstSpill. Each slot
// is two u64s wide and lives in StackLayout::spill, which sits above the ABI
// shadow space. reserved_stack_space is added to the displacement so the
// operand still refers to the same slot while a reservation made through
// AllocStackSpace has lowered rsp.
Xbyak::Address RegAlloc::SpillToOpArg(HostLoc loc) {
    ASSERT(HostLocIsSpill(loc));

    const size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
    ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations");

    using namespace Xbyak::util;

    // Byte offset of slot `i` measured from the start of StackLayout.
    const size_t slot_offset = offsetof(StackLayout, spill) + i * sizeof(u64) * 2;
    return xword[rsp + reserved_stack_space + ABI_SHADOW_SPACE + slot_offset];
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
|
|
|
@ -127,6 +127,9 @@ public:
|
|||
|
||||
// TODO: Values in host flags
|
||||
|
||||
void AllocStackSpace(size_t stack_space);
|
||||
void ReleaseStackSpace(size_t stack_space);
|
||||
|
||||
void EndOfAllocScope();
|
||||
|
||||
void AssertNoMoreUses();
|
||||
|
@ -160,8 +163,11 @@ private:
|
|||
const HostLocInfo& LocInfo(HostLoc loc) const;
|
||||
|
||||
BlockOfCode& code;
|
||||
size_t reserved_stack_space = 0;
|
||||
void EmitMove(size_t bit_width, HostLoc to, HostLoc from);
|
||||
void EmitExchange(HostLoc a, HostLoc b);
|
||||
|
||||
Xbyak::Address SpillToOpArg(HostLoc loc);
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
|
|
Loading…
Reference in a new issue