reg_alloc: Inform RegAlloc about rsp changes

This commit is contained in:
MerryMage 2021-05-07 11:29:57 +01:00
parent 05a6b5f623
commit 5ebe11c329
8 changed files with 51 additions and 31 deletions

View file

@ -1101,7 +1101,7 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
code.sub(rsp, 16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
code.CallLambda(
[](A64::UserConfig& conf, u64 vaddr, A64::Vector& ret) {
@ -1111,7 +1111,7 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
}
);
code.movups(result, xword[rsp + ABI_SHADOW_SPACE]);
code.add(rsp, 16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result);
}
@ -1170,7 +1170,7 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) {
}
);
} else {
code.sub(rsp, 16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
code.movaps(xword[code.ABI_PARAM3], xmm1);
code.CallLambda(
@ -1181,7 +1181,7 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) {
}) ? 0 : 1;
}
);
code.add(rsp, 16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
}
code.L(end);
}

View file

@ -24,7 +24,8 @@ static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, Block
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + sizeof(AES::State)]);
@ -32,7 +33,7 @@ static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, Block
code.CallFunction(fn);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE]);
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result);
}

View file

@ -671,11 +671,11 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst, args[0], args[1], args[2]);
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
#ifdef _WIN32
code.sub(rsp, 16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
code.CallFunction(&FP::FPMulAdd<FPT>);
code.add(rsp, 16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
#else
code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPMulAdd<FPT>);

View file

@ -59,7 +59,7 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
@ -67,7 +67,7 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
code.CallFunction(fn);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result);
}
@ -82,7 +82,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
@ -90,7 +90,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
code.CallFunction(fn);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
@ -108,7 +108,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
@ -118,7 +118,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
code.CallFunction(fn);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
@ -136,7 +136,7 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
@ -146,7 +146,7 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
code.CallFunction(fn);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result);
}
@ -4272,7 +4272,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
}
const u32 stack_space = static_cast<u32>(6 * 8);
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
for (size_t i = 0; i < table_size; ++i) {
const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[i]);
code.movq(qword[rsp + ABI_SHADOW_SPACE + i * 8], table_value);
@ -4304,7 +4304,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
);
code.movq(result, qword[rsp + ABI_SHADOW_SPACE + 4 * 8]);
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result);
}
@ -4402,7 +4402,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
}
const u32 stack_space = static_cast<u32>((table_size + 2) * 16);
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
for (size_t i = 0; i < table_size; ++i) {
const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[i]);
code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], table_value);
@ -4434,7 +4434,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]);
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result);
}

View file

@ -21,14 +21,4 @@ Xbyak::Xmm HostLocToXmm(HostLoc loc) {
return Xbyak::Xmm(static_cast<int>(loc) - static_cast<int>(HostLoc::XMM0));
}
// Translates a spill HostLoc into the 128-bit memory operand that addresses
// its slot in StackLayout::spill, relative to rsp.
Xbyak::Address SpillToOpArg(HostLoc loc) {
    ASSERT(HostLocIsSpill(loc));

    size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
    ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations");

    // Every spill slot is two u64s wide.
    const size_t slot_offset = i * sizeof(u64) * 2;
    const size_t displacement = ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + slot_offset;
    return Xbyak::util::xword[Xbyak::util::rsp + displacement];
}
} // namespace Dynarmic::Backend::X64

View file

@ -110,6 +110,5 @@ const HostLocList any_xmm = {
Xbyak::Reg64 HostLocToReg64(HostLoc loc);
Xbyak::Xmm HostLocToXmm(HostLoc loc);
Xbyak::Address SpillToOpArg(HostLoc loc);
} // namespace Dynarmic::Backend::X64

View file

@ -438,6 +438,20 @@ void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_r
}
}
void RegAlloc::AllocStackSpace(size_t stack_space) {
ASSERT(stack_space < static_cast<size_t>(std::numeric_limits<s32>::max()));
ASSERT(reserved_stack_space == 0);
reserved_stack_space = stack_space;
code.sub(code.rsp, static_cast<u32>(stack_space));
}
void RegAlloc::ReleaseStackSpace(size_t stack_space) {
ASSERT(stack_space < static_cast<size_t>(std::numeric_limits<s32>::max()));
ASSERT(reserved_stack_space == stack_space);
reserved_stack_space = 0;
code.add(code.rsp, static_cast<u32>(stack_space));
}
void RegAlloc::EndOfAllocScope() {
for (auto& iter : hostloc_info) {
iter.ReleaseAll();
@ -693,4 +707,14 @@ void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
}
}
// Translates a spill HostLoc into the 128-bit memory operand that addresses
// its slot in StackLayout::spill, relative to rsp.
// reserved_stack_space is added to the displacement, so the operand accounts
// for any amount recorded by AllocStackSpace.
Xbyak::Address RegAlloc::SpillToOpArg(HostLoc loc) {
    ASSERT(HostLocIsSpill(loc));

    size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
    ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations");

    // Every spill slot is two u64s wide.
    const size_t slot_offset = i * sizeof(u64) * 2;
    const size_t displacement = reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + slot_offset;
    return Xbyak::util::xword[Xbyak::util::rsp + displacement];
}
} // namespace Dynarmic::Backend::X64

View file

@ -127,6 +127,9 @@ public:
// TODO: Values in host flags
void AllocStackSpace(size_t stack_space);
void ReleaseStackSpace(size_t stack_space);
void EndOfAllocScope();
void AssertNoMoreUses();
@ -160,8 +163,11 @@ private:
const HostLocInfo& LocInfo(HostLoc loc) const;
BlockOfCode& code;
size_t reserved_stack_space = 0;
void EmitMove(size_t bit_width, HostLoc to, HostLoc from);
void EmitExchange(HostLoc a, HostLoc b);
Xbyak::Address SpillToOpArg(HostLoc loc);
};
} // namespace Dynarmic::Backend::X64