backend/arm64: Implement RSB

Parent: 3f6232bfdb
Commit: c326f9b02f
8 changed files with 128 additions and 22 deletions
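This commit implements the return stack buffer (RSB) optimization (OptimizationFlag::ReturnStackBuffer) in the ARM64 backend. The mechanism, as read from the hunks below: PushRSB records a predicted return, i.e. the target's location descriptor plus the host code pointer for that target, in a small ring buffer kept in StackLayout; the PopRSBHint terminal pops the newest entry, compares its descriptor against the current guest location, and branches straight to the recorded host code on a hit, falling back to ReturnToDispatcher on a miss or when single-stepping. Host-side sketches of the individual steps follow the relevant hunks.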
@@ -220,7 +220,7 @@ void A32AddressSpace::EmitPrelude() {
     prelude_info.add_ticks = EmitCallTrampoline<&A32::UserCallbacks::AddTicks>(code, conf.callbacks);
     prelude_info.get_ticks_remaining = EmitCallTrampoline<&A32::UserCallbacks::GetTicksRemaining>(code, conf.callbacks);
 
-    oaknut::Label return_from_run_code;
+    oaknut::Label return_from_run_code, l_return_to_dispatcher;
 
     prelude_info.run_code = code.ptr<PreludeInfo::RunCodeFuncType>();
     {
@@ -236,6 +236,13 @@ void A32AddressSpace::EmitPrelude() {
             code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
         }
 
+        if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer)) {
+            code.LDR(Xscratch0, l_return_to_dispatcher);
+            for (size_t i = 0; i < RSBCount; i++) {
+                code.STR(Xscratch0, SP, offsetof(StackLayout, rsb) + offsetof(RSBEntry, code_ptr) + i * sizeof(RSBEntry));
+            }
+        }
+
         if (conf.enable_cycle_counting) {
            code.BL(prelude_info.get_ticks_remaining);
            code.MOV(Xticks, X0);
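The new block pre-fills the code_ptr field of every RSB slot with the address of return_to_dispatcher, loaded from the literal pool emitted at the end of the prelude. A host-side sketch of the runtime effect, assuming the RSBEntry/StackLayout/RSBCount definitions this commit adds in stack_layout.h:

    // Sketch only: what the emitted prefill loop does to the stack-resident ring.
    // target is left uninitialized, which is safe: even if a later pop spuriously
    // matches garbage, code_ptr already points at the dispatcher.
    void PrefillRSB(StackLayout& layout, u64 return_to_dispatcher) {
        for (size_t i = 0; i < RSBCount; i++) {
            layout.rsb[i].code_ptr = return_to_dispatcher;
        }
    }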
@@ -268,6 +275,13 @@ void A32AddressSpace::EmitPrelude() {
             code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
         }
 
+        if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer)) {
+            code.LDR(Xscratch0, l_return_to_dispatcher);
+            for (size_t i = 0; i < RSBCount; i++) {
+                code.STR(Xscratch0, SP, offsetof(StackLayout, rsb) + offsetof(RSBEntry, code_ptr) + i * sizeof(RSBEntry));
+            }
+        }
+
         if (conf.enable_cycle_counting) {
            code.MOV(Xticks, 1);
            code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
@@ -342,6 +356,10 @@ void A32AddressSpace::EmitPrelude() {
         code.RET();
     }
 
+    code.align(8);
+    code.l(l_return_to_dispatcher);
+    code.dx(mcl::bit_cast<u64>(prelude_info.return_to_dispatcher));
+
     prelude_info.end_of_prelude = code.ptr<u32*>();
 
     mem.invalidate_all();
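The three added lines form an 8-byte in-code literal pool slot: code.align(8) gives the u64 its natural alignment, code.l() binds l_return_to_dispatcher to the current position, and code.dx() emits the raw 8 bytes of the dispatcher address. The earlier code.LDR(Xscratch0, l_return_to_dispatcher) in the prefill loop is then a PC-relative LDR (literal) of this slot, which is why the label had to be declared alongside return_from_run_code at the top of EmitPrelude.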
@@ -398,7 +398,7 @@ void A64AddressSpace::EmitPrelude() {
     prelude_info.add_ticks = EmitCallTrampoline<&A64::UserCallbacks::AddTicks>(code, conf.callbacks);
     prelude_info.get_ticks_remaining = EmitCallTrampoline<&A64::UserCallbacks::GetTicksRemaining>(code, conf.callbacks);
 
-    oaknut::Label return_from_run_code;
+    oaknut::Label return_from_run_code, l_return_to_dispatcher;
 
     prelude_info.run_code = code.ptr<PreludeInfo::RunCodeFuncType>();
     {
@@ -414,6 +414,13 @@ void A64AddressSpace::EmitPrelude() {
             code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
         }
 
+        if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer)) {
+            code.LDR(Xscratch0, l_return_to_dispatcher);
+            for (size_t i = 0; i < RSBCount; i++) {
+                code.STR(Xscratch0, SP, offsetof(StackLayout, rsb) + offsetof(RSBEntry, code_ptr) + i * sizeof(RSBEntry));
+            }
+        }
+
         if (conf.enable_cycle_counting) {
            code.BL(prelude_info.get_ticks_remaining);
            code.MOV(Xticks, X0);
@@ -445,6 +452,13 @@ void A64AddressSpace::EmitPrelude() {
             code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
         }
 
+        if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer)) {
+            code.LDR(Xscratch0, l_return_to_dispatcher);
+            for (size_t i = 0; i < RSBCount; i++) {
+                code.STR(Xscratch0, SP, offsetof(StackLayout, rsb) + offsetof(RSBEntry, code_ptr) + i * sizeof(RSBEntry));
+            }
+        }
+
         if (conf.enable_cycle_counting) {
            code.MOV(Xticks, 1);
            code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
@@ -518,6 +532,10 @@ void A64AddressSpace::EmitPrelude() {
         code.RET();
     }
 
+    code.align(8);
+    code.l(l_return_to_dispatcher);
+    code.dx(mcl::bit_cast<u64>(prelude_info.return_to_dispatcher));
+
     prelude_info.end_of_prelude = code.ptr<u32*>();
 
     mem.invalidate_all();
@@ -120,7 +120,7 @@ EmittedBlockInfo AddressSpace::Emit(IR::Block block) {
     return block_info;
 }
 
-static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector<BlockRelocation>& block_relocations_list) {
+static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector<BlockRelocation>& block_relocations_list, void* return_to_dispatcher) {
     using namespace oaknut;
     using namespace oaknut::util;
 
@@ -135,12 +135,11 @@ static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr,
             c.NOP();
         }
         break;
-    case BlockRelocationType::MoveToScratch0:
+    case BlockRelocationType::MoveToScratch1:
         if (target_ptr) {
-            c.ADRL(Xscratch0, (void*)target_ptr);
+            c.ADRL(Xscratch1, (void*)target_ptr);
         } else {
-            c.NOP();
-            c.NOP();
+            c.ADRL(Xscratch1, return_to_dispatcher);
        }
        break;
    default:
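LinkBlockLinks now takes the dispatcher pointer so that a MoveToScratch1 patch site always materializes a usable address in Xscratch1: the entry point of the target block once it is compiled and linked, or return_to_dispatcher while it is not. Combined with the prelude prefill, PushRSB can store Xscratch1 unconditionally. The two call-site hunks below just thread prelude_info.return_to_dispatcher through.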
@@ -284,7 +283,7 @@ void AddressSpace::Link(EmittedBlockInfo& block_info) {
 
     for (auto [target_descriptor, list] : block_info.block_relocations) {
         block_references[target_descriptor].emplace(block_info.entry_point);
-        LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list);
+        LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list, prelude_info.return_to_dispatcher);
     }
 }
 
@@ -294,7 +293,7 @@ void AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor,
     const EmittedBlockInfo& block_info = block_iter->second;
 
     if (auto relocation_iter = block_info.block_relocations.find(target_descriptor); relocation_iter != block_info.block_relocations.end()) {
-        LinkBlockLinks(block_info.entry_point, target_ptr, relocation_iter->second);
+        LinkBlockLinks(block_info.entry_point, target_ptr, relocation_iter->second, prelude_info.return_to_dispatcher);
     }
 
     mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
@@ -45,8 +45,24 @@ void EmitIR<IR::Opcode::CallHostFunction>(oaknut::CodeGenerator& code, EmitConte
 }
 
 template<>
-void EmitIR<IR::Opcode::PushRSB>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
-    // TODO
+void EmitIR<IR::Opcode::PushRSB>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    if (!ctx.conf.HasOptimization(OptimizationFlag::ReturnStackBuffer)) {
+        return;
+    }
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ASSERT(args[0].IsImmediate());
+    const IR::LocationDescriptor target{args[0].GetImmediateU64()};
+
+    code.LDR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));
+    code.ADD(Wscratch2, Wscratch2, sizeof(RSBEntry));
+    code.AND(Wscratch2, Wscratch2, RSBIndexMask);
+    code.STR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));
+    code.ADD(Xscratch2, SP, Xscratch2);
+
+    code.MOV(Xscratch0, target.Value());
+    EmitBlockLinkRelocation(code, ctx, target, BlockRelocationType::MoveToScratch1);
+    code.STP(Xscratch0, Xscratch1, Xscratch2, offsetof(StackLayout, rsb));
 }
 
 template<>
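PushRSB advances rsb_ptr by one entry (the index is kept in bytes, so the AND with RSBIndexMask wraps it around the ring), then stores the descriptor/code-pointer pair with a single STP. Note that Xscratch1 is never set explicitly here: the MoveToScratch1 relocation emitted just before the STP is patched at link time to load it. A host-side sketch of the semantics, assuming this commit's stack_layout.h definitions (the function itself is illustrative):

    // Illustrative equivalent of the emitted PushRSB sequence.
    void PushRSB(StackLayout& layout, u64 target, u64 code_ptr) {
        u32 index = layout.rsb_ptr;
        index = (index + sizeof(RSBEntry)) & RSBIndexMask;  // byte index, wraps after 8 entries
        layout.rsb_ptr = index;

        auto* entry = reinterpret_cast<RSBEntry*>(
            reinterpret_cast<char*>(&layout) + offsetof(StackLayout, rsb) + index);
        entry->target = target;      // Xscratch0: the predicted return's location descriptor
        entry->code_ptr = code_ptr;  // Xscratch1: filled in by the MoveToScratch1 relocation
    }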
@@ -262,8 +278,8 @@ void EmitBlockLinkRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, cons
     case BlockRelocationType::Branch:
         code.NOP();
         break;
-    case BlockRelocationType::MoveToScratch0:
-        code.NOP();
+    case BlockRelocationType::MoveToScratch1:
+        code.BRK(0);
        code.NOP();
        break;
    default:
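The MoveToScratch1 placeholder is two instructions wide because oaknut's ADRL pseudo-instruction expands to an ADRP+ADD pair; emitting BRK #0 in the first slot (instead of the previous NOP) means execution traps immediately rather than silently running with a stale Xscratch1 if a PushRSB site is ever reached before LinkBlockLinks has patched it.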
@@ -93,7 +93,7 @@ struct Relocation {
 
 enum class BlockRelocationType {
     Branch,
-    MoveToScratch0,
+    MoveToScratch1,
 };
 
 struct BlockRelocation {
@@ -93,10 +93,29 @@ void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Li
     EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
 }
 
-void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool) {
-    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
-
-    // TODO: Implement PopRSBHint optimization
+void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) {
+    if (ctx.conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) && !is_single_step) {
+        oaknut::Label fail;
+
+        code.LDR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));
+        code.AND(Wscratch2, Wscratch2, RSBIndexMask);
+        code.ADD(X2, SP, Xscratch2);
+        code.SUB(Wscratch2, Wscratch2, sizeof(RSBEntry));
+        code.STR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));
+
+        code.LDP(Xscratch0, Xscratch1, X2, offsetof(StackLayout, rsb));
+
+        static_assert(offsetof(A32JitState, regs) + 16 * sizeof(u32) == offsetof(A32JitState, upper_location_descriptor));
+        code.LDUR(X0, Xstate, offsetof(A32JitState, regs) + 15 * sizeof(u32));
+
+        code.CMP(X0, Xscratch0);
+        code.B(NE, fail);
+        code.BR(Xscratch1);
+
+        code.l(fail);
+    }
+
+    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
 }
 
 void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::FastDispatchHint, IR::LocationDescriptor, bool) {
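The fast path pops the newest entry and compares it against the current A32 location descriptor. The static_assert guarantees that upper_location_descriptor sits immediately after regs[15] in A32JitState, so the unaligned LDUR at regs + 15*sizeof(u32) loads the guest PC into the low 32 bits of X0 and the upper location descriptor bits into the high 32 bits, matching the packed descriptor that PushRSB stored. A host-side sketch of the pop (illustrative, assuming this commit's layout):

    // Illustrative equivalent of the emitted PopRSBHint fast path.
    const void* PopRSBHint(StackLayout& layout, u64 current_descriptor) {
        u32 index = layout.rsb_ptr & RSBIndexMask;
        layout.rsb_ptr = index - sizeof(RSBEntry);  // pop: step the byte index back

        const auto* entry = reinterpret_cast<const RSBEntry*>(
            reinterpret_cast<const char*>(&layout) + offsetof(StackLayout, rsb) + index);
        if (entry->target == current_descriptor) {
            return reinterpret_cast<const void*>(entry->code_ptr);  // hit: BR Xscratch1
        }
        return nullptr;  // miss: the emitted code falls through to ReturnToDispatcher
    }

Underflow of rsb_ptr past zero is harmless: the value is only ever used after masking with RSBIndexMask, so the ring simply wraps.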
@@ -70,10 +70,34 @@ void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Li
     EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
 }
 
-void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool) {
-    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
-
-    // TODO: Implement PopRSBHint optimization
+void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) {
+    if (ctx.conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) && !is_single_step) {
+        oaknut::Label fail;
+
+        code.MOV(Wscratch0, A64::LocationDescriptor::fpcr_mask);
+        code.LDR(W0, Xstate, offsetof(A64JitState, fpcr));
+        code.LDR(X1, Xstate, offsetof(A64JitState, pc));
+        code.AND(W0, W0, Wscratch0);
+        code.AND(X1, X1, A64::LocationDescriptor::pc_mask);
+        code.LSL(X0, X0, A64::LocationDescriptor::fpcr_shift);
+        code.ORR(X0, X0, X1);
+
+        code.LDR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));
+        code.AND(Wscratch2, Wscratch2, RSBIndexMask);
+        code.ADD(X2, SP, Xscratch2);
+        code.SUB(Wscratch2, Wscratch2, sizeof(RSBEntry));
+        code.STR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));
+
+        code.LDP(Xscratch0, Xscratch1, X2, offsetof(StackLayout, rsb));
+
+        code.CMP(X0, Xscratch0);
+        code.B(NE, fail);
+        code.BR(Xscratch1);
+
+        code.l(fail);
+    }
+
+    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
 }
 
 void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::FastDispatchHint, IR::LocationDescriptor, bool) {
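Unlike A32, the A64 descriptor has to be recomputed from guest state: the masked FPCR is shifted into the upper bits and ORed with the masked PC. A sketch of the computation (names from A64::LocationDescriptor; the standalone function is illustrative):

    // Illustrative equivalent of the descriptor recomputation above.
    u64 CurrentLocationDescriptor(const A64JitState& state) {
        const u64 pc_bits = state.pc & A64::LocationDescriptor::pc_mask;
        const u64 fpcr_bits = (u64{state.fpcr} & A64::LocationDescriptor::fpcr_mask)
                              << A64::LocationDescriptor::fpcr_shift;
        return fpcr_bits | pc_bits;
    }

The rest of the pop is identical to the A32 version: load the newest entry, step the ring index back, and BR to the stored host pointer on a descriptor match.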
@@ -11,16 +11,28 @@
 
 namespace Dynarmic::Backend::Arm64 {
 
-constexpr size_t SpillCount = 64;
-
 #ifdef _MSC_VER
 #    pragma warning(push)
 #    pragma warning(disable : 4324)  // Structure was padded due to alignment specifier
 #endif
 
+constexpr size_t SpillCount = 64;
+
+struct alignas(16) RSBEntry {
+    u64 target;
+    u64 code_ptr;
+};
+
+constexpr size_t RSBCount = 8;
+constexpr u64 RSBIndexMask = (RSBCount - 1) * sizeof(RSBEntry);
+
 struct alignas(16) StackLayout {
+    std::array<RSBEntry, RSBCount> rsb;
+
     std::array<std::array<u64, 2>, SpillCount> spill;
 
+    u32 rsb_ptr;
+
     s64 cycles_to_run;
 
     u32 save_host_fpcr;
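RSBIndexMask works because both RSBCount and sizeof(RSBEntry) are powers of two: rsb_ptr holds a byte offset that only ever moves in sizeof(RSBEntry) steps, and ANDing it with (RSBCount - 1) * sizeof(RSBEntry) keeps it inside the 8-entry ring. Compile-time checks one could add to make that invariant explicit (not in this commit):

    static_assert(sizeof(RSBEntry) == 16);  // alignas(16) + two u64 fields
    static_assert((RSBCount & (RSBCount - 1)) == 0, "ring wrap relies on a power-of-two count");
    static_assert(RSBIndexMask == 0x70);
    // Example wrap: the byte offset one past the last slot masks back to slot 0.
    static_assert(((7 * sizeof(RSBEntry) + sizeof(RSBEntry)) & RSBIndexMask) == 0);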