From e88c89ff476df3d69dc3c8ca77631de67da504c9 Mon Sep 17 00:00:00 2001
From: Merry
Date: Sat, 10 Dec 2022 23:48:35 +0000
Subject: [PATCH] emit_arm64_memory: Call prewrapped fallback functions

Add "wrapped" memory read/write trampolines to the prelude. Each one
saves the caller-saved registers, moves its arguments into the
callback's argument registers, calls the user callback, and restores
the registers before returning.

The inline page table fallback paths now pass the address in Xscratch0
(and the value in Xscratch1, or in Q0 for 128-bit writes) and link to
these trampolines instead of emitting the register save/restore
sequence at every call site. Reads hand their result back in Xscratch0,
or in Q0 for 128-bit reads.
---
 .../backend/arm64/a32_address_space.cpp       |  66 +++++++++
 .../backend/arm64/a64_address_space.cpp       | 132 +++++++++++++++++-
 src/dynarmic/backend/arm64/address_space.cpp  |  30 ++++
 src/dynarmic/backend/arm64/address_space.h    |  10 ++
 src/dynarmic/backend/arm64/emit_arm64.h       |  10 ++
 .../backend/arm64/emit_arm64_memory.cpp       |  50 +++++--
 6 files changed, 281 insertions(+), 17 deletions(-)

diff --git a/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/backend/arm64/a32_address_space.cpp
index 18d6d624..77d2b7ad 100644
--- a/src/dynarmic/backend/arm64/a32_address_space.cpp
+++ b/src/dynarmic/backend/arm64/a32_address_space.cpp
@@ -42,6 +42,35 @@ static void* EmitCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
     return target;
 }
 
+template<auto mfp, typename T>
+static void* EmitWrappedReadCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
+    using namespace oaknut::util;
+
+    const auto info = Devirtualize<mfp>(this_);
+
+    oaknut::Label l_addr, l_this;
+
+    constexpr u64 save_regs = ABI_CALLER_SAVE & ~ToRegList(Xscratch0);  // Xscratch0 carries the result out
+
+    void* target = code.ptr<void*>();
+    ABI_PushRegisters(code, save_regs, 0);
+    code.LDR(X0, l_this);
+    code.MOV(X1, Xscratch0);  // caller passes the address in Xscratch0
+    code.LDR(Xscratch0, l_addr);
+    code.BLR(Xscratch0);
+    code.MOV(Xscratch0, X0);  // hand the read value back in Xscratch0
+    ABI_PopRegisters(code, save_regs, 0);
+    code.RET();
+
+    code.align(8);
+    code.l(l_this);
+    code.dx(info.this_ptr);
+    code.l(l_addr);
+    code.dx(info.fn_ptr);
+
+    return target;
+}
+
 template<auto callback, typename T>
 static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const A32::UserConfig& conf) {
     using namespace oaknut::util;
@@ -68,6 +97,35 @@ static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const
     return target;
 }
 
+template<auto mfp, typename T>
+static void* EmitWrappedWriteCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
+    using namespace oaknut::util;
+
+    const auto info = Devirtualize<mfp>(this_);
+
+    oaknut::Label l_addr, l_this;
+
+    constexpr u64 save_regs = ABI_CALLER_SAVE;
+
+    void* target = code.ptr<void*>();
+    ABI_PushRegisters(code, save_regs, 0);
+    code.LDR(X0, l_this);
+    code.MOV(X1, Xscratch0);  // caller passes the address in Xscratch0
+    code.MOV(X2, Xscratch1);  // caller passes the value in Xscratch1
+    code.LDR(Xscratch0, l_addr);
+    code.BLR(Xscratch0);
+    ABI_PopRegisters(code, save_regs, 0);
+    code.RET();
+
+    code.align(8);
+    code.l(l_this);
+    code.dx(info.this_ptr);
+    code.l(l_addr);
+    code.dx(info.fn_ptr);
+
+    return target;
+}
+
 template<auto callback, typename T>
 static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const A32::UserConfig& conf) {
     using namespace oaknut::util;
@@ -130,6 +188,10 @@ void A32AddressSpace::EmitPrelude() {
     prelude_info.read_memory_16 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead16>(code, conf.callbacks);
     prelude_info.read_memory_32 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead32>(code, conf.callbacks);
     prelude_info.read_memory_64 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead64>(code, conf.callbacks);
+    prelude_info.wrapped_read_memory_8 = EmitWrappedReadCallTrampoline<&A32::UserCallbacks::MemoryRead8>(code, conf.callbacks);
+    prelude_info.wrapped_read_memory_16 = EmitWrappedReadCallTrampoline<&A32::UserCallbacks::MemoryRead16>(code, conf.callbacks);
+    prelude_info.wrapped_read_memory_32 = EmitWrappedReadCallTrampoline<&A32::UserCallbacks::MemoryRead32>(code, conf.callbacks);
+    prelude_info.wrapped_read_memory_64 = EmitWrappedReadCallTrampoline<&A32::UserCallbacks::MemoryRead64>(code, conf.callbacks);
     prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A32::UserCallbacks::MemoryRead8, u8>(code, conf);
     prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A32::UserCallbacks::MemoryRead16, u16>(code, conf);
     prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A32::UserCallbacks::MemoryRead32, u32>(code, conf);
@@ -138,6 +200,10 @@ void A32AddressSpace::EmitPrelude() {
     prelude_info.write_memory_16 = EmitCallTrampoline<&A32::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
     prelude_info.write_memory_32 = EmitCallTrampoline<&A32::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
     prelude_info.write_memory_64 = EmitCallTrampoline<&A32::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
+    prelude_info.wrapped_write_memory_8 = EmitWrappedWriteCallTrampoline<&A32::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
+    prelude_info.wrapped_write_memory_16 = EmitWrappedWriteCallTrampoline<&A32::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
+    prelude_info.wrapped_write_memory_32 = EmitWrappedWriteCallTrampoline<&A32::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
+    prelude_info.wrapped_write_memory_64 = EmitWrappedWriteCallTrampoline<&A32::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
     prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A32::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf);
     prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A32::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf);
     prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A32::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf);
diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp
index c9485ab9..f468808c 100644
--- a/src/dynarmic/backend/arm64/a64_address_space.cpp
+++ b/src/dynarmic/backend/arm64/a64_address_space.cpp
@@ -42,6 +42,35 @@ static void* EmitCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
     return target;
 }
 
+template<auto mfp, typename T>
+static void* EmitWrappedReadCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
+    using namespace oaknut::util;
+
+    const auto info = Devirtualize<mfp>(this_);
+
+    oaknut::Label l_addr, l_this;
+
+    constexpr u64 save_regs = ABI_CALLER_SAVE & ~ToRegList(Xscratch0);  // Xscratch0 carries the result out
+
+    void* target = code.ptr<void*>();
+    ABI_PushRegisters(code, save_regs, 0);
+    code.LDR(X0, l_this);
+    code.MOV(X1, Xscratch0);  // caller passes the address in Xscratch0
+    code.LDR(Xscratch0, l_addr);
+    code.BLR(Xscratch0);
+    code.MOV(Xscratch0, X0);  // hand the read value back in Xscratch0
+    ABI_PopRegisters(code, save_regs, 0);
+    code.RET();
+
+    code.align(8);
+    code.l(l_this);
+    code.dx(info.this_ptr);
+    code.l(l_addr);
+    code.dx(info.fn_ptr);
+
+    return target;
+}
+
 template<auto callback, typename T>
 static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
     using namespace oaknut::util;
@@ -68,6 +97,35 @@ static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const
     return target;
 }
 
+template<auto mfp, typename T>
+static void* EmitWrappedWriteCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
+    using namespace oaknut::util;
+
+    const auto info = Devirtualize<mfp>(this_);
+
+    oaknut::Label l_addr, l_this;
+
+    constexpr u64 save_regs = ABI_CALLER_SAVE;
+
+    void* target = code.ptr<void*>();
+    ABI_PushRegisters(code, save_regs, 0);
+    code.LDR(X0, l_this);
+    code.MOV(X1, Xscratch0);  // caller passes the address in Xscratch0
+    code.MOV(X2, Xscratch1);  // caller passes the value in Xscratch1
+    code.LDR(Xscratch0, l_addr);
+    code.BLR(Xscratch0);
+    ABI_PopRegisters(code, save_regs, 0);
+    code.RET();
+
+    code.align(8);
+    code.l(l_this);
+    code.dx(info.this_ptr);
+    code.l(l_addr);
+    code.dx(info.fn_ptr);
+
+    return target;
+}
+
 template<auto callback, typename T>
 static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
     using namespace oaknut::util;
@@ -123,6 +181,35 @@ static void* EmitRead128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCal
     return target;
 }
 
+static void* EmitWrappedRead128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
+    using namespace oaknut::util;
+
+    const auto info = Devirtualize<&A64::UserCallbacks::MemoryRead128>(this_);
+
+    oaknut::Label l_addr, l_this;
+
+    constexpr u64 save_regs = ABI_CALLER_SAVE & ~ToRegList(Q0);  // Q0 carries the result out
+
+    void* target = code.ptr<void*>();
+    ABI_PushRegisters(code, save_regs, sizeof(Vector));
+    code.LDR(X0, l_this);
+    code.MOV(X1, Xscratch0);  // caller passes the address in Xscratch0
+    code.LDR(Xscratch0, l_addr);
+    code.BLR(Xscratch0);
+    code.STP(X0, X1, SP);  // move X0:X1 into Q0 through the stack
+    code.LDR(Q0, SP);
+    ABI_PopRegisters(code, save_regs, sizeof(Vector));
+    code.RET();
+
+    code.align(8);
+    code.l(l_this);
+    code.dx(info.this_ptr);
+    code.l(l_addr);
+    code.dx(info.fn_ptr);
+
+    return target;
+}
+
 static void* EmitExclusiveRead128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
     using namespace oaknut::util;
 
@@ -161,10 +248,9 @@ static void* EmitWrite128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCa
     oaknut::Label l_addr, l_this;
 
     void* target = code.ptr<void*>();
+    code.LDR(X0, l_this);
     code.FMOV(X2, D0);
     code.FMOV(X3, V0.D()[1]);
-
-    code.LDR(X0, l_this);
     code.LDR(Xscratch0, l_addr);
     code.BR(Xscratch0);
 
@@ -177,6 +263,35 @@ static void* EmitWrite128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCa
     return target;
 }
 
+static void* EmitWrappedWrite128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
+    using namespace oaknut::util;
+
+    const auto info = Devirtualize<&A64::UserCallbacks::MemoryWrite128>(this_);
+
+    oaknut::Label l_addr, l_this;
+
+    constexpr u64 save_regs = ABI_CALLER_SAVE;
+
+    void* target = code.ptr<void*>();
+    ABI_PushRegisters(code, save_regs, 0);
+    code.LDR(X0, l_this);
+    code.MOV(X1, Xscratch0);  // caller passes the address in Xscratch0
+    code.FMOV(X2, D0);  // caller passes the 128-bit value in Q0
+    code.FMOV(X3, V0.D()[1]);
+    code.LDR(Xscratch0, l_addr);
+    code.BLR(Xscratch0);
+    ABI_PopRegisters(code, save_regs, 0);
+    code.RET();
+
+    code.align(8);
+    code.l(l_this);
+    code.dx(info.this_ptr);
+    code.l(l_addr);
+    code.dx(info.fn_ptr);
+
+    return target;
+}
+
 static void* EmitExclusiveWrite128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
     using namespace oaknut::util;
 
@@ -192,10 +307,9 @@ static void* EmitExclusiveWrite128CallTrampoline(oaknut::CodeGenerator& code, co
     };
 
     void* target = code.ptr<void*>();
+    code.LDR(X0, l_this);
     code.FMOV(X2, D0);
     code.FMOV(X3, V0.D()[1]);
-
-    code.LDR(X0, l_this);
     code.LDR(Xscratch0, l_addr);
     code.BR(Xscratch0);
 
@@ -246,6 +360,11 @@ void A64AddressSpace::EmitPrelude() {
     prelude_info.read_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks);
     prelude_info.read_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks);
     prelude_info.read_memory_128 = EmitRead128CallTrampoline(code, conf.callbacks);
+    prelude_info.wrapped_read_memory_8 = EmitWrappedReadCallTrampoline<&A64::UserCallbacks::MemoryRead8>(code, conf.callbacks);
+    prelude_info.wrapped_read_memory_16 = EmitWrappedReadCallTrampoline<&A64::UserCallbacks::MemoryRead16>(code, conf.callbacks);
+    prelude_info.wrapped_read_memory_32 = EmitWrappedReadCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks);
+    prelude_info.wrapped_read_memory_64 = EmitWrappedReadCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks);
+    prelude_info.wrapped_read_memory_128 = EmitWrappedRead128CallTrampoline(code, conf.callbacks);
     prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead8, u8>(code, conf);
     prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead16, u16>(code, conf);
     prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead32, u32>(code, conf);
@@ -256,6 +375,11 @@ void A64AddressSpace::EmitPrelude() {
     prelude_info.write_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
     prelude_info.write_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
     prelude_info.write_memory_128 = EmitWrite128CallTrampoline(code, conf.callbacks);
+    prelude_info.wrapped_write_memory_8 = EmitWrappedWriteCallTrampoline<&A64::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
+    prelude_info.wrapped_write_memory_16 = EmitWrappedWriteCallTrampoline<&A64::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
+    prelude_info.wrapped_write_memory_32 = EmitWrappedWriteCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
+    prelude_info.wrapped_write_memory_64 = EmitWrappedWriteCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
+    prelude_info.wrapped_write_memory_128 = EmitWrappedWrite128CallTrampoline(code, conf.callbacks);
     prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf);
     prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf);
     prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf);
diff --git a/src/dynarmic/backend/arm64/address_space.cpp b/src/dynarmic/backend/arm64/address_space.cpp
index 3aeaa453..22767977 100644
--- a/src/dynarmic/backend/arm64/address_space.cpp
+++ b/src/dynarmic/backend/arm64/address_space.cpp
@@ -118,6 +118,21 @@ void AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInf
         case LinkTarget::ReadMemory128:
             c.BL(prelude_info.read_memory_128);
             break;
+        case LinkTarget::WrappedReadMemory8:
+            c.BL(prelude_info.wrapped_read_memory_8);
+            break;
+        case LinkTarget::WrappedReadMemory16:
+            c.BL(prelude_info.wrapped_read_memory_16);
+            break;
+        case LinkTarget::WrappedReadMemory32:
+            c.BL(prelude_info.wrapped_read_memory_32);
+            break;
+        case LinkTarget::WrappedReadMemory64:
+            c.BL(prelude_info.wrapped_read_memory_64);
+            break;
+        case LinkTarget::WrappedReadMemory128:
+            c.BL(prelude_info.wrapped_read_memory_128);
+            break;
         case LinkTarget::ExclusiveReadMemory8:
             c.BL(prelude_info.exclusive_read_memory_8);
             break;
@@ -148,6 +163,21 @@ void AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInf
         case LinkTarget::WriteMemory128:
             c.BL(prelude_info.write_memory_128);
             break;
+        case LinkTarget::WrappedWriteMemory8:
+            c.BL(prelude_info.wrapped_write_memory_8);
+            break;
+        case LinkTarget::WrappedWriteMemory16:
+            c.BL(prelude_info.wrapped_write_memory_16);
+            break;
+        case LinkTarget::WrappedWriteMemory32:
+            c.BL(prelude_info.wrapped_write_memory_32);
+            break;
+        case LinkTarget::WrappedWriteMemory64:
+            c.BL(prelude_info.wrapped_write_memory_64);
+            break;
+        case LinkTarget::WrappedWriteMemory128:
+            c.BL(prelude_info.wrapped_write_memory_128);
+            break;
         case LinkTarget::ExclusiveWriteMemory8:
             c.BL(prelude_info.exclusive_write_memory_8);
             break;
diff --git a/src/dynarmic/backend/arm64/address_space.h b/src/dynarmic/backend/arm64/address_space.h
index 3544174c..4feda44a 100644
--- a/src/dynarmic/backend/arm64/address_space.h
+++ b/src/dynarmic/backend/arm64/address_space.h
@@ -61,6 +61,11 @@ protected:
         void* read_memory_32;
         void* read_memory_64;
         void* read_memory_128;
+        void* wrapped_read_memory_8;
+        void* wrapped_read_memory_16;
+        void* wrapped_read_memory_32;
+        void* wrapped_read_memory_64;
+        void* wrapped_read_memory_128;
         void* exclusive_read_memory_8;
         void* exclusive_read_memory_16;
         void* exclusive_read_memory_32;
@@ -71,6 +76,11 @@ protected:
         void* write_memory_32;
         void* write_memory_64;
         void* write_memory_128;
+        void* wrapped_write_memory_8;
+        void* wrapped_write_memory_16;
+        void* wrapped_write_memory_32;
+        void* wrapped_write_memory_64;
+        void* wrapped_write_memory_128;
         void* exclusive_write_memory_8;
         void* exclusive_write_memory_16;
         void* exclusive_write_memory_32;
diff --git a/src/dynarmic/backend/arm64/emit_arm64.h b/src/dynarmic/backend/arm64/emit_arm64.h
index 1305733e..c93aa61e 100644
--- a/src/dynarmic/backend/arm64/emit_arm64.h
+++ b/src/dynarmic/backend/arm64/emit_arm64.h
@@ -50,6 +50,11 @@ enum class LinkTarget {
     ReadMemory32,
     ReadMemory64,
     ReadMemory128,
+    WrappedReadMemory8,
+    WrappedReadMemory16,
+    WrappedReadMemory32,
+    WrappedReadMemory64,
+    WrappedReadMemory128,
     ExclusiveReadMemory8,
     ExclusiveReadMemory16,
     ExclusiveReadMemory32,
@@ -60,6 +65,11 @@ enum class LinkTarget {
     WriteMemory32,
     WriteMemory64,
     WriteMemory128,
+    WrappedWriteMemory8,
+    WrappedWriteMemory16,
+    WrappedWriteMemory32,
+    WrappedWriteMemory64,
+    WrappedWriteMemory128,
     ExclusiveWriteMemory8,
     ExclusiveWriteMemory16,
     ExclusiveWriteMemory32,
diff --git a/src/dynarmic/backend/arm64/emit_arm64_memory.cpp b/src/dynarmic/backend/arm64/emit_arm64_memory.cpp
index e7b3f85c..615022ab 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_memory.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_memory.cpp
@@ -62,6 +62,38 @@ LinkTarget WriteMemoryLinkTarget(size_t bitsize) {
     UNREACHABLE();
 }
 
+LinkTarget WrappedReadMemoryLinkTarget(size_t bitsize) {
+    switch (bitsize) {
+    case 8:
+        return LinkTarget::WrappedReadMemory8;
+    case 16:
+        return LinkTarget::WrappedReadMemory16;
+    case 32:
+        return LinkTarget::WrappedReadMemory32;
+    case 64:
+        return LinkTarget::WrappedReadMemory64;
+    case 128:
+        return LinkTarget::WrappedReadMemory128;
+    }
+    UNREACHABLE();
+}
+
+LinkTarget WrappedWriteMemoryLinkTarget(size_t bitsize) {
+    switch (bitsize) {
+    case 8:
+        return LinkTarget::WrappedWriteMemory8;
+    case 16:
+        return LinkTarget::WrappedWriteMemory16;
+    case 32:
+        return LinkTarget::WrappedWriteMemory32;
+    case 64:
+        return LinkTarget::WrappedWriteMemory64;
+    case 128:
+        return LinkTarget::WrappedWriteMemory128;
+    }
+    UNREACHABLE();
+}
+
 LinkTarget ExclusiveReadMemoryLinkTarget(size_t bitsize) {
     switch (bitsize) {
     case 8:
@@ -336,20 +368,17 @@ void InlinePageTableEmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx
     EmitMemoryLdr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);
 
     ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end] {
-        const u64 save_regs = ABI_CALLER_SAVE & ~ToRegList(Rvalue);
         code.l(*fallback);
-        ABI_PushRegisters(code, save_regs, 0);
-        code.MOV(X1, Xaddr);
-        EmitRelocation(code, ctx, ReadMemoryLinkTarget(bitsize));
+        code.MOV(Xscratch0, Xaddr);
+        EmitRelocation(code, ctx, WrappedReadMemoryLinkTarget(bitsize));
         if (ordered) {
             code.DMB(oaknut::BarrierOp::ISH);
         }
         if constexpr (bitsize == 128) {
             code.MOV(Rvalue.B16(), Q0.B16());
         } else {
-            code.MOV(Rvalue.toX(), X0);
+            code.MOV(Rvalue.toX(), Xscratch0);
         }
-        ABI_PopRegisters(code, save_regs, 0);
         ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
         code.B(*end);
     });
@@ -379,26 +408,21 @@ void InlinePageTableEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ct
     EmitMemoryStr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);
 
     ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end] {
-        const u64 save_regs = ABI_CALLER_SAVE;
         code.l(*fallback);
-        ABI_PushRegisters(code, save_regs, 0);
         if constexpr (bitsize == 128) {
-            code.MOV(X1, Xaddr);
+            code.MOV(Xscratch0, Xaddr);
             code.MOV(Q0.B16(), Rvalue.B16());
         } else {
             code.MOV(Xscratch0, Xaddr);
             code.MOV(Xscratch1, Rvalue.toX());
-            code.MOV(X1, Xscratch0);
-            code.MOV(X2, Xscratch1);
         }
         if (ordered) {
             code.DMB(oaknut::BarrierOp::ISH);
         }
-        EmitRelocation(code, ctx, WriteMemoryLinkTarget(bitsize));
+        EmitRelocation(code, ctx, WrappedWriteMemoryLinkTarget(bitsize));
         if (ordered) {
             code.DMB(oaknut::BarrierOp::ISH);
         }
-        ABI_PopRegisters(code, save_regs, 0);
         ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
         code.B(*end);
     });