diff --git a/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/backend/arm64/a32_address_space.cpp
index 84acde3f..8a630c93 100644
--- a/src/dynarmic/backend/arm64/a32_address_space.cpp
+++ b/src/dynarmic/backend/arm64/a32_address_space.cpp
@@ -226,6 +226,9 @@ void A32AddressSpace::EmitPrelude() {
     if (conf.page_table) {
         code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
     }
+    if (conf.fastmem_pointer) {
+        code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
+    }
 
     if (conf.enable_cycle_counting) {
         code.BL(prelude_info.get_ticks_remaining);
@@ -255,6 +258,9 @@ void A32AddressSpace::EmitPrelude() {
     if (conf.page_table) {
         code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
     }
+    if (conf.fastmem_pointer) {
+        code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
+    }
 
     if (conf.enable_cycle_counting) {
         code.MOV(Xticks, 1);
@@ -358,6 +364,11 @@ EmitConfig A32AddressSpace::GetEmitConfig() {
         .detect_misaligned_access_via_page_table = conf.detect_misaligned_access_via_page_table,
         .only_detect_misalignment_via_page_table_on_page_boundary = conf.only_detect_misalignment_via_page_table_on_page_boundary,
 
+        .fastmem_pointer = mcl::bit_cast<u64>(conf.fastmem_pointer),
+        .recompile_on_fastmem_failure = conf.recompile_on_fastmem_failure,
+        .fastmem_address_space_bits = 32,
+        .silently_mirror_fastmem = true,
+
         .wall_clock_cntpct = conf.wall_clock_cntpct,
         .enable_cycle_counting = conf.enable_cycle_counting,
diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp
index 9731b341..a54ee345 100644
--- a/src/dynarmic/backend/arm64/a64_address_space.cpp
+++ b/src/dynarmic/backend/arm64/a64_address_space.cpp
@@ -406,6 +406,9 @@ void A64AddressSpace::EmitPrelude() {
     if (conf.page_table) {
         code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
     }
+    if (conf.fastmem_pointer) {
+        code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
+    }
 
     if (conf.enable_cycle_counting) {
         code.BL(prelude_info.get_ticks_remaining);
@@ -434,6 +437,9 @@ void A64AddressSpace::EmitPrelude() {
     if (conf.page_table) {
         code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
     }
+    if (conf.fastmem_pointer) {
+        code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
+    }
 
     if (conf.enable_cycle_counting) {
         code.MOV(Xticks, 1);
@@ -536,6 +542,11 @@ EmitConfig A64AddressSpace::GetEmitConfig() {
         .detect_misaligned_access_via_page_table = conf.detect_misaligned_access_via_page_table,
         .only_detect_misalignment_via_page_table_on_page_boundary = conf.only_detect_misalignment_via_page_table_on_page_boundary,
 
+        .fastmem_pointer = mcl::bit_cast<u64>(conf.fastmem_pointer),
+        .recompile_on_fastmem_failure = conf.recompile_on_fastmem_failure,
+        .fastmem_address_space_bits = conf.fastmem_address_space_bits,
+        .silently_mirror_fastmem = conf.silently_mirror_fastmem,
+
         .wall_clock_cntpct = conf.wall_clock_cntpct,
         .enable_cycle_counting = conf.enable_cycle_counting,
diff --git a/src/dynarmic/backend/arm64/address_space.cpp b/src/dynarmic/backend/arm64/address_space.cpp
index 2047df79..a314a419 100644
--- a/src/dynarmic/backend/arm64/address_space.cpp
+++ b/src/dynarmic/backend/arm64/address_space.cpp
@@ -300,13 +300,15 @@ FakeCall AddressSpace::FastmemCallback(u64 host_pc) {
             goto fail;
         }
 
+        const auto result = iter->second.fc;
+
         if (iter->second.recompile) {
             const auto marker = iter->second.marker;
             fastmem_manager.MarkDoNotFastmem(marker);
             InvalidateBasicBlocks({std::get<0>(marker)});
         }
 
-        return iter->second.fc;
+        return result;
     }
 
 fail:
diff --git a/src/dynarmic/backend/arm64/emit_arm64.h b/src/dynarmic/backend/arm64/emit_arm64.h
index 391193aa..c15d9266 100644
--- a/src/dynarmic/backend/arm64/emit_arm64.h
+++ b/src/dynarmic/backend/arm64/emit_arm64.h
@@ -128,6 +128,12 @@ struct EmitConfig {
     u8 detect_misaligned_access_via_page_table;
     bool only_detect_misalignment_via_page_table_on_page_boundary;
 
+    // Fastmem
+    u64 fastmem_pointer;
+    bool recompile_on_fastmem_failure;
+    size_t fastmem_address_space_bits;
+    bool silently_mirror_fastmem;
+
     // Timing
     bool wall_clock_cntpct;
     bool enable_cycle_counting;
diff --git a/src/dynarmic/backend/arm64/emit_arm64_memory.cpp b/src/dynarmic/backend/arm64/emit_arm64_memory.cpp
index 615022ab..1f36a7a0 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_memory.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_memory.cpp
@@ -5,13 +5,16 @@
 #include "dynarmic/backend/arm64/emit_arm64_memory.h"
 
+#include <optional>
 #include <utility>
+#include <mcl/bit_cast.hpp>
 
 #include "dynarmic/backend/arm64/abi.h"
 #include "dynarmic/backend/arm64/emit_arm64.h"
 #include "dynarmic/backend/arm64/emit_context.h"
+#include "dynarmic/backend/arm64/fastmem.h"
 #include "dynarmic/backend/arm64/fpsr_manager.h"
 #include "dynarmic/backend/arm64/reg_alloc.h"
 #include "dynarmic/interface/halt_reason.h"
@@ -249,7 +252,7 @@ void EmitDetectMisalignedVAddr(oaknut::CodeGenerator& code, EmitContext& ctx, oa
 // Address to read/write = [ret0 + ret1], ret0 is always Xscratch0 and ret1 is either Xaddr or Xscratch1
 // Trashes NZCV
 template<size_t bitsize>
-std::pair<oaknut::XReg, oaknut::XReg> EmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
+std::pair<oaknut::XReg, oaknut::XReg> InlinePageTableEmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
     const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
     const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;
@@ -280,23 +283,26 @@ std::pair<oaknut::XReg, oaknut::XReg> EmitVAddrLookup(oaknut::CodeGenerator& cod
 }
 
 template<size_t bitsize>
-const void* EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered) {
-    const void* fastmem_location = code.ptr();
+CodePtr EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered, bool extend32 = false) {
+    const auto ext = extend32 ? oaknut::IndexExt::UXTW : oaknut::IndexExt::LSL;
+    const auto Roffset = extend32 ? oaknut::RReg{Xoffset.toW()} : oaknut::RReg{Xoffset};
+
+    const CodePtr fastmem_location = code.ptr();
     switch (bitsize) {
     case 8:
-        code.LDRB(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        code.LDRB(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 16:
-        code.LDRH(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        code.LDRH(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 32:
-        code.LDR(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        code.LDR(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 64:
-        code.LDR(oaknut::XReg{value_idx}, Xbase, Xoffset);
+        code.LDR(oaknut::XReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 128:
-        code.LDR(oaknut::QReg{value_idx}, Xbase, Xoffset);
+        code.LDR(oaknut::QReg{value_idx}, Xbase, Roffset, ext);
         break;
     default:
         ASSERT_FALSE("Invalid bitsize");
@@ -311,28 +317,31 @@ const void* EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XR
 
 template<size_t bitsize>
-const void* EmitMemoryStr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered) {
+CodePtr EmitMemoryStr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered, bool extend32 = false) {
+    const auto ext = extend32 ? oaknut::IndexExt::UXTW : oaknut::IndexExt::LSL;
+    const auto Roffset = extend32 ? oaknut::RReg{Xoffset.toW()} : oaknut::RReg{Xoffset};
+
     if (ordered) {
         // TODO: Use STLR
         code.DMB(oaknut::BarrierOp::ISH);
     }
 
-    const void* fastmem_location = code.ptr();
+    const CodePtr fastmem_location = code.ptr();
     switch (bitsize) {
     case 8:
-        code.STRB(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        code.STRB(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 16:
-        code.STRH(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        code.STRH(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 32:
-        code.STR(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        code.STR(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 64:
-        code.STR(oaknut::XReg{value_idx}, Xbase, Xoffset);
+        code.STR(oaknut::XReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 128:
-        code.STR(oaknut::QReg{value_idx}, Xbase, Xoffset);
+        code.STR(oaknut::QReg{value_idx}, Xbase, Roffset, ext);
         break;
     default:
         ASSERT_FALSE("Invalid bitsize");
@@ -364,7 +373,7 @@ void InlinePageTableEmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx
 
     SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
 
-    const auto [Xbase, Xoffset] = EmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
+    const auto [Xbase, Xoffset] = InlinePageTableEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
     EmitMemoryLdr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);
 
     ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end] {
@@ -404,7 +413,7 @@ void InlinePageTableEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ct
 
     SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
 
-    const auto [Xbase, Xoffset] = EmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
+    const auto [Xbase, Xoffset] = InlinePageTableEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
     EmitMemoryStr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);
 
     ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end] {
@@ -430,11 +439,155 @@ void InlinePageTableEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ct
     code.l(*end);
 }
 
+std::optional<DoNotFastmemMarker> ShouldFastmem(EmitContext& ctx, IR::Inst* inst) {
+    if (!ctx.conf.fastmem_pointer || !ctx.fastmem.SupportsFastmem()) {
+        return std::nullopt;
+    }
+
+    const auto inst_offset = std::distance(ctx.block.begin(), IR::Block::iterator(inst));
+    const auto marker = std::make_tuple(ctx.block.Location(), inst_offset);
+    if (ctx.fastmem.ShouldFastmem(marker)) {
+        return marker;
+    }
+    return std::nullopt;
+}
+
+inline bool ShouldExt32(EmitContext& ctx) {
+    return ctx.conf.fastmem_address_space_bits == 32 && ctx.conf.silently_mirror_fastmem;
+}
+
+// May use Xscratch0 as scratch register
+// Address to read/write = [ret0 + ret1], ret0 is always Xfastmem and ret1 is either Xaddr or Xscratch0
+// Trashes NZCV
+template<size_t bitsize>
+std::pair<oaknut::XReg, oaknut::XReg> FastmemEmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
+    if (ctx.conf.fastmem_address_space_bits == 64 || ShouldExt32(ctx)) {
+        return std::make_pair(Xfastmem, Xaddr);
+    }
+
+    if (ctx.conf.silently_mirror_fastmem) {
+        code.UBFX(Xscratch0, Xaddr, 0, ctx.conf.fastmem_address_space_bits);
+        return std::make_pair(Xfastmem, Xscratch0);
+    }
+
+    code.LSR(Xscratch0, Xaddr, ctx.conf.fastmem_address_space_bits);
+    code.CBNZ(Xscratch0, *fallback);
+    return std::make_pair(Xfastmem, Xaddr);
+}
+
+template<size_t bitsize>
+void FastmemEmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, DoNotFastmemMarker marker) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
+    auto Rvalue = [&] {
+        if constexpr (bitsize == 128) {
+            return ctx.reg_alloc.WriteQ(inst);
+        } else {
+            return ctx.reg_alloc.WriteReg<std::max<std::size_t>(bitsize, 32)>(inst);
+        }
+    }();
+    const bool ordered = IsOrdered(args[2].GetImmediateAccType());
+    ctx.fpsr.Spill();
+    ctx.reg_alloc.SpillFlags();
+    RegAlloc::Realize(Xaddr, Rvalue);
+
+    SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
+
+    const auto [Xbase, Xoffset] = FastmemEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
+    const auto fastmem_location = EmitMemoryLdr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered, ShouldExt32(ctx));
+
+    ctx.deferred_emits.emplace_back([&code, &ctx, inst, marker, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end, fastmem_location] {
+        ctx.ebi.fastmem_patch_info.emplace(
+            fastmem_location - ctx.ebi.entry_point,
+            FastmemPatchInfo{
+                .marker = marker,
+                .fc = FakeCall{
+                    .call_pc = mcl::bit_cast<u64>(code.ptr<void*>()),
+                    .ret_pc = 0,
+                },
+                .recompile = ctx.conf.recompile_on_fastmem_failure,
+            });
+
+        code.l(*fallback);
+        code.MOV(Xscratch0, Xaddr);
+        EmitRelocation(code, ctx, WrappedReadMemoryLinkTarget(bitsize));
+        if (ordered) {
+            code.DMB(oaknut::BarrierOp::ISH);
+        }
+        if constexpr (bitsize == 128) {
+            code.MOV(Rvalue.B16(), Q0.B16());
+        } else {
+            code.MOV(Rvalue.toX(), Xscratch0);
+        }
+        ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
+        code.B(*end);
+    });
+
+    code.l(*end);
+}
+
+template<size_t bitsize>
+void FastmemEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, DoNotFastmemMarker marker) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
+    auto Rvalue = [&] {
+        if constexpr (bitsize == 128) {
+            return ctx.reg_alloc.ReadQ(args[2]);
+        } else {
+            return ctx.reg_alloc.ReadReg<std::max<std::size_t>(bitsize, 32)>(args[2]);
+        }
+    }();
+    const bool ordered = IsOrdered(args[3].GetImmediateAccType());
+    ctx.fpsr.Spill();
+    ctx.reg_alloc.SpillFlags();
+    RegAlloc::Realize(Xaddr, Rvalue);
+
+    SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
+
+    const auto [Xbase, Xoffset] = FastmemEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
+    const auto fastmem_location = EmitMemoryStr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered, ShouldExt32(ctx));
+
+    ctx.deferred_emits.emplace_back([&code, &ctx, inst, marker, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end, fastmem_location] {
+        ctx.ebi.fastmem_patch_info.emplace(
+            fastmem_location - ctx.ebi.entry_point,
+            FastmemPatchInfo{
+                .marker = marker,
+                .fc = FakeCall{
+                    .call_pc = mcl::bit_cast<u64>(code.ptr<void*>()),
+                    .ret_pc = 0,
+                },
+                .recompile = ctx.conf.recompile_on_fastmem_failure,
+            });
+
+        code.l(*fallback);
+        if constexpr (bitsize == 128) {
+            code.MOV(Xscratch0, Xaddr);
+            code.MOV(Q0.B16(), Rvalue.B16());
+        } else {
+            code.MOV(Xscratch0, Xaddr);
+            code.MOV(Xscratch1, Rvalue.toX());
+        }
+        if (ordered) {
+            code.DMB(oaknut::BarrierOp::ISH);
+        }
+        EmitRelocation(code, ctx, WrappedWriteMemoryLinkTarget(bitsize));
+        if (ordered) {
+            code.DMB(oaknut::BarrierOp::ISH);
+        }
+        ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
+        code.B(*end);
+    });
+
+    code.l(*end);
+}
+
 }  // namespace
 
 template<size_t bitsize>
 void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    if (ctx.conf.page_table_pointer != 0) {
+    if (const auto marker = ShouldFastmem(ctx, inst)) {
+        FastmemEmitReadMemory<bitsize>(code, ctx, inst, *marker);
+    } else if (ctx.conf.page_table_pointer != 0) {
         InlinePageTableEmitReadMemory<bitsize>(code, ctx, inst);
     } else {
         CallbackOnlyEmitReadMemory<bitsize>(code, ctx, inst);
@@ -448,7 +601,9 @@ void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::
 
 template<size_t bitsize>
 void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    if (ctx.conf.page_table_pointer != 0) {
+    if (const auto marker = ShouldFastmem(ctx, inst)) {
+        FastmemEmitWriteMemory<bitsize>(code, ctx, inst, *marker);
+    } else if (ctx.conf.page_table_pointer != 0) {
         InlinePageTableEmitWriteMemory<bitsize>(code, ctx, inst);
     } else {
         CallbackOnlyEmitWriteMemory<bitsize>(code, ctx, inst);
diff --git a/src/dynarmic/backend/arm64/fastmem.h b/src/dynarmic/backend/arm64/fastmem.h
index fd6d91f7..3b98b056 100644
--- a/src/dynarmic/backend/arm64/fastmem.h
+++ b/src/dynarmic/backend/arm64/fastmem.h
@@ -36,8 +36,12 @@ public:
     explicit FastmemManager(ExceptionHandler& eh)
         : exception_handler(eh) {}
 
+    bool SupportsFastmem() const {
+        return exception_handler.SupportsFastmem();
+    }
+
     bool ShouldFastmem(DoNotFastmemMarker marker) const {
-        return exception_handler.SupportsFastmem() && do_not_fastmem.count(marker) == 0;
+        return do_not_fastmem.count(marker) == 0;
     }
 
     void MarkDoNotFastmem(DoNotFastmemMarker marker) {
diff --git a/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc
index ead088c3..4c807ed6 100644
--- a/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc
+++ b/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc
@@ -37,16 +37,18 @@ FakeCall AxxEmitX64::FastmemCallback(u64 rip_) {
         ASSERT_FALSE("iter != fastmem_patch_info.end()");
     }
 
+    FakeCall result{
+        .call_rip = iter->second.callback,
+        .ret_rip = iter->second.resume_rip,
+    };
+
     if (iter->second.recompile) {
         const auto marker = iter->second.marker;
         do_not_fastmem.emplace(marker);
         InvalidateBasicBlocks({std::get<0>(marker)});
    }
 
-    return FakeCall{
-        .call_rip = iter->second.callback,
-        .ret_rip = iter->second.resume_rip,
-    };
+    return result;
 }
 
 template
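Usage note (illustrative, not part of the patch): the fastmem path assumes the embedder reserves a host arena that mirrors the guest address space and hands its base to the JIT through the user config fields this patch reads (fastmem_pointer, recompile_on_fastmem_failure). EmitPrelude() loads that base into Xfastmem, guest addresses are then used directly as LDR/STR offsets, and any access that faults is routed through FastmemCallback() and patched into a callback call. A minimal host-side sketch, assuming a 64-bit Linux host and that fastmem_pointer is a raw pointer (as the bit_cast<u64> in the preludes suggests); MakeFastmemConfig and the mmap-based arena are illustrative only:

#include <cassert>
#include <cstddef>

#include <sys/mman.h>

#include <dynarmic/interface/A32/config.h>

Dynarmic::A32::UserConfig MakeFastmemConfig(Dynarmic::A32::UserCallbacks* callbacks) {
    // Reserve a 4 GiB PROT_NONE arena covering the whole 32-bit guest address space.
    // Guest RAM regions would be mapped over parts of this reservation elsewhere;
    // anything left unmapped faults and falls back to the MemoryRead*/MemoryWrite*
    // callbacks via FastmemCallback().
    void* const arena = mmap(nullptr, std::size_t{1} << 32, PROT_NONE,
                             MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
    assert(arena != MAP_FAILED);

    Dynarmic::A32::UserConfig config;
    config.callbacks = callbacks;
    config.fastmem_pointer = arena;              // loaded into Xfastmem by EmitPrelude()
    config.recompile_on_fastmem_failure = true;  // faulting blocks are recompiled without fastmem
    return config;
}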