From c01d27bb7cba328d43f71e1837170bb0b8fc4938 Mon Sep 17 00:00:00 2001
From: Merry
Date: Sat, 10 Dec 2022 18:45:29 +0000
Subject: [PATCH] backend/arm64: Inline page table implementation

Co-Authored-By: Liam
---
 .../backend/arm64/a32_address_space.cpp      |   6 +
 .../backend/arm64/a64_address_space.cpp      |   6 +
 .../backend/arm64/emit_arm64_memory.cpp      | 250 +++++++++++++++++-
 3 files changed, 260 insertions(+), 2 deletions(-)

diff --git a/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/backend/arm64/a32_address_space.cpp
index d1169fdd..18d6d624 100644
--- a/src/dynarmic/backend/arm64/a32_address_space.cpp
+++ b/src/dynarmic/backend/arm64/a32_address_space.cpp
@@ -157,6 +157,9 @@ void A32AddressSpace::EmitPrelude() {
     code.MOV(X19, X0);
     code.MOV(Xstate, X1);
     code.MOV(Xhalt, X2);
+    if (conf.page_table) {
+        code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
+    }
 
     if (conf.enable_cycle_counting) {
         code.BL(prelude_info.get_ticks_remaining);
@@ -183,6 +186,9 @@ void A32AddressSpace::EmitPrelude() {
     code.MOV(X19, X0);
     code.MOV(Xstate, X1);
     code.MOV(Xhalt, X2);
+    if (conf.page_table) {
+        code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
+    }
 
     if (conf.enable_cycle_counting) {
         code.MOV(Xticks, 1);
diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp
index c52f4d05..c9485ab9 100644
--- a/src/dynarmic/backend/arm64/a64_address_space.cpp
+++ b/src/dynarmic/backend/arm64/a64_address_space.cpp
@@ -279,6 +279,9 @@ void A64AddressSpace::EmitPrelude() {
     code.MOV(X19, X0);
     code.MOV(Xstate, X1);
     code.MOV(Xhalt, X2);
+    if (conf.page_table) {
+        code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
+    }
 
     if (conf.enable_cycle_counting) {
         code.BL(prelude_info.get_ticks_remaining);
@@ -304,6 +307,9 @@ void A64AddressSpace::EmitPrelude() {
     code.MOV(X19, X0);
     code.MOV(Xstate, X1);
     code.MOV(Xhalt, X2);
+    if (conf.page_table) {
+        code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
+    }
 
     if (conf.enable_cycle_counting) {
         code.MOV(Xticks, 1);
diff --git a/src/dynarmic/backend/arm64/emit_arm64_memory.cpp b/src/dynarmic/backend/arm64/emit_arm64_memory.cpp
index f51fd016..4de30869 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_memory.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_memory.cpp
@@ -5,12 +5,16 @@
 
 #include "dynarmic/backend/arm64/emit_arm64_memory.h"
 
+#include <utility>
+
 #include <mcl/bit_cast.hpp>
 
 #include "dynarmic/backend/arm64/abi.h"
 #include "dynarmic/backend/arm64/emit_arm64.h"
 #include "dynarmic/backend/arm64/emit_context.h"
+#include "dynarmic/backend/arm64/fpsr_manager.h"
 #include "dynarmic/backend/arm64/reg_alloc.h"
+#include "dynarmic/interface/halt_reason.h"
 #include "dynarmic/ir/acc_type.h"
 #include "dynarmic/ir/basic_block.h"
 #include "dynarmic/ir/microinstruction.h"
@@ -168,11 +172,249 @@ void CallbackOnlyEmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitConte
     ctx.reg_alloc.DefineAsRegister(inst, X0);
 }
 
+constexpr size_t page_bits = 12;
+constexpr size_t page_size = 1 << page_bits;
+constexpr size_t page_mask = (1 << page_bits) - 1;
+
+// This function may use Xscratch0 as a scratch register
+// Trashes NZCV
+template<size_t bitsize>
+void EmitDetectMisalignedVAddr(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
+    static_assert(bitsize == 8 || bitsize == 16 || bitsize == 32 || bitsize == 64 || bitsize == 128);
+
+    if (bitsize == 8 || (ctx.conf.detect_misaligned_access_via_page_table & bitsize) == 0) {
+        return;
+    }
+
+    if (!ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
+        const u64 align_mask = []() -> u64 {
+            switch (bitsize) {
+            case 16:
+                return 0b1;
+            case 32:
+                return 0b11;
+            case 64:
+                return 0b111;
+            case 128:
+                return 0b1111;
+            default:
+                UNREACHABLE();
+            }
+        }();
+
+        code.TST(Xaddr, align_mask);
+        code.B(NE, *fallback);
+    } else {
+        // If (addr & page_mask) > page_size - byte_size, use fallback.
+        code.AND(Xscratch0, Xaddr, page_mask);
+        code.CMP(Xscratch0, page_size - bitsize / 8);
+        code.B(HI, *fallback);
+    }
+}
+
+// Outputs Xscratch0 = page_table[addr >> page_bits]
+// May use Xscratch1 as scratch register
+// Address to read/write = [ret0 + ret1], ret0 is always Xscratch0 and ret1 is either Xaddr or Xscratch1
+// Trashes NZCV
+template<size_t bitsize>
+std::pair<oaknut::XReg, oaknut::XReg> EmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
+    const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
+    const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;
+
+    EmitDetectMisalignedVAddr<bitsize>(code, ctx, Xaddr, fallback);
+
+    if (ctx.conf.silently_mirror_page_table || unused_top_bits == 0) {
+        code.UBFX(Xscratch0, Xaddr, page_bits, valid_page_index_bits);
+    } else {
+        code.LSR(Xscratch0, Xaddr, page_bits);
+        code.TST(Xscratch0, u64(~u64(0)) << valid_page_index_bits);
+        code.B(NE, *fallback);
+    }
+
+    code.LDR(Xscratch0, Xpagetable, Xscratch0, LSL, 3);
+
+    if (ctx.conf.page_table_pointer_mask_bits != 0) {
+        const u64 mask = u64(~u64(0)) << ctx.conf.page_table_pointer_mask_bits;
+        code.AND(Xscratch0, Xscratch0, mask);
+    }
+
+    code.CBZ(Xscratch0, *fallback);
+
+    if (ctx.conf.absolute_offset_page_table) {
+        return std::make_pair(Xscratch0, Xaddr);
+    }
+    code.AND(Xscratch1, Xaddr, page_mask);
+    return std::make_pair(Xscratch0, Xscratch1);
+}
+
+template<size_t bitsize>
+const void* EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered) {
+    const void* fastmem_location = code.ptr<void*>();
+    switch (bitsize) {
+    case 8:
+        code.LDRB(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        break;
+    case 16:
+        code.LDRH(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        break;
+    case 32:
+        code.LDR(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        break;
+    case 64:
+        code.LDR(oaknut::XReg{value_idx}, Xbase, Xoffset);
+        break;
+    case 128:
+        code.LDR(oaknut::QReg{value_idx}, Xbase, Xoffset);
+        break;
+    default:
+        ASSERT_FALSE("Invalid bitsize");
+    }
+
+    if (ordered) {
+        // TODO: Use LDAR
+        code.DMB(oaknut::BarrierOp::ISH);
+    }
+
+    return fastmem_location;
+}
+
+template<size_t bitsize>
+const void* EmitMemoryStr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered) {
+    if (ordered) {
+        // TODO: Use STLR
+        code.DMB(oaknut::BarrierOp::ISH);
+    }
+
+    const void* fastmem_location = code.ptr<void*>();
+    switch (bitsize) {
+    case 8:
+        code.STRB(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        break;
+    case 16:
+        code.STRH(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        break;
+    case 32:
+        code.STR(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        break;
+    case 64:
+        code.STR(oaknut::XReg{value_idx}, Xbase, Xoffset);
+        break;
+    case 128:
+        code.STR(oaknut::QReg{value_idx}, Xbase, Xoffset);
+        break;
+    default:
+        ASSERT_FALSE("Invalid bitsize");
+    }
+
+    if (ordered) {
+        // TODO: Use STLR
+        code.DMB(oaknut::BarrierOp::ISH);
+    }
+
+    return fastmem_location;
+}
+
+template<size_t bitsize>
+void InlinePageTableEmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
+    auto Rvalue = [&] {
+        if constexpr (bitsize == 128) {
+            return ctx.reg_alloc.WriteQ(inst);
+        } else {
+            return ctx.reg_alloc.WriteReg<std::max<std::size_t>(bitsize, 32)>(inst);
+        }
+    }();
+    const bool ordered = IsOrdered(args[2].GetImmediateAccType());
+    ctx.fpsr.Spill();
+    ctx.reg_alloc.SpillFlags();
+    RegAlloc::Realize(Xaddr, Rvalue);
+
+    SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
+
+    const auto [Xbase, Xoffset] = EmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
+    EmitMemoryLdr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);
+
+    ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end] {
+        const u64 save_regs = ABI_CALLER_SAVE & ~((bitsize == 128 ? (1ull << 32) : 1ull) << Rvalue.index());
+
+        code.l(*fallback);
+        ABI_PushRegisters(code, save_regs, 0);
+        code.MOV(X1, Xaddr);
+        EmitRelocation(code, ctx, ReadMemoryLinkTarget(bitsize));
+        if (ordered) {
+            code.DMB(oaknut::BarrierOp::ISH);
+        }
+        if constexpr (bitsize == 128) {
+            code.MOV(Rvalue.B16(), Q0.B16());
+        } else {
+            code.MOV(Rvalue.toX(), X0);
+        }
+        ABI_PopRegisters(code, save_regs, 0);
+        ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
+        code.B(*end);
+    });
+
+    code.l(*end);
+}
+
+template<size_t bitsize>
+void InlinePageTableEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
+    auto Rvalue = [&] {
+        if constexpr (bitsize == 128) {
+            return ctx.reg_alloc.ReadQ(args[2]);
+        } else {
+            return ctx.reg_alloc.ReadReg<std::max<std::size_t>(bitsize, 32)>(args[2]);
+        }
+    }();
+    const bool ordered = IsOrdered(args[3].GetImmediateAccType());
+    ctx.fpsr.Spill();
+    ctx.reg_alloc.SpillFlags();
+    RegAlloc::Realize(Xaddr, Rvalue);
+
+    SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
+
+    const auto [Xbase, Xoffset] = EmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
+    EmitMemoryStr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);
+
+    ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end] {
+        const u64 save_regs = ABI_CALLER_SAVE;
+
+        code.l(*fallback);
+        ABI_PushRegisters(code, save_regs, 0);
+        if constexpr (bitsize == 128) {
+            code.MOV(X1, Xaddr);
+            code.MOV(Q0.B16(), Rvalue.B16());
+        } else {
+            code.MOV(Xscratch0, Xaddr);
+            code.MOV(Xscratch1, Rvalue.toX());
+            code.MOV(X1, Xscratch0);
+            code.MOV(X2, Xscratch1);
+        }
+        if (ordered) {
+            code.DMB(oaknut::BarrierOp::ISH);
+        }
+        EmitRelocation(code, ctx, WriteMemoryLinkTarget(bitsize));
+        if (ordered) {
+            code.DMB(oaknut::BarrierOp::ISH);
+        }
+        ABI_PopRegisters(code, save_regs, 0);
+        ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
+        code.B(*end);
+    });
+
+    code.l(*end);
+}
+
 }  // namespace
 
 template<size_t bitsize>
 void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    CallbackOnlyEmitReadMemory<bitsize>(code, ctx, inst);
+    if (ctx.conf.page_table_pointer != 0) {
+        InlinePageTableEmitReadMemory<bitsize>(code, ctx, inst);
+    } else {
+        CallbackOnlyEmitReadMemory<bitsize>(code, ctx, inst);
+    }
 }
 
 template<size_t bitsize>
@@ -182,7 +424,11 @@ void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::
 
 template<size_t bitsize>
 void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    CallbackOnlyEmitWriteMemory<bitsize>(code, ctx, inst);
+    if (ctx.conf.page_table_pointer != 0) {
+        InlinePageTableEmitWriteMemory<bitsize>(code, ctx, inst);
+    } else {
+        CallbackOnlyEmitWriteMemory<bitsize>(code, ctx, inst);
+    }
 }
 
 template<size_t bitsize>