backend/arm64: Inline page table implementation

Co-Authored-By: Liam <byteslice@airmail.cc>
This commit is contained in:
Merry 2022-12-10 18:45:29 +00:00
parent c4226ba25b
commit c01d27bb7c
3 changed files with 260 additions and 2 deletions

View file

@ -157,6 +157,9 @@ void A32AddressSpace::EmitPrelude() {
code.MOV(X19, X0);
code.MOV(Xstate, X1);
code.MOV(Xhalt, X2);
if (conf.page_table) {
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
}
if (conf.enable_cycle_counting) {
code.BL(prelude_info.get_ticks_remaining);
@ -183,6 +186,9 @@ void A32AddressSpace::EmitPrelude() {
code.MOV(X19, X0);
code.MOV(Xstate, X1);
code.MOV(Xhalt, X2);
if (conf.page_table) {
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
}
if (conf.enable_cycle_counting) {
code.MOV(Xticks, 1);

View file

@ -279,6 +279,9 @@ void A64AddressSpace::EmitPrelude() {
code.MOV(X19, X0);
code.MOV(Xstate, X1);
code.MOV(Xhalt, X2);
if (conf.page_table) {
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
}
if (conf.enable_cycle_counting) {
code.BL(prelude_info.get_ticks_remaining);
@ -304,6 +307,9 @@ void A64AddressSpace::EmitPrelude() {
code.MOV(X19, X0);
code.MOV(Xstate, X1);
code.MOV(Xhalt, X2);
if (conf.page_table) {
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
}
if (conf.enable_cycle_counting) {
code.MOV(Xticks, 1);

View file

@ -5,12 +5,16 @@
#include "dynarmic/backend/arm64/emit_arm64_memory.h"
#include <utility>
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/interface/halt_reason.h"
#include "dynarmic/ir/acc_type.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
@ -168,12 +172,250 @@ void CallbackOnlyEmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitConte
ctx.reg_alloc.DefineAsRegister(inst, X0);
}
// Page geometry used by the inline page table code below: 2^12 = 4096-byte pages.
constexpr size_t page_bits = 12;
// Number of bytes covered by one page.
constexpr size_t page_size = size_t{1} << page_bits;
// Mask selecting the offset-within-page bits of an address.
constexpr size_t page_mask = page_size - 1;
// Emits a check that branches to `fallback` when the guest address in Xaddr is
// considered misaligned for an access of `bitsize` bits.
//
// Nothing is emitted for 8-bit accesses, or when the bit for this access size is
// not set in ctx.conf.detect_misaligned_access_via_page_table.
//
// Two detection modes, selected by
// ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary:
//   - set:   only accesses whose in-page offset exceeds page_size - byte_size
//            (i.e. that would run past the end of the page) take the fallback;
//   - clear: any address with a non-zero low log2(byte_size) bits takes the fallback.
//
// This function may use Xscratch0 as a scratch register
// Trashes NZCV
template<size_t bitsize>
void EmitDetectMisalignedVAddr(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
    static_assert(bitsize == 8 || bitsize == 16 || bitsize == 32 || bitsize == 64 || bitsize == 128);

    constexpr size_t access_bytes = bitsize / 8;

    const bool detection_enabled = (ctx.conf.detect_misaligned_access_via_page_table & bitsize) != 0;
    if (bitsize == 8 || !detection_enabled) {
        return;
    }

    if (ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
        // If (addr & page_mask) > page_size - byte_size, use fallback.
        code.AND(Xscratch0, Xaddr, page_mask);
        code.CMP(Xscratch0, page_size - access_bytes);
        code.B(HI, *fallback);
        return;
    }

    // Access sizes are powers of two, so (bytes - 1) selects exactly the address
    // bits that must be zero for natural alignment: 0b1, 0b11, 0b111, 0b1111.
    const u64 align_mask = access_bytes - 1;
    code.TST(Xaddr, align_mask);
    code.B(NE, *fallback);
}
// Emits the inline page table lookup for the guest address held in Xaddr.
//
// Outputs Xscratch0 = page_table[addr >> page_bits] (with the low
// ctx.conf.page_table_pointer_mask_bits bits cleared when that setting is non-zero).
// May use Xscratch1 as scratch register
// Address to read/write = [ret0 + ret1], ret0 is always Xscratch0 and ret1 is either Xaddr or Xscratch1
// (Xaddr when ctx.conf.absolute_offset_page_table is set, otherwise Xscratch1 = addr & page_mask).
//
// Control reaches `fallback` when the misalignment check fires, when the address has
// bits set above the configured address space (unless mirroring is enabled or the
// address space is the full 64 bits), or when the loaded page table entry is zero.
// Trashes NZCV
template<size_t bitsize>
std::pair<oaknut::XReg, oaknut::XReg> EmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
    EmitDetectMisalignedVAddr<bitsize>(code, ctx, Xaddr, fallback);

    const size_t address_space_bits = ctx.conf.page_table_address_space_bits;
    const size_t page_index_bits = address_space_bits - page_bits;
    const bool no_unused_top_bits = (64 - address_space_bits) == 0;

    if (ctx.conf.silently_mirror_page_table || no_unused_top_bits) {
        // Extract just the valid page index bits; anything above them is ignored.
        code.UBFX(Xscratch0, Xaddr, page_bits, page_index_bits);
    } else {
        // Any bit set above the valid page index means the address is out of range.
        const u64 out_of_range_bits = u64(~u64(0)) << page_index_bits;
        code.LSR(Xscratch0, Xaddr, page_bits);
        code.TST(Xscratch0, out_of_range_bits);
        code.B(NE, *fallback);
    }

    // Each page table entry is 8 bytes wide, hence the index scaled by LSL #3.
    code.LDR(Xscratch0, Xpagetable, Xscratch0, LSL, 3);

    if (ctx.conf.page_table_pointer_mask_bits != 0) {
        const u64 pointer_mask = u64(~u64(0)) << ctx.conf.page_table_pointer_mask_bits;
        code.AND(Xscratch0, Xscratch0, pointer_mask);
    }

    // A zero entry cannot be accessed inline.
    code.CBZ(Xscratch0, *fallback);

    if (!ctx.conf.absolute_offset_page_table) {
        code.AND(Xscratch1, Xaddr, page_mask);
        return std::make_pair(Xscratch0, Xscratch1);
    }
    return std::make_pair(Xscratch0, Xaddr);
}
// Emits a `bitsize`-bit load from [Xbase + Xoffset] into the register with index
// `value_idx`: a W register for 8/16/32 bits, an X register for 64 bits and a
// Q register for 128 bits.
// When `ordered` is set, a DMB ISH barrier is emitted after the load.
// Returns the code pointer sampled immediately before the load instruction.
template<std::size_t bitsize>
const void* EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered) {
    const void* const load_location = code.ptr<void*>();

    if constexpr (bitsize == 8) {
        code.LDRB(oaknut::WReg{value_idx}, Xbase, Xoffset);
    } else if constexpr (bitsize == 16) {
        code.LDRH(oaknut::WReg{value_idx}, Xbase, Xoffset);
    } else if constexpr (bitsize == 32) {
        code.LDR(oaknut::WReg{value_idx}, Xbase, Xoffset);
    } else if constexpr (bitsize == 64) {
        code.LDR(oaknut::XReg{value_idx}, Xbase, Xoffset);
    } else if constexpr (bitsize == 128) {
        code.LDR(oaknut::QReg{value_idx}, Xbase, Xoffset);
    } else {
        ASSERT_FALSE("Invalid bitsize");
    }

    if (ordered) {
        // TODO: Use LDAR
        code.DMB(oaknut::BarrierOp::ISH);
    }

    return load_location;
}
// Emits a `bitsize`-bit store of the register with index `value_idx` to
// [Xbase + Xoffset]: a W register for 8/16/32 bits, an X register for 64 bits
// and a Q register for 128 bits.
// When `ordered` is set, the store is bracketed by DMB ISH barriers (one before,
// one after).
// Returns the code pointer sampled immediately before the store instruction
// (i.e. after the leading barrier, if any).
template<std::size_t bitsize>
const void* EmitMemoryStr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered) {
    if (ordered) {
        // TODO: Use STLR
        code.DMB(oaknut::BarrierOp::ISH);
    }

    const void* const store_location = code.ptr<void*>();

    if constexpr (bitsize == 8) {
        code.STRB(oaknut::WReg{value_idx}, Xbase, Xoffset);
    } else if constexpr (bitsize == 16) {
        code.STRH(oaknut::WReg{value_idx}, Xbase, Xoffset);
    } else if constexpr (bitsize == 32) {
        code.STR(oaknut::WReg{value_idx}, Xbase, Xoffset);
    } else if constexpr (bitsize == 64) {
        code.STR(oaknut::XReg{value_idx}, Xbase, Xoffset);
    } else if constexpr (bitsize == 128) {
        code.STR(oaknut::QReg{value_idx}, Xbase, Xoffset);
    } else {
        ASSERT_FALSE("Invalid bitsize");
    }

    if (ordered) {
        // TODO: Use STLR
        code.DMB(oaknut::BarrierOp::ISH);
    }

    return store_location;
}
// Emits a `bitsize`-bit guest memory read for `inst` using the inline page table.
//
// Fast path (emitted in place): page table lookup followed by a direct load into the
// register allocated for the result of `inst`.
// Slow path (queued on ctx.deferred_emits and reached through the `fallback` label):
// saves caller-saved registers, passes the guest address in X1, invokes
// ReadMemoryLinkTarget(bitsize) through EmitRelocation, moves the result from X0
// (Q0 for 128-bit reads) into the result register, restores registers, runs
// ctx.conf.emit_check_memory_abort and branches back to `end`.
//
// args[1] is the guest address; args[2] is the immediate access type, which determines
// whether the access is ordered.
template<size_t bitsize>
void InlinePageTableEmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Xaddr = ctx.reg_alloc.ReadX(operands[1]);
    auto Rvalue = [&] {
        if constexpr (bitsize != 128) {
            // Sub-32-bit results still occupy a 32-bit register.
            return ctx.reg_alloc.WriteReg<std::max<std::size_t>(bitsize, 32)>(inst);
        } else {
            return ctx.reg_alloc.WriteQ(inst);
        }
    }();
    const bool ordered = IsOrdered(operands[2].GetImmediateAccType());

    ctx.fpsr.Spill();
    ctx.reg_alloc.SpillFlags();
    RegAlloc::Realize(Xaddr, Rvalue);

    SharedLabel fallback = GenSharedLabel();
    SharedLabel end = GenSharedLabel();

    // Fast path: translate the address and load directly from host memory.
    const auto [Xbase, Xoffset] = EmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
    EmitMemoryLdr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);

    // Slow path, emitted later out of line.
    ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xguest = *Xaddr, Rdest = *Rvalue, ordered, fallback, end] {
        // Leave the destination register out of the save set so that the pop below
        // does not overwrite the freshly read value. 128-bit destinations use the
        // register-list bits starting at bit 32.
        const u64 dest_reg_bit = (bitsize == 128 ? (1ull << 32) : 1ull) << Rdest.index();
        const u64 save_regs = ABI_CALLER_SAVE & ~dest_reg_bit;

        code.l(*fallback);
        ABI_PushRegisters(code, save_regs, 0);

        code.MOV(X1, Xguest);
        EmitRelocation(code, ctx, ReadMemoryLinkTarget(bitsize));
        if (ordered) {
            code.DMB(oaknut::BarrierOp::ISH);
        }

        if constexpr (bitsize != 128) {
            code.MOV(Rdest.toX(), X0);
        } else {
            code.MOV(Rdest.B16(), Q0.B16());
        }

        ABI_PopRegisters(code, save_regs, 0);
        ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
        code.B(*end);
    });

    code.l(*end);
}
// Emits a `bitsize`-bit guest memory write for `inst` using the inline page table.
//
// Fast path (emitted in place): page table lookup followed by a direct store of the
// value register.
// Slow path (queued on ctx.deferred_emits and reached through the `fallback` label):
// saves all caller-saved registers, passes the guest address in X1 and the value in X2
// (Q0 for 128-bit writes), invokes WriteMemoryLinkTarget(bitsize) through
// EmitRelocation, restores registers, runs ctx.conf.emit_check_memory_abort and
// branches back to `end`. Ordered accesses get a DMB ISH on both sides of the call.
//
// args[1] is the guest address, args[2] the value to store, and args[3] the immediate
// access type, which determines whether the access is ordered.
template<size_t bitsize>
void InlinePageTableEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Xaddr = ctx.reg_alloc.ReadX(operands[1]);
    auto Rvalue = [&] {
        if constexpr (bitsize != 128) {
            // Sub-32-bit values are still held in a 32-bit register.
            return ctx.reg_alloc.ReadReg<std::max<std::size_t>(bitsize, 32)>(operands[2]);
        } else {
            return ctx.reg_alloc.ReadQ(operands[2]);
        }
    }();
    const bool ordered = IsOrdered(operands[3].GetImmediateAccType());

    ctx.fpsr.Spill();
    ctx.reg_alloc.SpillFlags();
    RegAlloc::Realize(Xaddr, Rvalue);

    SharedLabel fallback = GenSharedLabel();
    SharedLabel end = GenSharedLabel();

    // Fast path: translate the address and store directly to host memory.
    const auto [Xbase, Xoffset] = EmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
    EmitMemoryStr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);

    // Slow path, emitted later out of line.
    ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xguest = *Xaddr, Rsrc = *Rvalue, ordered, fallback, end] {
        const u64 save_regs = ABI_CALLER_SAVE;

        code.l(*fallback);
        ABI_PushRegisters(code, save_regs, 0);

        if constexpr (bitsize != 128) {
            // Stage both operands in scratch registers before writing X1/X2 —
            // presumably so that neither move can overwrite the other's source
            // when the allocator placed the address in X2 or the value in X1.
            code.MOV(Xscratch0, Xguest);
            code.MOV(Xscratch1, Rsrc.toX());
            code.MOV(X1, Xscratch0);
            code.MOV(X2, Xscratch1);
        } else {
            code.MOV(X1, Xguest);
            code.MOV(Q0.B16(), Rsrc.B16());
        }

        if (ordered) {
            code.DMB(oaknut::BarrierOp::ISH);
        }
        EmitRelocation(code, ctx, WriteMemoryLinkTarget(bitsize));
        if (ordered) {
            code.DMB(oaknut::BarrierOp::ISH);
        }

        ABI_PopRegisters(code, save_regs, 0);
        ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
        code.B(*end);
    });

    code.l(*end);
}
} // namespace
// Emits a `bitsize`-bit guest memory read for `inst`.
// Uses the inline page table path when ctx.conf.page_table_pointer is non-zero,
// and the callback-only path otherwise.
template<size_t bitsize>
void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const bool has_page_table = ctx.conf.page_table_pointer != 0;
    if (!has_page_table) {
        CallbackOnlyEmitReadMemory<bitsize>(code, ctx, inst);
        return;
    }
    InlinePageTableEmitReadMemory<bitsize>(code, ctx, inst);
}
template<size_t bitsize>
void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
@ -182,8 +424,12 @@ void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::
// Emits a `bitsize`-bit guest memory write for `inst`.
// Uses the inline page table path when ctx.conf.page_table_pointer is non-zero,
// and the callback-only path otherwise.
template<size_t bitsize>
void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const bool has_page_table = ctx.conf.page_table_pointer != 0;
    if (!has_page_table) {
        CallbackOnlyEmitWriteMemory<bitsize>(code, ctx, inst);
        return;
    }
    InlinePageTableEmitWriteMemory<bitsize>(code, ctx, inst);
}
template<size_t bitsize>
void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {