emit_arm64_memory: Initial fastmem implementation

This commit is contained in:
Merry 2022-12-11 17:26:49 +00:00
parent f4727c4ddb
commit e07dde9ed5
7 changed files with 216 additions and 25 deletions

View file

@ -226,6 +226,9 @@ void A32AddressSpace::EmitPrelude() {
if (conf.page_table) {
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
}
if (conf.fastmem_pointer) {
code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
}
if (conf.enable_cycle_counting) {
code.BL(prelude_info.get_ticks_remaining);
@ -255,6 +258,9 @@ void A32AddressSpace::EmitPrelude() {
if (conf.page_table) {
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
}
if (conf.fastmem_pointer) {
code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
}
if (conf.enable_cycle_counting) {
code.MOV(Xticks, 1);
@ -358,6 +364,11 @@ EmitConfig A32AddressSpace::GetEmitConfig() {
.detect_misaligned_access_via_page_table = conf.detect_misaligned_access_via_page_table,
.only_detect_misalignment_via_page_table_on_page_boundary = conf.only_detect_misalignment_via_page_table_on_page_boundary,
.fastmem_pointer = mcl::bit_cast<u64>(conf.fastmem_pointer),
.recompile_on_fastmem_failure = conf.recompile_on_fastmem_failure,
.fastmem_address_space_bits = 32,
.silently_mirror_fastmem = true,
.wall_clock_cntpct = conf.wall_clock_cntpct,
.enable_cycle_counting = conf.enable_cycle_counting,

View file

@ -406,6 +406,9 @@ void A64AddressSpace::EmitPrelude() {
if (conf.page_table) {
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
}
if (conf.fastmem_pointer) {
code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
}
if (conf.enable_cycle_counting) {
code.BL(prelude_info.get_ticks_remaining);
@ -434,6 +437,9 @@ void A64AddressSpace::EmitPrelude() {
if (conf.page_table) {
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
}
if (conf.fastmem_pointer) {
code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
}
if (conf.enable_cycle_counting) {
code.MOV(Xticks, 1);
@ -536,6 +542,11 @@ EmitConfig A64AddressSpace::GetEmitConfig() {
.detect_misaligned_access_via_page_table = conf.detect_misaligned_access_via_page_table,
.only_detect_misalignment_via_page_table_on_page_boundary = conf.only_detect_misalignment_via_page_table_on_page_boundary,
.fastmem_pointer = mcl::bit_cast<u64>(conf.fastmem_pointer),
.recompile_on_fastmem_failure = conf.recompile_on_fastmem_failure,
.fastmem_address_space_bits = conf.fastmem_address_space_bits,
.silently_mirror_fastmem = conf.silently_mirror_fastmem,
.wall_clock_cntpct = conf.wall_clock_cntpct,
.enable_cycle_counting = conf.enable_cycle_counting,

View file

@ -300,13 +300,15 @@ FakeCall AddressSpace::FastmemCallback(u64 host_pc) {
goto fail;
}
const auto result = iter->second.fc;
if (iter->second.recompile) {
const auto marker = iter->second.marker;
fastmem_manager.MarkDoNotFastmem(marker);
InvalidateBasicBlocks({std::get<0>(marker)});
}
return iter->second.fc;
return result;
}
fail:

View file

@ -128,6 +128,12 @@ struct EmitConfig {
u8 detect_misaligned_access_via_page_table;
bool only_detect_misalignment_via_page_table_on_page_boundary;
// Fastmem
u64 fastmem_pointer;
bool recompile_on_fastmem_failure;
size_t fastmem_address_space_bits;
bool silently_mirror_fastmem;
// Timing
bool wall_clock_cntpct;
bool enable_cycle_counting;

View file

@ -5,13 +5,16 @@
#include "dynarmic/backend/arm64/emit_arm64_memory.h"
#include <optional>
#include <utility>
#include <mcl/bit_cast.hpp>
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fastmem.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/interface/halt_reason.h"
@ -249,7 +252,7 @@ void EmitDetectMisalignedVAddr(oaknut::CodeGenerator& code, EmitContext& ctx, oa
// Address to read/write = [ret0 + ret1], ret0 is always Xscratch0 and ret1 is either Xaddr or Xscratch1
// Trashes NZCV
template<size_t bitsize>
std::pair<oaknut::XReg, oaknut::XReg> EmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
std::pair<oaknut::XReg, oaknut::XReg> InlinePageTableEmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;
@ -280,23 +283,26 @@ std::pair<oaknut::XReg, oaknut::XReg> EmitVAddrLookup(oaknut::CodeGenerator& cod
}
template<std::size_t bitsize>
const void* EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered) {
const void* fastmem_location = code.ptr<void*>();
CodePtr EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered, bool extend32 = false) {
const auto ext = extend32 ? oaknut::IndexExt::UXTW : oaknut::IndexExt::LSL;
const auto Roffset = extend32 ? oaknut::RReg{Xoffset.toW()} : oaknut::RReg{Xoffset};
const CodePtr fastmem_location = code.ptr<CodePtr>();
switch (bitsize) {
case 8:
code.LDRB(oaknut::WReg{value_idx}, Xbase, Xoffset);
code.LDRB(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
break;
case 16:
code.LDRH(oaknut::WReg{value_idx}, Xbase, Xoffset);
code.LDRH(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
break;
case 32:
code.LDR(oaknut::WReg{value_idx}, Xbase, Xoffset);
code.LDR(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
break;
case 64:
code.LDR(oaknut::XReg{value_idx}, Xbase, Xoffset);
code.LDR(oaknut::XReg{value_idx}, Xbase, Roffset, ext);
break;
case 128:
code.LDR(oaknut::QReg{value_idx}, Xbase, Xoffset);
code.LDR(oaknut::QReg{value_idx}, Xbase, Roffset, ext);
break;
default:
ASSERT_FALSE("Invalid bitsize");
@ -311,28 +317,31 @@ const void* EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XR
}
template<std::size_t bitsize>
const void* EmitMemoryStr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered) {
CodePtr EmitMemoryStr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered, bool extend32 = false) {
const auto ext = extend32 ? oaknut::IndexExt::UXTW : oaknut::IndexExt::LSL;
const auto Roffset = extend32 ? oaknut::RReg{Xoffset.toW()} : oaknut::RReg{Xoffset};
if (ordered) {
// TODO: Use STLR
code.DMB(oaknut::BarrierOp::ISH);
}
const void* fastmem_location = code.ptr<void*>();
const CodePtr fastmem_location = code.ptr<CodePtr>();
switch (bitsize) {
case 8:
code.STRB(oaknut::WReg{value_idx}, Xbase, Xoffset);
code.STRB(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
break;
case 16:
code.STRH(oaknut::WReg{value_idx}, Xbase, Xoffset);
code.STRH(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
break;
case 32:
code.STR(oaknut::WReg{value_idx}, Xbase, Xoffset);
code.STR(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
break;
case 64:
code.STR(oaknut::XReg{value_idx}, Xbase, Xoffset);
code.STR(oaknut::XReg{value_idx}, Xbase, Roffset, ext);
break;
case 128:
code.STR(oaknut::QReg{value_idx}, Xbase, Xoffset);
code.STR(oaknut::QReg{value_idx}, Xbase, Roffset, ext);
break;
default:
ASSERT_FALSE("Invalid bitsize");
@ -364,7 +373,7 @@ void InlinePageTableEmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx
SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
const auto [Xbase, Xoffset] = EmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
const auto [Xbase, Xoffset] = InlinePageTableEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
EmitMemoryLdr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);
ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end] {
@ -404,7 +413,7 @@ void InlinePageTableEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ct
SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
const auto [Xbase, Xoffset] = EmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
const auto [Xbase, Xoffset] = InlinePageTableEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
EmitMemoryStr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);
ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end] {
@ -430,11 +439,155 @@ void InlinePageTableEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ct
code.l(*end);
}
// Decides whether the memory access `inst` may be emitted using fastmem.
//
// Returns the marker identifying this access -- a tuple of the block's location
// and the instruction's offset from the start of the block -- when all of the
// following hold:
//   * a fastmem pointer is configured (ctx.conf.fastmem_pointer is non-zero),
//   * ctx.fastmem reports that fastmem is supported, and
//   * ctx.fastmem.ShouldFastmem accepts the marker.
// Returns std::nullopt otherwise.
std::optional<DoNotFastmemMarker> ShouldFastmem(EmitContext& ctx, IR::Inst* inst) {
    const bool fastmem_usable = ctx.conf.fastmem_pointer && ctx.fastmem.SupportsFastmem();

    if (fastmem_usable) {
        // The marker is only computed once we know fastmem is usable at all.
        const auto offset_in_block = std::distance(ctx.block.begin(), IR::Block::iterator(inst));
        const auto candidate = std::make_tuple(ctx.block.Location(), offset_in_block);

        if (ctx.fastmem.ShouldFastmem(candidate)) {
            return candidate;
        }
    }

    return std::nullopt;
}
// True when the fastmem address space is exactly 32 bits wide and fastmem is
// configured to silently mirror. The fastmem emitters pass this result as the
// `extend32` argument of EmitMemoryLdr / EmitMemoryStr.
inline bool ShouldExt32(EmitContext& ctx) {
    const bool is_32bit_address_space = ctx.conf.fastmem_address_space_bits == 32;
    return is_32bit_address_space && ctx.conf.silently_mirror_fastmem;
}
// Computes the (base, offset) register pair used to address fastmem for Xaddr.
//
// Contract:
//   * May use Xscratch0 as scratch register.
//   * Address to read/write = [ret0 + ret1]; ret0 is always Xfastmem and ret1 is
//     either Xaddr or Xscratch0.
//   * Trashes NZCV.
//
// Three cases, selected from ctx.conf:
//   1. 64-bit address space, or ShouldExt32(ctx): nothing is emitted and Xaddr is
//      used directly as the offset.
//   2. Silent mirroring: the low fastmem_address_space_bits of Xaddr are
//      extracted into Xscratch0, which becomes the offset.
//   3. Otherwise: the bits above the address space are shifted into Xscratch0
//      and a branch to `fallback` is emitted if any of them is set; Xaddr is the
//      offset.
//
// `bitsize` is not referenced by the body.
template<size_t bitsize>
std::pair<oaknut::XReg, oaknut::XReg> FastmemEmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
    const auto address_bits = ctx.conf.fastmem_address_space_bits;
    const bool use_address_directly = address_bits == 64 || ShouldExt32(ctx);

    if (!use_address_directly) {
        if (ctx.conf.silently_mirror_fastmem) {
            code.UBFX(Xscratch0, Xaddr, 0, address_bits);
            return std::make_pair(Xfastmem, Xscratch0);
        }

        // Out-of-range addresses take the fallback path.
        code.LSR(Xscratch0, Xaddr, address_bits);
        code.CBNZ(Xscratch0, *fallback);
    }

    return std::make_pair(Xfastmem, Xaddr);
}
// Emits a `bitsize`-bit guest memory read for `inst` using fastmem.
//
// Inline path: the address lookup followed by a single load emitted by
// EmitMemoryLdr, whose code location is remembered as `fastmem_location`.
//
// Deferred path (queued on ctx.deferred_emits):
//   * registers a FastmemPatchInfo keyed by the offset of the load from the
//     block's entry point; its FakeCall targets the start of the deferred code,
//   * binds `fallback`, calls the wrapped read-memory routine with the address
//     in Xscratch0, moves the result (Q0 for 128-bit, Xscratch0 otherwise) into
//     the destination register, emits the memory-abort check and branches to
//     `end`.
//
// `marker` identifies this access and is stored in the patch info.
template<size_t bitsize>
void FastmemEmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, DoNotFastmemMarker marker) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
    auto Rvalue = [&] {
        if constexpr (bitsize != 128) {
            // Sub-32-bit reads still get a 32-bit destination register.
            return ctx.reg_alloc.WriteReg<std::max<std::size_t>(bitsize, 32)>(inst);
        } else {
            return ctx.reg_alloc.WriteQ(inst);
        }
    }();
    const bool ordered = IsOrdered(args[2].GetImmediateAccType());
    ctx.fpsr.Spill();
    ctx.reg_alloc.SpillFlags();
    RegAlloc::Realize(Xaddr, Rvalue);

    SharedLabel fallback = GenSharedLabel();
    SharedLabel end = GenSharedLabel();

    const auto base_and_offset = FastmemEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
    const bool extend32 = ShouldExt32(ctx);
    const auto fastmem_location = EmitMemoryLdr<bitsize>(code, Rvalue->index(), base_and_offset.first, base_and_offset.second, ordered, extend32);

    ctx.deferred_emits.emplace_back([&code, &ctx, inst, marker, addr_reg = *Xaddr, value_reg = *Rvalue, ordered, fallback, end, fastmem_location] {
        // Must be sampled before anything is emitted below: the fake call
        // targets the first instruction of this deferred sequence.
        const FakeCall slow_path{
            .call_pc = mcl::bit_cast<u64>(code.ptr<void*>()),
            .ret_pc = 0,
        };
        ctx.ebi.fastmem_patch_info.emplace(
            fastmem_location - ctx.ebi.entry_point,
            FastmemPatchInfo{
                .marker = marker,
                .fc = slow_path,
                .recompile = ctx.conf.recompile_on_fastmem_failure,
            });

        code.l(*fallback);
        code.MOV(Xscratch0, addr_reg);
        EmitRelocation(code, ctx, WrappedReadMemoryLinkTarget(bitsize));
        if (ordered) {
            code.DMB(oaknut::BarrierOp::ISH);
        }
        if constexpr (bitsize != 128) {
            code.MOV(value_reg.toX(), Xscratch0);
        } else {
            code.MOV(value_reg.B16(), Q0.B16());
        }
        ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
        code.B(*end);
    });

    code.l(*end);
}
// Emits a `bitsize`-bit guest memory write for `inst` using fastmem.
//
// Inline path: the address lookup followed by a single store emitted by
// EmitMemoryStr, whose code location is remembered as `fastmem_location`.
//
// Deferred path (queued on ctx.deferred_emits):
//   * registers a FastmemPatchInfo keyed by the offset of the store from the
//     block's entry point; its FakeCall targets the start of the deferred code,
//   * binds `fallback`, places the address in Xscratch0 and the value in Q0
//     (128-bit) or Xscratch1 (otherwise), calls the wrapped write-memory routine
//     (bracketed by DMB ISH barriers when the access is ordered), emits the
//     memory-abort check and branches to `end`.
//
// `marker` identifies this access and is stored in the patch info.
template<size_t bitsize>
void FastmemEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, DoNotFastmemMarker marker) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
    auto Rvalue = [&] {
        if constexpr (bitsize != 128) {
            // Sub-32-bit values are still read through a 32-bit register.
            return ctx.reg_alloc.ReadReg<std::max<std::size_t>(bitsize, 32)>(args[2]);
        } else {
            return ctx.reg_alloc.ReadQ(args[2]);
        }
    }();
    const bool ordered = IsOrdered(args[3].GetImmediateAccType());
    ctx.fpsr.Spill();
    ctx.reg_alloc.SpillFlags();
    RegAlloc::Realize(Xaddr, Rvalue);

    SharedLabel fallback = GenSharedLabel();
    SharedLabel end = GenSharedLabel();

    const auto base_and_offset = FastmemEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
    const bool extend32 = ShouldExt32(ctx);
    const auto fastmem_location = EmitMemoryStr<bitsize>(code, Rvalue->index(), base_and_offset.first, base_and_offset.second, ordered, extend32);

    ctx.deferred_emits.emplace_back([&code, &ctx, inst, marker, addr_reg = *Xaddr, value_reg = *Rvalue, ordered, fallback, end, fastmem_location] {
        // Must be sampled before anything is emitted below: the fake call
        // targets the first instruction of this deferred sequence.
        const FakeCall slow_path{
            .call_pc = mcl::bit_cast<u64>(code.ptr<void*>()),
            .ret_pc = 0,
        };
        ctx.ebi.fastmem_patch_info.emplace(
            fastmem_location - ctx.ebi.entry_point,
            FastmemPatchInfo{
                .marker = marker,
                .fc = slow_path,
                .recompile = ctx.conf.recompile_on_fastmem_failure,
            });

        code.l(*fallback);
        // The address always goes in Xscratch0; only the value register differs by width.
        code.MOV(Xscratch0, addr_reg);
        if constexpr (bitsize != 128) {
            code.MOV(Xscratch1, value_reg.toX());
        } else {
            code.MOV(Q0.B16(), value_reg.B16());
        }
        if (ordered) {
            code.DMB(oaknut::BarrierOp::ISH);
        }
        EmitRelocation(code, ctx, WrappedWriteMemoryLinkTarget(bitsize));
        if (ordered) {
            code.DMB(oaknut::BarrierOp::ISH);
        }
        ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
        code.B(*end);
    });

    code.l(*end);
}
} // namespace
template<size_t bitsize>
void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
if (ctx.conf.page_table_pointer != 0) {
if (const auto marker = ShouldFastmem(ctx, inst)) {
FastmemEmitReadMemory<bitsize>(code, ctx, inst, *marker);
} else if (ctx.conf.page_table_pointer != 0) {
InlinePageTableEmitReadMemory<bitsize>(code, ctx, inst);
} else {
CallbackOnlyEmitReadMemory<bitsize>(code, ctx, inst);
@ -448,7 +601,9 @@ void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::
template<size_t bitsize>
void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
if (ctx.conf.page_table_pointer != 0) {
if (const auto marker = ShouldFastmem(ctx, inst)) {
FastmemEmitWriteMemory<bitsize>(code, ctx, inst, *marker);
} else if (ctx.conf.page_table_pointer != 0) {
InlinePageTableEmitWriteMemory<bitsize>(code, ctx, inst);
} else {
CallbackOnlyEmitWriteMemory<bitsize>(code, ctx, inst);

View file

@ -36,8 +36,12 @@ public:
explicit FastmemManager(ExceptionHandler& eh)
: exception_handler(eh) {}
// Reports whether fastmem is supported at all; forwards the answer of the
// ExceptionHandler this manager was constructed with.
bool SupportsFastmem() const {
return exception_handler.SupportsFastmem();
}
bool ShouldFastmem(DoNotFastmemMarker marker) const {
return exception_handler.SupportsFastmem() && do_not_fastmem.count(marker) == 0;
return do_not_fastmem.count(marker) == 0;
}
void MarkDoNotFastmem(DoNotFastmemMarker marker) {

View file

@ -37,16 +37,18 @@ FakeCall AxxEmitX64::FastmemCallback(u64 rip_) {
ASSERT_FALSE("iter != fastmem_patch_info.end()");
}
FakeCall result{
.call_rip = iter->second.callback,
.ret_rip = iter->second.resume_rip,
};
if (iter->second.recompile) {
const auto marker = iter->second.marker;
do_not_fastmem.emplace(marker);
InvalidateBasicBlocks({std::get<0>(marker)});
}
return FakeCall{
.call_rip = iter->second.callback,
.ret_rip = iter->second.resume_rip,
};
return result;
}
template<std::size_t bitsize, auto callback>