Add a64_address_space

This commit is contained in:
Liam 2022-11-13 11:39:16 -05:00
parent 4a4b00f0b1
commit 6291896fbf
3 changed files with 478 additions and 3 deletions

View file

@ -0,0 +1,469 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/arm64/a64_address_space.h"
#include "dynarmic/backend/arm64/a64_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/devirtualize.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/stack_layout.h"
#include "dynarmic/common/cast_util.h"
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/frontend/A64/a64_location_descriptor.h"
#include "dynarmic/frontend/A64/translate/a64_translate.h"
#include "dynarmic/interface/exclusive_monitor.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Backend::Arm64 {
template<auto mfp, typename T>
static void* EmitCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<mfp>(this_);
oaknut::Label l_addr, l_this;
void* target = code.ptr<void*>();
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
template<auto callback, typename T>
static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
auto fn = [](const A32::UserConfig& conf, A32::VAddr vaddr) -> T {
return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
return (conf.callbacks->*callback)(vaddr);
});
};
void* target = code.ptr<void*>();
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}
template<auto callback, typename T>
static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, T value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
[&](T expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected);
})
? 0
: 1;
};
void* target = code.ptr<void*>();
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}
A64AddressSpace::A64AddressSpace(const A64::UserConfig& conf)
: conf(conf)
, mem(conf.code_cache_size)
, code(mem.ptr()) {
EmitPrelude();
}
IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code,
{conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
Optimization::A64CallbackConfigPass(ir_block, conf);
if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
Optimization::A64GetSetElimination(ir_block);
Optimization::DeadCodeElimination(ir_block);
}
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
}
if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) {
Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
}
Optimization::VerificationPass(ir_block);
return ir_block;
}
CodePtr A64AddressSpace::Get(IR::LocationDescriptor descriptor) {
if (const auto iter = block_entries.find(descriptor.Value()); iter != block_entries.end()) {
return iter->second;
}
return nullptr;
}
CodePtr A64AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) {
if (CodePtr block_entry = Get(descriptor)) {
return block_entry;
}
IR::Block ir_block = GenerateIR(descriptor);
const EmittedBlockInfo block_info = Emit(std::move(ir_block));
block_infos.insert_or_assign(descriptor.Value(), block_info);
block_entries.insert_or_assign(descriptor.Value(), block_info.entry_point);
return block_info.entry_point;
}
void A64AddressSpace::ClearCache() {
block_entries.clear();
block_infos.clear();
block_references.clear();
code.set_ptr(prelude_info.end_of_prelude);
}
void A64AddressSpace::EmitPrelude() {
using namespace oaknut::util;
mem.unprotect();
prelude_info.read_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead8>(code, conf.callbacks);
prelude_info.read_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead16>(code, conf.callbacks);
prelude_info.read_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks);
prelude_info.read_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks);
prelude_info.read_memory_128 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead128>(code, conf.callbacks);
prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead8, u8>(code, conf);
prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead16, u16>(code, conf);
prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead32, u32>(code, conf);
prelude_info.exclusive_read_memory_64 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead64, u64>(code, conf);
prelude_info.exclusive_read_memory_128 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead128, Vector>(code, conf);
prelude_info.write_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
prelude_info.write_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
prelude_info.write_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
prelude_info.write_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
prelude_info.write_memory_128 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite128>(code, conf.callbacks);
prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf);
prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf);
prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf);
prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf);
prelude_info.exclusive_write_memory_128 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, Vector>(code, conf);
prelude_info.call_svc = EmitCallTrampoline<&A64::UserCallbacks::CallSVC>(code, conf.callbacks);
prelude_info.exception_raised = EmitCallTrampoline<&A64::UserCallbacks::ExceptionRaised>(code, conf.callbacks);
prelude_info.isb_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks);
prelude_info.ic_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionCacheOperationRaised>(code, conf.callbacks);
prelude_info.dc_raised = EmitCallTrampoline<&A64::UserCallbacks::DataCacheOperationRaised>(code, conf.callbacks);
prelude_info.add_ticks = EmitCallTrampoline<&A64::UserCallbacks::AddTicks>(code, conf.callbacks);
prelude_info.get_ticks_remaining = EmitCallTrampoline<&A64::UserCallbacks::GetTicksRemaining>(code, conf.callbacks);
oaknut::Label return_from_run_code;
prelude_info.run_code = code.ptr<PreludeInfo::RunCodeFuncType>();
{
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
code.MOV(X19, X0);
code.MOV(Xstate, X1);
code.MOV(Xhalt, X2);
if (conf.enable_cycle_counting) {
code.BL(prelude_info.get_ticks_remaining);
code.MOV(Xticks, X0);
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
}
code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
code.LDAR(Wscratch0, Xhalt);
code.CBNZ(Wscratch0, return_from_run_code);
code.BR(X19);
}
prelude_info.step_code = code.ptr<PreludeInfo::RunCodeFuncType>();
{
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
code.MOV(X19, X0);
code.MOV(Xstate, X1);
code.MOV(Xhalt, X2);
if (conf.enable_cycle_counting) {
code.MOV(Xticks, 1);
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
}
code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
oaknut::Label step_hr_loop;
code.l(step_hr_loop);
code.LDAXR(Wscratch0, Xhalt);
code.CBNZ(Wscratch0, return_from_run_code);
code.ORR(Wscratch0, Wscratch0, static_cast<u32>(HaltReason::Step));
code.STLXR(Wscratch1, Wscratch0, Xhalt);
code.CBNZ(Wscratch1, step_hr_loop);
code.BR(X19);
}
prelude_info.return_to_dispatcher = code.ptr<void*>();
{
oaknut::Label l_this, l_addr;
code.LDAR(Wscratch0, Xhalt);
code.CBNZ(Wscratch0, return_from_run_code);
if (conf.enable_cycle_counting) {
code.CMP(Xticks, 0);
code.B(LE, return_from_run_code);
}
code.LDR(X0, l_this);
code.MOV(X1, Xstate);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
code.BR(X0);
const auto fn = [](A64AddressSpace& self, A64JitState& context) -> CodePtr {
return self.GetOrEmit(context.GetLocationDescriptor());
};
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(this));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
}
prelude_info.return_from_run_code = code.ptr<void*>();
{
code.l(return_from_run_code);
if (conf.enable_cycle_counting) {
code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
code.SUB(X1, X1, Xticks);
code.BL(prelude_info.add_ticks);
}
code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr));
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
oaknut::Label exit_hr_loop;
code.l(exit_hr_loop);
code.LDAXR(W0, Xhalt);
code.STLXR(Wscratch0, WZR, Xhalt);
code.CBNZ(Wscratch0, exit_hr_loop);
ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
code.RET();
}
prelude_info.end_of_prelude = code.ptr<u32*>();
mem.invalidate_all();
mem.protect();
}
size_t A64AddressSpace::GetRemainingSize() {
return conf.code_cache_size - (code.ptr<CodePtr>() - reinterpret_cast<CodePtr>(mem.ptr()));
}
EmittedBlockInfo A64AddressSpace::Emit(IR::Block block) {
if (GetRemainingSize() < 1024 * 1024) {
ClearCache();
}
mem.unprotect();
const EmitConfig emit_conf{
.hook_isb = conf.hook_isb,
.enable_cycle_counting = conf.enable_cycle_counting,
.always_little_endian = true,
.descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return A64::LocationDescriptor{location}.FPCR(); },
.state_nzcv_offset = offsetof(A64JitState, cpsr_nzcv),
.state_fpsr_offset = offsetof(A64JitState, fpsr),
.coprocessors{},
.optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,
};
EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf);
Link(block.Location(), block_info);
mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
RelinkForDescriptor(block.Location());
mem.protect();
return block_info;
}
static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector<BlockRelocation>& block_relocations_list) {
using namespace oaknut;
using namespace oaknut::util;
for (auto [ptr_offset] : block_relocations_list) {
CodeGenerator c{reinterpret_cast<u32*>(entry_point + ptr_offset)};
if (target_ptr) {
c.B((void*)target_ptr);
} else {
c.NOP();
}
}
}
void A64AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block_info) {
using namespace oaknut;
using namespace oaknut::util;
for (auto [ptr_offset, target] : block_info.relocations) {
CodeGenerator c{reinterpret_cast<u32*>(block_info.entry_point + ptr_offset)};
switch (target) {
case LinkTarget::ReturnToDispatcher:
c.B(prelude_info.return_to_dispatcher);
break;
case LinkTarget::ReturnFromRunCode:
c.B(prelude_info.return_from_run_code);
break;
case LinkTarget::ReadMemory8:
c.BL(prelude_info.read_memory_8);
break;
case LinkTarget::ReadMemory16:
c.BL(prelude_info.read_memory_16);
break;
case LinkTarget::ReadMemory32:
c.BL(prelude_info.read_memory_32);
break;
case LinkTarget::ReadMemory64:
c.BL(prelude_info.read_memory_64);
break;
case LinkTarget::ReadMemory128:
c.BL(prelude_info.read_memory_128);
break;
case LinkTarget::ExclusiveReadMemory8:
c.BL(prelude_info.exclusive_read_memory_8);
break;
case LinkTarget::ExclusiveReadMemory16:
c.BL(prelude_info.exclusive_read_memory_16);
break;
case LinkTarget::ExclusiveReadMemory32:
c.BL(prelude_info.exclusive_read_memory_32);
break;
case LinkTarget::ExclusiveReadMemory64:
c.BL(prelude_info.exclusive_read_memory_64);
break;
case LinkTarget::ExclusiveReadMemory128:
c.BL(prelude_info.exclusive_read_memory_128);
break;
case LinkTarget::WriteMemory8:
c.BL(prelude_info.write_memory_8);
break;
case LinkTarget::WriteMemory16:
c.BL(prelude_info.write_memory_16);
break;
case LinkTarget::WriteMemory32:
c.BL(prelude_info.write_memory_32);
break;
case LinkTarget::WriteMemory64:
c.BL(prelude_info.write_memory_64);
break;
case LinkTarget::WriteMemory128:
c.BL(prelude_info.write_memory_128);
break;
case LinkTarget::ExclusiveWriteMemory8:
c.BL(prelude_info.exclusive_write_memory_8);
break;
case LinkTarget::ExclusiveWriteMemory16:
c.BL(prelude_info.exclusive_write_memory_16);
break;
case LinkTarget::ExclusiveWriteMemory32:
c.BL(prelude_info.exclusive_write_memory_32);
break;
case LinkTarget::ExclusiveWriteMemory64:
c.BL(prelude_info.exclusive_write_memory_64);
break;
case LinkTarget::ExclusiveWriteMemory128:
c.BL(prelude_info.exclusive_write_memory_128);
break;
case LinkTarget::CallSVC:
c.BL(prelude_info.call_svc);
break;
case LinkTarget::ExceptionRaised:
c.BL(prelude_info.exception_raised);
break;
case LinkTarget::InstructionSynchronizationBarrierRaised:
c.BL(prelude_info.isb_raised);
break;
case LinkTarget::InstructionCacheOperationRaised:
c.BL(prelude_info.ic_raised);
break;
case LinkTarget::DataCacheOperationRaised:
c.BL(prelude_info.dc_raised);
break;
case LinkTarget::AddTicks:
c.BL(prelude_info.add_ticks);
break;
case LinkTarget::GetTicksRemaining:
c.BL(prelude_info.get_ticks_remaining);
break;
default:
ASSERT_FALSE("Invalid relocation target");
}
}
for (auto [target_descriptor, list] : block_info.block_relocations) {
block_references[target_descriptor.Value()].emplace(block_descriptor.Value());
LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list);
}
}
void A64AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor) {
for (auto block_descriptor : block_references[target_descriptor.Value()]) {
if (auto iter = block_infos.find(block_descriptor); iter != block_infos.end()) {
const EmittedBlockInfo& block_info = iter->second;
LinkBlockLinks(block_info.entry_point, Get(target_descriptor), block_infos[block_descriptor].block_relocations[target_descriptor]);
mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
}
}
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -20,8 +20,6 @@ struct A64JitState {
u32 cpsr_nzcv = 0;
u32 upper_location_descriptor;
alignas(16) std::array<u64, 64> vec{}; // Extension registers.
u32 exclusive_state = 0;
@ -30,7 +28,9 @@ struct A64JitState {
u32 fpcr = 0;
IR::LocationDescriptor GetLocationDescriptor() const {
return IR::LocationDescriptor{pc};
const u64 fpcr_u64 = static_cast<u64>(fpcr & A64::LocationDescriptor::fpcr_mask) << A64::LocationDescriptor::fpcr_shift;
const u64 pc_u64 = pc & A64::LocationDescriptor::pc_mask;
return IR::LocationDescriptor{pc_u64 | fpcr_u64};
}
};

View file

@ -47,21 +47,27 @@ enum class LinkTarget {
ReadMemory16,
ReadMemory32,
ReadMemory64,
ReadMemory128,
ExclusiveReadMemory8,
ExclusiveReadMemory16,
ExclusiveReadMemory32,
ExclusiveReadMemory64,
ExclusiveReadMemory128,
WriteMemory8,
WriteMemory16,
WriteMemory32,
WriteMemory64,
WriteMemory128,
ExclusiveWriteMemory8,
ExclusiveWriteMemory16,
ExclusiveWriteMemory32,
ExclusiveWriteMemory64,
ExclusiveWriteMemory128,
CallSVC,
ExceptionRaised,
InstructionSynchronizationBarrierRaised,
InstructionCacheOperationRaised,
DataCacheOperationRaised,
AddTicks,
GetTicksRemaining,
};