From 6291896fbf906286c41a518afc3d0db37aaf2d1d Mon Sep 17 00:00:00 2001
From: Liam
Date: Sun, 13 Nov 2022 11:39:16 -0500
Subject: [PATCH] Add a64_address_space

---
 .../backend/arm64/a64_address_space.cpp   | 469 ++++++++++++++++++
 src/dynarmic/backend/arm64/a64_jitstate.h |   6 +-
 src/dynarmic/backend/arm64/emit_arm64.h   |   6 +
 3 files changed, 478 insertions(+), 3 deletions(-)
 create mode 100644 src/dynarmic/backend/arm64/a64_address_space.cpp

diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp
new file mode 100644
index 00000000..576ad17e
--- /dev/null
+++ b/src/dynarmic/backend/arm64/a64_address_space.cpp
@@ -0,0 +1,469 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2022 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#include "dynarmic/backend/arm64/a64_address_space.h"
+
+#include <mcl/bit_cast.hpp>
+
+#include "dynarmic/backend/arm64/a64_jitstate.h"
+#include "dynarmic/backend/arm64/abi.h"
+#include "dynarmic/backend/arm64/devirtualize.h"
+#include "dynarmic/backend/arm64/emit_arm64.h"
+#include "dynarmic/backend/arm64/stack_layout.h"
+#include "dynarmic/common/cast_util.h"
+#include "dynarmic/common/fp/fpcr.h"
+#include "dynarmic/frontend/A64/a64_location_descriptor.h"
+#include "dynarmic/frontend/A64/translate/a64_translate.h"
+#include "dynarmic/interface/exclusive_monitor.h"
+#include "dynarmic/ir/opt/passes.h"
+
+namespace Dynarmic::Backend::Arm64 {
+
+template<auto mfp, typename T>
+static void* EmitCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
+    using namespace oaknut::util;
+
+    const auto info = Devirtualize<mfp>(this_);
+
+    oaknut::Label l_addr, l_this;
+
+    void* target = code.ptr<void*>();
+    code.LDR(X0, l_this);
+    code.LDR(Xscratch0, l_addr);
+    code.BR(Xscratch0);
+
+    code.align(8);
+    code.l(l_this);
+    code.dx(info.this_ptr);
+    code.l(l_addr);
+    code.dx(info.fn_ptr);
+
+    return target;
+}
+
+template<auto callback, typename T>
+static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
+    using namespace oaknut::util;
+
+    oaknut::Label l_addr, l_this;
+
+    auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr) -> T {
+        return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
+            return (conf.callbacks->*callback)(vaddr);
+        });
+    };
+
+    void* target = code.ptr<void*>();
+    code.LDR(X0, l_this);
+    code.LDR(Xscratch0, l_addr);
+    code.BR(Xscratch0);
+
+    code.align(8);
+    code.l(l_this);
+    code.dx(mcl::bit_cast<u64>(&conf));
+    code.l(l_addr);
+    code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
+
+    return target;
+}
+
+template<auto callback, typename T>
+static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
+    using namespace oaknut::util;
+
+    oaknut::Label l_addr, l_this;
+
+    auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, T value) -> u32 {
+        return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
+                                                            [&](T expected) -> bool {
+                                                                return (conf.callbacks->*callback)(vaddr, value, expected);
+                                                            })
+                 ? 0
+                 : 1;
+    };
+
+    void* target = code.ptr<void*>();
+    code.LDR(X0, l_this);
+    code.LDR(Xscratch0, l_addr);
+    code.BR(Xscratch0);
+
+    code.align(8);
+    code.l(l_this);
+    code.dx(mcl::bit_cast<u64>(&conf));
+    code.l(l_addr);
+    code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
+
+    return target;
+}
+
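+// A sketch of what each trampoline emitted above looks like (illustrative
+// only; the exact encodings are produced by oaknut):
+//
+//     target:  LDR  X0, l_this        ; first argument := captured this/conf pointer
+//              LDR  Xscratch0, l_addr ; load the callee's address
+//              BR   Xscratch0         ; tail-call; the callee returns to the JITted caller
+//     l_this:  .dword <this pointer>  ; 8-byte-aligned literal pool
+//     l_addr:  .dword <function pointer>
+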
+A64AddressSpace::A64AddressSpace(const A64::UserConfig& conf)
+        : conf(conf)
+        , mem(conf.code_cache_size)
+        , code(mem.ptr()) {
+    EmitPrelude();
+}
+
+IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
+    const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
+    IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code,
+                                        {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
+
+    Optimization::A64CallbackConfigPass(ir_block, conf);
+    if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
+        Optimization::A64GetSetElimination(ir_block);
+        Optimization::DeadCodeElimination(ir_block);
+    }
+    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
+        Optimization::ConstantPropagation(ir_block);
+        Optimization::DeadCodeElimination(ir_block);
+    }
+    if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) {
+        Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
+    }
+    Optimization::VerificationPass(ir_block);
+
+    return ir_block;
+}
+
+CodePtr A64AddressSpace::Get(IR::LocationDescriptor descriptor) {
+    if (const auto iter = block_entries.find(descriptor.Value()); iter != block_entries.end()) {
+        return iter->second;
+    }
+    return nullptr;
+}
+
+CodePtr A64AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) {
+    if (CodePtr block_entry = Get(descriptor)) {
+        return block_entry;
+    }
+
+    IR::Block ir_block = GenerateIR(descriptor);
+    const EmittedBlockInfo block_info = Emit(std::move(ir_block));
+
+    block_infos.insert_or_assign(descriptor.Value(), block_info);
+    block_entries.insert_or_assign(descriptor.Value(), block_info.entry_point);
+    return block_info.entry_point;
+}
+
+void A64AddressSpace::ClearCache() {
+    block_entries.clear();
+    block_infos.clear();
+    block_references.clear();
+    code.set_ptr(prelude_info.end_of_prelude);
+}
+
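+// The prelude emitted below is the bridge between host C++ and JITted code.
+// As written here, run_code/step_code take the block entry point in X0, a
+// pointer to the A64JitState in X1 and a pointer to the halt-reason word in
+// X2; X1 and X2 are then kept in the reserved Xstate and Xhalt registers for
+// the duration of the run.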
+void A64AddressSpace::EmitPrelude() {
+    using namespace oaknut::util;
+
+    mem.unprotect();
+
+    prelude_info.read_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead8>(code, conf.callbacks);
+    prelude_info.read_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead16>(code, conf.callbacks);
+    prelude_info.read_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks);
+    prelude_info.read_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks);
+    prelude_info.read_memory_128 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead128>(code, conf.callbacks);
+    prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead8, u8>(code, conf);
+    prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead16, u16>(code, conf);
+    prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead32, u32>(code, conf);
+    prelude_info.exclusive_read_memory_64 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead64, u64>(code, conf);
+    prelude_info.exclusive_read_memory_128 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead128, Vector>(code, conf);
+    prelude_info.write_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
+    prelude_info.write_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
+    prelude_info.write_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
+    prelude_info.write_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
+    prelude_info.write_memory_128 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite128>(code, conf.callbacks);
+    prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf);
+    prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf);
+    prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf);
+    prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf);
+    prelude_info.exclusive_write_memory_128 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive128, Vector>(code, conf);
+    prelude_info.call_svc = EmitCallTrampoline<&A64::UserCallbacks::CallSVC>(code, conf.callbacks);
+    prelude_info.exception_raised = EmitCallTrampoline<&A64::UserCallbacks::ExceptionRaised>(code, conf.callbacks);
+    prelude_info.isb_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks);
+    prelude_info.ic_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionCacheOperationRaised>(code, conf.callbacks);
+    prelude_info.dc_raised = EmitCallTrampoline<&A64::UserCallbacks::DataCacheOperationRaised>(code, conf.callbacks);
+    prelude_info.add_ticks = EmitCallTrampoline<&A64::UserCallbacks::AddTicks>(code, conf.callbacks);
+    prelude_info.get_ticks_remaining = EmitCallTrampoline<&A64::UserCallbacks::GetTicksRemaining>(code, conf.callbacks);
+
+    oaknut::Label return_from_run_code;
+
+    prelude_info.run_code = code.ptr<decltype(prelude_info.run_code)>();
+    {
+        ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
+
+        code.MOV(X19, X0);
+        code.MOV(Xstate, X1);
+        code.MOV(Xhalt, X2);
+
+        if (conf.enable_cycle_counting) {
+            code.BL(prelude_info.get_ticks_remaining);
+            code.MOV(Xticks, X0);
+            code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
+        }
+
+        code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
+        code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
+        code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
+        code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
+
+        code.LDAR(Wscratch0, Xhalt);
+        code.CBNZ(Wscratch0, return_from_run_code);
+
+        code.BR(X19);
+    }
+
+    prelude_info.step_code = code.ptr<decltype(prelude_info.step_code)>();
+    {
+        ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
+
+        code.MOV(X19, X0);
+        code.MOV(Xstate, X1);
+        code.MOV(Xhalt, X2);
+
+        if (conf.enable_cycle_counting) {
+            code.MOV(Xticks, 1);
+            code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
+        }
+
+        code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
+        code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
+        code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
+        code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
+
+        oaknut::Label step_hr_loop;
+        code.l(step_hr_loop);
+        code.LDAXR(Wscratch0, Xhalt);
+        code.CBNZ(Wscratch0, return_from_run_code);
+        code.ORR(Wscratch0, Wscratch0, static_cast<u32>(HaltReason::Step));
+        code.STLXR(Wscratch1, Wscratch0, Xhalt);
+        code.CBNZ(Wscratch1, step_hr_loop);
+
+        code.BR(X19);
+    }
+
+    prelude_info.return_to_dispatcher = code.ptr<decltype(prelude_info.return_to_dispatcher)>();
+    {
+        oaknut::Label l_this, l_addr;
+
+        code.LDAR(Wscratch0, Xhalt);
+        code.CBNZ(Wscratch0, return_from_run_code);
+
+        if (conf.enable_cycle_counting) {
+            code.CMP(Xticks, 0);
+            code.B(LE, return_from_run_code);
+        }
+
+        code.LDR(X0, l_this);
+        code.MOV(X1, Xstate);
+        code.LDR(Xscratch0, l_addr);
+        code.BLR(Xscratch0);
+        code.BR(X0);
+
+        const auto fn = [](A64AddressSpace& self, A64JitState& context) -> CodePtr {
+            return self.GetOrEmit(context.GetLocationDescriptor());
+        };
+
+        code.align(8);
+        code.l(l_this);
+        code.dx(mcl::bit_cast<u64>(this));
+        code.l(l_addr);
+        code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
+    }
+
+    prelude_info.return_from_run_code = code.ptr<decltype(prelude_info.return_from_run_code)>();
+    {
+        code.l(return_from_run_code);
+
+        if (conf.enable_cycle_counting) {
+            code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
+            code.SUB(X1, X1, Xticks);
+            code.BL(prelude_info.add_ticks);
+        }
+
+        code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr));
+        code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
+
+        oaknut::Label exit_hr_loop;
+        code.l(exit_hr_loop);
+        code.LDAXR(W0, Xhalt);
+        code.STLXR(Wscratch0, WZR, Xhalt);
+        code.CBNZ(Wscratch0, exit_hr_loop);
+
+        ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
+        code.RET();
+    }
+
+    prelude_info.end_of_prelude = code.ptr<decltype(prelude_info.end_of_prelude)>();
+
+    mem.invalidate_all();
+    mem.protect();
+}
+
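+// Cache exhaustion is handled with a deliberately simple policy: when Emit()
+// (below) finds less than 1 MiB of code cache remaining, it clears the whole
+// cache and lets cleared blocks be re-translated on their next execution.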
+size_t A64AddressSpace::GetRemainingSize() {
+    return conf.code_cache_size - (code.ptr<CodePtr>() - reinterpret_cast<CodePtr>(mem.ptr()));
+}
+
+EmittedBlockInfo A64AddressSpace::Emit(IR::Block block) {
+    if (GetRemainingSize() < 1024 * 1024) {
+        ClearCache();
+    }
+
+    mem.unprotect();
+
+    // Take the descriptor now: `block` is moved from below.
+    const IR::LocationDescriptor descriptor = block.Location();
+
+    const EmitConfig emit_conf{
+        .hook_isb = conf.hook_isb,
+        .enable_cycle_counting = conf.enable_cycle_counting,
+        .always_little_endian = true,
+        .descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return A64::LocationDescriptor{location}.FPCR(); },
+        .state_nzcv_offset = offsetof(A64JitState, cpsr_nzcv),
+        .state_fpsr_offset = offsetof(A64JitState, fpsr),
+        .coprocessors{},
+        .optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,
+    };
+    EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf);
+
+    Link(descriptor, block_info);
+
+    mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
+
+    RelinkForDescriptor(descriptor);
+
+    mem.protect();
+
+    return block_info;
+}
+
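+// Block-to-block links are patched in place: each relocation site is a
+// branch slot that is rewritten to B directly to the target block once it
+// exists, and to NOP (falling through to the dispatcher path) while it does
+// not.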
+static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector<BlockRelocation>& block_relocations_list) {
+    using namespace oaknut;
+    using namespace oaknut::util;
+
+    for (auto [ptr_offset] : block_relocations_list) {
+        CodeGenerator c{reinterpret_cast<u32*>(entry_point + ptr_offset)};
+
+        if (target_ptr) {
+            c.B((void*)target_ptr);
+        } else {
+            c.NOP();
+        }
+    }
+}
+
+void A64AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block_info) {
+    using namespace oaknut;
+    using namespace oaknut::util;
+
+    for (auto [ptr_offset, target] : block_info.relocations) {
+        CodeGenerator c{reinterpret_cast<u32*>(block_info.entry_point + ptr_offset)};
+
+        switch (target) {
+        case LinkTarget::ReturnToDispatcher:
+            c.B(prelude_info.return_to_dispatcher);
+            break;
+        case LinkTarget::ReturnFromRunCode:
+            c.B(prelude_info.return_from_run_code);
+            break;
+        case LinkTarget::ReadMemory8:
+            c.BL(prelude_info.read_memory_8);
+            break;
+        case LinkTarget::ReadMemory16:
+            c.BL(prelude_info.read_memory_16);
+            break;
+        case LinkTarget::ReadMemory32:
+            c.BL(prelude_info.read_memory_32);
+            break;
+        case LinkTarget::ReadMemory64:
+            c.BL(prelude_info.read_memory_64);
+            break;
+        case LinkTarget::ReadMemory128:
+            c.BL(prelude_info.read_memory_128);
+            break;
+        case LinkTarget::ExclusiveReadMemory8:
+            c.BL(prelude_info.exclusive_read_memory_8);
+            break;
+        case LinkTarget::ExclusiveReadMemory16:
+            c.BL(prelude_info.exclusive_read_memory_16);
+            break;
+        case LinkTarget::ExclusiveReadMemory32:
+            c.BL(prelude_info.exclusive_read_memory_32);
+            break;
+        case LinkTarget::ExclusiveReadMemory64:
+            c.BL(prelude_info.exclusive_read_memory_64);
+            break;
+        case LinkTarget::ExclusiveReadMemory128:
+            c.BL(prelude_info.exclusive_read_memory_128);
+            break;
+        case LinkTarget::WriteMemory8:
+            c.BL(prelude_info.write_memory_8);
+            break;
+        case LinkTarget::WriteMemory16:
+            c.BL(prelude_info.write_memory_16);
+            break;
+        case LinkTarget::WriteMemory32:
+            c.BL(prelude_info.write_memory_32);
+            break;
+        case LinkTarget::WriteMemory64:
+            c.BL(prelude_info.write_memory_64);
+            break;
+        case LinkTarget::WriteMemory128:
+            c.BL(prelude_info.write_memory_128);
+            break;
+        case LinkTarget::ExclusiveWriteMemory8:
+            c.BL(prelude_info.exclusive_write_memory_8);
+            break;
+        case LinkTarget::ExclusiveWriteMemory16:
+            c.BL(prelude_info.exclusive_write_memory_16);
+            break;
+        case LinkTarget::ExclusiveWriteMemory32:
+            c.BL(prelude_info.exclusive_write_memory_32);
+            break;
+        case LinkTarget::ExclusiveWriteMemory64:
+            c.BL(prelude_info.exclusive_write_memory_64);
+            break;
+        case LinkTarget::ExclusiveWriteMemory128:
+            c.BL(prelude_info.exclusive_write_memory_128);
+            break;
+        case LinkTarget::CallSVC:
+            c.BL(prelude_info.call_svc);
+            break;
+        case LinkTarget::ExceptionRaised:
+            c.BL(prelude_info.exception_raised);
+            break;
+        case LinkTarget::InstructionSynchronizationBarrierRaised:
+            c.BL(prelude_info.isb_raised);
+            break;
+        case LinkTarget::InstructionCacheOperationRaised:
+            c.BL(prelude_info.ic_raised);
+            break;
+        case LinkTarget::DataCacheOperationRaised:
+            c.BL(prelude_info.dc_raised);
+            break;
+        case LinkTarget::AddTicks:
+            c.BL(prelude_info.add_ticks);
+            break;
+        case LinkTarget::GetTicksRemaining:
+            c.BL(prelude_info.get_ticks_remaining);
+            break;
+        default:
+            ASSERT_FALSE("Invalid relocation target");
+        }
+    }
+
+    for (auto [target_descriptor, list] : block_info.block_relocations) {
+        block_references[target_descriptor.Value()].emplace(block_descriptor.Value());
+        LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list);
+    }
+}
+
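+// RelinkForDescriptor() is the other half of the lazy linking scheme: when a
+// block for target_descriptor is (re)emitted, every previously-emitted block
+// that references it is patched to branch to it directly.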
+void A64AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor) {
+    for (auto block_descriptor : block_references[target_descriptor.Value()]) {
+        if (auto iter = block_infos.find(block_descriptor); iter != block_infos.end()) {
+            const EmittedBlockInfo& block_info = iter->second;
+
+            LinkBlockLinks(block_info.entry_point, Get(target_descriptor), block_infos[block_descriptor].block_relocations[target_descriptor]);
+
+            mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
+        }
+    }
+}
+
+}  // namespace Dynarmic::Backend::Arm64
diff --git a/src/dynarmic/backend/arm64/a64_jitstate.h b/src/dynarmic/backend/arm64/a64_jitstate.h
index 1b011295..223a36c9 100644
--- a/src/dynarmic/backend/arm64/a64_jitstate.h
+++ b/src/dynarmic/backend/arm64/a64_jitstate.h
@@ -20,8 +20,6 @@ struct A64JitState {
     u32 cpsr_nzcv = 0;
 
-    u32 upper_location_descriptor;
-
     alignas(16) std::array<u64, 64> vec{};  // Extension registers.
 
     u32 exclusive_state = 0;
@@ -30,7 +28,9 @@
     u32 fpcr = 0;
 
     IR::LocationDescriptor GetLocationDescriptor() const {
-        return IR::LocationDescriptor{pc};
+        const u64 fpcr_u64 = static_cast<u64>(fpcr & A64::LocationDescriptor::fpcr_mask) << A64::LocationDescriptor::fpcr_shift;
+        const u64 pc_u64 = pc & A64::LocationDescriptor::pc_mask;
+        return IR::LocationDescriptor{pc_u64 | fpcr_u64};
     }
 };
 
diff --git a/src/dynarmic/backend/arm64/emit_arm64.h b/src/dynarmic/backend/arm64/emit_arm64.h
index 1c5adf3f..72124aa4 100644
--- a/src/dynarmic/backend/arm64/emit_arm64.h
+++ b/src/dynarmic/backend/arm64/emit_arm64.h
@@ -47,21 +47,27 @@ enum class LinkTarget {
     ReadMemory16,
     ReadMemory32,
     ReadMemory64,
+    ReadMemory128,
     ExclusiveReadMemory8,
     ExclusiveReadMemory16,
     ExclusiveReadMemory32,
     ExclusiveReadMemory64,
+    ExclusiveReadMemory128,
     WriteMemory8,
     WriteMemory16,
     WriteMemory32,
     WriteMemory64,
+    WriteMemory128,
     ExclusiveWriteMemory8,
     ExclusiveWriteMemory16,
     ExclusiveWriteMemory32,
     ExclusiveWriteMemory64,
+    ExclusiveWriteMemory128,
     CallSVC,
     ExceptionRaised,
     InstructionSynchronizationBarrierRaised,
+    InstructionCacheOperationRaised,
+    DataCacheOperationRaised,
     AddTicks,
     GetTicksRemaining,
 };