From 4a4b00f0b121ad6e776dfb44aabb0ed6d9676374 Mon Sep 17 00:00:00 2001
From: Liam
Date: Sun, 13 Nov 2022 11:13:29 -0500
Subject: [PATCH 01/47] Add headers

---
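Note for reviewers: A64AddressSpace owns the emitted-code cache plus a small
prelude of entry/exit thunks, and A64Core is a thin dispatcher over it. A
minimal sketch of the intended call flow, assuming an A64::UserConfig `conf`
with valid callbacks (illustrative only, not part of this patch):

    using namespace Dynarmic;
    using namespace Dynarmic::Backend::Arm64;

    A64AddressSpace process{conf};
    A64JitState ctx{};
    volatile u32 halt = 0;

    // Look up (or emit) the block for the current location, then enter it
    // through the prelude's run_code thunk.
    const CodePtr entry = process.GetOrEmit(ctx.GetLocationDescriptor());
    const HaltReason hr = process.prelude_info.run_code(entry, &ctx, &halt);

prelude_info is private, so external callers go through A64Core::Run below,
which performs exactly this sequence.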
 .../backend/arm64/a64_address_space.h         |  94 +++++++++++
 src/dynarmic/backend/arm64/a64_core.h         |  30 ++++
 src/dynarmic/backend/arm64/a64_interface.cpp  | 153 +++++++++++++++++-
 src/dynarmic/backend/arm64/a64_jitstate.h     |  37 +++++
 4 files changed, 312 insertions(+), 2 deletions(-)
 create mode 100644 src/dynarmic/backend/arm64/a64_address_space.h
 create mode 100644 src/dynarmic/backend/arm64/a64_core.h
 create mode 100644 src/dynarmic/backend/arm64/a64_jitstate.h

diff --git a/src/dynarmic/backend/arm64/a64_address_space.h b/src/dynarmic/backend/arm64/a64_address_space.h
new file mode 100644
index 00000000..b810107c
--- /dev/null
+++ b/src/dynarmic/backend/arm64/a64_address_space.h
@@ -0,0 +1,94 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2022 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#pragma once
+
+#include <mcl/stdint.hpp>
+#include <oaknut/code_block.hpp>
+#include <oaknut/oaknut.hpp>
+#include <tsl/robin_map.h>
+#include <tsl/robin_set.h>
+
+#include "dynarmic/backend/arm64/emit_arm64.h"
+#include "dynarmic/interface/A64/config.h"
+#include "dynarmic/interface/halt_reason.h"
+#include "dynarmic/ir/basic_block.h"
+#include "dynarmic/ir/location_descriptor.h"
+
+namespace Dynarmic::Backend::Arm64 {
+
+struct A64JitState;
+
+class A64AddressSpace final {
+public:
+    explicit A64AddressSpace(const A64::UserConfig& conf);
+
+    IR::Block GenerateIR(IR::LocationDescriptor) const;
+
+    CodePtr Get(IR::LocationDescriptor descriptor);
+
+    CodePtr GetOrEmit(IR::LocationDescriptor descriptor);
+
+    void ClearCache();
+
+private:
+    friend class A64Core;
+
+    void EmitPrelude();
+
+    size_t GetRemainingSize();
+    EmittedBlockInfo Emit(IR::Block ir_block);
+    void Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block);
+    void RelinkForDescriptor(IR::LocationDescriptor target_descriptor);
+
+    const A64::UserConfig conf;
+
+    oaknut::CodeBlock mem;
+    oaknut::CodeGenerator code;
+
+    tsl::robin_map<u64, CodePtr> block_entries;
+    tsl::robin_map<u64, EmittedBlockInfo> block_infos;
+    tsl::robin_map<u64, tsl::robin_set<u64>> block_references;
+
+    struct PreludeInfo {
+        u32* end_of_prelude;
+
+        using RunCodeFuncType = HaltReason (*)(CodePtr entry_point, A64JitState* context, volatile u32* halt_reason);
+        RunCodeFuncType run_code;
+        RunCodeFuncType step_code;
+        void* return_to_dispatcher;
+        void* return_from_run_code;
+
+        void* read_memory_8;
+        void* read_memory_16;
+        void* read_memory_32;
+        void* read_memory_64;
+        void* read_memory_128;
+        void* exclusive_read_memory_8;
+        void* exclusive_read_memory_16;
+        void* exclusive_read_memory_32;
+        void* exclusive_read_memory_64;
+        void* exclusive_read_memory_128;
+        void* write_memory_8;
+        void* write_memory_16;
+        void* write_memory_32;
+        void* write_memory_64;
+        void* write_memory_128;
+        void* exclusive_write_memory_8;
+        void* exclusive_write_memory_16;
+        void* exclusive_write_memory_32;
+        void* exclusive_write_memory_64;
+        void* exclusive_write_memory_128;
+        void* call_svc;
+        void* exception_raised;
+        void* dc_raised;
+        void* ic_raised;
+        void* isb_raised;
+        void* add_ticks;
+        void* get_ticks_remaining;
+    } prelude_info;
+};
+
+} // namespace Dynarmic::Backend::Arm64
diff --git a/src/dynarmic/backend/arm64/a64_core.h b/src/dynarmic/backend/arm64/a64_core.h
new file mode 100644
index 00000000..24fbb66b
--- /dev/null
+++ b/src/dynarmic/backend/arm64/a64_core.h
@@ -0,0 +1,30 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2022 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#pragma once
+
+#include "dynarmic/backend/arm64/a64_address_space.h"
+#include "dynarmic/backend/arm64/a64_jitstate.h"
+
+namespace Dynarmic::Backend::Arm64 {
+
+class A64Core final {
+public:
+    explicit A64Core(const A64::UserConfig&) {}
+
+    HaltReason Run(A64AddressSpace& process, A64JitState& thread_ctx, volatile u32* halt_reason) {
+        const auto location_descriptor = thread_ctx.GetLocationDescriptor();
+        const auto entry_point = process.GetOrEmit(location_descriptor);
+        return process.prelude_info.run_code(entry_point, &thread_ctx, halt_reason);
+    }
+
+    HaltReason Step(A64AddressSpace& process, A64JitState& thread_ctx, volatile u32* halt_reason) {
+        const auto location_descriptor = A64::LocationDescriptor{thread_ctx.GetLocationDescriptor()}.SetSingleStepping(true);
+        const auto entry_point = process.GetOrEmit(location_descriptor);
+        return process.prelude_info.step_code(entry_point, &thread_ctx, halt_reason);
+    }
+};
+
+} // namespace Dynarmic::Backend::Arm64
diff --git a/src/dynarmic/backend/arm64/a64_interface.cpp b/src/dynarmic/backend/arm64/a64_interface.cpp
index 36ffbdc4..ca1dacab 100644
--- a/src/dynarmic/backend/arm64/a64_interface.cpp
+++ b/src/dynarmic/backend/arm64/a64_interface.cpp
@@ -1,5 +1,5 @@
 /* This file is part of the dynarmic project.
- * Copyright (c) 2021 MerryMage
+ * Copyright (c) 2022 MerryMage
  * SPDX-License-Identifier: 0BSD
  */
 
@@ -11,13 +11,162 @@
 #include <boost/icl/interval_set.hpp>
 #include <mcl/assert.hpp>
 
+#include "dynarmic/backend/arm64/a64_address_space.h"
+#include "dynarmic/backend/arm64/a64_core.h"
+#include "dynarmic/backend/arm64/a64_jitstate.h"
 #include "dynarmic/common/atomic.h"
 #include "dynarmic/interface/A64/a64.h"
 #include "dynarmic/interface/A64/config.h"
 
 namespace Dynarmic::A64 {
 
-struct Jit::Impl {};
+using namespace Backend::Arm64;
+
+struct Jit::Impl final {
+    Impl(Jit* jit_interface, A64::UserConfig conf)
+            : jit_interface(jit_interface)
+            , conf(conf)
+            , current_address_space(conf)
+            , core(conf) {}
+
+    HaltReason Run() {
+        ASSERT(!is_executing);
+        PerformRequestedCacheInvalidation();
+
+        is_executing = true;
+        SCOPE_EXIT {
+            is_executing = false;
+        };
+
+        HaltReason hr = core.Run(current_address_space, current_state, &halt_reason);
+
+        PerformRequestedCacheInvalidation();
+
+        return hr;
+    }
+
+    HaltReason Step() {
+        ASSERT(!is_executing);
+        PerformRequestedCacheInvalidation();
+
+        is_executing = true;
+        SCOPE_EXIT {
+            is_executing = false;
+        };
+
+        HaltReason hr = core.Step(current_address_space, current_state, &halt_reason);
+
+        PerformRequestedCacheInvalidation();
+
+        return hr;
+    }
+
+    void ClearCache() {
+        std::unique_lock lock{invalidation_mutex};
+        invalidate_entire_cache = true;
+        HaltExecution(HaltReason::CacheInvalidation);
+    }
+
+    void InvalidateCacheRange(std::uint64_t start_address, std::size_t length) {
+        std::unique_lock lock{invalidation_mutex};
+        invalid_cache_ranges.add(boost::icl::discrete_interval<u64>::closed(start_address, start_address + length - 1));
+        HaltExecution(HaltReason::CacheInvalidation);
+    }
+
+    void Reset() {
+        current_state = {};
+    }
+
+    void HaltExecution(HaltReason hr) {
+        Atomic::Or(&halt_reason, static_cast<u32>(hr));
+    }
+
+    void ClearHalt(HaltReason hr) {
+        Atomic::And(&halt_reason, ~static_cast<u32>(hr));
+    }
+
+    std::array<u64, 31>& Regs() {
+        return current_state.reg;
+    }
+
+    const std::array<u64, 31>& Regs() const {
+        return current_state.reg;
+    }
+
+    std::array<u64, 64>& VecRegs() {
+        return current_state.vec;
+    }
+
+    const std::array<u64, 64>& VecRegs() const {
+        return current_state.vec;
+    }
+
+    std::uint32_t Fpcr() const {
+        return current_state.fpcr;
+    }
+
+    void SetFpcr(std::uint32_t value) {
+        current_state.fpcr = value;
+    }
+
+    std::uint32_t Fpsr() const {
+        return current_state.fpsr;
+    }
+
+    void SetFpscr(std::uint32_t value) {
+        current_state.fpsr = value;
+    }
+
+    std::uint32_t Pstate() const {
+        return current_state.cpsr_nzcv;
+    }
+
+    void SetPstate(std::uint32_t value) {
+        current_state.cpsr_nzcv = value;
+    }
+
+    void ClearExclusiveState() {
+        current_state.exclusive_state = false;
+    }
+
+    void DumpDisassembly() const {
+        ASSERT_FALSE("Unimplemented");
+    }
+
+private:
+    void PerformRequestedCacheInvalidation() {
+        ClearHalt(HaltReason::CacheInvalidation);
+
+        if (invalidate_entire_cache) {
+            current_address_space.ClearCache();
+
+            invalidate_entire_cache = false;
+            invalid_cache_ranges.clear();
+            return;
+        }
+
+        if (!invalid_cache_ranges.empty()) {
+            // TODO: Optimize
+            current_address_space.ClearCache();
+
+            invalid_cache_ranges.clear();
+            return;
+        }
+    }
+
+    Jit* jit_interface;
+    A64::UserConfig conf;
+    A64JitState current_state{};
+    A64AddressSpace current_address_space;
+    A64Core core;
+
+    volatile u32 halt_reason = 0;
+
+    std::mutex invalidation_mutex;
+    boost::icl::interval_set<u64> invalid_cache_ranges;
+    bool invalidate_entire_cache = false;
+    bool is_executing = false;
+};
 
 Jit::Jit(UserConfig conf) {
     (void)conf;
diff --git a/src/dynarmic/backend/arm64/a64_jitstate.h b/src/dynarmic/backend/arm64/a64_jitstate.h
new file mode 100644
index 00000000..1b011295
--- /dev/null
+++ b/src/dynarmic/backend/arm64/a64_jitstate.h
@@ -0,0 +1,37 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2022 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#pragma once
+
+#include <array>
+
+#include <mcl/stdint.hpp>
+
+#include "dynarmic/frontend/A64/a64_location_descriptor.h"
+
+namespace Dynarmic::Backend::Arm64 {
+
+struct A64JitState {
+    std::array<u64, 31> reg{};
+    u64 sp = 0;
+    u64 pc = 0;
+
+    u32 cpsr_nzcv = 0;
+
+    u32 upper_location_descriptor;
+
+    alignas(16) std::array<u64, 64> vec{}; // Extension registers.
+
+    u32 exclusive_state = 0;
+
+    u32 fpsr = 0;
+    u32 fpcr = 0;
+
+    IR::LocationDescriptor GetLocationDescriptor() const {
+        return IR::LocationDescriptor{pc};
+    }
+};
+
+} // namespace Dynarmic::Backend::Arm64

From 6291896fbf906286c41a518afc3d0db37aaf2d1d Mon Sep 17 00:00:00 2001
From: Liam
Date: Sun, 13 Nov 2022 11:39:16 -0500
Subject: [PATCH 02/47] Add a64_address_space

---
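Note: each trampoline below freezes a member-function callback into a tiny
stub (two literal-pool loads, the `this` pointer and the devirtualized
function pointer, followed by an indirect branch), so generated code can
reach C++ callbacks with a plain BL. The emitted shape is roughly:

    target:
        LDR  X0, l_this         // implicit `this` argument
        LDR  Xscratch0, l_addr  // devirtualized callback address
        BR   Xscratch0
    l_this: .quad <this>
    l_addr: .quad <callback>

The exclusive read/write variants instead branch to a lambda that funnels
the access through conf.global_monitor; the write variant returns 0 on
success and 1 on failure, which the exclusive-store IR ops treat as the
pass/fail result. This patch also extends A64JitState::GetLocationDescriptor()
to key blocks on FPCR bits as well as the PC, since emitted code is
specialized on the floating-point mode.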
 .../backend/arm64/a64_address_space.cpp       | 469 ++++++++++++++++++
 src/dynarmic/backend/arm64/a64_jitstate.h     |   6 +-
 src/dynarmic/backend/arm64/emit_arm64.h       |   6 +
 3 files changed, 478 insertions(+), 3 deletions(-)
 create mode 100644 src/dynarmic/backend/arm64/a64_address_space.cpp

diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp
new file mode 100644
index 00000000..576ad17e
--- /dev/null
+++ b/src/dynarmic/backend/arm64/a64_address_space.cpp
@@ -0,0 +1,469 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2022 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#include "dynarmic/backend/arm64/a64_address_space.h"
+
+#include "dynarmic/backend/arm64/a64_jitstate.h"
+#include "dynarmic/backend/arm64/abi.h"
+#include "dynarmic/backend/arm64/devirtualize.h"
+#include "dynarmic/backend/arm64/emit_arm64.h"
+#include "dynarmic/backend/arm64/stack_layout.h"
+#include "dynarmic/common/cast_util.h"
+#include "dynarmic/common/fp/fpcr.h"
+#include "dynarmic/frontend/A64/a64_location_descriptor.h"
+#include "dynarmic/frontend/A64/translate/a64_translate.h"
+#include "dynarmic/interface/exclusive_monitor.h"
+#include "dynarmic/ir/opt/passes.h"
+
+namespace Dynarmic::Backend::Arm64 {
+
+template<auto callback, typename T>
+static void* EmitCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
+    using namespace oaknut::util;
+
+    const auto info = Devirtualize<callback>(this_);
+
+    oaknut::Label l_addr, l_this;
+
+    void* target = code.ptr<void*>();
+    code.LDR(X0, l_this);
+    code.LDR(Xscratch0, l_addr);
+    code.BR(Xscratch0);
+
+    code.align(8);
+    code.l(l_this);
+    code.dx(info.this_ptr);
+    code.l(l_addr);
+    code.dx(info.fn_ptr);
+
+    return target;
+}
+
+template<auto callback, typename T>
+static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
+    using namespace oaknut::util;
+
+    oaknut::Label l_addr, l_this;
+
+    auto fn = [](const A32::UserConfig& conf, A32::VAddr vaddr) -> T {
+        return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
+            return (conf.callbacks->*callback)(vaddr);
+        });
+    };
+
+    void* target = code.ptr<void*>();
+    code.LDR(X0, l_this);
+    code.LDR(Xscratch0, l_addr);
+    code.BR(Xscratch0);
+
+    code.align(8);
+    code.l(l_this);
+    code.dx(mcl::bit_cast<u64>(&conf));
+    code.l(l_addr);
+    code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
+
+    return target;
+}
+
+template<auto callback, typename T>
+static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
+    using namespace oaknut::util;
+
+    oaknut::Label l_addr, l_this;
+
+    auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, T value) -> u32 {
+        return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
+                                                            [&](T expected) -> bool {
+                                                                return (conf.callbacks->*callback)(vaddr, value, expected);
+                                                            })
+                 ? 0
+                 : 1;
+    };
+
+    void* target = code.ptr<void*>();
+    code.LDR(X0, l_this);
+    code.LDR(Xscratch0, l_addr);
+    code.BR(Xscratch0);
+
+    code.align(8);
+    code.l(l_this);
+    code.dx(mcl::bit_cast<u64>(&conf));
+    code.l(l_addr);
+    code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
+
+    return target;
+}
+
+A64AddressSpace::A64AddressSpace(const A64::UserConfig& conf)
+        : conf(conf)
+        , mem(conf.code_cache_size)
+        , code(mem.ptr()) {
+    EmitPrelude();
+}
+
+IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
+    const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
+    IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code,
+                                        {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
+
+    Optimization::A64CallbackConfigPass(ir_block, conf);
+    if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
+        Optimization::A64GetSetElimination(ir_block);
+        Optimization::DeadCodeElimination(ir_block);
+    }
+    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
+        Optimization::ConstantPropagation(ir_block);
+        Optimization::DeadCodeElimination(ir_block);
+    }
+    if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) {
+        Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
+    }
+    Optimization::VerificationPass(ir_block);
+
+    return ir_block;
+}
+
+CodePtr A64AddressSpace::Get(IR::LocationDescriptor descriptor) {
+    if (const auto iter = block_entries.find(descriptor.Value()); iter != block_entries.end()) {
+        return iter->second;
+    }
+    return nullptr;
+}
+
+CodePtr A64AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) {
+    if (CodePtr block_entry = Get(descriptor)) {
+        return block_entry;
+    }
+
+    IR::Block ir_block = GenerateIR(descriptor);
+    const EmittedBlockInfo block_info = Emit(std::move(ir_block));
+
+    block_infos.insert_or_assign(descriptor.Value(), block_info);
+    block_entries.insert_or_assign(descriptor.Value(), block_info.entry_point);
+    return block_info.entry_point;
+}
+
+void A64AddressSpace::ClearCache() {
+    block_entries.clear();
+    block_infos.clear();
+    block_references.clear();
+    code.set_ptr(prelude_info.end_of_prelude);
+}
+
+void A64AddressSpace::EmitPrelude() {
+    using namespace oaknut::util;
+
+    mem.unprotect();
+
+    prelude_info.read_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead8>(code, conf.callbacks);
+    prelude_info.read_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead16>(code, conf.callbacks);
+    prelude_info.read_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks);
+    prelude_info.read_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks);
+    prelude_info.read_memory_128 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead128>(code, conf.callbacks);
+    prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead8, u8>(code, conf);
+    prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead16, u16>(code, conf);
+    prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead32, u32>(code, conf);
+    prelude_info.exclusive_read_memory_64 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead64, u64>(code, conf);
+    prelude_info.exclusive_read_memory_128 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead128, Vector>(code, conf);
+    prelude_info.write_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
+    prelude_info.write_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
+    prelude_info.write_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
+    prelude_info.write_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
+    prelude_info.write_memory_128 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite128>(code, conf.callbacks);
+    prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf);
+    prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf);
+    prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf);
+    prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf);
+    prelude_info.exclusive_write_memory_128 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, Vector>(code, conf);
+    prelude_info.call_svc = EmitCallTrampoline<&A64::UserCallbacks::CallSVC>(code, conf.callbacks);
+    prelude_info.exception_raised = EmitCallTrampoline<&A64::UserCallbacks::ExceptionRaised>(code, conf.callbacks);
+    prelude_info.isb_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks);
+    prelude_info.ic_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionCacheOperationRaised>(code, conf.callbacks);
+    prelude_info.dc_raised = EmitCallTrampoline<&A64::UserCallbacks::DataCacheOperationRaised>(code, conf.callbacks);
+    prelude_info.add_ticks = EmitCallTrampoline<&A64::UserCallbacks::AddTicks>(code, conf.callbacks);
+    prelude_info.get_ticks_remaining = EmitCallTrampoline<&A64::UserCallbacks::GetTicksRemaining>(code, conf.callbacks);
+
+    oaknut::Label return_from_run_code;
+
+    prelude_info.run_code = code.ptr<PreludeInfo::RunCodeFuncType>();
+    {
+        ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
+
+        code.MOV(X19, X0);
+        code.MOV(Xstate, X1);
+        code.MOV(Xhalt, X2);
+
+        if (conf.enable_cycle_counting) {
+            code.BL(prelude_info.get_ticks_remaining);
+            code.MOV(Xticks, X0);
+            code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
+        }
+
+        code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
+        code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
+        code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
+        code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
+
+        code.LDAR(Wscratch0, Xhalt);
+        code.CBNZ(Wscratch0, return_from_run_code);
+
+        code.BR(X19);
+    }
+
+    prelude_info.step_code = code.ptr<PreludeInfo::RunCodeFuncType>();
+    {
+        ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
+
+        code.MOV(X19, X0);
+        code.MOV(Xstate, X1);
+        code.MOV(Xhalt, X2);
+
+        if (conf.enable_cycle_counting) {
+            code.MOV(Xticks, 1);
+            code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
+        }
+
+        code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
+        code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
+        code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
+        code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
+
+        oaknut::Label step_hr_loop;
+        code.l(step_hr_loop);
+        code.LDAXR(Wscratch0, Xhalt);
+        code.CBNZ(Wscratch0, return_from_run_code);
+        code.ORR(Wscratch0, Wscratch0, static_cast<u32>(HaltReason::Step));
+        code.STLXR(Wscratch1, Wscratch0, Xhalt);
+        code.CBNZ(Wscratch1, step_hr_loop);
+
+        code.BR(X19);
+    }
+
+    prelude_info.return_to_dispatcher = code.ptr<void*>();
+    {
+        oaknut::Label l_this, l_addr;
+
+        code.LDAR(Wscratch0, Xhalt);
+        code.CBNZ(Wscratch0, return_from_run_code);
+
+        if (conf.enable_cycle_counting) {
+            code.CMP(Xticks, 0);
+            code.B(LE, return_from_run_code);
+        }
+
+        code.LDR(X0, l_this);
+        code.MOV(X1, Xstate);
+        code.LDR(Xscratch0, l_addr);
+        code.BLR(Xscratch0);
+        code.BR(X0);
+
+        const auto fn = [](A64AddressSpace& self, A64JitState& context) -> CodePtr {
+            return self.GetOrEmit(context.GetLocationDescriptor());
+        };
+
+        code.align(8);
+        code.l(l_this);
+        code.dx(mcl::bit_cast<u64>(this));
+        code.l(l_addr);
+        code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
+    }
+
+    prelude_info.return_from_run_code = code.ptr<void*>();
+    {
+        code.l(return_from_run_code);
+
+        if (conf.enable_cycle_counting) {
+            code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
+            code.SUB(X1, X1, Xticks);
+            code.BL(prelude_info.add_ticks);
+        }
+
+        code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr));
+        code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
+
+        oaknut::Label exit_hr_loop;
+        code.l(exit_hr_loop);
+        code.LDAXR(W0, Xhalt);
+        code.STLXR(Wscratch0, WZR, Xhalt);
+        code.CBNZ(Wscratch0, exit_hr_loop);
+
+        ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
+        code.RET();
+    }
+
+    prelude_info.end_of_prelude = code.ptr<u32*>();
+
+    mem.invalidate_all();
+    mem.protect();
+}
+
+size_t A64AddressSpace::GetRemainingSize() {
+    return conf.code_cache_size - (code.ptr<CodePtr>() - reinterpret_cast<CodePtr>(mem.ptr()));
+}
+
+EmittedBlockInfo A64AddressSpace::Emit(IR::Block block) {
+    if (GetRemainingSize() < 1024 * 1024) {
+        ClearCache();
+    }
+
+    mem.unprotect();
+
+    const EmitConfig emit_conf{
+        .hook_isb = conf.hook_isb,
+        .enable_cycle_counting = conf.enable_cycle_counting,
+        .always_little_endian = true,
+        .descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return A64::LocationDescriptor{location}.FPCR(); },
+        .state_nzcv_offset = offsetof(A64JitState, cpsr_nzcv),
+        .state_fpsr_offset = offsetof(A64JitState, fpsr),
+        .coprocessors{},
+        .optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,
+    };
+    EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf);
+
+    Link(block.Location(), block_info);
+
+    mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
+
+    RelinkForDescriptor(block.Location());
+
+    mem.protect();
+
+    return block_info;
+}
+
+static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector<BlockRelocation>& block_relocations_list) {
+    using namespace oaknut;
+    using namespace oaknut::util;
+
+    for (auto [ptr_offset] : block_relocations_list) {
+        CodeGenerator c{reinterpret_cast<u32*>(entry_point + ptr_offset)};
+
+        if (target_ptr) {
+            c.B((void*)target_ptr);
+        } else {
+            c.NOP();
+        }
+    }
+}
+
+void A64AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block_info) {
+    using namespace oaknut;
+    using namespace oaknut::util;
+
+    for (auto [ptr_offset, target] : block_info.relocations) {
+        CodeGenerator c{reinterpret_cast<u32*>(block_info.entry_point + ptr_offset)};
+
+        switch (target) {
+        case LinkTarget::ReturnToDispatcher:
+            c.B(prelude_info.return_to_dispatcher);
+            break;
+        case LinkTarget::ReturnFromRunCode:
+            c.B(prelude_info.return_from_run_code);
+            break;
+        case LinkTarget::ReadMemory8:
+            c.BL(prelude_info.read_memory_8);
+            break;
+        case LinkTarget::ReadMemory16:
+            c.BL(prelude_info.read_memory_16);
+            break;
+        case LinkTarget::ReadMemory32:
+            c.BL(prelude_info.read_memory_32);
+            break;
+        case LinkTarget::ReadMemory64:
+            c.BL(prelude_info.read_memory_64);
+            break;
+        case LinkTarget::ReadMemory128:
+            c.BL(prelude_info.read_memory_128);
+            break;
+        case LinkTarget::ExclusiveReadMemory8:
+            c.BL(prelude_info.exclusive_read_memory_8);
+            break;
+        case LinkTarget::ExclusiveReadMemory16:
+            c.BL(prelude_info.exclusive_read_memory_16);
+            break;
+        case LinkTarget::ExclusiveReadMemory32:
+            c.BL(prelude_info.exclusive_read_memory_32);
+            break;
+        case LinkTarget::ExclusiveReadMemory64:
+            c.BL(prelude_info.exclusive_read_memory_64);
+            break;
+        case LinkTarget::ExclusiveReadMemory128:
+            c.BL(prelude_info.exclusive_read_memory_128);
+            break;
+        case LinkTarget::WriteMemory8:
+            c.BL(prelude_info.write_memory_8);
+            break;
+        case LinkTarget::WriteMemory16:
+            c.BL(prelude_info.write_memory_16);
+            break;
+        case LinkTarget::WriteMemory32:
+            c.BL(prelude_info.write_memory_32);
+            break;
+        case LinkTarget::WriteMemory64:
+            c.BL(prelude_info.write_memory_64);
+            break;
+        case LinkTarget::WriteMemory128:
+            c.BL(prelude_info.write_memory_128);
+            break;
+        case LinkTarget::ExclusiveWriteMemory8:
+            c.BL(prelude_info.exclusive_write_memory_8);
+            break;
+        case LinkTarget::ExclusiveWriteMemory16:
+            c.BL(prelude_info.exclusive_write_memory_16);
+            break;
+        case LinkTarget::ExclusiveWriteMemory32:
+            c.BL(prelude_info.exclusive_write_memory_32);
+            break;
+        case LinkTarget::ExclusiveWriteMemory64:
+            c.BL(prelude_info.exclusive_write_memory_64);
+            break;
+        case LinkTarget::ExclusiveWriteMemory128:
+            c.BL(prelude_info.exclusive_write_memory_128);
+            break;
+        case LinkTarget::CallSVC:
+            c.BL(prelude_info.call_svc);
+            break;
+        case LinkTarget::ExceptionRaised:
+            c.BL(prelude_info.exception_raised);
+            break;
+        case LinkTarget::InstructionSynchronizationBarrierRaised:
+            c.BL(prelude_info.isb_raised);
+            break;
+        case LinkTarget::InstructionCacheOperationRaised:
+            c.BL(prelude_info.ic_raised);
+            break;
+        case LinkTarget::DataCacheOperationRaised:
+            c.BL(prelude_info.dc_raised);
+            break;
+        case LinkTarget::AddTicks:
+            c.BL(prelude_info.add_ticks);
+            break;
+        case LinkTarget::GetTicksRemaining:
+            c.BL(prelude_info.get_ticks_remaining);
+            break;
+        default:
+            ASSERT_FALSE("Invalid relocation target");
+        }
+    }
+
+    for (auto [target_descriptor, list] : block_info.block_relocations) {
+        block_references[target_descriptor.Value()].emplace(block_descriptor.Value());
+        LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list);
+    }
+}
+
+void A64AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor) {
+    for (auto block_descriptor : block_references[target_descriptor.Value()]) {
+        if (auto iter = block_infos.find(block_descriptor); iter != block_infos.end()) {
+            const EmittedBlockInfo& block_info = iter->second;
+
+            LinkBlockLinks(block_info.entry_point, Get(target_descriptor), block_infos[block_descriptor].block_relocations[target_descriptor]);
+
+            mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
+        }
+    }
+}
+
+} // namespace Dynarmic::Backend::Arm64
diff --git a/src/dynarmic/backend/arm64/a64_jitstate.h b/src/dynarmic/backend/arm64/a64_jitstate.h
index 1b011295..223a36c9 100644
--- a/src/dynarmic/backend/arm64/a64_jitstate.h
+++ b/src/dynarmic/backend/arm64/a64_jitstate.h
@@ -20,8 +20,6 @@ struct A64JitState {
 
     u32 cpsr_nzcv = 0;
 
-    u32 upper_location_descriptor;
-
     alignas(16) std::array<u64, 64> vec{}; // Extension registers.
 
     u32 exclusive_state = 0;
@@ -30,7 +28,9 @@ struct A64JitState {
     u32 fpcr = 0;
 
     IR::LocationDescriptor GetLocationDescriptor() const {
-        return IR::LocationDescriptor{pc};
+        const u64 fpcr_u64 = static_cast<u64>(fpcr & A64::LocationDescriptor::fpcr_mask) << A64::LocationDescriptor::fpcr_shift;
+        const u64 pc_u64 = pc & A64::LocationDescriptor::pc_mask;
+        return IR::LocationDescriptor{pc_u64 | fpcr_u64};
     }
 };
 
diff --git a/src/dynarmic/backend/arm64/emit_arm64.h b/src/dynarmic/backend/arm64/emit_arm64.h
index 1c5adf3f..72124aa4 100644
--- a/src/dynarmic/backend/arm64/emit_arm64.h
+++ b/src/dynarmic/backend/arm64/emit_arm64.h
@@ -47,21 +47,27 @@ enum class LinkTarget {
     ReadMemory16,
     ReadMemory32,
     ReadMemory64,
+    ReadMemory128,
     ExclusiveReadMemory8,
     ExclusiveReadMemory16,
     ExclusiveReadMemory32,
     ExclusiveReadMemory64,
+    ExclusiveReadMemory128,
     WriteMemory8,
     WriteMemory16,
     WriteMemory32,
     WriteMemory64,
+    WriteMemory128,
     ExclusiveWriteMemory8,
     ExclusiveWriteMemory16,
     ExclusiveWriteMemory32,
     ExclusiveWriteMemory64,
+    ExclusiveWriteMemory128,
     CallSVC,
     ExceptionRaised,
     InstructionSynchronizationBarrierRaised,
+    InstructionCacheOperationRaised,
+    DataCacheOperationRaised,
     AddTicks,
     GetTicksRemaining,
 };

From 4ebc32e1e48e9effe737eee2e30b28d04a4a34d4 Mon Sep 17 00:00:00 2001
From: Liam
Date: Sun, 13 Nov 2022 11:48:42 -0500
Subject: [PATCH 03/47] Add more to interface

---
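Note: with Jit::Impl wired up, the public interface is usable end to end.
Hypothetical usage, assuming a populated UserConfig `conf` (illustrative
only, not part of this patch):

    Dynarmic::A64::Jit jit{conf};
    jit.SetPC(0x1000);
    jit.SetRegister(0, 42);
    const Dynarmic::HaltReason hr = jit.Run();

HaltExecution() may be called from another thread: halt_reason is driven
through Atomic::Or/Atomic::And here and polled with acquire loads in the
prelude, which is how cache-invalidation requests interrupt a running guest.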
 src/dynarmic/backend/arm64/a64_interface.cpp | 94 +++++++++++++--------
 1 file changed, 60 insertions(+), 34 deletions(-)

diff --git a/src/dynarmic/backend/arm64/a64_interface.cpp b/src/dynarmic/backend/arm64/a64_interface.cpp
index ca1dacab..715f11a3 100644
--- a/src/dynarmic/backend/arm64/a64_interface.cpp
+++ b/src/dynarmic/backend/arm64/a64_interface.cpp
@@ -85,6 +85,22 @@ struct Jit::Impl final {
         Atomic::And(&halt_reason, ~static_cast<u32>(hr));
     }
 
+    std::uint64_t PC() const {
+        return current_state.pc;
+    }
+
+    void SetPC(std::uint64_t value) {
+        current_state.pc = value;
+    }
+
+    std::uint64_t SP() const {
+        return current_state.sp;
+    }
+
+    void SetSP(std::uint64_t value) {
+        current_state.sp = value;
+    }
+
     std::array<u64, 31>& Regs() {
         return current_state.reg;
     }
@@ -113,7 +129,7 @@ struct Jit::Impl final {
         return current_state.fpsr;
     }
 
-    void SetFpscr(std::uint32_t value) {
+    void SetFpsr(std::uint32_t value) {
         current_state.fpsr = value;
     }
 
@@ -129,10 +145,18 @@ struct Jit::Impl final {
         current_state.exclusive_state = false;
     }
 
+    bool IsExecuting() const {
+        return is_executing;
+    }
+
     void DumpDisassembly() const {
         ASSERT_FALSE("Unimplemented");
     }
 
+    std::vector<std::string> Disassemble() const {
+        ASSERT_FALSE("Unimplemented");
+    }
+
 private:
     void PerformRequestedCacheInvalidation() {
         ClearHalt(HaltReason::CacheInvalidation);
@@ -168,128 +192,130 @@ private:
     bool is_executing = false;
 };
 
-Jit::Jit(UserConfig conf) {
-    (void)conf;
+Jit::Jit(UserConfig conf) : impl{std::make_unique<Impl>(this, conf)} {
 }
 
 Jit::~Jit() = default;
 
 HaltReason Jit::Run() {
-    ASSERT_FALSE("not implemented");
+    return impl->Run();
 }
 
 HaltReason Jit::Step() {
-    ASSERT_FALSE("not implemented");
+    return impl->Step();
 }
 
 void Jit::ClearCache() {
+    impl->ClearCache();
 }
 
 void Jit::InvalidateCacheRange(std::uint64_t start_address, std::size_t length) {
-    (void)start_address;
-    (void)length;
+    impl->InvalidateCacheRange(start_address, length);
 }
 
 void Jit::Reset() {
+    impl->Reset();
 }
 
 void Jit::HaltExecution(HaltReason hr) {
-    (void)hr;
+    impl->HaltExecution(hr);
 }
 
 void Jit::ClearHalt(HaltReason hr) {
-    (void)hr;
+    impl->ClearHalt(hr);
 }
 
 std::uint64_t Jit::GetSP() const {
-    return 0;
+    return impl->SP();
 }
 
 void Jit::SetSP(std::uint64_t value) {
-    (void)value;
+    impl->SetSP(value);
 }
 
 std::uint64_t Jit::GetPC() const {
-    return 0;
+    return impl->PC();
 }
 
 void Jit::SetPC(std::uint64_t value) {
-    (void)value;
+    impl->SetPC(value);
 }
 
 std::uint64_t Jit::GetRegister(std::size_t index) const {
-    (void)index;
-    return 0;
+    return impl->Regs()[index];
 }
 
 void Jit::SetRegister(size_t index, std::uint64_t value) {
-    (void)index;
-    (void)value;
+    impl->Regs()[index] = value;
 }
 
 std::array<std::uint64_t, 31> Jit::GetRegisters() const {
-    return {};
+    return impl->Regs();
 }
 
 void Jit::SetRegisters(const std::array<std::uint64_t, 31>& value) {
-    (void)value;
+    impl->Regs() = value;
 }
 
 Vector Jit::GetVector(std::size_t index) const {
-    (void)index;
-    return {};
+    auto& vec = impl->VecRegs();
+    return {vec[index], vec[index + 1]};
 }
 
 void Jit::SetVector(std::size_t index, Vector value) {
-    (void)index;
-    (void)value;
+    auto& vec = impl->VecRegs();
+    vec[index] = value[0];
+    vec[index + 1] = value[1];
 }
 
 std::array<Vector, 32> Jit::GetVectors() const {
-    return {};
+    std::array<Vector, 32> ret;
+    std::memcpy(ret.data(), impl->VecRegs().data(), sizeof(ret));
+    return ret;
 }
 
 void Jit::SetVectors(const std::array<Vector, 32>& value) {
-    (void)value;
+    std::memcpy(impl->VecRegs().data(), value.data(), sizeof(value));
 }
 
 std::uint32_t Jit::GetFpcr() const {
-    return 0;
+    return impl->Fpcr();
 }
 
 void Jit::SetFpcr(std::uint32_t value) {
-    (void)value;
+    impl->SetFpcr(value);
 }
 
 std::uint32_t Jit::GetFpsr() const {
-    return 0;
+    return impl->Fpsr();
 }
 
 void Jit::SetFpsr(std::uint32_t value) {
-    (void)value;
+    impl->SetFpsr(value);
 }
 
 std::uint32_t Jit::GetPstate() const {
-    return 0;
+    return impl->Pstate();
 }
 
 void Jit::SetPstate(std::uint32_t value) {
-    (void)value;
+    impl->SetPstate(value);
 }
 
 void Jit::ClearExclusiveState() {
+    impl->ClearExclusiveState();
 }
 
 bool Jit::IsExecuting() const {
-    return false;
+    return impl->IsExecuting();
 }
 
 void Jit::DumpDisassembly() const {
-    ASSERT_FALSE("not implemented");
+    impl->DumpDisassembly();
 }
 
 std::vector<std::string> Jit::Disassemble() const {
-    ASSERT_FALSE("not implemented");
+    impl->Disassemble();
 }
 
 } // namespace Dynarmic::A64

From a8cb2c33f68591daaa35513c56befa7b94465eca Mon Sep 17 00:00:00 2001
From: Liam
Date: Sun, 13 Nov 2022 11:56:34 -0500
Subject: [PATCH 04/47] Add a64 memory

---
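Note: all A64 memory ops funnel through the four helpers added below, keyed
by LinkTarget. Ordered accesses (AccType::ORDERED and friends) get DMB ISH
barriers around the callback; for an ordered exclusive store the emitted
shape is roughly:

        DMB  ISH
        LDRB Wscratch0, [Xstate, #offsetof(A64JitState, exclusive_state)]
        CBZ  Wscratch0, end          // monitor not armed: skip the callback
        STRB WZR, [Xstate, #offsetof(A64JitState, exclusive_state)]
        BL   <exclusive_write_memory_N trampoline>
        DMB  ISH
    end:

Exclusive loads arm the flag (exclusive_state = 1) before calling out, and
A64ClearExclusive stores zero to it.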
 .../backend/arm64/emit_arm64_a64_memory.cpp   | 172 +++++++++---------
 1 file changed, 86 insertions(+), 86 deletions(-)

diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp
index 1251a035..75fe7335 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp
@@ -5,11 +5,12 @@
 
 #include <oaknut/oaknut.hpp>
 
-#include "dynarmic/backend/arm64/a32_jitstate.h"
+#include "dynarmic/backend/arm64/a64_jitstate.h"
 #include "dynarmic/backend/arm64/abi.h"
 #include "dynarmic/backend/arm64/emit_arm64.h"
 #include "dynarmic/backend/arm64/emit_context.h"
 #include "dynarmic/backend/arm64/reg_alloc.h"
+#include "dynarmic/ir/acc_type.h"
 #include "dynarmic/ir/basic_block.h"
 #include "dynarmic/ir/microinstruction.h"
 #include "dynarmic/ir/opcodes.h"
@@ -18,172 +19,171 @@ namespace Dynarmic::Backend::Arm64 {
 
 using namespace oaknut::util;
 
+static bool IsOrdered(IR::AccType acctype) {
+    return acctype == IR::AccType::ORDERED || acctype == IR::AccType::ORDEREDRW || acctype == IR::AccType::LIMITEDORDERED;
+}
+
+static void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.PrepareForCall(inst, {}, args[1]);
+    const bool ordered = IsOrdered(args[2].GetImmediateAccType());
+
+    EmitRelocation(code, ctx, fn);
+    if (ordered) {
+        code.DMB(oaknut::BarrierOp::ISH);
+    }
+}
+
+static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.PrepareForCall(inst, {}, args[1]);
+    const bool ordered = IsOrdered(args[2].GetImmediateAccType());
+
+    code.MOV(Wscratch0, 1);
+    code.STRB(Wscratch0, Xstate, offsetof(A64JitState, exclusive_state));
+    EmitRelocation(code, ctx, fn);
+    if (ordered) {
+        code.DMB(oaknut::BarrierOp::ISH);
+    }
+}
+
+static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.PrepareForCall(inst, {}, args[1], args[2]);
+    const bool ordered = IsOrdered(args[3].GetImmediateAccType());
+
+    if (ordered) {
+        code.DMB(oaknut::BarrierOp::ISH);
+    }
+    EmitRelocation(code, ctx, fn);
+    if (ordered) {
+        code.DMB(oaknut::BarrierOp::ISH);
+    }
+}
+
+static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.PrepareForCall(inst, {}, args[1], args[2]);
+    const bool ordered = IsOrdered(args[3].GetImmediateAccType());
+
+    oaknut::Label end;
+
+    if (ordered) {
+        code.DMB(oaknut::BarrierOp::ISH);
+    }
+    code.LDRB(Wscratch0, Xstate, offsetof(A64JitState, exclusive_state));
+    code.CBZ(Wscratch0, end);
+    code.STRB(WZR, Xstate, offsetof(A64JitState, exclusive_state));
+    EmitRelocation(code, ctx, fn);
+    if (ordered) {
+        code.DMB(oaknut::BarrierOp::ISH);
+    }
+    code.l(end);
+}
+
 template<>
-void EmitIR<IR::Opcode::A64ClearExclusive>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+void EmitIR<IR::Opcode::A64ClearExclusive>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst*) {
+    code.STR(WZR, Xstate, offsetof(A64JitState, exclusive_state));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ReadMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory8);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ReadMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory16);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ReadMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory32);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ReadMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory64);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ReadMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory128);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ExclusiveReadMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory8);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ExclusiveReadMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory16);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ExclusiveReadMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory32);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ExclusiveReadMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory64);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ExclusiveReadMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory128);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64WriteMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory8);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64WriteMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory16);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64WriteMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory32);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64WriteMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory64);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64WriteMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory128);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ExclusiveWriteMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory8);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ExclusiveWriteMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory16);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ExclusiveWriteMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory32);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ExclusiveWriteMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory64);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ExclusiveWriteMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory128);
 }
 
 } // namespace Dynarmic::Backend::Arm64

From 803743488a2b2a860cadfcb1ae862b06f39abf0c Mon Sep 17 00:00:00 2001
From: Liam
Date: Sun, 13 Nov 2022 13:48:31 -0500
Subject: [PATCH 05/47] Add emit_arm64_a64

---
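Note: this fills in the A64 IR glue for system registers and constants.
Values fixed for the lifetime of the Jit (CNTFRQ, CTR, DCZID) are baked into
the code as immediates; TPIDR/TPIDRRO go through host pointers captured in
EmitConfig, so A64GetTPIDR lowers to roughly:

    MOV  Xscratch0, #<address of the tpidr_el0 storage>
    LDR  Xvalue, [Xscratch0]

which is also why the const is dropped from UserConfig::tpidr_el0 at the end
of this patch: the guest can write the register via MSR. The prelude now
saves and restores FPSR alongside FPCR, since A64 guest code can observe the
cumulative floating-point exception flags.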
 .../backend/arm64/a32_address_space.cpp       |   5 +
 .../backend/arm64/a64_address_space.cpp       |  17 +-
 .../backend/arm64/a64_address_space.h         |   1 +
 src/dynarmic/backend/arm64/emit_arm64.h       |   6 +
 src/dynarmic/backend/arm64/emit_arm64_a64.cpp | 360 +++++++++++------
 src/dynarmic/interface/A64/config.h           |   2 +-
 6 files changed, 241 insertions(+), 150 deletions(-)

diff --git a/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/backend/arm64/a32_address_space.cpp
index 01b78938..306db097 100644
--- a/src/dynarmic/backend/arm64/a32_address_space.cpp
+++ b/src/dynarmic/backend/arm64/a32_address_space.cpp
@@ -303,6 +303,11 @@ EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) {
     mem.unprotect();
 
     const EmitConfig emit_conf{
+        .tpidr_el0{},
+        .tpidrro_el0{},
+        .cntfreq_el0{},
+        .dczid_el0{},
+        .ctr_el0{},
         .hook_isb = conf.hook_isb,
         .enable_cycle_counting = conf.enable_cycle_counting,
         .always_little_endian = conf.always_little_endian,
diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp
index 576ad17e..f59e663d 100644
--- a/src/dynarmic/backend/arm64/a64_address_space.cpp
+++ b/src/dynarmic/backend/arm64/a64_address_space.cpp
@@ -182,6 +182,7 @@ void A64AddressSpace::EmitPrelude() {
     prelude_info.isb_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks);
     prelude_info.ic_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionCacheOperationRaised>(code, conf.callbacks);
     prelude_info.dc_raised = EmitCallTrampoline<&A64::UserCallbacks::DataCacheOperationRaised>(code, conf.callbacks);
+    prelude_info.get_cntpct = EmitCallTrampoline<&A64::UserCallbacks::GetCNTPCT>(code, conf.callbacks);
     prelude_info.add_ticks = EmitCallTrampoline<&A64::UserCallbacks::AddTicks>(code, conf.callbacks);
    prelude_info.get_ticks_remaining = EmitCallTrampoline<&A64::UserCallbacks::GetTicksRemaining>(code, conf.callbacks);
 
@@ -201,10 +202,12 @@ void A64AddressSpace::EmitPrelude() {
             code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
         }
 
-        code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
         code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
         code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
+        code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
+        code.LDR(Wscratch1, Xstate, offsetof(A64JitState, fpsr));
         code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
+        code.MSR(oaknut::SystemReg::FPSR, Xscratch1);
 
         code.LDAR(Wscratch0, Xhalt);
         code.CBNZ(Wscratch0, return_from_run_code);
@@ -225,10 +228,12 @@ void A64AddressSpace::EmitPrelude() {
             code.MOV(Xticks, 1);
             code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
         }
 
-        code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
         code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
         code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
+        code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
+        code.LDR(Wscratch1, Xstate, offsetof(A64JitState, fpsr));
         code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
+        code.MSR(oaknut::SystemReg::FPSR, Xscratch1);
 
         oaknut::Label step_hr_loop;
         code.l(step_hr_loop);
@@ -311,6 +316,11 @@ EmittedBlockInfo A64AddressSpace::Emit(IR::Block block) {
     mem.unprotect();
 
     const EmitConfig emit_conf{
+        .tpidr_el0 = conf.tpidr_el0,
+        .tpidrro_el0 = conf.tpidrro_el0,
+        .cntfreq_el0 = conf.cntfrq_el0,
+        .dczid_el0 = conf.dczid_el0,
+        .ctr_el0 = conf.ctr_el0,
         .hook_isb = conf.hook_isb,
         .enable_cycle_counting = conf.enable_cycle_counting,
         .always_little_endian = true,
@@ -437,6 +447,9 @@ void A64AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBloc
         case LinkTarget::DataCacheOperationRaised:
             c.BL(prelude_info.dc_raised);
             break;
+        case LinkTarget::GetCNTPCT:
+            c.BL(prelude_info.get_cntpct);
+            break;
         case LinkTarget::AddTicks:
             c.BL(prelude_info.add_ticks);
             break;
diff --git a/src/dynarmic/backend/arm64/a64_address_space.h b/src/dynarmic/backend/arm64/a64_address_space.h
index b810107c..9ae78ad8 100644
--- a/src/dynarmic/backend/arm64/a64_address_space.h
+++ b/src/dynarmic/backend/arm64/a64_address_space.h
@@ -86,6 +86,7 @@ private:
         void* dc_raised;
         void* ic_raised;
         void* isb_raised;
+        void* get_cntpct;
         void* add_ticks;
         void* get_ticks_remaining;
     } prelude_info;
diff --git a/src/dynarmic/backend/arm64/emit_arm64.h b/src/dynarmic/backend/arm64/emit_arm64.h
index 72124aa4..290230ac 100644
--- a/src/dynarmic/backend/arm64/emit_arm64.h
+++ b/src/dynarmic/backend/arm64/emit_arm64.h
@@ -68,6 +68,7 @@ enum class LinkTarget {
     InstructionSynchronizationBarrierRaised,
     InstructionCacheOperationRaised,
     DataCacheOperationRaised,
+    GetCNTPCT,
     AddTicks,
     GetTicksRemaining,
 };
@@ -89,6 +90,11 @@ struct EmittedBlockInfo {
 };
 
 struct EmitConfig {
+    u64* tpidr_el0;
+    const u64* tpidrro_el0;
+    u64 cntfreq_el0;
+    u32 dczid_el0;
+    u32 ctr_el0;
     bool hook_isb;
     bool enable_cycle_counting;
     bool always_little_endian;
diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp
index 4bd2e270..6da15076 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp
@@ -4,8 +4,9 @@
  */
 
 #include <oaknut/oaknut.hpp>
+#include <mcl/bit_cast.hpp>
 
-#include "dynarmic/backend/arm64/a32_jitstate.h"
+#include "dynarmic/backend/arm64/a64_jitstate.h"
 #include "dynarmic/backend/arm64/abi.h"
 #include "dynarmic/backend/arm64/emit_arm64.h"
 #include "dynarmic/backend/arm64/emit_context.h"
@@ -18,292 +19,357 @@ namespace Dynarmic::Backend::Arm64 {
 
 using namespace oaknut::util;
 
+static constexpr int nzcv_c_flag_shift = 29;
+
 template<>
 void EmitIR<IR::Opcode::A64SetCheckBit>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    if (args[0].IsImmediate()) {
+        if (args[0].GetImmediateU1()) {
+            code.MOV(Wscratch0, 1);
+            code.STRB(Wscratch0, SP, offsetof(StackLayout, check_bit));
+        } else {
+            code.STRB(WZR, SP, offsetof(StackLayout, check_bit));
+        }
+    } else {
+        auto Wbit = ctx.reg_alloc.ReadW(args[0]);
+        RegAlloc::Realize(Wbit);
+        code.STRB(Wbit, SP, offsetof(StackLayout, check_bit));
+    }
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetCFlag>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Wresult = ctx.reg_alloc.WriteW(inst);
+    RegAlloc::Realize(Wresult);
+    code.LDR(Wresult, Xstate, offsetof(A64JitState, cpsr_nzcv));
+    code.UBFX(Wresult, Wresult, nzcv_c_flag_shift, 1);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetNZCVRaw>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Wnzcv = ctx.reg_alloc.WriteW(inst);
+    RegAlloc::Realize(Wnzcv);
+
+    code.LDR(Wnzcv, Xstate, offsetof(A64JitState, cpsr_nzcv));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64SetNZCVRaw>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
+    RegAlloc::Realize(Wnzcv);
+
+    code.STR(Wnzcv, Xstate, offsetof(A64JitState, cpsr_nzcv));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64SetNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
+    RegAlloc::Realize(Wnzcv);
+
+    code.STR(Wnzcv, Xstate, offsetof(A64JitState, cpsr_nzcv));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetW>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
+
+    auto Wresult = ctx.reg_alloc.WriteW(inst);
+    RegAlloc::Realize(Wresult);
+
+    // TODO: Detect if Gpr vs Fpr is more appropriate
+
+    code.LDR(Wresult, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetX>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
+
+    auto Xresult = ctx.reg_alloc.WriteX(inst);
+    RegAlloc::Realize(Xresult);
+
+    // TODO: Detect if Gpr vs Fpr is more appropriate
+
+    code.LDR(Xresult, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetS>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
+    auto Sresult = ctx.reg_alloc.WriteS(inst);
+    RegAlloc::Realize(Sresult);
+    code.LDR(Sresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64)*2 * static_cast<size_t>(vec));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetD>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
+    auto Dresult = ctx.reg_alloc.WriteD(inst);
+    RegAlloc::Realize(Dresult);
+    code.LDR(Dresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64)*2 * static_cast<size_t>(vec));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
+    auto Qresult = ctx.reg_alloc.WriteQ(inst);
+    RegAlloc::Realize(Qresult);
+    code.LDR(Qresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64)*2 * static_cast<size_t>(vec));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetSP>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
+
+    auto Xresult = ctx.reg_alloc.WriteX(inst);
+    RegAlloc::Realize(Xresult);
+
+    code.LDR(Xresult, Xstate, offsetof(A64JitState, sp));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetFPCR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
+
+    auto Wresult = ctx.reg_alloc.WriteW(inst);
+    RegAlloc::Realize(Wresult);
+
+    code.LDR(Wresult, Xstate, offsetof(A64JitState, fpcr));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetFPSR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
+
+    auto Wresult = ctx.reg_alloc.WriteW(inst);
+    RegAlloc::Realize(Wresult);
+
+    code.LDR(Wresult, Xstate, offsetof(A64JitState, fpsr));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64SetW>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    auto Wvalue = ctx.reg_alloc.ReadW(args[1]);
+    RegAlloc::Realize(Wvalue);
+
+    // TODO: Detect if Gpr vs Fpr is more appropriate
+
+    code.STR(Wvalue, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64SetX>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    auto Xvalue = ctx.reg_alloc.ReadX(args[1]);
+    RegAlloc::Realize(Xvalue);
+
+    // TODO: Detect if Gpr vs Fpr is more appropriate
+
+    code.STR(Xvalue, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64SetS>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
+    auto Svalue = ctx.reg_alloc.ReadS(args[1]);
+    RegAlloc::Realize(Svalue);
+    code.STR(Svalue, Xstate, offsetof(A64JitState, vec) + sizeof(u64)*2*static_cast<size_t>(vec));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64SetD>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
+    auto Dvalue = ctx.reg_alloc.ReadD(args[1]);
+    RegAlloc::Realize(Dvalue);
+    code.STR(Dvalue, Xstate, offsetof(A64JitState, vec) + sizeof(u64)*2*static_cast<size_t>(vec));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64SetQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
+    auto Qvalue = ctx.reg_alloc.ReadQ(args[1]);
+    RegAlloc::Realize(Qvalue);
+    code.STR(Qvalue, Xstate, offsetof(A64JitState, vec) + sizeof(u64)*2*static_cast<size_t>(vec));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64SetSP>(oaknut::CodeGenerator& code, EmitContext& ctx,
 IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
+    RegAlloc::Realize(Xvalue);
+    code.STR(Xvalue, Xstate, offsetof(A64JitState, sp));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64SetFPCR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Wvalue = ctx.reg_alloc.ReadW(args[0]);
+    RegAlloc::Realize(Wvalue);
+    code.STR(Wvalue, Xstate, offsetof(A64JitState, fpcr));
+    code.MSR(oaknut::SystemReg::FPCR, Wvalue->toX());
 }
 
 template<>
 void EmitIR<IR::Opcode::A64SetFPSR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Wvalue = ctx.reg_alloc.ReadW(args[0]);
+    RegAlloc::Realize(Wvalue);
+    code.STR(Wvalue, Xstate, offsetof(A64JitState, fpsr));
+    code.MSR(oaknut::SystemReg::FPSR, Wvalue->toX());
 }
 
 template<>
 void EmitIR<IR::Opcode::A64SetPC>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
+    RegAlloc::Realize(Xvalue);
+    code.STR(Xvalue, Xstate, offsetof(A64JitState, pc));
 }
 
 template<>
 void EmitIR<IR::Opcode::A64CallSupervisor>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.PrepareForCall(nullptr);
+
+    if (ctx.conf.enable_cycle_counting) {
+        code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
+        code.SUB(Xscratch0, Xscratch0, Xticks);
+        EmitRelocation(code, ctx, LinkTarget::AddTicks);
+    }
+
+    code.MOV(W1, args[0].GetImmediateU32());
+    EmitRelocation(code, ctx, LinkTarget::CallSVC);
+
+    if (ctx.conf.enable_cycle_counting) {
+        EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
+        code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
+        code.MOV(Xticks, X0);
+    }
 }
 
 template<>
 void EmitIR<IR::Opcode::A64ExceptionRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.PrepareForCall(nullptr);
+
+    if (ctx.conf.enable_cycle_counting) {
+        code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
+        code.SUB(Xscratch0, Xscratch0, Xticks);
+        EmitRelocation(code, ctx, LinkTarget::AddTicks);
+    }
+
+    code.MOV(X1, args[0].GetImmediateU64());
+    code.MOV(X2, args[1].GetImmediateU64());
+    EmitRelocation(code, ctx, LinkTarget::ExceptionRaised);
+
+    if (ctx.conf.enable_cycle_counting) {
+        EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
+        code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
+        code.MOV(Xticks, X0);
+    }
 }
 
 template<>
 void EmitIR<IR::Opcode::A64DataCacheOperationRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.PrepareForCall(nullptr, args[1], args[2]);
+    EmitRelocation(code, ctx, LinkTarget::DataCacheOperationRaised);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64InstructionCacheOperationRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.PrepareForCall(nullptr, args[1], args[2]);
+    EmitRelocation(code, ctx, LinkTarget::InstructionCacheOperationRaised);
 }
 
 template<>
-void EmitIR<IR::Opcode::A64DataSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+void EmitIR<IR::Opcode::A64DataSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
+    code.DSB(oaknut::BarrierOp::SY);
 }
 
 template<>
-void EmitIR<IR::Opcode::A64DataMemoryBarrier>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+void EmitIR<IR::Opcode::A64DataMemoryBarrier>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
+    code.DMB(oaknut::BarrierOp::SY);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64InstructionSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    if (!ctx.conf.hook_isb) {
+        return;
+    }
+
+    ctx.reg_alloc.PrepareForCall(nullptr);
+    EmitRelocation(code, ctx, LinkTarget::InstructionSynchronizationBarrierRaised);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetCNTFRQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto Xvalue = ctx.reg_alloc.WriteX(inst);
+    RegAlloc::Realize(Xvalue);
+    code.MOV(Xvalue, ctx.conf.cntfreq_el0);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetCNTPCT>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    // FIXME: AddTicks / GetTicksRemaining
+    ctx.reg_alloc.PrepareForCall(inst);
+    EmitRelocation(code, ctx, LinkTarget::GetCNTPCT);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetCTR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto Wvalue = ctx.reg_alloc.WriteW(inst);
+    RegAlloc::Realize(Wvalue);
+    code.MOV(Wvalue, ctx.conf.ctr_el0);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetDCZID>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto Wvalue = ctx.reg_alloc.WriteW(inst);
+    RegAlloc::Realize(Wvalue);
+    code.MOV(Wvalue, ctx.conf.dczid_el0);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetTPIDR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto Xvalue = ctx.reg_alloc.WriteX(inst);
+    RegAlloc::Realize(Xvalue);
+    code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidr_el0));
+    code.LDR(Xvalue, Xscratch0);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64GetTPIDRRO>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto Xvalue = ctx.reg_alloc.WriteX(inst);
+    RegAlloc::Realize(Xvalue);
+    code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidrro_el0));
+    code.LDR(Xvalue, Xscratch0);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64SetTPIDR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
+    RegAlloc::Realize(Xvalue);
+    code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidrro_el0));
+    code.STR(Xvalue, Xscratch0);
 }
 
 } // namespace Dynarmic::Backend::Arm64
diff --git a/src/dynarmic/interface/A64/config.h b/src/dynarmic/interface/A64/config.h
index 0b016eab..409e0048 100644
--- a/src/dynarmic/interface/A64/config.h
+++ b/src/dynarmic/interface/A64/config.h
@@ -198,7 +198,7 @@ struct UserConfig {
 
     /// Pointer to where TPIDR_EL0 is stored. This pointer will be inserted into
     /// emitted code.
-    const std::uint64_t* tpidr_el0 = nullptr;
+    std::uint64_t* tpidr_el0 = nullptr;
 
     /// Pointer to the page table which we can use for direct page table access.
     /// If an entry in page_table is null, the relevant memory callback will be called.

From 263b7cf06b6b675c38f7343e2d1deeb7b1153a85 Mon Sep 17 00:00:00 2001
From: Liam
Date: Sun, 13 Nov 2022 13:54:06 -0500
Subject: [PATCH 06/47] Redo target_sources

---
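Note: with the sources registered, an arm64 host can build both frontends
again; for example (standard dynarmic configure option, shown here only for
illustration):

    cmake -B build -DDYNARMIC_FRONTENDS="A32;A64"

Previously the A64 frontend on an arm64 host hit the FATAL_ERROR branch.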
- const std::uint64_t* tpidr_el0 = nullptr; + std::uint64_t* tpidr_el0 = nullptr; /// Pointer to the page table which we can use for direct page table access. /// If an entry in page_table is null, the relevant memory callback will be called. From 263b7cf06b6b675c38f7343e2d1deeb7b1153a85 Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 13 Nov 2022 13:54:06 -0500 Subject: [PATCH 06/47] Redo target_sources --- src/dynarmic/CMakeLists.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index 423de034..35335d70 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -406,14 +406,16 @@ elseif(ARCHITECTURE STREQUAL "arm64") backend/arm64/a32_address_space.h backend/arm64/a32_core.h backend/arm64/a32_interface.cpp - - # Move this to the list below when implemented - backend/arm64/a64_interface.cpp ) endif() if ("A64" IN_LIST DYNARMIC_FRONTENDS) - message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") + target_sources(dynarmic PRIVATE + backend/arm64/a64_address_space.cpp + backend/arm64/a64_address_space.h + backend/arm64/a64_core.h + backend/arm64/a64_interface.cpp + ) endif() else() message(FATAL_ERROR "Unsupported architecture") From 5e9ca4b46c50c6845ce961b193128d99826d34c3 Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 13 Nov 2022 13:54:36 -0500 Subject: [PATCH 07/47] format --- src/dynarmic/backend/arm64/a64_interface.cpp | 3 ++- src/dynarmic/backend/arm64/emit_arm64_a64.cpp | 14 +++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/dynarmic/backend/arm64/a64_interface.cpp b/src/dynarmic/backend/arm64/a64_interface.cpp index 715f11a3..92827f16 100644 --- a/src/dynarmic/backend/arm64/a64_interface.cpp +++ b/src/dynarmic/backend/arm64/a64_interface.cpp @@ -192,7 +192,8 @@ private: bool is_executing = false; }; -Jit::Jit(UserConfig conf) : impl{std::make_unique(this, conf)} { +Jit::Jit(UserConfig conf) + : impl{std::make_unique(this, conf)} { } Jit::~Jit() = default; diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp index 6da15076..bf7f6043 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp @@ -3,8 +3,8 @@ * SPDX-License-Identifier: 0BSD */ -#include #include +#include #include "dynarmic/backend/arm64/a64_jitstate.h" #include "dynarmic/backend/arm64/abi.h" @@ -104,7 +104,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); auto Sresult = ctx.reg_alloc.WriteS(inst); RegAlloc::Realize(Sresult); - code.LDR(Sresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64)*2 * static_cast(vec)); + code.LDR(Sresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } template<> @@ -112,7 +112,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); auto Dresult = ctx.reg_alloc.WriteD(inst); RegAlloc::Realize(Dresult); - code.LDR(Dresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64)*2 * static_cast(vec)); + code.LDR(Dresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } template<> @@ -120,7 +120,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); auto Qresult = ctx.reg_alloc.WriteQ(inst); RegAlloc::Realize(Qresult); - code.LDR(Qresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64)*2 * 
static_cast(vec)); + code.LDR(Qresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } template<> @@ -187,7 +187,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); auto Svalue = ctx.reg_alloc.ReadS(args[1]); RegAlloc::Realize(Svalue); - code.STR(Svalue, Xstate, offsetof(A64JitState, vec) + sizeof(u64)*2*static_cast(vec)); + code.STR(Svalue, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } template<> @@ -196,7 +196,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); auto Dvalue = ctx.reg_alloc.ReadD(args[1]); RegAlloc::Realize(Dvalue); - code.STR(Dvalue, Xstate, offsetof(A64JitState, vec) + sizeof(u64)*2*static_cast(vec)); + code.STR(Dvalue, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } template<> @@ -205,7 +205,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); auto Qvalue = ctx.reg_alloc.ReadQ(args[1]); RegAlloc::Realize(Qvalue); - code.STR(Qvalue, Xstate, offsetof(A64JitState, vec) + sizeof(u64)*2*static_cast(vec)); + code.STR(Qvalue, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } template<> From cdd658935ca98dd4027c6810e0c6beaf016c9157 Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 13 Nov 2022 14:01:16 -0500 Subject: [PATCH 08/47] Fix compile --- src/dynarmic/backend/arm64/a64_address_space.cpp | 4 ++-- src/dynarmic/backend/arm64/a64_interface.cpp | 2 +- src/dynarmic/backend/arm64/emit_arm64_a64.cpp | 10 +--------- src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp | 2 +- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp index f59e663d..b2232aeb 100644 --- a/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -47,7 +47,7 @@ static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const oaknut::Label l_addr, l_this; - auto fn = [](const A32::UserConfig& conf, A32::VAddr vaddr) -> T { + auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr) -> T { return conf.global_monitor->ReadAndMark(conf.processor_id, vaddr, [&]() -> T { return (conf.callbacks->*callback)(vaddr); }); @@ -176,7 +176,7 @@ void A64AddressSpace::EmitPrelude() { prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf); prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf); prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf); - prelude_info.exclusive_write_memory_128 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, Vector>(code, conf); + prelude_info.exclusive_write_memory_128 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive128, Vector>(code, conf); prelude_info.call_svc = EmitCallTrampoline<&A64::UserCallbacks::CallSVC>(code, conf.callbacks); prelude_info.exception_raised = EmitCallTrampoline<&A64::UserCallbacks::ExceptionRaised>(code, conf.callbacks); prelude_info.isb_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks); diff --git 
a/src/dynarmic/backend/arm64/a64_interface.cpp b/src/dynarmic/backend/arm64/a64_interface.cpp index 92827f16..37c8ec5a 100644 --- a/src/dynarmic/backend/arm64/a64_interface.cpp +++ b/src/dynarmic/backend/arm64/a64_interface.cpp @@ -316,7 +316,7 @@ void Jit::DumpDisassembly() const { } std::vector Jit::Disassemble() const { - impl->Disassemble(); + return impl->Disassemble(); } } // namespace Dynarmic::A64 diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp index bf7f6043..f0a35ff6 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp @@ -41,7 +41,6 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto Wresult = ctx.reg_alloc.WriteW(inst); RegAlloc::Realize(Wresult); code.LDR(Wresult, Xstate, offsetof(A64JitState, cpsr_nzcv)); @@ -50,7 +49,6 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& c template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto Wnzcv = ctx.reg_alloc.WriteW(inst); RegAlloc::Realize(Wnzcv); @@ -125,8 +123,6 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); - auto Xresult = ctx.reg_alloc.WriteX(inst); RegAlloc::Realize(Xresult); @@ -135,8 +131,6 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); - auto Wresult = ctx.reg_alloc.WriteW(inst); RegAlloc::Realize(Wresult); @@ -145,8 +139,6 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ct template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); - auto Wresult = ctx.reg_alloc.WriteW(inst); RegAlloc::Realize(Wresult); @@ -310,7 +302,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitC } template<> -void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst*) { if (!ctx.conf.hook_isb) { return; } diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp index 75fe7335..38062956 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp @@ -82,7 +82,7 @@ static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& c } template<> -void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst*) { +void EmitIR(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) { code.STR(WZR, Xstate, offsetof(A64JitState, exclusive_state)); } From e6949a86a272fb5564b486b23d0ca988e86f48bb Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 13 Nov 2022 14:24:29 -0500 Subject: [PATCH 09/47] Terminals --- .../backend/arm64/a32_address_space.cpp | 1 + .../backend/arm64/a64_address_space.cpp | 1 + src/dynarmic/backend/arm64/emit_arm64.cpp | 20 +++- src/dynarmic/backend/arm64/emit_arm64.h | 4 + src/dynarmic/backend/arm64/emit_arm64_a32.cpp | 2 +- src/dynarmic/backend/arm64/emit_arm64_a64.cpp | 101 ++++++++++++++++++ 6 files changed, 124 insertions(+), 5 deletions(-) diff --git 
a/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/backend/arm64/a32_address_space.cpp index 306db097..59c9193e 100644 --- a/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -308,6 +308,7 @@ EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) { .cntfreq_el0{}, .dczid_el0{}, .ctr_el0{}, + .is_a64 = false, .hook_isb = conf.hook_isb, .enable_cycle_counting = conf.enable_cycle_counting, .always_little_endian = conf.always_little_endian, diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp index b2232aeb..fe063f5a 100644 --- a/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -321,6 +321,7 @@ EmittedBlockInfo A64AddressSpace::Emit(IR::Block block) { .cntfreq_el0 = conf.cntfrq_el0, .dczid_el0 = conf.dczid_el0, .ctr_el0 = conf.ctr_el0, + .is_a64 = true, .hook_isb = conf.hook_isb, .enable_cycle_counting = conf.enable_cycle_counting, .always_little_endian = true, diff --git a/src/dynarmic/backend/arm64/emit_arm64.cpp b/src/dynarmic/backend/arm64/emit_arm64.cpp index 104d173d..ffd07954 100644 --- a/src/dynarmic/backend/arm64/emit_arm64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64.cpp @@ -164,10 +164,18 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E ASSERT(!ctx.block.HasConditionFailedLocation()); } else { ASSERT(ctx.block.HasConditionFailedLocation()); + oaknut::Label pass; + + if (conf.is_a64) { + pass = EmitA64Cond(code, ctx, ctx.block.GetCondition()); + EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount()); + EmitA64ConditionFailedTerminal(code, ctx); + } else { + pass = EmitA32Cond(code, ctx, ctx.block.GetCondition()); + EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount()); + EmitA32ConditionFailedTerminal(code, ctx); + } - oaknut::Label pass = EmitA32Cond(code, ctx, ctx.block.GetCondition()); - EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount()); - EmitA32ConditionFailedTerminal(code, ctx); code.l(pass); } @@ -205,7 +213,11 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E reg_alloc.AssertNoMoreUses(); EmitAddCycles(code, ctx, block.CycleCount()); - EmitA32Terminal(code, ctx); + if (conf.is_a64) { + EmitA64Terminal(code, ctx); + } else { + EmitA32Terminal(code, ctx); + } ebi.size = code.ptr() - ebi.entry_point; return ebi; diff --git a/src/dynarmic/backend/arm64/emit_arm64.h b/src/dynarmic/backend/arm64/emit_arm64.h index 290230ac..b22bf810 100644 --- a/src/dynarmic/backend/arm64/emit_arm64.h +++ b/src/dynarmic/backend/arm64/emit_arm64.h @@ -95,6 +95,7 @@ struct EmitConfig { u64 cntfreq_el0; u32 dczid_el0; u32 ctr_el0; + bool is_a64; bool hook_isb; bool enable_cycle_counting; bool always_little_endian; @@ -120,7 +121,10 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst); void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, LinkTarget link_target); void EmitBlockLinkRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, const IR::LocationDescriptor& descriptor); oaknut::Label EmitA32Cond(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond); +oaknut::Label EmitA64Cond(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond); void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx); +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx); void EmitA32ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx); 
+void EmitA64ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx); } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/emit_arm64_a32.cpp b/src/dynarmic/backend/arm64/emit_arm64_a32.cpp index 493d75eb..f8fc5eee 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a32.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a32.cpp @@ -41,7 +41,7 @@ void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Re EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); } -void EmitSetUpperLocationDescriptor(oaknut::CodeGenerator& code, EmitContext& ctx, IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) { +static void EmitSetUpperLocationDescriptor(oaknut::CodeGenerator& code, EmitContext& ctx, IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) { auto get_upper = [](const IR::LocationDescriptor& desc) -> u32 { return static_cast(A32::LocationDescriptor{desc}.SetSingleStepping(false).UniqueHash() >> 32); }; diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp index f0a35ff6..d24f2481 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp @@ -21,6 +21,107 @@ using namespace oaknut::util; static constexpr int nzcv_c_flag_shift = 29; +oaknut::Label EmitA64Cond(oaknut::CodeGenerator& code, EmitContext&, IR::Cond cond) { + oaknut::Label pass; + // TODO: Flags in host flags + code.LDR(Wscratch0, Xstate, offsetof(A64JitState, cpsr_nzcv)); + code.MSR(oaknut::SystemReg::NZCV, Xscratch0); + code.B(static_cast(cond), pass); + return pass; +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step); + +void EmitA64Terminal(oaknut::CodeGenerator&, EmitContext&, IR::Term::Interpret, IR::LocationDescriptor, bool) { + ASSERT_FALSE("Interpret should never be emitted."); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) { + EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) { + oaknut::Label fail; + + if (ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) { + if (ctx.conf.enable_cycle_counting) { + code.CMP(Xticks, 0); + code.B(LE, fail); + EmitBlockLinkRelocation(code, ctx, terminal.next); + } else { + code.LDAR(Wscratch0, Xhalt); + code.CBNZ(Wscratch0, fail); + EmitBlockLinkRelocation(code, ctx, terminal.next); + } + } + + code.l(fail); + code.MOV(Xscratch0, A64::LocationDescriptor{terminal.next}.PC()); + code.STR(Xscratch0, Xstate, offsetof(A64JitState, pc)); + EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) { + if (ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) { + EmitBlockLinkRelocation(code, ctx, terminal.next); + } + + code.MOV(Wscratch0, A64::LocationDescriptor{terminal.next}.PC()); + code.STR(Wscratch0, Xstate, offsetof(A64JitState, pc)); + EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool) { + 
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); + + // TODO: Implement PopRSBHint optimization +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::FastDispatchHint, IR::LocationDescriptor, bool) { + EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); + + // TODO: Implement FastDispatchHint optimization +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) { + oaknut::Label pass = EmitA64Cond(code, ctx, terminal.if_); + EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step); + code.l(pass); + EmitA64Terminal(code, ctx, terminal.then_, initial_location, is_single_step); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) { + oaknut::Label fail; + code.LDRB(Wscratch0, SP, offsetof(StackLayout, check_bit)); + code.CBZ(Wscratch0, fail); + EmitA64Terminal(code, ctx, terminal.then_, initial_location, is_single_step); + code.l(fail); + EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) { + oaknut::Label fail; + code.LDAR(Wscratch0, Xhalt); + code.CBNZ(Wscratch0, fail); + EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step); + code.l(fail); + EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) { + boost::apply_visitor([&](const auto& t) { EmitA64Terminal(code, ctx, t, initial_location, is_single_step); }, terminal); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx) { + const A64::LocationDescriptor location{ctx.block.Location()}; + EmitA64Terminal(code, ctx, ctx.block.GetTerminal(), location.SetSingleStepping(false), location.SingleStepping()); +} + +void EmitA64ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx) { + const A64::LocationDescriptor location{ctx.block.Location()}; + EmitA64Terminal(code, ctx, IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()}, location.SetSingleStepping(false), location.SingleStepping()); +} + template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); From 92ef9a7276402329d7c6a494f3c8acb5c36c60b8 Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 13 Nov 2022 14:38:04 -0500 Subject: [PATCH 10/47] Add TestBit --- .../backend/arm64/emit_arm64_data_processing.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp index f0250361..4f479d90 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp @@ -189,10 +189,14 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xoperand = 
ctx.reg_alloc.ReadX(args[0]); + RegAlloc::Realize(Xresult, Xoperand); + ASSERT(args[1].IsImmediate()); + ASSERT(args[1].GetImmediateU8() < 64); + + code.UBFX(Xresult, Xoperand, args[1].GetImmediateU8(), 1); } template<> From 3f0c0c7b098750380a96e4fa330182c7b58aa7ad Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 13 Nov 2022 14:48:58 -0500 Subject: [PATCH 11/47] Implement asr64, ror64 --- .../arm64/emit_arm64_data_processing.cpp | 42 +++++++++++++++---- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp index 4f479d90..b68da06f 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp @@ -620,10 +620,23 @@ void EmitIR(oaknut::CodeGenerator& code, Emi template<> void EmitIR<IR::Opcode::ArithmeticShiftRight64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + + if (shift_arg.IsImmediate()) { + const u8 shift = shift_arg.GetImmediateU8(); + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xoperand = ctx.reg_alloc.ReadX(operand_arg); + RegAlloc::Realize(Xresult, Xoperand); + code.ASR(Xresult, Xoperand, shift <= 63 ? shift : 63); + } else { + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xoperand = ctx.reg_alloc.ReadX(operand_arg); + auto Xshift = ctx.reg_alloc.ReadX(shift_arg); + RegAlloc::Realize(Xresult, Xoperand, Xshift); + code.ASR(Xresult, Xoperand, Xshift); + } } template<> @@ -694,10 +707,23 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& template<> void EmitIR<IR::Opcode::RotateRight64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + + if (shift_arg.IsImmediate()) { + const u8 shift = shift_arg.GetImmediateU8(); + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xoperand = ctx.reg_alloc.ReadX(operand_arg); + RegAlloc::Realize(Xresult, Xoperand); + code.ROR(Xresult, Xoperand, shift); + } else { + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xoperand = ctx.reg_alloc.ReadX(operand_arg); + auto Xshift = ctx.reg_alloc.ReadX(shift_arg); + RegAlloc::Realize(Xresult, Xoperand, Xshift); + code.ROR(Xresult, Xoperand, Xshift); + } } template<> From 5b6e2add8294849c0552ca8d70df8cf06d2a131a Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 13 Nov 2022 15:03:10 -0500 Subject: [PATCH 12/47] Add masked shift instructions --- .../arm64/emit_arm64_data_processing.cpp | 110 +++++++++++++----- 1 file changed, 78 insertions(+), 32 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp index b68da06f..cddb6a39 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp @@ -756,68 +756,114 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCo } } +template<typename ShiftI, typename ShiftR> +static void EmitMaskedShift32(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, ShiftI si_fn, ShiftR sr_fn) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + + if (shift_arg.IsImmediate()) { + auto Wresult = ctx.reg_alloc.WriteW(inst); + auto Woperand = 
ctx.reg_alloc.ReadW(operand_arg); + RegAlloc::Realize(Wresult, Woperand); + const u32 shift = shift_arg.GetImmediateU32(); + + si_fn(Wresult, Woperand, static_cast(shift & 0x1F)); + } else { + auto Wresult = ctx.reg_alloc.WriteW(inst); + auto Woperand = ctx.reg_alloc.ReadW(operand_arg); + auto Wshift = ctx.reg_alloc.ReadW(shift_arg); + RegAlloc::Realize(Wresult, Woperand, Wshift); + + sr_fn(Wresult, Woperand, Wshift); + } +} + +template +static void EmitMaskedShift64(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, ShiftI si_fn, ShiftR sr_fn) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + + if (shift_arg.IsImmediate()) { + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xoperand = ctx.reg_alloc.ReadX(operand_arg); + RegAlloc::Realize(Xresult, Xoperand); + const u32 shift = shift_arg.GetImmediateU64(); + + si_fn(Xresult, Xoperand, static_cast(shift & 0x3F)); + } else { + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xoperand = ctx.reg_alloc.ReadX(operand_arg); + auto Xshift = ctx.reg_alloc.ReadX(shift_arg); + RegAlloc::Realize(Xresult, Xoperand, Xshift); + + sr_fn(Xresult, Xoperand, Xshift); + } +} + template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift32( + code, ctx, inst, + [&](auto& Wresult, auto& Woperand, auto shift) { code.LSL(Wresult, Woperand, shift); }, + [&](auto& Wresult, auto& Woperand, auto& Wshift) { code.LSL(Wresult, Woperand, Wshift); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift64( + code, ctx, inst, + [&](auto& Xresult, auto& Xoperand, auto shift) { code.LSL(Xresult, Xoperand, shift); }, + [&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.LSL(Xresult, Xoperand, Xshift); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift32( + code, ctx, inst, + [&](auto& Wresult, auto& Woperand, auto shift) { code.LSR(Wresult, Woperand, shift); }, + [&](auto& Wresult, auto& Woperand, auto& Wshift) { code.LSR(Wresult, Woperand, Wshift); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift64( + code, ctx, inst, + [&](auto& Xresult, auto& Xoperand, auto shift) { code.LSR(Xresult, Xoperand, shift); }, + [&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.LSR(Xresult, Xoperand, Xshift); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift32( + code, ctx, inst, + [&](auto& Wresult, auto& Woperand, auto shift) { code.ASR(Wresult, Woperand, shift); }, + [&](auto& Wresult, auto& Woperand, auto& Wshift) { code.ASR(Wresult, Woperand, Wshift); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift64( + code, ctx, inst, + [&](auto& Xresult, auto& Xoperand, auto shift) { code.ASR(Xresult, Xoperand, shift); }, + [&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.ASR(Xresult, Xoperand, Xshift); }); } template<> void 
EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift32( + code, ctx, inst, + [&](auto& Wresult, auto& Woperand, auto shift) { code.ROR(Wresult, Woperand, shift); }, + [&](auto& Wresult, auto& Woperand, auto& Wshift) { code.ROR(Wresult, Woperand, Wshift); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift64( + code, ctx, inst, + [&](auto& Xresult, auto& Xoperand, auto shift) { code.ROR(Xresult, Xoperand, shift); }, + [&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.ROR(Xresult, Xoperand, Xshift); }); } template From bdc1b0f590ae75065048920974982e5ab5e01fce Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 13 Nov 2022 15:10:23 -0500 Subject: [PATCH 13/47] Implement *MULH --- .../arm64/emit_arm64_data_processing.cpp | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp index cddb6a39..065070e1 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp @@ -1051,18 +1051,24 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xop1 = ctx.reg_alloc.ReadX(args[0]); + auto Xop2 = ctx.reg_alloc.ReadX(args[1]); + RegAlloc::Realize(Xresult, Xop1, Xop2); + + code.SMULH(Xresult, Xop1, Xop2); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xop1 = ctx.reg_alloc.ReadX(args[0]); + auto Xop2 = ctx.reg_alloc.ReadX(args[1]); + RegAlloc::Realize(Xresult, Xop1, Xop2); + + code.UMULH(Xresult, Xop1, Xop2); } template<> From b5f988379a266667abeba9009e2c73f3aefd51d3 Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 13 Nov 2022 18:39:33 -0500 Subject: [PATCH 14/47] Fix sets --- src/dynarmic/backend/arm64/emit_arm64_a64.cpp | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp index d24f2481..71f72895 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp @@ -256,8 +256,8 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, RegAlloc::Realize(Wvalue); // TODO: Detect if Gpr vs Fpr is more appropriate - - code.STR(Wvalue, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)); + code.MOV(Wscratch0, Wvalue); + code.STR(Xscratch0, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)); } template<> @@ -280,7 +280,13 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); auto Svalue = ctx.reg_alloc.ReadS(args[1]); RegAlloc::Realize(Svalue); - code.STR(Svalue, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); + + // TODO: Optimize + auto Qvalue = Svalue->toQ().B16(); + 
code.FMOV(Wscratch0, Svalue); + code.EOR(Qvalue, Qvalue, Qvalue); + code.FMOV(Svalue, Wscratch0); + code.STR(Svalue->toQ(), Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } template<> @@ -289,7 +295,13 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); auto Dvalue = ctx.reg_alloc.ReadD(args[1]); RegAlloc::Realize(Dvalue); - code.STR(Dvalue, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); + + // TODO: Optimize + auto Qvalue = Dvalue->toQ().B16(); + code.FMOV(Xscratch0, Dvalue); + code.EOR(Qvalue, Qvalue, Qvalue); + code.FMOV(Dvalue, Xscratch0); + code.STR(Dvalue->toQ(), Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } template<> From 18e00f2e5860f512065370936ddf02f4be9f22dd Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 13 Nov 2022 18:58:48 -0500 Subject: [PATCH 15/47] Implement ExtractRegister --- .../arm64/emit_arm64_data_processing.cpp | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp index 065070e1..48f780e6 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp @@ -1395,18 +1395,30 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCo template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ASSERT(args[2].IsImmediate()); + + auto Wresult = ctx.reg_alloc.WriteW(inst); + auto Wop1 = ctx.reg_alloc.ReadW(args[0]); + auto Wop2 = ctx.reg_alloc.ReadW(args[1]); + RegAlloc::Realize(Wresult, Wop1, Wop2); + const u8 lsb = args[2].GetImmediateU8(); + + code.EXTR(Wresult, Wop1, Wop2, lsb); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ASSERT(args[2].IsImmediate()); + + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xop1 = ctx.reg_alloc.ReadX(args[0]); + auto Xop2 = ctx.reg_alloc.ReadX(args[1]); + RegAlloc::Realize(Xresult, Xop1, Xop2); + const u8 lsb = args[2].GetImmediateU8(); + + code.EXTR(Xresult, Xop1, Xop2, lsb); } template<> From 48b0f6369b9f0b8647aea65427cf992f6d4e54a9 Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 13 Nov 2022 19:20:55 -0500 Subject: [PATCH 16/47] Add min/max --- .../arm64/emit_arm64_data_processing.cpp | 94 +++++++++++-------- 1 file changed, 54 insertions(+), 40 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp index 48f780e6..23a6efa5 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp @@ -1423,82 +1423,96 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCont template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ASSERT(args[1].IsImmediate()); + + auto Wresult = ctx.reg_alloc.WriteW(inst); + auto Wvalue = ctx.reg_alloc.ReadW(args[0]); + const u8 bit = args[1].GetImmediateU8(); + RegAlloc::Realize(Wresult, Wvalue); + + code.UBFX(Wscratch0, Wvalue, 
bit, 1); + code.SUB(Wresult, WZR, Wscratch0); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ASSERT(args[1].IsImmediate()); + + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xvalue = ctx.reg_alloc.ReadX(args[0]); + const u8 bit = args[1].GetImmediateU8(); + RegAlloc::Realize(Xresult, Xvalue); + + code.UBFX(Xscratch0, Xvalue, bit, 1); + code.SUB(Xresult, XZR, Xscratch0); +} + +static void EmitMaxMin32(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Cond cond) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + auto Wresult = ctx.reg_alloc.WriteW(inst); + auto Wop1 = ctx.reg_alloc.ReadW(args[0]); + auto Wop2 = ctx.reg_alloc.ReadW(args[1]); + RegAlloc::Realize(Wresult, Wop1, Wop2); + ctx.reg_alloc.SpillFlags(); + + code.CMP(Wop1->toW(), Wop2); + code.CSEL(Wresult, Wop1, Wop2, cond); +} + +static void EmitMaxMin64(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Cond cond) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xop1 = ctx.reg_alloc.ReadX(args[0]); + auto Xop2 = ctx.reg_alloc.ReadX(args[1]); + RegAlloc::Realize(Xresult, Xop1, Xop2); + ctx.reg_alloc.SpillFlags(); + + code.CMP(Xop1->toX(), Xop2); + code.CSEL(Xresult, Xop1, Xop2, cond); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin32(code, ctx, inst, GT); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin64(code, ctx, inst, GT); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin32(code, ctx, inst, HI); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin64(code, ctx, inst, HI); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin32(code, ctx, inst, LT); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin64(code, ctx, inst, LT); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin32(code, ctx, inst, LO); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin64(code, ctx, inst, LO); } } // namespace Dynarmic::Backend::Arm64 From ef2851d59547881e92db6eac338fb36661efea3f Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 13 Nov 2022 20:11:08 -0500 Subject: [PATCH 17/47] Optimize --- src/dynarmic/backend/arm64/emit_arm64_a64.cpp | 16 ++++------------ .../backend/arm64/emit_arm64_data_processing.cpp | 8 ++++---- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp index 
71f72895..ca19890c 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp @@ -256,8 +256,8 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, RegAlloc::Realize(Wvalue); // TODO: Detect if Gpr vs Fpr is more appropriate - code.MOV(Wscratch0, Wvalue); - code.STR(Xscratch0, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)); + code.MOV(*Wvalue, Wvalue); + code.STR(Wvalue->toX(), Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)); } template<> @@ -281,11 +281,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, auto Svalue = ctx.reg_alloc.ReadS(args[1]); RegAlloc::Realize(Svalue); - // TODO: Optimize - auto Qvalue = Svalue->toQ().B16(); - code.FMOV(Wscratch0, Svalue); - code.EOR(Qvalue, Qvalue, Qvalue); - code.FMOV(Svalue, Wscratch0); + code.FMOV(Svalue, Svalue); code.STR(Svalue->toQ(), Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } @@ -296,11 +292,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, auto Dvalue = ctx.reg_alloc.ReadD(args[1]); RegAlloc::Realize(Dvalue); - // TODO: Optimize - auto Qvalue = Dvalue->toQ().B16(); - code.FMOV(Xscratch0, Dvalue); - code.EOR(Qvalue, Qvalue, Qvalue); - code.FMOV(Dvalue, Xscratch0); + code.FMOV(Dvalue, Dvalue); code.STR(Dvalue->toQ(), Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } diff --git a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp index 23a6efa5..9321f45c 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp @@ -1431,8 +1431,8 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext const u8 bit = args[1].GetImmediateU8(); RegAlloc::Realize(Wresult, Wvalue); - code.UBFX(Wscratch0, Wvalue, bit, 1); - code.SUB(Wresult, WZR, Wscratch0); + code.LSL(Wresult, Wvalue, 31 - bit); + code.ASR(Wresult, Wresult, 31); } template<> @@ -1445,8 +1445,8 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext const u8 bit = args[1].GetImmediateU8(); RegAlloc::Realize(Xresult, Xvalue); - code.UBFX(Xscratch0, Xvalue, bit, 1); - code.SUB(Xresult, XZR, Xscratch0); + code.LSL(Xresult, Xvalue, 63 - bit); + code.ASR(Xresult, Xresult, 63); } static void EmitMaxMin32(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Cond cond) { From 57871c515956a89c5269fa75f46f1352bd3b485f Mon Sep 17 00:00:00 2001 From: Liam Date: Mon, 14 Nov 2022 16:46:56 -0500 Subject: [PATCH 18/47] Fix 128-bit ops --- .../backend/arm64/a64_address_space.cpp | 125 +++++++++++++++++- src/dynarmic/backend/arm64/abi.cpp | 16 ++- src/dynarmic/backend/arm64/emit_arm64_a64.cpp | 2 +- .../arm64/emit_arm64_data_processing.cpp | 8 +- src/dynarmic/backend/arm64/reg_alloc.cpp | 27 +++- 5 files changed, 161 insertions(+), 17 deletions(-) diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp index fe063f5a..e6045ef8 100644 --- a/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -96,6 +96,123 @@ static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const return target; } +/* =========================== 128-bit versions =========================== */ + +static void* EmitRead128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) { + using namespace oaknut::util; + + const auto info = 
Devirtualize<&A64::UserCallbacks::MemoryRead128>(this_); + + oaknut::Label l_addr, l_this; + + void* target = code.ptr(); + ABI_PushRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector)); + code.LDR(X0, l_this); + code.LDR(Xscratch0, l_addr); + code.BLR(Xscratch0); + code.STP(X0, X1, SP); + code.LDR(Q0, SP); + ABI_PopRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector)); + code.RET(); + + code.align(8); + code.l(l_this); + code.dx(info.this_ptr); + code.l(l_addr); + code.dx(info.fn_ptr); + + return target; +} + +static void* EmitExclusiveRead128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) { + using namespace oaknut::util; + + oaknut::Label l_addr, l_this; + + auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr) -> Vector { + return conf.global_monitor->ReadAndMark(conf.processor_id, vaddr, [&]() -> Vector { + return conf.callbacks->MemoryRead128(vaddr); + }); + }; + + void* target = code.ptr(); + ABI_PushRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector)); + code.LDR(X0, l_this); + code.LDR(Xscratch0, l_addr); + code.BLR(Xscratch0); + code.STP(X0, X1, SP); + code.LDR(Q0, SP); + ABI_PopRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector)); + code.RET(); + + code.align(8); + code.l(l_this); + code.dx(mcl::bit_cast(&conf)); + code.l(l_addr); + code.dx(mcl::bit_cast(Common::FptrCast(fn))); + + return target; +} + +static void* EmitWrite128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) { + using namespace oaknut::util; + + const auto info = Devirtualize<&A64::UserCallbacks::MemoryWrite128>(this_); + + oaknut::Label l_addr, l_this; + + void* target = code.ptr(); + ABI_PushRegisters(code, 0, sizeof(Vector)); + code.STR(Q0, SP); + code.LDP(X2, X3, SP); + ABI_PopRegisters(code, 0, sizeof(Vector)); + + code.LDR(X0, l_this); + code.LDR(Xscratch0, l_addr); + code.BR(Xscratch0); + + code.align(8); + code.l(l_this); + code.dx(info.this_ptr); + code.l(l_addr); + code.dx(info.fn_ptr); + + return target; +} + +static void* EmitExclusiveWrite128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) { + using namespace oaknut::util; + + oaknut::Label l_addr, l_this; + + auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, Vector value) -> u32 { + return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, + [&](Vector expected) -> bool { + return conf.callbacks->MemoryWriteExclusive128(vaddr, value, expected); + }) + ? 
0 + : 1; + }; + + void* target = code.ptr(); + ABI_PushRegisters(code, 0, sizeof(Vector)); + code.STR(Q0, SP); + code.LDP(X2, X3, SP); + ABI_PopRegisters(code, 0, sizeof(Vector)); + + code.LDR(X0, l_this); + code.LDR(Xscratch0, l_addr); + code.BR(Xscratch0); + + code.align(8); + code.l(l_this); + code.dx(mcl::bit_cast(&conf)); + code.l(l_addr); + code.dx(mcl::bit_cast(Common::FptrCast(fn))); + + return target; +} + A64AddressSpace::A64AddressSpace(const A64::UserConfig& conf) : conf(conf) , mem(conf.code_cache_size) @@ -161,22 +278,22 @@ void A64AddressSpace::EmitPrelude() { prelude_info.read_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead16>(code, conf.callbacks); prelude_info.read_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks); prelude_info.read_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks); - prelude_info.read_memory_128 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead128>(code, conf.callbacks); + prelude_info.read_memory_128 = EmitRead128CallTrampoline(code, conf.callbacks); prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead8, u8>(code, conf); prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead16, u16>(code, conf); prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead32, u32>(code, conf); prelude_info.exclusive_read_memory_64 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead64, u64>(code, conf); - prelude_info.exclusive_read_memory_128 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead128, Vector>(code, conf); + prelude_info.exclusive_read_memory_128 = EmitExclusiveRead128CallTrampoline(code, conf); prelude_info.write_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite8>(code, conf.callbacks); prelude_info.write_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite16>(code, conf.callbacks); prelude_info.write_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks); prelude_info.write_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks); - prelude_info.write_memory_128 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite128>(code, conf.callbacks); + prelude_info.write_memory_128 = EmitWrite128CallTrampoline(code, conf.callbacks); prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf); prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf); prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf); prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf); - prelude_info.exclusive_write_memory_128 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive128, Vector>(code, conf); + prelude_info.exclusive_write_memory_128 = EmitExclusiveWrite128CallTrampoline(code, conf); prelude_info.call_svc = EmitCallTrampoline<&A64::UserCallbacks::CallSVC>(code, conf.callbacks); prelude_info.exception_raised = EmitCallTrampoline<&A64::UserCallbacks::ExceptionRaised>(code, conf.callbacks); prelude_info.isb_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, 
conf.callbacks); diff --git a/src/dynarmic/backend/arm64/abi.cpp b/src/dynarmic/backend/arm64/abi.cpp index b34f807b..e6913f0d 100644 --- a/src/dynarmic/backend/arm64/abi.cpp +++ b/src/dynarmic/backend/arm64/abi.cpp @@ -55,13 +55,15 @@ static FrameInfo CalculateFrameInfo(RegisterList rl, size_t frame_size) { }; } -#define DO_IT(TYPE, REG_TYPE, PAIR_OP, SINGLE_OP, OFFSET) \ - for (size_t i = 0; i < frame_info.TYPE##s.size() - 1; i += 2) { \ - code.PAIR_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, oaknut::REG_TYPE{frame_info.TYPE##s[i + 1]}, SP, (OFFSET) + i * TYPE##_size); \ - } \ - if (frame_info.TYPE##s.size() % 2 == 1) { \ - const size_t i = frame_info.TYPE##s.size() - 1; \ - code.SINGLE_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, SP, (OFFSET) + i * TYPE##_size); \ +#define DO_IT(TYPE, REG_TYPE, PAIR_OP, SINGLE_OP, OFFSET) \ + if (frame_info.TYPE##s.size() > 0) { \ + for (size_t i = 0; i < frame_info.TYPE##s.size() - 1; i += 2) { \ + code.PAIR_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, oaknut::REG_TYPE{frame_info.TYPE##s[i + 1]}, SP, (OFFSET) + i * TYPE##_size); \ + } \ + if (frame_info.TYPE##s.size() % 2 == 1) { \ + const size_t i = frame_info.TYPE##s.size() - 1; \ + code.SINGLE_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, SP, (OFFSET) + i * TYPE##_size); \ + } \ } void ABI_PushRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t frame_size) { diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp index ca19890c..df593085 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp @@ -465,7 +465,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& c auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto Xvalue = ctx.reg_alloc.ReadX(args[0]); RegAlloc::Realize(Xvalue); - code.MOV(Xscratch0, mcl::bit_cast(ctx.conf.tpidrro_el0)); + code.MOV(Xscratch0, mcl::bit_cast(ctx.conf.tpidr_el0)); code.STR(Xvalue, Xscratch0); } diff --git a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp index 9321f45c..5cf5f49f 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp @@ -1353,9 +1353,13 @@ void EmitIR(oaknut::CodeGenerator&, EmitContex } template<> -void EmitIR(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) { +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.DefineAsExisting(inst, args[0]); + auto Xvalue = ctx.reg_alloc.ReadX(args[0]); + auto Qresult = ctx.reg_alloc.WriteQ(inst); + RegAlloc::Realize(Xvalue, Qresult); + + code.FMOV(Qresult->toD(), Xvalue); } template<> diff --git a/src/dynarmic/backend/arm64/reg_alloc.cpp b/src/dynarmic/backend/arm64/reg_alloc.cpp index 82bb02e7..2dff0080 100644 --- a/src/dynarmic/backend/arm64/reg_alloc.cpp +++ b/src/dynarmic/backend/arm64/reg_alloc.cpp @@ -157,15 +157,36 @@ void RegAlloc::PrepareForCall(IR::Inst* result, std::optional, 4> args{arg0, arg1, arg2, arg3}; + + // AAPCS64 Next General-purpose Register Number + int ngrn = 0; + // AAPCS64 Next SIMD and Floating-point Register Number + int nsrn = 0; + for (int i = 0; i < 4; i++) { if (args[i]) { - ASSERT(gprs[i].IsCompletelyEmpty()); - LoadCopyInto(args[i]->get().value, oaknut::XReg{i}); + if (args[i]->get().GetType() == IR::Type::U128) { + ASSERT(fprs[nsrn].IsCompletelyEmpty()); + LoadCopyInto(args[i]->get().value, oaknut::QReg{nsrn}); + 
nsrn++; + } else { + ASSERT(gprs[ngrn].IsCompletelyEmpty()); + LoadCopyInto(args[i]->get().value, oaknut::XReg{ngrn}); + ngrn++; + } + } else { + // Gaps are assumed to be in general-purpose registers + // TODO: should there be a separate list passed for FPRs instead? + ngrn++; } } if (result) { - DefineAsRegister(result, X0); + if (result->GetType() == IR::Type::U128) { + DefineAsRegister(result, Q0); + } else { + DefineAsRegister(result, X0); + } } } From 6a14e6e73c3b6d7c012d6f40d4209328c36e2e2b Mon Sep 17 00:00:00 2001 From: Liam Date: Mon, 14 Nov 2022 20:23:01 -0500 Subject: [PATCH 19/47] Fix AndNot64 --- src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp index 5cf5f49f..790873c2 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp @@ -1242,7 +1242,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - EmitAndNot<32>(code, ctx, inst); + EmitAndNot<64>(code, ctx, inst); } template<> From 0df7dccf937a031d1955f992d2be793e212b11c8 Mon Sep 17 00:00:00 2001 From: Liam Date: Mon, 14 Nov 2022 23:45:48 -0500 Subject: [PATCH 20/47] Fix vector fetch --- src/dynarmic/backend/arm64/a64_interface.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dynarmic/backend/arm64/a64_interface.cpp b/src/dynarmic/backend/arm64/a64_interface.cpp index 37c8ec5a..90b5d925 100644 --- a/src/dynarmic/backend/arm64/a64_interface.cpp +++ b/src/dynarmic/backend/arm64/a64_interface.cpp @@ -260,13 +260,13 @@ void Jit::SetRegisters(const std::array& value) { Vector Jit::GetVector(std::size_t index) const { auto& vec = impl->VecRegs(); - return {vec[index], vec[index + 1]}; + return {vec[index * 2], vec[index * 2 + 1]}; } void Jit::SetVector(std::size_t index, Vector value) { auto& vec = impl->VecRegs(); - vec[index] = value[0]; - vec[index + 1] = value[1]; + vec[index * 2] = value[0]; + vec[index * 2 + 1] = value[1]; } std::array Jit::GetVectors() const { From 6dea8c78752f42bdaa0f14f96eb838d3e74f18d6 Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 15 Nov 2022 16:36:57 -0500 Subject: [PATCH 21/47] Fix IC/DC, FABS --- src/dynarmic/backend/arm64/emit_arm64_a64.cpp | 4 ++-- .../arm64/emit_arm64_vector_floating_point.cpp | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp index df593085..17bc8cc9 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp @@ -385,14 +385,14 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCon template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(nullptr, args[1], args[2]); + ctx.reg_alloc.PrepareForCall(nullptr, {}, args[1], args[2]); EmitRelocation(code, ctx, LinkTarget::DataCacheOperationRaised); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(nullptr, args[1], args[2]); + ctx.reg_alloc.PrepareForCall(nullptr, {}, args[0], args[1]); EmitRelocation(code, ctx, 
LinkTarget::InstructionCacheOperationRaised); } diff --git a/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp b/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp index 9f632c24..087499ce 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp @@ -11,6 +11,7 @@ #include "dynarmic/backend/arm64/emit_context.h" #include "dynarmic/backend/arm64/fpsr_manager.h" #include "dynarmic/backend/arm64/reg_alloc.h" +#include "dynarmic/common/fp/info.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" @@ -234,10 +235,17 @@ void EmitToFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + constexpr u16 non_sign_mask = FP::FPInfo::sign_mask - u16{1u}; + constexpr u64 non_sign_mask64 = mcl::bit::replicate_element<16, u64>(non_sign_mask); + + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Qoperand = ctx.reg_alloc.ReadQ(args[0]); + auto Qresult = ctx.reg_alloc.WriteQ(inst); + RegAlloc::Realize(Qoperand, Qresult); + + code.MOV(Xscratch0, non_sign_mask64); + code.DUP(Qresult->D2(), Xscratch0); + code.AND(Qresult->B16(), Qoperand->B16(), Qresult->B16()); } template<> From e02a999cad01e364337c2115b6677d8d898bd480 Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 15 Nov 2022 18:02:21 -0500 Subject: [PATCH 22/47] Add EmitTwoOpFallback and FRINT half fallback --- .../emit_arm64_vector_floating_point.cpp | 107 +++++++++++++++++- 1 file changed, 103 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp b/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp index 087499ce..ce9bab2f 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp @@ -3,15 +3,31 @@ * SPDX-License-Identifier: 0BSD */ +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include "dynarmic/backend/arm64/a32_jitstate.h" +#include "dynarmic/backend/arm64/a64_jitstate.h" #include "dynarmic/backend/arm64/abi.h" #include "dynarmic/backend/arm64/emit_arm64.h" #include "dynarmic/backend/arm64/emit_context.h" #include "dynarmic/backend/arm64/fpsr_manager.h" #include "dynarmic/backend/arm64/reg_alloc.h" +#include "dynarmic/common/cast_util.h" +#include "dynarmic/common/fp/fpcr.h" +#include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/info.h" +#include "dynarmic/common/fp/op.h" +#include "dynarmic/common/fp/rounding_mode.h" +#include "dynarmic/common/lut_from_list.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" @@ -19,6 +35,15 @@ namespace Dynarmic::Backend::Arm64 { using namespace oaknut::util; +namespace mp = mcl::mp; + +using A64FullVectorWidth = std::integral_constant; + +// Array alias that always sizes itself according to the given type T +// relative to the size of a vector register. e.g. T = u32 would result +// in a std::array. 
+template +using VectorArray = std::array>; template static void MaybeStandardFPSCRValue(oaknut::CodeGenerator& code, EmitContext& ctx, bool fpcr_controlled, EmitFn emit) { @@ -233,6 +258,51 @@ void EmitToFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) }); } +template +static void EmitTwoOpFallbackWithoutRegAlloc(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::QReg Qresult, oaknut::QReg Qarg1, Lambda lambda, bool fpcr_controlled) { + const auto fn = static_cast*>(lambda); + + const u32 fpcr = ctx.FPCR(fpcr_controlled).Value(); + constexpr u64 stack_size = sizeof(u64) * 4; // sizeof(u128)*2 + oaknut::Label fn_ptr, end; + + ABI_PushRegisters(code, ABI_CALLER_SAVE & ~(1ull << Qresult.index()), stack_size); + code.MOV(Xscratch0, SP); + code.LDR(Xscratch1, fn_ptr); + + // Call lambda(Vec&, Vec&, fpcr, fpsr&) + code.ADD(X0, Xscratch0, 0 * 16); + code.ADD(X1, Xscratch0, 1 * 16); + code.MOV(X2, fpcr); + code.ADD(X3, Xstate, offsetof(JitState, fpsr)); + code.STR(Qarg1, X1); + code.BLR(Xscratch1); + + // Reload result + code.LDR(Qresult, SP); + ABI_PopRegisters(code, ABI_CALLER_SAVE & ~(1ull << Qresult.index()), stack_size); + + code.B(end); + code.align(8); + code.l(fn_ptr); + code.dx(mcl::bit_cast(fn)); + code.l(end); +} + +template +static void EmitTwoOpFallback(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Qarg1 = ctx.reg_alloc.ReadQ(args[0]); + auto Qresult = ctx.reg_alloc.WriteQ(inst); + RegAlloc::Realize(Qarg1, Qresult); + + ctx.reg_alloc.SpillFlags(); + ctx.fpsr.Spill(); + + const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1(); + EmitTwoOpFallbackWithoutRegAlloc(code, ctx, Qresult, Qarg1, lambda, fpcr_controlled); +} + template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { constexpr u16 non_sign_mask = FP::FPInfo::sign_mask - u16{1u}; @@ -494,10 +564,39 @@ void EmitIR(oaknut::CodeGenerator& code, E template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + const auto rounding = static_cast(inst->GetArg(1).GetU8()); + const bool exact = inst->GetArg(2).GetU1(); + + using rounding_list = mp::list< + mp::lift_value, + mp::lift_value, + mp::lift_value, + mp::lift_value, + mp::lift_value>; + using exact_list = mp::list; + + static const auto lut = Common::GenerateLookupTableFromList( + [](I) { + using FPT = u16; + return std::pair{ + mp::lower_to_tuple_v, + Common::FptrCast( + [](VectorArray& output, const VectorArray& input, FP::FPCR fpcr, FP::FPSR& fpsr) { + constexpr FP::RoundingMode rounding_mode = mp::get<0, I>::value; + constexpr bool exact = mp::get<1, I>::value; + + for (size_t i = 0; i < output.size(); ++i) { + output[i] = static_cast(FP::FPRoundInt(input[i], fpcr, rounding_mode, exact, fpsr)); + } + })}; + }, + mp::cartesian_product{}); + + if (ctx.conf.is_a64) { + EmitTwoOpFallback<3, A64JitState>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact))); + } else { + EmitTwoOpFallback<3, A32JitState>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact))); + } } template<> From 7791d3d8548c6e5885990e3e66438f3cb5272c13 Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 15 Nov 2022 20:59:23 -0500 Subject: [PATCH 23/47] Fix GetCFlag --- src/dynarmic/backend/arm64/emit_arm64_a64.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp 
b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp
index 17bc8cc9..469fc5ea 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp
@@ -142,10 +142,10 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext
 
 template<>
 void EmitIR<IR::Opcode::A64GetCFlag>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    auto Wresult = ctx.reg_alloc.WriteW(inst);
-    RegAlloc::Realize(Wresult);
-    code.LDR(Wresult, Xstate, offsetof(A64JitState, cpsr_nzcv));
-    code.UBFX(Wresult, Wresult, nzcv_c_flag_shift, 1);
+    auto Wflag = ctx.reg_alloc.WriteW(inst);
+    RegAlloc::Realize(Wflag);
+    code.LDR(Wflag, Xstate, offsetof(A64JitState, cpsr_nzcv));
+    code.AND(Wflag, Wflag, 1 << nzcv_c_flag_shift);
 }
 
 template<>

From 16101049f3511b5ccb3b24dd383f8d8e498b0270 Mon Sep 17 00:00:00 2001
From: Liam
Date: Wed, 16 Nov 2022 16:10:39 -0500
Subject: [PATCH 24/47] Fix EXTR (flipped register order)

---
 .../arm64/emit_arm64_data_processing.cpp |  4 ++--
 tests/A64/a64.cpp                        | 17 +++++++++++++++++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp
index 790873c2..a1e618fb 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp
@@ -1408,7 +1408,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCont
     RegAlloc::Realize(Wresult, Wop1, Wop2);
     const u8 lsb = args[2].GetImmediateU8();
 
-    code.EXTR(Wresult, Wop1, Wop2, lsb);
+    code.EXTR(Wresult, Wop2, Wop1, lsb);  // NB: flipped
 }
 
 template<>
@@ -1422,7 +1422,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCont
     RegAlloc::Realize(Xresult, Xop1, Xop2);
     const u8 lsb = args[2].GetImmediateU8();
 
-    code.EXTR(Xresult, Xop1, Xop2, lsb);
+    code.EXTR(Xresult, Xop2, Xop1, lsb);  // NB: flipped
 }
 
 template<>
diff --git a/tests/A64/a64.cpp b/tests/A64/a64.cpp
index 067da6bb..6190ae14 100644
--- a/tests/A64/a64.cpp
+++ b/tests/A64/a64.cpp
@@ -1024,6 +1024,23 @@ TEST_CASE("A64: This is an infinite loop if fast dispatch is enabled", "[a64]")
     jit.Run();
 }
 
+TEST_CASE("A64: EXTR", "[a64]") {
+    A64TestEnv env;
+    A64::Jit jit{A64::UserConfig{&env}};
+
+    env.code_mem.emplace_back(0x93d8fef7);  // EXTR X23, X23, X24, #63
+    env.code_mem.emplace_back(0x14000000);  // B .
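+    // A note on the semantics being tested: EXTR Xd, Xn, Xm, #lsb returns
+    // bits [lsb+63:lsb] of the 128-bit concatenation Xn:Xm. With X23 (the
+    // high half) = 0 and X24 (the low half) = 1, bits [126:63] are all
+    // zero, so X23 must come out as 0; with the operands flipped, the
+    // extraction window covers X24's set bit and the result is nonzero.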
+ + jit.SetPC(0); + jit.SetRegister(23, 0); + jit.SetRegister(24, 1); + + env.ticks_left = 2; + jit.Run(); + + REQUIRE(jit.GetRegister(23) == 0); +} + TEST_CASE("A64: Optimization failure when folding ADD", "[a64]") { A64TestEnv env; A64::Jit jit{A64::UserConfig{&env}}; From 21b42114143c3e09aafd73cf0b0fc5c99b5014c6 Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 16 Nov 2022 16:50:33 -0500 Subject: [PATCH 25/47] Add test for isolated GetNZCVFromOp --- src/dynarmic/backend/arm64/emit_arm64.cpp | 31 ++++++++++++++++++++--- tests/A64/a64.cpp | 31 +++++++++++++++++++++++ 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64.cpp b/src/dynarmic/backend/arm64/emit_arm64.cpp index ffd07954..2680c93c 100644 --- a/src/dynarmic/backend/arm64/emit_arm64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64.cpp @@ -69,9 +69,34 @@ void EmitIR(oaknut::CodeGenerator&, EmitContext& ctx, I } template<> -void EmitIR(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) { - [[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ASSERT(ctx.reg_alloc.IsValueLive(inst)); +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + if (ctx.reg_alloc.IsValueLive(inst)) { + return; + } + + switch (args[0].GetType()) { + case IR::Type::U32: { + auto Wvalue = ctx.reg_alloc.ReadW(args[0]); + auto flags = ctx.reg_alloc.WriteFlags(inst); + RegAlloc::Realize(Wvalue, flags); + + code.CMP(*Wvalue, WZR.toW()); + break; + } + case IR::Type::U64: { + auto Xvalue = ctx.reg_alloc.ReadX(args[0]); + auto flags = ctx.reg_alloc.WriteFlags(inst); + RegAlloc::Realize(Xvalue, flags); + + code.CMP(*Xvalue, XZR.toX()); + break; + } + default: + ASSERT_FALSE("Invalid type for GetNZCVFromOp"); + break; + } } template<> diff --git a/tests/A64/a64.cpp b/tests/A64/a64.cpp index 6190ae14..5edc7730 100644 --- a/tests/A64/a64.cpp +++ b/tests/A64/a64.cpp @@ -1041,6 +1041,37 @@ TEST_CASE("A64: EXTR", "[a64]") { REQUIRE(jit.GetRegister(23) == 0); } +TEST_CASE("A64: Isolated GetNZCVFromOp", "[a64]") { + A64TestEnv env; + A64::Jit jit{A64::UserConfig{&env}}; + + env.code_mem.emplace_back(0xaa1f03f5); // MOV X21, XZR + env.code_mem.emplace_back(0x912a02da); // ADD X26, X22, #0xa80 + env.code_mem.emplace_back(0x913662dc); // ADD X28, X22, #0xd98 + env.code_mem.emplace_back(0x320003e8); // MOV W8, #1 + env.code_mem.emplace_back(0xa9006bfc); // STP X28, X26, [SP] + env.code_mem.emplace_back(0x7200011f); // TST W8, #1 + env.code_mem.emplace_back(0xf94007e8); // LDR X8, [SP, #8] + env.code_mem.emplace_back(0x321e03e3); // MOV W3, #4 + env.code_mem.emplace_back(0xaa1303e2); // MOV X2, X19 + env.code_mem.emplace_back(0x9a881357); // CSEL X23, X26, X8, NE + env.code_mem.emplace_back(0xf94003e8); // LDR X8, [SP] + env.code_mem.emplace_back(0xaa1703e0); // MOV X0, X23 + env.code_mem.emplace_back(0x9a881396); // CSEL X22, X28, X8, NE + env.code_mem.emplace_back(0x92407ea8); // AND X8, X21, #0xffffffff + env.code_mem.emplace_back(0x1ac8269b); // LSR W27, W20, W8 + env.code_mem.emplace_back(0x0b1b0768); // ADD W8, W27, W27, LSL #1 + env.code_mem.emplace_back(0x937f7d01); // SBFIZ X1, X8, #1, #32 + env.code_mem.emplace_back(0x2a1f03e4); // MOV W4, WZR + env.code_mem.emplace_back(0x531e7779); // LSL W25, W27, #2 + env.code_mem.emplace_back(0x14000000); // B . 
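+    // The TST here sets NZCV, but its flags are consumed by the two CSELs
+    // further down, with loads and moves in between; the flag computation
+    // therefore cannot stay fused with a consumer and has to go through the
+    // standalone GetNZCVFromOp path added above. The test passes if it
+    // simply runs to completion.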
+ + jit.SetPC(0); + + env.ticks_left = 20; + jit.Run(); +} + TEST_CASE("A64: Optimization failure when folding ADD", "[a64]") { A64TestEnv env; A64::Jit jit{A64::UserConfig{&env}}; From 46aef36a4f8e6450dd3fd8fb0e3f080f07fd5dcd Mon Sep 17 00:00:00 2001 From: Merry Date: Sat, 26 Nov 2022 16:05:58 +0000 Subject: [PATCH 26/47] test_generator: A64 --- tests/test_generator.cpp | 275 ++++++++++++++++++++++++++++++++++----- 1 file changed, 241 insertions(+), 34 deletions(-) diff --git a/tests/test_generator.cpp b/tests/test_generator.cpp index 2d0cd032..612246af 100644 --- a/tests/test_generator.cpp +++ b/tests/test_generator.cpp @@ -14,6 +14,7 @@ #include #include "./A32/testenv.h" +#include "./A64/testenv.h" #include "./fuzz_util.h" #include "./rand_int.h" #include "dynarmic/common/fp/fpcr.h" @@ -22,7 +23,11 @@ #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/frontend/A32/translate/a32_translate.h" +#include "dynarmic/frontend/A64/a64_location_descriptor.h" +#include "dynarmic/frontend/A64/a64_types.h" +#include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A32/a32.h" +#include "dynarmic/interface/A64/a64.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/location_descriptor.h" #include "dynarmic/ir/opcodes.h" @@ -36,21 +41,14 @@ constexpr bool mask_fpsr_cum_bits = true; namespace { using namespace Dynarmic; -bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A32::ITState it_state = {}) { - const A32::LocationDescriptor location = A32::LocationDescriptor{pc, {}, {}}.SetTFlag(is_thumb).SetIT(it_state); - IR::Block block{location}; - const bool should_continue = A32::TranslateSingleInstruction(block, location, instruction); - - if (!should_continue && !is_last_inst) { - return false; - } - +bool ShouldTestInst(IR::Block& block) { if (auto terminal = block.GetTerminal(); boost::get(&terminal)) { return false; } for (const auto& ir_inst : block) { switch (ir_inst.GetOpcode()) { + // A32 case IR::Opcode::A32GetFpscr: case IR::Opcode::A32ExceptionRaised: case IR::Opcode::A32CallSupervisor: @@ -61,6 +59,11 @@ bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A case IR::Opcode::A32CoprocGetTwoWords: case IR::Opcode::A32CoprocLoadWords: case IR::Opcode::A32CoprocStoreWords: + // A64 + case IR::Opcode::A64ExceptionRaised: + case IR::Opcode::A64CallSupervisor: + case IR::Opcode::A64DataCacheOperationRaised: + case IR::Opcode::A64GetCNTPCT: // Half-precision case IR::Opcode::FPVectorAbs16: case IR::Opcode::FPVectorEqual16: @@ -84,6 +87,30 @@ bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A return true; } +bool ShouldTestA32Inst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A32::ITState it_state = {}) { + const A32::LocationDescriptor location = A32::LocationDescriptor{pc, {}, {}}.SetTFlag(is_thumb).SetIT(it_state); + IR::Block block{location}; + const bool should_continue = A32::TranslateSingleInstruction(block, location, instruction); + + if (!should_continue && !is_last_inst) { + return false; + } + + return ShouldTestInst(block); +} + +bool ShouldTestA64Inst(u32 instruction, u32 pc, bool is_last_inst) { + const A64::LocationDescriptor location = A64::LocationDescriptor{pc, {}}; + IR::Block block{location}; + const bool should_continue = A64::TranslateSingleInstruction(block, location, instruction); + + if (!should_continue && !is_last_inst) { + return false; + } + + return 
ShouldTestInst(block); +} + u32 GenRandomArmInst(u32 pc, bool is_last_inst) { static const struct InstructionGeneratorInfo { std::vector generators; @@ -144,7 +171,7 @@ u32 GenRandomArmInst(u32 pc, bool is_last_inst) { continue; } - if (ShouldTestInst(inst, pc, false, is_last_inst)) { + if (ShouldTestA32Inst(inst, pc, false, is_last_inst)) { return inst; } } @@ -245,7 +272,7 @@ std::vector GenRandomThumbInst(u32 pc, bool is_last_inst, A32::ITState it_s const u32 inst = instructions.generators[index].Generate(); const bool is_four_bytes = (inst >> 16) != 0; - if (ShouldTestInst(is_four_bytes ? mcl::bit::swap_halves_32(inst) : inst, pc, true, is_last_inst, it_state)) { + if (ShouldTestA32Inst(is_four_bytes ? mcl::bit::swap_halves_32(inst) : inst, pc, true, is_last_inst, it_state)) { if (is_four_bytes) return {static_cast(inst >> 16), static_cast(inst)}; return {static_cast(inst)}; @@ -253,8 +280,65 @@ std::vector GenRandomThumbInst(u32 pc, bool is_last_inst, A32::ITState it_s } } +u32 GenRandomA64Inst(u64 pc, bool is_last_inst) { + static const struct InstructionGeneratorInfo { + std::vector generators; + std::vector invalid; + } instructions = [] { + const std::vector> list{ +#define INST(fn, name, bitstring) {#fn, bitstring}, +#include "dynarmic/frontend/A64/decoder/a64.inc" +#undef INST + }; + + std::vector generators; + std::vector invalid; + + // List of instructions not to test + const std::vector do_not_test{ + // Dynarmic and QEMU currently differ on how the exclusive monitor's address range works. + "STXR", + "STLXR", + "STXP", + "STLXP", + "LDXR", + "LDAXR", + "LDXP", + "LDAXP", + // Behaviour differs from QEMU + "MSR_reg", + "MSR_imm", + "MRS", + }; + + for (const auto& [fn, bitstring] : list) { + if (fn == "UnallocatedEncoding") { + continue; + } + if (std::find(do_not_test.begin(), do_not_test.end(), fn) != do_not_test.end()) { + invalid.emplace_back(InstructionGenerator{bitstring}); + continue; + } + generators.emplace_back(InstructionGenerator{bitstring}); + } + return InstructionGeneratorInfo{generators, invalid}; + }(); + + while (true) { + const size_t index = RandInt(0, instructions.generators.size() - 1); + const u32 inst = instructions.generators[index].Generate(); + + if (std::any_of(instructions.invalid.begin(), instructions.invalid.end(), [inst](const auto& invalid) { return invalid.Match(inst); })) { + continue; + } + if (ShouldTestA64Inst(inst, pc, is_last_inst)) { + return inst; + } + } +} + template -Dynarmic::A32::UserConfig GetUserConfig(TestEnv& testenv) { +Dynarmic::A32::UserConfig GetA32UserConfig(TestEnv& testenv) { Dynarmic::A32::UserConfig user_config; user_config.optimizations &= ~OptimizationFlag::FastDispatch; user_config.callbacks = &testenv; @@ -262,14 +346,14 @@ Dynarmic::A32::UserConfig GetUserConfig(TestEnv& testenv) { } template -static void RunTestInstance(Dynarmic::A32::Jit& jit, - TestEnv& jit_env, - const std::array& regs, - const std::array& vecs, - const std::vector& instructions, - const u32 cpsr, - const u32 fpscr, - const size_t ticks_left) { +void RunTestInstance(Dynarmic::A32::Jit& jit, + TestEnv& jit_env, + const std::array& regs, + const std::array& vecs, + const std::vector& instructions, + const u32 cpsr, + const u32 fpscr, + const size_t ticks_left) { const u32 initial_pc = regs[15]; const u32 num_words = initial_pc / sizeof(typename TestEnv::InstructionType); const u32 code_mem_size = num_words + static_cast(instructions.size()); @@ -294,37 +378,37 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit, jit.Run(); } - 
fmt::print("instructions: "); + fmt::print("instructions:"); for (auto instruction : instructions) { if constexpr (sizeof(decltype(instruction)) == 2) { - fmt::print("{:04x} ", instruction); + fmt::print(" {:04x}", instruction); } else { - fmt::print("{:08x} ", instruction); + fmt::print(" {:08x}", instruction); } } fmt::print("\n"); - fmt::print("initial_regs: "); + fmt::print("initial_regs:"); for (u32 i : regs) { - fmt::print("{:08x} ", i); + fmt::print(" {:08x}", i); } fmt::print("\n"); - fmt::print("initial_vecs: "); + fmt::print("initial_vecs:"); for (u32 i : vecs) { - fmt::print("{:08x} ", i); + fmt::print(" {:08x}", i); } fmt::print("\n"); fmt::print("initial_cpsr: {:08x}\n", cpsr); fmt::print("initial_fpcr: {:08x}\n", fpscr); - fmt::print("final_regs: "); + fmt::print("final_regs:"); for (u32 i : jit.Regs()) { - fmt::print("{:08x} ", i); + fmt::print(" {:08x}", i); } fmt::print("\n"); - fmt::print("final_vecs: "); + fmt::print("final_vecs:"); for (u32 i : jit.ExtRegs()) { - fmt::print("{:08x} ", i); + fmt::print(" {:08x}", i); } fmt::print("\n"); fmt::print("final_cpsr: {:08x}\n", jit.Cpsr()); @@ -343,11 +427,104 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit, fmt::print("===\n"); } + +Dynarmic::A64::UserConfig GetA64UserConfig(A64TestEnv& jit_env) { + Dynarmic::A64::UserConfig jit_user_config{&jit_env}; + jit_user_config.optimizations &= ~OptimizationFlag::FastDispatch; + // The below corresponds to the settings for qemu's aarch64_max_initfn + jit_user_config.dczid_el0 = 7; + jit_user_config.ctr_el0 = 0x80038003; + return jit_user_config; +} + +template +void RunTestInstance(Dynarmic::A64::Jit& jit, + A64TestEnv& jit_env, + const std::array& regs, + const std::array, 32>& vecs, + const std::vector& instructions, + const u32 pstate, + const u32 fpcr, + const u64 initial_sp, + const u64 start_address, + const size_t ticks_left) { + jit.ClearCache(); + + for (size_t jit_rerun_count = 0; jit_rerun_count < num_jit_reruns; ++jit_rerun_count) { + jit_env.code_mem = instructions; + jit_env.code_mem.emplace_back(0x14000000); // B . 
+ jit_env.code_mem_start_address = start_address; + jit_env.modified_memory.clear(); + jit_env.interrupts.clear(); + + jit.SetRegisters(regs); + jit.SetVectors(vecs); + jit.SetPC(start_address); + jit.SetSP(initial_sp); + jit.SetFpcr(fpcr); + jit.SetFpsr(0); + jit.SetPstate(pstate); + jit.ClearCache(); + + jit_env.ticks_left = ticks_left; + jit.Run(); + } + + fmt::print("instructions:"); + for (u32 instruction : instructions) { + fmt::print(" {:08x}", instruction); + } + fmt::print("\n"); + + fmt::print("initial_regs:"); + for (u64 i : regs) { + fmt::print(" {:016x}", i); + } + fmt::print("\n"); + fmt::print("initial_vecs:"); + for (auto i : vecs) { + fmt::print(" {:016x}:{:016x}", i[0], i[1]); + } + fmt::print("\n"); + fmt::print("initial_sp: {:016x}\n", initial_sp); + fmt::print("initial_pstate: {:08x}\n", pstate); + fmt::print("initial_fpcr: {:08x}\n", fpcr); + + fmt::print("final_regs:"); + for (u64 i : jit.GetRegisters()) { + fmt::print(" {:016x}", i); + } + fmt::print("\n"); + fmt::print("final_vecs:"); + for (auto i : jit.GetVectors()) { + fmt::print(" {:016x}:{:016x}", i[0], i[1]); + } + fmt::print("\n"); + fmt::print("final_sp: {:016x}\n", jit.GetSP()); + fmt::print("final_pc: {:016x}\n", jit.GetPC()); + fmt::print("final_pstate: {:08x}\n", jit.GetPstate()); + fmt::print("final_fpcr: {:08x}\n", jit.GetFpcr()); + fmt::print("final_qc : {}\n", FP::FPSR{jit.GetFpsr()}.QC()); + + fmt::print("mod_mem:"); + for (auto [addr, value] : jit_env.modified_memory) { + fmt::print(" {:08x}:{:02x}", addr, value); + } + fmt::print("\n"); + + fmt::print("interrupts:\n"); + for (const auto& i : jit_env.interrupts) { + std::puts(i.c_str()); + } + + fmt::print("===\n"); +} + } // Anonymous namespace void TestThumb(size_t num_instructions, size_t num_iterations) { ThumbTestEnv jit_env{}; - Dynarmic::A32::Jit jit{GetUserConfig(jit_env)}; + Dynarmic::A32::Jit jit{GetA32UserConfig(jit_env)}; std::array regs; std::array ext_reg; @@ -374,7 +551,7 @@ void TestThumb(size_t num_instructions, size_t num_iterations) { void TestArm(size_t num_instructions, size_t num_iterations) { ArmTestEnv jit_env{}; - Dynarmic::A32::Jit jit{GetUserConfig(jit_env)}; + Dynarmic::A32::Jit jit{GetA32UserConfig(jit_env)}; std::array regs; std::array ext_reg; @@ -394,13 +571,43 @@ void TestArm(size_t num_instructions, size_t num_iterations) { } regs[15] = start_address; - RunTestInstance(jit, jit_env, regs, ext_reg, instructions, cpsr, fpcr, 1); + RunTestInstance(jit, jit_env, regs, ext_reg, instructions, cpsr, fpcr, num_instructions); + } +} + +void TestA64(size_t num_instructions, size_t num_iterations) { + A64TestEnv jit_env{}; + Dynarmic::A64::Jit jit{GetA64UserConfig(jit_env)}; + + std::array regs; + std::array, 32> vecs; + std::vector instructions; + + for (size_t iteration = 0; iteration < num_iterations; ++iteration) { + std::generate(regs.begin(), regs.end(), [] { return RandInt(0, ~u64(0)); }); + std::generate(vecs.begin(), vecs.end(), RandomVector); + + const u32 start_address = 100; + const u32 pstate = (RandInt(0, 0xF) << 28); + const u32 fpcr = RandomFpcr(); + const u64 initial_sp = RandInt(0x30'0000'0000, 0x40'0000'0000) * 4; + + instructions.clear(); + for (size_t i = 0; i < num_instructions; ++i) { + instructions.emplace_back(GenRandomA64Inst(static_cast(start_address + 4 * instructions.size()), i == num_instructions - 1)); + } + + RunTestInstance(jit, jit_env, regs, vecs, instructions, pstate, fpcr, initial_sp, start_address, num_instructions); } } int main(int, char*[]) { detail::g_rand_int_generator.seed(42069); 
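+    // The seed stays fixed: CI diffs the stdout of the aarch64 and x86_64
+    // builds of this generator, so both must produce identical streams.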
+    TestThumb(1, 1);
+    TestArm(1, 1);
+    TestA64(1, 1);
+
     TestThumb(1, 100000);
     TestArm(1, 100000);
     TestThumb(5, 100000);
     TestArm(5, 100000);
     TestThumb(1024, 10000);
     TestArm(1024, 10000);
 
     return 0;
 }

From b26588123e1402298c02e68e2f0f0dda03d4ce4b Mon Sep 17 00:00:00 2001
From: Merry
Date: Sat, 26 Nov 2022 16:48:57 +0000
Subject: [PATCH 27/47] a64_interface: Remove jit_interface member

---
 src/dynarmic/backend/arm64/a64_interface.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/dynarmic/backend/arm64/a64_interface.cpp b/src/dynarmic/backend/arm64/a64_interface.cpp
index 90b5d925..0698172c 100644
--- a/src/dynarmic/backend/arm64/a64_interface.cpp
+++ b/src/dynarmic/backend/arm64/a64_interface.cpp
@@ -23,9 +23,8 @@ namespace Dynarmic::A64 {
 using namespace Backend::Arm64;
 
 struct Jit::Impl final {
-    Impl(Jit* jit_interface, A64::UserConfig conf)
-        : jit_interface(jit_interface)
-        , conf(conf)
+    Impl(Jit*, A64::UserConfig conf)
+        : conf(conf)
         , current_address_space(conf)
         , core(conf) {}
 
@@ -178,7 +177,6 @@ private:
         }
     }
 
-    Jit* jit_interface;
     A64::UserConfig conf;
     A64JitState current_state{};
     A64AddressSpace current_address_space;

From 3fd19aac99e7b416a33eef4eb5051dbf13fbaa90 Mon Sep 17 00:00:00 2001
From: Merry
Date: Sat, 26 Nov 2022 16:49:21 +0000
Subject: [PATCH 28/47] emit_arm64_floating_point: Implement ToOdd for FPDoubleToSingle

---
 .../backend/arm64/emit_arm64_floating_point.cpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/dynarmic/backend/arm64/emit_arm64_floating_point.cpp b/src/dynarmic/backend/arm64/emit_arm64_floating_point.cpp
index 57b45807..6d85cd23 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_floating_point.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_floating_point.cpp
@@ -575,6 +575,20 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext
 
 template<>
 void EmitIR<IR::Opcode::FPDoubleToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    const auto rounding_mode = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
+
+    if (rounding_mode == FP::RoundingMode::ToOdd) {
+        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+        auto Sto = ctx.reg_alloc.WriteS(inst);
+        auto Dfrom = ctx.reg_alloc.ReadD(args[0]);
+        RegAlloc::Realize(Sto, Dfrom);
+        ctx.fpsr.Load();
+
+        code.FCVTXN(Sto, Dfrom);
+
+        return;
+    }
+
     EmitConvert<64, 32>(code, ctx, inst, [&](auto& Sto, auto& Dfrom) { code.FCVT(Sto, Dfrom); });
 }
 

From 01a9a12c84297bd828e0b84864778e4f2b4b7c24 Mon Sep 17 00:00:00 2001
From: Merry
Date: Sat, 26 Nov 2022 16:49:42 +0000
Subject: [PATCH 29/47] test_generator: Filter out unimplemented IR instructions

---
 tests/test_generator.cpp | 41 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/tests/test_generator.cpp b/tests/test_generator.cpp
index 612246af..d54d719c 100644
--- a/tests/test_generator.cpp
+++ b/tests/test_generator.cpp
@@ -64,7 +64,48 @@ bool ShouldTestInst(IR::Block& block) {
         case IR::Opcode::A64ExceptionRaised:
         case IR::Opcode::A64CallSupervisor:
         case IR::Opcode::A64DataCacheOperationRaised:
         case IR::Opcode::A64GetCNTPCT:
+        // Unimplemented
+        case IR::Opcode::SignedSaturatedAdd8:
+        case IR::Opcode::SignedSaturatedAdd16:
+        case IR::Opcode::SignedSaturatedAdd32:
+        case IR::Opcode::SignedSaturatedAdd64:
+        case IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh16:
+        case IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh32:
+        case IR::Opcode::SignedSaturatedSub8:
+        case IR::Opcode::SignedSaturatedSub16:
+        case IR::Opcode::SignedSaturatedSub32:
+        case IR::Opcode::SignedSaturatedSub64:
+        case IR::Opcode::UnsignedSaturatedAdd8:
+        case IR::Opcode::UnsignedSaturatedAdd16:
+        case IR::Opcode::UnsignedSaturatedAdd32:
+
case IR::Opcode::UnsignedSaturatedAdd64: + case IR::Opcode::UnsignedSaturatedSub8: + case IR::Opcode::UnsignedSaturatedSub16: + case IR::Opcode::UnsignedSaturatedSub32: + case IR::Opcode::UnsignedSaturatedSub64: + case IR::Opcode::VectorMaxS64: + case IR::Opcode::VectorMaxU64: + case IR::Opcode::VectorMinS64: + case IR::Opcode::VectorMinU64: + case IR::Opcode::VectorMultiply64: + case IR::Opcode::SM4AccessSubstitutionBox: + // Half-prec conversions + case IR::Opcode::FPHalfToFixedS16: + case IR::Opcode::FPHalfToFixedS32: + case IR::Opcode::FPHalfToFixedS64: + case IR::Opcode::FPHalfToFixedU16: + case IR::Opcode::FPHalfToFixedU32: + case IR::Opcode::FPHalfToFixedU64: // Half-precision + case IR::Opcode::FPAbs16: + case IR::Opcode::FPMulAdd16: + case IR::Opcode::FPNeg16: + case IR::Opcode::FPRecipEstimate16: + case IR::Opcode::FPRecipExponent16: + case IR::Opcode::FPRecipStepFused16: + case IR::Opcode::FPRoundInt16: + case IR::Opcode::FPRSqrtEstimate16: + case IR::Opcode::FPRSqrtStepFused16: case IR::Opcode::FPVectorAbs16: case IR::Opcode::FPVectorEqual16: case IR::Opcode::FPVectorMulAdd16: From e74e03010b9029f4edea303764827a474bd831e2 Mon Sep 17 00:00:00 2001 From: Merry Date: Sat, 26 Nov 2022 16:50:00 +0000 Subject: [PATCH 30/47] [TEST] test_generator: Test A64 --- tests/test_generator.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/test_generator.cpp b/tests/test_generator.cpp index d54d719c..91bbdbee 100644 --- a/tests/test_generator.cpp +++ b/tests/test_generator.cpp @@ -649,12 +649,14 @@ int main(int, char*[]) { TestArm(1, 1); TestA64(1, 1); - TestThumb(1, 100000); - TestArm(1, 100000); - TestThumb(5, 100000); - TestArm(5, 100000); - TestThumb(1024, 10000); - TestArm(1024, 10000); + TestA64(1, 10000); + + // TestThumb(1, 100000); + // TestArm(1, 100000); + // TestThumb(5, 100000); + // TestArm(5, 100000); + // TestThumb(1024, 10000); + // TestArm(1024, 10000); return 0; } From f7a092c06bd89906002adf971852d5a87f910238 Mon Sep 17 00:00:00 2001 From: Merry Date: Sat, 26 Nov 2022 17:41:31 +0000 Subject: [PATCH 31/47] emit_arm64_vector: Swap arguments of EmitSaturatedAccumulate --- src/dynarmic/backend/arm64/emit_arm64_vector.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_vector.cpp b/src/dynarmic/backend/arm64/emit_arm64_vector.cpp index 803f7b55..4024299e 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_vector.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_vector.cpp @@ -201,8 +201,8 @@ static void EmitThreeOpArrangedLower(oaknut::CodeGenerator& code, EmitContext& c template static void EmitSaturatedAccumulate(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - auto Qaccumulator = ctx.reg_alloc.ReadWriteQ(args[0], inst); - auto Qoperand = ctx.reg_alloc.ReadQ(args[1]); + auto Qaccumulator = ctx.reg_alloc.ReadWriteQ(args[1], inst); // NB: Swapped + auto Qoperand = ctx.reg_alloc.ReadQ(args[0]); // NB: Swapped RegAlloc::Realize(Qaccumulator, Qoperand); ctx.fpsr.Load(); From 26cef90d8191c7f1613230cf7a7a1c8bd7dd3ee3 Mon Sep 17 00:00:00 2001 From: Merry Date: Sun, 27 Nov 2022 14:05:26 +0000 Subject: [PATCH 32/47] reg_alloc: Q0 is scratch and needs to be moved --- src/dynarmic/backend/arm64/emit_arm64.cpp | 2 +- src/dynarmic/backend/arm64/emit_arm64_a32.cpp | 6 +-- .../arm64/emit_arm64_a32_coprocessor.cpp | 3 +- .../backend/arm64/emit_arm64_a32_memory.cpp | 11 +++-- src/dynarmic/backend/arm64/emit_arm64_a64.cpp | 13 
+++--- .../backend/arm64/emit_arm64_a64_memory.cpp | 41 ++++++++++++++++--- src/dynarmic/backend/arm64/reg_alloc.cpp | 22 ++++++---- src/dynarmic/backend/arm64/reg_alloc.h | 8 ++-- 8 files changed, 72 insertions(+), 34 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64.cpp b/src/dynarmic/backend/arm64/emit_arm64.cpp index 2680c93c..065f2184 100644 --- a/src/dynarmic/backend/arm64/emit_arm64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64.cpp @@ -40,7 +40,7 @@ template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(nullptr, args[1], args[2], args[3]); + ctx.reg_alloc.PrepareForCall(args[1], args[2], args[3]); code.MOV(Xscratch0, args[0].GetImmediateU64()); code.BLR(Xscratch0); } diff --git a/src/dynarmic/backend/arm64/emit_arm64_a32.cpp b/src/dynarmic/backend/arm64/emit_arm64_a32.cpp index f8fc5eee..3d43bcb7 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a32.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a32.cpp @@ -555,7 +555,7 @@ void EmitIR(oaknut::CodeGenerator& template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(nullptr); + ctx.reg_alloc.PrepareForCall(); if (ctx.conf.enable_cycle_counting) { code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run)); @@ -576,7 +576,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCont template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(nullptr); + ctx.reg_alloc.PrepareForCall(); if (ctx.conf.enable_cycle_counting) { code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run)); @@ -611,7 +611,7 @@ void EmitIR(oaknut::CodeGenera return; } - ctx.reg_alloc.PrepareForCall(nullptr); + ctx.reg_alloc.PrepareForCall(); EmitRelocation(code, ctx, LinkTarget::InstructionSynchronizationBarrierRaised); } diff --git a/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp b/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp index 6f2f75c7..746757e9 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp @@ -24,7 +24,7 @@ static void EmitCoprocessorException() { } static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional arg0 = {}, std::optional arg1 = {}) { - ctx.reg_alloc.PrepareForCall(inst, {}, arg0, arg1); + const auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, arg0, arg1); if (callback.user_arg) { code.MOV(X0, reinterpret_cast(*callback.user_arg)); @@ -32,6 +32,7 @@ static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A3 code.MOV(Xscratch0, reinterpret_cast(callback.function)); code.BLR(Xscratch0); + code.MOV(Xresult, X0); } template<> diff --git a/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp b/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp index e0bf558c..1cf5f774 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp @@ -25,18 +25,19 @@ static bool IsOrdered(IR::AccType acctype) { static void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(inst, {}, args[1]); + auto Xresult = 
ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1]); const bool ordered = IsOrdered(args[2].GetImmediateAccType()); EmitRelocation(code, ctx, fn); if (ordered) { code.DMB(oaknut::BarrierOp::ISH); } + code.MOV(Xresult, X0); } static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(inst, {}, args[1]); + auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1]); const bool ordered = IsOrdered(args[2].GetImmediateAccType()); code.MOV(Wscratch0, 1); @@ -45,11 +46,12 @@ static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ct if (ordered) { code.DMB(oaknut::BarrierOp::ISH); } + code.MOV(Xresult, X0); } static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(inst, {}, args[1], args[2]); + ctx.reg_alloc.PrepareForCall({}, args[1], args[2]); const bool ordered = IsOrdered(args[3].GetImmediateAccType()); if (ordered) { @@ -63,7 +65,7 @@ static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::I static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(inst, {}, args[1], args[2]); + auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1], args[2]); const bool ordered = IsOrdered(args[3].GetImmediateAccType()); oaknut::Label end; @@ -79,6 +81,7 @@ static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& c code.DMB(oaknut::BarrierOp::ISH); } code.l(end); + code.MOV(Xresult, X0); } template<> diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp index 469fc5ea..00405a03 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp @@ -342,7 +342,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(nullptr); + ctx.reg_alloc.PrepareForCall(); if (ctx.conf.enable_cycle_counting) { code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run)); @@ -363,7 +363,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCont template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(nullptr); + ctx.reg_alloc.PrepareForCall(); if (ctx.conf.enable_cycle_counting) { code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run)); @@ -385,14 +385,14 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCon template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(nullptr, {}, args[1], args[2]); + ctx.reg_alloc.PrepareForCall({}, args[1], args[2]); EmitRelocation(code, ctx, LinkTarget::DataCacheOperationRaised); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(nullptr, {}, args[0], args[1]); + ctx.reg_alloc.PrepareForCall({}, args[0], args[1]); EmitRelocation(code, ctx, LinkTarget::InstructionCacheOperationRaised); } @@ -412,7 +412,7 @@ 
void EmitIR(oaknut::CodeGenera return; } - ctx.reg_alloc.PrepareForCall(nullptr); + ctx.reg_alloc.PrepareForCall(); EmitRelocation(code, ctx, LinkTarget::InstructionSynchronizationBarrierRaised); } @@ -426,8 +426,9 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { // FIXME: AddTicks / GetTicksRemaining - ctx.reg_alloc.PrepareForCall(inst); + auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst); EmitRelocation(code, ctx, LinkTarget::GetCNTPCT); + code.MOV(Xresult, X0); } template<> diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp index 38062956..55c185b7 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp @@ -25,18 +25,31 @@ static bool IsOrdered(IR::AccType acctype) { static void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(inst, {}, args[1]); + auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1]); const bool ordered = IsOrdered(args[2].GetImmediateAccType()); EmitRelocation(code, ctx, fn); if (ordered) { code.DMB(oaknut::BarrierOp::ISH); } + code.MOV(Xresult, X0); +} + +static void EmitReadMemory128(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Qresult = ctx.reg_alloc.PrepareForCallVec(inst, {}, args[1]); + const bool ordered = IsOrdered(args[2].GetImmediateAccType()); + + EmitRelocation(code, ctx, fn); + if (ordered) { + code.DMB(oaknut::BarrierOp::ISH); + } + code.MOV(Qresult.B16(), Q0.B16()); } static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(inst, {}, args[1]); + auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1]); const bool ordered = IsOrdered(args[2].GetImmediateAccType()); code.MOV(Wscratch0, 1); @@ -45,11 +58,26 @@ static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ct if (ordered) { code.DMB(oaknut::BarrierOp::ISH); } + code.MOV(Xresult, X0); +} + +static void EmitExclusiveReadMemory128(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Qresult = ctx.reg_alloc.PrepareForCallVec(inst, {}, args[1]); + const bool ordered = IsOrdered(args[2].GetImmediateAccType()); + + code.MOV(Wscratch0, 1); + code.STRB(Wscratch0, Xstate, offsetof(A64JitState, exclusive_state)); + EmitRelocation(code, ctx, fn); + if (ordered) { + code.DMB(oaknut::BarrierOp::ISH); + } + code.MOV(Qresult.B16(), Q0.B16()); } static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(inst, {}, args[1], args[2]); + ctx.reg_alloc.PrepareForCall({}, args[1], args[2]); const bool ordered = IsOrdered(args[3].GetImmediateAccType()); if (ordered) { @@ -63,7 +91,7 @@ static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::I static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(inst, {}, args[1], 
args[2]); + auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1], args[2]); const bool ordered = IsOrdered(args[3].GetImmediateAccType()); oaknut::Label end; @@ -79,6 +107,7 @@ static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& c code.DMB(oaknut::BarrierOp::ISH); } code.l(end); + code.MOV(Xresult, X0); } template<> @@ -108,7 +137,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContex template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory128); + EmitReadMemory128(code, ctx, inst, LinkTarget::ReadMemory128); } template<> @@ -133,7 +162,7 @@ void EmitIR(oaknut::CodeGenerator& code, E template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory128); + EmitExclusiveReadMemory128(code, ctx, inst, LinkTarget::ExclusiveReadMemory128); } template<> diff --git a/src/dynarmic/backend/arm64/reg_alloc.cpp b/src/dynarmic/backend/arm64/reg_alloc.cpp index 2dff0080..e40216c9 100644 --- a/src/dynarmic/backend/arm64/reg_alloc.cpp +++ b/src/dynarmic/backend/arm64/reg_alloc.cpp @@ -138,7 +138,7 @@ bool RegAlloc::IsValueLive(IR::Inst* inst) const { return !!ValueLocation(inst); } -void RegAlloc::PrepareForCall(IR::Inst* result, std::optional arg0, std::optional arg1, std::optional arg2, std::optional arg3) { +void RegAlloc::PrepareForCall(std::optional arg0, std::optional arg1, std::optional arg2, std::optional arg3) { fpsr_manager.Spill(); SpillFlags(); @@ -180,14 +180,20 @@ void RegAlloc::PrepareForCall(IR::Inst* result, std::optionalGetType() == IR::Type::U128) { - DefineAsRegister(result, Q0); - } else { - DefineAsRegister(result, X0); - } - } +oaknut::XReg RegAlloc::PrepareForCallReg(IR::Inst* result, std::optional arg0, std::optional arg1, std::optional arg2, std::optional arg3) { + PrepareForCall(arg0, arg1, arg2, arg3); + ASSERT(result && result->GetType() != IR::Type::U128); + DefineAsRegister(result, X0); + return X0; +} + +oaknut::QReg RegAlloc::PrepareForCallVec(IR::Inst* result, std::optional arg0, std::optional arg1, std::optional arg2, std::optional arg3) { + PrepareForCall(arg0, arg1, arg2, arg3); + ASSERT(result && result->GetType() == IR::Type::U128); + DefineAsRegister(result, Q8); + return Q8; } void RegAlloc::DefineAsExisting(IR::Inst* inst, Argument& arg) { diff --git a/src/dynarmic/backend/arm64/reg_alloc.h b/src/dynarmic/backend/arm64/reg_alloc.h index c8560e25..4d5c3fe7 100644 --- a/src/dynarmic/backend/arm64/reg_alloc.h +++ b/src/dynarmic/backend/arm64/reg_alloc.h @@ -271,11 +271,9 @@ public: } } - void PrepareForCall(IR::Inst* result = nullptr, - std::optional arg0 = {}, - std::optional arg1 = {}, - std::optional arg2 = {}, - std::optional arg3 = {}); + void PrepareForCall(std::optional arg0 = {}, std::optional arg1 = {}, std::optional arg2 = {}, std::optional arg3 = {}); + oaknut::XReg PrepareForCallReg(IR::Inst* result, std::optional arg0 = {}, std::optional arg1 = {}, std::optional arg2 = {}, std::optional arg3 = {}); + oaknut::QReg PrepareForCallVec(IR::Inst* result, std::optional arg0 = {}, std::optional arg1 = {}, std::optional arg2 = {}, std::optional arg3 = {}); void DefineAsExisting(IR::Inst* inst, Argument& arg); void DefineAsRegister(IR::Inst* inst, oaknut::Reg reg); From 890deb17ce98065778182b4201cffbba483d25b2 Mon Sep 17 00:00:00 2001 From: Merry Date: Sun, 27 Nov 2022 14:08:29 +0000 Subject: [PATCH 33/47] test_generator: Expose interface 
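
The generator now takes its run configuration on the command line:

    dynarmic_test_generator <thumb|arm|a64> <seed> <instruction_count> <iteration_count>

This lets CI run the qemu-hosted aarch64 build and the native x86_64
build with identical arguments and simply diff their stdout, replacing
the fixed set of runs previously hard-coded in main().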
---
 .github/workflows/aarch64.yml | 21 ++++++++++++--
 tests/test_generator.cpp      | 52 +++++++++++++++++++++++++++--------
 2 files changed, 59 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/aarch64.yml b/.github/workflows/aarch64.yml
index 38a794bb..e31dddde 100644
--- a/.github/workflows/aarch64.yml
+++ b/.github/workflows/aarch64.yml
@@ -79,6 +79,23 @@ jobs:
        working-directory: ${{github.workspace}}
        run: qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_tests -d yes
 
-      - name: Test against x86_64 implementation
+      - name: Test against x86_64 implementation (A32, thumb)
        working-directory: ${{github.workspace}}
-       run: diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator) <(./build-x64/tests/dynarmic_test_generator)
+       run: |
+         diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator thumb 42 1 100000) <(./build-x64/tests/dynarmic_test_generator thumb 42 1 100000)
+         diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator thumb 42 10 10000) <(./build-x64/tests/dynarmic_test_generator thumb 42 10 10000)
+         diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator thumb 42 100 1000) <(./build-x64/tests/dynarmic_test_generator thumb 42 100 1000)
+
+      - name: Test against x86_64 implementation (A32, arm)
+       working-directory: ${{github.workspace}}
+       run: |
+         diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator arm 42 1 100000) <(./build-x64/tests/dynarmic_test_generator arm 42 1 100000)
+         diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator arm 42 10 10000) <(./build-x64/tests/dynarmic_test_generator arm 42 10 10000)
+         diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator arm 42 100 1000) <(./build-x64/tests/dynarmic_test_generator arm 42 100 1000)
+
+      - name: Test against x86_64 implementation (A64)
+       working-directory: ${{github.workspace}}
+       run: |
+         diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator a64 42 1 100000) <(./build-x64/tests/dynarmic_test_generator a64 42 1 100000)
+         diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator a64 42 10 10000) <(./build-x64/tests/dynarmic_test_generator a64 42 10 10000)
+         diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator a64 42 100 1000) <(./build-x64/tests/dynarmic_test_generator a64 42 100 1000)
diff --git a/tests/test_generator.cpp b/tests/test_generator.cpp
index 91bbdbee..c0d7d6e4 100644
--- a/tests/test_generator.cpp
+++ b/tests/test_generator.cpp
@@ -6,7 +6,10 @@
 #include
 #include
 #include
+#include
 #include
+#include
+#include
 #include
 #include
 
@@ -642,21 +645,46 @@ void TestA64(size_t num_instructions, size_t num_iterations) {
     }
 }
 
-int main(int, char*[]) {
-    detail::g_rand_int_generator.seed(42069);
+static std::optional str2sz(char const* s) {
+    char* end = nullptr;
+    errno = 0;
 
-    TestThumb(1, 1);
-    TestArm(1, 1);
-    TestA64(1, 1);
+    const long l = std::strtol(s, &end, 10);
+    if (errno == ERANGE || l > std::numeric_limits::max() || l < 0) {
+        return std::nullopt;
+    }
+    if (*s == '\0' || *end != '\0') {
+        return std::nullopt;
+    }
+    return static_cast(l);
+}
 
-    TestA64(1, 10000);
+int main(int argc, char* argv[]) {
+    if (argc != 5) {
+        fmt::print("Usage: {} <thumb|arm|a64> <seed> <instruction_count> <iteration_count>\n", argv[0]);
+    }
 
-    // TestThumb(1, 100000);
-    // TestArm(1, 100000);
-    // TestThumb(5, 100000);
-    // TestArm(5,
100000); - // TestThumb(1024, 10000); - // TestArm(1024, 10000); + const auto seed = str2sz(argv[2]); + const auto instruction_count = str2sz(argv[3]); + const auto iterator_count = str2sz(argv[4]); + + if (!seed || !instruction_count || !iterator_count) { + fmt::print("invalid numeric arguments\n"); + return 1; + } + + detail::g_rand_int_generator.seed(*seed); + + if (strcmp(argv[1], "thumb") == 0) { + TestThumb(*instruction_count, *iterator_count); + } else if (strcmp(argv[1], "arm") == 0) { + TestArm(*instruction_count, *iterator_count); + } else if (strcmp(argv[1], "a64") == 0) { + TestA64(*instruction_count, *iterator_count); + } else { + fmt::print("unrecognized instruction class\n"); + return 1; + } return 0; } From bcb5948ea2cdd99c2eb6fb5ad6c8552b64427ec5 Mon Sep 17 00:00:00 2001 From: Merry Date: Sun, 27 Nov 2022 14:15:06 +0000 Subject: [PATCH 34/47] GetNZCVFromOp: Ensure NZ00 --- src/dynarmic/backend/arm64/emit_arm64.cpp | 4 ++-- src/dynarmic/backend/x64/emit_x64.cpp | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64.cpp b/src/dynarmic/backend/arm64/emit_arm64.cpp index 065f2184..625bb379 100644 --- a/src/dynarmic/backend/arm64/emit_arm64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64.cpp @@ -82,7 +82,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& auto flags = ctx.reg_alloc.WriteFlags(inst); RegAlloc::Realize(Wvalue, flags); - code.CMP(*Wvalue, WZR.toW()); + code.TST(*Wvalue, Wvalue); break; } case IR::Type::U64: { @@ -90,7 +90,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& auto flags = ctx.reg_alloc.WriteFlags(inst); RegAlloc::Realize(Xvalue, flags); - code.CMP(*Xvalue, XZR.toX()); + code.TST(*Xvalue, Xvalue); break; } default: diff --git a/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/backend/x64/emit_x64.cpp index c3a7c80b..62af1ad4 100644 --- a/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/backend/x64/emit_x64.cpp @@ -154,7 +154,7 @@ void EmitX64::EmitGetNZFromOp(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg64 nz = ctx.reg_alloc.ScratchGpr(HostLoc::RAX); const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize); - code.cmp(value, 0); + code.test(value, value); code.lahf(); code.movzx(eax, ah); ctx.reg_alloc.DefineValue(inst, nz); @@ -180,9 +180,9 @@ void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(HostLoc::RAX); const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize); - code.cmp(value, 0); + code.test(value, value); code.lahf(); - code.seto(code.al); + code.mov(al, 0); ctx.reg_alloc.DefineValue(inst, nzcv); } From 3d6faf403b91767816be7b8ad8cf4159708ad9ca Mon Sep 17 00:00:00 2001 From: Merry Date: Sun, 27 Nov 2022 14:19:06 +0000 Subject: [PATCH 35/47] test_generator: Minor fixups --- tests/test_generator.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_generator.cpp b/tests/test_generator.cpp index c0d7d6e4..58d5c088 100644 --- a/tests/test_generator.cpp +++ b/tests/test_generator.cpp @@ -143,7 +143,7 @@ bool ShouldTestA32Inst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst return ShouldTestInst(block); } -bool ShouldTestA64Inst(u32 instruction, u32 pc, bool is_last_inst) { +bool ShouldTestA64Inst(u32 instruction, u64 pc, bool is_last_inst) { const A64::LocationDescriptor location = A64::LocationDescriptor{pc, {}}; IR::Block block{location}; const bool should_continue = A64::TranslateSingleInstruction(block, location, 
instruction); @@ -650,7 +650,7 @@ static std::optional str2sz(char const* s) { errno = 0; const long l = std::strtol(s, &end, 10); - if (errno == ERANGE || l > std::numeric_limits::max() || l < 0) { + if (errno == ERANGE || l < 0) { return std::nullopt; } if (*s == '\0' || *end != '\0') { @@ -673,7 +673,7 @@ int main(int argc, char* argv[]) { return 1; } - detail::g_rand_int_generator.seed(*seed); + detail::g_rand_int_generator.seed(static_cast(*seed)); if (strcmp(argv[1], "thumb") == 0) { TestThumb(*instruction_count, *iterator_count); From a3fc95204b717c6f75fbb850713ef13e9ecbd9cf Mon Sep 17 00:00:00 2001 From: Merry Date: Sun, 27 Nov 2022 14:20:31 +0000 Subject: [PATCH 36/47] fixup --- .github/workflows/aarch64.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/aarch64.yml b/.github/workflows/aarch64.yml index e31dddde..05d6d271 100644 --- a/.github/workflows/aarch64.yml +++ b/.github/workflows/aarch64.yml @@ -48,7 +48,6 @@ jobs: -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DDYNARMIC_TESTS_USE_UNICORN=0 -DDYNARMIC_USE_LLVM=0 - -DDYNARMIC_FRONTENDS=A32 -G Ninja - name: Build AArch64 @@ -66,7 +65,6 @@ jobs: -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - -DDYNARMIC_FRONTENDS=A32 -DDYNARMIC_TESTS_USE_UNICORN=0 -DDYNARMIC_USE_LLVM=0 -G Ninja From 59ccccdc26cf9a5cab791b1dd3f071a3b624615a Mon Sep 17 00:00:00 2001 From: Merry Date: Sun, 27 Nov 2022 18:28:55 +0000 Subject: [PATCH 37/47] fixup --- .../backend/arm64/emit_arm64_a32_coprocessor.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp b/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp index 746757e9..629c381c 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp @@ -24,7 +24,20 @@ static void EmitCoprocessorException() { } static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional arg0 = {}, std::optional arg1 = {}) { - const auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, arg0, arg1); + if (inst) { + const auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, arg0, arg1); + + if (callback.user_arg) { + code.MOV(X0, reinterpret_cast(*callback.user_arg)); + } + + code.MOV(Xscratch0, reinterpret_cast(callback.function)); + code.BLR(Xscratch0); + code.MOV(Xresult, X0); + return; + } + + ctx.reg_alloc.PrepareForCall({}, arg0, arg1); if (callback.user_arg) { code.MOV(X0, reinterpret_cast(*callback.user_arg)); @@ -32,7 +45,6 @@ static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A3 code.MOV(Xscratch0, reinterpret_cast(callback.function)); code.BLR(Xscratch0); - code.MOV(Xresult, X0); } template<> From 167ba85ce8ac1ad38e702843d317da5a245b9a0e Mon Sep 17 00:00:00 2001 From: Merry Date: Mon, 28 Nov 2022 21:50:35 +0000 Subject: [PATCH 38/47] emit_arm64_a64: Implement A64GetCNTPCT --- src/dynarmic/backend/arm64/a32_address_space.cpp | 1 + src/dynarmic/backend/arm64/a64_address_space.cpp | 1 + src/dynarmic/backend/arm64/emit_arm64.h | 1 + src/dynarmic/backend/arm64/emit_arm64_a64.cpp | 9 ++++++++- 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/backend/arm64/a32_address_space.cpp index 59c9193e..208a033e 100644 --- a/src/dynarmic/backend/arm64/a32_address_space.cpp +++ 
b/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -311,6 +311,7 @@ EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) { .is_a64 = false, .hook_isb = conf.hook_isb, .enable_cycle_counting = conf.enable_cycle_counting, + .wall_clock_cntpct = conf.wall_clock_cntpct, .always_little_endian = conf.always_little_endian, .descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return FP::FPCR{A32::LocationDescriptor{location}.FPSCR().Value()}; }, .state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv), diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp index e6045ef8..72b12b6d 100644 --- a/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -441,6 +441,7 @@ EmittedBlockInfo A64AddressSpace::Emit(IR::Block block) { .is_a64 = true, .hook_isb = conf.hook_isb, .enable_cycle_counting = conf.enable_cycle_counting, + .wall_clock_cntpct = conf.wall_clock_cntpct, .always_little_endian = true, .descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return A64::LocationDescriptor{location}.FPCR(); }, .state_nzcv_offset = offsetof(A64JitState, cpsr_nzcv), diff --git a/src/dynarmic/backend/arm64/emit_arm64.h b/src/dynarmic/backend/arm64/emit_arm64.h index b22bf810..ee3fc03a 100644 --- a/src/dynarmic/backend/arm64/emit_arm64.h +++ b/src/dynarmic/backend/arm64/emit_arm64.h @@ -98,6 +98,7 @@ struct EmitConfig { bool is_a64; bool hook_isb; bool enable_cycle_counting; + bool wall_clock_cntpct; bool always_little_endian; FP::FPCR (*descriptor_to_fpcr)(const IR::LocationDescriptor& descriptor); diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp index 00405a03..44c39096 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp @@ -425,8 +425,15 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - // FIXME: AddTicks / GetTicksRemaining auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst); + if (!ctx.conf.wall_clock_cntpct && ctx.conf.enable_cycle_counting) { + code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run)); + code.SUB(X1, X1, Xticks); + EmitRelocation(code, ctx, LinkTarget::AddTicks); + EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining); + code.STR(X0, SP, offsetof(StackLayout, cycles_to_run)); + code.MOV(Xticks, X0); + } EmitRelocation(code, ctx, LinkTarget::GetCNTPCT); code.MOV(Xresult, X0); } From 0707aa3a044d49b61b6c3bcacc6111feefcb0691 Mon Sep 17 00:00:00 2001 From: Merry Date: Tue, 29 Nov 2022 12:03:06 +0000 Subject: [PATCH 39/47] emit_arm64: Remove is_a64 --- .../backend/arm64/a32_address_space.cpp | 25 ++++++++----- .../backend/arm64/a64_address_space.cpp | 17 ++++++--- src/dynarmic/backend/arm64/emit_arm64.cpp | 19 +++------- src/dynarmic/backend/arm64/emit_arm64.h | 36 ++++++++++++------- src/dynarmic/backend/arm64/emit_arm64_a64.cpp | 4 +-- .../emit_arm64_vector_floating_point.cpp | 14 +++----- 6 files changed, 63 insertions(+), 52 deletions(-) diff --git a/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/backend/arm64/a32_address_space.cpp index 208a033e..a7a8a7a8 100644 --- a/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -303,21 +303,30 @@ EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) { mem.unprotect(); const EmitConfig emit_conf{ - .tpidr_el0{}, - .tpidrro_el0{}, - 
.cntfreq_el0{}, - .dczid_el0{}, - .ctr_el0{}, - .is_a64 = false, + .optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations, + .hook_isb = conf.hook_isb, - .enable_cycle_counting = conf.enable_cycle_counting, + + .cntfreq_el0{}, + .ctr_el0{}, + .dczid_el0{}, + .tpidrro_el0{}, + .tpidr_el0{}, + .wall_clock_cntpct = conf.wall_clock_cntpct, + .enable_cycle_counting = conf.enable_cycle_counting, + .always_little_endian = conf.always_little_endian, + .descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return FP::FPCR{A32::LocationDescriptor{location}.FPSCR().Value()}; }, + .emit_cond = EmitA32Cond, + .emit_condition_failed_terminal = EmitA32ConditionFailedTerminal, + .emit_terminal = EmitA32Terminal, + .state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv), .state_fpsr_offset = offsetof(A32JitState, fpsr), + .coprocessors = conf.coprocessors, - .optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations, }; EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf); diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp index 72b12b6d..3bb2b7cf 100644 --- a/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -433,21 +433,30 @@ EmittedBlockInfo A64AddressSpace::Emit(IR::Block block) { mem.unprotect(); const EmitConfig emit_conf{ + .optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations, + + .hook_isb = conf.hook_isb, + .tpidr_el0 = conf.tpidr_el0, .tpidrro_el0 = conf.tpidrro_el0, .cntfreq_el0 = conf.cntfrq_el0, .dczid_el0 = conf.dczid_el0, .ctr_el0 = conf.ctr_el0, - .is_a64 = true, - .hook_isb = conf.hook_isb, - .enable_cycle_counting = conf.enable_cycle_counting, + .wall_clock_cntpct = conf.wall_clock_cntpct, + .enable_cycle_counting = conf.enable_cycle_counting, + .always_little_endian = true, + .descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return A64::LocationDescriptor{location}.FPCR(); }, + .emit_cond = EmitA64Cond, + .emit_condition_failed_terminal = EmitA64ConditionFailedTerminal, + .emit_terminal = EmitA64Terminal, + .state_nzcv_offset = offsetof(A64JitState, cpsr_nzcv), .state_fpsr_offset = offsetof(A64JitState, fpsr), + .coprocessors{}, - .optimizations = conf.unsafe_optimizations ? 
conf.optimizations : conf.optimizations & all_safe_optimizations, }; EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf); diff --git a/src/dynarmic/backend/arm64/emit_arm64.cpp b/src/dynarmic/backend/arm64/emit_arm64.cpp index 625bb379..ef553f2b 100644 --- a/src/dynarmic/backend/arm64/emit_arm64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64.cpp @@ -8,7 +8,6 @@ #include #include -#include "dynarmic/backend/arm64/a32_jitstate.h" #include "dynarmic/backend/arm64/abi.h" #include "dynarmic/backend/arm64/emit_context.h" #include "dynarmic/backend/arm64/fpsr_manager.h" @@ -191,15 +190,9 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E ASSERT(ctx.block.HasConditionFailedLocation()); oaknut::Label pass; - if (conf.is_a64) { - pass = EmitA64Cond(code, ctx, ctx.block.GetCondition()); - EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount()); - EmitA64ConditionFailedTerminal(code, ctx); - } else { - pass = EmitA32Cond(code, ctx, ctx.block.GetCondition()); - EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount()); - EmitA32ConditionFailedTerminal(code, ctx); - } + pass = conf.emit_cond(code, ctx, ctx.block.GetCondition()); + EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount()); + conf.emit_condition_failed_terminal(code, ctx); code.l(pass); } @@ -238,11 +231,7 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E reg_alloc.AssertNoMoreUses(); EmitAddCycles(code, ctx, block.CycleCount()); - if (conf.is_a64) { - EmitA64Terminal(code, ctx); - } else { - EmitA32Terminal(code, ctx); - } + conf.emit_terminal(code, ctx); ebi.size = code.ptr() - ebi.entry_point; return ebi; diff --git a/src/dynarmic/backend/arm64/emit_arm64.h b/src/dynarmic/backend/arm64/emit_arm64.h index ee3fc03a..20d10f92 100644 --- a/src/dynarmic/backend/arm64/emit_arm64.h +++ b/src/dynarmic/backend/arm64/emit_arm64.h @@ -38,6 +38,8 @@ enum class Opcode; namespace Dynarmic::Backend::Arm64 { +struct EmitContext; + using CodePtr = std::byte*; enum class LinkTarget { @@ -90,31 +92,39 @@ struct EmittedBlockInfo { }; struct EmitConfig { - u64* tpidr_el0; - const u64* tpidrro_el0; - u64 cntfreq_el0; - u32 dczid_el0; - u32 ctr_el0; - bool is_a64; + OptimizationFlag optimizations; + bool HasOptimization(OptimizationFlag f) const { return (f & optimizations) != no_optimizations; } + bool hook_isb; - bool enable_cycle_counting; + + // System registers + u64 cntfreq_el0; + u32 ctr_el0; + u32 dczid_el0; + const u64* tpidrro_el0; + u64* tpidr_el0; + + // Timing bool wall_clock_cntpct; + bool enable_cycle_counting; + + // Endianness bool always_little_endian; + // Frontend specific callbacks FP::FPCR (*descriptor_to_fpcr)(const IR::LocationDescriptor& descriptor); + oaknut::Label (*emit_cond)(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond); + void (*emit_condition_failed_terminal)(oaknut::CodeGenerator& code, EmitContext& ctx); + void (*emit_terminal)(oaknut::CodeGenerator& code, EmitContext& ctx); + // State offsets size_t state_nzcv_offset; size_t state_fpsr_offset; + // A32 specific std::array, 16> coprocessors{}; - - OptimizationFlag optimizations; - - bool HasOptimization(OptimizationFlag f) const { return (f & optimizations) != no_optimizations; } }; -struct EmitContext; - EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& emit_conf); template diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp index 44c39096..a0f2fac9 100644 
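// A minimal sketch of the dispatch pattern the emit_arm64.h change above
// adopts: each frontend installs its own cond/terminal emitters into
// EmitConfig, so the generic emitter calls through the config instead of
// branching on a per-frontend flag. Names are taken from the hunks above;
// the surrounding A32 wiring is abbreviated.
//
//     const EmitConfig emit_conf{
//         // ...
//         .emit_cond = EmitA32Cond,
//         .emit_condition_failed_terminal = EmitA32ConditionFailedTerminal,
//         .emit_terminal = EmitA32Terminal,
//         // ...
//     };
//
//     // In EmitArm64, identical for either frontend:
//     oaknut::Label pass = conf.emit_cond(code, ctx, ctx.block.GetCondition());
//     EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount());
//     conf.emit_condition_failed_terminal(code, ctx);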
--- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp @@ -19,8 +19,6 @@ namespace Dynarmic::Backend::Arm64 { using namespace oaknut::util; -static constexpr int nzcv_c_flag_shift = 29; - oaknut::Label EmitA64Cond(oaknut::CodeGenerator& code, EmitContext&, IR::Cond cond) { oaknut::Label pass; // TODO: Flags in host flags @@ -145,7 +143,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& c auto Wflag = ctx.reg_alloc.WriteW(inst); RegAlloc::Realize(Wflag); code.LDR(Wflag, Xstate, offsetof(A64JitState, cpsr_nzcv)); - code.AND(Wflag, Wflag, 1 << nzcv_c_flag_shift); + code.AND(Wflag, Wflag, 1 << 29); } template<> diff --git a/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp b/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp index ce9bab2f..d534c8b8 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp @@ -258,7 +258,7 @@ void EmitToFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) }); } -template +template static void EmitTwoOpFallbackWithoutRegAlloc(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::QReg Qresult, oaknut::QReg Qarg1, Lambda lambda, bool fpcr_controlled) { const auto fn = static_cast*>(lambda); @@ -274,7 +274,7 @@ static void EmitTwoOpFallbackWithoutRegAlloc(oaknut::CodeGenerator& code, EmitCo code.ADD(X0, Xscratch0, 0 * 16); code.ADD(X1, Xscratch0, 1 * 16); code.MOV(X2, fpcr); - code.ADD(X3, Xstate, offsetof(JitState, fpsr)); + code.ADD(X3, Xstate, ctx.conf.state_fpsr_offset); code.STR(Qarg1, X1); code.BLR(Xscratch1); @@ -289,7 +289,7 @@ static void EmitTwoOpFallbackWithoutRegAlloc(oaknut::CodeGenerator& code, EmitCo code.l(end); } -template +template static void EmitTwoOpFallback(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto Qarg1 = ctx.reg_alloc.ReadQ(args[0]); @@ -300,7 +300,7 @@ static void EmitTwoOpFallback(oaknut::CodeGenerator& code, EmitContext& ctx, IR: ctx.fpsr.Spill(); const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1(); - EmitTwoOpFallbackWithoutRegAlloc(code, ctx, Qresult, Qarg1, lambda, fpcr_controlled); + EmitTwoOpFallbackWithoutRegAlloc(code, ctx, Qresult, Qarg1, lambda, fpcr_controlled); } template<> @@ -592,11 +592,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCon }, mp::cartesian_product{}); - if (ctx.conf.is_a64) { - EmitTwoOpFallback<3, A64JitState>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact))); - } else { - EmitTwoOpFallback<3, A32JitState>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact))); - } + EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact))); } template<> From 8f9d1dbf4ee8c2beca046db92bf6111a9e1638d3 Mon Sep 17 00:00:00 2001 From: Merry Date: Tue, 29 Nov 2022 12:22:45 +0000 Subject: [PATCH 40/47] address_space: Deduplicate {A32,A64}AddressSpace --- src/dynarmic/CMakeLists.txt | 2 + .../backend/arm64/a32_address_space.cpp | 173 +------------- .../backend/arm64/a32_address_space.h | 70 +----- .../backend/arm64/a64_address_space.cpp | 196 +--------------- .../backend/arm64/a64_address_space.h | 77 +------ src/dynarmic/backend/arm64/address_space.cpp | 213 ++++++++++++++++++ src/dynarmic/backend/arm64/address_space.h | 92 ++++++++ 7 files changed, 327 insertions(+), 496 deletions(-) create mode 100644 src/dynarmic/backend/arm64/address_space.cpp create mode 100644 
src/dynarmic/backend/arm64/address_space.h diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index 35335d70..992f3c4f 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -373,6 +373,8 @@ elseif(ARCHITECTURE STREQUAL "arm64") backend/arm64/a32_jitstate.h backend/arm64/abi.cpp backend/arm64/abi.h + backend/arm64/address_space.cpp + backend/arm64/address_space.h backend/arm64/devirtualize.h backend/arm64/emit_arm64.cpp backend/arm64/emit_arm64.h diff --git a/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/backend/arm64/a32_address_space.cpp index a7a8a7a8..333d21fa 100644 --- a/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -14,6 +14,7 @@ #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/translate/a32_translate.h" +#include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/exclusive_monitor.h" #include "dynarmic/ir/opt/passes.h" @@ -97,9 +98,8 @@ static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const } A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) - : conf(conf) - , mem(conf.code_cache_size) - , code(mem.ptr()) { + : AddressSpace(conf.code_cache_size) + , conf(conf) { EmitPrelude(); } @@ -121,33 +121,6 @@ IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { return ir_block; } -CodePtr A32AddressSpace::Get(IR::LocationDescriptor descriptor) { - if (const auto iter = block_entries.find(descriptor.Value()); iter != block_entries.end()) { - return iter->second; - } - return nullptr; -} - -CodePtr A32AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) { - if (CodePtr block_entry = Get(descriptor)) { - return block_entry; - } - - IR::Block ir_block = GenerateIR(descriptor); - const EmittedBlockInfo block_info = Emit(std::move(ir_block)); - - block_infos.insert_or_assign(descriptor.Value(), block_info); - block_entries.insert_or_assign(descriptor.Value(), block_info.entry_point); - return block_info.entry_point; -} - -void A32AddressSpace::ClearCache() { - block_entries.clear(); - block_infos.clear(); - block_references.clear(); - code.set_ptr(prelude_info.end_of_prelude); -} - void A32AddressSpace::EmitPrelude() { using namespace oaknut::util; @@ -291,18 +264,8 @@ void A32AddressSpace::EmitPrelude() { mem.protect(); } -size_t A32AddressSpace::GetRemainingSize() { - return conf.code_cache_size - (code.ptr() - reinterpret_cast(mem.ptr())); -} - -EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) { - if (GetRemainingSize() < 1024 * 1024) { - ClearCache(); - } - - mem.unprotect(); - - const EmitConfig emit_conf{ +EmitConfig A32AddressSpace::GetEmitConfig() { + return EmitConfig{ .optimizations = conf.unsafe_optimizations ? 
conf.optimizations : conf.optimizations & all_safe_optimizations, .hook_isb = conf.hook_isb, @@ -328,132 +291,6 @@ EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) { .coprocessors = conf.coprocessors, }; - EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf); - - Link(block.Location(), block_info); - - mem.invalidate(reinterpret_cast(block_info.entry_point), block_info.size); - - RelinkForDescriptor(block.Location()); - - mem.protect(); - - return block_info; -} - -static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector& block_relocations_list) { - using namespace oaknut; - using namespace oaknut::util; - - for (auto [ptr_offset] : block_relocations_list) { - CodeGenerator c{reinterpret_cast(entry_point + ptr_offset)}; - - if (target_ptr) { - c.B((void*)target_ptr); - } else { - c.NOP(); - } - } -} - -void A32AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block_info) { - using namespace oaknut; - using namespace oaknut::util; - - for (auto [ptr_offset, target] : block_info.relocations) { - CodeGenerator c{reinterpret_cast(block_info.entry_point + ptr_offset)}; - - switch (target) { - case LinkTarget::ReturnToDispatcher: - c.B(prelude_info.return_to_dispatcher); - break; - case LinkTarget::ReturnFromRunCode: - c.B(prelude_info.return_from_run_code); - break; - case LinkTarget::ReadMemory8: - c.BL(prelude_info.read_memory_8); - break; - case LinkTarget::ReadMemory16: - c.BL(prelude_info.read_memory_16); - break; - case LinkTarget::ReadMemory32: - c.BL(prelude_info.read_memory_32); - break; - case LinkTarget::ReadMemory64: - c.BL(prelude_info.read_memory_64); - break; - case LinkTarget::ExclusiveReadMemory8: - c.BL(prelude_info.exclusive_read_memory_8); - break; - case LinkTarget::ExclusiveReadMemory16: - c.BL(prelude_info.exclusive_read_memory_16); - break; - case LinkTarget::ExclusiveReadMemory32: - c.BL(prelude_info.exclusive_read_memory_32); - break; - case LinkTarget::ExclusiveReadMemory64: - c.BL(prelude_info.exclusive_read_memory_64); - break; - case LinkTarget::WriteMemory8: - c.BL(prelude_info.write_memory_8); - break; - case LinkTarget::WriteMemory16: - c.BL(prelude_info.write_memory_16); - break; - case LinkTarget::WriteMemory32: - c.BL(prelude_info.write_memory_32); - break; - case LinkTarget::WriteMemory64: - c.BL(prelude_info.write_memory_64); - break; - case LinkTarget::ExclusiveWriteMemory8: - c.BL(prelude_info.exclusive_write_memory_8); - break; - case LinkTarget::ExclusiveWriteMemory16: - c.BL(prelude_info.exclusive_write_memory_16); - break; - case LinkTarget::ExclusiveWriteMemory32: - c.BL(prelude_info.exclusive_write_memory_32); - break; - case LinkTarget::ExclusiveWriteMemory64: - c.BL(prelude_info.exclusive_write_memory_64); - break; - case LinkTarget::CallSVC: - c.BL(prelude_info.call_svc); - break; - case LinkTarget::ExceptionRaised: - c.BL(prelude_info.exception_raised); - break; - case LinkTarget::InstructionSynchronizationBarrierRaised: - c.BL(prelude_info.isb_raised); - break; - case LinkTarget::AddTicks: - c.BL(prelude_info.add_ticks); - break; - case LinkTarget::GetTicksRemaining: - c.BL(prelude_info.get_ticks_remaining); - break; - default: - ASSERT_FALSE("Invalid relocation target"); - } - } - - for (auto [target_descriptor, list] : block_info.block_relocations) { - block_references[target_descriptor.Value()].emplace(block_descriptor.Value()); - LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list); - } -} - -void 
A32AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor) { - for (auto block_descriptor : block_references[target_descriptor.Value()]) { - if (auto iter = block_infos.find(block_descriptor); iter != block_infos.end()) { - const EmittedBlockInfo& block_info = iter->second; - - LinkBlockLinks(block_info.entry_point, Get(target_descriptor), block_infos[block_descriptor].block_relocations[target_descriptor]); - - mem.invalidate(reinterpret_cast(block_info.entry_point), block_info.size); - } - } } } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/a32_address_space.h b/src/dynarmic/backend/arm64/a32_address_space.h index 18ed118d..e33be9b0 100644 --- a/src/dynarmic/backend/arm64/a32_address_space.h +++ b/src/dynarmic/backend/arm64/a32_address_space.h @@ -5,84 +5,24 @@ #pragma once -#include -#include -#include -#include -#include - -#include "dynarmic/backend/arm64/emit_arm64.h" +#include "dynarmic/backend/arm64/address_space.h" #include "dynarmic/interface/A32/config.h" -#include "dynarmic/interface/halt_reason.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/location_descriptor.h" namespace Dynarmic::Backend::Arm64 { -struct A32JitState; - -class A32AddressSpace final { +class A32AddressSpace final : public AddressSpace { public: explicit A32AddressSpace(const A32::UserConfig& conf); - IR::Block GenerateIR(IR::LocationDescriptor) const; + IR::Block GenerateIR(IR::LocationDescriptor) const override; - CodePtr Get(IR::LocationDescriptor descriptor); - - CodePtr GetOrEmit(IR::LocationDescriptor descriptor); - - void ClearCache(); - -private: +protected: friend class A32Core; void EmitPrelude(); - - size_t GetRemainingSize(); - EmittedBlockInfo Emit(IR::Block ir_block); - void Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block); - void RelinkForDescriptor(IR::LocationDescriptor target_descriptor); + EmitConfig GetEmitConfig() override; const A32::UserConfig conf; - - oaknut::CodeBlock mem; - oaknut::CodeGenerator code; - - tsl::robin_map block_entries; - tsl::robin_map block_infos; - tsl::robin_map> block_references; - - struct PreludeInfo { - u32* end_of_prelude; - - using RunCodeFuncType = HaltReason (*)(CodePtr entry_point, A32JitState* context, volatile u32* halt_reason); - RunCodeFuncType run_code; - RunCodeFuncType step_code; - void* return_to_dispatcher; - void* return_from_run_code; - - void* read_memory_8; - void* read_memory_16; - void* read_memory_32; - void* read_memory_64; - void* exclusive_read_memory_8; - void* exclusive_read_memory_16; - void* exclusive_read_memory_32; - void* exclusive_read_memory_64; - void* write_memory_8; - void* write_memory_16; - void* write_memory_32; - void* write_memory_64; - void* exclusive_write_memory_8; - void* exclusive_write_memory_16; - void* exclusive_write_memory_32; - void* exclusive_write_memory_64; - void* call_svc; - void* exception_raised; - void* isb_raised; - void* add_ticks; - void* get_ticks_remaining; - } prelude_info; }; } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp index 3bb2b7cf..529ac245 100644 --- a/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -14,6 +14,7 @@ #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/frontend/A64/a64_location_descriptor.h" #include "dynarmic/frontend/A64/translate/a64_translate.h" +#include "dynarmic/interface/A64/config.h" #include 
"dynarmic/interface/exclusive_monitor.h" #include "dynarmic/ir/opt/passes.h" @@ -96,8 +97,6 @@ static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const return target; } -/* =========================== 128-bit versions =========================== */ - static void* EmitRead128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) { using namespace oaknut::util; @@ -214,9 +213,8 @@ static void* EmitExclusiveWrite128CallTrampoline(oaknut::CodeGenerator& code, co } A64AddressSpace::A64AddressSpace(const A64::UserConfig& conf) - : conf(conf) - , mem(conf.code_cache_size) - , code(mem.ptr()) { + : AddressSpace(conf.code_cache_size) + , conf(conf) { EmitPrelude(); } @@ -242,33 +240,6 @@ IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { return ir_block; } -CodePtr A64AddressSpace::Get(IR::LocationDescriptor descriptor) { - if (const auto iter = block_entries.find(descriptor.Value()); iter != block_entries.end()) { - return iter->second; - } - return nullptr; -} - -CodePtr A64AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) { - if (CodePtr block_entry = Get(descriptor)) { - return block_entry; - } - - IR::Block ir_block = GenerateIR(descriptor); - const EmittedBlockInfo block_info = Emit(std::move(ir_block)); - - block_infos.insert_or_assign(descriptor.Value(), block_info); - block_entries.insert_or_assign(descriptor.Value(), block_info.entry_point); - return block_info.entry_point; -} - -void A64AddressSpace::ClearCache() { - block_entries.clear(); - block_infos.clear(); - block_references.clear(); - code.set_ptr(prelude_info.end_of_prelude); -} - void A64AddressSpace::EmitPrelude() { using namespace oaknut::util; @@ -421,18 +392,8 @@ void A64AddressSpace::EmitPrelude() { mem.protect(); } -size_t A64AddressSpace::GetRemainingSize() { - return conf.code_cache_size - (code.ptr() - reinterpret_cast(mem.ptr())); -} - -EmittedBlockInfo A64AddressSpace::Emit(IR::Block block) { - if (GetRemainingSize() < 1024 * 1024) { - ClearCache(); - } - - mem.unprotect(); - - const EmitConfig emit_conf{ +EmitConfig A64AddressSpace::GetEmitConfig() { + return EmitConfig{ .optimizations = conf.unsafe_optimizations ? 
conf.optimizations : conf.optimizations & all_safe_optimizations, .hook_isb = conf.hook_isb, @@ -458,153 +419,6 @@ EmittedBlockInfo A64AddressSpace::Emit(IR::Block block) { .coprocessors{}, }; - EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf); - - Link(block.Location(), block_info); - - mem.invalidate(reinterpret_cast(block_info.entry_point), block_info.size); - - RelinkForDescriptor(block.Location()); - - mem.protect(); - - return block_info; -} - -static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector& block_relocations_list) { - using namespace oaknut; - using namespace oaknut::util; - - for (auto [ptr_offset] : block_relocations_list) { - CodeGenerator c{reinterpret_cast(entry_point + ptr_offset)}; - - if (target_ptr) { - c.B((void*)target_ptr); - } else { - c.NOP(); - } - } -} - -void A64AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block_info) { - using namespace oaknut; - using namespace oaknut::util; - - for (auto [ptr_offset, target] : block_info.relocations) { - CodeGenerator c{reinterpret_cast(block_info.entry_point + ptr_offset)}; - - switch (target) { - case LinkTarget::ReturnToDispatcher: - c.B(prelude_info.return_to_dispatcher); - break; - case LinkTarget::ReturnFromRunCode: - c.B(prelude_info.return_from_run_code); - break; - case LinkTarget::ReadMemory8: - c.BL(prelude_info.read_memory_8); - break; - case LinkTarget::ReadMemory16: - c.BL(prelude_info.read_memory_16); - break; - case LinkTarget::ReadMemory32: - c.BL(prelude_info.read_memory_32); - break; - case LinkTarget::ReadMemory64: - c.BL(prelude_info.read_memory_64); - break; - case LinkTarget::ReadMemory128: - c.BL(prelude_info.read_memory_128); - break; - case LinkTarget::ExclusiveReadMemory8: - c.BL(prelude_info.exclusive_read_memory_8); - break; - case LinkTarget::ExclusiveReadMemory16: - c.BL(prelude_info.exclusive_read_memory_16); - break; - case LinkTarget::ExclusiveReadMemory32: - c.BL(prelude_info.exclusive_read_memory_32); - break; - case LinkTarget::ExclusiveReadMemory64: - c.BL(prelude_info.exclusive_read_memory_64); - break; - case LinkTarget::ExclusiveReadMemory128: - c.BL(prelude_info.exclusive_read_memory_128); - break; - case LinkTarget::WriteMemory8: - c.BL(prelude_info.write_memory_8); - break; - case LinkTarget::WriteMemory16: - c.BL(prelude_info.write_memory_16); - break; - case LinkTarget::WriteMemory32: - c.BL(prelude_info.write_memory_32); - break; - case LinkTarget::WriteMemory64: - c.BL(prelude_info.write_memory_64); - break; - case LinkTarget::WriteMemory128: - c.BL(prelude_info.write_memory_128); - break; - case LinkTarget::ExclusiveWriteMemory8: - c.BL(prelude_info.exclusive_write_memory_8); - break; - case LinkTarget::ExclusiveWriteMemory16: - c.BL(prelude_info.exclusive_write_memory_16); - break; - case LinkTarget::ExclusiveWriteMemory32: - c.BL(prelude_info.exclusive_write_memory_32); - break; - case LinkTarget::ExclusiveWriteMemory64: - c.BL(prelude_info.exclusive_write_memory_64); - break; - case LinkTarget::ExclusiveWriteMemory128: - c.BL(prelude_info.exclusive_write_memory_128); - break; - case LinkTarget::CallSVC: - c.BL(prelude_info.call_svc); - break; - case LinkTarget::ExceptionRaised: - c.BL(prelude_info.exception_raised); - break; - case LinkTarget::InstructionSynchronizationBarrierRaised: - c.BL(prelude_info.isb_raised); - break; - case LinkTarget::InstructionCacheOperationRaised: - c.BL(prelude_info.ic_raised); - break; - case LinkTarget::DataCacheOperationRaised: - 
c.BL(prelude_info.dc_raised); - break; - case LinkTarget::GetCNTPCT: - c.BL(prelude_info.get_cntpct); - break; - case LinkTarget::AddTicks: - c.BL(prelude_info.add_ticks); - break; - case LinkTarget::GetTicksRemaining: - c.BL(prelude_info.get_ticks_remaining); - break; - default: - ASSERT_FALSE("Invalid relocation target"); - } - } - - for (auto [target_descriptor, list] : block_info.block_relocations) { - block_references[target_descriptor.Value()].emplace(block_descriptor.Value()); - LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list); - } -} - -void A64AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor) { - for (auto block_descriptor : block_references[target_descriptor.Value()]) { - if (auto iter = block_infos.find(block_descriptor); iter != block_infos.end()) { - const EmittedBlockInfo& block_info = iter->second; - - LinkBlockLinks(block_info.entry_point, Get(target_descriptor), block_infos[block_descriptor].block_relocations[target_descriptor]); - - mem.invalidate(reinterpret_cast(block_info.entry_point), block_info.size); - } - } } } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/a64_address_space.h b/src/dynarmic/backend/arm64/a64_address_space.h index 9ae78ad8..2eadf953 100644 --- a/src/dynarmic/backend/arm64/a64_address_space.h +++ b/src/dynarmic/backend/arm64/a64_address_space.h @@ -5,91 +5,24 @@ #pragma once -#include -#include -#include -#include -#include - -#include "dynarmic/backend/arm64/emit_arm64.h" +#include "dynarmic/backend/arm64/address_space.h" #include "dynarmic/interface/A64/config.h" -#include "dynarmic/interface/halt_reason.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/location_descriptor.h" namespace Dynarmic::Backend::Arm64 { -struct A64JitState; - -class A64AddressSpace final { +class A64AddressSpace final : public AddressSpace { public: explicit A64AddressSpace(const A64::UserConfig& conf); - IR::Block GenerateIR(IR::LocationDescriptor) const; + IR::Block GenerateIR(IR::LocationDescriptor) const override; - CodePtr Get(IR::LocationDescriptor descriptor); - - CodePtr GetOrEmit(IR::LocationDescriptor descriptor); - - void ClearCache(); - -private: +protected: friend class A64Core; void EmitPrelude(); - - size_t GetRemainingSize(); - EmittedBlockInfo Emit(IR::Block ir_block); - void Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block); - void RelinkForDescriptor(IR::LocationDescriptor target_descriptor); + EmitConfig GetEmitConfig() override; const A64::UserConfig conf; - - oaknut::CodeBlock mem; - oaknut::CodeGenerator code; - - tsl::robin_map block_entries; - tsl::robin_map block_infos; - tsl::robin_map> block_references; - - struct PreludeInfo { - u32* end_of_prelude; - - using RunCodeFuncType = HaltReason (*)(CodePtr entry_point, A64JitState* context, volatile u32* halt_reason); - RunCodeFuncType run_code; - RunCodeFuncType step_code; - void* return_to_dispatcher; - void* return_from_run_code; - - void* read_memory_8; - void* read_memory_16; - void* read_memory_32; - void* read_memory_64; - void* read_memory_128; - void* exclusive_read_memory_8; - void* exclusive_read_memory_16; - void* exclusive_read_memory_32; - void* exclusive_read_memory_64; - void* exclusive_read_memory_128; - void* write_memory_8; - void* write_memory_16; - void* write_memory_32; - void* write_memory_64; - void* write_memory_128; - void* exclusive_write_memory_8; - void* exclusive_write_memory_16; - void* exclusive_write_memory_32; - void* exclusive_write_memory_64; - void* 
exclusive_write_memory_128; - void* call_svc; - void* exception_raised; - void* dc_raised; - void* ic_raised; - void* isb_raised; - void* get_cntpct; - void* add_ticks; - void* get_ticks_remaining; - } prelude_info; }; } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/address_space.cpp b/src/dynarmic/backend/arm64/address_space.cpp new file mode 100644 index 00000000..3aeaa453 --- /dev/null +++ b/src/dynarmic/backend/arm64/address_space.cpp @@ -0,0 +1,213 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2022 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#include "dynarmic/backend/arm64/a64_address_space.h" +#include "dynarmic/backend/arm64/a64_jitstate.h" +#include "dynarmic/backend/arm64/abi.h" +#include "dynarmic/backend/arm64/devirtualize.h" +#include "dynarmic/backend/arm64/emit_arm64.h" +#include "dynarmic/backend/arm64/stack_layout.h" +#include "dynarmic/common/cast_util.h" +#include "dynarmic/common/fp/fpcr.h" +#include "dynarmic/interface/exclusive_monitor.h" +#include "dynarmic/ir/opt/passes.h" + +namespace Dynarmic::Backend::Arm64 { + +AddressSpace::AddressSpace(size_t code_cache_size) + : code_cache_size(code_cache_size) + , mem(code_cache_size) + , code(mem.ptr()) {} + +AddressSpace::~AddressSpace() = default; + +CodePtr AddressSpace::Get(IR::LocationDescriptor descriptor) { + if (const auto iter = block_entries.find(descriptor.Value()); iter != block_entries.end()) { + return iter->second; + } + return nullptr; +} + +CodePtr AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) { + if (CodePtr block_entry = Get(descriptor)) { + return block_entry; + } + + IR::Block ir_block = GenerateIR(descriptor); + const EmittedBlockInfo block_info = Emit(std::move(ir_block)); + + block_infos.insert_or_assign(descriptor.Value(), block_info); + block_entries.insert_or_assign(descriptor.Value(), block_info.entry_point); + return block_info.entry_point; +} + +void AddressSpace::ClearCache() { + block_entries.clear(); + block_infos.clear(); + block_references.clear(); + code.set_ptr(prelude_info.end_of_prelude); +} + +size_t AddressSpace::GetRemainingSize() { + return code_cache_size - (code.ptr() - reinterpret_cast(mem.ptr())); +} + +EmittedBlockInfo AddressSpace::Emit(IR::Block block) { + if (GetRemainingSize() < 1024 * 1024) { + ClearCache(); + } + + mem.unprotect(); + + EmittedBlockInfo block_info = EmitArm64(code, std::move(block), GetEmitConfig()); + + Link(block.Location(), block_info); + + mem.invalidate(reinterpret_cast(block_info.entry_point), block_info.size); + + RelinkForDescriptor(block.Location()); + + mem.protect(); + + return block_info; +} + +static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector& block_relocations_list) { + using namespace oaknut; + using namespace oaknut::util; + + for (auto [ptr_offset] : block_relocations_list) { + CodeGenerator c{reinterpret_cast(entry_point + ptr_offset)}; + + if (target_ptr) { + c.B((void*)target_ptr); + } else { + c.NOP(); + } + } +} + +void AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block_info) { + using namespace oaknut; + using namespace oaknut::util; + + for (auto [ptr_offset, target] : block_info.relocations) { + CodeGenerator c{reinterpret_cast(block_info.entry_point + ptr_offset)}; + + switch (target) { + case LinkTarget::ReturnToDispatcher: + c.B(prelude_info.return_to_dispatcher); + break; + case LinkTarget::ReturnFromRunCode: + c.B(prelude_info.return_from_run_code); + break; + case 
LinkTarget::ReadMemory8: + c.BL(prelude_info.read_memory_8); + break; + case LinkTarget::ReadMemory16: + c.BL(prelude_info.read_memory_16); + break; + case LinkTarget::ReadMemory32: + c.BL(prelude_info.read_memory_32); + break; + case LinkTarget::ReadMemory64: + c.BL(prelude_info.read_memory_64); + break; + case LinkTarget::ReadMemory128: + c.BL(prelude_info.read_memory_128); + break; + case LinkTarget::ExclusiveReadMemory8: + c.BL(prelude_info.exclusive_read_memory_8); + break; + case LinkTarget::ExclusiveReadMemory16: + c.BL(prelude_info.exclusive_read_memory_16); + break; + case LinkTarget::ExclusiveReadMemory32: + c.BL(prelude_info.exclusive_read_memory_32); + break; + case LinkTarget::ExclusiveReadMemory64: + c.BL(prelude_info.exclusive_read_memory_64); + break; + case LinkTarget::ExclusiveReadMemory128: + c.BL(prelude_info.exclusive_read_memory_128); + break; + case LinkTarget::WriteMemory8: + c.BL(prelude_info.write_memory_8); + break; + case LinkTarget::WriteMemory16: + c.BL(prelude_info.write_memory_16); + break; + case LinkTarget::WriteMemory32: + c.BL(prelude_info.write_memory_32); + break; + case LinkTarget::WriteMemory64: + c.BL(prelude_info.write_memory_64); + break; + case LinkTarget::WriteMemory128: + c.BL(prelude_info.write_memory_128); + break; + case LinkTarget::ExclusiveWriteMemory8: + c.BL(prelude_info.exclusive_write_memory_8); + break; + case LinkTarget::ExclusiveWriteMemory16: + c.BL(prelude_info.exclusive_write_memory_16); + break; + case LinkTarget::ExclusiveWriteMemory32: + c.BL(prelude_info.exclusive_write_memory_32); + break; + case LinkTarget::ExclusiveWriteMemory64: + c.BL(prelude_info.exclusive_write_memory_64); + break; + case LinkTarget::ExclusiveWriteMemory128: + c.BL(prelude_info.exclusive_write_memory_128); + break; + case LinkTarget::CallSVC: + c.BL(prelude_info.call_svc); + break; + case LinkTarget::ExceptionRaised: + c.BL(prelude_info.exception_raised); + break; + case LinkTarget::InstructionSynchronizationBarrierRaised: + c.BL(prelude_info.isb_raised); + break; + case LinkTarget::InstructionCacheOperationRaised: + c.BL(prelude_info.ic_raised); + break; + case LinkTarget::DataCacheOperationRaised: + c.BL(prelude_info.dc_raised); + break; + case LinkTarget::GetCNTPCT: + c.BL(prelude_info.get_cntpct); + break; + case LinkTarget::AddTicks: + c.BL(prelude_info.add_ticks); + break; + case LinkTarget::GetTicksRemaining: + c.BL(prelude_info.get_ticks_remaining); + break; + default: + ASSERT_FALSE("Invalid relocation target"); + } + } + + for (auto [target_descriptor, list] : block_info.block_relocations) { + block_references[target_descriptor.Value()].emplace(block_descriptor.Value()); + LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list); + } +} + +void AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor) { + for (auto block_descriptor : block_references[target_descriptor.Value()]) { + if (auto iter = block_infos.find(block_descriptor); iter != block_infos.end()) { + const EmittedBlockInfo& block_info = iter->second; + + LinkBlockLinks(block_info.entry_point, Get(target_descriptor), block_infos[block_descriptor].block_relocations[target_descriptor]); + + mem.invalidate(reinterpret_cast(block_info.entry_point), block_info.size); + } + } +} + +} // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/address_space.h b/src/dynarmic/backend/arm64/address_space.h new file mode 100644 index 00000000..3544174c --- /dev/null +++ b/src/dynarmic/backend/arm64/address_space.h @@ -0,0 +1,92 @@ +/* This file 
is part of the dynarmic project. + * Copyright (c) 2022 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "dynarmic/backend/arm64/emit_arm64.h" +#include "dynarmic/interface/halt_reason.h" +#include "dynarmic/ir/basic_block.h" +#include "dynarmic/ir/location_descriptor.h" + +namespace Dynarmic::Backend::Arm64 { + +class AddressSpace { +public: + explicit AddressSpace(size_t code_cache_size); + virtual ~AddressSpace(); + + virtual IR::Block GenerateIR(IR::LocationDescriptor) const = 0; + + CodePtr Get(IR::LocationDescriptor descriptor); + + CodePtr GetOrEmit(IR::LocationDescriptor descriptor); + + void ClearCache(); + +protected: + virtual EmitConfig GetEmitConfig() = 0; + + size_t GetRemainingSize(); + EmittedBlockInfo Emit(IR::Block ir_block); + void Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block); + void RelinkForDescriptor(IR::LocationDescriptor target_descriptor); + + const size_t code_cache_size; + oaknut::CodeBlock mem; + oaknut::CodeGenerator code; + + tsl::robin_map block_entries; + tsl::robin_map block_infos; + tsl::robin_map> block_references; + + struct PreludeInfo { + u32* end_of_prelude; + + using RunCodeFuncType = HaltReason (*)(CodePtr entry_point, void* jit_state, volatile u32* halt_reason); + RunCodeFuncType run_code; + RunCodeFuncType step_code; + void* return_to_dispatcher; + void* return_from_run_code; + + void* read_memory_8; + void* read_memory_16; + void* read_memory_32; + void* read_memory_64; + void* read_memory_128; + void* exclusive_read_memory_8; + void* exclusive_read_memory_16; + void* exclusive_read_memory_32; + void* exclusive_read_memory_64; + void* exclusive_read_memory_128; + void* write_memory_8; + void* write_memory_16; + void* write_memory_32; + void* write_memory_64; + void* write_memory_128; + void* exclusive_write_memory_8; + void* exclusive_write_memory_16; + void* exclusive_write_memory_32; + void* exclusive_write_memory_64; + void* exclusive_write_memory_128; + + void* call_svc; + void* exception_raised; + void* dc_raised; + void* ic_raised; + void* isb_raised; + + void* get_cntpct; + void* add_ticks; + void* get_ticks_remaining; + } prelude_info; +}; + +} // namespace Dynarmic::Backend::Arm64 From c30ecd4d0b8d8663246ac732fd9844e25d8fd99b Mon Sep 17 00:00:00 2001 From: Merry Date: Tue, 29 Nov 2022 12:33:47 +0000 Subject: [PATCH 41/47] a64_address_space: Don't load fpsr here --- src/dynarmic/backend/arm64/a64_address_space.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp index 529ac245..14d216a1 100644 --- a/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -293,9 +293,7 @@ void A64AddressSpace::EmitPrelude() { code.MRS(Xscratch1, oaknut::SystemReg::FPCR); code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr)); code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr)); - code.LDR(Wscratch1, Xstate, offsetof(A64JitState, fpsr)); code.MSR(oaknut::SystemReg::FPCR, Xscratch0); - code.MSR(oaknut::SystemReg::FPSR, Xscratch1); code.LDAR(Wscratch0, Xhalt); code.CBNZ(Wscratch0, return_from_run_code); From 6965095cb90b656888be2cf028a0c6074ade322d Mon Sep 17 00:00:00 2001 From: Merry Date: Tue, 29 Nov 2022 12:52:28 +0000 Subject: [PATCH 42/47] a64_address_space: Reorder declaration appropriately --- src/dynarmic/backend/arm64/a64_address_space.cpp | 6 +++--- 1 file changed, 3 insertions(+), 
3 deletions(-) diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp index 14d216a1..acbd381f 100644 --- a/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -396,11 +396,11 @@ EmitConfig A64AddressSpace::GetEmitConfig() { .hook_isb = conf.hook_isb, - .tpidr_el0 = conf.tpidr_el0, - .tpidrro_el0 = conf.tpidrro_el0, .cntfreq_el0 = conf.cntfrq_el0, - .dczid_el0 = conf.dczid_el0, .ctr_el0 = conf.ctr_el0, + .dczid_el0 = conf.dczid_el0, + .tpidrro_el0 = conf.tpidrro_el0, + .tpidr_el0 = conf.tpidr_el0, .wall_clock_cntpct = conf.wall_clock_cntpct, .enable_cycle_counting = conf.enable_cycle_counting, From cf704a460dfc7dde3ae6d6561789669bfd4b4f20 Mon Sep 17 00:00:00 2001 From: Merry Date: Tue, 29 Nov 2022 14:14:00 +0000 Subject: [PATCH 43/47] EmitTwoOpFallbackWithoutRegAlloc: Simplify --- src/dynarmic/backend/arm64/a64_jitstate.h | 2 +- .../emit_arm64_vector_floating_point.cpp | 23 +++++-------------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/src/dynarmic/backend/arm64/a64_jitstate.h b/src/dynarmic/backend/arm64/a64_jitstate.h index 223a36c9..215e6987 100644 --- a/src/dynarmic/backend/arm64/a64_jitstate.h +++ b/src/dynarmic/backend/arm64/a64_jitstate.h @@ -20,7 +20,7 @@ struct A64JitState { u32 cpsr_nzcv = 0; - alignas(16) std::array vec{}; // Extension registers. + alignas(16) std::array vec{}; u32 exclusive_state = 0; diff --git a/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp b/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp index d534c8b8..088b5cc2 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp @@ -263,30 +263,20 @@ static void EmitTwoOpFallbackWithoutRegAlloc(oaknut::CodeGenerator& code, EmitCo const auto fn = static_cast*>(lambda); const u32 fpcr = ctx.FPCR(fpcr_controlled).Value(); - constexpr u64 stack_size = sizeof(u64) * 4; // sizeof(u128)*2 - oaknut::Label fn_ptr, end; + constexpr u64 stack_size = sizeof(u64) * 4; // sizeof(u128) * 2 ABI_PushRegisters(code, ABI_CALLER_SAVE & ~(1ull << Qresult.index()), stack_size); - code.MOV(Xscratch0, SP); - code.LDR(Xscratch1, fn_ptr); - // Call lambda(Vec&, Vec&, fpcr, fpsr&) - code.ADD(X0, Xscratch0, 0 * 16); - code.ADD(X1, Xscratch0, 1 * 16); + code.MOV(Xscratch0, mcl::bit_cast(fn)); + code.ADD(X0, SP, 0 * 16); + code.ADD(X1, SP, 1 * 16); code.MOV(X2, fpcr); code.ADD(X3, Xstate, ctx.conf.state_fpsr_offset); code.STR(Qarg1, X1); - code.BLR(Xscratch1); - - // Reload result + code.BLR(Xscratch0); code.LDR(Qresult, SP); - ABI_PopRegisters(code, ABI_CALLER_SAVE & ~(1ull << Qresult.index()), stack_size); - code.B(end); - code.align(8); - code.l(fn_ptr); - code.dx(mcl::bit_cast(fn)); - code.l(end); + ABI_PopRegisters(code, ABI_CALLER_SAVE & ~(1ull << Qresult.index()), stack_size); } template @@ -295,7 +285,6 @@ static void EmitTwoOpFallback(oaknut::CodeGenerator& code, EmitContext& ctx, IR: auto Qarg1 = ctx.reg_alloc.ReadQ(args[0]); auto Qresult = ctx.reg_alloc.WriteQ(inst); RegAlloc::Realize(Qarg1, Qresult); - ctx.reg_alloc.SpillFlags(); ctx.fpsr.Spill(); From 73eecfbaefc9a6d1f08cc0c6da5478482f261fad Mon Sep 17 00:00:00 2001 From: Merry Date: Tue, 29 Nov 2022 14:14:17 +0000 Subject: [PATCH 44/47] emit_arm64_vector_floating_point: Simplify FPVectorAbs16 --- .../arm64/emit_arm64_vector_floating_point.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git 
a/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp b/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp index 088b5cc2..4bb1614a 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp @@ -294,17 +294,11 @@ static void EmitTwoOpFallback(oaknut::CodeGenerator& code, EmitContext& ctx, IR: template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - constexpr u16 non_sign_mask = FP::FPInfo::sign_mask - u16{1u}; - constexpr u64 non_sign_mask64 = mcl::bit::replicate_element<16, u64>(non_sign_mask); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - auto Qoperand = ctx.reg_alloc.ReadQ(args[0]); - auto Qresult = ctx.reg_alloc.WriteQ(inst); - RegAlloc::Realize(Qoperand, Qresult); + auto Qresult = ctx.reg_alloc.ReadWriteQ(args[0], inst); + RegAlloc::Realize(Qresult); - code.MOV(Xscratch0, non_sign_mask64); - code.DUP(Qresult->D2(), Xscratch0); - code.AND(Qresult->B16(), Qoperand->B16(), Qresult->B16()); + code.BIC(Qresult->H8(), 0b10000000, LSL, 8); } template<> From 4f1f7c8e975d72e54f6dfa3b6cfbb19d0e1b7dc8 Mon Sep 17 00:00:00 2001 From: Merry Date: Tue, 29 Nov 2022 15:11:00 +0000 Subject: [PATCH 45/47] arm64/reg_alloc: Remove PrepareForCallReg and PrepareForCallVec --- .../arm64/emit_arm64_a32_coprocessor.cpp | 17 ++++---------- .../backend/arm64/emit_arm64_a32_memory.cpp | 12 +++++----- src/dynarmic/backend/arm64/emit_arm64_a64.cpp | 4 ++-- .../backend/arm64/emit_arm64_a64_memory.cpp | 22 ++++++++++--------- src/dynarmic/backend/arm64/reg_alloc.cpp | 14 ------------ src/dynarmic/backend/arm64/reg_alloc.h | 2 -- 6 files changed, 24 insertions(+), 47 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp b/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp index 629c381c..5115fbbb 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp @@ -24,19 +24,6 @@ static void EmitCoprocessorException() { } static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional arg0 = {}, std::optional arg1 = {}) { - if (inst) { - const auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, arg0, arg1); - - if (callback.user_arg) { - code.MOV(X0, reinterpret_cast(*callback.user_arg)); - } - - code.MOV(Xscratch0, reinterpret_cast(callback.function)); - code.BLR(Xscratch0); - code.MOV(Xresult, X0); - return; - } - ctx.reg_alloc.PrepareForCall({}, arg0, arg1); if (callback.user_arg) { @@ -45,6 +32,10 @@ static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A3 code.MOV(Xscratch0, reinterpret_cast(callback.function)); code.BLR(Xscratch0); + + if (inst) { + ctx.reg_alloc.DefineAsRegister(inst, X0); + } } template<> diff --git a/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp b/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp index 1cf5f774..8d72dd19 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp @@ -25,19 +25,19 @@ static bool IsOrdered(IR::AccType acctype) { static void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1]); + ctx.reg_alloc.PrepareForCall({}, args[1]); const bool ordered = 
IsOrdered(args[2].GetImmediateAccType()); EmitRelocation(code, ctx, fn); if (ordered) { code.DMB(oaknut::BarrierOp::ISH); } - code.MOV(Xresult, X0); + ctx.reg_alloc.DefineAsRegister(inst, X0); } static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1]); + ctx.reg_alloc.PrepareForCall({}, args[1]); const bool ordered = IsOrdered(args[2].GetImmediateAccType()); code.MOV(Wscratch0, 1); @@ -46,7 +46,7 @@ static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ct if (ordered) { code.DMB(oaknut::BarrierOp::ISH); } - code.MOV(Xresult, X0); + ctx.reg_alloc.DefineAsRegister(inst, X0); } static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { @@ -65,7 +65,7 @@ static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::I static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1], args[2]); + ctx.reg_alloc.PrepareForCall({}, args[1], args[2]); const bool ordered = IsOrdered(args[3].GetImmediateAccType()); oaknut::Label end; @@ -81,7 +81,7 @@ static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& c code.DMB(oaknut::BarrierOp::ISH); } code.l(end); - code.MOV(Xresult, X0); + ctx.reg_alloc.DefineAsRegister(inst, X0); } template<> diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp index a0f2fac9..a686f555 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp @@ -423,7 +423,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst); + ctx.reg_alloc.PrepareForCall(); if (!ctx.conf.wall_clock_cntpct && ctx.conf.enable_cycle_counting) { code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run)); code.SUB(X1, X1, Xticks); @@ -433,7 +433,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& code.MOV(Xticks, X0); } EmitRelocation(code, ctx, LinkTarget::GetCNTPCT); - code.MOV(Xresult, X0); + ctx.reg_alloc.DefineAsRegister(inst, X0); } template<> diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp index 55c185b7..bf378605 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp @@ -25,31 +25,32 @@ static bool IsOrdered(IR::AccType acctype) { static void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1]); + ctx.reg_alloc.PrepareForCall({}, args[1]); const bool ordered = IsOrdered(args[2].GetImmediateAccType()); EmitRelocation(code, ctx, fn); if (ordered) { code.DMB(oaknut::BarrierOp::ISH); } - code.MOV(Xresult, X0); + ctx.reg_alloc.DefineAsRegister(inst, X0); } static void EmitReadMemory128(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - auto Qresult = ctx.reg_alloc.PrepareForCallVec(inst, {}, args[1]); + 
ctx.reg_alloc.PrepareForCall({}, args[1]); const bool ordered = IsOrdered(args[2].GetImmediateAccType()); EmitRelocation(code, ctx, fn); if (ordered) { code.DMB(oaknut::BarrierOp::ISH); } - code.MOV(Qresult.B16(), Q0.B16()); + code.MOV(Q8.B16(), Q0.B16()); + ctx.reg_alloc.DefineAsRegister(inst, Q8); } static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1]); + ctx.reg_alloc.PrepareForCall({}, args[1]); const bool ordered = IsOrdered(args[2].GetImmediateAccType()); code.MOV(Wscratch0, 1); @@ -58,12 +59,12 @@ static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ct if (ordered) { code.DMB(oaknut::BarrierOp::ISH); } - code.MOV(Xresult, X0); + ctx.reg_alloc.DefineAsRegister(inst, X0); } static void EmitExclusiveReadMemory128(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - auto Qresult = ctx.reg_alloc.PrepareForCallVec(inst, {}, args[1]); + ctx.reg_alloc.PrepareForCall({}, args[1]); const bool ordered = IsOrdered(args[2].GetImmediateAccType()); code.MOV(Wscratch0, 1); @@ -72,7 +73,8 @@ static void EmitExclusiveReadMemory128(oaknut::CodeGenerator& code, EmitContext& if (ordered) { code.DMB(oaknut::BarrierOp::ISH); } - code.MOV(Qresult.B16(), Q0.B16()); + code.MOV(Q8.B16(), Q0.B16()); + ctx.reg_alloc.DefineAsRegister(inst, Q8); } static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { @@ -91,7 +93,7 @@ static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::I static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1], args[2]); + ctx.reg_alloc.PrepareForCall({}, args[1], args[2]); const bool ordered = IsOrdered(args[3].GetImmediateAccType()); oaknut::Label end; @@ -107,7 +109,7 @@ static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& c code.DMB(oaknut::BarrierOp::ISH); } code.l(end); - code.MOV(Xresult, X0); + ctx.reg_alloc.DefineAsRegister(inst, X0); } template<> diff --git a/src/dynarmic/backend/arm64/reg_alloc.cpp b/src/dynarmic/backend/arm64/reg_alloc.cpp index e40216c9..3e61532a 100644 --- a/src/dynarmic/backend/arm64/reg_alloc.cpp +++ b/src/dynarmic/backend/arm64/reg_alloc.cpp @@ -182,20 +182,6 @@ void RegAlloc::PrepareForCall(std::optional arg0, } } -oaknut::XReg RegAlloc::PrepareForCallReg(IR::Inst* result, std::optional arg0, std::optional arg1, std::optional arg2, std::optional arg3) { - PrepareForCall(arg0, arg1, arg2, arg3); - ASSERT(result && result->GetType() != IR::Type::U128); - DefineAsRegister(result, X0); - return X0; -} - -oaknut::QReg RegAlloc::PrepareForCallVec(IR::Inst* result, std::optional arg0, std::optional arg1, std::optional arg2, std::optional arg3) { - PrepareForCall(arg0, arg1, arg2, arg3); - ASSERT(result && result->GetType() == IR::Type::U128); - DefineAsRegister(result, Q8); - return Q8; -} - void RegAlloc::DefineAsExisting(IR::Inst* inst, Argument& arg) { ASSERT(!ValueLocation(inst)); diff --git a/src/dynarmic/backend/arm64/reg_alloc.h b/src/dynarmic/backend/arm64/reg_alloc.h index 4d5c3fe7..a101ee6d 100644 --- a/src/dynarmic/backend/arm64/reg_alloc.h +++ b/src/dynarmic/backend/arm64/reg_alloc.h @@ -272,8 +272,6 @@ 
public: } void PrepareForCall(std::optional arg0 = {}, std::optional arg1 = {}, std::optional arg2 = {}, std::optional arg3 = {}); - oaknut::XReg PrepareForCallReg(IR::Inst* result, std::optional arg0 = {}, std::optional arg1 = {}, std::optional arg2 = {}, std::optional arg3 = {}); - oaknut::QReg PrepareForCallVec(IR::Inst* result, std::optional arg0 = {}, std::optional arg1 = {}, std::optional arg2 = {}, std::optional arg3 = {}); void DefineAsExisting(IR::Inst* inst, Argument& arg); void DefineAsRegister(IR::Inst* inst, oaknut::Reg reg); From 6960d29868130f0d6e85b84906695e32eddf677d Mon Sep 17 00:00:00 2001 From: Merry Date: Tue, 29 Nov 2022 15:15:21 +0000 Subject: [PATCH 46/47] a64_address_space: Remove fpsr load in step_code --- src/dynarmic/backend/arm64/a64_address_space.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp index acbd381f..c8394b1d 100644 --- a/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -317,9 +317,7 @@ void A64AddressSpace::EmitPrelude() { code.MRS(Xscratch1, oaknut::SystemReg::FPCR); code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr)); code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr)); - code.LDR(Wscratch1, Xstate, offsetof(A64JitState, fpsr)); code.MSR(oaknut::SystemReg::FPCR, Xscratch0); - code.MSR(oaknut::SystemReg::FPSR, Xscratch1); oaknut::Label step_hr_loop; code.l(step_hr_loop); From 2c87e2f76f330b0e525c20ef9e80ccba4bc02078 Mon Sep 17 00:00:00 2001 From: Merry Date: Tue, 29 Nov 2022 15:21:09 +0000 Subject: [PATCH 47/47] a64_address_space: Simplify 128-bit Q0->{X2,X3} transfer --- src/dynarmic/backend/arm64/a64_address_space.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp index c8394b1d..96ce49b8 100644 --- a/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -161,10 +161,8 @@ static void* EmitWrite128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCa oaknut::Label l_addr, l_this; void* target = code.ptr(); - ABI_PushRegisters(code, 0, sizeof(Vector)); - code.STR(Q0, SP); - code.LDP(X2, X3, SP); - ABI_PopRegisters(code, 0, sizeof(Vector)); + code.FMOV(X2, D0); + code.FMOV(X3, V0.D()[1]); code.LDR(X0, l_this); code.LDR(Xscratch0, l_addr); @@ -194,10 +192,8 @@ static void* EmitExclusiveWrite128CallTrampoline(oaknut::CodeGenerator& code, co }; void* target = code.ptr(); - ABI_PushRegisters(code, 0, sizeof(Vector)); - code.STR(Q0, SP); - code.LDP(X2, X3, SP); - ABI_PopRegisters(code, 0, sizeof(Vector)); + code.FMOV(X2, D0); + code.FMOV(X3, V0.D()[1]); code.LDR(X0, l_this); code.LDR(Xscratch0, l_addr);
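Taken together, the final hunk reduces the 128-bit argument marshalling in both write trampolines to a pair of register-to-register moves. A short annotated sketch of the resulting sequence (oaknut syntax as in the hunks above; the stack-based sequence it replaces is shown for comparison):

    // Move the 128-bit value in Q0/V0 into the X2:X3 argument pair.
    code.FMOV(X2, D0);         // low 64 bits (scalar D view of V0)
    code.FMOV(X3, V0.D()[1]);  // high 64 bits (lane 1 of the two-lane D view)

    // Replaced sequence: spill Q0 and reload both halves through memory.
    //   ABI_PushRegisters(code, 0, sizeof(Vector));
    //   code.STR(Q0, SP);
    //   code.LDP(X2, X3, SP);
    //   ABI_PopRegisters(code, 0, sizeof(Vector));

The two FMOVs avoid the store/load pair and the stack adjustment entirely, at no cost in register pressure, since X2 and X3 are already clobbered as call arguments.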