diff --git a/.github/workflows/aarch64.yml b/.github/workflows/aarch64.yml index 38a794bb..05d6d271 100644 --- a/.github/workflows/aarch64.yml +++ b/.github/workflows/aarch64.yml @@ -48,7 +48,6 @@ jobs: -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DDYNARMIC_TESTS_USE_UNICORN=0 -DDYNARMIC_USE_LLVM=0 - -DDYNARMIC_FRONTENDS=A32 -G Ninja - name: Build AArch64 @@ -66,7 +65,6 @@ jobs: -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - -DDYNARMIC_FRONTENDS=A32 -DDYNARMIC_TESTS_USE_UNICORN=0 -DDYNARMIC_USE_LLVM=0 -G Ninja @@ -79,6 +77,23 @@ jobs: working-directory: ${{github.workspace}} run: qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_tests -d yes - - name: Test against x86_64 implementation + - name: Test against x86_64 implementation (A32, thumb) working-directory: ${{github.workspace}} - run: diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator) <(./build-x64/tests/dynarmic_test_generator) + run: | + diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator thumb 42 1 100000) <(./build-x64/tests/dynarmic_test_generator thumb 42 1 100000) + diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator thumb 42 10 10000) <(./build-x64/tests/dynarmic_test_generator thumb 42 10 10000) + diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator thumb 42 100 1000) <(./build-x64/tests/dynarmic_test_generator thumb 42 100 1000) + + - name: Test against x86_64 implementation (A32, arm) + working-directory: ${{github.workspace}} + run: | + diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator arm 42 1 100000) <(./build-x64/tests/dynarmic_test_generator arm 42 1 100000) + diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator arm 42 10 10000) <(./build-x64/tests/dynarmic_test_generator arm 42 10 10000) + diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator arm 42 100 1000) <(./build-x64/tests/dynarmic_test_generator arm 42 100 1000) + + - name: Test against x86_64 implementation (A64) + working-directory: ${{github.workspace}} + run: | + diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator a64 42 1 100000) <(./build-x64/tests/dynarmic_test_generator a64 42 1 100000) + diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator a64 42 10 10000) <(./build-x64/tests/dynarmic_test_generator a64 42 10 10000) + diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator a64 42 100 1000) <(./build-x64/tests/dynarmic_test_generator a64 42 100 1000) diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index 423de034..992f3c4f 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -373,6 +373,8 @@ elseif(ARCHITECTURE STREQUAL "arm64") backend/arm64/a32_jitstate.h backend/arm64/abi.cpp backend/arm64/abi.h + backend/arm64/address_space.cpp + backend/arm64/address_space.h backend/arm64/devirtualize.h backend/arm64/emit_arm64.cpp backend/arm64/emit_arm64.h @@ -406,14 +408,16 @@ elseif(ARCHITECTURE STREQUAL "arm64") backend/arm64/a32_address_space.h backend/arm64/a32_core.h backend/arm64/a32_interface.cpp - - # Move this to the list below when implemented - backend/arm64/a64_interface.cpp ) endif() if ("A64" IN_LIST DYNARMIC_FRONTENDS) - message(FATAL_ERROR "TODO: 
Unimplemented frontend for this host architecture") + target_sources(dynarmic PRIVATE + backend/arm64/a64_address_space.cpp + backend/arm64/a64_address_space.h + backend/arm64/a64_core.h + backend/arm64/a64_interface.cpp + ) endif() else() message(FATAL_ERROR "Unsupported architecture") diff --git a/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/backend/arm64/a32_address_space.cpp index 01b78938..333d21fa 100644 --- a/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -14,6 +14,7 @@ #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/translate/a32_translate.h" +#include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/exclusive_monitor.h" #include "dynarmic/ir/opt/passes.h" @@ -97,9 +98,8 @@ static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const } A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) - : conf(conf) - , mem(conf.code_cache_size) - , code(mem.ptr()) { + : AddressSpace(conf.code_cache_size) + , conf(conf) { EmitPrelude(); } @@ -121,33 +121,6 @@ IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { return ir_block; } -CodePtr A32AddressSpace::Get(IR::LocationDescriptor descriptor) { - if (const auto iter = block_entries.find(descriptor.Value()); iter != block_entries.end()) { - return iter->second; - } - return nullptr; -} - -CodePtr A32AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) { - if (CodePtr block_entry = Get(descriptor)) { - return block_entry; - } - - IR::Block ir_block = GenerateIR(descriptor); - const EmittedBlockInfo block_info = Emit(std::move(ir_block)); - - block_infos.insert_or_assign(descriptor.Value(), block_info); - block_entries.insert_or_assign(descriptor.Value(), block_info.entry_point); - return block_info.entry_point; -} - -void A32AddressSpace::ClearCache() { - block_entries.clear(); - block_infos.clear(); - block_references.clear(); - code.set_ptr(prelude_info.end_of_prelude); -} - void A32AddressSpace::EmitPrelude() { using namespace oaknut::util; @@ -291,153 +264,33 @@ void A32AddressSpace::EmitPrelude() { mem.protect(); } -size_t A32AddressSpace::GetRemainingSize() { - return conf.code_cache_size - (code.ptr() - reinterpret_cast(mem.ptr())); -} +EmitConfig A32AddressSpace::GetEmitConfig() { + return EmitConfig{ + .optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations, -EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) { - if (GetRemainingSize() < 1024 * 1024) { - ClearCache(); - } - - mem.unprotect(); - - const EmitConfig emit_conf{ .hook_isb = conf.hook_isb, + + .cntfreq_el0{}, + .ctr_el0{}, + .dczid_el0{}, + .tpidrro_el0{}, + .tpidr_el0{}, + + .wall_clock_cntpct = conf.wall_clock_cntpct, .enable_cycle_counting = conf.enable_cycle_counting, + .always_little_endian = conf.always_little_endian, + .descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return FP::FPCR{A32::LocationDescriptor{location}.FPSCR().Value()}; }, + .emit_cond = EmitA32Cond, + .emit_condition_failed_terminal = EmitA32ConditionFailedTerminal, + .emit_terminal = EmitA32Terminal, + .state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv), .state_fpsr_offset = offsetof(A32JitState, fpsr), + .coprocessors = conf.coprocessors, - .optimizations = conf.unsafe_optimizations ? 
conf.optimizations : conf.optimizations & all_safe_optimizations, }; - EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf); - - Link(block.Location(), block_info); - - mem.invalidate(reinterpret_cast(block_info.entry_point), block_info.size); - - RelinkForDescriptor(block.Location()); - - mem.protect(); - - return block_info; -} - -static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector& block_relocations_list) { - using namespace oaknut; - using namespace oaknut::util; - - for (auto [ptr_offset] : block_relocations_list) { - CodeGenerator c{reinterpret_cast(entry_point + ptr_offset)}; - - if (target_ptr) { - c.B((void*)target_ptr); - } else { - c.NOP(); - } - } -} - -void A32AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block_info) { - using namespace oaknut; - using namespace oaknut::util; - - for (auto [ptr_offset, target] : block_info.relocations) { - CodeGenerator c{reinterpret_cast(block_info.entry_point + ptr_offset)}; - - switch (target) { - case LinkTarget::ReturnToDispatcher: - c.B(prelude_info.return_to_dispatcher); - break; - case LinkTarget::ReturnFromRunCode: - c.B(prelude_info.return_from_run_code); - break; - case LinkTarget::ReadMemory8: - c.BL(prelude_info.read_memory_8); - break; - case LinkTarget::ReadMemory16: - c.BL(prelude_info.read_memory_16); - break; - case LinkTarget::ReadMemory32: - c.BL(prelude_info.read_memory_32); - break; - case LinkTarget::ReadMemory64: - c.BL(prelude_info.read_memory_64); - break; - case LinkTarget::ExclusiveReadMemory8: - c.BL(prelude_info.exclusive_read_memory_8); - break; - case LinkTarget::ExclusiveReadMemory16: - c.BL(prelude_info.exclusive_read_memory_16); - break; - case LinkTarget::ExclusiveReadMemory32: - c.BL(prelude_info.exclusive_read_memory_32); - break; - case LinkTarget::ExclusiveReadMemory64: - c.BL(prelude_info.exclusive_read_memory_64); - break; - case LinkTarget::WriteMemory8: - c.BL(prelude_info.write_memory_8); - break; - case LinkTarget::WriteMemory16: - c.BL(prelude_info.write_memory_16); - break; - case LinkTarget::WriteMemory32: - c.BL(prelude_info.write_memory_32); - break; - case LinkTarget::WriteMemory64: - c.BL(prelude_info.write_memory_64); - break; - case LinkTarget::ExclusiveWriteMemory8: - c.BL(prelude_info.exclusive_write_memory_8); - break; - case LinkTarget::ExclusiveWriteMemory16: - c.BL(prelude_info.exclusive_write_memory_16); - break; - case LinkTarget::ExclusiveWriteMemory32: - c.BL(prelude_info.exclusive_write_memory_32); - break; - case LinkTarget::ExclusiveWriteMemory64: - c.BL(prelude_info.exclusive_write_memory_64); - break; - case LinkTarget::CallSVC: - c.BL(prelude_info.call_svc); - break; - case LinkTarget::ExceptionRaised: - c.BL(prelude_info.exception_raised); - break; - case LinkTarget::InstructionSynchronizationBarrierRaised: - c.BL(prelude_info.isb_raised); - break; - case LinkTarget::AddTicks: - c.BL(prelude_info.add_ticks); - break; - case LinkTarget::GetTicksRemaining: - c.BL(prelude_info.get_ticks_remaining); - break; - default: - ASSERT_FALSE("Invalid relocation target"); - } - } - - for (auto [target_descriptor, list] : block_info.block_relocations) { - block_references[target_descriptor.Value()].emplace(block_descriptor.Value()); - LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list); - } -} - -void A32AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor) { - for (auto block_descriptor : block_references[target_descriptor.Value()]) { - if (auto 
iter = block_infos.find(block_descriptor); iter != block_infos.end()) { - const EmittedBlockInfo& block_info = iter->second; - - LinkBlockLinks(block_info.entry_point, Get(target_descriptor), block_infos[block_descriptor].block_relocations[target_descriptor]); - - mem.invalidate(reinterpret_cast(block_info.entry_point), block_info.size); - } - } } } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/a32_address_space.h b/src/dynarmic/backend/arm64/a32_address_space.h index 18ed118d..e33be9b0 100644 --- a/src/dynarmic/backend/arm64/a32_address_space.h +++ b/src/dynarmic/backend/arm64/a32_address_space.h @@ -5,84 +5,24 @@ #pragma once -#include -#include -#include -#include -#include - -#include "dynarmic/backend/arm64/emit_arm64.h" +#include "dynarmic/backend/arm64/address_space.h" #include "dynarmic/interface/A32/config.h" -#include "dynarmic/interface/halt_reason.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/location_descriptor.h" namespace Dynarmic::Backend::Arm64 { -struct A32JitState; - -class A32AddressSpace final { +class A32AddressSpace final : public AddressSpace { public: explicit A32AddressSpace(const A32::UserConfig& conf); - IR::Block GenerateIR(IR::LocationDescriptor) const; + IR::Block GenerateIR(IR::LocationDescriptor) const override; - CodePtr Get(IR::LocationDescriptor descriptor); - - CodePtr GetOrEmit(IR::LocationDescriptor descriptor); - - void ClearCache(); - -private: +protected: friend class A32Core; void EmitPrelude(); - - size_t GetRemainingSize(); - EmittedBlockInfo Emit(IR::Block ir_block); - void Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block); - void RelinkForDescriptor(IR::LocationDescriptor target_descriptor); + EmitConfig GetEmitConfig() override; const A32::UserConfig conf; - - oaknut::CodeBlock mem; - oaknut::CodeGenerator code; - - tsl::robin_map block_entries; - tsl::robin_map block_infos; - tsl::robin_map> block_references; - - struct PreludeInfo { - u32* end_of_prelude; - - using RunCodeFuncType = HaltReason (*)(CodePtr entry_point, A32JitState* context, volatile u32* halt_reason); - RunCodeFuncType run_code; - RunCodeFuncType step_code; - void* return_to_dispatcher; - void* return_from_run_code; - - void* read_memory_8; - void* read_memory_16; - void* read_memory_32; - void* read_memory_64; - void* exclusive_read_memory_8; - void* exclusive_read_memory_16; - void* exclusive_read_memory_32; - void* exclusive_read_memory_64; - void* write_memory_8; - void* write_memory_16; - void* write_memory_32; - void* write_memory_64; - void* exclusive_write_memory_8; - void* exclusive_write_memory_16; - void* exclusive_write_memory_32; - void* exclusive_write_memory_64; - void* call_svc; - void* exception_raised; - void* isb_raised; - void* add_ticks; - void* get_ticks_remaining; - } prelude_info; }; } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/backend/arm64/a64_address_space.cpp new file mode 100644 index 00000000..96ce49b8 --- /dev/null +++ b/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -0,0 +1,416 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2022 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#include "dynarmic/backend/arm64/a64_address_space.h" + +#include "dynarmic/backend/arm64/a64_jitstate.h" +#include "dynarmic/backend/arm64/abi.h" +#include "dynarmic/backend/arm64/devirtualize.h" +#include "dynarmic/backend/arm64/emit_arm64.h" +#include "dynarmic/backend/arm64/stack_layout.h" +#include "dynarmic/common/cast_util.h" +#include "dynarmic/common/fp/fpcr.h" +#include "dynarmic/frontend/A64/a64_location_descriptor.h" +#include "dynarmic/frontend/A64/translate/a64_translate.h" +#include "dynarmic/interface/A64/config.h" +#include "dynarmic/interface/exclusive_monitor.h" +#include "dynarmic/ir/opt/passes.h" + +namespace Dynarmic::Backend::Arm64 { + +template +static void* EmitCallTrampoline(oaknut::CodeGenerator& code, T* this_) { + using namespace oaknut::util; + + const auto info = Devirtualize(this_); + + oaknut::Label l_addr, l_this; + + void* target = code.ptr(); + code.LDR(X0, l_this); + code.LDR(Xscratch0, l_addr); + code.BR(Xscratch0); + + code.align(8); + code.l(l_this); + code.dx(info.this_ptr); + code.l(l_addr); + code.dx(info.fn_ptr); + + return target; +} + +template +static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) { + using namespace oaknut::util; + + oaknut::Label l_addr, l_this; + + auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr) -> T { + return conf.global_monitor->ReadAndMark(conf.processor_id, vaddr, [&]() -> T { + return (conf.callbacks->*callback)(vaddr); + }); + }; + + void* target = code.ptr(); + code.LDR(X0, l_this); + code.LDR(Xscratch0, l_addr); + code.BR(Xscratch0); + + code.align(8); + code.l(l_this); + code.dx(mcl::bit_cast(&conf)); + code.l(l_addr); + code.dx(mcl::bit_cast(Common::FptrCast(fn))); + + return target; +} + +template +static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) { + using namespace oaknut::util; + + oaknut::Label l_addr, l_this; + + auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, T value) -> u32 { + return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, + [&](T expected) -> bool { + return (conf.callbacks->*callback)(vaddr, value, expected); + }) + ? 
0 + : 1; + }; + + void* target = code.ptr(); + code.LDR(X0, l_this); + code.LDR(Xscratch0, l_addr); + code.BR(Xscratch0); + + code.align(8); + code.l(l_this); + code.dx(mcl::bit_cast(&conf)); + code.l(l_addr); + code.dx(mcl::bit_cast(Common::FptrCast(fn))); + + return target; +} + +static void* EmitRead128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) { + using namespace oaknut::util; + + const auto info = Devirtualize<&A64::UserCallbacks::MemoryRead128>(this_); + + oaknut::Label l_addr, l_this; + + void* target = code.ptr(); + ABI_PushRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector)); + code.LDR(X0, l_this); + code.LDR(Xscratch0, l_addr); + code.BLR(Xscratch0); + code.STP(X0, X1, SP); + code.LDR(Q0, SP); + ABI_PopRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector)); + code.RET(); + + code.align(8); + code.l(l_this); + code.dx(info.this_ptr); + code.l(l_addr); + code.dx(info.fn_ptr); + + return target; +} + +static void* EmitExclusiveRead128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) { + using namespace oaknut::util; + + oaknut::Label l_addr, l_this; + + auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr) -> Vector { + return conf.global_monitor->ReadAndMark(conf.processor_id, vaddr, [&]() -> Vector { + return conf.callbacks->MemoryRead128(vaddr); + }); + }; + + void* target = code.ptr(); + ABI_PushRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector)); + code.LDR(X0, l_this); + code.LDR(Xscratch0, l_addr); + code.BLR(Xscratch0); + code.STP(X0, X1, SP); + code.LDR(Q0, SP); + ABI_PopRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector)); + code.RET(); + + code.align(8); + code.l(l_this); + code.dx(mcl::bit_cast(&conf)); + code.l(l_addr); + code.dx(mcl::bit_cast(Common::FptrCast(fn))); + + return target; +} + +static void* EmitWrite128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) { + using namespace oaknut::util; + + const auto info = Devirtualize<&A64::UserCallbacks::MemoryWrite128>(this_); + + oaknut::Label l_addr, l_this; + + void* target = code.ptr(); + code.FMOV(X2, D0); + code.FMOV(X3, V0.D()[1]); + + code.LDR(X0, l_this); + code.LDR(Xscratch0, l_addr); + code.BR(Xscratch0); + + code.align(8); + code.l(l_this); + code.dx(info.this_ptr); + code.l(l_addr); + code.dx(info.fn_ptr); + + return target; +} + +static void* EmitExclusiveWrite128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) { + using namespace oaknut::util; + + oaknut::Label l_addr, l_this; + + auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, Vector value) -> u32 { + return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, + [&](Vector expected) -> bool { + return conf.callbacks->MemoryWriteExclusive128(vaddr, value, expected); + }) + ? 
0 + : 1; + }; + + void* target = code.ptr(); + code.FMOV(X2, D0); + code.FMOV(X3, V0.D()[1]); + + code.LDR(X0, l_this); + code.LDR(Xscratch0, l_addr); + code.BR(Xscratch0); + + code.align(8); + code.l(l_this); + code.dx(mcl::bit_cast(&conf)); + code.l(l_addr); + code.dx(mcl::bit_cast(Common::FptrCast(fn))); + + return target; +} + +A64AddressSpace::A64AddressSpace(const A64::UserConfig& conf) + : AddressSpace(conf.code_cache_size) + , conf(conf) { + EmitPrelude(); +} + +IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { + const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; + IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code, + {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); + + Optimization::A64CallbackConfigPass(ir_block, conf); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { + Optimization::A64GetSetElimination(ir_block); + Optimization::DeadCodeElimination(ir_block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) { + Optimization::ConstantPropagation(ir_block); + Optimization::DeadCodeElimination(ir_block); + } + if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { + Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); + } + Optimization::VerificationPass(ir_block); + + return ir_block; +} + +void A64AddressSpace::EmitPrelude() { + using namespace oaknut::util; + + mem.unprotect(); + + prelude_info.read_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead8>(code, conf.callbacks); + prelude_info.read_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead16>(code, conf.callbacks); + prelude_info.read_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks); + prelude_info.read_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks); + prelude_info.read_memory_128 = EmitRead128CallTrampoline(code, conf.callbacks); + prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead8, u8>(code, conf); + prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead16, u16>(code, conf); + prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead32, u32>(code, conf); + prelude_info.exclusive_read_memory_64 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead64, u64>(code, conf); + prelude_info.exclusive_read_memory_128 = EmitExclusiveRead128CallTrampoline(code, conf); + prelude_info.write_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite8>(code, conf.callbacks); + prelude_info.write_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite16>(code, conf.callbacks); + prelude_info.write_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks); + prelude_info.write_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks); + prelude_info.write_memory_128 = EmitWrite128CallTrampoline(code, conf.callbacks); + prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf); + prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf); + prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive32, 
u32>(code, conf); + prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf); + prelude_info.exclusive_write_memory_128 = EmitExclusiveWrite128CallTrampoline(code, conf); + prelude_info.call_svc = EmitCallTrampoline<&A64::UserCallbacks::CallSVC>(code, conf.callbacks); + prelude_info.exception_raised = EmitCallTrampoline<&A64::UserCallbacks::ExceptionRaised>(code, conf.callbacks); + prelude_info.isb_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks); + prelude_info.ic_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionCacheOperationRaised>(code, conf.callbacks); + prelude_info.dc_raised = EmitCallTrampoline<&A64::UserCallbacks::DataCacheOperationRaised>(code, conf.callbacks); + prelude_info.get_cntpct = EmitCallTrampoline<&A64::UserCallbacks::GetCNTPCT>(code, conf.callbacks); + prelude_info.add_ticks = EmitCallTrampoline<&A64::UserCallbacks::AddTicks>(code, conf.callbacks); + prelude_info.get_ticks_remaining = EmitCallTrampoline<&A64::UserCallbacks::GetTicksRemaining>(code, conf.callbacks); + + oaknut::Label return_from_run_code; + + prelude_info.run_code = code.ptr(); + { + ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout)); + + code.MOV(X19, X0); + code.MOV(Xstate, X1); + code.MOV(Xhalt, X2); + + if (conf.enable_cycle_counting) { + code.BL(prelude_info.get_ticks_remaining); + code.MOV(Xticks, X0); + code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run)); + } + + code.MRS(Xscratch1, oaknut::SystemReg::FPCR); + code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr)); + code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr)); + code.MSR(oaknut::SystemReg::FPCR, Xscratch0); + + code.LDAR(Wscratch0, Xhalt); + code.CBNZ(Wscratch0, return_from_run_code); + + code.BR(X19); + } + + prelude_info.step_code = code.ptr(); + { + ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout)); + + code.MOV(X19, X0); + code.MOV(Xstate, X1); + code.MOV(Xhalt, X2); + + if (conf.enable_cycle_counting) { + code.MOV(Xticks, 1); + code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run)); + } + + code.MRS(Xscratch1, oaknut::SystemReg::FPCR); + code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr)); + code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr)); + code.MSR(oaknut::SystemReg::FPCR, Xscratch0); + + oaknut::Label step_hr_loop; + code.l(step_hr_loop); + code.LDAXR(Wscratch0, Xhalt); + code.CBNZ(Wscratch0, return_from_run_code); + code.ORR(Wscratch0, Wscratch0, static_cast(HaltReason::Step)); + code.STLXR(Wscratch1, Wscratch0, Xhalt); + code.CBNZ(Wscratch1, step_hr_loop); + + code.BR(X19); + } + + prelude_info.return_to_dispatcher = code.ptr(); + { + oaknut::Label l_this, l_addr; + + code.LDAR(Wscratch0, Xhalt); + code.CBNZ(Wscratch0, return_from_run_code); + + if (conf.enable_cycle_counting) { + code.CMP(Xticks, 0); + code.B(LE, return_from_run_code); + } + + code.LDR(X0, l_this); + code.MOV(X1, Xstate); + code.LDR(Xscratch0, l_addr); + code.BLR(Xscratch0); + code.BR(X0); + + const auto fn = [](A64AddressSpace& self, A64JitState& context) -> CodePtr { + return self.GetOrEmit(context.GetLocationDescriptor()); + }; + + code.align(8); + code.l(l_this); + code.dx(mcl::bit_cast(this)); + code.l(l_addr); + code.dx(mcl::bit_cast(Common::FptrCast(fn))); + } + + prelude_info.return_from_run_code = code.ptr(); + { + code.l(return_from_run_code); + + if (conf.enable_cycle_counting) { + code.LDR(X1, SP, 
offsetof(StackLayout, cycles_to_run));
+            code.SUB(X1, X1, Xticks);
+            code.BL(prelude_info.add_ticks);
+        }
+
+        code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr));
+        code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
+
+        oaknut::Label exit_hr_loop;
+        code.l(exit_hr_loop);
+        code.LDAXR(W0, Xhalt);
+        code.STLXR(Wscratch0, WZR, Xhalt);
+        code.CBNZ(Wscratch0, exit_hr_loop);
+
+        ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
+        code.RET();
+    }
+
+    prelude_info.end_of_prelude = code.ptr();
+
+    mem.invalidate_all();
+    mem.protect();
+}
+
+EmitConfig A64AddressSpace::GetEmitConfig() {
+    return EmitConfig{
+        .optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,
+
+        .hook_isb = conf.hook_isb,
+
+        .cntfreq_el0 = conf.cntfrq_el0,
+        .ctr_el0 = conf.ctr_el0,
+        .dczid_el0 = conf.dczid_el0,
+        .tpidrro_el0 = conf.tpidrro_el0,
+        .tpidr_el0 = conf.tpidr_el0,
+
+        .wall_clock_cntpct = conf.wall_clock_cntpct,
+        .enable_cycle_counting = conf.enable_cycle_counting,
+
+        .always_little_endian = true,
+
+        .descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return A64::LocationDescriptor{location}.FPCR(); },
+        .emit_cond = EmitA64Cond,
+        .emit_condition_failed_terminal = EmitA64ConditionFailedTerminal,
+        .emit_terminal = EmitA64Terminal,
+
+        .state_nzcv_offset = offsetof(A64JitState, cpsr_nzcv),
+        .state_fpsr_offset = offsetof(A64JitState, fpsr),
+
+        .coprocessors{},
+    };
+}
+
+}  // namespace Dynarmic::Backend::Arm64
diff --git a/src/dynarmic/backend/arm64/a64_address_space.h b/src/dynarmic/backend/arm64/a64_address_space.h
new file mode 100644
index 00000000..2eadf953
--- /dev/null
+++ b/src/dynarmic/backend/arm64/a64_address_space.h
@@ -0,0 +1,28 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2022 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#pragma once
+
+#include "dynarmic/backend/arm64/address_space.h"
+#include "dynarmic/interface/A64/config.h"
+
+namespace Dynarmic::Backend::Arm64 {
+
+class A64AddressSpace final : public AddressSpace {
+public:
+    explicit A64AddressSpace(const A64::UserConfig& conf);
+
+    IR::Block GenerateIR(IR::LocationDescriptor) const override;
+
+protected:
+    friend class A64Core;
+
+    void EmitPrelude();
+    EmitConfig GetEmitConfig() override;
+
+    const A64::UserConfig conf;
+};
+
+}  // namespace Dynarmic::Backend::Arm64
diff --git a/src/dynarmic/backend/arm64/a64_core.h b/src/dynarmic/backend/arm64/a64_core.h
new file mode 100644
index 00000000..24fbb66b
--- /dev/null
+++ b/src/dynarmic/backend/arm64/a64_core.h
@@ -0,0 +1,30 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2022 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#pragma once + +#include "dynarmic/backend/arm64/a64_address_space.h" +#include "dynarmic/backend/arm64/a64_jitstate.h" + +namespace Dynarmic::Backend::Arm64 { + +class A64Core final { +public: + explicit A64Core(const A64::UserConfig&) {} + + HaltReason Run(A64AddressSpace& process, A64JitState& thread_ctx, volatile u32* halt_reason) { + const auto location_descriptor = thread_ctx.GetLocationDescriptor(); + const auto entry_point = process.GetOrEmit(location_descriptor); + return process.prelude_info.run_code(entry_point, &thread_ctx, halt_reason); + } + + HaltReason Step(A64AddressSpace& process, A64JitState& thread_ctx, volatile u32* halt_reason) { + const auto location_descriptor = A64::LocationDescriptor{thread_ctx.GetLocationDescriptor()}.SetSingleStepping(true); + const auto entry_point = process.GetOrEmit(location_descriptor); + return process.prelude_info.step_code(entry_point, &thread_ctx, halt_reason); + } +}; + +} // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/a64_interface.cpp b/src/dynarmic/backend/arm64/a64_interface.cpp index 36ffbdc4..0698172c 100644 --- a/src/dynarmic/backend/arm64/a64_interface.cpp +++ b/src/dynarmic/backend/arm64/a64_interface.cpp @@ -1,5 +1,5 @@ /* This file is part of the dynarmic project. - * Copyright (c) 2021 MerryMage + * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD */ @@ -11,136 +11,310 @@ #include #include +#include "dynarmic/backend/arm64/a64_address_space.h" +#include "dynarmic/backend/arm64/a64_core.h" +#include "dynarmic/backend/arm64/a64_jitstate.h" #include "dynarmic/common/atomic.h" #include "dynarmic/interface/A64/a64.h" #include "dynarmic/interface/A64/config.h" namespace Dynarmic::A64 { -struct Jit::Impl {}; +using namespace Backend::Arm64; -Jit::Jit(UserConfig conf) { - (void)conf; +struct Jit::Impl final { + Impl(Jit*, A64::UserConfig conf) + : conf(conf) + , current_address_space(conf) + , core(conf) {} + + HaltReason Run() { + ASSERT(!is_executing); + PerformRequestedCacheInvalidation(); + + is_executing = true; + SCOPE_EXIT { + is_executing = false; + }; + + HaltReason hr = core.Run(current_address_space, current_state, &halt_reason); + + PerformRequestedCacheInvalidation(); + + return hr; + } + + HaltReason Step() { + ASSERT(!is_executing); + PerformRequestedCacheInvalidation(); + + is_executing = true; + SCOPE_EXIT { + is_executing = false; + }; + + HaltReason hr = core.Step(current_address_space, current_state, &halt_reason); + + PerformRequestedCacheInvalidation(); + + return hr; + } + + void ClearCache() { + std::unique_lock lock{invalidation_mutex}; + invalidate_entire_cache = true; + HaltExecution(HaltReason::CacheInvalidation); + } + + void InvalidateCacheRange(std::uint64_t start_address, std::size_t length) { + std::unique_lock lock{invalidation_mutex}; + invalid_cache_ranges.add(boost::icl::discrete_interval::closed(start_address, start_address + length - 1)); + HaltExecution(HaltReason::CacheInvalidation); + } + + void Reset() { + current_state = {}; + } + + void HaltExecution(HaltReason hr) { + Atomic::Or(&halt_reason, static_cast(hr)); + } + + void ClearHalt(HaltReason hr) { + Atomic::And(&halt_reason, ~static_cast(hr)); + } + + std::uint64_t PC() const { + return current_state.pc; + } + + void SetPC(std::uint64_t value) { + current_state.pc = value; + } + + std::uint64_t SP() const { + return current_state.sp; + } + + void SetSP(std::uint64_t value) { + current_state.sp = value; + } + + 
std::array& Regs() { + return current_state.reg; + } + + const std::array& Regs() const { + return current_state.reg; + } + + std::array& VecRegs() { + return current_state.vec; + } + + const std::array& VecRegs() const { + return current_state.vec; + } + + std::uint32_t Fpcr() const { + return current_state.fpcr; + } + + void SetFpcr(std::uint32_t value) { + current_state.fpcr = value; + } + + std::uint32_t Fpsr() const { + return current_state.fpsr; + } + + void SetFpsr(std::uint32_t value) { + current_state.fpsr = value; + } + + std::uint32_t Pstate() const { + return current_state.cpsr_nzcv; + } + + void SetPstate(std::uint32_t value) { + current_state.cpsr_nzcv = value; + } + + void ClearExclusiveState() { + current_state.exclusive_state = false; + } + + bool IsExecuting() const { + return is_executing; + } + + void DumpDisassembly() const { + ASSERT_FALSE("Unimplemented"); + } + + std::vector Disassemble() const { + ASSERT_FALSE("Unimplemented"); + } + +private: + void PerformRequestedCacheInvalidation() { + ClearHalt(HaltReason::CacheInvalidation); + + if (invalidate_entire_cache) { + current_address_space.ClearCache(); + + invalidate_entire_cache = false; + invalid_cache_ranges.clear(); + return; + } + + if (!invalid_cache_ranges.empty()) { + // TODO: Optimize + current_address_space.ClearCache(); + + invalid_cache_ranges.clear(); + return; + } + } + + A64::UserConfig conf; + A64JitState current_state{}; + A64AddressSpace current_address_space; + A64Core core; + + volatile u32 halt_reason = 0; + + std::mutex invalidation_mutex; + boost::icl::interval_set invalid_cache_ranges; + bool invalidate_entire_cache = false; + bool is_executing = false; +}; + +Jit::Jit(UserConfig conf) + : impl{std::make_unique(this, conf)} { } Jit::~Jit() = default; HaltReason Jit::Run() { - ASSERT_FALSE("not implemented"); + return impl->Run(); } HaltReason Jit::Step() { - ASSERT_FALSE("not implemented"); + return impl->Step(); } void Jit::ClearCache() { + impl->ClearCache(); } void Jit::InvalidateCacheRange(std::uint64_t start_address, std::size_t length) { - (void)start_address; - (void)length; + impl->InvalidateCacheRange(start_address, length); } void Jit::Reset() { + impl->Reset(); } void Jit::HaltExecution(HaltReason hr) { - (void)hr; + impl->HaltExecution(hr); } void Jit::ClearHalt(HaltReason hr) { - (void)hr; + impl->ClearHalt(hr); } std::uint64_t Jit::GetSP() const { - return 0; + return impl->SP(); } void Jit::SetSP(std::uint64_t value) { - (void)value; + impl->SetSP(value); } std::uint64_t Jit::GetPC() const { - return 0; + return impl->PC(); } void Jit::SetPC(std::uint64_t value) { - (void)value; + impl->SetPC(value); } std::uint64_t Jit::GetRegister(std::size_t index) const { - (void)index; - return 0; + return impl->Regs()[index]; } void Jit::SetRegister(size_t index, std::uint64_t value) { - (void)index; - (void)value; + impl->Regs()[index] = value; } std::array Jit::GetRegisters() const { - return {}; + return impl->Regs(); } void Jit::SetRegisters(const std::array& value) { - (void)value; + impl->Regs() = value; } Vector Jit::GetVector(std::size_t index) const { - (void)index; - return {}; + auto& vec = impl->VecRegs(); + return {vec[index * 2], vec[index * 2 + 1]}; } void Jit::SetVector(std::size_t index, Vector value) { - (void)index; - (void)value; + auto& vec = impl->VecRegs(); + vec[index * 2] = value[0]; + vec[index * 2 + 1] = value[1]; } std::array Jit::GetVectors() const { - return {}; + std::array ret; + std::memcpy(ret.data(), impl->VecRegs().data(), sizeof(ret)); + return ret; } 
void Jit::SetVectors(const std::array& value) { - (void)value; + std::memcpy(impl->VecRegs().data(), value.data(), sizeof(value)); } std::uint32_t Jit::GetFpcr() const { - return 0; + return impl->Fpcr(); } void Jit::SetFpcr(std::uint32_t value) { - (void)value; + impl->SetFpcr(value); } std::uint32_t Jit::GetFpsr() const { - return 0; + return impl->Fpsr(); } void Jit::SetFpsr(std::uint32_t value) { - (void)value; + impl->SetFpsr(value); } std::uint32_t Jit::GetPstate() const { - return 0; + return impl->Pstate(); } void Jit::SetPstate(std::uint32_t value) { - (void)value; + impl->SetPstate(value); } void Jit::ClearExclusiveState() { + impl->ClearExclusiveState(); } bool Jit::IsExecuting() const { - return false; + return impl->IsExecuting(); } void Jit::DumpDisassembly() const { - ASSERT_FALSE("not implemented"); + impl->DumpDisassembly(); } std::vector Jit::Disassemble() const { - ASSERT_FALSE("not implemented"); + return impl->Disassemble(); } } // namespace Dynarmic::A64 diff --git a/src/dynarmic/backend/arm64/a64_jitstate.h b/src/dynarmic/backend/arm64/a64_jitstate.h new file mode 100644 index 00000000..215e6987 --- /dev/null +++ b/src/dynarmic/backend/arm64/a64_jitstate.h @@ -0,0 +1,37 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2022 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#pragma once + +#include + +#include + +#include "dynarmic/frontend/A64/a64_location_descriptor.h" + +namespace Dynarmic::Backend::Arm64 { + +struct A64JitState { + std::array reg{}; + u64 sp = 0; + u64 pc = 0; + + u32 cpsr_nzcv = 0; + + alignas(16) std::array vec{}; + + u32 exclusive_state = 0; + + u32 fpsr = 0; + u32 fpcr = 0; + + IR::LocationDescriptor GetLocationDescriptor() const { + const u64 fpcr_u64 = static_cast(fpcr & A64::LocationDescriptor::fpcr_mask) << A64::LocationDescriptor::fpcr_shift; + const u64 pc_u64 = pc & A64::LocationDescriptor::pc_mask; + return IR::LocationDescriptor{pc_u64 | fpcr_u64}; + } +}; + +} // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/abi.cpp b/src/dynarmic/backend/arm64/abi.cpp index b34f807b..e6913f0d 100644 --- a/src/dynarmic/backend/arm64/abi.cpp +++ b/src/dynarmic/backend/arm64/abi.cpp @@ -55,13 +55,15 @@ static FrameInfo CalculateFrameInfo(RegisterList rl, size_t frame_size) { }; } -#define DO_IT(TYPE, REG_TYPE, PAIR_OP, SINGLE_OP, OFFSET) \ - for (size_t i = 0; i < frame_info.TYPE##s.size() - 1; i += 2) { \ - code.PAIR_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, oaknut::REG_TYPE{frame_info.TYPE##s[i + 1]}, SP, (OFFSET) + i * TYPE##_size); \ - } \ - if (frame_info.TYPE##s.size() % 2 == 1) { \ - const size_t i = frame_info.TYPE##s.size() - 1; \ - code.SINGLE_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, SP, (OFFSET) + i * TYPE##_size); \ +#define DO_IT(TYPE, REG_TYPE, PAIR_OP, SINGLE_OP, OFFSET) \ + if (frame_info.TYPE##s.size() > 0) { \ + for (size_t i = 0; i < frame_info.TYPE##s.size() - 1; i += 2) { \ + code.PAIR_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, oaknut::REG_TYPE{frame_info.TYPE##s[i + 1]}, SP, (OFFSET) + i * TYPE##_size); \ + } \ + if (frame_info.TYPE##s.size() % 2 == 1) { \ + const size_t i = frame_info.TYPE##s.size() - 1; \ + code.SINGLE_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, SP, (OFFSET) + i * TYPE##_size); \ + } \ } void ABI_PushRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t frame_size) { diff --git a/src/dynarmic/backend/arm64/address_space.cpp b/src/dynarmic/backend/arm64/address_space.cpp new file mode 100644 index 00000000..3aeaa453 --- /dev/null +++ 
b/src/dynarmic/backend/arm64/address_space.cpp
@@ -0,0 +1,213 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2022 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#include "dynarmic/backend/arm64/a64_address_space.h"
+#include "dynarmic/backend/arm64/a64_jitstate.h"
+#include "dynarmic/backend/arm64/abi.h"
+#include "dynarmic/backend/arm64/devirtualize.h"
+#include "dynarmic/backend/arm64/emit_arm64.h"
+#include "dynarmic/backend/arm64/stack_layout.h"
+#include "dynarmic/common/cast_util.h"
+#include "dynarmic/common/fp/fpcr.h"
+#include "dynarmic/interface/exclusive_monitor.h"
+#include "dynarmic/ir/opt/passes.h"
+
+namespace Dynarmic::Backend::Arm64 {
+
+AddressSpace::AddressSpace(size_t code_cache_size)
+    : code_cache_size(code_cache_size)
+    , mem(code_cache_size)
+    , code(mem.ptr()) {}
+
+AddressSpace::~AddressSpace() = default;
+
+CodePtr AddressSpace::Get(IR::LocationDescriptor descriptor) {
+    if (const auto iter = block_entries.find(descriptor.Value()); iter != block_entries.end()) {
+        return iter->second;
+    }
+    return nullptr;
+}
+
+CodePtr AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) {
+    if (CodePtr block_entry = Get(descriptor)) {
+        return block_entry;
+    }
+
+    IR::Block ir_block = GenerateIR(descriptor);
+    const EmittedBlockInfo block_info = Emit(std::move(ir_block));
+
+    block_infos.insert_or_assign(descriptor.Value(), block_info);
+    block_entries.insert_or_assign(descriptor.Value(), block_info.entry_point);
+    return block_info.entry_point;
+}
+
+void AddressSpace::ClearCache() {
+    block_entries.clear();
+    block_infos.clear();
+    block_references.clear();
+    code.set_ptr(prelude_info.end_of_prelude);
+}
+
+size_t AddressSpace::GetRemainingSize() {
+    return code_cache_size - (code.ptr<CodePtr>() - reinterpret_cast<CodePtr>(mem.ptr()));
+}
+
+EmittedBlockInfo AddressSpace::Emit(IR::Block block) {
+    if (GetRemainingSize() < 1024 * 1024) {
+        ClearCache();
+    }
+
+    mem.unprotect();
+
+    EmittedBlockInfo block_info = EmitArm64(code, std::move(block), GetEmitConfig());
+
+    Link(block.Location(), block_info);
+
+    mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
+
+    RelinkForDescriptor(block.Location());
+
+    mem.protect();
+
+    return block_info;
+}
+
+static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector<BlockRelocation>& block_relocations_list) {
+    using namespace oaknut;
+    using namespace oaknut::util;
+
+    for (auto [ptr_offset] : block_relocations_list) {
+        CodeGenerator c{reinterpret_cast<u32*>(entry_point + ptr_offset)};
+
+        if (target_ptr) {
+            c.B((void*)target_ptr);
+        } else {
+            c.NOP();
+        }
+    }
+}
+
+void AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block_info) {
+    using namespace oaknut;
+    using namespace oaknut::util;
+
+    for (auto [ptr_offset, target] : block_info.relocations) {
+        CodeGenerator c{reinterpret_cast<u32*>(block_info.entry_point + ptr_offset)};
+
+        switch (target) {
+        case LinkTarget::ReturnToDispatcher:
+            c.B(prelude_info.return_to_dispatcher);
+            break;
+        case LinkTarget::ReturnFromRunCode:
+            c.B(prelude_info.return_from_run_code);
+            break;
+        case LinkTarget::ReadMemory8:
+            c.BL(prelude_info.read_memory_8);
+            break;
+        case LinkTarget::ReadMemory16:
+            c.BL(prelude_info.read_memory_16);
+            break;
+        case LinkTarget::ReadMemory32:
+            c.BL(prelude_info.read_memory_32);
+            break;
+        case LinkTarget::ReadMemory64:
+            c.BL(prelude_info.read_memory_64);
+            break;
+        case LinkTarget::ReadMemory128:
+            c.BL(prelude_info.read_memory_128);
+            break;
+        case LinkTarget::ExclusiveReadMemory8:
+            c.BL(prelude_info.exclusive_read_memory_8);
+            break;
+        case LinkTarget::ExclusiveReadMemory16:
+            c.BL(prelude_info.exclusive_read_memory_16);
+            break;
+        case LinkTarget::ExclusiveReadMemory32:
+            c.BL(prelude_info.exclusive_read_memory_32);
+            break;
+        case LinkTarget::ExclusiveReadMemory64:
+            c.BL(prelude_info.exclusive_read_memory_64);
+            break;
+        case LinkTarget::ExclusiveReadMemory128:
+            c.BL(prelude_info.exclusive_read_memory_128);
+            break;
+        case LinkTarget::WriteMemory8:
+            c.BL(prelude_info.write_memory_8);
+            break;
+        case LinkTarget::WriteMemory16:
+            c.BL(prelude_info.write_memory_16);
+            break;
+        case LinkTarget::WriteMemory32:
+            c.BL(prelude_info.write_memory_32);
+            break;
+        case LinkTarget::WriteMemory64:
+            c.BL(prelude_info.write_memory_64);
+            break;
+        case LinkTarget::WriteMemory128:
+            c.BL(prelude_info.write_memory_128);
+            break;
+        case LinkTarget::ExclusiveWriteMemory8:
+            c.BL(prelude_info.exclusive_write_memory_8);
+            break;
+        case LinkTarget::ExclusiveWriteMemory16:
+            c.BL(prelude_info.exclusive_write_memory_16);
+            break;
+        case LinkTarget::ExclusiveWriteMemory32:
+            c.BL(prelude_info.exclusive_write_memory_32);
+            break;
+        case LinkTarget::ExclusiveWriteMemory64:
+            c.BL(prelude_info.exclusive_write_memory_64);
+            break;
+        case LinkTarget::ExclusiveWriteMemory128:
+            c.BL(prelude_info.exclusive_write_memory_128);
+            break;
+        case LinkTarget::CallSVC:
+            c.BL(prelude_info.call_svc);
+            break;
+        case LinkTarget::ExceptionRaised:
+            c.BL(prelude_info.exception_raised);
+            break;
+        case LinkTarget::InstructionSynchronizationBarrierRaised:
+            c.BL(prelude_info.isb_raised);
+            break;
+        case LinkTarget::InstructionCacheOperationRaised:
+            c.BL(prelude_info.ic_raised);
+            break;
+        case LinkTarget::DataCacheOperationRaised:
+            c.BL(prelude_info.dc_raised);
+            break;
+        case LinkTarget::GetCNTPCT:
+            c.BL(prelude_info.get_cntpct);
+            break;
+        case LinkTarget::AddTicks:
+            c.BL(prelude_info.add_ticks);
+            break;
+        case LinkTarget::GetTicksRemaining:
+            c.BL(prelude_info.get_ticks_remaining);
+            break;
+        default:
+            ASSERT_FALSE("Invalid relocation target");
+        }
+    }
+
+    for (auto [target_descriptor, list] : block_info.block_relocations) {
+        block_references[target_descriptor.Value()].emplace(block_descriptor.Value());
+        LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list);
+    }
+}
+
+void AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor) {
+    for (auto block_descriptor : block_references[target_descriptor.Value()]) {
+        if (auto iter = block_infos.find(block_descriptor); iter != block_infos.end()) {
+            const EmittedBlockInfo& block_info = iter->second;
+
+            LinkBlockLinks(block_info.entry_point, Get(target_descriptor), block_infos[block_descriptor].block_relocations[target_descriptor]);
+
+            mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
+        }
+    }
+}
+
+}  // namespace Dynarmic::Backend::Arm64
diff --git a/src/dynarmic/backend/arm64/address_space.h b/src/dynarmic/backend/arm64/address_space.h
new file mode 100644
index 00000000..3544174c
--- /dev/null
+++ b/src/dynarmic/backend/arm64/address_space.h
@@ -0,0 +1,92 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2022 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "dynarmic/backend/arm64/emit_arm64.h" +#include "dynarmic/interface/halt_reason.h" +#include "dynarmic/ir/basic_block.h" +#include "dynarmic/ir/location_descriptor.h" + +namespace Dynarmic::Backend::Arm64 { + +class AddressSpace { +public: + explicit AddressSpace(size_t code_cache_size); + virtual ~AddressSpace(); + + virtual IR::Block GenerateIR(IR::LocationDescriptor) const = 0; + + CodePtr Get(IR::LocationDescriptor descriptor); + + CodePtr GetOrEmit(IR::LocationDescriptor descriptor); + + void ClearCache(); + +protected: + virtual EmitConfig GetEmitConfig() = 0; + + size_t GetRemainingSize(); + EmittedBlockInfo Emit(IR::Block ir_block); + void Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block); + void RelinkForDescriptor(IR::LocationDescriptor target_descriptor); + + const size_t code_cache_size; + oaknut::CodeBlock mem; + oaknut::CodeGenerator code; + + tsl::robin_map block_entries; + tsl::robin_map block_infos; + tsl::robin_map> block_references; + + struct PreludeInfo { + u32* end_of_prelude; + + using RunCodeFuncType = HaltReason (*)(CodePtr entry_point, void* jit_state, volatile u32* halt_reason); + RunCodeFuncType run_code; + RunCodeFuncType step_code; + void* return_to_dispatcher; + void* return_from_run_code; + + void* read_memory_8; + void* read_memory_16; + void* read_memory_32; + void* read_memory_64; + void* read_memory_128; + void* exclusive_read_memory_8; + void* exclusive_read_memory_16; + void* exclusive_read_memory_32; + void* exclusive_read_memory_64; + void* exclusive_read_memory_128; + void* write_memory_8; + void* write_memory_16; + void* write_memory_32; + void* write_memory_64; + void* write_memory_128; + void* exclusive_write_memory_8; + void* exclusive_write_memory_16; + void* exclusive_write_memory_32; + void* exclusive_write_memory_64; + void* exclusive_write_memory_128; + + void* call_svc; + void* exception_raised; + void* dc_raised; + void* ic_raised; + void* isb_raised; + + void* get_cntpct; + void* add_ticks; + void* get_ticks_remaining; + } prelude_info; +}; + +} // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/emit_arm64.cpp b/src/dynarmic/backend/arm64/emit_arm64.cpp index 104d173d..ef553f2b 100644 --- a/src/dynarmic/backend/arm64/emit_arm64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64.cpp @@ -8,7 +8,6 @@ #include #include -#include "dynarmic/backend/arm64/a32_jitstate.h" #include "dynarmic/backend/arm64/abi.h" #include "dynarmic/backend/arm64/emit_context.h" #include "dynarmic/backend/arm64/fpsr_manager.h" @@ -40,7 +39,7 @@ template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(nullptr, args[1], args[2], args[3]); + ctx.reg_alloc.PrepareForCall(args[1], args[2], args[3]); code.MOV(Xscratch0, args[0].GetImmediateU64()); code.BLR(Xscratch0); } @@ -69,9 +68,34 @@ void EmitIR(oaknut::CodeGenerator&, EmitContext& ctx, I } template<> -void EmitIR(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) { - [[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ASSERT(ctx.reg_alloc.IsValueLive(inst)); +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + if (ctx.reg_alloc.IsValueLive(inst)) { + return; + } + + switch (args[0].GetType()) { + case 
IR::Type::U32: { + auto Wvalue = ctx.reg_alloc.ReadW(args[0]); + auto flags = ctx.reg_alloc.WriteFlags(inst); + RegAlloc::Realize(Wvalue, flags); + + code.TST(*Wvalue, Wvalue); + break; + } + case IR::Type::U64: { + auto Xvalue = ctx.reg_alloc.ReadX(args[0]); + auto flags = ctx.reg_alloc.WriteFlags(inst); + RegAlloc::Realize(Xvalue, flags); + + code.TST(*Xvalue, Xvalue); + break; + } + default: + ASSERT_FALSE("Invalid type for GetNZCVFromOp"); + break; + } } template<> @@ -164,10 +188,12 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E ASSERT(!ctx.block.HasConditionFailedLocation()); } else { ASSERT(ctx.block.HasConditionFailedLocation()); + oaknut::Label pass; - oaknut::Label pass = EmitA32Cond(code, ctx, ctx.block.GetCondition()); + pass = conf.emit_cond(code, ctx, ctx.block.GetCondition()); EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount()); - EmitA32ConditionFailedTerminal(code, ctx); + conf.emit_condition_failed_terminal(code, ctx); + code.l(pass); } @@ -205,7 +231,7 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E reg_alloc.AssertNoMoreUses(); EmitAddCycles(code, ctx, block.CycleCount()); - EmitA32Terminal(code, ctx); + conf.emit_terminal(code, ctx); ebi.size = code.ptr() - ebi.entry_point; return ebi; diff --git a/src/dynarmic/backend/arm64/emit_arm64.h b/src/dynarmic/backend/arm64/emit_arm64.h index 1c5adf3f..20d10f92 100644 --- a/src/dynarmic/backend/arm64/emit_arm64.h +++ b/src/dynarmic/backend/arm64/emit_arm64.h @@ -38,6 +38,8 @@ enum class Opcode; namespace Dynarmic::Backend::Arm64 { +struct EmitContext; + using CodePtr = std::byte*; enum class LinkTarget { @@ -47,21 +49,28 @@ enum class LinkTarget { ReadMemory16, ReadMemory32, ReadMemory64, + ReadMemory128, ExclusiveReadMemory8, ExclusiveReadMemory16, ExclusiveReadMemory32, ExclusiveReadMemory64, + ExclusiveReadMemory128, WriteMemory8, WriteMemory16, WriteMemory32, WriteMemory64, + WriteMemory128, ExclusiveWriteMemory8, ExclusiveWriteMemory16, ExclusiveWriteMemory32, ExclusiveWriteMemory64, + ExclusiveWriteMemory128, CallSVC, ExceptionRaised, InstructionSynchronizationBarrierRaised, + InstructionCacheOperationRaised, + DataCacheOperationRaised, + GetCNTPCT, AddTicks, GetTicksRemaining, }; @@ -83,24 +92,39 @@ struct EmittedBlockInfo { }; struct EmitConfig { + OptimizationFlag optimizations; + bool HasOptimization(OptimizationFlag f) const { return (f & optimizations) != no_optimizations; } + bool hook_isb; + + // System registers + u64 cntfreq_el0; + u32 ctr_el0; + u32 dczid_el0; + const u64* tpidrro_el0; + u64* tpidr_el0; + + // Timing + bool wall_clock_cntpct; bool enable_cycle_counting; + + // Endianness bool always_little_endian; + // Frontend specific callbacks FP::FPCR (*descriptor_to_fpcr)(const IR::LocationDescriptor& descriptor); + oaknut::Label (*emit_cond)(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond); + void (*emit_condition_failed_terminal)(oaknut::CodeGenerator& code, EmitContext& ctx); + void (*emit_terminal)(oaknut::CodeGenerator& code, EmitContext& ctx); + // State offsets size_t state_nzcv_offset; size_t state_fpsr_offset; + // A32 specific std::array, 16> coprocessors{}; - - OptimizationFlag optimizations; - - bool HasOptimization(OptimizationFlag f) const { return (f & optimizations) != no_optimizations; } }; -struct EmitContext; - EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& emit_conf); template @@ -108,7 +132,10 @@ void EmitIR(oaknut::CodeGenerator& code, 
EmitContext& ctx, IR::Inst* inst); void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, LinkTarget link_target); void EmitBlockLinkRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, const IR::LocationDescriptor& descriptor); oaknut::Label EmitA32Cond(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond); +oaknut::Label EmitA64Cond(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond); void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx); +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx); void EmitA32ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx); +void EmitA64ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx); } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/emit_arm64_a32.cpp b/src/dynarmic/backend/arm64/emit_arm64_a32.cpp index 493d75eb..3d43bcb7 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a32.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a32.cpp @@ -41,7 +41,7 @@ void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Re EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); } -void EmitSetUpperLocationDescriptor(oaknut::CodeGenerator& code, EmitContext& ctx, IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) { +static void EmitSetUpperLocationDescriptor(oaknut::CodeGenerator& code, EmitContext& ctx, IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) { auto get_upper = [](const IR::LocationDescriptor& desc) -> u32 { return static_cast(A32::LocationDescriptor{desc}.SetSingleStepping(false).UniqueHash() >> 32); }; @@ -555,7 +555,7 @@ void EmitIR(oaknut::CodeGenerator& template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(nullptr); + ctx.reg_alloc.PrepareForCall(); if (ctx.conf.enable_cycle_counting) { code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run)); @@ -576,7 +576,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCont template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(nullptr); + ctx.reg_alloc.PrepareForCall(); if (ctx.conf.enable_cycle_counting) { code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run)); @@ -611,7 +611,7 @@ void EmitIR(oaknut::CodeGenera return; } - ctx.reg_alloc.PrepareForCall(nullptr); + ctx.reg_alloc.PrepareForCall(); EmitRelocation(code, ctx, LinkTarget::InstructionSynchronizationBarrierRaised); } diff --git a/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp b/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp index 6f2f75c7..5115fbbb 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp @@ -24,7 +24,7 @@ static void EmitCoprocessorException() { } static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional arg0 = {}, std::optional arg1 = {}) { - ctx.reg_alloc.PrepareForCall(inst, {}, arg0, arg1); + ctx.reg_alloc.PrepareForCall({}, arg0, arg1); if (callback.user_arg) { code.MOV(X0, reinterpret_cast(*callback.user_arg)); @@ -32,6 +32,10 @@ static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A3 code.MOV(Xscratch0, reinterpret_cast(callback.function)); code.BLR(Xscratch0); + + if (inst) { 
+ ctx.reg_alloc.DefineAsRegister(inst, X0); + } } template<> diff --git a/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp b/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp index e0bf558c..8d72dd19 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp @@ -25,18 +25,19 @@ static bool IsOrdered(IR::AccType acctype) { static void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(inst, {}, args[1]); + ctx.reg_alloc.PrepareForCall({}, args[1]); const bool ordered = IsOrdered(args[2].GetImmediateAccType()); EmitRelocation(code, ctx, fn); if (ordered) { code.DMB(oaknut::BarrierOp::ISH); } + ctx.reg_alloc.DefineAsRegister(inst, X0); } static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(inst, {}, args[1]); + ctx.reg_alloc.PrepareForCall({}, args[1]); const bool ordered = IsOrdered(args[2].GetImmediateAccType()); code.MOV(Wscratch0, 1); @@ -45,11 +46,12 @@ static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ct if (ordered) { code.DMB(oaknut::BarrierOp::ISH); } + ctx.reg_alloc.DefineAsRegister(inst, X0); } static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(inst, {}, args[1], args[2]); + ctx.reg_alloc.PrepareForCall({}, args[1], args[2]); const bool ordered = IsOrdered(args[3].GetImmediateAccType()); if (ordered) { @@ -63,7 +65,7 @@ static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::I static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.PrepareForCall(inst, {}, args[1], args[2]); + ctx.reg_alloc.PrepareForCall({}, args[1], args[2]); const bool ordered = IsOrdered(args[3].GetImmediateAccType()); oaknut::Label end; @@ -79,6 +81,7 @@ static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& c code.DMB(oaknut::BarrierOp::ISH); } code.l(end); + ctx.reg_alloc.DefineAsRegister(inst, X0); } template<> diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp index 4bd2e270..a686f555 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp @@ -3,9 +3,10 @@ * SPDX-License-Identifier: 0BSD */ +#include #include -#include "dynarmic/backend/arm64/a32_jitstate.h" +#include "dynarmic/backend/arm64/a64_jitstate.h" #include "dynarmic/backend/arm64/abi.h" #include "dynarmic/backend/arm64/emit_arm64.h" #include "dynarmic/backend/arm64/emit_context.h" @@ -18,292 +19,460 @@ namespace Dynarmic::Backend::Arm64 { using namespace oaknut::util; +oaknut::Label EmitA64Cond(oaknut::CodeGenerator& code, EmitContext&, IR::Cond cond) { + oaknut::Label pass; + // TODO: Flags in host flags + code.LDR(Wscratch0, Xstate, offsetof(A64JitState, cpsr_nzcv)); + code.MSR(oaknut::SystemReg::NZCV, Xscratch0); + code.B(static_cast(cond), pass); + return pass; +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step); + +void 
EmitA64Terminal(oaknut::CodeGenerator&, EmitContext&, IR::Term::Interpret, IR::LocationDescriptor, bool) { + ASSERT_FALSE("Interpret should never be emitted."); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) { + EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) { + oaknut::Label fail; + + if (ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) { + if (ctx.conf.enable_cycle_counting) { + code.CMP(Xticks, 0); + code.B(LE, fail); + EmitBlockLinkRelocation(code, ctx, terminal.next); + } else { + code.LDAR(Wscratch0, Xhalt); + code.CBNZ(Wscratch0, fail); + EmitBlockLinkRelocation(code, ctx, terminal.next); + } + } + + code.l(fail); + code.MOV(Xscratch0, A64::LocationDescriptor{terminal.next}.PC()); + code.STR(Xscratch0, Xstate, offsetof(A64JitState, pc)); + EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) { + if (ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) { + EmitBlockLinkRelocation(code, ctx, terminal.next); + } + + code.MOV(Xscratch0, A64::LocationDescriptor{terminal.next}.PC()); + code.STR(Xscratch0, Xstate, offsetof(A64JitState, pc)); + EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool) { + EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); + + // TODO: Implement PopRSBHint optimization +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::FastDispatchHint, IR::LocationDescriptor, bool) { + EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); + + // TODO: Implement FastDispatchHint optimization +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) { + oaknut::Label pass = EmitA64Cond(code, ctx, terminal.if_); + EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step); + code.l(pass); + EmitA64Terminal(code, ctx, terminal.then_, initial_location, is_single_step); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) { + oaknut::Label fail; + code.LDRB(Wscratch0, SP, offsetof(StackLayout, check_bit)); + code.CBZ(Wscratch0, fail); + EmitA64Terminal(code, ctx, terminal.then_, initial_location, is_single_step); + code.l(fail); + EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) { + oaknut::Label fail; + code.LDAR(Wscratch0, Xhalt); + code.CBNZ(Wscratch0, fail); + EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step); + code.l(fail); + EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) { + boost::apply_visitor([&](const auto& t) { EmitA64Terminal(code,
ctx, t, initial_location, is_single_step); }, terminal); +} + +void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx) { + const A64::LocationDescriptor location{ctx.block.Location()}; + EmitA64Terminal(code, ctx, ctx.block.GetTerminal(), location.SetSingleStepping(false), location.SingleStepping()); +} + +void EmitA64ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx) { + const A64::LocationDescriptor location{ctx.block.Location()}; + EmitA64Terminal(code, ctx, IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()}, location.SetSingleStepping(false), location.SingleStepping()); +} + template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + if (args[0].IsImmediate()) { + if (args[0].GetImmediateU1()) { + code.MOV(Wscratch0, 1); + code.STRB(Wscratch0, SP, offsetof(StackLayout, check_bit)); + } else { + code.STRB(WZR, SP, offsetof(StackLayout, check_bit)); + } + } else { + auto Wbit = ctx.reg_alloc.ReadW(args[0]); + RegAlloc::Realize(Wbit); + code.STRB(Wbit, SP, offsetof(StackLayout, check_bit)); + } } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto Wflag = ctx.reg_alloc.WriteW(inst); + RegAlloc::Realize(Wflag); + code.LDR(Wflag, Xstate, offsetof(A64JitState, cpsr_nzcv)); + code.AND(Wflag, Wflag, 1 << 29); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto Wnzcv = ctx.reg_alloc.WriteW(inst); + RegAlloc::Realize(Wnzcv); + + code.LDR(Wnzcv, Xstate, offsetof(A64JitState, cpsr_nzcv)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Wnzcv = ctx.reg_alloc.ReadW(args[0]); + RegAlloc::Realize(Wnzcv); + + code.STR(Wnzcv, Xstate, offsetof(A64JitState, cpsr_nzcv)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Wnzcv = ctx.reg_alloc.ReadW(args[0]); + RegAlloc::Realize(Wnzcv); + + code.STR(Wnzcv, Xstate, offsetof(A64JitState, cpsr_nzcv)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); + + auto Wresult = ctx.reg_alloc.WriteW(inst); + RegAlloc::Realize(Wresult); + + // TODO: Detect if Gpr vs Fpr is more appropriate + + code.LDR(Wresult, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); + + auto Xresult = ctx.reg_alloc.WriteX(inst); + RegAlloc::Realize(Xresult); + + // TODO: Detect if Gpr vs Fpr is more appropriate + + code.LDR(Xresult, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - 
ASSERT_FALSE("Unimplemented"); + const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); + auto Sresult = ctx.reg_alloc.WriteS(inst); + RegAlloc::Realize(Sresult); + code.LDR(Sresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); + auto Dresult = ctx.reg_alloc.WriteD(inst); + RegAlloc::Realize(Dresult); + code.LDR(Dresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); + auto Qresult = ctx.reg_alloc.WriteQ(inst); + RegAlloc::Realize(Qresult); + code.LDR(Qresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto Xresult = ctx.reg_alloc.WriteX(inst); + RegAlloc::Realize(Xresult); + + code.LDR(Xresult, Xstate, offsetof(A64JitState, sp)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto Wresult = ctx.reg_alloc.WriteW(inst); + RegAlloc::Realize(Wresult); + + code.LDR(Wresult, Xstate, offsetof(A64JitState, fpcr)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto Wresult = ctx.reg_alloc.WriteW(inst); + RegAlloc::Realize(Wresult); + + code.LDR(Wresult, Xstate, offsetof(A64JitState, fpsr)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); + + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + auto Wvalue = ctx.reg_alloc.ReadW(args[1]); + RegAlloc::Realize(Wvalue); + + // TODO: Detect if Gpr vs Fpr is more appropriate + code.MOV(*Wvalue, Wvalue); + code.STR(Wvalue->toX(), Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); + + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + auto Xvalue = ctx.reg_alloc.ReadX(args[1]); + RegAlloc::Realize(Xvalue); + + // TODO: Detect if Gpr vs Fpr is more appropriate + + code.STR(Xvalue, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); + auto Svalue = ctx.reg_alloc.ReadS(args[1]); + RegAlloc::Realize(Svalue); + + code.FMOV(Svalue, Svalue); + code.STR(Svalue->toQ(), Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; 
- ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); + auto Dvalue = ctx.reg_alloc.ReadD(args[1]); + RegAlloc::Realize(Dvalue); + + code.FMOV(Dvalue, Dvalue); + code.STR(Dvalue->toQ(), Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); + auto Qvalue = ctx.reg_alloc.ReadQ(args[1]); + RegAlloc::Realize(Qvalue); + code.STR(Qvalue, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Xvalue = ctx.reg_alloc.ReadX(args[0]); + RegAlloc::Realize(Xvalue); + code.STR(Xvalue, Xstate, offsetof(A64JitState, sp)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Wvalue = ctx.reg_alloc.ReadW(args[0]); + RegAlloc::Realize(Wvalue); + code.STR(Wvalue, Xstate, offsetof(A64JitState, fpcr)); + code.MSR(oaknut::SystemReg::FPCR, Wvalue->toX()); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Wvalue = ctx.reg_alloc.ReadW(args[0]); + RegAlloc::Realize(Wvalue); + code.STR(Wvalue, Xstate, offsetof(A64JitState, fpsr)); + code.MSR(oaknut::SystemReg::FPSR, Wvalue->toX()); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Xvalue = ctx.reg_alloc.ReadX(args[0]); + RegAlloc::Realize(Xvalue); + code.STR(Xvalue, Xstate, offsetof(A64JitState, pc)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.PrepareForCall(); + + if (ctx.conf.enable_cycle_counting) { + code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run)); + code.SUB(Xscratch0, Xscratch0, Xticks); + EmitRelocation(code, ctx, LinkTarget::AddTicks); + } + + code.MOV(W1, args[0].GetImmediateU32()); + EmitRelocation(code, ctx, LinkTarget::CallSVC); + + if (ctx.conf.enable_cycle_counting) { + EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining); + code.STR(X0, SP, offsetof(StackLayout, cycles_to_run)); + code.MOV(Xticks, X0); + } } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.PrepareForCall(); + + if (ctx.conf.enable_cycle_counting) { + code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run)); + code.SUB(Xscratch0, Xscratch0, Xticks); + EmitRelocation(code, ctx, LinkTarget::AddTicks); + } + + code.MOV(X1, args[0].GetImmediateU64()); + code.MOV(X2, args[1].GetImmediateU64()); + 
EmitRelocation(code, ctx, LinkTarget::ExceptionRaised); + + if (ctx.conf.enable_cycle_counting) { + EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining); + code.STR(X0, SP, offsetof(StackLayout, cycles_to_run)); + code.MOV(Xticks, X0); + } } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.PrepareForCall({}, args[1], args[2]); + EmitRelocation(code, ctx, LinkTarget::DataCacheOperationRaised); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.PrepareForCall({}, args[0], args[1]); + EmitRelocation(code, ctx, LinkTarget::InstructionCacheOperationRaised); } template<> -void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); +void EmitIR(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) { + code.DSB(oaknut::BarrierOp::SY); } template<> -void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); +void EmitIR(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) { + code.DMB(oaknut::BarrierOp::SY); } template<> -void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst*) { + if (!ctx.conf.hook_isb) { + return; + } + + ctx.reg_alloc.PrepareForCall(); + EmitRelocation(code, ctx, LinkTarget::InstructionSynchronizationBarrierRaised); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto Xvalue = ctx.reg_alloc.WriteX(inst); + RegAlloc::Realize(Xvalue); + code.MOV(Xvalue, ctx.conf.cntfreq_el0); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + ctx.reg_alloc.PrepareForCall(); + if (!ctx.conf.wall_clock_cntpct && ctx.conf.enable_cycle_counting) { + code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run)); + code.SUB(X1, X1, Xticks); + EmitRelocation(code, ctx, LinkTarget::AddTicks); + EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining); + code.STR(X0, SP, offsetof(StackLayout, cycles_to_run)); + code.MOV(Xticks, X0); + } + EmitRelocation(code, ctx, LinkTarget::GetCNTPCT); + ctx.reg_alloc.DefineAsRegister(inst, X0); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto Wvalue = ctx.reg_alloc.WriteW(inst); + RegAlloc::Realize(Wvalue); + code.MOV(Wvalue, ctx.conf.ctr_el0); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto Wvalue = ctx.reg_alloc.WriteW(inst); + RegAlloc::Realize(Wvalue); + code.MOV(Wvalue, ctx.conf.dczid_el0); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto Xvalue = ctx.reg_alloc.WriteX(inst); + 
RegAlloc::Realize(Xvalue); + code.MOV(Xscratch0, mcl::bit_cast(ctx.conf.tpidr_el0)); + code.LDR(Xvalue, Xscratch0); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto Xvalue = ctx.reg_alloc.WriteX(inst); + RegAlloc::Realize(Xvalue); + code.MOV(Xscratch0, mcl::bit_cast(ctx.conf.tpidrro_el0)); + code.LDR(Xvalue, Xscratch0); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Xvalue = ctx.reg_alloc.ReadX(args[0]); + RegAlloc::Realize(Xvalue); + code.MOV(Xscratch0, mcl::bit_cast(ctx.conf.tpidr_el0)); + code.STR(Xvalue, Xscratch0); } } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp index 1251a035..bf378605 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp @@ -5,11 +5,12 @@ #include -#include "dynarmic/backend/arm64/a32_jitstate.h" +#include "dynarmic/backend/arm64/a64_jitstate.h" #include "dynarmic/backend/arm64/abi.h" #include "dynarmic/backend/arm64/emit_arm64.h" #include "dynarmic/backend/arm64/emit_context.h" #include "dynarmic/backend/arm64/reg_alloc.h" +#include "dynarmic/ir/acc_type.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" @@ -18,172 +19,202 @@ namespace Dynarmic::Backend::Arm64 { using namespace oaknut::util; +static bool IsOrdered(IR::AccType acctype) { + return acctype == IR::AccType::ORDERED || acctype == IR::AccType::ORDEREDRW || acctype == IR::AccType::LIMITEDORDERED; +} + +static void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.PrepareForCall({}, args[1]); + const bool ordered = IsOrdered(args[2].GetImmediateAccType()); + + EmitRelocation(code, ctx, fn); + if (ordered) { + code.DMB(oaknut::BarrierOp::ISH); + } + ctx.reg_alloc.DefineAsRegister(inst, X0); +} + +static void EmitReadMemory128(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.PrepareForCall({}, args[1]); + const bool ordered = IsOrdered(args[2].GetImmediateAccType()); + + EmitRelocation(code, ctx, fn); + if (ordered) { + code.DMB(oaknut::BarrierOp::ISH); + } + code.MOV(Q8.B16(), Q0.B16()); + ctx.reg_alloc.DefineAsRegister(inst, Q8); +} + +static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.PrepareForCall({}, args[1]); + const bool ordered = IsOrdered(args[2].GetImmediateAccType()); + + code.MOV(Wscratch0, 1); + code.STRB(Wscratch0, Xstate, offsetof(A64JitState, exclusive_state)); + EmitRelocation(code, ctx, fn); + if (ordered) { + code.DMB(oaknut::BarrierOp::ISH); + } + ctx.reg_alloc.DefineAsRegister(inst, X0); +} + +static void EmitExclusiveReadMemory128(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.PrepareForCall({}, args[1]); + const bool ordered = IsOrdered(args[2].GetImmediateAccType()); + + code.MOV(Wscratch0, 1); + 
code.STRB(Wscratch0, Xstate, offsetof(A64JitState, exclusive_state)); + EmitRelocation(code, ctx, fn); + if (ordered) { + code.DMB(oaknut::BarrierOp::ISH); + } + code.MOV(Q8.B16(), Q0.B16()); + ctx.reg_alloc.DefineAsRegister(inst, Q8); +} + +static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.PrepareForCall({}, args[1], args[2]); + const bool ordered = IsOrdered(args[3].GetImmediateAccType()); + + if (ordered) { + code.DMB(oaknut::BarrierOp::ISH); + } + EmitRelocation(code, ctx, fn); + if (ordered) { + code.DMB(oaknut::BarrierOp::ISH); + } +} + +static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.PrepareForCall({}, args[1], args[2]); + const bool ordered = IsOrdered(args[3].GetImmediateAccType()); + + oaknut::Label end; + + if (ordered) { + code.DMB(oaknut::BarrierOp::ISH); + } + code.LDRB(Wscratch0, Xstate, offsetof(A64JitState, exclusive_state)); + code.CBZ(Wscratch0, end); + code.STRB(WZR, Xstate, offsetof(A64JitState, exclusive_state)); + EmitRelocation(code, ctx, fn); + if (ordered) { + code.DMB(oaknut::BarrierOp::ISH); + } + code.l(end); + ctx.reg_alloc.DefineAsRegister(inst, X0); +} + template<> -void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); +void EmitIR(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) { + code.STR(WZR, Xstate, offsetof(A64JitState, exclusive_state)); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory8); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory16); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory32); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory64); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitReadMemory128(code, ctx, inst, LinkTarget::ReadMemory128); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory8); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory16); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory32); } template<> void 
EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory64); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitExclusiveReadMemory128(code, ctx, inst, LinkTarget::ExclusiveReadMemory128); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory8); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory16); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory32); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory64); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory128); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory8); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory16); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory32); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory64); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory128); } } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp index f0250361..a1e618fb 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp @@ -189,10 +189,14 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xoperand = ctx.reg_alloc.ReadX(args[0]); + 
RegAlloc::Realize(Xresult, Xoperand); + ASSERT(args[1].IsImmediate()); + ASSERT(args[1].GetImmediateU8() < 64); + + code.UBFX(Xresult, Xoperand, args[1].GetImmediateU8(), 1); } template<> @@ -616,10 +620,23 @@ void EmitIR(oaknut::CodeGenerator& code, Emi template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + + if (shift_arg.IsImmediate()) { + const u8 shift = shift_arg.GetImmediateU8(); + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xoperand = ctx.reg_alloc.ReadX(operand_arg); + RegAlloc::Realize(Xresult, Xoperand); + code.ASR(Xresult, Xoperand, shift <= 63 ? shift : 63); + } else { + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xoperand = ctx.reg_alloc.ReadX(operand_arg); + auto Xshift = ctx.reg_alloc.ReadX(shift_arg); + RegAlloc::Realize(Xresult, Xoperand, Xshift); + code.ASR(Xresult, Xoperand, Xshift); + } } template<> @@ -690,10 +707,23 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + + if (shift_arg.IsImmediate()) { + const u8 shift = shift_arg.GetImmediateU8(); + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xoperand = ctx.reg_alloc.ReadX(operand_arg); + RegAlloc::Realize(Xresult, Xoperand); + code.ROR(Xresult, Xoperand, shift); + } else { + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xoperand = ctx.reg_alloc.ReadX(operand_arg); + auto Xshift = ctx.reg_alloc.ReadX(shift_arg); + RegAlloc::Realize(Xresult, Xoperand, Xshift); + code.ROR(Xresult, Xoperand, Xshift); + } } template<> @@ -726,68 +756,114 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCo } } +template +static void EmitMaskedShift32(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, ShiftI si_fn, ShiftR sr_fn) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + + if (shift_arg.IsImmediate()) { + auto Wresult = ctx.reg_alloc.WriteW(inst); + auto Woperand = ctx.reg_alloc.ReadW(operand_arg); + RegAlloc::Realize(Wresult, Woperand); + const u32 shift = shift_arg.GetImmediateU32(); + + si_fn(Wresult, Woperand, static_cast(shift & 0x1F)); + } else { + auto Wresult = ctx.reg_alloc.WriteW(inst); + auto Woperand = ctx.reg_alloc.ReadW(operand_arg); + auto Wshift = ctx.reg_alloc.ReadW(shift_arg); + RegAlloc::Realize(Wresult, Woperand, Wshift); + + sr_fn(Wresult, Woperand, Wshift); + } +} + +template +static void EmitMaskedShift64(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, ShiftI si_fn, ShiftR sr_fn) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto& operand_arg = args[0]; + auto& shift_arg = args[1]; + + if (shift_arg.IsImmediate()) { + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xoperand = ctx.reg_alloc.ReadX(operand_arg); + RegAlloc::Realize(Xresult, Xoperand); + const u32 shift = shift_arg.GetImmediateU64(); + + si_fn(Xresult, Xoperand, static_cast(shift & 0x3F)); + } else { + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xoperand = ctx.reg_alloc.ReadX(operand_arg); + auto Xshift = ctx.reg_alloc.ReadX(shift_arg); + RegAlloc::Realize(Xresult, Xoperand, Xshift); + + sr_fn(Xresult, Xoperand, Xshift); + } +} + template<> 
void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift32( + code, ctx, inst, + [&](auto& Wresult, auto& Woperand, auto shift) { code.LSL(Wresult, Woperand, shift); }, + [&](auto& Wresult, auto& Woperand, auto& Wshift) { code.LSL(Wresult, Woperand, Wshift); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift64( + code, ctx, inst, + [&](auto& Xresult, auto& Xoperand, auto shift) { code.LSL(Xresult, Xoperand, shift); }, + [&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.LSL(Xresult, Xoperand, Xshift); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift32( + code, ctx, inst, + [&](auto& Wresult, auto& Woperand, auto shift) { code.LSR(Wresult, Woperand, shift); }, + [&](auto& Wresult, auto& Woperand, auto& Wshift) { code.LSR(Wresult, Woperand, Wshift); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift64( + code, ctx, inst, + [&](auto& Xresult, auto& Xoperand, auto shift) { code.LSR(Xresult, Xoperand, shift); }, + [&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.LSR(Xresult, Xoperand, Xshift); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift32( + code, ctx, inst, + [&](auto& Wresult, auto& Woperand, auto shift) { code.ASR(Wresult, Woperand, shift); }, + [&](auto& Wresult, auto& Woperand, auto& Wshift) { code.ASR(Wresult, Woperand, Wshift); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift64( + code, ctx, inst, + [&](auto& Xresult, auto& Xoperand, auto shift) { code.ASR(Xresult, Xoperand, shift); }, + [&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.ASR(Xresult, Xoperand, Xshift); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift32( + code, ctx, inst, + [&](auto& Wresult, auto& Woperand, auto shift) { code.ROR(Wresult, Woperand, shift); }, + [&](auto& Wresult, auto& Woperand, auto& Wshift) { code.ROR(Wresult, Woperand, Wshift); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaskedShift64( + code, ctx, inst, + [&](auto& Xresult, auto& Xoperand, auto shift) { code.ROR(Xresult, Xoperand, shift); }, + [&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.ROR(Xresult, Xoperand, Xshift); }); } template @@ -975,18 +1051,24 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xop1 = ctx.reg_alloc.ReadX(args[0]); + auto Xop2 = ctx.reg_alloc.ReadX(args[1]); + 
RegAlloc::Realize(Xresult, Xop1, Xop2); + + code.SMULH(Xresult, Xop1, Xop2); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xop1 = ctx.reg_alloc.ReadX(args[0]); + auto Xop2 = ctx.reg_alloc.ReadX(args[1]); + RegAlloc::Realize(Xresult, Xop1, Xop2); + + code.UMULH(Xresult, Xop1, Xop2); } template<> @@ -1160,7 +1242,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - EmitAndNot<32>(code, ctx, inst); + EmitAndNot<64>(code, ctx, inst); } template<> @@ -1271,9 +1353,13 @@ void EmitIR(oaknut::CodeGenerator&, EmitContex } template<> -void EmitIR(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) { +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.DefineAsExisting(inst, args[0]); + auto Xvalue = ctx.reg_alloc.ReadX(args[0]); + auto Qresult = ctx.reg_alloc.WriteQ(inst); + RegAlloc::Realize(Xvalue, Qresult); + + code.FMOV(Qresult->toD(), Xvalue); } template<> @@ -1313,98 +1399,124 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCo template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ASSERT(args[2].IsImmediate()); + + auto Wresult = ctx.reg_alloc.WriteW(inst); + auto Wop1 = ctx.reg_alloc.ReadW(args[0]); + auto Wop2 = ctx.reg_alloc.ReadW(args[1]); + RegAlloc::Realize(Wresult, Wop1, Wop2); + const u8 lsb = args[2].GetImmediateU8(); + + code.EXTR(Wresult, Wop2, Wop1, lsb); // NB: flipped } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ASSERT(args[2].IsImmediate()); + + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xop1 = ctx.reg_alloc.ReadX(args[0]); + auto Xop2 = ctx.reg_alloc.ReadX(args[1]); + RegAlloc::Realize(Xresult, Xop1, Xop2); + const u8 lsb = args[2].GetImmediateU8(); + + code.EXTR(Xresult, Xop2, Xop1, lsb); // NB: flipped } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ASSERT(args[1].IsImmediate()); + + auto Wresult = ctx.reg_alloc.WriteW(inst); + auto Wvalue = ctx.reg_alloc.ReadW(args[0]); + const u8 bit = args[1].GetImmediateU8(); + RegAlloc::Realize(Wresult, Wvalue); + + code.LSL(Wresult, Wvalue, 31 - bit); + code.ASR(Wresult, Wresult, 31); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ASSERT(args[1].IsImmediate()); + + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xvalue = ctx.reg_alloc.ReadX(args[0]); + const u8 bit = args[1].GetImmediateU8(); + RegAlloc::Realize(Xresult, Xvalue); + + code.LSL(Xresult, Xvalue, 63 - bit); + code.ASR(Xresult, Xresult, 63); +} + +static void EmitMaxMin32(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Cond cond) { + auto args = 
ctx.reg_alloc.GetArgumentInfo(inst); + + auto Wresult = ctx.reg_alloc.WriteW(inst); + auto Wop1 = ctx.reg_alloc.ReadW(args[0]); + auto Wop2 = ctx.reg_alloc.ReadW(args[1]); + RegAlloc::Realize(Wresult, Wop1, Wop2); + ctx.reg_alloc.SpillFlags(); + + code.CMP(Wop1->toW(), Wop2); + code.CSEL(Wresult, Wop1, Wop2, cond); +} + +static void EmitMaxMin64(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Cond cond) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + auto Xresult = ctx.reg_alloc.WriteX(inst); + auto Xop1 = ctx.reg_alloc.ReadX(args[0]); + auto Xop2 = ctx.reg_alloc.ReadX(args[1]); + RegAlloc::Realize(Xresult, Xop1, Xop2); + ctx.reg_alloc.SpillFlags(); + + code.CMP(Xop1->toX(), Xop2); + code.CSEL(Xresult, Xop1, Xop2, cond); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin32(code, ctx, inst, GT); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin64(code, ctx, inst, GT); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin32(code, ctx, inst, HI); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin64(code, ctx, inst, HI); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin32(code, ctx, inst, LT); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin64(code, ctx, inst, LT); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin32(code, ctx, inst, LO); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitMaxMin64(code, ctx, inst, LO); } } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/emit_arm64_floating_point.cpp b/src/dynarmic/backend/arm64/emit_arm64_floating_point.cpp index 57b45807..6d85cd23 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_floating_point.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_floating_point.cpp @@ -575,6 +575,20 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + const auto rounding_mode = static_cast(inst->GetArg(1).GetU8()); + + if (rounding_mode == FP::RoundingMode::ToOdd) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Sto = ctx.reg_alloc.WriteS(inst); + auto Dfrom = ctx.reg_alloc.ReadD(args[0]); + RegAlloc::Realize(Sto, Dfrom); + ctx.fpsr.Load(); + + code.FCVTXN(Sto, Dfrom); + + return; + } + EmitConvert<64, 32>(code, ctx, inst, [&](auto& Sto, auto& Dfrom) { code.FCVT(Sto, Dfrom); }); } diff --git a/src/dynarmic/backend/arm64/emit_arm64_vector.cpp b/src/dynarmic/backend/arm64/emit_arm64_vector.cpp index 803f7b55..4024299e 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_vector.cpp +++ 
b/src/dynarmic/backend/arm64/emit_arm64_vector.cpp @@ -201,8 +201,8 @@ static void EmitThreeOpArrangedLower(oaknut::CodeGenerator& code, EmitContext& c template static void EmitSaturatedAccumulate(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - auto Qaccumulator = ctx.reg_alloc.ReadWriteQ(args[0], inst); - auto Qoperand = ctx.reg_alloc.ReadQ(args[1]); + auto Qaccumulator = ctx.reg_alloc.ReadWriteQ(args[1], inst); // NB: Swapped + auto Qoperand = ctx.reg_alloc.ReadQ(args[0]); // NB: Swapped RegAlloc::Realize(Qaccumulator, Qoperand); ctx.fpsr.Load(); diff --git a/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp b/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp index 9f632c24..4bb1614a 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp @@ -3,14 +3,31 @@ * SPDX-License-Identifier: 0BSD */ +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include "dynarmic/backend/arm64/a32_jitstate.h" +#include "dynarmic/backend/arm64/a64_jitstate.h" #include "dynarmic/backend/arm64/abi.h" #include "dynarmic/backend/arm64/emit_arm64.h" #include "dynarmic/backend/arm64/emit_context.h" #include "dynarmic/backend/arm64/fpsr_manager.h" #include "dynarmic/backend/arm64/reg_alloc.h" +#include "dynarmic/common/cast_util.h" +#include "dynarmic/common/fp/fpcr.h" +#include "dynarmic/common/fp/fpsr.h" +#include "dynarmic/common/fp/info.h" +#include "dynarmic/common/fp/op.h" +#include "dynarmic/common/fp/rounding_mode.h" +#include "dynarmic/common/lut_from_list.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" @@ -18,6 +35,15 @@ namespace Dynarmic::Backend::Arm64 { using namespace oaknut::util; +namespace mp = mcl::mp; + +using A64FullVectorWidth = std::integral_constant; + +// Array alias that always sizes itself according to the given type T +// relative to the size of a vector register. e.g. T = u32 would result +// in a std::array. 
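// The alias declarations below appear with their template arguments missing; as a
// hedged sketch (an assumption based only on the comment above, not a verbatim
// reconstruction of the dynarmic source), they amount to something like:
//
//     using A64FullVectorWidth = std::integral_constant<std::size_t, 128>;
//
//     template<typename T>
//     using VectorArray = std::array<T, A64FullVectorWidth::value / (sizeof(T) * CHAR_BIT)>;
//
// so that, for example, VectorArray<u32> is a std::array<u32, 4>: one 128-bit Q register
// viewed as four 32-bit lanes.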
+template +using VectorArray = std::array>; template static void MaybeStandardFPSCRValue(oaknut::CodeGenerator& code, EmitContext& ctx, bool fpcr_controlled, EmitFn emit) { @@ -232,12 +258,47 @@ void EmitToFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) }); } +template +static void EmitTwoOpFallbackWithoutRegAlloc(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::QReg Qresult, oaknut::QReg Qarg1, Lambda lambda, bool fpcr_controlled) { + const auto fn = static_cast*>(lambda); + + const u32 fpcr = ctx.FPCR(fpcr_controlled).Value(); + constexpr u64 stack_size = sizeof(u64) * 4; // sizeof(u128) * 2 + + ABI_PushRegisters(code, ABI_CALLER_SAVE & ~(1ull << Qresult.index()), stack_size); + + code.MOV(Xscratch0, mcl::bit_cast(fn)); + code.ADD(X0, SP, 0 * 16); + code.ADD(X1, SP, 1 * 16); + code.MOV(X2, fpcr); + code.ADD(X3, Xstate, ctx.conf.state_fpsr_offset); + code.STR(Qarg1, X1); + code.BLR(Xscratch0); + code.LDR(Qresult, SP); + + ABI_PopRegisters(code, ABI_CALLER_SAVE & ~(1ull << Qresult.index()), stack_size); +} + +template +static void EmitTwoOpFallback(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Qarg1 = ctx.reg_alloc.ReadQ(args[0]); + auto Qresult = ctx.reg_alloc.WriteQ(inst); + RegAlloc::Realize(Qarg1, Qresult); + ctx.reg_alloc.SpillFlags(); + ctx.fpsr.Spill(); + + const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1(); + EmitTwoOpFallbackWithoutRegAlloc(code, ctx, Qresult, Qarg1, lambda, fpcr_controlled); +} + template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Qresult = ctx.reg_alloc.ReadWriteQ(args[0], inst); + RegAlloc::Realize(Qresult); + + code.BIC(Qresult->H8(), 0b10000000, LSL, 8); } template<> @@ -486,10 +547,35 @@ void EmitIR(oaknut::CodeGenerator& code, E template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + const auto rounding = static_cast(inst->GetArg(1).GetU8()); + const bool exact = inst->GetArg(2).GetU1(); + + using rounding_list = mp::list< + mp::lift_value, + mp::lift_value, + mp::lift_value, + mp::lift_value, + mp::lift_value>; + using exact_list = mp::list; + + static const auto lut = Common::GenerateLookupTableFromList( + [](I) { + using FPT = u16; + return std::pair{ + mp::lower_to_tuple_v, + Common::FptrCast( + [](VectorArray& output, const VectorArray& input, FP::FPCR fpcr, FP::FPSR& fpsr) { + constexpr FP::RoundingMode rounding_mode = mp::get<0, I>::value; + constexpr bool exact = mp::get<1, I>::value; + + for (size_t i = 0; i < output.size(); ++i) { + output[i] = static_cast(FP::FPRoundInt(input[i], fpcr, rounding_mode, exact, fpsr)); + } + })}; + }, + mp::cartesian_product{}); + + EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact))); } template<> diff --git a/src/dynarmic/backend/arm64/reg_alloc.cpp b/src/dynarmic/backend/arm64/reg_alloc.cpp index 82bb02e7..3e61532a 100644 --- a/src/dynarmic/backend/arm64/reg_alloc.cpp +++ b/src/dynarmic/backend/arm64/reg_alloc.cpp @@ -138,7 +138,7 @@ bool RegAlloc::IsValueLive(IR::Inst* inst) const { return !!ValueLocation(inst); } -void RegAlloc::PrepareForCall(IR::Inst* result, std::optional arg0, std::optional arg1, std::optional arg2, std::optional arg3) { +void 
RegAlloc::PrepareForCall(std::optional arg0, std::optional arg1, std::optional arg2, std::optional arg3) { fpsr_manager.Spill(); SpillFlags(); @@ -157,16 +157,29 @@ void RegAlloc::PrepareForCall(IR::Inst* result, std::optional, 4> args{arg0, arg1, arg2, arg3}; + + // AAPCS64 Next General-purpose Register Number + int ngrn = 0; + // AAPCS64 Next SIMD and Floating-point Register Number + int nsrn = 0; + for (int i = 0; i < 4; i++) { if (args[i]) { - ASSERT(gprs[i].IsCompletelyEmpty()); - LoadCopyInto(args[i]->get().value, oaknut::XReg{i}); + if (args[i]->get().GetType() == IR::Type::U128) { + ASSERT(fprs[nsrn].IsCompletelyEmpty()); + LoadCopyInto(args[i]->get().value, oaknut::QReg{nsrn}); + nsrn++; + } else { + ASSERT(gprs[ngrn].IsCompletelyEmpty()); + LoadCopyInto(args[i]->get().value, oaknut::XReg{ngrn}); + ngrn++; + } + } else { + // Gaps are assumed to be in general-purpose registers + // TODO: should there be a separate list passed for FPRs instead? + ngrn++; } } - - if (result) { - DefineAsRegister(result, X0); - } } void RegAlloc::DefineAsExisting(IR::Inst* inst, Argument& arg) { diff --git a/src/dynarmic/backend/arm64/reg_alloc.h b/src/dynarmic/backend/arm64/reg_alloc.h index c8560e25..a101ee6d 100644 --- a/src/dynarmic/backend/arm64/reg_alloc.h +++ b/src/dynarmic/backend/arm64/reg_alloc.h @@ -271,11 +271,7 @@ public: } } - void PrepareForCall(IR::Inst* result = nullptr, - std::optional arg0 = {}, - std::optional arg1 = {}, - std::optional arg2 = {}, - std::optional arg3 = {}); + void PrepareForCall(std::optional arg0 = {}, std::optional arg1 = {}, std::optional arg2 = {}, std::optional arg3 = {}); void DefineAsExisting(IR::Inst* inst, Argument& arg); void DefineAsRegister(IR::Inst* inst, oaknut::Reg reg); diff --git a/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/backend/x64/emit_x64.cpp index c3a7c80b..62af1ad4 100644 --- a/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/backend/x64/emit_x64.cpp @@ -154,7 +154,7 @@ void EmitX64::EmitGetNZFromOp(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg64 nz = ctx.reg_alloc.ScratchGpr(HostLoc::RAX); const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize); - code.cmp(value, 0); + code.test(value, value); code.lahf(); code.movzx(eax, ah); ctx.reg_alloc.DefineValue(inst, nz); @@ -180,9 +180,9 @@ void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(HostLoc::RAX); const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize); - code.cmp(value, 0); + code.test(value, value); code.lahf(); - code.seto(code.al); + code.mov(al, 0); ctx.reg_alloc.DefineValue(inst, nzcv); } diff --git a/src/dynarmic/interface/A64/config.h b/src/dynarmic/interface/A64/config.h index 0b016eab..409e0048 100644 --- a/src/dynarmic/interface/A64/config.h +++ b/src/dynarmic/interface/A64/config.h @@ -198,7 +198,7 @@ struct UserConfig { /// Pointer to where TPIDR_EL0 is stored. This pointer will be inserted into /// emitted code. - const std::uint64_t* tpidr_el0 = nullptr; + std::uint64_t* tpidr_el0 = nullptr; /// Pointer to the page table which we can use for direct page table access. /// If an entry in page_table is null, the relevant memory callback will be called. 
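// A reference model (a sketch for illustration, not dynarmic code) of the A64 EXTR
// instruction that the new "A64: EXTR" test in tests/A64/a64.cpp below exercises and
// that the ExtractRegister handlers above emit via oaknut's EXTR (note the flipped
// operand order noted there): EXTR Xd, Xn, Xm, #lsb yields bits [lsb+63:lsb] of the
// 128-bit concatenation Xn:Xm.
#include <cstdint>

inline std::uint64_t extr64_reference(std::uint64_t xn, std::uint64_t xm, unsigned lsb) {
    // lsb == 0 selects Xm unchanged; shifting a 64-bit value by 64 would be undefined
    // behaviour in C++, hence the special case.
    return lsb == 0 ? xm : (xm >> lsb) | (xn << (64 - lsb));
}

// With the values the test sets up (X23 = 0, X24 = 1) and lsb = 63, the result is 0,
// which is exactly what the test's REQUIRE checks for X23.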
diff --git a/tests/A64/a64.cpp b/tests/A64/a64.cpp index 067da6bb..5edc7730 100644 --- a/tests/A64/a64.cpp +++ b/tests/A64/a64.cpp @@ -1024,6 +1024,54 @@ TEST_CASE("A64: This is an infinite loop if fast dispatch is enabled", "[a64]") jit.Run(); } +TEST_CASE("A64: EXTR", "[a64]") { + A64TestEnv env; + A64::Jit jit{A64::UserConfig{&env}}; + + env.code_mem.emplace_back(0x93d8fef7); // EXTR X23, X23, X24, #63 + env.code_mem.emplace_back(0x14000000); // B . + + jit.SetPC(0); + jit.SetRegister(23, 0); + jit.SetRegister(24, 1); + + env.ticks_left = 2; + jit.Run(); + + REQUIRE(jit.GetRegister(23) == 0); +} + +TEST_CASE("A64: Isolated GetNZCVFromOp", "[a64]") { + A64TestEnv env; + A64::Jit jit{A64::UserConfig{&env}}; + + env.code_mem.emplace_back(0xaa1f03f5); // MOV X21, XZR + env.code_mem.emplace_back(0x912a02da); // ADD X26, X22, #0xa80 + env.code_mem.emplace_back(0x913662dc); // ADD X28, X22, #0xd98 + env.code_mem.emplace_back(0x320003e8); // MOV W8, #1 + env.code_mem.emplace_back(0xa9006bfc); // STP X28, X26, [SP] + env.code_mem.emplace_back(0x7200011f); // TST W8, #1 + env.code_mem.emplace_back(0xf94007e8); // LDR X8, [SP, #8] + env.code_mem.emplace_back(0x321e03e3); // MOV W3, #4 + env.code_mem.emplace_back(0xaa1303e2); // MOV X2, X19 + env.code_mem.emplace_back(0x9a881357); // CSEL X23, X26, X8, NE + env.code_mem.emplace_back(0xf94003e8); // LDR X8, [SP] + env.code_mem.emplace_back(0xaa1703e0); // MOV X0, X23 + env.code_mem.emplace_back(0x9a881396); // CSEL X22, X28, X8, NE + env.code_mem.emplace_back(0x92407ea8); // AND X8, X21, #0xffffffff + env.code_mem.emplace_back(0x1ac8269b); // LSR W27, W20, W8 + env.code_mem.emplace_back(0x0b1b0768); // ADD W8, W27, W27, LSL #1 + env.code_mem.emplace_back(0x937f7d01); // SBFIZ X1, X8, #1, #32 + env.code_mem.emplace_back(0x2a1f03e4); // MOV W4, WZR + env.code_mem.emplace_back(0x531e7779); // LSL W25, W27, #2 + env.code_mem.emplace_back(0x14000000); // B . 
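    // The listing above separates an NZCV producer (TST W8, #1) from its CSEL consumers
    // with several unrelated instructions in between; judging by its name and the absence
    // of any REQUIREs, this test is a smoke test that the block compiles and runs without
    // asserting when GetNZCVFromOp is emitted in isolation.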
+ + jit.SetPC(0); + + env.ticks_left = 20; + jit.Run(); +} + TEST_CASE("A64: Optimization failure when folding ADD", "[a64]") { A64TestEnv env; A64::Jit jit{A64::UserConfig{&env}}; diff --git a/tests/test_generator.cpp b/tests/test_generator.cpp index 2d0cd032..58d5c088 100644 --- a/tests/test_generator.cpp +++ b/tests/test_generator.cpp @@ -6,7 +6,10 @@ #include #include #include +#include #include +#include +#include #include #include @@ -14,6 +17,7 @@ #include #include "./A32/testenv.h" +#include "./A64/testenv.h" #include "./fuzz_util.h" #include "./rand_int.h" #include "dynarmic/common/fp/fpcr.h" @@ -22,7 +26,11 @@ #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/frontend/A32/translate/a32_translate.h" +#include "dynarmic/frontend/A64/a64_location_descriptor.h" +#include "dynarmic/frontend/A64/a64_types.h" +#include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A32/a32.h" +#include "dynarmic/interface/A64/a64.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/location_descriptor.h" #include "dynarmic/ir/opcodes.h" @@ -36,21 +44,14 @@ constexpr bool mask_fpsr_cum_bits = true; namespace { using namespace Dynarmic; -bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A32::ITState it_state = {}) { - const A32::LocationDescriptor location = A32::LocationDescriptor{pc, {}, {}}.SetTFlag(is_thumb).SetIT(it_state); - IR::Block block{location}; - const bool should_continue = A32::TranslateSingleInstruction(block, location, instruction); - - if (!should_continue && !is_last_inst) { - return false; - } - +bool ShouldTestInst(IR::Block& block) { if (auto terminal = block.GetTerminal(); boost::get(&terminal)) { return false; } for (const auto& ir_inst : block) { switch (ir_inst.GetOpcode()) { + // A32 case IR::Opcode::A32GetFpscr: case IR::Opcode::A32ExceptionRaised: case IR::Opcode::A32CallSupervisor: @@ -61,7 +62,53 @@ bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A case IR::Opcode::A32CoprocGetTwoWords: case IR::Opcode::A32CoprocLoadWords: case IR::Opcode::A32CoprocStoreWords: + // A64 + case IR::Opcode::A64ExceptionRaised: + case IR::Opcode::A64CallSupervisor: + case IR::Opcode::A64DataCacheOperationRaised: + case IR::Opcode::A64GetCNTPCT: + // Unimplemented + case IR::Opcode::SignedSaturatedAdd8: + case IR::Opcode::SignedSaturatedAdd16: + case IR::Opcode::SignedSaturatedAdd32: + case IR::Opcode::SignedSaturatedAdd64: + case IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh16: + case IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh32: + case IR::Opcode::SignedSaturatedSub8: + case IR::Opcode::SignedSaturatedSub16: + case IR::Opcode::SignedSaturatedSub32: + case IR::Opcode::SignedSaturatedSub64: + case IR::Opcode::UnsignedSaturatedAdd8: + case IR::Opcode::UnsignedSaturatedAdd16: + case IR::Opcode::UnsignedSaturatedAdd32: + case IR::Opcode::UnsignedSaturatedAdd64: + case IR::Opcode::UnsignedSaturatedSub8: + case IR::Opcode::UnsignedSaturatedSub16: + case IR::Opcode::UnsignedSaturatedSub32: + case IR::Opcode::UnsignedSaturatedSub64: + case IR::Opcode::VectorMaxS64: + case IR::Opcode::VectorMaxU64: + case IR::Opcode::VectorMinS64: + case IR::Opcode::VectorMinU64: + case IR::Opcode::VectorMultiply64: + case IR::Opcode::SM4AccessSubstitutionBox: + // Half-prec conversions + case IR::Opcode::FPHalfToFixedS16: + case IR::Opcode::FPHalfToFixedS32: + case IR::Opcode::FPHalfToFixedS64: + case IR::Opcode::FPHalfToFixedU16: 
+        case IR::Opcode::FPHalfToFixedU32:
+        case IR::Opcode::FPHalfToFixedU64:
         // Half-precision
+        case IR::Opcode::FPAbs16:
+        case IR::Opcode::FPMulAdd16:
+        case IR::Opcode::FPNeg16:
+        case IR::Opcode::FPRecipEstimate16:
+        case IR::Opcode::FPRecipExponent16:
+        case IR::Opcode::FPRecipStepFused16:
+        case IR::Opcode::FPRoundInt16:
+        case IR::Opcode::FPRSqrtEstimate16:
+        case IR::Opcode::FPRSqrtStepFused16:
         case IR::Opcode::FPVectorAbs16:
         case IR::Opcode::FPVectorEqual16:
         case IR::Opcode::FPVectorMulAdd16:
@@ -84,6 +131,30 @@ bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A
     return true;
 }
 
+bool ShouldTestA32Inst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A32::ITState it_state = {}) {
+    const A32::LocationDescriptor location = A32::LocationDescriptor{pc, {}, {}}.SetTFlag(is_thumb).SetIT(it_state);
+    IR::Block block{location};
+    const bool should_continue = A32::TranslateSingleInstruction(block, location, instruction);
+
+    if (!should_continue && !is_last_inst) {
+        return false;
+    }
+
+    return ShouldTestInst(block);
+}
+
+bool ShouldTestA64Inst(u32 instruction, u64 pc, bool is_last_inst) {
+    const A64::LocationDescriptor location = A64::LocationDescriptor{pc, {}};
+    IR::Block block{location};
+    const bool should_continue = A64::TranslateSingleInstruction(block, location, instruction);
+
+    if (!should_continue && !is_last_inst) {
+        return false;
+    }
+
+    return ShouldTestInst(block);
+}
+
 u32 GenRandomArmInst(u32 pc, bool is_last_inst) {
     static const struct InstructionGeneratorInfo {
         std::vector generators;
         std::vector invalid;
     } instructions = [] {
@@ -144,7 +215,7 @@ u32 GenRandomArmInst(u32 pc, bool is_last_inst) {
             continue;
         }
 
-        if (ShouldTestInst(inst, pc, false, is_last_inst)) {
+        if (ShouldTestA32Inst(inst, pc, false, is_last_inst)) {
             return inst;
         }
     }
@@ -245,7 +316,7 @@ std::vector GenRandomThumbInst(u32 pc, bool is_last_inst, A32::ITState it_s
         const u32 inst = instructions.generators[index].Generate();
         const bool is_four_bytes = (inst >> 16) != 0;
 
-        if (ShouldTestInst(is_four_bytes ? mcl::bit::swap_halves_32(inst) : inst, pc, true, is_last_inst, it_state)) {
+        if (ShouldTestA32Inst(is_four_bytes ? mcl::bit::swap_halves_32(inst) : inst, pc, true, is_last_inst, it_state)) {
             if (is_four_bytes)
                 return {static_cast(inst >> 16), static_cast(inst)};
             return {static_cast(inst)};
@@ -253,8 +324,65 @@ std::vector GenRandomThumbInst(u32 pc, bool is_last_inst, A32::ITState it_s
     }
 }
 
+u32 GenRandomA64Inst(u64 pc, bool is_last_inst) {
+    static const struct InstructionGeneratorInfo {
+        std::vector generators;
+        std::vector invalid;
+    } instructions = [] {
+        const std::vector> list{
+#define INST(fn, name, bitstring) {#fn, bitstring},
+#include "dynarmic/frontend/A64/decoder/a64.inc"
+#undef INST
+        };
+
+        std::vector generators;
+        std::vector invalid;
+
+        // List of instructions not to test
+        const std::vector do_not_test{
+            // Dynarmic and QEMU currently differ on how the exclusive monitor's address range works.
+ "STXR", + "STLXR", + "STXP", + "STLXP", + "LDXR", + "LDAXR", + "LDXP", + "LDAXP", + // Behaviour differs from QEMU + "MSR_reg", + "MSR_imm", + "MRS", + }; + + for (const auto& [fn, bitstring] : list) { + if (fn == "UnallocatedEncoding") { + continue; + } + if (std::find(do_not_test.begin(), do_not_test.end(), fn) != do_not_test.end()) { + invalid.emplace_back(InstructionGenerator{bitstring}); + continue; + } + generators.emplace_back(InstructionGenerator{bitstring}); + } + return InstructionGeneratorInfo{generators, invalid}; + }(); + + while (true) { + const size_t index = RandInt(0, instructions.generators.size() - 1); + const u32 inst = instructions.generators[index].Generate(); + + if (std::any_of(instructions.invalid.begin(), instructions.invalid.end(), [inst](const auto& invalid) { return invalid.Match(inst); })) { + continue; + } + if (ShouldTestA64Inst(inst, pc, is_last_inst)) { + return inst; + } + } +} + template -Dynarmic::A32::UserConfig GetUserConfig(TestEnv& testenv) { +Dynarmic::A32::UserConfig GetA32UserConfig(TestEnv& testenv) { Dynarmic::A32::UserConfig user_config; user_config.optimizations &= ~OptimizationFlag::FastDispatch; user_config.callbacks = &testenv; @@ -262,14 +390,14 @@ Dynarmic::A32::UserConfig GetUserConfig(TestEnv& testenv) { } template -static void RunTestInstance(Dynarmic::A32::Jit& jit, - TestEnv& jit_env, - const std::array& regs, - const std::array& vecs, - const std::vector& instructions, - const u32 cpsr, - const u32 fpscr, - const size_t ticks_left) { +void RunTestInstance(Dynarmic::A32::Jit& jit, + TestEnv& jit_env, + const std::array& regs, + const std::array& vecs, + const std::vector& instructions, + const u32 cpsr, + const u32 fpscr, + const size_t ticks_left) { const u32 initial_pc = regs[15]; const u32 num_words = initial_pc / sizeof(typename TestEnv::InstructionType); const u32 code_mem_size = num_words + static_cast(instructions.size()); @@ -294,37 +422,37 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit, jit.Run(); } - fmt::print("instructions: "); + fmt::print("instructions:"); for (auto instruction : instructions) { if constexpr (sizeof(decltype(instruction)) == 2) { - fmt::print("{:04x} ", instruction); + fmt::print(" {:04x}", instruction); } else { - fmt::print("{:08x} ", instruction); + fmt::print(" {:08x}", instruction); } } fmt::print("\n"); - fmt::print("initial_regs: "); + fmt::print("initial_regs:"); for (u32 i : regs) { - fmt::print("{:08x} ", i); + fmt::print(" {:08x}", i); } fmt::print("\n"); - fmt::print("initial_vecs: "); + fmt::print("initial_vecs:"); for (u32 i : vecs) { - fmt::print("{:08x} ", i); + fmt::print(" {:08x}", i); } fmt::print("\n"); fmt::print("initial_cpsr: {:08x}\n", cpsr); fmt::print("initial_fpcr: {:08x}\n", fpscr); - fmt::print("final_regs: "); + fmt::print("final_regs:"); for (u32 i : jit.Regs()) { - fmt::print("{:08x} ", i); + fmt::print(" {:08x}", i); } fmt::print("\n"); - fmt::print("final_vecs: "); + fmt::print("final_vecs:"); for (u32 i : jit.ExtRegs()) { - fmt::print("{:08x} ", i); + fmt::print(" {:08x}", i); } fmt::print("\n"); fmt::print("final_cpsr: {:08x}\n", jit.Cpsr()); @@ -343,11 +471,104 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit, fmt::print("===\n"); } + +Dynarmic::A64::UserConfig GetA64UserConfig(A64TestEnv& jit_env) { + Dynarmic::A64::UserConfig jit_user_config{&jit_env}; + jit_user_config.optimizations &= ~OptimizationFlag::FastDispatch; + // The below corresponds to the settings for qemu's aarch64_max_initfn + jit_user_config.dczid_el0 = 7; + jit_user_config.ctr_el0 
= 0x80038003; + return jit_user_config; +} + +template +void RunTestInstance(Dynarmic::A64::Jit& jit, + A64TestEnv& jit_env, + const std::array& regs, + const std::array, 32>& vecs, + const std::vector& instructions, + const u32 pstate, + const u32 fpcr, + const u64 initial_sp, + const u64 start_address, + const size_t ticks_left) { + jit.ClearCache(); + + for (size_t jit_rerun_count = 0; jit_rerun_count < num_jit_reruns; ++jit_rerun_count) { + jit_env.code_mem = instructions; + jit_env.code_mem.emplace_back(0x14000000); // B . + jit_env.code_mem_start_address = start_address; + jit_env.modified_memory.clear(); + jit_env.interrupts.clear(); + + jit.SetRegisters(regs); + jit.SetVectors(vecs); + jit.SetPC(start_address); + jit.SetSP(initial_sp); + jit.SetFpcr(fpcr); + jit.SetFpsr(0); + jit.SetPstate(pstate); + jit.ClearCache(); + + jit_env.ticks_left = ticks_left; + jit.Run(); + } + + fmt::print("instructions:"); + for (u32 instruction : instructions) { + fmt::print(" {:08x}", instruction); + } + fmt::print("\n"); + + fmt::print("initial_regs:"); + for (u64 i : regs) { + fmt::print(" {:016x}", i); + } + fmt::print("\n"); + fmt::print("initial_vecs:"); + for (auto i : vecs) { + fmt::print(" {:016x}:{:016x}", i[0], i[1]); + } + fmt::print("\n"); + fmt::print("initial_sp: {:016x}\n", initial_sp); + fmt::print("initial_pstate: {:08x}\n", pstate); + fmt::print("initial_fpcr: {:08x}\n", fpcr); + + fmt::print("final_regs:"); + for (u64 i : jit.GetRegisters()) { + fmt::print(" {:016x}", i); + } + fmt::print("\n"); + fmt::print("final_vecs:"); + for (auto i : jit.GetVectors()) { + fmt::print(" {:016x}:{:016x}", i[0], i[1]); + } + fmt::print("\n"); + fmt::print("final_sp: {:016x}\n", jit.GetSP()); + fmt::print("final_pc: {:016x}\n", jit.GetPC()); + fmt::print("final_pstate: {:08x}\n", jit.GetPstate()); + fmt::print("final_fpcr: {:08x}\n", jit.GetFpcr()); + fmt::print("final_qc : {}\n", FP::FPSR{jit.GetFpsr()}.QC()); + + fmt::print("mod_mem:"); + for (auto [addr, value] : jit_env.modified_memory) { + fmt::print(" {:08x}:{:02x}", addr, value); + } + fmt::print("\n"); + + fmt::print("interrupts:\n"); + for (const auto& i : jit_env.interrupts) { + std::puts(i.c_str()); + } + + fmt::print("===\n"); +} + } // Anonymous namespace void TestThumb(size_t num_instructions, size_t num_iterations) { ThumbTestEnv jit_env{}; - Dynarmic::A32::Jit jit{GetUserConfig(jit_env)}; + Dynarmic::A32::Jit jit{GetA32UserConfig(jit_env)}; std::array regs; std::array ext_reg; @@ -374,7 +595,7 @@ void TestThumb(size_t num_instructions, size_t num_iterations) { void TestArm(size_t num_instructions, size_t num_iterations) { ArmTestEnv jit_env{}; - Dynarmic::A32::Jit jit{GetUserConfig(jit_env)}; + Dynarmic::A32::Jit jit{GetA32UserConfig(jit_env)}; std::array regs; std::array ext_reg; @@ -394,19 +615,76 @@ void TestArm(size_t num_instructions, size_t num_iterations) { } regs[15] = start_address; - RunTestInstance(jit, jit_env, regs, ext_reg, instructions, cpsr, fpcr, 1); + RunTestInstance(jit, jit_env, regs, ext_reg, instructions, cpsr, fpcr, num_instructions); } } -int main(int, char*[]) { - detail::g_rand_int_generator.seed(42069); +void TestA64(size_t num_instructions, size_t num_iterations) { + A64TestEnv jit_env{}; + Dynarmic::A64::Jit jit{GetA64UserConfig(jit_env)}; - TestThumb(1, 100000); - TestArm(1, 100000); - TestThumb(5, 100000); - TestArm(5, 100000); - TestThumb(1024, 10000); - TestArm(1024, 10000); + std::array regs; + std::array, 32> vecs; + std::vector instructions; + + for (size_t iteration = 0; iteration < 
num_iterations; ++iteration) { + std::generate(regs.begin(), regs.end(), [] { return RandInt(0, ~u64(0)); }); + std::generate(vecs.begin(), vecs.end(), RandomVector); + + const u32 start_address = 100; + const u32 pstate = (RandInt(0, 0xF) << 28); + const u32 fpcr = RandomFpcr(); + const u64 initial_sp = RandInt(0x30'0000'0000, 0x40'0000'0000) * 4; + + instructions.clear(); + for (size_t i = 0; i < num_instructions; ++i) { + instructions.emplace_back(GenRandomA64Inst(static_cast(start_address + 4 * instructions.size()), i == num_instructions - 1)); + } + + RunTestInstance(jit, jit_env, regs, vecs, instructions, pstate, fpcr, initial_sp, start_address, num_instructions); + } +} + +static std::optional str2sz(char const* s) { + char* end = nullptr; + errno = 0; + + const long l = std::strtol(s, &end, 10); + if (errno == ERANGE || l < 0) { + return std::nullopt; + } + if (*s == '\0' || *end != '\0') { + return std::nullopt; + } + return static_cast(l); +} + +int main(int argc, char* argv[]) { + if (argc != 5) { + fmt::print("Usage: {} \n", argv[0]); + } + + const auto seed = str2sz(argv[2]); + const auto instruction_count = str2sz(argv[3]); + const auto iterator_count = str2sz(argv[4]); + + if (!seed || !instruction_count || !iterator_count) { + fmt::print("invalid numeric arguments\n"); + return 1; + } + + detail::g_rand_int_generator.seed(static_cast(*seed)); + + if (strcmp(argv[1], "thumb") == 0) { + TestThumb(*instruction_count, *iterator_count); + } else if (strcmp(argv[1], "arm") == 0) { + TestArm(*instruction_count, *iterator_count); + } else if (strcmp(argv[1], "a64") == 0) { + TestA64(*instruction_count, *iterator_count); + } else { + fmt::print("unrecognized instruction class\n"); + return 1; + } return 0; }