Merge pull request #719 from liamwhite/a64

backend/arm64: A64 frontend
merry 2022-12-03 17:09:03 +00:00 committed by GitHub
commit a76a2fff53
29 changed files with 2322 additions and 711 deletions

@@ -48,7 +48,6 @@ jobs:
-DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
-DDYNARMIC_TESTS_USE_UNICORN=0
-DDYNARMIC_USE_LLVM=0
-DDYNARMIC_FRONTENDS=A32
-G Ninja
- name: Build AArch64
@@ -66,7 +65,6 @@ jobs:
-DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
-DCMAKE_C_COMPILER_LAUNCHER=ccache
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
-DDYNARMIC_FRONTENDS=A32
-DDYNARMIC_TESTS_USE_UNICORN=0
-DDYNARMIC_USE_LLVM=0
-G Ninja
@@ -79,6 +77,23 @@ jobs:
working-directory: ${{github.workspace}}
run: qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_tests -d yes
- name: Test against x86_64 implementation
- name: Test against x86_64 implementation (A32, thumb)
working-directory: ${{github.workspace}}
run: diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator) <(./build-x64/tests/dynarmic_test_generator)
run: |
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator thumb 42 1 100000) <(./build-x64/tests/dynarmic_test_generator thumb 42 1 100000)
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator thumb 42 10 10000) <(./build-x64/tests/dynarmic_test_generator thumb 42 10 10000)
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator thumb 42 100 1000) <(./build-x64/tests/dynarmic_test_generator thumb 42 100 1000)
- name: Test against x86_64 implementation (A32, arm)
working-directory: ${{github.workspace}}
run: |
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator arm 42 1 100000) <(./build-x64/tests/dynarmic_test_generator arm 42 1 100000)
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator arm 42 10 10000) <(./build-x64/tests/dynarmic_test_generator arm 42 10 10000)
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator arm 42 100 1000) <(./build-x64/tests/dynarmic_test_generator arm 42 100 1000)
- name: Test against x86_64 implementation (A64)
working-directory: ${{github.workspace}}
run: |
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator a64 42 1 100000) <(./build-x64/tests/dynarmic_test_generator a64 42 1 100000)
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator a64 42 10 10000) <(./build-x64/tests/dynarmic_test_generator a64 42 10 10000)
diff <(qemu-aarch64 -L /usr/aarch64-linux-gnu ./build-arm64/tests/dynarmic_test_generator a64 42 100 1000) <(./build-x64/tests/dynarmic_test_generator a64 42 100 1000)

@@ -373,6 +373,8 @@ elseif(ARCHITECTURE STREQUAL "arm64")
backend/arm64/a32_jitstate.h
backend/arm64/abi.cpp
backend/arm64/abi.h
backend/arm64/address_space.cpp
backend/arm64/address_space.h
backend/arm64/devirtualize.h
backend/arm64/emit_arm64.cpp
backend/arm64/emit_arm64.h
@@ -406,14 +408,16 @@ elseif(ARCHITECTURE STREQUAL "arm64")
backend/arm64/a32_address_space.h
backend/arm64/a32_core.h
backend/arm64/a32_interface.cpp
# Move this to the list below when implemented
backend/arm64/a64_interface.cpp
)
endif()
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture")
target_sources(dynarmic PRIVATE
backend/arm64/a64_address_space.cpp
backend/arm64/a64_address_space.h
backend/arm64/a64_core.h
backend/arm64/a64_interface.cpp
)
endif()
else()
message(FATAL_ERROR "Unsupported architecture")

@@ -14,6 +14,7 @@
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/frontend/A32/translate/a32_translate.h"
#include "dynarmic/interface/A32/config.h"
#include "dynarmic/interface/exclusive_monitor.h"
#include "dynarmic/ir/opt/passes.h"
@@ -97,9 +98,8 @@ static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const
}
A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
: conf(conf)
, mem(conf.code_cache_size)
, code(mem.ptr()) {
: AddressSpace(conf.code_cache_size)
, conf(conf) {
EmitPrelude();
}
@@ -121,33 +121,6 @@ IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
return ir_block;
}
CodePtr A32AddressSpace::Get(IR::LocationDescriptor descriptor) {
if (const auto iter = block_entries.find(descriptor.Value()); iter != block_entries.end()) {
return iter->second;
}
return nullptr;
}
CodePtr A32AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) {
if (CodePtr block_entry = Get(descriptor)) {
return block_entry;
}
IR::Block ir_block = GenerateIR(descriptor);
const EmittedBlockInfo block_info = Emit(std::move(ir_block));
block_infos.insert_or_assign(descriptor.Value(), block_info);
block_entries.insert_or_assign(descriptor.Value(), block_info.entry_point);
return block_info.entry_point;
}
void A32AddressSpace::ClearCache() {
block_entries.clear();
block_infos.clear();
block_references.clear();
code.set_ptr(prelude_info.end_of_prelude);
}
void A32AddressSpace::EmitPrelude() {
using namespace oaknut::util;
@@ -291,153 +264,33 @@ void A32AddressSpace::EmitPrelude() {
mem.protect();
}
size_t A32AddressSpace::GetRemainingSize() {
return conf.code_cache_size - (code.ptr<CodePtr>() - reinterpret_cast<CodePtr>(mem.ptr()));
}
EmitConfig A32AddressSpace::GetEmitConfig() {
return EmitConfig{
.optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,
EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) {
if (GetRemainingSize() < 1024 * 1024) {
ClearCache();
}
mem.unprotect();
const EmitConfig emit_conf{
.hook_isb = conf.hook_isb,
.cntfreq_el0{},
.ctr_el0{},
.dczid_el0{},
.tpidrro_el0{},
.tpidr_el0{},
.wall_clock_cntpct = conf.wall_clock_cntpct,
.enable_cycle_counting = conf.enable_cycle_counting,
.always_little_endian = conf.always_little_endian,
.descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return FP::FPCR{A32::LocationDescriptor{location}.FPSCR().Value()}; },
.emit_cond = EmitA32Cond,
.emit_condition_failed_terminal = EmitA32ConditionFailedTerminal,
.emit_terminal = EmitA32Terminal,
.state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv),
.state_fpsr_offset = offsetof(A32JitState, fpsr),
.coprocessors = conf.coprocessors,
.optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,
};
EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf);
Link(block.Location(), block_info);
mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
RelinkForDescriptor(block.Location());
mem.protect();
return block_info;
}
static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector<BlockRelocation>& block_relocations_list) {
using namespace oaknut;
using namespace oaknut::util;
for (auto [ptr_offset] : block_relocations_list) {
CodeGenerator c{reinterpret_cast<u32*>(entry_point + ptr_offset)};
if (target_ptr) {
c.B((void*)target_ptr);
} else {
c.NOP();
}
}
}
void A32AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block_info) {
using namespace oaknut;
using namespace oaknut::util;
for (auto [ptr_offset, target] : block_info.relocations) {
CodeGenerator c{reinterpret_cast<u32*>(block_info.entry_point + ptr_offset)};
switch (target) {
case LinkTarget::ReturnToDispatcher:
c.B(prelude_info.return_to_dispatcher);
break;
case LinkTarget::ReturnFromRunCode:
c.B(prelude_info.return_from_run_code);
break;
case LinkTarget::ReadMemory8:
c.BL(prelude_info.read_memory_8);
break;
case LinkTarget::ReadMemory16:
c.BL(prelude_info.read_memory_16);
break;
case LinkTarget::ReadMemory32:
c.BL(prelude_info.read_memory_32);
break;
case LinkTarget::ReadMemory64:
c.BL(prelude_info.read_memory_64);
break;
case LinkTarget::ExclusiveReadMemory8:
c.BL(prelude_info.exclusive_read_memory_8);
break;
case LinkTarget::ExclusiveReadMemory16:
c.BL(prelude_info.exclusive_read_memory_16);
break;
case LinkTarget::ExclusiveReadMemory32:
c.BL(prelude_info.exclusive_read_memory_32);
break;
case LinkTarget::ExclusiveReadMemory64:
c.BL(prelude_info.exclusive_read_memory_64);
break;
case LinkTarget::WriteMemory8:
c.BL(prelude_info.write_memory_8);
break;
case LinkTarget::WriteMemory16:
c.BL(prelude_info.write_memory_16);
break;
case LinkTarget::WriteMemory32:
c.BL(prelude_info.write_memory_32);
break;
case LinkTarget::WriteMemory64:
c.BL(prelude_info.write_memory_64);
break;
case LinkTarget::ExclusiveWriteMemory8:
c.BL(prelude_info.exclusive_write_memory_8);
break;
case LinkTarget::ExclusiveWriteMemory16:
c.BL(prelude_info.exclusive_write_memory_16);
break;
case LinkTarget::ExclusiveWriteMemory32:
c.BL(prelude_info.exclusive_write_memory_32);
break;
case LinkTarget::ExclusiveWriteMemory64:
c.BL(prelude_info.exclusive_write_memory_64);
break;
case LinkTarget::CallSVC:
c.BL(prelude_info.call_svc);
break;
case LinkTarget::ExceptionRaised:
c.BL(prelude_info.exception_raised);
break;
case LinkTarget::InstructionSynchronizationBarrierRaised:
c.BL(prelude_info.isb_raised);
break;
case LinkTarget::AddTicks:
c.BL(prelude_info.add_ticks);
break;
case LinkTarget::GetTicksRemaining:
c.BL(prelude_info.get_ticks_remaining);
break;
default:
ASSERT_FALSE("Invalid relocation target");
}
}
for (auto [target_descriptor, list] : block_info.block_relocations) {
block_references[target_descriptor.Value()].emplace(block_descriptor.Value());
LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list);
}
}
void A32AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor) {
for (auto block_descriptor : block_references[target_descriptor.Value()]) {
if (auto iter = block_infos.find(block_descriptor); iter != block_infos.end()) {
const EmittedBlockInfo& block_info = iter->second;
LinkBlockLinks(block_info.entry_point, Get(target_descriptor), block_infos[block_descriptor].block_relocations[target_descriptor]);
mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
}
}
}
} // namespace Dynarmic::Backend::Arm64

@@ -5,84 +5,24 @@
#pragma once
#include <mcl/stdint.hpp>
#include <oaknut/code_block.hpp>
#include <oaknut/oaknut.hpp>
#include <tsl/robin_map.h>
#include <tsl/robin_set.h>
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/address_space.h"
#include "dynarmic/interface/A32/config.h"
#include "dynarmic/interface/halt_reason.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/location_descriptor.h"
namespace Dynarmic::Backend::Arm64 {
struct A32JitState;
class A32AddressSpace final {
class A32AddressSpace final : public AddressSpace {
public:
explicit A32AddressSpace(const A32::UserConfig& conf);
IR::Block GenerateIR(IR::LocationDescriptor) const;
IR::Block GenerateIR(IR::LocationDescriptor) const override;
CodePtr Get(IR::LocationDescriptor descriptor);
CodePtr GetOrEmit(IR::LocationDescriptor descriptor);
void ClearCache();
private:
protected:
friend class A32Core;
void EmitPrelude();
size_t GetRemainingSize();
EmittedBlockInfo Emit(IR::Block ir_block);
void Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block);
void RelinkForDescriptor(IR::LocationDescriptor target_descriptor);
EmitConfig GetEmitConfig() override;
const A32::UserConfig conf;
oaknut::CodeBlock mem;
oaknut::CodeGenerator code;
tsl::robin_map<u64, CodePtr> block_entries;
tsl::robin_map<u64, EmittedBlockInfo> block_infos;
tsl::robin_map<u64, tsl::robin_set<u64>> block_references;
struct PreludeInfo {
u32* end_of_prelude;
using RunCodeFuncType = HaltReason (*)(CodePtr entry_point, A32JitState* context, volatile u32* halt_reason);
RunCodeFuncType run_code;
RunCodeFuncType step_code;
void* return_to_dispatcher;
void* return_from_run_code;
void* read_memory_8;
void* read_memory_16;
void* read_memory_32;
void* read_memory_64;
void* exclusive_read_memory_8;
void* exclusive_read_memory_16;
void* exclusive_read_memory_32;
void* exclusive_read_memory_64;
void* write_memory_8;
void* write_memory_16;
void* write_memory_32;
void* write_memory_64;
void* exclusive_write_memory_8;
void* exclusive_write_memory_16;
void* exclusive_write_memory_32;
void* exclusive_write_memory_64;
void* call_svc;
void* exception_raised;
void* isb_raised;
void* add_ticks;
void* get_ticks_remaining;
} prelude_info;
};
} // namespace Dynarmic::Backend::Arm64
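
The refactor above is a template-method split: the new AddressSpace base class (added later in this commit) owns the code cache, the block maps and all linking machinery, while each frontend only contributes IR generation and its emit configuration. A minimal sketch of that relationship, with simplified and partly hypothetical declarations rather than the real dynarmic ones:

// Sketch only: simplified declarations illustrating the base/derived split.
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/location_descriptor.h"

namespace Dynarmic::Backend::Arm64 {

class AddressSpaceSketch {
public:
    virtual ~AddressSpaceSketch() = default;

    // Shared slow path: look up a compiled block, emitting it on a miss.
    CodePtr GetOrEmit(IR::LocationDescriptor descriptor) {
        if (CodePtr entry = Get(descriptor)) {
            return entry;
        }
        return Emit(GenerateIR(descriptor)).entry_point;
    }

protected:
    virtual IR::Block GenerateIR(IR::LocationDescriptor) const = 0;  // A32 or A64 translator
    virtual EmitConfig GetEmitConfig() = 0;                          // frontend-specific hooks and offsets
    CodePtr Get(IR::LocationDescriptor descriptor);
    EmittedBlockInfo Emit(IR::Block block);
};

}  // namespace Dynarmic::Backend::Arm64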

@@ -0,0 +1,416 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/arm64/a64_address_space.h"
#include "dynarmic/backend/arm64/a64_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/devirtualize.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/stack_layout.h"
#include "dynarmic/common/cast_util.h"
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/frontend/A64/a64_location_descriptor.h"
#include "dynarmic/frontend/A64/translate/a64_translate.h"
#include "dynarmic/interface/A64/config.h"
#include "dynarmic/interface/exclusive_monitor.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Backend::Arm64 {
template<auto mfp, typename T>
static void* EmitCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<mfp>(this_);
oaknut::Label l_addr, l_this;
void* target = code.ptr<void*>();
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
template<auto callback, typename T>
static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr) -> T {
return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
return (conf.callbacks->*callback)(vaddr);
});
};
void* target = code.ptr<void*>();
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}
template<auto callback, typename T>
static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, T value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
[&](T expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected);
})
? 0
: 1;
};
void* target = code.ptr<void*>();
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}
static void* EmitRead128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<&A64::UserCallbacks::MemoryRead128>(this_);
oaknut::Label l_addr, l_this;
void* target = code.ptr<void*>();
ABI_PushRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector));
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
code.STP(X0, X1, SP);
code.LDR(Q0, SP);
ABI_PopRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector));
code.RET();
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
static void* EmitExclusiveRead128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr) -> Vector {
return conf.global_monitor->ReadAndMark<Vector>(conf.processor_id, vaddr, [&]() -> Vector {
return conf.callbacks->MemoryRead128(vaddr);
});
};
void* target = code.ptr<void*>();
ABI_PushRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector));
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
code.STP(X0, X1, SP);
code.LDR(Q0, SP);
ABI_PopRegisters(code, (1ull << 29) | (1ull << 30), sizeof(Vector));
code.RET();
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}
static void* EmitWrite128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<&A64::UserCallbacks::MemoryWrite128>(this_);
oaknut::Label l_addr, l_this;
void* target = code.ptr<void*>();
code.FMOV(X2, D0);
code.FMOV(X3, V0.D()[1]);
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
static void* EmitExclusiveWrite128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, Vector value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr,
[&](Vector expected) -> bool {
return conf.callbacks->MemoryWriteExclusive128(vaddr, value, expected);
})
? 0
: 1;
};
void* target = code.ptr<void*>();
code.FMOV(X2, D0);
code.FMOV(X3, V0.D()[1]);
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}
A64AddressSpace::A64AddressSpace(const A64::UserConfig& conf)
: AddressSpace(conf.code_cache_size)
, conf(conf) {
EmitPrelude();
}
IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code,
{conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
Optimization::A64CallbackConfigPass(ir_block, conf);
if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
Optimization::A64GetSetElimination(ir_block);
Optimization::DeadCodeElimination(ir_block);
}
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
}
if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) {
Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
}
Optimization::VerificationPass(ir_block);
return ir_block;
}
void A64AddressSpace::EmitPrelude() {
using namespace oaknut::util;
mem.unprotect();
prelude_info.read_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead8>(code, conf.callbacks);
prelude_info.read_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead16>(code, conf.callbacks);
prelude_info.read_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks);
prelude_info.read_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks);
prelude_info.read_memory_128 = EmitRead128CallTrampoline(code, conf.callbacks);
prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead8, u8>(code, conf);
prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead16, u16>(code, conf);
prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead32, u32>(code, conf);
prelude_info.exclusive_read_memory_64 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead64, u64>(code, conf);
prelude_info.exclusive_read_memory_128 = EmitExclusiveRead128CallTrampoline(code, conf);
prelude_info.write_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
prelude_info.write_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
prelude_info.write_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
prelude_info.write_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
prelude_info.write_memory_128 = EmitWrite128CallTrampoline(code, conf.callbacks);
prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf);
prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf);
prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf);
prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf);
prelude_info.exclusive_write_memory_128 = EmitExclusiveWrite128CallTrampoline(code, conf);
prelude_info.call_svc = EmitCallTrampoline<&A64::UserCallbacks::CallSVC>(code, conf.callbacks);
prelude_info.exception_raised = EmitCallTrampoline<&A64::UserCallbacks::ExceptionRaised>(code, conf.callbacks);
prelude_info.isb_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks);
prelude_info.ic_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionCacheOperationRaised>(code, conf.callbacks);
prelude_info.dc_raised = EmitCallTrampoline<&A64::UserCallbacks::DataCacheOperationRaised>(code, conf.callbacks);
prelude_info.get_cntpct = EmitCallTrampoline<&A64::UserCallbacks::GetCNTPCT>(code, conf.callbacks);
prelude_info.add_ticks = EmitCallTrampoline<&A64::UserCallbacks::AddTicks>(code, conf.callbacks);
prelude_info.get_ticks_remaining = EmitCallTrampoline<&A64::UserCallbacks::GetTicksRemaining>(code, conf.callbacks);
oaknut::Label return_from_run_code;
prelude_info.run_code = code.ptr<PreludeInfo::RunCodeFuncType>();
{
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
code.MOV(X19, X0);
code.MOV(Xstate, X1);
code.MOV(Xhalt, X2);
if (conf.enable_cycle_counting) {
code.BL(prelude_info.get_ticks_remaining);
code.MOV(Xticks, X0);
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
}
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
code.LDAR(Wscratch0, Xhalt);
code.CBNZ(Wscratch0, return_from_run_code);
code.BR(X19);
}
prelude_info.step_code = code.ptr<PreludeInfo::RunCodeFuncType>();
{
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
code.MOV(X19, X0);
code.MOV(Xstate, X1);
code.MOV(Xhalt, X2);
if (conf.enable_cycle_counting) {
code.MOV(Xticks, 1);
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
}
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
oaknut::Label step_hr_loop;
code.l(step_hr_loop);
code.LDAXR(Wscratch0, Xhalt);
code.CBNZ(Wscratch0, return_from_run_code);
code.ORR(Wscratch0, Wscratch0, static_cast<u32>(HaltReason::Step));
code.STLXR(Wscratch1, Wscratch0, Xhalt);
code.CBNZ(Wscratch1, step_hr_loop);
code.BR(X19);
}
prelude_info.return_to_dispatcher = code.ptr<void*>();
{
oaknut::Label l_this, l_addr;
code.LDAR(Wscratch0, Xhalt);
code.CBNZ(Wscratch0, return_from_run_code);
if (conf.enable_cycle_counting) {
code.CMP(Xticks, 0);
code.B(LE, return_from_run_code);
}
code.LDR(X0, l_this);
code.MOV(X1, Xstate);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
code.BR(X0);
const auto fn = [](A64AddressSpace& self, A64JitState& context) -> CodePtr {
return self.GetOrEmit(context.GetLocationDescriptor());
};
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(this));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
}
prelude_info.return_from_run_code = code.ptr<void*>();
{
code.l(return_from_run_code);
if (conf.enable_cycle_counting) {
code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
code.SUB(X1, X1, Xticks);
code.BL(prelude_info.add_ticks);
}
code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr));
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
oaknut::Label exit_hr_loop;
code.l(exit_hr_loop);
code.LDAXR(W0, Xhalt);
code.STLXR(Wscratch0, WZR, Xhalt);
code.CBNZ(Wscratch0, exit_hr_loop);
ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
code.RET();
}
prelude_info.end_of_prelude = code.ptr<u32*>();
mem.invalidate_all();
mem.protect();
}
EmitConfig A64AddressSpace::GetEmitConfig() {
return EmitConfig{
.optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,
.hook_isb = conf.hook_isb,
.cntfreq_el0 = conf.cntfrq_el0,
.ctr_el0 = conf.ctr_el0,
.dczid_el0 = conf.dczid_el0,
.tpidrro_el0 = conf.tpidrro_el0,
.tpidr_el0 = conf.tpidr_el0,
.wall_clock_cntpct = conf.wall_clock_cntpct,
.enable_cycle_counting = conf.enable_cycle_counting,
.always_little_endian = true,
.descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return A64::LocationDescriptor{location}.FPCR(); },
.emit_cond = EmitA64Cond,
.emit_condition_failed_terminal = EmitA64ConditionFailedTerminal,
.emit_terminal = EmitA64Terminal,
.state_nzcv_offset = offsetof(A64JitState, cpsr_nzcv),
.state_fpsr_offset = offsetof(A64JitState, fpsr),
.coprocessors{},
};
}
} // namespace Dynarmic::Backend::Arm64
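
Every prelude helper above is emitted with the same trampoline pattern: JIT-ed blocks reach it through a plain BL, and the trampoline loads the callback object and the devirtualized member-function pointer from an inline literal pool (the code.align(8) / code.l() / code.dx() sequence) before branching on. In plain C++ terms a read trampoline boils down to the thunk below; the function name and the assumption that PrepareForCall leaves the address in X1 are illustrative, not taken from the source:

#include <cstdint>

#include "dynarmic/interface/A64/config.h"

// Hypothetical equivalent of prelude_info.read_memory_32 once its literal
// pool is resolved: X0 carries the callbacks object, X1 carries the address.
static std::uint32_t ReadMemory32Thunk(Dynarmic::A64::UserCallbacks* callbacks, std::uint64_t vaddr) {
    return callbacks->MemoryRead32(vaddr);  // the real trampoline calls a devirtualized pointer
}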

@@ -0,0 +1,28 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include "dynarmic/backend/arm64/address_space.h"
#include "dynarmic/interface/A64/config.h"
namespace Dynarmic::Backend::Arm64 {
class A64AddressSpace final : public AddressSpace {
public:
explicit A64AddressSpace(const A64::UserConfig& conf);
IR::Block GenerateIR(IR::LocationDescriptor) const override;
protected:
friend class A64Core;
void EmitPrelude();
EmitConfig GetEmitConfig() override;
const A64::UserConfig conf;
};
} // namespace Dynarmic::Backend::Arm64

@@ -0,0 +1,30 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include "dynarmic/backend/arm64/a64_address_space.h"
#include "dynarmic/backend/arm64/a64_jitstate.h"
namespace Dynarmic::Backend::Arm64 {
class A64Core final {
public:
explicit A64Core(const A64::UserConfig&) {}
HaltReason Run(A64AddressSpace& process, A64JitState& thread_ctx, volatile u32* halt_reason) {
const auto location_descriptor = thread_ctx.GetLocationDescriptor();
const auto entry_point = process.GetOrEmit(location_descriptor);
return process.prelude_info.run_code(entry_point, &thread_ctx, halt_reason);
}
HaltReason Step(A64AddressSpace& process, A64JitState& thread_ctx, volatile u32* halt_reason) {
const auto location_descriptor = A64::LocationDescriptor{thread_ctx.GetLocationDescriptor()}.SetSingleStepping(true);
const auto entry_point = process.GetOrEmit(location_descriptor);
return process.prelude_info.step_code(entry_point, &thread_ctx, halt_reason);
}
};
} // namespace Dynarmic::Backend::Arm64

@@ -1,5 +1,5 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2021 MerryMage
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
@@ -11,136 +11,310 @@
#include <mcl/scope_exit.hpp>
#include <mcl/stdint.hpp>
#include "dynarmic/backend/arm64/a64_address_space.h"
#include "dynarmic/backend/arm64/a64_core.h"
#include "dynarmic/backend/arm64/a64_jitstate.h"
#include "dynarmic/common/atomic.h"
#include "dynarmic/interface/A64/a64.h"
#include "dynarmic/interface/A64/config.h"
namespace Dynarmic::A64 {
struct Jit::Impl {};
using namespace Backend::Arm64;
Jit::Jit(UserConfig conf) {
(void)conf;
struct Jit::Impl final {
Impl(Jit*, A64::UserConfig conf)
: conf(conf)
, current_address_space(conf)
, core(conf) {}
HaltReason Run() {
ASSERT(!is_executing);
PerformRequestedCacheInvalidation();
is_executing = true;
SCOPE_EXIT {
is_executing = false;
};
HaltReason hr = core.Run(current_address_space, current_state, &halt_reason);
PerformRequestedCacheInvalidation();
return hr;
}
HaltReason Step() {
ASSERT(!is_executing);
PerformRequestedCacheInvalidation();
is_executing = true;
SCOPE_EXIT {
is_executing = false;
};
HaltReason hr = core.Step(current_address_space, current_state, &halt_reason);
PerformRequestedCacheInvalidation();
return hr;
}
void ClearCache() {
std::unique_lock lock{invalidation_mutex};
invalidate_entire_cache = true;
HaltExecution(HaltReason::CacheInvalidation);
}
void InvalidateCacheRange(std::uint64_t start_address, std::size_t length) {
std::unique_lock lock{invalidation_mutex};
invalid_cache_ranges.add(boost::icl::discrete_interval<u64>::closed(start_address, start_address + length - 1));
HaltExecution(HaltReason::CacheInvalidation);
}
void Reset() {
current_state = {};
}
void HaltExecution(HaltReason hr) {
Atomic::Or(&halt_reason, static_cast<u32>(hr));
}
void ClearHalt(HaltReason hr) {
Atomic::And(&halt_reason, ~static_cast<u32>(hr));
}
std::uint64_t PC() const {
return current_state.pc;
}
void SetPC(std::uint64_t value) {
current_state.pc = value;
}
std::uint64_t SP() const {
return current_state.sp;
}
void SetSP(std::uint64_t value) {
current_state.sp = value;
}
std::array<std::uint64_t, 31>& Regs() {
return current_state.reg;
}
const std::array<std::uint64_t, 31>& Regs() const {
return current_state.reg;
}
std::array<std::uint64_t, 64>& VecRegs() {
return current_state.vec;
}
const std::array<std::uint64_t, 64>& VecRegs() const {
return current_state.vec;
}
std::uint32_t Fpcr() const {
return current_state.fpcr;
}
void SetFpcr(std::uint32_t value) {
current_state.fpcr = value;
}
std::uint32_t Fpsr() const {
return current_state.fpsr;
}
void SetFpsr(std::uint32_t value) {
current_state.fpsr = value;
}
std::uint32_t Pstate() const {
return current_state.cpsr_nzcv;
}
void SetPstate(std::uint32_t value) {
current_state.cpsr_nzcv = value;
}
void ClearExclusiveState() {
current_state.exclusive_state = false;
}
bool IsExecuting() const {
return is_executing;
}
void DumpDisassembly() const {
ASSERT_FALSE("Unimplemented");
}
std::vector<std::string> Disassemble() const {
ASSERT_FALSE("Unimplemented");
}
private:
void PerformRequestedCacheInvalidation() {
ClearHalt(HaltReason::CacheInvalidation);
if (invalidate_entire_cache) {
current_address_space.ClearCache();
invalidate_entire_cache = false;
invalid_cache_ranges.clear();
return;
}
if (!invalid_cache_ranges.empty()) {
// TODO: Optimize
current_address_space.ClearCache();
invalid_cache_ranges.clear();
return;
}
}
A64::UserConfig conf;
A64JitState current_state{};
A64AddressSpace current_address_space;
A64Core core;
volatile u32 halt_reason = 0;
std::mutex invalidation_mutex;
boost::icl::interval_set<u64> invalid_cache_ranges;
bool invalidate_entire_cache = false;
bool is_executing = false;
};
Jit::Jit(UserConfig conf)
: impl{std::make_unique<Jit::Impl>(this, conf)} {
}
Jit::~Jit() = default;
HaltReason Jit::Run() {
ASSERT_FALSE("not implemented");
return impl->Run();
}
HaltReason Jit::Step() {
ASSERT_FALSE("not implemented");
return impl->Step();
}
void Jit::ClearCache() {
impl->ClearCache();
}
void Jit::InvalidateCacheRange(std::uint64_t start_address, std::size_t length) {
(void)start_address;
(void)length;
impl->InvalidateCacheRange(start_address, length);
}
void Jit::Reset() {
impl->Reset();
}
void Jit::HaltExecution(HaltReason hr) {
(void)hr;
impl->HaltExecution(hr);
}
void Jit::ClearHalt(HaltReason hr) {
(void)hr;
impl->ClearHalt(hr);
}
std::uint64_t Jit::GetSP() const {
return 0;
return impl->SP();
}
void Jit::SetSP(std::uint64_t value) {
(void)value;
impl->SetSP(value);
}
std::uint64_t Jit::GetPC() const {
return 0;
return impl->PC();
}
void Jit::SetPC(std::uint64_t value) {
(void)value;
impl->SetPC(value);
}
std::uint64_t Jit::GetRegister(std::size_t index) const {
(void)index;
return 0;
return impl->Regs()[index];
}
void Jit::SetRegister(size_t index, std::uint64_t value) {
(void)index;
(void)value;
impl->Regs()[index] = value;
}
std::array<std::uint64_t, 31> Jit::GetRegisters() const {
return {};
return impl->Regs();
}
void Jit::SetRegisters(const std::array<std::uint64_t, 31>& value) {
(void)value;
impl->Regs() = value;
}
Vector Jit::GetVector(std::size_t index) const {
(void)index;
return {};
auto& vec = impl->VecRegs();
return {vec[index * 2], vec[index * 2 + 1]};
}
void Jit::SetVector(std::size_t index, Vector value) {
(void)index;
(void)value;
auto& vec = impl->VecRegs();
vec[index * 2] = value[0];
vec[index * 2 + 1] = value[1];
}
std::array<Vector, 32> Jit::GetVectors() const {
return {};
std::array<Vector, 32> ret;
std::memcpy(ret.data(), impl->VecRegs().data(), sizeof(ret));
return ret;
}
void Jit::SetVectors(const std::array<Vector, 32>& value) {
(void)value;
std::memcpy(impl->VecRegs().data(), value.data(), sizeof(value));
}
std::uint32_t Jit::GetFpcr() const {
return 0;
return impl->Fpcr();
}
void Jit::SetFpcr(std::uint32_t value) {
(void)value;
impl->SetFpcr(value);
}
std::uint32_t Jit::GetFpsr() const {
return 0;
return impl->Fpsr();
}
void Jit::SetFpsr(std::uint32_t value) {
(void)value;
impl->SetFpsr(value);
}
std::uint32_t Jit::GetPstate() const {
return 0;
return impl->Pstate();
}
void Jit::SetPstate(std::uint32_t value) {
(void)value;
impl->SetPstate(value);
}
void Jit::ClearExclusiveState() {
impl->ClearExclusiveState();
}
bool Jit::IsExecuting() const {
return false;
return impl->IsExecuting();
}
void Jit::DumpDisassembly() const {
ASSERT_FALSE("not implemented");
impl->DumpDisassembly();
}
std::vector<std::string> Jit::Disassemble() const {
ASSERT_FALSE("not implemented");
return impl->Disassemble();
}
} // namespace Dynarmic::A64
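
With the stubs replaced by a working Impl, the public A64 API can now be exercised on an arm64 host exactly as on x86_64. A minimal, hypothetical driver, assuming an Env type that implements A64::UserCallbacks elsewhere (memory, timing and exception callbacks omitted for brevity); only entry points defined in this file are used:

#include <cstdint>

#include "dynarmic/interface/A64/a64.h"
#include "dynarmic/interface/A64/config.h"

// env is assumed to be an instance of a class deriving from Dynarmic::A64::UserCallbacks.
Dynarmic::HaltReason RunGuest(Dynarmic::A64::UserCallbacks& env, std::uint64_t entry_pc) {
    Dynarmic::A64::UserConfig config{};
    config.callbacks = &env;

    Dynarmic::A64::Jit jit{config};
    jit.SetPC(entry_pc);
    jit.SetSP(0x10000000);
    jit.SetRegister(0, 42);                     // X0 = 42 on entry

    const Dynarmic::HaltReason hr = jit.Run();  // executes until a halt is requested

    const std::uint64_t x0 = jit.GetRegister(0);
    (void)x0;
    return hr;
}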

@ -0,0 +1,37 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <array>
#include <mcl/stdint.hpp>
#include "dynarmic/frontend/A64/a64_location_descriptor.h"
namespace Dynarmic::Backend::Arm64 {
struct A64JitState {
std::array<u64, 31> reg{};
u64 sp = 0;
u64 pc = 0;
u32 cpsr_nzcv = 0;
alignas(16) std::array<u64, 64> vec{};
u32 exclusive_state = 0;
u32 fpsr = 0;
u32 fpcr = 0;
IR::LocationDescriptor GetLocationDescriptor() const {
const u64 fpcr_u64 = static_cast<u64>(fpcr & A64::LocationDescriptor::fpcr_mask) << A64::LocationDescriptor::fpcr_shift;
const u64 pc_u64 = pc & A64::LocationDescriptor::pc_mask;
return IR::LocationDescriptor{pc_u64 | fpcr_u64};
}
};
} // namespace Dynarmic::Backend::Arm64

@@ -55,13 +55,15 @@ static FrameInfo CalculateFrameInfo(RegisterList rl, size_t frame_size) {
};
}
#define DO_IT(TYPE, REG_TYPE, PAIR_OP, SINGLE_OP, OFFSET) \
for (size_t i = 0; i < frame_info.TYPE##s.size() - 1; i += 2) { \
code.PAIR_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, oaknut::REG_TYPE{frame_info.TYPE##s[i + 1]}, SP, (OFFSET) + i * TYPE##_size); \
} \
if (frame_info.TYPE##s.size() % 2 == 1) { \
const size_t i = frame_info.TYPE##s.size() - 1; \
code.SINGLE_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, SP, (OFFSET) + i * TYPE##_size); \
#define DO_IT(TYPE, REG_TYPE, PAIR_OP, SINGLE_OP, OFFSET) \
if (frame_info.TYPE##s.size() > 0) { \
for (size_t i = 0; i < frame_info.TYPE##s.size() - 1; i += 2) { \
code.PAIR_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, oaknut::REG_TYPE{frame_info.TYPE##s[i + 1]}, SP, (OFFSET) + i * TYPE##_size); \
} \
if (frame_info.TYPE##s.size() % 2 == 1) { \
const size_t i = frame_info.TYPE##s.size() - 1; \
code.SINGLE_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, SP, (OFFSET) + i * TYPE##_size); \
} \
}
void ABI_PushRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t frame_size) {
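
The new emptiness check matters because the register lists are sized with size_t: for an empty list, size() - 1 wraps around to SIZE_MAX, so the previously unguarded pair loop would iterate (and emit stores) instead of doing nothing. A tiny, stand-alone illustration of the wrap-around:

#include <cstdio>
#include <vector>

int main() {
    std::vector<int> regs;                  // empty register list
    std::printf("%zu\n", regs.size() - 1);  // prints SIZE_MAX, not -1
    return 0;
}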

@@ -0,0 +1,213 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/arm64/a64_address_space.h"
#include "dynarmic/backend/arm64/a64_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/devirtualize.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/stack_layout.h"
#include "dynarmic/common/cast_util.h"
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/interface/exclusive_monitor.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Backend::Arm64 {
AddressSpace::AddressSpace(size_t code_cache_size)
: code_cache_size(code_cache_size)
, mem(code_cache_size)
, code(mem.ptr()) {}
AddressSpace::~AddressSpace() = default;
CodePtr AddressSpace::Get(IR::LocationDescriptor descriptor) {
if (const auto iter = block_entries.find(descriptor.Value()); iter != block_entries.end()) {
return iter->second;
}
return nullptr;
}
CodePtr AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) {
if (CodePtr block_entry = Get(descriptor)) {
return block_entry;
}
IR::Block ir_block = GenerateIR(descriptor);
const EmittedBlockInfo block_info = Emit(std::move(ir_block));
block_infos.insert_or_assign(descriptor.Value(), block_info);
block_entries.insert_or_assign(descriptor.Value(), block_info.entry_point);
return block_info.entry_point;
}
void AddressSpace::ClearCache() {
block_entries.clear();
block_infos.clear();
block_references.clear();
code.set_ptr(prelude_info.end_of_prelude);
}
size_t AddressSpace::GetRemainingSize() {
return code_cache_size - (code.ptr<CodePtr>() - reinterpret_cast<CodePtr>(mem.ptr()));
}
EmittedBlockInfo AddressSpace::Emit(IR::Block block) {
if (GetRemainingSize() < 1024 * 1024) {
ClearCache();
}
mem.unprotect();
EmittedBlockInfo block_info = EmitArm64(code, std::move(block), GetEmitConfig());
Link(block.Location(), block_info);
mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
RelinkForDescriptor(block.Location());
mem.protect();
return block_info;
}
static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector<BlockRelocation>& block_relocations_list) {
using namespace oaknut;
using namespace oaknut::util;
for (auto [ptr_offset] : block_relocations_list) {
CodeGenerator c{reinterpret_cast<u32*>(entry_point + ptr_offset)};
if (target_ptr) {
c.B((void*)target_ptr);
} else {
c.NOP();
}
}
}
void AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block_info) {
using namespace oaknut;
using namespace oaknut::util;
for (auto [ptr_offset, target] : block_info.relocations) {
CodeGenerator c{reinterpret_cast<u32*>(block_info.entry_point + ptr_offset)};
switch (target) {
case LinkTarget::ReturnToDispatcher:
c.B(prelude_info.return_to_dispatcher);
break;
case LinkTarget::ReturnFromRunCode:
c.B(prelude_info.return_from_run_code);
break;
case LinkTarget::ReadMemory8:
c.BL(prelude_info.read_memory_8);
break;
case LinkTarget::ReadMemory16:
c.BL(prelude_info.read_memory_16);
break;
case LinkTarget::ReadMemory32:
c.BL(prelude_info.read_memory_32);
break;
case LinkTarget::ReadMemory64:
c.BL(prelude_info.read_memory_64);
break;
case LinkTarget::ReadMemory128:
c.BL(prelude_info.read_memory_128);
break;
case LinkTarget::ExclusiveReadMemory8:
c.BL(prelude_info.exclusive_read_memory_8);
break;
case LinkTarget::ExclusiveReadMemory16:
c.BL(prelude_info.exclusive_read_memory_16);
break;
case LinkTarget::ExclusiveReadMemory32:
c.BL(prelude_info.exclusive_read_memory_32);
break;
case LinkTarget::ExclusiveReadMemory64:
c.BL(prelude_info.exclusive_read_memory_64);
break;
case LinkTarget::ExclusiveReadMemory128:
c.BL(prelude_info.exclusive_read_memory_128);
break;
case LinkTarget::WriteMemory8:
c.BL(prelude_info.write_memory_8);
break;
case LinkTarget::WriteMemory16:
c.BL(prelude_info.write_memory_16);
break;
case LinkTarget::WriteMemory32:
c.BL(prelude_info.write_memory_32);
break;
case LinkTarget::WriteMemory64:
c.BL(prelude_info.write_memory_64);
break;
case LinkTarget::WriteMemory128:
c.BL(prelude_info.write_memory_128);
break;
case LinkTarget::ExclusiveWriteMemory8:
c.BL(prelude_info.exclusive_write_memory_8);
break;
case LinkTarget::ExclusiveWriteMemory16:
c.BL(prelude_info.exclusive_write_memory_16);
break;
case LinkTarget::ExclusiveWriteMemory32:
c.BL(prelude_info.exclusive_write_memory_32);
break;
case LinkTarget::ExclusiveWriteMemory64:
c.BL(prelude_info.exclusive_write_memory_64);
break;
case LinkTarget::ExclusiveWriteMemory128:
c.BL(prelude_info.exclusive_write_memory_128);
break;
case LinkTarget::CallSVC:
c.BL(prelude_info.call_svc);
break;
case LinkTarget::ExceptionRaised:
c.BL(prelude_info.exception_raised);
break;
case LinkTarget::InstructionSynchronizationBarrierRaised:
c.BL(prelude_info.isb_raised);
break;
case LinkTarget::InstructionCacheOperationRaised:
c.BL(prelude_info.ic_raised);
break;
case LinkTarget::DataCacheOperationRaised:
c.BL(prelude_info.dc_raised);
break;
case LinkTarget::GetCNTPCT:
c.BL(prelude_info.get_cntpct);
break;
case LinkTarget::AddTicks:
c.BL(prelude_info.add_ticks);
break;
case LinkTarget::GetTicksRemaining:
c.BL(prelude_info.get_ticks_remaining);
break;
default:
ASSERT_FALSE("Invalid relocation target");
}
}
for (auto [target_descriptor, list] : block_info.block_relocations) {
block_references[target_descriptor.Value()].emplace(block_descriptor.Value());
LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list);
}
}
void AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor) {
for (auto block_descriptor : block_references[target_descriptor.Value()]) {
if (auto iter = block_infos.find(block_descriptor); iter != block_infos.end()) {
const EmittedBlockInfo& block_info = iter->second;
LinkBlockLinks(block_info.entry_point, Get(target_descriptor), block_infos[block_descriptor].block_relocations[target_descriptor]);
mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
}
}
}
} // namespace Dynarmic::Backend::Arm64
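
Block linking is two-phase: Link patches every relocation recorded for a freshly emitted block (BL into the prelude trampolines, and either a direct B or a NOP placeholder for block-to-block links), while RelinkForDescriptor revisits every block that referenced a descriptor once that descriptor is actually compiled. The per-slot decision reduces to the sketch below; the helper name is illustrative:

#include <cstdint>

#include <oaknut/oaknut.hpp>

// Patch one branch slot inside an emitted block: jump straight to the target
// block if it exists, otherwise leave a NOP so execution falls back to the dispatcher.
static void PatchBlockLinkSlot(std::uint32_t* slot, void* target_ptr) {
    oaknut::CodeGenerator c{slot};
    if (target_ptr) {
        c.B(target_ptr);
    } else {
        c.NOP();
    }
}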

@@ -0,0 +1,92 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <mcl/stdint.hpp>
#include <oaknut/code_block.hpp>
#include <oaknut/oaknut.hpp>
#include <tsl/robin_map.h>
#include <tsl/robin_set.h>
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/interface/halt_reason.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/location_descriptor.h"
namespace Dynarmic::Backend::Arm64 {
class AddressSpace {
public:
explicit AddressSpace(size_t code_cache_size);
virtual ~AddressSpace();
virtual IR::Block GenerateIR(IR::LocationDescriptor) const = 0;
CodePtr Get(IR::LocationDescriptor descriptor);
CodePtr GetOrEmit(IR::LocationDescriptor descriptor);
void ClearCache();
protected:
virtual EmitConfig GetEmitConfig() = 0;
size_t GetRemainingSize();
EmittedBlockInfo Emit(IR::Block ir_block);
void Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block);
void RelinkForDescriptor(IR::LocationDescriptor target_descriptor);
const size_t code_cache_size;
oaknut::CodeBlock mem;
oaknut::CodeGenerator code;
tsl::robin_map<u64, CodePtr> block_entries;
tsl::robin_map<u64, EmittedBlockInfo> block_infos;
tsl::robin_map<u64, tsl::robin_set<u64>> block_references;
struct PreludeInfo {
u32* end_of_prelude;
using RunCodeFuncType = HaltReason (*)(CodePtr entry_point, void* jit_state, volatile u32* halt_reason);
RunCodeFuncType run_code;
RunCodeFuncType step_code;
void* return_to_dispatcher;
void* return_from_run_code;
void* read_memory_8;
void* read_memory_16;
void* read_memory_32;
void* read_memory_64;
void* read_memory_128;
void* exclusive_read_memory_8;
void* exclusive_read_memory_16;
void* exclusive_read_memory_32;
void* exclusive_read_memory_64;
void* exclusive_read_memory_128;
void* write_memory_8;
void* write_memory_16;
void* write_memory_32;
void* write_memory_64;
void* write_memory_128;
void* exclusive_write_memory_8;
void* exclusive_write_memory_16;
void* exclusive_write_memory_32;
void* exclusive_write_memory_64;
void* exclusive_write_memory_128;
void* call_svc;
void* exception_raised;
void* dc_raised;
void* ic_raised;
void* isb_raised;
void* get_cntpct;
void* add_ticks;
void* get_ticks_remaining;
} prelude_info;
};
} // namespace Dynarmic::Backend::Arm64

@@ -8,7 +8,6 @@
#include <fmt/ostream.h>
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
@@ -40,7 +39,7 @@ template<>
void EmitIR<IR::Opcode::CallHostFunction>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall(nullptr, args[1], args[2], args[3]);
ctx.reg_alloc.PrepareForCall(args[1], args[2], args[3]);
code.MOV(Xscratch0, args[0].GetImmediateU64());
code.BLR(Xscratch0);
}
@@ -69,9 +68,34 @@ void EmitIR<IR::Opcode::GetGEFromOp>(oaknut::CodeGenerator&, EmitContext& ctx, I
}
template<>
void EmitIR<IR::Opcode::GetNZCVFromOp>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(ctx.reg_alloc.IsValueLive(inst));
void EmitIR<IR::Opcode::GetNZCVFromOp>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (!ctx.reg_alloc.IsValueLive(inst)) {
return;
}
switch (args[0].GetType()) {
case IR::Type::U32: {
auto Wvalue = ctx.reg_alloc.ReadW(args[0]);
auto flags = ctx.reg_alloc.WriteFlags(inst);
RegAlloc::Realize(Wvalue, flags);
code.TST(*Wvalue, Wvalue);
break;
}
case IR::Type::U64: {
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
auto flags = ctx.reg_alloc.WriteFlags(inst);
RegAlloc::Realize(Xvalue, flags);
code.TST(*Xvalue, Xvalue);
break;
}
default:
ASSERT_FALSE("Invalid type for GetNZCVFromOp");
break;
}
}
template<>
@@ -164,10 +188,12 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E
ASSERT(!ctx.block.HasConditionFailedLocation());
} else {
ASSERT(ctx.block.HasConditionFailedLocation());
oaknut::Label pass;
oaknut::Label pass = EmitA32Cond(code, ctx, ctx.block.GetCondition());
pass = conf.emit_cond(code, ctx, ctx.block.GetCondition());
EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount());
EmitA32ConditionFailedTerminal(code, ctx);
conf.emit_condition_failed_terminal(code, ctx);
code.l(pass);
}
@@ -205,7 +231,7 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E
reg_alloc.AssertNoMoreUses();
EmitAddCycles(code, ctx, block.CycleCount());
EmitA32Terminal(code, ctx);
conf.emit_terminal(code, ctx);
ebi.size = code.ptr<CodePtr>() - ebi.entry_point;
return ebi;

@@ -38,6 +38,8 @@ enum class Opcode;
namespace Dynarmic::Backend::Arm64 {
struct EmitContext;
using CodePtr = std::byte*;
enum class LinkTarget {
@@ -47,21 +49,28 @@ enum class LinkTarget {
ReadMemory16,
ReadMemory32,
ReadMemory64,
ReadMemory128,
ExclusiveReadMemory8,
ExclusiveReadMemory16,
ExclusiveReadMemory32,
ExclusiveReadMemory64,
ExclusiveReadMemory128,
WriteMemory8,
WriteMemory16,
WriteMemory32,
WriteMemory64,
WriteMemory128,
ExclusiveWriteMemory8,
ExclusiveWriteMemory16,
ExclusiveWriteMemory32,
ExclusiveWriteMemory64,
ExclusiveWriteMemory128,
CallSVC,
ExceptionRaised,
InstructionSynchronizationBarrierRaised,
InstructionCacheOperationRaised,
DataCacheOperationRaised,
GetCNTPCT,
AddTicks,
GetTicksRemaining,
};
@@ -83,24 +92,39 @@ struct EmittedBlockInfo {
};
struct EmitConfig {
OptimizationFlag optimizations;
bool HasOptimization(OptimizationFlag f) const { return (f & optimizations) != no_optimizations; }
bool hook_isb;
// System registers
u64 cntfreq_el0;
u32 ctr_el0;
u32 dczid_el0;
const u64* tpidrro_el0;
u64* tpidr_el0;
// Timing
bool wall_clock_cntpct;
bool enable_cycle_counting;
// Endianness
bool always_little_endian;
// Frontend specific callbacks
FP::FPCR (*descriptor_to_fpcr)(const IR::LocationDescriptor& descriptor);
oaknut::Label (*emit_cond)(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond);
void (*emit_condition_failed_terminal)(oaknut::CodeGenerator& code, EmitContext& ctx);
void (*emit_terminal)(oaknut::CodeGenerator& code, EmitContext& ctx);
// State offsets
size_t state_nzcv_offset;
size_t state_fpsr_offset;
// A32 specific
std::array<std::shared_ptr<A32::Coprocessor>, 16> coprocessors{};
OptimizationFlag optimizations;
bool HasOptimization(OptimizationFlag f) const { return (f & optimizations) != no_optimizations; }
};
struct EmitContext;
EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& emit_conf);
template<IR::Opcode op>
@@ -108,7 +132,10 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, LinkTarget link_target);
void EmitBlockLinkRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, const IR::LocationDescriptor& descriptor);
oaknut::Label EmitA32Cond(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond);
oaknut::Label EmitA64Cond(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond);
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx);
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx);
void EmitA32ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx);
void EmitA64ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx);
} // namespace Dynarmic::Backend::Arm64

@@ -41,7 +41,7 @@ void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Re
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
void EmitSetUpperLocationDescriptor(oaknut::CodeGenerator& code, EmitContext& ctx, IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) {
static void EmitSetUpperLocationDescriptor(oaknut::CodeGenerator& code, EmitContext& ctx, IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) {
auto get_upper = [](const IR::LocationDescriptor& desc) -> u32 {
return static_cast<u32>(A32::LocationDescriptor{desc}.SetSingleStepping(false).UniqueHash() >> 32);
};
@@ -555,7 +555,7 @@ void EmitIR<IR::Opcode::A32UpdateUpperLocationDescriptor>(oaknut::CodeGenerator&
template<>
void EmitIR<IR::Opcode::A32CallSupervisor>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall(nullptr);
ctx.reg_alloc.PrepareForCall();
if (ctx.conf.enable_cycle_counting) {
code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
@@ -576,7 +576,7 @@ void EmitIR<IR::Opcode::A32CallSupervisor>(oaknut::CodeGenerator& code, EmitCont
template<>
void EmitIR<IR::Opcode::A32ExceptionRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall(nullptr);
ctx.reg_alloc.PrepareForCall();
if (ctx.conf.enable_cycle_counting) {
code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
@@ -611,7 +611,7 @@ void EmitIR<IR::Opcode::A32InstructionSynchronizationBarrier>(oaknut::CodeGenera
return;
}
ctx.reg_alloc.PrepareForCall(nullptr);
ctx.reg_alloc.PrepareForCall();
EmitRelocation(code, ctx, LinkTarget::InstructionSynchronizationBarrierRaised);
}

@@ -24,7 +24,7 @@ static void EmitCoprocessorException() {
}
static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}) {
ctx.reg_alloc.PrepareForCall(inst, {}, arg0, arg1);
ctx.reg_alloc.PrepareForCall({}, arg0, arg1);
if (callback.user_arg) {
code.MOV(X0, reinterpret_cast<u64>(*callback.user_arg));
@@ -32,6 +32,10 @@ static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A3
code.MOV(Xscratch0, reinterpret_cast<u64>(callback.function));
code.BLR(Xscratch0);
if (inst) {
ctx.reg_alloc.DefineAsRegister(inst, X0);
}
}
template<>

@@ -25,18 +25,19 @@ static bool IsOrdered(IR::AccType acctype) {
static void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall(inst, {}, args[1]);
ctx.reg_alloc.PrepareForCall({}, args[1]);
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
EmitRelocation(code, ctx, fn);
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
ctx.reg_alloc.DefineAsRegister(inst, X0);
}
static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall(inst, {}, args[1]);
ctx.reg_alloc.PrepareForCall({}, args[1]);
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
code.MOV(Wscratch0, 1);
@@ -45,11 +46,12 @@ static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ct
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
ctx.reg_alloc.DefineAsRegister(inst, X0);
}
static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall(inst, {}, args[1], args[2]);
ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
if (ordered) {
@@ -63,7 +65,7 @@ static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::I
static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall(inst, {}, args[1], args[2]);
ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
oaknut::Label end;
@@ -79,6 +81,7 @@ static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& c
code.DMB(oaknut::BarrierOp::ISH);
}
code.l(end);
ctx.reg_alloc.DefineAsRegister(inst, X0);
}
template<>

@@ -3,9 +3,10 @@
* SPDX-License-Identifier: 0BSD
*/
#include <mcl/bit_cast.hpp>
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/a64_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
@@ -18,292 +19,460 @@ namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
oaknut::Label EmitA64Cond(oaknut::CodeGenerator& code, EmitContext&, IR::Cond cond) {
oaknut::Label pass;
// TODO: Flags in host flags
code.LDR(Wscratch0, Xstate, offsetof(A64JitState, cpsr_nzcv));
code.MSR(oaknut::SystemReg::NZCV, Xscratch0);
code.B(static_cast<oaknut::Cond>(cond), pass);
return pass;
}
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step);
void EmitA64Terminal(oaknut::CodeGenerator&, EmitContext&, IR::Term::Interpret, IR::LocationDescriptor, bool) {
ASSERT_FALSE("Interpret should never be emitted.");
}
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) {
oaknut::Label fail;
if (ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
if (ctx.conf.enable_cycle_counting) {
code.CMP(Xticks, 0);
code.B(LE, fail);
EmitBlockLinkRelocation(code, ctx, terminal.next);
} else {
code.LDAR(Wscratch0, Xhalt);
code.CBNZ(Wscratch0, fail);
EmitBlockLinkRelocation(code, ctx, terminal.next);
}
}
code.l(fail);
code.MOV(Xscratch0, A64::LocationDescriptor{terminal.next}.PC());
code.STR(Xscratch0, Xstate, offsetof(A64JitState, pc));
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) {
if (ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
EmitBlockLinkRelocation(code, ctx, terminal.next);
}
code.MOV(Wscratch0, A64::LocationDescriptor{terminal.next}.PC());
code.STR(Wscratch0, Xstate, offsetof(A64JitState, pc));
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool) {
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
// TODO: Implement PopRSBHint optimization
}
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::FastDispatchHint, IR::LocationDescriptor, bool) {
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
// TODO: Implement FastDispatchHint optimization
}
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
oaknut::Label pass = EmitA64Cond(code, ctx, terminal.if_);
EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step);
code.l(pass);
EmitA64Terminal(code, ctx, terminal.then_, initial_location, is_single_step);
}
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
oaknut::Label fail;
code.LDRB(Wscratch0, SP, offsetof(StackLayout, check_bit));
code.CBZ(Wscratch0, fail);
EmitA64Terminal(code, ctx, terminal.then_, initial_location, is_single_step);
code.l(fail);
EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step);
}
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
oaknut::Label fail;
code.LDAR(Wscratch0, Xhalt);
code.CBNZ(Wscratch0, fail);
EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step);
code.l(fail);
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
boost::apply_visitor([&](const auto& t) { EmitA64Terminal(code, ctx, t, initial_location, is_single_step); }, terminal);
}
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx) {
const A64::LocationDescriptor location{ctx.block.Location()};
EmitA64Terminal(code, ctx, ctx.block.GetTerminal(), location.SetSingleStepping(false), location.SingleStepping());
}
void EmitA64ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx) {
const A64::LocationDescriptor location{ctx.block.Location()};
EmitA64Terminal(code, ctx, IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()}, location.SetSingleStepping(false), location.SingleStepping());
}
template<>
void EmitIR<IR::Opcode::A64SetCheckBit>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
code.MOV(Wscratch0, 1);
code.STRB(Wscratch0, SP, offsetof(StackLayout, check_bit));
} else {
code.STRB(WZR, SP, offsetof(StackLayout, check_bit));
}
} else {
auto Wbit = ctx.reg_alloc.ReadW(args[0]);
RegAlloc::Realize(Wbit);
code.STRB(Wbit, SP, offsetof(StackLayout, check_bit));
}
}
template<>
void EmitIR<IR::Opcode::A64GetCFlag>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto Wflag = ctx.reg_alloc.WriteW(inst);
RegAlloc::Realize(Wflag);
code.LDR(Wflag, Xstate, offsetof(A64JitState, cpsr_nzcv));
code.AND(Wflag, Wflag, 1 << 29);
}
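// The NZCV flags are stored in cpsr_nzcv using the architectural PSTATE layout
// (N = bit 31, Z = bit 30, C = bit 29, V = bit 28), which is what the direct
// MSR NZCV in EmitA64Cond relies on; hence the 1 << 29 mask isolates the carry
// flag here.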
template<>
void EmitIR<IR::Opcode::A64GetNZCVRaw>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto Wnzcv = ctx.reg_alloc.WriteW(inst);
RegAlloc::Realize(Wnzcv);
code.LDR(Wnzcv, Xstate, offsetof(A64JitState, cpsr_nzcv));
}
template<>
void EmitIR<IR::Opcode::A64SetNZCVRaw>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
RegAlloc::Realize(Wnzcv);
code.STR(Wnzcv, Xstate, offsetof(A64JitState, cpsr_nzcv));
}
template<>
void EmitIR<IR::Opcode::A64SetNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
RegAlloc::Realize(Wnzcv);
code.STR(Wnzcv, Xstate, offsetof(A64JitState, cpsr_nzcv));
}
template<>
void EmitIR<IR::Opcode::A64GetW>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
auto Wresult = ctx.reg_alloc.WriteW(inst);
RegAlloc::Realize(Wresult);
// TODO: Detect if Gpr vs Fpr is more appropriate
code.LDR(Wresult, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
}
template<>
void EmitIR<IR::Opcode::A64GetX>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
auto Xresult = ctx.reg_alloc.WriteX(inst);
RegAlloc::Realize(Xresult);
// TODO: Detect if Gpr vs Fpr is more appropriate
code.LDR(Xresult, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
}
template<>
void EmitIR<IR::Opcode::A64GetS>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
auto Sresult = ctx.reg_alloc.WriteS(inst);
RegAlloc::Realize(Sresult);
code.LDR(Sresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
}
template<>
void EmitIR<IR::Opcode::A64GetD>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
auto Dresult = ctx.reg_alloc.WriteD(inst);
RegAlloc::Realize(Dresult);
code.LDR(Dresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
}
template<>
void EmitIR<IR::Opcode::A64GetQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
auto Qresult = ctx.reg_alloc.WriteQ(inst);
RegAlloc::Realize(Qresult);
code.LDR(Qresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
}
template<>
void EmitIR<IR::Opcode::A64GetSP>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto Xresult = ctx.reg_alloc.WriteX(inst);
RegAlloc::Realize(Xresult);
code.LDR(Xresult, Xstate, offsetof(A64JitState, sp));
}
template<>
void EmitIR<IR::Opcode::A64GetFPCR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto Wresult = ctx.reg_alloc.WriteW(inst);
RegAlloc::Realize(Wresult);
code.LDR(Wresult, Xstate, offsetof(A64JitState, fpcr));
}
template<>
void EmitIR<IR::Opcode::A64GetFPSR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto Wresult = ctx.reg_alloc.WriteW(inst);
RegAlloc::Realize(Wresult);
code.LDR(Wresult, Xstate, offsetof(A64JitState, fpsr));
}
template<>
void EmitIR<IR::Opcode::A64SetW>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Wvalue = ctx.reg_alloc.ReadW(args[1]);
RegAlloc::Realize(Wvalue);
// TODO: Detect if Gpr vs Fpr is more appropriate
code.MOV(*Wvalue, Wvalue);
code.STR(Wvalue->toX(), Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
}
template<>
void EmitIR<IR::Opcode::A64SetX>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xvalue = ctx.reg_alloc.ReadX(args[1]);
RegAlloc::Realize(Xvalue);
// TODO: Detect if Gpr vs Fpr is more appropriate
code.STR(Xvalue, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
}
template<>
void EmitIR<IR::Opcode::A64SetS>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
auto Svalue = ctx.reg_alloc.ReadS(args[1]);
RegAlloc::Realize(Svalue);
code.FMOV(Svalue, Svalue);
code.STR(Svalue->toQ(), Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
}
template<>
void EmitIR<IR::Opcode::A64SetD>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
auto Dvalue = ctx.reg_alloc.ReadD(args[1]);
RegAlloc::Realize(Dvalue);
code.FMOV(Dvalue, Dvalue);
code.STR(Dvalue->toQ(), Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
}
template<>
void EmitIR<IR::Opcode::A64SetQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
auto Qvalue = ctx.reg_alloc.ReadQ(args[1]);
RegAlloc::Realize(Qvalue);
code.STR(Qvalue, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
}
template<>
void EmitIR<IR::Opcode::A64SetSP>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
RegAlloc::Realize(Xvalue);
code.STR(Xvalue, Xstate, offsetof(A64JitState, sp));
}
template<>
void EmitIR<IR::Opcode::A64SetFPCR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Wvalue = ctx.reg_alloc.ReadW(args[0]);
RegAlloc::Realize(Wvalue);
code.STR(Wvalue, Xstate, offsetof(A64JitState, fpcr));
code.MSR(oaknut::SystemReg::FPCR, Wvalue->toX());
}
template<>
void EmitIR<IR::Opcode::A64SetFPSR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Wvalue = ctx.reg_alloc.ReadW(args[0]);
RegAlloc::Realize(Wvalue);
code.STR(Wvalue, Xstate, offsetof(A64JitState, fpsr));
code.MSR(oaknut::SystemReg::FPSR, Wvalue->toX());
}
template<>
void EmitIR<IR::Opcode::A64SetPC>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
RegAlloc::Realize(Xvalue);
code.STR(Xvalue, Xstate, offsetof(A64JitState, pc));
}
template<>
void EmitIR<IR::Opcode::A64CallSupervisor>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall();
if (ctx.conf.enable_cycle_counting) {
code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
code.SUB(Xscratch0, Xscratch0, Xticks);
EmitRelocation(code, ctx, LinkTarget::AddTicks);
}
code.MOV(W1, args[0].GetImmediateU32());
EmitRelocation(code, ctx, LinkTarget::CallSVC);
if (ctx.conf.enable_cycle_counting) {
EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
code.MOV(Xticks, X0);
}
}
template<>
void EmitIR<IR::Opcode::A64ExceptionRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall();
if (ctx.conf.enable_cycle_counting) {
code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
code.SUB(Xscratch0, Xscratch0, Xticks);
EmitRelocation(code, ctx, LinkTarget::AddTicks);
}
code.MOV(X1, args[0].GetImmediateU64());
code.MOV(X2, args[1].GetImmediateU64());
EmitRelocation(code, ctx, LinkTarget::ExceptionRaised);
if (ctx.conf.enable_cycle_counting) {
EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
code.MOV(Xticks, X0);
}
}
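// Both call-outs above first flush the cycles consumed so far in this block
// (cycles_to_run - Xticks) through AddTicks, then reload cycles_to_run and
// Xticks from GetTicksRemaining afterwards, presumably so the SVC/exception
// handler observes an up-to-date tick count.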
template<>
void EmitIR<IR::Opcode::A64DataCacheOperationRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
EmitRelocation(code, ctx, LinkTarget::DataCacheOperationRaised);
}
template<>
void EmitIR<IR::Opcode::A64InstructionCacheOperationRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall({}, args[0], args[1]);
EmitRelocation(code, ctx, LinkTarget::InstructionCacheOperationRaised);
}
template<>
void EmitIR<IR::Opcode::A64DataSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
void EmitIR<IR::Opcode::A64DataSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
code.DSB(oaknut::BarrierOp::SY);
}
template<>
void EmitIR<IR::Opcode::A64DataMemoryBarrier>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
void EmitIR<IR::Opcode::A64DataMemoryBarrier>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
code.DMB(oaknut::BarrierOp::SY);
}
template<>
void EmitIR<IR::Opcode::A64InstructionSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
void EmitIR<IR::Opcode::A64InstructionSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst*) {
if (!ctx.conf.hook_isb) {
return;
}
ctx.reg_alloc.PrepareForCall();
EmitRelocation(code, ctx, LinkTarget::InstructionSynchronizationBarrierRaised);
}
template<>
void EmitIR<IR::Opcode::A64GetCNTFRQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto Xvalue = ctx.reg_alloc.WriteX(inst);
RegAlloc::Realize(Xvalue);
code.MOV(Xvalue, ctx.conf.cntfreq_el0);
}
template<>
void EmitIR<IR::Opcode::A64GetCNTPCT>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
ctx.reg_alloc.PrepareForCall();
if (!ctx.conf.wall_clock_cntpct && ctx.conf.enable_cycle_counting) {
code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
code.SUB(X1, X1, Xticks);
EmitRelocation(code, ctx, LinkTarget::AddTicks);
EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
code.MOV(Xticks, X0);
}
EmitRelocation(code, ctx, LinkTarget::GetCNTPCT);
ctx.reg_alloc.DefineAsRegister(inst, X0);
}
template<>
void EmitIR<IR::Opcode::A64GetCTR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto Wvalue = ctx.reg_alloc.WriteW(inst);
RegAlloc::Realize(Wvalue);
code.MOV(Wvalue, ctx.conf.ctr_el0);
}
template<>
void EmitIR<IR::Opcode::A64GetDCZID>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto Wvalue = ctx.reg_alloc.WriteW(inst);
RegAlloc::Realize(Wvalue);
code.MOV(Wvalue, ctx.conf.dczid_el0);
}
template<>
void EmitIR<IR::Opcode::A64GetTPIDR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto Xvalue = ctx.reg_alloc.WriteX(inst);
RegAlloc::Realize(Xvalue);
code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidr_el0));
code.LDR(Xvalue, Xscratch0);
}
template<>
void EmitIR<IR::Opcode::A64GetTPIDRRO>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto Xvalue = ctx.reg_alloc.WriteX(inst);
RegAlloc::Realize(Xvalue);
code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidrro_el0));
code.LDR(Xvalue, Xscratch0);
}
template<>
void EmitIR<IR::Opcode::A64SetTPIDR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
RegAlloc::Realize(Xvalue);
code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidr_el0));
code.STR(Xvalue, Xscratch0);
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -5,11 +5,12 @@
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/a64_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/ir/acc_type.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
@ -18,172 +19,202 @@ namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
static bool IsOrdered(IR::AccType acctype) {
return acctype == IR::AccType::ORDERED || acctype == IR::AccType::ORDEREDRW || acctype == IR::AccType::LIMITEDORDERED;
}
static void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall({}, args[1]);
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
EmitRelocation(code, ctx, fn);
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
ctx.reg_alloc.DefineAsRegister(inst, X0);
}
static void EmitReadMemory128(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall({}, args[1]);
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
EmitRelocation(code, ctx, fn);
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
code.MOV(Q8.B16(), Q0.B16());
ctx.reg_alloc.DefineAsRegister(inst, Q8);
}
static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall({}, args[1]);
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
code.MOV(Wscratch0, 1);
code.STRB(Wscratch0, Xstate, offsetof(A64JitState, exclusive_state));
EmitRelocation(code, ctx, fn);
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
ctx.reg_alloc.DefineAsRegister(inst, X0);
}
static void EmitExclusiveReadMemory128(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall({}, args[1]);
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
code.MOV(Wscratch0, 1);
code.STRB(Wscratch0, Xstate, offsetof(A64JitState, exclusive_state));
EmitRelocation(code, ctx, fn);
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
code.MOV(Q8.B16(), Q0.B16());
ctx.reg_alloc.DefineAsRegister(inst, Q8);
}
static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
EmitRelocation(code, ctx, fn);
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
}
static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
oaknut::Label end;
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
code.LDRB(Wscratch0, Xstate, offsetof(A64JitState, exclusive_state));
code.CBZ(Wscratch0, end);
code.STRB(WZR, Xstate, offsetof(A64JitState, exclusive_state));
EmitRelocation(code, ctx, fn);
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
code.l(end);
ctx.reg_alloc.DefineAsRegister(inst, X0);
}
template<>
void EmitIR<IR::Opcode::A64ClearExclusive>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
void EmitIR<IR::Opcode::A64ClearExclusive>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
code.STR(WZR, Xstate, offsetof(A64JitState, exclusive_state));
}
template<>
void EmitIR<IR::Opcode::A64ReadMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory8);
}
template<>
void EmitIR<IR::Opcode::A64ReadMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory16);
}
template<>
void EmitIR<IR::Opcode::A64ReadMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory32);
}
template<>
void EmitIR<IR::Opcode::A64ReadMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory64);
}
template<>
void EmitIR<IR::Opcode::A64ReadMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitReadMemory128(code, ctx, inst, LinkTarget::ReadMemory128);
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveReadMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory8);
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveReadMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory16);
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveReadMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory32);
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveReadMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory64);
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveReadMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitExclusiveReadMemory128(code, ctx, inst, LinkTarget::ExclusiveReadMemory128);
}
template<>
void EmitIR<IR::Opcode::A64WriteMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory8);
}
template<>
void EmitIR<IR::Opcode::A64WriteMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory16);
}
template<>
void EmitIR<IR::Opcode::A64WriteMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory32);
}
template<>
void EmitIR<IR::Opcode::A64WriteMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory64);
}
template<>
void EmitIR<IR::Opcode::A64WriteMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitWriteMemory(code, ctx, inst, LinkTarget::WriteMemory128);
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory8);
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory16);
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory32);
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory64);
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitExclusiveWriteMemory(code, ctx, inst, LinkTarget::ExclusiveWriteMemory128);
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -189,10 +189,14 @@ void EmitIR<IR::Opcode::IsZero64>(oaknut::CodeGenerator& code, EmitContext& ctx,
template<>
void EmitIR<IR::Opcode::TestBit>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xresult = ctx.reg_alloc.WriteX(inst);
auto Xoperand = ctx.reg_alloc.ReadX(args[0]);
RegAlloc::Realize(Xresult, Xoperand);
ASSERT(args[1].IsImmediate());
ASSERT(args[1].GetImmediateU8() < 64);
code.UBFX(Xresult, Xoperand, args[1].GetImmediateU8(), 1);
}
template<>
@ -616,10 +620,23 @@ void EmitIR<IR::Opcode::ArithmeticShiftRight32>(oaknut::CodeGenerator& code, Emi
template<>
void EmitIR<IR::Opcode::ArithmeticShiftRight64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto& operand_arg = args[0];
auto& shift_arg = args[1];
if (shift_arg.IsImmediate()) {
const u8 shift = shift_arg.GetImmediateU8();
auto Xresult = ctx.reg_alloc.WriteX(inst);
auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
RegAlloc::Realize(Xresult, Xoperand);
code.ASR(Xresult, Xoperand, shift <= 63 ? shift : 63);
} else {
auto Xresult = ctx.reg_alloc.WriteX(inst);
auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
auto Xshift = ctx.reg_alloc.ReadX(shift_arg);
RegAlloc::Realize(Xresult, Xoperand, Xshift);
code.ASR(Xresult, Xoperand, Xshift);
}
}
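// On the immediate path above, clamping the shift amount to 63 is enough: an
// arithmetic right shift of a 64-bit value by 63 already fills the register
// with copies of the sign bit (0 for non-negative values, all-ones for
// negative ones), so any wider requested shift produces the same result.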
template<>
@ -690,10 +707,23 @@ void EmitIR<IR::Opcode::RotateRight32>(oaknut::CodeGenerator& code, EmitContext&
template<>
void EmitIR<IR::Opcode::RotateRight64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto& operand_arg = args[0];
auto& shift_arg = args[1];
if (shift_arg.IsImmediate()) {
const u8 shift = shift_arg.GetImmediateU8();
auto Xresult = ctx.reg_alloc.WriteX(inst);
auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
RegAlloc::Realize(Xresult, Xoperand);
code.ROR(Xresult, Xoperand, shift);
} else {
auto Xresult = ctx.reg_alloc.WriteX(inst);
auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
auto Xshift = ctx.reg_alloc.ReadX(shift_arg);
RegAlloc::Realize(Xresult, Xoperand, Xshift);
code.ROR(Xresult, Xoperand, Xshift);
}
}
template<>
@ -726,68 +756,114 @@ void EmitIR<IR::Opcode::RotateRightExtended>(oaknut::CodeGenerator& code, EmitCo
}
}
template<typename ShiftI, typename ShiftR>
static void EmitMaskedShift32(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, ShiftI si_fn, ShiftR sr_fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto& operand_arg = args[0];
auto& shift_arg = args[1];
if (shift_arg.IsImmediate()) {
auto Wresult = ctx.reg_alloc.WriteW(inst);
auto Woperand = ctx.reg_alloc.ReadW(operand_arg);
RegAlloc::Realize(Wresult, Woperand);
const u32 shift = shift_arg.GetImmediateU32();
si_fn(Wresult, Woperand, static_cast<int>(shift & 0x1F));
} else {
auto Wresult = ctx.reg_alloc.WriteW(inst);
auto Woperand = ctx.reg_alloc.ReadW(operand_arg);
auto Wshift = ctx.reg_alloc.ReadW(shift_arg);
RegAlloc::Realize(Wresult, Woperand, Wshift);
sr_fn(Wresult, Woperand, Wshift);
}
}
template<typename ShiftI, typename ShiftR>
static void EmitMaskedShift64(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, ShiftI si_fn, ShiftR sr_fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto& operand_arg = args[0];
auto& shift_arg = args[1];
if (shift_arg.IsImmediate()) {
auto Xresult = ctx.reg_alloc.WriteX(inst);
auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
RegAlloc::Realize(Xresult, Xoperand);
const u32 shift = shift_arg.GetImmediateU64();
si_fn(Xresult, Xoperand, static_cast<int>(shift & 0x3F));
} else {
auto Xresult = ctx.reg_alloc.WriteX(inst);
auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
auto Xshift = ctx.reg_alloc.ReadX(shift_arg);
RegAlloc::Realize(Xresult, Xoperand, Xshift);
sr_fn(Xresult, Xoperand, Xshift);
}
}
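// The register-shift paths above can pass the shift register straight through:
// the AArch64 register-controlled shifts (LSLV, LSRV, ASRV, RORV) already take
// the shift amount modulo the data size, which matches the masking these
// *Masked opcodes require, so only the immediate paths mask explicitly with
// 0x1F / 0x3F.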
template<>
void EmitIR<IR::Opcode::LogicalShiftLeftMasked32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaskedShift32(
code, ctx, inst,
[&](auto& Wresult, auto& Woperand, auto shift) { code.LSL(Wresult, Woperand, shift); },
[&](auto& Wresult, auto& Woperand, auto& Wshift) { code.LSL(Wresult, Woperand, Wshift); });
}
template<>
void EmitIR<IR::Opcode::LogicalShiftLeftMasked64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaskedShift64(
code, ctx, inst,
[&](auto& Xresult, auto& Xoperand, auto shift) { code.LSL(Xresult, Xoperand, shift); },
[&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.LSL(Xresult, Xoperand, Xshift); });
}
template<>
void EmitIR<IR::Opcode::LogicalShiftRightMasked32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaskedShift32(
code, ctx, inst,
[&](auto& Wresult, auto& Woperand, auto shift) { code.LSR(Wresult, Woperand, shift); },
[&](auto& Wresult, auto& Woperand, auto& Wshift) { code.LSR(Wresult, Woperand, Wshift); });
}
template<>
void EmitIR<IR::Opcode::LogicalShiftRightMasked64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaskedShift64(
code, ctx, inst,
[&](auto& Xresult, auto& Xoperand, auto shift) { code.LSR(Xresult, Xoperand, shift); },
[&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.LSR(Xresult, Xoperand, Xshift); });
}
template<>
void EmitIR<IR::Opcode::ArithmeticShiftRightMasked32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaskedShift32(
code, ctx, inst,
[&](auto& Wresult, auto& Woperand, auto shift) { code.ASR(Wresult, Woperand, shift); },
[&](auto& Wresult, auto& Woperand, auto& Wshift) { code.ASR(Wresult, Woperand, Wshift); });
}
template<>
void EmitIR<IR::Opcode::ArithmeticShiftRightMasked64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaskedShift64(
code, ctx, inst,
[&](auto& Xresult, auto& Xoperand, auto shift) { code.ASR(Xresult, Xoperand, shift); },
[&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.ASR(Xresult, Xoperand, Xshift); });
}
template<>
void EmitIR<IR::Opcode::RotateRightMasked32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaskedShift32(
code, ctx, inst,
[&](auto& Wresult, auto& Woperand, auto shift) { code.ROR(Wresult, Woperand, shift); },
[&](auto& Wresult, auto& Woperand, auto& Wshift) { code.ROR(Wresult, Woperand, Wshift); });
}
template<>
void EmitIR<IR::Opcode::RotateRightMasked64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaskedShift64(
code, ctx, inst,
[&](auto& Xresult, auto& Xoperand, auto shift) { code.ROR(Xresult, Xoperand, shift); },
[&](auto& Xresult, auto& Xoperand, auto& Xshift) { code.ROR(Xresult, Xoperand, Xshift); });
}
template<size_t bitsize, typename EmitFn>
@ -975,18 +1051,24 @@ void EmitIR<IR::Opcode::Mul64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR
template<>
void EmitIR<IR::Opcode::SignedMultiplyHigh64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xresult = ctx.reg_alloc.WriteX(inst);
auto Xop1 = ctx.reg_alloc.ReadX(args[0]);
auto Xop2 = ctx.reg_alloc.ReadX(args[1]);
RegAlloc::Realize(Xresult, Xop1, Xop2);
code.SMULH(Xresult, Xop1, Xop2);
}
template<>
void EmitIR<IR::Opcode::UnsignedMultiplyHigh64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xresult = ctx.reg_alloc.WriteX(inst);
auto Xop1 = ctx.reg_alloc.ReadX(args[0]);
auto Xop2 = ctx.reg_alloc.ReadX(args[1]);
RegAlloc::Realize(Xresult, Xop1, Xop2);
code.UMULH(Xresult, Xop1, Xop2);
}
template<>
@ -1160,7 +1242,7 @@ void EmitIR<IR::Opcode::AndNot32>(oaknut::CodeGenerator& code, EmitContext& ctx,
template<>
void EmitIR<IR::Opcode::AndNot64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
EmitAndNot<32>(code, ctx, inst);
EmitAndNot<64>(code, ctx, inst);
}
template<>
@ -1271,9 +1353,13 @@ void EmitIR<IR::Opcode::ZeroExtendWordToLong>(oaknut::CodeGenerator&, EmitContex
}
template<>
void EmitIR<IR::Opcode::ZeroExtendLongToQuad>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
void EmitIR<IR::Opcode::ZeroExtendLongToQuad>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.DefineAsExisting(inst, args[0]);
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
auto Qresult = ctx.reg_alloc.WriteQ(inst);
RegAlloc::Realize(Xvalue, Qresult);
code.FMOV(Qresult->toD(), Xvalue);
}
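// FMOV Dd, Xn writes the low 64 bits of the destination vector register and
// zeroes bits 127:64, so this single move performs the required zero-extension
// into a Q register, which the previous DefineAsExisting aliasing presumably
// did not guarantee.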
template<>
@ -1313,98 +1399,124 @@ void EmitIR<IR::Opcode::CountLeadingZeros64>(oaknut::CodeGenerator& code, EmitCo
template<>
void EmitIR<IR::Opcode::ExtractRegister32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[2].IsImmediate());
auto Wresult = ctx.reg_alloc.WriteW(inst);
auto Wop1 = ctx.reg_alloc.ReadW(args[0]);
auto Wop2 = ctx.reg_alloc.ReadW(args[1]);
RegAlloc::Realize(Wresult, Wop1, Wop2);
const u8 lsb = args[2].GetImmediateU8();
code.EXTR(Wresult, Wop2, Wop1, lsb); // NB: flipped
}
template<>
void EmitIR<IR::Opcode::ExtractRegister64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[2].IsImmediate());
auto Xresult = ctx.reg_alloc.WriteX(inst);
auto Xop1 = ctx.reg_alloc.ReadX(args[0]);
auto Xop2 = ctx.reg_alloc.ReadX(args[1]);
RegAlloc::Realize(Xresult, Xop1, Xop2);
const u8 lsb = args[2].GetImmediateU8();
code.EXTR(Xresult, Xop2, Xop1, lsb); // NB: flipped
}
template<>
void EmitIR<IR::Opcode::ReplicateBit32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[1].IsImmediate());
auto Wresult = ctx.reg_alloc.WriteW(inst);
auto Wvalue = ctx.reg_alloc.ReadW(args[0]);
const u8 bit = args[1].GetImmediateU8();
RegAlloc::Realize(Wresult, Wvalue);
code.LSL(Wresult, Wvalue, 31 - bit);
code.ASR(Wresult, Wresult, 31);
}
template<>
void EmitIR<IR::Opcode::ReplicateBit64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[1].IsImmediate());
auto Xresult = ctx.reg_alloc.WriteX(inst);
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
const u8 bit = args[1].GetImmediateU8();
RegAlloc::Realize(Xresult, Xvalue);
code.LSL(Xresult, Xvalue, 63 - bit);
code.ASR(Xresult, Xresult, 63);
}
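// ReplicateBit moves the selected bit into the sign position and then shifts it
// arithmetically back across the whole register. For example, ReplicateBit32
// with value = 0x0000'0004 and bit = 2: LSL #29 gives 0x8000'0000, ASR #31 then
// gives 0xFFFF'FFFF; with bit 2 clear the result is 0.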
static void EmitMaxMin32(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Cond cond) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Wresult = ctx.reg_alloc.WriteW(inst);
auto Wop1 = ctx.reg_alloc.ReadW(args[0]);
auto Wop2 = ctx.reg_alloc.ReadW(args[1]);
RegAlloc::Realize(Wresult, Wop1, Wop2);
ctx.reg_alloc.SpillFlags();
code.CMP(Wop1->toW(), Wop2);
code.CSEL(Wresult, Wop1, Wop2, cond);
}
static void EmitMaxMin64(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Cond cond) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xresult = ctx.reg_alloc.WriteX(inst);
auto Xop1 = ctx.reg_alloc.ReadX(args[0]);
auto Xop2 = ctx.reg_alloc.ReadX(args[1]);
RegAlloc::Realize(Xresult, Xop1, Xop2);
ctx.reg_alloc.SpillFlags();
code.CMP(Xop1->toX(), Xop2);
code.CSEL(Xresult, Xop1, Xop2, cond);
}
template<>
void EmitIR<IR::Opcode::MaxSigned32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaxMin32(code, ctx, inst, GT);
}
template<>
void EmitIR<IR::Opcode::MaxSigned64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaxMin64(code, ctx, inst, GT);
}
template<>
void EmitIR<IR::Opcode::MaxUnsigned32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaxMin32(code, ctx, inst, HI);
}
template<>
void EmitIR<IR::Opcode::MaxUnsigned64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaxMin64(code, ctx, inst, HI);
}
template<>
void EmitIR<IR::Opcode::MinSigned32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaxMin32(code, ctx, inst, LT);
}
template<>
void EmitIR<IR::Opcode::MinSigned64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaxMin64(code, ctx, inst, LT);
}
template<>
void EmitIR<IR::Opcode::MinUnsigned32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaxMin32(code, ctx, inst, LO);
}
template<>
void EmitIR<IR::Opcode::MinUnsigned64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
EmitMaxMin64(code, ctx, inst, LO);
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -575,6 +575,20 @@ void EmitIR<IR::Opcode::FPDoubleToHalf>(oaknut::CodeGenerator& code, EmitContext
template<>
void EmitIR<IR::Opcode::FPDoubleToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
const auto rounding_mode = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
if (rounding_mode == FP::RoundingMode::ToOdd) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Sto = ctx.reg_alloc.WriteS(inst);
auto Dfrom = ctx.reg_alloc.ReadD(args[0]);
RegAlloc::Realize(Sto, Dfrom);
ctx.fpsr.Load();
code.FCVTXN(Sto, Dfrom);
return;
}
EmitConvert<64, 32>(code, ctx, inst, [&](auto& Sto, auto& Dfrom) { code.FCVT(Sto, Dfrom); });
}

View file

@ -201,8 +201,8 @@ static void EmitThreeOpArrangedLower(oaknut::CodeGenerator& code, EmitContext& c
template<size_t size, typename EmitFn>
static void EmitSaturatedAccumulate(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Qaccumulator = ctx.reg_alloc.ReadWriteQ(args[0], inst);
auto Qoperand = ctx.reg_alloc.ReadQ(args[1]);
auto Qaccumulator = ctx.reg_alloc.ReadWriteQ(args[1], inst); // NB: Swapped
auto Qoperand = ctx.reg_alloc.ReadQ(args[0]); // NB: Swapped
RegAlloc::Realize(Qaccumulator, Qoperand);
ctx.fpsr.Load();

View file

@ -3,14 +3,31 @@
* SPDX-License-Identifier: 0BSD
*/
#include <mcl/bit_cast.hpp>
#include <mcl/mp/metavalue/lift_value.hpp>
#include <mcl/mp/typelist/cartesian_product.hpp>
#include <mcl/mp/typelist/get.hpp>
#include <mcl/mp/typelist/lift_sequence.hpp>
#include <mcl/mp/typelist/list.hpp>
#include <mcl/mp/typelist/lower_to_tuple.hpp>
#include <mcl/type_traits/function_info.hpp>
#include <mcl/type_traits/integer_of_size.hpp>
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/a64_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/common/cast_util.h"
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/op.h"
#include "dynarmic/common/fp/rounding_mode.h"
#include "dynarmic/common/lut_from_list.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
@ -18,6 +35,15 @@
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
namespace mp = mcl::mp;
using A64FullVectorWidth = std::integral_constant<size_t, 128>;
// Array alias that always sizes itself according to the given type T
// relative to the size of a vector register. e.g. T = u32 would result
// in a std::array<u32, 4>.
template<typename T>
using VectorArray = std::array<T, A64FullVectorWidth::value / mcl::bitsizeof<T>>;
template<typename EmitFn>
static void MaybeStandardFPSCRValue(oaknut::CodeGenerator& code, EmitContext& ctx, bool fpcr_controlled, EmitFn emit) {
@ -232,12 +258,47 @@ void EmitToFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst)
});
}
template<typename Lambda>
static void EmitTwoOpFallbackWithoutRegAlloc(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::QReg Qresult, oaknut::QReg Qarg1, Lambda lambda, bool fpcr_controlled) {
const auto fn = static_cast<mcl::equivalent_function_type<Lambda>*>(lambda);
const u32 fpcr = ctx.FPCR(fpcr_controlled).Value();
constexpr u64 stack_size = sizeof(u64) * 4; // sizeof(u128) * 2
ABI_PushRegisters(code, ABI_CALLER_SAVE & ~(1ull << Qresult.index()), stack_size);
code.MOV(Xscratch0, mcl::bit_cast<u64>(fn));
code.ADD(X0, SP, 0 * 16);
code.ADD(X1, SP, 1 * 16);
code.MOV(X2, fpcr);
code.ADD(X3, Xstate, ctx.conf.state_fpsr_offset);
code.STR(Qarg1, X1);
code.BLR(Xscratch0);
code.LDR(Qresult, SP);
ABI_PopRegisters(code, ABI_CALLER_SAVE & ~(1ull << Qresult.index()), stack_size);
}
template<size_t fpcr_controlled_arg_index = 1, typename Lambda>
static void EmitTwoOpFallback(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Qarg1 = ctx.reg_alloc.ReadQ(args[0]);
auto Qresult = ctx.reg_alloc.WriteQ(inst);
RegAlloc::Realize(Qarg1, Qresult);
ctx.reg_alloc.SpillFlags();
ctx.fpsr.Spill();
const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();
EmitTwoOpFallbackWithoutRegAlloc(code, ctx, Qresult, Qarg1, lambda, fpcr_controlled);
}
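// The fallback above hands the lambda two 16-byte stack slots plus the guest
// floating-point state, matching the lambda signatures used further down:
//   X0 = &output (SP + 0), X1 = &input (SP + 16, where Qarg1 was spilled),
//   X2 = FPCR value, X3 = &state.fpsr; the result is read back from SP + 0.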
template<>
void EmitIR<IR::Opcode::FPVectorAbs16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Qresult = ctx.reg_alloc.ReadWriteQ(args[0], inst);
RegAlloc::Realize(Qresult);
code.BIC(Qresult->H8(), 0b10000000, LSL, 8);
}
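// BIC (vector, immediate) with imm8 = 0b1000'0000 shifted left by 8 clears bit
// 15 of every halfword lane, i.e. the IEEE half-precision sign bit, which is
// all a packed FP16 absolute value requires.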
template<>
@ -486,10 +547,35 @@ void EmitIR<IR::Opcode::FPVectorRecipStepFused64>(oaknut::CodeGenerator& code, E
template<>
void EmitIR<IR::Opcode::FPVectorRoundInt16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
(void)code;
(void)ctx;
(void)inst;
ASSERT_FALSE("Unimplemented");
const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
const bool exact = inst->GetArg(2).GetU1();
using rounding_list = mp::list<
mp::lift_value<FP::RoundingMode::ToNearest_TieEven>,
mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsZero>,
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
using exact_list = mp::list<std::true_type, std::false_type>;
static const auto lut = Common::GenerateLookupTableFromList(
[]<typename I>(I) {
using FPT = u16;
return std::pair{
mp::lower_to_tuple_v<I>,
Common::FptrCast(
[](VectorArray<FPT>& output, const VectorArray<FPT>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {
constexpr FP::RoundingMode rounding_mode = mp::get<0, I>::value;
constexpr bool exact = mp::get<1, I>::value;
for (size_t i = 0; i < output.size(); ++i) {
output[i] = static_cast<FPT>(FP::FPRoundInt<FPT>(input[i], fpcr, rounding_mode, exact, fpsr));
}
})};
},
mp::cartesian_product<rounding_list, exact_list>{});
EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
}
template<>

View file

@ -138,7 +138,7 @@ bool RegAlloc::IsValueLive(IR::Inst* inst) const {
return !!ValueLocation(inst);
}
void RegAlloc::PrepareForCall(IR::Inst* result, std::optional<Argument::copyable_reference> arg0, std::optional<Argument::copyable_reference> arg1, std::optional<Argument::copyable_reference> arg2, std::optional<Argument::copyable_reference> arg3) {
void RegAlloc::PrepareForCall(std::optional<Argument::copyable_reference> arg0, std::optional<Argument::copyable_reference> arg1, std::optional<Argument::copyable_reference> arg2, std::optional<Argument::copyable_reference> arg3) {
fpsr_manager.Spill();
SpillFlags();
@ -157,16 +157,29 @@ void RegAlloc::PrepareForCall(IR::Inst* result, std::optional<Argument::copyable
}
const std::array<std::optional<Argument::copyable_reference>, 4> args{arg0, arg1, arg2, arg3};
// AAPCS64 Next General-purpose Register Number
int ngrn = 0;
// AAPCS64 Next SIMD and Floating-point Register Number
int nsrn = 0;
for (int i = 0; i < 4; i++) {
if (args[i]) {
ASSERT(gprs[i].IsCompletelyEmpty());
LoadCopyInto(args[i]->get().value, oaknut::XReg{i});
if (args[i]->get().GetType() == IR::Type::U128) {
ASSERT(fprs[nsrn].IsCompletelyEmpty());
LoadCopyInto(args[i]->get().value, oaknut::QReg{nsrn});
nsrn++;
} else {
ASSERT(gprs[ngrn].IsCompletelyEmpty());
LoadCopyInto(args[i]->get().value, oaknut::XReg{ngrn});
ngrn++;
}
} else {
// Gaps are assumed to be in general-purpose registers
// TODO: should there be a separate list passed for FPRs instead?
ngrn++;
}
}
if (result) {
DefineAsRegister(result, X0);
}
}
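// Illustrative mapping for the loop above: PrepareForCall({}, a_u64, a_u128, b_u64)
// would leave X0 as a gap, load a_u64 into X1, load a_u128 into Q0 (without
// consuming a GPR slot), and load b_u64 into X2.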
void RegAlloc::DefineAsExisting(IR::Inst* inst, Argument& arg) {

View file

@ -271,11 +271,7 @@ public:
}
}
void PrepareForCall(IR::Inst* result = nullptr,
std::optional<Argument::copyable_reference> arg0 = {},
std::optional<Argument::copyable_reference> arg1 = {},
std::optional<Argument::copyable_reference> arg2 = {},
std::optional<Argument::copyable_reference> arg3 = {});
void PrepareForCall(std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}, std::optional<Argument::copyable_reference> arg2 = {}, std::optional<Argument::copyable_reference> arg3 = {});
void DefineAsExisting(IR::Inst* inst, Argument& arg);
void DefineAsRegister(IR::Inst* inst, oaknut::Reg reg);

View file

@ -154,7 +154,7 @@ void EmitX64::EmitGetNZFromOp(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg64 nz = ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
code.cmp(value, 0);
code.test(value, value);
code.lahf();
code.movzx(eax, ah);
ctx.reg_alloc.DefineValue(inst, nz);
@ -180,9 +180,9 @@ void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
code.cmp(value, 0);
code.test(value, value);
code.lahf();
code.seto(code.al);
code.mov(al, 0);
ctx.reg_alloc.DefineValue(inst, nzcv);
}
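// test value, value sets SF and ZF exactly as cmp value, 0 did and forces CF and
// OF to zero, so the LAHF-captured N and Z bits are unchanged and the old SETO
// (which could only ever produce 0 here) becomes a plain MOV AL, 0; avoiding the
// immediate operand is presumably the motivation for the change.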

View file

@ -198,7 +198,7 @@ struct UserConfig {
/// Pointer to where TPIDR_EL0 is stored. This pointer will be inserted into
/// emitted code.
const std::uint64_t* tpidr_el0 = nullptr;
std::uint64_t* tpidr_el0 = nullptr;
/// Pointer to the page table which we can use for direct page table access.
/// If an entry in page_table is null, the relevant memory callback will be called.

View file

@ -1024,6 +1024,54 @@ TEST_CASE("A64: This is an infinite loop if fast dispatch is enabled", "[a64]")
jit.Run();
}
TEST_CASE("A64: EXTR", "[a64]") {
A64TestEnv env;
A64::Jit jit{A64::UserConfig{&env}};
env.code_mem.emplace_back(0x93d8fef7); // EXTR X23, X23, X24, #63
env.code_mem.emplace_back(0x14000000); // B .
jit.SetPC(0);
jit.SetRegister(23, 0);
jit.SetRegister(24, 1);
env.ticks_left = 2;
jit.Run();
REQUIRE(jit.GetRegister(23) == 0);
}
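// EXTR X23, X23, X24, #63 extracts bits [126:63] of the 128-bit concatenation
// X23:X24. With X23 = 0 and X24 = 1 the only set bit is bit 0 of the
// concatenation, which lies below the extraction window, so the expected result
// checked above is 0.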
TEST_CASE("A64: Isolated GetNZCVFromOp", "[a64]") {
A64TestEnv env;
A64::Jit jit{A64::UserConfig{&env}};
env.code_mem.emplace_back(0xaa1f03f5); // MOV X21, XZR
env.code_mem.emplace_back(0x912a02da); // ADD X26, X22, #0xa80
env.code_mem.emplace_back(0x913662dc); // ADD X28, X22, #0xd98
env.code_mem.emplace_back(0x320003e8); // MOV W8, #1
env.code_mem.emplace_back(0xa9006bfc); // STP X28, X26, [SP]
env.code_mem.emplace_back(0x7200011f); // TST W8, #1
env.code_mem.emplace_back(0xf94007e8); // LDR X8, [SP, #8]
env.code_mem.emplace_back(0x321e03e3); // MOV W3, #4
env.code_mem.emplace_back(0xaa1303e2); // MOV X2, X19
env.code_mem.emplace_back(0x9a881357); // CSEL X23, X26, X8, NE
env.code_mem.emplace_back(0xf94003e8); // LDR X8, [SP]
env.code_mem.emplace_back(0xaa1703e0); // MOV X0, X23
env.code_mem.emplace_back(0x9a881396); // CSEL X22, X28, X8, NE
env.code_mem.emplace_back(0x92407ea8); // AND X8, X21, #0xffffffff
env.code_mem.emplace_back(0x1ac8269b); // LSR W27, W20, W8
env.code_mem.emplace_back(0x0b1b0768); // ADD W8, W27, W27, LSL #1
env.code_mem.emplace_back(0x937f7d01); // SBFIZ X1, X8, #1, #32
env.code_mem.emplace_back(0x2a1f03e4); // MOV W4, WZR
env.code_mem.emplace_back(0x531e7779); // LSL W25, W27, #2
env.code_mem.emplace_back(0x14000000); // B .
jit.SetPC(0);
env.ticks_left = 20;
jit.Run();
}
TEST_CASE("A64: Optimization failure when folding ADD", "[a64]") {
A64TestEnv env;
A64::Jit jit{A64::UserConfig{&env}};

View file

@ -6,7 +6,10 @@
#include <algorithm>
#include <array>
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <limits>
#include <optional>
#include <tuple>
#include <vector>
@ -14,6 +17,7 @@
#include <mcl/stdint.hpp>
#include "./A32/testenv.h"
#include "./A64/testenv.h"
#include "./fuzz_util.h"
#include "./rand_int.h"
#include "dynarmic/common/fp/fpcr.h"
@ -22,7 +26,11 @@
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/frontend/A32/a32_types.h"
#include "dynarmic/frontend/A32/translate/a32_translate.h"
#include "dynarmic/frontend/A64/a64_location_descriptor.h"
#include "dynarmic/frontend/A64/a64_types.h"
#include "dynarmic/frontend/A64/translate/a64_translate.h"
#include "dynarmic/interface/A32/a32.h"
#include "dynarmic/interface/A64/a64.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/location_descriptor.h"
#include "dynarmic/ir/opcodes.h"
@ -36,21 +44,14 @@ constexpr bool mask_fpsr_cum_bits = true;
namespace {
using namespace Dynarmic;
bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A32::ITState it_state = {}) {
const A32::LocationDescriptor location = A32::LocationDescriptor{pc, {}, {}}.SetTFlag(is_thumb).SetIT(it_state);
IR::Block block{location};
const bool should_continue = A32::TranslateSingleInstruction(block, location, instruction);
if (!should_continue && !is_last_inst) {
return false;
}
bool ShouldTestInst(IR::Block& block) {
if (auto terminal = block.GetTerminal(); boost::get<IR::Term::Interpret>(&terminal)) {
return false;
}
for (const auto& ir_inst : block) {
switch (ir_inst.GetOpcode()) {
// A32
case IR::Opcode::A32GetFpscr:
case IR::Opcode::A32ExceptionRaised:
case IR::Opcode::A32CallSupervisor:
@ -61,7 +62,53 @@ bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A
case IR::Opcode::A32CoprocGetTwoWords:
case IR::Opcode::A32CoprocLoadWords:
case IR::Opcode::A32CoprocStoreWords:
// A64
case IR::Opcode::A64ExceptionRaised:
case IR::Opcode::A64CallSupervisor:
case IR::Opcode::A64DataCacheOperationRaised:
case IR::Opcode::A64GetCNTPCT:
// Unimplemented
case IR::Opcode::SignedSaturatedAdd8:
case IR::Opcode::SignedSaturatedAdd16:
case IR::Opcode::SignedSaturatedAdd32:
case IR::Opcode::SignedSaturatedAdd64:
case IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh16:
case IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh32:
case IR::Opcode::SignedSaturatedSub8:
case IR::Opcode::SignedSaturatedSub16:
case IR::Opcode::SignedSaturatedSub32:
case IR::Opcode::SignedSaturatedSub64:
case IR::Opcode::UnsignedSaturatedAdd8:
case IR::Opcode::UnsignedSaturatedAdd16:
case IR::Opcode::UnsignedSaturatedAdd32:
case IR::Opcode::UnsignedSaturatedAdd64:
case IR::Opcode::UnsignedSaturatedSub8:
case IR::Opcode::UnsignedSaturatedSub16:
case IR::Opcode::UnsignedSaturatedSub32:
case IR::Opcode::UnsignedSaturatedSub64:
case IR::Opcode::VectorMaxS64:
case IR::Opcode::VectorMaxU64:
case IR::Opcode::VectorMinS64:
case IR::Opcode::VectorMinU64:
case IR::Opcode::VectorMultiply64:
case IR::Opcode::SM4AccessSubstitutionBox:
// Half-prec conversions
case IR::Opcode::FPHalfToFixedS16:
case IR::Opcode::FPHalfToFixedS32:
case IR::Opcode::FPHalfToFixedS64:
case IR::Opcode::FPHalfToFixedU16:
case IR::Opcode::FPHalfToFixedU32:
case IR::Opcode::FPHalfToFixedU64:
// Half-precision
case IR::Opcode::FPAbs16:
case IR::Opcode::FPMulAdd16:
case IR::Opcode::FPNeg16:
case IR::Opcode::FPRecipEstimate16:
case IR::Opcode::FPRecipExponent16:
case IR::Opcode::FPRecipStepFused16:
case IR::Opcode::FPRoundInt16:
case IR::Opcode::FPRSqrtEstimate16:
case IR::Opcode::FPRSqrtStepFused16:
case IR::Opcode::FPVectorAbs16:
case IR::Opcode::FPVectorEqual16:
case IR::Opcode::FPVectorMulAdd16:
@ -84,6 +131,30 @@ bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A
return true;
}
bool ShouldTestA32Inst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A32::ITState it_state = {}) {
const A32::LocationDescriptor location = A32::LocationDescriptor{pc, {}, {}}.SetTFlag(is_thumb).SetIT(it_state);
IR::Block block{location};
const bool should_continue = A32::TranslateSingleInstruction(block, location, instruction);
if (!should_continue && !is_last_inst) {
return false;
}
return ShouldTestInst(block);
}
bool ShouldTestA64Inst(u32 instruction, u64 pc, bool is_last_inst) {
const A64::LocationDescriptor location = A64::LocationDescriptor{pc, {}};
IR::Block block{location};
const bool should_continue = A64::TranslateSingleInstruction(block, location, instruction);
if (!should_continue && !is_last_inst) {
return false;
}
return ShouldTestInst(block);
}
u32 GenRandomArmInst(u32 pc, bool is_last_inst) {
static const struct InstructionGeneratorInfo {
std::vector<InstructionGenerator> generators;
@ -144,7 +215,7 @@ u32 GenRandomArmInst(u32 pc, bool is_last_inst) {
continue;
}
if (ShouldTestInst(inst, pc, false, is_last_inst)) {
if (ShouldTestA32Inst(inst, pc, false, is_last_inst)) {
return inst;
}
}
@ -245,7 +316,7 @@ std::vector<u16> GenRandomThumbInst(u32 pc, bool is_last_inst, A32::ITState it_s
const u32 inst = instructions.generators[index].Generate();
const bool is_four_bytes = (inst >> 16) != 0;
if (ShouldTestInst(is_four_bytes ? mcl::bit::swap_halves_32(inst) : inst, pc, true, is_last_inst, it_state)) {
if (ShouldTestA32Inst(is_four_bytes ? mcl::bit::swap_halves_32(inst) : inst, pc, true, is_last_inst, it_state)) {
if (is_four_bytes)
return {static_cast<u16>(inst >> 16), static_cast<u16>(inst)};
return {static_cast<u16>(inst)};
@ -253,8 +324,65 @@ std::vector<u16> GenRandomThumbInst(u32 pc, bool is_last_inst, A32::ITState it_s
}
}
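// Draws random A64 encodings from the decoder table until one passes the filters:
// encodings on the do_not_test list are rejected outright, and the chosen encoding
// must also satisfy ShouldTestA64Inst.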
u32 GenRandomA64Inst(u64 pc, bool is_last_inst) {
static const struct InstructionGeneratorInfo {
std::vector<InstructionGenerator> generators;
std::vector<InstructionGenerator> invalid;
} instructions = [] {
const std::vector<std::tuple<std::string, const char*>> list{
#define INST(fn, name, bitstring) {#fn, bitstring},
#include "dynarmic/frontend/A64/decoder/a64.inc"
#undef INST
};
std::vector<InstructionGenerator> generators;
std::vector<InstructionGenerator> invalid;
// List of instructions not to test
const std::vector<std::string> do_not_test{
// Dynarmic and QEMU currently differ on how the exclusive monitor's address range works.
"STXR",
"STLXR",
"STXP",
"STLXP",
"LDXR",
"LDAXR",
"LDXP",
"LDAXP",
// Behaviour differs from QEMU
"MSR_reg",
"MSR_imm",
"MRS",
};
for (const auto& [fn, bitstring] : list) {
if (fn == "UnallocatedEncoding") {
continue;
}
if (std::find(do_not_test.begin(), do_not_test.end(), fn) != do_not_test.end()) {
invalid.emplace_back(InstructionGenerator{bitstring});
continue;
}
generators.emplace_back(InstructionGenerator{bitstring});
}
return InstructionGeneratorInfo{generators, invalid};
}();
while (true) {
const size_t index = RandInt<size_t>(0, instructions.generators.size() - 1);
const u32 inst = instructions.generators[index].Generate();
if (std::any_of(instructions.invalid.begin(), instructions.invalid.end(), [inst](const auto& invalid) { return invalid.Match(inst); })) {
continue;
}
if (ShouldTestA64Inst(inst, pc, is_last_inst)) {
return inst;
}
}
}
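// Shared A32 JIT configuration for the fuzzer: fast dispatch is disabled and the
// test environment is installed as the callback handler.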
template<typename TestEnv>
Dynarmic::A32::UserConfig GetUserConfig(TestEnv& testenv) {
Dynarmic::A32::UserConfig GetA32UserConfig(TestEnv& testenv) {
Dynarmic::A32::UserConfig user_config;
user_config.optimizations &= ~OptimizationFlag::FastDispatch;
user_config.callbacks = &testenv;
@ -262,14 +390,14 @@ Dynarmic::A32::UserConfig GetUserConfig(TestEnv& testenv) {
}
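// Runs one generated A32 test case (optionally several times) and prints the initial
// and final CPU state in a line-oriented format intended for textual comparison.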
template<size_t num_jit_reruns = 1, typename TestEnv>
static void RunTestInstance(Dynarmic::A32::Jit& jit,
TestEnv& jit_env,
const std::array<u32, 16>& regs,
const std::array<u32, 64>& vecs,
const std::vector<typename TestEnv::InstructionType>& instructions,
const u32 cpsr,
const u32 fpscr,
const size_t ticks_left) {
void RunTestInstance(Dynarmic::A32::Jit& jit,
TestEnv& jit_env,
const std::array<u32, 16>& regs,
const std::array<u32, 64>& vecs,
const std::vector<typename TestEnv::InstructionType>& instructions,
const u32 cpsr,
const u32 fpscr,
const size_t ticks_left) {
const u32 initial_pc = regs[15];
const u32 num_words = initial_pc / sizeof(typename TestEnv::InstructionType);
const u32 code_mem_size = num_words + static_cast<u32>(instructions.size());
@ -294,37 +422,37 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit,
jit.Run();
}
fmt::print("instructions: ");
fmt::print("instructions:");
for (auto instruction : instructions) {
if constexpr (sizeof(decltype(instruction)) == 2) {
fmt::print("{:04x} ", instruction);
fmt::print(" {:04x}", instruction);
} else {
fmt::print("{:08x} ", instruction);
fmt::print(" {:08x}", instruction);
}
}
fmt::print("\n");
fmt::print("initial_regs: ");
fmt::print("initial_regs:");
for (u32 i : regs) {
fmt::print("{:08x} ", i);
fmt::print(" {:08x}", i);
}
fmt::print("\n");
fmt::print("initial_vecs: ");
fmt::print("initial_vecs:");
for (u32 i : vecs) {
fmt::print("{:08x} ", i);
fmt::print(" {:08x}", i);
}
fmt::print("\n");
fmt::print("initial_cpsr: {:08x}\n", cpsr);
fmt::print("initial_fpcr: {:08x}\n", fpscr);
fmt::print("final_regs: ");
fmt::print("final_regs:");
for (u32 i : jit.Regs()) {
fmt::print("{:08x} ", i);
fmt::print(" {:08x}", i);
}
fmt::print("\n");
fmt::print("final_vecs: ");
fmt::print("final_vecs:");
for (u32 i : jit.ExtRegs()) {
fmt::print("{:08x} ", i);
fmt::print(" {:08x}", i);
}
fmt::print("\n");
fmt::print("final_cpsr: {:08x}\n", jit.Cpsr());
@ -343,11 +471,104 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit,
fmt::print("===\n");
}
Dynarmic::A64::UserConfig GetA64UserConfig(A64TestEnv& jit_env) {
Dynarmic::A64::UserConfig jit_user_config{&jit_env};
jit_user_config.optimizations &= ~OptimizationFlag::FastDispatch;
// The below corresponds to the settings for qemu's aarch64_max_initfn
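// (dczid_el0 = 7 should advertise a 512-byte DC ZVA block; ctr_el0 = 0x80038003
// a 32-byte I/D cache line.)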
jit_user_config.dczid_el0 = 7;
jit_user_config.ctr_el0 = 0x80038003;
return jit_user_config;
}
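// A64 counterpart of RunTestInstance: runs one generated test case and prints the
// initial and final CPU state in the same line-oriented format as the A32 variant.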
template<size_t num_jit_reruns = 1>
void RunTestInstance(Dynarmic::A64::Jit& jit,
A64TestEnv& jit_env,
const std::array<u64, 31>& regs,
const std::array<std::array<u64, 2>, 32>& vecs,
const std::vector<u32>& instructions,
const u32 pstate,
const u32 fpcr,
const u64 initial_sp,
const u64 start_address,
const size_t ticks_left) {
jit.ClearCache();
for (size_t jit_rerun_count = 0; jit_rerun_count < num_jit_reruns; ++jit_rerun_count) {
jit_env.code_mem = instructions;
jit_env.code_mem.emplace_back(0x14000000); // B .
jit_env.code_mem_start_address = start_address;
jit_env.modified_memory.clear();
jit_env.interrupts.clear();
jit.SetRegisters(regs);
jit.SetVectors(vecs);
jit.SetPC(start_address);
jit.SetSP(initial_sp);
jit.SetFpcr(fpcr);
jit.SetFpsr(0);
jit.SetPstate(pstate);
jit.ClearCache();
jit_env.ticks_left = ticks_left;
jit.Run();
}
fmt::print("instructions:");
for (u32 instruction : instructions) {
fmt::print(" {:08x}", instruction);
}
fmt::print("\n");
fmt::print("initial_regs:");
for (u64 i : regs) {
fmt::print(" {:016x}", i);
}
fmt::print("\n");
fmt::print("initial_vecs:");
for (auto i : vecs) {
fmt::print(" {:016x}:{:016x}", i[0], i[1]);
}
fmt::print("\n");
fmt::print("initial_sp: {:016x}\n", initial_sp);
fmt::print("initial_pstate: {:08x}\n", pstate);
fmt::print("initial_fpcr: {:08x}\n", fpcr);
fmt::print("final_regs:");
for (u64 i : jit.GetRegisters()) {
fmt::print(" {:016x}", i);
}
fmt::print("\n");
fmt::print("final_vecs:");
for (auto i : jit.GetVectors()) {
fmt::print(" {:016x}:{:016x}", i[0], i[1]);
}
fmt::print("\n");
fmt::print("final_sp: {:016x}\n", jit.GetSP());
fmt::print("final_pc: {:016x}\n", jit.GetPC());
fmt::print("final_pstate: {:08x}\n", jit.GetPstate());
fmt::print("final_fpcr: {:08x}\n", jit.GetFpcr());
fmt::print("final_qc : {}\n", FP::FPSR{jit.GetFpsr()}.QC());
fmt::print("mod_mem:");
for (auto [addr, value] : jit_env.modified_memory) {
fmt::print(" {:08x}:{:02x}", addr, value);
}
fmt::print("\n");
fmt::print("interrupts:\n");
for (const auto& i : jit_env.interrupts) {
std::puts(i.c_str());
}
fmt::print("===\n");
}
} // Anonymous namespace
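// Each Test* driver below generates num_iterations random register/vector states and
// instruction sequences of length num_instructions, then executes them through the JIT.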
void TestThumb(size_t num_instructions, size_t num_iterations) {
ThumbTestEnv jit_env{};
Dynarmic::A32::Jit jit{GetUserConfig(jit_env)};
Dynarmic::A32::Jit jit{GetA32UserConfig(jit_env)};
std::array<u32, 16> regs;
std::array<u32, 64> ext_reg;
@ -374,7 +595,7 @@ void TestThumb(size_t num_instructions, size_t num_iterations) {
void TestArm(size_t num_instructions, size_t num_iterations) {
ArmTestEnv jit_env{};
Dynarmic::A32::Jit jit{GetUserConfig(jit_env)};
Dynarmic::A32::Jit jit{GetA32UserConfig(jit_env)};
std::array<u32, 16> regs;
std::array<u32, 64> ext_reg;
@ -394,19 +615,76 @@ void TestArm(size_t num_instructions, size_t num_iterations) {
}
regs[15] = start_address;
RunTestInstance(jit, jit_env, regs, ext_reg, instructions, cpsr, fpcr, 1);
RunTestInstance(jit, jit_env, regs, ext_reg, instructions, cpsr, fpcr, num_instructions);
}
}
int main(int, char*[]) {
detail::g_rand_int_generator.seed(42069);
void TestA64(size_t num_instructions, size_t num_iterations) {
A64TestEnv jit_env{};
Dynarmic::A64::Jit jit{GetA64UserConfig(jit_env)};
TestThumb(1, 100000);
TestArm(1, 100000);
TestThumb(5, 100000);
TestArm(5, 100000);
TestThumb(1024, 10000);
TestArm(1024, 10000);
std::array<u64, 31> regs;
std::array<std::array<u64, 2>, 32> vecs;
std::vector<u32> instructions;
for (size_t iteration = 0; iteration < num_iterations; ++iteration) {
std::generate(regs.begin(), regs.end(), [] { return RandInt<u64>(0, ~u64(0)); });
std::generate(vecs.begin(), vecs.end(), RandomVector);
const u32 start_address = 100;
const u32 pstate = (RandInt<u32>(0, 0xF) << 28);
const u32 fpcr = RandomFpcr();
const u64 initial_sp = RandInt<u64>(0x30'0000'0000, 0x40'0000'0000) * 4;
instructions.clear();
for (size_t i = 0; i < num_instructions; ++i) {
instructions.emplace_back(GenRandomA64Inst(static_cast<u32>(start_address + 4 * instructions.size()), i == num_instructions - 1));
}
RunTestInstance(jit, jit_env, regs, vecs, instructions, pstate, fpcr, initial_sp, start_address, num_instructions);
}
}
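// Parses a non-negative decimal command-line argument; returns std::nullopt on
// overflow, negative values, or trailing garbage.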
static std::optional<size_t> str2sz(char const* s) {
char* end = nullptr;
errno = 0;
const long l = std::strtol(s, &end, 10);
if (errno == ERANGE || l < 0) {
return std::nullopt;
}
if (*s == '\0' || *end != '\0') {
return std::nullopt;
}
return static_cast<size_t>(l);
}
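// Command line: <thumb|arm|a64> <seed> <instruction_count> <iteration_count>. The seed
// makes each run reproducible, so two builds given the same arguments should emit
// identical output.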
int main(int argc, char* argv[]) {
if (argc != 5) {
fmt::print("Usage: {} <thumb|arm|a64> <seed> <instruction_count> <iteration_count>\n", argv[0]);
return 1;
}
const auto seed = str2sz(argv[2]);
const auto instruction_count = str2sz(argv[3]);
const auto iteration_count = str2sz(argv[4]);
if (!seed || !instruction_count || !iteration_count) {
fmt::print("invalid numeric arguments\n");
return 1;
}
detail::g_rand_int_generator.seed(static_cast<std::mt19937::result_type>(*seed));
if (strcmp(argv[1], "thumb") == 0) {
TestThumb(*instruction_count, *iteration_count);
} else if (strcmp(argv[1], "arm") == 0) {
TestArm(*instruction_count, *iteration_count);
} else if (strcmp(argv[1], "a64") == 0) {
TestA64(*instruction_count, *iteration_count);
} else {
fmt::print("unrecognized instruction class\n");
return 1;
}
return 0;
}