diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt
index 234c5e6f..de5167fc 100644
--- a/src/dynarmic/CMakeLists.txt
+++ b/src/dynarmic/CMakeLists.txt
@@ -371,6 +371,7 @@ elseif(ARCHITECTURE STREQUAL "arm64")
     target_sources(dynarmic PRIVATE
         backend/arm64/a32_jitstate.cpp
         backend/arm64/a32_jitstate.h
+        backend/arm64/abi.cpp
         backend/arm64/abi.h
         backend/arm64/emit_arm64.cpp
         backend/arm64/emit_arm64.h
diff --git a/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/backend/arm64/a32_address_space.cpp
index 119145e6..3ce05922 100644
--- a/src/dynarmic/backend/arm64/a32_address_space.cpp
+++ b/src/dynarmic/backend/arm64/a32_address_space.cpp
@@ -72,28 +72,13 @@ void A32AddressSpace::EmitPrelude() {
     mem.unprotect();
 
     prelude_info.run_code = code.ptr();
-    // TODO: Minimize this.
-    code.STR(X30, SP, PRE_INDEXED, -16);
-    for (int i = 0; i < 30; i += 2) {
-        code.STP(XReg{i}, XReg{i + 1}, SP, PRE_INDEXED, -16);
-    }
-    for (int i = 0; i < 32; i += 2) {
-        code.STP(QReg{i}, QReg{i + 1}, SP, PRE_INDEXED, -32);
-    }
-    code.SUB(SP, SP, sizeof(StackLayout));
+    ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));  // bit 30 additionally selects X30
 
     code.MOV(Xstate, X1);
     code.MOV(Xhalt, X2);
     code.BR(X0);
     prelude_info.return_from_run_code = code.ptr();
-    code.ADD(SP, SP, sizeof(StackLayout));
-    for (int i = 30; i >= 0; i -= 2) {
-        code.LDP(QReg{i}, QReg{i + 1}, SP, POST_INDEXED, 32);
-    }
-    for (int i = 28; i >= 0; i -= 2) {
-        code.LDP(XReg{i}, XReg{i + 1}, SP, POST_INDEXED, 16);
-    }
-    code.LDR(X30, SP, POST_INDEXED, 16);
+    ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));  // same list and frame size as the push above
     code.RET();
 
     mem.protect();
diff --git a/src/dynarmic/backend/arm64/abi.cpp b/src/dynarmic/backend/arm64/abi.cpp
new file mode 100644
index 00000000..1ea59595
--- /dev/null
+++ b/src/dynarmic/backend/arm64/abi.cpp
@@ -0,0 +1,117 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2022 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#include "dynarmic/backend/arm64/abi.h"
+
+#include
+
+#include
+#include
+#include
+
+namespace Dynarmic::Backend::Arm64 {
+
+using namespace oaknut::util;
+
+// Byte stride between consecutive saved X registers / Q registers in the save area.
+static constexpr size_t gpr_size = 8;
+static constexpr size_t fpr_size = 16;
+
+// Decoded register list plus the byte sizes of the stack areas it occupies.
+struct FrameInfo {
+    std::vector gprs;    // indexes of the X registers to save, ascending
+    std::vector fprs;    // indexes of the Q registers to save, ascending
+    size_t frame_size;  // extra bytes reserved below the save area
+    size_t gprs_size;   // GPR save area in bytes, rounded up to a multiple of 16
+    size_t fprs_size;   // FPR save area in bytes
+};
+
+// Returns the positions of the set bits of `list`, lowest first.
+static std::vector ListToIndexes(u32 list) {
+    std::vector indexes;
+    for (int i = 0; i < 32; i++) {
+        if (mcl::bit::get_bit(i, list)) {
+            indexes.emplace_back(i);
+        }
+    }
+    return indexes;
+}
+
+// Splits `rl` into GPR indexes (low 32 bits) and FPR indexes (high 32 bits) and sizes both save areas.
+static FrameInfo CalculateFrameInfo(RegisterList rl, size_t frame_size) {
+    const auto gprs = ListToIndexes(static_cast(rl));
+    const auto fprs = ListToIndexes(static_cast(rl >> 32));
+
+    const size_t num_gprs = gprs.size();
+    const size_t num_fprs = fprs.size();
+
+    // An odd GPR count is padded by one slot so the GPR area stays a multiple of 16 bytes.
+    const size_t gprs_size = (num_gprs + 1) / 2 * 16;
+    const size_t fprs_size = num_fprs * 16;
+
+    return {
+        gprs,
+        fprs,
+        frame_size,
+        gprs_size,
+        fprs_size,
+    };
+}
+
+// Emits code that stores the registers selected by `rl` below SP and then reserves `frame_size`
+// more bytes. From the final SP upwards the stack holds: frame, GPR save area, FPR save area.
+void ABI_PushRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t frame_size) {
+    const FrameInfo frame_info = CalculateFrameInfo(rl, frame_size);
+
+    code.SUB(SP, SP, frame_info.gprs_size + frame_info.fprs_size);
+
+    // `i + 1 < size()` instead of `i < size() - 1`: the subtraction wraps around for an empty list.
+    for (size_t i = 0; i + 1 < frame_info.gprs.size(); i += 2) {
+        code.STP(oaknut::XReg{frame_info.gprs[i]}, oaknut::XReg{frame_info.gprs[i + 1]}, SP, i * gpr_size);
+    }
+    if (frame_info.gprs.size() % 2 == 1) {
+        const size_t i = frame_info.gprs.size() - 1;
+        code.STR(oaknut::XReg{frame_info.gprs[i]}, SP, i * gpr_size);
+    }
+
+    for (size_t i = 0; i + 1 < frame_info.fprs.size(); i += 2) {
+        code.STP(oaknut::QReg{frame_info.fprs[i]}, oaknut::QReg{frame_info.fprs[i + 1]}, SP, frame_info.gprs_size + i * fpr_size);
+    }
+    if (frame_info.fprs.size() % 2 == 1) {
+        const size_t i = frame_info.fprs.size() - 1;
+        code.STR(oaknut::QReg{frame_info.fprs[i]}, SP, frame_info.gprs_size + i * fpr_size);
+    }
+
+    code.SUB(SP, SP, frame_info.frame_size);
+}
+
+// Emits the inverse sequence: releases the frame, reloads the registers selected by `rl` from the
+// offsets recomputed from `rl`, then releases the save area. Only undoes a push made with the same arguments.
+void ABI_PopRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t frame_size) {
+    const FrameInfo frame_info = CalculateFrameInfo(rl, frame_size);
+
+    code.ADD(SP, SP, frame_info.frame_size);
+
+    // `i + 1 < size()` keeps the bound from wrapping around when the list is empty.
+    for (size_t i = 0; i + 1 < frame_info.gprs.size(); i += 2) {
+        code.LDP(oaknut::XReg{frame_info.gprs[i]}, oaknut::XReg{frame_info.gprs[i + 1]}, SP, i * gpr_size);
+    }
+    if (frame_info.gprs.size() % 2 == 1) {
+        const size_t i = frame_info.gprs.size() - 1;
+        code.LDR(oaknut::XReg{frame_info.gprs[i]}, SP, i * gpr_size);
+    }
+
+    for (size_t i = 0; i + 1 < frame_info.fprs.size(); i += 2) {
+        code.LDP(oaknut::QReg{frame_info.fprs[i]}, oaknut::QReg{frame_info.fprs[i + 1]}, SP, frame_info.gprs_size + i * fpr_size);
+    }
+    if (frame_info.fprs.size() % 2 == 1) {
+        const size_t i = frame_info.fprs.size() - 1;
+        code.LDR(oaknut::QReg{frame_info.fprs[i]}, SP, frame_info.gprs_size + i * fpr_size);
+    }
+
+    code.ADD(SP, SP, frame_info.gprs_size + frame_info.fprs_size);
+}
+
+} // namespace Dynarmic::Backend::Arm64
diff --git a/src/dynarmic/backend/arm64/abi.h b/src/dynarmic/backend/arm64/abi.h
index 86d6cef3..599ea720 100644
--- a/src/dynarmic/backend/arm64/abi.h
+++ b/src/dynarmic/backend/arm64/abi.h
@@ -5,6 +5,9 @@
 
 #pragma once
 
+#include
+
+#include
 #include
 
 namespace Dynarmic::Backend::Arm64 {
@@ -37,4 +40,18 @@ constexpr auto Rscratch1() {
     }
 }
 
+constexpr std::initializer_list GPR_ORDER{19, 20, 21, 22, 23, 24, 25, 26, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8};
+constexpr std::initializer_list FPR_ORDER{8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
+
+// Register bitmask: bit n selects Xn, bit 32 + n selects Qn.
+using RegisterList = u64;
+
+constexpr RegisterList ABI_CALLEE_SAVE = 0x0000ff00'3ff80000;  // X19-X29, Q8-Q15
+constexpr RegisterList ABI_CALLER_SAVE = 0xffffffff'4000ffff;  // X0-X15, X30, Q0-Q31
+
+// Emit code that saves the registers in `rl` on the stack, then reserves `frame_size` bytes below them.
+void ABI_PushRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t frame_size);
+// Emit code undoing an ABI_PushRegisters call made with the same `rl` and `frame_size`.
+void ABI_PopRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t frame_size);
+
 } // namespace Dynarmic::Backend::Arm64
diff --git a/src/dynarmic/backend/arm64/emit_arm64.cpp b/src/dynarmic/backend/arm64/emit_arm64.cpp
index e687ee09..758a7d4a 100644
--- a/src/dynarmic/backend/arm64/emit_arm64.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64.cpp
@@ -119,9 +119,7 @@ void EmitIR(oaknut::CodeGenerator&, EmitContext
 EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& emit_conf) {
     EmittedBlockInfo ebi;
 
-    const std::vector gpr_order{19, 20, 21, 22, 23, 24, 25, 26, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8};
-    const std::vector fpr_order{8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
-    RegAlloc reg_alloc{code, gpr_order, fpr_order};
+    RegAlloc reg_alloc{code, GPR_ORDER, FPR_ORDER};  // allocation orders now come from abi.h
     EmitContext ctx{block, reg_alloc, emit_conf, ebi};
 
     ebi.entry_point = code.ptr();
diff --git a/src/dynarmic/backend/arm64/reg_alloc.cpp b/src/dynarmic/backend/arm64/reg_alloc.cpp
index 2d01abea..611e498d 100644
--- a/src/dynarmic/backend/arm64/reg_alloc.cpp
+++ b/src/dynarmic/backend/arm64/reg_alloc.cpp
@@ -9,6 +9,7 @@
 #include
 
 #include
+#include
 #include
 
 #include "dynarmic/backend/arm64/abi.h"
@@ -140,6 +141,35 @@ bool RegAlloc::IsValueLive(IR::Inst* inst) const {
     return !!ValueLocation(inst);
 }
 
+void RegAlloc::PrepareForCall(IR::Inst* result, std::optional arg0, std::optional arg1, std::optional arg2, std::optional arg3) {  // spill caller-saved state, copy present arguments into X0-X3, bind result to X0
+    SpillFlags();
+
+    // TODO: Spill into callee-save registers
+
+    for (int i = 0; i < 32; i++) {
+        if (mcl::bit::get_bit(i, static_cast(ABI_CALLER_SAVE))) {  // low half of the mask: GPRs
+            SpillGpr(i);
+        }
+    }
+
+    for (int i = 0; i < 32; i++) {
+        if (mcl::bit::get_bit(i, static_cast(ABI_CALLER_SAVE >> 32))) {  // high half of the mask: FPRs
+            SpillFpr(i);
+        }
+    }
+
+    const std::array, 4> args{arg0, arg1, arg2, arg3};
+    for (int i = 0; i < 4; i++) {
+        if (args[i]) {
+            LoadCopyInto(args[i]->get().value.GetInst(), oaknut::XReg{i});  // NOTE(review): GetInst() presumes the argument is not an immediate - confirm
+        }
+    }
+
+    if (result) {
+        DefineAsRegister(result, X0);
+    }
+}
+
 void RegAlloc::DefineAsExisting(IR::Inst* inst, Argument& arg) {
     ASSERT(!ValueLocation(inst));
     auto& info = ValueInfo(arg.value.GetInst());
@@ -184,7 +214,7 @@ int RegAlloc::RealizeReadImpl(const IR::Inst* value) {
         ASSERT_FALSE("Logic error");
         break;
     case HostLoc::Kind::Fpr:
-        code.FMOV(oaknut::XReg{current_location->index}, oaknut::DReg{new_location_index});
+        code.FMOV(oaknut::XReg{new_location_index}, oaknut::DReg{current_location->index});  // value currently sits in an FPR: new GPR index is the first operand, current FPR index the second
         // ASSERT size fits
         break;
     case HostLoc::Kind::Spill:
@@ -339,6 +369,27 @@ int RegAlloc::FindFreeSpill() const {
     return static_cast(iter - spills.begin());
 }
 
+void RegAlloc::LoadCopyInto(IR::Inst* inst, oaknut::XReg reg) {  // emit code copying inst's value from its current host location into reg
+    const auto current_location = ValueLocation(inst);
+    ASSERT(current_location);  // inst must already have a host location
+    ASSERT(gprs[reg.index()].IsCompletelyEmpty());  // the target GPR's bookkeeping entry must report empty
+    switch (current_location->kind) {
+    case HostLoc::Kind::Gpr:
+        code.MOV(reg, oaknut::XReg{current_location->index});  // register-to-register copy
+        break;
+    case HostLoc::Kind::Fpr:
+        code.FMOV(reg, oaknut::DReg{current_location->index});  // copy out of the FPR through its D view
+        // ASSERT size fits
+        break;
+    case HostLoc::Kind::Spill:
+        code.LDR(reg, SP, spill_offset + current_location->index * spill_slot_size);  // reload from the value's spill slot on the stack
+        break;
+    case HostLoc::Kind::Flags:
+        code.MRS(reg, oaknut::SystemReg::NZCV);  // read the NZCV system register into reg
+        break;
+    }
+}
+
 std::optional RegAlloc::ValueLocation(const IR::Inst* value) const {
     const auto contains_value = [value](const HostLocInfo& info) { return info.Contains(value); };
 
diff --git a/src/dynarmic/backend/arm64/reg_alloc.h b/src/dynarmic/backend/arm64/reg_alloc.h
index a39e804e..c483256d 100644
--- a/src/dynarmic/backend/arm64/reg_alloc.h
+++ b/src/dynarmic/backend/arm64/reg_alloc.h
@@ -222,6 +222,12 @@ public:
         }
     }
 
+    void PrepareForCall(IR::Inst* result = nullptr,  // every parameter is defaulted: a result value and up to four call arguments
+                        std::optional arg0 = {},
+                        std::optional arg1 = {},
+                        std::optional arg2 = {},
+                        std::optional arg3 = {});
+
     void DefineAsExisting(IR::Inst* inst, Argument& arg);
     void DefineAsRegister(IR::Inst* inst, oaknut::Reg reg);
 
@@ -259,6 +265,8 @@ private:
     void SpillFpr(int index);
     int FindFreeSpill() const;
 
+    void LoadCopyInto(IR::Inst* inst, oaknut::XReg reg);  // emit a copy of inst's current value into reg
+
     std::optional ValueLocation(const IR::Inst* value) const;
     HostLocInfo& ValueInfo(HostLoc host_loc);
     HostLocInfo& ValueInfo(const IR::Inst* value);