From f856ac9f33cb2a212e9dd0a6c95631c766219ded Mon Sep 17 00:00:00 2001
From: Yang Liu
Date: Fri, 19 Jan 2024 14:24:10 +0800
Subject: [PATCH] backend/rv64: Add minimal toy implementation enough to execute LSLS

---
 src/dynarmic/CMakeLists.txt                   |  2 +
 .../backend/riscv64/a32_address_space.cpp     |  6 +-
 src/dynarmic/backend/riscv64/emit_riscv64.cpp | 31 ++++++++-
 .../backend/riscv64/emit_riscv64_a32.cpp      | 63 +++++++++++++++++
 .../riscv64/emit_riscv64_data_processing.cpp  | 57 +++++++++++++++
 src/dynarmic/backend/riscv64/reg_alloc.cpp    | 69 +++++++++++++++----
 src/dynarmic/backend/riscv64/reg_alloc.h      | 61 ++++++++++------
 7 files changed, 251 insertions(+), 38 deletions(-)
 create mode 100644 src/dynarmic/backend/riscv64/emit_riscv64_a32.cpp
 create mode 100644 src/dynarmic/backend/riscv64/emit_riscv64_data_processing.cpp

diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt
index 848da788..f6340ff9 100644
--- a/src/dynarmic/CMakeLists.txt
+++ b/src/dynarmic/CMakeLists.txt
@@ -405,6 +405,7 @@ if ("riscv" IN_LIST ARCHITECTURE)
     target_sources(dynarmic PRIVATE
         backend/riscv64/abi.h
         backend/riscv64/emit_context.h
+        backend/riscv64/emit_riscv64_data_processing.cpp
         backend/riscv64/emit_riscv64.cpp
         backend/riscv64/emit_riscv64.h
         backend/riscv64/reg_alloc.cpp
@@ -421,6 +422,7 @@ if ("riscv" IN_LIST ARCHITECTURE)
             backend/riscv64/a32_jitstate.cpp
             backend/riscv64/a32_jitstate.h
             backend/riscv64/code_block.h
+            backend/riscv64/emit_riscv64_a32.cpp
         )
     endif()
 
diff --git a/src/dynarmic/backend/riscv64/a32_address_space.cpp b/src/dynarmic/backend/riscv64/a32_address_space.cpp
index e60b5629..5b705e56 100644
--- a/src/dynarmic/backend/riscv64/a32_address_space.cpp
+++ b/src/dynarmic/backend/riscv64/a32_address_space.cpp
@@ -78,7 +78,7 @@ void A32AddressSpace::EmitPrelude() {
         as.SD(GPR{i}, i * 8, sp);
     }
     for (u32 i = 0; i < 32; i += 1) {
-        as.FSD(FPR{i}, 32 + i * 8, sp);
+        as.FSD(FPR{i}, (32 + i) * 8, sp);
     }
 
     as.ADDI(Xstate, a1, 0);
@@ -92,7 +92,7 @@
         as.LD(GPR{i}, i * 8, sp);
     }
     for (u32 i = 0; i < 32; i += 1) {
-        as.FLD(FPR{i}, 32 + i * 8, sp);
+        as.FLD(FPR{i}, (32 + i) * 8, sp);
     }
     as.ADDI(sp, sp, 64 * 8);
     as.JALR(ra);
@@ -128,7 +128,7 @@ void A32AddressSpace::Link(EmittedBlockInfo& block_info) {
 
         switch (target) {
         case LinkTarget::ReturnFromRunCode: {
-            std::ptrdiff_t off = prelude_info.return_from_run_code - GetCursorPtr();
+            std::ptrdiff_t off = prelude_info.return_from_run_code - reinterpret_cast<CodePtr>(a.GetCursorPointer());
            a.JAL(x0, off);
             break;
         }
diff --git a/src/dynarmic/backend/riscv64/emit_riscv64.cpp b/src/dynarmic/backend/riscv64/emit_riscv64.cpp
index 77694da7..acf7826a 100644
--- a/src/dynarmic/backend/riscv64/emit_riscv64.cpp
+++ b/src/dynarmic/backend/riscv64/emit_riscv64.cpp
@@ -22,7 +22,7 @@ namespace Dynarmic::Backend::RV64 {
 
 // TODO: We should really move this to biscuit.
-static void Mov64(biscuit::Assembler& as, biscuit::GPR rd, u64 imm) {
+void Mov64(biscuit::Assembler& as, biscuit::GPR rd, u64 imm) {
     if (mcl::bit::sign_extend<32>(imm) == imm) {
         as.LI(rd, static_cast<u32>(imm));
         return;
     }
@@ -38,7 +38,7 @@ static void Mov64(biscuit::Assembler& as, biscuit::GPR rd, u64 imm) {
     int shift = 12 + std::countr_zero(hi52);
     hi52 = mcl::bit::sign_extend(shift, hi52 >> (shift - 12));
     Mov64(as, rd, hi52);
-    as.SLLI(rd, rd, shift);
+    as.SLLI64(rd, rd, shift);
     if (lo12 != 0) {
         as.ADDI(rd, rd, lo12);
     }
@@ -49,11 +49,38 @@ void EmitIR(biscuit::Assembler&, EmitContext&, IR::Inst*) {
     ASSERT_FALSE("Unimplemented opcode {}", op);
 }
 
+template<>
+void EmitIR<IR::Opcode::GetCarryFromOp>(biscuit::Assembler&, EmitContext&, IR::Inst*) {}
+
+template<>
+void EmitIR<IR::Opcode::A32GetRegister>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst);
+template<>
+void EmitIR<IR::Opcode::A32SetRegister>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst);
+template<>
+void EmitIR<IR::Opcode::A32SetCpsrNZC>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst);
+template<>
+void EmitIR<IR::Opcode::LogicalShiftLeft32>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst);
+
 template<>
 void EmitIR<IR::Opcode::Identity>(biscuit::Assembler&, EmitContext& ctx, IR::Inst* inst) {
     ASSERT(ctx.reg_alloc.IsValueLive(inst));
 }
 
+template<>
+void EmitIR<IR::Opcode::GetNZFromOp>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
+    auto Xnz = ctx.reg_alloc.WriteX(inst);
+    RegAlloc::Realize(Xvalue, Xnz);
+
+    as.SEQZ(Xnz, Xvalue);
+    as.SLLI(Xnz, Xnz, 30);
+    as.SLT(Xscratch0, Xvalue, biscuit::zero);
+    as.SLLI(Xscratch0, Xscratch0, 31);
+    as.OR(Xnz, Xnz, Xscratch0);
+}
+
 EmittedBlockInfo EmitRV64(biscuit::Assembler& as, IR::Block block, const EmitConfig& emit_conf) {
     using namespace biscuit;
 
diff --git a/src/dynarmic/backend/riscv64/emit_riscv64_a32.cpp b/src/dynarmic/backend/riscv64/emit_riscv64_a32.cpp
new file mode 100644
index 00000000..f22f520c
--- /dev/null
+++ b/src/dynarmic/backend/riscv64/emit_riscv64_a32.cpp
@@ -0,0 +1,63 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2024 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#include <biscuit/assembler.hpp>
+#include <fmt/ostream.h>
+
+#include "dynarmic/backend/riscv64/a32_jitstate.h"
+#include "dynarmic/backend/riscv64/abi.h"
+#include "dynarmic/backend/riscv64/emit_context.h"
+#include "dynarmic/backend/riscv64/emit_riscv64.h"
+#include "dynarmic/backend/riscv64/reg_alloc.h"
+#include "dynarmic/ir/basic_block.h"
+#include "dynarmic/ir/microinstruction.h"
+#include "dynarmic/ir/opcodes.h"
+
+namespace Dynarmic::Backend::RV64 {
+
+template<>
+void EmitIR<IR::Opcode::A32GetRegister>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
+    const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
+
+    auto Xresult = ctx.reg_alloc.WriteX(inst);
+    RegAlloc::Realize(Xresult);
+
+    as.LWU(Xresult, offsetof(A32JitState, regs) + sizeof(u32) * static_cast<size_t>(reg), Xstate);
+}
+
+template<>
+void EmitIR<IR::Opcode::A32SetRegister>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
+    const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    auto Xvalue = ctx.reg_alloc.ReadX(args[1]);
+    RegAlloc::Realize(Xvalue);
+
+    // TODO: Detect if Gpr vs Fpr is more appropriate
+
+    as.SW(Xvalue, offsetof(A32JitState, regs) + sizeof(u32) * static_cast<size_t>(reg), Xstate);
+}
+
+template<>
+void EmitIR<IR::Opcode::A32SetCpsrNZC>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    // TODO: Add full implementation
+    ASSERT(!args[0].IsImmediate() && !args[1].IsImmediate());
+
+    auto Xnz = ctx.reg_alloc.ReadX(args[0]);
+    auto Xc = ctx.reg_alloc.ReadX(args[1]);
+    RegAlloc::Realize(Xnz, Xc);
+
+    as.LWU(Xscratch0, offsetof(A32JitState, cpsr_nzcv), Xstate);
+    as.LUI(Xscratch1, 0x10000);
+    as.AND(Xscratch0, Xscratch0, Xscratch1);
+    as.OR(Xscratch0, Xscratch0, Xnz);
+    as.OR(Xscratch0, Xscratch0, Xc);
+    as.SW(Xscratch0, offsetof(A32JitState, cpsr_nzcv), Xstate);
+}
+
+} // namespace Dynarmic::Backend::RV64
diff --git a/src/dynarmic/backend/riscv64/emit_riscv64_data_processing.cpp b/src/dynarmic/backend/riscv64/emit_riscv64_data_processing.cpp
new file mode 100644
index 00000000..69153b34
--- /dev/null
+++ b/src/dynarmic/backend/riscv64/emit_riscv64_data_processing.cpp
@@ -0,0 +1,57 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2024 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#include <biscuit/assembler.hpp>
+#include <fmt/ostream.h>
+
+#include "dynarmic/backend/riscv64/a32_jitstate.h"
+#include "dynarmic/backend/riscv64/abi.h"
+#include "dynarmic/backend/riscv64/emit_context.h"
+#include "dynarmic/backend/riscv64/emit_riscv64.h"
+#include "dynarmic/backend/riscv64/reg_alloc.h"
+#include "dynarmic/ir/basic_block.h"
+#include "dynarmic/ir/microinstruction.h"
+#include "dynarmic/ir/opcodes.h"
+
+namespace Dynarmic::Backend::RV64 {
+
+template<>
+void EmitIR<IR::Opcode::LogicalShiftLeft32>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
+    const auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto& operand_arg = args[0];
+    auto& shift_arg = args[1];
+    auto& carry_arg = args[2];
+
+    // TODO: Add full implementation
+    ASSERT(carry_inst != nullptr);
+    ASSERT(shift_arg.IsImmediate());
+
+    auto Xresult = ctx.reg_alloc.WriteX(inst);
+    auto Xcarry_out = ctx.reg_alloc.WriteX(carry_inst);
+    auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
+    auto Xcarry_in = ctx.reg_alloc.ReadX(carry_arg);
+    RegAlloc::Realize(Xresult, Xcarry_out, Xoperand, Xcarry_in);
+
+    const u8 shift = shift_arg.GetImmediateU8();
+
+    if (shift == 0) {
+        as.ADDW(Xresult, Xoperand, biscuit::zero);
+        as.ADDW(Xcarry_out, Xcarry_in, biscuit::zero);
+    } else if (shift < 32) {
+        as.SRLIW(Xcarry_out, Xoperand, 32 - shift);
+        as.ANDI(Xcarry_out, Xcarry_out, 1);
+        as.SLLIW(Xresult, Xoperand, shift);
+    } else if (shift > 32) {
+        as.MV(Xresult, biscuit::zero);
+        as.MV(Xcarry_out, biscuit::zero);
+    } else {
+        as.ANDI(Xcarry_out, Xoperand, 1);
+        as.MV(Xresult, biscuit::zero);
+    }
+}
+
+} // namespace Dynarmic::Backend::RV64
diff --git a/src/dynarmic/backend/riscv64/reg_alloc.cpp b/src/dynarmic/backend/riscv64/reg_alloc.cpp
index b80550d6..c1f20659 100644
--- a/src/dynarmic/backend/riscv64/reg_alloc.cpp
+++ b/src/dynarmic/backend/riscv64/reg_alloc.cpp
@@ -9,10 +9,16 @@
 
 #include
 #include
+#include <mcl/mp/metavalue/lift_value.hpp>
 #include
 
+#include "dynarmic/common/always_false.h"
+
 namespace Dynarmic::Backend::RV64 {
 
+// TODO: We should really move this to biscuit.
+void Mov64(biscuit::Assembler& as, biscuit::GPR rd, u64 imm);
+
 constexpr size_t spill_offset = offsetof(StackLayout, spill);
 constexpr size_t spill_slot_size = sizeof(decltype(StackLayout::spill)::value_type);
 
@@ -73,6 +79,15 @@ bool HostLocInfo::Contains(const IR::Inst* value) const {
     return std::find(values.begin(), values.end(), value) != values.end();
 }
 
+void HostLocInfo::SetupScratchLocation() {
+    ASSERT(IsCompletelyEmpty());
+    realized = true;
+}
+
+bool HostLocInfo::IsCompletelyEmpty() const {
+    return values.empty() && !locked && !realized && !accumulated_uses && !expected_uses;
+}
+
 RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
     ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
     for (size_t i = 0; i < inst->NumArgs(); i++) {
@@ -90,11 +105,35 @@ bool RegAlloc::IsValueLive(IR::Inst* inst) const {
     return !!ValueLocation(inst);
 }
 
-template<bool is_fpr>
-u32 RegAlloc::RealizeReadImpl(const IR::Inst* value) {
-    constexpr HostLoc::Kind required_kind = is_fpr ? HostLoc::Kind::Fpr : HostLoc::Kind::Gpr;
+template<HostLoc::Kind kind>
+u32 RegAlloc::GenerateImmediate(const IR::Value& value) {
+    // TODO
+    // ASSERT(value.GetType() != IR::Type::U1);
 
-    const auto current_location = ValueLocation(value);
+    if constexpr (kind == HostLoc::Kind::Gpr) {
+        const u32 new_location_index = AllocateRegister(gprs, gpr_order);
+        SpillGpr(new_location_index);
+        gprs[new_location_index].SetupScratchLocation();
+
+        Mov64(as, biscuit::GPR{new_location_index}, value.GetImmediateAsU64());
+
+        return new_location_index;
+    } else if constexpr (kind == HostLoc::Kind::Fpr) {
+        ASSERT_FALSE("Unimplemented");
+    } else {
+        static_assert(Common::always_false_v<mcl::mp::lift_value<kind>>);
+    }
+
+    return 0;
+}
+
+template<HostLoc::Kind required_kind>
+u32 RegAlloc::RealizeReadImpl(const IR::Value& value) {
+    if (value.IsImmediate()) {
+        return GenerateImmediate<required_kind>(value);
+    }
+
+    const auto current_location = ValueLocation(value.GetInst());
 
     ASSERT(current_location);
     if (current_location->kind == required_kind) {
@@ -105,7 +144,7 @@ u32 RegAlloc::RealizeReadImpl(const IR::Inst* value) {
     ASSERT(!ValueInfo(*current_location).realized);
     ASSERT(!ValueInfo(*current_location).locked);
 
-    if constexpr (is_fpr) {
+    if constexpr (required_kind == HostLoc::Kind::Fpr) {
         const u32 new_location_index = AllocateRegister(fprs, fpr_order);
         SpillFpr(new_location_index);
 
@@ -124,7 +163,7 @@ u32 RegAlloc::RealizeReadImpl(const IR::Inst* value) {
         fprs[new_location_index] = std::exchange(ValueInfo(*current_location), {});
         fprs[new_location_index].realized = true;
         return new_location_index;
-    } else {
+    } else if constexpr (required_kind == HostLoc::Kind::Gpr) {
         const u32 new_location_index = AllocateRegister(gprs, gpr_order);
         SpillGpr(new_location_index);
 
@@ -144,10 +183,12 @@ u32 RegAlloc::RealizeReadImpl(const IR::Inst* value) {
         gprs[new_location_index] = std::exchange(ValueInfo(*current_location), {});
         gprs[new_location_index].realized = true;
         return new_location_index;
+    } else {
+        static_assert(Common::always_false_v<mcl::mp::lift_value<required_kind>>);
     }
 }
 
-template<bool is_fpr>
+template<HostLoc::Kind required_kind>
 u32 RegAlloc::RealizeWriteImpl(const IR::Inst* value) {
     ASSERT(!ValueLocation(value));
 
@@ -159,23 +200,25 @@ u32 RegAlloc::RealizeWriteImpl(const IR::Inst* value) {
     const auto setup_location = [&](HostLocInfo& info) {
         info.realized = true;
         info.locked = true;
        info.expected_uses += value->UseCount();
     };
-    if constexpr (is_fpr) {
+    if constexpr (required_kind == HostLoc::Kind::Fpr) {
         const u32 new_location_index = AllocateRegister(fprs, fpr_order);
         SpillFpr(new_location_index);
         setup_location(fprs[new_location_index]);
         return new_location_index;
-    } else {
+    } else if constexpr (required_kind == HostLoc::Kind::Gpr) {
         const u32 new_location_index = AllocateRegister(gprs, gpr_order);
         SpillGpr(new_location_index);
         setup_location(gprs[new_location_index]);
         return new_location_index;
+    } else {
+        static_assert(Common::always_false_v<mcl::mp::lift_value<required_kind>>);
     }
 }
-template u32 RegAlloc::RealizeReadImpl<false>(const IR::Inst* value);
-template u32 RegAlloc::RealizeReadImpl<true>(const IR::Inst* value);
-template u32 RegAlloc::RealizeWriteImpl<false>(const IR::Inst* value);
-template u32 RegAlloc::RealizeWriteImpl<true>(const IR::Inst* value);
+template u32 RegAlloc::RealizeReadImpl<HostLoc::Kind::Gpr>(const IR::Value& value);
+template u32 RegAlloc::RealizeReadImpl<HostLoc::Kind::Fpr>(const IR::Value& value);
+template u32 RegAlloc::RealizeWriteImpl<HostLoc::Kind::Gpr>(const IR::Inst* value);
+template u32 RegAlloc::RealizeWriteImpl<HostLoc::Kind::Fpr>(const IR::Inst* value);
 void RegAlloc::Unlock(HostLoc host_loc) {
     HostLocInfo& info = ValueInfo(host_loc);
diff --git a/src/dynarmic/backend/riscv64/reg_alloc.h b/src/dynarmic/backend/riscv64/reg_alloc.h
index 19549d44..5b38c482 100644
--- a/src/dynarmic/backend/riscv64/reg_alloc.h
+++ b/src/dynarmic/backend/riscv64/reg_alloc.h
@@ -64,35 +64,40 @@ private:
 template<typename T>
 struct RAReg {
 public:
-    static constexpr bool is_fpr = std::is_base_of_v<biscuit::FPR, T>;
+    static constexpr HostLoc::Kind kind = std::is_base_of_v<biscuit::FPR, T>
+                                              ? HostLoc::Kind::Fpr
+                                              : HostLoc::Kind::Gpr;
 
     operator T() const { return *reg; }
     T operator*() const { return *reg; }
+    const T* operator->() const { return &*reg; }
+
     ~RAReg();
 
 private:
     friend class RegAlloc;
 
-    explicit RAReg(RegAlloc& reg_alloc, bool write, const IR::Inst* value)
-        : reg_alloc{reg_alloc}, write{write}, value{value} {}
+    explicit RAReg(RegAlloc& reg_alloc, bool write, const IR::Value& value);
 
     void Realize();
 
     RegAlloc& reg_alloc;
     bool write;
-    const IR::Inst* value;
+    const IR::Value value;
     std::optional<T> reg;
 };
 
 struct HostLocInfo final {
     std::vector<const IR::Inst*> values;
 
-    bool locked = false;
+    size_t locked = 0;
     bool realized = false;
     size_t accumulated_uses = 0;
     size_t expected_uses = 0;
 
     bool Contains(const IR::Inst*) const;
+    void SetupScratchLocation();
+    bool IsCompletelyEmpty() const;
 };
 
 class RegAlloc {
@@ -105,11 +110,11 @@
     ArgumentInfo GetArgumentInfo(IR::Inst* inst);
     bool IsValueLive(IR::Inst* inst) const;
 
-    auto ReadX(Argument& arg) { return RAReg<biscuit::GPR>{*this, false, PreReadImpl(arg.value)}; }
-    auto ReadD(Argument& arg) { return RAReg<biscuit::FPR>{*this, false, PreReadImpl(arg.value)}; }
+    auto ReadX(Argument& arg) { return RAReg<biscuit::GPR>{*this, false, arg.value}; }
+    auto ReadD(Argument& arg) { return RAReg<biscuit::FPR>{*this, false, arg.value}; }
 
-    auto WriteX(IR::Inst* inst) { return RAReg<biscuit::GPR>{*this, true, inst}; }
-    auto WriteD(IR::Inst* inst) { return RAReg<biscuit::FPR>{*this, true, inst}; }
+    auto WriteX(IR::Inst* inst) { return RAReg<biscuit::GPR>{*this, true, IR::Value{inst}}; }
+    auto WriteD(IR::Inst* inst) { return RAReg<biscuit::FPR>{*this, true, IR::Value{inst}}; }
 
     void SpillAll();
 
@@ -123,14 +128,11 @@ private:
     template<typename T>
     friend struct RAReg;
 
-    const IR::Inst* PreReadImpl(const IR::Value& value) {
-        ValueInfo(value.GetInst()).locked = true;
-        return value.GetInst();
-    }
-
-    template<bool is_fpr>
-    u32 RealizeReadImpl(const IR::Inst* value);
-    template<bool is_fpr>
+    template<HostLoc::Kind kind>
+    u32 GenerateImmediate(const IR::Value& value);
+    template<HostLoc::Kind required_kind>
+    u32 RealizeReadImpl(const IR::Value& value);
+    template<HostLoc::Kind required_kind>
     u32 RealizeWriteImpl(const IR::Inst* value);
 
     void Unlock(HostLoc host_loc);
@@ -154,16 +156,35 @@ private:
     mutable std::mt19937 rand_gen;
 };
 
+template<typename T>
+RAReg<T>::RAReg(RegAlloc& reg_alloc, bool write, const IR::Value& value)
+        : reg_alloc{reg_alloc}, write{write}, value{value} {
+    if (!write && !value.IsImmediate()) {
+        reg_alloc.ValueInfo(value.GetInst()).locked++;
+    }
+}
+
 template<typename T>
 RAReg<T>::~RAReg() {
-    if (reg) {
-        reg_alloc.Unlock(HostLoc{is_fpr ? HostLoc::Kind::Fpr : HostLoc::Kind::Gpr, reg->Index()});
+    if (value.IsImmediate()) {
+        if (reg) {
+            // Immediate in scratch register
+            HostLocInfo& info = reg_alloc.ValueInfo(HostLoc{kind, reg->Index()});
+            info.locked--;
+            info.realized = false;
+        }
+    } else {
+        HostLocInfo& info = reg_alloc.ValueInfo(value.GetInst());
+        info.locked--;
+        if (reg) {
+            info.realized = false;
+        }
     }
 }
 
 template<typename T>
 void RAReg<T>::Realize() {
-    reg = T{write ? reg_alloc.RealizeWriteImpl<is_fpr>(value) : reg_alloc.RealizeReadImpl<is_fpr>(value)};
+    reg = T{write ? reg_alloc.RealizeWriteImpl<kind>(value.GetInst()) : reg_alloc.RealizeReadImpl<kind>(value)};
 }
 
 } // namespace Dynarmic::Backend::RV64