backend/rv64: Initial implementation of register allocator

Authored by Yang Liu on 2024-01-10 11:19:13 +08:00, committed by Merry
parent c47dacb1de
commit 62ff78d527
9 changed files with 628 additions and 10 deletions

src/dynarmic/CMakeLists.txt

@@ -402,6 +402,16 @@ endif()
if ("riscv" IN_LIST ARCHITECTURE)
target_link_libraries(dynarmic PRIVATE biscuit::biscuit)
target_sources(dynarmic PRIVATE
backend/riscv64/abi.h
backend/riscv64/emit_context.h
backend/riscv64/emit_riscv64.cpp
backend/riscv64/emit_riscv64.h
backend/riscv64/reg_alloc.cpp
backend/riscv64/reg_alloc.h
backend/riscv64/stack_layout.h
)
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic PRIVATE
backend/riscv64/a32_address_space.cpp
@@ -411,8 +421,6 @@ if ("riscv" IN_LIST ARCHITECTURE)
backend/riscv64/a32_jitstate.cpp
backend/riscv64/a32_jitstate.h
backend/riscv64/code_block.h
backend/riscv64/emit_riscv64.cpp
backend/riscv64/emit_riscv64.h
)
endif()

src/dynarmic/backend/riscv64/a32_address_space.cpp

@@ -7,6 +7,7 @@
#include <mcl/assert.hpp>
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/frontend/A32/translate/a32_translate.h"
@@ -80,6 +81,8 @@ void A32AddressSpace::EmitPrelude() {
as.FSD(FPR{i}, 32 + i * 8, sp);
}
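// Run-code arguments: a0 holds the code pointer that the prelude jumps to, a1 (copied into Xstate)
// is the guest-state pointer, and a2 (copied into Xhalt) is used for halt handling.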
as.ADDI(Xstate, a1, 0);
as.ADDI(Xhalt, a2, 0);
as.JALR(x0, 0, a0);
prelude_info.return_from_run_code = GetCursorPtr<CodePtr>();
@@ -112,7 +115,7 @@ EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) {
ClearCache();
}
EmittedBlockInfo block_info = EmitRV64(as, std::move(block));
EmittedBlockInfo block_info = EmitRV64(as, std::move(block), {});
Link(block_info);
return block_info;

src/dynarmic/backend/riscv64/abi.h

@@ -0,0 +1,20 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <biscuit/registers.hpp>
#include <mcl/stdint.hpp>
namespace Dynarmic::Backend::RV64 {
constexpr biscuit::GPR Xstate{27};
constexpr biscuit::GPR Xhalt{26};
constexpr biscuit::GPR Xscratch0{30}, Xscratch1{31};
constexpr std::initializer_list<u32> GPR_ORDER{8, 9, 18, 19, 20, 21, 22, 23, 24, 25, 5, 6, 7, 28, 29, 10, 11, 12, 13, 14, 15, 16, 17};
constexpr std::initializer_list<u32> FPR_ORDER{8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
} // namespace Dynarmic::Backend::RV64
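The raw register encodings above can be read against the standard RV64 ABI names; the decoding below is an editorial annotation based on that ABI, not text from the commit.

// Reserved by the backend: x27 = s11 (Xstate), x26 = s10 (Xhalt), x30/x31 = t5/t6 (Xscratch0/Xscratch1).
// GPR_ORDER decoded: s0, s1 (8, 9), s2..s9 (18..25), t0..t2 (5..7), t3, t4 (28, 29), a0..a7 (10..17),
// i.e. callee-saved registers are tried first and argument registers last.
// FPR_ORDER covers f8..f31 in ascending order, leaving f0..f7 (ft0..ft7) unallocated.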

src/dynarmic/backend/riscv64/emit_context.h

@@ -0,0 +1,26 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
namespace Dynarmic::IR {
class Block;
} // namespace Dynarmic::IR
namespace Dynarmic::Backend::RV64 {
struct EmitConfig;
struct EmitContext {
IR::Block& block;
RegAlloc& reg_alloc;
const EmitConfig& emit_conf;
EmittedBlockInfo& ebi;
};
} // namespace Dynarmic::Backend::RV64

src/dynarmic/backend/riscv64/emit_riscv64.cpp

@@ -5,25 +5,98 @@
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include <bit>
#include <biscuit/assembler.hpp>
#include <fmt/ostream.h>
#include <mcl/bit/bit_field.hpp>
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_context.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::RV64 {
EmittedBlockInfo EmitRV64(biscuit::Assembler& as, [[maybe_unused]] IR::Block block) {
// TODO: We should really move this to biscuit.
static void Mov64(biscuit::Assembler& as, biscuit::GPR rd, u64 imm) {
if (mcl::bit::sign_extend<32>(imm) == imm) {
as.LI(rd, static_cast<u32>(imm));
return;
}
// For 64-bit imm, a sequence of up to 8 instructions (i.e. LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) is emitted.
// In the following, imm is processed from LSB to MSB while instruction emission is performed from MSB to LSB by calling Mov64 recursively.
// In each recursion, the lowest 12 bits are removed from imm and the optimal shift amount is calculated.
// Then, the remaining part of imm is processed recursively and as.LI() gets called as soon as it fits into 32 bits.
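// Worked example: imm = 0x100000001 does not fit in 32 bits; lo12 = 1, hi52 = 0x100000,
// shift = 12 + 20 = 32, and the recursion reduces to Mov64(rd, 1).
// Emitted sequence: LI rd, 1; SLLI rd, rd, 32; ADDI rd, rd, 1.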
s32 lo12 = static_cast<s32>(mcl::bit::sign_extend<12>(imm));
/* Add 0x800 to cancel out the sign extension of ADDI. */
u64 hi52 = (imm + 0x800) >> 12;
int shift = 12 + std::countr_zero(hi52);
hi52 = mcl::bit::sign_extend(shift, hi52 >> (shift - 12));
Mov64(as, rd, hi52);
as.SLLI(rd, rd, shift);
if (lo12 != 0) {
as.ADDI(rd, rd, lo12);
}
}
template<IR::Opcode op>
void EmitIR(biscuit::Assembler&, EmitContext&, IR::Inst*) {
ASSERT_FALSE("Unimplemented opcode {}", op);
}
template<>
void EmitIR<IR::Opcode::GetCarryFromOp>(biscuit::Assembler&, EmitContext& ctx, IR::Inst* inst) {
ASSERT(ctx.reg_alloc.IsValueLive(inst));
}
EmittedBlockInfo EmitRV64(biscuit::Assembler& as, IR::Block block, const EmitConfig& emit_conf) {
using namespace biscuit;
EmittedBlockInfo ebi;
RegAlloc reg_alloc{as, GPR_ORDER, FPR_ORDER};
EmitContext ctx{block, reg_alloc, emit_conf, ebi};
ebi.entry_point = reinterpret_cast<CodePtr>(as.GetCursorPointer());
as.ADDIW(a0, zero, 8);
as.SW(a0, offsetof(A32JitState, regs) + 0 * sizeof(u32), a1);
for (auto iter = block.begin(); iter != block.end(); ++iter) {
IR::Inst* inst = &*iter;
as.ADDIW(a0, zero, 2);
as.SW(a0, offsetof(A32JitState, regs) + 1 * sizeof(u32), a1);
as.SW(a0, offsetof(A32JitState, regs) + 15 * sizeof(u32), a1);
switch (inst->GetOpcode()) {
#define OPCODE(name, type, ...) \
case IR::Opcode::name: \
EmitIR<IR::Opcode::name>(as, ctx, inst); \
break;
#define A32OPC(name, type, ...) \
case IR::Opcode::A32##name: \
EmitIR<IR::Opcode::A32##name>(as, ctx, inst); \
break;
#define A64OPC(name, type, ...) \
case IR::Opcode::A64##name: \
EmitIR<IR::Opcode::A64##name>(as, ctx, inst); \
break;
#include "dynarmic/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
default:
ASSERT_FALSE("Invalid opcode: {}", inst->GetOpcode());
break;
}
}
// TODO: Add Cycles
// TODO: Emit Terminal
const auto term = block.GetTerminal();
const IR::Term::LinkBlock* link_block_term = boost::get<IR::Term::LinkBlock>(&term);
ASSERT(link_block_term);
Mov64(as, Xscratch0, link_block_term->next.Value());
as.SD(Xscratch0, offsetof(A32JitState, regs) + sizeof(u32) * 15, Xstate);
ptrdiff_t offset = reinterpret_cast<CodePtr>(as.GetCursorPointer()) - ebi.entry_point;
ebi.relocations.emplace_back(Relocation{offset, LinkTarget::ReturnFromRunCode});

src/dynarmic/backend/riscv64/emit_riscv64.h

@@ -15,6 +15,8 @@ class Assembler;
namespace Dynarmic::IR {
class Block;
enum class Opcode;
class Inst;
} // namespace Dynarmic::IR
namespace Dynarmic::Backend::RV64 {
@@ -36,6 +38,13 @@ struct EmittedBlockInfo {
std::vector<Relocation> relocations;
};
EmittedBlockInfo EmitRV64(biscuit::Assembler& as, IR::Block block);
struct EmitConfig {};
struct EmitContext;
template<IR::Opcode op>
void EmitIR(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst);
EmittedBlockInfo EmitRV64(biscuit::Assembler& as, IR::Block block, const EmitConfig& emit_conf);
} // namespace Dynarmic::Backend::RV64

src/dynarmic/backend/riscv64/reg_alloc.cpp

@@ -0,0 +1,280 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include <algorithm>
#include <array>
#include <iterator>
#include <mcl/assert.hpp>
#include <mcl/stdint.hpp>
namespace Dynarmic::Backend::RV64 {
constexpr size_t spill_offset = offsetof(StackLayout, spill);
constexpr size_t spill_slot_size = sizeof(decltype(StackLayout::spill)::value_type);
static bool IsValuelessType(IR::Type type) {
switch (type) {
case IR::Type::Table:
return true;
default:
return false;
}
}
IR::Type Argument::GetType() const {
return value.GetType();
}
bool Argument::IsImmediate() const {
return value.IsImmediate();
}
bool Argument::GetImmediateU1() const {
return value.GetU1();
}
u8 Argument::GetImmediateU8() const {
const u64 imm = value.GetImmediateAsU64();
ASSERT(imm < 0x100);
return u8(imm);
}
u16 Argument::GetImmediateU16() const {
const u64 imm = value.GetImmediateAsU64();
ASSERT(imm < 0x10000);
return u16(imm);
}
u32 Argument::GetImmediateU32() const {
const u64 imm = value.GetImmediateAsU64();
ASSERT(imm < 0x100000000);
return u32(imm);
}
u64 Argument::GetImmediateU64() const {
return value.GetImmediateAsU64();
}
IR::Cond Argument::GetImmediateCond() const {
ASSERT(IsImmediate() && GetType() == IR::Type::Cond);
return value.GetCond();
}
IR::AccType Argument::GetImmediateAccType() const {
ASSERT(IsImmediate() && GetType() == IR::Type::AccType);
return value.GetAccType();
}
bool HostLocInfo::Contains(const IR::Inst* value) const {
return std::find(values.begin(), values.end(), value) != values.end();
}
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
for (size_t i = 0; i < inst->NumArgs(); i++) {
const IR::Value arg = inst->GetArg(i);
ret[i].value = arg;
if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already have been defined");
ValueInfo(arg.GetInst()).accumulated_uses++;
}
}
return ret;
}
bool RegAlloc::IsValueLive(IR::Inst* inst) const {
return !!ValueLocation(inst);
}
template<bool is_fpr>
u32 RegAlloc::RealizeReadImpl(const IR::Inst* value) {
constexpr HostLoc::Kind required_kind = is_fpr ? HostLoc::Kind::Fpr : HostLoc::Kind::Gpr;
const auto current_location = ValueLocation(value);
ASSERT(current_location);
if (current_location->kind == required_kind) {
ValueInfo(*current_location).realized = true;
return current_location->index;
}
ASSERT(!ValueInfo(*current_location).realized);
ASSERT(!ValueInfo(*current_location).locked);
if constexpr (is_fpr) {
const u32 new_location_index = AllocateRegister(fprs, fpr_order);
SpillFpr(new_location_index);
switch (current_location->kind) {
case HostLoc::Kind::Gpr:
as.FMV_D_X(biscuit::FPR{new_location_index}, biscuit::GPR(current_location->index));
break;
case HostLoc::Kind::Fpr:
ASSERT_FALSE("Logic error");
break;
case HostLoc::Kind::Spill:
as.FLD(biscuit::FPR{new_location_index}, spill_offset + new_location_index * spill_slot_size, biscuit::sp);
break;
}
fprs[new_location_index] = std::exchange(ValueInfo(*current_location), {});
fprs[new_location_index].realized = true;
return new_location_index;
} else {
const u32 new_location_index = AllocateRegister(gprs, gpr_order);
SpillGpr(new_location_index);
switch (current_location->kind) {
case HostLoc::Kind::Gpr:
ASSERT_FALSE("Logic error");
break;
case HostLoc::Kind::Fpr:
as.FMV_X_D(biscuit::GPR(new_location_index), biscuit::FPR{current_location->index});
// ASSERT size fits
break;
case HostLoc::Kind::Spill:
as.LD(biscuit::GPR{new_location_index}, spill_offset + new_location_index * spill_slot_size, biscuit::sp);
break;
}
gprs[new_location_index] = std::exchange(ValueInfo(*current_location), {});
gprs[new_location_index].realized = true;
return new_location_index;
}
}
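// A write always receives a freshly allocated register of the requested kind; expected_uses is
// primed from the instruction's use count so the location can be released once every consumer
// has been realized.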
template<bool is_fpr>
u32 RegAlloc::RealizeWriteImpl(const IR::Inst* value) {
ASSERT(!ValueLocation(value));
const auto setup_location = [&](HostLocInfo& info) {
info = {};
info.values.emplace_back(value);
info.locked = true;
info.realized = true;
info.expected_uses += value->UseCount();
};
if constexpr (is_fpr) {
const u32 new_location_index = AllocateRegister(fprs, fpr_order);
SpillFpr(new_location_index);
setup_location(fprs[new_location_index]);
return new_location_index;
} else {
const u32 new_location_index = AllocateRegister(gprs, gpr_order);
SpillGpr(new_location_index);
setup_location(gprs[new_location_index]);
return new_location_index;
}
}
template u32 RegAlloc::RealizeReadImpl<true>(const IR::Inst* value);
template u32 RegAlloc::RealizeReadImpl<false>(const IR::Inst* value);
template u32 RegAlloc::RealizeWriteImpl<true>(const IR::Inst* value);
template u32 RegAlloc::RealizeWriteImpl<false>(const IR::Inst* value);
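// Unlock is invoked from ~RAReg when a handler releases a register: once every expected use has
// been realized (accumulated_uses == expected_uses) the location is freed entirely; otherwise it
// only drops its locked/realized flags and keeps holding the value.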
void RegAlloc::Unlock(HostLoc host_loc) {
HostLocInfo& info = ValueInfo(host_loc);
if (!info.realized) {
return;
}
if (info.accumulated_uses == info.expected_uses) {
info = {};
} else {
info.realized = false;
info.locked = false;
}
}
u32 RegAlloc::AllocateRegister(const std::array<HostLocInfo, 32>& regs, const std::vector<u32>& order) const {
const auto empty = std::find_if(order.begin(), order.end(), [&](u32 i) { return regs[i].values.empty() && !regs[i].locked; });
if (empty != order.end()) {
return *empty;
}
std::vector<u32> candidates;
std::copy_if(order.begin(), order.end(), std::back_inserter(candidates), [&](u32 i) { return !regs[i].locked; });
// TODO: LRU
std::uniform_int_distribution<size_t> dis{0, candidates.size() - 1};
return candidates[dis(rand_gen)];
}
void RegAlloc::SpillGpr(u32 index) {
ASSERT(!gprs[index].locked && !gprs[index].realized);
if (gprs[index].values.empty()) {
return;
}
const u32 new_location_index = FindFreeSpill();
as.SD(biscuit::GPR{index}, spill_offset + new_location_index * spill_slot_size, biscuit::sp);
spills[new_location_index] = std::exchange(gprs[index], {});
}
void RegAlloc::SpillFpr(u32 index) {
ASSERT(!fprs[index].locked && !fprs[index].realized);
if (fprs[index].values.empty()) {
return;
}
const u32 new_location_index = FindFreeSpill();
as.FSD(biscuit::FPR{index}, spill_offset + new_location_index * spill_slot_size, biscuit::sp);
spills[new_location_index] = std::exchange(fprs[index], {});
}
u32 RegAlloc::FindFreeSpill() const {
const auto iter = std::find_if(spills.begin(), spills.end(), [](const HostLocInfo& info) { return info.values.empty(); });
ASSERT_MSG(iter != spills.end(), "All spill locations are full");
return static_cast<u32>(iter - spills.begin());
}
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
const auto contains_value = [value](const HostLocInfo& info) {
return info.Contains(value);
};
if (const auto iter = std::find_if(gprs.begin(), gprs.end(), contains_value); iter != gprs.end()) {
return HostLoc{HostLoc::Kind::Gpr, static_cast<u32>(iter - gprs.begin())};
}
if (const auto iter = std::find_if(fprs.begin(), fprs.end(), contains_value); iter != fprs.end()) {
return HostLoc{HostLoc::Kind::Fpr, static_cast<u32>(iter - fprs.begin())};
}
if (const auto iter = std::find_if(spills.begin(), spills.end(), contains_value); iter != spills.end()) {
return HostLoc{HostLoc::Kind::Spill, static_cast<u32>(iter - spills.begin())};
}
return std::nullopt;
}
HostLocInfo& RegAlloc::ValueInfo(HostLoc host_loc) {
switch (host_loc.kind) {
case HostLoc::Kind::Gpr:
return gprs[static_cast<size_t>(host_loc.index)];
case HostLoc::Kind::Fpr:
return fprs[static_cast<size_t>(host_loc.index)];
case HostLoc::Kind::Spill:
return spills[static_cast<size_t>(host_loc.index)];
}
ASSERT_FALSE("RegAlloc::ValueInfo: Invalid HostLoc::Kind");
}
HostLocInfo& RegAlloc::ValueInfo(const IR::Inst* value) {
const auto contains_value = [value](const HostLocInfo& info) {
return info.Contains(value);
};
if (const auto iter = std::find_if(gprs.begin(), gprs.end(), contains_value); iter != gprs.end()) {
return *iter;
}
if (const auto iter = std::find_if(fprs.begin(), fprs.end(), contains_value); iter != fprs.end()) {
return *iter;
}
if (const auto iter = std::find_if(spills.begin(), spills.end(), contains_value); iter != spills.end()) {
return *iter;
}
ASSERT_FALSE("RegAlloc::ValueInfo: Value not found");
}
} // namespace Dynarmic::Backend::RV64

src/dynarmic/backend/riscv64/reg_alloc.h

@@ -0,0 +1,169 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <array>
#include <functional>
#include <optional>
#include <random>
#include <type_traits>
#include <utility>
#include <vector>
#include <biscuit/assembler.hpp>
#include <biscuit/registers.hpp>
#include <mcl/assert.hpp>
#include <mcl/stdint.hpp>
#include <mcl/type_traits/is_instance_of_template.hpp>
#include <tsl/robin_set.h>
#include "dynarmic/backend/riscv64/stack_layout.h"
#include "dynarmic/ir/cond.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/value.h"
namespace Dynarmic::Backend::RV64 {
class RegAlloc;
struct HostLoc {
enum class Kind {
Gpr,
Fpr,
Spill,
} kind;
u32 index;
};
struct Argument {
public:
using copyable_reference = std::reference_wrapper<Argument>;
IR::Type GetType() const;
bool IsImmediate() const;
bool GetImmediateU1() const;
u8 GetImmediateU8() const;
u16 GetImmediateU16() const;
u32 GetImmediateU32() const;
u64 GetImmediateU64() const;
IR::Cond GetImmediateCond() const;
IR::AccType GetImmediateAccType() const;
private:
friend class RegAlloc;
explicit Argument(RegAlloc& reg_alloc)
: reg_alloc{reg_alloc} {}
bool allocated = false;
RegAlloc& reg_alloc;
IR::Value value;
};
template<typename T>
struct RAReg {
public:
static constexpr bool is_fpr = std::is_base_of_v<biscuit::FPR, T>;
operator T() const { return *reg; }
T operator*() const { return *reg; }
~RAReg();
private:
friend class RegAlloc;
explicit RAReg(RegAlloc& reg_alloc, bool write, const IR::Inst* value)
: reg_alloc{reg_alloc}, write{write}, value{value} {}
void Realize();
RegAlloc& reg_alloc;
bool write;
const IR::Inst* value;
std::optional<T> reg;
};
struct HostLocInfo final {
std::vector<const IR::Inst*> values;
bool locked = false;
bool realized = false;
size_t accumulated_uses = 0;
size_t expected_uses = 0;
bool Contains(const IR::Inst*) const;
};
class RegAlloc {
public:
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
explicit RegAlloc(biscuit::Assembler& as, std::vector<u32> gpr_order, std::vector<u32> fpr_order)
: as{as}, gpr_order{gpr_order}, fpr_order{fpr_order}, rand_gen{std::random_device{}()} {}
ArgumentInfo GetArgumentInfo(IR::Inst* inst);
bool IsValueLive(IR::Inst* inst) const;
auto ReadX(Argument& arg) { return RAReg<biscuit::GPR>{*this, false, PreReadImpl(arg.value)}; }
auto ReadD(Argument& arg) { return RAReg<biscuit::FPR>{*this, false, PreReadImpl(arg.value)}; }
auto WriteX(IR::Inst* inst) { return RAReg<biscuit::GPR>{*this, true, inst}; }
auto WriteD(IR::Inst* inst) { return RAReg<biscuit::FPR>{*this, true, inst}; }
void SpillAll();
template<typename... Ts>
static void Realize(Ts&... rs) {
static_assert((mcl::is_instance_of_template<RAReg, Ts>() && ...));
(rs.Realize(), ...);
}
private:
template<typename>
friend struct RAReg;
const IR::Inst* PreReadImpl(const IR::Value& value) {
ValueInfo(value.GetInst()).locked = true;
return value.GetInst();
}
template<bool is_fpr>
u32 RealizeReadImpl(const IR::Inst* value);
template<bool is_fpr>
u32 RealizeWriteImpl(const IR::Inst* value);
void Unlock(HostLoc host_loc);
u32 AllocateRegister(const std::array<HostLocInfo, 32>& regs, const std::vector<u32>& order) const;
void SpillGpr(u32 index);
void SpillFpr(u32 index);
u32 FindFreeSpill() const;
std::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
HostLocInfo& ValueInfo(HostLoc host_loc);
HostLocInfo& ValueInfo(const IR::Inst* value);
biscuit::Assembler& as;
std::vector<u32> gpr_order;
std::vector<u32> fpr_order;
std::array<HostLocInfo, 32> gprs;
std::array<HostLocInfo, 32> fprs;
std::array<HostLocInfo, SpillCount> spills;
mutable std::mt19937 rand_gen;
};
template<typename T>
RAReg<T>::~RAReg() {
if (reg) {
reg_alloc.Unlock(HostLoc{is_fpr ? HostLoc::Kind::Fpr : HostLoc::Kind::Gpr, reg->Index()});
}
}
template<typename T>
void RAReg<T>::Realize() {
reg = T{write ? reg_alloc.RealizeWriteImpl<is_fpr>(value) : reg_alloc.RealizeReadImpl<is_fpr>(value)};
}
} // namespace Dynarmic::Backend::RV64
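To make the intended use of RAReg and Realize concrete, here is a minimal editorial sketch of an opcode handler written against the interface above. It is not part of this commit, and the choice of IR::Opcode::And32 with a plain two-operand layout is an assumption made only for illustration.

template<>
void EmitIR<IR::Opcode::And32>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);  // counts a use of each input value
    auto Xresult = ctx.reg_alloc.WriteX(inst);        // destination GPR, not yet assigned
    auto Xa = ctx.reg_alloc.ReadX(args[0]);           // locks the inputs' current locations
    auto Xb = ctx.reg_alloc.ReadX(args[1]);
    RegAlloc::Realize(Xresult, Xa, Xb);               // registers are allocated (and spills emitted) here
    as.AND(Xresult, Xa, Xb);                          // RAReg converts implicitly to biscuit::GPR
}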

src/dynarmic/backend/riscv64/stack_layout.h

@@ -0,0 +1,30 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <array>
#include <mcl/stdint.hpp>
namespace Dynarmic::Backend::RV64 {
constexpr size_t SpillCount = 64;
struct alignas(16) StackLayout {
s64 cycles_remaining;
s64 cycles_to_run;
std::array<u64, SpillCount> spill;
u32 save_host_fpcr;
u32 save_host_fpsr;
bool check_bit;
};
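// Spill slot n is addressed in reg_alloc.cpp as sp + offsetof(StackLayout, spill) + n * sizeof(u64);
// FindFreeSpill() asserts once all SpillCount slots are occupied.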
static_assert(sizeof(StackLayout) % 16 == 0);
} // namespace Dynarmic::Backend::RV64