diff --git a/src/dynarmic/backend/arm64/reg_alloc.cpp b/src/dynarmic/backend/arm64/reg_alloc.cpp index 28be2192..9ea0aff1 100644 --- a/src/dynarmic/backend/arm64/reg_alloc.cpp +++ b/src/dynarmic/backend/arm64/reg_alloc.cpp @@ -93,7 +93,6 @@ bool HostLocInfo::Contains(const IR::Inst* value) const { void HostLocInfo::SetupScratchLocation() { ASSERT(IsCompletelyEmpty()); - locked++; realized = true; } @@ -101,7 +100,6 @@ void HostLocInfo::SetupLocation(const IR::Inst* value) { ASSERT(IsCompletelyEmpty()); values.clear(); values.emplace_back(value); - locked++; realized = true; uses_this_inst = 0; accumulated_uses = 0; @@ -112,8 +110,8 @@ bool HostLocInfo::IsCompletelyEmpty() const { return values.empty() && !locked && !realized && !accumulated_uses && !expected_uses && !uses_this_inst; } -bool HostLocInfo::IsImmediatelyAllocatable() const { - return values.empty() && !locked; +bool HostLocInfo::MaybeAllocatable() const { + return !locked && !realized; } bool HostLocInfo::IsOneRemainingUse() const { @@ -169,6 +167,7 @@ void RegAlloc::PrepareForCall(IR::Inst* result, std::optional, 4> args{arg0, arg1, arg2, arg3}; for (int i = 0; i < 4; i++) { if (args[i]) { + ASSERT(gprs[i].IsCompletelyEmpty()); LoadCopyInto(args[i]->get().value, oaknut::XReg{i}); } } @@ -339,21 +338,43 @@ int RegAlloc::RealizeWriteImpl(const IR::Inst* value) { } } +template +int RegAlloc::RealizeReadWriteImpl(const IR::Value& read_value, const IR::Inst* write_value) { + // TODO: Move elimination + + const int write_loc = RealizeWriteImpl(write_value); + + if constexpr (kind == HostLoc::Kind::Gpr) { + LoadCopyInto(read_value, oaknut::XReg{write_loc}); + return write_loc; + } else if constexpr (kind == HostLoc::Kind::Fpr) { + LoadCopyInto(read_value, oaknut::QReg{write_loc}); + return write_loc; + } else if constexpr (kind == HostLoc::Kind::Flags) { + ASSERT_FALSE("Incorrect function for ReadWrite of flags"); + } else { + static_assert(kind == HostLoc::Kind::Fpr || kind == HostLoc::Kind::Gpr || kind == HostLoc::Kind::Flags); + } +} + template int RegAlloc::RealizeReadImpl(const IR::Value& value); template int RegAlloc::RealizeReadImpl(const IR::Value& value); template int RegAlloc::RealizeReadImpl(const IR::Value& value); template int RegAlloc::RealizeWriteImpl(const IR::Inst* value); template int RegAlloc::RealizeWriteImpl(const IR::Inst* value); template int RegAlloc::RealizeWriteImpl(const IR::Inst* value); +template int RegAlloc::RealizeReadWriteImpl(const IR::Value&, const IR::Inst*); +template int RegAlloc::RealizeReadWriteImpl(const IR::Value&, const IR::Inst*); +template int RegAlloc::RealizeReadWriteImpl(const IR::Value&, const IR::Inst*); int RegAlloc::AllocateRegister(const std::array& regs, const std::vector& order) const { - const auto empty = std::find_if(order.begin(), order.end(), [&](int i) { return regs[i].IsImmediatelyAllocatable(); }); + const auto empty = std::find_if(order.begin(), order.end(), [&](int i) { return regs[i].IsCompletelyEmpty(); }); if (empty != order.end()) { return *empty; } std::vector candidates; - std::copy_if(order.begin(), order.end(), std::back_inserter(candidates), [&](int i) { return !regs[i].locked; }); + std::copy_if(order.begin(), order.end(), std::back_inserter(candidates), [&](int i) { return regs[i].MaybeAllocatable(); }); // TODO: LRU std::uniform_int_distribution dis{0, candidates.size() - 1}; @@ -405,7 +426,6 @@ void RegAlloc::ReadWriteFlags(Argument& read, IR::Inst* write) { if (write) { flags.SetupLocation(write); - flags.locked--; flags.realized = false; } } @@ -435,7 +455,6 @@ void RegAlloc::LoadCopyInto(const IR::Value& value, oaknut::XReg reg) { const auto current_location = ValueLocation(value.GetInst()); ASSERT(current_location); - ASSERT(gprs[reg.index()].IsCompletelyEmpty()); switch (current_location->kind) { case HostLoc::Kind::Gpr: code.MOV(reg, oaknut::XReg{current_location->index}); @@ -453,6 +472,32 @@ void RegAlloc::LoadCopyInto(const IR::Value& value, oaknut::XReg reg) { } } +void RegAlloc::LoadCopyInto(const IR::Value& value, oaknut::QReg reg) { + if (value.IsImmediate()) { + code.MOV(Xscratch0, value.GetImmediateAsU64()); + code.FMOV(reg.toD(), Xscratch0); + return; + } + + const auto current_location = ValueLocation(value.GetInst()); + ASSERT(current_location); + switch (current_location->kind) { + case HostLoc::Kind::Gpr: + code.FMOV(reg.toD(), oaknut::XReg{current_location->index}); + break; + case HostLoc::Kind::Fpr: + code.MOV(reg.B16(), oaknut::QReg{current_location->index}.B16()); + break; + case HostLoc::Kind::Spill: + // TODO: Minimize move size to max value width + code.LDR(reg, SP, spill_offset + current_location->index * spill_slot_size); + break; + case HostLoc::Kind::Flags: + ASSERT_FALSE("Moving from flags into fprs is not currently supported"); + break; + } +} + std::optional RegAlloc::ValueLocation(const IR::Inst* value) const { const auto contains_value = [value](const HostLocInfo& info) { return info.Contains(value); }; diff --git a/src/dynarmic/backend/arm64/reg_alloc.h b/src/dynarmic/backend/arm64/reg_alloc.h index 7be78c36..1111dfb0 100644 --- a/src/dynarmic/backend/arm64/reg_alloc.h +++ b/src/dynarmic/backend/arm64/reg_alloc.h @@ -26,7 +26,7 @@ namespace Dynarmic::Backend::Arm64 { class FpsrManager; class RegAlloc; -struct HostLoc { +struct HostLoc final { enum class Kind { Gpr, Fpr, @@ -36,7 +36,13 @@ struct HostLoc { int index; }; -struct Argument { +enum RWType { + Read, + Write, + ReadWrite, +}; + +struct Argument final { public: using copyable_reference = std::reference_wrapper; @@ -68,7 +74,7 @@ private: IR::Value value; }; -struct FlagsTag { +struct FlagsTag final { private: template friend struct RAReg; @@ -78,7 +84,7 @@ private: }; template -struct RAReg { +struct RAReg final { public: static constexpr HostLoc::Kind kind = !std::is_same_v ? std::is_base_of_v @@ -103,7 +109,7 @@ public: private: friend class RegAlloc; - explicit RAReg(RegAlloc& reg_alloc, bool write, const IR::Value& value); + explicit RAReg(RegAlloc& reg_alloc, RWType rw, const IR::Value& read_value, const IR::Inst* write_value); RAReg(const RAReg&) = delete; RAReg& operator=(const RAReg&) = delete; @@ -113,12 +119,13 @@ private: void Realize(); RegAlloc& reg_alloc; - bool write; - const IR::Value value; + RWType rw; + const IR::Value read_value; + const IR::Inst* write_value; std::optional reg; }; -struct HostLocInfo { +struct HostLocInfo final { std::vector values; size_t locked = 0; bool realized = false; @@ -130,12 +137,12 @@ struct HostLocInfo { void SetupScratchLocation(); void SetupLocation(const IR::Inst*); bool IsCompletelyEmpty() const; - bool IsImmediatelyAllocatable() const; + bool MaybeAllocatable() const; bool IsOneRemainingUse() const; void UpdateUses(); }; -class RegAlloc { +class RegAlloc final { public: using ArgumentInfo = std::array; @@ -145,14 +152,14 @@ public: ArgumentInfo GetArgumentInfo(IR::Inst* inst); bool IsValueLive(IR::Inst* inst) const; - auto ReadX(Argument& arg) { return RAReg{*this, false, arg.value}; } - auto ReadW(Argument& arg) { return RAReg{*this, false, arg.value}; } + auto ReadX(Argument& arg) { return RAReg{*this, RWType::Read, arg.value, nullptr}; } + auto ReadW(Argument& arg) { return RAReg{*this, RWType::Read, arg.value, nullptr}; } - auto ReadQ(Argument& arg) { return RAReg{*this, false, arg.value}; } - auto ReadD(Argument& arg) { return RAReg{*this, false, arg.value}; } - auto ReadS(Argument& arg) { return RAReg{*this, false, arg.value}; } - auto ReadH(Argument& arg) { return RAReg{*this, false, arg.value}; } - auto ReadB(Argument& arg) { return RAReg{*this, false, arg.value}; } + auto ReadQ(Argument& arg) { return RAReg{*this, RWType::Read, arg.value, nullptr}; } + auto ReadD(Argument& arg) { return RAReg{*this, RWType::Read, arg.value, nullptr}; } + auto ReadS(Argument& arg) { return RAReg{*this, RWType::Read, arg.value, nullptr}; } + auto ReadH(Argument& arg) { return RAReg{*this, RWType::Read, arg.value, nullptr}; } + auto ReadB(Argument& arg) { return RAReg{*this, RWType::Read, arg.value, nullptr}; } template auto ReadReg(Argument& arg) { @@ -182,16 +189,16 @@ public: } } - auto WriteX(IR::Inst* inst) { return RAReg{*this, true, IR::Value{inst}}; } - auto WriteW(IR::Inst* inst) { return RAReg{*this, true, IR::Value{inst}}; } + auto WriteX(IR::Inst* inst) { return RAReg{*this, RWType::Write, {}, inst}; } + auto WriteW(IR::Inst* inst) { return RAReg{*this, RWType::Write, {}, inst}; } - auto WriteQ(IR::Inst* inst) { return RAReg{*this, true, IR::Value{inst}}; } - auto WriteD(IR::Inst* inst) { return RAReg{*this, true, IR::Value{inst}}; } - auto WriteS(IR::Inst* inst) { return RAReg{*this, true, IR::Value{inst}}; } - auto WriteH(IR::Inst* inst) { return RAReg{*this, true, IR::Value{inst}}; } - auto WriteB(IR::Inst* inst) { return RAReg{*this, true, IR::Value{inst}}; } + auto WriteQ(IR::Inst* inst) { return RAReg{*this, RWType::Write, {}, inst}; } + auto WriteD(IR::Inst* inst) { return RAReg{*this, RWType::Write, {}, inst}; } + auto WriteS(IR::Inst* inst) { return RAReg{*this, RWType::Write, {}, inst}; } + auto WriteH(IR::Inst* inst) { return RAReg{*this, RWType::Write, {}, inst}; } + auto WriteB(IR::Inst* inst) { return RAReg{*this, RWType::Write, {}, inst}; } - auto WriteFlags(IR::Inst* inst) { return RAReg{*this, true, IR::Value{inst}}; } + auto WriteFlags(IR::Inst* inst) { return RAReg{*this, RWType::Write, {}, inst}; } template auto WriteReg(IR::Inst* inst) { @@ -221,6 +228,43 @@ public: } } + auto ReadWriteX(Argument& arg, const IR::Inst* inst) { return RAReg{*this, RWType::ReadWrite, arg.value, inst}; } + auto ReadWriteW(Argument& arg, const IR::Inst* inst) { return RAReg{*this, RWType::ReadWrite, arg.value, inst}; } + + auto ReadWriteQ(Argument& arg, const IR::Inst* inst) { return RAReg{*this, RWType::ReadWrite, arg.value, inst}; } + auto ReadWriteD(Argument& arg, const IR::Inst* inst) { return RAReg{*this, RWType::ReadWrite, arg.value, inst}; } + auto ReadWriteS(Argument& arg, const IR::Inst* inst) { return RAReg{*this, RWType::ReadWrite, arg.value, inst}; } + auto ReadWriteH(Argument& arg, const IR::Inst* inst) { return RAReg{*this, RWType::ReadWrite, arg.value, inst}; } + auto ReadWriteB(Argument& arg, const IR::Inst* inst) { return RAReg{*this, RWType::ReadWrite, arg.value, inst}; } + + template + auto ReadWriteReg(Argument& arg, const IR::Inst* inst) { + if constexpr (size == 64) { + return ReadWriteX(arg, inst); + } else if constexpr (size == 32) { + return ReadWriteW(arg, inst); + } else { + ASSERT_FALSE("Invalid size to ReadWriteReg {}", size); + } + } + + template + auto ReadWriteVec(Argument& arg, const IR::Inst* inst) { + if constexpr (size == 128) { + return ReadWriteQ(arg, inst); + } else if constexpr (size == 64) { + return ReadWriteD(arg, inst); + } else if constexpr (size == 32) { + return ReadWriteS(arg, inst); + } else if constexpr (size == 16) { + return ReadWriteH(arg, inst); + } else if constexpr (size == 8) { + return ReadWriteB(arg, inst); + } else { + ASSERT_FALSE("Invalid size to ReadWriteVec {}", size); + } + } + void PrepareForCall(IR::Inst* result = nullptr, std::optional arg0 = {}, std::optional arg1 = {}, @@ -254,6 +298,8 @@ private: int RealizeReadImpl(const IR::Value& value); template int RealizeWriteImpl(const IR::Inst* value); + template + int RealizeReadWriteImpl(const IR::Value& read_value, const IR::Inst* write_value); int AllocateRegister(const std::array& regs, const std::vector& order) const; void SpillGpr(int index); @@ -261,6 +307,7 @@ private: int FindFreeSpill() const; void LoadCopyInto(const IR::Value& value, oaknut::XReg reg); + void LoadCopyInto(const IR::Value& value, oaknut::QReg reg); std::optional ValueLocation(const IR::Inst* value) const; HostLocInfo& ValueInfo(HostLoc host_loc); @@ -280,34 +327,38 @@ private: }; template -RAReg::RAReg(RegAlloc& reg_alloc, bool write, const IR::Value& value) - : reg_alloc{reg_alloc}, write{write}, value{value} { - if (!write && !value.IsImmediate()) { - reg_alloc.ValueInfo(value.GetInst()).locked++; +RAReg::RAReg(RegAlloc& reg_alloc, RWType rw, const IR::Value& read_value, const IR::Inst* write_value) + : reg_alloc{reg_alloc}, rw{rw}, read_value{read_value}, write_value{write_value} { + if (rw != RWType::Write && !read_value.IsImmediate()) { + reg_alloc.ValueInfo(read_value.GetInst()).locked++; } } template RAReg::~RAReg() { - if (value.IsImmediate()) { - if (reg) { - // Immediate in scratch register - HostLocInfo& info = reg_alloc.ValueInfo(HostLoc{kind, reg->index()}); - info.locked--; - info.realized = false; - } - } else { - HostLocInfo& info = reg_alloc.ValueInfo(value.GetInst()); - info.locked--; - if (reg) { - reg_alloc.ValueInfo(HostLoc{kind, reg->index()}).realized = false; - } + if (rw != RWType::Write && !read_value.IsImmediate()) { + reg_alloc.ValueInfo(read_value.GetInst()).locked--; + } + if (reg) { + reg_alloc.ValueInfo(HostLoc{kind, reg->index()}).realized = false; } } template void RAReg::Realize() { - reg = T{write ? reg_alloc.RealizeWriteImpl(value.GetInst()) : reg_alloc.RealizeReadImpl(value)}; + switch (rw) { + case RWType::Read: + reg = T{reg_alloc.RealizeReadImpl(read_value)}; + break; + case RWType::Write: + reg = T{reg_alloc.RealizeWriteImpl(write_value)}; + break; + case RWType::ReadWrite: + reg = T{reg_alloc.RealizeReadWriteImpl(read_value, write_value)}; + break; + default: + ASSERT_FALSE("Invalid RWType"); + } } } // namespace Dynarmic::Backend::Arm64