Compare commits

...

3 commits

Author SHA1 Message Date
spectranator
f575a2b47f Implemented initial (but broken) IR cache 2024-05-21 20:23:33 +02:00
spectranator
474ca1b008 Implemented basic IR serialization 2024-05-21 13:40:10 +02:00
spectranator
b8acc0390a Changed precompiled binary download path own fork from yuzu-mirror
Nintendo will not be able to take these repositories down anyways, they'd have to get my whole Github account banned for that
2024-05-21 07:50:16 +02:00
17 changed files with 322 additions and 9 deletions

View file

@ -7,7 +7,7 @@
# prefix_var: name of a variable which will be set with the path to the extracted contents
function(download_bundled_external remote_path lib_name prefix_var)
set(package_base_url "https://github.com/yuzu-mirror/")
set(package_base_url "https://github.com/torzu/")
set(package_repo "no_platform")
set(package_extension "no_platform")
if (WIN32)

View file

@ -6,6 +6,8 @@
#include <cstring>
#include <memory>
#include <mutex>
#include <fstream>
#include <filesystem>
#include <boost/icl/interval_set.hpp>
#include <mcl/assert.hpp>
@ -268,6 +270,28 @@ private:
}
block_of_code.EnsureMemoryCommitted(MINIMUM_REMAINING_CODESIZE);
// Get cache path
const auto cache_path = std::filesystem::path(conf.ir_cache_path) / (std::to_string(current_location.Value())+".ir");
// Load from disk cache
if (!conf.ir_cache_path.empty()) {
std::ifstream cache_file(cache_path, std::ios::binary);
if (cache_file) {
// Read entire file
std::vector<uint16_t> data;
while (cache_file.read(reinterpret_cast<char*>(&data.emplace_back(0)), sizeof(data[0])));
data.pop_back();
cache_file.close();
// Deserialize file
IR::Block ir_block(A64::LocationDescriptor{current_location});
auto it = data.begin();
ir_block.Deserialize(it);
ASSERT(!(it > data.end()));
ASSERT(!(it < data.end()));
return emitter.Emit(ir_block).entrypoint;
}
}
// JIT Compile
const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code,
@ -287,6 +311,16 @@ private:
Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
}
Optimization::VerificationPass(ir_block);
// Store to disk cache if non-empty
if (!conf.ir_cache_path.empty() && !ir_block.empty()) {
std::ofstream cache_file(cache_path, std::ios::binary);
ASSERT_MSG(cache_file, "Failed to write cache file");
std::vector<uint16_t> data;
ir_block.Serialize(data);
cache_file.write(reinterpret_cast<const char*>(data.data()), data.size()*sizeof(data[0]));
}
return emitter.Emit(ir_block).entrypoint;
}

View file

@ -5,6 +5,7 @@
#pragma once
#include <string>
#include <array>
#include <cstddef>
#include <cstdint>
@ -285,10 +286,13 @@ struct UserConfig {
/// AddTicks and GetTicksRemaining are never called, and no cycle counting is done.
bool enable_cycle_counting = true;
// Minimum size is about 8MiB. Maximum size is about 128MiB (arm64 host) or 2GiB (x64 host).
// Maximum size is limited by the maximum length of a x86_64 / arm64 jump.
/// Minimum size is about 8MiB. Maximum size is about 128MiB (arm64 host) or 2GiB (x64 host).
/// Maximum size is limited by the maximum length of a x86_64 / arm64 jump.
size_t code_cache_size = 128 * 1024 * 1024; // bytes
/// IR cache location; the cache is disabled if this is empty. Must be an existing directory.
std::string ir_cache_path;
/// Internal use only
bool very_verbose_debugging_output = false;
};

View file

@ -30,8 +30,12 @@ Block::Block(Block&&) = default;
Block& Block::operator=(Block&&) = default;
void Block::AppendNewInst(Opcode opcode, std::initializer_list<IR::Value> args) {
PrependNewInst(end(), opcode, args);
/// Creates a new instruction from `opcode` and `args` and places it at the end of this block.
/// Forwards to PrependNewInst with end() as the insertion point and returns its iterator.
Block::iterator Block::AppendNewInst(Opcode opcode, std::initializer_list<IR::Value> args) {
    auto tail = end();
    return PrependNewInst(tail, opcode, args);
}
/// Places an already-constructed instruction at the end of this block.
/// Forwards to PrependNewInst with end() as the insertion point and returns its iterator.
Block::iterator Block::AppendNewInst(Inst&& inst) {
    auto tail = end();
    return PrependNewInst(tail, std::move(inst));
}
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) {
@ -46,6 +50,12 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, s
return instructions.insert_before(insertion_point, inst);
}
/// Move-constructs `inst` into storage obtained from this block's instruction pool and
/// links the resulting instruction into the list immediately before `insertion_point`.
/// Returns the iterator produced by the list insertion.
Block::iterator Block::PrependNewInst(iterator insertion_point, Inst&& inst) {
    // Storage comes from the pool; the instruction is constructed in place.
    void* const storage = instruction_alloc_pool->Alloc();
    IR::Inst* const pooled_inst = new (storage) IR::Inst(std::move(inst));

    return instructions.insert_before(insertion_point, pooled_inst);
}
/// Returns a copy of the location descriptor stored in this block's `location` member.
LocationDescriptor Block::Location() const {
return location;
}
@ -120,6 +130,144 @@ const size_t& Block::CycleCount() const {
return cycle_count;
}
/// Appends a serialized form of this block to `fres` as a sequence of 16-bit words.
///
/// Layout, in order:
///   - the block marker word 0xa91e,
///   - the instruction count (one word),
///   - every instruction, via Inst::Serialize,
///   - the block terminal, via SerializeTerminal,
///   - the end location, via LocationDescriptor::Serialize.
///
/// The block must not be empty, and its instruction count must fit in one 16-bit word.
void Block::Serialize(std::vector<uint16_t>& fres) const {
    ASSERT(!empty());

    // The count is stored in a single 16-bit word; refuse to write a truncated value,
    // mirroring the range check done for Interpret::num_instructions in SerializeTerminal.
    const auto inst_count = size();
    ASSERT(inst_count <= 0xffff);

    fres.push_back(0xa91e);
    fres.push_back(static_cast<uint16_t>(inst_count));

    for (const auto& inst : *this) {
        inst.Serialize(*this, fres);
    }

    SerializeTerminal(GetTerminal(), fres);
    EndLocation().Serialize(fres);
}
void Block::Deserialize(std::vector<uint16_t>::iterator& it) {
ASSERT(empty());
const bool magic_ok = *(it++) == 0xa91e;
ASSERT_MSG(magic_ok, "Bad IR block magic");
const auto inst_count = *(it++);
ASSERT(inst_count > 0);
std::vector<Inst*> insts;
for (unsigned idx = 0; idx != inst_count; ++idx) {
auto inst = Inst::Deserialize(insts, it);
Inst* ptr = &*AppendNewInst(std::move(inst));
insts.push_back(ptr);
}
SetTerminal(DeserializeTerminal(it));
SetEndLocation(LocationDescriptor::Deserialize(it));
}
/// Appends a serialized form of `term` to `fres`.
///
/// Each terminal is written as the marker word 0xa91f, a one-word tag identifying the
/// terminal alternative, and then the alternative's payload (if any):
///   0 Invalid            -
///   1 Interpret          next location, num_instructions (one word)
///   2 ReturnToDispatch   -
///   3 LinkBlock          next location
///   4 LinkBlockFast      next location
///   5 PopRSBHint         -
///   6 FastDispatchHint   -
///   7 If                 condition (one word), then-terminal, else-terminal
///   8 CheckBit           then-terminal, else-terminal
///   9 CheckHalt          else-terminal
/// Nested terminals are written recursively, each with its own marker word.
void Block::SerializeTerminal(const Term::Terminal& term, std::vector<uint16_t>& fres) {
    fres.push_back(0xa91f);

    struct TerminalWriter : boost::static_visitor<void> {
        std::vector<uint16_t>& out;

        explicit TerminalWriter(std::vector<uint16_t>& out_)
                : out(out_) {}

        void operator()(const Term::Invalid&) const {
            out.push_back(0);
        }

        void operator()(const Term::Interpret& interpret) const {
            out.push_back(1);
            interpret.next.Serialize(out);
            // The instruction count is stored in a single word.
            ASSERT(interpret.num_instructions <= 0xffff);
            out.push_back(static_cast<uint16_t>(interpret.num_instructions));
        }

        void operator()(const Term::ReturnToDispatch&) const {
            out.push_back(2);
        }

        void operator()(const Term::LinkBlock& link) const {
            out.push_back(3);
            link.next.Serialize(out);
        }

        void operator()(const Term::LinkBlockFast& link_fast) const {
            out.push_back(4);
            link_fast.next.Serialize(out);
        }

        void operator()(const Term::PopRSBHint&) const {
            out.push_back(5);
        }

        void operator()(const Term::FastDispatchHint&) const {
            out.push_back(6);
        }

        void operator()(const Term::If& conditional) const {
            out.push_back(7);
            out.push_back(static_cast<uint16_t>(conditional.if_));
            SerializeTerminal(conditional.then_, out);
            SerializeTerminal(conditional.else_, out);
        }

        void operator()(const Term::CheckBit& check_bit) const {
            out.push_back(8);
            SerializeTerminal(check_bit.then_, out);
            SerializeTerminal(check_bit.else_, out);
        }

        void operator()(const Term::CheckHalt& check_halt) const {
            out.push_back(9);
            SerializeTerminal(check_halt.else_, out);
        }
    };

    TerminalWriter writer{fres};
    boost::apply_visitor(writer, term);
}
/// Reads one terminal from the 16-bit word stream at `it`, advancing `it` past the
/// consumed words, and returns it.
///
/// Expects the layout written by SerializeTerminal: the marker word 0xa91f, a one-word
/// tag (0-9) selecting the terminal alternative, then that alternative's payload.
/// Nested terminals (If, CheckBit, CheckHalt) are read recursively in the order
/// then-terminal, else-terminal. An unknown tag triggers ASSERT_FALSE.
///
/// Only an iterator is received, so no end-of-buffer checks are performed here.
Term::Terminal Block::DeserializeTerminal(std::vector<uint16_t>::iterator& it) {
    const bool magic_ok = *(it++) == 0xa91f;
    // Distinct message so that a corrupt terminal is not reported as a corrupt block header.
    ASSERT_MSG(magic_ok, "Bad IR terminal magic");

    Term::Terminal fres;
    const auto term_idx = *(it++);
    switch (term_idx) {
    case 0:
        fres = Term::Invalid();
        break;
    case 1: {
        Term::Interpret interp(LocationDescriptor::Deserialize(it));
        interp.num_instructions = *(it++);
        fres = std::move(interp);
        break;
    }
    case 2:
        fres = Term::ReturnToDispatch();
        break;
    case 3:
        fres = Term::LinkBlock(LocationDescriptor::Deserialize(it));
        break;
    case 4:
        fres = Term::LinkBlockFast(LocationDescriptor::Deserialize(it));
        break;
    case 5:
        fres = Term::PopRSBHint();
        break;
    case 6:
        fres = Term::FastDispatchHint();
        break;
    case 7: {
        const auto cond = static_cast<Cond>(*(it++));
        // Named locals fix the read order: then-terminal first, else-terminal second.
        Term::Terminal then_term = DeserializeTerminal(it);
        Term::Terminal else_term = DeserializeTerminal(it);
        fres = Term::If(cond, std::move(then_term), std::move(else_term));
        break;
    }
    case 8: {
        Term::Terminal then_term = DeserializeTerminal(it);
        Term::Terminal else_term = DeserializeTerminal(it);
        fres = Term::CheckBit(std::move(then_term), std::move(else_term));
        break;
    }
    case 9: {
        Term::Terminal else_term = DeserializeTerminal(it);
        fres = Term::CheckHalt(std::move(else_term));
        break;
    }
    default:
        ASSERT_FALSE("Invalid terminal type index");
    }
    return fres;
}
static std::string TerminalToString(const Terminal& terminal_variant) {
struct : boost::static_visitor<std::string> {
std::string operator()(const Term::Invalid&) const {

View file

@ -8,6 +8,7 @@
#include <initializer_list>
#include <memory>
#include <optional>
#include <vector>
#include <string>
#include <mcl/container/intrusive_list.hpp>
@ -82,8 +83,10 @@ public:
*
* @param op Opcode representing the instruction to add.
* @param args A sequence of Value instances used as arguments for the instruction.
* @returns Iterator to the newly created instruction.
*/
void AppendNewInst(Opcode op, std::initializer_list<Value> args);
Block::iterator AppendNewInst(Opcode op, std::initializer_list<Value> args);
Block::iterator AppendNewInst(Inst&& inst);
/**
* Prepends a new instruction to this basic block before the insertion point,
@ -95,6 +98,7 @@ public:
* @returns Iterator to the newly created instruction.
*/
iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args);
iterator PrependNewInst(iterator insertion_point, Inst&& inst);
/// Gets the starting location for this basic block.
LocationDescriptor Location() const;
@ -139,7 +143,13 @@ public:
/// Gets an immutable reference to the cycle count for this basic block.
const size_t& CycleCount() const;
void Serialize(std::vector<uint16_t>&) const;
void Deserialize(std::vector<uint16_t>::iterator&);
private:
static void SerializeTerminal(const Terminal&, std::vector<uint16_t>&);
static Terminal DeserializeTerminal(std::vector<uint16_t>::iterator&);
/// Description of the starting location of this block
LocationDescriptor location;
/// Description of the end location of this block

View file

@ -13,4 +13,23 @@ std::string ToString(const LocationDescriptor& descriptor) {
return fmt::format("{{{:016x}}}", descriptor.Value());
}
void LocationDescriptor::Serialize(std::vector<uint16_t>& fres) const {
u64 work_value = value;
fres.push_back(static_cast<uint16_t>(work_value));
work_value >>= 16;
fres.push_back(static_cast<uint16_t>(work_value));
work_value >>= 16;
fres.push_back(static_cast<uint16_t>(work_value));
work_value >>= 16;
fres.push_back(static_cast<uint16_t>(work_value));
}
/// Reads four 16-bit words from `it` (least-significant word first), advancing `it`
/// past them, and returns a descriptor constructed from the reassembled 64-bit value.
/// No end-of-buffer checks are performed.
LocationDescriptor LocationDescriptor::Deserialize(std::vector<uint16_t>::iterator& it) {
    u64 assembled = 0;
    for (unsigned shift = 0; shift != 64; shift += 16) {
        assembled |= static_cast<u64>(*it) << shift;
        ++it;
    }
    return LocationDescriptor(assembled);
}
} // namespace Dynarmic::IR

View file

@ -7,6 +7,7 @@
#include <functional>
#include <string>
#include <vector>
#include <fmt/format.h>
#include <mcl/stdint.hpp>
@ -26,6 +27,9 @@ public:
return !operator==(o);
}
void Serialize(std::vector<uint16_t>& fres) const;
static LocationDescriptor Deserialize(std::vector<uint16_t>::iterator&);
u64 Value() const { return value; }
private:

View file

@ -652,6 +652,27 @@ void Inst::SetArg(size_t index, Value value) {
args[index] = value;
}
/// Appends a serialized form of this instruction to `fres`: the marker word 0xa91d,
/// the opcode (one word), the argument count (one word), and then each argument via
/// Value::Serialize. `block` is passed through to Value::Serialize.
void Inst::Serialize(const Block& block, std::vector<uint16_t>& fres) const {
    const auto arg_count = NumArgs();

    fres.push_back(0xa91d);
    fres.push_back(static_cast<uint16_t>(GetOpcode()));
    fres.push_back(arg_count);

    for (unsigned arg_idx = 0; arg_idx < arg_count; ++arg_idx) {
        GetArg(arg_idx).Serialize(block, fres);
    }
}
/// Reads one instruction from the 16-bit word stream at `it`, advancing `it` past the
/// consumed words, and returns it by value.
///
/// Expects the layout written by Inst::Serialize: the marker word 0xa91d, the opcode,
/// the argument count, then that many serialized values. `insts` is forwarded to
/// Value::Deserialize, which uses it to resolve arguments stored as instruction indices.
///
/// No end-of-buffer checks are performed.
/// NOTE(review): arguments are attached to a local Inst that is later moved by the
/// caller; if SetArg records the address of the instruction it is called on, that
/// address would refer to this temporary - confirm against SetArg's implementation.
Inst Inst::Deserialize(const std::vector<Inst*>& insts, std::vector<uint16_t>::iterator& it) {
    const bool magic_ok = *(it++) == 0xa91d;
    ASSERT_MSG(magic_ok, "Bad IR instruction magic");

    Inst fres(static_cast<Opcode>(*(it++)));

    // The count comes from the stream; never index past the argument storage.
    const auto num_args = *(it++);
    const bool num_args_ok = num_args <= fres.args.size();
    ASSERT_MSG(num_args_ok, "Bad IR instruction argument count");

    for (unsigned idx = 0; idx != num_args; idx++) {
        fres.SetArg(idx, Value::Deserialize(insts, it));
    }
    return fres;
}
void Inst::Invalidate() {
ClearArgs();
op = Opcode::Void;

View file

@ -25,8 +25,17 @@ constexpr size_t max_arg_count = 4;
*/
class Inst final : public mcl::intrusive_list_node<Inst> {
public:
explicit Inst(Opcode op)
Inst(Opcode op)
: op(op) {}
/// Move constructor: copies op, use_count, name, args and next_pseudoop from `o`, then
/// resets o's use_count, name, args and next_pseudoop. `o.op` is left unchanged.
/// The intrusive_list_node base is not named in the initializer list.
Inst(Inst&& o)
: op(o.op), use_count(o.use_count), name(o.name), args(o.args), next_pseudoop(o.next_pseudoop) {
// Reset the source: no uses, no name, default-constructed arguments, no pseudo-op link.
o.use_count = 0;
o.name = 0;
std::fill(o.args.begin(), o.args.end(), Value());
o.next_pseudoop = nullptr;
}
/// Copy constructor: copies op, use_count, name, args and next_pseudoop from `o`.
/// The intrusive_list_node base is not named in the initializer list.
/// NOTE(review): the copy takes over the source's use_count and next_pseudoop values
/// verbatim - confirm that this is intended for a freshly copied instruction.
Inst(const Inst& o)
: op(o.op), use_count(o.use_count), name(o.name), args(o.args), next_pseudoop(o.next_pseudoop) {}
/// Determines whether or not this instruction performs an arithmetic shift.
bool IsArithmeticShift() const;
@ -136,6 +145,9 @@ public:
Value GetArg(size_t index) const;
void SetArg(size_t index, Value value);
void Serialize(const Block&, std::vector<uint16_t>&) const;
static Inst Deserialize(const std::vector<Inst*>&, std::vector<uint16_t>::iterator&);
void Invalidate();
void ClearArgs();

View file

@ -197,6 +197,49 @@ AccType Value::GetAccType() const {
return inner.imm_acctype;
}
/// Appends a serialized form of this value to `fres`: the marker word 0xa91c, the
/// value type (one word), and then a payload that depends on the type.
///
/// - Any type other than Type::Opaque: the words of `inner.raw`, in order.
/// - Type::Opaque: the position within `block` of the instruction that `inner.inst`
///   points at, as one word. The position is found by walking `block` from the start.
///
/// Asserts if the referenced instruction is not part of `block`, or if its position
/// does not fit in one 16-bit word.
void Value::Serialize(const Block& block, std::vector<uint16_t>& fres) const {
    fres.push_back(0xa91c);
    fres.push_back(static_cast<uint16_t>(type));

    if (type != Type::Opaque) {
        for (const uint16_t word : inner.raw) {
            fres.push_back(word);
        }
        return;
    }

    unsigned index = 0;
    for (const auto& instr : block) {
        if (&instr == inner.inst) {
            // The index is stored in a single word; do not write a truncated value.
            if (index > 0xffff) {
                ASSERT_FALSE("Instruction index does not fit in 16 bits");
            }
            fres.push_back(static_cast<uint16_t>(index));
            return;
        }
        ++index;
    }

    ASSERT_FALSE("Instruction index not found");
    UNREACHABLE();
}
/// Reads one value from the 16-bit word stream at `it`, advancing `it` past the
/// consumed words, and returns it.
///
/// Expects the layout written by Value::Serialize: the marker word 0xa91c, the value
/// type, then either the words of `inner.raw` (non-opaque types) or a single index
/// into `insts` (Type::Opaque), which is resolved to the instruction pointer stored
/// at that index. The index is asserted to be within `insts`.
///
/// No end-of-buffer checks are performed.
Value Value::Deserialize(const std::vector<Inst*>& insts, std::vector<uint16_t>::iterator& it) {
    const uint16_t magic = *it;
    ++it;
    const bool magic_ok = magic == 0xa91c;
    ASSERT_MSG(magic_ok, "Bad IR value magic");

    Value result;
    result.type = static_cast<Type>(*it);
    ++it;

    if (result.type == Type::Opaque) {
        // Opaque values refer to an earlier instruction by its position in the block.
        const auto inst_index = *it;
        ++it;
        ASSERT(inst_index < insts.size());
        result.inner.inst = insts[inst_index];
        return result;
    }

    for (auto& word : result.inner.raw) {
        word = *it;
        ++it;
    }
    return result;
}
s64 Value::GetImmediateAsS64() const {
ASSERT(IsImmediate());

View file

@ -26,6 +26,7 @@ enum class Vec;
namespace Dynarmic::IR {
class Inst;
class Block;
enum class AccType;
enum class Cond;
@ -75,6 +76,9 @@ public:
Cond GetCond() const;
AccType GetAccType() const;
void Serialize(const Block&, std::vector<uint16_t>&) const;
static Value Deserialize(const std::vector<Inst*>&, std::vector<uint16_t>::iterator&);
/**
* Retrieves the immediate of a Value instance as a signed 64-bit value.
*
@ -147,6 +151,7 @@ private:
CoprocessorInfo imm_coproc;
Cond imm_cond;
AccType imm_acctype;
uint16_t raw[4];
} inner;
};
static_assert(sizeof(Value) <= 2 * sizeof(u64), "IR::Value should be kept small in size");

View file

@ -23,6 +23,7 @@
#define SCREENSHOTS_DIR "screenshots"
#define SDMC_DIR "sdmc"
#define SHADER_DIR "shader"
#define RECOMPILER_DIR "recompiler"
#define TAS_DIR "tas"
#define ICONS_DIR "icons"

View file

@ -129,6 +129,7 @@ public:
GenerateYuzuPath(YuzuPath::ScreenshotsDir, yuzu_path / SCREENSHOTS_DIR);
GenerateYuzuPath(YuzuPath::SDMCDir, yuzu_path / SDMC_DIR);
GenerateYuzuPath(YuzuPath::ShaderDir, yuzu_path / SHADER_DIR);
GenerateYuzuPath(YuzuPath::RecompilerDir, yuzu_path / RECOMPILER_DIR);
GenerateYuzuPath(YuzuPath::TASDir, yuzu_path / TAS_DIR);
GenerateYuzuPath(YuzuPath::IconsDir, yuzu_path / ICONS_DIR);
}

View file

@ -25,6 +25,7 @@ enum class YuzuPath {
ScreenshotsDir, // Where yuzu screenshots are stored.
SDMCDir, // Where the emulated SDMC is stored.
ShaderDir, // Where shaders are stored.
RecompilerDir, // Where cached recompiler IR is stored.
TASDir, // Where TAS scripts are stored.
IconsDir, // Where Icons for Windows shortcuts are stored.
};

View file

@ -212,6 +212,8 @@ struct Values {
&use_speed_limit};
SwitchableSetting<bool> sync_core_speed{linkage, false, "sync_core_speed", Category::Core,
Specialization::Default};
SwitchableSetting<bool> ir_cache{linkage, false, "ir_cache", Category::Core,
Specialization::Default};
// Cpu
SwitchableSetting<CpuBackend, true> cpu_backend{linkage,

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/settings.h"
#include "common/fs/path_util.h"
#include "core/arm/dynarmic/arm_dynarmic.h"
#include "core/arm/dynarmic/arm_dynarmic_64.h"
#include "core/arm/dynarmic/dynarmic_exclusive_monitor.h"
@ -261,6 +262,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* pa
// Unpredictable instructions
config.define_unpredictable_behaviour = true;
// IR cache
if (Settings::values.ir_cache) {
config.ir_cache_path = Common::FS::GetYuzuPath(Common::FS::YuzuPath::RecompilerDir);
}
// Timing
config.wall_clock_cntpct = m_uses_wall_clock;
config.enable_cycle_counting = !m_uses_wall_clock;

View file

@ -76,9 +76,11 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QWidget* parent) {
tr("Synchronizes CPU core speed to game's maximum rendering speed, which can be useful to "
"increase FPS without increasing the actual speed of the game (animations, physics, "
"etc.)\n"
"It's up to each game if it plays well with this or not. Most games (specially original "
"ones) "
"It's up to each game if it plays well with this or not. Most games (specially originals) "
"simply ignore this.\nThis can help play the game stutter-free at a lower framerate."));
INSERT(Settings, ir_cache, tr("Recompiler cache"),
tr("Caches optimized IR from recompiler. Improves performance\nin some scenarios at the "
"cost of increased disk activity and space consumption."));
// Cpu
INSERT(Settings, cpu_accuracy, tr("Accuracy:"),