A32: Implement FastDispatchHint
This commit is contained in:
parent
aa8d826c13
commit
f96c43d422
11 changed files with 112 additions and 23 deletions
|
@ -87,6 +87,9 @@ struct UserConfig {
|
|||
/// instruction the ExceptionRaised callback is called. If this is true, we define
|
||||
/// definite behaviour for some unpredictable instructions.
|
||||
bool define_unpredictable_behaviour = false;
|
||||
|
||||
/// This enables the fast dispatcher.
|
||||
bool enable_fast_dispatch = true;
|
||||
};
|
||||
|
||||
} // namespace A32
|
||||
|
|
|
@ -79,10 +79,11 @@ bool A32EmitContext::FPSCR_DN() const {
|
|||
}
|
||||
|
||||
A32EmitX64::A32EmitX64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface)
|
||||
: EmitX64(code), config(std::move(config)), jit_interface(jit_interface)
|
||||
{
|
||||
: EmitX64(code), config(std::move(config)), jit_interface(jit_interface) {
|
||||
GenMemoryAccessors();
|
||||
GenTerminalHandlers();
|
||||
code.PreludeComplete();
|
||||
ClearFastDispatchTable();
|
||||
}
|
||||
|
||||
A32EmitX64::~A32EmitX64() = default;
|
||||
|
@ -148,10 +149,16 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
|
|||
void A32EmitX64::ClearCache() {
|
||||
EmitX64::ClearCache();
|
||||
block_ranges.ClearCache();
|
||||
ClearFastDispatchTable();
|
||||
}
|
||||
|
||||
void A32EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges) {
|
||||
InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges));
|
||||
ClearFastDispatchTable();
|
||||
}
|
||||
|
||||
void A32EmitX64::ClearFastDispatchTable() {
|
||||
fast_dispatch_table.fill({0xFFFFFFFFFFFFFFFFull, nullptr});
|
||||
}
|
||||
|
||||
void A32EmitX64::GenMemoryAccessors() {
|
||||
|
@ -220,6 +227,61 @@ void A32EmitX64::GenMemoryAccessors() {
|
|||
PerfMapRegister(write_memory_64, code.getCurr(), "a32_write_memory_64");
|
||||
}
|
||||
|
||||
void A32EmitX64::GenTerminalHandlers() {
|
||||
// PC ends up in ebp, location_descriptor ends up in rbx
|
||||
const auto calculate_location_descriptor = [this] {
|
||||
// This calculation has to match up with IREmitter::PushRSB
|
||||
// TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et.
|
||||
code.mov(ecx, MJitStateReg(A32::Reg::PC));
|
||||
code.mov(ebp, ecx);
|
||||
code.shl(rcx, 32);
|
||||
code.mov(ebx, dword[r15 + offsetof(A32JitState, FPSCR_mode)]);
|
||||
code.or_(ebx, dword[r15 + offsetof(A32JitState, CPSR_et)]);
|
||||
code.or_(rbx, rcx);
|
||||
};
|
||||
|
||||
Xbyak::Label fast_dispatch_cache_miss, rsb_cache_miss;
|
||||
|
||||
code.align();
|
||||
terminal_handler_pop_rsb_hint = code.getCurr<const void*>();
|
||||
calculate_location_descriptor();
|
||||
code.mov(eax, dword[r15 + offsetof(A32JitState, rsb_ptr)]);
|
||||
code.sub(eax, 1);
|
||||
code.and_(eax, u32(A32JitState::RSBPtrMask));
|
||||
code.mov(dword[r15 + offsetof(A32JitState, rsb_ptr)], eax);
|
||||
code.cmp(rbx, qword[r15 + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
|
||||
if (config.enable_fast_dispatch) {
|
||||
code.jne(rsb_cache_miss);
|
||||
} else {
|
||||
code.jne(code.GetReturnFromRunCodeAddress());
|
||||
}
|
||||
code.mov(rax, qword[r15 + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]);
|
||||
code.jmp(rax);
|
||||
PerfMapRegister(terminal_handler_pop_rsb_hint, code.getCurr(), "a32_terminal_handler_pop_rsb_hint");
|
||||
|
||||
if (config.enable_fast_dispatch) {
|
||||
code.align();
|
||||
terminal_handler_fast_dispatch_hint = code.getCurr<const void*>();
|
||||
calculate_location_descriptor();
|
||||
code.L(rsb_cache_miss);
|
||||
code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
|
||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE42)) {
|
||||
code.crc32(ebp, r12d);
|
||||
}
|
||||
code.and_(ebp, fast_dispatch_table_mask);
|
||||
code.lea(rbp, ptr[r12 + rbp]);
|
||||
code.cmp(rbx, qword[rbp + offsetof(FastDispatchEntry, location_descriptor)]);
|
||||
code.jne(fast_dispatch_cache_miss);
|
||||
code.jmp(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)]);
|
||||
code.L(fast_dispatch_cache_miss);
|
||||
code.mov(qword[rbp + offsetof(FastDispatchEntry, location_descriptor)], rbx);
|
||||
code.LookupBlock();
|
||||
code.mov(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)], rax);
|
||||
code.jmp(rax);
|
||||
PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a32_terminal_handler_fast_dispatch_hint");
|
||||
}
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
A32::Reg reg = inst->GetArg(0).GetA32RegRef();
|
||||
|
||||
|
@ -1222,16 +1284,15 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::Location
|
|||
}
|
||||
|
||||
void A32EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor) {
|
||||
// This calculation has to match up with IREmitter::PushRSB
|
||||
// TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et.
|
||||
code.mov(ecx, MJitStateReg(A32::Reg::PC));
|
||||
code.shl(rcx, 32);
|
||||
code.mov(ebx, dword[r15 + offsetof(A32JitState, FPSCR_mode)]);
|
||||
code.or_(ebx, dword[r15 + offsetof(A32JitState, CPSR_et)]);
|
||||
code.or_(rbx, rcx);
|
||||
code.jmp(terminal_handler_pop_rsb_hint);
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitTerminalImpl(IR::Term::FastDispatchHint, IR::LocationDescriptor initial_location) {
|
||||
EmitTerminalImpl(IR::Term::ReturnToDispatch{}, initial_location);
|
||||
void A32EmitX64::EmitTerminalImpl(IR::Term::FastDispatchHint, IR::LocationDescriptor) {
|
||||
if (config.enable_fast_dispatch) {
|
||||
code.jmp(terminal_handler_fast_dispatch_hint);
|
||||
} else {
|
||||
code.ReturnFromRunCode();
|
||||
}
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) {
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include <boost/optional.hpp>
|
||||
|
||||
#include "backend/x64/a32_jitstate.h"
|
||||
|
@ -49,6 +51,16 @@ protected:
|
|||
A32::Jit* jit_interface;
|
||||
BlockRangeInformation<u32> block_ranges;
|
||||
|
||||
struct FastDispatchEntry {
|
||||
u64 location_descriptor;
|
||||
const void* code_ptr;
|
||||
};
|
||||
static_assert(sizeof(FastDispatchEntry) == 0x10);
|
||||
static constexpr u64 fast_dispatch_table_mask = 0xFFFF0;
|
||||
static constexpr size_t fast_dispatch_table_size = 0x10000;
|
||||
std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
|
||||
void ClearFastDispatchTable();
|
||||
|
||||
const void* read_memory_8;
|
||||
const void* read_memory_16;
|
||||
const void* read_memory_32;
|
||||
|
@ -59,6 +71,10 @@ protected:
|
|||
const void* write_memory_64;
|
||||
void GenMemoryAccessors();
|
||||
|
||||
const void* terminal_handler_pop_rsb_hint;
|
||||
const void* terminal_handler_fast_dispatch_hint = nullptr;
|
||||
void GenTerminalHandlers();
|
||||
|
||||
// Microinstruction emitters
|
||||
#define OPCODE(...)
|
||||
#define A32OPC(name, type, ...) void EmitA32##name(A32EmitContext& ctx, IR::Inst* inst);
|
||||
|
|
|
@ -257,6 +257,10 @@ void BlockOfCode::UpdateTicks() {
|
|||
mov(qword[r15 + jsi.offsetof_cycles_remaining], ABI_RETURN);
|
||||
}
|
||||
|
||||
void BlockOfCode::LookupBlock() {
|
||||
cb.LookupBlock->EmitCall(*this);
|
||||
}
|
||||
|
||||
Xbyak::Address BlockOfCode::MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
|
||||
return constant_pool.GetConstant(frame, lower, upper);
|
||||
}
|
||||
|
|
|
@ -57,8 +57,11 @@ public:
|
|||
/// Code emitter: Makes saved host MXCSR the current MXCSR
|
||||
void SwitchMxcsrOnExit();
|
||||
/// Code emitter: Updates cycles remaining my calling cb.AddTicks and cb.GetTicksRemaining
|
||||
/// @note this clobbers ABI callee-save registers
|
||||
/// @note this clobbers ABI caller-save registers
|
||||
void UpdateTicks();
|
||||
/// Code emitter: Performs a block lookup based on current state
|
||||
/// @note this clobbers ABI caller-save registers
|
||||
void LookupBlock();
|
||||
|
||||
/// Code emitter: Calls the function
|
||||
template <typename FunctionPointer>
|
||||
|
|
|
@ -52,7 +52,7 @@ bool ArmTranslatorVisitor::arm_BLX_reg(Cond cond, Reg m) {
|
|||
ir.PushRSB(ir.current_location.AdvancePC(4));
|
||||
ir.BXWritePC(ir.GetRegister(m));
|
||||
ir.SetRegister(Reg::LR, ir.Imm32(ir.current_location.PC() + 4));
|
||||
ir.SetTerm(IR::Term::ReturnToDispatch{});
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -65,7 +65,7 @@ bool ArmTranslatorVisitor::arm_BX(Cond cond, Reg m) {
|
|||
if (m == Reg::R14)
|
||||
ir.SetTerm(IR::Term::PopRSBHint{});
|
||||
else
|
||||
ir.SetTerm(IR::Term::ReturnToDispatch{});
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
|
|
@ -66,7 +66,7 @@ bool ArmTranslatorVisitor::arm_LDR_lit(Cond cond, bool U, Reg t, Imm12 imm12) {
|
|||
|
||||
if (t == Reg::PC) {
|
||||
ir.LoadWritePC(data);
|
||||
ir.SetTerm(IR::Term::ReturnToDispatch{});
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -96,7 +96,7 @@ bool ArmTranslatorVisitor::arm_LDR_imm(Cond cond, bool P, bool U, bool W, Reg n,
|
|||
if (!P && W && n == Reg::R13)
|
||||
ir.SetTerm(IR::Term::PopRSBHint{});
|
||||
else
|
||||
ir.SetTerm(IR::Term::ReturnToDispatch{});
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -121,7 +121,7 @@ bool ArmTranslatorVisitor::arm_LDR_reg(Cond cond, bool P, bool U, bool W, Reg n,
|
|||
|
||||
if (t == Reg::PC) {
|
||||
ir.LoadWritePC(data);
|
||||
ir.SetTerm(IR::Term::ReturnToDispatch{});
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -623,7 +623,7 @@ static bool LDMHelper(A32::IREmitter& ir, bool W, Reg n, RegList list, IR::U32 s
|
|||
if (n == Reg::R13)
|
||||
ir.SetTerm(IR::Term::PopRSBHint{});
|
||||
else
|
||||
ir.SetTerm(IR::Term::ReturnToDispatch{});
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
|
|
@ -370,7 +370,7 @@ struct ThumbTranslatorVisitor final {
|
|||
if (d == Reg::PC) {
|
||||
ir.ALUWritePC(result.result);
|
||||
// Return to dispatch as we can't predict what PC is going to be. Stop compilation.
|
||||
ir.SetTerm(IR::Term::ReturnToDispatch{});
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
return false;
|
||||
} else {
|
||||
ir.SetRegister(d, result.result);
|
||||
|
@ -400,7 +400,7 @@ struct ThumbTranslatorVisitor final {
|
|||
auto result = ir.GetRegister(m);
|
||||
if (d == Reg::PC) {
|
||||
ir.ALUWritePC(result);
|
||||
ir.SetTerm(IR::Term::ReturnToDispatch{});
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
return false;
|
||||
} else {
|
||||
ir.SetRegister(d, result);
|
||||
|
@ -775,7 +775,7 @@ struct ThumbTranslatorVisitor final {
|
|||
if (m == Reg::R14)
|
||||
ir.SetTerm(IR::Term::PopRSBHint{});
|
||||
else
|
||||
ir.SetTerm(IR::Term::ReturnToDispatch{});
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -784,7 +784,7 @@ struct ThumbTranslatorVisitor final {
|
|||
ir.PushRSB(ir.current_location.AdvancePC(2));
|
||||
ir.BXWritePC(ir.GetRegister(m));
|
||||
ir.SetRegister(Reg::LR, ir.Imm32((ir.current_location.PC() + 2) | 1));
|
||||
ir.SetTerm(IR::Term::ReturnToDispatch{});
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -84,7 +84,7 @@ bool TranslatorVisitor::MSR_reg(Imm<1> o0, Imm<3> op1, Imm<4> CRn, Imm<4> CRm, I
|
|||
case SystemRegisterEncoding::FPCR:
|
||||
ir.SetFPCR(X(32, Rt));
|
||||
ir.SetPC(ir.Imm64(ir.current_location->PC() + 4));
|
||||
ir.SetTerm(IR::Term::ReturnToDispatch{});
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
return false;
|
||||
case SystemRegisterEncoding::FPSR:
|
||||
ir.SetFPSR(X(32, Rt));
|
||||
|
|
|
@ -41,6 +41,7 @@ using Dynarmic::Common::Bits;
|
|||
|
||||
static Dynarmic::A32::UserConfig GetUserConfig(ArmTestEnv* testenv) {
|
||||
Dynarmic::A32::UserConfig user_config;
|
||||
user_config.enable_fast_dispatch = false;
|
||||
user_config.callbacks = testenv;
|
||||
return user_config;
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
|
||||
static Dynarmic::A32::UserConfig GetUserConfig(ThumbTestEnv* testenv) {
|
||||
Dynarmic::A32::UserConfig user_config;
|
||||
user_config.enable_fast_dispatch = false;
|
||||
user_config.callbacks = testenv;
|
||||
return user_config;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue