Optimization: Read page-table directly for memory access
This commit is contained in:
parent
57169ec093
commit
3b5c43b427
5 changed files with 219 additions and 27 deletions
|
@ -6,6 +6,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace Dynarmic {
|
||||
|
@ -30,6 +32,11 @@ struct UserCallbacks {
|
|||
void (*InterpreterFallback)(std::uint32_t pc, Jit* jit);
|
||||
|
||||
bool (*CallSVC)(std::uint32_t swi);
|
||||
|
||||
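// Page Table
// Optional (may stay nullptr). When set, JIT-emitted memory accesses index this table by
// (vaddr >> PAGE_BITS) and touch host memory directly; a null entry falls back to the
// corresponding Memory* callback.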
|
||||
static constexpr std::size_t PAGE_BITS = 12;
|
||||
static constexpr std::size_t NUM_PAGE_TABLE_ENTRIES = 1 << (32 - PAGE_BITS);
|
||||
std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>* page_table = nullptr;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic
|
||||
|
|
|
@ -12,11 +12,12 @@
|
|||
#include "backend_x64/block_of_code.h"
|
||||
#include "backend_x64/jitstate.h"
|
||||
#include "common/assert.h"
|
||||
#include "dynarmic/callbacks.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace BackendX64 {
|
||||
|
||||
/// Creates the 128 MiB Xbyak code buffer, keeps a copy of the user callbacks (the generated
/// memory accessor thunks call them), and then runs ClearCache(false) to fill the new buffer.
BlockOfCode::BlockOfCode(UserCallbacks cb)
    : Xbyak::CodeGenerator(128 * 1024 * 1024)
    , cb(cb)
{
    ClearCache(false);
}
|
||||
|
||||
|
@ -27,6 +28,7 @@ void BlockOfCode::ClearCache(bool poison_memory) {
|
|||
GenConstants();
|
||||
GenRunCode();
|
||||
GenReturnFromRunCode();
|
||||
GenMemoryAccessors();
|
||||
}
|
||||
|
||||
size_t BlockOfCode::RunCode(JitState* jit_state, CodePtr basic_block, size_t cycles_to_run) const {
|
||||
|
@ -118,6 +120,64 @@ void BlockOfCode::GenReturnFromRunCode() {
|
|||
ret();
|
||||
}
|
||||
|
||||
void BlockOfCode::GenMemoryAccessors() {
|
||||
align();
|
||||
read_memory_8 = getCurr<const void*>();
|
||||
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||
CallFunction(cb.MemoryRead8);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||
ret();
|
||||
|
||||
align();
|
||||
read_memory_16 = getCurr<const void*>();
|
||||
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||
CallFunction(cb.MemoryRead16);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||
ret();
|
||||
|
||||
align();
|
||||
read_memory_32 = getCurr<const void*>();
|
||||
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||
CallFunction(cb.MemoryRead32);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||
ret();
|
||||
|
||||
align();
|
||||
read_memory_64 = getCurr<const void*>();
|
||||
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||
CallFunction(cb.MemoryRead64);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||
ret();
|
||||
|
||||
align();
|
||||
write_memory_8 = getCurr<const void*>();
|
||||
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||
CallFunction(cb.MemoryWrite8);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||
ret();
|
||||
|
||||
align();
|
||||
write_memory_16 = getCurr<const void*>();
|
||||
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||
CallFunction(cb.MemoryWrite16);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||
ret();
|
||||
|
||||
align();
|
||||
write_memory_32 = getCurr<const void*>();
|
||||
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||
CallFunction(cb.MemoryWrite32);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||
ret();
|
||||
|
||||
align();
|
||||
write_memory_64 = getCurr<const void*>();
|
||||
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||
CallFunction(cb.MemoryWrite64);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||
ret();
|
||||
}
|
||||
|
||||
void BlockOfCode::SwitchMxcsrOnEntry() {
|
||||
stmxcsr(dword[r15 + offsetof(JitState, save_host_MXCSR)]);
|
||||
ldmxcsr(dword[r15 + offsetof(JitState, guest_MXCSR)]);
|
||||
|
|
|
@ -11,13 +11,14 @@
|
|||
|
||||
#include "backend_x64/jitstate.h"
|
||||
#include "common/common_types.h"
|
||||
#include "dynarmic/callbacks.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace BackendX64 {
|
||||
|
||||
class BlockOfCode final : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
BlockOfCode();
|
||||
explicit BlockOfCode(UserCallbacks cb);
|
||||
|
||||
/// Clears this block of code and resets code pointer to beginning.
|
||||
void ClearCache(bool poison_memory);
|
||||
|
@ -93,6 +94,36 @@ public:
|
|||
return return_from_run_code;
|
||||
}
|
||||
|
||||
const void* GetMemoryReadCallback(size_t bit_size) const {
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
return read_memory_8;
|
||||
case 16:
|
||||
return read_memory_16;
|
||||
case 32:
|
||||
return read_memory_32;
|
||||
case 64:
|
||||
return read_memory_64;
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
const void* GetMemoryWriteCallback(size_t bit_size) const {
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
return write_memory_8;
|
||||
case 16:
|
||||
return write_memory_16;
|
||||
case 32:
|
||||
return write_memory_32;
|
||||
case 64:
|
||||
return write_memory_64;
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void int3() { db(0xCC); }
|
||||
void nop(size_t size = 1);
|
||||
|
||||
|
@ -114,6 +145,8 @@ public:
|
|||
#endif
|
||||
|
||||
private:
|
||||
UserCallbacks cb;
|
||||
|
||||
struct Consts {
|
||||
Xbyak::Label FloatPositiveZero32;
|
||||
Xbyak::Label FloatNegativeZero32;
|
||||
|
@ -138,6 +171,16 @@ private:
|
|||
const void* return_from_run_code = nullptr;
|
||||
const void* return_from_run_code_without_mxcsr_switch = nullptr;
|
||||
void GenReturnFromRunCode();
|
||||
|
||||
const void* read_memory_8 = nullptr;
|
||||
const void* read_memory_16 = nullptr;
|
||||
const void* read_memory_32 = nullptr;
|
||||
const void* read_memory_64 = nullptr;
|
||||
const void* write_memory_8 = nullptr;
|
||||
const void* write_memory_16 = nullptr;
|
||||
const void* write_memory_32 = nullptr;
|
||||
const void* write_memory_64 = nullptr;
|
||||
void GenMemoryAccessors();
|
||||
};
|
||||
|
||||
} // namespace BackendX64
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <unordered_map>
|
||||
#include <common/bit_util.h>
|
||||
|
||||
#include "backend_x64/abi.h"
|
||||
#include "backend_x64/emit_x64.h"
|
||||
#include "backend_x64/jitstate.h"
|
||||
#include "frontend/arm_types.h"
|
||||
|
@ -1829,52 +1830,133 @@ void EmitX64::EmitSetExclusive(IR::Block&, IR::Inst* inst) {
|
|||
code->mov(dword[r15 + offsetof(JitState, exclusive_address)], address);
|
||||
}
|
||||
|
||||
void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) {
|
||||
template <typename FunctionPointer>
|
||||
static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) {
|
||||
if (!cb.page_table) {
|
||||
reg_alloc.HostCall(inst, inst->GetArg(0));
|
||||
code->CallFunction(fn);
|
||||
return;
|
||||
}
|
||||
|
||||
code->CallFunction(cb.MemoryRead8);
|
||||
using namespace Xbyak::util;
|
||||
|
||||
Xbyak::Reg64 result = reg_alloc.DefGpr(inst, { ABI_RETURN });
|
||||
Xbyak::Reg32 vaddr = reg_alloc.UseScratchGpr(inst->GetArg(0), { ABI_PARAM1 }).cvt32();
|
||||
Xbyak::Reg64 page_index = reg_alloc.ScratchGpr();
|
||||
Xbyak::Reg64 page_offset = reg_alloc.ScratchGpr();
|
||||
|
||||
Xbyak::Label abort, end;
|
||||
|
||||
code->mov(rax, u64(cb.page_table));
|
||||
code->mov(page_index.cvt32(), vaddr);
|
||||
code->shr(page_index.cvt32(), 12);
|
||||
code->mov(rax, qword[rax + page_index * 8]);
|
||||
code->test(rax, rax);
|
||||
code->jz(abort);
|
||||
code->mov(page_offset.cvt32(), vaddr);
|
||||
code->and_(page_offset.cvt32(), 4095);
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
code->movzx(result, code->byte[rax + page_offset]);
|
||||
break;
|
||||
case 16:
|
||||
code->movzx(result, word[rax + page_offset]);
|
||||
break;
|
||||
case 32:
|
||||
code->mov(result.cvt32(), dword[rax + page_offset]);
|
||||
break;
|
||||
case 64:
|
||||
code->mov(result.cvt64(), qword[rax + page_offset]);
|
||||
break;
|
||||
default:
|
||||
ASSERT_MSG(false, "Invalid bit_size");
|
||||
break;
|
||||
}
|
||||
code->jmp(end);
|
||||
code->L(abort);
|
||||
code->call(code->GetMemoryReadCallback(bit_size));
|
||||
code->L(end);
|
||||
}
|
||||
|
||||
template<typename FunctionPointer>
|
||||
static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) {
|
||||
if (!cb.page_table) {
|
||||
reg_alloc.HostCall(inst, inst->GetArg(0), inst->GetArg(1));
|
||||
code->CallFunction(fn);
|
||||
return;
|
||||
}
|
||||
|
||||
using namespace Xbyak::util;
|
||||
|
||||
reg_alloc.ScratchGpr({ HostLoc::RAX });
|
||||
Xbyak::Reg32 vaddr = reg_alloc.UseScratchGpr(inst->GetArg(0), { ABI_PARAM1 }).cvt32();
|
||||
Xbyak::Reg64 value = reg_alloc.UseScratchGpr(inst->GetArg(1), { ABI_PARAM2 });
|
||||
Xbyak::Reg64 page_index = reg_alloc.ScratchGpr();
|
||||
Xbyak::Reg64 page_offset = reg_alloc.ScratchGpr();
|
||||
|
||||
Xbyak::Label abort, end;
|
||||
|
||||
code->mov(rax, u64(cb.page_table));
|
||||
code->mov(page_index.cvt32(), vaddr);
|
||||
code->shr(page_index.cvt32(), 12);
|
||||
code->mov(rax, qword[rax + page_index * 8]);
|
||||
code->test(rax, rax);
|
||||
code->jz(abort);
|
||||
code->mov(page_offset.cvt32(), vaddr);
|
||||
code->and_(page_offset.cvt32(), 4095);
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
code->mov(code->byte[rax + page_offset], value.cvt8());
|
||||
break;
|
||||
case 16:
|
||||
code->mov(word[rax + page_offset], value.cvt16());
|
||||
break;
|
||||
case 32:
|
||||
code->mov(dword[rax + page_offset], value.cvt32());
|
||||
break;
|
||||
case 64:
|
||||
code->mov(qword[rax + page_offset], value.cvt64());
|
||||
break;
|
||||
default:
|
||||
ASSERT_MSG(false, "Invalid bit_size");
|
||||
break;
|
||||
}
|
||||
code->jmp(end);
|
||||
code->L(abort);
|
||||
code->call(code->GetMemoryWriteCallback(bit_size));
|
||||
code->L(end);
|
||||
}
|
||||
|
||||
/// Emits an 8-bit guest memory read through the shared ReadMemory helper.
void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) {
    constexpr size_t access_bits = 8;
    ReadMemory(code, reg_alloc, inst, cb, access_bits, cb.MemoryRead8);
}
|
||||
|
||||
/// Emits a 16-bit guest memory read through the shared ReadMemory helper, which decides
/// between the inline page-table path and the MemoryRead16 callback.
void EmitX64::EmitReadMemory16(IR::Block&, IR::Inst* inst) {
    ReadMemory(code, reg_alloc, inst, cb, 16, cb.MemoryRead16);
}
|
||||
|
||||
/// Emits a 32-bit guest memory read through the shared ReadMemory helper, which decides
/// between the inline page-table path and the MemoryRead32 callback.
void EmitX64::EmitReadMemory32(IR::Block&, IR::Inst* inst) {
    ReadMemory(code, reg_alloc, inst, cb, 32, cb.MemoryRead32);
}
|
||||
|
||||
/// Emits a 64-bit guest memory read through the shared ReadMemory helper, which decides
/// between the inline page-table path and the MemoryRead64 callback.
void EmitX64::EmitReadMemory64(IR::Block&, IR::Inst* inst) {
    ReadMemory(code, reg_alloc, inst, cb, 64, cb.MemoryRead64);
}
|
||||
|
||||
/// Emits an 8-bit guest memory write through the shared WriteMemory helper, which decides
/// between the inline page-table path and the MemoryWrite8 callback.
void EmitX64::EmitWriteMemory8(IR::Block&, IR::Inst* inst) {
    WriteMemory(code, reg_alloc, inst, cb, 8, cb.MemoryWrite8);
}
|
||||
|
||||
/// Emits a 16-bit guest memory write through the shared WriteMemory helper, which decides
/// between the inline page-table path and the MemoryWrite16 callback.
void EmitX64::EmitWriteMemory16(IR::Block&, IR::Inst* inst) {
    WriteMemory(code, reg_alloc, inst, cb, 16, cb.MemoryWrite16);
}
|
||||
|
||||
/// Emits a 32-bit guest memory write through the shared WriteMemory helper, which decides
/// between the inline page-table path and the MemoryWrite32 callback.
void EmitX64::EmitWriteMemory32(IR::Block&, IR::Inst* inst) {
    WriteMemory(code, reg_alloc, inst, cb, 32, cb.MemoryWrite32);
}
|
||||
|
||||
/// Emits a 64-bit guest memory write through the shared WriteMemory helper, which decides
/// between the inline page-table path and the MemoryWrite64 callback.
void EmitX64::EmitWriteMemory64(IR::Block&, IR::Inst* inst) {
    WriteMemory(code, reg_alloc, inst, cb, 64, cb.MemoryWrite64);
}
|
||||
|
||||
template <typename FunctionPointer>
|
||||
|
|
|
@ -31,7 +31,7 @@ using namespace BackendX64;
|
|||
|
||||
struct Jit::Impl {
|
||||
Impl(Jit* jit, UserCallbacks callbacks)
|
||||
: block_of_code()
|
||||
: block_of_code(callbacks)
|
||||
, jit_state()
|
||||
, emitter(&block_of_code, callbacks, jit)
|
||||
, callbacks(callbacks)
|
||||
|
|
Loading…
Reference in a new issue