Optimization: Read page-table directly for memory access

This commit is contained in:
MerryMage 2016-09-01 00:06:40 +01:00
parent 57169ec093
commit 3b5c43b427
5 changed files with 219 additions and 27 deletions

View file

@ -6,6 +6,8 @@
#pragma once #pragma once
#include <array>
#include <cstddef>
#include <cstdint> #include <cstdint>
namespace Dynarmic { namespace Dynarmic {
@ -30,6 +32,11 @@ struct UserCallbacks {
void (*InterpreterFallback)(std::uint32_t pc, Jit* jit); void (*InterpreterFallback)(std::uint32_t pc, Jit* jit);
bool (*CallSVC)(std::uint32_t swi); bool (*CallSVC)(std::uint32_t swi);
// Page Table
static constexpr std::size_t PAGE_BITS = 12;
static constexpr std::size_t NUM_PAGE_TABLE_ENTRIES = 1 << (32 - PAGE_BITS);
std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>* page_table = nullptr;
}; };
} // namespace Dynarmic } // namespace Dynarmic

View file

@ -12,11 +12,12 @@
#include "backend_x64/block_of_code.h" #include "backend_x64/block_of_code.h"
#include "backend_x64/jitstate.h" #include "backend_x64/jitstate.h"
#include "common/assert.h" #include "common/assert.h"
#include "dynarmic/callbacks.h"
namespace Dynarmic { namespace Dynarmic {
namespace BackendX64 { namespace BackendX64 {
BlockOfCode::BlockOfCode() : Xbyak::CodeGenerator(128 * 1024 * 1024) { BlockOfCode::BlockOfCode(UserCallbacks cb) : Xbyak::CodeGenerator(128 * 1024 * 1024), cb(cb) {
ClearCache(false); ClearCache(false);
} }
@ -27,6 +28,7 @@ void BlockOfCode::ClearCache(bool poison_memory) {
GenConstants(); GenConstants();
GenRunCode(); GenRunCode();
GenReturnFromRunCode(); GenReturnFromRunCode();
GenMemoryAccessors();
} }
size_t BlockOfCode::RunCode(JitState* jit_state, CodePtr basic_block, size_t cycles_to_run) const { size_t BlockOfCode::RunCode(JitState* jit_state, CodePtr basic_block, size_t cycles_to_run) const {
@ -118,6 +120,64 @@ void BlockOfCode::GenReturnFromRunCode() {
ret(); ret();
} }
void BlockOfCode::GenMemoryAccessors() {
align();
read_memory_8 = getCurr<const void*>();
ABI_PushCallerSaveRegistersAndAdjustStack(this);
CallFunction(cb.MemoryRead8);
ABI_PopCallerSaveRegistersAndAdjustStack(this);
ret();
align();
read_memory_16 = getCurr<const void*>();
ABI_PushCallerSaveRegistersAndAdjustStack(this);
CallFunction(cb.MemoryRead16);
ABI_PopCallerSaveRegistersAndAdjustStack(this);
ret();
align();
read_memory_32 = getCurr<const void*>();
ABI_PushCallerSaveRegistersAndAdjustStack(this);
CallFunction(cb.MemoryRead32);
ABI_PopCallerSaveRegistersAndAdjustStack(this);
ret();
align();
read_memory_64 = getCurr<const void*>();
ABI_PushCallerSaveRegistersAndAdjustStack(this);
CallFunction(cb.MemoryRead64);
ABI_PopCallerSaveRegistersAndAdjustStack(this);
ret();
align();
write_memory_8 = getCurr<const void*>();
ABI_PushCallerSaveRegistersAndAdjustStack(this);
CallFunction(cb.MemoryWrite8);
ABI_PopCallerSaveRegistersAndAdjustStack(this);
ret();
align();
write_memory_16 = getCurr<const void*>();
ABI_PushCallerSaveRegistersAndAdjustStack(this);
CallFunction(cb.MemoryWrite16);
ABI_PopCallerSaveRegistersAndAdjustStack(this);
ret();
align();
write_memory_32 = getCurr<const void*>();
ABI_PushCallerSaveRegistersAndAdjustStack(this);
CallFunction(cb.MemoryWrite32);
ABI_PopCallerSaveRegistersAndAdjustStack(this);
ret();
align();
write_memory_64 = getCurr<const void*>();
ABI_PushCallerSaveRegistersAndAdjustStack(this);
CallFunction(cb.MemoryWrite64);
ABI_PopCallerSaveRegistersAndAdjustStack(this);
ret();
}
void BlockOfCode::SwitchMxcsrOnEntry() { void BlockOfCode::SwitchMxcsrOnEntry() {
stmxcsr(dword[r15 + offsetof(JitState, save_host_MXCSR)]); stmxcsr(dword[r15 + offsetof(JitState, save_host_MXCSR)]);
ldmxcsr(dword[r15 + offsetof(JitState, guest_MXCSR)]); ldmxcsr(dword[r15 + offsetof(JitState, guest_MXCSR)]);

View file

@ -11,13 +11,14 @@
#include "backend_x64/jitstate.h" #include "backend_x64/jitstate.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "dynarmic/callbacks.h"
namespace Dynarmic { namespace Dynarmic {
namespace BackendX64 { namespace BackendX64 {
class BlockOfCode final : public Xbyak::CodeGenerator { class BlockOfCode final : public Xbyak::CodeGenerator {
public: public:
BlockOfCode(); explicit BlockOfCode(UserCallbacks cb);
/// Clears this block of code and resets code pointer to beginning. /// Clears this block of code and resets code pointer to beginning.
void ClearCache(bool poison_memory); void ClearCache(bool poison_memory);
@ -93,6 +94,36 @@ public:
return return_from_run_code; return return_from_run_code;
} }
/// Returns the entry point of the generated read thunk for an access of `bit_size` bits
/// (8, 16, 32 or 64), or nullptr for any other width.
const void* GetMemoryReadCallback(size_t bit_size) const {
    if (bit_size == 8) {
        return read_memory_8;
    }
    if (bit_size == 16) {
        return read_memory_16;
    }
    if (bit_size == 32) {
        return read_memory_32;
    }
    if (bit_size == 64) {
        return read_memory_64;
    }
    // Unsupported access width
    return nullptr;
}
/// Returns the entry point of the generated write thunk for an access of `bit_size` bits
/// (8, 16, 32 or 64), or nullptr for any other width.
const void* GetMemoryWriteCallback(size_t bit_size) const {
    if (bit_size == 8) {
        return write_memory_8;
    }
    if (bit_size == 16) {
        return write_memory_16;
    }
    if (bit_size == 32) {
        return write_memory_32;
    }
    if (bit_size == 64) {
        return write_memory_64;
    }
    // Unsupported access width
    return nullptr;
}
void int3() { db(0xCC); } void int3() { db(0xCC); }
void nop(size_t size = 1); void nop(size_t size = 1);
@ -114,6 +145,8 @@ public:
#endif #endif
private: private:
UserCallbacks cb;
struct Consts { struct Consts {
Xbyak::Label FloatPositiveZero32; Xbyak::Label FloatPositiveZero32;
Xbyak::Label FloatNegativeZero32; Xbyak::Label FloatNegativeZero32;
@ -138,6 +171,16 @@ private:
const void* return_from_run_code = nullptr; const void* return_from_run_code = nullptr;
const void* return_from_run_code_without_mxcsr_switch = nullptr; const void* return_from_run_code_without_mxcsr_switch = nullptr;
void GenReturnFromRunCode(); void GenReturnFromRunCode();
const void* read_memory_8 = nullptr;
const void* read_memory_16 = nullptr;
const void* read_memory_32 = nullptr;
const void* read_memory_64 = nullptr;
const void* write_memory_8 = nullptr;
const void* write_memory_16 = nullptr;
const void* write_memory_32 = nullptr;
const void* write_memory_64 = nullptr;
void GenMemoryAccessors();
}; };
} // namespace BackendX64 } // namespace BackendX64

View file

@ -8,6 +8,7 @@
#include <unordered_map> #include <unordered_map>
#include <common/bit_util.h> #include <common/bit_util.h>
#include "backend_x64/abi.h"
#include "backend_x64/emit_x64.h" #include "backend_x64/emit_x64.h"
#include "backend_x64/jitstate.h" #include "backend_x64/jitstate.h"
#include "frontend/arm_types.h" #include "frontend/arm_types.h"
@ -1829,52 +1830,133 @@ void EmitX64::EmitSetExclusive(IR::Block&, IR::Inst* inst) {
code->mov(dword[r15 + offsetof(JitState, exclusive_address)], address); code->mov(dword[r15 + offsetof(JitState, exclusive_address)], address);
} }
// ReadMemory: emits host code for a guest memory read of `bit_size` bits (8, 16, 32 or 64).
// Without a page table it emits a plain host call to `fn`. With one it emits a direct load
// through the page table, falling back to the thunk from GetMemoryReadCallback when the
// page's entry is null.
// (The first lines below also carry the old side of the diff: the previous EmitReadMemory8 body.)
void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) { template <typename FunctionPointer>
reg_alloc.HostCall(inst, inst->GetArg(0)); static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) {
// No page table supplied: always go through the user callback.
if (!cb.page_table) {
reg_alloc.HostCall(inst, inst->GetArg(0));
code->CallFunction(fn);
return;
}
code->CallFunction(cb.MemoryRead8); using namespace Xbyak::util;
// The result is defined in ABI_RETURN and the address is placed in ABI_PARAM1, so the
// fallback call below finds its argument and leaves its result in the expected registers.
// NOTE(review): the code below uses rax directly; assumes ABI_RETURN is rax - confirm.
Xbyak::Reg64 result = reg_alloc.DefGpr(inst, { ABI_RETURN });
Xbyak::Reg32 vaddr = reg_alloc.UseScratchGpr(inst->GetArg(0), { ABI_PARAM1 }).cvt32();
Xbyak::Reg64 page_index = reg_alloc.ScratchGpr();
Xbyak::Reg64 page_offset = reg_alloc.ScratchGpr();
Xbyak::Label abort, end;
// rax = page_table[vaddr >> 12]; the shift matches UserCallbacks::PAGE_BITS (12).
code->mov(rax, u64(cb.page_table));
code->mov(page_index.cvt32(), vaddr);
code->shr(page_index.cvt32(), 12);
code->mov(rax, qword[rax + page_index * 8]);
// A null entry means this page has no direct mapping: take the callback path.
code->test(rax, rax);
code->jz(abort);
// page_offset = vaddr & 4095, the offset within the page.
// NOTE(review): no check is emitted that a 16/32/64-bit access stays inside the page;
// an access starting near offset 4095 looks like it would read past the page - confirm.
code->mov(page_offset.cvt32(), vaddr);
code->and_(page_offset.cvt32(), 4095);
// Load the value with the requested width; the 8- and 16-bit loads zero-extend.
switch (bit_size) {
case 8:
code->movzx(result, code->byte[rax + page_offset]);
break;
case 16:
code->movzx(result, word[rax + page_offset]);
break;
case 32:
code->mov(result.cvt32(), dword[rax + page_offset]);
break;
case 64:
code->mov(result.cvt64(), qword[rax + page_offset]);
break;
default:
ASSERT_MSG(false, "Invalid bit_size");
break;
}
code->jmp(end);
// Slow path: call the pre-generated thunk that wraps the user read callback.
code->L(abort);
code->call(code->GetMemoryReadCallback(bit_size));
code->L(end);
}
/// Emits host code for a guest memory write of `bit_size` bits (8, 16, 32 or 64).
///
/// Without a page table it emits a plain host call to `fn` with (address, value). With one it
/// emits a direct store through the page table, falling back to the thunk from
/// GetMemoryWriteCallback when the page's entry is null.
///
/// @param code      Code buffer to emit into.
/// @param reg_alloc Register allocator for the block being emitted.
/// @param inst      The write instruction; argument 0 is the address, argument 1 the value.
/// @param cb        User callbacks; `cb.page_table` selects between the two strategies.
/// @param bit_size  Access width in bits.
/// @param fn        User write callback used when there is no page table.
template<typename FunctionPointer>
static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) {
    if (!cb.page_table) {
        // A write produces no value, so no result register is defined for `inst`.
        reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1));
        code->CallFunction(fn);
        return;
    }

    using namespace Xbyak::util;

    // rax is reserved as scratch for the page pointer. The address and value are placed in
    // ABI_PARAM1 and ABI_PARAM2 so the fallback call below finds its arguments in place.
    reg_alloc.ScratchGpr({ HostLoc::RAX });
    Xbyak::Reg32 vaddr = reg_alloc.UseScratchGpr(inst->GetArg(0), { ABI_PARAM1 }).cvt32();
    Xbyak::Reg64 value = reg_alloc.UseScratchGpr(inst->GetArg(1), { ABI_PARAM2 });
    Xbyak::Reg64 page_index = reg_alloc.ScratchGpr();
    Xbyak::Reg64 page_offset = reg_alloc.ScratchGpr();

    Xbyak::Label abort, end;

    // rax = page_table[vaddr >> 12]; the shift matches UserCallbacks::PAGE_BITS (12).
    code->mov(rax, u64(cb.page_table));
    code->mov(page_index.cvt32(), vaddr);
    code->shr(page_index.cvt32(), 12);
    code->mov(rax, qword[rax + page_index * 8]);

    // A null entry means this page has no direct mapping: take the callback path.
    code->test(rax, rax);
    code->jz(abort);

    // page_offset = vaddr & 4095, the offset within the page.
    // NOTE(review): no check is emitted that a 16/32/64-bit access stays inside the page;
    // an access starting near offset 4095 looks like it would write past the page - confirm.
    code->mov(page_offset.cvt32(), vaddr);
    code->and_(page_offset.cvt32(), 4095);

    // Store the low `bit_size` bits of the value.
    switch (bit_size) {
    case 8:
        code->mov(code->byte[rax + page_offset], value.cvt8());
        break;
    case 16:
        code->mov(word[rax + page_offset], value.cvt16());
        break;
    case 32:
        code->mov(dword[rax + page_offset], value.cvt32());
        break;
    case 64:
        code->mov(qword[rax + page_offset], value.cvt64());
        break;
    default:
        ASSERT_MSG(false, "Invalid bit_size");
        break;
    }
    code->jmp(end);

    // Slow path: call the pre-generated thunk that wraps the user write callback.
    code->L(abort);
    code->call(code->GetMemoryWriteCallback(bit_size));
    code->L(end);
}
void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) {
ReadMemory(code, reg_alloc, inst, cb, 8, cb.MemoryRead8);
} }
void EmitX64::EmitReadMemory16(IR::Block&, IR::Inst* inst) { void EmitX64::EmitReadMemory16(IR::Block&, IR::Inst* inst) {
reg_alloc.HostCall(inst, inst->GetArg(0)); ReadMemory(code, reg_alloc, inst, cb, 16, cb.MemoryRead16);
code->CallFunction(cb.MemoryRead16);
} }
void EmitX64::EmitReadMemory32(IR::Block&, IR::Inst* inst) { void EmitX64::EmitReadMemory32(IR::Block&, IR::Inst* inst) {
reg_alloc.HostCall(inst, inst->GetArg(0)); ReadMemory(code, reg_alloc, inst, cb, 32, cb.MemoryRead32);
code->CallFunction(cb.MemoryRead32);
} }
void EmitX64::EmitReadMemory64(IR::Block&, IR::Inst* inst) { void EmitX64::EmitReadMemory64(IR::Block&, IR::Inst* inst) {
reg_alloc.HostCall(inst, inst->GetArg(0)); ReadMemory(code, reg_alloc, inst, cb, 64, cb.MemoryRead64);
code->CallFunction(cb.MemoryRead64);
} }
void EmitX64::EmitWriteMemory8(IR::Block&, IR::Inst* inst) { void EmitX64::EmitWriteMemory8(IR::Block&, IR::Inst* inst) {
reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); WriteMemory(code, reg_alloc, inst, cb, 8, cb.MemoryWrite8);
code->CallFunction(cb.MemoryWrite8);
} }
void EmitX64::EmitWriteMemory16(IR::Block&, IR::Inst* inst) { void EmitX64::EmitWriteMemory16(IR::Block&, IR::Inst* inst) {
reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); WriteMemory(code, reg_alloc, inst, cb, 16, cb.MemoryWrite16);
code->CallFunction(cb.MemoryWrite16);
} }
void EmitX64::EmitWriteMemory32(IR::Block&, IR::Inst* inst) { void EmitX64::EmitWriteMemory32(IR::Block&, IR::Inst* inst) {
reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); WriteMemory(code, reg_alloc, inst, cb, 32, cb.MemoryWrite32);
code->CallFunction(cb.MemoryWrite32);
} }
void EmitX64::EmitWriteMemory64(IR::Block&, IR::Inst* inst) { void EmitX64::EmitWriteMemory64(IR::Block&, IR::Inst* inst) {
reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); WriteMemory(code, reg_alloc, inst, cb, 64, cb.MemoryWrite64);
code->CallFunction(cb.MemoryWrite64);
} }
template <typename FunctionPointer> template <typename FunctionPointer>

View file

@ -31,7 +31,7 @@ using namespace BackendX64;
struct Jit::Impl { struct Jit::Impl {
Impl(Jit* jit, UserCallbacks callbacks) Impl(Jit* jit, UserCallbacks callbacks)
: block_of_code() : block_of_code(callbacks)
, jit_state() , jit_state()
, emitter(&block_of_code, callbacks, jit) , emitter(&block_of_code, callbacks, jit)
, callbacks(callbacks) , callbacks(callbacks)