From d9c69ad9970c8b8f0b20cacba4b1ded0a6b06485 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 18 Mar 2017 17:20:21 +0000 Subject: [PATCH] constant_pool: Implement a constant pool --- src/CMakeLists.txt | 2 + src/backend_x64/block_of_code.cpp | 57 +++---------------------- src/backend_x64/block_of_code.h | 58 ++------------------------ src/backend_x64/constant_pool.cpp | 36 ++++++++++++++++ src/backend_x64/constant_pool.h | 42 +++++++++++++++++++ src/backend_x64/emit_x64.cpp | 69 ++++++++++++++++++------------- 6 files changed, 129 insertions(+), 135 deletions(-) create mode 100644 src/backend_x64/constant_pool.cpp create mode 100644 src/backend_x64/constant_pool.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1865954b..fcc75d08 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -75,6 +75,7 @@ if (ARCHITECTURE_x86_64) list(APPEND SRCS backend_x64/abi.cpp backend_x64/block_of_code.cpp + backend_x64/constant_pool.cpp backend_x64/emit_x64.cpp backend_x64/hostloc.cpp backend_x64/interface_x64.cpp @@ -85,6 +86,7 @@ if (ARCHITECTURE_x86_64) list(APPEND HEADERS backend_x64/abi.h backend_x64/block_of_code.h + backend_x64/constant_pool.h backend_x64/emit_x64.h backend_x64/hostloc.h backend_x64/jitstate.h diff --git a/src/backend_x64/block_of_code.cpp b/src/backend_x64/block_of_code.cpp index 5771dc92..e4f4ec02 100644 --- a/src/backend_x64/block_of_code.cpp +++ b/src/backend_x64/block_of_code.cpp @@ -18,8 +18,7 @@ namespace Dynarmic { namespace BackendX64 { -BlockOfCode::BlockOfCode(UserCallbacks cb) : Xbyak::CodeGenerator(128 * 1024 * 1024), cb(cb) { - GenConstants(); +BlockOfCode::BlockOfCode(UserCallbacks cb) : Xbyak::CodeGenerator(128 * 1024 * 1024), cb(cb), constant_pool(this, 256) { GenRunCode(); GenReturnFromRunCode(); GenMemoryAccessors(); @@ -44,56 +43,6 @@ void BlockOfCode::ReturnFromRunCode(bool MXCSR_switch) { jmp(MXCSR_switch ? 
return_from_run_code : return_from_run_code_without_mxcsr_switch); } -void BlockOfCode::GenConstants() { - align(); - L(consts.FloatNegativeZero32); - dd(0x80000000u); - - align(); - L(consts.FloatNaN32); - dd(0x7fc00000u); - - align(); - L(consts.FloatNonSignMask32); - dq(0x7fffffffu); - - align(); - L(consts.FloatNegativeZero64); - dq(0x8000000000000000u); - - align(); - L(consts.FloatNaN64); - dq(0x7ff8000000000000u); - - align(); - L(consts.FloatNonSignMask64); - dq(0x7fffffffffffffffu); - - align(); - L(consts.FloatPenultimatePositiveDenormal64); - dq(0x000ffffffffffffeu); - - align(); - L(consts.FloatMinS32); - dq(0xc1e0000000000000u); // -2147483648 as a double - - align(); - L(consts.FloatMaxS32); - dq(0x41dfffffffc00000u); // 2147483647 as a double - - align(); - L(consts.FloatPositiveZero32); - L(consts.FloatPositiveZero64); - L(consts.FloatMinU32); - dq(0x0000000000000000u); // 0 as a double - - align(); - L(consts.FloatMaxU32); - dq(0x41efffffffe00000u); // 4294967295 as a double - - align(); -} - void BlockOfCode::GenRunCode() { align(); run_code = getCurr(); @@ -188,6 +137,10 @@ void BlockOfCode::SwitchMxcsrOnExit() { ldmxcsr(dword[r15 + offsetof(JitState, save_host_MXCSR)]); } +Xbyak::Address BlockOfCode::MConst(u64 constant) { + return constant_pool.GetConstant(constant); +} + void BlockOfCode::nop(size_t size) { switch (size) { case 0: diff --git a/src/backend_x64/block_of_code.h b/src/backend_x64/block_of_code.h index 4824e6b6..9bf1b7bd 100644 --- a/src/backend_x64/block_of_code.h +++ b/src/backend_x64/block_of_code.h @@ -11,6 +11,7 @@ #include +#include "backend_x64/constant_pool.h" #include "backend_x64/jitstate.h" #include "common/common_types.h" #include "dynarmic/callbacks.h" @@ -52,45 +53,7 @@ public: } } - Xbyak::Address MFloatPositiveZero32() { - return xword[rip + consts.FloatPositiveZero32]; - } - Xbyak::Address MFloatNegativeZero32() { - return xword[rip + consts.FloatNegativeZero32]; - } - Xbyak::Address MFloatNaN32() { - return 
xword[rip + consts.FloatNaN32]; - } - Xbyak::Address MFloatNonSignMask32() { - return xword[rip + consts.FloatNonSignMask32]; - } - Xbyak::Address MFloatPositiveZero64() { - return xword[rip + consts.FloatPositiveZero64]; - } - Xbyak::Address MFloatNegativeZero64() { - return xword[rip + consts.FloatNegativeZero64]; - } - Xbyak::Address MFloatNaN64() { - return xword[rip + consts.FloatNaN64]; - } - Xbyak::Address MFloatNonSignMask64() { - return xword[rip + consts.FloatNonSignMask64]; - } - Xbyak::Address MFloatPenultimatePositiveDenormal64() { - return xword[rip + consts.FloatPenultimatePositiveDenormal64]; - } - Xbyak::Address MFloatMinS32() { - return xword[rip + consts.FloatMinS32]; - } - Xbyak::Address MFloatMaxS32() { - return xword[rip + consts.FloatMaxS32]; - } - Xbyak::Address MFloatMinU32() { - return xword[rip + consts.FloatMinU32]; - } - Xbyak::Address MFloatMaxU32() { - return xword[rip + consts.FloatMaxU32]; - } + Xbyak::Address MConst(u64 constant); const void* GetReturnFromRunCodeAddress() const { return return_from_run_code; @@ -155,22 +118,7 @@ private: UserCallbacks cb; CodePtr user_code_begin; - struct Consts { - Xbyak::Label FloatPositiveZero32; - Xbyak::Label FloatNegativeZero32; - Xbyak::Label FloatNaN32; - Xbyak::Label FloatNonSignMask32; - Xbyak::Label FloatPositiveZero64; - Xbyak::Label FloatNegativeZero64; - Xbyak::Label FloatNaN64; - Xbyak::Label FloatNonSignMask64; - Xbyak::Label FloatPenultimatePositiveDenormal64; - Xbyak::Label FloatMinS32; - Xbyak::Label FloatMaxS32; - Xbyak::Label FloatMinU32; - Xbyak::Label FloatMaxU32; - } consts; - void GenConstants(); + ConstantPool constant_pool; using RunCodeFuncType = void(*)(JitState*, CodePtr); RunCodeFuncType run_code = nullptr; diff --git a/src/backend_x64/constant_pool.cpp b/src/backend_x64/constant_pool.cpp new file mode 100644 index 00000000..acf41523 --- /dev/null +++ b/src/backend_x64/constant_pool.cpp @@ -0,0 +1,36 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2016 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include <cstring>
+
+#include "backend_x64/block_of_code.h"
+#include "backend_x64/constant_pool.h"
+#include "common/assert.h"
+
+namespace Dynarmic {
+namespace BackendX64 {
+
+ConstantPool::ConstantPool(BlockOfCode* code, size_t size) : code(code), pool_size(size) {
+    code->int3();
+    code->align(align_size);
+    pool_begin = reinterpret_cast<u8*>(code->AllocateFromCodeSpace(size));
+    std::memset(pool_begin, 0, size);
+    current_pool_ptr = pool_begin;
+}
+
+Xbyak::Address ConstantPool::GetConstant(u64 constant) {
+    auto iter = constant_info.find(constant);
+    if (iter == constant_info.end()) {
+        ASSERT(static_cast<size_t>(current_pool_ptr - pool_begin) < pool_size);
+        std::memcpy(current_pool_ptr, &constant, sizeof(u64));
+        iter = constant_info.emplace(constant, current_pool_ptr).first;
+        current_pool_ptr += align_size;
+    }
+    return code->xword[code->rip + iter->second];
+}
+
+} // namespace BackendX64
+} // namespace Dynarmic
diff --git a/src/backend_x64/constant_pool.h b/src/backend_x64/constant_pool.h
new file mode 100644
index 00000000..033061b3
--- /dev/null
+++ b/src/backend_x64/constant_pool.h
@@ -0,0 +1,42 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2016 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <map>
+
+#include <xbyak.h>
+
+#include "common/common_types.h"
+
+namespace Dynarmic {
+namespace BackendX64 {
+
+class BlockOfCode;
+
+/// ConstantPool allocates a block of memory from BlockOfCode.
+/// It places constants into this block of memory, returning the address
+/// of the memory location where the constant is placed. If the constant
+/// already exists, its memory location is reused.
+class ConstantPool final {
+public:
+    ConstantPool(BlockOfCode* code, size_t size);
+
+    Xbyak::Address GetConstant(u64 constant);
+
+private:
+    constexpr static size_t align_size = 16; // bytes
+
+    std::map<u64, void*> constant_info;
+
+    BlockOfCode* code;
+    size_t pool_size;
+    u8* pool_begin;
+    u8* current_pool_ptr;
+};
+
+} // namespace BackendX64
+} // namespace Dynarmic
diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp
index 6fb70618..b82618c8 100644
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -27,6 +27,19 @@
 namespace Dynarmic {
 namespace BackendX64 {
 
+constexpr u64 f32_negative_zero = 0x80000000u;
+constexpr u64 f32_nan = 0x7fc00000u;
+constexpr u64 f32_non_sign_mask = 0x7fffffffu;
+
+constexpr u64 f64_negative_zero = 0x8000000000000000u;
+constexpr u64 f64_nan = 0x7ff8000000000000u;
+constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu;
+
+constexpr u64 f64_penultimate_positive_denormal = 0x000ffffffffffffeu;
+constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double
+constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
+constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
+
 static Xbyak::Address MJitStateReg(Arm::Reg reg) {
     using namespace Xbyak::util;
     return dword[r15 + offsetof(JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg)];
@@ -2094,9 +2107,9 @@ static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::R
     using namespace Xbyak::util;
     Xbyak::Label end;
 
-    auto mask = code->MFloatNonSignMask64();
+    auto mask = code->MConst(f64_non_sign_mask);
     mask.setBit(64);
-    auto penult_denormal = code->MFloatPenultimatePositiveDenormal64();
+    auto penult_denormal = code->MConst(f64_penultimate_positive_denormal);
     penult_denormal.setBit(64);
 
     code->movq(gpr_scratch, xmm_value);
@@ -2127,9 +2140,9 @@ static void FlushToZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 
     using namespace Xbyak::util;
     Xbyak::Label end;
 
-    auto mask = 
code->MFloatNonSignMask64(); + auto mask = code->MConst(f64_non_sign_mask); mask.setBit(64); - auto penult_denormal = code->MFloatPenultimatePositiveDenormal64(); + auto penult_denormal = code->MConst(f64_penultimate_positive_denormal); penult_denormal.setBit(64); code->movq(gpr_scratch, xmm_value); @@ -2147,7 +2160,7 @@ static void DefaultNaN32(BlockOfCode* code, Xbyak::Xmm xmm_value) { code->ucomiss(xmm_value, xmm_value); code->jnp(end); - code->movaps(xmm_value, code->MFloatNaN32()); + code->movaps(xmm_value, code->MConst(f32_nan)); code->L(end); } @@ -2156,7 +2169,7 @@ static void DefaultNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value) { code->ucomisd(xmm_value, xmm_value); code->jnp(end); - code->movaps(xmm_value, code->MFloatNaN64()); + code->movaps(xmm_value, code->MConst(f64_nan)); code->L(end); } @@ -2288,7 +2301,7 @@ void EmitX64::EmitFPAbs32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); - code->pand(result, code->MFloatNonSignMask32()); + code->pand(result, code->MConst(f32_non_sign_mask)); reg_alloc.DefineValue(inst, result); } @@ -2297,7 +2310,7 @@ void EmitX64::EmitFPAbs64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); - code->pand(result, code->MFloatNonSignMask64()); + code->pand(result, code->MConst(f64_non_sign_mask)); reg_alloc.DefineValue(inst, result); } @@ -2306,7 +2319,7 @@ void EmitX64::EmitFPNeg32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); - code->pxor(result, code->MFloatNegativeZero32()); + code->pxor(result, code->MConst(f32_negative_zero)); reg_alloc.DefineValue(inst, result); } @@ -2315,7 +2328,7 @@ void EmitX64::EmitFPNeg64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { auto args = reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm 
result = reg_alloc.UseScratchXmm(args[0]); - code->pxor(result, code->MFloatNegativeZero64()); + code->pxor(result, code->MConst(f64_negative_zero)); reg_alloc.DefineValue(inst, result); } @@ -2473,8 +2486,8 @@ void EmitX64::EmitFPSingleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* } // Clamp to output range ZeroIfNaN64(code, from, xmm_scratch); - code->minsd(from, code->MFloatMaxS32()); - code->maxsd(from, code->MFloatMinS32()); + code->minsd(from, code->MConst(f64_max_s32)); + code->maxsd(from, code->MConst(f64_min_s32)); // Second time is for real if (round_towards_zero) { code->cvttsd2si(to, from); // 32 bit gpr @@ -2506,12 +2519,12 @@ void EmitX64::EmitFPSingleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* code->cvtss2sd(from, from); ZeroIfNaN64(code, from, xmm_scratch); // Bring into SSE range - code->addsd(from, code->MFloatMinS32()); + code->addsd(from, code->MConst(f64_min_s32)); // First time is to set flags code->cvtsd2si(to, from); // 32 bit gpr // Clamp to output range - code->minsd(from, code->MFloatMaxS32()); - code->maxsd(from, code->MFloatMinS32()); + code->minsd(from, code->MConst(f64_max_s32)); + code->maxsd(from, code->MConst(f64_min_s32)); // Actually convert code->cvtsd2si(to, from); // 32 bit gpr // Bring back into original range @@ -2526,18 +2539,18 @@ void EmitX64::EmitFPSingleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* code->cvtss2sd(from, from); ZeroIfNaN64(code, from, xmm_scratch); // Generate masks if out-of-signed-range - code->movaps(xmm_mask, code->MFloatMaxS32()); + code->movaps(xmm_mask, code->MConst(f64_max_s32)); code->cmpltsd(xmm_mask, from); code->movd(gpr_mask, xmm_mask); - code->pand(xmm_mask, code->MFloatMinS32()); + code->pand(xmm_mask, code->MConst(f64_min_s32)); code->and_(gpr_mask, u32(2147483648u)); // Bring into range if necessary code->addsd(from, xmm_mask); // First time is to set flags code->cvttsd2si(to, from); // 32 bit gpr // Clamp to output range - code->minsd(from, 
code->MFloatMaxS32()); - code->maxsd(from, code->MFloatMinU32()); + code->minsd(from, code->MConst(f64_max_s32)); + code->maxsd(from, code->MConst(f64_min_u32)); // Actually convert code->cvttsd2si(to, from); // 32 bit gpr // Bring back into original range if necessary @@ -2568,8 +2581,8 @@ void EmitX64::EmitFPDoubleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* } // Clamp to output range ZeroIfNaN64(code, from, xmm_scratch); - code->minsd(from, code->MFloatMaxS32()); - code->maxsd(from, code->MFloatMinS32()); + code->minsd(from, code->MConst(f64_max_s32)); + code->maxsd(from, code->MConst(f64_min_s32)); // Second time is for real if (round_towards_zero) { code->cvttsd2si(to, from); // 32 bit gpr @@ -2598,12 +2611,12 @@ void EmitX64::EmitFPDoubleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* } ZeroIfNaN64(code, from, xmm_scratch); // Bring into SSE range - code->addsd(from, code->MFloatMinS32()); + code->addsd(from, code->MConst(f64_min_s32)); // First time is to set flags code->cvtsd2si(gpr_scratch, from); // 32 bit gpr // Clamp to output range - code->minsd(from, code->MFloatMaxS32()); - code->maxsd(from, code->MFloatMinS32()); + code->minsd(from, code->MConst(f64_max_s32)); + code->maxsd(from, code->MConst(f64_min_s32)); // Actually convert code->cvtsd2si(to, from); // 32 bit gpr // Bring back into original range @@ -2617,18 +2630,18 @@ void EmitX64::EmitFPDoubleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* } ZeroIfNaN64(code, from, xmm_scratch); // Generate masks if out-of-signed-range - code->movaps(xmm_mask, code->MFloatMaxS32()); + code->movaps(xmm_mask, code->MConst(f64_max_s32)); code->cmpltsd(xmm_mask, from); code->movd(gpr_mask, xmm_mask); - code->pand(xmm_mask, code->MFloatMinS32()); + code->pand(xmm_mask, code->MConst(f64_min_s32)); code->and_(gpr_mask, u32(2147483648u)); // Bring into range if necessary code->addsd(from, xmm_mask); // First time is to set flags code->cvttsd2si(gpr_scratch, from); // 32 bit gpr // Clamp to 
output range - code->minsd(from, code->MFloatMaxS32()); - code->maxsd(from, code->MFloatMinU32()); + code->minsd(from, code->MConst(f64_max_s32)); + code->maxsd(from, code->MConst(f64_min_u32)); // Actually convert code->cvttsd2si(to, from); // 32 bit gpr // Bring back into original range if necessary