From db30e02ac8141ef6ea2e22d301ee01e984f2be33 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 23 Jan 2018 19:16:39 +0000 Subject: [PATCH] emit_x64: Extract BlockRangeInformation, remove template parameter --- src/CMakeLists.txt | 2 + src/backend_x64/a32_emit_x64.cpp | 13 +- src/backend_x64/a32_emit_x64.h | 8 +- src/backend_x64/a64_emit_x64.cpp | 13 +- src/backend_x64/a64_emit_x64.h | 8 +- src/backend_x64/block_range_information.cpp | 47 ++++++ src/backend_x64/block_range_information.h | 31 ++++ src/backend_x64/emit_x64.cpp | 86 ++++------ src/backend_x64/emit_x64.h | 15 +- src/backend_x64/emit_x64_data_processing.cpp | 158 ++++++------------- src/backend_x64/emit_x64_floating_point.cpp | 157 +++++++----------- src/backend_x64/emit_x64_packed.cpp | 108 ++++--------- src/backend_x64/emit_x64_saturation.cpp | 18 +-- src/backend_x64/emit_x64_vector.cpp | 42 ++--- 14 files changed, 308 insertions(+), 398 deletions(-) create mode 100644 src/backend_x64/block_range_information.cpp create mode 100644 src/backend_x64/block_range_information.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 37bbee91..e30e698b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -121,6 +121,8 @@ if (ARCHITECTURE_x86_64) backend_x64/abi.h backend_x64/block_of_code.cpp backend_x64/block_of_code.h + backend_x64/block_range_information.cpp + backend_x64/block_range_information.h backend_x64/callback.cpp backend_x64/callback.h backend_x64/constant_pool.cpp diff --git a/src/backend_x64/a32_emit_x64.cpp b/src/backend_x64/a32_emit_x64.cpp index e4ec3779..287d1d8c 100644 --- a/src/backend_x64/a32_emit_x64.cpp +++ b/src/backend_x64/a32_emit_x64.cpp @@ -127,13 +127,22 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) { const size_t size = static_cast(code->getCurr() - entrypoint); const A32::LocationDescriptor end_location{block.EndLocation()}; const auto range = boost::icl::discrete_interval::closed(descriptor.PC(), end_location.PC() - 1); - A32EmitX64::BlockDescriptor block_desc{entrypoint, size, block.Location(), range}; + A32EmitX64::BlockDescriptor block_desc{entrypoint, size}; block_descriptors.emplace(descriptor.UniqueHash(), block_desc); - block_ranges.add(std::make_pair(range, std::set{descriptor})); + block_ranges.AddRange(range, descriptor); return block_desc; } +void A32EmitX64::ClearCache() { + EmitX64::ClearCache(); + block_ranges.ClearCache(); +} + +void A32EmitX64::InvalidateCacheRanges(const boost::icl::interval_set& ranges) { + InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges)); +} + void A32EmitX64::GenMemoryAccessors() { code->align(); read_memory_8 = code->getCurr(); diff --git a/src/backend_x64/a32_emit_x64.h b/src/backend_x64/a32_emit_x64.h index 376f1d32..4967ef3c 100644 --- a/src/backend_x64/a32_emit_x64.h +++ b/src/backend_x64/a32_emit_x64.h @@ -9,6 +9,7 @@ #include #include "backend_x64/a32_jitstate.h" +#include "backend_x64/block_range_information.h" #include "backend_x64/emit_x64.h" #include "dynarmic/A32/a32.h" #include "dynarmic/A32/callbacks.h" @@ -28,7 +29,7 @@ struct A32EmitContext final : public EmitContext { bool FPSCR_DN() const override; }; -class A32EmitX64 final : public EmitX64 { +class A32EmitX64 final : public EmitX64 { public: A32EmitX64(BlockOfCode* code, A32::UserCallbacks cb, A32::Jit* jit_interface); ~A32EmitX64(); @@ -39,9 +40,14 @@ public: */ BlockDescriptor Emit(IR::Block& ir); + void ClearCache() override; + + void InvalidateCacheRanges(const boost::icl::interval_set& ranges); + protected: const A32::UserCallbacks cb; A32::Jit* 
jit_interface; + BlockRangeInformation block_ranges; const void* read_memory_8; const void* read_memory_16; diff --git a/src/backend_x64/a64_emit_x64.cpp b/src/backend_x64/a64_emit_x64.cpp index cc2238e5..7c8d9292 100644 --- a/src/backend_x64/a64_emit_x64.cpp +++ b/src/backend_x64/a64_emit_x64.cpp @@ -109,13 +109,22 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) { const size_t size = static_cast(code->getCurr() - entrypoint); const A64::LocationDescriptor end_location{block.EndLocation()}; const auto range = boost::icl::discrete_interval::closed(descriptor.PC(), end_location.PC() - 1); - A64EmitX64::BlockDescriptor block_desc{entrypoint, size, block.Location(), range}; + A64EmitX64::BlockDescriptor block_desc{entrypoint, size}; block_descriptors.emplace(descriptor.UniqueHash(), block_desc); - block_ranges.add(std::make_pair(range, std::set{descriptor})); + block_ranges.AddRange(range, descriptor); return block_desc; } +void A64EmitX64::ClearCache() { + EmitX64::ClearCache(); + block_ranges.ClearCache(); +} + +void A64EmitX64::InvalidateCacheRanges(const boost::icl::interval_set& ranges) { + InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges)); +} + void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8(); diff --git a/src/backend_x64/a64_emit_x64.h b/src/backend_x64/a64_emit_x64.h index 0eb74a22..46458c28 100644 --- a/src/backend_x64/a64_emit_x64.h +++ b/src/backend_x64/a64_emit_x64.h @@ -7,6 +7,7 @@ #pragma once #include "backend_x64/a64_jitstate.h" +#include "backend_x64/block_range_information.h" #include "backend_x64/emit_x64.h" #include "dynarmic/A64/config.h" #include "frontend/A64/location_descriptor.h" @@ -25,7 +26,7 @@ struct A64EmitContext final : public EmitContext { bool FPSCR_DN() const override; }; -class A64EmitX64 final : public EmitX64 { +class A64EmitX64 final : public EmitX64 { public: A64EmitX64(BlockOfCode* code, A64::UserConfig conf); ~A64EmitX64(); @@ -36,8 +37,13 @@ public: */ BlockDescriptor Emit(IR::Block& ir); + void ClearCache() override; + + void InvalidateCacheRanges(const boost::icl::interval_set& ranges); + protected: const A64::UserConfig conf; + BlockRangeInformation block_ranges; // Microinstruction emitters #define OPCODE(...) diff --git a/src/backend_x64/block_range_information.cpp b/src/backend_x64/block_range_information.cpp new file mode 100644 index 00000000..a5d1e51e --- /dev/null +++ b/src/backend_x64/block_range_information.cpp @@ -0,0 +1,47 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#include + +#include +#include + +#include "backend_x64/block_range_information.h" +#include "common/common_types.h" + +namespace Dynarmic { +namespace BackendX64 { + +template +void BlockRangeInformation::AddRange(boost::icl::discrete_interval range, IR::LocationDescriptor location) { + block_ranges.add(std::make_pair(range, std::set{location})); +} + +template +void BlockRangeInformation::ClearCache() { + block_ranges.clear(); +} + +template +std::unordered_set BlockRangeInformation::InvalidateRanges(const boost::icl::interval_set& ranges) { + std::unordered_set erase_locations; + for (auto invalidate_interval : ranges) { + auto pair = block_ranges.equal_range(invalidate_interval); + for (auto it = pair.first; it != pair.second; ++it) { + for (const auto &descriptor : it->second) { + erase_locations.insert(descriptor); + } + } + } + // TODO: EFFICIENCY: Remove ranges that are to be erased. + return erase_locations; +} + +template class BlockRangeInformation; +template class BlockRangeInformation; + +} // namespace BackendX64 +} // namespace Dynarmic diff --git a/src/backend_x64/block_range_information.h b/src/backend_x64/block_range_information.h new file mode 100644 index 00000000..4976c477 --- /dev/null +++ b/src/backend_x64/block_range_information.h @@ -0,0 +1,31 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include + +#include +#include + +#include "frontend/ir/location_descriptor.h" + +namespace Dynarmic { +namespace BackendX64 { + +template +class BlockRangeInformation { +public: + void AddRange(boost::icl::discrete_interval range, IR::LocationDescriptor location); + void ClearCache(); + std::unordered_set InvalidateRanges(const boost::icl::interval_set& ranges); + +private: + boost::icl::interval_map> block_ranges; +}; + +} // namespace BackendX64 +} // namespace Dynarmic diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 1be47d9e..2d888b6e 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -31,40 +31,33 @@ void EmitContext::EraseInstruction(IR::Inst* inst) { inst->ClearArgs(); } -template -EmitX64::EmitX64(BlockOfCode* code) +EmitX64::EmitX64(BlockOfCode* code) : code(code) {} -template -EmitX64::~EmitX64() {} +EmitX64::~EmitX64() {} -template -boost::optional::BlockDescriptor> EmitX64::GetBasicBlock(IR::LocationDescriptor descriptor) const { +boost::optional EmitX64::GetBasicBlock(IR::LocationDescriptor descriptor) const { auto iter = block_descriptors.find(descriptor); if (iter == block_descriptors.end()) return boost::none; return iter->second; } -template -void EmitX64::EmitVoid(EmitContext&, IR::Inst*) { +void EmitX64::EmitVoid(EmitContext&, IR::Inst*) { } -template -void EmitX64::EmitBreakpoint(EmitContext&, IR::Inst*) { +void EmitX64::EmitBreakpoint(EmitContext&, IR::Inst*) { code->int3(); } -template -void EmitX64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (!args[0].IsImmediate()) { ctx.reg_alloc.DefineValue(inst, args[0]); } } -template -void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, IR::LocationDescriptor target) { +void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, IR::LocationDescriptor target) { using namespace Xbyak::util; 
auto iter = block_descriptors.find(target); @@ -87,8 +80,7 @@ void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_r code->mov(dword[r15 + code->GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32()); } -template -void EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[0].IsImmediate()); u64 unique_hash_of_target = args[0].GetImmediateU64(); @@ -100,23 +92,19 @@ void EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) { PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target}); } -template -void EmitX64::EmitGetCarryFromOp(EmitContext&, IR::Inst*) { +void EmitX64::EmitGetCarryFromOp(EmitContext&, IR::Inst*) { ASSERT_MSG(false, "should never happen"); } -template -void EmitX64::EmitGetOverflowFromOp(EmitContext&, IR::Inst*) { +void EmitX64::EmitGetOverflowFromOp(EmitContext&, IR::Inst*) { ASSERT_MSG(false, "should never happen"); } -template -void EmitX64::EmitGetGEFromOp(EmitContext&, IR::Inst*) { +void EmitX64::EmitGetGEFromOp(EmitContext&, IR::Inst*) { ASSERT_MSG(false, "should never happen"); } -template -void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const int bitsize = [&]{ @@ -143,14 +131,12 @@ void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, nzcv); } -template -void EmitX64::EmitAddCycles(size_t cycles) { +void EmitX64::EmitAddCycles(size_t cycles) { ASSERT(cycles < std::numeric_limits::max()); code->sub(qword[r15 + code->GetJitStateInfo().offsetof_cycles_remaining], static_cast(cycles)); } -template -Xbyak::Label EmitX64::EmitCond(IR::Cond cond) { +Xbyak::Label EmitX64::EmitCond(IR::Cond cond) { Xbyak::Label label; const Xbyak::Reg32 cpsr = eax; @@ -262,8 +248,7 @@ Xbyak::Label EmitX64::EmitCond(IR::Cond cond) { return label; } -template -void EmitX64::EmitCondPrelude(const IR::Block& block) { +void EmitX64::EmitCondPrelude(const IR::Block& block) { if (block.GetCondition() == IR::Cond::AL) { ASSERT(!block.HasConditionFailedLocation()); return; @@ -277,8 +262,7 @@ void EmitX64::EmitCondPrelude(const IR::Block& block) { code->L(pass); } -template -void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) { +void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) { Common::VisitVariant(terminal, [this, &initial_location](auto x) { using T = std::decay_t; if constexpr (!std::is_same_v) { @@ -289,8 +273,7 @@ void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor in }); } -template -void EmitX64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) { +void EmitX64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) { const CodePtr save_code_ptr = code->getCurr(); const PatchInformation& patch_info = patch_information[desc]; @@ -312,39 +295,28 @@ void EmitX64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) { code->SetCodePtr(save_code_ptr); } -template -void EmitX64::Unpatch(const IR::LocationDescriptor& desc) { +void EmitX64::Unpatch(const IR::LocationDescriptor& desc) { Patch(desc, nullptr); } -template -void EmitX64::ClearCache() { +void EmitX64::ClearCache() { block_descriptors.clear(); patch_information.clear(); } -template -void EmitX64::InvalidateCacheRanges(const boost::icl::interval_set& ranges) { - // Remove cached 
block descriptors and patch information overlapping with the given range. - for (auto invalidate_interval : ranges) { - auto pair = block_ranges.equal_range(invalidate_interval); - for (auto it = pair.first; it != pair.second; ++it) { - for (const auto& descriptor : it->second) { - if (patch_information.count(descriptor)) { - Unpatch(descriptor); - } - block_descriptors.erase(descriptor); - } +void EmitX64::InvalidateBasicBlocks(const std::unordered_set& locations) { + for (const auto &descriptor : locations) { + auto it = block_descriptors.find(descriptor); + if (it == block_descriptors.end()) { + continue; } - block_ranges.erase(pair.first, pair.second); + + if (patch_information.count(descriptor)) { + Unpatch(descriptor); + } + block_descriptors.erase(it); } } } // namespace BackendX64 } // namespace Dynarmic - -#include "backend_x64/a32_jitstate.h" -#include "backend_x64/a64_jitstate.h" - -template class Dynarmic::BackendX64::EmitX64; -template class Dynarmic::BackendX64::EmitX64; diff --git a/src/backend_x64/emit_x64.h b/src/backend_x64/emit_x64.h index 1ced7a7e..a057cd33 100644 --- a/src/backend_x64/emit_x64.h +++ b/src/backend_x64/emit_x64.h @@ -7,10 +7,9 @@ #pragma once #include +#include #include -#include -#include #include #include @@ -44,17 +43,11 @@ struct EmitContext { IR::Block& block; }; -template class EmitX64 { public: - using ProgramCounterType = typename JitStateType::ProgramCounterType; - struct BlockDescriptor { CodePtr entrypoint; // Entrypoint of emitted code size_t size; // Length in bytes of emitted code - - IR::LocationDescriptor start_location; - boost::icl::discrete_interval range; }; EmitX64(BlockOfCode* code); @@ -64,9 +57,10 @@ public: boost::optional GetBasicBlock(IR::LocationDescriptor descriptor) const; /// Empties the entire cache. - void ClearCache(); + virtual void ClearCache(); - void InvalidateCacheRanges(const boost::icl::interval_set& ranges); + /// Invalidates a selection of basic blocks. 
+ void InvalidateBasicBlocks(const std::unordered_set& locations); protected: // Microinstruction emitters @@ -111,7 +105,6 @@ protected: BlockOfCode* code; std::unordered_map block_descriptors; std::unordered_map patch_information; - boost::icl::interval_map> block_ranges; }; } // namespace BackendX64 diff --git a/src/backend_x64/emit_x64_data_processing.cpp b/src/backend_x64/emit_x64_data_processing.cpp index ba1e7e6e..b074cc9c 100644 --- a/src/backend_x64/emit_x64_data_processing.cpp +++ b/src/backend_x64/emit_x64_data_processing.cpp @@ -17,8 +17,7 @@ namespace BackendX64 { using namespace Xbyak::util; -template -void EmitX64::EmitPack2x32To1x64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPack2x32To1x64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 lo = ctx.reg_alloc.UseScratchGpr(args[0]); Xbyak::Reg64 hi = ctx.reg_alloc.UseScratchGpr(args[1]); @@ -30,14 +29,12 @@ void EmitX64::EmitPack2x32To1x64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, lo); } -template -void EmitX64::EmitLeastSignificantWord(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitLeastSignificantWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.DefineValue(inst, args[0]); } -template -void EmitX64::EmitMostSignificantWord(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitMostSignificantWord(EmitContext& ctx, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -54,20 +51,17 @@ void EmitX64::EmitMostSignificantWord(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitLeastSignificantHalf(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitLeastSignificantHalf(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.DefineValue(inst, args[0]); } -template -void EmitX64::EmitLeastSignificantByte(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitLeastSignificantByte(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.DefineValue(inst, args[0]); } -template -void EmitX64::EmitMostSignificantBit(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitMostSignificantBit(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); // TODO: Flag optimization @@ -75,8 +69,7 @@ void EmitX64::EmitMostSignificantBit(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitIsZero32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitIsZero32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); // TODO: Flag optimization @@ -86,8 +79,7 @@ void EmitX64::EmitIsZero32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitIsZero64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitIsZero64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); // TODO: Flag optimization @@ -97,8 +89,7 @@ void EmitX64::EmitIsZero64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitTestBit(EmitContext& ctx, IR::Inst* 
inst) { +void EmitX64::EmitTestBit(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); ASSERT(args[1].IsImmediate()); @@ -108,7 +99,6 @@ void EmitX64::EmitTestBit(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template static void EmitConditionalSelect(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, int bitsize) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr({HostLoc::RAX}).cvt32(); @@ -179,18 +169,15 @@ static void EmitConditionalSelect(BlockOfCode* code, EmitContext& ctx, IR::Inst* ctx.reg_alloc.DefineValue(inst, else_); } -template -void EmitX64::EmitConditionalSelect32(EmitContext& ctx, IR::Inst* inst) { - EmitConditionalSelect(code, ctx, inst, 32); +void EmitX64::EmitConditionalSelect32(EmitContext& ctx, IR::Inst* inst) { + EmitConditionalSelect(code, ctx, inst, 32); } -template -void EmitX64::EmitConditionalSelect64(EmitContext& ctx, IR::Inst* inst) { - EmitConditionalSelect(code, ctx, inst, 64); +void EmitX64::EmitConditionalSelect64(EmitContext& ctx, IR::Inst* inst) { + EmitConditionalSelect(code, ctx, inst, 64); } -template -void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -290,8 +277,7 @@ void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { } } -template -void EmitX64::EmitLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto& operand_arg = args[0]; auto& shift_arg = args[1]; @@ -324,8 +310,7 @@ void EmitX64::EmitLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { } } -template -void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -424,8 +409,7 @@ void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { } } -template -void EmitX64::EmitLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto& operand_arg = args[0]; auto& shift_arg = args[1]; @@ -458,8 +442,7 @@ void EmitX64::EmitLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { } } -template -void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -547,8 +530,7 @@ void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) } } -template -void EmitX64::EmitArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto& operand_arg = args[0]; auto& shift_arg = args[1]; @@ -579,8 +561,7 @@ void EmitX64::EmitArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) } } -template -void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* 
inst) { +void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -659,8 +640,7 @@ void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) { } } -template -void EmitX64::EmitRotateRight64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitRotateRight64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto& operand_arg = args[0]; auto& shift_arg = args[1]; @@ -683,8 +663,7 @@ void EmitX64::EmitRotateRight64(EmitContext& ctx, IR::Inst* inst) { } } -template -void EmitX64::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -788,13 +767,11 @@ static void EmitAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, int bit ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitAdd32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitAdd32(EmitContext& ctx, IR::Inst* inst) { EmitAdd(code, ctx, inst, 32); } -template -void EmitX64::EmitAdd64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitAdd64(EmitContext& ctx, IR::Inst* inst) { EmitAdd(code, ctx, inst, 64); } @@ -867,18 +844,15 @@ static void EmitSub(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, int bit ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitSub32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitSub32(EmitContext& ctx, IR::Inst* inst) { EmitSub(code, ctx, inst, 32); } -template -void EmitX64::EmitSub64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitSub64(EmitContext& ctx, IR::Inst* inst) { EmitSub(code, ctx, inst, 64); } -template -void EmitX64::EmitMul32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitMul32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); @@ -893,8 +867,7 @@ void EmitX64::EmitMul32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitMul64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitMul64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); @@ -905,8 +878,7 @@ void EmitX64::EmitMul64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitAnd32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitAnd32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); @@ -925,8 +897,7 @@ void EmitX64::EmitAnd32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitAnd64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitAnd64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); @@ -945,8 +916,7 @@ void EmitX64::EmitAnd64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitEor32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitEor32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 
result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); @@ -965,8 +935,7 @@ void EmitX64::EmitEor32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitEor64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitEor64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); @@ -985,8 +954,7 @@ void EmitX64::EmitEor64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitOr32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitOr32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); @@ -1005,8 +973,7 @@ void EmitX64::EmitOr32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitOr64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitOr64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); @@ -1025,8 +992,7 @@ void EmitX64::EmitOr64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitNot32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitNot32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 result; @@ -1040,8 +1006,7 @@ void EmitX64::EmitNot32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitNot64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitNot64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result; @@ -1055,112 +1020,98 @@ void EmitX64::EmitNot64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitSignExtendByteToWord(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitSignExtendByteToWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); code->movsx(result.cvt32(), result.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitSignExtendHalfToWord(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitSignExtendHalfToWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); code->movsx(result.cvt32(), result.cvt16()); ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitSignExtendByteToLong(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitSignExtendByteToLong(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); code->movsx(result.cvt64(), result.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitSignExtendHalfToLong(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitSignExtendHalfToLong(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); code->movsx(result.cvt64(), result.cvt16()); ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitSignExtendWordToLong(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitSignExtendWordToLong(EmitContext& ctx, IR::Inst* inst) { auto args = 
ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); code->movsxd(result.cvt64(), result.cvt32()); ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitZeroExtendByteToWord(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitZeroExtendByteToWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); code->movzx(result.cvt32(), result.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitZeroExtendHalfToWord(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitZeroExtendHalfToWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); code->movzx(result.cvt32(), result.cvt16()); ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitZeroExtendByteToLong(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitZeroExtendByteToLong(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); code->movzx(result.cvt32(), result.cvt8()); // x64 zeros upper 32 bits on a 32-bit move ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitZeroExtendHalfToLong(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitZeroExtendHalfToLong(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); code->movzx(result.cvt32(), result.cvt16()); // x64 zeros upper 32 bits on a 32-bit move ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitZeroExtendWordToLong(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitZeroExtendWordToLong(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); code->mov(result.cvt32(), result.cvt32()); // x64 zeros upper 32 bits on a 32-bit move ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitByteReverseWord(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitByteReverseWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); code->bswap(result); ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitByteReverseHalf(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitByteReverseHalf(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg16 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt16(); code->rol(result, 8); ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitByteReverseDual(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitByteReverseDual(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); code->bswap(result); ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code->DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) { Xbyak::Reg32 source = ctx.reg_alloc.UseGpr(args[0]).cvt32(); @@ -1184,8 +1135,7 @@ void EmitX64::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { } } -template -void 
EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code->DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) { Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]).cvt64(); @@ -1211,9 +1161,3 @@ void EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) { } // namespace BackendX64 } // namespace Dynarmic - -#include "backend_x64/a32_jitstate.h" -#include "backend_x64/a64_jitstate.h" - -template class Dynarmic::BackendX64::EmitX64; -template class Dynarmic::BackendX64::EmitX64; diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp index eada0ff2..51b1d06d 100644 --- a/src/backend_x64/emit_x64_floating_point.cpp +++ b/src/backend_x64/emit_x64_floating_point.cpp @@ -30,7 +30,6 @@ constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double -template static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { Xbyak::Label end; @@ -47,7 +46,6 @@ static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::R code->L(end); } -template static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) { Xbyak::Label end; @@ -66,7 +64,6 @@ static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::R code->L(end); } -template static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { Xbyak::Label end; @@ -80,7 +77,6 @@ static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 code->L(end); } -template static void FlushToZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) { Xbyak::Label end; @@ -123,7 +119,6 @@ static void ZeroIfNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_ code->pand(xmm_value, xmm_scratch); } -template static void FPThreeOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -132,12 +127,12 @@ static void FPThreeOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32(); if (ctx.FPSCR_FTZ()) { - DenormalsAreZero32(code, result, gpr_scratch); - DenormalsAreZero32(code, operand, gpr_scratch); + DenormalsAreZero32(code, result, gpr_scratch); + DenormalsAreZero32(code, operand, gpr_scratch); } (code->*fn)(result, operand); if (ctx.FPSCR_FTZ()) { - FlushToZero32(code, result, gpr_scratch); + FlushToZero32(code, result, gpr_scratch); } if (ctx.FPSCR_DN()) { DefaultNaN32(code, result); @@ -146,7 +141,6 @@ static void FPThreeOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi ctx.reg_alloc.DefineValue(inst, result); } -template static void FPThreeOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -155,12 +149,12 @@ static void FPThreeOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr(); if (ctx.FPSCR_FTZ()) { - DenormalsAreZero64(code, result, gpr_scratch); - DenormalsAreZero64(code, operand, gpr_scratch); + DenormalsAreZero64(code, result, 
gpr_scratch); + DenormalsAreZero64(code, operand, gpr_scratch); } (code->*fn)(result, operand); if (ctx.FPSCR_FTZ()) { - FlushToZero64(code, result, gpr_scratch); + FlushToZero64(code, result, gpr_scratch); } if (ctx.FPSCR_DN()) { DefaultNaN64(code, result); @@ -169,7 +163,6 @@ static void FPThreeOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi ctx.reg_alloc.DefineValue(inst, result); } -template static void FPTwoOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -177,12 +170,12 @@ static void FPTwoOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32(); if (ctx.FPSCR_FTZ()) { - DenormalsAreZero32(code, result, gpr_scratch); + DenormalsAreZero32(code, result, gpr_scratch); } (code->*fn)(result, result); if (ctx.FPSCR_FTZ()) { - FlushToZero32(code, result, gpr_scratch); + FlushToZero32(code, result, gpr_scratch); } if (ctx.FPSCR_DN()) { DefaultNaN32(code, result); @@ -191,7 +184,6 @@ static void FPTwoOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void ctx.reg_alloc.DefineValue(inst, result); } -template static void FPTwoOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -199,12 +191,12 @@ static void FPTwoOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr(); if (ctx.FPSCR_FTZ()) { - DenormalsAreZero64(code, result, gpr_scratch); + DenormalsAreZero64(code, result, gpr_scratch); } (code->*fn)(result, result); if (ctx.FPSCR_FTZ()) { - FlushToZero64(code, result, gpr_scratch); + FlushToZero64(code, result, gpr_scratch); } if (ctx.FPSCR_DN()) { DefaultNaN64(code, result); @@ -213,8 +205,7 @@ static void FPTwoOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -223,8 +214,7 @@ void EmitX64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -233,8 +223,7 @@ void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -243,8 +232,7 @@ void EmitX64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -253,57 +241,46 @@ void EmitX64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void 
EmitX64::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::addss); +void EmitX64::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst) { + FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::addss); } -template -void EmitX64::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::addsd); +void EmitX64::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst) { + FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::addsd); } -template -void EmitX64::EmitFPDiv32(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::divss); +void EmitX64::EmitFPDiv32(EmitContext& ctx, IR::Inst* inst) { + FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::divss); } -template -void EmitX64::EmitFPDiv64(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::divsd); +void EmitX64::EmitFPDiv64(EmitContext& ctx, IR::Inst* inst) { + FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::divsd); } -template -void EmitX64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::mulss); +void EmitX64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) { + FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::mulss); } -template -void EmitX64::EmitFPMul64(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::mulsd); +void EmitX64::EmitFPMul64(EmitContext& ctx, IR::Inst* inst) { + FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::mulsd); } -template -void EmitX64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) { - FPTwoOp32(code, ctx, inst, &Xbyak::CodeGenerator::sqrtss); +void EmitX64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) { + FPTwoOp32(code, ctx, inst, &Xbyak::CodeGenerator::sqrtss); } -template -void EmitX64::EmitFPSqrt64(EmitContext& ctx, IR::Inst* inst) { - FPTwoOp64(code, ctx, inst, &Xbyak::CodeGenerator::sqrtsd); +void EmitX64::EmitFPSqrt64(EmitContext& ctx, IR::Inst* inst) { + FPTwoOp64(code, ctx, inst, &Xbyak::CodeGenerator::sqrtsd); } -template -void EmitX64::EmitFPSub32(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::subss); +void EmitX64::EmitFPSub32(EmitContext& ctx, IR::Inst* inst) { + FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::subss); } -template -void EmitX64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) { - FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::subsd); +void EmitX64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) { + FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::subsd); } -template static void SetFpscrNzcvFromFlags(BlockOfCode* code, EmitContext& ctx) { ctx.reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr().cvt32(); @@ -316,8 +293,7 @@ static void SetFpscrNzcvFromFlags(BlockOfCode* code, EmitContext& ctx) { code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_nzcv], nzcv); } -template -void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm reg_a = ctx.reg_alloc.UseXmm(args[0]); Xbyak::Xmm reg_b = ctx.reg_alloc.UseXmm(args[1]); @@ -329,11 +305,10 @@ void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) { code->ucomiss(reg_a, reg_b); } - SetFpscrNzcvFromFlags(code, ctx); + SetFpscrNzcvFromFlags(code, ctx); } -template -void EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) { +void 
EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm reg_a = ctx.reg_alloc.UseXmm(args[0]); Xbyak::Xmm reg_b = ctx.reg_alloc.UseXmm(args[1]); @@ -345,21 +320,20 @@ void EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) { code->ucomisd(reg_a, reg_b); } - SetFpscrNzcvFromFlags(code, ctx); + SetFpscrNzcvFromFlags(code, ctx); } -template -void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr(); if (ctx.FPSCR_FTZ()) { - DenormalsAreZero32(code, result, gpr_scratch.cvt32()); + DenormalsAreZero32(code, result, gpr_scratch.cvt32()); } code->cvtss2sd(result, result); if (ctx.FPSCR_FTZ()) { - FlushToZero64(code, result, gpr_scratch); + FlushToZero64(code, result, gpr_scratch); } if (ctx.FPSCR_DN()) { DefaultNaN64(code, result); @@ -368,18 +342,17 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr(); if (ctx.FPSCR_FTZ()) { - DenormalsAreZero64(code, result, gpr_scratch); + DenormalsAreZero64(code, result, gpr_scratch); } code->cvtsd2ss(result, result); if (ctx.FPSCR_FTZ()) { - FlushToZero32(code, result, gpr_scratch.cvt32()); + FlushToZero32(code, result, gpr_scratch.cvt32()); } if (ctx.FPSCR_DN()) { DefaultNaN32(code, result); @@ -388,8 +361,7 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32(); @@ -400,7 +372,7 @@ void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) { // Conversion to double is lossless, and allows for clamping. 
if (ctx.FPSCR_FTZ()) { - DenormalsAreZero32(code, from, to); + DenormalsAreZero32(code, from, to); } code->cvtss2sd(from, from); // First time is to set flags @@ -423,8 +395,7 @@ void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, to); } -template -void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32(); @@ -440,7 +411,7 @@ void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) { if (!ctx.FPSCR_RoundTowardsZero() && !round_towards_zero) { if (ctx.FPSCR_FTZ()) { - DenormalsAreZero32(code, from, to); + DenormalsAreZero32(code, from, to); } code->cvtss2sd(from, from); ZeroIfNaN64(code, from, xmm_scratch); @@ -460,7 +431,7 @@ void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 gpr_mask = ctx.reg_alloc.ScratchGpr().cvt32(); if (ctx.FPSCR_FTZ()) { - DenormalsAreZero32(code, from, to); + DenormalsAreZero32(code, from, to); } code->cvtss2sd(from, from); ZeroIfNaN64(code, from, xmm_scratch); @@ -486,8 +457,7 @@ void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, to); } -template -void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32(); @@ -498,7 +468,7 @@ void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) { // ARM saturates on conversion; this differs from x64 which returns a sentinel value. 
if (ctx.FPSCR_FTZ()) { - DenormalsAreZero64(code, from, gpr_scratch.cvt64()); + DenormalsAreZero64(code, from, gpr_scratch.cvt64()); } // First time is to set flags if (round_towards_zero) { @@ -520,8 +490,7 @@ void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, to); } -template -void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32(); @@ -535,7 +504,7 @@ void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) { if (!ctx.FPSCR_RoundTowardsZero() && !round_towards_zero) { if (ctx.FPSCR_FTZ()) { - DenormalsAreZero64(code, from, gpr_scratch.cvt64()); + DenormalsAreZero64(code, from, gpr_scratch.cvt64()); } ZeroIfNaN64(code, from, xmm_scratch); // Bring into SSE range @@ -554,7 +523,7 @@ void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 gpr_mask = ctx.reg_alloc.ScratchGpr().cvt32(); if (ctx.FPSCR_FTZ()) { - DenormalsAreZero64(code, from, gpr_scratch.cvt64()); + DenormalsAreZero64(code, from, gpr_scratch.cvt64()); } ZeroIfNaN64(code, from, xmm_scratch); // Generate masks if out-of-signed-range @@ -579,8 +548,7 @@ void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, to); } -template -void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 from = ctx.reg_alloc.UseGpr(args[0]).cvt32(); Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm(); @@ -592,8 +560,7 @@ void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, to); } -template -void EmitX64::EmitFPU32ToSingle(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitFPU32ToSingle(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]); Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm(); @@ -607,8 +574,7 @@ void EmitX64::EmitFPU32ToSingle(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, to); } -template -void EmitX64::EmitFPS32ToDouble(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitFPS32ToDouble(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 from = ctx.reg_alloc.UseGpr(args[0]).cvt32(); Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm(); @@ -620,8 +586,7 @@ void EmitX64::EmitFPS32ToDouble(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, to); } -template -void EmitX64::EmitFPU32ToDouble(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitFPU32ToDouble(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]); Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm(); @@ -637,9 +602,3 @@ void EmitX64::EmitFPU32ToDouble(EmitContext& ctx, IR::Inst* inst) { } // namespace BackendX64 } // namespace Dynarmic - -#include "backend_x64/a32_jitstate.h" -#include "backend_x64/a64_jitstate.h" - -template class Dynarmic::BackendX64::EmitX64; -template class Dynarmic::BackendX64::EmitX64; diff --git a/src/backend_x64/emit_x64_packed.cpp b/src/backend_x64/emit_x64_packed.cpp index 34e78fb4..30e31a26 100644 --- a/src/backend_x64/emit_x64_packed.cpp +++ b/src/backend_x64/emit_x64_packed.cpp @@ -17,8 
+17,7 @@ namespace BackendX64 { using namespace Xbyak::util; -template -void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); @@ -45,8 +44,7 @@ void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, xmm_a); } -template -void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); @@ -73,8 +71,7 @@ void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, xmm_a); } -template -void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); @@ -116,8 +113,7 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, xmm_a); } -template -void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); @@ -144,8 +140,7 @@ void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, xmm_a); } -template -void EmitX64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); @@ -168,8 +163,7 @@ void EmitX64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, xmm_a); } -template -void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); @@ -196,8 +190,7 @@ void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, xmm_a); } -template -void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); @@ -236,8 +229,7 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, xmm_a); } -template -void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); @@ -264,8 +256,7 @@ void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, xmm_a); } -template -void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (args[0].IsInXmm() || args[1].IsInXmm()) { @@ -308,8 +299,7 @@ void 
EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) { } } -template -void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (args[0].IsInXmm() || args[1].IsInXmm()) { @@ -347,8 +337,7 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) { } } -template -void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); @@ -377,8 +366,7 @@ void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -399,8 +387,7 @@ void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, xmm_a); } -template -void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); @@ -430,8 +417,7 @@ void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, minuend); } -template -void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); @@ -467,8 +453,7 @@ void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, minuend); } -template -void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -491,8 +476,7 @@ void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, minuend); } -template -void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -588,43 +572,35 @@ void EmitPackedSubAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, bool ctx.reg_alloc.DefineValue(inst, reg_a_hi); } -template -void EmitX64::EmitPackedAddSubU16(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedAddSubU16(EmitContext& ctx, IR::Inst* inst) { EmitPackedSubAdd(code, ctx, inst, true, false, false); } -template -void EmitX64::EmitPackedAddSubS16(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedAddSubS16(EmitContext& ctx, IR::Inst* inst) { EmitPackedSubAdd(code, ctx, inst, true, true, false); } -template -void EmitX64::EmitPackedSubAddU16(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitPackedSubAddU16(EmitContext& ctx, IR::Inst* inst) { EmitPackedSubAdd(code, ctx, inst, false, false, false); } -template -void 
+void EmitX64::EmitPackedSubAddS16(EmitContext& ctx, IR::Inst* inst) {
     EmitPackedSubAdd(code, ctx, inst, false, true, false);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingAddSubU16(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitPackedHalvingAddSubU16(EmitContext& ctx, IR::Inst* inst) {
     EmitPackedSubAdd(code, ctx, inst, true, false, true);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingAddSubS16(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitPackedHalvingAddSubS16(EmitContext& ctx, IR::Inst* inst) {
     EmitPackedSubAdd(code, ctx, inst, true, true, true);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingSubAddU16(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitPackedHalvingSubAddU16(EmitContext& ctx, IR::Inst* inst) {
     EmitPackedSubAdd(code, ctx, inst, false, false, true);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
     EmitPackedSubAdd(code, ctx, inst, false, true, true);
 }
 
@@ -639,53 +615,43 @@ static void EmitPackedOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* i
     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
     EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddusb);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedAddS8(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitPackedSaturatedAddS8(EmitContext& ctx, IR::Inst* inst) {
     EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddsb);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedSubU8(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitPackedSaturatedSubU8(EmitContext& ctx, IR::Inst* inst) {
     EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubusb);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedSubS8(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitPackedSaturatedSubS8(EmitContext& ctx, IR::Inst* inst) {
     EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubsb);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedAddU16(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitPackedSaturatedAddU16(EmitContext& ctx, IR::Inst* inst) {
     EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddusw);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedAddS16(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitPackedSaturatedAddS16(EmitContext& ctx, IR::Inst* inst) {
     EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddsw);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedSubU16(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitPackedSaturatedSubU16(EmitContext& ctx, IR::Inst* inst) {
     EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubusw);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) {
     EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubsw);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitPackedAbsDiffSumS8(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitPackedAbsDiffSumS8(EmitContext& ctx, IR::Inst* inst) {
     EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psadbw);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     size_t num_args_in_xmm = args[0].IsInXmm() + args[1].IsInXmm() + args[2].IsInXmm();
@@ -726,9 +692,3 @@ void EmitX64<JST>::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
 
 } // namespace BackendX64
 } // namespace Dynarmic
-
-#include "backend_x64/a32_jitstate.h"
-#include "backend_x64/a64_jitstate.h"
-
-template class Dynarmic::BackendX64::EmitX64<Dynarmic::BackendX64::A32JitState>;
-template class Dynarmic::BackendX64::EmitX64<Dynarmic::BackendX64::A64JitState>;
diff --git a/src/backend_x64/emit_x64_saturation.cpp b/src/backend_x64/emit_x64_saturation.cpp
index 4bd4ee7f..72ac906d 100644
--- a/src/backend_x64/emit_x64_saturation.cpp
+++ b/src/backend_x64/emit_x64_saturation.cpp
@@ -18,8 +18,7 @@ namespace BackendX64 {
 
 using namespace Xbyak::util;
 
-template <typename JST>
-void EmitX64<JST>::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
     auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -45,8 +44,7 @@ void EmitX64<JST>::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitSignedSaturatedSub(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitSignedSaturatedSub(EmitContext& ctx, IR::Inst* inst) {
     auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -72,8 +70,7 @@ void EmitX64<JST>::EmitSignedSaturatedSub(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
     auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -103,8 +100,7 @@ void EmitX64<JST>::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
     auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -155,9 +151,3 @@ void EmitX64<JST>::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
 
 } // namespace BackendX64
 } // namespace Dynarmic
-
-#include "backend_x64/a32_jitstate.h"
-#include "backend_x64/a64_jitstate.h"
-
-template class Dynarmic::BackendX64::EmitX64<Dynarmic::BackendX64::A32JitState>;
-template class Dynarmic::BackendX64::EmitX64<Dynarmic::BackendX64::A64JitState>;
diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index 2621a6a7..ef23f212 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -28,33 +28,27 @@ static void EmitVectorOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* i
     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitVectorAdd8(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorAdd8(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddb);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitVectorAdd16(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorAdd16(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddw);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitVectorAdd32(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorAdd32(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddd);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitVectorAdd64(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorAdd64(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddq);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pand);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitVectorLowerPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorLowerPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -72,8 +66,7 @@ void EmitX64<JST>::EmitVectorLowerPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitVectorLowerPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorLowerPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -96,8 +89,7 @@ void EmitX64<JST>::EmitVectorLowerPairedAdd16(EmitContext& ctx, IR::Inst* inst)
     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitVectorLowerPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorLowerPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -119,8 +111,7 @@ void EmitX64<JST>::EmitVectorLowerPairedAdd32(EmitContext& ctx, IR::Inst* inst)
     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitVectorPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -141,8 +132,7 @@ void EmitX64<JST>::EmitVectorPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, a);
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
@@ -172,8 +162,7 @@ void EmitX64<JST>::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
     }
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
@@ -201,8 +190,7 @@ void EmitX64<JST>::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
     }
 }
 
-template <typename JST>
-void EmitX64<JST>::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -219,9 +207,3 @@ void EmitX64<JST>::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
 
 } // namespace BackendX64
 } // namespace Dynarmic
-
-#include "backend_x64/a32_jitstate.h"
-#include "backend_x64/a64_jitstate.h"
-
-template class Dynarmic::BackendX64::EmitX64<Dynarmic::BackendX64::A32JitState>;
-template class Dynarmic::BackendX64::EmitX64<Dynarmic::BackendX64::A64JitState>;
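
Note: the mechanical change in these three files is identical. Each out-of-line
member definition drops its template header, and the explicit instantiations
for the two jit-state types at the bottom of each translation unit become
unnecessary once EmitX64 is a plain class. A minimal compilable sketch of the
before/after shape follows; EmitContext, IR::Inst and the jit-state types here
are illustrative stand-ins, not dynarmic's actual declarations.

// before_after_sketch.cpp -- illustrative only, all types are stubs.
struct EmitContext {};
namespace IR { struct Inst {}; }
struct A32JitState {};
struct A64JitState {};

namespace before {
// Templated emitter: every member definition carries its own template header...
template <typename JST>
struct EmitX64 {
    void EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst);
};

template <typename JST>
void EmitX64<JST>::EmitPackedAddU8(EmitContext&, IR::Inst*) { /* emit code */ }

// ...and each .cpp defining members ends with one explicit instantiation per
// jit-state type, or the linker never sees the emitted symbols.
template struct EmitX64<A32JitState>;
template struct EmitX64<A64JitState>;
} // namespace before

namespace after {
// Plain base-class emitter: ordinary member definitions, no instantiation list.
struct EmitX64 {
    void EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst);
};

void EmitX64::EmitPackedAddU8(EmitContext&, IR::Inst*) { /* emit code */ }
} // namespace after

int main() {
    EmitContext ctx;
    IR::Inst inst;
    after::EmitX64{}.EmitPackedAddU8(ctx, &inst);
}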