diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9c174d82..21a053a7 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -101,6 +101,7 @@ add_library(dynarmic
     frontend/ir/value.h
     ir_opt/a32_constant_memory_reads_pass.cpp
     ir_opt/a32_get_set_elimination_pass.cpp
+    ir_opt/a64_get_set_elimination_pass.cpp
     ir_opt/a64_merge_interpret_blocks.cpp
     ir_opt/constant_propagation_pass.cpp
     ir_opt/dead_code_elimination_pass.cpp
diff --git a/src/backend_x64/a64_interface.cpp b/src/backend_x64/a64_interface.cpp
index 233700f4..fa88aa46 100644
--- a/src/backend_x64/a64_interface.cpp
+++ b/src/backend_x64/a64_interface.cpp
@@ -181,6 +181,7 @@ private:
 
         // JIT Compile
         IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); });
+        Optimization::A64GetSetElimination(ir_block);
         Optimization::DeadCodeElimination(ir_block);
         Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
         // printf("%s\n", IR::DumpBlock(ir_block).c_str());
diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp
index e5cbb981..1b90a513 100644
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -114,6 +114,7 @@ bool Inst::ReadsFromCPSR() const {
     case Opcode::A32GetCFlag:
     case Opcode::A32GetVFlag:
     case Opcode::A32GetGEFlags:
+    case Opcode::A64GetCFlag:
     case Opcode::ConditionalSelect32:
     case Opcode::ConditionalSelect64:
         return true;
diff --git a/src/ir_opt/a64_get_set_elimination_pass.cpp b/src/ir_opt/a64_get_set_elimination_pass.cpp
new file mode 100644
index 00000000..3cbd78f9
--- /dev/null
+++ b/src/ir_opt/a64_get_set_elimination_pass.cpp
@@ -0,0 +1,171 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2016 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include <algorithm>
+#include <array>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "frontend/ir/basic_block.h"
+#include "frontend/ir/ir_emitter.h"
+#include "frontend/ir/value.h"
+#include "ir_opt/passes.h"
+
+namespace Dynarmic::Optimization {
+
+/// Removes redundant A64 register gets and sets within a single IR block.
+///
+/// The block is walked in order while tracking the last known value of each
+/// general-purpose register, vector register, SP and NZCV. A tracked set that
+/// is followed by another set of the same register is erased, and a get of a
+/// register whose value is already known has its uses replaced by that value
+/// (converted to the type of the get when the types differ).
+void A64GetSetElimination(IR::Block& block) {
+    using Iterator = IR::Block::iterator;
+
+    // Tracking state for one register.
+    struct RegisterInfo {
+        // Last known value; empty when nothing is known.
+        IR::Value register_value;
+        // True while a set instruction is recorded for this register.
+        bool set_instruction_present = false;
+        // The recorded set; only meaningful while set_instruction_present is true.
+        Iterator last_set_instruction;
+    };
+    std::array<RegisterInfo, 31> reg_info;
+    std::array<RegisterInfo, 32> vec_info;
+    RegisterInfo sp_info;
+    RegisterInfo nzcv_info;
+
+    // Records a set. A previously recorded set of the same register is
+    // invalidated and erased from the block, as this one overwrites it.
+    const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst) {
+        if (info.set_instruction_present) {
+            info.last_set_instruction->Invalidate();
+            block.Instructions().erase(info.last_set_instruction);
+        }
+
+        info.register_value = value;
+        info.set_instruction_present = true;
+        info.last_set_instruction = set_inst;
+    };
+
+    // Handles a get: either starts tracking the value it produces, or replaces
+    // its uses with the value already known for the register.
+    const auto do_get = [&block](RegisterInfo& info, Iterator get_inst) {
+        if (info.register_value.IsEmpty()) {
+            info.register_value = IR::Value(&*get_inst);
+            return;
+        }
+
+        if (!info.set_instruction_present) {
+            // The known value was produced by an earlier get. Opcodes missing
+            // from this table (e.g. A64GetSP) map to end(), so two such opcodes
+            // never take the early-out below.
+            static constexpr std::array<IR::Opcode, 5> ordering{
+                IR::Opcode::A64GetW,
+                IR::Opcode::A64GetX,
+                IR::Opcode::A64GetS,
+                IR::Opcode::A64GetD,
+                IR::Opcode::A64GetQ,
+            };
+            const auto source_order = std::find(ordering.begin(), ordering.end(), info.register_value.GetInst()->GetOpcode());
+            const auto dest_order = std::find(ordering.begin(), ordering.end(), get_inst->GetOpcode());
+            if (source_order < dest_order) {
+                // Zero extension of the value is not appropriate in this case.
+                // Replace currently known value with the new value.
+                info.register_value = IR::Value(&*get_inst);
+                return;
+            }
+        }
+
+        if (get_inst->GetType() == info.register_value.GetType()) {
+            get_inst->ReplaceUsesWith(info.register_value);
+            return;
+        }
+
+        // Types differ: emit a conversion at the get's position.
+        const IR::Value replacement = [&]() -> IR::Value {
+            IR::IREmitter ir{block};
+            ir.SetInsertionPoint(get_inst);
+
+            const IR::UAny value_to_convert{info.register_value};
+            switch (get_inst->GetType()) {
+            case IR::Type::U8:
+                return ir.LeastSignificantByte(ir.ZeroExtendToWord(value_to_convert));
+            case IR::Type::U16:
+                return ir.LeastSignificantHalf(ir.ZeroExtendToWord(value_to_convert));
+            case IR::Type::U32:
+                return ir.ZeroExtendToWord(value_to_convert);
+            case IR::Type::U64:
+                return ir.ZeroExtendToLong(value_to_convert);
+            case IR::Type::U128:
+                return ir.ZeroExtendToQuad(value_to_convert);
+            default:
+                UNREACHABLE();
+                return {};
+            }
+        }();
+        get_inst->ReplaceUsesWith(replacement);
+    };
+
+    for (auto inst = block.begin(); inst != block.end(); ++inst) {
+        switch (inst->GetOpcode()) {
+        case IR::Opcode::A64GetW:
+        case IR::Opcode::A64GetX: {
+            const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
+            do_get(reg_info.at(index), inst);
+            break;
+        }
+        case IR::Opcode::A64GetS:
+        case IR::Opcode::A64GetD:
+        case IR::Opcode::A64GetQ: {
+            const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
+            do_get(vec_info.at(index), inst);
+            break;
+        }
+        case IR::Opcode::A64GetSP: {
+            do_get(sp_info, inst);
+            break;
+        }
+        case IR::Opcode::A64SetW:
+        case IR::Opcode::A64SetX: {
+            const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
+            do_set(reg_info.at(index), inst->GetArg(1), inst);
+            break;
+        }
+        case IR::Opcode::A64SetS:
+        case IR::Opcode::A64SetD:
+        case IR::Opcode::A64SetQ: {
+            const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
+            do_set(vec_info.at(index), inst->GetArg(1), inst);
+            break;
+        }
+        case IR::Opcode::A64SetSP: {
+            do_set(sp_info, inst->GetArg(0), inst);
+            break;
+        }
+        case IR::Opcode::A64SetNZCV: {
+            do_set(nzcv_info, inst->GetArg(0), inst);
+            break;
+        }
+        default: {
+            // Stop tracking any state that this instruction reads or writes.
+            if (inst->ReadsFromCPSR() || inst->WritesToCPSR()) {
+                nzcv_info = {};
+            }
+            if (inst->ReadsFromCoreRegister() || inst->WritesToCoreRegister()) {
+                reg_info = {};
+                vec_info = {};
+                sp_info = {};
+            }
+            break;
+        }
+        }
+    }
+}
+
+} // namespace Dynarmic::Optimization
diff --git a/src/ir_opt/passes.h b/src/ir_opt/passes.h
index 14a45656..cc685ff3 100644
--- a/src/ir_opt/passes.h
+++ b/src/ir_opt/passes.h
@@ -17,6 +17,7 @@ namespace Dynarmic::Optimization {
 
 void A32GetSetElimination(IR::Block& block);
 void A32ConstantMemoryReads(IR::Block& block, const A32::UserCallbacks::Memory& memory_callbacks);
+void A64GetSetElimination(IR::Block& block);
 void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb);
 void ConstantPropagation(IR::Block& block);
 void DeadCodeElimination(IR::Block& block);