From 7d3b4f913b52d563eb6067ca0ea709468fe03de0 Mon Sep 17 00:00:00 2001 From: Merry Date: Sun, 5 Feb 2023 17:19:08 +0000 Subject: [PATCH] a32_get_set_elimination_pass: Fix bugs in A32 get/set algorithm --- .../arm64/verbose_debugging_output.cpp | 8 +- .../ir/opt/a32_get_set_elimination_pass.cpp | 341 +++++++++++------- tests/test_generator.cpp | 40 +- 3 files changed, 234 insertions(+), 155 deletions(-) diff --git a/src/dynarmic/backend/arm64/verbose_debugging_output.cpp b/src/dynarmic/backend/arm64/verbose_debugging_output.cpp index e9646885..aec0472f 100644 --- a/src/dynarmic/backend/arm64/verbose_debugging_output.cpp +++ b/src/dynarmic/backend/arm64/verbose_debugging_output.cpp @@ -17,8 +17,6 @@ using namespace oaknut::util; void EmitVerboseDebuggingOutput(oaknut::CodeGenerator& code, EmitContext& ctx) { code.SUB(SP, SP, sizeof(RegisterData)); - code.MRS(X0, oaknut::SystemReg::FPSR); - code.STR(X0, SP, offsetof(RegisterData, fpsr)); for (int i = 0; i < 30; i++) { if (i == 18) { continue; // Platform register @@ -32,9 +30,13 @@ void EmitVerboseDebuggingOutput(oaknut::CodeGenerator& code, EmitContext& ctx) { code.STR(X0, SP, offsetof(RegisterData, nzcv)); code.ADD(X0, SP, sizeof(RegisterData) + offsetof(StackLayout, spill)); code.STR(X0, SP, offsetof(RegisterData, spill)); + code.MRS(X0, oaknut::SystemReg::FPSR); + code.STR(X0, SP, offsetof(RegisterData, fpsr)); ctx.reg_alloc.EmitVerboseDebuggingOutput(); + code.LDR(X0, SP, offsetof(RegisterData, fpsr)); + code.MSR(oaknut::SystemReg::FPSR, X0); code.LDR(X0, SP, offsetof(RegisterData, nzcv)); code.MSR(oaknut::SystemReg::NZCV, X0); for (int i = 0; i < 32; i++) { @@ -46,8 +48,6 @@ void EmitVerboseDebuggingOutput(oaknut::CodeGenerator& code, EmitContext& ctx) { } code.LDR(oaknut::XReg{i}, SP, offsetof(RegisterData, x) + i * sizeof(u64)); } - code.LDR(X0, SP, offsetof(RegisterData, fpsr)); - code.MSR(oaknut::SystemReg::FPSR, X0); code.ADD(SP, SP, sizeof(RegisterData)); } diff --git a/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp b/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp index 95a020b7..5d7eb082 100644 --- a/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp +++ b/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp @@ -19,52 +19,45 @@ namespace Dynarmic::Optimization { -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { +namespace { + +void FlagsPass(IR::Block& block) { using Iterator = std::reverse_iterator; - struct RegisterInfo { + struct FlagInfo { bool set_not_required = false; bool has_value_request = false; Iterator value_request = {}; }; - struct ValuelessRegisterInfo { + struct ValuelessFlagInfo { bool set_not_required = false; }; - std::array reg_info; - std::array ext_reg_singles_info; - std::array ext_reg_doubles_info; - std::array ext_reg_vector_double_info; - std::array ext_reg_vector_quad_info; - ValuelessRegisterInfo nzcvq; - ValuelessRegisterInfo nzcv; - ValuelessRegisterInfo nz; - RegisterInfo c_flag; - RegisterInfo ge; + ValuelessFlagInfo nzcvq; + ValuelessFlagInfo nzcv; + ValuelessFlagInfo nz; + FlagInfo c_flag; + FlagInfo ge; - auto do_set = [&](RegisterInfo& info, IR::Value value, Iterator inst, std::initializer_list> dependants = {}) { + auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { if (info.has_value_request) { info.value_request->ReplaceUsesWith(value); } info.has_value_request = false; - if (info.set_not_required && std::all_of(dependants.begin(), dependants.end(), [](auto d) { return !d.get().has_value_request; })) { - inst->Invalidate(); - } - info.set_not_required = true; - - for (auto d : dependants) { - d.get() = {}; - } - }; - - auto do_set_valueless = [&](ValuelessRegisterInfo& info, Iterator inst) { if (info.set_not_required) { inst->Invalidate(); } info.set_not_required = true; }; - auto do_get = [](RegisterInfo& info, Iterator inst) { + auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_get = [](FlagInfo& info, Iterator inst) { if (info.has_value_request) { info.value_request->ReplaceUsesWith(IR::Value{&*inst}); } @@ -76,107 +69,6 @@ void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { switch (inst->GetOpcode()) { - case IR::Opcode::A32SetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - if (reg == A32::Reg::PC) { - break; - } - const auto reg_index = static_cast(reg); - do_set(reg_info[reg_index], inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - ASSERT(reg != A32::Reg::PC); - const size_t reg_index = static_cast(reg); - do_get(reg_info[reg_index], inst); - break; - } - case IR::Opcode::A32SetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_set(ext_reg_singles_info[reg_index], - inst->GetArg(1), - inst, - { - ext_reg_doubles_info[reg_index / 2], - ext_reg_vector_double_info[reg_index / 2], - ext_reg_vector_quad_info[reg_index / 4], - }); - break; - } - case IR::Opcode::A32GetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_get(ext_reg_singles_info[reg_index], inst); - break; - } - case IR::Opcode::A32SetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_set(ext_reg_doubles_info[reg_index], - inst->GetArg(1), - inst, - { - ext_reg_singles_info[reg_index * 2 + 0], - ext_reg_singles_info[reg_index * 2 + 1], - ext_reg_vector_double_info[reg_index], - ext_reg_vector_quad_info[reg_index / 2], - }); - break; - } - case IR::Opcode::A32GetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_get(ext_reg_doubles_info[reg_index], inst); - break; - } - case IR::Opcode::A32SetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - ir.SetInsertionPointBefore(std::prev(inst.base())); - const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); - - do_set(ext_reg_vector_double_info[reg_index], - stored_value, - inst, - { - ext_reg_singles_info[reg_index * 2 + 0], - ext_reg_singles_info[reg_index * 2 + 1], - ext_reg_doubles_info[reg_index], - ext_reg_vector_quad_info[reg_index / 2], - }); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - - do_set(ext_reg_vector_quad_info[reg_index], - inst->GetArg(1), - inst, - { - ext_reg_singles_info[reg_index * 4 + 0], - ext_reg_singles_info[reg_index * 4 + 1], - ext_reg_singles_info[reg_index * 4 + 2], - ext_reg_singles_info[reg_index * 4 + 3], - ext_reg_doubles_info[reg_index * 2 + 0], - ext_reg_doubles_info[reg_index * 2 + 1], - ext_reg_vector_double_info[reg_index * 2 + 0], - ext_reg_vector_double_info[reg_index * 2 + 1], - }); - } - break; - } - case IR::Opcode::A32GetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - do_get(ext_reg_vector_double_info[reg_index], inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_get(ext_reg_vector_quad_info[reg_index], inst); - } - break; - } case IR::Opcode::A32GetCFlag: { do_get(c_flag, inst); break; @@ -282,12 +174,192 @@ void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { c_flag = {}; ge = {}; } + break; + } + } + } +} + +void RegisterPass(IR::Block& block) { + using Iterator = IR::Block::iterator; + + struct RegInfo { + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array reg_info; + + const auto do_get = [](RegInfo& info, Iterator get_inst) { + if (info.register_value.IsEmpty()) { + info.register_value = IR::Value(&*get_inst); + return; + } + get_inst->ReplaceUsesWith(info.register_value); + }; + + const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { + if (info.last_set_instruction) { + (*info.last_set_instruction)->Invalidate(); + } + info = { + .register_value = value, + .last_set_instruction = set_inst, + }; + }; + + enum class ExtValueType { + Empty, + Single, + Double, + VectorDouble, + VectorQuad, + }; + struct ExtRegInfo { + ExtValueType value_type = ExtValueType::Empty; + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array ext_reg_info; + + const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { + if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = IR::Value(&*get_inst), + .last_set_instruction = std::nullopt, + }; + } + return; + } + get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); + }; + + const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { + if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + if (std::data(infos)[0].get().last_set_instruction) { + (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); + } + } + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = value, + .last_set_instruction = set_inst, + }; + } + }; + + // Location and version don't matter here. + A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + switch (inst->GetOpcode()) { + case IR::Opcode::A32GetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + ASSERT(reg != A32::Reg::PC); + const size_t reg_index = static_cast(reg); + do_get(reg_info[reg_index], inst); + break; + } + case IR::Opcode::A32SetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + if (reg == A32::Reg::PC) { + break; + } + const auto reg_index = static_cast(reg); + do_set(reg_info[reg_index], inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); + break; + } + case IR::Opcode::A32SetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + break; + } + case IR::Opcode::A32SetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst->GetArg(1), + inst); + break; + } + case IR::Opcode::A32GetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + do_ext_get(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_get(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst); + } + break; + } + case IR::Opcode::A32SetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + ir.SetInsertionPointAfter(inst); + const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); + do_ext_set(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + stored_value, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_set(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst->GetArg(1), + inst); + } + break; + } + default: { if (inst->ReadsFromCoreRegister() || inst->WritesToCoreRegister()) { reg_info = {}; - ext_reg_singles_info = {}; - ext_reg_doubles_info = {}; - ext_reg_vector_double_info = {}; - ext_reg_vector_quad_info = {}; + ext_reg_info = {}; } break; } @@ -295,4 +367,11 @@ void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { } } +} // namespace + +void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { + FlagsPass(block); + RegisterPass(block); +} + } // namespace Dynarmic::Optimization diff --git a/tests/test_generator.cpp b/tests/test_generator.cpp index 35f8ff39..90c2d635 100644 --- a/tests/test_generator.cpp +++ b/tests/test_generator.cpp @@ -406,26 +406,6 @@ void RunTestInstance(Dynarmic::A32::Jit& jit, const u32 num_words = initial_pc / sizeof(typename TestEnv::InstructionType); const u32 code_mem_size = num_words + static_cast(instructions.size()); - jit.ClearCache(); - - for (size_t jit_rerun_count = 0; jit_rerun_count < num_jit_reruns; ++jit_rerun_count) { - jit_env.code_mem.resize(code_mem_size); - std::fill(jit_env.code_mem.begin(), jit_env.code_mem.end(), TestEnv::infinite_loop); - - std::copy(instructions.begin(), instructions.end(), jit_env.code_mem.begin() + num_words); - jit_env.PadCodeMem(); - jit_env.modified_memory.clear(); - jit_env.interrupts.clear(); - - jit.Regs() = regs; - jit.ExtRegs() = vecs; - jit.SetFpscr(fpscr); - jit.SetCpsr(cpsr); - - jit_env.ticks_left = ticks_left; - jit.Run(); - } - fmt::print("instructions:"); for (auto instruction : instructions) { if constexpr (sizeof(decltype(instruction)) == 2) { @@ -449,6 +429,26 @@ void RunTestInstance(Dynarmic::A32::Jit& jit, fmt::print("initial_cpsr: {:08x}\n", cpsr); fmt::print("initial_fpcr: {:08x}\n", fpscr); + jit.ClearCache(); + + for (size_t jit_rerun_count = 0; jit_rerun_count < num_jit_reruns; ++jit_rerun_count) { + jit_env.code_mem.resize(code_mem_size); + std::fill(jit_env.code_mem.begin(), jit_env.code_mem.end(), TestEnv::infinite_loop); + + std::copy(instructions.begin(), instructions.end(), jit_env.code_mem.begin() + num_words); + jit_env.PadCodeMem(); + jit_env.modified_memory.clear(); + jit_env.interrupts.clear(); + + jit.Regs() = regs; + jit.ExtRegs() = vecs; + jit.SetFpscr(fpscr); + jit.SetCpsr(cpsr); + + jit_env.ticks_left = ticks_left; + jit.Run(); + } + fmt::print("final_regs:"); for (u32 i : jit.Regs()) { fmt::print(" {:08x}", i);