diff --git a/src/backend/x64/emit_x64_data_processing.cpp b/src/backend/x64/emit_x64_data_processing.cpp
index 6d847670..5a291758 100644
--- a/src/backend/x64/emit_x64_data_processing.cpp
+++ b/src/backend/x64/emit_x64_data_processing.cpp
@@ -4,6 +4,9 @@
  * General Public License version 2 or any later version.
  */
 
+#include <cstddef>
+#include <type_traits>
+
 #include "backend/x64/block_of_code.h"
 #include "backend/x64/emit_x64.h"
 #include "common/assert.h"
@@ -725,6 +728,112 @@ void EmitX64::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
+template<typename ShfitFT, typename BMI2FT>
+static void EmitMaskedShift32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, ShfitFT shift_fn, [[maybe_unused]] BMI2FT bmi2_shift) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto& operand_arg = args[0];
+    auto& shift_arg = args[1];
+
+    if (shift_arg.IsImmediate()) {
+        const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
+        const u8 shift = shift_arg.GetImmediateU8();
+
+        shift_fn(result, shift & 0x1F);
+
+        ctx.reg_alloc.DefineValue(inst, result);
+        return;
+    }
+
+    if constexpr (!std::is_same_v<BMI2FT, std::nullptr_t>) {
+        if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
+            const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
+            const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(operand_arg).cvt32();
+            const Xbyak::Reg32 shift = ctx.reg_alloc.UseGpr(shift_arg).cvt32();
+
+            (code.*bmi2_shift)(result, operand, shift);
+
+            ctx.reg_alloc.DefineValue(inst, result);
+            return;
+        }
+    }
+
+    ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
+    const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
+
+    shift_fn(result, code.cl);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+template<typename ShfitFT, typename BMI2FT>
+static void EmitMaskedShift64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, ShfitFT shift_fn, [[maybe_unused]] BMI2FT bmi2_shift) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto& operand_arg = args[0];
+    auto& shift_arg = args[1];
+
+    if (shift_arg.IsImmediate()) {
+        const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+        const u8 shift = shift_arg.GetImmediateU8();
+
+        shift_fn(result, shift & 0x3F);
+
+        ctx.reg_alloc.DefineValue(inst, result);
+        return;
+    }
+
+    if constexpr (!std::is_same_v<BMI2FT, std::nullptr_t>) {
+        if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
+            const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
+            const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(operand_arg);
+            const Xbyak::Reg64 shift = ctx.reg_alloc.UseGpr(shift_arg);
+
+            (code.*bmi2_shift)(result, operand, shift);
+
+            ctx.reg_alloc.DefineValue(inst, result);
+            return;
+        }
+    }
+
+    ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
+    const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+
+    shift_fn(result, code.cl);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitX64::EmitLogicalShiftLeftMasked32(EmitContext& ctx, IR::Inst* inst) {
+    EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx);
+}
+
+void EmitX64::EmitLogicalShiftLeftMasked64(EmitContext& ctx, IR::Inst* inst) {
+    EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx);
+}
+
+void EmitX64::EmitLogicalShiftRightMasked32(EmitContext& ctx, IR::Inst* inst) {
+    EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx);
+}
+
+void EmitX64::EmitLogicalShiftRightMasked64(EmitContext& ctx, IR::Inst* inst) {
+    EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx);
+}
+
+void EmitX64::EmitArithmeticShiftRightMasked32(EmitContext& ctx, IR::Inst* inst) {
+    EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx);
+}
+
+void EmitX64::EmitArithmeticShiftRightMasked64(EmitContext& ctx, IR::Inst* inst) {
+    EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx);
+}
+
+void EmitX64::EmitRotateRightMasked32(EmitContext& ctx, IR::Inst* inst) {
+    EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr);
+}
+
+void EmitX64::EmitRotateRightMasked64(EmitContext& ctx, IR::Inst* inst) {
+    EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr);
+}
+
 static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* carry_out) {
     if (carry_in.IsImmediate()) {
         return carry_out ? reg_alloc.ScratchGpr().cvt8() : Xbyak::Reg8{-1};
diff --git a/src/frontend/A64/translate/impl/data_processing_shift.cpp b/src/frontend/A64/translate/impl/data_processing_shift.cpp
index 158914a7..b63a861d 100644
--- a/src/frontend/A64/translate/impl/data_processing_shift.cpp
+++ b/src/frontend/A64/translate/impl/data_processing_shift.cpp
@@ -8,17 +8,13 @@
 
 namespace Dynarmic::A64 {
 
-static IR::U8 SanitizeShiftAmount(TranslatorVisitor& v, size_t datasize, const IR::U32U64& amount) {
-    return v.ir.LeastSignificantByte(v.ir.And(amount, v.I(datasize, datasize - 1)));
-}
-
 bool TranslatorVisitor::LSLV(bool sf, Reg Rm, Reg Rn, Reg Rd) {
     const size_t datasize = sf ? 64 : 32;
 
     const IR::U32U64 operand = X(datasize, Rn);
     const IR::U32U64 shift_amount = X(datasize, Rm);
 
-    const IR::U32U64 result = ir.LogicalShiftLeft(operand, SanitizeShiftAmount(*this, datasize, shift_amount));
+    const IR::U32U64 result = ir.LogicalShiftLeftMasked(operand, shift_amount);
 
     X(datasize, Rd, result);
     return true;
@@ -30,7 +26,7 @@ bool TranslatorVisitor::LSRV(bool sf, Reg Rm, Reg Rn, Reg Rd) {
     const IR::U32U64 operand = X(datasize, Rn);
     const IR::U32U64 shift_amount = X(datasize, Rm);
 
-    const IR::U32U64 result = ir.LogicalShiftRight(operand, SanitizeShiftAmount(*this, datasize, shift_amount));
+    const IR::U32U64 result = ir.LogicalShiftRightMasked(operand, shift_amount);
 
     X(datasize, Rd, result);
     return true;
@@ -42,7 +38,7 @@ bool TranslatorVisitor::ASRV(bool sf, Reg Rm, Reg Rn, Reg Rd) {
     const IR::U32U64 operand = X(datasize, Rn);
     const IR::U32U64 shift_amount = X(datasize, Rm);
 
-    const IR::U32U64 result = ir.ArithmeticShiftRight(operand, SanitizeShiftAmount(*this, datasize, shift_amount));
+    const IR::U32U64 result = ir.ArithmeticShiftRightMasked(operand, shift_amount);
 
     X(datasize, Rd, result);
     return true;
@@ -54,7 +50,7 @@ bool TranslatorVisitor::RORV(bool sf, Reg Rm, Reg Rn, Reg Rd) {
     const IR::U32U64 operand = X(datasize, Rn);
     const IR::U32U64 shift_amount = X(datasize, Rm);
 
-    const IR::U32U64 result = ir.RotateRight(operand, SanitizeShiftAmount(*this, datasize, shift_amount));
+    const IR::U32U64 result = ir.RotateRightMasked(operand, shift_amount);
 
     X(datasize, Rd, result);
     return true;
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index b6c93a3f..c00d3758 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -185,6 +185,42 @@ U32U64 IREmitter::RotateRight(const U32U64& value_in, const U8& shift_amount) {
     }
 }
 
+U32U64 IREmitter::LogicalShiftLeftMasked(const U32U64& value_in, const U32U64& shift_amount) {
+    ASSERT(value_in.GetType() == shift_amount.GetType());
+    if (value_in.GetType() == Type::U32) {
+        return Inst<U32>(Opcode::LogicalShiftLeftMasked32, value_in, shift_amount);
+    } else {
+        return Inst<U64>(Opcode::LogicalShiftLeftMasked64, value_in, shift_amount);
+    }
+}
+
+U32U64 IREmitter::LogicalShiftRightMasked(const U32U64& value_in, const U32U64& shift_amount) {
+    ASSERT(value_in.GetType() == shift_amount.GetType());
+    if (value_in.GetType() == Type::U32) {
+        return Inst<U32>(Opcode::LogicalShiftRightMasked32, value_in, shift_amount);
+    } else {
+        return Inst<U64>(Opcode::LogicalShiftRightMasked64, value_in, shift_amount);
+    }
+}
+
+U32U64 IREmitter::ArithmeticShiftRightMasked(const U32U64& value_in, const U32U64& shift_amount) {
+    ASSERT(value_in.GetType() == shift_amount.GetType());
+    if (value_in.GetType() == Type::U32) {
+        return Inst<U32>(Opcode::ArithmeticShiftRightMasked32, value_in, shift_amount);
+    } else {
+        return Inst<U64>(Opcode::ArithmeticShiftRightMasked64, value_in, shift_amount);
+    }
+}
+
+U32U64 IREmitter::RotateRightMasked(const U32U64& value_in, const U32U64& shift_amount) {
+    ASSERT(value_in.GetType() == shift_amount.GetType());
+    if (value_in.GetType() == Type::U32) {
+        return Inst<U32>(Opcode::RotateRightMasked32, value_in, shift_amount);
+    } else {
+        return Inst<U64>(Opcode::RotateRightMasked64, value_in, shift_amount);
+    }
+}
+
 ResultAndCarryAndOverflow IREmitter::AddWithCarry(const U32& a, const U32& b, const U1& carry_in) {
     const auto result = Inst<U32>(Opcode::Add32, a, b, carry_in);
     const auto carry_out = Inst<U1>(Opcode::GetCarryFromOp, result);
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 940a61e0..f984d039 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -114,6 +114,10 @@ public:
     U32U64 LogicalShiftRight(const U32U64& value_in, const U8& shift_amount);
     U32U64 ArithmeticShiftRight(const U32U64& value_in, const U8& shift_amount);
     U32U64 RotateRight(const U32U64& value_in, const U8& shift_amount);
+    U32U64 LogicalShiftLeftMasked(const U32U64& value_in, const U32U64& shift_amount);
+    U32U64 LogicalShiftRightMasked(const U32U64& value_in, const U32U64& shift_amount);
+    U32U64 ArithmeticShiftRightMasked(const U32U64& value_in, const U32U64& shift_amount);
+    U32U64 RotateRightMasked(const U32U64& value_in, const U32U64& shift_amount);
     ResultAndCarry RotateRightExtended(const U32& value_in, const U1& carry_in);
     ResultAndCarryAndOverflow AddWithCarry(const U32& a, const U32& b, const U1& carry_in);
     ResultAndCarryAndOverflow SubWithCarry(const U32& a, const U32& b, const U1& carry_in);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 8df11d9e..a2a90a49 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -113,6 +113,14 @@ OPCODE(ArithmeticShiftRight64, U64, U64,
 OPCODE(RotateRight32, U32, U32, U8, U1 )
 OPCODE(RotateRight64, U64, U64, U8 )
 OPCODE(RotateRightExtended, U32, U32, U1 )
+OPCODE(LogicalShiftLeftMasked32, U32, U32, U32 )
+OPCODE(LogicalShiftLeftMasked64, U64, U64, U64 )
+OPCODE(LogicalShiftRightMasked32, U32, U32, U32 )
+OPCODE(LogicalShiftRightMasked64, U64, U64, U64 )
+OPCODE(ArithmeticShiftRightMasked32, U32, U32, U32 )
+OPCODE(ArithmeticShiftRightMasked64, U64, U64, U64 )
+OPCODE(RotateRightMasked32, U32, U32, U32 )
+OPCODE(RotateRightMasked64, U64, U64, U64 )
 OPCODE(Add32, U32, U32, U32, U1 )
 OPCODE(Add64, U64, U64, U64, U1 )
 OPCODE(Sub32, U32, U32, U32, U1 )
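
Note (not part of the patch): the new *Masked IR opcodes take a shift amount of the same width as the operand and reduce it modulo the operand width. This matches the AArch64 LSLV/LSRV/ASRV/RORV semantics and also the count-masking behaviour of the x86 shl/shr/sar/ror and BMI2 shlx/shrx/sarx instructions, which is why the immediate paths above apply "shift & 0x1F" / "shift & 0x3F" and why the frontend no longer needs SanitizeShiftAmount. A minimal C++ sketch of the intended semantics follows; the helper names are illustrative only and do not appear in the patch.

#include <cstdint>

// Illustrative reference semantics of the masked shift opcodes (not code from
// the patch): the shift amount is reduced modulo the operand width.
constexpr std::uint32_t LogicalShiftLeftMasked32Ref(std::uint32_t value, std::uint32_t amount) {
    return value << (amount & 0x1F);  // amount reduced mod 32
}

constexpr std::uint64_t RotateRightMasked64Ref(std::uint64_t value, std::uint64_t amount) {
    const unsigned n = static_cast<unsigned>(amount & 0x3F);  // amount reduced mod 64
    return n == 0 ? value : (value >> n) | (value << (64 - n));
}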