From 08b123feb57582a27b15f407e9969be4a8e57892 Mon Sep 17 00:00:00 2001
From: Merry
Date: Sat, 6 Aug 2022 14:55:44 +0100
Subject: [PATCH] IR: Modify VectorSignedSaturatedShiftLeftUnsigned to only
 accept immediate shift amounts

---
 src/dynarmic/backend/x64/emit_x64_vector.cpp  | 47 +++++++++++++------
 .../translate/impl/asimd_two_regs_shift.cpp   |  2 +-
 .../impl/simd_scalar_shift_by_immediate.cpp   |  4 +-
 .../impl/simd_shift_by_immediate.cpp          |  2 +-
 src/dynarmic/ir/ir_emitter.cpp                | 11 +++--
 src/dynarmic/ir/ir_emitter.h                  |  2 +-
 src/dynarmic/ir/opcodes.inc                   |  8 ++--
 7 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/backend/x64/emit_x64_vector.cpp
index 11c7ee32..b402b217 100644
--- a/src/dynarmic/backend/x64/emit_x64_vector.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_vector.cpp
@@ -129,6 +129,33 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
+template<typename Lambda>
+static void EmitTwoArgumentFallbackWithSaturationAndImmediate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
+    const auto fn = static_cast<mcl::equivalent_function_type<Lambda>*>(lambda);
+    constexpr u32 stack_space = 2 * 16;
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
+    const u8 arg2 = args[1].GetImmediateU8();
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    ctx.reg_alloc.EndOfAllocScope();
+
+    ctx.reg_alloc.HostCall(nullptr);
+    ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
+    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
+    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
+
+    code.movaps(xword[code.ABI_PARAM2], arg1);
+    code.mov(code.ABI_PARAM3, arg2);
+    code.CallFunction(fn);
+    code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
+
+    ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
+
+    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
 template<typename Lambda>
 static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
     const auto fn = static_cast<mcl::equivalent_function_type<Lambda>*>(lambda);
@@ -4436,27 +4463,19 @@ void EmitX64::EmitVectorSignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* i
 }
 
 template<typename T, typename U = std::make_unsigned_t<T>>
-static bool VectorSignedSaturatedShiftLeftUnsigned(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
+static bool VectorSignedSaturatedShiftLeftUnsigned(VectorArray<T>& dst, const VectorArray<T>& data, u8 shift_amount) {
     static_assert(std::is_signed_v<T>, "T must be signed.");
 
-    constexpr size_t bit_size_minus_one = mcl::bitsizeof<T> - 1;
-
     bool qc_flag = false;
     for (size_t i = 0; i < dst.size(); i++) {
         const T element = data[i];
-        const T shift = std::clamp<T>(static_cast<T>(mcl::bit::sign_extend<8>(static_cast<u8>(shift_values[i] & 0xFF))),
-                                      -static_cast<T>(bit_size_minus_one), std::numeric_limits<T>::max());
+        const T shift = static_cast<T>(shift_amount);
 
         if (element == 0) {
             dst[i] = 0;
         } else if (element < 0) {
             dst[i] = 0;
             qc_flag = true;
-        } else if (shift < 0) {
-            dst[i] = static_cast<T>(element >> -shift);
-        } else if (static_cast<U>(shift) > bit_size_minus_one) {
-            dst[i] = static_cast<T>(std::numeric_limits<U>::max());
-            qc_flag = true;
         } else {
             const U shifted = static_cast<U>(element) << static_cast<U>(shift);
             const U shifted_test = shifted >> static_cast<U>(shift);
@@ -4474,19 +4493,19 @@ static bool VectorSignedSaturatedShiftLeftUnsigned(VectorArray<T>& dst, const Ve
 }
 
 void EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned8(EmitContext& ctx, IR::Inst* inst) {
-    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned<s8>);
+    EmitTwoArgumentFallbackWithSaturationAndImmediate(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned<s8>);
 }
 
 void EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned16(EmitContext& ctx, IR::Inst* inst) {
-    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned<s16>);
+    EmitTwoArgumentFallbackWithSaturationAndImmediate(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned<s16>);
 }
 
 void EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned32(EmitContext& ctx, IR::Inst* inst) {
-    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned<s32>);
+    EmitTwoArgumentFallbackWithSaturationAndImmediate(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned<s32>);
 }
 
 void EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned64(EmitContext& ctx, IR::Inst* inst) {
-    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned<s64>);
+    EmitTwoArgumentFallbackWithSaturationAndImmediate(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned<s64>);
 }
 
 void EmitX64::EmitVectorSub8(EmitContext& ctx, IR::Inst* inst) {
diff --git a/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_shift.cpp b/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_shift.cpp
index aec91d0c..96c18b4b 100644
--- a/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_shift.cpp
+++ b/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_shift.cpp
@@ -236,7 +236,7 @@ bool TranslatorVisitor::asimd_VQSHL(bool U, bool D, size_t imm6, size_t Vd, bool
             return ir.VectorUnsignedSaturatedShiftLeft(esize, reg_m, shift_vec);
         }
 
-        return ir.VectorSignedSaturatedShiftLeftUnsigned(esize, reg_m, shift_vec);
+        return ir.VectorSignedSaturatedShiftLeftUnsigned(esize, reg_m, shift_amount);
     }
     if (op) {
         return ir.VectorSignedSaturatedShiftLeft(esize, reg_m, shift_vec);
diff --git a/src/dynarmic/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp b/src/dynarmic/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp
index 042213de..00f5e993 100644
--- a/src/dynarmic/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp
+++ b/src/dynarmic/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp
@@ -47,7 +47,7 @@ bool SaturatingShiftLeft(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn,
     const IR::U128 operand = v.ir.ZeroExtendToQuad(v.V_scalar(esize, Vn));
     const IR::U128 shift = v.ir.ZeroExtendToQuad(v.I(esize, shift_amount));
 
-    const IR::U128 result = [&v, esize, operand, shift, type] {
+    const IR::U128 result = [&v, esize, operand, shift, type, shift_amount] {
         if (type == SaturatingShiftLeftType::Signed) {
             return v.ir.VectorSignedSaturatedShiftLeft(esize, operand, shift);
         }
@@ -56,7 +56,7 @@ bool SaturatingShiftLeft(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn,
             return v.ir.VectorUnsignedSaturatedShiftLeft(esize, operand, shift);
         }
 
-        return v.ir.VectorSignedSaturatedShiftLeftUnsigned(esize, operand, shift);
+        return v.ir.VectorSignedSaturatedShiftLeftUnsigned(esize, operand, shift_amount);
     }();
 
     v.ir.SetQ(Vd, result);
diff --git a/src/dynarmic/frontend/A64/translate/impl/simd_shift_by_immediate.cpp b/src/dynarmic/frontend/A64/translate/impl/simd_shift_by_immediate.cpp
index ee36423f..b1c2593a 100644
--- a/src/dynarmic/frontend/A64/translate/impl/simd_shift_by_immediate.cpp
+++ b/src/dynarmic/frontend/A64/translate/impl/simd_shift_by_immediate.cpp
@@ -182,7 +182,7 @@ bool SaturatingShiftLeft(TranslatorVisitor& v, bool Q, Imm<4> immh, Imm<3> immb,
             return v.ir.VectorUnsignedSaturatedShiftLeft(esize, operand, shift_vec);
         }
 
-        return v.ir.VectorSignedSaturatedShiftLeftUnsigned(esize, operand, shift_vec);
+        return v.ir.VectorSignedSaturatedShiftLeftUnsigned(esize, operand, shift);
     }();
 
     v.V(datasize, Vd, result);
diff --git a/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/ir/ir_emitter.cpp
index 1901647e..9d1746b3 100644
--- a/src/dynarmic/ir/ir_emitter.cpp
+++ b/src/dynarmic/ir/ir_emitter.cpp
@@ -1858,16 +1858,17 @@ U128 IREmitter::VectorSignedSaturatedShiftLeft(size_t esize, const U128& a, cons
     UNREACHABLE();
 }
 
-U128 IREmitter::VectorSignedSaturatedShiftLeftUnsigned(size_t esize, const U128& a, const U128& b) {
+U128 IREmitter::VectorSignedSaturatedShiftLeftUnsigned(size_t esize, const U128& a, u8 shift_amount) {
+    ASSERT(shift_amount < esize);
     switch (esize) {
     case 8:
-        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeftUnsigned8, a, b);
+        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeftUnsigned8, a, Imm8(shift_amount));
     case 16:
-        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeftUnsigned16, a, b);
+        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeftUnsigned16, a, Imm8(shift_amount));
     case 32:
-        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeftUnsigned32, a, b);
+        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeftUnsigned32, a, Imm8(shift_amount));
     case 64:
-        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeftUnsigned64, a, b);
+        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeftUnsigned64, a, Imm8(shift_amount));
     }
     UNREACHABLE();
 }
diff --git a/src/dynarmic/ir/ir_emitter.h b/src/dynarmic/ir/ir_emitter.h
index 0152ce31..6f6b499c 100644
--- a/src/dynarmic/ir/ir_emitter.h
+++ b/src/dynarmic/ir/ir_emitter.h
@@ -304,7 +304,7 @@ public:
     U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
     U128 VectorSignedSaturatedNeg(size_t esize, const U128& a);
     U128 VectorSignedSaturatedShiftLeft(size_t esize, const U128& a, const U128& b);
-    U128 VectorSignedSaturatedShiftLeftUnsigned(size_t esize, const U128& a, const U128& b);
+    U128 VectorSignedSaturatedShiftLeftUnsigned(size_t esize, const U128& a, u8 shift_amount);
     U128 VectorSub(size_t esize, const U128& a, const U128& b);
    Table VectorTable(std::vector<U64> values);
    Table VectorTable(std::vector<U128> values);
diff --git a/src/dynarmic/ir/opcodes.inc b/src/dynarmic/ir/opcodes.inc
index f48afa63..1af47b0a 100644
--- a/src/dynarmic/ir/opcodes.inc
+++ b/src/dynarmic/ir/opcodes.inc
@@ -505,10 +505,10 @@ OPCODE(VectorSignedSaturatedShiftLeft8,              U128,           U128
 OPCODE(VectorSignedSaturatedShiftLeft16,             U128,           U128,           U128                            )
 OPCODE(VectorSignedSaturatedShiftLeft32,             U128,           U128,           U128                            )
 OPCODE(VectorSignedSaturatedShiftLeft64,             U128,           U128,           U128                            )
-OPCODE(VectorSignedSaturatedShiftLeftUnsigned8,      U128,           U128,           U128                            )
-OPCODE(VectorSignedSaturatedShiftLeftUnsigned16,     U128,           U128,           U128                            )
-OPCODE(VectorSignedSaturatedShiftLeftUnsigned32,     U128,           U128,           U128                            )
-OPCODE(VectorSignedSaturatedShiftLeftUnsigned64,     U128,           U128,           U128                            )
+OPCODE(VectorSignedSaturatedShiftLeftUnsigned8,      U128,           U128,           U8                              )
+OPCODE(VectorSignedSaturatedShiftLeftUnsigned16,     U128,           U128,           U8                              )
+OPCODE(VectorSignedSaturatedShiftLeftUnsigned32,     U128,           U128,           U8                              )
+OPCODE(VectorSignedSaturatedShiftLeftUnsigned64,     U128,           U128,           U8                              )
 OPCODE(VectorSub8,                                   U128,           U128,           U128                            )
 OPCODE(VectorSub16,                                  U128,           U128,           U128                            )
 OPCODE(VectorSub32,                                  U128,           U128,           U128                            )
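
A standalone sketch, not part of the patch: it mirrors the per-lane behaviour that the simplified VectorSignedSaturatedShiftLeftUnsigned fallback above performs once the shift is a plain immediate, shown here for one element type (T = s16, U = u16). The function and variable names are illustrative, and the saturating tail after the round-trip test is an assumption based on the branch visible in the hunk, since the rest of the loop body lies outside the diff context.

    #include <cstdint>
    #include <cstdio>
    #include <limits>

    // Per-lane SQSHLU-style behaviour for a 16-bit lane: shift a signed element
    // left by an immediate (assumed < 16, matching the new ASSERT(shift_amount < esize))
    // and saturate to the unsigned range, reporting QC when saturation occurs.
    static uint16_t sqshlu_lane_s16(int16_t element, uint8_t shift_amount, bool& qc_flag) {
        if (element == 0) {
            return 0;
        }
        if (element < 0) {
            // Negative inputs saturate to zero and set QC.
            qc_flag = true;
            return 0;
        }
        const uint16_t shifted = static_cast<uint16_t>(static_cast<uint16_t>(element) << shift_amount);
        const uint16_t shifted_test = static_cast<uint16_t>(shifted >> shift_amount);
        if (shifted_test != static_cast<uint16_t>(element)) {
            // The value no longer round-trips through the shift: overflow, saturate to U's max.
            qc_flag = true;
            return std::numeric_limits<uint16_t>::max();
        }
        return shifted;
    }

    int main() {
        bool qc = false;
        std::printf("0x%04X\n", static_cast<unsigned>(sqshlu_lane_s16(0x0123, 4, qc)));  // 0x1230, no saturation
        std::printf("0x%04X\n", static_cast<unsigned>(sqshlu_lane_s16(0x4000, 4, qc)));  // 0xFFFF, saturated
        std::printf("0x%04X\n", static_cast<unsigned>(sqshlu_lane_s16(-5, 1, qc)));      // 0x0000, saturated
        std::printf("qc=%d\n", qc);                                                      // qc=1
    }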