From a97105c29638217594196251bae31e00a42dd941 Mon Sep 17 00:00:00 2001 From: Merry Date: Wed, 3 Aug 2022 23:07:42 +0100 Subject: [PATCH] IR: Split VectorSignedSaturatedDoublingMultiply into VectorSignedSaturatedDoublingMultiply{High,HighRounding} --- .../backend/arm64/emit_arm64_vector.cpp | 20 ++- src/dynarmic/backend/x64/emit_x64_vector.cpp | 146 +++++++++--------- .../A32/translate/impl/asimd_three_regs.cpp | 7 +- .../translate/impl/asimd_two_regs_scalar.cpp | 18 +-- .../translate/impl/simd_scalar_three_same.cpp | 3 +- .../impl/simd_scalar_x_indexed_element.cpp | 3 +- .../A64/translate/impl/simd_three_same.cpp | 5 +- .../impl/simd_vector_x_indexed_element.cpp | 5 +- src/dynarmic/ir/ir_emitter.cpp | 34 ++-- src/dynarmic/ir/ir_emitter.h | 3 +- src/dynarmic/ir/microinstruction.cpp | 6 +- src/dynarmic/ir/opcodes.inc | 6 +- 12 files changed, 131 insertions(+), 125 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_vector.cpp b/src/dynarmic/backend/arm64/emit_arm64_vector.cpp index 12dd98f5..4abaeeb9 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_vector.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_vector.cpp @@ -1359,7 +1359,7 @@ void EmitIR(oaknut::CodeG } template<> -void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { (void)code; (void)ctx; (void)inst; @@ -1367,7 +1367,23 @@ void EmitIR(oaknut::CodeGen } template<> -void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { (void)code; (void)ctx; (void)inst; diff --git a/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/backend/x64/emit_x64_vector.cpp index c7293e02..63d17e67 100644 --- a/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -3812,10 +3812,8 @@ void EmitX64::EmitVectorSignedSaturatedAccumulateUnsigned64(EmitContext& ctx, IR EmitVectorSignedSaturatedAccumulateUnsigned<64>(code, ctx, inst); } -void EmitX64::EmitVectorSignedSaturatedDoublingMultiply16(EmitContext& ctx, IR::Inst* inst) { - const auto upper_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetUpperFromOp); - const auto lower_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetLowerFromOp); - +template +static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); @@ -3839,52 +3837,53 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply16(EmitContext& ctx, IR:: ctx.reg_alloc.Release(x); ctx.reg_alloc.Release(y); - if (lower_inst) { - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(); - - if (code.HasHostFeature(HostFeature::AVX)) { - code.vpaddw(lower_result, lower_tmp, lower_tmp); - } else { - code.movdqa(lower_result, lower_tmp); - code.paddw(lower_result, lower_result); - } - - ctx.reg_alloc.DefineValue(lower_inst, lower_result); - ctx.EraseInstruction(lower_inst); - } - - const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); if (code.HasHostFeature(HostFeature::AVX)) { - code.vpsrlw(lower_tmp, lower_tmp, 15); + if constexpr (is_rounding) { + code.vpsrlw(lower_tmp, lower_tmp, 14); + code.vpaddw(lower_tmp, lower_tmp, code.MConst(xword, 0x0001000100010001, 0x0001000100010001)); + code.vpsrlw(lower_tmp, lower_tmp, 1); + } else { + code.vpsrlw(lower_tmp, lower_tmp, 15); + } code.vpaddw(upper_tmp, upper_tmp, upper_tmp); - code.vpor(upper_result, upper_tmp, lower_tmp); - code.vpcmpeqw(upper_tmp, upper_result, code.MConst(xword, 0x8000800080008000, 0x8000800080008000)); - code.vpxor(upper_result, upper_result, upper_tmp); + code.vpaddw(result, upper_tmp, lower_tmp); + code.vpcmpeqw(upper_tmp, result, code.MConst(xword, 0x8000800080008000, 0x8000800080008000)); + code.vpxor(result, result, upper_tmp); } else { code.paddw(upper_tmp, upper_tmp); - code.psrlw(lower_tmp, 15); - code.movdqa(upper_result, upper_tmp); - code.por(upper_result, lower_tmp); + if constexpr (is_rounding) { + code.psrlw(lower_tmp, 14); + code.paddw(lower_tmp, code.MConst(xword, 0x0001000100010001, 0x0001000100010001)); + code.psrlw(lower_tmp, 1); + } else { + code.psrlw(lower_tmp, 15); + } + code.movdqa(result, upper_tmp); + code.paddw(result, lower_tmp); code.movdqa(upper_tmp, code.MConst(xword, 0x8000800080008000, 0x8000800080008000)); - code.pcmpeqw(upper_tmp, upper_result); - code.pxor(upper_result, upper_tmp); + code.pcmpeqw(upper_tmp, result); + code.pxor(result, upper_tmp); } const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); code.pmovmskb(bit, upper_tmp); code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); - if (upper_inst) { - ctx.reg_alloc.DefineValue(upper_inst, upper_result); - ctx.EraseInstruction(upper_inst); - } + ctx.reg_alloc.DefineValue(inst, result); } -void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR::Inst* inst) { - const auto upper_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetUpperFromOp); - const auto lower_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetLowerFromOp); +void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyHigh16(EmitContext& ctx, IR::Inst* inst) { + EmitVectorSignedSaturatedDoublingMultiply16(code, ctx, inst); +} +void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyHighRounding16(EmitContext& ctx, IR::Inst* inst) { + EmitVectorSignedSaturatedDoublingMultiply16(code, ctx, inst); +} + +template +void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX)) { @@ -3904,37 +3903,29 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR:: code.vpaddq(odds, odds, odds); code.vpaddq(even, even, even); - const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code.vpsrlq(upper_result, odds, 32); - code.vblendps(upper_result, upper_result, even, 0b1010); + if constexpr (is_rounding) { + code.vmovdqa(result, code.MConst(xword, 0x0000000080000000, 0x0000000080000000)); + code.vpaddq(odds, odds, result); + code.vpaddq(even, even, result); + } + + code.vpsrlq(result, odds, 32); + code.vblendps(result, result, even, 0b1010); const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); - code.vpcmpeqd(mask, upper_result, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); - code.vpxor(upper_result, upper_result, mask); + code.vpcmpeqd(mask, result, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); + code.vpxor(result, result, mask); code.pmovmskb(bit, mask); code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.Release(mask); ctx.reg_alloc.Release(bit); - if (upper_inst) { - ctx.reg_alloc.DefineValue(upper_inst, upper_result); - ctx.EraseInstruction(upper_inst); - } - - if (lower_inst) { - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(); - - code.vpsllq(lower_result, even, 32); - code.vblendps(lower_result, lower_result, odds, 0b0101); - - ctx.reg_alloc.DefineValue(lower_inst, lower_result); - ctx.EraseInstruction(lower_inst); - } - + ctx.reg_alloc.DefineValue(inst, result); return; } @@ -3942,8 +3933,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR:: const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm sign_correction = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); // calculate sign correction code.movdqa(tmp, x); @@ -3966,35 +3956,37 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR:: code.paddq(tmp, tmp); code.paddq(x, x); + if constexpr (is_rounding) { + code.movdqa(result, code.MConst(xword, 0x0000000080000000, 0x0000000080000000)); + code.paddq(tmp, result); + code.paddq(x, result); + } + // put everything into place - code.pcmpeqw(upper_result, upper_result); - code.pcmpeqw(lower_result, lower_result); - code.psllq(upper_result, 32); - code.psrlq(lower_result, 32); - code.pand(upper_result, x); - code.pand(lower_result, tmp); + code.pcmpeqw(result, result); + code.psllq(result, 32); + code.pand(result, x); code.psrlq(tmp, 32); - code.psllq(x, 32); - code.por(upper_result, tmp); - code.por(lower_result, x); - code.psubd(upper_result, sign_correction); + code.por(result, tmp); + code.psubd(result, sign_correction); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); code.movdqa(tmp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); - code.pcmpeqd(tmp, upper_result); - code.pxor(upper_result, tmp); + code.pcmpeqd(tmp, result); + code.pxor(result, tmp); code.pmovmskb(bit, tmp); code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); - if (upper_inst) { - ctx.reg_alloc.DefineValue(upper_inst, upper_result); - ctx.EraseInstruction(upper_inst); - } - if (lower_inst) { - ctx.reg_alloc.DefineValue(lower_inst, lower_result); - ctx.EraseInstruction(lower_inst); - } + ctx.reg_alloc.DefineValue(inst, result); +} + +void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyHigh32(EmitContext& ctx, IR::Inst* inst) { + EmitVectorSignedSaturatedDoublingMultiply32(code, ctx, inst); +} + +void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyHighRounding32(EmitContext& ctx, IR::Inst* inst) { + EmitVectorSignedSaturatedDoublingMultiply32(code, ctx, inst); } void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, IR::Inst* inst) { diff --git a/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp b/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp index f6da2700..ead53747 100644 --- a/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp +++ b/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp @@ -663,9 +663,9 @@ bool TranslatorVisitor::asimd_VQDMULH(bool D, size_t sz, size_t Vn, size_t Vd, b const auto reg_n = ir.GetVector(n); const auto reg_m = ir.GetVector(m); - const auto result = ir.VectorSignedSaturatedDoublingMultiply(esize, reg_n, reg_m); + const auto result = ir.VectorSignedSaturatedDoublingMultiplyHigh(esize, reg_n, reg_m); - ir.SetVector(d, result.upper); + ir.SetVector(d, result); return true; } @@ -685,8 +685,7 @@ bool TranslatorVisitor::asimd_VQRDMULH(bool D, size_t sz, size_t Vn, size_t Vd, const auto reg_n = ir.GetVector(n); const auto reg_m = ir.GetVector(m); - const auto multiply = ir.VectorSignedSaturatedDoublingMultiply(esize, reg_n, reg_m); - const auto result = ir.VectorAdd(esize, multiply.upper, ir.VectorLogicalShiftRight(esize, multiply.lower, static_cast(esize - 1))); + const auto result = ir.VectorSignedSaturatedDoublingMultiplyHighRounding(esize, reg_n, reg_m); ir.SetVector(d, result); return true; diff --git a/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_scalar.cpp b/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_scalar.cpp index 534612fe..8d1876a0 100644 --- a/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_scalar.cpp +++ b/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_scalar.cpp @@ -106,7 +106,7 @@ bool ScalarMultiplyLong(TranslatorVisitor& v, bool U, bool D, size_t sz, size_t return true; } -bool ScalarMultiplyReturnHigh(TranslatorVisitor& v, bool Q, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool M, size_t Vm, Rounding round) { +bool ScalarMultiplyDoublingReturnHigh(TranslatorVisitor& v, bool Q, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool M, size_t Vm, Rounding round) { if (sz == 0b11) { return v.DecodeError(); } @@ -126,15 +126,9 @@ bool ScalarMultiplyReturnHigh(TranslatorVisitor& v, bool Q, bool D, size_t sz, s const auto reg_n = v.ir.GetVector(n); const auto reg_m = v.ir.VectorBroadcastElement(esize, v.ir.GetVector(m), index); - const auto result = [&] { - const auto tmp = v.ir.VectorSignedSaturatedDoublingMultiply(esize, reg_n, reg_m); - - if (round == Rounding::Round) { - return v.ir.VectorAdd(esize, tmp.upper, v.ir.VectorLogicalShiftRight(esize, tmp.lower, static_cast(esize - 1))); - } - - return tmp.upper; - }(); + const auto result = round == Rounding::None + ? v.ir.VectorSignedSaturatedDoublingMultiplyHigh(esize, reg_n, reg_m) + : v.ir.VectorSignedSaturatedDoublingMultiplyHighRounding(esize, reg_n, reg_m); v.ir.SetVector(d, result); return true; @@ -184,11 +178,11 @@ bool TranslatorVisitor::asimd_VQDMULL_scalar(bool D, size_t sz, size_t Vn, size_ } bool TranslatorVisitor::asimd_VQDMULH_scalar(bool Q, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool M, size_t Vm) { - return ScalarMultiplyReturnHigh(*this, Q, D, sz, Vn, Vd, N, M, Vm, Rounding::None); + return ScalarMultiplyDoublingReturnHigh(*this, Q, D, sz, Vn, Vd, N, M, Vm, Rounding::None); } bool TranslatorVisitor::asimd_VQRDMULH_scalar(bool Q, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool M, size_t Vm) { - return ScalarMultiplyReturnHigh(*this, Q, D, sz, Vn, Vd, N, M, Vm, Rounding::Round); + return ScalarMultiplyDoublingReturnHigh(*this, Q, D, sz, Vn, Vd, N, M, Vm, Rounding::Round); } } // namespace Dynarmic::A32 diff --git a/src/dynarmic/frontend/A64/translate/impl/simd_scalar_three_same.cpp b/src/dynarmic/frontend/A64/translate/impl/simd_scalar_three_same.cpp index 7a56f2aa..fb9ae9d1 100644 --- a/src/dynarmic/frontend/A64/translate/impl/simd_scalar_three_same.cpp +++ b/src/dynarmic/frontend/A64/translate/impl/simd_scalar_three_same.cpp @@ -158,8 +158,7 @@ bool TranslatorVisitor::SQRDMULH_vec_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { const IR::U128 operand1 = ir.ZeroExtendToQuad(ir.VectorGetElement(esize, V(128, Vn), 0)); const IR::U128 operand2 = ir.ZeroExtendToQuad(ir.VectorGetElement(esize, V(128, Vm), 0)); - const IR::UpperAndLower multiply = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, operand2); - const IR::U128 result = ir.VectorAdd(esize, multiply.upper, ir.VectorLogicalShiftRight(esize, multiply.lower, static_cast(esize - 1))); + const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyHighRounding(esize, operand1, operand2); V_scalar(esize, Vd, ir.VectorGetElement(esize, result, 0)); return true; diff --git a/src/dynarmic/frontend/A64/translate/impl/simd_scalar_x_indexed_element.cpp b/src/dynarmic/frontend/A64/translate/impl/simd_scalar_x_indexed_element.cpp index 93347c50..dbbc4ce1 100644 --- a/src/dynarmic/frontend/A64/translate/impl/simd_scalar_x_indexed_element.cpp +++ b/src/dynarmic/frontend/A64/translate/impl/simd_scalar_x_indexed_element.cpp @@ -142,8 +142,7 @@ bool TranslatorVisitor::SQRDMULH_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> V const IR::U128 operand1 = ir.ZeroExtendToQuad(ir.VectorGetElement(esize, V(128, Vn), 0)); const IR::U128 operand2 = V(128, Vm); const IR::U128 broadcast = ir.VectorBroadcastElement(esize, operand2, index); - const IR::UpperAndLower multiply = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, broadcast); - const IR::U128 result = ir.VectorAdd(esize, multiply.upper, ir.VectorLogicalShiftRight(esize, multiply.lower, static_cast(esize - 1))); + const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyHighRounding(esize, operand1, broadcast); V(128, Vd, result); return true; diff --git a/src/dynarmic/frontend/A64/translate/impl/simd_three_same.cpp b/src/dynarmic/frontend/A64/translate/impl/simd_three_same.cpp index 5bcf3c73..6d78c537 100644 --- a/src/dynarmic/frontend/A64/translate/impl/simd_three_same.cpp +++ b/src/dynarmic/frontend/A64/translate/impl/simd_three_same.cpp @@ -441,7 +441,7 @@ bool TranslatorVisitor::SQDMULH_vec_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec V const IR::U128 operand1 = V(datasize, Vn); const IR::U128 operand2 = V(datasize, Vm); - const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, operand2).upper; + const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyHigh(esize, operand1, operand2); V(datasize, Vd, result); return true; @@ -457,8 +457,7 @@ bool TranslatorVisitor::SQRDMULH_vec_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec const IR::U128 operand1 = V(datasize, Vn); const IR::U128 operand2 = V(datasize, Vm); - const IR::UpperAndLower multiply = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, operand2); - const IR::U128 result = ir.VectorAdd(esize, multiply.upper, ir.VectorLogicalShiftRight(esize, multiply.lower, static_cast(esize - 1))); + const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyHighRounding(esize, operand1, operand2); V(datasize, Vd, result); return true; diff --git a/src/dynarmic/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp b/src/dynarmic/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp index 8b76e290..7f0625e8 100644 --- a/src/dynarmic/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp +++ b/src/dynarmic/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp @@ -368,7 +368,7 @@ bool TranslatorVisitor::SQDMULH_elt_2(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, I const IR::U128 operand1 = V(datasize, Vn); const IR::U128 operand2 = V(idxsize, Vm); const IR::U128 index_vector = ir.VectorBroadcastElement(esize, operand2, index); - const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, index_vector).upper; + const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyHigh(esize, operand1, index_vector); V(datasize, Vd, result); return true; @@ -387,8 +387,7 @@ bool TranslatorVisitor::SQRDMULH_elt_2(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, const IR::U128 operand1 = V(datasize, Vn); const IR::U128 operand2 = V(idxsize, Vm); const IR::U128 index_vector = ir.VectorBroadcastElement(esize, operand2, index); - const IR::UpperAndLower multiply = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, index_vector); - const IR::U128 result = ir.VectorAdd(esize, multiply.upper, ir.VectorLogicalShiftRight(esize, multiply.lower, static_cast(esize - 1))); + const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyHighRounding(esize, operand1, index_vector); V(datasize, Vd, result); return true; diff --git a/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/ir/ir_emitter.cpp index d05e959c..62abfc2b 100644 --- a/src/dynarmic/ir/ir_emitter.cpp +++ b/src/dynarmic/ir/ir_emitter.cpp @@ -1749,22 +1749,26 @@ U128 IREmitter::VectorSignedSaturatedAccumulateUnsigned(size_t esize, const U128 UNREACHABLE(); } -UpperAndLower IREmitter::VectorSignedSaturatedDoublingMultiply(size_t esize, const U128& a, const U128& b) { - const Value multiply = [&] { - switch (esize) { - case 16: - return Inst(Opcode::VectorSignedSaturatedDoublingMultiply16, a, b); - case 32: - return Inst(Opcode::VectorSignedSaturatedDoublingMultiply32, a, b); - default: - UNREACHABLE(); - } - }(); +U128 IREmitter::VectorSignedSaturatedDoublingMultiplyHigh(size_t esize, const U128& a, const U128& b) { + switch (esize) { + case 16: + return Inst(Opcode::VectorSignedSaturatedDoublingMultiplyHigh16, a, b); + case 32: + return Inst(Opcode::VectorSignedSaturatedDoublingMultiplyHigh32, a, b); + default: + UNREACHABLE(); + } +} - return { - Inst(Opcode::GetUpperFromOp, multiply), - Inst(Opcode::GetLowerFromOp, multiply), - }; +U128 IREmitter::VectorSignedSaturatedDoublingMultiplyHighRounding(size_t esize, const U128& a, const U128& b) { + switch (esize) { + case 16: + return Inst(Opcode::VectorSignedSaturatedDoublingMultiplyHighRounding16, a, b); + case 32: + return Inst(Opcode::VectorSignedSaturatedDoublingMultiplyHighRounding32, a, b); + default: + UNREACHABLE(); + } } U128 IREmitter::VectorSignedSaturatedDoublingMultiplyLong(size_t esize, const U128& a, const U128& b) { diff --git a/src/dynarmic/ir/ir_emitter.h b/src/dynarmic/ir/ir_emitter.h index 17b27de8..c747af81 100644 --- a/src/dynarmic/ir/ir_emitter.h +++ b/src/dynarmic/ir/ir_emitter.h @@ -296,7 +296,8 @@ public: UpperAndLower VectorSignedMultiply(size_t esize, const U128& a, const U128& b); U128 VectorSignedSaturatedAbs(size_t esize, const U128& a); U128 VectorSignedSaturatedAccumulateUnsigned(size_t esize, const U128& a, const U128& b); - UpperAndLower VectorSignedSaturatedDoublingMultiply(size_t esize, const U128& a, const U128& b); + U128 VectorSignedSaturatedDoublingMultiplyHigh(size_t esize, const U128& a, const U128& b); + U128 VectorSignedSaturatedDoublingMultiplyHighRounding(size_t esize, const U128& a, const U128& b); U128 VectorSignedSaturatedDoublingMultiplyLong(size_t esize, const U128& a, const U128& b); U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a); U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a); diff --git a/src/dynarmic/ir/microinstruction.cpp b/src/dynarmic/ir/microinstruction.cpp index ba8c08da..fcebcda7 100644 --- a/src/dynarmic/ir/microinstruction.cpp +++ b/src/dynarmic/ir/microinstruction.cpp @@ -448,8 +448,10 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const { case Opcode::VectorSignedSaturatedAdd16: case Opcode::VectorSignedSaturatedAdd32: case Opcode::VectorSignedSaturatedAdd64: - case Opcode::VectorSignedSaturatedDoublingMultiply16: - case Opcode::VectorSignedSaturatedDoublingMultiply32: + case Opcode::VectorSignedSaturatedDoublingMultiplyHigh16: + case Opcode::VectorSignedSaturatedDoublingMultiplyHigh32: + case Opcode::VectorSignedSaturatedDoublingMultiplyHighRounding16: + case Opcode::VectorSignedSaturatedDoublingMultiplyHighRounding32: case Opcode::VectorSignedSaturatedDoublingMultiplyLong16: case Opcode::VectorSignedSaturatedDoublingMultiplyLong32: case Opcode::VectorSignedSaturatedNarrowToSigned16: diff --git a/src/dynarmic/ir/opcodes.inc b/src/dynarmic/ir/opcodes.inc index 75d2af2e..2f3897ed 100644 --- a/src/dynarmic/ir/opcodes.inc +++ b/src/dynarmic/ir/opcodes.inc @@ -481,8 +481,10 @@ OPCODE(VectorSignedSaturatedAccumulateUnsigned8, U128, U128 OPCODE(VectorSignedSaturatedAccumulateUnsigned16, U128, U128, U128 ) OPCODE(VectorSignedSaturatedAccumulateUnsigned32, U128, U128, U128 ) OPCODE(VectorSignedSaturatedAccumulateUnsigned64, U128, U128, U128 ) -OPCODE(VectorSignedSaturatedDoublingMultiply16, Void, U128, U128 ) -OPCODE(VectorSignedSaturatedDoublingMultiply32, Void, U128, U128 ) +OPCODE(VectorSignedSaturatedDoublingMultiplyHigh16, U128, U128, U128 ) +OPCODE(VectorSignedSaturatedDoublingMultiplyHigh32, U128, U128, U128 ) +OPCODE(VectorSignedSaturatedDoublingMultiplyHighRounding16, U128, U128, U128 ) +OPCODE(VectorSignedSaturatedDoublingMultiplyHighRounding32, U128, U128, U128 ) OPCODE(VectorSignedSaturatedDoublingMultiplyLong16, U128, U128, U128 ) OPCODE(VectorSignedSaturatedDoublingMultiplyLong32, U128, U128, U128 ) OPCODE(VectorSignedSaturatedNarrowToSigned16, U128, U128 )