diff --git a/src/dynarmic/backend/arm64/emit_arm64_vector.cpp b/src/dynarmic/backend/arm64/emit_arm64_vector.cpp
index 83858b3f..776033f0 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_vector.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_vector.cpp
@@ -1146,6 +1146,54 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCont
     EmitTwoOpArranged<8>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.RBIT(Vresult, Voperand); });
 }
 
+template<>
+void EmitIR<IR::Opcode::VectorReverseElementsInHalfGroups8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    (void)code;
+    (void)ctx;
+    (void)inst;
+    ASSERT_FALSE("Unimplemented");
+}
+
+template<>
+void EmitIR<IR::Opcode::VectorReverseElementsInWordGroups8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    (void)code;
+    (void)ctx;
+    (void)inst;
+    ASSERT_FALSE("Unimplemented");
+}
+
+template<>
+void EmitIR<IR::Opcode::VectorReverseElementsInWordGroups16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    (void)code;
+    (void)ctx;
+    (void)inst;
+    ASSERT_FALSE("Unimplemented");
+}
+
+template<>
+void EmitIR<IR::Opcode::VectorReverseElementsInLongGroups8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    (void)code;
+    (void)ctx;
+    (void)inst;
+    ASSERT_FALSE("Unimplemented");
+}
+
+template<>
+void EmitIR<IR::Opcode::VectorReverseElementsInLongGroups16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    (void)code;
+    (void)ctx;
+    (void)inst;
+    ASSERT_FALSE("Unimplemented");
+}
+
+template<>
+void EmitIR<IR::Opcode::VectorReverseElementsInLongGroups32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    (void)code;
+    (void)ctx;
+    (void)inst;
+    ASSERT_FALSE("Unimplemented");
+}
+
 template<>
 void EmitIR<IR::Opcode::VectorReduceAdd8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     EmitReduce<8>(code, ctx, inst, [&](auto& Bresult, auto Voperand) { code.ADDV(Bresult, Voperand); });
@@ -1236,22 +1284,6 @@ void EmitIR(oaknut::CodeGenerator& code,
     EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.URSHL(Vresult, Va, Vb); });
 }
 
-template<>
-void EmitIR<IR::Opcode::VectorShuffleHighHalfwords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
-}
-
-template<>
-void EmitIR<IR::Opcode::VectorShuffleLowHalfwords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
-}
-
 template<>
 void EmitIR<IR::Opcode::VectorShuffleWords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     (void)code;
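
The six new opcodes reverse the order of esize-bit elements inside fixed-width groups: "Half" groups are 16 bits wide, "Word" groups 32 bits, "Long" groups 64 bits, mirroring REV16/REV32/REV64. As a minimal scalar sketch of that semantics (illustrative helper only, not dynarmic code):

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

using Vector = std::array<std::uint8_t, 16>;

// Reverse the esize-bit elements within every group_bits-wide group of a 128-bit vector.
Vector ReverseElementsInGroups(const Vector& in, std::size_t esize_bits, std::size_t group_bits) {
    const std::size_t esize = esize_bits / 8;  // element size in bytes
    const std::size_t group = group_bits / 8;  // group size in bytes
    Vector out{};
    for (std::size_t g = 0; g < in.size(); g += group) {
        for (std::size_t e = 0; e < group; e += esize) {
            // An element at offset e lands at the mirrored offset within the same group.
            std::memcpy(&out[g + (group - esize - e)], &in[g + e], esize);
        }
    }
    return out;
}

int main() {
    Vector v{};
    for (std::size_t i = 0; i < v.size(); ++i) {
        v[i] = static_cast<std::uint8_t>(i);
    }
    // VectorReverseElementsInWordGroups8: bytes {0,1,2,3} become {3,2,1,0} in each 32-bit group.
    const Vector r = ReverseElementsInGroups(v, 8, 32);
    for (const auto b : r) {
        std::printf("%02x ", static_cast<unsigned>(b));
    }
    std::printf("\n");
}
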
diff --git a/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/backend/x64/emit_x64_vector.cpp
index 63d17e67..11c7ee32 100644
--- a/src/dynarmic/backend/x64/emit_x64_vector.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_vector.cpp
@@ -3023,6 +3023,89 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, data);
 }
 
+void EmitX64::EmitVectorReverseElementsInHalfGroups8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+
+    code.movdqa(tmp, data);
+    code.psllw(tmp, 8);
+    code.psrlw(data, 8);
+    code.por(data, tmp);
+
+    ctx.reg_alloc.DefineValue(inst, data);
+}
+
+void EmitX64::EmitVectorReverseElementsInWordGroups8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+
+    // TODO: PSHUFB
+
+    code.movdqa(tmp, data);
+    code.psllw(tmp, 8);
+    code.psrlw(data, 8);
+    code.por(data, tmp);
+    code.pshuflw(data, data, 0b10110001);
+    code.pshufhw(data, data, 0b10110001);
+
+    ctx.reg_alloc.DefineValue(inst, data);
+}
+
+void EmitX64::EmitVectorReverseElementsInWordGroups16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+
+    code.pshuflw(data, data, 0b10110001);
+    code.pshufhw(data, data, 0b10110001);
+
+    ctx.reg_alloc.DefineValue(inst, data);
+}
+
+void EmitX64::EmitVectorReverseElementsInLongGroups8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+
+    // TODO: PSHUFB
+
+    code.movdqa(tmp, data);
+    code.psllw(tmp, 8);
+    code.psrlw(data, 8);
+    code.por(data, tmp);
+    code.pshuflw(data, data, 0b00011011);
+    code.pshufhw(data, data, 0b00011011);
+
+    ctx.reg_alloc.DefineValue(inst, data);
+}
+
+void EmitX64::EmitVectorReverseElementsInLongGroups16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+
+    code.pshuflw(data, data, 0b00011011);
+    code.pshufhw(data, data, 0b00011011);
+
+    ctx.reg_alloc.DefineValue(inst, data);
+}
+
+void EmitX64::EmitVectorReverseElementsInLongGroups32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+
+    code.pshuflw(data, data, 0b01001110);
+    code.pshufhw(data, data, 0b01001110);
+
+    ctx.reg_alloc.DefineValue(inst, data);
+}
+
 void EmitX64::EmitVectorReduceAdd8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
@@ -3308,14 +3391,6 @@ static void VectorShuffleImpl(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
-void EmitX64::EmitVectorShuffleHighHalfwords(EmitContext& ctx, IR::Inst* inst) {
-    VectorShuffleImpl(code, ctx, inst, &Xbyak::CodeGenerator::pshufhw);
-}
-
-void EmitX64::EmitVectorShuffleLowHalfwords(EmitContext& ctx, IR::Inst* inst) {
-    VectorShuffleImpl(code, ctx, inst, &Xbyak::CodeGenerator::pshuflw);
-}
-
 void EmitX64::EmitVectorShuffleWords(EmitContext& ctx, IR::Inst* inst) {
     VectorShuffleImpl(code, ctx, inst, &Xbyak::CodeGenerator::pshufd);
 }
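
The pshuflw/pshufhw immediates used above (0b10110001, 0b00011011, 0b01001110) describe the 16-bit lane permutation applied to each 64-bit half after the psllw/psrlw/por byte swap. A standalone sketch of how such an immediate selects lanes — the helper below is illustrative only, not part of the backend:

#include <array>
#include <cstdint>
#include <cstdio>

// Each 2-bit field of imm8 picks the source lane for the corresponding destination lane,
// which is how PSHUFLW/PSHUFHW treat their immediate within a 64-bit half.
std::array<std::uint16_t, 4> Pshuf(const std::array<std::uint16_t, 4>& src, std::uint8_t imm8) {
    std::array<std::uint16_t, 4> dst{};
    for (int i = 0; i < 4; ++i) {
        dst[i] = src[(imm8 >> (2 * i)) & 0b11];
    }
    return dst;
}

int main() {
    const std::array<std::uint16_t, 4> lanes{0, 1, 2, 3};
    const auto print = [](const std::array<std::uint16_t, 4>& v) {
        std::printf("%u %u %u %u\n", unsigned{v[0]}, unsigned{v[1]}, unsigned{v[2]}, unsigned{v[3]});
    };
    print(Pshuf(lanes, 0b10110001));  // 1 0 3 2 : swap adjacent halfwords (WordGroups)
    print(Pshuf(lanes, 0b00011011));  // 3 2 1 0 : reverse the four halfwords (LongGroups)
    print(Pshuf(lanes, 0b01001110));  // 2 3 0 1 : swap the two words (LongGroups32)
}
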
diff --git a/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_misc.cpp b/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_misc.cpp
index b1961b20..93a59136 100644
--- a/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_misc.cpp
+++ b/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_misc.cpp
@@ -117,47 +117,18 @@ bool TranslatorVisitor::asimd_VREV(bool D, size_t sz, size_t Vd, size_t op, bool
     const auto m = ToVector(Q, Vm, M);
     const auto result = [this, m, op, sz] {
         const auto reg_m = ir.GetVector(m);
-        const size_t esize = 16U << sz;
-        const auto shift = static_cast<u8>(8U << sz);
+        const size_t esize = 8 << sz;
 
-        // 64-bit regions
-        if (op == 0b00) {
-            IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, reg_m, shift),
-                                          ir.VectorLogicalShiftLeft(esize, reg_m, shift));
-
-            switch (sz) {
-            case 0: // 8-bit elements
-                result = ir.VectorShuffleLowHalfwords(result, 0b00011011);
-                result = ir.VectorShuffleHighHalfwords(result, 0b00011011);
-                break;
-            case 1: // 16-bit elements
-                result = ir.VectorShuffleLowHalfwords(result, 0b01001110);
-                result = ir.VectorShuffleHighHalfwords(result, 0b01001110);
-                break;
-            }
-
-            return result;
+        switch (op) {
+        case 0b00:
+            return ir.VectorReverseElementsInLongGroups(esize, reg_m);
+        case 0b01:
+            return ir.VectorReverseElementsInWordGroups(esize, reg_m);
+        case 0b10:
+            return ir.VectorReverseElementsInHalfGroups(esize, reg_m);
         }
 
-        // 32-bit regions
-        if (op == 0b01) {
-            IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, reg_m, shift),
-                                          ir.VectorLogicalShiftLeft(esize, reg_m, shift));
-
-            // If dealing with 8-bit elements we'll need to shuffle the bytes in each halfword
-            // e.g. Assume the following numbers point out bytes in a 32-bit word, we're essentially
-            //      changing [3, 2, 1, 0] to [2, 3, 0, 1]
-            if (sz == 0) {
-                result = ir.VectorShuffleLowHalfwords(result, 0b10110001);
-                result = ir.VectorShuffleHighHalfwords(result, 0b10110001);
-            }
-
-            return result;
-        }
-
-        // 16-bit regions
-        return ir.VectorOr(ir.VectorLogicalShiftRight(esize, reg_m, 8),
-                           ir.VectorLogicalShiftLeft(esize, reg_m, 8));
+        UNREACHABLE();
     }();
 
     ir.SetVector(d, result);
diff --git a/src/dynarmic/frontend/A64/translate/impl/simd_two_register_misc.cpp b/src/dynarmic/frontend/A64/translate/impl/simd_two_register_misc.cpp
index 77509754..ca3e3b95 100644
--- a/src/dynarmic/frontend/A64/translate/impl/simd_two_register_misc.cpp
+++ b/src/dynarmic/frontend/A64/translate/impl/simd_two_register_misc.cpp
@@ -673,81 +673,45 @@ bool TranslatorVisitor::RBIT_asimd(bool Q, Vec Vn, Vec Vd) {
 }
 
 bool TranslatorVisitor::REV16_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
-    if (size != 0) {
+    if (size > 0) {
         return UnallocatedEncoding();
     }
 
     const size_t datasize = Q ? 128 : 64;
-    constexpr size_t esize = 16;
+    constexpr size_t esize = 8;
 
     const IR::U128 data = V(datasize, Vn);
-    const IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, data, 8),
-                                        ir.VectorLogicalShiftLeft(esize, data, 8));
+    const IR::U128 result = ir.VectorReverseElementsInHalfGroups(esize, data);
 
     V(datasize, Vd, result);
     return true;
 }
 
 bool TranslatorVisitor::REV32_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
-    const u32 zext_size = size.ZeroExtend();
-
-    if (zext_size > 1) {
+    if (size > 1) {
        return UnallocatedEncoding();
     }
 
     const size_t datasize = Q ? 128 : 64;
-    const size_t esize = 16 << zext_size;
-    const u8 shift = static_cast<u8>(8 << zext_size);
+    const size_t esize = 8 << size.ZeroExtend();
 
     const IR::U128 data = V(datasize, Vn);
-
-    // TODO: Consider factoring byte swapping code out into its own opcode.
-    //       Technically the rest of the following code can be a PSHUFB
-    //       in the presence of SSSE3.
-    IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, data, shift),
-                                  ir.VectorLogicalShiftLeft(esize, data, shift));
-
-    // If dealing with 8-bit elements we'll need to shuffle the bytes in each halfword
-    // e.g. Assume the following numbers point out bytes in a 32-bit word, we're essentially
-    //      changing [3, 2, 1, 0] to [2, 3, 0, 1]
-    if (zext_size == 0) {
-        result = ir.VectorShuffleLowHalfwords(result, 0b10110001);
-        result = ir.VectorShuffleHighHalfwords(result, 0b10110001);
-    }
+    const IR::U128 result = ir.VectorReverseElementsInWordGroups(esize, data);
 
     V(datasize, Vd, result);
     return true;
 }
 
 bool TranslatorVisitor::REV64_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
-    const u32 zext_size = size.ZeroExtend();
-
-    if (zext_size >= 3) {
+    if (size > 2) {
        return UnallocatedEncoding();
     }
 
     const size_t datasize = Q ? 128 : 64;
-    const size_t esize = 16 << zext_size;
-    const u8 shift = static_cast<u8>(8 << zext_size);
+    const size_t esize = 8 << size.ZeroExtend();
 
     const IR::U128 data = V(datasize, Vn);
-
-    // TODO: Consider factoring byte swapping code out into its own opcode.
-    //       Technically the rest of the following code can be a PSHUFB
-    //       in the presence of SSSE3.
-    IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, data, shift),
-                                  ir.VectorLogicalShiftLeft(esize, data, shift));
-
-    switch (zext_size) {
-    case 0: // 8-bit elements
-        result = ir.VectorShuffleLowHalfwords(result, 0b00011011);
-        result = ir.VectorShuffleHighHalfwords(result, 0b00011011);
-        break;
-    case 1: // 16-bit elements
-        result = ir.VectorShuffleLowHalfwords(result, 0b01001110);
-        result = ir.VectorShuffleHighHalfwords(result, 0b01001110);
-        break;
-    }
+    const IR::U128 result = ir.VectorReverseElementsInLongGroups(esize, data);
 
     V(datasize, Vd, result);
     return true;
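
REV64 (and VREV64) on 8-bit elements is the case where the two-step x64 lowering is least obvious: swapping the bytes of every 16-bit lane and then reversing the four lanes with the 0b00011011 pattern amounts to a full byte reversal of each 64-bit group. A standalone scalar check of that decomposition (illustrative only, assuming nothing beyond the shuffle patterns shown earlier):

#include <cstdint>
#include <cstdio>

// Step 1 of the lowering: swap the two bytes of every 16-bit lane (psllw/psrlw/por).
std::uint64_t SwapBytesInHalfwords(std::uint64_t x) {
    return ((x & 0x00ff00ff00ff00ffULL) << 8) | ((x >> 8) & 0x00ff00ff00ff00ffULL);
}

// Step 2: reverse the four 16-bit lanes, i.e. the 0b00011011 pshuflw/pshufhw pattern.
std::uint64_t ReverseHalfwords(std::uint64_t x) {
    return (x << 48) | ((x & 0xffff0000ULL) << 16) | ((x >> 16) & 0xffff0000ULL) | (x >> 48);
}

int main() {
    const std::uint64_t v = 0x0706050403020100ULL;  // bytes 0..7, lowest byte first
    const std::uint64_t r = ReverseHalfwords(SwapBytesInHalfwords(v));
    std::printf("%016llx\n", static_cast<unsigned long long>(r));  // 0001020304050607: bytes reversed
}
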
diff --git a/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/ir/ir_emitter.cpp
index 62abfc2b..1901647e 100644
--- a/src/dynarmic/ir/ir_emitter.cpp
+++ b/src/dynarmic/ir/ir_emitter.cpp
@@ -1573,6 +1573,39 @@ U128 IREmitter::VectorReverseBits(const U128& a) {
     return Inst<U128>(Opcode::VectorReverseBits, a);
 }
 
+U128 IREmitter::VectorReverseElementsInHalfGroups(size_t esize, const U128& a) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorReverseElementsInHalfGroups8, a);
+    default:
+        UNREACHABLE();
+    }
+}
+
+U128 IREmitter::VectorReverseElementsInWordGroups(size_t esize, const U128& a) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorReverseElementsInWordGroups8, a);
+    case 16:
+        return Inst<U128>(Opcode::VectorReverseElementsInWordGroups16, a);
+    default:
+        UNREACHABLE();
+    }
+}
+
+U128 IREmitter::VectorReverseElementsInLongGroups(size_t esize, const U128& a) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorReverseElementsInLongGroups8, a);
+    case 16:
+        return Inst<U128>(Opcode::VectorReverseElementsInLongGroups16, a);
+    case 32:
+        return Inst<U128>(Opcode::VectorReverseElementsInLongGroups32, a);
+    default:
+        UNREACHABLE();
+    }
+}
+
 U128 IREmitter::VectorReduceAdd(size_t esize, const U128& a) {
     switch (esize) {
     case 8:
@@ -1666,14 +1699,6 @@ U128 IREmitter::VectorRoundingShiftLeftUnsigned(size_t esize, const U128& a, con
     UNREACHABLE();
 }
 
-U128 IREmitter::VectorShuffleHighHalfwords(const U128& a, u8 mask) {
-    return Inst<U128>(Opcode::VectorShuffleHighHalfwords, a, mask);
-}
-
-U128 IREmitter::VectorShuffleLowHalfwords(const U128& a, u8 mask) {
-    return Inst<U128>(Opcode::VectorShuffleLowHalfwords, a, mask);
-}
-
 U128 IREmitter::VectorShuffleWords(const U128& a, u8 mask) {
     return Inst<U128>(Opcode::VectorShuffleWords, a, mask);
 }
diff --git a/src/dynarmic/ir/ir_emitter.h b/src/dynarmic/ir/ir_emitter.h
index c747af81..0152ce31 100644
--- a/src/dynarmic/ir/ir_emitter.h
+++ b/src/dynarmic/ir/ir_emitter.h
@@ -281,6 +281,9 @@ public:
     U128 VectorPolynomialMultiplyLong(size_t esize, const U128& a, const U128& b);
     U128 VectorPopulationCount(const U128& a);
     U128 VectorReverseBits(const U128& a);
+    U128 VectorReverseElementsInHalfGroups(size_t esize, const U128& a);
+    U128 VectorReverseElementsInWordGroups(size_t esize, const U128& a);
+    U128 VectorReverseElementsInLongGroups(size_t esize, const U128& a);
     U128 VectorReduceAdd(size_t esize, const U128& a);
     U128 VectorRotateLeft(size_t esize, const U128& a, u8 amount);
     U128 VectorRotateRight(size_t esize, const U128& a, u8 amount);
@@ -288,8 +291,6 @@ public:
     U128 VectorRoundingHalvingAddUnsigned(size_t esize, const U128& a, const U128& b);
     U128 VectorRoundingShiftLeftSigned(size_t esize, const U128& a, const U128& b);
     U128 VectorRoundingShiftLeftUnsigned(size_t esize, const U128& a, const U128& b);
-    U128 VectorShuffleHighHalfwords(const U128& a, u8 mask);
-    U128 VectorShuffleLowHalfwords(const U128& a, u8 mask);
     U128 VectorShuffleWords(const U128& a, u8 mask);
     U128 VectorSignExtend(size_t original_esize, const U128& a);
     U128 VectorSignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
diff --git a/src/dynarmic/ir/opcodes.inc b/src/dynarmic/ir/opcodes.inc
index 2f3897ed..f48afa63 100644
--- a/src/dynarmic/ir/opcodes.inc
+++ b/src/dynarmic/ir/opcodes.inc
@@ -443,6 +443,12 @@ OPCODE(VectorPolynomialMultiplyLong8,                       U128,           U128
 OPCODE(VectorPolynomialMultiplyLong64,                      U128,           U128,           U128                            )
 OPCODE(VectorPopulationCount,                               U128,           U128                                            )
 OPCODE(VectorReverseBits,                                   U128,           U128                                            )
+OPCODE(VectorReverseElementsInHalfGroups8,                  U128,           U128                                            )
+OPCODE(VectorReverseElementsInWordGroups8,                  U128,           U128                                            )
+OPCODE(VectorReverseElementsInWordGroups16,                 U128,           U128                                            )
+OPCODE(VectorReverseElementsInLongGroups8,                  U128,           U128                                            )
+OPCODE(VectorReverseElementsInLongGroups16,                 U128,           U128                                            )
+OPCODE(VectorReverseElementsInLongGroups32,                 U128,           U128                                            )
 OPCODE(VectorReduceAdd8,                                    U128,           U128                                            )
 OPCODE(VectorReduceAdd16,                                   U128,           U128                                            )
 OPCODE(VectorReduceAdd32,                                   U128,           U128                                            )
@@ -461,8 +467,6 @@ OPCODE(VectorRoundingShiftLeftU8,                            U128,           U128
 OPCODE(VectorRoundingShiftLeftU16,                          U128,           U128,           U128                            )
 OPCODE(VectorRoundingShiftLeftU32,                          U128,           U128,           U128                            )
 OPCODE(VectorRoundingShiftLeftU64,                          U128,           U128,           U128                            )
-OPCODE(VectorShuffleHighHalfwords,                          U128,           U128,           U8                              )
-OPCODE(VectorShuffleLowHalfwords,                           U128,           U128,           U8                              )
 OPCODE(VectorShuffleWords,                                  U128,           U128,           U8                              )
 OPCODE(VectorSignExtend8,                                   U128,           U128                                            )
 OPCODE(VectorSignExtend16,                                  U128,           U128                                            )