diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index 6058bdbc..e6659ad2 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -652,6 +652,66 @@ void EmitX64::EmitVectorLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
+void EmitX64::EmitVectorLogicalVShift8(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u8, 16>& result, const std::array<u8, 16>& a, const std::array<u8, 16>& b){
+        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](u8 x, u8 y) -> u8 {
+            s8 shift_amount = static_cast<s8>(static_cast<u8>(y));
+            if (shift_amount <= -8 || shift_amount >= 8) {
+                return 0;
+            }
+            if (shift_amount < 0) {
+                return x >> u8(-shift_amount);
+            }
+            return x << u8(shift_amount);
+        });
+    });
+}
+
+void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u16, 8>& result, const std::array<u16, 8>& a, const std::array<u16, 8>& b){
+        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](u16 x, u16 y) -> u16 {
+            s8 shift_amount = static_cast<s8>(static_cast<u8>(y));
+            if (shift_amount <= -16 || shift_amount >= 16) {
+                return 0;
+            }
+            if (shift_amount < 0) {
+                return x >> u16(-shift_amount);
+            }
+            return x << u16(shift_amount);
+        });
+    });
+}
+
+void EmitX64::EmitVectorLogicalVShift32(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u32, 4>& result, const std::array<u32, 4>& a, const std::array<u32, 4>& b){
+        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](u32 x, u32 y) -> u32 {
+            s8 shift_amount = static_cast<s8>(static_cast<u8>(y));
+            if (shift_amount <= -32 || shift_amount >= 32) {
+                return 0;
+            }
+            if (shift_amount < 0) {
+                return x >> u32(-shift_amount);
+            }
+            return x << u32(shift_amount);
+        });
+    });
+}
+
+void EmitX64::EmitVectorLogicalVShift64(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& result, const std::array<u64, 2>& a, const std::array<u64, 2>& b){
+        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](u64 x, u64 y) -> u64 {
+            s8 shift_amount = static_cast<s8>(static_cast<u8>(y));
+            if (shift_amount <= -64 || shift_amount >= 64) {
+                return 0;
+            }
+            if (shift_amount < 0) {
+                return x >> u64(-shift_amount);
+            }
+            return x << u64(shift_amount);
+        });
+    });
+}
+
 void EmitX64::EmitVectorMaxS8(EmitContext& ctx, IR::Inst* inst) {
     if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
         EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsb);
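[Commentary, not part of the patch] All four fallbacks above encode the same per-element rule as USHL: the shift amount is the signed value of the bottom byte of each element of the second operand; a negative amount shifts right, and any magnitude at or beyond the element size clears the lane. A minimal standalone sketch of the 8-bit case for sanity-checking — `ushl_element8` is a name invented here, not a dynarmic symbol:

```cpp
#include <cassert>
#include <cstdint>

using u8 = std::uint8_t;
using s8 = std::int8_t;

// Mirrors the lambda in EmitVectorLogicalVShift8: the bottom byte of y,
// read as signed, selects both direction and amount of the shift.
u8 ushl_element8(u8 x, u8 y) {
    const s8 shift_amount = static_cast<s8>(y);
    if (shift_amount <= -8 || shift_amount >= 8) {
        return 0;  // shifting an 8-bit lane by 8 or more always yields zero
    }
    if (shift_amount < 0) {
        return static_cast<u8>(x >> static_cast<u8>(-shift_amount));
    }
    return static_cast<u8>(x << static_cast<u8>(shift_amount));
}

int main() {
    assert(ushl_element8(0x01, 0x03) == 0x08);  // +3: left shift
    assert(ushl_element8(0x80, 0xFF) == 0x40);  // 0xFF == -1: right shift by 1
    assert(ushl_element8(0xFF, 0x08) == 0x00);  // +8 is out of range for the lane
    assert(ushl_element8(0xFF, 0xF8) == 0x00);  // -8 likewise zeroes the lane
}
```

Note that the range guard does double duty: it matches the architectural "amount >= esize gives zero" rule, and it keeps the C++ shift counts inside [0, 7], so a large amount such as 100 never reaches the shift operators, where it would be undefined behaviour after integer promotion.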
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index 812391e4..0c8eaafb 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -748,7 +748,7 @@ INST(ORN_asimd, "ORN (vector)", "0Q001
 //INST(UQSUB_2, "UQSUB", "0Q101110zz1mmmmm001011nnnnnddddd")
 INST(CMHI_2, "CMHI (register)", "0Q101110zz1mmmmm001101nnnnnddddd")
 INST(CMHS_2, "CMHS (register)", "0Q101110zz1mmmmm001111nnnnnddddd")
-//INST(USHL_2, "USHL", "0Q101110zz1mmmmm010001nnnnnddddd")
+INST(USHL_2, "USHL", "0Q101110zz1mmmmm010001nnnnnddddd")
 //INST(UQSHL_reg_2, "UQSHL (register)", "0Q101110zz1mmmmm010011nnnnnddddd")
 //INST(URSHL_2, "URSHL", "0Q101110zz1mmmmm010101nnnnnddddd")
 //INST(UQRSHL_2, "UQRSHL", "0Q101110zz1mmmmm010111nnnnnddddd")
diff --git a/src/frontend/A64/translate/impl/simd_three_same.cpp b/src/frontend/A64/translate/impl/simd_three_same.cpp
index 0b4b3030..ca81197b 100644
--- a/src/frontend/A64/translate/impl/simd_three_same.cpp
+++ b/src/frontend/A64/translate/impl/simd_three_same.cpp
@@ -199,6 +199,20 @@ bool TranslatorVisitor::CMHS_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
     return true;
 }
 
+bool TranslatorVisitor::USHL_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
+    if (size == 0b11 && !Q) {
+        return ReservedValue();
+    }
+    const size_t esize = 8 << size.ZeroExtend();
+    const size_t datasize = Q ? 128 : 64;
+
+    const IR::U128 operand1 = V(datasize, Vn);
+    const IR::U128 operand2 = V(datasize, Vm);
+    const IR::U128 result = ir.VectorLogicalVShift(esize, operand1, operand2);
+    V(datasize, Vd, result);
+    return true;
+}
+
 bool TranslatorVisitor::UMAX(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
     if (size == 0b11) {
         return ReservedValue();
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 2dedf077..bd3020b2 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -960,6 +960,21 @@ U128 IREmitter::VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_am
     return {};
 }
 
+U128 IREmitter::VectorLogicalVShift(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorLogicalVShift8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorLogicalVShift16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorLogicalVShift32, a, b);
+    case 64:
+        return Inst<U128>(Opcode::VectorLogicalVShift64, a, b);
+    }
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::VectorMaxSigned(size_t esize, const U128& a, const U128& b) {
     switch (esize) {
     case 8:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 3ad414e9..b315047a 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -226,6 +226,7 @@ public:
     U128 VectorLessUnsigned(size_t esize, const U128& a, const U128& b);
     U128 VectorLogicalShiftLeft(size_t esize, const U128& a, u8 shift_amount);
     U128 VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_amount);
+    U128 VectorLogicalVShift(size_t esize, const U128& a, const U128& b);
     U128 VectorMaxSigned(size_t esize, const U128& a, const U128& b);
     U128 VectorMaxUnsigned(size_t esize, const U128& a, const U128& b);
     U128 VectorMinSigned(size_t esize, const U128& a, const U128& b);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 83c10c2a..f471ffa9 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -250,6 +250,10 @@ OPCODE(VectorLogicalShiftRight8, T::U128, T::U128, T::U8 )
 OPCODE(VectorLogicalShiftRight16, T::U128, T::U128, T::U8 )
 OPCODE(VectorLogicalShiftRight32, T::U128, T::U128, T::U8 )
 OPCODE(VectorLogicalShiftRight64, T::U128, T::U128, T::U8 )
+OPCODE(VectorLogicalVShift8, T::U128, T::U128, T::U128 )
+OPCODE(VectorLogicalVShift16, T::U128, T::U128, T::U128 )
+OPCODE(VectorLogicalVShift32, T::U128, T::U128, T::U128 )
+OPCODE(VectorLogicalVShift64, T::U128, T::U128, T::U128 )
 OPCODE(VectorMaxS8, T::U128, T::U128, T::U128 )
 OPCODE(VectorMaxS16, T::U128, T::U128, T::U128 )
 OPCODE(VectorMaxS32, T::U128, T::U128, T::U128 )
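[Commentary, not part of the patch] In `USHL_2` above, `esize = 8 << size.ZeroExtend()` maps the two-bit `size` field to 8/16/32/64-bit lanes, `datasize = Q ? 128 : 64` picks the half- or full-width vector, and the `size == 0b11 && !Q` check rejects the single-64-bit-lane ("1D") arrangement, which the encoding reserves. A throwaway enumeration of those combinations, purely illustrative:

```cpp
#include <cstdio>

int main() {
    // Walk every (size, Q) pair the decoder can produce for USHL_2.
    for (unsigned size = 0; size < 4; ++size) {
        for (unsigned Q = 0; Q < 2; ++Q) {
            const unsigned esize = 8u << size;       // lane width: 8, 16, 32, 64
            const unsigned datasize = Q ? 128 : 64;  // vector width in bits
            const bool reserved = (size == 0b11 && Q == 0);
            std::printf("size=%u Q=%u -> %ux%u%s\n", size, Q,
                        datasize / esize, esize, reserved ? " (reserved)" : "");
        }
    }
}
```

The printed arrangements (8x8, 16x8, 4x16, ..., 2x64) match the ARM vector arrangement names (8B, 16B, 4H, ..., 2D), with only the 1x64 case rejected, which is exactly what the `ReservedValue()` path expresses.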