diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index 68d7d7b4..26a5f006 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -6,6 +6,7 @@
 
 #include <algorithm>
 #include <cstdlib>
+#include <type_traits>
 
 #include "backend_x64/abi.h"
 #include "backend_x64/block_of_code.h"
@@ -923,10 +924,23 @@ static constexpr T LogicalVShift(T x, T y) {
     const s8 shift_amount = static_cast<s8>(static_cast<u8>(y));
     const s64 bit_size = static_cast<s64>(Common::BitSize<T>());
 
-    if (shift_amount <= -bit_size || shift_amount >= bit_size) {
+    if constexpr (std::is_signed_v<T>) {
+        if (shift_amount >= bit_size) {
+            return 0;
+        }
+    } else if (shift_amount <= -bit_size || shift_amount >= bit_size) {
         return 0;
     }
 
+    if constexpr (std::is_signed_v<T>) {
+        if (shift_amount <= -bit_size) {
+            // Parentheses necessary, as MSVC doesn't appear to consider cast parentheses
+            // as a grouping in terms of precedence, causing warning C4554 to fire. See:
+            // https://developercommunity.visualstudio.com/content/problem/144783/msvc-2017-does-not-understand-that-static-cast-cou.html
+            return x >> (T(bit_size - 1));
+        }
+    }
+
     if (shift_amount < 0) {
         return x >> T(-shift_amount);
     }
@@ -934,25 +948,49 @@ static constexpr T LogicalVShift(T x, T y) {
     return x << T(shift_amount);
 }
 
-void EmitX64::EmitVectorLogicalVShift8(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorLogicalVShiftS8(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s8, 16>& result, const std::array<s8, 16>& a, const std::array<s8, 16>& b) {
+        std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<s8>);
+    });
+}
+
+void EmitX64::EmitVectorLogicalVShiftS16(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s16, 8>& result, const std::array<s16, 8>& a, const std::array<s16, 8>& b){
+        std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<s16>);
+    });
+}
+
+void EmitX64::EmitVectorLogicalVShiftS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s32, 4>& result, const std::array<s32, 4>& a, const std::array<s32, 4>& b){
+        std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<s32>);
+    });
+}
+
+void EmitX64::EmitVectorLogicalVShiftS64(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s64, 2>& result, const std::array<s64, 2>& a, const std::array<s64, 2>& b){
+        std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<s64>);
+    });
+}
+
+void EmitX64::EmitVectorLogicalVShiftU8(EmitContext& ctx, IR::Inst* inst) {
     EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u8, 16>& result, const std::array<u8, 16>& a, const std::array<u8, 16>& b) {
         std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<u8>);
     });
 }
 
-void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorLogicalVShiftU16(EmitContext& ctx, IR::Inst* inst) {
     EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u16, 8>& result, const std::array<u16, 8>& a, const std::array<u16, 8>& b){
         std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<u16>);
     });
 }
 
-void EmitX64::EmitVectorLogicalVShift32(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorLogicalVShiftU32(EmitContext& ctx, IR::Inst* inst) {
     EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u32, 4>& result, const std::array<u32, 4>& a, const std::array<u32, 4>& b){
         std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<u32>);
     });
 }
 
-void EmitX64::EmitVectorLogicalVShift64(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorLogicalVShiftU64(EmitContext& ctx, IR::Inst* inst) {
     EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& result, const std::array<u64, 2>& a, const std::array<u64, 2>& b){
         std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<u64>);
     });
diff --git a/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp b/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp
index 187a2c3f..87c3e7bd 100644
--- a/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp
+++ b/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp
@@ -151,7 +151,7 @@ bool TranslatorVisitor::USHL_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
     const IR::U128 operand1 = V(64, Vn);
     const IR::U128 operand2 = V(64, Vm);
 
-    const IR::U128 result = ir.VectorLogicalVShift(64, operand1, operand2);
+    const IR::U128 result = ir.VectorLogicalVShiftUnsigned(64, operand1, operand2);
 
     V(64, Vd, result);
     return true;
diff --git a/src/frontend/A64/translate/impl/simd_three_same.cpp b/src/frontend/A64/translate/impl/simd_three_same.cpp
index b7f1921d..51157dea 100644
--- a/src/frontend/A64/translate/impl/simd_three_same.cpp
+++ b/src/frontend/A64/translate/impl/simd_three_same.cpp
@@ -299,7 +299,7 @@ bool TranslatorVisitor::USHL_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
     const IR::U128 operand1 = V(datasize, Vn);
     const IR::U128 operand2 = V(datasize, Vm);
 
-    const IR::U128 result = ir.VectorLogicalVShift(esize, operand1, operand2);
+    const IR::U128 result = ir.VectorLogicalVShiftUnsigned(esize, operand1, operand2);
 
     V(datasize, Vd, result);
     return true;
 }
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index deb290c5..0911d2dc 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -958,16 +958,31 @@ U128 IREmitter::VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_am
     return {};
 }
 
-U128 IREmitter::VectorLogicalVShift(size_t esize, const U128& a, const U128& b) {
+U128 IREmitter::VectorLogicalVShiftSigned(size_t esize, const U128& a, const U128& b) {
     switch (esize) {
     case 8:
-        return Inst<U128>(Opcode::VectorLogicalVShift8, a, b);
+        return Inst<U128>(Opcode::VectorLogicalVShiftS8, a, b);
     case 16:
-        return Inst<U128>(Opcode::VectorLogicalVShift16, a, b);
+        return Inst<U128>(Opcode::VectorLogicalVShiftS16, a, b);
     case 32:
-        return Inst<U128>(Opcode::VectorLogicalVShift32, a, b);
+        return Inst<U128>(Opcode::VectorLogicalVShiftS32, a, b);
     case 64:
-        return Inst<U128>(Opcode::VectorLogicalVShift64, a, b);
+        return Inst<U128>(Opcode::VectorLogicalVShiftS64, a, b);
+    }
+    UNREACHABLE();
+    return {};
+}
+
+U128 IREmitter::VectorLogicalVShiftUnsigned(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorLogicalVShiftU8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorLogicalVShiftU16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorLogicalVShiftU32, a, b);
+    case 64:
+        return Inst<U128>(Opcode::VectorLogicalVShiftU64, a, b);
     }
     UNREACHABLE();
     return {};
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index bbe3fde2..c4905af9 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -214,7 +214,8 @@ public:
     U128 VectorLessUnsigned(size_t esize, const U128& a, const U128& b);
     U128 VectorLogicalShiftLeft(size_t esize, const U128& a, u8 shift_amount);
     U128 VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_amount);
-    U128 VectorLogicalVShift(size_t esize, const U128& a, const U128& b);
+    U128 VectorLogicalVShiftSigned(size_t esize, const U128& a, const U128& b);
+    U128 VectorLogicalVShiftUnsigned(size_t esize, const U128& a, const U128& b);
     U128 VectorMaxSigned(size_t esize, const U128& a, const U128& b);
     U128 VectorMaxUnsigned(size_t esize, const U128& a, const U128& b);
     U128 VectorMinSigned(size_t esize, const U128& a, const U128& b);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 86fb76ed..cb70f62a 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -267,10 +267,14 @@ OPCODE(VectorLogicalShiftRight8, T::U128, T::U128, T::U
 OPCODE(VectorLogicalShiftRight16,                           T::U128,        T::U128,        T::U8           )
 OPCODE(VectorLogicalShiftRight32,                           T::U128,        T::U128,        T::U8           )
 OPCODE(VectorLogicalShiftRight64,                           T::U128,        T::U128,        T::U8           )
-OPCODE(VectorLogicalVShift8,                                T::U128,        T::U128,        T::U128         )
-OPCODE(VectorLogicalVShift16,                               T::U128,        T::U128,        T::U128         )
-OPCODE(VectorLogicalVShift32,                               T::U128,        T::U128,        T::U128         )
-OPCODE(VectorLogicalVShift64,                               T::U128,        T::U128,        T::U128         )
+OPCODE(VectorLogicalVShiftS8,                               T::U128,        T::U128,        T::U128         )
+OPCODE(VectorLogicalVShiftS16,                              T::U128,        T::U128,        T::U128         )
+OPCODE(VectorLogicalVShiftS32,                              T::U128,        T::U128,        T::U128         )
+OPCODE(VectorLogicalVShiftS64,                              T::U128,        T::U128,        T::U128         )
+OPCODE(VectorLogicalVShiftU8,                               T::U128,        T::U128,        T::U128         )
+OPCODE(VectorLogicalVShiftU16,                              T::U128,        T::U128,        T::U128         )
+OPCODE(VectorLogicalVShiftU32,                              T::U128,        T::U128,        T::U128         )
+OPCODE(VectorLogicalVShiftU64,                              T::U128,        T::U128,        T::U128         )
 OPCODE(VectorMaxS8,                                         T::U128,        T::U128,        T::U128         )
 OPCODE(VectorMaxS16,                                        T::U128,        T::U128,        T::U128         )
 OPCODE(VectorMaxS32,                                        T::U128,        T::U128,        T::U128         )