ir: Add opcodes for unsigned saturating left shifts

2018-09-18 18:09:47 -04:00 · 2018-09-18 18:09:47 -04:00 · d426dfe942
commit d426dfe942
parent ab60720418
5 changed files with 76 additions and 0 deletions
--- a/src/backend/x64/emit_x64_vector.cpp
+++ b/src/backend/x64/emit_x64_vector.cpp
@ -4100,6 +4100,58 @@ void EmitX64::EmitVectorUnsignedSaturatedNarrow64(EmitContext& ctx, IR::Inst* in
    });
 }

+template <typename T, typename S = std::make_signed_t<T>>  // T: unsigned element type; S: signed counterpart of T (by default), used for the shift count.
+static bool VectorUnsignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
+    static_assert(std::is_unsigned_v<T>, "T must be an unsigned type.");
+    // For each element: shifts data[i] by the signed count held in the low byte of shift_values[i], stores it in dst[i], and returns whether any element saturated.
+    bool qc_flag = false;  // Becomes true once any element is clamped to the maximum of T.
+
+    constexpr size_t bit_size = Common::BitSize<T>();
+    constexpr S negative_bit_size = -static_cast<S>(bit_size);
+    // Positive counts shift left (saturating on overflow); negative counts shift right.
+    for (size_t i = 0; i < dst.size(); i++) {
+        const T element = data[i];
+        const S shift = std::clamp(static_cast<S>(Common::SignExtend<8>(shift_values[i] & 0xFF)),  // Only the low byte is used; presumably sign-extended from 8 bits (confirm against Common::SignExtend).
+                                   negative_bit_size, std::numeric_limits<S>::max());  // Upper bound is S's max, so this only limits how negative the count can get.
+
+        if (element == 0 || shift <= negative_bit_size) {  // Zero input, or a right shift by the full width: the result is 0.
+            dst[i] = 0;
+        } else if (shift < 0) {
+            dst[i] = static_cast<T>(element >> -shift);  // Plain right shift by |shift|; no rounding is applied.
+        } else if (shift >= static_cast<S>(bit_size)) {  // element is non-zero here, so shifting left by the full width or more always loses set bits.
+            dst[i] = std::numeric_limits<T>::max();
+            qc_flag = true;
+        } else {
+            const T shifted = element << shift;
+            // If shifting back does not reproduce the input, set bits were shifted out: saturate.
+            if ((shifted >> shift) != element) {
+                dst[i] = std::numeric_limits<T>::max();
+                qc_flag = true;
+            } else {
+                dst[i] = shifted;
+            }
+        }
+    }
+
+    return qc_flag;  // NOTE(review): presumably consumed by EmitTwoArgumentFallbackWithSaturation to update the saturation flag — confirm there.
+}
+
+void EmitX64::EmitVectorUnsignedSaturatedShiftLeft8(EmitContext& ctx, IR::Inst* inst) {  // Passes the u8 instantiation of VectorUnsignedSaturatedShiftLeft to EmitTwoArgumentFallbackWithSaturation.
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorUnsignedSaturatedShiftLeft<u8>);
+}
+
+void EmitX64::EmitVectorUnsignedSaturatedShiftLeft16(EmitContext& ctx, IR::Inst* inst) {  // Passes the u16 instantiation of VectorUnsignedSaturatedShiftLeft to EmitTwoArgumentFallbackWithSaturation.
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorUnsignedSaturatedShiftLeft<u16>);
+}
+
+void EmitX64::EmitVectorUnsignedSaturatedShiftLeft32(EmitContext& ctx, IR::Inst* inst) {  // Passes the u32 instantiation of VectorUnsignedSaturatedShiftLeft to EmitTwoArgumentFallbackWithSaturation.
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorUnsignedSaturatedShiftLeft<u32>);
+}
+
+void EmitX64::EmitVectorUnsignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* inst) {  // Passes the u64 instantiation of VectorUnsignedSaturatedShiftLeft to EmitTwoArgumentFallbackWithSaturation.
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorUnsignedSaturatedShiftLeft<u64>);
+}
+
 void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@ -1735,6 +1735,21 @@ U128 IREmitter::VectorUnsignedSaturatedNarrow(size_t esize, const U128& a) {
    return {};
 }

+U128 IREmitter::VectorUnsignedSaturatedShiftLeft(size_t esize, const U128& a, const U128& b) {  // Builds the VectorUnsignedSaturatedShiftLeft{8,16,32,64} instruction selected by esize, with a and b as its operands.
+    switch (esize) {  // NOTE(review): presumably a holds the data and b the per-element shift counts — confirm against the backend.
+    case 8:
+        return Inst<U128>(Opcode::VectorUnsignedSaturatedShiftLeft8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorUnsignedSaturatedShiftLeft16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorUnsignedSaturatedShiftLeft32, a, b);
+    case 64:
+        return Inst<U128>(Opcode::VectorUnsignedSaturatedShiftLeft64, a, b);
+    }
+    UNREACHABLE();  // Reached only when esize is not 8, 16, 32 or 64.
+    return {};  // NOTE(review): presumably here so every path returns a value should UNREACHABLE() fall through — confirm.
+}
+
 U128 IREmitter::VectorZeroExtend(size_t original_esize, const U128& a) {
    switch (original_esize) {
    case 8:
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@ -287,6 +287,7 @@ public:
    U128 VectorUnsignedRecipSqrtEstimate(const U128& a);
    U128 VectorUnsignedSaturatedAccumulateSigned(size_t esize, const U128& a, const U128& b);
    U128 VectorUnsignedSaturatedNarrow(size_t esize, const U128& a);
+    U128 VectorUnsignedSaturatedShiftLeft(size_t esize, const U128& a, const U128& b);  // esize must be 8, 16, 32 or 64; it selects the opcode variant.
    U128 VectorZeroExtend(size_t original_esize, const U128& a);
    U128 VectorZeroUpper(const U128& a);
    U128 ZeroVector();
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@ -380,6 +380,10 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
    case Opcode::VectorUnsignedSaturatedNarrow16:
    case Opcode::VectorUnsignedSaturatedNarrow32:
    case Opcode::VectorUnsignedSaturatedNarrow64:
+    case Opcode::VectorUnsignedSaturatedShiftLeft8:
+    case Opcode::VectorUnsignedSaturatedShiftLeft16:
+    case Opcode::VectorUnsignedSaturatedShiftLeft32:
+    case Opcode::VectorUnsignedSaturatedShiftLeft64:
        return true;

    default:
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@ -446,6 +446,10 @@ OPCODE(VectorUnsignedSaturatedAccumulateSigned64,           U128,           U128
 OPCODE(VectorUnsignedSaturatedNarrow16,                     U128,           U128                                                            )
 OPCODE(VectorUnsignedSaturatedNarrow32,                     U128,           U128                                                            )
 OPCODE(VectorUnsignedSaturatedNarrow64,                     U128,           U128                                                            )
+OPCODE(VectorUnsignedSaturatedShiftLeft8,                   U128,           U128,           U128                                            )
+OPCODE(VectorUnsignedSaturatedShiftLeft16,                  U128,           U128,           U128                                            )
+OPCODE(VectorUnsignedSaturatedShiftLeft32,                  U128,           U128,           U128                                            )
+OPCODE(VectorUnsignedSaturatedShiftLeft64,                  U128,           U128,           U128                                            )
 OPCODE(VectorZeroExtend8,                                   U128,           U128                                                            )
 OPCODE(VectorZeroExtend16,                                  U128,           U128                                                            )
 OPCODE(VectorZeroExtend32,                                  U128,           U128                                                            )