IR: Add opcodes for interleaving upper-order bytes/halfwords/words/doublewords

For consistency in the interface, these opcodes should have been added when I introduced the functions for interleaving the low-order equivalents.
2018-03-29 16:11:14 -04:00 · 2018-03-29 16:11:14 -04:00 · 701f43d61e
commit 701f43d61e
parent 94f0fba16b
4 changed files with 60 additions and 0 deletions
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@ -552,6 +552,46 @@ void EmitX64::EmitVectorInterleaveLower64(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorInterleaveLower(code, ctx, inst, 64);
 }

+// Shared implementation for the VectorInterleaveUpper{8,16,32,64} emitters.
+// Picks the punpckh* instruction matching `size` (element width in bits),
+// applies it to the two vector arguments of `inst`, and defines the result
+// of `inst` as the register holding the first argument.
+// Note: a `size` other than 8, 16, 32 or 64 emits no instruction at all.
+static void EmitVectorInterleaveUpper(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    // `result` is the destination operand of the unpack instruction below,
+    // hence it is requested as a scratch register.
+    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[1]);
+
+    if (size == 8) {
+        code.punpckhbw(result, operand);
+    } else if (size == 16) {
+        code.punpckhwd(result, operand);
+    } else if (size == 32) {
+        code.punpckhdq(result, operand);
+    } else if (size == 64) {
+        code.punpckhqdq(result, operand);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+// Emits the upper interleave for 8-bit elements via the shared helper.
+void EmitX64::EmitVectorInterleaveUpper8(EmitContext& ctx, IR::Inst* inst) {
+    constexpr int element_bits = 8;
+    EmitVectorInterleaveUpper(code, ctx, inst, element_bits);
+}
+
+// Emits the upper interleave for 16-bit elements via the shared helper.
+void EmitX64::EmitVectorInterleaveUpper16(EmitContext& ctx, IR::Inst* inst) {
+    constexpr int element_bits = 16;
+    EmitVectorInterleaveUpper(code, ctx, inst, element_bits);
+}
+
+// Emits the upper interleave for 32-bit elements via the shared helper.
+void EmitX64::EmitVectorInterleaveUpper32(EmitContext& ctx, IR::Inst* inst) {
+    constexpr int element_bits = 32;
+    EmitVectorInterleaveUpper(code, ctx, inst, element_bits);
+}
+
+// Emits the upper interleave for 64-bit elements via the shared helper.
+void EmitX64::EmitVectorInterleaveUpper64(EmitContext& ctx, IR::Inst* inst) {
+    constexpr int element_bits = 64;
+    EmitVectorInterleaveUpper(code, ctx, inst, element_bits);
+}
+
 void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@ -914,6 +914,21 @@ U128 IREmitter::VectorInterleaveLower(size_t esize, const U128& a, const U128& b
    return {};
 }

+// Builds the VectorInterleaveUpper{8,16,32,64} IR instruction selected by
+// `esize` (element size in bits) with operands `a` and `b`.
+// Any other `esize` reaches UNREACHABLE() and yields a default-constructed U128.
+U128 IREmitter::VectorInterleaveUpper(size_t esize, const U128& a, const U128& b) {
+    if (esize == 8) {
+        return Inst<U128>(Opcode::VectorInterleaveUpper8, a, b);
+    }
+    if (esize == 16) {
+        return Inst<U128>(Opcode::VectorInterleaveUpper16, a, b);
+    }
+    if (esize == 32) {
+        return Inst<U128>(Opcode::VectorInterleaveUpper32, a, b);
+    }
+    if (esize == 64) {
+        return Inst<U128>(Opcode::VectorInterleaveUpper64, a, b);
+    }
+
+    // Unsupported element size: same fall-through as the other esize dispatchers.
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::VectorLessEqualSigned(size_t esize, const U128& a, const U128& b) {
    return VectorNot(VectorGreaterSigned(esize, a, b));
 }
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@ -220,6 +220,7 @@ public:
    U128 VectorGreaterSigned(size_t esize, const U128& a, const U128& b);
    U128 VectorGreaterUnsigned(size_t esize, const U128& a, const U128& b);
    U128 VectorInterleaveLower(size_t esize, const U128& a, const U128& b);
+    U128 VectorInterleaveUpper(size_t esize, const U128& a, const U128& b);
    U128 VectorLessEqualSigned(size_t esize, const U128& a, const U128& b);
    U128 VectorLessEqualUnsigned(size_t esize, const U128& a, const U128& b);
    U128 VectorLessSigned(size_t esize, const U128& a, const U128& b);
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@ -242,6 +242,10 @@ OPCODE(VectorInterleaveLower8,      T::U128,        T::U128,        T::U128
 OPCODE(VectorInterleaveLower16,     T::U128,        T::U128,        T::U128                         )
 OPCODE(VectorInterleaveLower32,     T::U128,        T::U128,        T::U128                         )
 OPCODE(VectorInterleaveLower64,     T::U128,        T::U128,        T::U128                         )
+OPCODE(VectorInterleaveUpper8,      T::U128,        T::U128,        T::U128                         )
+OPCODE(VectorInterleaveUpper16,     T::U128,        T::U128,        T::U128                         )
+OPCODE(VectorInterleaveUpper32,     T::U128,        T::U128,        T::U128                         )
+OPCODE(VectorInterleaveUpper64,     T::U128,        T::U128,        T::U128                         )
 OPCODE(VectorLogicalShiftLeft8,     T::U128,        T::U128,        T::U8                           )
 OPCODE(VectorLogicalShiftLeft16,    T::U128,        T::U128,        T::U8                           )
 OPCODE(VectorLogicalShiftLeft32,    T::U128,        T::U128,        T::U8                           )