ir: Add opcodes for vector floating-point GE and GT comparisons

The rest of the comparisons can be implemented in terms of these two (e.g. LT and LE by swapping the operands).
2018-06-03 19:39:21 -04:00 · 2018-06-03 19:39:21 -04:00 · c695da1cf3
commit c695da1cf3
parent d86fea0d28
4 changed files with 68 additions and 0 deletions
--- a/src/backend_x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend_x64/emit_x64_vector_floating_point.cpp
@ -209,6 +209,46 @@ void EmitX64::EmitFPVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
    ctx.reg_alloc.DefineValue(inst, a);
 }

+void EmitX64::EmitFPVectorGreater32(EmitContext& ctx, IR::Inst* inst) { // Emits a packed single-precision "a > b" compare for inst.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
+    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); // Requested as scratch because the compare below overwrites b.
+
+    code.cmpltps(b, a); // b = (b < a) per lane, i.e. a > b with the operands swapped; a lane holding a NaN compares false.
+
+    ctx.reg_alloc.DefineValue(inst, b); // The per-lane mask left in b becomes the value of inst.
+}
+
+void EmitX64::EmitFPVectorGreater64(EmitContext& ctx, IR::Inst* inst) { // Emits a packed double-precision "a > b" compare for inst.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
+    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); // Requested as scratch because the compare below overwrites b.
+
+    code.cmpltpd(b, a); // b = (b < a) per lane, i.e. a > b with the operands swapped; a lane holding a NaN compares false.
+
+    ctx.reg_alloc.DefineValue(inst, b); // The per-lane mask left in b becomes the value of inst.
+}
+
+void EmitX64::EmitFPVectorGreaterEqual32(EmitContext& ctx, IR::Inst* inst) { // Emits a packed single-precision "a >= b" compare for inst.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
+    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); // Requested as scratch because the compare below overwrites b.
+
+    code.cmpleps(b, a); // b = (b <= a) per lane, i.e. a >= b with the operands swapped; a lane holding a NaN compares false.
+
+    ctx.reg_alloc.DefineValue(inst, b); // The per-lane mask left in b becomes the value of inst.
+}
+
+void EmitX64::EmitFPVectorGreaterEqual64(EmitContext& ctx, IR::Inst* inst) { // Emits a packed double-precision "a >= b" compare for inst.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
+    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); // Requested as scratch because the compare below overwrites b.
+
+    code.cmplepd(b, a); // b = (b <= a) per lane, i.e. a >= b with the operands swapped; a lane holding a NaN compares false.
+
+    ctx.reg_alloc.DefineValue(inst, b); // The per-lane mask left in b becomes the value of inst.
+}
+
 void EmitX64::EmitFPVectorMul32(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::mulps);
 }
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@ -1514,6 +1514,28 @@ U128 IREmitter::FPVectorEqual(size_t esize, const U128& a, const U128& b) {
    return {};
 }

+U128 IREmitter::FPVectorGreater(size_t esize, const U128& a, const U128& b) { // Builds an FPVectorGreater instruction on (a, b), picking the opcode by element size.
+    switch (esize) {
+    case 32:
+        return Inst<U128>(Opcode::FPVectorGreater32, a, b);
+    case 64:
+        return Inst<U128>(Opcode::FPVectorGreater64, a, b);
+    }
+    UNREACHABLE(); // Only esize values of 32 and 64 are handled above.
+    return {}; // Presumably only here so every path returns a value after UNREACHABLE().
+}
+
+U128 IREmitter::FPVectorGreaterEqual(size_t esize, const U128& a, const U128& b) { // Builds an FPVectorGreaterEqual instruction on (a, b), picking the opcode by element size.
+    switch (esize) {
+    case 32:
+        return Inst<U128>(Opcode::FPVectorGreaterEqual32, a, b);
+    case 64:
+        return Inst<U128>(Opcode::FPVectorGreaterEqual64, a, b);
+    }
+    UNREACHABLE(); // Only esize values of 32 and 64 are handled above.
+    return {}; // Presumably only here so every path returns a value after UNREACHABLE().
+}
+
 U128 IREmitter::FPVectorMul(size_t esize, const U128& a, const U128& b) {
    switch (esize) {
    case 32:
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@ -275,6 +275,8 @@ public:
    U128 FPVectorAdd(size_t esize, const U128& a, const U128& b);
    U128 FPVectorDiv(size_t esize, const U128& a, const U128& b);
    U128 FPVectorEqual(size_t esize, const U128& a, const U128& b);
+    U128 FPVectorGreater(size_t esize, const U128& a, const U128& b); // Vector floating-point a > b; esize must be 32 or 64.
+    U128 FPVectorGreaterEqual(size_t esize, const U128& a, const U128& b); // Vector floating-point a >= b; esize must be 32 or 64.
    U128 FPVectorMul(size_t esize, const U128& a, const U128& b);
    U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
    U128 FPVectorS32ToSingle(const U128& a);
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@ -400,6 +400,10 @@ OPCODE(FPVectorDiv32,                       T::U128,        T::U128,        T::U
 OPCODE(FPVectorDiv64,                       T::U128,        T::U128,        T::U128                         )
 OPCODE(FPVectorEqual32,                     T::U128,        T::U128,        T::U128                         )
 OPCODE(FPVectorEqual64,                     T::U128,        T::U128,        T::U128                         )
+OPCODE(FPVectorGreater32,                   T::U128,        T::U128,        T::U128                         ) // a > b, single-precision elements
+OPCODE(FPVectorGreater64,                   T::U128,        T::U128,        T::U128                         ) // a > b, double-precision elements
+OPCODE(FPVectorGreaterEqual32,              T::U128,        T::U128,        T::U128                         ) // a >= b, single-precision elements
+OPCODE(FPVectorGreaterEqual64,              T::U128,        T::U128,        T::U128                         ) // a >= b, double-precision elements
 OPCODE(FPVectorMul32,                       T::U128,        T::U128,        T::U128                         )
 OPCODE(FPVectorMul64,                       T::U128,        T::U128,        T::U128                         )
 OPCODE(FPVectorS32ToSingle,                 T::U128,        T::U128                                         )