From 7731dcdca99146b20514d21201412b0baea7abab Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Tue, 31 Jul 2018 20:25:03 +0100
Subject: [PATCH] emit_x64_vector_floating_point: Reduce codesize of
 EmitTwoOpVectorOperation

---
 .../emit_x64_vector_floating_point.cpp        | 34 ++++++++++++-------
 1 file changed, 21 insertions(+), 13 deletions(-)
diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp
index cb21a35e..4bc00d4b 100644
--- a/src/backend_x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend_x64/emit_x64_vector_floating_point.cpp
@@ -241,17 +241,21 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
 
     if (!ctx.AccurateNaN() || ctx.FPSCR_DN()) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-        const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+
+        Xbyak::Xmm result;
 
         if constexpr (std::is_member_function_pointer_v<Function>) {
-            (code.*fn)(xmm_a);
+            result = ctx.reg_alloc.UseScratchXmm(args[0]);
+            (code.*fn)(result);
         } else {
-            fn(xmm_a);
+            const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
+            result = ctx.reg_alloc.ScratchXmm();
+            fn(result, xmm_a);
         }
 
-        ForceToDefaultNaN<fsize>(code, ctx, xmm_a);
+        ForceToDefaultNaN<fsize>(code, ctx, result);
 
-        ctx.reg_alloc.DefineValue(inst, xmm_a);
+        ctx.reg_alloc.DefineValue(inst, result);
         return;
     }
 
@@ -261,15 +265,19 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
     const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
     const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
 
-    code.movaps(nan_mask, xmm_a);
-    code.movaps(result, xmm_a);
-    FCODE(cmpunordp)(nan_mask, nan_mask);
     if constexpr (std::is_member_function_pointer_v<Function>) {
+        code.movaps(result, xmm_a);
         (code.*fn)(result);
     } else {
-        fn(result);
+        fn(result, xmm_a);
+    }
+
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
+        FCODE(vcmpunordp)(nan_mask, result, result);
+    } else {
+        code.movaps(nan_mask, result);
+        FCODE(cmpunordp)(nan_mask, nan_mask);
     }
-    FCODE(cmpunordp)(nan_mask, result);
 
     HandleNaNs<fsize, 1>(code, ctx, {result, xmm_a}, nan_mask, nan_handler);
 
@@ -907,11 +915,11 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
             return 0;
         }();
 
-        EmitTwoOpVectorOperation<fsize, DefaultIndexer>(code, ctx, inst, [&](const Xbyak::Xmm& result){
+        EmitTwoOpVectorOperation<fsize, DefaultIndexer>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a){
             if constexpr (fsize == 32) {
-                code.roundps(result, result, round_imm);
+                code.roundps(result, xmm_a, round_imm);
             } else {
-                code.roundpd(result, result, round_imm);
+                code.roundpd(result, xmm_a, round_imm);
             }
         });