From 656419286c720115f6807b66e756a1ca85103761 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 20 Jun 2020 00:01:10 +0100 Subject: [PATCH] ir: Add fpcr_controlled argument to FPVector{Equal,Greater,GreaterEqual} --- src/backend/x64/a32_emit_x64.cpp | 5 +- src/backend/x64/a32_emit_x64.h | 2 +- src/backend/x64/a32_jitstate.cpp | 2 +- src/backend/x64/a32_jitstate.h | 2 +- src/backend/x64/a64_emit_x64.cpp | 4 +- src/backend/x64/a64_emit_x64.h | 2 +- src/backend/x64/a64_jitstate.h | 2 +- src/backend/x64/emit_x64.h | 2 +- .../x64/emit_x64_vector_floating_point.cpp | 92 ++++++++++++++----- src/common/fp/fpcr.h | 2 + src/frontend/ir/ir_emitter.cpp | 20 ++-- src/frontend/ir/ir_emitter.h | 6 +- src/frontend/ir/opcodes.inc | 14 +-- 13 files changed, 102 insertions(+), 53 deletions(-) diff --git a/src/backend/x64/a32_emit_x64.cpp b/src/backend/x64/a32_emit_x64.cpp index f9882c6a..fc196ccf 100644 --- a/src/backend/x64/a32_emit_x64.cpp +++ b/src/backend/x64/a32_emit_x64.cpp @@ -71,8 +71,9 @@ bool A32EmitContext::IsSingleStep() const { return Location().SingleStepping(); } -FP::FPCR A32EmitContext::FPCR() const { - return FP::FPCR{Location().FPSCR().Value()}; +FP::FPCR A32EmitContext::FPCR(bool fpcr_controlled) const { + const FP::FPCR fpcr = FP::FPCR{Location().FPSCR().Value()}; + return fpcr_controlled ? fpcr : fpcr.ASIMDStandardValue(); } A32EmitX64::A32EmitX64(BlockOfCode& code, A32::UserConfig conf, A32::Jit* jit_interface) diff --git a/src/backend/x64/a32_emit_x64.h b/src/backend/x64/a32_emit_x64.h index 2b69d7b9..db4bc665 100644 --- a/src/backend/x64/a32_emit_x64.h +++ b/src/backend/x64/a32_emit_x64.h @@ -30,7 +30,7 @@ struct A32EmitContext final : public EmitContext { A32::LocationDescriptor Location() const; bool IsSingleStep() const; - FP::FPCR FPCR() const override; + FP::FPCR FPCR(bool fpcr_controlled = true) const override; const A32::UserConfig& conf; }; diff --git a/src/backend/x64/a32_jitstate.cpp b/src/backend/x64/a32_jitstate.cpp index 75da3b09..fc7cc289 100644 --- a/src/backend/x64/a32_jitstate.cpp +++ b/src/backend/x64/a32_jitstate.cpp @@ -185,7 +185,7 @@ void A32JitState::SetFpscr(u32 FPSCR) { fpsr_nzcv = FPSCR & FPSCR_NZCV_MASK; guest_MXCSR = 0x00001f80; - asimd_MXCSR = 0x00001f80; + asimd_MXCSR = 0x00009fc0; // RMode const std::array MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000}; diff --git a/src/backend/x64/a32_jitstate.h b/src/backend/x64/a32_jitstate.h index 55f8923e..40d85e6b 100644 --- a/src/backend/x64/a32_jitstate.h +++ b/src/backend/x64/a32_jitstate.h @@ -48,7 +48,7 @@ struct A32JitState { // For internal use (See: BlockOfCode::RunCode) u32 guest_MXCSR = 0x00001f80; - u32 asimd_MXCSR = 0x00001f80; + u32 asimd_MXCSR = 0x00009fc0; u32 save_host_MXCSR = 0; s64 cycles_to_run = 0; s64 cycles_remaining = 0; diff --git a/src/backend/x64/a64_emit_x64.cpp b/src/backend/x64/a64_emit_x64.cpp index 8e8adf7e..df575757 100644 --- a/src/backend/x64/a64_emit_x64.cpp +++ b/src/backend/x64/a64_emit_x64.cpp @@ -48,8 +48,8 @@ bool A64EmitContext::IsSingleStep() const { return Location().SingleStepping(); } -FP::FPCR A64EmitContext::FPCR() const { - return Location().FPCR(); +FP::FPCR A64EmitContext::FPCR(bool fpcr_controlled) const { + return fpcr_controlled ? Location().FPCR() : Location().FPCR().ASIMDStandardValue(); } bool A64EmitContext::AccurateNaN() const { diff --git a/src/backend/x64/a64_emit_x64.h b/src/backend/x64/a64_emit_x64.h index 1090be4c..3817d631 100644 --- a/src/backend/x64/a64_emit_x64.h +++ b/src/backend/x64/a64_emit_x64.h @@ -27,7 +27,7 @@ struct A64EmitContext final : public EmitContext { A64::LocationDescriptor Location() const; bool IsSingleStep() const; - FP::FPCR FPCR() const override; + FP::FPCR FPCR(bool fpcr_controlled = true) const override; bool AccurateNaN() const override; const A64::UserConfig& conf; diff --git a/src/backend/x64/a64_jitstate.h b/src/backend/x64/a64_jitstate.h index d6c45185..1bfe3ad5 100644 --- a/src/backend/x64/a64_jitstate.h +++ b/src/backend/x64/a64_jitstate.h @@ -51,7 +51,7 @@ struct A64JitState { // For internal use (See: BlockOfCode::RunCode) u32 guest_MXCSR = 0x00001f80; - u32 asimd_MXCSR = 0x00001f80; + u32 asimd_MXCSR = 0x00009fc0; u32 save_host_MXCSR = 0; s64 cycles_to_run = 0; s64 cycles_remaining = 0; diff --git a/src/backend/x64/emit_x64.h b/src/backend/x64/emit_x64.h index c81899f9..427db234 100644 --- a/src/backend/x64/emit_x64.h +++ b/src/backend/x64/emit_x64.h @@ -46,7 +46,7 @@ struct EmitContext { size_t GetInstOffset(IR::Inst* inst) const; void EraseInstruction(IR::Inst* inst); - virtual FP::FPCR FPCR() const = 0; + virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0; virtual bool AccurateNaN() const { return true; } RegAlloc& reg_alloc; diff --git a/src/backend/x64/emit_x64_vector_floating_point.cpp b/src/backend/x64/emit_x64_vector_floating_point.cpp index 0405e6c9..6fc363d0 100644 --- a/src/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/backend/x64/emit_x64_vector_floating_point.cpp @@ -35,6 +35,11 @@ using namespace Xbyak::util; namespace { +enum FpcrControlledArgument { + Present, + Absent, +}; + template T ChooseOnFsize([[maybe_unused]] T f32, [[maybe_unused]] T f64) { static_assert(fsize == 32 || fsize == 64, "fsize must be either 32 or 64"); @@ -196,9 +201,9 @@ void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) { } template -void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list to_daz, Xbyak::Xmm tmp) { - if (ctx.FPCR().FZ()) { - if (ctx.FPCR().RMode() != FP::RoundingMode::TowardsMinusInfinity) { +void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list to_daz, Xbyak::Xmm tmp) { + if (fpcr.FZ()) { + if (fpcr.RMode() != FP::RoundingMode::TowardsMinusInfinity) { code.movaps(tmp, GetNegativeZeroVector(code)); } else { code.xorps(tmp, tmp); @@ -383,16 +388,18 @@ void EmitTwoOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lamb } template -void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm result, Xbyak::Xmm arg1, Xbyak::Xmm arg2, Lambda lambda) { +void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm result, Xbyak::Xmm arg1, Xbyak::Xmm arg2, Lambda lambda, bool fpcr_controlled = true) { const auto fn = static_cast*>(lambda); + const u32 fpcr = ctx.FPCR(fpcr_controlled).Value(); + #ifdef _WIN32 constexpr u32 stack_space = 4 * 16; code.sub(rsp, stack_space + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]); - code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value()); + code.mov(code.ABI_PARAM4.cvt32(), fpcr); code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], rax); #else @@ -401,7 +408,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); - code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value()); + code.mov(code.ABI_PARAM4.cvt32(), fpcr); code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); #endif @@ -418,7 +425,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby code.add(rsp, stack_space + ABI_SHADOW_SPACE); } -template +template void EmitThreeOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); @@ -427,7 +434,9 @@ void EmitThreeOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, La ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); - EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, lambda); + const bool fpcr_controlled = fcarg == FpcrControlledArgument::Absent || args[2].GetImmediateU1(); + + EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, lambda, fpcr_controlled); ctx.reg_alloc.DefineValue(inst, result); } @@ -486,6 +495,19 @@ void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lam ctx.reg_alloc.DefineValue(inst, result); } +template +void MaybeStandardFPSCRValue(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, Lambda lambda) { + const bool switch_mxcsr = ctx.FPCR(fpcr_controlled) != ctx.FPCR(); + + if (switch_mxcsr) { + code.EnterStandardASIMD(); + lambda(); + code.LeaveStandardASIMD(); + } else { + lambda(); + } +} + } // anonymous namespace void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) { @@ -538,7 +560,7 @@ void EmitX64::EmitFPVectorDiv64(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitFPVectorEqual16(EmitContext& ctx, IR::Inst* inst) { - EmitThreeOpFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& op1, const VectorArray& op2, FP::FPCR fpcr, FP::FPSR& fpsr) { + EmitThreeOpFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& op1, const VectorArray& op2, FP::FPCR fpcr, FP::FPSR& fpsr) { for (size_t i = 0; i < result.size(); i++) { result[i] = FP::FPCompareEQ(op1[i], op2[i], fpcr, fpsr) ? 0xFFFF : 0; } @@ -548,9 +570,13 @@ void EmitX64::EmitFPVectorEqual16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorEqual32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); + const bool fpcr_controlled = args[2].GetImmediateU1(); - code.cmpeqps(a, b); + MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ + DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); + code.cmpeqps(a, b); + }); ctx.reg_alloc.DefineValue(inst, a); } @@ -558,9 +584,13 @@ void EmitX64::EmitFPVectorEqual32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorEqual64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); + const bool fpcr_controlled = args[2].GetImmediateU1(); - code.cmpeqpd(a, b); + MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ + DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); + code.cmpeqpd(a, b); + }); ctx.reg_alloc.DefineValue(inst, a); } @@ -742,40 +772,56 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitFPVectorGreater32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); + const bool fpcr_controlled = args[2].GetImmediateU1(); - code.cmpltps(b, a); + MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ + DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); + code.cmpltps(b, a); + }); ctx.reg_alloc.DefineValue(inst, b); } void EmitX64::EmitFPVectorGreater64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); + const bool fpcr_controlled = args[2].GetImmediateU1(); - code.cmpltpd(b, a); + MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ + DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); + code.cmpltpd(b, a); + }); ctx.reg_alloc.DefineValue(inst, b); } void EmitX64::EmitFPVectorGreaterEqual32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); + const bool fpcr_controlled = args[2].GetImmediateU1(); - code.cmpleps(b, a); + MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ + DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); + code.cmpleps(b, a); + }); ctx.reg_alloc.DefineValue(inst, b); } void EmitX64::EmitFPVectorGreaterEqual64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); + const bool fpcr_controlled = args[2].GetImmediateU1(); - code.cmplepd(b, a); + MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ + DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); + code.cmplepd(b, a); + }); ctx.reg_alloc.DefineValue(inst, b); } @@ -791,7 +837,7 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(); - DenormalsAreZero(code, ctx, {result, xmm_b}, mask); + DenormalsAreZero(code, ctx.FPCR(), {result, xmm_b}, mask); if (code.HasAVX()) { FCODE(vcmpeqp)(mask, result, xmm_b); @@ -842,7 +888,7 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in const Xbyak::Xmm prev_xmm_b = xmm_b; xmm_b = ctx.reg_alloc.ScratchXmm(); code.movaps(xmm_b, prev_xmm_b); - DenormalsAreZero(code, ctx, {result, xmm_b}, mask); + DenormalsAreZero(code, ctx.FPCR(), {result, xmm_b}, mask); } // What we are doing here is handling the case when the inputs are differently signed zeros. diff --git a/src/common/fp/fpcr.h b/src/common/fp/fpcr.h index 6c34af7f..e160102e 100644 --- a/src/common/fp/fpcr.h +++ b/src/common/fp/fpcr.h @@ -185,6 +185,8 @@ public: FPCR stdvalue; stdvalue.AHP(AHP()); stdvalue.FZ16(FZ16()); + stdvalue.FZ(true); + stdvalue.DN(true); return stdvalue; } diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index c243791d..bdc329fe 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -2302,14 +2302,14 @@ U128 IREmitter::FPVectorDiv(size_t esize, const U128& a, const U128& b) { UNREACHABLE(); } -U128 IREmitter::FPVectorEqual(size_t esize, const U128& a, const U128& b) { +U128 IREmitter::FPVectorEqual(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) { switch (esize) { case 16: - return Inst(Opcode::FPVectorEqual16, a, b); + return Inst(Opcode::FPVectorEqual16, a, b, Imm1(fpcr_controlled)); case 32: - return Inst(Opcode::FPVectorEqual32, a, b); + return Inst(Opcode::FPVectorEqual32, a, b, Imm1(fpcr_controlled)); case 64: - return Inst(Opcode::FPVectorEqual64, a, b); + return Inst(Opcode::FPVectorEqual64, a, b, Imm1(fpcr_controlled)); } UNREACHABLE(); } @@ -2336,22 +2336,22 @@ U128 IREmitter::FPVectorFromUnsignedFixed(size_t esize, const U128& a, size_t fb UNREACHABLE(); } -U128 IREmitter::FPVectorGreater(size_t esize, const U128& a, const U128& b) { +U128 IREmitter::FPVectorGreater(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) { switch (esize) { case 32: - return Inst(Opcode::FPVectorGreater32, a, b); + return Inst(Opcode::FPVectorGreater32, a, b, Imm1(fpcr_controlled)); case 64: - return Inst(Opcode::FPVectorGreater64, a, b); + return Inst(Opcode::FPVectorGreater64, a, b, Imm1(fpcr_controlled)); } UNREACHABLE(); } -U128 IREmitter::FPVectorGreaterEqual(size_t esize, const U128& a, const U128& b) { +U128 IREmitter::FPVectorGreaterEqual(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) { switch (esize) { case 32: - return Inst(Opcode::FPVectorGreaterEqual32, a, b); + return Inst(Opcode::FPVectorGreaterEqual32, a, b, Imm1(fpcr_controlled)); case 64: - return Inst(Opcode::FPVectorGreaterEqual64, a, b); + return Inst(Opcode::FPVectorGreaterEqual64, a, b, Imm1(fpcr_controlled)); } UNREACHABLE(); } diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index eac96060..a8e9be3c 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -347,11 +347,11 @@ public: U128 FPVectorAbs(size_t esize, const U128& a); U128 FPVectorAdd(size_t esize, const U128& a, const U128& b); U128 FPVectorDiv(size_t esize, const U128& a, const U128& b); - U128 FPVectorEqual(size_t esize, const U128& a, const U128& b); + U128 FPVectorEqual(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true); U128 FPVectorFromSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding); U128 FPVectorFromUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding); - U128 FPVectorGreater(size_t esize, const U128& a, const U128& b); - U128 FPVectorGreaterEqual(size_t esize, const U128& a, const U128& b); + U128 FPVectorGreater(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true); + U128 FPVectorGreaterEqual(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true); U128 FPVectorMax(size_t esize, const U128& a, const U128& b); U128 FPVectorMin(size_t esize, const U128& a, const U128& b); U128 FPVectorMul(size_t esize, const U128& a, const U128& b); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 3a4a0c06..687fbdb7 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -584,17 +584,17 @@ OPCODE(FPVectorAdd32, U128, U128 OPCODE(FPVectorAdd64, U128, U128, U128 ) OPCODE(FPVectorDiv32, U128, U128, U128 ) OPCODE(FPVectorDiv64, U128, U128, U128 ) -OPCODE(FPVectorEqual16, U128, U128, U128 ) -OPCODE(FPVectorEqual32, U128, U128, U128 ) -OPCODE(FPVectorEqual64, U128, U128, U128 ) +OPCODE(FPVectorEqual16, U128, U128, U128, U1 ) +OPCODE(FPVectorEqual32, U128, U128, U128, U1 ) +OPCODE(FPVectorEqual64, U128, U128, U128, U1 ) OPCODE(FPVectorFromSignedFixed32, U128, U128, U8, U8 ) OPCODE(FPVectorFromSignedFixed64, U128, U128, U8, U8 ) OPCODE(FPVectorFromUnsignedFixed32, U128, U128, U8, U8 ) OPCODE(FPVectorFromUnsignedFixed64, U128, U128, U8, U8 ) -OPCODE(FPVectorGreater32, U128, U128, U128 ) -OPCODE(FPVectorGreater64, U128, U128, U128 ) -OPCODE(FPVectorGreaterEqual32, U128, U128, U128 ) -OPCODE(FPVectorGreaterEqual64, U128, U128, U128 ) +OPCODE(FPVectorGreater32, U128, U128, U128, U1 ) +OPCODE(FPVectorGreater64, U128, U128, U128, U1 ) +OPCODE(FPVectorGreaterEqual32, U128, U128, U128, U1 ) +OPCODE(FPVectorGreaterEqual64, U128, U128, U128, U1 ) OPCODE(FPVectorMax32, U128, U128, U128 ) OPCODE(FPVectorMax64, U128, U128, U128 ) OPCODE(FPVectorMin32, U128, U128, U128 )