From 5a2adc662952a3a47defd89d1bbb9cdcfeafb676 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sun, 24 Mar 2019 10:59:44 +0000
Subject: [PATCH] backend/x64: Expose FPCR in EmitContext instead of its
 subcomponents

---
 src/backend/x64/a32_emit_x64.cpp            | 16 +---
 src/backend/x64/a32_emit_x64.h              |  5 +-
 src/backend/x64/a64_emit_x64.cpp            | 16 +---
 src/backend/x64/a64_emit_x64.h              |  5 +-
 src/backend/x64/emit_x64.h                  |  6 +-
 src/backend/x64/emit_x64_floating_point.cpp | 78 +++++++++----------
 .../x64/emit_x64_vector_floating_point.cpp  | 42 +++++-----
 7 files changed, 68 insertions(+), 100 deletions(-)

diff --git a/src/backend/x64/a32_emit_x64.cpp b/src/backend/x64/a32_emit_x64.cpp
index 32ee18a0..b47d74fa 100644
--- a/src/backend/x64/a32_emit_x64.cpp
+++ b/src/backend/x64/a32_emit_x64.cpp
@@ -61,20 +61,8 @@ A32::LocationDescriptor A32EmitContext::Location() const {
     return A32::LocationDescriptor{block.Location()};
 }
 
-FP::RoundingMode A32EmitContext::FPSCR_RMode() const {
-    return Location().FPSCR().RMode();
-}
-
-u32 A32EmitContext::FPCR() const {
-    return Location().FPSCR().Value();
-}
-
-bool A32EmitContext::FPSCR_FTZ() const {
-    return Location().FPSCR().FTZ();
-}
-
-bool A32EmitContext::FPSCR_DN() const {
-    return Location().FPSCR().DN();
+FP::FPCR A32EmitContext::FPCR() const {
+    return FP::FPCR{Location().FPSCR().Value()};
 }
 
 A32EmitX64::A32EmitX64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface)
diff --git a/src/backend/x64/a32_emit_x64.h b/src/backend/x64/a32_emit_x64.h
index c25d783d..57be4a63 100644
--- a/src/backend/x64/a32_emit_x64.h
+++ b/src/backend/x64/a32_emit_x64.h
@@ -24,10 +24,7 @@ class RegAlloc;
 struct A32EmitContext final : public EmitContext {
     A32EmitContext(RegAlloc& reg_alloc, IR::Block& block);
     A32::LocationDescriptor Location() const;
-    FP::RoundingMode FPSCR_RMode() const override;
-    u32 FPCR() const override;
-    bool FPSCR_FTZ() const override;
-    bool FPSCR_DN() const override;
+    FP::FPCR FPCR() const override;
 };
 
 class A32EmitX64 final : public EmitX64 {
diff --git a/src/backend/x64/a64_emit_x64.cpp b/src/backend/x64/a64_emit_x64.cpp
index 27dd1d76..7cf398a8 100644
--- a/src/backend/x64/a64_emit_x64.cpp
+++ b/src/backend/x64/a64_emit_x64.cpp
@@ -42,20 +42,8 @@ A64::LocationDescriptor A64EmitContext::Location() const {
     return A64::LocationDescriptor{block.Location()};
 }
 
-FP::RoundingMode A64EmitContext::FPSCR_RMode() const {
-    return Location().FPCR().RMode();
-}
-
-u32 A64EmitContext::FPCR() const {
-    return Location().FPCR().Value();
-}
-
-bool A64EmitContext::FPSCR_FTZ() const {
-    return Location().FPCR().FZ();
-}
-
-bool A64EmitContext::FPSCR_DN() const {
-    return Location().FPCR().DN() || conf.floating_point_nan_accuracy == A64::UserConfig::NaNAccuracy::AlwaysForceDefaultNaN;
+FP::FPCR A64EmitContext::FPCR() const {
+    return Location().FPCR();
 }
 
 bool A64EmitContext::AccurateNaN() const {
diff --git a/src/backend/x64/a64_emit_x64.h b/src/backend/x64/a64_emit_x64.h
index d349e238..dfb4c196 100644
--- a/src/backend/x64/a64_emit_x64.h
+++ b/src/backend/x64/a64_emit_x64.h
@@ -24,10 +24,7 @@ class RegAlloc;
 struct A64EmitContext final : public EmitContext {
     A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block);
     A64::LocationDescriptor Location() const;
-    FP::RoundingMode FPSCR_RMode() const override;
-    u32 FPCR() const override;
-    bool FPSCR_FTZ() const override;
-    bool FPSCR_DN() const override;
+    FP::FPCR FPCR() const override;
     bool AccurateNaN() const override;
 
     const A64::UserConfig& conf;
diff --git a/src/backend/x64/emit_x64.h b/src/backend/x64/emit_x64.h
index ca5f539c..eb52418c 100644
--- a/src/backend/x64/emit_x64.h
+++ b/src/backend/x64/emit_x64.h
@@ -18,6 +18,7 @@
 
 #include "backend/x64/reg_alloc.h"
 #include "common/bit_util.h"
+#include "common/fp/fpcr.h"
 #include "common/fp/rounding_mode.h"
 #include "frontend/ir/location_descriptor.h"
 #include "frontend/ir/terminal.h"
@@ -44,10 +45,7 @@ struct EmitContext {
 
     void EraseInstruction(IR::Inst* inst);
 
-    virtual FP::RoundingMode FPSCR_RMode() const = 0;
-    virtual u32 FPCR() const = 0;
-    virtual bool FPSCR_FTZ() const = 0;
-    virtual bool FPSCR_DN() const = 0;
+    virtual FP::FPCR FPCR() const = 0;
     virtual bool AccurateNaN() const { return true; }
 
     RegAlloc& reg_alloc;
diff --git a/src/backend/x64/emit_x64_floating_point.cpp b/src/backend/x64/emit_x64_floating_point.cpp
index 9ffc248a..f2e6b131 100644
--- a/src/backend/x64/emit_x64_floating_point.cpp
+++ b/src/backend/x64/emit_x64_floating_point.cpp
@@ -251,7 +251,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
 
     Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
 
-    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
+    if (ctx.AccurateNaN() && !ctx.FPCR().DN()) {
         end = ProcessNaN(code, result);
     }
     if constexpr (std::is_member_function_pointer_v) {
@@ -259,7 +259,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
     } else {
         fn(result);
     }
-    if (ctx.FPSCR_DN()) {
+    if (ctx.FPCR().DN()) {
         ForceToDefaultNaN(code, result);
     } else if (ctx.AccurateNaN()) {
         PostProcessNaN(code, result, ctx.reg_alloc.ScratchXmm());
@@ -275,7 +275,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    if (ctx.FPSCR_DN() || !ctx.AccurateNaN()) {
+    if (ctx.FPCR().DN() || !ctx.AccurateNaN()) {
         const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
         const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
 
@@ -405,7 +405,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
     const Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
 
-    if (ctx.FPSCR_FTZ()) {
+    if (ctx.FPCR().FZ()) {
         DenormalsAreZero(code, result, gpr_scratch);
         DenormalsAreZero(code, operand, gpr_scratch);
     }
@@ -433,7 +433,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     code.jmp(end);
 
     code.L(nan);
-    if (ctx.FPSCR_DN() || !ctx.AccurateNaN()) {
+    if (ctx.FPCR().DN() || !ctx.AccurateNaN()) {
         code.movaps(result, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan));
         code.jmp(end);
     } else {
@@ -467,7 +467,7 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
         }
     };
 
-    if (ctx.FPSCR_FTZ()) {
+    if (ctx.FPCR().FZ()) {
         DenormalsAreZero(code, op1, tmp.cvt64());
         DenormalsAreZero(code, op2, tmp.cvt64());
     }
@@ -511,7 +511,7 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
         move_to_tmp(op1);
         code.bt(tmp, mantissa_msb_bit);
         code.jc(maybe_both_nan);
-        if (ctx.FPSCR_DN()) {
+        if (ctx.FPCR().DN()) {
             code.L(snan);
             code.movaps(op2, code.MConst(xword, FP::FPInfo::DefaultNaN()));
             code.jmp(end);
@@ -525,7 +525,7 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
         code.L(maybe_both_nan);
         FCODE(ucomis)(op2, op2);
         code.jnp(end, code.T_NEAR);
-        if (ctx.FPSCR_DN()) {
+        if (ctx.FPCR().DN()) {
             code.jmp(snan);
         } else {
             move_to_tmp(op2);
@@ -620,7 +620,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         code.movq(code.ABI_PARAM1, operand1);
         code.movq(code.ABI_PARAM2, operand2);
         code.movq(code.ABI_PARAM3, operand3);
-        code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR());
+        code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
 #ifdef _WIN32
         code.sub(rsp, 16 + ABI_SHADOW_SPACE);
         code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
@@ -644,7 +644,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(inst, args[0], args[1], args[2]);
-    code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
 #ifdef _WIN32
     code.sub(rsp, 16 + ABI_SHADOW_SPACE);
     code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
@@ -671,7 +671,7 @@ static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    const bool do_default_nan = ctx.FPSCR_DN() || !ctx.AccurateNaN();
+    const bool do_default_nan = ctx.FPCR().DN() || !ctx.AccurateNaN();
 
     const Xbyak::Xmm op1 = ctx.reg_alloc.UseXmm(args[0]);
     const Xbyak::Xmm op2 = ctx.reg_alloc.UseXmm(args[1]);
@@ -727,7 +727,7 @@ template
 static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(inst, args[0]);
-    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
     code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.CallFunction(&FP::FPRecipEstimate);
 }
@@ -744,7 +744,7 @@ template
 static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(inst, args[0]);
-    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
     code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.CallFunction(&FP::FPRecipExponent);
 }
@@ -787,7 +787,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
         ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
         code.movq(code.ABI_PARAM1, operand1);
         code.movq(code.ABI_PARAM2, operand2);
-        code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR());
+        code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
         code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
         code.CallFunction(&FP::FPRecipStepFused);
         code.movq(result, code.ABI_RETURN);
@@ -803,7 +803,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(inst, args[0], args[1]);
-    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
     code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.CallFunction(&FP::FPRecipStepFused);
 }
@@ -886,7 +886,7 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(inst, args[0]);
     code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
-    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
     code.CallFunction(lut.at(std::make_tuple(fsize, rounding, exact)));
 }
 
@@ -902,7 +902,7 @@ template
 static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(inst, args[0]);
-    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
     code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.CallFunction(&FP::FPRSqrtEstimate);
 }
@@ -953,7 +953,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
         ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
         code.movq(code.ABI_PARAM1, operand1);
         code.movq(code.ABI_PARAM2, operand2);
-        code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR());
+        code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
         code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
         code.CallFunction(&FP::FPRSqrtStepFused);
         code.movq(result, code.ABI_RETURN);
@@ -969,7 +969,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(inst, args[0], args[1]);
-    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
     code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.CallFunction(&FP::FPRSqrtStepFused);
 }
@@ -1062,7 +1062,7 @@ void EmitX64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) {
     const auto rounding_mode = static_cast(args[1].GetImmediateU8());
 
     ctx.reg_alloc.HostCall(inst, args[0]);
-    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
     code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode));
     code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.CallFunction(&FP::FPConvert);
@@ -1073,7 +1073,7 @@ void EmitX64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) {
     const auto rounding_mode = static_cast(args[1].GetImmediateU8());
 
     ctx.reg_alloc.HostCall(inst, args[0]);
-    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
     code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode));
     code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.CallFunction(&FP::FPConvert);
@@ -1084,17 +1084,17 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
     const auto rounding_mode = static_cast(args[1].GetImmediateU8());
 
     // We special-case the non-IEEE-defined ToOdd rounding mode.
-    if (rounding_mode == ctx.FPSCR_RMode() && rounding_mode != FP::RoundingMode::ToOdd) {
+    if (rounding_mode == ctx.FPCR().RMode() && rounding_mode != FP::RoundingMode::ToOdd) {
         const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
 
         code.cvtss2sd(result, result);
-        if (ctx.FPSCR_DN()) {
+        if (ctx.FPCR().DN()) {
             ForceToDefaultNaN<64>(code, result);
         }
         ctx.reg_alloc.DefineValue(inst, result);
     } else {
         ctx.reg_alloc.HostCall(inst, args[0]);
-        code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR());
+        code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
         code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode));
         code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
         code.CallFunction(&FP::FPConvert);
@@ -1106,7 +1106,7 @@ void EmitX64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) {
     const auto rounding_mode = static_cast(args[1].GetImmediateU8());
 
     ctx.reg_alloc.HostCall(inst, args[0]);
-    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
     code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode));
     code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.CallFunction(&FP::FPConvert);
@@ -1117,7 +1117,7 @@ void EmitX64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) {
     const auto rounding_mode = static_cast(args[1].GetImmediateU8());
 
     ctx.reg_alloc.HostCall(inst, args[0]);
-    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
     code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode));
     code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.CallFunction(&FP::FPConvert);
@@ -1128,17 +1128,17 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
     const auto rounding_mode = static_cast(args[1].GetImmediateU8());
 
     // We special-case the non-IEEE-defined ToOdd rounding mode.
-    if (rounding_mode == ctx.FPSCR_RMode() && rounding_mode != FP::RoundingMode::ToOdd) {
+    if (rounding_mode == ctx.FPCR().RMode() && rounding_mode != FP::RoundingMode::ToOdd) {
         const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
 
         code.cvtsd2ss(result, result);
-        if (ctx.FPSCR_DN()) {
+        if (ctx.FPCR().DN()) {
             ForceToDefaultNaN<32>(code, result);
         }
         ctx.reg_alloc.DefineValue(inst, result);
     } else {
         ctx.reg_alloc.HostCall(inst, args[0]);
-        code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR());
+        code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
         code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode));
         code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
         code.CallFunction(&FP::FPConvert);
@@ -1268,7 +1268,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 
     ctx.reg_alloc.HostCall(inst, args[0]);
     code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
-    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
     code.CallFunction(lut.at(std::make_tuple(fbits, rounding)));
 }
 
@@ -1311,7 +1311,7 @@ void EmitX64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
     code.cvtsi2ss(result, from);
 
@@ -1329,7 +1329,7 @@ void EmitX64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
     if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512F)) {
         const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
@@ -1356,7 +1356,7 @@ void EmitX64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
     code.cvtsi2sd(result, from);
 
@@ -1375,7 +1375,7 @@ void EmitX64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
     code.cvtsi2sd(result, from);
 
@@ -1394,7 +1394,7 @@ void EmitX64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
     code.cvtsi2ss(result, from);
 
@@ -1412,7 +1412,7 @@ void EmitX64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
     if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512F)) {
         const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
@@ -1439,7 +1439,7 @@ void EmitX64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
     if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512F)) {
         code.vcvtusi2sd(result, result, from);
@@ -1451,7 +1451,7 @@ void EmitX64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
         code.subpd(tmp, code.MConst(xword, 0x4330000000000000, 0x4530000000000000));
         code.pshufd(result, tmp, 0b01001110);
         code.addpd(result, tmp);
-        if (ctx.FPSCR_RMode() == FP::RoundingMode::TowardsMinusInfinity) {
+        if (ctx.FPCR().RMode() == FP::RoundingMode::TowardsMinusInfinity) {
             code.pand(result, code.MConst(xword, f64_non_sign_mask));
         }
     }
@@ -1470,7 +1470,7 @@ void EmitX64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
     if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512F)) {
         const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
diff --git a/src/backend/x64/emit_x64_vector_floating_point.cpp b/src/backend/x64/emit_x64_vector_floating_point.cpp
index f0f9520f..5a5e0629 100644
--- a/src/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend/x64/emit_x64_vector_floating_point.cpp
@@ -112,7 +112,7 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, std::array
 
 void ForceToDefaultNaN(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm result) {
-    if (ctx.FPSCR_DN()) {
+    if (ctx.FPCR().DN()) {
         const Xbyak::Xmm nan_mask = xmm0;
         if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
             FCODE(vcmpunordp)(nan_mask, result, result);
@@ -199,8 +199,8 @@ void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) {
 template
 void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list to_daz, Xbyak::Xmm tmp) {
-    if (ctx.FPSCR_FTZ()) {
-        if (ctx.FPSCR_RMode() != FP::RoundingMode::TowardsMinusInfinity) {
+    if (ctx.FPCR().FZ()) {
+        if (ctx.FPCR().RMode() != FP::RoundingMode::TowardsMinusInfinity) {
             code.movaps(tmp, GetNegativeZeroVector(code));
         } else {
             code.xorps(tmp, tmp);
         }
@@ -271,7 +271,7 @@ template class Indexer, typename Function>
 void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn, typename NaNHandler::function_type nan_handler = NaNHandler::GetDefault()) {
     static_assert(fsize == 32 || fsize == 64, "fsize must be either 32 or 64");
 
-    if (!ctx.AccurateNaN() || ctx.FPSCR_DN()) {
+    if (!ctx.AccurateNaN() || ctx.FPCR().DN()) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
         Xbyak::Xmm result;
@@ -320,7 +320,7 @@ template class Indexer, typename Function>
 void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn, typename NaNHandler::function_type nan_handler = NaNHandler::GetDefault()) {
     static_assert(fsize == 32 || fsize == 64, "fsize must be either 32 or 64");
 
-    if (!ctx.AccurateNaN() || ctx.FPSCR_DN()) {
+    if (!ctx.AccurateNaN() || ctx.FPCR().DN()) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
         const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
@@ -373,7 +373,7 @@ void EmitTwoOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lamb
     code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
     code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
     code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
-    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
     code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
 
     code.movaps(xword[code.ABI_PARAM2], arg1);
@@ -395,7 +395,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
     code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
     code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
     code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
-    code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
     code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], rax);
 #else
@@ -404,7 +404,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
     code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
     code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
     code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
-    code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
     code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
 #endif
 
@@ -446,7 +446,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya
     code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
     code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
     code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 4 * 16]);
-    code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], ctx.FPCR());
+    code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], ctx.FPCR().Value());
     code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.mov(qword[rsp + ABI_SHADOW_SPACE + 8], rax);
 #else
@@ -456,7 +456,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya
     code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
     code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
     code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
-    code.mov(code.ABI_PARAM5.cvt32(), ctx.FPCR());
+    code.mov(code.ABI_PARAM5.cvt32(), ctx.FPCR().Value());
     code.lea(code.ABI_PARAM6, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
 #endif
 
@@ -565,7 +565,7 @@ void EmitX64::EmitFPVectorFromSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
     const int fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
     code.cvtdq2ps(xmm, xmm);
 
@@ -581,7 +581,7 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
     const int fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
     if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ)) {
         code.vcvtqq2pd(xmm, xmm);
@@ -629,7 +629,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
     const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
     const int fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
     if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)) {
         code.vcvtudq2ps(xmm, xmm);
@@ -664,7 +664,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
         code.mulps(xmm, GetVectorOf<32>(code, static_cast(127 - fbits) << 23));
     }
 
-    if (ctx.FPSCR_RMode() == FP::RoundingMode::TowardsMinusInfinity) {
+    if (ctx.FPCR().RMode() == FP::RoundingMode::TowardsMinusInfinity) {
         code.pand(xmm, code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF));
     }
 
@@ -676,7 +676,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
     const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
     const int fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPSCR_RMode());
+    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
     if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)) {
         code.vcvtuqq2pd(xmm, xmm);
@@ -728,7 +728,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
         code.mulpd(xmm, GetVectorOf<64>(code, static_cast(1023 - fbits) << 52));
     }
 
-    if (ctx.FPSCR_RMode() == FP::RoundingMode::TowardsMinusInfinity) {
+    if (ctx.FPCR().RMode() == FP::RoundingMode::TowardsMinusInfinity) {
         code.pand(xmm, code.MConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF));
     }
 
@@ -777,10 +777,10 @@ void EmitX64::EmitFPVectorGreaterEqual64(EmitContext& ctx, IR::Inst* inst) {
 
 template
 static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-    if (ctx.FPSCR_DN()) {
+    if (ctx.FPCR().DN()) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
-        const Xbyak::Xmm xmm_b = ctx.FPSCR_FTZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
+        const Xbyak::Xmm xmm_b = ctx.FPCR().FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
 
         const Xbyak::Xmm mask = xmm0;
         const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
@@ -833,7 +833,7 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     const Xbyak::Xmm mask = xmm0;
     const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
 
-    if (ctx.FPSCR_FTZ()) {
+    if (ctx.FPCR().FZ()) {
         const Xbyak::Xmm prev_xmm_b = xmm_b;
         xmm_b = ctx.reg_alloc.ScratchXmm();
         code.movaps(xmm_b, prev_xmm_b);
@@ -960,7 +960,7 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    if (ctx.FPSCR_DN() && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
+    if (ctx.FPCR().DN() && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
        const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
        const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[1]);
        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
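Note: the sketch below is not part of the patch. It is a minimal, self-contained illustration of the interface change, using a hypothetical Fpcr class rather than dynarmic's actual common/fp/fpcr.h. It shows why exposing one typed FPCR value lets emitters write ctx.FPCR().DN(), ctx.FPCR().FZ(), ctx.FPCR().RMode() and ctx.FPCR().Value() instead of the four FPSCR_* virtuals removed above; the bit positions assumed here follow the architectural FPCR/FPSCR layout (RMode at bits 23:22, FZ at bit 24, DN at bit 25).

    // Illustrative sketch only -- a stand-in for dynarmic's FP::FPCR, not the real class.
    #include <cstdint>
    #include <cstdio>

    enum class RoundingMode : std::uint32_t {
        ToNearest_TieEven    = 0b00,
        TowardsPlusInfinity  = 0b01,
        TowardsMinusInfinity = 0b10,
        TowardsZero          = 0b11,
    };

    // Typed wrapper around the raw 32-bit control register value.
    class Fpcr {
    public:
        explicit Fpcr(std::uint32_t value) : value_(value) {}

        // Architectural FPCR/FPSCR layout: RMode = bits [23:22], FZ = bit 24, DN = bit 25.
        RoundingMode RMode() const { return static_cast<RoundingMode>((value_ >> 22) & 0b11); }
        bool FZ() const { return ((value_ >> 24) & 1) != 0; }
        bool DN() const { return ((value_ >> 25) & 1) != 0; }
        std::uint32_t Value() const { return value_; }

    private:
        std::uint32_t value_;
    };

    int main() {
        // DN set, rounding towards minus infinity.
        const Fpcr fpcr{(1u << 25) | (0b10u << 22)};

        // Old style: separate ctx.FPSCR_DN(), ctx.FPSCR_FTZ(), ctx.FPSCR_RMode(), ctx.FPCR().
        // New style: one accessor returning the typed value; callers pick the field they need.
        std::printf("DN=%d FZ=%d RMode=%u raw=0x%08x\n",
                    fpcr.DN() ? 1 : 0,
                    fpcr.FZ() ? 1 : 0,
                    static_cast<unsigned>(fpcr.RMode()),
                    static_cast<unsigned>(fpcr.Value()));
        return 0;
    }

Host-call fallbacks that previously loaded the raw u32 directly (code.mov(..., ctx.FPCR())) correspondingly pass ctx.FPCR().Value() after this change, as the hunks in emit_x64_floating_point.cpp and emit_x64_vector_floating_point.cpp above show.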