diff --git a/src/backend/x64/emit_x64_floating_point.cpp b/src/backend/x64/emit_x64_floating_point.cpp
index 8386338d..f921041b 100644
--- a/src/backend/x64/emit_x64_floating_point.cpp
+++ b/src/backend/x64/emit_x64_floating_point.cpp
@@ -1228,74 +1228,77 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     const size_t fbits = args[1].GetImmediateU8();
     const auto rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
 
-    const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode);
-
-    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && round_imm){
-        const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
-        const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
-        const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
-
-        if constexpr (fsize == 64) {
-            if (fbits != 0) {
-                const u64 scale_factor = static_cast<u64>((fbits + 1023) << 52);
-                code.mulsd(src, code.MConst(xword, scale_factor));
-            }
-
-            code.roundsd(src, src, *round_imm);
-        } else {
-            if (fbits != 0) {
-                const u32 scale_factor = static_cast<u32>((fbits + 127) << 23);
-                code.mulss(src, code.MConst(xword, scale_factor));
-            }
-
-            code.roundss(src, src, *round_imm);
-            code.cvtss2sd(src, src);
-        }
-
-        ZeroIfNaN<64>(code, src, scratch);
-
-        if constexpr (isize == 64) {
-            Xbyak::Label saturate_max, end;
-
-            if (unsigned_) {
-                code.maxsd(src, code.MConst(xword, f64_min_u64));
-            }
-            code.movsd(scratch, code.MConst(xword, unsigned_ ? f64_max_u64_lim : f64_max_s64_lim));
-            code.comisd(scratch, src);
-            code.jna(saturate_max, code.T_NEAR);
-            if (unsigned_) {
-                Xbyak::Label below_max;
-
-                code.movsd(scratch, code.MConst(xword, f64_max_s64_lim));
-                code.comisd(src, scratch);
-                code.jb(below_max);
-                code.subsd(src, scratch);
-                code.cvttsd2si(result, src);
-                code.btc(result, 63);
-                code.jmp(end);
-                code.L(below_max);
-            }
-            code.cvttsd2si(result, src); // 64 bit gpr
-            code.L(end);
-
-            code.SwitchToFarCode();
-            code.L(saturate_max);
-            code.mov(result, unsigned_ ? 0xFFFF'FFFF'FFFF'FFFF : 0x7FFF'FFFF'FFFF'FFFF);
-            code.jmp(end, code.T_NEAR);
-            code.SwitchToNearCode();
-        } else {
-            code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u32 : f64_max_s32));
-            if (unsigned_) {
-                code.maxsd(src, code.MConst(xword, f64_min_u32));
-                code.cvttsd2si(result, src); // 64 bit gpr
-            } else {
-                code.cvttsd2si(result.cvt32(), src);
-            }
-        }
-
-        ctx.reg_alloc.DefineValue(inst, result);
-
-        return;
+    if constexpr (fsize != 16) {
+        const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode);
+
+        if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && round_imm){
+            const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
+            const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
+            const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
+
+            if constexpr (fsize == 64) {
+                if (fbits != 0) {
+                    const u64 scale_factor = static_cast<u64>((fbits + 1023) << 52);
+                    code.mulsd(src, code.MConst(xword, scale_factor));
+                }
+
+                code.roundsd(src, src, *round_imm);
+            } else {
+                if (fbits != 0) {
+                    const u32 scale_factor = static_cast<u32>((fbits + 127) << 23);
+                    code.mulss(src, code.MConst(xword, scale_factor));
+                }
+
+                code.roundss(src, src, *round_imm);
+                code.cvtss2sd(src, src);
+            }
+
+            ZeroIfNaN<64>(code, src, scratch);
+
+            if constexpr (isize == 64) {
+                Xbyak::Label saturate_max, end;
+
+                if (unsigned_) {
+                    code.maxsd(src, code.MConst(xword, f64_min_u64));
+                }
+                code.movsd(scratch, code.MConst(xword, unsigned_ ? f64_max_u64_lim : f64_max_s64_lim));
+                code.comisd(scratch, src);
+                code.jna(saturate_max, code.T_NEAR);
+                if (unsigned_) {
+                    Xbyak::Label below_max;
+
+                    code.movsd(scratch, code.MConst(xword, f64_max_s64_lim));
+                    code.comisd(src, scratch);
+                    code.jb(below_max);
+                    code.subsd(src, scratch);
+                    code.cvttsd2si(result, src);
+                    code.btc(result, 63);
+                    code.jmp(end);
+                    code.L(below_max);
+                }
+                code.cvttsd2si(result, src); // 64 bit gpr
+                code.L(end);
+
+                code.SwitchToFarCode();
+                code.L(saturate_max);
+                code.mov(result, unsigned_ ? 0xFFFF'FFFF'FFFF'FFFF : 0x7FFF'FFFF'FFFF'FFFF);
+                code.jmp(end, code.T_NEAR);
+                code.SwitchToNearCode();
+            } else {
+                code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u32 : f64_max_s32));
+                if (unsigned_) {
+                    code.maxsd(src, code.MConst(xword, f64_min_u32));
+                    code.cvttsd2si(result, src); // 64 bit gpr
+                } else {
+                    code.cvttsd2si(result.cvt32(), src);
+                }
+            }
+
+            ctx.reg_alloc.DefineValue(inst, result);
+
+            return;
+        }
     }
 
     using fbits_list = mp::vllift<std::make_index_sequence<isize + 1>>;
@@ -1351,6 +1354,22 @@ void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<64, true, 64>(code, ctx, inst);
 }
 
+void EmitX64::EmitFPHalfToFixedS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<16, false, 32>(code, ctx, inst);
+}
+
+void EmitX64::EmitFPHalfToFixedS64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<16, false, 64>(code, ctx, inst);
+}
+
+void EmitX64::EmitFPHalfToFixedU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<16, true, 32>(code, ctx, inst);
+}
+
+void EmitX64::EmitFPHalfToFixedU64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<16, true, 64>(code, ctx, inst);
+}
+
 void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<32, false, 32>(code, ctx, inst);
 }
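Note on the scalar fast path above: the new `fsize != 16` guard exists because the SSE4.1 sequence (mulsd/mulss, roundsd/roundss, cvttsd2si) only handles f32/f64 sources, so the FPHalfToFixed* opcodes always fall through to the interpreted FPToFixed fallback selected via the fbits/rounding lut below the guard. The conversion itself is scale-then-round; a minimal sketch of the semantics (illustrative C++, not dynarmic code — names are mine):

```cpp
#include <cmath>
#include <cstdint>

// The MConst scale constant built above: 2^fbits as an IEEE-754 double has a
// biased exponent of fbits + 1023 and a zero mantissa, i.e. (fbits + 1023) << 52.
uint64_t scale_constant_bits(unsigned fbits) {
    return static_cast<uint64_t>(fbits + 1023) << 52;
}

// What mulsd + roundsd + cvttsd2si compute: round(x * 2^fbits) as an integer.
// Saturation and FPSR exception flags are omitted here.
int64_t double_to_fixed_s64(double x, unsigned fbits) {
    const double scale = std::ldexp(1.0, static_cast<int>(fbits)); // exactly 2^fbits
    return static_cast<int64_t>(std::nearbyint(x * scale));
}
```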
diff --git a/src/backend/x64/emit_x64_vector_floating_point.cpp b/src/backend/x64/emit_x64_vector_floating_point.cpp
index 5d31418b..6e4482de 100644
--- a/src/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend/x64/emit_x64_vector_floating_point.cpp
@@ -1361,98 +1361,100 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 
     // TODO: AVX512 implementation
 
-    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) {
-        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
-        const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
-
-        const int round_imm = [&]{
-            switch (rounding) {
-            case FP::RoundingMode::ToNearest_TieEven:
-            default:
-                return 0b00;
-            case FP::RoundingMode::TowardsPlusInfinity:
-                return 0b10;
-            case FP::RoundingMode::TowardsMinusInfinity:
-                return 0b01;
-            case FP::RoundingMode::TowardsZero:
-                return 0b11;
-            }
-        }();
-
-        const auto perform_conversion = [&code, &ctx](const Xbyak::Xmm& src) {
-            // MSVC doesn't allow us to use a [&] capture, so we have to do this instead.
-            (void)ctx;
-
-            if constexpr (fsize == 32) {
-                code.cvttps2dq(src, src);
-            } else {
-                const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr();
-                const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr();
-
-                code.cvttsd2si(lo, src);
-                code.punpckhqdq(src, src);
-                code.cvttsd2si(hi, src);
-                code.movq(src, lo);
-                code.pinsrq(src, hi, 1);
-
-                ctx.reg_alloc.Release(hi);
-                ctx.reg_alloc.Release(lo);
-            }
-        };
-
-        if (fbits != 0) {
-            const u64 scale_factor = fsize == 32
-                                     ? static_cast<u64>(fbits + 127) << 23
-                                     : static_cast<u64>(fbits + 1023) << 52;
-            FCODE(mulp)(src, GetVectorOf<fsize>(code, scale_factor));
-        }
-
-        FCODE(roundp)(src, src, static_cast<u8>(round_imm));
-        ZeroIfNaN<fsize>(code, src);
-
-        constexpr u64 float_upper_limit_signed = fsize == 32 ? 0x4f000000 : 0x43e0000000000000;
-        [[maybe_unused]] constexpr u64 float_upper_limit_unsigned = fsize == 32 ? 0x4f800000 : 0x43f0000000000000;
-
-        if constexpr (unsigned_) {
-            // Zero is minimum
-            code.xorps(xmm0, xmm0);
-            FCODE(cmplep)(xmm0, src);
-            FCODE(andp)(src, xmm0);
-
-            // Will we exceed unsigned range?
-            const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm();
-            code.movaps(exceed_unsigned, GetVectorOf<fsize, float_upper_limit_unsigned>(code));
-            FCODE(cmplep)(exceed_unsigned, src);
-
-            // Will we exceed signed range?
-            const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
-            code.movaps(tmp, GetVectorOf<fsize, float_upper_limit_signed>(code));
-            code.movaps(xmm0, tmp);
-            FCODE(cmplep)(xmm0, src);
-            FCODE(andp)(tmp, xmm0);
-            FCODE(subp)(src, tmp);
-            perform_conversion(src);
-            if constexpr (fsize == 32) {
-                code.pslld(xmm0, 31);
-            } else {
-                code.psllq(xmm0, 63);
-            }
-            FCODE(orp)(src, xmm0);
-
-            // Saturate to max
-            FCODE(orp)(src, exceed_unsigned);
-        } else {
-            constexpr u64 integer_max = static_cast<FPT>(std::numeric_limits<std::conditional_t<unsigned_, FPT, std::make_signed_t<FPT>>>::max());
-
-            code.movaps(xmm0, GetVectorOf<fsize, float_upper_limit_signed>(code));
-            FCODE(cmplep)(xmm0, src);
-            perform_conversion(src);
-            FCODE(blendvp)(src, GetVectorOf<fsize, integer_max>(code));
-        }
-
-        ctx.reg_alloc.DefineValue(inst, src);
-        return;
-    }
+    if constexpr (fsize != 16) {
+        if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) {
+            auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+            const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
+
+            const int round_imm = [&]{
+                switch (rounding) {
+                case FP::RoundingMode::ToNearest_TieEven:
+                default:
+                    return 0b00;
+                case FP::RoundingMode::TowardsPlusInfinity:
+                    return 0b10;
+                case FP::RoundingMode::TowardsMinusInfinity:
+                    return 0b01;
+                case FP::RoundingMode::TowardsZero:
+                    return 0b11;
+                }
+            }();
+
+            const auto perform_conversion = [&code, &ctx](const Xbyak::Xmm& src) {
+                // MSVC doesn't allow us to use a [&] capture, so we have to do this instead.
+                (void)ctx;
+
+                if constexpr (fsize == 32) {
+                    code.cvttps2dq(src, src);
+                } else {
+                    const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr();
+                    const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr();
+
+                    code.cvttsd2si(lo, src);
+                    code.punpckhqdq(src, src);
+                    code.cvttsd2si(hi, src);
+                    code.movq(src, lo);
+                    code.pinsrq(src, hi, 1);
+
+                    ctx.reg_alloc.Release(hi);
+                    ctx.reg_alloc.Release(lo);
+                }
+            };
+
+            if (fbits != 0) {
+                const u64 scale_factor = fsize == 32
+                                         ? static_cast<u64>(fbits + 127) << 23
+                                         : static_cast<u64>(fbits + 1023) << 52;
+                FCODE(mulp)(src, GetVectorOf<fsize>(code, scale_factor));
+            }
+
+            FCODE(roundp)(src, src, static_cast<u8>(round_imm));
+            ZeroIfNaN<fsize>(code, src);
+
+            constexpr u64 float_upper_limit_signed = fsize == 32 ? 0x4f000000 : 0x43e0000000000000;
+            [[maybe_unused]] constexpr u64 float_upper_limit_unsigned = fsize == 32 ? 0x4f800000 : 0x43f0000000000000;
+
+            if constexpr (unsigned_) {
+                // Zero is minimum
+                code.xorps(xmm0, xmm0);
+                FCODE(cmplep)(xmm0, src);
+                FCODE(andp)(src, xmm0);
+
+                // Will we exceed unsigned range?
+                const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm();
+                code.movaps(exceed_unsigned, GetVectorOf<fsize, float_upper_limit_unsigned>(code));
+                FCODE(cmplep)(exceed_unsigned, src);
+
+                // Will we exceed signed range?
+                const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+                code.movaps(tmp, GetVectorOf<fsize, float_upper_limit_signed>(code));
+                code.movaps(xmm0, tmp);
+                FCODE(cmplep)(xmm0, src);
+                FCODE(andp)(tmp, xmm0);
+                FCODE(subp)(src, tmp);
+                perform_conversion(src);
+                if constexpr (fsize == 32) {
+                    code.pslld(xmm0, 31);
+                } else {
+                    code.psllq(xmm0, 63);
+                }
+                FCODE(orp)(src, xmm0);
+
+                // Saturate to max
+                FCODE(orp)(src, exceed_unsigned);
+            } else {
+                constexpr u64 integer_max = static_cast<FPT>(std::numeric_limits<std::conditional_t<unsigned_, FPT, std::make_signed_t<FPT>>>::max());
+
+                code.movaps(xmm0, GetVectorOf<fsize, float_upper_limit_signed>(code));
+                FCODE(cmplep)(xmm0, src);
+                perform_conversion(src);
+                FCODE(blendvp)(src, GetVectorOf<fsize, integer_max>(code));
+            }
+
+            ctx.reg_alloc.DefineValue(inst, src);
+            return;
+        }
+    }
 
     using fbits_list = mp::vllift<std::make_index_sequence<fsize + 1>>;
@@ -1489,6 +1491,10 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     EmitTwoOpFallback(code, ctx, inst, lut.at(std::make_tuple(fbits, rounding)));
 }
 
+void EmitX64::EmitFPVectorToSignedFixed16(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPVectorToFixed<16, false>(code, ctx, inst);
+}
+
 void EmitX64::EmitFPVectorToSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
     EmitFPVectorToFixed<32, false>(code, ctx, inst);
 }
@@ -1497,6 +1503,10 @@ void EmitX64::EmitFPVectorToSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPVectorToFixed<64, false>(code, ctx, inst);
 }
 
+void EmitX64::EmitFPVectorToUnsignedFixed16(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPVectorToFixed<16, true>(code, ctx, inst);
+}
+
 void EmitX64::EmitFPVectorToUnsignedFixed32(EmitContext& ctx, IR::Inst* inst) {
     EmitFPVectorToFixed<32, true>(code, ctx, inst);
 }
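The unsigned saturation dance in the vector emitter is easiest to read in scalar form: pre-AVX-512 x86 only converts floats to *signed* integers (cvttsd2si, cvttps2dq), so values at or above 2^63 are biased down by 2^63, converted, and then have the top bit restored (btc in the scalar emitter, pslld/psllq + orp here). A hedged scalar equivalent (illustrative only; NaN handling and saturation are done by the surrounding cmplep/orp masks):

```cpp
#include <cstdint>

uint64_t double_to_u64(double x) {
    const double two_pow_63 = 9223372036854775808.0; // 2^63
    if (x >= two_pow_63) {
        // Bias into signed range, convert, then set the high bit again.
        return static_cast<uint64_t>(static_cast<int64_t>(x - two_pow_63)) | (uint64_t{1} << 63);
    }
    return static_cast<uint64_t>(static_cast<int64_t>(x)); // already fits in s64
}
```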
diff --git a/src/common/fp/op/FPToFixed.cpp b/src/common/fp/op/FPToFixed.cpp
index 97f549e6..9240d5e5 100644
--- a/src/common/fp/op/FPToFixed.cpp
+++ b/src/common/fp/op/FPToFixed.cpp
@@ -95,6 +95,7 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou
     return int_result & Common::Ones<u64>(ibits);
 }
 
+template u64 FPToFixed<u16>(size_t ibits, u16 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 template u64 FPToFixed<u32>(size_t ibits, u32 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 template u64 FPToFixed<u64>(size_t ibits, u64 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
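The new u16 instantiation is what lets the backends' fallback luts evaluate half-precision conversions in soft float. A hypothetical call (include paths and default-constructed FPCR/FPSR are my assumptions, not part of this change):

```cpp
#include "common/common_types.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/op/FPToFixed.h"

u64 example() {
    Dynarmic::FP::FPCR fpcr;
    Dynarmic::FP::FPSR fpsr;
    // 0x4248 is the f16 encoding of 3.140625; with fbits = 4 this computes
    // ToNearest(3.140625 * 2^4) = ToNearest(50.25) = 50.
    return Dynarmic::FP::FPToFixed<u16>(32, u16{0x4248}, 4, false, fpcr,
                                        Dynarmic::FP::RoundingMode::ToNearest_TieEven, fpsr);
}
```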
diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp
index 434658b1..132db71a 100644
--- a/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp
+++ b/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp
@@ -69,7 +69,7 @@ bool TranslatorVisitor::UCVTF_float_fix(bool sf, Imm<2> type, Imm<6> scale, Reg
 bool TranslatorVisitor::FCVTZS_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec Vn, Reg Rd) {
     const size_t intsize = sf ? 64 : 32;
     const auto fltsize = FPGetDataSize(type);
-    if (!fltsize || *fltsize == 16) {
+    if (!fltsize) {
         return UnallocatedEncoding();
     }
     if (!sf && !scale.Bit<5>()) {
@@ -77,7 +77,7 @@ bool TranslatorVisitor::FCVTZS_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec
     }
 
     const u8 fracbits = 64 - scale.ZeroExtend<u8>();
-    const IR::U32U64 fltval = V_scalar(*fltsize, Vn);
+    const IR::U16U32U64 fltval = V_scalar(*fltsize, Vn);
     IR::U32U64 intval;
     if (intsize == 32) {
         intval = ir.FPToFixedS32(fltval, fracbits, FP::RoundingMode::TowardsZero);
@@ -94,7 +94,7 @@ bool TranslatorVisitor::FCVTZU_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec Vn, Reg Rd) {
     const size_t intsize = sf ? 64 : 32;
     const auto fltsize = FPGetDataSize(type);
-    if (!fltsize || *fltsize == 16) {
+    if (!fltsize) {
         return UnallocatedEncoding();
     }
     if (!sf && !scale.Bit<5>()) {
@@ -102,7 +102,7 @@ bool TranslatorVisitor::FCVTZU_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec
     }
 
     const u8 fracbits = 64 - scale.ZeroExtend<u8>();
-    const IR::U32U64 fltval = V_scalar(*fltsize, Vn);
+    const IR::U16U32U64 fltval = V_scalar(*fltsize, Vn);
     IR::U32U64 intval;
     if (intsize == 32) {
         intval = ir.FPToFixedU32(fltval, fracbits, FP::RoundingMode::TowardsZero);
diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp
index 15fabe05..be307576 100644
--- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp
+++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp
@@ -119,11 +119,11 @@ bool TranslatorVisitor::FMOV_float_gen(bool sf, Imm<2> type, Imm<1> rmode_0, Imm
 static bool FloaingPointConvertSignedInteger(TranslatorVisitor& v, bool sf, Imm<2> type, Vec Vn, Reg Rd, FP::RoundingMode rounding_mode) {
     const size_t intsize = sf ? 64 : 32;
     const auto fltsize = FPGetDataSize(type);
-    if (!fltsize || *fltsize == 16) {
+    if (!fltsize) {
         return v.UnallocatedEncoding();
     }
 
-    const IR::U32U64 fltval = v.V_scalar(*fltsize, Vn);
+    const IR::U16U32U64 fltval = v.V_scalar(*fltsize, Vn);
     IR::U32U64 intval;
 
     if (intsize == 32) {
@@ -135,18 +135,17 @@ static bool FloaingPointConvertSignedInteger(TranslatorVisitor& v, bool sf, Imm<
     }
 
     v.X(intsize, Rd, intval);
-
     return true;
 }
 
 static bool FloaingPointConvertUnsignedInteger(TranslatorVisitor& v, bool sf, Imm<2> type, Vec Vn, Reg Rd, FP::RoundingMode rounding_mode) {
     const size_t intsize = sf ? 64 : 32;
     const auto fltsize = FPGetDataSize(type);
-    if (!fltsize || *fltsize == 16) {
+    if (!fltsize) {
         return v.UnallocatedEncoding();
     }
 
-    const IR::U32U64 fltval = v.V_scalar(*fltsize, Vn);
+    const IR::U16U32U64 fltval = v.V_scalar(*fltsize, Vn);
    IR::U32U64 intval;
 
     if (intsize == 32) {
@@ -158,7 +157,6 @@ static bool FloaingPointConvertUnsignedInteger(TranslatorVisitor& v, bool sf, Im
     }
 
     v.X(intsize, Rd, intval);
-
     return true;
 }
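For readers unfamiliar with the fixed-point encoding these translators handle: the 6-bit scale field stores 64 minus the number of fraction bits, which is why `fracbits = 64 - scale.ZeroExtend<u8>()` above, and why the 32-bit form requires `scale.Bit<5>()` to be set (keeping fracbits ≤ 32). A reference sketch of the FCVTZS semantics being emitted (hypothetical helper of mine, saturation omitted):

```cpp
#include <cmath>
#include <cstdint>

int32_t fcvtzs_w_ref(double value, unsigned scale) {
    const unsigned fracbits = 64 - scale; // e.g. FCVTZS W0, S1, #8 encodes scale = 56
    // RoundingMode::TowardsZero is plain truncation of value * 2^fracbits.
    return static_cast<int32_t>(std::trunc(value * std::ldexp(1.0, static_cast<int>(fracbits))));
}
```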
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 6f9b8715..9dc81799 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -2055,28 +2055,80 @@ U16 IREmitter::FPSingleToHalf(const U32& a, FP::RoundingMode rounding) {
     return Inst<U16>(Opcode::FPSingleToHalf, a, Imm8(static_cast<u8>(rounding)));
 }
 
-U32 IREmitter::FPToFixedS32(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+U32 IREmitter::FPToFixedS32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= 32);
-    const Opcode opcode = a.GetType() == Type::U32 ? Opcode::FPSingleToFixedS32 : Opcode::FPDoubleToFixedS32;
-    return Inst<U32>(opcode, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
+    switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U32>(Opcode::FPHalfToFixedS32, a, fbits_imm, rounding_imm);
+    case Type::U32:
+        return Inst<U32>(Opcode::FPSingleToFixedS32, a, fbits_imm, rounding_imm);
+    case Type::U64:
+        return Inst<U32>(Opcode::FPDoubleToFixedS32, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE();
+        return U32{};
+    }
 }
 
-U64 IREmitter::FPToFixedS64(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+U64 IREmitter::FPToFixedS64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= 64);
-    const Opcode opcode = a.GetType() == Type::U32 ? Opcode::FPSingleToFixedS64 : Opcode::FPDoubleToFixedS64;
-    return Inst<U64>(opcode, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
+    switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U64>(Opcode::FPHalfToFixedS64, a, fbits_imm, rounding_imm);
+    case Type::U32:
+        return Inst<U64>(Opcode::FPSingleToFixedS64, a, fbits_imm, rounding_imm);
+    case Type::U64:
+        return Inst<U64>(Opcode::FPDoubleToFixedS64, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE();
+        return U64{};
+    }
 }
 
-U32 IREmitter::FPToFixedU32(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+U32 IREmitter::FPToFixedU32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= 32);
-    const Opcode opcode = a.GetType() == Type::U32 ? Opcode::FPSingleToFixedU32 : Opcode::FPDoubleToFixedU32;
-    return Inst<U32>(opcode, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
+    switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U32>(Opcode::FPHalfToFixedU32, a, fbits_imm, rounding_imm);
+    case Type::U32:
+        return Inst<U32>(Opcode::FPSingleToFixedU32, a, fbits_imm, rounding_imm);
+    case Type::U64:
+        return Inst<U32>(Opcode::FPDoubleToFixedU32, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE();
+        return U32{};
+    }
 }
 
-U64 IREmitter::FPToFixedU64(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+U64 IREmitter::FPToFixedU64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= 64);
-    const Opcode opcode = a.GetType() == Type::U32 ? Opcode::FPSingleToFixedU64 : Opcode::FPDoubleToFixedU64;
-    return Inst<U64>(opcode, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
+    switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U64>(Opcode::FPHalfToFixedU64, a, fbits_imm, rounding_imm);
+    case Type::U32:
+        return Inst<U64>(Opcode::FPSingleToFixedU64, a, fbits_imm, rounding_imm);
+    case Type::U64:
+        return Inst<U64>(Opcode::FPDoubleToFixedU64, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE();
+        return U64{};
+    }
 }
 
 U32 IREmitter::FPSignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
@@ -2379,24 +2431,38 @@ U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b) {
 
 U128 IREmitter::FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= esize);
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
     switch (esize) {
+    case 16:
+        return Inst<U128>(Opcode::FPVectorToSignedFixed16, a, fbits_imm, rounding_imm);
     case 32:
-        return Inst<U128>(Opcode::FPVectorToSignedFixed32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorToSignedFixed32, a, fbits_imm, rounding_imm);
     case 64:
-        return Inst<U128>(Opcode::FPVectorToSignedFixed64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorToSignedFixed64, a, fbits_imm, rounding_imm);
     }
+
+    UNREACHABLE();
     return {};
 }
 
 U128 IREmitter::FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= esize);
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
     switch (esize) {
+    case 16:
+        return Inst<U128>(Opcode::FPVectorToUnsignedFixed16, a, fbits_imm, rounding_imm);
     case 32:
-        return Inst<U128>(Opcode::FPVectorToUnsignedFixed32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorToUnsignedFixed32, a, fbits_imm, rounding_imm);
     case 64:
-        return Inst<U128>(Opcode::FPVectorToUnsignedFixed64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorToUnsignedFixed64, a, fbits_imm, rounding_imm);
     }
+
+    UNREACHABLE();
     return {};
 }
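Since the IREmitter helpers now take IR::U16U32U64, a translator can pass a half-precision scalar straight through and the opcode is chosen from the operand's IR type. Hypothetical fragment (assumes a TranslatorVisitor `v` and `fltsize`/`Vn`/`fracbits` in scope as in the translator files above; not part of this change):

```cpp
// fltsize is 16, 32 or 64 here; the emitted opcode becomes FPHalfToFixedS32,
// FPSingleToFixedS32 or FPDoubleToFixedS32 respectively.
const IR::U16U32U64 fltval = v.V_scalar(*fltsize, Vn);
const IR::U32 intval = v.ir.FPToFixedS32(fltval, fracbits, FP::RoundingMode::TowardsZero);
```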
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 0b80d924..80f7974d 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -319,10 +319,10 @@ public:
     U32 FPHalfToSingle(const U16& a, FP::RoundingMode rounding);
     U16 FPSingleToHalf(const U32& a, FP::RoundingMode rounding);
     U64 FPSingleToDouble(const U32& a, FP::RoundingMode rounding);
-    U32 FPToFixedS32(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
-    U64 FPToFixedS64(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
-    U32 FPToFixedU32(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
-    U64 FPToFixedU64(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPToFixedS32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPToFixedS64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPToFixedU32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPToFixedU64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
     U32 FPSignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
     U32 FPUnsignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
     U64 FPSignedFixedToDouble(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp
index a6b6bee7..52fc24e1 100644
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -304,6 +304,10 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
     case Opcode::FPDoubleToFixedS64:
     case Opcode::FPDoubleToFixedU32:
    case Opcode::FPDoubleToFixedU64:
+    case Opcode::FPHalfToFixedS32:
+    case Opcode::FPHalfToFixedS64:
+    case Opcode::FPHalfToFixedU32:
+    case Opcode::FPHalfToFixedU64:
     case Opcode::FPSingleToFixedS32:
     case Opcode::FPSingleToFixedS64:
     case Opcode::FPSingleToFixedU32:
@@ -358,6 +362,12 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
     case Opcode::FPVectorSqrt64:
     case Opcode::FPVectorSub32:
     case Opcode::FPVectorSub64:
+    case Opcode::FPVectorToSignedFixed16:
+    case Opcode::FPVectorToSignedFixed32:
+    case Opcode::FPVectorToSignedFixed64:
+    case Opcode::FPVectorToUnsignedFixed16:
+    case Opcode::FPVectorToUnsignedFixed32:
+    case Opcode::FPVectorToUnsignedFixed64:
         return true;
 
     default:
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index fbe6c303..814df3da 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -525,6 +525,10 @@ OPCODE(FPDoubleToFixedS32,                                  U32,            U64,
 OPCODE(FPDoubleToFixedS64,                                  U64,            U64,            U8,             U8                              )
 OPCODE(FPDoubleToFixedU32,                                  U32,            U64,            U8,             U8                              )
 OPCODE(FPDoubleToFixedU64,                                  U64,            U64,            U8,             U8                              )
+OPCODE(FPHalfToFixedS32,                                    U32,            U16,            U8,             U8                              )
+OPCODE(FPHalfToFixedS64,                                    U64,            U16,            U8,             U8                              )
+OPCODE(FPHalfToFixedU32,                                    U32,            U16,            U8,             U8                              )
+OPCODE(FPHalfToFixedU64,                                    U64,            U16,            U8,             U8                              )
 OPCODE(FPSingleToFixedS32,                                  U32,            U32,            U8,             U8                              )
 OPCODE(FPSingleToFixedS64,                                  U64,            U32,            U8,             U8                              )
 OPCODE(FPSingleToFixedU32,                                  U32,            U32,            U8,             U8                              )
@@ -593,8 +597,10 @@ OPCODE(FPVectorSqrt32,                                      U128,           U128
 OPCODE(FPVectorSqrt64,                                      U128,           U128                                            )
 OPCODE(FPVectorSub32,                                       U128,           U128,           U128                            )
 OPCODE(FPVectorSub64,                                       U128,           U128,           U128                            )
+OPCODE(FPVectorToSignedFixed16,                             U128,           U128,           U8,             U8                              )
 OPCODE(FPVectorToSignedFixed32,                             U128,           U128,           U8,             U8                              )
 OPCODE(FPVectorToSignedFixed64,                             U128,           U128,           U8,             U8                              )
+OPCODE(FPVectorToUnsignedFixed16,                           U128,           U128,           U8,             U8                              )
 OPCODE(FPVectorToUnsignedFixed32,                           U128,           U128,           U8,             U8                              )
 OPCODE(FPVectorToUnsignedFixed64,                           U128,           U128,           U8,             U8                              )