Merge pull request #482 from lioncash/fixedfp

A64: Handle half-precision variants of FP->Fixed instructions
2019-04-15 20:08:01 +01:00 · 2019-04-15 20:08:01 +01:00 · 09ee64ea98
commit 09ee64ea98
parent 1e1e9c17c7 64e3d233f4
9 changed files with 283 additions and 173 deletions
--- a/src/backend/x64/emit_x64_floating_point.cpp
+++ b/src/backend/x64/emit_x64_floating_point.cpp
@ -1228,6 +1228,8 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {

    const size_t fbits = args[1].GetImmediateU8();
    const auto rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
+
+    if constexpr (fsize != 16) {
        const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode);

        if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && round_imm){
@ -1297,6 +1299,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {

            return;
        }
+    }

    using fbits_list = mp::vllift<std::make_index_sequence<isize + 1>>;
    using rounding_list = mp::list<
@ -1351,6 +1354,22 @@ void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed<64, true, 64>(code, ctx, inst);
 }

+void EmitX64::EmitFPHalfToFixedS32(EmitContext& ctx, IR::Inst* inst) { // Half-precision operand, signed 32-bit fixed-point result; delegates to the shared EmitFPToFixed template.
+    EmitFPToFixed<16, false, 32>(code, ctx, inst); // Arguments look like <fsize, unsigned, isize> judging by the sibling wrappers - confirm against the template declaration.
+}
+
+void EmitX64::EmitFPHalfToFixedS64(EmitContext& ctx, IR::Inst* inst) { // Half-precision operand, signed 64-bit fixed-point result; delegates to the shared EmitFPToFixed template.
+    EmitFPToFixed<16, false, 64>(code, ctx, inst); // Same shape as the S32 wrapper except the last argument is 64 (presumably isize - confirm).
+}
+
+void EmitX64::EmitFPHalfToFixedU32(EmitContext& ctx, IR::Inst* inst) { // Half-precision operand, unsigned 32-bit fixed-point result; delegates to the shared EmitFPToFixed template.
+    EmitFPToFixed<16, true, 32>(code, ctx, inst); // The middle argument is true only in the U32/U64 wrappers, so it presumably selects unsigned conversion - confirm.
+}
+
+void EmitX64::EmitFPHalfToFixedU64(EmitContext& ctx, IR::Inst* inst) { // Half-precision operand, unsigned 64-bit fixed-point result; delegates to the shared EmitFPToFixed template.
+    EmitFPToFixed<16, true, 64>(code, ctx, inst); // Presumably <fsize = 16, unsigned = true, isize = 64> - confirm against the template declaration.
+}
+
 void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) { // Single-precision operand, signed 32-bit fixed-point result; delegates to the shared EmitFPToFixed template.
    EmitFPToFixed<32, false, 32>(code, ctx, inst); // First argument is 32 here versus 16 in the half-precision wrappers, so it presumably carries the float width - confirm.
 }
--- a/src/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend/x64/emit_x64_vector_floating_point.cpp
@ -1361,6 +1361,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {

    // TODO: AVX512 implementation

+    if constexpr (fsize != 16) {
        if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) {
            auto args = ctx.reg_alloc.GetArgumentInfo(inst);

@ -1454,6 +1455,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
            ctx.reg_alloc.DefineValue(inst, src);
            return;
        }
+    }

    using fbits_list = mp::vllift<std::make_index_sequence<fsize + 1>>;
    using rounding_list = mp::list<
@ -1489,6 +1491,10 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    EmitTwoOpFallback(code, ctx, inst, lut.at(std::make_tuple(fbits, rounding)));
 }

+void EmitX64::EmitFPVectorToSignedFixed16(EmitContext& ctx, IR::Inst* inst) { // Vector conversion with 16-bit elements, signed result; delegates to the shared EmitFPVectorToFixed template.
+    EmitFPVectorToFixed<16, false>(code, ctx, inst); // Arguments look like <fsize, unsigned> judging by the sibling wrappers - confirm against the template declaration.
+}
+
 void EmitX64::EmitFPVectorToSignedFixed32(EmitContext& ctx, IR::Inst* inst) { // Vector conversion with 32-bit elements, signed result; delegates to the shared EmitFPVectorToFixed template.
    EmitFPVectorToFixed<32, false>(code, ctx, inst); // Presumably <fsize = 32, unsigned = false> - confirm against the template declaration.
 }
@ -1497,6 +1503,10 @@ void EmitX64::EmitFPVectorToSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPVectorToFixed<64, false>(code, ctx, inst);
 }

+void EmitX64::EmitFPVectorToUnsignedFixed16(EmitContext& ctx, IR::Inst* inst) { // Vector conversion with 16-bit elements, unsigned result; delegates to the shared EmitFPVectorToFixed template.
+    EmitFPVectorToFixed<16, true>(code, ctx, inst); // The second argument is true only in the Unsigned wrappers, so it presumably selects unsigned conversion - confirm.
+}
+
 void EmitX64::EmitFPVectorToUnsignedFixed32(EmitContext& ctx, IR::Inst* inst) { // Vector conversion with 32-bit elements, unsigned result; delegates to the shared EmitFPVectorToFixed template.
    EmitFPVectorToFixed<32, true>(code, ctx, inst); // Presumably <fsize = 32, unsigned = true> - confirm against the template declaration.
 }
--- a/src/common/fp/op/FPToFixed.cpp
+++ b/src/common/fp/op/FPToFixed.cpp
@ -95,6 +95,7 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou
    return int_result & Common::Ones<u64>(ibits);
 }

+template u64 FPToFixed<u16>(size_t ibits, u16 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); // Explicit instantiation for u16 operands, alongside the u32 and u64 instantiations below.
 template u64 FPToFixed<u32>(size_t ibits, u32 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 template u64 FPToFixed<u64>(size_t ibits, u64 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);

--- a/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp
+++ b/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp
@ -69,7 +69,7 @@ bool TranslatorVisitor::UCVTF_float_fix(bool sf, Imm<2> type, Imm<6> scale, Reg
 bool TranslatorVisitor::FCVTZS_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec Vn, Reg Rd) {
    const size_t intsize = sf ? 64 : 32;
    const auto fltsize = FPGetDataSize(type);
-    if (!fltsize || *fltsize == 16) {
+    if (!fltsize) {
        return UnallocatedEncoding();
    }
    if (!sf && !scale.Bit<5>()) {
@ -77,7 +77,7 @@ bool TranslatorVisitor::FCVTZS_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec
    }
    const u8 fracbits = 64 - scale.ZeroExtend<u8>();

-    const IR::U32U64 fltval = V_scalar(*fltsize, Vn);
+    const IR::U16U32U64 fltval = V_scalar(*fltsize, Vn);
    IR::U32U64 intval;
    if (intsize == 32) {
        intval = ir.FPToFixedS32(fltval, fracbits, FP::RoundingMode::TowardsZero);
@ -94,7 +94,7 @@ bool TranslatorVisitor::FCVTZS_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec
 bool TranslatorVisitor::FCVTZU_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec Vn, Reg Rd) {
    const size_t intsize = sf ? 64 : 32;
    const auto fltsize = FPGetDataSize(type);
-    if (!fltsize || *fltsize == 16) {
+    if (!fltsize) {
        return UnallocatedEncoding();
    }
    if (!sf && !scale.Bit<5>()) {
@ -102,7 +102,7 @@ bool TranslatorVisitor::FCVTZU_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec
    }
    const u8 fracbits = 64 - scale.ZeroExtend<u8>();

-    const IR::U32U64 fltval = V_scalar(*fltsize, Vn);
+    const IR::U16U32U64 fltval = V_scalar(*fltsize, Vn);
    IR::U32U64 intval;
    if (intsize == 32) {
        intval = ir.FPToFixedU32(fltval, fracbits, FP::RoundingMode::TowardsZero);
--- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp
+++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp
@ -119,11 +119,11 @@ bool TranslatorVisitor::FMOV_float_gen(bool sf, Imm<2> type, Imm<1> rmode_0, Imm
 static bool FloaingPointConvertSignedInteger(TranslatorVisitor& v, bool sf, Imm<2> type, Vec Vn, Reg Rd, FP::RoundingMode rounding_mode) {
    const size_t intsize = sf ? 64 : 32;
    const auto fltsize = FPGetDataSize(type);
-    if (!fltsize || *fltsize == 16) {
+    if (!fltsize) {
        return v.UnallocatedEncoding();
    }

-    const IR::U32U64 fltval = v.V_scalar(*fltsize, Vn);
+    const IR::U16U32U64 fltval = v.V_scalar(*fltsize, Vn);
    IR::U32U64 intval;

    if (intsize == 32) {
@ -135,18 +135,17 @@ static bool FloaingPointConvertSignedInteger(TranslatorVisitor& v, bool sf, Imm<
    }

    v.X(intsize, Rd, intval);
-
    return true;
 }

 static bool FloaingPointConvertUnsignedInteger(TranslatorVisitor& v, bool sf, Imm<2> type, Vec Vn, Reg Rd, FP::RoundingMode rounding_mode) {
    const size_t intsize = sf ? 64 : 32;
    const auto fltsize = FPGetDataSize(type);
-    if (!fltsize || *fltsize == 16) {
+    if (!fltsize) {
        return v.UnallocatedEncoding();
    }

-    const IR::U32U64 fltval = v.V_scalar(*fltsize, Vn);
+    const IR::U16U32U64 fltval = v.V_scalar(*fltsize, Vn);
    IR::U32U64 intval;

    if (intsize == 32) {
@ -158,7 +157,6 @@ static bool FloaingPointConvertUnsignedInteger(TranslatorVisitor& v, bool sf, Im
    }

    v.X(intsize, Rd, intval);
-
    return true;
 }

--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@ -2055,28 +2055,80 @@ U16 IREmitter::FPSingleToHalf(const U32& a, FP::RoundingMode rounding) {
    return Inst<U16>(Opcode::FPSingleToHalf, a, Imm8(static_cast<u8>(rounding)));
 }

-U32 IREmitter::FPToFixedS32(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+U32 IREmitter::FPToFixedS32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) { // Emits the FP*ToFixedS32 instruction matching the type of `a` and returns its U32 result.
    ASSERT(fbits <= 32); // fbits may not exceed 32, the width of the returned value.
-    const Opcode opcode = a.GetType() == Type::U32 ? Opcode::FPSingleToFixedS32 : Opcode::FPDoubleToFixedS32;
-    return Inst<U32>(opcode, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits)); // fbits is narrowed to u8 and passed as an 8-bit immediate operand.
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding)); // The rounding mode is likewise passed as an 8-bit immediate.
+
+    switch (a.GetType()) { // U16 selects the Half opcode, U32 the Single opcode, U64 the Double opcode.
+    case Type::U16:
+        return Inst<U32>(Opcode::FPHalfToFixedS32, a, fbits_imm, rounding_imm);
+    case Type::U32:
+        return Inst<U32>(Opcode::FPSingleToFixedS32, a, fbits_imm, rounding_imm);
+    case Type::U64:
+        return Inst<U32>(Opcode::FPDoubleToFixedS32, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE(); // No other operand type is handled.
+        return U32{}; // NOTE(review): presumably only here to satisfy the compiler - confirm how UNREACHABLE() behaves.
+    }
 }

-U64 IREmitter::FPToFixedS64(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+U64 IREmitter::FPToFixedS64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) { // Emits the FP*ToFixedS64 instruction matching the type of `a` and returns its U64 result.
    ASSERT(fbits <= 64); // fbits may not exceed 64, the width of the returned value.
-    const Opcode opcode = a.GetType() == Type::U32 ? Opcode::FPSingleToFixedS64 : Opcode::FPDoubleToFixedS64;
-    return Inst<U64>(opcode, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits)); // fbits is narrowed to u8 and passed as an 8-bit immediate operand.
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding)); // The rounding mode is likewise passed as an 8-bit immediate.
+
+    switch (a.GetType()) { // U16 selects the Half opcode, U32 the Single opcode, U64 the Double opcode.
+    case Type::U16:
+        return Inst<U64>(Opcode::FPHalfToFixedS64, a, fbits_imm, rounding_imm);
+    case Type::U32:
+        return Inst<U64>(Opcode::FPSingleToFixedS64, a, fbits_imm, rounding_imm);
+    case Type::U64:
+        return Inst<U64>(Opcode::FPDoubleToFixedS64, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE(); // No other operand type is handled.
+        return U64{}; // NOTE(review): presumably only here to satisfy the compiler - confirm how UNREACHABLE() behaves.
+    }
 }

-U32 IREmitter::FPToFixedU32(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+U32 IREmitter::FPToFixedU32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) { // Emits the FP*ToFixedU32 instruction matching the type of `a` and returns its U32 result.
    ASSERT(fbits <= 32); // fbits may not exceed 32, the width of the returned value.
-    const Opcode opcode = a.GetType() == Type::U32 ? Opcode::FPSingleToFixedU32 : Opcode::FPDoubleToFixedU32;
-    return Inst<U32>(opcode, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits)); // fbits is narrowed to u8 and passed as an 8-bit immediate operand.
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding)); // The rounding mode is likewise passed as an 8-bit immediate.
+
+    switch (a.GetType()) { // U16 selects the Half opcode, U32 the Single opcode, U64 the Double opcode.
+    case Type::U16:
+        return Inst<U32>(Opcode::FPHalfToFixedU32, a, fbits_imm, rounding_imm);
+    case Type::U32:
+        return Inst<U32>(Opcode::FPSingleToFixedU32, a, fbits_imm, rounding_imm);
+    case Type::U64:
+        return Inst<U32>(Opcode::FPDoubleToFixedU32, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE(); // No other operand type is handled.
+        return U32{}; // NOTE(review): presumably only here to satisfy the compiler - confirm how UNREACHABLE() behaves.
+    }
 }

-U64 IREmitter::FPToFixedU64(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+U64 IREmitter::FPToFixedU64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) { // Emits the FP*ToFixedU64 instruction matching the type of `a` and returns its U64 result.
    ASSERT(fbits <= 64); // fbits may not exceed 64, the width of the returned value.
-    const Opcode opcode = a.GetType() == Type::U32 ? Opcode::FPSingleToFixedU64 : Opcode::FPDoubleToFixedU64;
-    return Inst<U64>(opcode, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits)); // fbits is narrowed to u8 and passed as an 8-bit immediate operand.
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding)); // The rounding mode is likewise passed as an 8-bit immediate.
+
+    switch (a.GetType()) { // U16 selects the Half opcode, U32 the Single opcode, U64 the Double opcode.
+    case Type::U16:
+        return Inst<U64>(Opcode::FPHalfToFixedU64, a, fbits_imm, rounding_imm);
+    case Type::U32:
+        return Inst<U64>(Opcode::FPSingleToFixedU64, a, fbits_imm, rounding_imm);
+    case Type::U64:
+        return Inst<U64>(Opcode::FPDoubleToFixedU64, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE(); // No other operand type is handled.
+        return U64{}; // NOTE(review): presumably only here to satisfy the compiler - confirm how UNREACHABLE() behaves.
+    }
 }

 U32 IREmitter::FPSignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
@ -2379,24 +2431,38 @@ U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b) {

 U128 IREmitter::FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) { // Emits the FPVectorToSignedFixed{16,32,64} instruction selected by esize and returns its U128 result.
    ASSERT(fbits <= esize); // fbits may not exceed the element size.
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits)); // fbits is narrowed to u8 and passed as an 8-bit immediate operand.
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding)); // The rounding mode is likewise passed as an 8-bit immediate.
+
    switch (esize) { // Only 16, 32 and 64 have a matching opcode; each case returns directly.
+    case 16:
+        return Inst<U128>(Opcode::FPVectorToSignedFixed16, a, fbits_imm, rounding_imm);
    case 32:
-        return Inst<U128>(Opcode::FPVectorToSignedFixed32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorToSignedFixed32, a, fbits_imm, rounding_imm);
    case 64:
-        return Inst<U128>(Opcode::FPVectorToSignedFixed64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorToSignedFixed64, a, fbits_imm, rounding_imm);
    }
+
    UNREACHABLE(); // Reached only when esize matched none of the cases above.
    return {}; // NOTE(review): presumably only here to satisfy the compiler - confirm how UNREACHABLE() behaves.
 }

 U128 IREmitter::FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) { // Emits the FPVectorToUnsignedFixed{16,32,64} instruction selected by esize and returns its U128 result.
    ASSERT(fbits <= esize); // fbits may not exceed the element size.
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits)); // fbits is narrowed to u8 and passed as an 8-bit immediate operand.
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding)); // The rounding mode is likewise passed as an 8-bit immediate.
+
    switch (esize) { // Only 16, 32 and 64 have a matching opcode; each case returns directly.
+    case 16:
+        return Inst<U128>(Opcode::FPVectorToUnsignedFixed16, a, fbits_imm, rounding_imm);
    case 32:
-        return Inst<U128>(Opcode::FPVectorToUnsignedFixed32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorToUnsignedFixed32, a, fbits_imm, rounding_imm);
    case 64:
-        return Inst<U128>(Opcode::FPVectorToUnsignedFixed64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorToUnsignedFixed64, a, fbits_imm, rounding_imm);
    }
+
    UNREACHABLE(); // Reached only when esize matched none of the cases above.
    return {}; // NOTE(review): presumably only here to satisfy the compiler - confirm how UNREACHABLE() behaves.
 }
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@ -319,10 +319,10 @@ public:
    U32 FPHalfToSingle(const U16& a, FP::RoundingMode rounding);
    U16 FPSingleToHalf(const U32& a, FP::RoundingMode rounding);
    U64 FPSingleToDouble(const U32& a, FP::RoundingMode rounding);
-    U32 FPToFixedS32(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
-    U64 FPToFixedS64(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
-    U32 FPToFixedU32(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
-    U64 FPToFixedU64(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPToFixedS32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPToFixedS64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPToFixedU32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPToFixedU64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
    U32 FPSignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
    U32 FPUnsignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
    U64 FPSignedFixedToDouble(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@ -304,6 +304,10 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
    case Opcode::FPDoubleToFixedS64:
    case Opcode::FPDoubleToFixedU32:
    case Opcode::FPDoubleToFixedU64:
+    case Opcode::FPHalfToFixedS32:
+    case Opcode::FPHalfToFixedS64:
+    case Opcode::FPHalfToFixedU32:
+    case Opcode::FPHalfToFixedU64:
    case Opcode::FPSingleToFixedS32:
    case Opcode::FPSingleToFixedS64:
    case Opcode::FPSingleToFixedU32:
@ -358,6 +362,12 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
    case Opcode::FPVectorSqrt64:
    case Opcode::FPVectorSub32:
    case Opcode::FPVectorSub64:
+    case Opcode::FPVectorToSignedFixed16:
+    case Opcode::FPVectorToSignedFixed32:
+    case Opcode::FPVectorToSignedFixed64:
+    case Opcode::FPVectorToUnsignedFixed16:
+    case Opcode::FPVectorToUnsignedFixed32:
+    case Opcode::FPVectorToUnsignedFixed64:
        return true;

    default:
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@ -525,6 +525,10 @@ OPCODE(FPDoubleToFixedS32,                                  U32,            U64,
 OPCODE(FPDoubleToFixedS64,                                  U64,            U64,            U8,             U8                              )
 OPCODE(FPDoubleToFixedU32,                                  U32,            U64,            U8,             U8                              )
 OPCODE(FPDoubleToFixedU64,                                  U64,            U64,            U8,             U8                              )
+OPCODE(FPHalfToFixedS32,                                    U32,            U16,            U8,             U8                              )
+OPCODE(FPHalfToFixedS64,                                    U64,            U16,            U8,             U8                              )
+OPCODE(FPHalfToFixedU32,                                    U32,            U16,            U8,             U8                              )
+OPCODE(FPHalfToFixedU64,                                    U64,            U16,            U8,             U8                              )
 OPCODE(FPSingleToFixedS32,                                  U32,            U32,            U8,             U8                              )
 OPCODE(FPSingleToFixedS64,                                  U64,            U32,            U8,             U8                              )
 OPCODE(FPSingleToFixedU32,                                  U32,            U32,            U8,             U8                              )
@ -593,8 +597,10 @@ OPCODE(FPVectorSqrt32,                                      U128,           U128
 OPCODE(FPVectorSqrt64,                                      U128,           U128                                                            )
 OPCODE(FPVectorSub32,                                       U128,           U128,           U128                                            )
 OPCODE(FPVectorSub64,                                       U128,           U128,           U128                                            )
+OPCODE(FPVectorToSignedFixed16,                             U128,           U128,           U8,             U8                              )
 OPCODE(FPVectorToSignedFixed32,                             U128,           U128,           U8,             U8                              )
 OPCODE(FPVectorToSignedFixed64,                             U128,           U128,           U8,             U8                              )
+OPCODE(FPVectorToUnsignedFixed16,                           U128,           U128,           U8,             U8                              )
 OPCODE(FPVectorToUnsignedFixed32,                           U128,           U128,           U8,             U8                              )
 OPCODE(FPVectorToUnsignedFixed64,                           U128,           U128,           U8,             U8                              )