From 3ea49fc6d681ab5ecdf5cfbc0bdd73ae226018e8 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Mon, 22 Jun 2020 21:03:20 +0100
Subject: [PATCH] A32: Implement VFPv3 VCVT (between floating-point and
 fixed-point)

---
 src/backend/x64/emit_x64_floating_point.cpp   | 207 ++++++++++++++----
 src/frontend/A32/decoder/vfp.inc              |   4 +-
 .../A32/disassembler/disassembler_arm.cpp     |  12 +
 .../A32/translate/impl/translate_arm.h        |   2 +
 src/frontend/A32/translate/impl/vfp.cpp       |  68 ++++++
 src/frontend/ir/ir_emitter.cpp                |  60 ++++-
 src/frontend/ir/ir_emitter.h                  |  10 +-
 src/frontend/ir/opcodes.inc                   |  12 +-
 tests/A32/fuzz_arm.cpp                        |   1 +
 9 files changed, 322 insertions(+), 54 deletions(-)

diff --git a/src/backend/x64/emit_x64_floating_point.cpp b/src/backend/x64/emit_x64_floating_point.cpp
index f38d1950..715f6cb3 100644
--- a/src/backend/x64/emit_x64_floating_point.cpp
+++ b/src/backend/x64/emit_x64_floating_point.cpp
@@ -50,6 +50,9 @@ constexpr u64 f64_nan = 0x7ff8000000000000u;
 constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu;
 constexpr u64 f64_smallest_normal = 0x0010000000000000u;
 
+constexpr u64 f64_max_s16 = 0x40dfffc000000000u; // 32767 as a double
+constexpr u64 f64_min_u16 = 0x0000000000000000u; // 0 as a double
+constexpr u64 f64_max_u16 = 0x40efffe000000000u; // 65535 as a double
 constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
 constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
 constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double
@@ -1262,7 +1265,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
                 code.mov(result, unsigned_ ? 0xFFFF'FFFF'FFFF'FFFF : 0x7FFF'FFFF'FFFF'FFFF);
                 code.jmp(end, code.T_NEAR);
                 code.SwitchToNearCode();
-            } else {
+            } else if constexpr (isize == 32) {
                 code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u32 : f64_max_s32));
                 if (unsigned_) {
                     code.maxsd(src, code.MConst(xword, f64_min_u32));
@@ -1270,6 +1273,14 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
                 } else {
                     code.cvttsd2si(result.cvt32(), src);
                 }
+            } else {
+                code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u16 : f64_max_s16));
+                if (unsigned_) {
+                    code.maxsd(src, code.MConst(xword, f64_min_u16));
+                    code.cvttsd2si(result, src); // 64 bit gpr
+                } else {
+                    code.cvttsd2si(result.cvt32(), src);
+                }
             }
 
             ctx.reg_alloc.DefineValue(inst, result);
@@ -1312,6 +1323,10 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     code.CallFunction(lut.at(std::make_tuple(fbits, rounding_mode)));
 }
 
+void EmitX64::EmitFPDoubleToFixedS16(EmitContext& ctx, IR::Inst* inst) { // Emitter for IR opcode FPDoubleToFixedS16; forwards to the shared EmitFPToFixed template
+    EmitFPToFixed<64, false, 16>(code, ctx, inst); // template args: 64-bit float source, unsigned_ = false (signed), isize = 16-bit result
+}
+
 void EmitX64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<64, false, 32>(code, ctx, inst);
 }
@@ -1320,6 +1335,10 @@ void EmitX64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<64, false, 64>(code, ctx, inst);
 }
 
+void EmitX64::EmitFPDoubleToFixedU16(EmitContext& ctx, IR::Inst* inst) { // Emitter for IR opcode FPDoubleToFixedU16; forwards to the shared EmitFPToFixed template
+    EmitFPToFixed<64, true, 16>(code, ctx, inst); // template args: 64-bit float source, unsigned_ = true, isize = 16-bit result
+}
+
 void EmitX64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<64, true, 32>(code, ctx, inst);
 }
@@ -1328,6 +1347,10 @@ void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<64, true, 64>(code, ctx, inst);
 }
 
+void EmitX64::EmitFPHalfToFixedS16(EmitContext& ctx, IR::Inst* inst) { // Emitter for IR opcode FPHalfToFixedS16; forwards to the shared EmitFPToFixed template
+    EmitFPToFixed<16, false, 16>(code, ctx, inst); // template args: 16-bit float source, unsigned_ = false (signed), isize = 16-bit result
+}
+
 void EmitX64::EmitFPHalfToFixedS32(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<16, false, 32>(code, ctx, inst);
 }
@@ -1336,6 +1359,10 @@ void EmitX64::EmitFPHalfToFixedS64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<16, false, 64>(code, ctx, inst);
 }
 
+void EmitX64::EmitFPHalfToFixedU16(EmitContext& ctx, IR::Inst* inst) { // Emitter for IR opcode FPHalfToFixedU16; forwards to the shared EmitFPToFixed template
+    EmitFPToFixed<16, true, 16>(code, ctx, inst); // template args: 16-bit float source, unsigned_ = true, isize = 16-bit result
+}
+
 void EmitX64::EmitFPHalfToFixedU32(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<16, true, 32>(code, ctx, inst);
 }
@@ -1344,6 +1371,10 @@ void EmitX64::EmitFPHalfToFixedU64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<16, true, 64>(code, ctx, inst);
 }
 
+void EmitX64::EmitFPSingleToFixedS16(EmitContext& ctx, IR::Inst* inst) { // Emitter for IR opcode FPSingleToFixedS16; forwards to the shared EmitFPToFixed template
+    EmitFPToFixed<32, false, 16>(code, ctx, inst); // template args: 32-bit float source, unsigned_ = false (signed), isize = 16-bit result
+}
+
 void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<32, false, 32>(code, ctx, inst);
 }
@@ -1352,6 +1383,10 @@ void EmitX64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<32, false, 64>(code, ctx, inst);
 }
 
+void EmitX64::EmitFPSingleToFixedU16(EmitContext& ctx, IR::Inst* inst) { // Emitter for IR opcode FPSingleToFixedU16; forwards to the shared EmitFPToFixed template
+    EmitFPToFixed<32, true, 16>(code, ctx, inst); // template args: 32-bit float source, unsigned_ = true, isize = 16-bit result
+}
+
 void EmitX64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<32, true, 32>(code, ctx, inst);
 }
@@ -1360,6 +1395,46 @@ void EmitX64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<32, true, 64>(code, ctx, inst);
 }
 
+void EmitX64::EmitFPFixedS16ToSingle(EmitContext& ctx, IR::Inst* inst) { // Emits x64 for IR opcode FPFixedS16ToSingle: signed 16-bit fixed-point -> single-precision float
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst); // args[0] = input value, args[1] = fbits immediate, args[2] = rounding-mode immediate
+
+    const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(args[0]).cvt16(); // 16-bit view of the GPR holding the input
+    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); // scratch GPR that receives the input widened to 32 bits
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const size_t fbits = args[1].GetImmediateU8(); // number of fractional bits in the fixed-point input
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required (presumably because every 16-bit integer converts exactly -- TODO confirm)
+
+    code.movsx(tmp, from); // sign-extend 16 -> 32 bits
+    code.cvtsi2ss(result, tmp); // signed 32-bit integer -> scalar single
+
+    if (fbits != 0) { // with no fractional bits the integer conversion is already the answer
+        const u32 scale_factor = static_cast<u32>((127 - fbits) << 23); // bit pattern of the single 2^-fbits: biased exponent (127 - fbits) at bit 23, zero mantissa
+        code.mulss(result, code.MConst(xword, scale_factor)); // result *= 2^-fbits
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result); // the XMM register becomes the value of this IR instruction
+}
+
+void EmitX64::EmitFPFixedU16ToSingle(EmitContext& ctx, IR::Inst* inst) { // Emits x64 for IR opcode FPFixedU16ToSingle: unsigned 16-bit fixed-point -> single-precision float
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst); // args[0] = input value, args[1] = fbits immediate, args[2] = rounding-mode immediate
+
+    const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(args[0]).cvt16(); // 16-bit view of the GPR holding the input
+    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); // scratch GPR that receives the input widened to 32 bits
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const size_t fbits = args[1].GetImmediateU8(); // number of fractional bits in the fixed-point input
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required (presumably because every 16-bit integer converts exactly -- TODO confirm)
+
+    code.movzx(tmp, from); // zero-extend 16 -> 32 bits, so the value is non-negative for the signed conversion below
+    code.cvtsi2ss(result, tmp); // signed 32-bit integer -> scalar single
+
+    if (fbits != 0) { // with no fractional bits the integer conversion is already the answer
+        const u32 scale_factor = static_cast<u32>((127 - fbits) << 23); // bit pattern of the single 2^-fbits: biased exponent (127 - fbits) at bit 23, zero mantissa
+        code.mulss(result, code.MConst(xword, scale_factor)); // result *= 2^-fbits
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result); // the XMM register becomes the value of this IR instruction
+}
+
 void EmitX64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
@@ -1367,9 +1442,15 @@ void EmitX64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
-    code.cvtsi2ss(result, from);
+    if (rounding_mode == ctx.FPCR().RMode()) {
+        code.cvtsi2ss(result, from);
+    } else {
+        ASSERT(rounding_mode == FP::RoundingMode::ToNearest_TieEven);
+        code.EnterStandardASIMD();
+        code.cvtsi2ss(result, from);
+        code.LeaveStandardASIMD();
+    }
 
     if (fbits != 0) {
         const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
@@ -1385,16 +1466,26 @@ void EmitX64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
-    if (code.HasAVX512_Skylake()) {
-        const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
-        code.vcvtusi2ss(result, result, from.cvt32());
+    const auto op = [&]{
+        if (code.HasAVX512_Skylake()) {
+            const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
+            code.vcvtusi2ss(result, result, from.cvt32());
+        } else {
+            // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
+            const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]);
+            code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
+            code.cvtsi2ss(result, from);
+        }
+    };
+
+    if (rounding_mode == ctx.FPCR().RMode()) {
+        op();
     } else {
-        // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
-        const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]);
-        code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
-        code.cvtsi2ss(result, from);
+        ASSERT(rounding_mode == FP::RoundingMode::ToNearest_TieEven);
+        code.EnterStandardASIMD();
+        op();
+        code.LeaveStandardASIMD();
     }
 
     if (fbits != 0) {
@@ -1405,14 +1496,53 @@ void EmitX64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
+void EmitX64::EmitFPFixedS16ToDouble(EmitContext& ctx, IR::Inst* inst) { // Emits x64 for IR opcode FPFixedS16ToDouble: signed 16-bit fixed-point -> double-precision float
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst); // args[0] = input value, args[1] = fbits immediate, args[2] = rounding-mode immediate
+
+    const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(args[0]).cvt16(); // 16-bit view of the GPR holding the input
+    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); // scratch GPR that receives the input widened to 32 bits
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const size_t fbits = args[1].GetImmediateU8(); // number of fractional bits in the fixed-point input
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required (presumably because every 16-bit integer converts exactly -- TODO confirm)
+
+    code.movsx(tmp, from); // sign-extend 16 -> 32 bits
+    code.cvtsi2sd(result, tmp); // signed 32-bit integer -> scalar double
+
+    if (fbits != 0) { // with no fractional bits the integer conversion is already the answer
+        const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52); // bit pattern of the double 2^-fbits: biased exponent (1023 - fbits) at bit 52, zero mantissa
+        code.mulsd(result, code.MConst(xword, scale_factor)); // result *= 2^-fbits
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result); // the XMM register becomes the value of this IR instruction
+}
+
+void EmitX64::EmitFPFixedU16ToDouble(EmitContext& ctx, IR::Inst* inst) { // Emits x64 for IR opcode FPFixedU16ToDouble: unsigned 16-bit fixed-point -> double-precision float
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst); // args[0] = input value, args[1] = fbits immediate, args[2] = rounding-mode immediate
+
+    const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(args[0]).cvt16(); // 16-bit view of the GPR holding the input
+    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); // scratch GPR that receives the input widened to 32 bits
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const size_t fbits = args[1].GetImmediateU8(); // number of fractional bits in the fixed-point input
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required (presumably because every 16-bit integer converts exactly -- TODO confirm)
+
+    code.movzx(tmp, from); // zero-extend 16 -> 32 bits, so the value is non-negative for the signed conversion below
+    code.cvtsi2sd(result, tmp); // signed 32-bit integer -> scalar double
+
+    if (fbits != 0) { // with no fractional bits the integer conversion is already the answer
+        const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52); // bit pattern of the double 2^-fbits: biased exponent (1023 - fbits) at bit 52, zero mantissa
+        code.mulsd(result, code.MConst(xword, scale_factor)); // result *= 2^-fbits
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result); // the XMM register becomes the value of this IR instruction
+}
+
 void EmitX64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     const Xbyak::Reg32 from = ctx.reg_alloc.UseGpr(args[0]).cvt32();
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
-    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPCR().RMode());
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
 
     code.cvtsi2sd(result, from);
 
@@ -1424,6 +1554,31 @@ void EmitX64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
+void EmitX64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) { // Emits x64 for IR opcode FPFixedU32ToDouble: unsigned 32-bit fixed-point -> double-precision float
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst); // args[0] = input value, args[1] = fbits immediate, args[2] = rounding-mode immediate
+
+    const Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm(); // XMM register that will hold the result
+    const size_t fbits = args[1].GetImmediateU8(); // number of fractional bits in the fixed-point input
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
+
+    if (code.HasAVX512_Skylake()) { // AVX-512 offers a direct unsigned-integer conversion
+        const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
+        code.vcvtusi2sd(to, to, from.cvt32()); // unsigned 32-bit integer -> scalar double
+    } else {
+        // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
+        const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]); // scratch copy, because the mov below writes to it
+        code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
+        code.cvtsi2sd(to, from); // signed 64-bit integer -> scalar double
+    }
+
+    if (fbits != 0) { // with no fractional bits the integer conversion is already the answer
+        const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52); // bit pattern of the double 2^-fbits: biased exponent (1023 - fbits) at bit 52, zero mantissa
+        code.mulsd(to, code.MConst(xword, scale_factor)); // to *= 2^-fbits
+    }
+
+    ctx.reg_alloc.DefineValue(inst, to); // the XMM register becomes the value of this IR instruction
+}
+
 void EmitX64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
@@ -1462,32 +1617,6 @@ void EmitX64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
-void EmitX64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
-    const Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm();
-    const size_t fbits = args[1].GetImmediateU8();
-    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPCR().RMode());
-
-    if (code.HasAVX512_Skylake()) {
-        const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
-        code.vcvtusi2sd(to, to, from.cvt32());
-    } else {
-        // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
-        const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]);
-        code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
-        code.cvtsi2sd(to, from);
-    }
-
-    if (fbits != 0) {
-        const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
-        code.mulsd(to, code.MConst(xword, scale_factor));
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
-}
-
 void EmitX64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
diff --git a/src/frontend/A32/decoder/vfp.inc b/src/frontend/A32/decoder/vfp.inc
index 83dc4529..36283634 100644
--- a/src/frontend/A32/decoder/vfp.inc
+++ b/src/frontend/A32/decoder/vfp.inc
@@ -30,10 +30,10 @@ INST(vfp_VRINTR,           "VRINTR",                  "cccc11101D110110dddd101z0
 INST(vfp_VRINTZ,           "VRINTZ",                  "cccc11101D110110dddd101z11M0mmmm") // VFPv5
 INST(vfp_VCVT_f_to_f,      "VCVT (f32<->f64)",        "cccc11101D110111dddd101z11M0mmmm") // VFPv2
 INST(vfp_VCVT_from_int,    "VCVT (from int)",         "cccc11101D111000dddd101zs1M0mmmm") // VFPv2
-//INST(vfp_VCVT_from_fixed,  "VCVT (from fixed)",       "cccc11101D11101Udddd101zx1i0vvvv") // VFPv3
+INST(vfp_VCVT_from_fixed,  "VCVT (from fixed)",       "cccc11101D11101Udddd101zx1i0vvvv") // VFPv3
 INST(vfp_VCVT_to_u32,      "VCVT (to u32)",           "cccc11101D111100dddd101zr1M0mmmm") // VFPv2
 INST(vfp_VCVT_to_s32,      "VCVT (to s32)",           "cccc11101D111101dddd101zr1M0mmmm") // VFPv2
-//INST(vfp_VCVT_to_fixed,    "VCVT (to fixed)",         "cccc11101D11111Udddd101zx1i0vvvv") // VFPv3
+INST(vfp_VCVT_to_fixed,    "VCVT (to fixed)",         "cccc11101D11111Udddd101zx1i0vvvv") // VFPv3
 INST(vfp_VRINT_rm,         "VRINT{A,N,P,M}",          "111111101D1110mmdddd101z01M0mmmm") // VFPv5
 INST(vfp_VCVT_rm,          "VCVT{A,N,P,M}",           "111111101D1111mmdddd101zU1M0mmmm") // VFPv5
 
diff --git a/src/frontend/A32/disassembler/disassembler_arm.cpp b/src/frontend/A32/disassembler/disassembler_arm.cpp
index 087a697d..3a693303 100644
--- a/src/frontend/A32/disassembler/disassembler_arm.cpp
+++ b/src/frontend/A32/disassembler/disassembler_arm.cpp
@@ -1436,6 +1436,12 @@ public:
         return fmt::format("vcvt{}.{}.{} {}, {}", CondToString(cond), sz ? "f64" : "f32", is_signed ? "s32" : "u32", FPRegStr(sz, Vd, D), FPRegStr(false, Vm, M));
     }
 
+    std::string vfp_VCVT_from_fixed(Cond cond, bool D, bool U, size_t Vd, bool sz, bool sx, Imm<1> i, Imm<4> imm4) { // Formats VCVT (fixed-point -> floating-point) as assembly text
+        const size_t size = sx ? 32 : 16; // sx selects a 32-bit or a 16-bit fixed-point operand
+        const size_t fbits = size - concatenate(imm4, i).ZeroExtend(); // fraction bits = size - imm4:i; NOTE(review): wraps to a huge unsigned value when imm4:i > size, and is printed unchecked here
+        return fmt::format("vcvt{}.{}.{}{} {}, {}, #{}", CondToString(cond), sz ? "f64" : "f32", U ? 'u' : 's', size, FPRegStr(sz, Vd, D), FPRegStr(sz, Vd, D), fbits); // same register printed twice: the encoding has a single register field (Vd, D)
+    }
+
     std::string vfp_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm) {
         return fmt::format("vcvt{}{}.u32.{} {}, {}", round_towards_zero ? "" : "r", CondToString(cond), sz ? "f64" : "f32", FPRegStr(false, Vd, D), FPRegStr(sz, Vm, M));
     }
@@ -1444,6 +1450,12 @@ public:
         return fmt::format("vcvt{}{}.s32.{} {}, {}", round_towards_zero ? "" : "r", CondToString(cond), sz ? "f64" : "f32", FPRegStr(false, Vd, D), FPRegStr(sz, Vm, M));
     }
 
+    std::string vfp_VCVT_to_fixed(Cond cond, bool D, bool U, size_t Vd, bool sz, bool sx, Imm<1> i, Imm<4> imm4) { // Formats VCVT (floating-point -> fixed-point) as assembly text
+        const size_t size = sx ? 32 : 16; // sx selects a 32-bit or a 16-bit fixed-point result
+        const size_t fbits = size - concatenate(imm4, i).ZeroExtend(); // fraction bits = size - imm4:i; NOTE(review): wraps to a huge unsigned value when imm4:i > size, and is printed unchecked here
+        return fmt::format("vcvt{}.{}{}.{} {}, {}, #{}", CondToString(cond), U ? 'u' : 's', size, sz ? "f64" : "f32", FPRegStr(sz, Vd, D), FPRegStr(sz, Vd, D), fbits); // same register printed twice: the encoding has a single register field (Vd, D)
+    }
+
     std::string vfp_VRINT_rm(bool D, size_t rm, size_t Vd, bool sz, bool M, size_t Vm) {
         return fmt::format("vrint{}.{} {}, {}", "anpm"[rm], sz ? "f64" : "f32", FPRegStr(sz, Vd, D), FPRegStr(sz, Vm, M));
     }
diff --git a/src/frontend/A32/translate/impl/translate_arm.h b/src/frontend/A32/translate/impl/translate_arm.h
index 2db6acfc..3a6f335d 100644
--- a/src/frontend/A32/translate/impl/translate_arm.h
+++ b/src/frontend/A32/translate/impl/translate_arm.h
@@ -427,8 +427,10 @@ struct ArmTranslatorVisitor final {
     bool vfp_VRINTZ(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
     bool vfp_VCVT_f_to_f(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
     bool vfp_VCVT_from_int(Cond cond, bool D, size_t Vd, bool sz, bool is_signed, bool M, size_t Vm);
+    bool vfp_VCVT_from_fixed(Cond cond, bool D, bool U, size_t Vd, bool sz, bool sx, Imm<1> i, Imm<4> imm4);
     bool vfp_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm);
     bool vfp_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm);
+    bool vfp_VCVT_to_fixed(Cond cond, bool D, bool U, size_t Vd, bool sz, bool sx, Imm<1> i, Imm<4> imm4);
     bool vfp_VRINT_rm(bool D, size_t rm, size_t Vd, bool sz, bool M, size_t Vm);
     bool vfp_VCVT_rm(bool D, size_t rm, size_t Vd, bool sz, bool U, bool M, size_t Vm);
 
diff --git a/src/frontend/A32/translate/impl/vfp.cpp b/src/frontend/A32/translate/impl/vfp.cpp
index 7a1bba69..8ef34d38 100644
--- a/src/frontend/A32/translate/impl/vfp.cpp
+++ b/src/frontend/A32/translate/impl/vfp.cpp
@@ -966,6 +966,38 @@ bool ArmTranslatorVisitor::vfp_VCVT_from_int(Cond cond, bool D, size_t Vd, bool
     return true;
 }
 
+// VCVT.F32.{S16,U16,S32,U32} <Sdm>, <Sdm>, #<fbits>
+// VCVT.F64.{S16,U16,S32,U32} <Ddm>, <Ddm>, #<fbits>
+bool ArmTranslatorVisitor::vfp_VCVT_from_fixed(Cond cond, bool D, bool U, size_t Vd, bool sz, bool sx, Imm<1> i, Imm<4> imm4) {
+    if (!ConditionPassed(cond)) { // nothing to emit for this instruction when the condition check fails
+        return true;
+    }
+
+    const size_t size = sx ? 32 : 16; // sx selects a 32-bit or a 16-bit fixed-point operand
+    const size_t fbits = size - concatenate(imm4, i).ZeroExtend(); // fraction bits = size - imm4:i
+
+    if (fbits > size) { // also catches imm4:i > size, where the unsigned subtraction above wraps to a huge value
+        return UnpredictableInstruction();
+    }
+
+    const auto d = ToExtReg(sz, Vd, D); // the single register that is both source and destination
+    const auto rounding_mode = FP::RoundingMode::ToNearest_TieEven; // fixed rounding mode handed to the IR conversion
+    const auto reg_d = ir.GetExtendedRegister(d);
+    const auto source = ir.LeastSignificant(size, reg_d); // the fixed-point input is the low `size` bits of the register
+
+    if (sz) { // sz set: double-precision result
+        const auto result = U ? ir.FPUnsignedFixedToDouble(source, fbits, rounding_mode)
+                              : ir.FPSignedFixedToDouble(source, fbits, rounding_mode);
+        ir.SetExtendedRegister(d, result);
+    } else { // sz clear: single-precision result
+        const auto result = U ? ir.FPUnsignedFixedToSingle(source, fbits, rounding_mode)
+                              : ir.FPSignedFixedToSingle(source, fbits, rounding_mode);
+        ir.SetExtendedRegister(d, result);
+    }
+
+    return true;
+}
+
 // VCVT{,R}.U32.F32 <Sd>, <Sm>
 // VCVT{,R}.U32.F64 <Sd>, <Dm>
 bool ArmTranslatorVisitor::vfp_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm) {
@@ -998,6 +1030,42 @@ bool ArmTranslatorVisitor::vfp_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool sz
     return true;
 }
 
+// VCVT.{S16,U16,S32,U32}.F32 <Sdm>, <Sdm>, #<fbits>
+// VCVT.{S16,U16,S32,U32}.F64 <Ddm>, <Ddm>, #<fbits>
+bool ArmTranslatorVisitor::vfp_VCVT_to_fixed(Cond cond, bool D, bool U, size_t Vd, bool sz, bool sx, Imm<1> i, Imm<4> imm4) {
+    if (!ConditionPassed(cond)) { // nothing to emit for this instruction when the condition check fails
+        return true;
+    }
+
+    const size_t size = sx ? 32 : 16; // sx selects a 32-bit or a 16-bit fixed-point result
+    const size_t fbits = size - concatenate(imm4, i).ZeroExtend(); // fraction bits = size - imm4:i
+
+    if (fbits > size) { // also catches imm4:i > size, where the unsigned subtraction above wraps to a huge value
+        return UnpredictableInstruction();
+    }
+
+    const auto d = ToExtReg(sz, Vd, D); // the single register that is both source and destination
+    const auto rounding_mode = FP::RoundingMode::TowardsZero; // fixed rounding mode handed to the IR conversion
+    const auto reg_d = ir.GetExtendedRegister(d);
+
+    const auto result = [&]() -> IR::U16U32U64 { // choose the IR conversion by result width (sx) and signedness (U)
+        if (sx) {
+            return U ? ir.FPToFixedU32(reg_d, fbits, rounding_mode)
+                     : ir.FPToFixedS32(reg_d, fbits, rounding_mode);
+        } else {
+            return U ? ir.FPToFixedU16(reg_d, fbits, rounding_mode)
+                     : ir.FPToFixedS16(reg_d, fbits, rounding_mode);
+        }
+    }();
+
+    if (sz) { // widen to the register width: zero-extend unsigned results, sign-extend signed ones
+        ir.SetExtendedRegister(d, U ? ir.ZeroExtendToLong(result) : ir.SignExtendToLong(result));
+    } else {
+        ir.SetExtendedRegister(d, U ? ir.ZeroExtendToWord(result) : ir.SignExtendToWord(result));
+    }
+    return true;
+}
+
 // VRINT{A,N,P,M}.F32 <Sd>, <Sm>
 // VRINT{A,N,P,M}.F64 <Dd>, <Dm>
 bool ArmTranslatorVisitor::vfp_VRINT_rm(bool D, size_t rm, size_t Vd, bool sz, bool M, size_t Vm) {
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 7d876d7c..7f19e4f8 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -2150,6 +2150,24 @@ U16 IREmitter::FPSingleToHalf(const U32& a, FP::RoundingMode rounding) {
     return Inst<U16>(Opcode::FPSingleToHalf, a, Imm8(static_cast<u8>(rounding)));
 }
 
+U16 IREmitter::FPToFixedS16(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) { // Emits the IR conversion of float `a` to a signed 16-bit fixed-point value with `fbits` fractional bits
+    ASSERT(fbits <= 16); // fractional bits may not exceed the 16-bit result width
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits)); // fbits and the rounding mode travel as 8-bit immediate operands
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
+    switch (a.GetType()) { // the opcode is chosen by the width of the float input
+    case Type::U16: // half-precision input
+        return Inst<U16>(Opcode::FPHalfToFixedS16, a, fbits_imm, rounding_imm);
+    case Type::U32: // single-precision input
+        return Inst<U16>(Opcode::FPSingleToFixedS16, a, fbits_imm, rounding_imm);
+    case Type::U64: // double-precision input
+        return Inst<U16>(Opcode::FPDoubleToFixedS16, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE();
+    }
+}
+
 U32 IREmitter::FPToFixedS32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= 32);
 
@@ -2186,6 +2204,24 @@ U64 IREmitter::FPToFixedS64(const U16U32U64& a, size_t fbits, FP::RoundingMode r
     }
 }
 
+U16 IREmitter::FPToFixedU16(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) { // Emits the IR conversion of float `a` to an unsigned 16-bit fixed-point value with `fbits` fractional bits
+    ASSERT(fbits <= 16); // fractional bits may not exceed the 16-bit result width
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits)); // fbits and the rounding mode travel as 8-bit immediate operands
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
+    switch (a.GetType()) { // the opcode is chosen by the width of the float input
+    case Type::U16: // half-precision input
+        return Inst<U16>(Opcode::FPHalfToFixedU16, a, fbits_imm, rounding_imm);
+    case Type::U32: // single-precision input
+        return Inst<U16>(Opcode::FPSingleToFixedU16, a, fbits_imm, rounding_imm);
+    case Type::U64: // double-precision input
+        return Inst<U16>(Opcode::FPDoubleToFixedU16, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE();
+    }
+}
+
 U32 IREmitter::FPToFixedU32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= 32);
 
@@ -2222,13 +2258,15 @@ U64 IREmitter::FPToFixedU64(const U16U32U64& a, size_t fbits, FP::RoundingMode r
     }
 }
 
-U32 IREmitter::FPSignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
-    ASSERT(fbits <= (a.GetType() == Type::U32 ? 32 : 64));
+U32 IREmitter::FPSignedFixedToSingle(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= (a.GetType() == Type::U16 ? 16 : (a.GetType() == Type::U32 ? 32 : 64)));
 
     const IR::U8 fbits_imm = Imm8(static_cast<u8>(fbits));
     const IR::U8 rounding_imm = Imm8(static_cast<u8>(rounding));
 
     switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U32>(Opcode::FPFixedS16ToSingle, a, fbits_imm, rounding_imm);
     case Type::U32:
         return Inst<U32>(Opcode::FPFixedS32ToSingle, a, fbits_imm, rounding_imm);
     case Type::U64:
@@ -2238,13 +2276,15 @@ U32 IREmitter::FPSignedFixedToSingle(const U32U64& a, size_t fbits, FP::Rounding
     }
 }
 
-U32 IREmitter::FPUnsignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
-    ASSERT(fbits <= (a.GetType() == Type::U32 ? 32 : 64));
+U32 IREmitter::FPUnsignedFixedToSingle(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= (a.GetType() == Type::U16 ? 16 : (a.GetType() == Type::U32 ? 32 : 64)));
 
     const IR::U8 fbits_imm = Imm8(static_cast<u8>(fbits));
     const IR::U8 rounding_imm = Imm8(static_cast<u8>(rounding));
 
     switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U32>(Opcode::FPFixedU16ToSingle, a, fbits_imm, rounding_imm);
     case Type::U32:
         return Inst<U32>(Opcode::FPFixedU32ToSingle, a, fbits_imm, rounding_imm);
     case Type::U64:
@@ -2254,13 +2294,15 @@ U32 IREmitter::FPUnsignedFixedToSingle(const U32U64& a, size_t fbits, FP::Roundi
     }
 }
 
-U64 IREmitter::FPSignedFixedToDouble(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
-    ASSERT(fbits <= (a.GetType() == Type::U32 ? 32 : 64));
+U64 IREmitter::FPSignedFixedToDouble(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= (a.GetType() == Type::U16 ? 16 : (a.GetType() == Type::U32 ? 32 : 64)));
 
     const IR::U8 fbits_imm = Imm8(static_cast<u8>(fbits));
     const IR::U8 rounding_imm = Imm8(static_cast<u8>(rounding));
 
     switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U64>(Opcode::FPFixedS16ToDouble, a, fbits_imm, rounding_imm);
     case Type::U32:
         return Inst<U64>(Opcode::FPFixedS32ToDouble, a, fbits_imm, rounding_imm);
     case Type::U64:
@@ -2270,13 +2312,15 @@ U64 IREmitter::FPSignedFixedToDouble(const U32U64& a, size_t fbits, FP::Rounding
     }
 }
 
-U64 IREmitter::FPUnsignedFixedToDouble(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
-    ASSERT(fbits <= (a.GetType() == Type::U32 ? 32 : 64));
+U64 IREmitter::FPUnsignedFixedToDouble(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= (a.GetType() == Type::U16 ? 16 : (a.GetType() == Type::U32 ? 32 : 64)));
 
     const IR::U8 fbits_imm = Imm8(static_cast<u8>(fbits));
     const IR::U8 rounding_imm = Imm8(static_cast<u8>(rounding));
 
     switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U64>(Opcode::FPFixedU16ToDouble, a, fbits_imm, rounding_imm);
     case Type::U32:
         return Inst<U64>(Opcode::FPFixedU32ToDouble, a, fbits_imm, rounding_imm);
     case Type::U64:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 163068c8..95051edb 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -338,14 +338,16 @@ public:
     U32 FPHalfToSingle(const U16& a, FP::RoundingMode rounding);
     U16 FPSingleToHalf(const U32& a, FP::RoundingMode rounding);
     U64 FPSingleToDouble(const U32& a, FP::RoundingMode rounding);
+    U16 FPToFixedS16(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
     U32 FPToFixedS32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
     U64 FPToFixedS64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U16 FPToFixedU16(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
     U32 FPToFixedU32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
     U64 FPToFixedU64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
-    U32 FPSignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
-    U32 FPUnsignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
-    U64 FPSignedFixedToDouble(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
-    U64 FPUnsignedFixedToDouble(const U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPSignedFixedToSingle(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPUnsignedFixedToSingle(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPSignedFixedToDouble(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPUnsignedFixedToDouble(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
 
     U128 FPVectorAbs(size_t esize, const U128& a);
     U128 FPVectorAdd(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index b6f35a8f..4314d5a3 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -560,24 +560,34 @@ OPCODE(FPSingleToDouble,                                    U64,            U32,
 OPCODE(FPSingleToHalf,                                      U16,            U32,            U8                                              )
 OPCODE(FPDoubleToHalf,                                      U16,            U64,            U8                                              )
 OPCODE(FPDoubleToSingle,                                    U32,            U64,            U8                                              )
+OPCODE(FPDoubleToFixedS16,                                  U16,            U64,            U8,             U8                              )
 OPCODE(FPDoubleToFixedS32,                                  U32,            U64,            U8,             U8                              )
 OPCODE(FPDoubleToFixedS64,                                  U64,            U64,            U8,             U8                              )
+OPCODE(FPDoubleToFixedU16,                                  U16,            U64,            U8,             U8                              )
 OPCODE(FPDoubleToFixedU32,                                  U32,            U64,            U8,             U8                              )
 OPCODE(FPDoubleToFixedU64,                                  U64,            U64,            U8,             U8                              )
+OPCODE(FPHalfToFixedS16,                                    U16,            U16,            U8,             U8                              )
 OPCODE(FPHalfToFixedS32,                                    U32,            U16,            U8,             U8                              )
 OPCODE(FPHalfToFixedS64,                                    U64,            U16,            U8,             U8                              )
+OPCODE(FPHalfToFixedU16,                                    U16,            U16,            U8,             U8                              )
 OPCODE(FPHalfToFixedU32,                                    U32,            U16,            U8,             U8                              )
 OPCODE(FPHalfToFixedU64,                                    U64,            U16,            U8,             U8                              )
+OPCODE(FPSingleToFixedS16,                                  U16,            U32,            U8,             U8                              )
 OPCODE(FPSingleToFixedS32,                                  U32,            U32,            U8,             U8                              )
 OPCODE(FPSingleToFixedS64,                                  U64,            U32,            U8,             U8                              )
+OPCODE(FPSingleToFixedU16,                                  U16,            U32,            U8,             U8                              )
 OPCODE(FPSingleToFixedU32,                                  U32,            U32,            U8,             U8                              )
 OPCODE(FPSingleToFixedU64,                                  U64,            U32,            U8,             U8                              )
+OPCODE(FPFixedU16ToSingle,                                  U32,            U16,            U8,             U8                              )
+OPCODE(FPFixedS16ToSingle,                                  U32,            U16,            U8,             U8                              )
+OPCODE(FPFixedU16ToDouble,                                  U64,            U16,            U8,             U8                              )
+OPCODE(FPFixedS16ToDouble,                                  U64,            U16,            U8,             U8                              )
 OPCODE(FPFixedU32ToSingle,                                  U32,            U32,            U8,             U8                              )
 OPCODE(FPFixedS32ToSingle,                                  U32,            U32,            U8,             U8                              )
 OPCODE(FPFixedU32ToDouble,                                  U64,            U32,            U8,             U8                              )
+OPCODE(FPFixedS32ToDouble,                                  U64,            U32,            U8,             U8                              )
 OPCODE(FPFixedU64ToDouble,                                  U64,            U64,            U8,             U8                              )
 OPCODE(FPFixedU64ToSingle,                                  U32,            U64,            U8,             U8                              )
-OPCODE(FPFixedS32ToDouble,                                  U64,            U32,            U8,             U8                              )
 OPCODE(FPFixedS64ToDouble,                                  U64,            U64,            U8,             U8                              )
 OPCODE(FPFixedS64ToSingle,                                  U32,            U64,            U8,             U8                              )
 
diff --git a/tests/A32/fuzz_arm.cpp b/tests/A32/fuzz_arm.cpp
index 47a2bfac..6515c2cd 100644
--- a/tests/A32/fuzz_arm.cpp
+++ b/tests/A32/fuzz_arm.cpp
@@ -120,6 +120,7 @@ u32 GenRandomInst(u32 pc, bool is_last_inst) {
             // Incorrect Unicorn implementations
             "asimd_VRECPS", // Unicorn does not fuse the multiply and subtraction, resulting in being off by 1ULP.
             "asimd_VRSQRTS", // Unicorn does not fuse the multiply and subtraction, resulting in being off by 1ULP.
+            "vfp_VCVT_from_fixed", // Unicorn does not do round-to-nearest-even for this instruction correctly.
         };
 
         for (const auto& [fn, bitstring] : list) {