Merge pull request #289 from MerryMage/fptofixed

Implement most of the scalar fp -> integer instructions
2018-07-15 17:12:52 +01:00 · 2018-07-15 17:12:52 +01:00 · d50eaedaa7
commit d50eaedaa7
parent e7409fdfe4 304cc7f61e
52 changed files with 1931 additions and 217 deletions
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -16,8 +16,17 @@ add_library(dynarmic
    common/common_types.h
    common/crc32.cpp
    common/crc32.h
-    common/fp_util.h
+    common/fp/fpsr.h
+    common/fp/info.h
+    common/fp/mantissa_util.h
+    common/fp/op.cpp
+    common/fp/op.h
+    common/fp/process_exception.cpp
+    common/fp/process_exception.h
    common/fp/rounding_mode.h
+    common/fp/unpacked.cpp
+    common/fp/unpacked.h
+    common/fp/util.h
    common/intrusive_list.h
    common/iterator_util.h
    common/llvm_disassemble.cpp
@ -27,10 +36,24 @@ add_library(dynarmic
    common/memory_pool.cpp
    common/memory_pool.h
    common/mp.h
+    common/mp/append.h
+    common/mp/bind.h
+    common/mp/cartesian_product.h
+    common/mp/concat.h
+    common/mp/fapply.h
+    common/mp/fmap.h
+    common/mp/list.h
+    common/mp/lut.h
+    common/mp/to_tuple.h
+    common/mp/vlift.h
+    common/mp/vllift.h
+    common/safe_ops.h
    common/scope_exit.h
    common/sm4.cpp
    common/sm4.h
    common/string_util.h
+    common/u128.cpp
+    common/u128.h
    common/variant_util.h
    frontend/A32/decoder/arm.h
    frontend/A32/decoder/thumb16.h
--- a/src/backend_x64/a32_emit_x64.cpp
+++ b/src/backend_x64/a32_emit_x64.cpp
@ -62,6 +62,10 @@ FP::RoundingMode A32EmitContext::FPSCR_RMode() const {
    return Location().FPSCR().RMode();
 }

+/// Returns the raw value of the FPSCR carried by this block's location descriptor.
+u32 A32EmitContext::FPCR() const {
+    return Location().FPSCR().Value();
+}
+
 bool A32EmitContext::FPSCR_RoundTowardsZero() const {
    // NOTE(review): this yields true when the rounding mode is anything other than TowardsZero,
    // which reads as inverted relative to the function name — confirm against callers.
    return Location().FPSCR().RMode() != FP::RoundingMode::TowardsZero;
 }
--- a/src/backend_x64/a32_emit_x64.h
+++ b/src/backend_x64/a32_emit_x64.h
@ -24,6 +24,7 @@ struct A32EmitContext final : public EmitContext {
    A32EmitContext(RegAlloc& reg_alloc, IR::Block& block);
    A32::LocationDescriptor Location() const;
    FP::RoundingMode FPSCR_RMode() const override;
+    u32 FPCR() const override;
    bool FPSCR_RoundTowardsZero() const override;
    bool FPSCR_FTZ() const override;
    bool FPSCR_DN() const override;
--- a/src/backend_x64/a32_jitstate.cpp
+++ b/src/backend_x64/a32_jitstate.cpp
@ -163,6 +163,7 @@ u32 A32JitState::Fpscr() const {
    FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1;  // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
    FPSCR |= FPSCR_IDC;
    FPSCR |= FPSCR_UFC;
+    FPSCR |= fpsr_exc;

    return FPSCR;
 }
@ -183,13 +184,10 @@ void A32JitState::SetFpscr(u32 FPSCR) {
    const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
    guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];

-    // Cumulative flags IOC, IXC, UFC, OFC, DZC
-    guest_MXCSR |= ( FPSCR     ) & 0b0000000000001;  // IE = IOC
-    guest_MXCSR |= ( FPSCR << 1) & 0b0000000111100;  // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
-
-    // Cumulative flag IDC, UFC
-    FPSCR_IDC = FPSCR & (1 << 7);
-    FPSCR_UFC = FPSCR & (1 << 3);
+    // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
+    FPSCR_IDC = 0;
+    FPSCR_UFC = 0;
+    fpsr_exc = FPSCR & 0x9F;

    if (Common::Bit<24>(FPSCR)) {
        // VFP Flush to Zero
--- a/src/backend_x64/a32_jitstate.h
+++ b/src/backend_x64/a32_jitstate.h
@ -66,6 +66,7 @@ struct A32JitState {
    std::array<u64, RSBSize> rsb_codeptrs;
    void ResetRSB();

+    u32 fpsr_exc = 0;
    u32 FPSCR_IDC = 0;
    u32 FPSCR_UFC = 0;
    u32 FPSCR_mode = 0;
--- a/src/backend_x64/a64_emit_x64.cpp
+++ b/src/backend_x64/a64_emit_x64.cpp
@ -44,6 +44,10 @@ FP::RoundingMode A64EmitContext::FPSCR_RMode() const {
    return Location().FPCR().RMode();
 }

+/// Returns the raw value of the FPCR carried by this block's location descriptor.
+u32 A64EmitContext::FPCR() const {
+    return Location().FPCR().Value();
+}
+
 bool A64EmitContext::FPSCR_RoundTowardsZero() const {
    // NOTE(review): this yields true when the rounding mode is anything other than TowardsZero,
    // which reads as inverted relative to the function name — confirm against callers.
    return Location().FPCR().RMode() != FP::RoundingMode::TowardsZero;
 }
--- a/src/backend_x64/a64_emit_x64.h
+++ b/src/backend_x64/a64_emit_x64.h
@ -24,6 +24,7 @@ struct A64EmitContext final : public EmitContext {
    A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block);
    A64::LocationDescriptor Location() const;
    FP::RoundingMode FPSCR_RMode() const override;
+    u32 FPCR() const override;
    bool FPSCR_RoundTowardsZero() const override;
    bool FPSCR_FTZ() const override;
    bool FPSCR_DN() const override;
--- a/src/backend_x64/a64_jitstate.cpp
+++ b/src/backend_x64/a64_jitstate.cpp
@ -105,16 +105,15 @@ u32 A64JitState::GetFpsr() const {
    fpsr |= (guest_MXCSR & 0b0000000111100) >> 1;  // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
    fpsr |= FPSCR_IDC;
    fpsr |= FPSCR_UFC;
+    fpsr |= fpsr_exc;
    return fpsr;
 }

 void A64JitState::SetFpsr(u32 value) {
    guest_MXCSR &= ~0x0000003D;
-    guest_MXCSR |= ( value     ) & 0b0000000000001;  // IE = IOC
-    guest_MXCSR |= ( value << 1) & 0b0000000111100;  // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
-
-    FPSCR_IDC = value & (1 << 7);
-    FPSCR_UFC = value & (1 << 3);
+    FPSCR_IDC = 0;
+    FPSCR_UFC = 0;
+    fpsr_exc = value & 0x9F;
 }

 } // namespace Dynarmic::BackendX64
--- a/src/backend_x64/a64_jitstate.h
+++ b/src/backend_x64/a64_jitstate.h
@ -71,6 +71,7 @@ struct A64JitState {
        rsb_codeptrs.fill(0);
    }

+    u32 fpsr_exc = 0;
    u32 FPSCR_IDC = 0;
    u32 FPSCR_UFC = 0;
    u32 fpcr = 0;
--- a/src/backend_x64/emit_x64.h
+++ b/src/backend_x64/emit_x64.h
@ -35,6 +35,7 @@ struct EmitContext {
    void EraseInstruction(IR::Inst* inst);

    virtual FP::RoundingMode FPSCR_RMode() const = 0;
+    virtual u32 FPCR() const = 0;
    virtual bool FPSCR_RoundTowardsZero() const = 0;
    virtual bool FPSCR_FTZ() const = 0;
    virtual bool FPSCR_DN() const = 0;
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@ -5,13 +5,22 @@
 */

 #include <type_traits>
+#include <utility>

 #include "backend_x64/abi.h"
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
 #include "common/assert.h"
 #include "common/common_types.h"
-#include "common/fp_util.h"
+#include "common/fp/op.h"
+#include "common/fp/util.h"
+#include "common/mp/cartesian_product.h"
+#include "common/mp/integer.h"
+#include "common/mp/list.h"
+#include "common/mp/lut.h"
+#include "common/mp/to_tuple.h"
+#include "common/mp/vlift.h"
+#include "common/mp/vllift.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"
 #include "frontend/ir/opcodes.h"
@ -19,6 +28,7 @@
 namespace Dynarmic::BackendX64 {

 using namespace Xbyak::util;
+namespace mp = Dynarmic::Common::mp;

 constexpr u64 f32_negative_zero = 0x80000000u;
 constexpr u64 f32_nan = 0x7fc00000u;
@ -33,6 +43,10 @@ constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double
 constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
 constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
 constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double
+constexpr u64 f64_min_s64 = 0xc3e0000000000000u; // -2^63 as a double
+constexpr u64 f64_max_s64_lim = 0x43e0000000000000u; // 2^63 as a double (actual maximum unrepresentable)
+constexpr u64 f64_min_u64 = 0x0000000000000000u; // 0 as a double
+constexpr u64 f64_max_u64_lim = 0x43f0000000000000u; // 2^64 as a double (actual maximum unrepresentable)

 static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
    Xbyak::Label end;
@ -120,7 +134,7 @@ static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya
    code.movd(code.ABI_PARAM1.cvt32(), a);
    code.movd(code.ABI_PARAM2.cvt32(), b);
    code.CallFunction(static_cast<u32(*)(u32, u32)>([](u32 a, u32 b) -> u32 {
-        return *Common::ProcessNaNs(a, b);
+        return *FP::ProcessNaNs(a, b);
    }));
    code.movd(a, code.ABI_RETURN.cvt32());
    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
@ -149,7 +163,7 @@ static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya
    code.movd(code.ABI_PARAM2.cvt32(), b);
    code.movd(code.ABI_PARAM3.cvt32(), c);
    code.CallFunction(static_cast<u32(*)(u32, u32, u32)>([](u32 a, u32 b, u32 c) -> u32 {
-        return *Common::ProcessNaNs(a, b, c);
+        return *FP::ProcessNaNs(a, b, c);
    }));
    code.movd(a, code.ABI_RETURN.cvt32());
    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
@ -187,7 +201,7 @@ static void PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya
    code.movq(code.ABI_PARAM1, a);
    code.movq(code.ABI_PARAM2, b);
    code.CallFunction(static_cast<u64(*)(u64, u64)>([](u64 a, u64 b) -> u64 {
-        return *Common::ProcessNaNs(a, b);
+        return *FP::ProcessNaNs(a, b);
    }));
    code.movq(a, code.ABI_RETURN);
    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
@ -213,7 +227,7 @@ static void PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya
    code.movq(code.ABI_PARAM2, b);
    code.movq(code.ABI_PARAM3, c);
    code.CallFunction(static_cast<u64(*)(u64, u64, u64)>([](u64 a, u64 b, u64 c) -> u64 {
-        return *Common::ProcessNaNs(a, b, c);
+        return *FP::ProcessNaNs(a, b, c);
    }));
    code.movq(a, code.ABI_RETURN);
    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
@ -892,129 +906,160 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
    ctx.reg_alloc.DefineValue(inst, result);
 }

-void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) {
+static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize, bool unsigned_, size_t isize) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    bool round_towards_zero = args[1].GetImmediateU1();

-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-    // Conversion to double is lossless, and allows for clamping.
+    const size_t fbits = args[1].GetImmediateU8();
+    const auto rounding = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());

-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero32(code, from, to);
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero){
+        const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
+
+        const int round_imm = [&]{
+            switch (rounding) {
+            case FP::RoundingMode::ToNearest_TieEven:
+            default:
+                return 0b00;
+            case FP::RoundingMode::TowardsPlusInfinity:
+                return 0b10;
+            case FP::RoundingMode::TowardsMinusInfinity:
+                return 0b01;
+            case FP::RoundingMode::TowardsZero:
+                return 0b11;
            }
-    code.cvtss2sd(from, from);
-    // First time is to set flags
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 32 bit gpr
+        }();
+
+        const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
+
+        if (fsize == 64) {
+            if (fbits != 0) {
+                const u64 scale_factor = static_cast<u64>((fbits + 1023) << 52);
+                code.mulsd(src, code.MConst(xword, scale_factor));
+            }
+
+            code.roundsd(src, src, round_imm);
+            ZeroIfNaN64(code, src, scratch);
        } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
+            if (fbits != 0) {
+                const u32 scale_factor = static_cast<u32>((fbits + 127) << 23);
+                code.mulss(src, code.MConst(xword, scale_factor));
            }
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_s32));
-    code.maxsd(from, code.MConst(xword, f64_min_s32));
-    // Second time is for real
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 32 bit gpr
+
+            code.roundss(src, src, round_imm);
+            code.cvtss2sd(src, src);
+            ZeroIfNaN64(code, src, scratch);
+        }
+
+        if (isize == 64) {
+            Xbyak::Label saturate_max, end;
+
+            code.maxsd(src, code.MConst(xword, unsigned_ ? f64_min_u64 : f64_min_s64));
+            code.movsd(scratch, code.MConst(xword, unsigned_ ? f64_max_u64_lim : f64_max_s64_lim));
+            code.comisd(scratch, src);
+            code.jna(saturate_max, code.T_NEAR);
+            if (unsigned_) {
+                Xbyak::Label below_max;
+
+                code.movsd(scratch, code.MConst(xword, f64_max_s64_lim));
+                code.comisd(src, scratch);
+                code.jb(below_max);
+                code.subsd(src, scratch);
+                code.cvttsd2si(result, src);
+                code.btc(result, 63);
+                code.jmp(end);
+                code.L(below_max);
+            }
+            code.cvttsd2si(result, src); // 64 bit gpr
+            code.L(end);
+
+            code.SwitchToFarCode();
+            code.L(saturate_max);
+            code.mov(result, unsigned_ ? 0xFFFF'FFFF'FFFF'FFFF : 0x7FFF'FFFF'FFFF'FFFF);
+            code.jmp(end, code.T_NEAR);
+            code.SwitchToNearCode();
        } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
+            code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u32 : f64_max_s32));
+            code.maxsd(src, code.MConst(xword, unsigned_ ? f64_min_u32 : f64_min_s32));
+            code.cvttsd2si(result, src); // 64 bit gpr
        }

-    ctx.reg_alloc.DefineValue(inst, to);
+        ctx.reg_alloc.DefineValue(inst, result);
+
+        return;
    }

-void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    bool round_towards_zero = args[1].GetImmediateU1();
+    using fsize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
+    using unsigned_list = mp::list<mp::vlift<true>, mp::vlift<false>>;
+    using isize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
+    using rounding_list = mp::list<
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieEven>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsPlusInfinity>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsMinusInfinity>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsZero>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieAwayFromZero>
+    >;

-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-    // Conversion to double is lossless, and allows for accurate clamping.
-    //
-    // Since SSE2 doesn't provide an unsigned conversion, we use a 64-bit signed conversion.
-    //
-    // FIXME: None of the FPSR exception bits are correctly signalled with the below code
+    using key_type = std::tuple<size_t, bool, size_t, FP::RoundingMode>;
+    using value_type = u64(*)(u64, u8, FP::FPSR&, A64::FPCR);

-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero64(code, from, to);
+    static const auto lut = mp::GenerateLookupTableFromList<key_type, value_type>(
+        [](auto args) {
+            return std::pair<key_type, value_type>{
+                mp::to_tuple<decltype(args)>,
+                static_cast<value_type>(
+                    [](u64 input, u8 fbits, FP::FPSR& fpsr, A64::FPCR fpcr) {
+                        constexpr auto t = mp::to_tuple<decltype(args)>;
+                        constexpr size_t fsize = std::get<0>(t);
+                        constexpr bool unsigned_ = std::get<1>(t);
+                        constexpr size_t isize = std::get<2>(t);
+                        constexpr FP::RoundingMode rounding_mode = std::get<3>(t);
+                        using InputSize = mp::unsigned_integer_of_size<fsize>;
+
+                        return FP::FPToFixed<InputSize>(isize, static_cast<InputSize>(input), fbits, unsigned_, fpcr, rounding_mode, fpsr);
                    }
-    code.cvtss2sd(from, from);
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_u32));
-    code.maxsd(from, code.MConst(xword, f64_min_u32));
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 64 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 64 bit gpr
+                )
+            };
+        },
+        mp::cartesian_product<fsize_list, unsigned_list, isize_list, rounding_list>{}
+    );
+
+    ctx.reg_alloc.HostCall(inst, args[0], args[1]);
+    code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR());
+    code.CallFunction(lut.at(std::make_tuple(fsize, unsigned_, isize, rounding)));
 }

-    ctx.reg_alloc.DefineValue(inst, to);
+void EmitX64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 64, false, 32);
 }

-void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
-    bool round_towards_zero = args[1].GetImmediateU1();
-
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-
-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero64(code, from, gpr_scratch.cvt64());
-    }
-    // First time is to set flags
-    if (round_towards_zero) {
-        code.cvttsd2si(gpr_scratch, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(gpr_scratch, from); // 32 bit gpr
-    }
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_s32));
-    code.maxsd(from, code.MConst(xword, f64_min_s32));
-    // Second time is for real
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
+void EmitX64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 64, false, 64);
 }

-    ctx.reg_alloc.DefineValue(inst, to);
+void EmitX64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 64, true, 32);
 }

-void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    bool round_towards_zero = args[1].GetImmediateU1();
-
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-    // TODO: Use VCVTPD2UDQ when AVX512VL is available.
-    // FIXME: None of the FPSR exception bits are correctly signalled with the below code
-
-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero64(code, from, to);
-    }
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_u32));
-    code.maxsd(from, code.MConst(xword, f64_min_u32));
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 64 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 64 bit gpr
+void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 64, true, 64);
 }

-    ctx.reg_alloc.DefineValue(inst, to);
+void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 32, false, 32);
+}
+
+void EmitX64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 32, false, 64);
+}
+
+void EmitX64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 32, true, 32);
+}
+
+void EmitX64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed(code, ctx, inst, 32, true, 64);
 }

 void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
--- a/src/backend_x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend_x64/emit_x64_vector_floating_point.cpp
@ -10,7 +10,7 @@
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
 #include "common/bit_util.h"
-#include "common/fp_util.h"
+#include "common/fp/util.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"

@ -69,9 +69,9 @@ static void HandleNaNs(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& xm
    code.CallFunction(static_cast<void(*)(RegArray&, const RegArray&, const RegArray&)>(
        [](RegArray& result, const RegArray& a, const RegArray& b) {
            for (size_t i = 0; i < result.size(); ++i) {
-                if (auto r = Common::ProcessNaNs(a[i], b[i])) {
+                if (auto r = FP::ProcessNaNs(a[i], b[i])) {
                    result[i] = *r;
-                } else if (Common::IsNaN(result[i])) {
+                } else if (FP::IsNaN(result[i])) {
                    result[i] = NaNWrapper<T>::value;
                }
            }
--- a/src/backend_x64/jitstate_info.h
+++ b/src/backend_x64/jitstate_info.h
@ -26,6 +26,7 @@ struct JitStateInfo {
        , offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv))
        , offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC))
        , offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC))
+        , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
    {}

    const size_t offsetof_cycles_remaining;
@ -39,6 +40,7 @@ struct JitStateInfo {
    const size_t offsetof_CPSR_nzcv;
    const size_t offsetof_FPSCR_IDC;
    const size_t offsetof_FPSCR_UFC;
+    const size_t offsetof_fpsr_exc;
 };

 } // namespace Dynarmic::BackendX64
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@ -21,29 +21,29 @@ constexpr size_t BitSize() {
    return sizeof(T) * CHAR_BIT;
 }

+/// Returns a value of type T whose lowest `count` bits are set and whose remaining bits are clear.
+/// `count` is checked with ASSERT_MSG to be no larger than the bit width of T.
+template <typename T>
+inline T Ones(size_t count) {
+    ASSERT_MSG(count <= BitSize<T>(), "count larger than bitsize of T");
+    // The full-width mask is returned directly: the shift below must never be by the full width of T,
+    // which is undefined behaviour in C++ for types at least as wide as int.
+    if (count == BitSize<T>())
+        return static_cast<T>(~static_cast<T>(0));
+    // The inner cast truncates the integer-promoted result of ~ back to T before it is shifted.
+    return ~(static_cast<T>(~static_cast<T>(0)) << count);
+}
+
 /// Extract bits [begin_bit, end_bit] inclusive from value of type T.
 template<size_t begin_bit, size_t end_bit, typename T>
 constexpr T Bits(const T value) {
    static_assert(begin_bit <= end_bit,
                  "invalid bit range (position of beginning bit cannot be greater than that of end bit)");
    static_assert(begin_bit < BitSize<T>(), "begin_bit must be smaller than size of T");
-    static_assert(end_bit < BitSize<T>(), "begin_bit must be smaller than size of T");
+    static_assert(end_bit < BitSize<T>(), "end_bit must be smaller than size of T");

-    return (value >> begin_bit) & ((1 << (end_bit - begin_bit + 1)) - 1);
+    return (value >> begin_bit) & Ones<T>(end_bit - begin_bit + 1);
 }

 #ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable:4554)
 #endif
-/// Extracts a single bit at bit_position from value of type T.
-template<size_t bit_position, typename T>
-constexpr bool Bit(const T value) {
-    static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
-
-    return ((value >> bit_position) & 1) != 0;
-}
-
 /// Extracts a single bit at bit_position from value of type T.
 template<typename T>
 inline bool Bit(size_t bit_position, const T value) {
@ -51,6 +51,46 @@ inline bool Bit(size_t bit_position, const T value) {

    return ((value >> bit_position) & 1) != 0;
 }
+
+/// Extracts a single bit at bit_position from value of type T.
+/// The position is a template argument, so an out-of-range position is rejected by static_assert.
+/// NOTE(review): declared constexpr, but it delegates to the runtime overload Bit(size_t, T),
+/// which is declared inline rather than constexpr — confirm whether constant evaluation is expected to work.
+template<size_t bit_position, typename T>
+constexpr bool Bit(const T value) {
+    static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
+
+    return Bit<T>(bit_position, value);
+}
+
+/// Clears a single bit at bit_position from value of type T.
+/// Returns the result; `value` is passed by value, so the caller's variable is left untouched.
+/// bit_position is checked with ASSERT_MSG to be smaller than the bit width of T.
+template<typename T>
+inline T ClearBit(size_t bit_position, const T value) {
+    ASSERT_MSG(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
+
+    // The 1 is cast to T before shifting so the shift is performed in T rather than in a plain int literal.
+    return value & ~(static_cast<T>(1) << bit_position);
+}
+
+/// Clears a single bit at bit_position from value of type T.
+/// The position is a template argument, so an out-of-range position is rejected by static_assert.
+/// NOTE(review): declared constexpr, but it delegates to the runtime overload ClearBit(size_t, T),
+/// which is declared inline rather than constexpr — confirm whether constant evaluation is expected to work.
+template<size_t bit_position, typename T>
+constexpr T ClearBit(const T value) {
+    static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
+
+    return ClearBit<T>(bit_position, value);
+}
+
+/// Modifies a single bit at bit_position from value of type T.
+/// Returns a copy of `value` in which the bit at bit_position equals new_bit; all other bits are kept.
+/// bit_position is checked with ASSERT_MSG to be smaller than the bit width of T.
+template<typename T>
+inline T ModifyBit(size_t bit_position, const T value, bool new_bit) {
+    ASSERT_MSG(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
+
+    // Clear the target bit first, then OR in new_bit (0 or 1) moved to that position.
+    return ClearBit<T>(bit_position, value) | (static_cast<T>(new_bit) << bit_position);
+}
+
+/// Modifies a single bit at bit_position from value of type T.
+/// The position is a template argument, so an out-of-range position is rejected by static_assert.
+/// NOTE(review): declared constexpr, but it delegates to the runtime overload ModifyBit(size_t, T, bool),
+/// which is declared inline rather than constexpr — confirm whether constant evaluation is expected to work.
+template<size_t bit_position, typename T>
+constexpr T ModifyBit(const T value, bool new_bit) {
+    static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
+
+    return ModifyBit<T>(bit_position, value, new_bit);
+}
 #ifdef _MSC_VER
 #pragma warning(pop)
 #endif
@ -112,11 +152,8 @@ inline size_t LowestSetBit(T value) {
 }

 template <typename T>
-inline T Ones(size_t count) {
-    ASSERT_MSG(count <= BitSize<T>(), "count larger than bitsize of T");
-    if (count == BitSize<T>())
-        return ~static_cast<T>(0);
-    return ~(~static_cast<T>(0) << count);
+inline bool MostSignificantBit(T value) {
+    return Bit<BitSize<T>() - 1, T>(value);
 }

 template <typename T>
--- a/src/common/fp/fpsr.h
+++ b/src/common/fp/fpsr.h
@ -0,0 +1,162 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <boost/optional.hpp>
+
+#include "common/bit_util.h"
+#include "common/common_types.h"
+
+namespace Dynarmic::FP {
+
+/**
+ * Representation of the Floating-Point Status Register.
+ *
+ * Wraps a raw u32 and exposes one getter/setter pair per flag bit.
+ * Constructing from, or assigning, a raw u32 keeps only the bits selected by `mask`;
+ * every other bit of the stored value is zero.
+ */
+class FPSR final {
+public:
+    FPSR() = default;
+    FPSR(const FPSR&) = default;
+    FPSR(FPSR&&) = default;
+    /// Constructs from a raw register value, discarding any bits outside `mask`.
+    explicit FPSR(u32 data) : value{data & mask} {}
+
+    FPSR& operator=(const FPSR&) = default;
+    FPSR& operator=(FPSR&&) = default;
+    /// Replaces the stored value with a raw register value, discarding any bits outside `mask`.
+    FPSR& operator=(u32 data) {
+        value = data & mask;
+        return *this;
+    }
+
+    /// Get negative condition flag
+    bool N() const {
+        return Common::Bit<31>(value);
+    }
+
+    /// Set negative condition flag
+    void N(bool N_) {
+        value = Common::ModifyBit<31>(value, N_);
+    }
+
+    /// Get zero condition flag
+    bool Z() const {
+        return Common::Bit<30>(value);
+    }
+
+    /// Set zero condition flag
+    void Z(bool Z_) {
+        value = Common::ModifyBit<30>(value, Z_);
+    }
+
+    /// Get carry condition flag
+    bool C() const {
+        return Common::Bit<29>(value);
+    }
+
+    /// Set carry condition flag
+    void C(bool C_) {
+        value = Common::ModifyBit<29>(value, C_);
+    }
+
+    /// Get overflow condition flag
+    bool V() const {
+        return Common::Bit<28>(value);
+    }
+
+    /// Set overflow condition flag
+    void V(bool V_) {
+        value = Common::ModifyBit<28>(value, V_);
+    }
+
+    /// Get cumulative saturation bit
+    bool QC() const {
+        return Common::Bit<27>(value);
+    }
+
+    /// Set cumulative saturation bit
+    void QC(bool QC_) {
+        value = Common::ModifyBit<27>(value, QC_);
+    }
+
+    /// Get input denormal floating-point exception bit
+    bool IDC() const {
+        return Common::Bit<7>(value);
+    }
+
+    /// Set input denormal floating-point exception bit
+    void IDC(bool IDC_) {
+        value = Common::ModifyBit<7>(value, IDC_);
+    }
+
+    /// Get inexact cumulative floating-point exception bit
+    bool IXC() const {
+        return Common::Bit<4>(value);
+    }
+
+    /// Set inexact cumulative floating-point exception bit
+    void IXC(bool IXC_) {
+        value = Common::ModifyBit<4>(value, IXC_);
+    }
+
+    /// Get underflow cumulative floating-point exception bit
+    bool UFC() const {
+        return Common::Bit<3>(value);
+    }
+
+    /// Set underflow cumulative floating-point exception bit
+    void UFC(bool UFC_) {
+        value = Common::ModifyBit<3>(value, UFC_);
+    }
+
+    /// Get overflow cumulative floating-point exception bit
+    bool OFC() const {
+        return Common::Bit<2>(value);
+    }
+
+    /// Set overflow cumulative floating-point exception bit
+    void OFC(bool OFC_) {
+        value = Common::ModifyBit<2>(value, OFC_);
+    }
+
+    /// Get divide by zero cumulative floating-point exception bit
+    bool DZC() const {
+        return Common::Bit<1>(value);
+    }
+
+    /// Set divide by zero cumulative floating-point exception bit
+    void DZC(bool DZC_) {
+        value = Common::ModifyBit<1>(value, DZC_);
+    }
+
+    /// Get invalid operation cumulative floating-point exception bit
+    bool IOC() const {
+        return Common::Bit<0>(value);
+    }
+
+    /// Set invalid operation cumulative floating-point exception bit
+    void IOC(bool IOC_) {
+        value = Common::ModifyBit<0>(value, IOC_);
+    }
+
+    /// Gets the underlying raw value within the FPSR.
+    u32 Value() const {
+        return value;
+    }
+
+private:
+    // Bits 5-6 and 8-26 are reserved.
+    // The mask therefore selects bits 27-31 (QC, V, C, Z, N), bit 7 (IDC) and bits 0-4.
+    static constexpr u32 mask = 0xF800009F;
+    u32 value = 0;
+};
+
+/// Two FPSR objects compare equal when their raw values are identical.
+inline bool operator==(FPSR lhs, FPSR rhs) {
+    return lhs.Value() == rhs.Value();
+}
+
+/// Negation of operator== for FPSR: true when the raw values differ.
+inline bool operator!=(FPSR lhs, FPSR rhs) {
+    return !operator==(lhs, rhs);
+}
+
+} // namespace Dynarmic::FP
--- a/src/common/fp/info.h
+++ b/src/common/fp/info.h
@ -0,0 +1,58 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Dynarmic::FP {
+
+/// Format constants for a floating-point value stored in the unsigned integer type FPT.
+/// The primary template has no members; the explicit specializations for u32 and u64 provide the constants.
+template<typename FPT>
+struct FPInfo {};
+
+/// Format constants for a 32-bit floating-point value held in a u32:
+/// 1 sign bit, 8 exponent bits and 23 stored mantissa bits.
+template<>
+struct FPInfo<u32> {
+    static constexpr size_t total_width = 32;
+    static constexpr size_t exponent_width = 8;
+    static constexpr size_t explicit_mantissa_width = 23;
+    // One more than the stored width: counts the implicit leading bit as well.
+    static constexpr size_t mantissa_width = explicit_mantissa_width + 1;
+
+    // The implicit leading bit sits directly above the stored mantissa bits.
+    static constexpr u32 implicit_leading_bit = u32(1) << explicit_mantissa_width;
+    static constexpr u32 sign_mask = 0x80000000;
+    static constexpr u32 exponent_mask = 0x7F800000;
+    static constexpr u32 mantissa_mask = 0x007FFFFF;
+
+    static constexpr int exponent_min = -126;
+    static constexpr int exponent_max = 127;
+    static constexpr int exponent_bias = 127;
+
+    // Each helper returns the bit pattern of the named value; `sign` selects the negative variant.
+    static constexpr u32 Zero(bool sign) { return sign ? sign_mask : 0; }
+    // All exponent bits set, all mantissa bits clear.
+    static constexpr u32 Infinity(bool sign) { return exponent_mask | Zero(sign); }
+    // exponent_mask - 1 borrows from the lowest exponent bit, giving 0x7F7FFFFF:
+    // the exponent field one below all-ones with every mantissa bit set.
+    static constexpr u32 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
+};
+
+/// Format constants for a 64-bit floating-point value held in a u64:
+/// 1 sign bit, 11 exponent bits and 52 stored mantissa bits.
+template<>
+struct FPInfo<u64> {
+    static constexpr size_t total_width = 64;
+    static constexpr size_t exponent_width = 11;
+    static constexpr size_t explicit_mantissa_width = 52;
+    // One more than the stored width: counts the implicit leading bit as well.
+    static constexpr size_t mantissa_width = explicit_mantissa_width + 1;
+
+    // The implicit leading bit sits directly above the stored mantissa bits.
+    static constexpr u64 implicit_leading_bit = u64(1) << explicit_mantissa_width;
+    static constexpr u64 sign_mask = 0x8000'0000'0000'0000;
+    static constexpr u64 exponent_mask = 0x7FF0'0000'0000'0000;
+    static constexpr u64 mantissa_mask = 0x000F'FFFF'FFFF'FFFF;
+
+    static constexpr int exponent_min = -1022;
+    static constexpr int exponent_max = 1023;
+    static constexpr int exponent_bias = 1023;
+
+    // Each helper returns the bit pattern of the named value; `sign` selects the negative variant.
+    static constexpr u64 Zero(bool sign) { return sign ? sign_mask : 0; }
+    // All exponent bits set, all mantissa bits clear.
+    static constexpr u64 Infinity(bool sign) { return exponent_mask | Zero(sign); }
+    // exponent_mask - 1 borrows from the lowest exponent bit, giving 0x7FEF'FFFF'FFFF'FFFF:
+    // the exponent field one below all-ones with every mantissa bit set.
+    static constexpr u64 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
+};
+
+} // namespace Dynarmic::FP 
--- a/src/common/fp/mantissa_util.h
+++ b/src/common/fp/mantissa_util.h
@ -0,0 +1,48 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include "common/bit_util.h"
+#include "common/common_types.h"
+
+namespace Dynarmic::FP {
+
+/// Size of the part discarded by a right shift, relative to half of the least
+/// significant bit that is kept.
+/// FPToFixed compares these values with `>`, so the declaration order
+/// (smallest residual first) must be preserved.
+enum class ResidualError {
+    Zero,
+    LessThanHalf,
+    Half,
+    GreaterThanHalf,
+};
+
+/// Classifies the bits that would be lost when `mantissa` is shifted right by
+/// `shift_amount` bits.
+///
+/// A non-positive shift or a zero mantissa loses nothing. When the shift is
+/// wider than MantissaT the answer depends only on the most significant bit:
+/// set gives GreaterThanHalf, clear gives LessThanHalf.
+template<typename MantissaT>
+ResidualError ResidualErrorOnRightShift(MantissaT mantissa, int shift_amount) {
+    const bool nothing_discarded = shift_amount <= 0 || mantissa == 0;
+    if (nothing_discarded) {
+        return ResidualError::Zero;
+    }
+
+    const int bit_count = static_cast<int>(Common::BitSize<MantissaT>());
+    if (shift_amount > bit_count) {
+        if (Common::MostSignificantBit(mantissa)) {
+            return ResidualError::GreaterThanHalf;
+        }
+        return ResidualError::LessThanHalf;
+    }
+
+    // Isolate the discarded low bits and compare them against the midpoint,
+    // i.e. the value of the highest discarded bit on its own.
+    const size_t discarded_width = static_cast<size_t>(shift_amount);
+    const MantissaT discarded = mantissa & Common::Ones<MantissaT>(discarded_width);
+    const MantissaT midpoint = static_cast<MantissaT>(1) << (discarded_width - 1);
+
+    if (discarded == 0) {
+        return ResidualError::Zero;
+    }
+    if (discarded == midpoint) {
+        return ResidualError::Half;
+    }
+    return discarded < midpoint ? ResidualError::LessThanHalf : ResidualError::GreaterThanHalf;
+}
+
+} // namespace Dynarmic::FP 
--- a/src/common/fp/op.cpp
+++ b/src/common/fp/op.cpp
@ -0,0 +1,101 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include "common/assert.h"
+#include "common/bit_util.h"
+#include "common/common_types.h"
+#include "common/safe_ops.h"
+#include "common/fp/fpsr.h"
+#include "common/fp/mantissa_util.h"
+#include "common/fp/op.h"
+#include "common/fp/process_exception.h"
+#include "common/fp/rounding_mode.h"
+#include "common/fp/unpacked.h"
+#include "frontend/A64/FPCR.h"
+
+namespace Dynarmic::FP {
+
+/// Converts the floating-point bit pattern `op` to a fixed-point integer that is
+/// `ibits` wide and has `fbits` fractional bits.
+///
+/// @param ibits     Width of the result in bits (asserted to be <= 64).
+/// @param op        Raw bits of the floating-point input.
+/// @param fbits     Number of fractional bits (asserted to be <= ibits).
+/// @param unsigned_ True for an unsigned result, false for two's complement.
+/// @param fpcr      Control state forwarded to FPUnpack and FPProcessException.
+/// @param rounding  Rounding mode; RoundingMode::ToOdd is rejected by an assert.
+/// @param fpsr      Receives the exception flags raised by the conversion.
+/// @return The result in the low `ibits` bits. NaN raises FPExc::InvalidOp and
+///         gives 0, as does a negative input with `unsigned_` set. Out-of-range
+///         values raise FPExc::InvalidOp and saturate. A result that needed
+///         rounding raises FPExc::Inexact.
+template<typename FPT>
+u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
+    ASSERT(rounding != RoundingMode::ToOdd);
+    ASSERT(ibits <= 64);
+    ASSERT(fbits <= ibits);
+
+    auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);
+
+    // NaNs unpack with a zero mantissa, so they fall through to the zero case below.
+    if (type == FPType::SNaN || type == FPType::QNaN) {
+        FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
+    }
+
+    // Handle zero
+    if (value.mantissa == 0) {
+        return 0;
+    }
+
+    if (sign && unsigned_) {
+        FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
+        return 0;
+    }
+
+    // value *= 2.0^fbits
+    value.exponent += static_cast<int>(fbits);
+
+    // Work in two's complement: the arithmetic shift below rounds towards minus
+    // infinity, and round_up then adds one where the rounding mode requires it.
+    u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa);
+    const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent);
+    int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent);
+
+    bool round_up = false;
+    switch (rounding) {
+    case RoundingMode::ToNearest_TieEven:
+        round_up = error > ResidualError::Half || (error == ResidualError::Half && Common::Bit<0>(int_result));
+        break;
+    case RoundingMode::TowardsPlusInfinity:
+        round_up = error != ResidualError::Zero;
+        break;
+    case RoundingMode::TowardsMinusInfinity:
+        round_up = false;
+        break;
+    case RoundingMode::TowardsZero:
+        round_up = error != ResidualError::Zero && Common::MostSignificantBit(int_result);
+        break;
+    case RoundingMode::ToNearest_TieAwayFromZero:
+        round_up = error > ResidualError::Half || (error == ResidualError::Half && !Common::MostSignificantBit(int_result));
+        break;
+    case RoundingMode::ToOdd:
+        UNREACHABLE();
+    }
+
+    if (round_up) {
+        int_result++;
+    }
+
+    // Detect Overflow
+    //
+    // The test is done on the magnitude at mantissa scale. For a positive value
+    // that was rounded up, the increment of one result unit corresponds to
+    // 2^-exponent at mantissa scale and may carry into a new top bit, so it has
+    // to be included (e.g. 2147483647.5 -> 2147483648 does not fit in 32 signed
+    // bits). For a negative value rounding up only shrinks the magnitude, so the
+    // unrounded mantissa is the correct bound and the exact-minimum case is
+    // handled separately below.
+    const u64 round_up_unit = (round_up && !sign) ? Safe::LogicalShiftRight<u64>(1, value.exponent) : 0;
+    const int min_exponent_for_overflow = static_cast<int>(ibits) - static_cast<int>(Common::HighestSetBit(value.mantissa + round_up_unit)) - (unsigned_ ? 0 : 1);
+    if (value.exponent >= min_exponent_for_overflow) {
+        // Positive overflow
+        if (unsigned_ || !sign) {
+            FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
+            return Common::Ones<u64>(ibits - (unsigned_ ? 0 : 1));
+        }
+
+        // Negative overflow
+        // The most negative representable value is the one in-range result here.
+        const u64 min_value = Safe::Negate<u64>(static_cast<u64>(1) << (ibits - 1));
+        if (!(value.exponent == min_exponent_for_overflow && int_result == min_value)) {
+            FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
+            return static_cast<u64>(1) << (ibits - 1);
+        }
+    }
+
+    if (error != ResidualError::Zero) {
+        FPProcessException(FPExc::Inexact, fpcr, fpsr);
+    }
+    return int_result & Common::Ones<u64>(ibits);
+}
+
+template u64 FPToFixed<u32>(size_t ibits, u32 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
+template u64 FPToFixed<u64>(size_t ibits, u64 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
+
+} // namespace Dynarmic::FP 
--- a/src/common/fp/op.h
+++ b/src/common/fp/op.h
@ -0,0 +1,21 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include "common/common_types.h"
+#include "common/fp/fpsr.h"
+#include "common/fp/rounding_mode.h"
+#include "frontend/A64/FPCR.h"
+
+namespace Dynarmic::FP {
+
+using FPCR = A64::FPCR;
+
+/// Converts the floating-point bit pattern `op` to a fixed-point integer that is
+/// `ibits` wide with `fbits` fractional bits, signed unless `unsigned_` is set.
+/// Exception flags are reported through `fpsr`. Defined in op.cpp, which
+/// explicitly instantiates it for u32 and u64.
+template<typename FPT>
+u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
+
+} // namespace Dynarmic::FP 
--- a/src/common/fp/process_exception.cpp
+++ b/src/common/fp/process_exception.cpp
@ -0,0 +1,58 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include "common/assert.h"
+#include "common/fp/fpsr.h"
+#include "common/fp/process_exception.h"
+#include "frontend/A64/FPCR.h"
+
+namespace Dynarmic::FP {
+
+/// Records a floating-point exception.
+///
+/// For each exception kind the matching enable bit of `fpcr` is checked first:
+/// if it is set, UNIMPLEMENTED() is hit. The matching flag in `fpsr`
+/// (IOC, DZC, OFC, UFC, IXC or IDC) is then set to true.
+///
+/// @param exception Which exception occurred.
+/// @param fpcr      Control state holding the per-exception enable bits.
+/// @param fpsr      Status state whose flag is set.
+void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr) {
+    switch (exception) {
+    case FPExc::InvalidOp:
+        if (fpcr.IOE()) {
+            UNIMPLEMENTED();
+        }
+        fpsr.IOC(true);
+        break;
+    case FPExc::DivideByZero:
+        if (fpcr.DZE()) {
+            UNIMPLEMENTED();
+        }
+        fpsr.DZC(true);
+        break;
+    case FPExc::Overflow:
+        if (fpcr.OFE()) {
+            UNIMPLEMENTED();
+        }
+        fpsr.OFC(true);
+        break;
+    case FPExc::Underflow:
+        if (fpcr.UFE()) {
+            UNIMPLEMENTED();
+        }
+        fpsr.UFC(true);
+        break;
+    case FPExc::Inexact:
+        if (fpcr.IXE()) {
+            UNIMPLEMENTED();
+        }
+        fpsr.IXC(true);
+        break;
+    case FPExc::InputDenorm:
+        if (fpcr.IDE()) {
+            UNIMPLEMENTED();
+        }
+        fpsr.IDC(true);
+        break;
+    default:
+        // Every enumerator of FPExc is handled above.
+        UNREACHABLE();
+        break;
+    }
+}
+
+} // namespace Dynarmic::FP 
--- a/src/common/fp/process_exception.h
+++ b/src/common/fp/process_exception.h
@ -0,0 +1,27 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include "common/fp/fpsr.h"
+#include "frontend/A64/FPCR.h"
+
+namespace Dynarmic::FP {
+
+using FPCR = A64::FPCR;
+
+/// Kinds of floating-point exception that FPProcessException can record.
+enum class FPExc {
+    InvalidOp,
+    DivideByZero,
+    Overflow,
+    Underflow,
+    Inexact,
+    InputDenorm,
+};
+
+/// Records `exception` by setting the matching flag in `fpsr`; `fpcr` supplies
+/// the per-exception enable bits that are checked first.
+void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP 
--- a/src/common/fp/unpacked.cpp
+++ b/src/common/fp/unpacked.cpp
@ -0,0 +1,179 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include "common/fp/info.h"
+#include "common/fp/process_exception.h"
+#include "common/fp/unpacked.h"
+#include "common/safe_ops.h"
+
+namespace Dynarmic::FP {
+
+/// Splits the raw floating-point bit pattern `op` into its class, its sign and
+/// an unpacked value with value = (sign ? -1 : +1) * mantissa * 2^exponent.
+///
+/// - A zero, or a denormal while fpcr.FZ() is set, unpacks to FPType::Zero with
+///   a zero mantissa; the flushed denormal also raises FPExc::InputDenorm.
+/// - Other denormals unpack to FPType::Nonzero without an implicit leading bit.
+/// - Infinity unpacks to mantissa 1 with a very large placeholder exponent.
+/// - NaNs unpack to a zero mantissa; the top mantissa bit selects QNaN over SNaN.
+/// - Normal numbers get the implicit leading bit added to the mantissa.
+///
+/// @param op   Raw bits of the floating-point input.
+/// @param fpcr Control state (FZ is read here).
+/// @param fpsr Receives any exception flag raised while unpacking.
+/// @return Tuple of (type, sign, unpacked value).
+template<typename FPT>
+std::tuple<FPType, bool, FPUnpacked<u64>> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) {
+    constexpr size_t sign_bit = FPInfo<FPT>::exponent_width + FPInfo<FPT>::explicit_mantissa_width;
+    constexpr size_t exponent_high_bit = FPInfo<FPT>::exponent_width + FPInfo<FPT>::explicit_mantissa_width - 1;
+    constexpr size_t exponent_low_bit = FPInfo<FPT>::explicit_mantissa_width;
+    constexpr size_t mantissa_high_bit = FPInfo<FPT>::explicit_mantissa_width - 1;
+    constexpr size_t mantissa_low_bit = 0;
+    // Exponent that makes the stored fraction of a denormal an integer mantissa.
+    constexpr int denormal_exponent = FPInfo<FPT>::exponent_min - int(FPInfo<FPT>::explicit_mantissa_width);
+
+    const bool sign = Common::Bit<sign_bit>(op);
+    const FPT exp_raw = Common::Bits<exponent_low_bit, exponent_high_bit>(op);
+    const FPT frac_raw = Common::Bits<mantissa_low_bit, mantissa_high_bit>(op);
+
+    // Zero and denormals
+    if (exp_raw == 0) {
+        if (frac_raw == 0 || fpcr.FZ()) {
+            if (frac_raw != 0) {
+                FPProcessException(FPExc::InputDenorm, fpcr, fpsr);
+            }
+            return {FPType::Zero, sign, {sign, 0, 0}};
+        }
+
+        return {FPType::Nonzero, sign, {sign, denormal_exponent, frac_raw}};
+    }
+
+    // Infinity and NaN
+    if (exp_raw == Common::Ones<FPT>(FPInfo<FPT>::exponent_width)) {
+        if (frac_raw == 0) {
+            return {FPType::Infinity, sign, {sign, 1000000, 1}};
+        }
+
+        const bool is_quiet = Common::Bit<mantissa_high_bit>(frac_raw);
+        return {is_quiet ? FPType::QNaN : FPType::SNaN, sign, {sign, 0, 0}};
+    }
+
+    // Normal numbers. The width is cast to int so the whole expression stays in
+    // signed arithmetic instead of wrapping through size_t.
+    const int exp = static_cast<int>(exp_raw) - FPInfo<FPT>::exponent_bias - static_cast<int>(FPInfo<FPT>::explicit_mantissa_width);
+    const u64 frac = frac_raw | FPInfo<FPT>::implicit_leading_bit;
+    return {FPType::Nonzero, sign, {sign, exp, frac}};
+}
+
+template std::tuple<FPType, bool, FPUnpacked<u64>> FPUnpack<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
+template std::tuple<FPType, bool, FPUnpacked<u64>> FPUnpack<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
+
+/// Shifts the mantissa of `op` so that its highest set bit lands on bit F.
+///
+/// @return Tuple of (sign, exponent, mantissa, error) where `exponent` is
+///         op.exponent plus the index of the highest set bit, `mantissa` is the
+///         shifted mantissa, and `error` holds the bits shifted out on the right,
+///         moved to the top of the word.
+template<size_t F, typename MantissaT>
+std::tuple<bool, int, MantissaT, MantissaT> Normalize(FPUnpacked<MantissaT> op) {
+    const int highest_set_bit = Common::HighestSetBit(op.mantissa);
+    // May be negative, in which case the Safe:: helpers shift left instead.
+    const int shift_amount = highest_set_bit - static_cast<int>(F);
+    const MantissaT mantissa = Safe::LogicalShiftRight(op.mantissa, shift_amount);
+    const MantissaT error = Safe::LogicalShiftRightDouble(op.mantissa, static_cast<MantissaT>(0), shift_amount);
+    const int exponent = op.exponent + highest_set_bit;
+    return std::make_tuple(op.sign, exponent, mantissa, error);
+}
+
+/// Rounds the unpacked value `op` to the format described by FPInfo<FPT> and
+/// returns the packed bit pattern.
+///
+/// Preconditions (asserted): op.mantissa is non-zero and `rounding` is not
+/// RoundingMode::ToNearest_TieAwayFromZero.
+///
+/// @param op       Value to round.
+/// @param fpcr     Control state (FZ, FZ16, UFE and AHP are read here).
+/// @param rounding Rounding mode to apply.
+/// @param fpsr     Receives the exception flags raised while rounding.
+template<typename FPT, typename MantissaT>
+FPT FPRoundBase(FPUnpacked<MantissaT> op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
+    ASSERT(op.mantissa != 0);
+    ASSERT(rounding != RoundingMode::ToNearest_TieAwayFromZero);
+
+    constexpr int minimum_exp = FPInfo<FPT>::exponent_min;
+    constexpr size_t E = FPInfo<FPT>::exponent_width;
+    constexpr size_t F = FPInfo<FPT>::explicit_mantissa_width;
+    constexpr bool isFP16 = FPInfo<FPT>::total_width == 16;
+
+    auto [sign, exponent, mantissa, error] = Normalize<F>(op);
+
+    // Flush-to-zero: a result below the normal range becomes a signed zero and
+    // only the underflow flag is set.
+    if (((!isFP16 && fpcr.FZ()) || (isFP16 && fpcr.FZ16())) && exponent < minimum_exp) {
+        fpsr.UFC(true);
+        return FPInfo<FPT>::Zero(sign);
+    }
+
+    // biased_exp == 0 means the result is denormal: shift the mantissa further
+    // right and move the bits that fall off into `error`.
+    int biased_exp = std::max<int>(exponent - minimum_exp + 1, 0);
+    if (biased_exp == 0) {
+        error = Safe::LogicalShiftRightDouble(mantissa, error, minimum_exp - exponent);
+        mantissa = Safe::LogicalShiftRight(mantissa, minimum_exp - exponent);
+    }
+
+    if (biased_exp == 0 && (error != 0 || fpcr.UFE())) {
+        FPProcessException(FPExc::Underflow, fpcr, fpsr);
+    }
+
+    // Decide whether to increment the mantissa, and whether an overflow yields
+    // infinity or the largest normal value.
+    bool round_up = false, overflow_to_inf = false;
+    switch (rounding) {
+    case RoundingMode::ToNearest_TieEven: {
+        // `error` is left-aligned, so exactly one half is the top bit alone.
+        constexpr MantissaT half = static_cast<MantissaT>(1) << (Common::BitSize<MantissaT>() - 1);
+        round_up = (error > half) || (error == half && Common::Bit<0>(mantissa));
+        overflow_to_inf = true;
+        break;
+    }
+    case RoundingMode::TowardsPlusInfinity:
+        round_up = error != 0 && !sign;
+        overflow_to_inf = !sign;
+        break;
+    case RoundingMode::TowardsMinusInfinity:
+        round_up = error != 0 && sign;
+        overflow_to_inf = sign;
+        break;
+    default:
+        // Remaining modes never round up here and never overflow to infinity.
+        break;
+    }
+
+    if (round_up) {
+        if ((mantissa & FPInfo<FPT>::mantissa_mask) == FPInfo<FPT>::mantissa_mask) {
+            // Overflow on rounding up is going to happen
+            if (mantissa == FPInfo<FPT>::mantissa_mask) {
+                // Rounding up from denormal to normal
+                mantissa++;
+                biased_exp++;
+            } else {
+                // Rounding up to next exponent
+                mantissa = (mantissa + 1) / 2;
+                biased_exp++;
+            }
+        } else {
+            mantissa++;
+        }
+    }
+
+    // Round-to-odd: an inexact result gets its lowest mantissa bit forced to 1.
+    if (error != 0 && rounding == RoundingMode::ToOdd) {
+        mantissa = Common::ModifyBit<0>(mantissa, true);
+    }
+
+    FPT result = 0;
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable:4127) // C4127: conditional expression is constant
+#endif
+    if (!isFP16 || !fpcr.AHP()) {
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+        // Format with infinities: the all-ones exponent is not a finite value.
+        constexpr int max_biased_exp = (1 << E) - 1;
+        if (biased_exp >= max_biased_exp) {
+            result = overflow_to_inf ? FPInfo<FPT>::Infinity(sign) : FPInfo<FPT>::MaxNormal(sign);
+            FPProcessException(FPExc::Overflow, fpcr, fpsr);
+            FPProcessException(FPExc::Inexact, fpcr, fpsr);
+        } else {
+            // Pack sign, biased exponent and stored mantissa bits.
+            result = sign ? 1 : 0;
+            result <<= E;
+            result += biased_exp;
+            result <<= F;
+            result |= static_cast<FPT>(mantissa) & FPInfo<FPT>::mantissa_mask;
+            if (error != 0) {
+                FPProcessException(FPExc::Inexact, fpcr, fpsr);
+            }
+        }
+    } else {
+        // 16-bit format with fpcr.AHP() set: the all-ones exponent is still a
+        // finite value, and overflow saturates with an InvalidOp exception.
+        constexpr int max_biased_exp = (1 << E);
+        if (biased_exp >= max_biased_exp) {
+            result = sign ? 0xFFFF : 0x7FFF;
+            FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
+        } else {
+            result = sign ? 1 : 0;
+            result <<= E;
+            result += biased_exp;
+            result <<= F;
+            result |= static_cast<FPT>(mantissa) & FPInfo<FPT>::mantissa_mask;
+            if (error != 0) {
+                FPProcessException(FPExc::Inexact, fpcr, fpsr);
+            }
+        }
+    }
+    return result;
+}
+
+template u32 FPRoundBase<u32, u64>(FPUnpacked<u64> op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
+template u64 FPRoundBase<u64, u64>(FPUnpacked<u64> op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
+
+} // namespace Dynarmic::FP
--- a/src/common/fp/unpacked.h
+++ b/src/common/fp/unpacked.h
@ -0,0 +1,57 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <tuple>
+
+#include "common/common_types.h"
+#include "common/fp/fpsr.h"
+#include "frontend/A64/FPCR.h"
+
+namespace Dynarmic::FP {
+
+using FPCR = A64::FPCR;
+
+/// Class of a floating-point value as reported by FPUnpack.
+enum class FPType {
+    Nonzero,
+    Zero,
+    Infinity,
+    QNaN,
+    SNaN,
+};
+
+/// A floating-point value split into sign, binary exponent and integer mantissa.
+/// value = (sign ? -1 : +1) * mantissa * 2^exponent
+template<typename MantissaT>
+struct FPUnpacked {
+    bool sign;           // true when the value is negative
+    int exponent;        // power of two that scales the mantissa
+    MantissaT mantissa;  // unsigned integer magnitude
+};
+
+/// Two unpacked values are equal when sign, exponent and mantissa all match.
+template<typename MantissaT>
+inline bool operator==(const FPUnpacked<MantissaT>& a, const FPUnpacked<MantissaT>& b) {
+    if (a.sign != b.sign) {
+        return false;
+    }
+    if (a.exponent != b.exponent) {
+        return false;
+    }
+    return a.mantissa == b.mantissa;
+}
+
+/// Splits the raw bit pattern `op` into (type, sign, unpacked value).
+/// Defined in unpacked.cpp, which explicitly instantiates it for u32 and u64.
+template<typename FPT>
+std::tuple<FPType, bool, FPUnpacked<u64>> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr);
+
+/// Rounds `op` to the format FPT using `rounding` and returns the packed bits.
+/// Defined in unpacked.cpp, which explicitly instantiates it for
+/// <u32, u64> and <u64, u64>.
+template<typename FPT, typename MantissaT>
+FPT FPRoundBase(FPUnpacked<MantissaT> op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
+
+/// Rounds `op` to the format FPT with an explicit rounding mode.
+/// Forwards to FPRoundBase after clearing AHP in the local copy of `fpcr`.
+template<typename FPT, typename MantissaT>
+FPT FPRound(FPUnpacked<MantissaT> op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
+    // fpcr is passed by value, so the caller's copy is not modified.
+    fpcr.AHP(false);
+    return FPRoundBase<FPT, MantissaT>(op, fpcr, rounding, fpsr);
+}
+
+/// Rounds `op` to the format FPT using the rounding mode held in `fpcr`.
+template<typename FPT, typename MantissaT>
+FPT FPRound(FPUnpacked<MantissaT> op, FPCR fpcr, FPSR& fpsr) {
+    return FPRound<FPT, MantissaT>(op, fpcr, fpcr.RMode(), fpsr);
+}
+
+} // namespace Dynarmic::FP
--- a/src/common/fp/util.h
+++ b/src/common/fp/util.h
@ -8,8 +8,7 @@

 #include <boost/optional.hpp>

-namespace Dynarmic {
-namespace Common {
+namespace Dynarmic::FP {

 /// Is 32-bit floating point value a QNaN?
 constexpr bool IsQNaN(u32 value) {
@ -110,5 +109,4 @@ inline boost::optional<u64> ProcessNaNs(u64 a, u64 b, u64 c) {
    return boost::none;
 }

-} // namespace Common
-} // namespace Dynarmic
+} // namespace Dynarmic::FP
--- a/src/common/mp/append.h
+++ b/src/common/mp/append.h
@ -0,0 +1,27 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+namespace Dynarmic::Common::mp {
+
+namespace detail {
+
+// Declared only; the partial specialisation handles every list-like first argument.
+template<typename... Args>
+struct append_impl;
+
+// Re-opens the list template and places the extra items after the existing ones.
+template<template<typename...> class List, typename... Existing, typename... Extra>
+struct append_impl<List<Existing...>, Extra...> {
+    using type = List<Existing..., Extra...>;
+};
+
+} // namespace detail
+
+/// Yields list L with the items T added at its end.
+template<typename L, typename... T>
+using append = typename detail::append_impl<L, T...>::type;
+
+} // namespace Dynarmic::Common::mp
--- a/src/common/mp/bind.h
+++ b/src/common/mp/bind.h
@ -0,0 +1,18 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+namespace Dynarmic::Common::mp {
+
+/// Partially applies metafunction F: the arguments A are fixed as its leading
+/// arguments, and the nested `type` template accepts the remaining ones.
+template<template<typename...> class F, typename... A>
+struct bind {
+    template<typename... Rest>
+    using type = F<A..., Rest...>;
+};
+
+} // namespace Dynarmic::Common::mp
--- a/src/common/mp/cartesian_product.h
+++ b/src/common/mp/cartesian_product.h
@ -0,0 +1,51 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include "common/mp/append.h"
+#include "common/mp/bind.h"
+#include "common/mp/concat.h"
+#include "common/mp/fmap.h"
+#include "common/mp/list.h"
+
+namespace Dynarmic::Common::mp {
+
+namespace detail {
+
+// Primary template: has no `type` member, so unsupported argument shapes
+// fail when `type` is requested.
+template<class... Ls>
+struct cartesian_product_impl{};
+
+// Base case: no lists left to combine, the accumulated result is the answer.
+template<class RL>
+struct cartesian_product_impl<RL> {
+    using type = RL;
+};
+
+// Combines the accumulated list of partial tuples RT... with one more list:
+// every partial tuple is extended by every element T1 of that list.
+template<template<class...> class LT, class... RT, class... T1>
+struct cartesian_product_impl<LT<RT...>, LT<T1...>> {
+    using type = concat<
+        fmap<bind<append, RT>::template type, list<T1...>>...
+    >;
+};
+
+// Two or more lists remaining: fold in the first one, then recurse on the rest.
+template<class RL, class L1, class L2, class... Ls>
+struct cartesian_product_impl<RL, L1, L2, Ls...> {
+    using type = typename cartesian_product_impl<
+        typename cartesian_product_impl<RL, L1>::type,
+        L2,
+        Ls...
+    >::type;
+};
+
+} // namespace detail
+
+/// Produces the cartesian product of a set of lists.
+/// For example:
+/// cartesian_product<list<A, B>, list<D, E>> == list<list<A, D>, list<A, E>, list<B, D>, list<B, E>>
+template<typename L1, typename... Ls>
+using cartesian_product = typename detail::cartesian_product_impl<fmap<list, L1>, Ls...>::type;
+
+} // namespace Dynarmic::Common::mp
--- a/src/common/mp/concat.h
+++ b/src/common/mp/concat.h
@ -0,0 +1,57 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include "common/mp/list.h"
+
+namespace Dynarmic::Common::mp {
+
+namespace detail {
+
+// Declared only; the specialisations below cover the supported argument shapes.
+template<class... L>
+struct concat_impl;
+
+// No lists: the result is the empty list.
+template<>
+struct concat_impl<> {
+    using type = list<>;
+};
+
+// A single list is already concatenated.
+template<class L>
+struct concat_impl<L> {
+    using type = L;
+};
+
+// General step: merge the first two lists and recurse on the remainder.
+template<template<class...> class LT, class... T1, class... T2, class... Ls>
+struct concat_impl<LT<T1...>, LT<T2...>, Ls...> {
+    using type = typename concat_impl<LT<T1..., T2...>, Ls...>::type;
+};
+
+// Wide step: merges sixteen lists in a single instantiation before recursing.
+template<template<class...> class LT,
+         class... T1, class... T2, class... T3, class... T4, class... T5, class... T6, class... T7, class... T8,
+         class... T9, class... T10, class... T11, class... T12, class... T13, class... T14, class... T15, class... T16,
+         class... Ls>
+struct concat_impl<
+        LT<T1...>, LT<T2...>, LT<T3...>, LT<T4...>, LT<T5...>, LT<T6...>, LT<T7...>, LT<T8...>,
+        LT<T9...>, LT<T10...>, LT<T11...>, LT<T12...>, LT<T13...>, LT<T14...>, LT<T15...>, LT<T16...>,
+        Ls...>
+{
+    using type = typename concat_impl<
+        LT<
+            T1..., T2..., T3..., T4..., T5..., T6..., T7..., T8...,
+            T9..., T10..., T11..., T12..., T13..., T14..., T15..., T16...
+        >,
+        Ls...
+    >::type;
+};
+
+} // namespace detail
+
+/// Concatenates any number of lists built from the same list template into one
+/// list; with no arguments the result is list<>.
+template<class... L>
+using concat = typename detail::concat_impl<L...>::type;
+
+} // namespace Dynarmic::Common::mp
--- a/src/common/mp/fapply.h
+++ b/src/common/mp/fapply.h
@ -0,0 +1,27 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+namespace Dynarmic::Common::mp {
+
+namespace detail {
+
+// Declared only; defined just for second arguments that are list-like.
+template<template<typename...> class F, typename L>
+struct fapply_impl;
+
+// Unpacks the list's elements and hands them to F as its argument list.
+template<template<typename...> class F, template<typename...> class List, typename... Items>
+struct fapply_impl<F, List<Items...>> {
+    using type = F<Items...>;
+};
+
+} // namespace detail
+
+/// Calls metafunction F with the elements of list L as its arguments.
+template<template<typename...> class F, typename L>
+using fapply = typename detail::fapply_impl<F, L>::type;
+
+} // namespace Dynarmic::Common::mp
--- a/src/common/mp/fmap.h
+++ b/src/common/mp/fmap.h
@ -0,0 +1,27 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+namespace Dynarmic::Common::mp {
+
+namespace detail {
+
+// Declared only; defined just for second arguments that are list-like.
+template<template<typename...> class F, typename L>
+struct fmap_impl;
+
+// Rebuilds the same kind of list with F applied to every element.
+template<template<typename...> class F, template<typename...> class List, typename... Items>
+struct fmap_impl<F, List<Items...>> {
+    using type = List<F<Items>...>;
+};
+
+} // namespace detail
+
+/// Maps metafunction F over list L, giving a list of F<element> in the same order.
+template<template<typename...> class F, typename L>
+using fmap = typename detail::fmap_impl<F, L>::type;
+
+} // namespace Dynarmic::Common::mp
--- a/src/common/mp/integer.h
+++ b/src/common/mp/integer.h
@ -0,0 +1,51 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace Dynarmic::Common::mp {
+
+namespace detail {
+
+// Unsupported widths have no member types, so using them fails to compile.
+template<std::size_t Bits>
+struct integer_of_size_impl{};
+
+template<>
+struct integer_of_size_impl<8> {
+    using signed_type = std::int8_t;
+    using unsigned_type = std::uint8_t;
+};
+
+template<>
+struct integer_of_size_impl<16> {
+    using signed_type = std::int16_t;
+    using unsigned_type = std::uint16_t;
+};
+
+template<>
+struct integer_of_size_impl<32> {
+    using signed_type = std::int32_t;
+    using unsigned_type = std::uint32_t;
+};
+
+template<>
+struct integer_of_size_impl<64> {
+    using signed_type = std::int64_t;
+    using unsigned_type = std::uint64_t;
+};
+
+} // namespace detail
+
+/// Unsigned fixed-width integer type that is `size` bits wide (8, 16, 32 or 64).
+template<std::size_t size>
+using unsigned_integer_of_size = typename detail::integer_of_size_impl<size>::unsigned_type;
+
+/// Signed fixed-width integer type that is `size` bits wide (8, 16, 32 or 64).
+template<std::size_t size>
+using signed_integer_of_size = typename detail::integer_of_size_impl<size>::signed_type;
+
+} // namespace Dynarmic::Common::mp
--- a/src/common/mp/list.h
+++ b/src/common/mp/list.h
@ -0,0 +1,15 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+namespace Dynarmic::Common::mp {
+
+/// Compile-time list of types. It carries no data; the elements live only in
+/// the template argument list.
+template<typename... Ts>
+struct list {};
+
+} // namespace Dynarmic::Common::mp
--- a/src/common/mp/lut.h
+++ b/src/common/mp/lut.h
@ -0,0 +1,23 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <array>
+#include <map>
+#include <type_traits>
+
+#include "common/mp/list.h"
+
+namespace Dynarmic::Common::mp {
+
+/// Builds a std::map<KeyT, ValueT> by calling `f` once with a value-initialised
+/// instance of every type in the list; each call must yield something
+/// convertible to std::pair<KeyT, ValueT>.
+template <typename KeyT, typename ValueT, typename Function, typename ...Values>
+inline auto GenerateLookupTableFromList(Function f, list<Values...>) {
+    // NOTE(review): the array is a function-local static, so for a given set of
+    // template arguments `f` is only evaluated on the first call; later calls
+    // reuse the stored pairs.
+    static const std::array<std::pair<KeyT, ValueT>, sizeof...(Values)> pair_array{f(Values{})...};
+    return std::map<KeyT, ValueT>(pair_array.begin(), pair_array.end());
+}
+
+} // namespace Dynarmic::Common::mp
--- a/src/common/mp/to_tuple.h
+++ b/src/common/mp/to_tuple.h
@ -0,0 +1,29 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <tuple>
+
+namespace Dynarmic::Common::mp {
+
+namespace detail {
+
+// Declared only; defined just for list-like arguments.
+template<typename L>
+struct to_tuple_impl;
+
+// Reads each element's ::value, converted to its ::value_type, into a tuple.
+template<template<typename...> class List, typename... Items>
+struct to_tuple_impl<List<Items...>> {
+    static constexpr auto value = std::make_tuple(static_cast<typename Items::value_type>(Items::value)...);
+};
+
+} // namespace detail
+
+/// Tuple holding the values of the metavalues in list L, in list order.
+template<typename L>
+constexpr auto to_tuple = detail::to_tuple_impl<L>::value;
+
+} // namespace Dynarmic::Common::mp
--- a/src/common/mp/vlift.h
+++ b/src/common/mp/vlift.h
@ -0,0 +1,17 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <type_traits>
+
+namespace Dynarmic::Common::mp {
+
+/// Wraps the compile-time value V in a type: std::integral_constant of V's own type.
+template<auto V>
+using vlift = std::integral_constant<decltype(V), V>;
+
+} // namespace Dynarmic::Common::mp
--- a/src/common/mp/vllift.h
+++ b/src/common/mp/vllift.h
@ -0,0 +1,31 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <type_traits>
+
+#include "common/mp/list.h"
+
+namespace Dynarmic::Common::mp {
+
+namespace detail {
+
+// Anything that is not a std::integer_sequence has no `type` member.
+template<typename Sequence>
+struct vllift_impl{};
+
+// Each value of the sequence becomes one std::integral_constant list element.
+template<typename ValueT, ValueT... Vs>
+struct vllift_impl<std::integer_sequence<ValueT, Vs...>> {
+    using type = list<std::integral_constant<ValueT, Vs>...>;
+};
+
+} // namespace detail
+
+/// Turns the value list VL (a std::integer_sequence) into a type list with one
+/// std::integral_constant per value.
+template<typename VL>
+using vllift = typename detail::vllift_impl<VL>::type;
+
+} // namespace Dynarmic::Common::mp
--- a/src/common/safe_ops.h
+++ b/src/common/safe_ops.h
@ -0,0 +1,109 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <type_traits>
+
+#include "common/bit_util.h"
+#include "common/common_types.h"
+#include "common/u128.h"
+
+namespace Dynarmic::Safe {
+
+// Forward declarations: each left shift calls the matching right shift for
+// negative amounts and vice versa, so all four must be visible up front.
+template<typename T> T LogicalShiftLeft(T value, int shift_amount);
+template<typename T> T LogicalShiftRight(T value, int shift_amount);
+template<typename T> T ArithmeticShiftLeft(T value, int shift_amount);
+template<typename T> T ArithmeticShiftRight(T value, int shift_amount);
+
+/// Left shift that is defined for every shift amount: amounts of the full width
+/// or more give 0, and negative amounts shift right logically instead.
+template<typename T>
+T LogicalShiftLeft(T value, int shift_amount) {
+    static_assert(std::is_integral_v<T>);
+
+    using UnsignedT = std::make_unsigned_t<T>;
+    const int bit_count = static_cast<int>(Common::BitSize<T>());
+
+    if (shift_amount < 0) {
+        return LogicalShiftRight(value, -shift_amount);
+    }
+    if (shift_amount >= bit_count) {
+        return 0;
+    }
+
+    // Shift the unsigned representation so the bits simply fall off the top.
+    return static_cast<T>(static_cast<UnsignedT>(value) << shift_amount);
+}
+
+/// u128 overload: forwards to u128's own operator<<.
+template<>
+inline u128 LogicalShiftLeft(u128 value, int shift_amount) {
+    return value << shift_amount;
+}
+
+/// Logical (zero-filling) right shift that is defined for every shift amount:
+/// amounts of the full width or more give 0, and negative amounts shift left.
+template<typename T>
+T LogicalShiftRight(T value, int shift_amount) {
+    static_assert(std::is_integral_v<T>);
+
+    using UnsignedT = std::make_unsigned_t<T>;
+    const int bit_count = static_cast<int>(Common::BitSize<T>());
+
+    if (shift_amount < 0) {
+        return LogicalShiftLeft(value, -shift_amount);
+    }
+    if (shift_amount >= bit_count) {
+        return 0;
+    }
+
+    // Shifting the unsigned representation fills the vacated bits with zeros.
+    return static_cast<T>(static_cast<UnsignedT>(value) >> shift_amount);
+}
+
+/// u128 overload: forwards to u128's own operator>>.
+template<>
+inline u128 LogicalShiftRight(u128 value, int shift_amount) {
+    return value >> shift_amount;
+}
+
+/// Logical right shift of the double-width value top:bottom by `shift_amount`,
+/// returning the low word: the bits leaving `top` enter `bottom` from above.
+template<typename T>
+T LogicalShiftRightDouble(T top, T bottom, int shift_amount) {
+    const int bit_count = int(Common::BitSize<T>());
+    const T from_top = LogicalShiftLeft(top, bit_count - shift_amount);
+    const T from_bottom = LogicalShiftRight(bottom, shift_amount);
+    return from_top | from_bottom;
+}
+
+/// Left shift that is defined for every shift amount: amounts of the full width
+/// or more give 0, and negative amounts perform an arithmetic right shift.
+template<typename T>
+T ArithmeticShiftLeft(T value, int shift_amount) {
+    static_assert(std::is_integral_v<T>);
+
+    if (shift_amount >= static_cast<int>(Common::BitSize<T>())) {
+        return 0;
+    }
+
+    if (shift_amount < 0) {
+        return ArithmeticShiftRight(value, -shift_amount);
+    }
+
+    // Shift the unsigned representation: left-shifting a negative signed value
+    // is undefined behaviour before C++20, while the resulting bit pattern of an
+    // arithmetic and a logical left shift is the same.
+    auto unsigned_value = static_cast<std::make_unsigned_t<T>>(value);
+    return static_cast<T>(unsigned_value << shift_amount);
+}
+
+/// Arithmetic (sign-filling) right shift that is defined for every shift
+/// amount: amounts of the full width or more give all ones when the top bit of
+/// `value` is set and 0 otherwise, and negative amounts shift left.
+template<typename T>
+T ArithmeticShiftRight(T value, int shift_amount) {
+    static_assert(std::is_integral_v<T>);
+
+    using SignedT = std::make_signed_t<T>;
+    const int bit_count = static_cast<int>(Common::BitSize<T>());
+
+    if (shift_amount < 0) {
+        return ArithmeticShiftLeft(value, -shift_amount);
+    }
+    if (shift_amount >= bit_count) {
+        // Only copies of the sign bit remain.
+        if (Common::MostSignificantBit(value)) {
+            return static_cast<T>(~static_cast<T>(0));
+        }
+        return 0;
+    }
+
+    // Shift the signed representation so the top bit is replicated.
+    return static_cast<T>(static_cast<SignedT>(value) >> shift_amount);
+}
+
+/// Right shift of the double-width value top:bottom by `shift_amount`, returning
+/// the low word: `top` contributes through ArithmeticShiftLeft and `bottom`
+/// through LogicalShiftRight.
+template<typename T>
+T ArithmeticShiftRightDouble(T top, T bottom, int shift_amount) {
+    const int bit_count = int(Common::BitSize<T>());
+    const T from_top = ArithmeticShiftLeft(top, bit_count - shift_amount);
+    const T from_bottom = LogicalShiftRight(bottom, shift_amount);
+    return from_top | from_bottom;
+}
+
+/// Two's complement negation of `value`, returned in the same type.
+/// The subtraction is done on the unsigned representation so that negating the
+/// most negative bit pattern (only the top bit set) wraps back to itself
+/// instead of overflowing a signed integer, which would be undefined behaviour.
+template<typename T>
+T Negate(T value) {
+    using UnsignedT = std::make_unsigned_t<T>;
+    return static_cast<T>(static_cast<UnsignedT>(0) - static_cast<UnsignedT>(value));
+}
+
+} // namespace Dynarmic::Safe
--- a/src/common/u128.cpp
+++ b/src/common/u128.cpp
@ -0,0 +1,64 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include <array>
+
+#include "common/common_types.h"
+#include "common/u128.h"
+
+namespace Dynarmic {
+
+/// Shifts a 128-bit value left. Negative amounts shift right instead, and
+/// amounts of 128 or more produce zero.
+u128 operator<<(u128 operand, int amount) {
+    if (amount < 0) {
+        return operand >> -amount;
+    }
+
+    if (amount >= 128) {
+        return {};
+    }
+
+    if (amount >= 64) {
+        // Only bits of the lower half survive, and they land in the upper half.
+        return u128(0, operand.lower << (amount - 64));
+    }
+
+    if (amount > 0) {
+        // Bits leaving the top of the lower half enter the bottom of the upper half.
+        const u64 carried = operand.lower >> (64 - amount);
+        return u128(operand.lower << amount, (operand.upper << amount) | carried);
+    }
+
+    // amount == 0: returned unchanged, which also avoids a 64-bit shift by 64 above.
+    return operand;
+}
+
+/// Shifts a 128-bit value right, filling with zeros. Negative amounts shift
+/// left instead, and amounts of 128 or more produce zero.
+u128 operator>>(u128 operand, int amount) {
+    if (amount < 0) {
+        return operand << -amount;
+    }
+
+    if (amount >= 128) {
+        return {};
+    }
+
+    if (amount >= 64) {
+        // Only bits of the upper half survive, and they land in the lower half.
+        return u128(operand.upper >> (amount - 64), 0);
+    }
+
+    if (amount > 0) {
+        // Bits leaving the bottom of the upper half enter the top of the lower half.
+        const u64 carried = operand.upper << (64 - amount);
+        return u128((operand.lower >> amount) | carried, operand.upper >> amount);
+    }
+
+    // amount == 0: returned unchanged, which also avoids a 64-bit shift by 64 above.
+    return operand;
+}
+
+} // namespace Dynarmic
--- a/src/common/u128.h
+++ b/src/common/u128.h
@ -0,0 +1,57 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <cstring>
+#include <type_traits>
+
+#include "common/bit_util.h"
+#include "common/common_types.h"
+
+namespace Dynarmic {
+
+/// 128-bit unsigned value stored as two 64-bit halves.
+struct u128 {
+    u64 lower = 0;  ///< Least significant 64 bits.
+    u64 upper = 0;  ///< Most significant 64 bits.
+
+    u128() = default;
+    u128(const u128&) = default;
+    u128(u128&&) = default;
+    u128& operator=(const u128&) = default;
+    u128& operator=(u128&&) = default;
+
+    /// Builds a value from explicit lower and upper halves.
+    u128(u64 lower_, u64 upper_) : lower(lower_), upper(upper_) {}
+
+    /// Implicit conversion from any integral type of at most 64 bits: the value
+    /// is converted to u64 for the lower half and the upper half is zero.
+    template <typename T>
+    /* implicit */ u128(T value) : lower(static_cast<u64>(value)), upper(0) {
+        static_assert(std::is_integral_v<T>);
+        static_assert(Common::BitSize<T>() <= Common::BitSize<u64>());
+    }
+};
+
+static_assert(Common::BitSize<u128>() == 128);
+static_assert(std::is_standard_layout_v<u128>);
+static_assert(std::is_trivially_copyable_v<u128>);
+
+/// 128-bit addition; wraps around on overflow of the upper half.
+inline u128 operator+(u128 a, u128 b) {
+    const u64 low_sum = a.lower + b.lower;
+    // If the low word wrapped around, a carry goes into the high word.
+    const u64 carry = low_sum < a.lower ? 1 : 0;
+    return u128(low_sum, a.upper + b.upper + carry);
+}
+
+/// 128-bit subtraction; wraps around on underflow of the upper half.
+inline u128 operator-(u128 a, u128 b) {
+    const u64 low_diff = a.lower - b.lower;
+    // If the low word wrapped around, a borrow is taken from the high word.
+    const u64 borrow = a.lower < low_diff ? 1 : 0;
+    return u128(low_diff, a.upper - b.upper - borrow);
+}
+
+u128 operator<<(u128 operand, int amount);
+u128 operator>>(u128 operand, int amount);
+
+} // namespace Dynarmic
--- a/src/frontend/A32/translate/translate_arm/vfp2.cpp
+++ b/src/frontend/A32/translate/translate_arm/vfp2.cpp
@ -442,8 +442,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool s
    if (ConditionPassed(cond)) {
        auto reg_m = ir.GetExtendedRegister(m);
        auto result = sz
-                      ? ir.FPDoubleToU32(reg_m, round_towards_zero, true)
-                      : ir.FPSingleToU32(reg_m, round_towards_zero, true);
+                      ? ir.FPDoubleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode())
+                      : ir.FPSingleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode());
        ir.SetExtendedRegister(d, result);
    }
    return true;
@ -457,8 +457,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool s
    if (ConditionPassed(cond)) {
        auto reg_m = ir.GetExtendedRegister(m);
        auto result = sz
-                      ? ir.FPDoubleToS32(reg_m, round_towards_zero, true)
-                      : ir.FPSingleToS32(reg_m, round_towards_zero, true);
+                      ? ir.FPDoubleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode())
+                      : ir.FPSingleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode());
        ir.SetExtendedRegister(d, result);
    }
    return true;
--- a/src/frontend/A64/FPCR.h
+++ b/src/frontend/A64/FPCR.h
@ -37,6 +37,11 @@ public:
        return Common::Bit<26>(value);
    }

+    /// Sets the alternate half-precision control flag (bit 26 of the register value).
+    void AHP(bool AHP_) {
+        const auto updated_value = Common::ModifyBit<26>(value, AHP_);
+        value = updated_value;
+    }
+
    /// Default NaN mode control bit.
    bool DN() const {
        return Common::Bit<25>(value);
@ -52,6 +57,10 @@ public:
        return static_cast<FP::RoundingMode>(Common::Bits<22, 23>(value));
    }

+    /// FZ16 flag: bit 19 of the register value.
+    bool FZ16() const {
+        const bool fz16_bit = Common::Bit<19>(value);
+        return fz16_bit;
+    }
+
    /// Input denormal exception trap enable flag.
    bool IDE() const {
        return Common::Bit<15>(value);
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@ -884,17 +884,17 @@ INST(FCVTZS_float_fix,       "FCVTZS (scalar, fixed-point)",              "z0011
 INST(FCVTZU_float_fix,       "FCVTZU (scalar, fixed-point)",              "z0011110yy011001ppppppnnnnnddddd")

 // Data Processing - FP and SIMD - Conversion between floating point and integer
-//INST(FCVTNS_float,           "FCVTNS (scalar)",                           "z0011110yy100000000000nnnnnddddd")
-//INST(FCVTNU_float,           "FCVTNU (scalar)",                           "z0011110yy100001000000nnnnnddddd")
+INST(FCVTNS_float,           "FCVTNS (scalar)",                           "z0011110yy100000000000nnnnnddddd")
+INST(FCVTNU_float,           "FCVTNU (scalar)",                           "z0011110yy100001000000nnnnnddddd")
 INST(SCVTF_float_int,        "SCVTF (scalar, integer)",                   "z0011110yy100010000000nnnnnddddd")
 INST(UCVTF_float_int,        "UCVTF (scalar, integer)",                   "z0011110yy100011000000nnnnnddddd")
-//INST(FCVTAS_float,           "FCVTAS (scalar)",                           "z0011110yy100100000000nnnnnddddd")
-//INST(FCVTAU_float,           "FCVTAU (scalar)",                           "z0011110yy100101000000nnnnnddddd")
+INST(FCVTAS_float,           "FCVTAS (scalar)",                           "z0011110yy100100000000nnnnnddddd")
+INST(FCVTAU_float,           "FCVTAU (scalar)",                           "z0011110yy100101000000nnnnnddddd")
 INST(FMOV_float_gen,         "FMOV (general)",                            "z0011110yy10r11o000000nnnnnddddd")
-//INST(FCVTPS_float,           "FCVTPS (scalar)",                           "z0011110yy101000000000nnnnnddddd")
-//INST(FCVTPU_float,           "FCVTPU (scalar)",                           "z0011110yy101001000000nnnnnddddd")
-//INST(FCVTMS_float,           "FCVTMS (scalar)",                           "z0011110yy110000000000nnnnnddddd")
-//INST(FCVTMU_float,           "FCVTMU (scalar)",                           "z0011110yy110001000000nnnnnddddd")
+INST(FCVTPS_float,           "FCVTPS (scalar)",                           "z0011110yy101000000000nnnnnddddd")
+INST(FCVTPU_float,           "FCVTPU (scalar)",                           "z0011110yy101001000000nnnnnddddd")
+INST(FCVTMS_float,           "FCVTMS (scalar)",                           "z0011110yy110000000000nnnnnddddd")
+INST(FCVTMU_float,           "FCVTMU (scalar)",                           "z0011110yy110001000000nnnnnddddd")
 INST(FCVTZS_float_int,       "FCVTZS (scalar, integer)",                  "z0011110yy111000000000nnnnnddddd")
 INST(FCVTZU_float_int,       "FCVTZU (scalar, integer)",                  "z0011110yy111001000000nnnnnddddd")
 //INST(FJCVTZS,                "FJCVTZS",                                   "0001111001111110000000nnnnnddddd")
--- a/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp
+++ b/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp
@ -38,13 +38,13 @@ bool TranslatorVisitor::FCVTZS_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec

    IR::U32U64 intval;
    if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToS32(fltval, true, true);
+        intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
    } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToS32(fltval, true, true);
+        intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
    } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
    } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
    } else {
        UNREACHABLE();
    }
@ -69,13 +69,13 @@ bool TranslatorVisitor::FCVTZU_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec

    IR::U32U64 intval;
    if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToU32(fltval, true, true);
+        intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
    } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToU32(fltval, true, true);
+        intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
    } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
    } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
    } else {
        UNREACHABLE();
    }
--- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp
+++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp
@ -6,6 +6,7 @@

 #include <boost/optional.hpp>

+#include "common/fp/rounding_mode.h"
 #include "frontend/A64/translate/impl/impl.h"

 namespace Dynarmic::A64 {
@ -135,58 +136,98 @@ bool TranslatorVisitor::FMOV_float_gen(bool sf, Imm<2> type, Imm<1> rmode_0, Imm
    return true;
 }

-bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+static bool FloaingPointConvertSignedInteger(TranslatorVisitor& v, bool sf, Imm<2> type, Vec Vn, Reg Rd, FP::RoundingMode rounding_mode) {
    const size_t intsize = sf ? 64 : 32;
    const auto fltsize = GetDataSize(type);
    if (!fltsize || *fltsize == 16) {
-        return UnallocatedEncoding();
+        return v.UnallocatedEncoding();
    }

-    const IR::U32U64 fltval = V_scalar(*fltsize, Vn);
+    const IR::U32U64 fltval = v.V_scalar(*fltsize, Vn);
    IR::U32U64 intval;

    if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToS32(fltval, true, true);
+        intval = v.ir.FPSingleToFixedS32(fltval, 0, rounding_mode);
    } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToS32(fltval, true, true);
+        intval = v.ir.FPDoubleToFixedS32(fltval, 0, rounding_mode);
    } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = v.ir.FPSingleToFixedS64(fltval, 0, rounding_mode);
    } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = v.ir.FPDoubleToFixedS64(fltval, 0, rounding_mode);
    } else {
        UNREACHABLE();
    }

-    X(intsize, Rd, intval);
+    v.X(intsize, Rd, intval);

    return true;
 }

+/// Shared translation for scalar floating-point to unsigned integer conversions.
+/// Picks the IR conversion matching the source float width and destination
+/// integer width, using zero fractional bits and the caller's rounding mode.
+static bool FloaingPointConvertUnsignedInteger(TranslatorVisitor& v, bool sf, Imm<2> type, Vec Vn, Reg Rd, FP::RoundingMode rounding_mode) {
+    const auto fltsize = GetDataSize(type);
+    if (!fltsize || *fltsize == 16) {
+        return v.UnallocatedEncoding();
+    }
+
+    const auto float_bits = *fltsize;
+    const size_t intsize = sf ? 64 : 32;
+
+    const IR::U32U64 fltval = v.V_scalar(float_bits, Vn);
+    IR::U32U64 intval;
+
+    if (float_bits == 32) {
+        if (sf) {
+            intval = v.ir.FPSingleToFixedU64(fltval, 0, rounding_mode);
+        } else {
+            intval = v.ir.FPSingleToFixedU32(fltval, 0, rounding_mode);
+        }
+    } else if (float_bits == 64) {
+        if (sf) {
+            intval = v.ir.FPDoubleToFixedU64(fltval, 0, rounding_mode);
+        } else {
+            intval = v.ir.FPDoubleToFixedU32(fltval, 0, rounding_mode);
+        }
+    } else {
+        UNREACHABLE();
+    }
+
+    v.X(intsize, Rd, intval);
+    return true;
+}
+
+/// FCVTNS (scalar): signed conversion using the ToNearest_TieEven rounding mode.
+bool TranslatorVisitor::FCVTNS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    const FP::RoundingMode rounding_mode = FP::RoundingMode::ToNearest_TieEven;
+    return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, rounding_mode);
+}
+
+/// FCVTNU (scalar): unsigned conversion using the ToNearest_TieEven rounding mode.
+bool TranslatorVisitor::FCVTNU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    const FP::RoundingMode rounding_mode = FP::RoundingMode::ToNearest_TieEven;
+    return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, rounding_mode);
+}
+
+/// FCVTZS (scalar, integer): signed conversion using the TowardsZero rounding mode.
+bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    const FP::RoundingMode rounding_mode = FP::RoundingMode::TowardsZero;
+    return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, rounding_mode);
+}
+
 bool TranslatorVisitor::FCVTZU_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
-    const size_t intsize = sf ? 64 : 32;
-    const auto fltsize = GetDataSize(type);
-    if (!fltsize || *fltsize == 16) {
-        return UnallocatedEncoding();
+    return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsZero);
 }

-    const IR::U32U64 fltval = V_scalar(*fltsize, Vn);
-    IR::U32U64 intval;
-
-    if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToU32(fltval, true, true);
-    } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToU32(fltval, true, true);
-    } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
-    } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
-    } else {
-        UNREACHABLE();
+bool TranslatorVisitor::FCVTAS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieAwayFromZero);
 }

-    X(intsize, Rd, intval);
+/// FCVTAU (scalar): unsigned conversion using the ToNearest_TieAwayFromZero rounding mode.
+bool TranslatorVisitor::FCVTAU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    const FP::RoundingMode rounding_mode = FP::RoundingMode::ToNearest_TieAwayFromZero;
+    return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, rounding_mode);
+}

-    return true;
+/// FCVTPS (scalar): signed conversion using the TowardsPlusInfinity rounding mode.
+bool TranslatorVisitor::FCVTPS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    const FP::RoundingMode rounding_mode = FP::RoundingMode::TowardsPlusInfinity;
+    return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, rounding_mode);
+}
+
+/// FCVTPU (scalar): unsigned conversion using the TowardsPlusInfinity rounding mode.
+bool TranslatorVisitor::FCVTPU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    const FP::RoundingMode rounding_mode = FP::RoundingMode::TowardsPlusInfinity;
+    return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, rounding_mode);
+}
+
+/// FCVTMS (scalar): signed conversion using the TowardsMinusInfinity rounding mode.
+bool TranslatorVisitor::FCVTMS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    const FP::RoundingMode rounding_mode = FP::RoundingMode::TowardsMinusInfinity;
+    return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, rounding_mode);
+}
+
+bool TranslatorVisitor::FCVTMU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+    return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsMinusInfinity);
 }

 } // namespace Dynarmic::A64
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@ -1451,24 +1451,44 @@ U64 IREmitter::FPSingleToDouble(const U32& a, bool fpscr_controlled) {
    return Inst<U64>(Opcode::FPSingleToDouble, a);
 }

-U32 IREmitter::FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPSingleToS32, a, Imm1(round_towards_zero));
+U32 IREmitter::FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPDoubleToFixedS32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }

-U32 IREmitter::FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPSingleToU32, a, Imm1(round_towards_zero));
+U64 IREmitter::FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPDoubleToFixedS64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }

-U32 IREmitter::FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPDoubleToS32, a, Imm1(round_towards_zero));
+U32 IREmitter::FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPDoubleToFixedU32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }

-U32 IREmitter::FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPDoubleToU32, a, Imm1(round_towards_zero));
+/// Emits FPDoubleToFixedU64 with fbits and the rounding mode encoded as 8-bit immediates.
+U64 IREmitter::FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+
+    const auto fbits_imm = Imm8(static_cast<u8>(fbits));
+    const auto rounding_imm = Imm8(static_cast<u8>(rounding));
+    return Inst<U64>(Opcode::FPDoubleToFixedU64, a, fbits_imm, rounding_imm);
+}
+
+/// Emits FPSingleToFixedS32 with fbits and the rounding mode encoded as 8-bit immediates.
+U32 IREmitter::FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+
+    const auto fbits_imm = Imm8(static_cast<u8>(fbits));
+    const auto rounding_imm = Imm8(static_cast<u8>(rounding));
+    return Inst<U32>(Opcode::FPSingleToFixedS32, a, fbits_imm, rounding_imm);
+}
+
+/// Emits FPSingleToFixedS64 with fbits and the rounding mode encoded as 8-bit immediates.
+U64 IREmitter::FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+
+    const auto fbits_imm = Imm8(static_cast<u8>(fbits));
+    const auto rounding_imm = Imm8(static_cast<u8>(rounding));
+    return Inst<U64>(Opcode::FPSingleToFixedS64, a, fbits_imm, rounding_imm);
+}
+
+/// Emits FPSingleToFixedU32 with fbits and the rounding mode encoded as 8-bit immediates.
+U32 IREmitter::FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+
+    const auto fbits_imm = Imm8(static_cast<u8>(fbits));
+    const auto rounding_imm = Imm8(static_cast<u8>(rounding));
+    return Inst<U32>(Opcode::FPSingleToFixedU32, a, fbits_imm, rounding_imm);
+}
+
+U64 IREmitter::FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPSingleToFixedU64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }

 U32 IREmitter::FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled) {
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@ -12,6 +12,10 @@
 #include "frontend/ir/terminal.h"
 #include "frontend/ir/value.h"

+namespace Dynarmic::FP {
+enum class RoundingMode;
+} // namespace Dynarmic::FP
+
 // ARM JIT Microinstruction Intermediate Representation
 //
 // This intermediate representation is an SSA IR. It is designed primarily for analysis,
@ -264,10 +268,14 @@ public:
    U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled);
    U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled);
    U64 FPSingleToDouble(const U32& a, bool fpscr_controlled);
-    U32 FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled);
-    U32 FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled);
-    U32 FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled);
-    U32 FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled);
+    U32 FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding);
    U32 FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
    U32 FPU32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
    U64 FPS32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled);
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@ -386,10 +386,14 @@ OPCODE(FPSub64,                             T::U64,         T::U64,         T::U
 // Floating-point conversions
 OPCODE(FPSingleToDouble,                    T::U64,         T::U32                                          )
 OPCODE(FPDoubleToSingle,                    T::U32,         T::U64                                          )
-OPCODE(FPSingleToU32,                       T::U32,         T::U32,         T::U1                           )
-OPCODE(FPSingleToS32,                       T::U32,         T::U32,         T::U1                           )
-OPCODE(FPDoubleToU32,                       T::U32,         T::U64,         T::U1                           )
-OPCODE(FPDoubleToS32,                       T::U32,         T::U64,         T::U1                           )
+OPCODE(FPDoubleToFixedS32,                  T::U32,         T::U64,         T::U8,          T::U8           )
+OPCODE(FPDoubleToFixedS64,                  T::U64,         T::U64,         T::U8,          T::U8           )
+OPCODE(FPDoubleToFixedU32,                  T::U32,         T::U64,         T::U8,          T::U8           )
+OPCODE(FPDoubleToFixedU64,                  T::U64,         T::U64,         T::U8,          T::U8           )
+OPCODE(FPSingleToFixedS32,                  T::U32,         T::U32,         T::U8,          T::U8           )
+OPCODE(FPSingleToFixedS64,                  T::U64,         T::U32,         T::U8,          T::U8           )
+OPCODE(FPSingleToFixedU32,                  T::U32,         T::U32,         T::U8,          T::U8           )
+OPCODE(FPSingleToFixedU64,                  T::U64,         T::U32,         T::U8,          T::U8           )
 OPCODE(FPU32ToSingle,                       T::U32,         T::U32,         T::U1                           )
 OPCODE(FPS32ToSingle,                       T::U32,         T::U32,         T::U1                           )
 OPCODE(FPU32ToDouble,                       T::U64,         T::U32,         T::U1                           )
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -29,7 +29,11 @@ add_executable(dynarmic_tests
    A64/inst_gen.cpp
    A64/inst_gen.h
    A64/testenv.h
+    fp/FPToFixed.cpp
+    fp/mantissa_util_tests.cpp
+    fp/unpacked_tests.cpp
    main.cpp
+    mp.cpp
    rand_int.h
 )

--- a/tests/fp/FPToFixed.cpp
+++ b/tests/fp/FPToFixed.cpp
@ -0,0 +1,38 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include <tuple>
+#include <vector>
+
+#include <catch.hpp>
+
+#include "common/fp/fpsr.h"
+#include "common/fp/op.h"
+#include "rand_int.h"
+
+using namespace Dynarmic;
+using namespace Dynarmic::FP;
+
+TEST_CASE("FPToFixed", "[fp]") {
+    // Each entry: {input bits, ibits, expected output, expected FPSR value}.
+    const std::vector<std::tuple<u32, size_t, u64, u32>> test_cases {
+        {0x447A0000, 64, 0x000003E8, 0x00},
+        {0xC47A0000, 32, 0xFFFFFC18, 0x00},
+        {0x4479E000, 64, 0x000003E8, 0x10},
+        {0x50800000, 32, 0x7FFFFFFF, 0x01},
+        {0xD0800000, 32, 0x80000000, 0x01},
+        {0xCF000000, 32, 0x80000000, 0x00},
+        {0x80002B94, 64, 0x00000000, 0x10},
+        {0x80636D24, 64, 0x00000000, 0x10},
+    };
+
+    const FPCR fpcr;
+    for (const auto& test_case : test_cases) {
+        const u32 input = std::get<0>(test_case);
+        const size_t ibits = std::get<1>(test_case);
+        const u64 expected_output = std::get<2>(test_case);
+        const u32 expected_fpsr = std::get<3>(test_case);
+
+        // A fresh FPSR per case so flags from earlier cases cannot leak in.
+        FPSR fpsr;
+        const u64 output = FPToFixed<u32>(ibits, input, 0, false, fpcr, RoundingMode::ToNearest_TieEven, fpsr);
+
+        REQUIRE(output == expected_output);
+        REQUIRE(fpsr.Value() == expected_fpsr);
+    }
+}
--- a/tests/fp/mantissa_util_tests.cpp
+++ b/tests/fp/mantissa_util_tests.cpp
@ -0,0 +1,63 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include <tuple>
+#include <vector>
+
+#include <catch.hpp>
+
+#include "common/fp/mantissa_util.h"
+#include "common/safe_ops.h"
+#include "rand_int.h"
+
+using namespace Dynarmic;
+using namespace Dynarmic::FP;
+
+TEST_CASE("ResidualErrorOnRightShift", "[fp]") {
+    // Each entry: {mantissa, right-shift amount, expected residual error}.
+    const std::vector<std::tuple<u32, int, ResidualError>> test_cases {
+        {0x00000001, 1, ResidualError::Half},
+        {0x00000002, 1, ResidualError::Zero},
+        {0x00000001, 2, ResidualError::LessThanHalf},
+        {0x00000002, 2, ResidualError::Half},
+        {0x00000003, 2, ResidualError::GreaterThanHalf},
+        {0x00000004, 2, ResidualError::Zero},
+        {0x00000005, 2, ResidualError::LessThanHalf},
+        {0x00000006, 2, ResidualError::Half},
+        {0x00000007, 2, ResidualError::GreaterThanHalf},
+    };
+
+    for (const auto& test_case : test_cases) {
+        const u32 mantissa = std::get<0>(test_case);
+        const int shift = std::get<1>(test_case);
+        const ResidualError expected_result = std::get<2>(test_case);
+
+        const ResidualError result = ResidualErrorOnRightShift(mantissa, shift);
+        REQUIRE(result == expected_result);
+    }
+}
+
+TEST_CASE("ResidualErrorOnRightShift Randomized", "[fp]") {
+    // Threshold for exactly half: only the top bit of the 64-bit error word set.
+    constexpr u64 half_error = 0x8000'0000'0000'0000ull;
+
+    for (size_t test = 0; test < 100000; test++) {
+        const u32 mantissa = RandInt<u32>(0, 0xFFFFFFFF);
+        const int shift = RandInt<int>(-60, 60);
+
+        const ResidualError result = ResidualErrorOnRightShift(mantissa, shift);
+
+        // Reference value computed independently through the safe shift helpers.
+        const u64 calculated_error = Safe::ArithmeticShiftRightDouble(Common::SignExtend<32, u64>(mantissa), u64(0), shift);
+
+        ResidualError expected_result = ResidualError::GreaterThanHalf;
+        if (calculated_error == 0) {
+            expected_result = ResidualError::Zero;
+        } else if (calculated_error < half_error) {
+            expected_result = ResidualError::LessThanHalf;
+        } else if (calculated_error == half_error) {
+            expected_result = ResidualError::Half;
+        }
+
+        INFO(std::hex << "mantissa " << mantissa << " shift " << shift << " calculated_error " << calculated_error);
+        REQUIRE(result == expected_result);
+    }
+}
--- a/tests/fp/unpacked_tests.cpp
+++ b/tests/fp/unpacked_tests.cpp
@ -0,0 +1,71 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include <tuple>
+#include <vector>
+
+#include <catch.hpp>
+
+#include "common/fp/unpacked.h"
+#include "rand_int.h"
+
+using namespace Dynarmic;
+using namespace Dynarmic::FP;
+
+TEST_CASE("FPUnpack Tests", "[fp]") {
+    using UnpackResult = std::tuple<FPType, bool, FPUnpacked<u64>>;
+
+    // Each entry: {input bits, expected unpack result, expected FPSR value}.
+    const static std::vector<std::tuple<u32, UnpackResult, u32>> test_cases {
+        {0x00000000, {FPType::Zero, false, {false, 0, 0}}, 0},
+        {0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0},
+        {0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0},
+        {0x7F800001, {FPType::SNaN, false, {false, 0, 0}}, 0},
+        {0xFF800001, {FPType::SNaN, true, {true, 0, 0}}, 0},
+        {0x7FC00001, {FPType::QNaN, false, {false, 0, 0}}, 0},
+        {0xFFC00001, {FPType::QNaN, true, {true, 0, 0}}, 0},
+        {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149.
+        {0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon
+    };
+
+    const FPCR fpcr;
+    for (const auto& test_case : test_cases) {
+        const u32 input = std::get<0>(test_case);
+        const UnpackResult& expected_output = std::get<1>(test_case);
+        const u32 expected_fpsr = std::get<2>(test_case);
+
+        // A fresh FPSR per case so flags from earlier cases cannot leak in.
+        FPSR fpsr;
+        const auto output = FPUnpack<u32>(input, fpcr, fpsr);
+
+        INFO("Input: " << std::hex << input);
+        REQUIRE(output == expected_output);
+        REQUIRE(fpsr.Value() == expected_fpsr);
+    }
+}
+
+TEST_CASE("FPRound Tests", "[fp]") {
+    using UnpackedInput = std::tuple<FPType, bool, FPUnpacked<u64>>;
+
+    // Each entry: {expected rounded bits, unpacked input, expected FPSR value}.
+    const static std::vector<std::tuple<u32, UnpackedInput, u32>> test_cases {
+        {0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0x14},
+        {0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0x14},
+        {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149.
+        {0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon
+        {0x3F800000, {FPType::Nonzero, false, {false, -28, 0xFFFFFFF}}, 0x10}, // rounds to 1.0
+    };
+
+    const FPCR fpcr;
+    for (const auto& test_case : test_cases) {
+        const u32 expected_output = std::get<0>(test_case);
+        const UnpackedInput& input = std::get<1>(test_case);
+        const u32 expected_fpsr = std::get<2>(test_case);
+
+        // Only the unpacked value (third tuple element) is fed to FPRound.
+        FPSR fpsr;
+        const auto output = FPRound<u32>(std::get<2>(input), fpcr, fpsr);
+
+        INFO("Expected Output: " << std::hex << expected_output);
+        REQUIRE(output == expected_output);
+        REQUIRE(fpsr.Value() == expected_fpsr);
+    }
+}
+
+TEST_CASE("FPUnpack<->FPRound Round-trip Tests", "[fp]") {
+    const FPCR fpcr;
+
+    for (size_t count = 0; count < 100000; count++) {
+        FPSR fpsr;
+
+        // Choose one of the two input ranges at random, then a value inside it.
+        const bool use_lower_range = RandInt(0, 1) == 0;
+        const u32 input = use_lower_range ? RandInt<u32>(0x00000001, 0x7F800000) : RandInt<u32>(0x80000001, 0xFF800000);
+
+        const auto unpack_result = FPUnpack<u32>(input, fpcr, fpsr);
+        const auto intermediate = std::get<2>(unpack_result);
+        const u32 output = FPRound<u32>(intermediate, fpcr, fpsr);
+
+        INFO("Count: " << count);
+        INFO("Intermediate Values: " << std::hex << intermediate.sign << ';' << intermediate.exponent << ';' << intermediate.mantissa);
+        REQUIRE(input == output);
+    }
+}
--- a/tests/mp.cpp
+++ b/tests/mp.cpp
@ -0,0 +1,27 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include <type_traits>
+
+#include "common/mp/cartesian_product.h"
+
+using namespace Dynarmic::Common::mp;
+
+namespace {
+
+using ActualProduct = cartesian_product<list<int, bool>, list<double, float>, list<char, unsigned>>;
+
+// Every combination of one element per input list, with the first list varying slowest.
+using ExpectedProduct = list<
+    list<int, double, char>,
+    list<int, double, unsigned>,
+    list<int, float, char>,
+    list<int, float, unsigned>,
+    list<bool, double, char>,
+    list<bool, double, unsigned>,
+    list<bool, float, char>,
+    list<bool, float, unsigned>
+>;
+
+} // anonymous namespace
+
+static_assert(std::is_same_v<ActualProduct, ExpectedProduct>);