From 8651c2d10ee3f0c44ee941e05aac0a9e241f1b1a Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 28 Jun 2018 21:44:21 +0100 Subject: [PATCH 01/28] u128: Implement u128 For when we need a 128-bit integer --- src/CMakeLists.txt | 2 ++ src/common/u128.cpp | 64 +++++++++++++++++++++++++++++++++++++++++++++ src/common/u128.h | 57 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+) create mode 100644 src/common/u128.cpp create mode 100644 src/common/u128.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7334de8e..0fa66d10 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -31,6 +31,8 @@ add_library(dynarmic common/sm4.cpp common/sm4.h common/string_util.h + common/u128.cpp + common/u128.h common/variant_util.h frontend/A32/decoder/arm.h frontend/A32/decoder/thumb16.h diff --git a/src/common/u128.cpp b/src/common/u128.cpp new file mode 100644 index 00000000..5d97d222 --- /dev/null +++ b/src/common/u128.cpp @@ -0,0 +1,64 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#include + +#include "common/common_types.h" +#include "common/u128.h" + +namespace Dynarmic { + +u128 operator<<(u128 operand, int amount) { + if (amount < 0) { + return operand >> -amount; + } + + if (amount == 0) { + return operand; + } + + if (amount < 64) { + u128 result; + result.lower = (operand.lower << amount); + result.upper = (operand.upper << amount) | (operand.lower >> (64 - amount)); + return result; + } + + if (amount < 128) { + u128 result; + result.upper = operand.lower << (amount - 64); + return result; + } + + return {}; +} + +u128 operator>>(u128 operand, int amount) { + if (amount < 0) { + return operand << -amount; + } + + if (amount == 0) { + return operand; + } + + if (amount < 64) { + u128 result; + result.lower = (operand.lower >> amount) | (operand.upper << (64 - amount)); + result.upper = (operand.upper >> amount); + return result; + } + + if (amount < 128) { + u128 result; + result.lower = operand.upper >> (amount - 64); + return result; + } + + return {}; +} + +} // namespace Dynarmic diff --git a/src/common/u128.h b/src/common/u128.h new file mode 100644 index 00000000..b08143c8 --- /dev/null +++ b/src/common/u128.h @@ -0,0 +1,57 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#pragma once + +#include +#include + +#include "common/bit_util.h" +#include "common/common_types.h" + +namespace Dynarmic { + +struct u128 { + u128() = default; + u128(const u128&) = default; + u128(u128&&) = default; + u128& operator=(const u128&) = default; + u128& operator=(u128&&) = default; + + u128(u64 lower_, u64 upper_) : lower(lower_), upper(upper_) {} + + template + /* implicit */ u128(T value) : lower(value), upper(0) { + static_assert(std::is_integral_v); + static_assert(Common::BitSize() <= Common::BitSize()); + } + + u64 lower = 0; + u64 upper = 0; +}; + +static_assert(Common::BitSize() == 128); +static_assert(std::is_standard_layout_v); +static_assert(std::is_trivially_copyable_v); + +inline u128 operator+(u128 a, u128 b) { + u128 result; + result.lower = a.lower + b.lower; + result.upper = a.upper + b.upper + (a.lower > result.lower); + return result; +} + +inline u128 operator-(u128 a, u128 b) { + u128 result; + result.lower = a.lower - b.lower; + result.upper = a.upper - b.upper - (a.lower < result.lower); + return result; +} + +u128 operator<<(u128 operand, int amount); +u128 operator>>(u128 operand, int amount); + +} // namespace Dynarmic From 62b640b2fa143c39ce545d56485f22008cab6959 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 26 Jun 2018 20:50:39 +0100 Subject: [PATCH 02/28] bit_util: Add ClearBit and ModifyBit --- src/common/bit_util.h | 48 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/src/common/bit_util.h b/src/common/bit_util.h index 775ccda0..0ed24da5 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h @@ -36,14 +36,6 @@ constexpr T Bits(const T value) { #pragma warning(push) #pragma warning(disable:4554) #endif -/// Extracts a single bit at bit_position from value of type T. 
-template -constexpr bool Bit(const T value) { - static_assert(bit_position < BitSize(), "bit_position must be smaller than size of T"); - - return ((value >> bit_position) & 1) != 0; -} - /// Extracts a single bit at bit_position from value of type T. template inline bool Bit(size_t bit_position, const T value) { @@ -51,6 +43,46 @@ inline bool Bit(size_t bit_position, const T value) { return ((value >> bit_position) & 1) != 0; } + +/// Extracts a single bit at bit_position from value of type T. +template +constexpr bool Bit(const T value) { + static_assert(bit_position < BitSize(), "bit_position must be smaller than size of T"); + + return Bit(bit_position, value); +} + +/// Clears a single bit at bit_position from value of type T. +template +inline T ClearBit(size_t bit_position, const T value) { + ASSERT_MSG(bit_position < BitSize(), "bit_position must be smaller than size of T"); + + return value & ~(static_cast(1) << bit_position); +} + +/// Clears a single bit at bit_position from value of type T. +template +constexpr T ClearBit(const T value) { + static_assert(bit_position < BitSize(), "bit_position must be smaller than size of T"); + + return ClearBit(bit_position, value); +} + +/// Modifies a single bit at bit_position from value of type T. +template +inline T ModifyBit(size_t bit_position, const T value, bool new_bit) { + ASSERT_MSG(bit_position < BitSize(), "bit_position must be smaller than size of T"); + + return ClearBit(bit_position, value) | (static_cast(new_bit) << bit_position); +} + +/// Modifies a single bit at bit_position from value of type T. 
+template +constexpr T ModifyBit(const T value, bool new_bit) { + static_assert(bit_position < BitSize(), "bit_position must be smaller than size of T"); + + return ModifyBit(bit_position, value, new_bit); +} #ifdef _MSC_VER #pragma warning(pop) #endif From 95ad0d0a66ee935fd7b0c5b0392abaffe14f9ff2 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Wed, 27 Jun 2018 14:37:52 +0100 Subject: [PATCH 03/28] bit_util: Use Ones to implement Bits --- src/common/bit_util.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/common/bit_util.h b/src/common/bit_util.h index 0ed24da5..b83fc6ec 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h @@ -21,15 +21,23 @@ constexpr size_t BitSize() { return sizeof(T) * CHAR_BIT; } +template +inline T Ones(size_t count) { + ASSERT_MSG(count <= BitSize(), "count larger than bitsize of T"); + if (count == BitSize()) + return static_cast(~static_cast(0)); + return ~(static_cast(~static_cast(0)) << count); +} + /// Extract bits [begin_bit, end_bit] inclusive from value of type T. 
template constexpr T Bits(const T value) { static_assert(begin_bit <= end_bit, "invalid bit range (position of beginning bit cannot be greater than that of end bit)"); static_assert(begin_bit < BitSize(), "begin_bit must be smaller than size of T"); - static_assert(end_bit < BitSize(), "begin_bit must be smaller than size of T"); + static_assert(end_bit < BitSize(), "end_bit must be smaller than size of T"); - return (value >> begin_bit) & ((1 << (end_bit - begin_bit + 1)) - 1); + return (value >> begin_bit) & Ones(end_bit - begin_bit + 1); } #ifdef _MSC_VER @@ -143,14 +151,6 @@ inline size_t LowestSetBit(T value) { return result; } -template -inline T Ones(size_t count) { - ASSERT_MSG(count <= BitSize(), "count larger than bitsize of T"); - if (count == BitSize()) - return ~static_cast(0); - return ~(~static_cast(0) << count); -} - template inline T Replicate(T value, size_t element_size) { ASSERT_MSG(BitSize() % element_size == 0, "bitsize of T not divisible by element_size"); From b00fe23b91dc653e65d814e34cd61d41ecd44b2d Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 28 Jun 2018 21:44:35 +0100 Subject: [PATCH 04/28] bit_util: Implement MostSignificantBit --- src/common/bit_util.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/common/bit_util.h b/src/common/bit_util.h index b83fc6ec..25ed3fc3 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h @@ -151,6 +151,11 @@ inline size_t LowestSetBit(T value) { return result; } +template +inline bool MostSignificantBit(T value) { + return Bit() - 1, T>(value); +} + template inline T Replicate(T value, size_t element_size) { ASSERT_MSG(BitSize() % element_size == 0, "bitsize of T not divisible by element_size"); From d21659152ce7e52699782670cf139125bd345320 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 28 Jun 2018 21:45:13 +0100 Subject: [PATCH 05/28] safe_ops: Implement safe shifting operations Implement shifting operations that perform consistently across architectures without running 
into undefined or implementation-defined behaviour. --- src/CMakeLists.txt | 1 + src/common/safe_ops.h | 109 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 src/common/safe_ops.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0fa66d10..5807525a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -27,6 +27,7 @@ add_library(dynarmic common/memory_pool.cpp common/memory_pool.h common/mp.h + common/safe_ops.h common/scope_exit.h common/sm4.cpp common/sm4.h diff --git a/src/common/safe_ops.h b/src/common/safe_ops.h new file mode 100644 index 00000000..bc6d7b88 --- /dev/null +++ b/src/common/safe_ops.h @@ -0,0 +1,109 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#include + +#include "common/bit_util.h" +#include "common/common_types.h" +#include "common/u128.h" + +namespace Dynarmic::Safe { + +template T LogicalShiftLeft(T value, int shift_amount); +template T LogicalShiftRight(T value, int shift_amount); +template T ArithmeticShiftLeft(T value, int shift_amount); +template T ArithmeticShiftRight(T value, int shift_amount); + +template +T LogicalShiftLeft(T value, int shift_amount) { + static_assert(std::is_integral_v); + + if (shift_amount >= static_cast(Common::BitSize())) { + return 0; + } + + if (shift_amount < 0) { + return LogicalShiftRight(value, -shift_amount); + } + + auto unsigned_value = static_cast>(value); + return static_cast(unsigned_value << shift_amount); +} + +template<> +inline u128 LogicalShiftLeft(u128 value, int shift_amount) { + return value << shift_amount; +} + +template +T LogicalShiftRight(T value, int shift_amount) { + static_assert(std::is_integral_v); + + if (shift_amount >= static_cast(Common::BitSize())) { + return 0; + } + + if (shift_amount < 0) { + return LogicalShiftLeft(value, 
-shift_amount); + } + + auto unsigned_value = static_cast>(value); + return static_cast(unsigned_value >> shift_amount); +} + +template<> +inline u128 LogicalShiftRight(u128 value, int shift_amount) { + return value >> shift_amount; +} + +template +T LogicalShiftRightDouble(T top, T bottom, int shift_amount) { + return LogicalShiftLeft(top, int(Common::BitSize()) - shift_amount) | LogicalShiftRight(bottom, shift_amount); +} + +template +T ArithmeticShiftLeft(T value, int shift_amount) { + static_assert(std::is_integral_v); + + if (shift_amount >= static_cast(Common::BitSize())) { + return 0; + } + + if (shift_amount < 0) { + return ArithmeticShiftRight(value, -shift_amount); + } + + auto signed_value = static_cast>(value); + return static_cast(signed_value << shift_amount); +} + +template +T ArithmeticShiftRight(T value, int shift_amount) { + static_assert(std::is_integral_v); + + if (shift_amount >= static_cast(Common::BitSize())) { + return Common::MostSignificantBit(value) ? ~static_cast(0) : 0; + } + + if (shift_amount < 0) { + return ArithmeticShiftLeft(value, -shift_amount); + } + + auto signed_value = static_cast>(value); + return static_cast(signed_value >> shift_amount); +} + +template +T ArithmeticShiftRightDouble(T top, T bottom, int shift_amount) { + return ArithmeticShiftLeft(top, int(Common::BitSize()) - shift_amount) | LogicalShiftRight(bottom, shift_amount); +} + +template +T Negate(T value) { + return static_cast(-static_cast>(value)); +} + +} // namespace Dynarmic::Safe From 66381352f376df1fab2337cf42e95b7b378ee824 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 14 Jul 2018 09:41:36 +0100 Subject: [PATCH 06/28] fp: Add FPInfo Provides information about floating-point format for various bit sizes --- src/CMakeLists.txt | 1 + src/common/fp/info.h | 58 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 src/common/fp/info.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5807525a..f3c59dfa 
100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,6 +17,7 @@ add_library(dynarmic common/crc32.cpp common/crc32.h common/fp_util.h + common/fp/info.h common/fp/rounding_mode.h common/intrusive_list.h common/iterator_util.h diff --git a/src/common/fp/info.h b/src/common/fp/info.h new file mode 100644 index 00000000..1c4032e0 --- /dev/null +++ b/src/common/fp/info.h @@ -0,0 +1,58 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include "common/common_types.h" + +namespace Dynarmic::FP { + +template +struct FPInfo {}; + +template<> +struct FPInfo { + static constexpr size_t total_width = 32; + static constexpr size_t exponent_width = 8; + static constexpr size_t explicit_mantissa_width = 23; + static constexpr size_t mantissa_width = explicit_mantissa_width + 1; + + static constexpr u32 implicit_leading_bit = u32(1) << explicit_mantissa_width; + static constexpr u32 sign_mask = 0x80000000; + static constexpr u32 exponent_mask = 0x7F800000; + static constexpr u32 mantissa_mask = 0x007FFFFF; + + static constexpr int exponent_min = -126; + static constexpr int exponent_max = 127; + static constexpr int exponent_bias = 127; + + static constexpr u32 Zero(bool sign) { return sign ? 
sign_mask : 0; } + static constexpr u32 Infinity(bool sign) { return exponent_mask | Zero(sign); } + static constexpr u32 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); } +}; + +template<> +struct FPInfo { + static constexpr size_t total_width = 64; + static constexpr size_t exponent_width = 11; + static constexpr size_t explicit_mantissa_width = 52; + static constexpr size_t mantissa_width = explicit_mantissa_width + 1; + + static constexpr u64 implicit_leading_bit = u64(1) << explicit_mantissa_width; + static constexpr u64 sign_mask = 0x8000'0000'0000'0000; + static constexpr u64 exponent_mask = 0x7FF0'0000'0000'0000; + static constexpr u64 mantissa_mask = 0x000F'FFFF'FFFF'FFFF; + + static constexpr int exponent_min = -1022; + static constexpr int exponent_max = 1023; + static constexpr int exponent_bias = 1023; + + static constexpr u64 Zero(bool sign) { return sign ? sign_mask : 0; } + static constexpr u64 Infinity(bool sign) { return exponent_mask | Zero(sign); } + static constexpr u64 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); } +}; + +} // namespace Dynarmic::FP From c41a38b13e74f1ab9d361231c56b3c73d4b4d329 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 26 Jun 2018 20:51:25 +0100 Subject: [PATCH 07/28] fp: Add FPSR --- src/CMakeLists.txt | 1 + src/common/fp/fpsr.h | 162 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 src/common/fp/fpsr.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f3c59dfa..30028224 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,6 +17,7 @@ add_library(dynarmic common/crc32.cpp common/crc32.h common/fp_util.h + common/fp/fpsr.h common/fp/info.h common/fp/rounding_mode.h common/intrusive_list.h diff --git a/src/common/fp/fpsr.h b/src/common/fp/fpsr.h new file mode 100644 index 00000000..3b8ae7e0 --- /dev/null +++ b/src/common/fp/fpsr.h @@ -0,0 +1,162 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include + +#include "common/bit_util.h" +#include "common/common_types.h" + +namespace Dynarmic::FP { + +/** + * Representation of the Floating-Point Status Register. + */ +class FPSR final { +public: + FPSR() = default; + FPSR(const FPSR&) = default; + FPSR(FPSR&&) = default; + explicit FPSR(u32 data) : value{data & mask} {} + + FPSR& operator=(const FPSR&) = default; + FPSR& operator=(FPSR&&) = default; + FPSR& operator=(u32 data) { + value = data & mask; + return *this; + } + + /// Get negative condition flag + bool N() const { + return Common::Bit<31>(value); + } + + /// Set negative condition flag + void N(bool N_) { + value = Common::ModifyBit<31>(value, N_); + } + + /// Get zero condition flag + bool Z() const { + return Common::Bit<30>(value); + } + + /// Set zero condition flag + void Z(bool Z_) { + value = Common::ModifyBit<30>(value, Z_); + } + + /// Get carry condition flag + bool C() const { + return Common::Bit<29>(value); + } + + /// Set carry condition flag + void C(bool C_) { + value = Common::ModifyBit<29>(value, C_); + } + + /// Get overflow condition flag + bool V() const { + return Common::Bit<28>(value); + } + + /// Set overflow condition flag + void V(bool V_) { + value = Common::ModifyBit<28>(value, V_); + } + + /// Get cumulative saturation bit + bool QC() const { + return Common::Bit<27>(value); + } + + /// Set cumulative saturation bit + void QC(bool QC_) { + value = Common::ModifyBit<27>(value, QC_); + } + + /// Get input denormal floating-point exception bit + bool IDC() const { + return Common::Bit<7>(value); + } + + /// Set input denormal floating-point exception bit + void IDC(bool IDC_) { + value = Common::ModifyBit<7>(value, IDC_); + } + + /// Get inexact cumulative floating-point exception bit + bool IXC() const { + return 
Common::Bit<4>(value); + } + + /// Set inexact cumulative floating-point exception bit + void IXC(bool IXC_) { + value = Common::ModifyBit<4>(value, IXC_); + } + + /// Get underflow cumulative floating-point exception bit + bool UFC() const { + return Common::Bit<3>(value); + } + + /// Set underflow cumulative floating-point exception bit + void UFC(bool UFC_) { + value = Common::ModifyBit<3>(value, UFC_); + } + + /// Get overflow cumulative floating-point exception bit + bool OFC() const { + return Common::Bit<2>(value); + } + + /// Set overflow cumulative floating-point exception bit + void OFC(bool OFC_) { + value = Common::ModifyBit<2>(value, OFC_); + } + + /// Get divide by zero cumulative floating-point exception bit + bool DZC() const { + return Common::Bit<1>(value); + } + + /// Set divide by zero cumulative floating-point exception bit + void DZC(bool DZC_) { + value = Common::ModifyBit<1>(value, DZC_); + } + + /// Get invalid operation cumulative floating-point exception bit + bool IOC() const { + return Common::Bit<0>(value); + } + + /// Set invalid operation cumulative floating-point exception bit + void IOC(bool IOC_) { + value = Common::ModifyBit<0>(value, IOC_); + } + + /// Gets the underlying raw value within the FPSR. + u32 Value() const { + return value; + } + +private: + // Bits 5-6 and 8-26 are reserved. 
+ static constexpr u32 mask = 0xF800009F; + u32 value = 0; +}; + +inline bool operator==(FPSR lhs, FPSR rhs) { + return lhs.Value() == rhs.Value(); +} + +inline bool operator!=(FPSR lhs, FPSR rhs) { + return !operator==(lhs, rhs); +} + +} // namespace Dynarmic::FP From 3cb98e15603ed7994390c6408831e23d46a59771 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 26 Jun 2018 20:54:42 +0100 Subject: [PATCH 08/28] fp: Move fp_util to fp/util --- src/CMakeLists.txt | 2 +- src/backend_x64/emit_x64_floating_point.cpp | 10 +++++----- src/backend_x64/emit_x64_vector_floating_point.cpp | 6 +++--- src/common/{fp_util.h => fp/util.h} | 6 ++---- 4 files changed, 11 insertions(+), 13 deletions(-) rename src/common/{fp_util.h => fp/util.h} (97%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 30028224..0a722b13 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,10 +16,10 @@ add_library(dynarmic common/common_types.h common/crc32.cpp common/crc32.h - common/fp_util.h common/fp/fpsr.h common/fp/info.h common/fp/rounding_mode.h + common/fp/util.h common/intrusive_list.h common/iterator_util.h common/llvm_disassemble.cpp diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp index 12ac6fda..253c235d 100644 --- a/src/backend_x64/emit_x64_floating_point.cpp +++ b/src/backend_x64/emit_x64_floating_point.cpp @@ -11,7 +11,7 @@ #include "backend_x64/emit_x64.h" #include "common/assert.h" #include "common/common_types.h" -#include "common/fp_util.h" +#include "common/fp/util.h" #include "frontend/ir/basic_block.h" #include "frontend/ir/microinstruction.h" #include "frontend/ir/opcodes.h" @@ -120,7 +120,7 @@ static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya code.movd(code.ABI_PARAM1.cvt32(), a); code.movd(code.ABI_PARAM2.cvt32(), b); code.CallFunction(static_cast([](u32 a, u32 b) -> u32 { - return *Common::ProcessNaNs(a, b); + return *FP::ProcessNaNs(a, b); })); code.movd(a, 
code.ABI_RETURN.cvt32()); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx())); @@ -149,7 +149,7 @@ static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya code.movd(code.ABI_PARAM2.cvt32(), b); code.movd(code.ABI_PARAM3.cvt32(), c); code.CallFunction(static_cast([](u32 a, u32 b, u32 c) -> u32 { - return *Common::ProcessNaNs(a, b, c); + return *FP::ProcessNaNs(a, b, c); })); code.movd(a, code.ABI_RETURN.cvt32()); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx())); @@ -187,7 +187,7 @@ static void PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya code.movq(code.ABI_PARAM1, a); code.movq(code.ABI_PARAM2, b); code.CallFunction(static_cast([](u64 a, u64 b) -> u64 { - return *Common::ProcessNaNs(a, b); + return *FP::ProcessNaNs(a, b); })); code.movq(a, code.ABI_RETURN); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx())); @@ -213,7 +213,7 @@ static void PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya code.movq(code.ABI_PARAM2, b); code.movq(code.ABI_PARAM3, c); code.CallFunction(static_cast([](u64 a, u64 b, u64 c) -> u64 { - return *Common::ProcessNaNs(a, b, c); + return *FP::ProcessNaNs(a, b, c); })); code.movq(a, code.ABI_RETURN); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx())); diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp index df1c7634..052b53f1 100644 --- a/src/backend_x64/emit_x64_vector_floating_point.cpp +++ b/src/backend_x64/emit_x64_vector_floating_point.cpp @@ -10,7 +10,7 @@ #include "backend_x64/block_of_code.h" #include "backend_x64/emit_x64.h" #include "common/bit_util.h" -#include "common/fp_util.h" +#include "common/fp/util.h" #include "frontend/ir/basic_block.h" #include "frontend/ir/microinstruction.h" @@ -69,9 +69,9 @@ static void HandleNaNs(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& xm 
code.CallFunction(static_cast( [](RegArray& result, const RegArray& a, const RegArray& b) { for (size_t i = 0; i < result.size(); ++i) { - if (auto r = Common::ProcessNaNs(a[i], b[i])) { + if (auto r = FP::ProcessNaNs(a[i], b[i])) { result[i] = *r; - } else if (Common::IsNaN(result[i])) { + } else if (FP::IsNaN(result[i])) { result[i] = NaNWrapper::value; } } diff --git a/src/common/fp_util.h b/src/common/fp/util.h similarity index 97% rename from src/common/fp_util.h rename to src/common/fp/util.h index 9469a223..8241dbf3 100644 --- a/src/common/fp_util.h +++ b/src/common/fp/util.h @@ -8,8 +8,7 @@ #include -namespace Dynarmic { -namespace Common { +namespace Dynarmic::FP { /// Is 32-bit floating point value a QNaN? constexpr bool IsQNaN(u32 value) { @@ -110,5 +109,4 @@ inline boost::optional ProcessNaNs(u64 a, u64 b, u64 c) { return boost::none; } -} // namespace Common -} // namespace Dynarmic +} // namespace Dynarmic::FP From 487565891724b9687ba153573d294621f981fd89 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Wed, 27 Jun 2018 13:51:39 +0100 Subject: [PATCH 09/28] fp: Implement FPProcessException --- src/CMakeLists.txt | 2 + src/common/fp/process_exception.cpp | 58 +++++++++++++++++++++++++++++ src/common/fp/process_exception.h | 27 ++++++++++++++ 3 files changed, 87 insertions(+) create mode 100644 src/common/fp/process_exception.cpp create mode 100644 src/common/fp/process_exception.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0a722b13..0f311f91 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,6 +18,8 @@ add_library(dynarmic common/crc32.h common/fp/fpsr.h common/fp/info.h + common/fp/process_exception.cpp + common/fp/process_exception.h common/fp/rounding_mode.h common/fp/util.h common/intrusive_list.h diff --git a/src/common/fp/process_exception.cpp b/src/common/fp/process_exception.cpp new file mode 100644 index 00000000..9bb5a8a6 --- /dev/null +++ b/src/common/fp/process_exception.cpp @@ -0,0 +1,58 @@ +/* This file is part 
of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#include "common/assert.h" +#include "common/fp/fpsr.h" +#include "common/fp/process_exception.h" +#include "frontend/A64/FPCR.h" + +namespace Dynarmic::FP { + +void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr) { + switch (exception) { + case FPExc::InvalidOp: + if (fpcr.IOE()) { + UNIMPLEMENTED(); + } + fpsr.IOC(true); + break; + case FPExc::DivideByZero: + if (fpcr.DZE()) { + UNIMPLEMENTED(); + } + fpsr.DZC(true); + break; + case FPExc::Overflow: + if (fpcr.OFE()) { + UNIMPLEMENTED(); + } + fpsr.OFC(true); + break; + case FPExc::Underflow: + if (fpcr.UFE()) { + UNIMPLEMENTED(); + } + fpsr.UFC(true); + break; + case FPExc::Inexact: + if (fpcr.IXE()) { + UNIMPLEMENTED(); + } + fpsr.IXC(true); + break; + case FPExc::InputDenorm: + if (fpcr.IDE()) { + UNIMPLEMENTED(); + } + fpsr.IDC(true); + break; + default: + UNREACHABLE(); + break; + } +} + +} // namespace Dynarmic::FP diff --git a/src/common/fp/process_exception.h b/src/common/fp/process_exception.h new file mode 100644 index 00000000..637f1d77 --- /dev/null +++ b/src/common/fp/process_exception.h @@ -0,0 +1,27 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#pragma once + +#include "common/fp/fpsr.h" +#include "frontend/A64/FPCR.h" + +namespace Dynarmic::FP { + +using FPCR = A64::FPCR; + +enum class FPExc { + InvalidOp, + DivideByZero, + Overflow, + Underflow, + Inexact, + InputDenorm, +}; + +void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr); + +} // namespace Dynarmic::FP From 4ab029c11430bf4fb4bf4d5924556710606767f1 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Wed, 27 Jun 2018 13:52:20 +0100 Subject: [PATCH 10/28] fp: Implement FPUnpack --- src/CMakeLists.txt | 2 ++ src/common/fp/unpacked.cpp | 54 +++++++++++++++++++++++++++++++++++++ src/common/fp/unpacked.h | 43 +++++++++++++++++++++++++++++ tests/CMakeLists.txt | 1 + tests/fp/unpacked_tests.cpp | 35 ++++++++++++++++++++++++ 5 files changed, 135 insertions(+) create mode 100644 src/common/fp/unpacked.cpp create mode 100644 src/common/fp/unpacked.h create mode 100644 tests/fp/unpacked_tests.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0f311f91..c282e8d8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -21,6 +21,8 @@ add_library(dynarmic common/fp/process_exception.cpp common/fp/process_exception.h common/fp/rounding_mode.h + common/fp/unpacked.cpp + common/fp/unpacked.h common/fp/util.h common/intrusive_list.h common/iterator_util.h diff --git a/src/common/fp/unpacked.cpp b/src/common/fp/unpacked.cpp new file mode 100644 index 00000000..1432bf36 --- /dev/null +++ b/src/common/fp/unpacked.cpp @@ -0,0 +1,54 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#include "common/fp/info.h" +#include "common/fp/process_exception.h" +#include "common/fp/unpacked.h" + +namespace Dynarmic::FP { + +template +std::tuple> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) { + constexpr size_t sign_bit = FPInfo::exponent_width + FPInfo::explicit_mantissa_width; + constexpr size_t exponent_high_bit = FPInfo::exponent_width + FPInfo::explicit_mantissa_width - 1; + constexpr size_t exponent_low_bit = FPInfo::explicit_mantissa_width; + constexpr size_t mantissa_high_bit = FPInfo::explicit_mantissa_width - 1; + constexpr size_t mantissa_low_bit = 0; + constexpr int denormal_exponent = FPInfo::exponent_min - int(FPInfo::explicit_mantissa_width); + + const bool sign = Common::Bit(op); + const FPT exp_raw = Common::Bits(op); + const FPT frac_raw = Common::Bits(op); + + if (exp_raw == 0) { + if (frac_raw == 0 || fpcr.FZ()) { + if (frac_raw != 0) { + FPProcessException(FPExc::InputDenorm, fpcr, fpsr); + } + return {FPType::Zero, sign, {sign, 0, 0}}; + } + + return {FPType::Nonzero, sign, {sign, denormal_exponent, frac_raw}}; + } + + if (exp_raw == Common::Ones(FPInfo::exponent_width)) { + if (frac_raw == 0) { + return {FPType::Infinity, sign, {sign, 1000000, 1}}; + } + + const bool is_quiet = Common::Bit(frac_raw); + return {is_quiet ? FPType::QNaN : FPType::SNaN, sign, {sign, 0, 0}}; + } + + const int exp = static_cast(exp_raw) - FPInfo::exponent_bias - FPInfo::explicit_mantissa_width; + const u64 frac = frac_raw | FPInfo::implicit_leading_bit; + return {FPType::Nonzero, sign, {sign, exp, frac}}; +} + +template std::tuple> FPUnpack(u32 op, FPCR fpcr, FPSR& fpsr); +template std::tuple> FPUnpack(u64 op, FPCR fpcr, FPSR& fpsr); + +} // namespace Dynarmic::FP diff --git a/src/common/fp/unpacked.h b/src/common/fp/unpacked.h new file mode 100644 index 00000000..f815aece --- /dev/null +++ b/src/common/fp/unpacked.h @@ -0,0 +1,43 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include + +#include "common/common_types.h" +#include "common/fp/fpsr.h" +#include "frontend/A64/FPCR.h" + +namespace Dynarmic::FP { + +using FPCR = A64::FPCR; + +enum class FPType { + Nonzero, + Zero, + Infinity, + QNaN, + SNaN, +}; + +/// value = (sign ? -1 : +1) * mantissa * 2^exponent +template +struct FPUnpacked { + bool sign; + int exponent; + MantissaT mantissa; +}; + +template +inline bool operator==(const FPUnpacked& a, const FPUnpacked& b) { + return std::tie(a.sign, a.exponent, a.mantissa) == std::tie(b.sign, b.exponent, b.mantissa); +} + +template +std::tuple> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr); + +} // namespace Dynarmic::FP diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7a915fab..2d1902ee 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -29,6 +29,7 @@ add_executable(dynarmic_tests A64/inst_gen.cpp A64/inst_gen.h A64/testenv.h + fp/unpacked_tests.cpp main.cpp rand_int.h ) diff --git a/tests/fp/unpacked_tests.cpp b/tests/fp/unpacked_tests.cpp new file mode 100644 index 00000000..4c5a28b9 --- /dev/null +++ b/tests/fp/unpacked_tests.cpp @@ -0,0 +1,35 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#include + +#include "common/fp/unpacked.h" + +using namespace Dynarmic; +using namespace Dynarmic::FP; + +TEST_CASE("FPUnpack Tests", "[fp]") { + const static std::vector>, u32>> test_cases { + {0x00000000, {FPType::Zero, false, {false, 0, 0}}, 0}, + {0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0}, + {0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0}, + {0x7F800001, {FPType::SNaN, false, {false, 0, 0}}, 0}, + {0xFF800001, {FPType::SNaN, true, {true, 0, 0}}, 0}, + {0x7FC00001, {FPType::QNaN, false, {false, 0, 0}}, 0}, + {0xFFC00001, {FPType::QNaN, true, {true, 0, 0}}, 0}, + {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149. + }; + + const FPCR fpcr; + for (const auto& [input, expected_output, expected_fpsr] : test_cases) { + FPSR fpsr; + const auto output = FPUnpack(input, fpcr, fpsr); + + INFO("Input: " << std::hex << input); + REQUIRE(output == expected_output); + REQUIRE(fpsr.Value() == expected_fpsr); + } +} From 7360a2579b3ac3c19debd2ec4115e1805b0e9547 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 15 Jul 2018 14:25:31 +0100 Subject: [PATCH 11/28] mp: Implement metaprogramming library --- src/CMakeLists.txt | 11 ++++++ src/common/mp/append.h | 27 +++++++++++++++ src/common/mp/bind.h | 18 ++++++++++ src/common/mp/cartesian_product.h | 51 +++++++++++++++++++++++++++ src/common/mp/concat.h | 57 +++++++++++++++++++++++++++++++ src/common/mp/fapply.h | 27 +++++++++++++++ src/common/mp/fmap.h | 27 +++++++++++++++ src/common/mp/integer.h | 51 +++++++++++++++++++++++++++ src/common/mp/list.h | 15 ++++++++ src/common/mp/lut.h | 23 +++++++++++++ src/common/mp/to_tuple.h | 29 ++++++++++++++++ src/common/mp/vlift.h | 17 +++++++++ src/common/mp/vllift.h | 31 +++++++++++++++++ tests/CMakeLists.txt | 1 + tests/mp.cpp | 27 +++++++++++++++ 15 files changed, 412 insertions(+) create mode 100644 src/common/mp/append.h create mode 100644 src/common/mp/bind.h create mode 100644 
src/common/mp/cartesian_product.h create mode 100644 src/common/mp/concat.h create mode 100644 src/common/mp/fapply.h create mode 100644 src/common/mp/fmap.h create mode 100644 src/common/mp/integer.h create mode 100644 src/common/mp/list.h create mode 100644 src/common/mp/lut.h create mode 100644 src/common/mp/to_tuple.h create mode 100644 src/common/mp/vlift.h create mode 100644 src/common/mp/vllift.h create mode 100644 tests/mp.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c282e8d8..38ad8601 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -33,6 +33,17 @@ add_library(dynarmic common/memory_pool.cpp common/memory_pool.h common/mp.h + common/mp/append.h + common/mp/bind.h + common/mp/cartesian_product.h + common/mp/concat.h + common/mp/fapply.h + common/mp/fmap.h + common/mp/list.h + common/mp/lut.h + common/mp/to_tuple.h + common/mp/vlift.h + common/mp/vllift.h common/safe_ops.h common/scope_exit.h common/sm4.cpp diff --git a/src/common/mp/append.h b/src/common/mp/append.h new file mode 100644 index 00000000..e6ad75ed --- /dev/null +++ b/src/common/mp/append.h @@ -0,0 +1,27 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +namespace Dynarmic::Common::mp { + +namespace detail { + +template +struct append_impl; + +template class LT, class... T1, class... T2> +struct append_impl, T2...> { + using type = LT; +}; + +} // namespace detail + +/// Append items T to list L +template +using append = typename detail::append_impl::type; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/bind.h b/src/common/mp/bind.h new file mode 100644 index 00000000..3666ea5b --- /dev/null +++ b/src/common/mp/bind.h @@ -0,0 +1,18 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +namespace Dynarmic::Common::mp { + +/// Binds the first sizeof...(A) arguments of metafunction F with arguments A +template class F, class... A> +struct bind { + template + using type = F; +}; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/cartesian_product.h b/src/common/mp/cartesian_product.h new file mode 100644 index 00000000..919c7eef --- /dev/null +++ b/src/common/mp/cartesian_product.h @@ -0,0 +1,51 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include "common/mp/append.h" +#include "common/mp/bind.h" +#include "common/mp/concat.h" +#include "common/mp/fmap.h" +#include "common/mp/list.h" + +namespace Dynarmic::Common::mp { + +namespace detail { + +template +struct cartesian_product_impl{}; + +template +struct cartesian_product_impl { + using type = RL; +}; + +template class LT, class... RT, class... T1> +struct cartesian_product_impl, LT> { + using type = concat< + fmap::template type, list>... + >; +}; + +template +struct cartesian_product_impl { + using type = typename cartesian_product_impl< + typename cartesian_product_impl::type, + L2, + Ls... 
+ >::type; +}; + +} // namespace detail + +/// Produces the cartesian product of a set of lists +/// For example: +/// cartesian_product, list> == list, list, list, list +template +using cartesian_product = typename detail::cartesian_product_impl, Ls...>::type; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/concat.h b/src/common/mp/concat.h new file mode 100644 index 00000000..e41dd9e6 --- /dev/null +++ b/src/common/mp/concat.h @@ -0,0 +1,57 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include "common/mp/list.h" + +namespace Dynarmic::Common::mp { + +namespace detail { + +template +struct concat_impl; + +template<> +struct concat_impl<> { + using type = list<>; +}; + +template +struct concat_impl { + using type = L; +}; + +template class LT, class... T1, class... T2, class... Ls> +struct concat_impl, LT, Ls...> { + using type = typename concat_impl, Ls...>::type; +}; + +template class LT, + class... T1, class... T2, class... T3, class... T4, class... T5, class... T6, class... T7, class... T8, + class... T9, class... T10, class... T11, class... T12, class... T13, class... T14, class... T15, class... T16, + class... Ls> +struct concat_impl< + LT, LT, LT, LT, LT, LT, LT, LT, + LT, LT, LT, LT, LT, LT, LT, LT, + Ls...> +{ + using type = typename concat_impl< + LT< + T1..., T2..., T3..., T4..., T5..., T6..., T7..., T8..., + T9..., T10..., T11..., T12..., T13..., T14..., T15..., T16... + >, + Ls... 
+ >::type; +}; + +} // namespace detail + +/// Concatenate lists together +template +using concat = typename detail::concat_impl::type; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/fapply.h b/src/common/mp/fapply.h new file mode 100644 index 00000000..1b84efc4 --- /dev/null +++ b/src/common/mp/fapply.h @@ -0,0 +1,27 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +namespace Dynarmic::Common::mp { + +namespace detail { + +template class F, class L> +struct fapply_impl; + +template class F, template class LT, class... T> +struct fapply_impl> { + using type = F; +}; + +} // namespace detail + +/// Invokes metafunction F where the arguments are all the members of list L +template class F, class L> +using fapply = typename detail::fapply_impl::type; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/fmap.h b/src/common/mp/fmap.h new file mode 100644 index 00000000..d05766a6 --- /dev/null +++ b/src/common/mp/fmap.h @@ -0,0 +1,27 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +namespace Dynarmic::Common::mp { + +namespace detail { + +template class F, class L> +struct fmap_impl; + +template class F, template class LT, class... 
T> +struct fmap_impl> { + using type = LT...>; +}; + +} // namespace detail + +/// Metafunction that applies each element of list L to metafunction F +template class F, class L> +using fmap = typename detail::fmap_impl::type; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/integer.h b/src/common/mp/integer.h new file mode 100644 index 00000000..ee9f6201 --- /dev/null +++ b/src/common/mp/integer.h @@ -0,0 +1,51 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include +#include + +namespace Dynarmic::Common::mp { + +namespace detail { + +template +struct integer_of_size_impl{}; + +template<> +struct integer_of_size_impl<8> { + using unsigned_type = std::uint8_t; + using signed_type = std::int8_t; +}; + +template<> +struct integer_of_size_impl<16> { + using unsigned_type = std::uint16_t; + using signed_type = std::int16_t; +}; + +template<> +struct integer_of_size_impl<32> { + using unsigned_type = std::uint32_t; + using signed_type = std::int32_t; +}; + +template<> +struct integer_of_size_impl<64> { + using unsigned_type = std::uint64_t; + using signed_type = std::int64_t; +}; + +} // namespace detail + +template +using unsigned_integer_of_size = typename detail::integer_of_size_impl::unsigned_type; + +template +using signed_integer_of_size = typename detail::integer_of_size_impl::signed_type; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/list.h b/src/common/mp/list.h new file mode 100644 index 00000000..96c00697 --- /dev/null +++ b/src/common/mp/list.h @@ -0,0 +1,15 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#pragma once + +namespace Dynarmic::Common::mp { + +/// Contains a list of types +template +struct list {}; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/lut.h b/src/common/mp/lut.h new file mode 100644 index 00000000..5d644197 --- /dev/null +++ b/src/common/mp/lut.h @@ -0,0 +1,23 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include +#include +#include + +#include "common/mp/list.h" + +namespace Dynarmic::Common::mp { + +template +inline auto GenerateLookupTableFromList(Function f, list) { + static const std::array, sizeof...(Values)> pair_array{f(Values{})...}; + return std::map(pair_array.begin(), pair_array.end()); +} + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/to_tuple.h b/src/common/mp/to_tuple.h new file mode 100644 index 00000000..1e782a1d --- /dev/null +++ b/src/common/mp/to_tuple.h @@ -0,0 +1,29 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include + +namespace Dynarmic::Common::mp { + +namespace detail { + +template +struct to_tuple_impl; + +template class LT, class... T> +struct to_tuple_impl> { + static constexpr auto value = std::make_tuple(static_cast(T::value)...); +}; + +} // namespace detail + +/// Metafunction that converts a list of metavalues to a tuple value. +template +constexpr auto to_tuple = detail::to_tuple_impl::value; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/vlift.h b/src/common/mp/vlift.h new file mode 100644 index 00000000..c46874d3 --- /dev/null +++ b/src/common/mp/vlift.h @@ -0,0 +1,17 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include + +namespace Dynarmic::Common::mp { + +/// Lifts a value into a type +template +using vlift = std::integral_constant; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/vllift.h b/src/common/mp/vllift.h new file mode 100644 index 00000000..25eb323d --- /dev/null +++ b/src/common/mp/vllift.h @@ -0,0 +1,31 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include + +#include "common/mp/list.h" + +namespace Dynarmic::Common::mp { + +namespace detail { + +template +struct vllift_impl{}; + +template +struct vllift_impl> { + using type = list...>; +}; + +} // namespace detail + +/// Lifts values in value list VL to create a type list. +template +using vllift = typename detail::vllift_impl::type; + +} // namespace Dynarmic::Common::mp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 2d1902ee..576bf9e6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -31,6 +31,7 @@ add_executable(dynarmic_tests A64/testenv.h fp/unpacked_tests.cpp main.cpp + mp.cpp rand_int.h ) diff --git a/tests/mp.cpp b/tests/mp.cpp new file mode 100644 index 00000000..7f34a525 --- /dev/null +++ b/tests/mp.cpp @@ -0,0 +1,27 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#include + +#include "common/mp/cartesian_product.h" + +using namespace Dynarmic::Common::mp; + +static_assert( + std::is_same_v< + cartesian_product, list, list>, + list< + list, + list, + list, + list, + list, + list, + list, + list + > + > +); From 55d590c01fc50657c2e6e8b4cdf9bf5cdaf879bd Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 28 Jun 2018 21:45:48 +0100 Subject: [PATCH 12/28] FPCR: Add AHP setter and FZ16 getter --- src/frontend/A64/FPCR.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/frontend/A64/FPCR.h b/src/frontend/A64/FPCR.h index 9e82733e..1ebf1a56 100644 --- a/src/frontend/A64/FPCR.h +++ b/src/frontend/A64/FPCR.h @@ -37,6 +37,11 @@ public: return Common::Bit<26>(value); } + /// Alternate half-precision control flag. + void AHP(bool AHP_) { + value = Common::ModifyBit<26>(value, AHP_); + } + /// Default NaN mode control bit. bool DN() const { return Common::Bit<25>(value); @@ -52,6 +57,10 @@ public: return static_cast(Common::Bits<22, 23>(value)); } + bool FZ16() const { + return Common::Bit<19>(value); + } + /// Input denormal exception trap enable flag. 
bool IDE() const { return Common::Bit<15>(value); From 8668d6188163c64b58d33ce2709bb6f0d6dace9e Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 28 Jun 2018 21:46:02 +0100 Subject: [PATCH 13/28] fp/unpacked: Implement FPRound --- src/common/fp/unpacked.cpp | 125 +++++++++++++++++++++++++++++++++++++ src/common/fp/unpacked.h | 14 +++++ 2 files changed, 139 insertions(+) diff --git a/src/common/fp/unpacked.cpp b/src/common/fp/unpacked.cpp index 1432bf36..6f145723 100644 --- a/src/common/fp/unpacked.cpp +++ b/src/common/fp/unpacked.cpp @@ -7,6 +7,7 @@ #include "common/fp/info.h" #include "common/fp/process_exception.h" #include "common/fp/unpacked.h" +#include "common/safe_ops.h" namespace Dynarmic::FP { @@ -51,4 +52,128 @@ std::tuple> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr template std::tuple> FPUnpack(u32 op, FPCR fpcr, FPSR& fpsr); template std::tuple> FPUnpack(u64 op, FPCR fpcr, FPSR& fpsr); +template +std::tuple Normalize(FPUnpacked op) { + const int highest_set_bit = Common::HighestSetBit(op.mantissa); + const int shift_amount = highest_set_bit - static_cast(F); + const MantissaT mantissa = Safe::LogicalShiftRight(op.mantissa, shift_amount); + const MantissaT error = Safe::LogicalShiftRightDouble(op.mantissa, static_cast(0), shift_amount); + const int exponent = op.exponent + highest_set_bit; + return std::make_tuple(op.sign, exponent, mantissa, error); +} + +template +FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { + ASSERT(op.mantissa != 0); + ASSERT(rounding != RoundingMode::ToNearest_TieAwayFromZero); + + constexpr int minimum_exp = FPInfo::exponent_min; + constexpr size_t E = FPInfo::exponent_width; + constexpr size_t F = FPInfo::explicit_mantissa_width; + constexpr bool isFP16 = FPInfo::total_width == 16; + + auto [sign, exponent, mantissa, error] = Normalize(op); + + if (((!isFP16 && fpcr.FZ()) || (isFP16 && fpcr.FZ16())) && exponent < minimum_exp) { + fpsr.UFC(true); + return FPInfo::Zero(sign); + } + + int biased_exp = 
std::max(exponent - minimum_exp + 1, 0); + if (biased_exp == 0) { + error = Safe::LogicalShiftRightDouble(mantissa, error, minimum_exp - exponent); + mantissa = Safe::LogicalShiftRight(mantissa, minimum_exp - exponent); + } + + if (biased_exp == 0 && (error != 0 || fpcr.UFE())) { + FPProcessException(FPExc::Underflow, fpcr, fpsr); + } + + bool round_up = false, overflow_to_inf = false; + switch (rounding) { + case RoundingMode::ToNearest_TieEven: { + constexpr MantissaT half = static_cast(1) << (Common::BitSize() - 1); + round_up = (error > half) || (error == half && Common::Bit<0>(mantissa)); + overflow_to_inf = true; + break; + } + case RoundingMode::TowardsPlusInfinity: + round_up = error != 0 && !sign; + overflow_to_inf = !sign; + break; + case RoundingMode::TowardsMinusInfinity: + round_up = error != 0 && sign; + overflow_to_inf = sign; + break; + default: + break; + } + + if (round_up) { + if ((mantissa & FPInfo::mantissa_mask) == FPInfo::mantissa_mask) { + // Overflow on rounding up is going to happen + if (mantissa == FPInfo::mantissa_mask) { + // Rounding up from denormal to normal + mantissa++; + biased_exp++; + } else { + // Rounding up to next exponent + mantissa = (mantissa + 1) / 2; + biased_exp++; + } + } else { + mantissa++; + } + } + + if (error != 0 && rounding == RoundingMode::ToOdd) { + mantissa = Common::ModifyBit<0>(mantissa, true); + } + + FPT result = 0; +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4127) // C4127: conditional expression is constant +#endif + if (!isFP16 || !fpcr.AHP()) { +#ifdef _MSC_VER +#pragma warning(pop) +#endif + constexpr int max_biased_exp = (1 << E) - 1; + if (biased_exp >= max_biased_exp) { + result = overflow_to_inf ? FPInfo::Infinity(sign) : FPInfo::MaxNormal(sign); + FPProcessException(FPExc::Overflow, fpcr, fpsr); + FPProcessException(FPExc::Inexact, fpcr, fpsr); + } else { + result = sign ? 
1 : 0; + result <<= E; + result += biased_exp; + result <<= F; + result |= static_cast(mantissa) & FPInfo::mantissa_mask; + if (error != 0) { + FPProcessException(FPExc::Inexact, fpcr, fpsr); + } + } + } else { + constexpr int max_biased_exp = (1 << E); + if (biased_exp >= max_biased_exp) { + result = sign ? 0xFFFF : 0x7FFF; + FPProcessException(FPExc::InvalidOp, fpcr, fpsr); + } else { + result = sign ? 1 : 0; + result <<= E; + result += biased_exp; + result <<= F; + result |= static_cast(mantissa) & FPInfo::mantissa_mask; + if (error != 0) { + FPProcessException(FPExc::Inexact, fpcr, fpsr); + } + } + } + return result; +} + +template u32 FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); +template u64 FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); + } // namespace Dynarmic::FP diff --git a/src/common/fp/unpacked.h b/src/common/fp/unpacked.h index f815aece..af8513e6 100644 --- a/src/common/fp/unpacked.h +++ b/src/common/fp/unpacked.h @@ -40,4 +40,18 @@ inline bool operator==(const FPUnpacked& a, const FPUnpacked std::tuple> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr); +template +FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); + +template +FPT FPRound(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { + fpcr.AHP(false); + return FPRoundBase(op, fpcr, rounding, fpsr); +} + +template +FPT FPRound(FPUnpacked op, FPCR fpcr, FPSR& fpsr) { + return FPRound(op, fpcr, fpcr.RMode(), fpsr); +} + } // namespace Dynarmic::FP From 52ed365158b9c785352579966d466b1cfd26534e Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 28 Jun 2018 21:46:16 +0100 Subject: [PATCH 14/28] tests/fp: Add FPRound tests --- tests/fp/unpacked_tests.cpp | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/fp/unpacked_tests.cpp b/tests/fp/unpacked_tests.cpp index 4c5a28b9..f17fc99c 100644 --- a/tests/fp/unpacked_tests.cpp +++ b/tests/fp/unpacked_tests.cpp @@ -7,6 +7,7 @@ 
#include #include "common/fp/unpacked.h" +#include "rand_int.h" using namespace Dynarmic; using namespace Dynarmic::FP; @@ -21,6 +22,7 @@ TEST_CASE("FPUnpack Tests", "[fp]") { {0x7FC00001, {FPType::QNaN, false, {false, 0, 0}}, 0}, {0xFFC00001, {FPType::QNaN, true, {true, 0, 0}}, 0}, {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149. + {0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon }; const FPCR fpcr; @@ -33,3 +35,37 @@ TEST_CASE("FPUnpack Tests", "[fp]") { REQUIRE(fpsr.Value() == expected_fpsr); } } + +TEST_CASE("FPRound Tests", "[fp]") { + const static std::vector>, u32>> test_cases { + {0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0x14}, + {0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0x14}, + {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149. + {0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon + {0x3F800000, {FPType::Nonzero, false, {false, -28, 0xFFFFFFF}}, 0x10}, // rounds to 1.0 + }; + + const FPCR fpcr; + for (const auto& [expected_output, input, expected_fpsr] : test_cases) { + FPSR fpsr; + const auto output = FPRound(std::get<2>(input), fpcr, fpsr); + + INFO("Expected Output: " << std::hex << expected_output); + REQUIRE(output == expected_output); + REQUIRE(fpsr.Value() == expected_fpsr); + } +} + +TEST_CASE("FPUnpack<->FPRound Round-trip Tests", "[fp]") { + const FPCR fpcr; + for (size_t count = 0; count < 100000; count++) { + FPSR fpsr; + const u32 input = RandInt(0, 1) == 0 ? 
RandInt(0x00000001, 0x7F800000) : RandInt(0x80000001, 0xFF800000); + const auto intermediate = std::get<2>(FPUnpack(input, fpcr, fpsr)); + const u32 output = FPRound(intermediate, fpcr, fpsr); + + INFO("Count: " << count); + INFO("Intermediate Values: " << std::hex << intermediate.sign << ';' << intermediate.exponent << ';' << intermediate.mantissa); + REQUIRE(input == output); + } +} From 8087e8df05fcb90a0050e2ce3c6c97589890fdd5 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Fri, 29 Jun 2018 18:52:48 +0100 Subject: [PATCH 15/28] mantissa_util: Implement ResidualErrorOnRightShift Accurately calculate residual error that is shifted out --- src/CMakeLists.txt | 1 + src/common/fp/mantissa_util.h | 48 ++++++++++++++++++++++++ tests/CMakeLists.txt | 1 + tests/fp/mantissa_util_tests.cpp | 63 ++++++++++++++++++++++++++++++++ 4 files changed, 113 insertions(+) create mode 100644 src/common/fp/mantissa_util.h create mode 100644 tests/fp/mantissa_util_tests.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 38ad8601..daf5b7d8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,6 +18,7 @@ add_library(dynarmic common/crc32.h common/fp/fpsr.h common/fp/info.h + common/fp/mantissa_util.h common/fp/process_exception.cpp common/fp/process_exception.h common/fp/rounding_mode.h diff --git a/src/common/fp/mantissa_util.h b/src/common/fp/mantissa_util.h new file mode 100644 index 00000000..2551c40c --- /dev/null +++ b/src/common/fp/mantissa_util.h @@ -0,0 +1,48 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#pragma once + +#include "common/bit_util.h" +#include "common/common_types.h" + +namespace Dynarmic::FP { + +enum class ResidualError { + Zero, + LessThanHalf, + Half, + GreaterThanHalf, +}; + +template +ResidualError ResidualErrorOnRightShift(MantissaT mantissa, int shift_amount) { + if (shift_amount <= 0 || mantissa == 0) { + return ResidualError::Zero; + } + + if (shift_amount > static_cast(Common::BitSize())) { + return Common::MostSignificantBit(mantissa) ? ResidualError::GreaterThanHalf : ResidualError::LessThanHalf; + } + + const size_t half_bit_position = static_cast(shift_amount - 1); + const MantissaT half = static_cast(1) << half_bit_position; + const MantissaT error_mask = Common::Ones(static_cast(shift_amount)); + const MantissaT error = mantissa & error_mask; + + if (error == 0) { + return ResidualError::Zero; + } + if (error < half) { + return ResidualError::LessThanHalf; + } + if (error == half) { + return ResidualError::Half; + } + return ResidualError::GreaterThanHalf; +} + +} // namespace Dynarmic::FP diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 576bf9e6..e48e18c1 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -29,6 +29,7 @@ add_executable(dynarmic_tests A64/inst_gen.cpp A64/inst_gen.h A64/testenv.h + fp/mantissa_util_tests.cpp fp/unpacked_tests.cpp main.cpp mp.cpp diff --git a/tests/fp/mantissa_util_tests.cpp b/tests/fp/mantissa_util_tests.cpp new file mode 100644 index 00000000..04703e1d --- /dev/null +++ b/tests/fp/mantissa_util_tests.cpp @@ -0,0 +1,63 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#include +#include + +#include + +#include "common/fp/mantissa_util.h" +#include "common/safe_ops.h" +#include "rand_int.h" + +using namespace Dynarmic; +using namespace Dynarmic::FP; + +TEST_CASE("ResidualErrorOnRightShift", "[fp]") { + const std::vector> test_cases { + {0x00000001, 1, ResidualError::Half}, + {0x00000002, 1, ResidualError::Zero}, + {0x00000001, 2, ResidualError::LessThanHalf}, + {0x00000002, 2, ResidualError::Half}, + {0x00000003, 2, ResidualError::GreaterThanHalf}, + {0x00000004, 2, ResidualError::Zero}, + {0x00000005, 2, ResidualError::LessThanHalf}, + {0x00000006, 2, ResidualError::Half}, + {0x00000007, 2, ResidualError::GreaterThanHalf}, + }; + + for (auto [mantissa, shift, expected_result] : test_cases) { + const ResidualError result = ResidualErrorOnRightShift(mantissa, shift); + REQUIRE(result == expected_result); + } +} + +TEST_CASE("ResidualErrorOnRightShift Randomized", "[fp]") { + for (size_t test = 0; test < 100000; test++) { + const u32 mantissa = RandInt(0, 0xFFFFFFFF); + const int shift = RandInt(-60, 60); + + const ResidualError result = ResidualErrorOnRightShift(mantissa, shift); + + const u64 calculated_error = Safe::ArithmeticShiftRightDouble(Common::SignExtend<32, u64>(mantissa), u64(0), shift); + const ResidualError expected_result = [&]{ + constexpr u64 half_error = 0x8000'0000'0000'0000ull; + if (calculated_error == 0) { + return ResidualError::Zero; + } + if (calculated_error < half_error) { + return ResidualError::LessThanHalf; + } + if (calculated_error == half_error) { + return ResidualError::Half; + } + return ResidualError::GreaterThanHalf; + }(); + + INFO(std::hex << "mantissa " << mantissa << " shift " << shift << " calculated_error " << calculated_error); + REQUIRE(result == expected_result); + } +} From 95712695529de970684e3c78d9f898ee02bbcd15 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Fri, 29 Jun 2018 19:34:46 +0100 Subject: [PATCH 16/28] fp/op: Implement FPToFixed --- src/CMakeLists.txt | 2 + 
src/common/fp/op.cpp | 101 +++++++++++++++++++++++++++++++++++++++++ src/common/fp/op.h | 21 +++++++++ tests/CMakeLists.txt | 1 + tests/fp/FPToFixed.cpp | 38 ++++++++++++++++ 5 files changed, 163 insertions(+) create mode 100644 src/common/fp/op.cpp create mode 100644 src/common/fp/op.h create mode 100644 tests/fp/FPToFixed.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index daf5b7d8..32a2095d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -19,6 +19,8 @@ add_library(dynarmic common/fp/fpsr.h common/fp/info.h common/fp/mantissa_util.h + common/fp/op.cpp + common/fp/op.h common/fp/process_exception.cpp common/fp/process_exception.h common/fp/rounding_mode.h diff --git a/src/common/fp/op.cpp b/src/common/fp/op.cpp new file mode 100644 index 00000000..38d9ec91 --- /dev/null +++ b/src/common/fp/op.cpp @@ -0,0 +1,101 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/common_types.h" +#include "common/safe_ops.h" +#include "common/fp/fpsr.h" +#include "common/fp/mantissa_util.h" +#include "common/fp/op.h" +#include "common/fp/process_exception.h" +#include "common/fp/rounding_mode.h" +#include "common/fp/unpacked.h" +#include "frontend/A64/FPCR.h" + +namespace Dynarmic::FP { + +template +u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { + ASSERT(rounding != RoundingMode::ToOdd); + ASSERT(ibits <= 64); + ASSERT(fbits <= ibits); + + auto [type, sign, value] = FPUnpack(op, fpcr, fpsr); + + if (type == FPType::SNaN || type == FPType::QNaN) { + FPProcessException(FPExc::InvalidOp, fpcr, fpsr); + } + + // Handle zero + if (value.mantissa == 0) { + return 0; + } + + if (sign && unsigned_) { + FPProcessException(FPExc::InvalidOp, fpcr, fpsr); + return 0; + } + + // value *= 2.0^fbits + value.exponent += static_cast(fbits); + + u64 int_result = sign ? 
Safe::Negate(value.mantissa) : static_cast(value.mantissa); + const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent); + int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent); + + bool round_up = false; + switch (rounding) { + case RoundingMode::ToNearest_TieEven: + round_up = error > ResidualError::Half || (error == ResidualError::Half && Common::Bit<0>(int_result)); + break; + case RoundingMode::TowardsPlusInfinity: + round_up = error != ResidualError::Zero; + break; + case RoundingMode::TowardsMinusInfinity: + round_up = false; + break; + case RoundingMode::TowardsZero: + round_up = error != ResidualError::Zero && Common::MostSignificantBit(int_result); + break; + case RoundingMode::ToNearest_TieAwayFromZero: + round_up = error > ResidualError::Half || (error == ResidualError::Half && !Common::MostSignificantBit(int_result)); + break; + case RoundingMode::ToOdd: + UNREACHABLE(); + } + + if (round_up) { + int_result++; + } + + // Detect Overflow + const int min_exponent_for_overflow = static_cast(ibits) - static_cast(Common::HighestSetBit(value.mantissa + (round_up ? 1 : 0))) - (unsigned_ ? 0 : 1); + if (value.exponent >= min_exponent_for_overflow) { + // Positive overflow + if (unsigned_ || !sign) { + FPProcessException(FPExc::InvalidOp, fpcr, fpsr); + return Common::Ones(ibits - (unsigned_ ? 
0 : 1)); + } + + // Negative overflow + const u64 min_value = Safe::Negate(static_cast(1) << (ibits - 1)); + if (!(value.exponent == min_exponent_for_overflow && int_result == min_value)) { + FPProcessException(FPExc::InvalidOp, fpcr, fpsr); + return static_cast(1) << (ibits - 1); + } + } + + if (error != ResidualError::Zero) { + FPProcessException(FPExc::Inexact, fpcr, fpsr); + } + return int_result & Common::Ones(ibits); +} + +template u64 FPToFixed(size_t ibits, u32 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); +template u64 FPToFixed(size_t ibits, u64 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); + +} // namespace Dynarmic::FP diff --git a/src/common/fp/op.h b/src/common/fp/op.h new file mode 100644 index 00000000..435070b0 --- /dev/null +++ b/src/common/fp/op.h @@ -0,0 +1,21 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include "common/common_types.h" +#include "common/fp/fpsr.h" +#include "common/fp/rounding_mode.h" +#include "frontend/A64/FPCR.h" + +namespace Dynarmic::FP { + +using FPCR = A64::FPCR; + +template +u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); + +} // namespace Dynarmic::FP diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e48e18c1..7680dc4a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -29,6 +29,7 @@ add_executable(dynarmic_tests A64/inst_gen.cpp A64/inst_gen.h A64/testenv.h + fp/FPToFixed.cpp fp/mantissa_util_tests.cpp fp/unpacked_tests.cpp main.cpp diff --git a/tests/fp/FPToFixed.cpp b/tests/fp/FPToFixed.cpp new file mode 100644 index 00000000..1a507dfb --- /dev/null +++ b/tests/fp/FPToFixed.cpp @@ -0,0 +1,38 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#include +#include + +#include + +#include "common/fp/fpsr.h" +#include "common/fp/op.h" +#include "rand_int.h" + +using namespace Dynarmic; +using namespace Dynarmic::FP; + +TEST_CASE("FPToFixed", "[fp]") { + const std::vector> test_cases { + {0x447A0000, 64, 0x000003E8, 0x00}, + {0xC47A0000, 32, 0xFFFFFC18, 0x00}, + {0x4479E000, 64, 0x000003E8, 0x10}, + {0x50800000, 32, 0x7FFFFFFF, 0x01}, + {0xD0800000, 32, 0x80000000, 0x01}, + {0xCF000000, 32, 0x80000000, 0x00}, + {0x80002B94, 64, 0x00000000, 0x10}, + {0x80636D24, 64, 0x00000000, 0x10}, + }; + + const FPCR fpcr; + for (auto [input, ibits, expected_output, expected_fpsr] : test_cases) { + FPSR fpsr; + const u64 output = FPToFixed(ibits, input, 0, false, fpcr, RoundingMode::ToNearest_TieEven, fpsr); + REQUIRE(output == expected_output); + REQUIRE(fpsr.Value() == expected_fpsr); + } +} From 760cc3ca893ce8dc36cb0a6d6ce287fad4007846 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 15 Jul 2018 14:23:50 +0100 Subject: [PATCH 17/28] EmitContext: Expose FPCR --- src/backend_x64/a32_emit_x64.cpp | 4 ++++ src/backend_x64/a32_emit_x64.h | 1 + src/backend_x64/a64_emit_x64.cpp | 4 ++++ src/backend_x64/a64_emit_x64.h | 1 + src/backend_x64/emit_x64.h | 1 + 5 files changed, 11 insertions(+) diff --git a/src/backend_x64/a32_emit_x64.cpp b/src/backend_x64/a32_emit_x64.cpp index bf9fadcf..c027761d 100644 --- a/src/backend_x64/a32_emit_x64.cpp +++ b/src/backend_x64/a32_emit_x64.cpp @@ -62,6 +62,10 @@ FP::RoundingMode A32EmitContext::FPSCR_RMode() const { return Location().FPSCR().RMode(); } +u32 A32EmitContext::FPCR() const { + return Location().FPSCR().Value(); +} + bool A32EmitContext::FPSCR_RoundTowardsZero() const { return Location().FPSCR().RMode() != FP::RoundingMode::TowardsZero; } diff --git a/src/backend_x64/a32_emit_x64.h 
b/src/backend_x64/a32_emit_x64.h index bec1663d..254d4a9e 100644 --- a/src/backend_x64/a32_emit_x64.h +++ b/src/backend_x64/a32_emit_x64.h @@ -24,6 +24,7 @@ struct A32EmitContext final : public EmitContext { A32EmitContext(RegAlloc& reg_alloc, IR::Block& block); A32::LocationDescriptor Location() const; FP::RoundingMode FPSCR_RMode() const override; + u32 FPCR() const override; bool FPSCR_RoundTowardsZero() const override; bool FPSCR_FTZ() const override; bool FPSCR_DN() const override; diff --git a/src/backend_x64/a64_emit_x64.cpp b/src/backend_x64/a64_emit_x64.cpp index 2c30b9f7..70e41b28 100644 --- a/src/backend_x64/a64_emit_x64.cpp +++ b/src/backend_x64/a64_emit_x64.cpp @@ -44,6 +44,10 @@ FP::RoundingMode A64EmitContext::FPSCR_RMode() const { return Location().FPCR().RMode(); } +u32 A64EmitContext::FPCR() const { + return Location().FPCR().Value(); +} + bool A64EmitContext::FPSCR_RoundTowardsZero() const { return Location().FPCR().RMode() != FP::RoundingMode::TowardsZero; } diff --git a/src/backend_x64/a64_emit_x64.h b/src/backend_x64/a64_emit_x64.h index 2ecf0883..e251a963 100644 --- a/src/backend_x64/a64_emit_x64.h +++ b/src/backend_x64/a64_emit_x64.h @@ -24,6 +24,7 @@ struct A64EmitContext final : public EmitContext { A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block); A64::LocationDescriptor Location() const; FP::RoundingMode FPSCR_RMode() const override; + u32 FPCR() const override; bool FPSCR_RoundTowardsZero() const override; bool FPSCR_FTZ() const override; bool FPSCR_DN() const override; diff --git a/src/backend_x64/emit_x64.h b/src/backend_x64/emit_x64.h index 47f47bd3..64f66967 100644 --- a/src/backend_x64/emit_x64.h +++ b/src/backend_x64/emit_x64.h @@ -35,6 +35,7 @@ struct EmitContext { void EraseInstruction(IR::Inst* inst); virtual FP::RoundingMode FPSCR_RMode() const = 0; + virtual u32 FPCR() const = 0; virtual bool FPSCR_RoundTowardsZero() const = 0; virtual bool FPSCR_FTZ() const = 0; virtual bool FPSCR_DN() const 
= 0; From caaf36dfd6ee24da83b46995d9753519c100a3e7 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 10:49:47 +0100 Subject: [PATCH 18/28] IR: Initial implementation of FP{Double,Single}ToFixed{S,U}{32,64} This implementation just falls-back to the software floating point implementation. --- src/backend_x64/a32_jitstate.cpp | 12 +- src/backend_x64/a32_jitstate.h | 1 + src/backend_x64/a64_jitstate.cpp | 9 +- src/backend_x64/a64_jitstate.h | 1 + src/backend_x64/emit_x64_floating_point.cpp | 195 +++++++----------- src/backend_x64/jitstate_info.h | 2 + .../A32/translate/translate_arm/vfp2.cpp | 8 +- .../floating_point_conversion_fixed_point.cpp | 16 +- .../floating_point_conversion_integer.cpp | 16 +- src/frontend/ir/ir_emitter.cpp | 44 ++-- src/frontend/ir/ir_emitter.h | 16 +- src/frontend/ir/opcodes.inc | 12 +- 12 files changed, 159 insertions(+), 173 deletions(-) diff --git a/src/backend_x64/a32_jitstate.cpp b/src/backend_x64/a32_jitstate.cpp index 5a9ee868..c274d7fc 100644 --- a/src/backend_x64/a32_jitstate.cpp +++ b/src/backend_x64/a32_jitstate.cpp @@ -163,6 +163,7 @@ u32 A32JitState::Fpscr() const { FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE FPSCR |= FPSCR_IDC; FPSCR |= FPSCR_UFC; + FPSCR |= fpsr_exc; return FPSCR; } @@ -183,13 +184,10 @@ void A32JitState::SetFpscr(u32 FPSCR) { const std::array MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000}; guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3]; - // Cumulative flags IOC, IXC, UFC, OFC, DZC - guest_MXCSR |= ( FPSCR ) & 0b0000000000001; // IE = IOC - guest_MXCSR |= ( FPSCR << 1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC - - // Cumulative flag IDC, UFC - FPSCR_IDC = FPSCR & (1 << 7); - FPSCR_UFC = FPSCR & (1 << 3); + // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC + FPSCR_IDC = 0; + FPSCR_UFC = 0; + fpsr_exc = FPSCR & 0x9F; if (Common::Bit<24>(FPSCR)) { // VFP Flush to Zero diff --git a/src/backend_x64/a32_jitstate.h b/src/backend_x64/a32_jitstate.h 
index 5294f8e4..3688ac82 100644 --- a/src/backend_x64/a32_jitstate.h +++ b/src/backend_x64/a32_jitstate.h @@ -66,6 +66,7 @@ struct A32JitState { std::array rsb_codeptrs; void ResetRSB(); + u32 fpsr_exc = 0; u32 FPSCR_IDC = 0; u32 FPSCR_UFC = 0; u32 FPSCR_mode = 0; diff --git a/src/backend_x64/a64_jitstate.cpp b/src/backend_x64/a64_jitstate.cpp index efc8d167..eee952f5 100644 --- a/src/backend_x64/a64_jitstate.cpp +++ b/src/backend_x64/a64_jitstate.cpp @@ -105,16 +105,15 @@ u32 A64JitState::GetFpsr() const { fpsr |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE fpsr |= FPSCR_IDC; fpsr |= FPSCR_UFC; + fpsr |= fpsr_exc; return fpsr; } void A64JitState::SetFpsr(u32 value) { guest_MXCSR &= ~0x0000003D; - guest_MXCSR |= ( value ) & 0b0000000000001; // IE = IOC - guest_MXCSR |= ( value << 1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC - - FPSCR_IDC = value & (1 << 7); - FPSCR_UFC = value & (1 << 3); + FPSCR_IDC = 0; + FPSCR_UFC = 0; + fpsr_exc = value & 0x9F; } } // namespace Dynarmic::BackendX64 diff --git a/src/backend_x64/a64_jitstate.h b/src/backend_x64/a64_jitstate.h index a1c3a2dc..b8fdd5e1 100644 --- a/src/backend_x64/a64_jitstate.h +++ b/src/backend_x64/a64_jitstate.h @@ -71,6 +71,7 @@ struct A64JitState { rsb_codeptrs.fill(0); } + u32 fpsr_exc = 0; u32 FPSCR_IDC = 0; u32 FPSCR_UFC = 0; u32 fpcr = 0; diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp index 253c235d..7bcfe410 100644 --- a/src/backend_x64/emit_x64_floating_point.cpp +++ b/src/backend_x64/emit_x64_floating_point.cpp @@ -5,13 +5,22 @@ */ #include +#include #include "backend_x64/abi.h" #include "backend_x64/block_of_code.h" #include "backend_x64/emit_x64.h" #include "common/assert.h" #include "common/common_types.h" +#include "common/fp/op.h" #include "common/fp/util.h" +#include "common/mp/cartesian_product.h" +#include "common/mp/integer.h" +#include "common/mp/list.h" +#include "common/mp/lut.h" 
+#include "common/mp/to_tuple.h" +#include "common/mp/vlift.h" +#include "common/mp/vllift.h" #include "frontend/ir/basic_block.h" #include "frontend/ir/microinstruction.h" #include "frontend/ir/opcodes.h" @@ -19,6 +28,7 @@ namespace Dynarmic::BackendX64 { using namespace Xbyak::util; +namespace mp = Dynarmic::Common::mp; constexpr u64 f32_negative_zero = 0x80000000u; constexpr u64 f32_nan = 0x7fc00000u; @@ -29,10 +39,6 @@ constexpr u64 f64_nan = 0x7ff8000000000000u; constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu; constexpr u64 f64_penultimate_positive_denormal = 0x000ffffffffffffeu; -constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double -constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double -constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double -constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { Xbyak::Label end; @@ -99,12 +105,6 @@ static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 code.L(end); } -static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) { - code.pxor(xmm_scratch, xmm_scratch); - code.cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN) - code.pand(xmm_value, xmm_scratch); -} - static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbyak::Label& end) { Xbyak::Label nan; @@ -892,129 +892,82 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) { +static void EmitFPToFixedFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize, bool unsigned_, size_t isize) { + using fsize_list = mp::list, mp::vlift>; + using unsigned_list = mp::list, mp::vlift>; + using isize_list = mp::list, mp::vlift>; + using rounding_list 
= mp::list< + std::integral_constant, + std::integral_constant, + std::integral_constant, + std::integral_constant, + std::integral_constant + >; + + using key_type = std::tuple; + using value_type = u64(*)(u64, u8, FP::FPSR&, A64::FPCR); + + static const auto lut = mp::GenerateLookupTableFromList( + [](auto args) { + return std::pair{ + mp::to_tuple, + static_cast( + [](u64 input, u8 fbits, FP::FPSR& fpsr, A64::FPCR fpcr) { + constexpr auto t = mp::to_tuple; + constexpr size_t fsize = std::get<0>(t); + constexpr bool unsigned_ = std::get<1>(t); + constexpr size_t isize = std::get<2>(t); + constexpr FP::RoundingMode rounding_mode = std::get<3>(t); + using InputSize = mp::unsigned_integer_of_size; + + return FP::FPToFixed(isize, static_cast(input), fbits, unsigned_, fpcr, rounding_mode, fpsr); + } + ) + }; + }, + mp::cartesian_product{} + ); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]); - Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32(); - Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm(); - bool round_towards_zero = args[1].GetImmediateU1(); - // ARM saturates on conversion; this differs from x64 which returns a sentinel value. - // Conversion to double is lossless, and allows for clamping. 
+ const auto rounding = static_cast(args[2].GetImmediateU8()); - if (ctx.FPSCR_FTZ()) { - DenormalsAreZero32(code, from, to); - } - code.cvtss2sd(from, from); - // First time is to set flags - if (round_towards_zero) { - code.cvttsd2si(to, from); // 32 bit gpr - } else { - code.cvtsd2si(to, from); // 32 bit gpr - } - // Clamp to output range - ZeroIfNaN64(code, from, xmm_scratch); - code.minsd(from, code.MConst(xword, f64_max_s32)); - code.maxsd(from, code.MConst(xword, f64_min_s32)); - // Second time is for real - if (round_towards_zero) { - code.cvttsd2si(to, from); // 32 bit gpr - } else { - code.cvtsd2si(to, from); // 32 bit gpr - } - - ctx.reg_alloc.DefineValue(inst, to); + ctx.reg_alloc.HostCall(inst, args[0], args[1]); + code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR()); + code.CallFunction(lut.at(std::make_tuple(fsize, unsigned_, isize, rounding))); } -void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]); - Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64(); - Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm(); - bool round_towards_zero = args[1].GetImmediateU1(); - - // ARM saturates on conversion; this differs from x64 which returns a sentinel value. - // Conversion to double is lossless, and allows for accurate clamping. - // - // Since SSE2 doesn't provide an unsigned conversion, we use a 64-bit signed conversion. 
- // - // FIXME: None of the FPSR exception bits are correctly signalled with the below code - - if (ctx.FPSCR_FTZ()) { - DenormalsAreZero64(code, from, to); - } - code.cvtss2sd(from, from); - // Clamp to output range - ZeroIfNaN64(code, from, xmm_scratch); - code.minsd(from, code.MConst(xword, f64_max_u32)); - code.maxsd(from, code.MConst(xword, f64_min_u32)); - if (round_towards_zero) { - code.cvttsd2si(to, from); // 64 bit gpr - } else { - code.cvtsd2si(to, from); // 64 bit gpr - } - - ctx.reg_alloc.DefineValue(inst, to); +void EmitX64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 64, false, 32); } -void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]); - Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32(); - Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm(); - Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32(); - bool round_towards_zero = args[1].GetImmediateU1(); - - // ARM saturates on conversion; this differs from x64 which returns a sentinel value. 
- - if (ctx.FPSCR_FTZ()) { - DenormalsAreZero64(code, from, gpr_scratch.cvt64()); - } - // First time is to set flags - if (round_towards_zero) { - code.cvttsd2si(gpr_scratch, from); // 32 bit gpr - } else { - code.cvtsd2si(gpr_scratch, from); // 32 bit gpr - } - // Clamp to output range - ZeroIfNaN64(code, from, xmm_scratch); - code.minsd(from, code.MConst(xword, f64_max_s32)); - code.maxsd(from, code.MConst(xword, f64_min_s32)); - // Second time is for real - if (round_towards_zero) { - code.cvttsd2si(to, from); // 32 bit gpr - } else { - code.cvtsd2si(to, from); // 32 bit gpr - } - - ctx.reg_alloc.DefineValue(inst, to); +void EmitX64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 64, false, 64); } -void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]); - Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64(); - Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm(); - bool round_towards_zero = args[1].GetImmediateU1(); +void EmitX64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 64, true, 32); +} - // ARM saturates on conversion; this differs from x64 which returns a sentinel value. - // TODO: Use VCVTPD2UDQ when AVX512VL is available. 
- // FIXME: None of the FPSR exception bits are correctly signalled with the below code +void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 64, true, 64); +} - if (ctx.FPSCR_FTZ()) { - DenormalsAreZero64(code, from, to); - } - // Clamp to output range - ZeroIfNaN64(code, from, xmm_scratch); - code.minsd(from, code.MConst(xword, f64_max_u32)); - code.maxsd(from, code.MConst(xword, f64_min_u32)); - if (round_towards_zero) { - code.cvttsd2si(to, from); // 64 bit gpr - } else { - code.cvtsd2si(to, from); // 64 bit gpr - } +void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 32, false, 32); +} - ctx.reg_alloc.DefineValue(inst, to); +void EmitX64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 32, false, 64); +} + +void EmitX64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 32, true, 32); +} + +void EmitX64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 32, true, 64); } void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) { diff --git a/src/backend_x64/jitstate_info.h b/src/backend_x64/jitstate_info.h index 2c267bb6..3caca413 100644 --- a/src/backend_x64/jitstate_info.h +++ b/src/backend_x64/jitstate_info.h @@ -26,6 +26,7 @@ struct JitStateInfo { , offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv)) , offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC)) , offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC)) + , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc)) {} const size_t offsetof_cycles_remaining; @@ -39,6 +40,7 @@ struct JitStateInfo { const size_t offsetof_CPSR_nzcv; const size_t offsetof_FPSCR_IDC; const size_t offsetof_FPSCR_UFC; + const size_t offsetof_fpsr_exc; }; } // namespace Dynarmic::BackendX64 diff --git a/src/frontend/A32/translate/translate_arm/vfp2.cpp 
b/src/frontend/A32/translate/translate_arm/vfp2.cpp index defc7b7a..898a8418 100644 --- a/src/frontend/A32/translate/translate_arm/vfp2.cpp +++ b/src/frontend/A32/translate/translate_arm/vfp2.cpp @@ -442,8 +442,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool s if (ConditionPassed(cond)) { auto reg_m = ir.GetExtendedRegister(m); auto result = sz - ? ir.FPDoubleToU32(reg_m, round_towards_zero, true) - : ir.FPSingleToU32(reg_m, round_towards_zero, true); + ? ir.FPDoubleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode()) + : ir.FPSingleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode()); ir.SetExtendedRegister(d, result); } return true; @@ -457,8 +457,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool s if (ConditionPassed(cond)) { auto reg_m = ir.GetExtendedRegister(m); auto result = sz - ? ir.FPDoubleToS32(reg_m, round_towards_zero, true) - : ir.FPSingleToS32(reg_m, round_towards_zero, true); + ? ir.FPDoubleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode()) + : ir.FPSingleToFixedS32(reg_m, 0, round_towards_zero ? 
FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode()); ir.SetExtendedRegister(d, result); } return true; diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp index a86fbb10..8f4eb7df 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp @@ -38,13 +38,13 @@ bool TranslatorVisitor::FCVTZS_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec IR::U32U64 intval; if (intsize == 32 && *fltsize == 32) { - intval = ir.FPSingleToS32(fltval, true, true); + intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 32 && *fltsize == 64) { - intval = ir.FPDoubleToS32(fltval, true, true); + intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 32) { - return InterpretThisInstruction(); + intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 64) { - return InterpretThisInstruction(); + intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero); } else { UNREACHABLE(); } @@ -69,13 +69,13 @@ bool TranslatorVisitor::FCVTZU_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec IR::U32U64 intval; if (intsize == 32 && *fltsize == 32) { - intval = ir.FPSingleToU32(fltval, true, true); + intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 32 && *fltsize == 64) { - intval = ir.FPDoubleToU32(fltval, true, true); + intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 32) { - return InterpretThisInstruction(); + intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 64) { - return InterpretThisInstruction(); + intval = 
ir.FPDoubleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero); } else { UNREACHABLE(); } diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index fd634dcc..009744bd 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -146,13 +146,13 @@ bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { IR::U32U64 intval; if (intsize == 32 && *fltsize == 32) { - intval = ir.FPSingleToS32(fltval, true, true); + intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 32 && *fltsize == 64) { - intval = ir.FPDoubleToS32(fltval, true, true); + intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 32) { - return InterpretThisInstruction(); + intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 64) { - return InterpretThisInstruction(); + intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero); } else { UNREACHABLE(); } @@ -173,13 +173,13 @@ bool TranslatorVisitor::FCVTZU_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { IR::U32U64 intval; if (intsize == 32 && *fltsize == 32) { - intval = ir.FPSingleToU32(fltval, true, true); + intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 32 && *fltsize == 64) { - intval = ir.FPDoubleToU32(fltval, true, true); + intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 32) { - return InterpretThisInstruction(); + intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 64) { - return InterpretThisInstruction(); + intval = ir.FPDoubleToFixedU64(fltval, 0, 
FP::RoundingMode::TowardsZero); } else { UNREACHABLE(); } diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 3f7c6d33..e3858a7a 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -1451,24 +1451,44 @@ U64 IREmitter::FPSingleToDouble(const U32& a, bool fpscr_controlled) { return Inst(Opcode::FPSingleToDouble, a); } -U32 IREmitter::FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled) { - ASSERT(fpscr_controlled); - return Inst(Opcode::FPSingleToS32, a, Imm1(round_towards_zero)); +U32 IREmitter::FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 32); + return Inst(Opcode::FPDoubleToFixedS32, a, Imm8(static_cast(fbits)), Imm8(static_cast(rounding))); } -U32 IREmitter::FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled) { - ASSERT(fpscr_controlled); - return Inst(Opcode::FPSingleToU32, a, Imm1(round_towards_zero)); +U64 IREmitter::FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 64); + return Inst(Opcode::FPDoubleToFixedS64, a, Imm8(static_cast(fbits)), Imm8(static_cast(rounding))); } -U32 IREmitter::FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled) { - ASSERT(fpscr_controlled); - return Inst(Opcode::FPDoubleToS32, a, Imm1(round_towards_zero)); +U32 IREmitter::FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 32); + return Inst(Opcode::FPDoubleToFixedU32, a, Imm8(static_cast(fbits)), Imm8(static_cast(rounding))); } -U32 IREmitter::FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled) { - ASSERT(fpscr_controlled); - return Inst(Opcode::FPDoubleToU32, a, Imm1(round_towards_zero)); +U64 IREmitter::FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 64); + return Inst(Opcode::FPDoubleToFixedU64, a, Imm8(static_cast(fbits)), 
Imm8(static_cast(rounding))); +} + +U32 IREmitter::FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 32); + return Inst(Opcode::FPSingleToFixedS32, a, Imm8(static_cast(fbits)), Imm8(static_cast(rounding))); +} + +U64 IREmitter::FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 64); + return Inst(Opcode::FPSingleToFixedS64, a, Imm8(static_cast(fbits)), Imm8(static_cast(rounding))); +} + +U32 IREmitter::FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 32); + return Inst(Opcode::FPSingleToFixedU32, a, Imm8(static_cast(fbits)), Imm8(static_cast(rounding))); +} + +U64 IREmitter::FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 64); + return Inst(Opcode::FPSingleToFixedU64, a, Imm8(static_cast(fbits)), Imm8(static_cast(rounding))); } U32 IREmitter::FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled) { diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 8bc248d0..c783c164 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -12,6 +12,10 @@ #include "frontend/ir/terminal.h" #include "frontend/ir/value.h" +namespace Dynarmic::FP { +enum class RoundingMode; +} // namespace Dynarmic::FP + // ARM JIT Microinstruction Intermediate Representation // // This intermediate representation is an SSA IR. 
It is designed primarily for analysis, @@ -264,10 +268,14 @@ public: U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled); U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled); U64 FPSingleToDouble(const U32& a, bool fpscr_controlled); - U32 FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled); - U32 FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled); - U32 FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled); - U32 FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled); + U32 FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding); + U64 FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding); + U32 FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding); + U64 FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding); + U32 FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding); + U64 FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding); + U32 FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding); + U64 FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding); U32 FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled); U32 FPU32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled); U64 FPS32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 83ec4820..c4b5e165 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -386,10 +386,14 @@ OPCODE(FPSub64, T::U64, T::U64, T::U // Floating-point conversions OPCODE(FPSingleToDouble, T::U64, T::U32 ) OPCODE(FPDoubleToSingle, T::U32, T::U64 ) -OPCODE(FPSingleToU32, T::U32, T::U32, T::U1 ) -OPCODE(FPSingleToS32, T::U32, T::U32, T::U1 ) -OPCODE(FPDoubleToU32, T::U32, T::U64, T::U1 ) -OPCODE(FPDoubleToS32, T::U32, T::U64, 
T::U1 ) +OPCODE(FPDoubleToFixedS32, T::U32, T::U64, T::U8, T::U8 ) +OPCODE(FPDoubleToFixedS64, T::U64, T::U64, T::U8, T::U8 ) +OPCODE(FPDoubleToFixedU32, T::U32, T::U64, T::U8, T::U8 ) +OPCODE(FPDoubleToFixedU64, T::U64, T::U64, T::U8, T::U8 ) +OPCODE(FPSingleToFixedS32, T::U32, T::U32, T::U8, T::U8 ) +OPCODE(FPSingleToFixedS64, T::U64, T::U32, T::U8, T::U8 ) +OPCODE(FPSingleToFixedU32, T::U32, T::U32, T::U8, T::U8 ) +OPCODE(FPSingleToFixedU64, T::U64, T::U32, T::U8, T::U8 ) OPCODE(FPU32ToSingle, T::U32, T::U32, T::U1 ) OPCODE(FPS32ToSingle, T::U32, T::U32, T::U1 ) OPCODE(FPU32ToDouble, T::U64, T::U32, T::U1 ) From 617ca0adf007e862f4f1f50751a4aaa07900ef90 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 11:36:46 +0100 Subject: [PATCH 19/28] floating_point_conversion_integer: Refactor implementation of FCVTZS_float_int and FCVTZU_float_int --- .../floating_point_conversion_integer.cpp | 73 +++++++++++-------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index 009744bd..98143f86 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -6,6 +6,7 @@ #include +#include "common/fp/rounding_mode.h" #include "frontend/A64/translate/impl/impl.h" namespace Dynarmic::A64 { @@ -135,58 +136,66 @@ bool TranslatorVisitor::FMOV_float_gen(bool sf, Imm<2> type, Imm<1> rmode_0, Imm return true; } -bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { +static bool FloaingPointConvertSignedInteger(TranslatorVisitor& v, bool sf, Imm<2> type, Vec Vn, Reg Rd, FP::RoundingMode rounding_mode) { const size_t intsize = sf ? 
64 : 32; const auto fltsize = GetDataSize(type); if (!fltsize || *fltsize == 16) { - return UnallocatedEncoding(); + return v.UnallocatedEncoding(); } - const IR::U32U64 fltval = V_scalar(*fltsize, Vn); + const IR::U32U64 fltval = v.V_scalar(*fltsize, Vn); IR::U32U64 intval; if (intsize == 32 && *fltsize == 32) { - intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero); + intval = v.ir.FPSingleToFixedS32(fltval, 0, rounding_mode); } else if (intsize == 32 && *fltsize == 64) { - intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero); + intval = v.ir.FPDoubleToFixedS32(fltval, 0, rounding_mode); } else if (intsize == 64 && *fltsize == 32) { - intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero); + intval = v.ir.FPSingleToFixedS64(fltval, 0, rounding_mode); } else if (intsize == 64 && *fltsize == 64) { - intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero); + intval = v.ir.FPDoubleToFixedS64(fltval, 0, rounding_mode); } else { UNREACHABLE(); } - X(intsize, Rd, intval); + v.X(intsize, Rd, intval); return true; } +static bool FloaingPointConvertUnsignedInteger(TranslatorVisitor& v, bool sf, Imm<2> type, Vec Vn, Reg Rd, FP::RoundingMode rounding_mode) { + const size_t intsize = sf ? 
64 : 32; + const auto fltsize = GetDataSize(type); + if (!fltsize || *fltsize == 16) { + return v.UnallocatedEncoding(); + } + + const IR::U32U64 fltval = v.V_scalar(*fltsize, Vn); + IR::U32U64 intval; + + if (intsize == 32 && *fltsize == 32) { + intval = v.ir.FPSingleToFixedU32(fltval, 0, rounding_mode); + } else if (intsize == 32 && *fltsize == 64) { + intval = v.ir.FPDoubleToFixedU32(fltval, 0, rounding_mode); + } else if (intsize == 64 && *fltsize == 32) { + intval = v.ir.FPSingleToFixedU64(fltval, 0, rounding_mode); + } else if (intsize == 64 && *fltsize == 64) { + intval = v.ir.FPDoubleToFixedU64(fltval, 0, rounding_mode); + } else { + UNREACHABLE(); + } + + v.X(intsize, Rd, intval); + + return true; +} + +bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsZero); +} + bool TranslatorVisitor::FCVTZU_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { - const size_t intsize = sf ? 
64 : 32; - const auto fltsize = GetDataSize(type); - if (!fltsize || *fltsize == 16) { - return UnallocatedEncoding(); - } - - const IR::U32U64 fltval = V_scalar(*fltsize, Vn); - IR::U32U64 intval; - - if (intsize == 32 && *fltsize == 32) { - intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero); - } else if (intsize == 32 && *fltsize == 64) { - intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero); - } else if (intsize == 64 && *fltsize == 32) { - intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero); - } else if (intsize == 64 && *fltsize == 64) { - intval = ir.FPDoubleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero); - } else { - UNREACHABLE(); - } - - X(intsize, Rd, intval); - - return true; + return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsZero); } } // namespace Dynarmic::A64 From 7d36dbcdfd6618d9cff97bd7a54c59e3d42f4ce7 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 11:39:07 +0100 Subject: [PATCH 20/28] A64: Implement FCVTNS (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index a29efee6..c92e8f22 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -884,7 +884,7 @@ INST(FCVTZS_float_fix, "FCVTZS (scalar, fixed-point)", "z0011 INST(FCVTZU_float_fix, "FCVTZU (scalar, fixed-point)", "z0011110yy011001ppppppnnnnnddddd") // Data Processing - FP and SIMD - Conversion between floating point and integer -//INST(FCVTNS_float, "FCVTNS (scalar)", "z0011110yy100000000000nnnnnddddd") +INST(FCVTNS_float, "FCVTNS (scalar)", "z0011110yy100000000000nnnnnddddd") //INST(FCVTNU_float, "FCVTNU (scalar)", "z0011110yy100001000000nnnnnddddd") INST(SCVTF_float_int, "SCVTF (scalar, integer)", "z0011110yy100010000000nnnnnddddd") 
INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011110yy100011000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index 98143f86..1bd624c8 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -190,6 +190,10 @@ static bool FloaingPointConvertUnsignedInteger(TranslatorVisitor& v, bool sf, Im return true; } +bool TranslatorVisitor::FCVTNS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieEven); +} + bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsZero); } From a1965a74a093fc0d2afd51a11674c0493ea56eae Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 11:39:30 +0100 Subject: [PATCH 21/28] A64: Implement FCVTNU (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index c92e8f22..24bc1b50 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -885,7 +885,7 @@ INST(FCVTZU_float_fix, "FCVTZU (scalar, fixed-point)", "z0011 // Data Processing - FP and SIMD - Conversion between floating point and integer INST(FCVTNS_float, "FCVTNS (scalar)", "z0011110yy100000000000nnnnnddddd") -//INST(FCVTNU_float, "FCVTNU (scalar)", "z0011110yy100001000000nnnnnddddd") +INST(FCVTNU_float, "FCVTNU (scalar)", "z0011110yy100001000000nnnnnddddd") INST(SCVTF_float_int, "SCVTF (scalar, integer)", "z0011110yy100010000000nnnnnddddd") INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011110yy100011000000nnnnnddddd") 
//INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index 1bd624c8..9112792e 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -194,6 +194,10 @@ bool TranslatorVisitor::FCVTNS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieEven); } +bool TranslatorVisitor::FCVTNU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieEven); +} + bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsZero); } From c0c7a263143e9cef8b1b70ae36fc250fb375f2be Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 12:15:35 +0100 Subject: [PATCH 22/28] A64: Implement FCVTAS (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 24bc1b50..bcbf1e28 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -888,7 +888,7 @@ INST(FCVTNS_float, "FCVTNS (scalar)", "z0011 INST(FCVTNU_float, "FCVTNU (scalar)", "z0011110yy100001000000nnnnnddddd") INST(SCVTF_float_int, "SCVTF (scalar, integer)", "z0011110yy100010000000nnnnnddddd") INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011110yy100011000000nnnnnddddd") -//INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") +INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") 
//INST(FCVTAU_float, "FCVTAU (scalar)", "z0011110yy100101000000nnnnnddddd") INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") //INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index 9112792e..deb17f42 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -206,4 +206,8 @@ bool TranslatorVisitor::FCVTZU_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsZero); } +bool TranslatorVisitor::FCVTAS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieAwayFromZero); +} + } // namespace Dynarmic::A64 From 27319822bb8188cded73936a702ca16405c12270 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 12:17:37 +0100 Subject: [PATCH 23/28] A64: Implement FCVTAU (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index bcbf1e28..39a5a430 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -889,7 +889,7 @@ INST(FCVTNU_float, "FCVTNU (scalar)", "z0011 INST(SCVTF_float_int, "SCVTF (scalar, integer)", "z0011110yy100010000000nnnnnddddd") INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011110yy100011000000nnnnnddddd") INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") -//INST(FCVTAU_float, "FCVTAU (scalar)", "z0011110yy100101000000nnnnnddddd") +INST(FCVTAU_float, "FCVTAU (scalar)", "z0011110yy100101000000nnnnnddddd") 
INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") //INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") //INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index deb17f42..d3d2951d 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -210,4 +210,8 @@ bool TranslatorVisitor::FCVTAS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieAwayFromZero); } +bool TranslatorVisitor::FCVTAU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieAwayFromZero); +} + } // namespace Dynarmic::A64 From af661ef5a647d0e0b54e9d08756010bc218c9886 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 12:18:40 +0100 Subject: [PATCH 24/28] A64: Implement FCVTPS (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 39a5a430..0ad67dc6 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -891,7 +891,7 @@ INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011 INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") INST(FCVTAU_float, "FCVTAU (scalar)", "z0011110yy100101000000nnnnnddddd") INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") -//INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") +INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") 
//INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") //INST(FCVTMS_float, "FCVTMS (scalar)", "z0011110yy110000000000nnnnnddddd") //INST(FCVTMU_float, "FCVTMU (scalar)", "z0011110yy110001000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index d3d2951d..367fc725 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -214,4 +214,8 @@ bool TranslatorVisitor::FCVTAU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieAwayFromZero); } +bool TranslatorVisitor::FCVTPS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsPlusInfinity); +} + } // namespace Dynarmic::A64 From 49c4499a875e4e09899c06e6f3060fe18c0cbd11 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 12:19:02 +0100 Subject: [PATCH 25/28] A64: Implement FCVTPU (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 0ad67dc6..bfaee2c7 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -892,7 +892,7 @@ INST(FCVTAS_float, "FCVTAS (scalar)", "z0011 INST(FCVTAU_float, "FCVTAU (scalar)", "z0011110yy100101000000nnnnnddddd") INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") -//INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") +INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") //INST(FCVTMS_float, "FCVTMS 
(scalar)", "z0011110yy110000000000nnnnnddddd") //INST(FCVTMU_float, "FCVTMU (scalar)", "z0011110yy110001000000nnnnnddddd") INST(FCVTZS_float_int, "FCVTZS (scalar, integer)", "z0011110yy111000000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index 367fc725..b409cfce 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -218,4 +218,8 @@ bool TranslatorVisitor::FCVTPS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsPlusInfinity); } +bool TranslatorVisitor::FCVTPU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsPlusInfinity); +} + } // namespace Dynarmic::A64 From 79c9018d60eea53bff4418eb9e6f8c3141812996 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 12:19:38 +0100 Subject: [PATCH 26/28] A64: Implement FCVTMS (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index bfaee2c7..c3c0c0e5 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -893,7 +893,7 @@ INST(FCVTAU_float, "FCVTAU (scalar)", "z0011 INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") -//INST(FCVTMS_float, "FCVTMS (scalar)", "z0011110yy110000000000nnnnnddddd") +INST(FCVTMS_float, "FCVTMS (scalar)", "z0011110yy110000000000nnnnnddddd") //INST(FCVTMU_float, "FCVTMU (scalar)", 
"z0011110yy110001000000nnnnnddddd") INST(FCVTZS_float_int, "FCVTZS (scalar, integer)", "z0011110yy111000000000nnnnnddddd") INST(FCVTZU_float_int, "FCVTZU (scalar, integer)", "z0011110yy111001000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index b409cfce..53b0e877 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -222,4 +222,8 @@ bool TranslatorVisitor::FCVTPU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsPlusInfinity); } +bool TranslatorVisitor::FCVTMS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsMinusInfinity); +} + } // namespace Dynarmic::A64 From 3d9677d09409f5f1fef06ede170356cc2154b6e5 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 12:21:07 +0100 Subject: [PATCH 27/28] A64: Implement FCVTMU (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index c3c0c0e5..6dcd2a63 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -894,7 +894,7 @@ INST(FMOV_float_gen, "FMOV (general)", "z0011 INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") INST(FCVTMS_float, "FCVTMS (scalar)", "z0011110yy110000000000nnnnnddddd") -//INST(FCVTMU_float, "FCVTMU (scalar)", "z0011110yy110001000000nnnnnddddd") +INST(FCVTMU_float, "FCVTMU (scalar)", "z0011110yy110001000000nnnnnddddd") INST(FCVTZS_float_int, "FCVTZS (scalar, integer)", 
"z0011110yy111000000000nnnnnddddd") INST(FCVTZU_float_int, "FCVTZU (scalar, integer)", "z0011110yy111001000000nnnnnddddd") //INST(FJCVTZS, "FJCVTZS", "0001111001111110000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index 53b0e877..ad581687 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -226,4 +226,8 @@ bool TranslatorVisitor::FCVTMS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsMinusInfinity); } +bool TranslatorVisitor::FCVTMU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsMinusInfinity); +} + } // namespace Dynarmic::A64 From 304cc7f61e21e423a35f8e5b98ba0d73b5db3500 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 15 Jul 2018 17:03:35 +0100 Subject: [PATCH 28/28] emit_x64_floating_point: SSE4.1 implementation for FP{Double,Single}ToFixed{S,U}{32,64} --- src/backend_x64/emit_x64_floating_point.cpp | 118 +++++++++++++++++--- 1 file changed, 105 insertions(+), 13 deletions(-) diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp index 7bcfe410..68ebfcd4 100644 --- a/src/backend_x64/emit_x64_floating_point.cpp +++ b/src/backend_x64/emit_x64_floating_point.cpp @@ -39,6 +39,14 @@ constexpr u64 f64_nan = 0x7ff8000000000000u; constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu; constexpr u64 f64_penultimate_positive_denormal = 0x000ffffffffffffeu; +constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double +constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double +constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double +constexpr u64 f64_max_u32 = 
0x41efffffffe00000u; // 4294967295 as a double +constexpr u64 f64_min_s64 = 0xc3e0000000000000u; // -2^63 as a double +constexpr u64 f64_max_s64_lim = 0x43e0000000000000u; // 2^63 as a double (actual maximum unrepresentable) +constexpr u64 f64_min_u64 = 0x0000000000000000u; // 0 as a double +constexpr u64 f64_max_u64_lim = 0x43f0000000000000u; // 2^64 as a double (actual maximum unrepresentable) static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { Xbyak::Label end; @@ -105,6 +113,12 @@ static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 code.L(end); } +static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) { + code.pxor(xmm_scratch, xmm_scratch); + code.cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN) + code.pand(xmm_value, xmm_scratch); +} + static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbyak::Label& end) { Xbyak::Label nan; @@ -892,7 +906,89 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -static void EmitFPToFixedFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize, bool unsigned_, size_t isize) { +static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize, bool unsigned_, size_t isize) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const size_t fbits = args[1].GetImmediateU8(); + const auto rounding = static_cast(args[2].GetImmediateU8()); + + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero){ + const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]); + + const int round_imm = [&]{ + switch (rounding) { + case FP::RoundingMode::ToNearest_TieEven: + default: + return 0b00; + case FP::RoundingMode::TowardsPlusInfinity: + return 0b10; + case FP::RoundingMode::TowardsMinusInfinity: + return 0b01; + 
case FP::RoundingMode::TowardsZero: + return 0b11; + } + }(); + + const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); + + if (fsize == 64) { + if (fbits != 0) { + const u64 scale_factor = static_cast((fbits + 1023) << 52); + code.mulsd(src, code.MConst(xword, scale_factor)); + } + + code.roundsd(src, src, round_imm); + ZeroIfNaN64(code, src, scratch); + } else { + if (fbits != 0) { + const u32 scale_factor = static_cast((fbits + 127) << 23); + code.mulss(src, code.MConst(xword, scale_factor)); + } + + code.roundss(src, src, round_imm); + code.cvtss2sd(src, src); + ZeroIfNaN64(code, src, scratch); + } + + if (isize == 64) { + Xbyak::Label saturate_max, end; + + code.maxsd(src, code.MConst(xword, unsigned_ ? f64_min_u64 : f64_min_s64)); + code.movsd(scratch, code.MConst(xword, unsigned_ ? f64_max_u64_lim : f64_max_s64_lim)); + code.comisd(scratch, src); + code.jna(saturate_max, code.T_NEAR); + if (unsigned_) { + Xbyak::Label below_max; + + code.movsd(scratch, code.MConst(xword, f64_max_s64_lim)); + code.comisd(src, scratch); + code.jb(below_max); + code.subsd(src, scratch); + code.cvttsd2si(result, src); + code.btc(result, 63); + code.jmp(end); + code.L(below_max); + } + code.cvttsd2si(result, src); // 64 bit gpr + code.L(end); + + code.SwitchToFarCode(); + code.L(saturate_max); + code.mov(result, unsigned_ ? 0xFFFF'FFFF'FFFF'FFFF : 0x7FFF'FFFF'FFFF'FFFF); + code.jmp(end, code.T_NEAR); + code.SwitchToNearCode(); + } else { + code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u32 : f64_max_s32)); + code.maxsd(src, code.MConst(xword, unsigned_ ? 
f64_min_u32 : f64_min_s32)); + code.cvttsd2si(result, src); // 64 bit gpr + } + + ctx.reg_alloc.DefineValue(inst, result); + + return; + } + using fsize_list = mp::list, mp::vlift>; using unsigned_list = mp::list, mp::vlift>; using isize_list = mp::list, mp::vlift>; @@ -928,10 +1024,6 @@ static void EmitFPToFixedFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* mp::cartesian_product{} ); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const auto rounding = static_cast(args[2].GetImmediateU8()); - ctx.reg_alloc.HostCall(inst, args[0], args[1]); code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR()); @@ -939,35 +1031,35 @@ static void EmitFPToFixedFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* } void EmitX64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, inst, 64, false, 32); + EmitFPToFixed(code, ctx, inst, 64, false, 32); } void EmitX64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, inst, 64, false, 64); + EmitFPToFixed(code, ctx, inst, 64, false, 64); } void EmitX64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, inst, 64, true, 32); + EmitFPToFixed(code, ctx, inst, 64, true, 32); } void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, inst, 64, true, 64); + EmitFPToFixed(code, ctx, inst, 64, true, 64); } void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, inst, 32, false, 32); + EmitFPToFixed(code, ctx, inst, 32, false, 32); } void EmitX64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, inst, 32, false, 64); + EmitFPToFixed(code, ctx, inst, 32, false, 64); } void EmitX64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, 
inst, 32, true, 32); + EmitFPToFixed(code, ctx, inst, 32, true, 32); } void EmitX64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, inst, 32, true, 64); + EmitFPToFixed(code, ctx, inst, 32, true, 64); } void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) {