From 8651c2d10ee3f0c44ee941e05aac0a9e241f1b1a Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 28 Jun 2018 21:44:21 +0100 Subject: [PATCH 01/28] u128: Implement u128 For when we need a 128-bit integer --- src/CMakeLists.txt | 2 ++ src/common/u128.cpp | 64 +++++++++++++++++++++++++++++++++++++++++++++ src/common/u128.h | 57 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+) create mode 100644 src/common/u128.cpp create mode 100644 src/common/u128.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7334de8e..0fa66d10 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -31,6 +31,8 @@ add_library(dynarmic common/sm4.cpp common/sm4.h common/string_util.h + common/u128.cpp + common/u128.h common/variant_util.h frontend/A32/decoder/arm.h frontend/A32/decoder/thumb16.h diff --git a/src/common/u128.cpp b/src/common/u128.cpp new file mode 100644 index 00000000..5d97d222 --- /dev/null +++ b/src/common/u128.cpp @@ -0,0 +1,64 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#include + +#include "common/common_types.h" +#include "common/u128.h" + +namespace Dynarmic { + +u128 operator<<(u128 operand, int amount) { + if (amount < 0) { + return operand >> -amount; + } + + if (amount == 0) { + return operand; + } + + if (amount < 64) { + u128 result; + result.lower = (operand.lower << amount); + result.upper = (operand.upper << amount) | (operand.lower >> (64 - amount)); + return result; + } + + if (amount < 128) { + u128 result; + result.upper = operand.lower << (amount - 64); + return result; + } + + return {}; +} + +u128 operator>>(u128 operand, int amount) { + if (amount < 0) { + return operand << -amount; + } + + if (amount == 0) { + return operand; + } + + if (amount < 64) { + u128 result; + result.lower = (operand.lower >> amount) | (operand.upper << (64 - amount)); + result.upper = (operand.upper >> amount); + return result; + } + + if (amount < 128) { + u128 result; + result.lower = operand.upper >> (amount - 64); + return result; + } + + return {}; +} + +} // namespace Dynarmic diff --git a/src/common/u128.h b/src/common/u128.h new file mode 100644 index 00000000..b08143c8 --- /dev/null +++ b/src/common/u128.h @@ -0,0 +1,57 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#pragma once + +#include +#include + +#include "common/bit_util.h" +#include "common/common_types.h" + +namespace Dynarmic { + +struct u128 { + u128() = default; + u128(const u128&) = default; + u128(u128&&) = default; + u128& operator=(const u128&) = default; + u128& operator=(u128&&) = default; + + u128(u64 lower_, u64 upper_) : lower(lower_), upper(upper_) {} + + template + /* implicit */ u128(T value) : lower(value), upper(0) { + static_assert(std::is_integral_v); + static_assert(Common::BitSize() <= Common::BitSize()); + } + + u64 lower = 0; + u64 upper = 0; +}; + +static_assert(Common::BitSize() == 128); +static_assert(std::is_standard_layout_v); +static_assert(std::is_trivially_copyable_v); + +inline u128 operator+(u128 a, u128 b) { + u128 result; + result.lower = a.lower + b.lower; + result.upper = a.upper + b.upper + (a.lower > result.lower); + return result; +} + +inline u128 operator-(u128 a, u128 b) { + u128 result; + result.lower = a.lower - b.lower; + result.upper = a.upper - b.upper - (a.lower < result.lower); + return result; +} + +u128 operator<<(u128 operand, int amount); +u128 operator>>(u128 operand, int amount); + +} // namespace Dynarmic From 62b640b2fa143c39ce545d56485f22008cab6959 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 26 Jun 2018 20:50:39 +0100 Subject: [PATCH 02/28] bit_util: Add ClearBit and ModifyBit --- src/common/bit_util.h | 48 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/src/common/bit_util.h b/src/common/bit_util.h index 775ccda0..0ed24da5 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h @@ -36,14 +36,6 @@ constexpr T Bits(const T value) { #pragma warning(push) #pragma warning(disable:4554) #endif -/// Extracts a single bit at bit_position from value of type T. 
-template -constexpr bool Bit(const T value) { - static_assert(bit_position < BitSize(), "bit_position must be smaller than size of T"); - - return ((value >> bit_position) & 1) != 0; -} - /// Extracts a single bit at bit_position from value of type T. template inline bool Bit(size_t bit_position, const T value) { @@ -51,6 +43,46 @@ inline bool Bit(size_t bit_position, const T value) { return ((value >> bit_position) & 1) != 0; } + +/// Extracts a single bit at bit_position from value of type T. +template +constexpr bool Bit(const T value) { + static_assert(bit_position < BitSize(), "bit_position must be smaller than size of T"); + + return Bit(bit_position, value); +} + +/// Clears a single bit at bit_position from value of type T. +template +inline T ClearBit(size_t bit_position, const T value) { + ASSERT_MSG(bit_position < BitSize(), "bit_position must be smaller than size of T"); + + return value & ~(static_cast(1) << bit_position); +} + +/// Clears a single bit at bit_position from value of type T. +template +constexpr T ClearBit(const T value) { + static_assert(bit_position < BitSize(), "bit_position must be smaller than size of T"); + + return ClearBit(bit_position, value); +} + +/// Modifies a single bit at bit_position from value of type T. +template +inline T ModifyBit(size_t bit_position, const T value, bool new_bit) { + ASSERT_MSG(bit_position < BitSize(), "bit_position must be smaller than size of T"); + + return ClearBit(bit_position, value) | (static_cast(new_bit) << bit_position); +} + +/// Modifies a single bit at bit_position from value of type T. 
+template +constexpr T ModifyBit(const T value, bool new_bit) { + static_assert(bit_position < BitSize(), "bit_position must be smaller than size of T"); + + return ModifyBit(bit_position, value, new_bit); +} #ifdef _MSC_VER #pragma warning(pop) #endif From 95ad0d0a66ee935fd7b0c5b0392abaffe14f9ff2 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Wed, 27 Jun 2018 14:37:52 +0100 Subject: [PATCH 03/28] bit_util: Use Ones to implement Bits --- src/common/bit_util.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/common/bit_util.h b/src/common/bit_util.h index 0ed24da5..b83fc6ec 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h @@ -21,15 +21,23 @@ constexpr size_t BitSize() { return sizeof(T) * CHAR_BIT; } +template +inline T Ones(size_t count) { + ASSERT_MSG(count <= BitSize(), "count larger than bitsize of T"); + if (count == BitSize()) + return static_cast(~static_cast(0)); + return ~(static_cast(~static_cast(0)) << count); +} + /// Extract bits [begin_bit, end_bit] inclusive from value of type T. 
template constexpr T Bits(const T value) { static_assert(begin_bit <= end_bit, "invalid bit range (position of beginning bit cannot be greater than that of end bit)"); static_assert(begin_bit < BitSize(), "begin_bit must be smaller than size of T"); - static_assert(end_bit < BitSize(), "begin_bit must be smaller than size of T"); + static_assert(end_bit < BitSize(), "end_bit must be smaller than size of T"); - return (value >> begin_bit) & ((1 << (end_bit - begin_bit + 1)) - 1); + return (value >> begin_bit) & Ones(end_bit - begin_bit + 1); } #ifdef _MSC_VER @@ -143,14 +151,6 @@ inline size_t LowestSetBit(T value) { return result; } -template -inline T Ones(size_t count) { - ASSERT_MSG(count <= BitSize(), "count larger than bitsize of T"); - if (count == BitSize()) - return ~static_cast(0); - return ~(~static_cast(0) << count); -} - template inline T Replicate(T value, size_t element_size) { ASSERT_MSG(BitSize() % element_size == 0, "bitsize of T not divisible by element_size"); From b00fe23b91dc653e65d814e34cd61d41ecd44b2d Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 28 Jun 2018 21:44:35 +0100 Subject: [PATCH 04/28] bit_util: Implement MostSignificantBit --- src/common/bit_util.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/common/bit_util.h b/src/common/bit_util.h index b83fc6ec..25ed3fc3 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h @@ -151,6 +151,11 @@ inline size_t LowestSetBit(T value) { return result; } +template +inline bool MostSignificantBit(T value) { + return Bit() - 1, T>(value); +} + template inline T Replicate(T value, size_t element_size) { ASSERT_MSG(BitSize() % element_size == 0, "bitsize of T not divisible by element_size"); From d21659152ce7e52699782670cf139125bd345320 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 28 Jun 2018 21:45:13 +0100 Subject: [PATCH 05/28] safe_ops: Implement safe shifting operations Implement shifting operations that perform consistently across architectures without running 
into undefined or implementation-defined behaviour. --- src/CMakeLists.txt | 1 + src/common/safe_ops.h | 109 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 src/common/safe_ops.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0fa66d10..5807525a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -27,6 +27,7 @@ add_library(dynarmic common/memory_pool.cpp common/memory_pool.h common/mp.h + common/safe_ops.h common/scope_exit.h common/sm4.cpp common/sm4.h diff --git a/src/common/safe_ops.h b/src/common/safe_ops.h new file mode 100644 index 00000000..bc6d7b88 --- /dev/null +++ b/src/common/safe_ops.h @@ -0,0 +1,109 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#include + +#include "common/bit_util.h" +#include "common/common_types.h" +#include "common/u128.h" + +namespace Dynarmic::Safe { + +template T LogicalShiftLeft(T value, int shift_amount); +template T LogicalShiftRight(T value, int shift_amount); +template T ArithmeticShiftLeft(T value, int shift_amount); +template T ArithmeticShiftRight(T value, int shift_amount); + +template +T LogicalShiftLeft(T value, int shift_amount) { + static_assert(std::is_integral_v); + + if (shift_amount >= static_cast(Common::BitSize())) { + return 0; + } + + if (shift_amount < 0) { + return LogicalShiftRight(value, -shift_amount); + } + + auto unsigned_value = static_cast>(value); + return static_cast(unsigned_value << shift_amount); +} + +template<> +inline u128 LogicalShiftLeft(u128 value, int shift_amount) { + return value << shift_amount; +} + +template +T LogicalShiftRight(T value, int shift_amount) { + static_assert(std::is_integral_v); + + if (shift_amount >= static_cast(Common::BitSize())) { + return 0; + } + + if (shift_amount < 0) { + return LogicalShiftLeft(value, 
-shift_amount); + } + + auto unsigned_value = static_cast>(value); + return static_cast(unsigned_value >> shift_amount); +} + +template<> +inline u128 LogicalShiftRight(u128 value, int shift_amount) { + return value >> shift_amount; +} + +template +T LogicalShiftRightDouble(T top, T bottom, int shift_amount) { + return LogicalShiftLeft(top, int(Common::BitSize()) - shift_amount) | LogicalShiftRight(bottom, shift_amount); +} + +template +T ArithmeticShiftLeft(T value, int shift_amount) { + static_assert(std::is_integral_v); + + if (shift_amount >= static_cast(Common::BitSize())) { + return 0; + } + + if (shift_amount < 0) { + return ArithmeticShiftRight(value, -shift_amount); + } + + auto signed_value = static_cast>(value); + return static_cast(signed_value << shift_amount); +} + +template +T ArithmeticShiftRight(T value, int shift_amount) { + static_assert(std::is_integral_v); + + if (shift_amount >= static_cast(Common::BitSize())) { + return Common::MostSignificantBit(value) ? ~static_cast(0) : 0; + } + + if (shift_amount < 0) { + return ArithmeticShiftLeft(value, -shift_amount); + } + + auto signed_value = static_cast>(value); + return static_cast(signed_value >> shift_amount); +} + +template +T ArithmeticShiftRightDouble(T top, T bottom, int shift_amount) { + return ArithmeticShiftLeft(top, int(Common::BitSize()) - shift_amount) | LogicalShiftRight(bottom, shift_amount); +} + +template +T Negate(T value) { + return static_cast(-static_cast>(value)); +} + +} // namespace Dynarmic::Safe From 66381352f376df1fab2337cf42e95b7b378ee824 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 14 Jul 2018 09:41:36 +0100 Subject: [PATCH 06/28] fp: Add FPInfo Provides information about floating-point format for various bit sizes --- src/CMakeLists.txt | 1 + src/common/fp/info.h | 58 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 src/common/fp/info.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5807525a..f3c59dfa 
100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,6 +17,7 @@ add_library(dynarmic common/crc32.cpp common/crc32.h common/fp_util.h + common/fp/info.h common/fp/rounding_mode.h common/intrusive_list.h common/iterator_util.h diff --git a/src/common/fp/info.h b/src/common/fp/info.h new file mode 100644 index 00000000..1c4032e0 --- /dev/null +++ b/src/common/fp/info.h @@ -0,0 +1,58 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include "common/common_types.h" + +namespace Dynarmic::FP { + +template +struct FPInfo {}; + +template<> +struct FPInfo { + static constexpr size_t total_width = 32; + static constexpr size_t exponent_width = 8; + static constexpr size_t explicit_mantissa_width = 23; + static constexpr size_t mantissa_width = explicit_mantissa_width + 1; + + static constexpr u32 implicit_leading_bit = u32(1) << explicit_mantissa_width; + static constexpr u32 sign_mask = 0x80000000; + static constexpr u32 exponent_mask = 0x7F800000; + static constexpr u32 mantissa_mask = 0x007FFFFF; + + static constexpr int exponent_min = -126; + static constexpr int exponent_max = 127; + static constexpr int exponent_bias = 127; + + static constexpr u32 Zero(bool sign) { return sign ? 
sign_mask : 0; } + static constexpr u32 Infinity(bool sign) { return exponent_mask | Zero(sign); } + static constexpr u32 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); } +}; + +template<> +struct FPInfo { + static constexpr size_t total_width = 64; + static constexpr size_t exponent_width = 11; + static constexpr size_t explicit_mantissa_width = 52; + static constexpr size_t mantissa_width = explicit_mantissa_width + 1; + + static constexpr u64 implicit_leading_bit = u64(1) << explicit_mantissa_width; + static constexpr u64 sign_mask = 0x8000'0000'0000'0000; + static constexpr u64 exponent_mask = 0x7FF0'0000'0000'0000; + static constexpr u64 mantissa_mask = 0x000F'FFFF'FFFF'FFFF; + + static constexpr int exponent_min = -1022; + static constexpr int exponent_max = 1023; + static constexpr int exponent_bias = 1023; + + static constexpr u64 Zero(bool sign) { return sign ? sign_mask : 0; } + static constexpr u64 Infinity(bool sign) { return exponent_mask | Zero(sign); } + static constexpr u64 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); } +}; + +} // namespace Dynarmic::FP From c41a38b13e74f1ab9d361231c56b3c73d4b4d329 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 26 Jun 2018 20:51:25 +0100 Subject: [PATCH 07/28] fp: Add FPSR --- src/CMakeLists.txt | 1 + src/common/fp/fpsr.h | 162 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 src/common/fp/fpsr.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f3c59dfa..30028224 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,6 +17,7 @@ add_library(dynarmic common/crc32.cpp common/crc32.h common/fp_util.h + common/fp/fpsr.h common/fp/info.h common/fp/rounding_mode.h common/intrusive_list.h diff --git a/src/common/fp/fpsr.h b/src/common/fp/fpsr.h new file mode 100644 index 00000000..3b8ae7e0 --- /dev/null +++ b/src/common/fp/fpsr.h @@ -0,0 +1,162 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include + +#include "common/bit_util.h" +#include "common/common_types.h" + +namespace Dynarmic::FP { + +/** + * Representation of the Floating-Point Status Register. + */ +class FPSR final { +public: + FPSR() = default; + FPSR(const FPSR&) = default; + FPSR(FPSR&&) = default; + explicit FPSR(u32 data) : value{data & mask} {} + + FPSR& operator=(const FPSR&) = default; + FPSR& operator=(FPSR&&) = default; + FPSR& operator=(u32 data) { + value = data & mask; + return *this; + } + + /// Get negative condition flag + bool N() const { + return Common::Bit<31>(value); + } + + /// Set negative condition flag + void N(bool N_) { + value = Common::ModifyBit<31>(value, N_); + } + + /// Get zero condition flag + bool Z() const { + return Common::Bit<30>(value); + } + + /// Set zero condition flag + void Z(bool Z_) { + value = Common::ModifyBit<30>(value, Z_); + } + + /// Get carry condition flag + bool C() const { + return Common::Bit<29>(value); + } + + /// Set carry condition flag + void C(bool C_) { + value = Common::ModifyBit<29>(value, C_); + } + + /// Get overflow condition flag + bool V() const { + return Common::Bit<28>(value); + } + + /// Set overflow condition flag + void V(bool V_) { + value = Common::ModifyBit<28>(value, V_); + } + + /// Get cumulative saturation bit + bool QC() const { + return Common::Bit<27>(value); + } + + /// Set cumulative saturation bit + void QC(bool QC_) { + value = Common::ModifyBit<27>(value, QC_); + } + + /// Get input denormal floating-point exception bit + bool IDC() const { + return Common::Bit<7>(value); + } + + /// Set input denormal floating-point exception bit + void IDC(bool IDC_) { + value = Common::ModifyBit<7>(value, IDC_); + } + + /// Get inexact cumulative floating-point exception bit + bool IXC() const { + return 
Common::Bit<4>(value); + } + + /// Set inexact cumulative floating-point exception bit + void IXC(bool IXC_) { + value = Common::ModifyBit<4>(value, IXC_); + } + + /// Get underflow cumulative floating-point exception bit + bool UFC() const { + return Common::Bit<3>(value); + } + + /// Set underflow cumulative floating-point exception bit + void UFC(bool UFC_) { + value = Common::ModifyBit<3>(value, UFC_); + } + + /// Get overflow cumulative floating-point exception bit + bool OFC() const { + return Common::Bit<2>(value); + } + + /// Set overflow cumulative floating-point exception bit + void OFC(bool OFC_) { + value = Common::ModifyBit<2>(value, OFC_); + } + + /// Get divide by zero cumulative floating-point exception bit + bool DZC() const { + return Common::Bit<1>(value); + } + + /// Set divide by zero cumulative floating-point exception bit + void DZC(bool DZC_) { + value = Common::ModifyBit<1>(value, DZC_); + } + + /// Get invalid operation cumulative floating-point exception bit + bool IOC() const { + return Common::Bit<0>(value); + } + + /// Set invalid operation cumulative floating-point exception bit + void IOC(bool IOC_) { + value = Common::ModifyBit<0>(value, IOC_); + } + + /// Gets the underlying raw value within the FPSR. + u32 Value() const { + return value; + } + +private: + // Bits 5-6 and 8-26 are reserved. 
+ static constexpr u32 mask = 0xF800009F; + u32 value = 0; +}; + +inline bool operator==(FPSR lhs, FPSR rhs) { + return lhs.Value() == rhs.Value(); +} + +inline bool operator!=(FPSR lhs, FPSR rhs) { + return !operator==(lhs, rhs); +} + +} // namespace Dynarmic::FP From 3cb98e15603ed7994390c6408831e23d46a59771 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 26 Jun 2018 20:54:42 +0100 Subject: [PATCH 08/28] fp: Move fp_util to fp/util --- src/CMakeLists.txt | 2 +- src/backend_x64/emit_x64_floating_point.cpp | 10 +++++----- src/backend_x64/emit_x64_vector_floating_point.cpp | 6 +++--- src/common/{fp_util.h => fp/util.h} | 6 ++---- 4 files changed, 11 insertions(+), 13 deletions(-) rename src/common/{fp_util.h => fp/util.h} (97%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 30028224..0a722b13 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,10 +16,10 @@ add_library(dynarmic common/common_types.h common/crc32.cpp common/crc32.h - common/fp_util.h common/fp/fpsr.h common/fp/info.h common/fp/rounding_mode.h + common/fp/util.h common/intrusive_list.h common/iterator_util.h common/llvm_disassemble.cpp diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp index 12ac6fda..253c235d 100644 --- a/src/backend_x64/emit_x64_floating_point.cpp +++ b/src/backend_x64/emit_x64_floating_point.cpp @@ -11,7 +11,7 @@ #include "backend_x64/emit_x64.h" #include "common/assert.h" #include "common/common_types.h" -#include "common/fp_util.h" +#include "common/fp/util.h" #include "frontend/ir/basic_block.h" #include "frontend/ir/microinstruction.h" #include "frontend/ir/opcodes.h" @@ -120,7 +120,7 @@ static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya code.movd(code.ABI_PARAM1.cvt32(), a); code.movd(code.ABI_PARAM2.cvt32(), b); code.CallFunction(static_cast([](u32 a, u32 b) -> u32 { - return *Common::ProcessNaNs(a, b); + return *FP::ProcessNaNs(a, b); })); code.movd(a, 
code.ABI_RETURN.cvt32()); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx())); @@ -149,7 +149,7 @@ static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya code.movd(code.ABI_PARAM2.cvt32(), b); code.movd(code.ABI_PARAM3.cvt32(), c); code.CallFunction(static_cast([](u32 a, u32 b, u32 c) -> u32 { - return *Common::ProcessNaNs(a, b, c); + return *FP::ProcessNaNs(a, b, c); })); code.movd(a, code.ABI_RETURN.cvt32()); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx())); @@ -187,7 +187,7 @@ static void PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya code.movq(code.ABI_PARAM1, a); code.movq(code.ABI_PARAM2, b); code.CallFunction(static_cast([](u64 a, u64 b) -> u64 { - return *Common::ProcessNaNs(a, b); + return *FP::ProcessNaNs(a, b); })); code.movq(a, code.ABI_RETURN); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx())); @@ -213,7 +213,7 @@ static void PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya code.movq(code.ABI_PARAM2, b); code.movq(code.ABI_PARAM3, c); code.CallFunction(static_cast([](u64 a, u64 b, u64 c) -> u64 { - return *Common::ProcessNaNs(a, b, c); + return *FP::ProcessNaNs(a, b, c); })); code.movq(a, code.ABI_RETURN); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx())); diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp index df1c7634..052b53f1 100644 --- a/src/backend_x64/emit_x64_vector_floating_point.cpp +++ b/src/backend_x64/emit_x64_vector_floating_point.cpp @@ -10,7 +10,7 @@ #include "backend_x64/block_of_code.h" #include "backend_x64/emit_x64.h" #include "common/bit_util.h" -#include "common/fp_util.h" +#include "common/fp/util.h" #include "frontend/ir/basic_block.h" #include "frontend/ir/microinstruction.h" @@ -69,9 +69,9 @@ static void HandleNaNs(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& xm 
code.CallFunction(static_cast( [](RegArray& result, const RegArray& a, const RegArray& b) { for (size_t i = 0; i < result.size(); ++i) { - if (auto r = Common::ProcessNaNs(a[i], b[i])) { + if (auto r = FP::ProcessNaNs(a[i], b[i])) { result[i] = *r; - } else if (Common::IsNaN(result[i])) { + } else if (FP::IsNaN(result[i])) { result[i] = NaNWrapper::value; } } diff --git a/src/common/fp_util.h b/src/common/fp/util.h similarity index 97% rename from src/common/fp_util.h rename to src/common/fp/util.h index 9469a223..8241dbf3 100644 --- a/src/common/fp_util.h +++ b/src/common/fp/util.h @@ -8,8 +8,7 @@ #include -namespace Dynarmic { -namespace Common { +namespace Dynarmic::FP { /// Is 32-bit floating point value a QNaN? constexpr bool IsQNaN(u32 value) { @@ -110,5 +109,4 @@ inline boost::optional ProcessNaNs(u64 a, u64 b, u64 c) { return boost::none; } -} // namespace Common -} // namespace Dynarmic +} // namespace Dynarmic::FP From 487565891724b9687ba153573d294621f981fd89 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Wed, 27 Jun 2018 13:51:39 +0100 Subject: [PATCH 09/28] fp: Implement FPProcessException --- src/CMakeLists.txt | 2 + src/common/fp/process_exception.cpp | 58 +++++++++++++++++++++++++++++ src/common/fp/process_exception.h | 27 ++++++++++++++ 3 files changed, 87 insertions(+) create mode 100644 src/common/fp/process_exception.cpp create mode 100644 src/common/fp/process_exception.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0a722b13..0f311f91 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,6 +18,8 @@ add_library(dynarmic common/crc32.h common/fp/fpsr.h common/fp/info.h + common/fp/process_exception.cpp + common/fp/process_exception.h common/fp/rounding_mode.h common/fp/util.h common/intrusive_list.h diff --git a/src/common/fp/process_exception.cpp b/src/common/fp/process_exception.cpp new file mode 100644 index 00000000..9bb5a8a6 --- /dev/null +++ b/src/common/fp/process_exception.cpp @@ -0,0 +1,58 @@ +/* This file is part 
of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#include "common/assert.h" +#include "common/fp/fpsr.h" +#include "common/fp/process_exception.h" +#include "frontend/A64/FPCR.h" + +namespace Dynarmic::FP { + +void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr) { + switch (exception) { + case FPExc::InvalidOp: + if (fpcr.IOE()) { + UNIMPLEMENTED(); + } + fpsr.IOC(true); + break; + case FPExc::DivideByZero: + if (fpcr.DZE()) { + UNIMPLEMENTED(); + } + fpsr.DZC(true); + break; + case FPExc::Overflow: + if (fpcr.OFE()) { + UNIMPLEMENTED(); + } + fpsr.OFC(true); + break; + case FPExc::Underflow: + if (fpcr.UFE()) { + UNIMPLEMENTED(); + } + fpsr.UFC(true); + break; + case FPExc::Inexact: + if (fpcr.IXE()) { + UNIMPLEMENTED(); + } + fpsr.IXC(true); + break; + case FPExc::InputDenorm: + if (fpcr.IDE()) { + UNIMPLEMENTED(); + } + fpsr.IDC(true); + break; + default: + UNREACHABLE(); + break; + } +} + +} // namespace Dynarmic::FP diff --git a/src/common/fp/process_exception.h b/src/common/fp/process_exception.h new file mode 100644 index 00000000..637f1d77 --- /dev/null +++ b/src/common/fp/process_exception.h @@ -0,0 +1,27 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#pragma once + +#include "common/fp/fpsr.h" +#include "frontend/A64/FPCR.h" + +namespace Dynarmic::FP { + +using FPCR = A64::FPCR; + +enum class FPExc { + InvalidOp, + DivideByZero, + Overflow, + Underflow, + Inexact, + InputDenorm, +}; + +void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr); + +} // namespace Dynarmic::FP From 4ab029c11430bf4fb4bf4d5924556710606767f1 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Wed, 27 Jun 2018 13:52:20 +0100 Subject: [PATCH 10/28] fp: Implement FPUnpack --- src/CMakeLists.txt | 2 ++ src/common/fp/unpacked.cpp | 54 +++++++++++++++++++++++++++++++++++++ src/common/fp/unpacked.h | 43 +++++++++++++++++++++++++++++ tests/CMakeLists.txt | 1 + tests/fp/unpacked_tests.cpp | 35 ++++++++++++++++++++++++ 5 files changed, 135 insertions(+) create mode 100644 src/common/fp/unpacked.cpp create mode 100644 src/common/fp/unpacked.h create mode 100644 tests/fp/unpacked_tests.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0f311f91..c282e8d8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -21,6 +21,8 @@ add_library(dynarmic common/fp/process_exception.cpp common/fp/process_exception.h common/fp/rounding_mode.h + common/fp/unpacked.cpp + common/fp/unpacked.h common/fp/util.h common/intrusive_list.h common/iterator_util.h diff --git a/src/common/fp/unpacked.cpp b/src/common/fp/unpacked.cpp new file mode 100644 index 00000000..1432bf36 --- /dev/null +++ b/src/common/fp/unpacked.cpp @@ -0,0 +1,54 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#include "common/fp/info.h" +#include "common/fp/process_exception.h" +#include "common/fp/unpacked.h" + +namespace Dynarmic::FP { + +template +std::tuple> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) { + constexpr size_t sign_bit = FPInfo::exponent_width + FPInfo::explicit_mantissa_width; + constexpr size_t exponent_high_bit = FPInfo::exponent_width + FPInfo::explicit_mantissa_width - 1; + constexpr size_t exponent_low_bit = FPInfo::explicit_mantissa_width; + constexpr size_t mantissa_high_bit = FPInfo::explicit_mantissa_width - 1; + constexpr size_t mantissa_low_bit = 0; + constexpr int denormal_exponent = FPInfo::exponent_min - int(FPInfo::explicit_mantissa_width); + + const bool sign = Common::Bit(op); + const FPT exp_raw = Common::Bits(op); + const FPT frac_raw = Common::Bits(op); + + if (exp_raw == 0) { + if (frac_raw == 0 || fpcr.FZ()) { + if (frac_raw != 0) { + FPProcessException(FPExc::InputDenorm, fpcr, fpsr); + } + return {FPType::Zero, sign, {sign, 0, 0}}; + } + + return {FPType::Nonzero, sign, {sign, denormal_exponent, frac_raw}}; + } + + if (exp_raw == Common::Ones(FPInfo::exponent_width)) { + if (frac_raw == 0) { + return {FPType::Infinity, sign, {sign, 1000000, 1}}; + } + + const bool is_quiet = Common::Bit(frac_raw); + return {is_quiet ? FPType::QNaN : FPType::SNaN, sign, {sign, 0, 0}}; + } + + const int exp = static_cast(exp_raw) - FPInfo::exponent_bias - FPInfo::explicit_mantissa_width; + const u64 frac = frac_raw | FPInfo::implicit_leading_bit; + return {FPType::Nonzero, sign, {sign, exp, frac}}; +} + +template std::tuple> FPUnpack(u32 op, FPCR fpcr, FPSR& fpsr); +template std::tuple> FPUnpack(u64 op, FPCR fpcr, FPSR& fpsr); + +} // namespace Dynarmic::FP diff --git a/src/common/fp/unpacked.h b/src/common/fp/unpacked.h new file mode 100644 index 00000000..f815aece --- /dev/null +++ b/src/common/fp/unpacked.h @@ -0,0 +1,43 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include + +#include "common/common_types.h" +#include "common/fp/fpsr.h" +#include "frontend/A64/FPCR.h" + +namespace Dynarmic::FP { + +using FPCR = A64::FPCR; + +enum class FPType { + Nonzero, + Zero, + Infinity, + QNaN, + SNaN, +}; + +/// value = (sign ? -1 : +1) * mantissa * 2^exponent +template +struct FPUnpacked { + bool sign; + int exponent; + MantissaT mantissa; +}; + +template +inline bool operator==(const FPUnpacked& a, const FPUnpacked& b) { + return std::tie(a.sign, a.exponent, a.mantissa) == std::tie(b.sign, b.exponent, b.mantissa); +} + +template +std::tuple> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr); + +} // namespace Dynarmic::FP diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7a915fab..2d1902ee 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -29,6 +29,7 @@ add_executable(dynarmic_tests A64/inst_gen.cpp A64/inst_gen.h A64/testenv.h + fp/unpacked_tests.cpp main.cpp rand_int.h ) diff --git a/tests/fp/unpacked_tests.cpp b/tests/fp/unpacked_tests.cpp new file mode 100644 index 00000000..4c5a28b9 --- /dev/null +++ b/tests/fp/unpacked_tests.cpp @@ -0,0 +1,35 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#include + +#include "common/fp/unpacked.h" + +using namespace Dynarmic; +using namespace Dynarmic::FP; + +TEST_CASE("FPUnpack Tests", "[fp]") { + const static std::vector>, u32>> test_cases { + {0x00000000, {FPType::Zero, false, {false, 0, 0}}, 0}, + {0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0}, + {0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0}, + {0x7F800001, {FPType::SNaN, false, {false, 0, 0}}, 0}, + {0xFF800001, {FPType::SNaN, true, {true, 0, 0}}, 0}, + {0x7FC00001, {FPType::QNaN, false, {false, 0, 0}}, 0}, + {0xFFC00001, {FPType::QNaN, true, {true, 0, 0}}, 0}, + {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149. + }; + + const FPCR fpcr; + for (const auto& [input, expected_output, expected_fpsr] : test_cases) { + FPSR fpsr; + const auto output = FPUnpack(input, fpcr, fpsr); + + INFO("Input: " << std::hex << input); + REQUIRE(output == expected_output); + REQUIRE(fpsr.Value() == expected_fpsr); + } +} From 7360a2579b3ac3c19debd2ec4115e1805b0e9547 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 15 Jul 2018 14:25:31 +0100 Subject: [PATCH 11/28] mp: Implement metaprogramming library --- src/CMakeLists.txt | 11 ++++++ src/common/mp/append.h | 27 +++++++++++++++ src/common/mp/bind.h | 18 ++++++++++ src/common/mp/cartesian_product.h | 51 +++++++++++++++++++++++++++ src/common/mp/concat.h | 57 +++++++++++++++++++++++++++++++ src/common/mp/fapply.h | 27 +++++++++++++++ src/common/mp/fmap.h | 27 +++++++++++++++ src/common/mp/integer.h | 51 +++++++++++++++++++++++++++ src/common/mp/list.h | 15 ++++++++ src/common/mp/lut.h | 23 +++++++++++++ src/common/mp/to_tuple.h | 29 ++++++++++++++++ src/common/mp/vlift.h | 17 +++++++++ src/common/mp/vllift.h | 31 +++++++++++++++++ tests/CMakeLists.txt | 1 + tests/mp.cpp | 27 +++++++++++++++ 15 files changed, 412 insertions(+) create mode 100644 src/common/mp/append.h create mode 100644 src/common/mp/bind.h create mode 100644 
src/common/mp/cartesian_product.h create mode 100644 src/common/mp/concat.h create mode 100644 src/common/mp/fapply.h create mode 100644 src/common/mp/fmap.h create mode 100644 src/common/mp/integer.h create mode 100644 src/common/mp/list.h create mode 100644 src/common/mp/lut.h create mode 100644 src/common/mp/to_tuple.h create mode 100644 src/common/mp/vlift.h create mode 100644 src/common/mp/vllift.h create mode 100644 tests/mp.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c282e8d8..38ad8601 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -33,6 +33,17 @@ add_library(dynarmic common/memory_pool.cpp common/memory_pool.h common/mp.h + common/mp/append.h + common/mp/bind.h + common/mp/cartesian_product.h + common/mp/concat.h + common/mp/fapply.h + common/mp/fmap.h + common/mp/list.h + common/mp/lut.h + common/mp/to_tuple.h + common/mp/vlift.h + common/mp/vllift.h common/safe_ops.h common/scope_exit.h common/sm4.cpp diff --git a/src/common/mp/append.h b/src/common/mp/append.h new file mode 100644 index 00000000..e6ad75ed --- /dev/null +++ b/src/common/mp/append.h @@ -0,0 +1,27 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +namespace Dynarmic::Common::mp { + +namespace detail { + +template +struct append_impl; + +template class LT, class... T1, class... T2> +struct append_impl, T2...> { + using type = LT; +}; + +} // namespace detail + +/// Append items T to list L +template +using append = typename detail::append_impl::type; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/bind.h b/src/common/mp/bind.h new file mode 100644 index 00000000..3666ea5b --- /dev/null +++ b/src/common/mp/bind.h @@ -0,0 +1,18 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +namespace Dynarmic::Common::mp { + +/// Binds the first sizeof...(A) arguments of metafunction F with arguments A +template class F, class... A> +struct bind { + template + using type = F; +}; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/cartesian_product.h b/src/common/mp/cartesian_product.h new file mode 100644 index 00000000..919c7eef --- /dev/null +++ b/src/common/mp/cartesian_product.h @@ -0,0 +1,51 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include "common/mp/append.h" +#include "common/mp/bind.h" +#include "common/mp/concat.h" +#include "common/mp/fmap.h" +#include "common/mp/list.h" + +namespace Dynarmic::Common::mp { + +namespace detail { + +template +struct cartesian_product_impl{}; + +template +struct cartesian_product_impl { + using type = RL; +}; + +template class LT, class... RT, class... T1> +struct cartesian_product_impl, LT> { + using type = concat< + fmap::template type, list>... + >; +}; + +template +struct cartesian_product_impl { + using type = typename cartesian_product_impl< + typename cartesian_product_impl::type, + L2, + Ls... 
+ >::type; +}; + +} // namespace detail + +/// Produces the cartesian product of a set of lists +/// For example: +/// cartesian_product, list> == list, list, list, list +template +using cartesian_product = typename detail::cartesian_product_impl, Ls...>::type; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/concat.h b/src/common/mp/concat.h new file mode 100644 index 00000000..e41dd9e6 --- /dev/null +++ b/src/common/mp/concat.h @@ -0,0 +1,57 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include "common/mp/list.h" + +namespace Dynarmic::Common::mp { + +namespace detail { + +template +struct concat_impl; + +template<> +struct concat_impl<> { + using type = list<>; +}; + +template +struct concat_impl { + using type = L; +}; + +template class LT, class... T1, class... T2, class... Ls> +struct concat_impl, LT, Ls...> { + using type = typename concat_impl, Ls...>::type; +}; + +template class LT, + class... T1, class... T2, class... T3, class... T4, class... T5, class... T6, class... T7, class... T8, + class... T9, class... T10, class... T11, class... T12, class... T13, class... T14, class... T15, class... T16, + class... Ls> +struct concat_impl< + LT, LT, LT, LT, LT, LT, LT, LT, + LT, LT, LT, LT, LT, LT, LT, LT, + Ls...> +{ + using type = typename concat_impl< + LT< + T1..., T2..., T3..., T4..., T5..., T6..., T7..., T8..., + T9..., T10..., T11..., T12..., T13..., T14..., T15..., T16... + >, + Ls... 
+ >::type; +}; + +} // namespace detail + +/// Concatenate lists together +template +using concat = typename detail::concat_impl::type; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/fapply.h b/src/common/mp/fapply.h new file mode 100644 index 00000000..1b84efc4 --- /dev/null +++ b/src/common/mp/fapply.h @@ -0,0 +1,27 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +namespace Dynarmic::Common::mp { + +namespace detail { + +template class F, class L> +struct fapply_impl; + +template class F, template class LT, class... T> +struct fapply_impl> { + using type = F; +}; + +} // namespace detail + +/// Invokes metafunction F where the arguments are all the members of list L +template class F, class L> +using fapply = typename detail::fapply_impl::type; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/fmap.h b/src/common/mp/fmap.h new file mode 100644 index 00000000..d05766a6 --- /dev/null +++ b/src/common/mp/fmap.h @@ -0,0 +1,27 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +namespace Dynarmic::Common::mp { + +namespace detail { + +template class F, class L> +struct fmap_impl; + +template class F, template class LT, class... 
T> +struct fmap_impl> { + using type = LT...>; +}; + +} // namespace detail + +/// Metafunction that applies each element of list L to metafunction F +template class F, class L> +using fmap = typename detail::fmap_impl::type; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/integer.h b/src/common/mp/integer.h new file mode 100644 index 00000000..ee9f6201 --- /dev/null +++ b/src/common/mp/integer.h @@ -0,0 +1,51 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include +#include + +namespace Dynarmic::Common::mp { + +namespace detail { + +template +struct integer_of_size_impl{}; + +template<> +struct integer_of_size_impl<8> { + using unsigned_type = std::uint8_t; + using signed_type = std::int8_t; +}; + +template<> +struct integer_of_size_impl<16> { + using unsigned_type = std::uint16_t; + using signed_type = std::int16_t; +}; + +template<> +struct integer_of_size_impl<32> { + using unsigned_type = std::uint32_t; + using signed_type = std::int32_t; +}; + +template<> +struct integer_of_size_impl<64> { + using unsigned_type = std::uint64_t; + using signed_type = std::int64_t; +}; + +} // namespace detail + +template +using unsigned_integer_of_size = typename detail::integer_of_size_impl::unsigned_type; + +template +using signed_integer_of_size = typename detail::integer_of_size_impl::signed_type; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/list.h b/src/common/mp/list.h new file mode 100644 index 00000000..96c00697 --- /dev/null +++ b/src/common/mp/list.h @@ -0,0 +1,15 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#pragma once + +namespace Dynarmic::Common::mp { + +/// Contains a list of types +template +struct list {}; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/lut.h b/src/common/mp/lut.h new file mode 100644 index 00000000..5d644197 --- /dev/null +++ b/src/common/mp/lut.h @@ -0,0 +1,23 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include +#include +#include + +#include "common/mp/list.h" + +namespace Dynarmic::Common::mp { + +template +inline auto GenerateLookupTableFromList(Function f, list) { + static const std::array, sizeof...(Values)> pair_array{f(Values{})...}; + return std::map(pair_array.begin(), pair_array.end()); +} + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/to_tuple.h b/src/common/mp/to_tuple.h new file mode 100644 index 00000000..1e782a1d --- /dev/null +++ b/src/common/mp/to_tuple.h @@ -0,0 +1,29 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include + +namespace Dynarmic::Common::mp { + +namespace detail { + +template +struct to_tuple_impl; + +template class LT, class... T> +struct to_tuple_impl> { + static constexpr auto value = std::make_tuple(static_cast(T::value)...); +}; + +} // namespace detail + +/// Metafunction that converts a list of metavalues to a tuple value. +template +constexpr auto to_tuple = detail::to_tuple_impl::value; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/vlift.h b/src/common/mp/vlift.h new file mode 100644 index 00000000..c46874d3 --- /dev/null +++ b/src/common/mp/vlift.h @@ -0,0 +1,17 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include + +namespace Dynarmic::Common::mp { + +/// Lifts a value into a type +template +using vlift = std::integral_constant; + +} // namespace Dynarmic::Common::mp diff --git a/src/common/mp/vllift.h b/src/common/mp/vllift.h new file mode 100644 index 00000000..25eb323d --- /dev/null +++ b/src/common/mp/vllift.h @@ -0,0 +1,31 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include + +#include "common/mp/list.h" + +namespace Dynarmic::Common::mp { + +namespace detail { + +template +struct vllift_impl{}; + +template +struct vllift_impl> { + using type = list...>; +}; + +} // namespace detail + +/// Lifts values in value list VL to create a type list. +template +using vllift = typename detail::vllift_impl::type; + +} // namespace Dynarmic::Common::mp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 2d1902ee..576bf9e6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -31,6 +31,7 @@ add_executable(dynarmic_tests A64/testenv.h fp/unpacked_tests.cpp main.cpp + mp.cpp rand_int.h ) diff --git a/tests/mp.cpp b/tests/mp.cpp new file mode 100644 index 00000000..7f34a525 --- /dev/null +++ b/tests/mp.cpp @@ -0,0 +1,27 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#include + +#include "common/mp/cartesian_product.h" + +using namespace Dynarmic::Common::mp; + +static_assert( + std::is_same_v< + cartesian_product, list, list>, + list< + list, + list, + list, + list, + list, + list, + list, + list + > + > +); From 55d590c01fc50657c2e6e8b4cdf9bf5cdaf879bd Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 28 Jun 2018 21:45:48 +0100 Subject: [PATCH 12/28] FPCR: Add AHP setter and FZ16 getter --- src/frontend/A64/FPCR.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/frontend/A64/FPCR.h b/src/frontend/A64/FPCR.h index 9e82733e..1ebf1a56 100644 --- a/src/frontend/A64/FPCR.h +++ b/src/frontend/A64/FPCR.h @@ -37,6 +37,11 @@ public: return Common::Bit<26>(value); } + /// Alternate half-precision control flag. + void AHP(bool AHP_) { + value = Common::ModifyBit<26>(value, AHP_); + } + /// Default NaN mode control bit. bool DN() const { return Common::Bit<25>(value); @@ -52,6 +57,10 @@ public: return static_cast(Common::Bits<22, 23>(value)); } + bool FZ16() const { + return Common::Bit<19>(value); + } + /// Input denormal exception trap enable flag. 
bool IDE() const { return Common::Bit<15>(value); From 8668d6188163c64b58d33ce2709bb6f0d6dace9e Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 28 Jun 2018 21:46:02 +0100 Subject: [PATCH 13/28] fp/unpacked: Implement FPRound --- src/common/fp/unpacked.cpp | 125 +++++++++++++++++++++++++++++++++++++ src/common/fp/unpacked.h | 14 +++++ 2 files changed, 139 insertions(+) diff --git a/src/common/fp/unpacked.cpp b/src/common/fp/unpacked.cpp index 1432bf36..6f145723 100644 --- a/src/common/fp/unpacked.cpp +++ b/src/common/fp/unpacked.cpp @@ -7,6 +7,7 @@ #include "common/fp/info.h" #include "common/fp/process_exception.h" #include "common/fp/unpacked.h" +#include "common/safe_ops.h" namespace Dynarmic::FP { @@ -51,4 +52,128 @@ std::tuple> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr template std::tuple> FPUnpack(u32 op, FPCR fpcr, FPSR& fpsr); template std::tuple> FPUnpack(u64 op, FPCR fpcr, FPSR& fpsr); +template +std::tuple Normalize(FPUnpacked op) { + const int highest_set_bit = Common::HighestSetBit(op.mantissa); + const int shift_amount = highest_set_bit - static_cast(F); + const MantissaT mantissa = Safe::LogicalShiftRight(op.mantissa, shift_amount); + const MantissaT error = Safe::LogicalShiftRightDouble(op.mantissa, static_cast(0), shift_amount); + const int exponent = op.exponent + highest_set_bit; + return std::make_tuple(op.sign, exponent, mantissa, error); +} + +template +FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { + ASSERT(op.mantissa != 0); + ASSERT(rounding != RoundingMode::ToNearest_TieAwayFromZero); + + constexpr int minimum_exp = FPInfo::exponent_min; + constexpr size_t E = FPInfo::exponent_width; + constexpr size_t F = FPInfo::explicit_mantissa_width; + constexpr bool isFP16 = FPInfo::total_width == 16; + + auto [sign, exponent, mantissa, error] = Normalize(op); + + if (((!isFP16 && fpcr.FZ()) || (isFP16 && fpcr.FZ16())) && exponent < minimum_exp) { + fpsr.UFC(true); + return FPInfo::Zero(sign); + } + + int biased_exp = 
std::max(exponent - minimum_exp + 1, 0); + if (biased_exp == 0) { + error = Safe::LogicalShiftRightDouble(mantissa, error, minimum_exp - exponent); + mantissa = Safe::LogicalShiftRight(mantissa, minimum_exp - exponent); + } + + if (biased_exp == 0 && (error != 0 || fpcr.UFE())) { + FPProcessException(FPExc::Underflow, fpcr, fpsr); + } + + bool round_up = false, overflow_to_inf = false; + switch (rounding) { + case RoundingMode::ToNearest_TieEven: { + constexpr MantissaT half = static_cast(1) << (Common::BitSize() - 1); + round_up = (error > half) || (error == half && Common::Bit<0>(mantissa)); + overflow_to_inf = true; + break; + } + case RoundingMode::TowardsPlusInfinity: + round_up = error != 0 && !sign; + overflow_to_inf = !sign; + break; + case RoundingMode::TowardsMinusInfinity: + round_up = error != 0 && sign; + overflow_to_inf = sign; + break; + default: + break; + } + + if (round_up) { + if ((mantissa & FPInfo::mantissa_mask) == FPInfo::mantissa_mask) { + // Overflow on rounding up is going to happen + if (mantissa == FPInfo::mantissa_mask) { + // Rounding up from denormal to normal + mantissa++; + biased_exp++; + } else { + // Rounding up to next exponent + mantissa = (mantissa + 1) / 2; + biased_exp++; + } + } else { + mantissa++; + } + } + + if (error != 0 && rounding == RoundingMode::ToOdd) { + mantissa = Common::ModifyBit<0>(mantissa, true); + } + + FPT result = 0; +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4127) // C4127: conditional expression is constant +#endif + if (!isFP16 || !fpcr.AHP()) { +#ifdef _MSC_VER +#pragma warning(pop) +#endif + constexpr int max_biased_exp = (1 << E) - 1; + if (biased_exp >= max_biased_exp) { + result = overflow_to_inf ? FPInfo::Infinity(sign) : FPInfo::MaxNormal(sign); + FPProcessException(FPExc::Overflow, fpcr, fpsr); + FPProcessException(FPExc::Inexact, fpcr, fpsr); + } else { + result = sign ? 
1 : 0; + result <<= E; + result += biased_exp; + result <<= F; + result |= static_cast(mantissa) & FPInfo::mantissa_mask; + if (error != 0) { + FPProcessException(FPExc::Inexact, fpcr, fpsr); + } + } + } else { + constexpr int max_biased_exp = (1 << E); + if (biased_exp >= max_biased_exp) { + result = sign ? 0xFFFF : 0x7FFF; + FPProcessException(FPExc::InvalidOp, fpcr, fpsr); + } else { + result = sign ? 1 : 0; + result <<= E; + result += biased_exp; + result <<= F; + result |= static_cast(mantissa) & FPInfo::mantissa_mask; + if (error != 0) { + FPProcessException(FPExc::Inexact, fpcr, fpsr); + } + } + } + return result; +} + +template u32 FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); +template u64 FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); + } // namespace Dynarmic::FP diff --git a/src/common/fp/unpacked.h b/src/common/fp/unpacked.h index f815aece..af8513e6 100644 --- a/src/common/fp/unpacked.h +++ b/src/common/fp/unpacked.h @@ -40,4 +40,18 @@ inline bool operator==(const FPUnpacked& a, const FPUnpacked std::tuple> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr); +template +FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); + +template +FPT FPRound(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { + fpcr.AHP(false); + return FPRoundBase(op, fpcr, rounding, fpsr); +} + +template +FPT FPRound(FPUnpacked op, FPCR fpcr, FPSR& fpsr) { + return FPRound(op, fpcr, fpcr.RMode(), fpsr); +} + } // namespace Dynarmic::FP From 52ed365158b9c785352579966d466b1cfd26534e Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 28 Jun 2018 21:46:16 +0100 Subject: [PATCH 14/28] tests/fp: Add FPRound tests --- tests/fp/unpacked_tests.cpp | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/fp/unpacked_tests.cpp b/tests/fp/unpacked_tests.cpp index 4c5a28b9..f17fc99c 100644 --- a/tests/fp/unpacked_tests.cpp +++ b/tests/fp/unpacked_tests.cpp @@ -7,6 +7,7 @@ 
#include #include "common/fp/unpacked.h" +#include "rand_int.h" using namespace Dynarmic; using namespace Dynarmic::FP; @@ -21,6 +22,7 @@ TEST_CASE("FPUnpack Tests", "[fp]") { {0x7FC00001, {FPType::QNaN, false, {false, 0, 0}}, 0}, {0xFFC00001, {FPType::QNaN, true, {true, 0, 0}}, 0}, {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149. + {0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon }; const FPCR fpcr; @@ -33,3 +35,37 @@ TEST_CASE("FPUnpack Tests", "[fp]") { REQUIRE(fpsr.Value() == expected_fpsr); } } + +TEST_CASE("FPRound Tests", "[fp]") { + const static std::vector>, u32>> test_cases { + {0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0x14}, + {0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0x14}, + {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149. + {0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon + {0x3F800000, {FPType::Nonzero, false, {false, -28, 0xFFFFFFF}}, 0x10}, // rounds to 1.0 + }; + + const FPCR fpcr; + for (const auto& [expected_output, input, expected_fpsr] : test_cases) { + FPSR fpsr; + const auto output = FPRound(std::get<2>(input), fpcr, fpsr); + + INFO("Expected Output: " << std::hex << expected_output); + REQUIRE(output == expected_output); + REQUIRE(fpsr.Value() == expected_fpsr); + } +} + +TEST_CASE("FPUnpack<->FPRound Round-trip Tests", "[fp]") { + const FPCR fpcr; + for (size_t count = 0; count < 100000; count++) { + FPSR fpsr; + const u32 input = RandInt(0, 1) == 0 ? 
RandInt(0x00000001, 0x7F800000) : RandInt(0x80000001, 0xFF800000); + const auto intermediate = std::get<2>(FPUnpack(input, fpcr, fpsr)); + const u32 output = FPRound(intermediate, fpcr, fpsr); + + INFO("Count: " << count); + INFO("Intermediate Values: " << std::hex << intermediate.sign << ';' << intermediate.exponent << ';' << intermediate.mantissa); + REQUIRE(input == output); + } +} From 8087e8df05fcb90a0050e2ce3c6c97589890fdd5 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Fri, 29 Jun 2018 18:52:48 +0100 Subject: [PATCH 15/28] mantissa_util: Implement ResidualErrorOnRightShift Accurately calculate residual error that is shifted out --- src/CMakeLists.txt | 1 + src/common/fp/mantissa_util.h | 48 ++++++++++++++++++++++++ tests/CMakeLists.txt | 1 + tests/fp/mantissa_util_tests.cpp | 63 ++++++++++++++++++++++++++++++++ 4 files changed, 113 insertions(+) create mode 100644 src/common/fp/mantissa_util.h create mode 100644 tests/fp/mantissa_util_tests.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 38ad8601..daf5b7d8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,6 +18,7 @@ add_library(dynarmic common/crc32.h common/fp/fpsr.h common/fp/info.h + common/fp/mantissa_util.h common/fp/process_exception.cpp common/fp/process_exception.h common/fp/rounding_mode.h diff --git a/src/common/fp/mantissa_util.h b/src/common/fp/mantissa_util.h new file mode 100644 index 00000000..2551c40c --- /dev/null +++ b/src/common/fp/mantissa_util.h @@ -0,0 +1,48 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#pragma once + +#include "common/bit_util.h" +#include "common/common_types.h" + +namespace Dynarmic::FP { + +enum class ResidualError { + Zero, + LessThanHalf, + Half, + GreaterThanHalf, +}; + +template +ResidualError ResidualErrorOnRightShift(MantissaT mantissa, int shift_amount) { + if (shift_amount <= 0 || mantissa == 0) { + return ResidualError::Zero; + } + + if (shift_amount > static_cast(Common::BitSize())) { + return Common::MostSignificantBit(mantissa) ? ResidualError::GreaterThanHalf : ResidualError::LessThanHalf; + } + + const size_t half_bit_position = static_cast(shift_amount - 1); + const MantissaT half = static_cast(1) << half_bit_position; + const MantissaT error_mask = Common::Ones(static_cast(shift_amount)); + const MantissaT error = mantissa & error_mask; + + if (error == 0) { + return ResidualError::Zero; + } + if (error < half) { + return ResidualError::LessThanHalf; + } + if (error == half) { + return ResidualError::Half; + } + return ResidualError::GreaterThanHalf; +} + +} // namespace Dynarmic::FP diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 576bf9e6..e48e18c1 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -29,6 +29,7 @@ add_executable(dynarmic_tests A64/inst_gen.cpp A64/inst_gen.h A64/testenv.h + fp/mantissa_util_tests.cpp fp/unpacked_tests.cpp main.cpp mp.cpp diff --git a/tests/fp/mantissa_util_tests.cpp b/tests/fp/mantissa_util_tests.cpp new file mode 100644 index 00000000..04703e1d --- /dev/null +++ b/tests/fp/mantissa_util_tests.cpp @@ -0,0 +1,63 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#include +#include + +#include + +#include "common/fp/mantissa_util.h" +#include "common/safe_ops.h" +#include "rand_int.h" + +using namespace Dynarmic; +using namespace Dynarmic::FP; + +TEST_CASE("ResidualErrorOnRightShift", "[fp]") { + const std::vector> test_cases { + {0x00000001, 1, ResidualError::Half}, + {0x00000002, 1, ResidualError::Zero}, + {0x00000001, 2, ResidualError::LessThanHalf}, + {0x00000002, 2, ResidualError::Half}, + {0x00000003, 2, ResidualError::GreaterThanHalf}, + {0x00000004, 2, ResidualError::Zero}, + {0x00000005, 2, ResidualError::LessThanHalf}, + {0x00000006, 2, ResidualError::Half}, + {0x00000007, 2, ResidualError::GreaterThanHalf}, + }; + + for (auto [mantissa, shift, expected_result] : test_cases) { + const ResidualError result = ResidualErrorOnRightShift(mantissa, shift); + REQUIRE(result == expected_result); + } +} + +TEST_CASE("ResidualErrorOnRightShift Randomized", "[fp]") { + for (size_t test = 0; test < 100000; test++) { + const u32 mantissa = RandInt(0, 0xFFFFFFFF); + const int shift = RandInt(-60, 60); + + const ResidualError result = ResidualErrorOnRightShift(mantissa, shift); + + const u64 calculated_error = Safe::ArithmeticShiftRightDouble(Common::SignExtend<32, u64>(mantissa), u64(0), shift); + const ResidualError expected_result = [&]{ + constexpr u64 half_error = 0x8000'0000'0000'0000ull; + if (calculated_error == 0) { + return ResidualError::Zero; + } + if (calculated_error < half_error) { + return ResidualError::LessThanHalf; + } + if (calculated_error == half_error) { + return ResidualError::Half; + } + return ResidualError::GreaterThanHalf; + }(); + + INFO(std::hex << "mantissa " << mantissa << " shift " << shift << " calculated_error " << calculated_error); + REQUIRE(result == expected_result); + } +} From 95712695529de970684e3c78d9f898ee02bbcd15 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Fri, 29 Jun 2018 19:34:46 +0100 Subject: [PATCH 16/28] fp/op: Implement FPToFixed --- src/CMakeLists.txt | 2 + 
src/common/fp/op.cpp | 101 +++++++++++++++++++++++++++++++++++++++++ src/common/fp/op.h | 21 +++++++++ tests/CMakeLists.txt | 1 + tests/fp/FPToFixed.cpp | 38 ++++++++++++++++ 5 files changed, 163 insertions(+) create mode 100644 src/common/fp/op.cpp create mode 100644 src/common/fp/op.h create mode 100644 tests/fp/FPToFixed.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index daf5b7d8..32a2095d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -19,6 +19,8 @@ add_library(dynarmic common/fp/fpsr.h common/fp/info.h common/fp/mantissa_util.h + common/fp/op.cpp + common/fp/op.h common/fp/process_exception.cpp common/fp/process_exception.h common/fp/rounding_mode.h diff --git a/src/common/fp/op.cpp b/src/common/fp/op.cpp new file mode 100644 index 00000000..38d9ec91 --- /dev/null +++ b/src/common/fp/op.cpp @@ -0,0 +1,101 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/common_types.h" +#include "common/safe_ops.h" +#include "common/fp/fpsr.h" +#include "common/fp/mantissa_util.h" +#include "common/fp/op.h" +#include "common/fp/process_exception.h" +#include "common/fp/rounding_mode.h" +#include "common/fp/unpacked.h" +#include "frontend/A64/FPCR.h" + +namespace Dynarmic::FP { + +template +u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { + ASSERT(rounding != RoundingMode::ToOdd); + ASSERT(ibits <= 64); + ASSERT(fbits <= ibits); + + auto [type, sign, value] = FPUnpack(op, fpcr, fpsr); + + if (type == FPType::SNaN || type == FPType::QNaN) { + FPProcessException(FPExc::InvalidOp, fpcr, fpsr); + } + + // Handle zero + if (value.mantissa == 0) { + return 0; + } + + if (sign && unsigned_) { + FPProcessException(FPExc::InvalidOp, fpcr, fpsr); + return 0; + } + + // value *= 2.0^fbits + value.exponent += static_cast(fbits); + + u64 int_result = sign ? 
Safe::Negate(value.mantissa) : static_cast(value.mantissa); + const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent); + int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent); + + bool round_up = false; + switch (rounding) { + case RoundingMode::ToNearest_TieEven: + round_up = error > ResidualError::Half || (error == ResidualError::Half && Common::Bit<0>(int_result)); + break; + case RoundingMode::TowardsPlusInfinity: + round_up = error != ResidualError::Zero; + break; + case RoundingMode::TowardsMinusInfinity: + round_up = false; + break; + case RoundingMode::TowardsZero: + round_up = error != ResidualError::Zero && Common::MostSignificantBit(int_result); + break; + case RoundingMode::ToNearest_TieAwayFromZero: + round_up = error > ResidualError::Half || (error == ResidualError::Half && !Common::MostSignificantBit(int_result)); + break; + case RoundingMode::ToOdd: + UNREACHABLE(); + } + + if (round_up) { + int_result++; + } + + // Detect Overflow + const int min_exponent_for_overflow = static_cast(ibits) - static_cast(Common::HighestSetBit(value.mantissa + (round_up ? 1 : 0))) - (unsigned_ ? 0 : 1); + if (value.exponent >= min_exponent_for_overflow) { + // Positive overflow + if (unsigned_ || !sign) { + FPProcessException(FPExc::InvalidOp, fpcr, fpsr); + return Common::Ones(ibits - (unsigned_ ? 
0 : 1)); + } + + // Negative overflow + const u64 min_value = Safe::Negate(static_cast(1) << (ibits - 1)); + if (!(value.exponent == min_exponent_for_overflow && int_result == min_value)) { + FPProcessException(FPExc::InvalidOp, fpcr, fpsr); + return static_cast(1) << (ibits - 1); + } + } + + if (error != ResidualError::Zero) { + FPProcessException(FPExc::Inexact, fpcr, fpsr); + } + return int_result & Common::Ones(ibits); +} + +template u64 FPToFixed(size_t ibits, u32 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); +template u64 FPToFixed(size_t ibits, u64 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); + +} // namespace Dynarmic::FP diff --git a/src/common/fp/op.h b/src/common/fp/op.h new file mode 100644 index 00000000..435070b0 --- /dev/null +++ b/src/common/fp/op.h @@ -0,0 +1,21 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include "common/common_types.h" +#include "common/fp/fpsr.h" +#include "common/fp/rounding_mode.h" +#include "frontend/A64/FPCR.h" + +namespace Dynarmic::FP { + +using FPCR = A64::FPCR; + +template +u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); + +} // namespace Dynarmic::FP diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e48e18c1..7680dc4a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -29,6 +29,7 @@ add_executable(dynarmic_tests A64/inst_gen.cpp A64/inst_gen.h A64/testenv.h + fp/FPToFixed.cpp fp/mantissa_util_tests.cpp fp/unpacked_tests.cpp main.cpp diff --git a/tests/fp/FPToFixed.cpp b/tests/fp/FPToFixed.cpp new file mode 100644 index 00000000..1a507dfb --- /dev/null +++ b/tests/fp/FPToFixed.cpp @@ -0,0 +1,38 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#include +#include + +#include + +#include "common/fp/fpsr.h" +#include "common/fp/op.h" +#include "rand_int.h" + +using namespace Dynarmic; +using namespace Dynarmic::FP; + +TEST_CASE("FPToFixed", "[fp]") { + const std::vector> test_cases { + {0x447A0000, 64, 0x000003E8, 0x00}, + {0xC47A0000, 32, 0xFFFFFC18, 0x00}, + {0x4479E000, 64, 0x000003E8, 0x10}, + {0x50800000, 32, 0x7FFFFFFF, 0x01}, + {0xD0800000, 32, 0x80000000, 0x01}, + {0xCF000000, 32, 0x80000000, 0x00}, + {0x80002B94, 64, 0x00000000, 0x10}, + {0x80636D24, 64, 0x00000000, 0x10}, + }; + + const FPCR fpcr; + for (auto [input, ibits, expected_output, expected_fpsr] : test_cases) { + FPSR fpsr; + const u64 output = FPToFixed(ibits, input, 0, false, fpcr, RoundingMode::ToNearest_TieEven, fpsr); + REQUIRE(output == expected_output); + REQUIRE(fpsr.Value() == expected_fpsr); + } +} From 760cc3ca893ce8dc36cb0a6d6ce287fad4007846 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 15 Jul 2018 14:23:50 +0100 Subject: [PATCH 17/28] EmitContext: Expose FPCR --- src/backend_x64/a32_emit_x64.cpp | 4 ++++ src/backend_x64/a32_emit_x64.h | 1 + src/backend_x64/a64_emit_x64.cpp | 4 ++++ src/backend_x64/a64_emit_x64.h | 1 + src/backend_x64/emit_x64.h | 1 + 5 files changed, 11 insertions(+) diff --git a/src/backend_x64/a32_emit_x64.cpp b/src/backend_x64/a32_emit_x64.cpp index bf9fadcf..c027761d 100644 --- a/src/backend_x64/a32_emit_x64.cpp +++ b/src/backend_x64/a32_emit_x64.cpp @@ -62,6 +62,10 @@ FP::RoundingMode A32EmitContext::FPSCR_RMode() const { return Location().FPSCR().RMode(); } +u32 A32EmitContext::FPCR() const { + return Location().FPSCR().Value(); +} + bool A32EmitContext::FPSCR_RoundTowardsZero() const { return Location().FPSCR().RMode() != FP::RoundingMode::TowardsZero; } diff --git a/src/backend_x64/a32_emit_x64.h 
b/src/backend_x64/a32_emit_x64.h index bec1663d..254d4a9e 100644 --- a/src/backend_x64/a32_emit_x64.h +++ b/src/backend_x64/a32_emit_x64.h @@ -24,6 +24,7 @@ struct A32EmitContext final : public EmitContext { A32EmitContext(RegAlloc& reg_alloc, IR::Block& block); A32::LocationDescriptor Location() const; FP::RoundingMode FPSCR_RMode() const override; + u32 FPCR() const override; bool FPSCR_RoundTowardsZero() const override; bool FPSCR_FTZ() const override; bool FPSCR_DN() const override; diff --git a/src/backend_x64/a64_emit_x64.cpp b/src/backend_x64/a64_emit_x64.cpp index 2c30b9f7..70e41b28 100644 --- a/src/backend_x64/a64_emit_x64.cpp +++ b/src/backend_x64/a64_emit_x64.cpp @@ -44,6 +44,10 @@ FP::RoundingMode A64EmitContext::FPSCR_RMode() const { return Location().FPCR().RMode(); } +u32 A64EmitContext::FPCR() const { + return Location().FPCR().Value(); +} + bool A64EmitContext::FPSCR_RoundTowardsZero() const { return Location().FPCR().RMode() != FP::RoundingMode::TowardsZero; } diff --git a/src/backend_x64/a64_emit_x64.h b/src/backend_x64/a64_emit_x64.h index 2ecf0883..e251a963 100644 --- a/src/backend_x64/a64_emit_x64.h +++ b/src/backend_x64/a64_emit_x64.h @@ -24,6 +24,7 @@ struct A64EmitContext final : public EmitContext { A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block); A64::LocationDescriptor Location() const; FP::RoundingMode FPSCR_RMode() const override; + u32 FPCR() const override; bool FPSCR_RoundTowardsZero() const override; bool FPSCR_FTZ() const override; bool FPSCR_DN() const override; diff --git a/src/backend_x64/emit_x64.h b/src/backend_x64/emit_x64.h index 47f47bd3..64f66967 100644 --- a/src/backend_x64/emit_x64.h +++ b/src/backend_x64/emit_x64.h @@ -35,6 +35,7 @@ struct EmitContext { void EraseInstruction(IR::Inst* inst); virtual FP::RoundingMode FPSCR_RMode() const = 0; + virtual u32 FPCR() const = 0; virtual bool FPSCR_RoundTowardsZero() const = 0; virtual bool FPSCR_FTZ() const = 0; virtual bool FPSCR_DN() const 
= 0; From caaf36dfd6ee24da83b46995d9753519c100a3e7 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 10:49:47 +0100 Subject: [PATCH 18/28] IR: Initial implementation of FP{Double,Single}ToFixed{S,U}{32,64} This implementation just falls-back to the software floating point implementation. --- src/backend_x64/a32_jitstate.cpp | 12 +- src/backend_x64/a32_jitstate.h | 1 + src/backend_x64/a64_jitstate.cpp | 9 +- src/backend_x64/a64_jitstate.h | 1 + src/backend_x64/emit_x64_floating_point.cpp | 195 +++++++----------- src/backend_x64/jitstate_info.h | 2 + .../A32/translate/translate_arm/vfp2.cpp | 8 +- .../floating_point_conversion_fixed_point.cpp | 16 +- .../floating_point_conversion_integer.cpp | 16 +- src/frontend/ir/ir_emitter.cpp | 44 ++-- src/frontend/ir/ir_emitter.h | 16 +- src/frontend/ir/opcodes.inc | 12 +- 12 files changed, 159 insertions(+), 173 deletions(-) diff --git a/src/backend_x64/a32_jitstate.cpp b/src/backend_x64/a32_jitstate.cpp index 5a9ee868..c274d7fc 100644 --- a/src/backend_x64/a32_jitstate.cpp +++ b/src/backend_x64/a32_jitstate.cpp @@ -163,6 +163,7 @@ u32 A32JitState::Fpscr() const { FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE FPSCR |= FPSCR_IDC; FPSCR |= FPSCR_UFC; + FPSCR |= fpsr_exc; return FPSCR; } @@ -183,13 +184,10 @@ void A32JitState::SetFpscr(u32 FPSCR) { const std::array MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000}; guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3]; - // Cumulative flags IOC, IXC, UFC, OFC, DZC - guest_MXCSR |= ( FPSCR ) & 0b0000000000001; // IE = IOC - guest_MXCSR |= ( FPSCR << 1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC - - // Cumulative flag IDC, UFC - FPSCR_IDC = FPSCR & (1 << 7); - FPSCR_UFC = FPSCR & (1 << 3); + // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC + FPSCR_IDC = 0; + FPSCR_UFC = 0; + fpsr_exc = FPSCR & 0x9F; if (Common::Bit<24>(FPSCR)) { // VFP Flush to Zero diff --git a/src/backend_x64/a32_jitstate.h b/src/backend_x64/a32_jitstate.h 
index 5294f8e4..3688ac82 100644 --- a/src/backend_x64/a32_jitstate.h +++ b/src/backend_x64/a32_jitstate.h @@ -66,6 +66,7 @@ struct A32JitState { std::array rsb_codeptrs; void ResetRSB(); + u32 fpsr_exc = 0; u32 FPSCR_IDC = 0; u32 FPSCR_UFC = 0; u32 FPSCR_mode = 0; diff --git a/src/backend_x64/a64_jitstate.cpp b/src/backend_x64/a64_jitstate.cpp index efc8d167..eee952f5 100644 --- a/src/backend_x64/a64_jitstate.cpp +++ b/src/backend_x64/a64_jitstate.cpp @@ -105,16 +105,15 @@ u32 A64JitState::GetFpsr() const { fpsr |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE fpsr |= FPSCR_IDC; fpsr |= FPSCR_UFC; + fpsr |= fpsr_exc; return fpsr; } void A64JitState::SetFpsr(u32 value) { guest_MXCSR &= ~0x0000003D; - guest_MXCSR |= ( value ) & 0b0000000000001; // IE = IOC - guest_MXCSR |= ( value << 1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC - - FPSCR_IDC = value & (1 << 7); - FPSCR_UFC = value & (1 << 3); + FPSCR_IDC = 0; + FPSCR_UFC = 0; + fpsr_exc = value & 0x9F; } } // namespace Dynarmic::BackendX64 diff --git a/src/backend_x64/a64_jitstate.h b/src/backend_x64/a64_jitstate.h index a1c3a2dc..b8fdd5e1 100644 --- a/src/backend_x64/a64_jitstate.h +++ b/src/backend_x64/a64_jitstate.h @@ -71,6 +71,7 @@ struct A64JitState { rsb_codeptrs.fill(0); } + u32 fpsr_exc = 0; u32 FPSCR_IDC = 0; u32 FPSCR_UFC = 0; u32 fpcr = 0; diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp index 253c235d..7bcfe410 100644 --- a/src/backend_x64/emit_x64_floating_point.cpp +++ b/src/backend_x64/emit_x64_floating_point.cpp @@ -5,13 +5,22 @@ */ #include +#include #include "backend_x64/abi.h" #include "backend_x64/block_of_code.h" #include "backend_x64/emit_x64.h" #include "common/assert.h" #include "common/common_types.h" +#include "common/fp/op.h" #include "common/fp/util.h" +#include "common/mp/cartesian_product.h" +#include "common/mp/integer.h" +#include "common/mp/list.h" +#include "common/mp/lut.h" 
+#include "common/mp/to_tuple.h" +#include "common/mp/vlift.h" +#include "common/mp/vllift.h" #include "frontend/ir/basic_block.h" #include "frontend/ir/microinstruction.h" #include "frontend/ir/opcodes.h" @@ -19,6 +28,7 @@ namespace Dynarmic::BackendX64 { using namespace Xbyak::util; +namespace mp = Dynarmic::Common::mp; constexpr u64 f32_negative_zero = 0x80000000u; constexpr u64 f32_nan = 0x7fc00000u; @@ -29,10 +39,6 @@ constexpr u64 f64_nan = 0x7ff8000000000000u; constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu; constexpr u64 f64_penultimate_positive_denormal = 0x000ffffffffffffeu; -constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double -constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double -constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double -constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { Xbyak::Label end; @@ -99,12 +105,6 @@ static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 code.L(end); } -static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) { - code.pxor(xmm_scratch, xmm_scratch); - code.cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN) - code.pand(xmm_value, xmm_scratch); -} - static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbyak::Label& end) { Xbyak::Label nan; @@ -892,129 +892,82 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) { +static void EmitFPToFixedFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize, bool unsigned_, size_t isize) { + using fsize_list = mp::list, mp::vlift>; + using unsigned_list = mp::list, mp::vlift>; + using isize_list = mp::list, mp::vlift>; + using rounding_list 
= mp::list< + std::integral_constant, + std::integral_constant, + std::integral_constant, + std::integral_constant, + std::integral_constant + >; + + using key_type = std::tuple; + using value_type = u64(*)(u64, u8, FP::FPSR&, A64::FPCR); + + static const auto lut = mp::GenerateLookupTableFromList( + [](auto args) { + return std::pair{ + mp::to_tuple, + static_cast( + [](u64 input, u8 fbits, FP::FPSR& fpsr, A64::FPCR fpcr) { + constexpr auto t = mp::to_tuple; + constexpr size_t fsize = std::get<0>(t); + constexpr bool unsigned_ = std::get<1>(t); + constexpr size_t isize = std::get<2>(t); + constexpr FP::RoundingMode rounding_mode = std::get<3>(t); + using InputSize = mp::unsigned_integer_of_size; + + return FP::FPToFixed(isize, static_cast(input), fbits, unsigned_, fpcr, rounding_mode, fpsr); + } + ) + }; + }, + mp::cartesian_product{} + ); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]); - Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32(); - Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm(); - bool round_towards_zero = args[1].GetImmediateU1(); - // ARM saturates on conversion; this differs from x64 which returns a sentinel value. - // Conversion to double is lossless, and allows for clamping. 
+ const auto rounding = static_cast(args[2].GetImmediateU8()); - if (ctx.FPSCR_FTZ()) { - DenormalsAreZero32(code, from, to); - } - code.cvtss2sd(from, from); - // First time is to set flags - if (round_towards_zero) { - code.cvttsd2si(to, from); // 32 bit gpr - } else { - code.cvtsd2si(to, from); // 32 bit gpr - } - // Clamp to output range - ZeroIfNaN64(code, from, xmm_scratch); - code.minsd(from, code.MConst(xword, f64_max_s32)); - code.maxsd(from, code.MConst(xword, f64_min_s32)); - // Second time is for real - if (round_towards_zero) { - code.cvttsd2si(to, from); // 32 bit gpr - } else { - code.cvtsd2si(to, from); // 32 bit gpr - } - - ctx.reg_alloc.DefineValue(inst, to); + ctx.reg_alloc.HostCall(inst, args[0], args[1]); + code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR()); + code.CallFunction(lut.at(std::make_tuple(fsize, unsigned_, isize, rounding))); } -void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]); - Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64(); - Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm(); - bool round_towards_zero = args[1].GetImmediateU1(); - - // ARM saturates on conversion; this differs from x64 which returns a sentinel value. - // Conversion to double is lossless, and allows for accurate clamping. - // - // Since SSE2 doesn't provide an unsigned conversion, we use a 64-bit signed conversion. 
- // - // FIXME: None of the FPSR exception bits are correctly signalled with the below code - - if (ctx.FPSCR_FTZ()) { - DenormalsAreZero64(code, from, to); - } - code.cvtss2sd(from, from); - // Clamp to output range - ZeroIfNaN64(code, from, xmm_scratch); - code.minsd(from, code.MConst(xword, f64_max_u32)); - code.maxsd(from, code.MConst(xword, f64_min_u32)); - if (round_towards_zero) { - code.cvttsd2si(to, from); // 64 bit gpr - } else { - code.cvtsd2si(to, from); // 64 bit gpr - } - - ctx.reg_alloc.DefineValue(inst, to); +void EmitX64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 64, false, 32); } -void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]); - Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32(); - Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm(); - Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32(); - bool round_towards_zero = args[1].GetImmediateU1(); - - // ARM saturates on conversion; this differs from x64 which returns a sentinel value. 
- - if (ctx.FPSCR_FTZ()) { - DenormalsAreZero64(code, from, gpr_scratch.cvt64()); - } - // First time is to set flags - if (round_towards_zero) { - code.cvttsd2si(gpr_scratch, from); // 32 bit gpr - } else { - code.cvtsd2si(gpr_scratch, from); // 32 bit gpr - } - // Clamp to output range - ZeroIfNaN64(code, from, xmm_scratch); - code.minsd(from, code.MConst(xword, f64_max_s32)); - code.maxsd(from, code.MConst(xword, f64_min_s32)); - // Second time is for real - if (round_towards_zero) { - code.cvttsd2si(to, from); // 32 bit gpr - } else { - code.cvtsd2si(to, from); // 32 bit gpr - } - - ctx.reg_alloc.DefineValue(inst, to); +void EmitX64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 64, false, 64); } -void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]); - Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64(); - Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm(); - bool round_towards_zero = args[1].GetImmediateU1(); +void EmitX64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 64, true, 32); +} - // ARM saturates on conversion; this differs from x64 which returns a sentinel value. - // TODO: Use VCVTPD2UDQ when AVX512VL is available. 
- // FIXME: None of the FPSR exception bits are correctly signalled with the below code +void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 64, true, 64); +} - if (ctx.FPSCR_FTZ()) { - DenormalsAreZero64(code, from, to); - } - // Clamp to output range - ZeroIfNaN64(code, from, xmm_scratch); - code.minsd(from, code.MConst(xword, f64_max_u32)); - code.maxsd(from, code.MConst(xword, f64_min_u32)); - if (round_towards_zero) { - code.cvttsd2si(to, from); // 64 bit gpr - } else { - code.cvtsd2si(to, from); // 64 bit gpr - } +void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 32, false, 32); +} - ctx.reg_alloc.DefineValue(inst, to); +void EmitX64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 32, false, 64); +} + +void EmitX64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 32, true, 32); +} + +void EmitX64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixedFallback(code, ctx, inst, 32, true, 64); } void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) { diff --git a/src/backend_x64/jitstate_info.h b/src/backend_x64/jitstate_info.h index 2c267bb6..3caca413 100644 --- a/src/backend_x64/jitstate_info.h +++ b/src/backend_x64/jitstate_info.h @@ -26,6 +26,7 @@ struct JitStateInfo { , offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv)) , offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC)) , offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC)) + , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc)) {} const size_t offsetof_cycles_remaining; @@ -39,6 +40,7 @@ struct JitStateInfo { const size_t offsetof_CPSR_nzcv; const size_t offsetof_FPSCR_IDC; const size_t offsetof_FPSCR_UFC; + const size_t offsetof_fpsr_exc; }; } // namespace Dynarmic::BackendX64 diff --git a/src/frontend/A32/translate/translate_arm/vfp2.cpp 
b/src/frontend/A32/translate/translate_arm/vfp2.cpp index defc7b7a..898a8418 100644 --- a/src/frontend/A32/translate/translate_arm/vfp2.cpp +++ b/src/frontend/A32/translate/translate_arm/vfp2.cpp @@ -442,8 +442,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool s if (ConditionPassed(cond)) { auto reg_m = ir.GetExtendedRegister(m); auto result = sz - ? ir.FPDoubleToU32(reg_m, round_towards_zero, true) - : ir.FPSingleToU32(reg_m, round_towards_zero, true); + ? ir.FPDoubleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode()) + : ir.FPSingleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode()); ir.SetExtendedRegister(d, result); } return true; @@ -457,8 +457,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool s if (ConditionPassed(cond)) { auto reg_m = ir.GetExtendedRegister(m); auto result = sz - ? ir.FPDoubleToS32(reg_m, round_towards_zero, true) - : ir.FPSingleToS32(reg_m, round_towards_zero, true); + ? ir.FPDoubleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode()) + : ir.FPSingleToFixedS32(reg_m, 0, round_towards_zero ? 
FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode()); ir.SetExtendedRegister(d, result); } return true; diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp index a86fbb10..8f4eb7df 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp @@ -38,13 +38,13 @@ bool TranslatorVisitor::FCVTZS_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec IR::U32U64 intval; if (intsize == 32 && *fltsize == 32) { - intval = ir.FPSingleToS32(fltval, true, true); + intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 32 && *fltsize == 64) { - intval = ir.FPDoubleToS32(fltval, true, true); + intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 32) { - return InterpretThisInstruction(); + intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 64) { - return InterpretThisInstruction(); + intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero); } else { UNREACHABLE(); } @@ -69,13 +69,13 @@ bool TranslatorVisitor::FCVTZU_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec IR::U32U64 intval; if (intsize == 32 && *fltsize == 32) { - intval = ir.FPSingleToU32(fltval, true, true); + intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 32 && *fltsize == 64) { - intval = ir.FPDoubleToU32(fltval, true, true); + intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 32) { - return InterpretThisInstruction(); + intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 64) { - return InterpretThisInstruction(); + intval = 
ir.FPDoubleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero); } else { UNREACHABLE(); } diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index fd634dcc..009744bd 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -146,13 +146,13 @@ bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { IR::U32U64 intval; if (intsize == 32 && *fltsize == 32) { - intval = ir.FPSingleToS32(fltval, true, true); + intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 32 && *fltsize == 64) { - intval = ir.FPDoubleToS32(fltval, true, true); + intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 32) { - return InterpretThisInstruction(); + intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 64) { - return InterpretThisInstruction(); + intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero); } else { UNREACHABLE(); } @@ -173,13 +173,13 @@ bool TranslatorVisitor::FCVTZU_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { IR::U32U64 intval; if (intsize == 32 && *fltsize == 32) { - intval = ir.FPSingleToU32(fltval, true, true); + intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 32 && *fltsize == 64) { - intval = ir.FPDoubleToU32(fltval, true, true); + intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 32) { - return InterpretThisInstruction(); + intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero); } else if (intsize == 64 && *fltsize == 64) { - return InterpretThisInstruction(); + intval = ir.FPDoubleToFixedU64(fltval, 0, 
FP::RoundingMode::TowardsZero); } else { UNREACHABLE(); } diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 3f7c6d33..e3858a7a 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -1451,24 +1451,44 @@ U64 IREmitter::FPSingleToDouble(const U32& a, bool fpscr_controlled) { return Inst(Opcode::FPSingleToDouble, a); } -U32 IREmitter::FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled) { - ASSERT(fpscr_controlled); - return Inst(Opcode::FPSingleToS32, a, Imm1(round_towards_zero)); +U32 IREmitter::FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 32); + return Inst(Opcode::FPDoubleToFixedS32, a, Imm8(static_cast(fbits)), Imm8(static_cast(rounding))); } -U32 IREmitter::FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled) { - ASSERT(fpscr_controlled); - return Inst(Opcode::FPSingleToU32, a, Imm1(round_towards_zero)); +U64 IREmitter::FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 64); + return Inst(Opcode::FPDoubleToFixedS64, a, Imm8(static_cast(fbits)), Imm8(static_cast(rounding))); } -U32 IREmitter::FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled) { - ASSERT(fpscr_controlled); - return Inst(Opcode::FPDoubleToS32, a, Imm1(round_towards_zero)); +U32 IREmitter::FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 32); + return Inst(Opcode::FPDoubleToFixedU32, a, Imm8(static_cast(fbits)), Imm8(static_cast(rounding))); } -U32 IREmitter::FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled) { - ASSERT(fpscr_controlled); - return Inst(Opcode::FPDoubleToU32, a, Imm1(round_towards_zero)); +U64 IREmitter::FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 64); + return Inst(Opcode::FPDoubleToFixedU64, a, Imm8(static_cast(fbits)), 
Imm8(static_cast(rounding))); +} + +U32 IREmitter::FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 32); + return Inst(Opcode::FPSingleToFixedS32, a, Imm8(static_cast(fbits)), Imm8(static_cast(rounding))); +} + +U64 IREmitter::FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 64); + return Inst(Opcode::FPSingleToFixedS64, a, Imm8(static_cast(fbits)), Imm8(static_cast(rounding))); +} + +U32 IREmitter::FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 32); + return Inst(Opcode::FPSingleToFixedU32, a, Imm8(static_cast(fbits)), Imm8(static_cast(rounding))); +} + +U64 IREmitter::FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding) { + ASSERT(fbits <= 64); + return Inst(Opcode::FPSingleToFixedU64, a, Imm8(static_cast(fbits)), Imm8(static_cast(rounding))); } U32 IREmitter::FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled) { diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 8bc248d0..c783c164 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -12,6 +12,10 @@ #include "frontend/ir/terminal.h" #include "frontend/ir/value.h" +namespace Dynarmic::FP { +enum class RoundingMode; +} // namespace Dynarmic::FP + // ARM JIT Microinstruction Intermediate Representation // // This intermediate representation is an SSA IR. 
It is designed primarily for analysis, @@ -264,10 +268,14 @@ public: U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled); U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled); U64 FPSingleToDouble(const U32& a, bool fpscr_controlled); - U32 FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled); - U32 FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled); - U32 FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled); - U32 FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled); + U32 FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding); + U64 FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding); + U32 FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding); + U64 FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding); + U32 FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding); + U64 FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding); + U32 FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding); + U64 FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding); U32 FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled); U32 FPU32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled); U64 FPS32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 83ec4820..c4b5e165 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -386,10 +386,14 @@ OPCODE(FPSub64, T::U64, T::U64, T::U // Floating-point conversions OPCODE(FPSingleToDouble, T::U64, T::U32 ) OPCODE(FPDoubleToSingle, T::U32, T::U64 ) -OPCODE(FPSingleToU32, T::U32, T::U32, T::U1 ) -OPCODE(FPSingleToS32, T::U32, T::U32, T::U1 ) -OPCODE(FPDoubleToU32, T::U32, T::U64, T::U1 ) -OPCODE(FPDoubleToS32, T::U32, T::U64, 
T::U1 ) +OPCODE(FPDoubleToFixedS32, T::U32, T::U64, T::U8, T::U8 ) +OPCODE(FPDoubleToFixedS64, T::U64, T::U64, T::U8, T::U8 ) +OPCODE(FPDoubleToFixedU32, T::U32, T::U64, T::U8, T::U8 ) +OPCODE(FPDoubleToFixedU64, T::U64, T::U64, T::U8, T::U8 ) +OPCODE(FPSingleToFixedS32, T::U32, T::U32, T::U8, T::U8 ) +OPCODE(FPSingleToFixedS64, T::U64, T::U32, T::U8, T::U8 ) +OPCODE(FPSingleToFixedU32, T::U32, T::U32, T::U8, T::U8 ) +OPCODE(FPSingleToFixedU64, T::U64, T::U32, T::U8, T::U8 ) OPCODE(FPU32ToSingle, T::U32, T::U32, T::U1 ) OPCODE(FPS32ToSingle, T::U32, T::U32, T::U1 ) OPCODE(FPU32ToDouble, T::U64, T::U32, T::U1 ) From 617ca0adf007e862f4f1f50751a4aaa07900ef90 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 11:36:46 +0100 Subject: [PATCH 19/28] floating_point_conversion_integer: Refactor implementation of FCVTZS_float_int and FCVTZU_float_int --- .../floating_point_conversion_integer.cpp | 73 +++++++++++-------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index 009744bd..98143f86 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -6,6 +6,7 @@ #include +#include "common/fp/rounding_mode.h" #include "frontend/A64/translate/impl/impl.h" namespace Dynarmic::A64 { @@ -135,58 +136,66 @@ bool TranslatorVisitor::FMOV_float_gen(bool sf, Imm<2> type, Imm<1> rmode_0, Imm return true; } -bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { +static bool FloaingPointConvertSignedInteger(TranslatorVisitor& v, bool sf, Imm<2> type, Vec Vn, Reg Rd, FP::RoundingMode rounding_mode) { const size_t intsize = sf ? 
64 : 32; const auto fltsize = GetDataSize(type); if (!fltsize || *fltsize == 16) { - return UnallocatedEncoding(); + return v.UnallocatedEncoding(); } - const IR::U32U64 fltval = V_scalar(*fltsize, Vn); + const IR::U32U64 fltval = v.V_scalar(*fltsize, Vn); IR::U32U64 intval; if (intsize == 32 && *fltsize == 32) { - intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero); + intval = v.ir.FPSingleToFixedS32(fltval, 0, rounding_mode); } else if (intsize == 32 && *fltsize == 64) { - intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero); + intval = v.ir.FPDoubleToFixedS32(fltval, 0, rounding_mode); } else if (intsize == 64 && *fltsize == 32) { - intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero); + intval = v.ir.FPSingleToFixedS64(fltval, 0, rounding_mode); } else if (intsize == 64 && *fltsize == 64) { - intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero); + intval = v.ir.FPDoubleToFixedS64(fltval, 0, rounding_mode); } else { UNREACHABLE(); } - X(intsize, Rd, intval); + v.X(intsize, Rd, intval); return true; } +static bool FloaingPointConvertUnsignedInteger(TranslatorVisitor& v, bool sf, Imm<2> type, Vec Vn, Reg Rd, FP::RoundingMode rounding_mode) { + const size_t intsize = sf ? 
64 : 32; + const auto fltsize = GetDataSize(type); + if (!fltsize || *fltsize == 16) { + return v.UnallocatedEncoding(); + } + + const IR::U32U64 fltval = v.V_scalar(*fltsize, Vn); + IR::U32U64 intval; + + if (intsize == 32 && *fltsize == 32) { + intval = v.ir.FPSingleToFixedU32(fltval, 0, rounding_mode); + } else if (intsize == 32 && *fltsize == 64) { + intval = v.ir.FPDoubleToFixedU32(fltval, 0, rounding_mode); + } else if (intsize == 64 && *fltsize == 32) { + intval = v.ir.FPSingleToFixedU64(fltval, 0, rounding_mode); + } else if (intsize == 64 && *fltsize == 64) { + intval = v.ir.FPDoubleToFixedU64(fltval, 0, rounding_mode); + } else { + UNREACHABLE(); + } + + v.X(intsize, Rd, intval); + + return true; +} + +bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsZero); +} + bool TranslatorVisitor::FCVTZU_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { - const size_t intsize = sf ? 
64 : 32; - const auto fltsize = GetDataSize(type); - if (!fltsize || *fltsize == 16) { - return UnallocatedEncoding(); - } - - const IR::U32U64 fltval = V_scalar(*fltsize, Vn); - IR::U32U64 intval; - - if (intsize == 32 && *fltsize == 32) { - intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero); - } else if (intsize == 32 && *fltsize == 64) { - intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero); - } else if (intsize == 64 && *fltsize == 32) { - intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero); - } else if (intsize == 64 && *fltsize == 64) { - intval = ir.FPDoubleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero); - } else { - UNREACHABLE(); - } - - X(intsize, Rd, intval); - - return true; + return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsZero); } } // namespace Dynarmic::A64 From 7d36dbcdfd6618d9cff97bd7a54c59e3d42f4ce7 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 11:39:07 +0100 Subject: [PATCH 20/28] A64: Implement FCVTNS (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index a29efee6..c92e8f22 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -884,7 +884,7 @@ INST(FCVTZS_float_fix, "FCVTZS (scalar, fixed-point)", "z0011 INST(FCVTZU_float_fix, "FCVTZU (scalar, fixed-point)", "z0011110yy011001ppppppnnnnnddddd") // Data Processing - FP and SIMD - Conversion between floating point and integer -//INST(FCVTNS_float, "FCVTNS (scalar)", "z0011110yy100000000000nnnnnddddd") +INST(FCVTNS_float, "FCVTNS (scalar)", "z0011110yy100000000000nnnnnddddd") //INST(FCVTNU_float, "FCVTNU (scalar)", "z0011110yy100001000000nnnnnddddd") INST(SCVTF_float_int, "SCVTF (scalar, integer)", "z0011110yy100010000000nnnnnddddd") 
INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011110yy100011000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index 98143f86..1bd624c8 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -190,6 +190,10 @@ static bool FloaingPointConvertUnsignedInteger(TranslatorVisitor& v, bool sf, Im return true; } +bool TranslatorVisitor::FCVTNS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieEven); +} + bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsZero); } From a1965a74a093fc0d2afd51a11674c0493ea56eae Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 11:39:30 +0100 Subject: [PATCH 21/28] A64: Implement FCVTNU (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index c92e8f22..24bc1b50 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -885,7 +885,7 @@ INST(FCVTZU_float_fix, "FCVTZU (scalar, fixed-point)", "z0011 // Data Processing - FP and SIMD - Conversion between floating point and integer INST(FCVTNS_float, "FCVTNS (scalar)", "z0011110yy100000000000nnnnnddddd") -//INST(FCVTNU_float, "FCVTNU (scalar)", "z0011110yy100001000000nnnnnddddd") +INST(FCVTNU_float, "FCVTNU (scalar)", "z0011110yy100001000000nnnnnddddd") INST(SCVTF_float_int, "SCVTF (scalar, integer)", "z0011110yy100010000000nnnnnddddd") INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011110yy100011000000nnnnnddddd") 
//INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index 1bd624c8..9112792e 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -194,6 +194,10 @@ bool TranslatorVisitor::FCVTNS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieEven); } +bool TranslatorVisitor::FCVTNU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieEven); +} + bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsZero); } From c0c7a263143e9cef8b1b70ae36fc250fb375f2be Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 12:15:35 +0100 Subject: [PATCH 22/28] A64: Implement FCVTAS (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 24bc1b50..bcbf1e28 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -888,7 +888,7 @@ INST(FCVTNS_float, "FCVTNS (scalar)", "z0011 INST(FCVTNU_float, "FCVTNU (scalar)", "z0011110yy100001000000nnnnnddddd") INST(SCVTF_float_int, "SCVTF (scalar, integer)", "z0011110yy100010000000nnnnnddddd") INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011110yy100011000000nnnnnddddd") -//INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") +INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") 
//INST(FCVTAU_float, "FCVTAU (scalar)", "z0011110yy100101000000nnnnnddddd") INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") //INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index 9112792e..deb17f42 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -206,4 +206,8 @@ bool TranslatorVisitor::FCVTZU_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsZero); } +bool TranslatorVisitor::FCVTAS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieAwayFromZero); +} + } // namespace Dynarmic::A64 From 27319822bb8188cded73936a702ca16405c12270 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 12:17:37 +0100 Subject: [PATCH 23/28] A64: Implement FCVTAU (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index bcbf1e28..39a5a430 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -889,7 +889,7 @@ INST(FCVTNU_float, "FCVTNU (scalar)", "z0011 INST(SCVTF_float_int, "SCVTF (scalar, integer)", "z0011110yy100010000000nnnnnddddd") INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011110yy100011000000nnnnnddddd") INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") -//INST(FCVTAU_float, "FCVTAU (scalar)", "z0011110yy100101000000nnnnnddddd") +INST(FCVTAU_float, "FCVTAU (scalar)", "z0011110yy100101000000nnnnnddddd") 
INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") //INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") //INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index deb17f42..d3d2951d 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -210,4 +210,8 @@ bool TranslatorVisitor::FCVTAS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieAwayFromZero); } +bool TranslatorVisitor::FCVTAU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieAwayFromZero); +} + } // namespace Dynarmic::A64 From af661ef5a647d0e0b54e9d08756010bc218c9886 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 12:18:40 +0100 Subject: [PATCH 24/28] A64: Implement FCVTPS (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 39a5a430..0ad67dc6 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -891,7 +891,7 @@ INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011 INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") INST(FCVTAU_float, "FCVTAU (scalar)", "z0011110yy100101000000nnnnnddddd") INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") -//INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") +INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") 
//INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") //INST(FCVTMS_float, "FCVTMS (scalar)", "z0011110yy110000000000nnnnnddddd") //INST(FCVTMU_float, "FCVTMU (scalar)", "z0011110yy110001000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index d3d2951d..367fc725 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -214,4 +214,8 @@ bool TranslatorVisitor::FCVTAU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieAwayFromZero); } +bool TranslatorVisitor::FCVTPS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsPlusInfinity); +} + } // namespace Dynarmic::A64 From 49c4499a875e4e09899c06e6f3060fe18c0cbd11 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 12:19:02 +0100 Subject: [PATCH 25/28] A64: Implement FCVTPU (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 0ad67dc6..bfaee2c7 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -892,7 +892,7 @@ INST(FCVTAS_float, "FCVTAS (scalar)", "z0011 INST(FCVTAU_float, "FCVTAU (scalar)", "z0011110yy100101000000nnnnnddddd") INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") -//INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") +INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") //INST(FCVTMS_float, "FCVTMS 
(scalar)", "z0011110yy110000000000nnnnnddddd") //INST(FCVTMU_float, "FCVTMU (scalar)", "z0011110yy110001000000nnnnnddddd") INST(FCVTZS_float_int, "FCVTZS (scalar, integer)", "z0011110yy111000000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index 367fc725..b409cfce 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -218,4 +218,8 @@ bool TranslatorVisitor::FCVTPS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsPlusInfinity); } +bool TranslatorVisitor::FCVTPU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsPlusInfinity); +} + } // namespace Dynarmic::A64 From 79c9018d60eea53bff4418eb9e6f8c3141812996 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 12:19:38 +0100 Subject: [PATCH 26/28] A64: Implement FCVTMS (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index bfaee2c7..c3c0c0e5 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -893,7 +893,7 @@ INST(FCVTAU_float, "FCVTAU (scalar)", "z0011 INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") -//INST(FCVTMS_float, "FCVTMS (scalar)", "z0011110yy110000000000nnnnnddddd") +INST(FCVTMS_float, "FCVTMS (scalar)", "z0011110yy110000000000nnnnnddddd") //INST(FCVTMU_float, "FCVTMU (scalar)", 
"z0011110yy110001000000nnnnnddddd") INST(FCVTZS_float_int, "FCVTZS (scalar, integer)", "z0011110yy111000000000nnnnnddddd") INST(FCVTZU_float_int, "FCVTZU (scalar, integer)", "z0011110yy111001000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index b409cfce..53b0e877 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -222,4 +222,8 @@ bool TranslatorVisitor::FCVTPU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsPlusInfinity); } +bool TranslatorVisitor::FCVTMS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsMinusInfinity); +} + } // namespace Dynarmic::A64 From 3d9677d09409f5f1fef06ede170356cc2154b6e5 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 30 Jun 2018 12:21:07 +0100 Subject: [PATCH 27/28] A64: Implement FCVTMU (scalar) --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/floating_point_conversion_integer.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index c3c0c0e5..6dcd2a63 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -894,7 +894,7 @@ INST(FMOV_float_gen, "FMOV (general)", "z0011 INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") INST(FCVTMS_float, "FCVTMS (scalar)", "z0011110yy110000000000nnnnnddddd") -//INST(FCVTMU_float, "FCVTMU (scalar)", "z0011110yy110001000000nnnnnddddd") +INST(FCVTMU_float, "FCVTMU (scalar)", "z0011110yy110001000000nnnnnddddd") INST(FCVTZS_float_int, "FCVTZS (scalar, integer)", 
"z0011110yy111000000000nnnnnddddd") INST(FCVTZU_float_int, "FCVTZU (scalar, integer)", "z0011110yy111001000000nnnnnddddd") //INST(FJCVTZS, "FJCVTZS", "0001111001111110000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp index 53b0e877..ad581687 100644 --- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp +++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp @@ -226,4 +226,8 @@ bool TranslatorVisitor::FCVTMS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsMinusInfinity); } +bool TranslatorVisitor::FCVTMU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) { + return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsMinusInfinity); +} + } // namespace Dynarmic::A64 From 304cc7f61e21e423a35f8e5b98ba0d73b5db3500 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 15 Jul 2018 17:03:35 +0100 Subject: [PATCH 28/28] emit_x64_floating_point: SSE4.1 implementation for FP{Double,Single}ToFixed{S,U}{32,64} --- src/backend_x64/emit_x64_floating_point.cpp | 118 +++++++++++++++++--- 1 file changed, 105 insertions(+), 13 deletions(-) diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp index 7bcfe410..68ebfcd4 100644 --- a/src/backend_x64/emit_x64_floating_point.cpp +++ b/src/backend_x64/emit_x64_floating_point.cpp @@ -39,6 +39,14 @@ constexpr u64 f64_nan = 0x7ff8000000000000u; constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu; constexpr u64 f64_penultimate_positive_denormal = 0x000ffffffffffffeu; +constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double +constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double +constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double +constexpr u64 f64_max_u32 = 
0x41efffffffe00000u; // 4294967295 as a double +constexpr u64 f64_min_s64 = 0xc3e0000000000000u; // -2^63 as a double +constexpr u64 f64_max_s64_lim = 0x43e0000000000000u; // 2^63 as a double (actual maximum unrepresentable) +constexpr u64 f64_min_u64 = 0x0000000000000000u; // 0 as a double +constexpr u64 f64_max_u64_lim = 0x43f0000000000000u; // 2^64 as a double (actual maximum unrepresentable) static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { Xbyak::Label end; @@ -105,6 +113,12 @@ static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 code.L(end); } +static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) { + code.pxor(xmm_scratch, xmm_scratch); + code.cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN) + code.pand(xmm_value, xmm_scratch); +} + static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbyak::Label& end) { Xbyak::Label nan; @@ -892,7 +906,89 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -static void EmitFPToFixedFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize, bool unsigned_, size_t isize) { +static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize, bool unsigned_, size_t isize) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const size_t fbits = args[1].GetImmediateU8(); + const auto rounding = static_cast(args[2].GetImmediateU8()); + + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero){ + const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]); + + const int round_imm = [&]{ + switch (rounding) { + case FP::RoundingMode::ToNearest_TieEven: + default: + return 0b00; + case FP::RoundingMode::TowardsPlusInfinity: + return 0b10; + case FP::RoundingMode::TowardsMinusInfinity: + return 0b01; + 
case FP::RoundingMode::TowardsZero: + return 0b11; + } + }(); + + const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); + + if (fsize == 64) { + if (fbits != 0) { + const u64 scale_factor = static_cast((fbits + 1023) << 52); + code.mulsd(src, code.MConst(xword, scale_factor)); + } + + code.roundsd(src, src, round_imm); + ZeroIfNaN64(code, src, scratch); + } else { + if (fbits != 0) { + const u32 scale_factor = static_cast((fbits + 127) << 23); + code.mulss(src, code.MConst(xword, scale_factor)); + } + + code.roundss(src, src, round_imm); + code.cvtss2sd(src, src); + ZeroIfNaN64(code, src, scratch); + } + + if (isize == 64) { + Xbyak::Label saturate_max, end; + + code.maxsd(src, code.MConst(xword, unsigned_ ? f64_min_u64 : f64_min_s64)); + code.movsd(scratch, code.MConst(xword, unsigned_ ? f64_max_u64_lim : f64_max_s64_lim)); + code.comisd(scratch, src); + code.jna(saturate_max, code.T_NEAR); + if (unsigned_) { + Xbyak::Label below_max; + + code.movsd(scratch, code.MConst(xword, f64_max_s64_lim)); + code.comisd(src, scratch); + code.jb(below_max); + code.subsd(src, scratch); + code.cvttsd2si(result, src); + code.btc(result, 63); + code.jmp(end); + code.L(below_max); + } + code.cvttsd2si(result, src); // 64 bit gpr + code.L(end); + + code.SwitchToFarCode(); + code.L(saturate_max); + code.mov(result, unsigned_ ? 0xFFFF'FFFF'FFFF'FFFF : 0x7FFF'FFFF'FFFF'FFFF); + code.jmp(end, code.T_NEAR); + code.SwitchToNearCode(); + } else { + code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u32 : f64_max_s32)); + code.maxsd(src, code.MConst(xword, unsigned_ ? 
f64_min_u32 : f64_min_s32)); + code.cvttsd2si(result, src); // 64 bit gpr + } + + ctx.reg_alloc.DefineValue(inst, result); + + return; + } + using fsize_list = mp::list, mp::vlift>; using unsigned_list = mp::list, mp::vlift>; using isize_list = mp::list, mp::vlift>; @@ -928,10 +1024,6 @@ static void EmitFPToFixedFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* mp::cartesian_product{} ); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const auto rounding = static_cast(args[2].GetImmediateU8()); - ctx.reg_alloc.HostCall(inst, args[0], args[1]); code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR()); @@ -939,35 +1031,35 @@ static void EmitFPToFixedFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* } void EmitX64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, inst, 64, false, 32); + EmitFPToFixed(code, ctx, inst, 64, false, 32); } void EmitX64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, inst, 64, false, 64); + EmitFPToFixed(code, ctx, inst, 64, false, 64); } void EmitX64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, inst, 64, true, 32); + EmitFPToFixed(code, ctx, inst, 64, true, 32); } void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, inst, 64, true, 64); + EmitFPToFixed(code, ctx, inst, 64, true, 64); } void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, inst, 32, false, 32); + EmitFPToFixed(code, ctx, inst, 32, false, 32); } void EmitX64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, inst, 32, false, 64); + EmitFPToFixed(code, ctx, inst, 32, false, 64); } void EmitX64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, 
inst, 32, true, 32); + EmitFPToFixed(code, ctx, inst, 32, true, 32); } void EmitX64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) { - EmitFPToFixedFallback(code, ctx, inst, 32, true, 64); + EmitFPToFixed(code, ctx, inst, 32, true, 64); } void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) {