fp: Change FPUnpacked to a normalized representation

Having a known position for the highest set bit makes writing algorithms easier
2018-07-25 17:39:14 +01:00 · 2018-07-25 17:39:14 +01:00 · 7a673a8a43
commit 7a673a8a43
parent 680395a803
10 changed files with 71 additions and 56 deletions
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@ -150,7 +150,7 @@ inline size_t BitCount(Integral value) {
 }

 template <typename T>
-inline int HighestSetBit(T value) {
+constexpr int HighestSetBit(T value) {
    auto x = static_cast<std::make_unsigned_t<T>>(value);
    int result = -1;
    while (x != 0) {
--- a/src/common/fp/fused.cpp
+++ b/src/common/fp/fused.cpp
@ -11,26 +11,15 @@

 namespace Dynarmic::FP {

-constexpr size_t normalized_point_position = 62;
 constexpr size_t product_point_position = normalized_point_position * 2;

-static FPUnpacked NormalizeUnpacked(FPUnpacked op) {
-    constexpr int desired_highest = static_cast<int>(normalized_point_position);
-
-    const int highest_bit = Common::HighestSetBit(op.mantissa);
-    DEBUG_ASSERT(highest_bit < desired_highest);
-
-    const int offset = desired_highest - highest_bit;
-    op.mantissa <<= offset;
-    op.exponent -= offset;
-    return op;
+/// Narrows a 128-bit mantissa to the 64-bit mantissa of an FPUnpacked.
+/// Only the upper 64 bits are kept, so the exponent is offset by point_position_correction to
+/// account for the point moving from (product_point_position - 64) to normalized_point_position.
+static FPUnpacked ReduceMantissa(bool sign, int exponent, const u128& mantissa) {
+    constexpr int point_position_correction = normalized_point_position - (product_point_position - 64);
+    // We round-to-odd here when reducing the bitwidth of the mantissa so that subsequent roundings are accurate.
+    // (Bit 0 of the result is forced to 1 whenever any of the discarded lower 64 bits is set.)
+    return {sign, exponent + point_position_correction, mantissa.upper | static_cast<u64>(mantissa.lower != 0)};
 }

 FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
-    addend = NormalizeUnpacked(addend);
-    op1 = NormalizeUnpacked(op1);
-    op2 = NormalizeUnpacked(op2);
-
    const bool product_sign = op1.sign != op2.sign;
    const auto [product_exponent, product_value] = [op1, op2]{
        int exponent = op1.exponent + op2.exponent;
@ -47,10 +36,10 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
    }

    if (addend.mantissa == 0) {
-        return FPUnpacked{product_sign, product_exponent + 64, product_value.upper | u64(product_value.lower != 0)};
+        return ReduceMantissa(product_sign, product_exponent, product_value);
    }

-    const int exp_diff = product_exponent - (addend.exponent - normalized_point_position);
+    const int exp_diff = product_exponent - addend.exponent;

    if (product_sign == addend.sign) {
        // Addition
@ -63,7 +52,7 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {

        // addend < product
        const u128 result = product_value + StickyLogicalShiftRight(addend.mantissa, exp_diff - normalized_point_position);
-        return FPUnpacked{product_sign, product_exponent + 64, result.upper | u64(result.lower != 0)};
+        return ReduceMantissa(product_sign, product_exponent, result);
    }

    // Subtraction
@ -80,7 +69,7 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
        result = product_value - addend_long;
    } else if (exp_diff <= 0) {
        result_sign = !product_sign;
-        result_exponent = addend.exponent - normalized_point_position;
+        result_exponent = addend.exponent;
        result = addend_long - StickyLogicalShiftRight(product_value, -exp_diff);
    } else {
        result_sign = product_sign;
@ -95,7 +84,7 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
    const int required_shift = normalized_point_position - Common::HighestSetBit(result.upper);
    result = result << required_shift;
    result_exponent -= required_shift;
-    return FPUnpacked{result_sign, result_exponent + 64, result.upper | u64(result.lower != 0)};
+    return ReduceMantissa(result_sign, result_exponent, result);
 }

 } // namespace Dynarmic::FP
--- a/src/common/fp/fused.h
+++ b/src/common/fp/fused.h
@ -10,6 +10,7 @@ namespace Dynarmic::FP {

 struct FPUnpacked;

+/// This function assumes all arguments have been normalized.
 FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2);

 } // namespace Dynarmic::FP
--- a/src/common/fp/op/FPRSqrtEstimate.cpp
+++ b/src/common/fp/op/FPRSqrtEstimate.cpp
@ -79,11 +79,10 @@ FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
        return FPInfo<FPT>::Zero(false);
    }

-    const int highest_bit = Common::HighestSetBit(value.mantissa);
-    const int result_exponent = (-(value.exponent + highest_bit + 1)) >> 1;
-    const bool was_exponent_odd = (value.exponent + highest_bit) % 2 == 0;
+    const int result_exponent = (-(value.exponent + 1)) >> 1;
+    const bool was_exponent_odd = (value.exponent) % 2 == 0;

-    const u64 scaled = Safe::LogicalShiftRight(value.mantissa, highest_bit - (was_exponent_odd ? 7 : 8));
+    const u64 scaled = Safe::LogicalShiftRight(value.mantissa, normalized_point_position - (was_exponent_odd ? 7 : 8));
    const u64 estimate = RecipSqrtEstimate(scaled);

    const FPT bits_exponent = static_cast<FPT>(result_exponent + FPInfo<FPT>::exponent_bias);
--- a/src/common/fp/op/FPRSqrtStepFused.cpp
+++ b/src/common/fp/op/FPRSqrtStepFused.cpp
@ -41,7 +41,7 @@ FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
    }

    // result_value = (3.0 + (value1 * value2)) / 2.0
-    FPUnpacked result_value = FusedMulAdd({false, 0, 3}, value1, value2);
+    FPUnpacked result_value = FusedMulAdd(ToNormalized(false, 0, 3), value1, value2);
    result_value.exponent--;

    if (result_value.mantissa == 0) {
--- a/src/common/fp/op/FPRoundInt.cpp
+++ b/src/common/fp/op/FPRoundInt.cpp
@ -38,14 +38,17 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
        return FPInfo<FPT>::Zero(sign);
    }

-    if (value.exponent >= 0) {
+    // Reshift decimal point back to bit zero.
+    const int exponent = value.exponent - normalized_point_position;
+
+    if (exponent >= 0) {
        // Guaranteed to be an integer
        return op;
    }

    u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa);
-    const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent);
-    int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent);
+    const ResidualError error = ResidualErrorOnRightShift(int_result, -exponent);
+    int_result = Safe::ArithmeticShiftLeft(int_result, exponent);

    bool round_up = false;
    switch (rounding) {
@ -77,7 +80,7 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)

    const FPT result = int_result == 0
                     ? FPInfo<FPT>::Zero(sign)
-                     : FPRound<FPT>(FPUnpacked{new_sign, 0, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr);
+                     : FPRound<FPT>(FPUnpacked{new_sign, normalized_point_position, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr);

    if (error != ResidualError::Zero && exact) {
        FPProcessException(FPExc::Inexact, fpcr, fpsr);
--- a/src/common/fp/op/FPToFixed.cpp
+++ b/src/common/fp/op/FPToFixed.cpp
@ -40,12 +40,12 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou
        return 0;
    }

-    // value *= 2.0^fbits
-    value.exponent += static_cast<int>(fbits);
+    // value *= 2.0^fbits and reshift the decimal point back to bit zero.
+    int exponent = value.exponent + static_cast<int>(fbits) - normalized_point_position;

    u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa);
-    const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent);
-    int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent);
+    const ResidualError error = ResidualErrorOnRightShift(int_result, -exponent);
+    int_result = Safe::ArithmeticShiftLeft(int_result, exponent);

    bool round_up = false;
    switch (rounding) {
@ -74,7 +74,7 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou

    // Detect Overflow
    const int min_exponent_for_overflow = static_cast<int>(ibits) - static_cast<int>(Common::HighestSetBit(value.mantissa + (round_up ? 1 : 0))) - (unsigned_ ? 0 : 1);
-    if (value.exponent >= min_exponent_for_overflow) {
+    if (exponent >= min_exponent_for_overflow) {
        // Positive overflow
        if (unsigned_ || !sign) {
            FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
@ -83,7 +83,7 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou

        // Negative overflow
        const u64 min_value = Safe::Negate<u64>(static_cast<u64>(1) << (ibits - 1));
-        if (!(value.exponent == min_exponent_for_overflow && int_result == min_value)) {
+        if (!(exponent == min_exponent_for_overflow && int_result == min_value)) {
            FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
            return static_cast<u64>(1) << (ibits - 1);
        }
--- a/src/common/fp/unpacked.cpp
+++ b/src/common/fp/unpacked.cpp
@ -35,20 +35,20 @@ std::tuple<FPType, bool, FPUnpacked> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) {
            return {FPType::Zero, sign, {sign, 0, 0}};
        }

-        return {FPType::Nonzero, sign, {sign, denormal_exponent, frac_raw}};
+        return {FPType::Nonzero, sign, ToNormalized(sign, denormal_exponent, frac_raw)};
    }

    if (exp_raw == Common::Ones<FPT>(FPInfo<FPT>::exponent_width)) {
        if (frac_raw == 0) {
-            return {FPType::Infinity, sign, {sign, 1000000, 1}};
+            return {FPType::Infinity, sign, ToNormalized(sign, 1000000, 1)};
        }

        const bool is_quiet = Common::Bit<mantissa_high_bit>(frac_raw);
        return {is_quiet ? FPType::QNaN : FPType::SNaN, sign, {sign, 0, 0}};
    }

-    const int exp = static_cast<int>(exp_raw) - FPInfo<FPT>::exponent_bias - FPInfo<FPT>::explicit_mantissa_width;
-    const u64 frac = frac_raw | FPInfo<FPT>::implicit_leading_bit;
+    const int exp = static_cast<int>(exp_raw) - FPInfo<FPT>::exponent_bias;
+    const u64 frac = static_cast<u64>(frac_raw | FPInfo<FPT>::implicit_leading_bit) << (normalized_point_position - FPInfo<FPT>::explicit_mantissa_width);
    return {FPType::Nonzero, sign, {sign, exp, frac}};
 }

@ -61,7 +61,7 @@ std::tuple<bool, int, u64, ResidualError> Normalize(FPUnpacked op, int extra_rig
    const int shift_amount = highest_set_bit - static_cast<int>(F) + extra_right_shift;
    const u64 mantissa = Safe::LogicalShiftRight(op.mantissa, shift_amount);
    const ResidualError error = ResidualErrorOnRightShift(op.mantissa, shift_amount);
-    const int exponent = op.exponent + highest_set_bit;
+    const int exponent = op.exponent + highest_set_bit - normalized_point_position;
    return std::make_tuple(op.sign, exponent, mantissa, error);
 }

--- a/src/common/fp/unpacked.h
+++ b/src/common/fp/unpacked.h
@ -24,7 +24,10 @@ enum class FPType {
    SNaN,
 };

-/// value = (sign ? -1 : +1) * mantissa * 2^exponent
+constexpr size_t normalized_point_position = 62;
+
+/// value = (sign ? -1 : +1) * mantissa/(2^62) * 2^exponent
+/// Bit 62 of mantissa (bit index normalized_point_position, i.e. the 63rd bit counting from one) is always set (unless value is zero)
 struct FPUnpacked {
    bool sign;
    int exponent;
@ -35,6 +38,19 @@ inline bool operator==(const FPUnpacked& a, const FPUnpacked& b) {
    return std::tie(a.sign, a.exponent, a.mantissa) == std::tie(b.sign, b.exponent, b.mantissa);
 }

+/// Builds the normalized FPUnpacked whose value is (sign ? -1 : +1) * value * 2^exponent.
+/// A zero value yields {sign, 0, 0}. Otherwise value is shifted left so that its highest set bit
+/// lands on bit normalized_point_position, and exponent is adjusted so the represented value is unchanged.
+/// NOTE(review): a value with bit 63 set would presumably give a negative offset (undefined shift) — confirm no caller passes one.
+constexpr FPUnpacked ToNormalized(bool sign, int exponent, u64 value) {
+    if (value == 0) {
+        return {sign, 0, 0};
+    }
+
+    const int highest_bit = Common::HighestSetBit(value);
+    const int offset = static_cast<int>(normalized_point_position) - highest_bit;
+    value <<= offset;
+    // Cast before subtracting: normalized_point_position is a size_t, and mixing it with the int
+    // offset would do this arithmetic in unsigned and rely on wrap-around to reach the right exponent.
+    exponent -= offset - static_cast<int>(normalized_point_position);
+    return {sign, exponent, value};
+}
+
 template<typename FPT>
 std::tuple<FPType, bool, FPUnpacked> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr);

--- a/tests/fp/unpacked_tests.cpp
+++ b/tests/fp/unpacked_tests.cpp
@ -20,15 +20,15 @@ using namespace Dynarmic::FP;

 TEST_CASE("FPUnpack Tests", "[fp]") {
    const static std::vector<std::tuple<u32, std::tuple<FPType, bool, FPUnpacked>, u32>> test_cases {
-        {0x00000000, {FPType::Zero, false, {false, 0, 0}}, 0},
-        {0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0},
-        {0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0},
-        {0x7F800001, {FPType::SNaN, false, {false, 0, 0}}, 0},
-        {0xFF800001, {FPType::SNaN, true, {true, 0, 0}}, 0},
-        {0x7FC00001, {FPType::QNaN, false, {false, 0, 0}}, 0},
-        {0xFFC00001, {FPType::QNaN, true, {true, 0, 0}}, 0},
-        {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149.
-        {0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon
+        {0x00000000, {FPType::Zero, false, ToNormalized(false, 0, 0)}, 0},
+        {0x7F800000, {FPType::Infinity, false, ToNormalized(false, 1000000, 1)}, 0},
+        {0xFF800000, {FPType::Infinity, true, ToNormalized(true, 1000000, 1)}, 0},
+        {0x7F800001, {FPType::SNaN, false, ToNormalized(false, 0, 0)}, 0},
+        {0xFF800001, {FPType::SNaN, true, ToNormalized(true, 0, 0)}, 0},
+        {0x7FC00001, {FPType::QNaN, false, ToNormalized(false, 0, 0)}, 0},
+        {0xFFC00001, {FPType::QNaN, true, ToNormalized(true, 0, 0)}, 0},
+        {0x00000001, {FPType::Nonzero, false, ToNormalized(false, -149, 1)}, 0}, // Smallest single precision denormal is 2^-149.
+        {0x3F7FFFFF, {FPType::Nonzero, false, ToNormalized(false, -24, 0xFFFFFF)}, 0}, // 1.0 - epsilon
    };

    const FPCR fpcr;
@ -37,6 +37,13 @@ TEST_CASE("FPUnpack Tests", "[fp]") {
        const auto output = FPUnpack<u32>(input, fpcr, fpsr);

        INFO("Input: " << std::hex << input);
+        INFO("Output Sign: " << std::get<2>(output).sign);
+        INFO("Output Exponent: " << std::get<2>(output).exponent);
+        INFO("Output Mantissa: " << std::hex << std::get<2>(output).mantissa);
+        INFO("Expected Sign: " << std::get<2>(expected_output).sign);
+        INFO("Expected Exponent: " << std::get<2>(expected_output).exponent);
+        INFO("Expected Mantissa: " << std::hex << std::get<2>(expected_output).mantissa);
+
        REQUIRE(output == expected_output);
        REQUIRE(fpsr.Value() == expected_fpsr);
    }
@ -44,11 +51,11 @@ TEST_CASE("FPUnpack Tests", "[fp]") {

 TEST_CASE("FPRound Tests", "[fp]") {
    const static std::vector<std::tuple<u32, std::tuple<FPType, bool, FPUnpacked>, u32>> test_cases {
-        {0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0x14},
-        {0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0x14},
-        {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149.
-        {0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon
-        {0x3F800000, {FPType::Nonzero, false, {false, -28, 0xFFFFFFF}}, 0x10}, // rounds to 1.0
+        {0x7F800000, {FPType::Infinity, false, ToNormalized(false, 1000000, 1)}, 0x14},
+        {0xFF800000, {FPType::Infinity, true, ToNormalized(true, 1000000, 1)}, 0x14},
+        {0x00000001, {FPType::Nonzero, false, ToNormalized(false, -149, 1)}, 0}, // Smallest single precision denormal is 2^-149.
+        {0x3F7FFFFF, {FPType::Nonzero, false, ToNormalized(false, -24, 0xFFFFFF)}, 0}, // 1.0 - epsilon
+        {0x3F800000, {FPType::Nonzero, false, ToNormalized(false, -28, 0xFFFFFFF)}, 0x10}, // rounds to 1.0
    };

    const FPCR fpcr;