fp: Change FPUnpacked to a normalized representation

Having a known position for the highest set bit makes writing algorithms easier.
This commit is contained in:
MerryMage 2018-07-25 17:39:14 +01:00
parent 680395a803
commit 7a673a8a43
10 changed files with 71 additions and 56 deletions

View file

@ -150,7 +150,7 @@ inline size_t BitCount(Integral value) {
} }
template <typename T> template <typename T>
inline int HighestSetBit(T value) { constexpr int HighestSetBit(T value) {
auto x = static_cast<std::make_unsigned_t<T>>(value); auto x = static_cast<std::make_unsigned_t<T>>(value);
int result = -1; int result = -1;
while (x != 0) { while (x != 0) {

View file

@ -11,26 +11,15 @@
namespace Dynarmic::FP { namespace Dynarmic::FP {
constexpr size_t normalized_point_position = 62;
constexpr size_t product_point_position = normalized_point_position * 2; constexpr size_t product_point_position = normalized_point_position * 2;
static FPUnpacked NormalizeUnpacked(FPUnpacked op) { static FPUnpacked ReduceMantissa(bool sign, int exponent, const u128& mantissa) {
constexpr int desired_highest = static_cast<int>(normalized_point_position); constexpr int point_position_correction = normalized_point_position - (product_point_position - 64);
// We round-to-odd here when reducing the bitwidth of the mantissa so that subsequent roundings are accurate.
const int highest_bit = Common::HighestSetBit(op.mantissa); return {sign, exponent + point_position_correction, mantissa.upper | static_cast<u64>(mantissa.lower != 0)};
DEBUG_ASSERT(highest_bit < desired_highest);
const int offset = desired_highest - highest_bit;
op.mantissa <<= offset;
op.exponent -= offset;
return op;
} }
FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) { FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
addend = NormalizeUnpacked(addend);
op1 = NormalizeUnpacked(op1);
op2 = NormalizeUnpacked(op2);
const bool product_sign = op1.sign != op2.sign; const bool product_sign = op1.sign != op2.sign;
const auto [product_exponent, product_value] = [op1, op2]{ const auto [product_exponent, product_value] = [op1, op2]{
int exponent = op1.exponent + op2.exponent; int exponent = op1.exponent + op2.exponent;
@ -47,10 +36,10 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
} }
if (addend.mantissa == 0) { if (addend.mantissa == 0) {
return FPUnpacked{product_sign, product_exponent + 64, product_value.upper | u64(product_value.lower != 0)}; return ReduceMantissa(product_sign, product_exponent, product_value);
} }
const int exp_diff = product_exponent - (addend.exponent - normalized_point_position); const int exp_diff = product_exponent - addend.exponent;
if (product_sign == addend.sign) { if (product_sign == addend.sign) {
// Addition // Addition
@ -63,7 +52,7 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
// addend < product // addend < product
const u128 result = product_value + StickyLogicalShiftRight(addend.mantissa, exp_diff - normalized_point_position); const u128 result = product_value + StickyLogicalShiftRight(addend.mantissa, exp_diff - normalized_point_position);
return FPUnpacked{product_sign, product_exponent + 64, result.upper | u64(result.lower != 0)}; return ReduceMantissa(product_sign, product_exponent, result);
} }
// Subtraction // Subtraction
@ -80,7 +69,7 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
result = product_value - addend_long; result = product_value - addend_long;
} else if (exp_diff <= 0) { } else if (exp_diff <= 0) {
result_sign = !product_sign; result_sign = !product_sign;
result_exponent = addend.exponent - normalized_point_position; result_exponent = addend.exponent;
result = addend_long - StickyLogicalShiftRight(product_value, -exp_diff); result = addend_long - StickyLogicalShiftRight(product_value, -exp_diff);
} else { } else {
result_sign = product_sign; result_sign = product_sign;
@ -95,7 +84,7 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
const int required_shift = normalized_point_position - Common::HighestSetBit(result.upper); const int required_shift = normalized_point_position - Common::HighestSetBit(result.upper);
result = result << required_shift; result = result << required_shift;
result_exponent -= required_shift; result_exponent -= required_shift;
return FPUnpacked{result_sign, result_exponent + 64, result.upper | u64(result.lower != 0)}; return ReduceMantissa(result_sign, result_exponent, result);
} }
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -10,6 +10,7 @@ namespace Dynarmic::FP {
struct FPUnpacked; struct FPUnpacked;
/// This function assumes all arguments have been normalized.
FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2); FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2);
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -79,11 +79,10 @@ FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
return FPInfo<FPT>::Zero(false); return FPInfo<FPT>::Zero(false);
} }
const int highest_bit = Common::HighestSetBit(value.mantissa); const int result_exponent = (-(value.exponent + 1)) >> 1;
const int result_exponent = (-(value.exponent + highest_bit + 1)) >> 1; const bool was_exponent_odd = (value.exponent) % 2 == 0;
const bool was_exponent_odd = (value.exponent + highest_bit) % 2 == 0;
const u64 scaled = Safe::LogicalShiftRight(value.mantissa, highest_bit - (was_exponent_odd ? 7 : 8)); const u64 scaled = Safe::LogicalShiftRight(value.mantissa, normalized_point_position - (was_exponent_odd ? 7 : 8));
const u64 estimate = RecipSqrtEstimate(scaled); const u64 estimate = RecipSqrtEstimate(scaled);
const FPT bits_exponent = static_cast<FPT>(result_exponent + FPInfo<FPT>::exponent_bias); const FPT bits_exponent = static_cast<FPT>(result_exponent + FPInfo<FPT>::exponent_bias);

View file

@ -41,7 +41,7 @@ FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
} }
// result_value = (3.0 + (value1 * value2)) / 2.0 // result_value = (3.0 + (value1 * value2)) / 2.0
FPUnpacked result_value = FusedMulAdd({false, 0, 3}, value1, value2); FPUnpacked result_value = FusedMulAdd(ToNormalized(false, 0, 3), value1, value2);
result_value.exponent--; result_value.exponent--;
if (result_value.mantissa == 0) { if (result_value.mantissa == 0) {

View file

@ -38,14 +38,17 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
return FPInfo<FPT>::Zero(sign); return FPInfo<FPT>::Zero(sign);
} }
if (value.exponent >= 0) { // Reshift decimal point back to bit zero.
const int exponent = value.exponent - normalized_point_position;
if (exponent >= 0) {
// Guaranteed to be an integer // Guaranteed to be an integer
return op; return op;
} }
u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa); u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa);
const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent); const ResidualError error = ResidualErrorOnRightShift(int_result, -exponent);
int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent); int_result = Safe::ArithmeticShiftLeft(int_result, exponent);
bool round_up = false; bool round_up = false;
switch (rounding) { switch (rounding) {
@ -77,7 +80,7 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
const FPT result = int_result == 0 const FPT result = int_result == 0
? FPInfo<FPT>::Zero(sign) ? FPInfo<FPT>::Zero(sign)
: FPRound<FPT>(FPUnpacked{new_sign, 0, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr); : FPRound<FPT>(FPUnpacked{new_sign, normalized_point_position, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr);
if (error != ResidualError::Zero && exact) { if (error != ResidualError::Zero && exact) {
FPProcessException(FPExc::Inexact, fpcr, fpsr); FPProcessException(FPExc::Inexact, fpcr, fpsr);

View file

@ -40,12 +40,12 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou
return 0; return 0;
} }
// value *= 2.0^fbits // value *= 2.0^fbits and reshift the decimal point back to bit zero.
value.exponent += static_cast<int>(fbits); int exponent = value.exponent + static_cast<int>(fbits) - normalized_point_position;
u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa); u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa);
const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent); const ResidualError error = ResidualErrorOnRightShift(int_result, -exponent);
int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent); int_result = Safe::ArithmeticShiftLeft(int_result, exponent);
bool round_up = false; bool round_up = false;
switch (rounding) { switch (rounding) {
@ -74,7 +74,7 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou
// Detect Overflow // Detect Overflow
const int min_exponent_for_overflow = static_cast<int>(ibits) - static_cast<int>(Common::HighestSetBit(value.mantissa + (round_up ? 1 : 0))) - (unsigned_ ? 0 : 1); const int min_exponent_for_overflow = static_cast<int>(ibits) - static_cast<int>(Common::HighestSetBit(value.mantissa + (round_up ? 1 : 0))) - (unsigned_ ? 0 : 1);
if (value.exponent >= min_exponent_for_overflow) { if (exponent >= min_exponent_for_overflow) {
// Positive overflow // Positive overflow
if (unsigned_ || !sign) { if (unsigned_ || !sign) {
FPProcessException(FPExc::InvalidOp, fpcr, fpsr); FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
@ -83,7 +83,7 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou
// Negative overflow // Negative overflow
const u64 min_value = Safe::Negate<u64>(static_cast<u64>(1) << (ibits - 1)); const u64 min_value = Safe::Negate<u64>(static_cast<u64>(1) << (ibits - 1));
if (!(value.exponent == min_exponent_for_overflow && int_result == min_value)) { if (!(exponent == min_exponent_for_overflow && int_result == min_value)) {
FPProcessException(FPExc::InvalidOp, fpcr, fpsr); FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
return static_cast<u64>(1) << (ibits - 1); return static_cast<u64>(1) << (ibits - 1);
} }

View file

@ -35,20 +35,20 @@ std::tuple<FPType, bool, FPUnpacked> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) {
return {FPType::Zero, sign, {sign, 0, 0}}; return {FPType::Zero, sign, {sign, 0, 0}};
} }
return {FPType::Nonzero, sign, {sign, denormal_exponent, frac_raw}}; return {FPType::Nonzero, sign, ToNormalized(sign, denormal_exponent, frac_raw)};
} }
if (exp_raw == Common::Ones<FPT>(FPInfo<FPT>::exponent_width)) { if (exp_raw == Common::Ones<FPT>(FPInfo<FPT>::exponent_width)) {
if (frac_raw == 0) { if (frac_raw == 0) {
return {FPType::Infinity, sign, {sign, 1000000, 1}}; return {FPType::Infinity, sign, ToNormalized(sign, 1000000, 1)};
} }
const bool is_quiet = Common::Bit<mantissa_high_bit>(frac_raw); const bool is_quiet = Common::Bit<mantissa_high_bit>(frac_raw);
return {is_quiet ? FPType::QNaN : FPType::SNaN, sign, {sign, 0, 0}}; return {is_quiet ? FPType::QNaN : FPType::SNaN, sign, {sign, 0, 0}};
} }
const int exp = static_cast<int>(exp_raw) - FPInfo<FPT>::exponent_bias - FPInfo<FPT>::explicit_mantissa_width; const int exp = static_cast<int>(exp_raw) - FPInfo<FPT>::exponent_bias;
const u64 frac = frac_raw | FPInfo<FPT>::implicit_leading_bit; const u64 frac = static_cast<u64>(frac_raw | FPInfo<FPT>::implicit_leading_bit) << (normalized_point_position - FPInfo<FPT>::explicit_mantissa_width);
return {FPType::Nonzero, sign, {sign, exp, frac}}; return {FPType::Nonzero, sign, {sign, exp, frac}};
} }
@ -61,7 +61,7 @@ std::tuple<bool, int, u64, ResidualError> Normalize(FPUnpacked op, int extra_rig
const int shift_amount = highest_set_bit - static_cast<int>(F) + extra_right_shift; const int shift_amount = highest_set_bit - static_cast<int>(F) + extra_right_shift;
const u64 mantissa = Safe::LogicalShiftRight(op.mantissa, shift_amount); const u64 mantissa = Safe::LogicalShiftRight(op.mantissa, shift_amount);
const ResidualError error = ResidualErrorOnRightShift(op.mantissa, shift_amount); const ResidualError error = ResidualErrorOnRightShift(op.mantissa, shift_amount);
const int exponent = op.exponent + highest_set_bit; const int exponent = op.exponent + highest_set_bit - normalized_point_position;
return std::make_tuple(op.sign, exponent, mantissa, error); return std::make_tuple(op.sign, exponent, mantissa, error);
} }

View file

@ -24,7 +24,10 @@ enum class FPType {
SNaN, SNaN,
}; };
/// value = (sign ? -1 : +1) * mantissa * 2^exponent constexpr size_t normalized_point_position = 62;
/// value = (sign ? -1 : +1) * mantissa/(2^62) * 2^exponent
/// Bit 62 of mantissa (the 63rd bit, counting from one) is always set (unless value is zero)
struct FPUnpacked { struct FPUnpacked {
bool sign; bool sign;
int exponent; int exponent;
@ -35,6 +38,19 @@ inline bool operator==(const FPUnpacked& a, const FPUnpacked& b) {
return std::tie(a.sign, a.exponent, a.mantissa) == std::tie(b.sign, b.exponent, b.mantissa); return std::tie(a.sign, a.exponent, a.mantissa) == std::tie(b.sign, b.exponent, b.mantissa);
} }
/// Builds a normalized FPUnpacked whose value is (sign ? -1 : +1) * value * 2^exponent.
///
/// The highest set bit of `value` is moved to bit `normalized_point_position` and the
/// exponent is adjusted so that the represented number is unchanged.
///
/// @param sign      Sign of the result (true means negative).
/// @param exponent  Power-of-two scale applied to `value` when it is read as a plain integer.
/// @param value     Unsigned integer magnitude. Zero yields {sign, 0, 0}.
/// @return The normalized triple {sign, exponent, mantissa}.
constexpr FPUnpacked ToNormalized(bool sign, int exponent, u64 value) {
    if (value == 0) {
        return {sign, 0, 0};
    }

    // Do all arithmetic in int: mixing int with the size_t constant would silently
    // perform the subtraction in unsigned arithmetic and rely on wraparound.
    constexpr int point_position = static_cast<int>(normalized_point_position);
    const int highest_bit = Common::HighestSetBit(value);
    const int offset = point_position - highest_bit;

    if (offset >= 0) {
        value <<= offset;
    } else {
        // Only reachable when bit 63 is set (offset == -1). Shifting left by a negative
        // amount is undefined, so shift right by one instead and fold the dropped bit
        // into the lowest bit (round-to-odd) so later roundings still see it.
        const u64 sticky = value & 1;
        value = (value >> 1) | sticky;
    }

    // Compensate for the shift and account for the point sitting at point_position.
    exponent -= offset - point_position;
    return {sign, exponent, value};
}
template<typename FPT> template<typename FPT>
std::tuple<FPType, bool, FPUnpacked> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr); std::tuple<FPType, bool, FPUnpacked> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr);

View file

@ -20,15 +20,15 @@ using namespace Dynarmic::FP;
TEST_CASE("FPUnpack Tests", "[fp]") { TEST_CASE("FPUnpack Tests", "[fp]") {
const static std::vector<std::tuple<u32, std::tuple<FPType, bool, FPUnpacked>, u32>> test_cases { const static std::vector<std::tuple<u32, std::tuple<FPType, bool, FPUnpacked>, u32>> test_cases {
{0x00000000, {FPType::Zero, false, {false, 0, 0}}, 0}, {0x00000000, {FPType::Zero, false, ToNormalized(false, 0, 0)}, 0},
{0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0}, {0x7F800000, {FPType::Infinity, false, ToNormalized(false, 1000000, 1)}, 0},
{0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0}, {0xFF800000, {FPType::Infinity, true, ToNormalized(true, 1000000, 1)}, 0},
{0x7F800001, {FPType::SNaN, false, {false, 0, 0}}, 0}, {0x7F800001, {FPType::SNaN, false, ToNormalized(false, 0, 0)}, 0},
{0xFF800001, {FPType::SNaN, true, {true, 0, 0}}, 0}, {0xFF800001, {FPType::SNaN, true, ToNormalized(true, 0, 0)}, 0},
{0x7FC00001, {FPType::QNaN, false, {false, 0, 0}}, 0}, {0x7FC00001, {FPType::QNaN, false, ToNormalized(false, 0, 0)}, 0},
{0xFFC00001, {FPType::QNaN, true, {true, 0, 0}}, 0}, {0xFFC00001, {FPType::QNaN, true, ToNormalized(true, 0, 0)}, 0},
{0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149. {0x00000001, {FPType::Nonzero, false, ToNormalized(false, -149, 1)}, 0}, // Smallest single precision denormal is 2^-149.
{0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon {0x3F7FFFFF, {FPType::Nonzero, false, ToNormalized(false, -24, 0xFFFFFF)}, 0}, // 1.0 - epsilon
}; };
const FPCR fpcr; const FPCR fpcr;
@ -37,6 +37,13 @@ TEST_CASE("FPUnpack Tests", "[fp]") {
const auto output = FPUnpack<u32>(input, fpcr, fpsr); const auto output = FPUnpack<u32>(input, fpcr, fpsr);
INFO("Input: " << std::hex << input); INFO("Input: " << std::hex << input);
INFO("Output Sign: " << std::get<2>(output).sign);
INFO("Output Exponent: " << std::get<2>(output).exponent);
INFO("Output Mantissa: " << std::hex << std::get<2>(output).mantissa);
INFO("Expected Sign: " << std::get<2>(expected_output).sign);
INFO("Expected Exponent: " << std::get<2>(expected_output).exponent);
INFO("Expected Mantissa: " << std::hex << std::get<2>(expected_output).mantissa);
REQUIRE(output == expected_output); REQUIRE(output == expected_output);
REQUIRE(fpsr.Value() == expected_fpsr); REQUIRE(fpsr.Value() == expected_fpsr);
} }
@ -44,11 +51,11 @@ TEST_CASE("FPUnpack Tests", "[fp]") {
TEST_CASE("FPRound Tests", "[fp]") { TEST_CASE("FPRound Tests", "[fp]") {
const static std::vector<std::tuple<u32, std::tuple<FPType, bool, FPUnpacked>, u32>> test_cases { const static std::vector<std::tuple<u32, std::tuple<FPType, bool, FPUnpacked>, u32>> test_cases {
{0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0x14}, {0x7F800000, {FPType::Infinity, false, ToNormalized(false, 1000000, 1)}, 0x14},
{0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0x14}, {0xFF800000, {FPType::Infinity, true, ToNormalized(true, 1000000, 1)}, 0x14},
{0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149. {0x00000001, {FPType::Nonzero, false, ToNormalized(false, -149, 1)}, 0}, // Smallest single precision denormal is 2^-149.
{0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon {0x3F7FFFFF, {FPType::Nonzero, false, ToNormalized(false, -24, 0xFFFFFF)}, 0}, // 1.0 - epsilon
{0x3F800000, {FPType::Nonzero, false, {false, -28, 0xFFFFFFF}}, 0x10}, // rounds to 1.0 {0x3F800000, {FPType::Nonzero, false, ToNormalized(false, -28, 0xFFFFFFF)}, 0x10}, // rounds to 1.0
}; };
const FPCR fpcr; const FPCR fpcr;