From c9777ef997a7d69ae659f7d3d42244bc93e71797 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 15 Apr 2019 04:41:58 -0400 Subject: [PATCH] common/fp/info: Make half-precision info struct functions return correctly sized types While initially done to potentially prevent creating bugs due to C++ having a silly type-promotion mechanism involving types < sizeof(int) and unsignedness, given that the bulk of these functions' usages are on exit paths, these can return the correct type to avoid the need to cast at every usage point. --- src/common/fp/info.h | 27 +++++++++++++++++++-------- src/common/fp/op/FPMulAdd.cpp | 12 ++++++------ src/common/fp/op/FPRSqrtEstimate.cpp | 6 +++--- src/common/fp/op/FPRSqrtStepFused.cpp | 4 ++-- src/common/fp/op/FPRecipEstimate.cpp | 10 +++++----- src/common/fp/op/FPRecipExponent.cpp | 2 +- src/common/fp/op/FPRecipStepFused.cpp | 4 ++-- src/common/fp/op/FPRoundInt.cpp | 6 +++--- src/common/fp/process_nan.cpp | 2 +- src/common/fp/unpacked.cpp | 4 ++-- 10 files changed, 44 insertions(+), 33 deletions(-) diff --git a/src/common/fp/info.h b/src/common/fp/info.h index 9253a2ee..c245fd36 100644 --- a/src/common/fp/info.h +++ b/src/common/fp/info.h @@ -20,21 +20,32 @@ struct FPInfo { static constexpr size_t exponent_width = 5; static constexpr size_t explicit_mantissa_width = 10; static constexpr size_t mantissa_width = explicit_mantissa_width + 1; - + static constexpr u32 implicit_leading_bit = u32(1) << explicit_mantissa_width; static constexpr u32 sign_mask = 0x8000; static constexpr u32 exponent_mask = 0x7C00; static constexpr u32 mantissa_mask = 0x3FF; static constexpr u32 mantissa_msb = 0x200; - + static constexpr int exponent_min = -14; static constexpr int exponent_max = 15; static constexpr int exponent_bias = 15; - - static constexpr u32 Zero(bool sign) { return sign ? 
sign_mask : 0; } - static constexpr u32 Infinity(bool sign) { return exponent_mask | Zero(sign); } - static constexpr u32 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); } - static constexpr u32 DefaultNaN() { return exponent_mask | (u32(1) << (explicit_mantissa_width - 1)); } + + static constexpr u16 Zero(bool sign) { + return sign ? static_cast<u16>(sign_mask) : u16{0}; + } + + static constexpr u16 Infinity(bool sign) { + return static_cast<u16>(exponent_mask | Zero(sign)); + } + + static constexpr u16 MaxNormal(bool sign) { + return static_cast<u16>((exponent_mask - 1) | Zero(sign)); + } + + static constexpr u16 DefaultNaN() { + return static_cast<u16>(exponent_mask | (u32(1) << (explicit_mantissa_width - 1))); + } }; template<> @@ -88,7 +99,7 @@ struct FPInfo { template constexpr FPT FPValue() { if constexpr (value == 0) { - return FPT(FPInfo::Zero(sign)); + return FPInfo::Zero(sign); } constexpr int point_position = static_cast<int>(FPInfo::explicit_mantissa_width); diff --git a/src/common/fp/op/FPMulAdd.cpp b/src/common/fp/op/FPMulAdd.cpp index 1e994820..edc3d054 100644 --- a/src/common/fp/op/FPMulAdd.cpp +++ b/src/common/fp/op/FPMulAdd.cpp @@ -35,7 +35,7 @@ FPT FPMulAdd(FPT addend, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) { if (typeA == FPType::QNaN && ((inf1 && zero2) || (zero1 && inf2))) { FPProcessException(FPExc::InvalidOp, fpcr, fpsr); - return FPT(FPInfo::DefaultNaN()); + return FPInfo::DefaultNaN(); } if (maybe_nan) { @@ -50,25 +50,25 @@ FPT FPMulAdd(FPT addend, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) { // Raise NaN on (inf * inf) of opposite signs or (inf * zero). 
if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP)) { FPProcessException(FPExc::InvalidOp, fpcr, fpsr); - return FPT(FPInfo::DefaultNaN()); + return FPInfo::DefaultNaN(); } // Handle infinities if ((infA && !signA) || (infP && !signP)) { - return FPT(FPInfo::Infinity(false)); + return FPInfo::Infinity(false); } if ((infA && signA) || (infP && signP)) { - return FPT(FPInfo::Infinity(true)); + return FPInfo::Infinity(true); } // Result is exactly zero if (zeroA && zeroP && signA == signP) { - return FPT(FPInfo::Zero(signA)); + return FPInfo::Zero(signA); } const FPUnpacked result_value = FusedMulAdd(valueA, value1, value2); if (result_value.mantissa == 0) { - return FPT(FPInfo::Zero(rounding == RoundingMode::TowardsMinusInfinity)); + return FPInfo::Zero(rounding == RoundingMode::TowardsMinusInfinity); } return FPRound(result_value, fpcr, fpsr); } diff --git a/src/common/fp/op/FPRSqrtEstimate.cpp b/src/common/fp/op/FPRSqrtEstimate.cpp index 969ade1c..c9605fa8 100644 --- a/src/common/fp/op/FPRSqrtEstimate.cpp +++ b/src/common/fp/op/FPRSqrtEstimate.cpp @@ -27,16 +27,16 @@ FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) { if (type == FPType::Zero) { FPProcessException(FPExc::DivideByZero, fpcr, fpsr); - return FPT(FPInfo::Infinity(sign)); + return FPInfo::Infinity(sign); } if (sign) { FPProcessException(FPExc::InvalidOp, fpcr, fpsr); - return FPT(FPInfo::DefaultNaN()); + return FPInfo::DefaultNaN(); } if (type == FPType::Infinity) { - return FPT(FPInfo::Zero(false)); + return FPInfo::Zero(false); } const int result_exponent = (-(value.exponent + 1)) >> 1; diff --git a/src/common/fp/op/FPRSqrtStepFused.cpp b/src/common/fp/op/FPRSqrtStepFused.cpp index 84a193b4..29788010 100644 --- a/src/common/fp/op/FPRSqrtStepFused.cpp +++ b/src/common/fp/op/FPRSqrtStepFused.cpp @@ -37,7 +37,7 @@ FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) { } if (inf1 || inf2) { - return FPT(FPInfo::Infinity(sign1 != sign2)); + return FPInfo::Infinity(sign1 
!= sign2); } // result_value = (3.0 + (value1 * value2)) / 2.0 @@ -45,7 +45,7 @@ FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) { result_value.exponent--; if (result_value.mantissa == 0) { - return FPT(FPInfo::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity)); + return FPInfo::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity); } return FPRound(result_value, fpcr, fpsr); } diff --git a/src/common/fp/op/FPRecipEstimate.cpp b/src/common/fp/op/FPRecipEstimate.cpp index c35a6569..5e13767d 100644 --- a/src/common/fp/op/FPRecipEstimate.cpp +++ b/src/common/fp/op/FPRecipEstimate.cpp @@ -31,12 +31,12 @@ FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) { } if (type == FPType::Infinity) { - return FPT(FPInfo::Zero(sign)); + return FPInfo::Zero(sign); } if (type == FPType::Zero) { FPProcessException(FPExc::DivideByZero, fpcr, fpsr); - return FPT(FPInfo::Infinity(sign)); + return FPInfo::Infinity(sign); } if (value.exponent < FPInfo::exponent_min - 2) { @@ -58,13 +58,13 @@ FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) { FPProcessException(FPExc::Overflow, fpcr, fpsr); FPProcessException(FPExc::Inexact, fpcr, fpsr); - return overflow_to_inf ? FPT(FPInfo::Infinity(sign)) : FPT(FPInfo::MaxNormal(sign)); + return overflow_to_inf ? 
FPInfo::Infinity(sign) : FPInfo::MaxNormal(sign); } if ((fpcr.FZ() && !std::is_same_v<FPT, u16>) || (fpcr.FZ16() && std::is_same_v<FPT, u16>)) { if (value.exponent >= -FPInfo::exponent_min) { fpsr.UFC(true); - return FPT(FPInfo::Zero(sign)); + return FPInfo::Zero(sign); } } @@ -87,7 +87,7 @@ FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) { } } - const FPT bits_sign = FPT(FPInfo::Zero(sign)); + const FPT bits_sign = FPInfo::Zero(sign); const FPT bits_exponent = static_cast<FPT>(result_exponent + FPInfo::exponent_bias); const FPT bits_mantissa = static_cast<FPT>(estimate); return FPT((bits_exponent << FPInfo::explicit_mantissa_width) | (bits_mantissa & FPInfo::mantissa_mask) | bits_sign); diff --git a/src/common/fp/op/FPRecipExponent.cpp b/src/common/fp/op/FPRecipExponent.cpp index eafad6a9..e6f04ae6 100644 --- a/src/common/fp/op/FPRecipExponent.cpp +++ b/src/common/fp/op/FPRecipExponent.cpp @@ -38,7 +38,7 @@ FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) { return FPProcessNaN(type, op, fpcr, fpsr); } - const FPT sign_bits = FPT(FPInfo::Zero(sign)); + const FPT sign_bits = FPInfo::Zero(sign); const FPT exponent = DetermineExponentValue(op); // Zero and denormals diff --git a/src/common/fp/op/FPRecipStepFused.cpp b/src/common/fp/op/FPRecipStepFused.cpp index 938650ce..f82fea8e 100644 --- a/src/common/fp/op/FPRecipStepFused.cpp +++ b/src/common/fp/op/FPRecipStepFused.cpp @@ -37,14 +37,14 @@ FPT FPRecipStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) { } if (inf1 || inf2) { - return FPT(FPInfo::Infinity(sign1 != sign2)); + return FPInfo::Infinity(sign1 != sign2); } // result_value = 2.0 + (value1 * value2) const FPUnpacked result_value = FusedMulAdd(ToNormalized(false, 0, 2), value1, value2); if (result_value.mantissa == 0) { - return FPT(FPInfo::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity)); + return FPInfo::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity); } return FPRound(result_value, fpcr, fpsr); } diff --git a/src/common/fp/op/FPRoundInt.cpp 
b/src/common/fp/op/FPRoundInt.cpp index d2456394..2eff07fe 100644 --- a/src/common/fp/op/FPRoundInt.cpp +++ b/src/common/fp/op/FPRoundInt.cpp @@ -31,11 +31,11 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr) } if (type == FPType::Infinity) { - return FPT(FPInfo::Infinity(sign)); + return FPInfo::Infinity(sign); } if (type == FPType::Zero) { - return FPT(FPInfo::Zero(sign)); + return FPInfo::Zero(sign); } // Reshift decimal point back to bit zero. @@ -79,7 +79,7 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr) const u64 abs_int_result = new_sign ? Safe::Negate(int_result) : static_cast<u64>(int_result); const FPT result = int_result == 0 - ? FPT(FPInfo::Zero(sign)) + ? FPInfo::Zero(sign) : FPRound(FPUnpacked{new_sign, normalized_point_position, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr); if (error != ResidualError::Zero && exact) { diff --git a/src/common/fp/process_nan.cpp b/src/common/fp/process_nan.cpp index 3fb3666a..5013ba33 100644 --- a/src/common/fp/process_nan.cpp +++ b/src/common/fp/process_nan.cpp @@ -31,7 +31,7 @@ FPT FPProcessNaN(FPType type, FPT op, FPCR fpcr, FPSR& fpsr) { } if (fpcr.DN()) { - result = FPT(FPInfo::DefaultNaN()); + result = FPInfo::DefaultNaN(); } return result; diff --git a/src/common/fp/unpacked.cpp b/src/common/fp/unpacked.cpp index 96251779..4ae48396 100644 --- a/src/common/fp/unpacked.cpp +++ b/src/common/fp/unpacked.cpp @@ -90,7 +90,7 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { if (((!isFP16 && fpcr.FZ()) || (isFP16 && fpcr.FZ16())) && exponent < minimum_exp) { fpsr.UFC(true); - return FPT(FPInfo::Zero(sign)); + return FPInfo::Zero(sign); } int biased_exp = std::max(exponent - minimum_exp + 1, 0); @@ -153,7 +153,7 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { #endif constexpr int max_biased_exp = (1 << E) - 1; if (biased_exp >= max_biased_exp) { - result = overflow_to_inf ? 
FPT(FPInfo::Infinity(sign)) : FPT(FPInfo::MaxNormal(sign)); + result = overflow_to_inf ? FPInfo::Infinity(sign) : FPInfo::MaxNormal(sign); FPProcessException(FPExc::Overflow, fpcr, fpsr); FPProcessException(FPExc::Inexact, fpcr, fpsr); } else {