common/fp/info: Make half-precision info struct functions return correctly sized types

These functions originally returned a wider type to guard against bugs
caused by C++'s silly type-promotion mechanism for types < sizeof(int)
and unsignedness. However, given that the bulk of these functions' usages
are on exit paths, they can return the correctly sized type instead, which
avoids the need to cast at every usage point.
This commit is contained in:
Lioncash 2019-04-15 04:41:58 -04:00 committed by MerryMage
parent 699ad98b2a
commit c9777ef997
10 changed files with 44 additions and 33 deletions

View file

@ -20,21 +20,32 @@ struct FPInfo<u16> {
static constexpr size_t exponent_width = 5;
static constexpr size_t explicit_mantissa_width = 10;
static constexpr size_t mantissa_width = explicit_mantissa_width + 1;
static constexpr u32 implicit_leading_bit = u32(1) << explicit_mantissa_width;
static constexpr u32 sign_mask = 0x8000;
static constexpr u32 exponent_mask = 0x7C00;
static constexpr u32 mantissa_mask = 0x3FF;
static constexpr u32 mantissa_msb = 0x200;
static constexpr int exponent_min = -14;
static constexpr int exponent_max = 15;
static constexpr int exponent_bias = 15;
static constexpr u32 Zero(bool sign) { return sign ? sign_mask : 0; }
static constexpr u32 Infinity(bool sign) { return exponent_mask | Zero(sign); }
static constexpr u32 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
static constexpr u32 DefaultNaN() { return exponent_mask | (u32(1) << (explicit_mantissa_width - 1)); }
/// Half-precision bit pattern for zero: only the sign bit is set when
/// `sign` is true, otherwise all bits are clear.
static constexpr u16 Zero(bool sign) {
    if (sign) {
        return static_cast<u16>(sign_mask);
    }
    return u16{0};
}
/// Half-precision bit pattern for infinity: every exponent bit set, combined
/// with the sign bit selected by `sign`.
static constexpr u16 Infinity(bool sign) {
    const u32 bits = exponent_mask | Zero(sign);
    return static_cast<u16>(bits);
}
/// Half-precision bit pattern one below the infinity encoding
/// (`exponent_mask - 1`), combined with the sign bit selected by `sign`.
static constexpr u16 MaxNormal(bool sign) {
    const u32 magnitude = exponent_mask - 1;
    return static_cast<u16>(magnitude | Zero(sign));
}
/// Half-precision default NaN bit pattern: every exponent bit set plus the
/// most significant explicit mantissa bit; the sign bit is clear.
static constexpr u16 DefaultNaN() {
    const u32 top_mantissa_bit = u32(1) << (explicit_mantissa_width - 1);
    return static_cast<u16>(exponent_mask | top_mantissa_bit);
}
};
template<>
@ -88,7 +99,7 @@ struct FPInfo<u64> {
template<typename FPT, bool sign, int exponent, FPT value>
constexpr FPT FPValue() {
if constexpr (value == 0) {
return FPT(FPInfo<FPT>::Zero(sign));
return FPInfo<FPT>::Zero(sign);
}
constexpr int point_position = static_cast<int>(FPInfo<FPT>::explicit_mantissa_width);

View file

@ -35,7 +35,7 @@ FPT FPMulAdd(FPT addend, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
if (typeA == FPType::QNaN && ((inf1 && zero2) || (zero1 && inf2))) {
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
return FPT(FPInfo<FPT>::DefaultNaN());
return FPInfo<FPT>::DefaultNaN();
}
if (maybe_nan) {
@ -50,25 +50,25 @@ FPT FPMulAdd(FPT addend, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
// Raise NaN on (inf * inf) of opposite signs or (inf * zero).
if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP)) {
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
return FPT(FPInfo<FPT>::DefaultNaN());
return FPInfo<FPT>::DefaultNaN();
}
// Handle infinities
if ((infA && !signA) || (infP && !signP)) {
return FPT(FPInfo<FPT>::Infinity(false));
return FPInfo<FPT>::Infinity(false);
}
if ((infA && signA) || (infP && signP)) {
return FPT(FPInfo<FPT>::Infinity(true));
return FPInfo<FPT>::Infinity(true);
}
// Result is exactly zero
if (zeroA && zeroP && signA == signP) {
return FPT(FPInfo<FPT>::Zero(signA));
return FPInfo<FPT>::Zero(signA);
}
const FPUnpacked result_value = FusedMulAdd(valueA, value1, value2);
if (result_value.mantissa == 0) {
return FPT(FPInfo<FPT>::Zero(rounding == RoundingMode::TowardsMinusInfinity));
return FPInfo<FPT>::Zero(rounding == RoundingMode::TowardsMinusInfinity);
}
return FPRound<FPT>(result_value, fpcr, fpsr);
}

View file

@ -27,16 +27,16 @@ FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
if (type == FPType::Zero) {
FPProcessException(FPExc::DivideByZero, fpcr, fpsr);
return FPT(FPInfo<FPT>::Infinity(sign));
return FPInfo<FPT>::Infinity(sign);
}
if (sign) {
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
return FPT(FPInfo<FPT>::DefaultNaN());
return FPInfo<FPT>::DefaultNaN();
}
if (type == FPType::Infinity) {
return FPT(FPInfo<FPT>::Zero(false));
return FPInfo<FPT>::Zero(false);
}
const int result_exponent = (-(value.exponent + 1)) >> 1;

View file

@ -37,7 +37,7 @@ FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
}
if (inf1 || inf2) {
return FPT(FPInfo<FPT>::Infinity(sign1 != sign2));
return FPInfo<FPT>::Infinity(sign1 != sign2);
}
// result_value = (3.0 + (value1 * value2)) / 2.0
@ -45,7 +45,7 @@ FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
result_value.exponent--;
if (result_value.mantissa == 0) {
return FPT(FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity));
return FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity);
}
return FPRound<FPT>(result_value, fpcr, fpsr);
}

View file

@ -31,12 +31,12 @@ FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
}
if (type == FPType::Infinity) {
return FPT(FPInfo<FPT>::Zero(sign));
return FPInfo<FPT>::Zero(sign);
}
if (type == FPType::Zero) {
FPProcessException(FPExc::DivideByZero, fpcr, fpsr);
return FPT(FPInfo<FPT>::Infinity(sign));
return FPInfo<FPT>::Infinity(sign);
}
if (value.exponent < FPInfo<FPT>::exponent_min - 2) {
@ -58,13 +58,13 @@ FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
FPProcessException(FPExc::Overflow, fpcr, fpsr);
FPProcessException(FPExc::Inexact, fpcr, fpsr);
return overflow_to_inf ? FPT(FPInfo<FPT>::Infinity(sign)) : FPT(FPInfo<FPT>::MaxNormal(sign));
return overflow_to_inf ? FPInfo<FPT>::Infinity(sign) : FPInfo<FPT>::MaxNormal(sign);
}
if ((fpcr.FZ() && !std::is_same_v<FPT, u16>) || (fpcr.FZ16() && std::is_same_v<FPT, u16>)) {
if (value.exponent >= -FPInfo<FPT>::exponent_min) {
fpsr.UFC(true);
return FPT(FPInfo<FPT>::Zero(sign));
return FPInfo<FPT>::Zero(sign);
}
}
@ -87,7 +87,7 @@ FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
}
}
const FPT bits_sign = FPT(FPInfo<FPT>::Zero(sign));
const FPT bits_sign = FPInfo<FPT>::Zero(sign);
const FPT bits_exponent = static_cast<FPT>(result_exponent + FPInfo<FPT>::exponent_bias);
const FPT bits_mantissa = static_cast<FPT>(estimate);
return FPT((bits_exponent << FPInfo<FPT>::explicit_mantissa_width) | (bits_mantissa & FPInfo<FPT>::mantissa_mask) | bits_sign);

View file

@ -38,7 +38,7 @@ FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) {
return FPProcessNaN(type, op, fpcr, fpsr);
}
const FPT sign_bits = FPT(FPInfo<FPT>::Zero(sign));
const FPT sign_bits = FPInfo<FPT>::Zero(sign);
const FPT exponent = DetermineExponentValue<FPT>(op);
// Zero and denormals

View file

@ -37,14 +37,14 @@ FPT FPRecipStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
}
if (inf1 || inf2) {
return FPT(FPInfo<FPT>::Infinity(sign1 != sign2));
return FPInfo<FPT>::Infinity(sign1 != sign2);
}
// result_value = 2.0 + (value1 * value2)
const FPUnpacked result_value = FusedMulAdd(ToNormalized(false, 0, 2), value1, value2);
if (result_value.mantissa == 0) {
return FPT(FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity));
return FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity);
}
return FPRound<FPT>(result_value, fpcr, fpsr);
}

View file

@ -31,11 +31,11 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
}
if (type == FPType::Infinity) {
return FPT(FPInfo<FPT>::Infinity(sign));
return FPInfo<FPT>::Infinity(sign);
}
if (type == FPType::Zero) {
return FPT(FPInfo<FPT>::Zero(sign));
return FPInfo<FPT>::Zero(sign);
}
// Reshift decimal point back to bit zero.
@ -79,7 +79,7 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
const u64 abs_int_result = new_sign ? Safe::Negate<u64>(int_result) : static_cast<u64>(int_result);
const FPT result = int_result == 0
? FPT(FPInfo<FPT>::Zero(sign))
? FPInfo<FPT>::Zero(sign)
: FPRound<FPT>(FPUnpacked{new_sign, normalized_point_position, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr);
if (error != ResidualError::Zero && exact) {

View file

@ -31,7 +31,7 @@ FPT FPProcessNaN(FPType type, FPT op, FPCR fpcr, FPSR& fpsr) {
}
if (fpcr.DN()) {
result = FPT(FPInfo<FPT>::DefaultNaN());
result = FPInfo<FPT>::DefaultNaN();
}
return result;

View file

@ -90,7 +90,7 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
if (((!isFP16 && fpcr.FZ()) || (isFP16 && fpcr.FZ16())) && exponent < minimum_exp) {
fpsr.UFC(true);
return FPT(FPInfo<FPT>::Zero(sign));
return FPInfo<FPT>::Zero(sign);
}
int biased_exp = std::max<int>(exponent - minimum_exp + 1, 0);
@ -153,7 +153,7 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
#endif
constexpr int max_biased_exp = (1 << E) - 1;
if (biased_exp >= max_biased_exp) {
result = overflow_to_inf ? FPT(FPInfo<FPT>::Infinity(sign)) : FPT(FPInfo<FPT>::MaxNormal(sign));
result = overflow_to_inf ? FPInfo<FPT>::Infinity(sign) : FPInfo<FPT>::MaxNormal(sign);
FPProcessException(FPExc::Overflow, fpcr, fpsr);
FPProcessException(FPExc::Inexact, fpcr, fpsr);
} else {