From 6eb069e80dc38eab8d3e0f5ccc4f61daf93471b5 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Mon, 23 Jul 2018 22:02:12 +0100
Subject: [PATCH] fp: Implement FPRSqrtStepFused

---
Reviewer notes (free-form text between "---" and the diffstat is not part of
the commit message):

 * FPRSqrtStepFused negates op1, then evaluates (3.0 + (op1 * op2)) / 2.0 on
   unpacked values: FusedMulAdd produces the sum, the division by two is the
   decrement of the unpacked exponent, and FPRound is applied once at the end.
 * NaN operands are handled first via FPProcessNaNs.
 * An (infinity, zero) operand pair in either order returns positive 1.5; any
   other infinite operand returns an infinity whose sign is negative exactly
   when the unpacked operand signs differ (op1 already negated).
 * A result with a zero mantissa returns a zero that is negative only when the
   rounding mode is TowardsMinusInfinity.
 * FPInfo<...>::OnePointFive builds its value from the sign bit, the top
   explicit mantissa bit and exponent_bias placed in the exponent field.
 * NOTE(review): the From: header carries no e-mail address here; confirm
   whether one needs to be supplied before applying.

 src/CMakeLists.txt                    |  2 +
 src/common/fp/info.h                  |  2 +
 src/common/fp/op.h                    |  1 +
 src/common/fp/op/FPRSqrtStepFused.cpp | 56 +++++++++++++++++++++++++++
 src/common/fp/op/FPRSqrtStepFused.h   | 17 ++++++++
 5 files changed, 78 insertions(+)
 create mode 100644 src/common/fp/op/FPRSqrtStepFused.cpp
 create mode 100644 src/common/fp/op/FPRSqrtStepFused.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 19857141..e1b11f02 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -29,6 +29,8 @@ add_library(dynarmic
     common/fp/op/FPRoundInt.h
     common/fp/op/FPRSqrtEstimate.cpp
     common/fp/op/FPRSqrtEstimate.h
+    common/fp/op/FPRSqrtStepFused.cpp
+    common/fp/op/FPRSqrtStepFused.h
     common/fp/op/FPToFixed.cpp
     common/fp/op/FPToFixed.h
     common/fp/process_exception.cpp
diff --git a/src/common/fp/info.h b/src/common/fp/info.h
index 50a1e81c..a0f265fd 100644
--- a/src/common/fp/info.h
+++ b/src/common/fp/info.h
@@ -33,6 +33,7 @@ struct FPInfo<u32> {
     static constexpr u32 Infinity(bool sign) { return exponent_mask | Zero(sign); }
     static constexpr u32 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
     static constexpr u32 DefaultNaN() { return exponent_mask | (u32(1) << (explicit_mantissa_width - 1)); }
+    static constexpr u32 OnePointFive(bool sign) { return Zero(sign) | (u32(1) << (explicit_mantissa_width - 1)) | (u32(exponent_bias) << explicit_mantissa_width); }
 };
 
 template<>
@@ -55,6 +56,7 @@ struct FPInfo<u64> {
     static constexpr u64 Infinity(bool sign) { return exponent_mask | Zero(sign); }
     static constexpr u64 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
     static constexpr u64 DefaultNaN() { return exponent_mask | (u64(1) << (explicit_mantissa_width - 1)); }
+    static constexpr u64 OnePointFive(bool sign) { return Zero(sign) | (u64(1) << (explicit_mantissa_width - 1)) | (u64(exponent_bias) << explicit_mantissa_width); }
 };
 
 } // namespace Dynarmic::FP
diff --git a/src/common/fp/op.h b/src/common/fp/op.h
index 1fee5180..b74cc14d 100644
--- a/src/common/fp/op.h
+++ b/src/common/fp/op.h
@@ -9,4 +9,5 @@
 #include "common/fp/op/FPMulAdd.h"
 #include "common/fp/op/FPRoundInt.h"
 #include "common/fp/op/FPRSqrtEstimate.h"
+#include "common/fp/op/FPRSqrtStepFused.h"
 #include "common/fp/op/FPToFixed.h"
diff --git a/src/common/fp/op/FPRSqrtStepFused.cpp b/src/common/fp/op/FPRSqrtStepFused.cpp
new file mode 100644
index 00000000..b5ce5570
--- /dev/null
+++ b/src/common/fp/op/FPRSqrtStepFused.cpp
@@ -0,0 +1,56 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include "common/fp/fpcr.h"
+#include "common/fp/fpsr.h"
+#include "common/fp/fused.h"
+#include "common/fp/info.h"
+#include "common/fp/op/FPNeg.h"
+#include "common/fp/op/FPRSqrtStepFused.h"
+#include "common/fp/process_exception.h"
+#include "common/fp/process_nan.h"
+#include "common/fp/unpacked.h"
+
+namespace Dynarmic::FP {
+
+template<typename FPT>
+FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
+    op1 = FPNeg(op1);
+
+    const auto [type1, sign1, value1] = FPUnpack(op1, fpcr, fpsr);
+    const auto [type2, sign2, value2] = FPUnpack(op2, fpcr, fpsr);
+
+    if (const auto maybe_nan = FPProcessNaNs(type1, type2, op1, op2, fpcr, fpsr)) {
+        return *maybe_nan;
+    }
+
+    const bool inf1 = type1 == FPType::Infinity;
+    const bool inf2 = type2 == FPType::Infinity;
+    const bool zero1 = type1 == FPType::Zero;
+    const bool zero2 = type2 == FPType::Zero;
+
+    if ((inf1 && zero2) || (zero1 && inf2)) {
+        return FPInfo<FPT>::OnePointFive(false);
+    }
+
+    if (inf1 || inf2) {
+        return FPInfo<FPT>::Infinity(sign1 != sign2);
+    }
+
+    // result_value = (3.0 + (value1 * value2)) / 2.0
+    FPUnpacked result_value = FusedMulAdd({false, 0, 3}, value1, value2);
+    result_value.exponent--;
+
+    if (result_value.mantissa == 0) {
+        return FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity);
+    }
+    return FPRound<FPT>(result_value, fpcr, fpsr);
+}
+
+template u32 FPRSqrtStepFused(u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
+template u64 FPRSqrtStepFused(u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP
diff --git a/src/common/fp/op/FPRSqrtStepFused.h b/src/common/fp/op/FPRSqrtStepFused.h
new file mode 100644
index 00000000..4847809c
--- /dev/null
+++ b/src/common/fp/op/FPRSqrtStepFused.h
@@ -0,0 +1,17 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+namespace Dynarmic::FP {
+
+class FPCR;
+class FPSR;
+
+template<typename FPT>
+FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP