fp: Implement FPRSqrtStepFused

2018-07-23 22:02:12 +01:00 · 2018-07-23 22:02:12 +01:00 · 6eb069e80d
commit 6eb069e80d
parent b0ff35fcd1
5 changed files with 78 additions and 0 deletions
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -29,6 +29,8 @@ add_library(dynarmic
    common/fp/op/FPRoundInt.h
    common/fp/op/FPRSqrtEstimate.cpp
    common/fp/op/FPRSqrtEstimate.h
+    common/fp/op/FPRSqrtStepFused.cpp
+    common/fp/op/FPRSqrtStepFused.h
    common/fp/op/FPToFixed.cpp
    common/fp/op/FPToFixed.h
    common/fp/process_exception.cpp
--- a/src/common/fp/info.h
+++ b/src/common/fp/info.h
@ -33,6 +33,7 @@ struct FPInfo<u32> {
    static constexpr u32 Infinity(bool sign) { return exponent_mask | Zero(sign); }
    static constexpr u32 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
    static constexpr u32 DefaultNaN() { return exponent_mask | (u32(1) << (explicit_mantissa_width - 1)); }
+    static constexpr u32 OnePointFive(bool sign) { return Zero(sign) | (u32(1) << (explicit_mantissa_width - 1)) | (u32(exponent_bias) << explicit_mantissa_width); } // Bit pattern for +/-1.5: Zero(sign) supplies the sign, the exponent field is set to exponent_bias (i.e. 2^0), and bit (explicit_mantissa_width - 1) is set -- presumably the top fraction bit, contributing 0.5; confirm against the field definitions.
 };

 template<>
@ -55,6 +56,7 @@ struct FPInfo<u64> {
    static constexpr u64 Infinity(bool sign) { return exponent_mask | Zero(sign); }
    static constexpr u64 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
    static constexpr u64 DefaultNaN() { return exponent_mask | (u64(1) << (explicit_mantissa_width - 1)); }
+    static constexpr u64 OnePointFive(bool sign) { return Zero(sign) | (u64(1) << (explicit_mantissa_width - 1)) | (u64(exponent_bias) << explicit_mantissa_width); } // Bit pattern for +/-1.5: Zero(sign) supplies the sign, the exponent field is set to exponent_bias (i.e. 2^0), and bit (explicit_mantissa_width - 1) is set -- presumably the top fraction bit, contributing 0.5; confirm against the field definitions.
 };

 } // namespace Dynarmic::FP 
--- a/src/common/fp/op.h
+++ b/src/common/fp/op.h
@ -9,4 +9,5 @@
 #include "common/fp/op/FPMulAdd.h"
 #include "common/fp/op/FPRoundInt.h"
 #include "common/fp/op/FPRSqrtEstimate.h"
+#include "common/fp/op/FPRSqrtStepFused.h"
 #include "common/fp/op/FPToFixed.h"
--- a/src/common/fp/op/FPRSqrtStepFused.cpp
+++ b/src/common/fp/op/FPRSqrtStepFused.cpp
@ -0,0 +1,56 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include "common/fp/fpcr.h"
+#include "common/fp/fpsr.h"
+#include "common/fp/fused.h"
+#include "common/fp/info.h"
+#include "common/fp/op/FPNeg.h"
+#include "common/fp/op/FPRSqrtStepFused.h"
+#include "common/fp/process_exception.h"
+#include "common/fp/process_nan.h"
+#include "common/fp/unpacked.h"
+
+namespace Dynarmic::FP {
+
+template<typename FPT> // FPT is the raw encoded floating-point value type; this file instantiates it for u32 and u64.
+FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) { // Computes (3.0 - op1 * op2) / 2.0 through FusedMulAdd and returns the encoded result; fpcr/fpsr are forwarded to the unpack, NaN and rounding helpers.
+    op1 = FPNeg(op1); // Negate once up front so the "3.0 + value1 * value2" below is 3.0 minus the product of the original operands.
+
+    const auto [type1, sign1, value1] = FPUnpack<FPT>(op1, fpcr, fpsr); // Note: type1/sign1/value1 describe the already-negated op1.
+    const auto [type2, sign2, value2] = FPUnpack<FPT>(op2, fpcr, fpsr);
+    
+    if (const auto maybe_nan = FPProcessNaNs(type1, type2, op1, op2, fpcr, fpsr)) { // NaN handling is delegated; if the helper yields a value, that value is the final result.
+        return *maybe_nan;
+    }
+
+    const bool inf1 = type1 == FPType::Infinity;
+    const bool inf2 = type2 == FPType::Infinity;
+    const bool zero1 = type1 == FPType::Zero;
+    const bool zero2 = type2 == FPType::Zero;
+
+    if ((inf1 && zero2) || (zero1 && inf2)) { // Infinity times zero in either order: return positive 1.5 directly instead of multiplying.
+        return FPInfo<FPT>::OnePointFive(false);
+    }
+
+    if (inf1 || inf2) { // Any other infinite operand: result is an infinity whose sign is set when the (negated) op1 and op2 signs differ.
+        return FPInfo<FPT>::Infinity(sign1 != sign2);
+    }
+
+    // result_value = (3.0 + (value1 * value2)) / 2.0, where value1 already carries the negation applied above
+    FPUnpacked result_value = FusedMulAdd({false, 0, 3}, value1, value2); // {false, 0, 3} is the 3.0 addend (presumably sign, exponent, mantissa -- confirm against FPUnpacked).
+    result_value.exponent--; // Halve by decrementing the exponent; this is the "/ 2.0" of the formula.
+
+    if (result_value.mantissa == 0) { // Zero result: the sign is negative only when rounding towards minus infinity, positive otherwise.
+        return FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity);
+    }
+    return FPRound<FPT>(result_value, fpcr, fpsr); // Non-zero result: FPRound converts the unpacked value back to FPT.
+}
+
+template u32 FPRSqrtStepFused<u32>(u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr); // Explicit instantiation: the template body lives in this .cpp, so each supported FPT must be instantiated here.
+template u64 FPRSqrtStepFused<u64>(u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr); // Same for u64.
+
+} // namespace Dynarmic::FP 
--- a/src/common/fp/op/FPRSqrtStepFused.h
+++ b/src/common/fp/op/FPRSqrtStepFused.h
@ -0,0 +1,17 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+namespace Dynarmic::FP {
+
+class FPCR;
+class FPSR;
+
+template<typename FPT> // Declaration only; the definition and its explicit instantiations (u32, u64) are in FPRSqrtStepFused.cpp.
+FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr); // Returns (3.0 - op1 * op2) / 2.0 as an encoded FPT value.
+
+} // namespace Dynarmic::FP