Merge pull request #437 from lioncash/frecpx

A64: Implement FRECPX (single, double precision)
2019-03-03 14:42:23 +00:00 · 2019-03-03 14:42:23 +00:00 · 9f11720a69
commit 9f11720a69
parent 34d917f34e e44730ba6d
10 changed files with 128 additions and 1 deletions
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -28,6 +28,8 @@ add_library(dynarmic
    common/fp/op/FPMulAdd.h
    common/fp/op/FPRecipEstimate.cpp
    common/fp/op/FPRecipEstimate.h
+    common/fp/op/FPRecipExponent.cpp
+    common/fp/op/FPRecipExponent.h
    common/fp/op/FPRecipStepFused.cpp
    common/fp/op/FPRecipStepFused.h
    common/fp/op/FPRoundInt.cpp
--- a/src/backend/x64/emit_x64_floating_point.cpp
+++ b/src/backend/x64/emit_x64_floating_point.cpp
@ -719,6 +719,23 @@ void EmitX64::EmitFPRecipEstimate64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPRecipEstimate<u64>(code, ctx, inst);
 }

+// Emits a host call to FP::FPRecipExponent<FPT> for the given IR instruction.
+// FPT is the unsigned integer type holding the operand bits; the EmitX64
+// wrappers use u32 and u64.
+template <typename FPT>
+static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    // Prepare the host call with the instruction's single argument
+    // (presumably placed in the first ABI parameter register -- confirm against HostCall).
+    ctx.reg_alloc.HostCall(inst, args[0]);
+    // Second parameter of FPRecipExponent: the FPCR value, written to the 32-bit view of ABI_PARAM2.
+    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR());
+    // Third parameter: address of the fpsr_exc field, computed relative to r15
+    // (presumably the JIT state pointer -- confirm), matching the callee's FPSR& parameter.
+    code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.CallFunction(&FP::FPRecipExponent<FPT>);
+}
+
+// FPRecipExponent32: forwards to the shared emitter with u32 as the operand type.
+void EmitX64::EmitFPRecipExponent32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPRecipExponent<u32>(code, ctx, inst);
+}
+
+// FPRecipExponent64: forwards to the shared emitter with u64 as the operand type.
+void EmitX64::EmitFPRecipExponent64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPRecipExponent<u64>(code, ctx, inst);
+}
+
 template<size_t fsize>
 static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    using FPT = mp::unsigned_integer_of_size<fsize>;
--- a/src/common/fp/op.h
+++ b/src/common/fp/op.h
@ -8,6 +8,7 @@

 #include "common/fp/op/FPMulAdd.h"
 #include "common/fp/op/FPRecipEstimate.h"
+#include "common/fp/op/FPRecipExponent.h"
 #include "common/fp/op/FPRecipStepFused.h"
 #include "common/fp/op/FPRoundInt.h"
 #include "common/fp/op/FPRSqrtEstimate.h"
--- a/src/common/fp/op/FPRecipExponent.cpp
+++ b/src/common/fp/op/FPRecipExponent.cpp
@ -0,0 +1,70 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include <tuple>
+
+#include "common/common_types.h"
+#include "common/bit_util.h"
+#include "common/fp/fpcr.h"
+#include "common/fp/fpsr.h"
+#include "common/fp/info.h"
+#include "common/fp/op/FPRecipExponent.h"
+#include "common/fp/process_nan.h"
+#include "common/fp/unpacked.h"
+
+namespace Dynarmic::FP {
+namespace {
+// Extracts the raw (biased) exponent field from the bit pattern of a
+// floating-point value whose storage type is FPT.
+//
+// The branches are chained with `else` so that only the branch matching FPT
+// is instantiated. No unreachable statements remain after an early return,
+// so no unreachable-code warning suppression is needed around this function.
+// TODO: The final branch is only selected once half-float support is added.
+template <typename FPT>
+FPT DetermineExponentValue(size_t value) {
+    if constexpr (sizeof(FPT) == sizeof(u32)) {
+        // Single precision: exponent field is bits 23..30.
+        return static_cast<FPT>(Common::Bits<23, 30>(value));
+    } else if constexpr (sizeof(FPT) == sizeof(u64)) {
+        // Double precision: exponent field is bits 52..62.
+        return static_cast<FPT>(Common::Bits<52, 62>(value));
+    } else {
+        // Half-float: exponent field is bits 10..14.
+        return static_cast<FPT>(Common::Bits<10, 14>(value));
+    }
+}
+} // Anonymous namespace
+
+// Computes the reciprocal-exponent result for the raw bit pattern `op`.
+// NaN operands are handed to FPProcessNaN. For every other operand the result
+// is FPInfo<FPT>::Zero(sign) combined with a replacement exponent field:
+// the bitwise inverse of the operand's exponent field, or (all-ones - 1) when
+// that field is zero.
+template <typename FPT>
+FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) {
+    const auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);
+    (void)value;
+
+    const bool operand_is_nan = type == FPType::SNaN || type == FPType::QNaN;
+    if (operand_is_nan) {
+        return FPProcessNaN(type, op, fpcr, fpsr);
+    }
+
+    const FPT biased_exponent = DetermineExponentValue<FPT>(op);
+    const FPT signed_zero = FPInfo<FPT>::Zero(sign);
+
+    if (biased_exponent != 0) {
+        // Infinities and normals: invert the exponent field and keep only its bits.
+        const auto inverted_field = (~biased_exponent << FPInfo<FPT>::explicit_mantissa_width) & FPInfo<FPT>::exponent_mask;
+        return FPT(signed_zero | inverted_field);
+    }
+
+    // Zero and denormals: use an exponent field of all ones minus one.
+    const FPT max_exp = Common::Ones<FPT>(FPInfo<FPT>::exponent_width) - 1;
+    return FPT(signed_zero | (max_exp << FPInfo<FPT>::explicit_mantissa_width));
+}
+
+// Explicit instantiations for the u32 and u64 operand types. The header only
+// declares the template, so these are the definitions other translation units use.
+template u32 FPRecipExponent<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
+template u64 FPRecipExponent<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP
--- a/src/common/fp/op/FPRecipExponent.h
+++ b/src/common/fp/op/FPRecipExponent.h
@ -0,0 +1,17 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2019 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+namespace Dynarmic::FP {
+
+class FPCR;
+class FPSR;
+
+/// Reciprocal-exponent operation on a raw floating-point bit pattern
+/// (used to implement the A64 FRECPX instruction).
+///
+/// @param op   Operand bits, stored in an unsigned integer of type FPT.
+/// @param fpcr Floating-point control state used when unpacking the operand.
+/// @param fpsr Floating-point status passed on to the unpacking and NaN handling
+///             (presumably updated with any raised exception flags -- confirm).
+/// @return The result bit pattern, in the same storage type as the operand.
+///
+/// Only declared here; the definition is explicitly instantiated for u32 and u64
+/// in FPRecipExponent.cpp.
+template <typename FPT>
+FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@ -423,7 +423,7 @@ INST(FCVTZS_int_2,           "FCVTZS (vector, integer)",                  "01011
 //INST(FRECPE_1,               "FRECPE",                                    "0101111011111001110110nnnnnddddd")
 INST(FRECPE_2,               "FRECPE",                                    "010111101z100001110110nnnnnddddd")
 //INST(FRECPX_1,               "FRECPX",                                    "0101111011111001111110nnnnnddddd")
-//INST(FRECPX_2,               "FRECPX",                                    "010111101z100001111110nnnnnddddd")
+INST(FRECPX_2,               "FRECPX",                                    "010111101z100001111110nnnnnddddd")
 //INST(FCVTNU_1,               "FCVTNU (vector)",                           "0111111001111001101010nnnnnddddd")
 INST(FCVTNU_2,               "FCVTNU (vector)",                           "011111100z100001101010nnnnnddddd")
 //INST(FCVTMU_1,               "FCVTMU (vector)",                           "0111111001111001101110nnnnnddddd")
--- a/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp
+++ b/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp
@ -170,6 +170,16 @@ bool TranslatorVisitor::FRECPE_2(bool sz, Vec Vn, Vec Vd) {
    return true;
 }

+// FRECPX (scalar): reads the scalar element from Vn, applies the
+// reciprocal-exponent IR operation and writes the result to Vd.
+// The sz bit selects a 64-bit element when set, otherwise a 32-bit one.
+bool TranslatorVisitor::FRECPX_2(bool sz, Vec Vn, Vec Vd) {
+    const size_t element_bits = sz ? 64 : 32;
+    const IR::U32U64 source = V_scalar(element_bits, Vn);
+
+    V_scalar(element_bits, Vd, ir.FPRecipExponent(source));
+    return true;
+}
+
 bool TranslatorVisitor::FRSQRTE_2(bool sz, Vec Vn, Vec Vd) {
    const size_t esize = sz ? 64 : 32;

--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@ -1895,6 +1895,13 @@ U32U64 IREmitter::FPRecipEstimate(const U32U64& a) {
    return Inst<U64>(Opcode::FPRecipEstimate64, a);
 }

+// Emits the reciprocal-exponent opcode matching the operand's width:
+// the 32-bit opcode for a U32 operand, the 64-bit opcode otherwise.
+U32U64 IREmitter::FPRecipExponent(const U32U64& a) {
+    const bool is_32_bit = a.GetType() == Type::U32;
+    if (!is_32_bit) {
+        return Inst<U64>(Opcode::FPRecipExponent64, a);
+    }
+    return Inst<U32>(Opcode::FPRecipExponent32, a);
+}
+
 U32U64 IREmitter::FPRecipStepFused(const U32U64& a, const U32U64& b) {
    if (a.GetType() == Type::U32) {
        return Inst<U32>(Opcode::FPRecipStepFused32, a, b);
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@ -305,6 +305,7 @@ public:
    U32U64 FPMulX(const U32U64& a, const U32U64& b);
    U32U64 FPNeg(const U32U64& a);
    U32U64 FPRecipEstimate(const U32U64& a);
+    U32U64 FPRecipExponent(const U32U64& a);
    U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b);
    U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact);
    U32U64 FPRSqrtEstimate(const U32U64& a);
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@ -484,6 +484,8 @@ OPCODE(FPNeg32,                                             U32,            U32
 OPCODE(FPNeg64,                                             U64,            U64                                                             )
 OPCODE(FPRecipEstimate32,                                   U32,            U32                                                             )
 OPCODE(FPRecipEstimate64,                                   U64,            U64                                                             )
+OPCODE(FPRecipExponent32,                                   U32,            U32                                                             )
+OPCODE(FPRecipExponent64,                                   U64,            U64                                                             )
 OPCODE(FPRecipStepFused32,                                  U32,            U32,            U32                                             )
 OPCODE(FPRecipStepFused64,                                  U64,            U64,            U64                                             )
 OPCODE(FPRoundInt32,                                        U32,            U32,            U8,             U1                              )