Merge pull request #437 from lioncash/frecpx

A64: Implement FRECPX (single, double precision)
2019-03-03 14:42:23 +00:00 · 2019-03-03 14:42:23 +00:00 · 9f11720a69
commit 9f11720a69
parent 34d917f34e e44730ba6d
10 changed files with 128 additions and 1 deletions
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -28,6 +28,8 @@ add_library(dynarmic
    common/fp/op/FPMulAdd.h
    common/fp/op/FPRecipEstimate.cpp
    common/fp/op/FPRecipEstimate.h
    common/fp/op/FPRecipExponent.cpp
    common/fp/op/FPRecipExponent.h
    common/fp/op/FPRecipStepFused.cpp
    common/fp/op/FPRecipStepFused.h
    common/fp/op/FPRoundInt.cpp
--- a/src/backend/x64/emit_x64_floating_point.cpp
+++ b/src/backend/x64/emit_x64_floating_point.cpp
@ -719,6 +719,23 @@ void EmitX64::EmitFPRecipEstimate64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPRecipEstimate<u64>(code, ctx, inst);
 }
 // Emits a host call to FP::FPRecipExponent<FPT> for the given IR instruction.
 // FPT is the unsigned integer type carrying the raw floating-point bits; the
 // wrappers in this file instantiate it with u32 and u64.
 //
 // The statements below must stay in this order: the register allocator is
 // told about the host call first, then the remaining call parameters are
 // materialised, and only then is the call itself emitted.
 template <typename FPT>
 static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    // NOTE(review): presumably HostCall places args[0] in the first host ABI
    // parameter register and associates the call's return value with inst --
    // confirm against the register allocator.
    ctx.reg_alloc.HostCall(inst, args[0]);
    // Second parameter: the value of ctx.FPCR(), written to the 32-bit view of
    // ABI_PARAM2 (corresponds to the callee's `FPCR fpcr` parameter).
    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR());
    // Third parameter: the address r15 + offsetof_fpsr_exc (corresponds to the
    // callee's `FPSR& fpsr` parameter).
    // NOTE(review): assumes r15 holds the JIT state base pointer -- confirm.
    code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.CallFunction(&FP::FPRecipExponent<FPT>);
 }
 // Backend entry point for the FPRecipExponent32 opcode: forwards to the shared
 // emitter using u32 as the raw floating-point storage type.
 void EmitX64::EmitFPRecipExponent32(EmitContext& ctx, IR::Inst* inst) {
    EmitFPRecipExponent<u32>(code, ctx, inst);
 }
 // Backend entry point for the FPRecipExponent64 opcode: forwards to the shared
 // emitter using u64 as the raw floating-point storage type.
 void EmitX64::EmitFPRecipExponent64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPRecipExponent<u64>(code, ctx, inst);
 }
 template<size_t fsize>
 static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    using FPT = mp::unsigned_integer_of_size<fsize>;
--- a/src/common/fp/op.h
+++ b/src/common/fp/op.h
@ -8,6 +8,7 @@
 #include "common/fp/op/FPMulAdd.h"
 #include "common/fp/op/FPRecipEstimate.h"
 #include "common/fp/op/FPRecipExponent.h"
 #include "common/fp/op/FPRecipStepFused.h"
 #include "common/fp/op/FPRoundInt.h"
 #include "common/fp/op/FPRSqrtEstimate.h"
--- a/src/common/fp/op/FPRecipExponent.cpp
+++ b/src/common/fp/op/FPRecipExponent.cpp
@ -0,0 +1,70 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #include <tuple>
 #include "common/common_types.h"
 #include "common/bit_util.h"
 #include "common/fp/fpcr.h"
 #include "common/fp/fpsr.h"
 #include "common/fp/info.h"
 #include "common/fp/op/FPRecipExponent.h"
 #include "common/fp/process_nan.h"
 #include "common/fp/unpacked.h"
 namespace Dynarmic::FP {
 namespace {
 // Extracts the raw exponent field from the bit pattern `value` of a
 // floating-point number whose storage type is FPT:
 //   - FPT the size of u32: Common::Bits<23, 30>
 //   - FPT the size of u64: Common::Bits<52, 62>
 //   - anything else (half-float layout): Common::Bits<10, 14>
 //
 // The cases form a single if-constexpr chain so that exactly one return
 // statement is instantiated for any given FPT. This leaves no unreachable
 // code behind, so no compiler-specific warning suppression is required.
 template <typename FPT>
 FPT DetermineExponentValue(size_t value) {
    if constexpr (sizeof(FPT) == sizeof(u32)) {
        return static_cast<FPT>(Common::Bits<23, 30>(value));
    } else if constexpr (sizeof(FPT) == sizeof(u64)) {
        return static_cast<FPT>(Common::Bits<52, 62>(value));
    } else {
        // Half-float
        return static_cast<FPT>(Common::Bits<10, 14>(value));
    }
 }
 } // Anonymous namespace
 template <typename FPT>
 FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) {
    const auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);
    (void)value;
    if (type == FPType::SNaN || type == FPType::QNaN) {
        return FPProcessNaN(type, op, fpcr, fpsr);
    }
    const FPT sign_bits = FPInfo<FPT>::Zero(sign);
    const FPT exponent = DetermineExponentValue<FPT>(op);
    // Zero and denormals
    if (exponent == 0) {
        const FPT max_exponent = Common::Ones<FPT>(FPInfo<FPT>::exponent_width) - 1;
        return FPT(sign_bits | (max_exponent << FPInfo<FPT>::explicit_mantissa_width));
    }
    // Infinities and normals
    const auto negated_exponent = (~exponent << FPInfo<FPT>::explicit_mantissa_width) & FPInfo<FPT>::exponent_mask;
    return FPT(sign_bits | negated_exponent);
 }
 template u32 FPRecipExponent<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
 template u64 FPRecipExponent<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
 } // namespace Dynarmic::FP
--- a/src/common/fp/op/FPRecipExponent.h
+++ b/src/common/fp/op/FPRecipExponent.h
@ -0,0 +1,17 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2019 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 namespace Dynarmic::FP {
 // Forward declarations; only the names are needed to declare the function below.
 class FPCR;
 class FPSR;
 // Reciprocal-exponent operation on a raw floating-point bit pattern.
 //
 // op   - raw bits of the input value; FPT is the unsigned integer storage type.
 // fpcr - floating-point control state (taken by value).
 // fpsr - floating-point status (taken by reference).
 //
 // Only a declaration is provided here; the definition and its explicit
 // instantiations (u32 and u64) live in FPRecipExponent.cpp.
 template <typename FPT>
 FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr);
 } // namespace Dynarmic::FP
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@ -423,7 +423,7 @@ INST(FCVTZS_int_2,           "FCVTZS (vector, integer)",                  "01011
 //INST(FRECPE_1,               "FRECPE",                                    "0101111011111001110110nnnnnddddd")
 INST(FRECPE_2,               "FRECPE",                                    "010111101z100001110110nnnnnddddd")
 //INST(FRECPX_1,               "FRECPX",                                    "0101111011111001111110nnnnnddddd")
-//INST(FRECPX_2,               "FRECPX",                                    "010111101z100001111110nnnnnddddd")
+INST(FRECPX_2,               "FRECPX",                                    "010111101z100001111110nnnnnddddd")
 //INST(FCVTNU_1,               "FCVTNU (vector)",                           "0111111001111001101010nnnnnddddd")
 INST(FCVTNU_2,               "FCVTNU (vector)",                           "011111100z100001101010nnnnnddddd")
 //INST(FCVTMU_1,               "FCVTMU (vector)",                           "0111111001111001101110nnnnnddddd")
--- a/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp
+++ b/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp
@ -170,6 +170,16 @@ bool TranslatorVisitor::FRECPE_2(bool sz, Vec Vn, Vec Vd) {
    return true;
 }
 bool TranslatorVisitor::FRECPX_2(bool sz, Vec Vn, Vec Vd) {
    const size_t esize = sz ? 64 : 32;
    const IR::U32U64 operand = V_scalar(esize, Vn);
    const IR::U32U64 result = ir.FPRecipExponent(operand);
    V_scalar(esize, Vd, result);
    return true;
 }
 bool TranslatorVisitor::FRSQRTE_2(bool sz, Vec Vn, Vec Vd) {
    const size_t esize = sz ? 64 : 32;
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@ -1895,6 +1895,13 @@ U32U64 IREmitter::FPRecipEstimate(const U32U64& a) {
    return Inst<U64>(Opcode::FPRecipEstimate64, a);
 }
 // Emits the reciprocal-exponent IR instruction matching the operand width:
 // FPRecipExponent32 for a U32 operand, FPRecipExponent64 for anything else.
 U32U64 IREmitter::FPRecipExponent(const U32U64& a) {
    const bool is_32bit = a.GetType() == Type::U32;
    if (!is_32bit) {
        return Inst<U64>(Opcode::FPRecipExponent64, a);
    }
    return Inst<U32>(Opcode::FPRecipExponent32, a);
 }
 U32U64 IREmitter::FPRecipStepFused(const U32U64& a, const U32U64& b) {
    if (a.GetType() == Type::U32) {
        return Inst<U32>(Opcode::FPRecipStepFused32, a, b);
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@ -305,6 +305,7 @@ public:
    U32U64 FPMulX(const U32U64& a, const U32U64& b);
    U32U64 FPNeg(const U32U64& a);
    U32U64 FPRecipEstimate(const U32U64& a);
    U32U64 FPRecipExponent(const U32U64& a);
    U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b);
    U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact);
    U32U64 FPRSqrtEstimate(const U32U64& a);
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@ -484,6 +484,8 @@ OPCODE(FPNeg32,                                             U32,            U32
 OPCODE(FPNeg64,                                             U64,            U64                                                             )
 OPCODE(FPRecipEstimate32,                                   U32,            U32                                                             )
 OPCODE(FPRecipEstimate64,                                   U64,            U64                                                             )
 OPCODE(FPRecipExponent32,                                   U32,            U32                                                             )
 OPCODE(FPRecipExponent64,                                   U64,            U64                                                             )
 OPCODE(FPRecipStepFused32,                                  U32,            U32,            U32                                             )
 OPCODE(FPRecipStepFused64,                                  U64,            U64,            U64                                             )
 OPCODE(FPRoundInt32,                                        U32,            U32,            U8,             U1                              )