Merge pull request #437 from lioncash/frecpx

A64: Implement FRECPX (single, double precision)
This commit is contained in:
Merry 2019-03-03 14:42:23 +00:00 committed by MerryMage
commit 9f11720a69
10 changed files with 128 additions and 1 deletion

View file

@ -28,6 +28,8 @@ add_library(dynarmic
common/fp/op/FPMulAdd.h common/fp/op/FPMulAdd.h
common/fp/op/FPRecipEstimate.cpp common/fp/op/FPRecipEstimate.cpp
common/fp/op/FPRecipEstimate.h common/fp/op/FPRecipEstimate.h
common/fp/op/FPRecipExponent.cpp
common/fp/op/FPRecipExponent.h
common/fp/op/FPRecipStepFused.cpp common/fp/op/FPRecipStepFused.cpp
common/fp/op/FPRecipStepFused.h common/fp/op/FPRecipStepFused.h
common/fp/op/FPRoundInt.cpp common/fp/op/FPRoundInt.cpp

View file

@ -719,6 +719,23 @@ void EmitX64::EmitFPRecipEstimate64(EmitContext& ctx, IR::Inst* inst) {
EmitFPRecipEstimate<u64>(code, ctx, inst); EmitFPRecipEstimate<u64>(code, ctx, inst);
} }
/// Emits a host call to FP::FPRecipExponent<FPT> for the given IR instruction.
///
/// The call is set up with three arguments:
///   1. the instruction's first IR argument (handed to the register
///      allocator's HostCall together with the instruction itself),
///   2. the value of ctx.FPCR(), moved into the 32-bit view of ABI_PARAM2,
///   3. the address r15 + offsetof_fpsr_exc, loaded into ABI_PARAM3.
///
/// @tparam FPT Unsigned integer type carrying the floating-point bit pattern.
template <typename FPT>
static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    auto arguments = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.HostCall(inst, arguments[0]);

    // Second parameter: the FPCR value, written to the 32-bit register view.
    const auto fpcr_param = code.ABI_PARAM2.cvt32();
    code.mov(fpcr_param, ctx.FPCR());

    // Third parameter: address of the fpsr_exc field relative to r15.
    const auto fpsr_exc_address = code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc];
    code.lea(code.ABI_PARAM3, fpsr_exc_address);

    code.CallFunction(&FP::FPRecipExponent<FPT>);
}
// Emitter entry point for the FPRecipExponent32 IR opcode: forwards to the
// shared templated emitter, instantiated with u32.
void EmitX64::EmitFPRecipExponent32(EmitContext& ctx, IR::Inst* inst) {
EmitFPRecipExponent<u32>(code, ctx, inst);
}
// Emitter entry point for the FPRecipExponent64 IR opcode: forwards to the
// shared templated emitter, instantiated with u64.
void EmitX64::EmitFPRecipExponent64(EmitContext& ctx, IR::Inst* inst) {
EmitFPRecipExponent<u64>(code, ctx, inst);
}
template<size_t fsize> template<size_t fsize>
static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>; using FPT = mp::unsigned_integer_of_size<fsize>;

View file

@ -8,6 +8,7 @@
#include "common/fp/op/FPMulAdd.h" #include "common/fp/op/FPMulAdd.h"
#include "common/fp/op/FPRecipEstimate.h" #include "common/fp/op/FPRecipEstimate.h"
#include "common/fp/op/FPRecipExponent.h"
#include "common/fp/op/FPRecipStepFused.h" #include "common/fp/op/FPRecipStepFused.h"
#include "common/fp/op/FPRoundInt.h" #include "common/fp/op/FPRoundInt.h"
#include "common/fp/op/FPRSqrtEstimate.h" #include "common/fp/op/FPRSqrtEstimate.h"

View file

@ -0,0 +1,70 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <tuple>
#include "common/common_types.h"
#include "common/bit_util.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/info.h"
#include "common/fp/op/FPRecipExponent.h"
#include "common/fp/process_nan.h"
#include "common/fp/unpacked.h"
namespace Dynarmic::FP {
namespace {
/// Extracts the raw exponent field from a floating-point bit pattern.
///
/// The bit range is selected at compile time from the size of FPT:
///   - same size as u32: bits [30:23]
///   - same size as u64: bits [62:52]
///   - anything else:    bits [14:10] (half-float layout)
///
/// The branches form a single if/else-if/else constexpr chain so that exactly
/// one return statement is instantiated per FPT. With no code following a
/// taken branch there is nothing for MSVC to report as unreachable (C4702),
/// so no warning pragmas are needed and the translation unit's warning state
/// is left untouched.
///
/// NOTE(review): value is passed as size_t, so extracting bits [62:52]
/// assumes size_t is at least 64 bits wide - confirm if 32-bit hosts matter.
///
/// @tparam FPT   Unsigned integer type carrying the floating-point bit pattern.
/// @param  value Bit pattern to extract the exponent field from.
/// @return The exponent field, shifted down to bit 0, as an FPT.
template <typename FPT>
FPT DetermineExponentValue(size_t value) {
    if constexpr (sizeof(FPT) == sizeof(u32)) {
        return static_cast<FPT>(Common::Bits<23, 30>(value));
    } else if constexpr (sizeof(FPT) == sizeof(u64)) {
        return static_cast<FPT>(Common::Bits<52, 62>(value));
    } else {
        // Half-float
        return static_cast<FPT>(Common::Bits<10, 14>(value));
    }
}
} // Anonymous namespace
template <typename FPT>
FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) {
const auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);
(void)value;
if (type == FPType::SNaN || type == FPType::QNaN) {
return FPProcessNaN(type, op, fpcr, fpsr);
}
const FPT sign_bits = FPInfo<FPT>::Zero(sign);
const FPT exponent = DetermineExponentValue<FPT>(op);
// Zero and denormals
if (exponent == 0) {
const FPT max_exponent = Common::Ones<FPT>(FPInfo<FPT>::exponent_width) - 1;
return FPT(sign_bits | (max_exponent << FPInfo<FPT>::explicit_mantissa_width));
}
// Infinities and normals
const auto negated_exponent = (~exponent << FPInfo<FPT>::explicit_mantissa_width) & FPInfo<FPT>::exponent_mask;
return FPT(sign_bits | negated_exponent);
}
template u32 FPRecipExponent<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
template u64 FPRecipExponent<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP

View file

@ -0,0 +1,17 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2019 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
namespace Dynarmic::FP {
// Forward declarations only; this header does not need the full definitions
// to declare the function below.
class FPCR;
class FPSR;
/// Computes the reciprocal-exponent result for the bit pattern `op`.
///
/// NaN operands are handled through FPProcessNaN. For every other operand
/// the result's exponent field is either (all ones - 1), when the operand's
/// exponent field is zero, or the bitwise inverse of the operand's exponent
/// field.
///
/// The definition lives in FPRecipExponent.cpp, which explicitly
/// instantiates it for u32 and u64.
///
/// @tparam FPT  Unsigned integer type carrying the floating-point bit pattern.
/// @param  op   Operand bit pattern.
/// @param  fpcr Floating-point control state.
/// @param  fpsr Floating-point status state, passed by reference.
/// @return The result bit pattern.
template <typename FPT>
FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP

View file

@ -423,7 +423,7 @@ INST(FCVTZS_int_2, "FCVTZS (vector, integer)", "01011
//INST(FRECPE_1, "FRECPE", "0101111011111001110110nnnnnddddd") //INST(FRECPE_1, "FRECPE", "0101111011111001110110nnnnnddddd")
INST(FRECPE_2, "FRECPE", "010111101z100001110110nnnnnddddd") INST(FRECPE_2, "FRECPE", "010111101z100001110110nnnnnddddd")
//INST(FRECPX_1, "FRECPX", "0101111011111001111110nnnnnddddd") //INST(FRECPX_1, "FRECPX", "0101111011111001111110nnnnnddddd")
//INST(FRECPX_2, "FRECPX", "010111101z100001111110nnnnnddddd") INST(FRECPX_2, "FRECPX", "010111101z100001111110nnnnnddddd")
//INST(FCVTNU_1, "FCVTNU (vector)", "0111111001111001101010nnnnnddddd") //INST(FCVTNU_1, "FCVTNU (vector)", "0111111001111001101010nnnnnddddd")
INST(FCVTNU_2, "FCVTNU (vector)", "011111100z100001101010nnnnnddddd") INST(FCVTNU_2, "FCVTNU (vector)", "011111100z100001101010nnnnnddddd")
//INST(FCVTMU_1, "FCVTMU (vector)", "0111111001111001101110nnnnnddddd") //INST(FCVTMU_1, "FCVTMU (vector)", "0111111001111001101110nnnnnddddd")

View file

@ -170,6 +170,16 @@ bool TranslatorVisitor::FRECPE_2(bool sz, Vec Vn, Vec Vd) {
return true; return true;
} }
bool TranslatorVisitor::FRECPX_2(bool sz, Vec Vn, Vec Vd) {
const size_t esize = sz ? 64 : 32;
const IR::U32U64 operand = V_scalar(esize, Vn);
const IR::U32U64 result = ir.FPRecipExponent(operand);
V_scalar(esize, Vd, result);
return true;
}
bool TranslatorVisitor::FRSQRTE_2(bool sz, Vec Vn, Vec Vd) { bool TranslatorVisitor::FRSQRTE_2(bool sz, Vec Vn, Vec Vd) {
const size_t esize = sz ? 64 : 32; const size_t esize = sz ? 64 : 32;

View file

@ -1895,6 +1895,13 @@ U32U64 IREmitter::FPRecipEstimate(const U32U64& a) {
return Inst<U64>(Opcode::FPRecipEstimate64, a); return Inst<U64>(Opcode::FPRecipEstimate64, a);
} }
/// Emits the FPRecipExponent IR instruction matching the operand's width.
///
/// @param a Operand; a value of type U32 selects the 32-bit opcode, anything
///          else selects the 64-bit opcode.
/// @return The emitted instruction's result, typed to match the chosen opcode.
U32U64 IREmitter::FPRecipExponent(const U32U64& a) {
    const bool is_single = a.GetType() == Type::U32;
    if (!is_single) {
        return Inst<U64>(Opcode::FPRecipExponent64, a);
    }
    return Inst<U32>(Opcode::FPRecipExponent32, a);
}
U32U64 IREmitter::FPRecipStepFused(const U32U64& a, const U32U64& b) { U32U64 IREmitter::FPRecipStepFused(const U32U64& a, const U32U64& b) {
if (a.GetType() == Type::U32) { if (a.GetType() == Type::U32) {
return Inst<U32>(Opcode::FPRecipStepFused32, a, b); return Inst<U32>(Opcode::FPRecipStepFused32, a, b);

View file

@ -305,6 +305,7 @@ public:
U32U64 FPMulX(const U32U64& a, const U32U64& b); U32U64 FPMulX(const U32U64& a, const U32U64& b);
U32U64 FPNeg(const U32U64& a); U32U64 FPNeg(const U32U64& a);
U32U64 FPRecipEstimate(const U32U64& a); U32U64 FPRecipEstimate(const U32U64& a);
U32U64 FPRecipExponent(const U32U64& a);
U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b); U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b);
U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact); U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact);
U32U64 FPRSqrtEstimate(const U32U64& a); U32U64 FPRSqrtEstimate(const U32U64& a);

View file

@ -484,6 +484,8 @@ OPCODE(FPNeg32, U32, U32
OPCODE(FPNeg64, U64, U64 ) OPCODE(FPNeg64, U64, U64 )
OPCODE(FPRecipEstimate32, U32, U32 ) OPCODE(FPRecipEstimate32, U32, U32 )
OPCODE(FPRecipEstimate64, U64, U64 ) OPCODE(FPRecipEstimate64, U64, U64 )
OPCODE(FPRecipExponent32, U32, U32 )
OPCODE(FPRecipExponent64, U64, U64 )
OPCODE(FPRecipStepFused32, U32, U32, U32 ) OPCODE(FPRecipStepFused32, U32, U32, U32 )
OPCODE(FPRecipStepFused64, U64, U64, U64 ) OPCODE(FPRecipStepFused64, U64, U64, U64 )
OPCODE(FPRoundInt32, U32, U32, U8, U1 ) OPCODE(FPRoundInt32, U32, U32, U8, U1 )