From 822fd4a87561ce22997122b1e7fd88c961f0df8d Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 31 Jul 2018 16:07:46 +0100 Subject: [PATCH] backend_x64: Fix FPVectorMulAdd and FPMulAdd NaN handling with denormals Denormals should be treated as zero in NaN handler --- src/backend_x64/emit_x64_floating_point.cpp | 11 ++++++----- .../emit_x64_vector_floating_point.cpp | 11 ++++++----- src/common/fp/util.h | 6 +++++- tests/A64/a64.cpp | 19 +++++++++++++++++++ 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp index 4f2a8bc0..4a37a676 100644 --- a/src/backend_x64/emit_x64_floating_point.cpp +++ b/src/backend_x64/emit_x64_floating_point.cpp @@ -158,7 +158,7 @@ void PreProcessNaNs(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbyak::Label& } template -void PreProcessNaNs(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbyak::Xmm c, Xbyak::Label& end, NaNHandler nan_handler) { +void PreProcessNaNs(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm a, Xbyak::Xmm b, Xbyak::Xmm c, Xbyak::Label& end, NaNHandler nan_handler) { using FPT = mp::unsigned_integer_of_size; Xbyak::Label nan; @@ -175,7 +175,8 @@ void PreProcessNaNs(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbyak::Xmm c, code.movq(code.ABI_PARAM1, a); code.movq(code.ABI_PARAM2, b); code.movq(code.ABI_PARAM3, c); - code.CallFunction(static_cast(nan_handler)); + code.mov(code.ABI_PARAM4, ctx.FPCR()); + code.CallFunction(static_cast(nan_handler)); code.movq(a, code.ABI_RETURN); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx())); code.add(rsp, 8); @@ -317,7 +318,7 @@ void FPFourOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn, DenormalsAreZero(code, operand3, gpr_scratch); } if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) { - PreProcessNaNs(code, result, operand2, operand3, end, nan_handler); + PreProcessNaNs(code, ctx, result, operand2, operand3, end, nan_handler); } fn(result, operand2, operand3); if (ctx.FPSCR_FTZ()) { @@ -656,8 +657,8 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { if (code.DoesCpuSupport(Xbyak::util::Cpu::tFMA)) { FPFourOp(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand2, Xbyak::Xmm operand3) { FCODE(vfmadd231s)(result, operand2, operand3); - }, [](FPT a, FPT b, FPT c) -> FPT { - if (FP::IsQNaN(a) && ((FP::IsInf(b) && FP::IsZero(c)) || (FP::IsZero(b) && FP::IsInf(c)))) { + }, [](FPT a, FPT b, FPT c, FP::FPCR fpcr) -> FPT { + if (FP::IsQNaN(a) && ((FP::IsInf(b) && FP::IsZero(c, fpcr)) || (FP::IsZero(b, fpcr) && FP::IsInf(c)))) { return FP::FPInfo::DefaultNaN(); } return *FP::ProcessNaNs(a, b, c); diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp index bbb181b6..91282a48 100644 --- a/src/backend_x64/emit_x64_vector_floating_point.cpp +++ b/src/backend_x64/emit_x64_vector_floating_point.cpp @@ -54,7 +54,7 @@ struct NaNHandler { public: using FPT = mp::unsigned_integer_of_size; - using function_type = void(*)(std::array, narg>&); + using function_type = void(*)(std::array, narg>&, FP::FPCR); static function_type GetDefault() { return GetDefaultImpl(std::make_index_sequence{}); @@ -63,7 +63,7 @@ public: private: template static function_type GetDefaultImpl(std::index_sequence) { - const auto result = [](std::array, narg>& values) { + const auto result = [](std::array, narg>& values, FP::FPCR) { VectorArray& result = values[0]; for (size_t elementi = 0; elementi < result.size(); ++elementi) { const auto current_values = Indexer{}(elementi, values[argi + 1]...); @@ -111,6 +111,7 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, std::array(code, ctx, inst, x64_instruction, - static_cast, 4>& values)>( - [](std::array, 4>& values) { + static_cast, 4>& values, FP::FPCR fpcr)>( + [](std::array, 4>& values, FP::FPCR fpcr) { VectorArray& result = values[0]; const VectorArray& a = values[1]; const VectorArray& b = values[2]; const VectorArray& c = values[3]; for (size_t i = 0; i < result.size(); i++) { - if (FP::IsQNaN(a[i]) && ((FP::IsInf(b[i]) && FP::IsZero(c[i])) || (FP::IsZero(b[i]) && FP::IsInf(c[i])))) { + if (FP::IsQNaN(a[i]) && ((FP::IsInf(b[i]) && FP::IsZero(c[i], fpcr)) || (FP::IsZero(b[i], fpcr) && FP::IsInf(c[i])))) { result[i] = FP::FPInfo::DefaultNaN(); } else if (auto r = FP::ProcessNaNs(a[i], b[i], c[i])) { result[i] = *r; diff --git a/src/common/fp/util.h b/src/common/fp/util.h index 1284bbe4..94ab2b50 100644 --- a/src/common/fp/util.h +++ b/src/common/fp/util.h @@ -9,13 +9,17 @@ #include #include "common/common_types.h" +#include "common/fp/fpcr.h" #include "common/fp/info.h" namespace Dynarmic::FP { /// Is floating point value a zero? template -constexpr bool IsZero(FPT value) { +inline bool IsZero(FPT value, FPCR fpcr) { + if (fpcr.FZ()) { + return (value & FPInfo::exponent_mask) == 0; + } return (value & ~FPInfo::sign_mask) == 0; } diff --git a/tests/A64/a64.cpp b/tests/A64/a64.cpp index 9a13e4ca..4fa3a8ec 100644 --- a/tests/A64/a64.cpp +++ b/tests/A64/a64.cpp @@ -390,3 +390,22 @@ TEST_CASE("A64: FMADD", "[a64]") { REQUIRE(jit.GetVector(10) == Vector{0x3f059921bf0dbfff, 0x0000000000000000}); } + +TEST_CASE("A64: FMLA.4S (denormal)", "[a64]") { + TestEnv env; + Dynarmic::A64::Jit jit{Dynarmic::A64::UserConfig{&env}}; + + env.code_mem[0] = 0x4e2fcccc; // FMLA.4S V12, V6, V15 + env.code_mem[1] = 0x14000000; // B . + + jit.SetPC(0); + jit.SetVector(12, {0x3c9623b17ff80000, 0xbff0000080000076}); + jit.SetVector(6, {0x7ff80000ff800000, 0x09503366c1200000}); + jit.SetVector(15, {0x3ff0000080636d24, 0xbf800000e73a5134}); + jit.SetFpcr(0x01000000); + + env.ticks_left = 2; + jit.Run(); + + REQUIRE(jit.GetVector(12) == Vector{0x7ff800007fc00000, 0xbff0000068e8e581}); +}