From 29dde29560e04bce126795de8eeaef52221b9d22 Mon Sep 17 00:00:00 2001 From: Merry Date: Mon, 28 Aug 2023 14:13:59 +0100 Subject: [PATCH] emit_x64_vector_floating_point: RSqrtEstimate: Add AVX implementation --- .../x64/emit_x64_vector_floating_point.cpp | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp index daed1806..0120ce60 100644 --- a/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp @@ -1795,6 +1795,50 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins ctx.reg_alloc.DefineValue(inst, result); return; } + + if (code.HasHostFeature(HostFeature::AVX)) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const bool fpcr_controlled = args[1].GetImmediateU1(); + + const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(); + + SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel(); + + code.movaps(value, operand); + + code.movaps(xmm0, GetVectorOf(code)); + code.pand(value, xmm0); + code.por(value, GetVectorOf(code)); + + // Detect NaNs, negatives, zeros, denormals and infinities + FCODE(vcmpnge_uqp)(result, value, GetVectorOf::explicit_mantissa_width)>(code)); + code.vptest(result, result); + code.jnz(*bad_values, code.T_NEAR); + + FCODE(sqrtp)(value, value); + code.vmovaps(result, GetVectorOf()>(code)); + FCODE(divp)(result, value); + + ICODE(padd)(result, GetVectorOf(code)); + code.pand(result, xmm0); + + code.L(*end); + + ctx.deferred_emits.emplace_back([=, &code, &ctx] { + code.L(*bad_values); + code.sub(rsp, 8); + ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); + EmitTwoOpFallbackWithoutRegAlloc(code, ctx, result, operand, fallback_fn, fpcr_controlled); + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); + code.add(rsp, 8); + code.jmp(*end, code.T_NEAR); + }); + + ctx.reg_alloc.DefineValue(inst, result); + return; + } } EmitTwoOpFallback(code, ctx, inst, fallback_fn);