diff --git a/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 5b8969b0..2ff633a1 100644 --- a/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -766,12 +766,16 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - if constexpr (fsize == 32) { - code.rcpss(result, operand); + if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { + FCODE(vrcp14s)(result, operand, operand); } else { - code.cvtsd2ss(result, operand); - code.rcpss(result, result); - code.cvtss2sd(result, result); + if constexpr (fsize == 32) { + code.rcpss(result, operand); + } else { + code.cvtsd2ss(result, operand); + code.rcpss(result, result); + code.cvtss2sd(result, result); + } } ctx.reg_alloc.DefineValue(inst, result); @@ -984,20 +988,22 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - if constexpr (fsize == 32) { - code.rsqrtss(result, operand); + if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { + FCODE(vrsqrt14s)(result, operand, operand); } else { - code.cvtsd2ss(result, operand); - code.rsqrtss(result, result); - code.cvtss2sd(result, result); + if constexpr (fsize == 32) { + code.rsqrtss(result, operand); + } else { + code.cvtsd2ss(result, operand); + code.rsqrtss(result, result); + code.cvtss2sd(result, result); + } } ctx.reg_alloc.DefineValue(inst, result); return; } - // TODO: VRSQRT14SS implementation (AVX512F) - auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); diff --git a/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp index aeba7e2b..e6a881f7 100644 --- a/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp @@ -1288,12 +1288,16 @@ static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - if constexpr (fsize == 32) { - code.rcpps(result, operand); + if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { + FCODE(vrcp14p)(result, operand); } else { - code.cvtpd2ps(result, operand); - code.rcpps(result, result); - code.cvtps2pd(result, result); + if constexpr (fsize == 32) { + code.rcpps(result, operand); + } else { + code.cvtpd2ps(result, operand); + code.rcpps(result, result); + code.cvtps2pd(result, result); + } } ctx.reg_alloc.DefineValue(inst, result); @@ -1502,12 +1506,16 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - if constexpr (fsize == 32) { - code.rsqrtps(result, operand); + if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { + FCODE(vrsqrt14p)(result, operand); } else { - code.cvtpd2ps(result, operand); - code.rsqrtps(result, result); - code.cvtps2pd(result, result); + if constexpr (fsize == 32) { + code.rsqrtps(result, operand); + } else { + code.cvtpd2ps(result, operand); + code.rsqrtps(result, result); + code.cvtps2pd(result, result); + } } ctx.reg_alloc.DefineValue(inst, result);