emit_x64_floating_point: AVX512 implementation of EmitFPMinMaxNumeric

This commit is contained in:
Wunkolo 2021-06-13 17:51:43 -07:00 committed by merry
parent a1192a51d8
commit c6125082ea

View file

@ -444,12 +444,27 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
template<size_t fsize, bool is_max>
static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>;
constexpr FPT default_nan = FP::FPInfo<FPT>::DefaultNaN();
constexpr u8 mantissa_msb_bit = static_cast<u8>(FP::FPInfo<FPT>::explicit_mantissa_width - 1);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm op1 = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm op2 = ctx.reg_alloc.UseScratchXmm(args[1]); // Result stored here!
DenormalsAreZero<fsize>(code, ctx, {op1, op2});
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
// vrangep{s,d} will already correctly handle comparing
// signed zeros and propagating NaNs similar to ARM
constexpr FpRangeSelect range_select = is_max ? FpRangeSelect::Max : FpRangeSelect::Min;
FCODE(vranges)(op2, op1, op2, FpRangeLUT(range_select, FpRangeSign::Preserve));
if (ctx.FPCR().DN()) {
FCODE(vcmps)(k1, op2, op2, Cmp::Unordered_Q);
FCODE(vmovs)(op2 | k1, code.MConst(xword, default_nan));
}
} else {
Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr();
tmp.setBit(fsize);
@ -463,7 +478,6 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
Xbyak::Label end, z, nan, op2_is_nan, snan, maybe_both_nan, normal;
DenormalsAreZero<fsize>(code, ctx, {op1, op2});
FCODE(ucomis)(op1, op2);
code.jz(z, code.T_NEAR);
code.L(normal);
@ -503,7 +517,7 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
code.jc(maybe_both_nan);
if (ctx.FPCR().DN()) {
code.L(snan);
code.movaps(op2, code.MConst(xword, FP::FPInfo<FPT>::DefaultNaN()));
code.movaps(op2, code.MConst(xword, default_nan));
code.jmp(end);
} else {
code.movaps(op2, op1);
@ -534,6 +548,7 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
code.jmp(end);
code.SwitchToNearCode();
}
ctx.reg_alloc.DefineValue(inst, op2);
}