emit_x64_vector: Optimize VectorSignedAbsoluteDifference

This commit is contained in:
zmt00 2024-01-20 17:34:54 -08:00 committed by merry
parent 7e66e082fd
commit ba9009abd8

View file

@@ -3744,36 +3744,52 @@ static void EmitVectorSignedAbsoluteDifference(size_t esize, EmitContext& ctx, I
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
code.movdqa(mask, x); // only signed 16-bit min/max are available below SSE4.1
code.movdqa(tmp1, y); if (code.HasHostFeature(HostFeature::SSE41) || esize == 16) {
code.movdqa(tmp, x);
switch (esize) { switch (esize) {
case 8: case 8:
code.pcmpgtb(mask, y); code.pminsb(tmp, y);
code.psubb(tmp1, x); code.pmaxsb(x, y);
code.psubb(x, y); code.psubb(x, tmp);
break; break;
case 16: case 16:
code.pcmpgtw(mask, y); code.pminsw(tmp, y);
code.psubw(tmp1, x); code.pmaxsw(x, y);
code.psubw(x, y); code.psubw(x, tmp);
break; break;
case 32: case 32:
code.pcmpgtd(mask, y); code.pminsd(tmp, y);
code.psubd(tmp1, x); code.pmaxsd(x, y);
code.psubd(x, y); code.psubd(x, tmp);
break; break;
default:
UNREACHABLE();
}
} else {
code.movdqa(tmp, y);
switch (esize) {
case 8:
code.pcmpgtb(tmp, x);
code.psubb(x, y);
code.pxor(x, tmp);
code.psubb(x, tmp);
break;
case 32:
code.pcmpgtd(tmp, x);
code.psubd(x, y);
code.pxor(x, tmp);
code.psubd(x, tmp);
break;
default:
UNREACHABLE();
}
} }
code.movdqa(tmp2, mask);
code.pand(x, mask);
code.pandn(tmp2, tmp1);
code.por(x, tmp2);
ctx.reg_alloc.DefineValue(inst, x); ctx.reg_alloc.DefineValue(inst, x);
} }