emit_x64_vector: Use non-scratch Use* variants of registers within EmitVectorUnsignedAbsoluteDifference()

Depending on the branch taken, a register isn't always modified, so we
can signify this by using the non-scratch Use* variants in those
cases.
This commit is contained in:
Lioncash 2018-07-15 22:06:29 -04:00 committed by MerryMage
parent f7f83b76b7
commit 7797bc2fb2

View file

@ -2188,29 +2188,41 @@ static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx,
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
switch (esize) { switch (esize) {
case 8: case 8: {
const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
code.movdqa(temp, x); code.movdqa(temp, x);
code.psubusb(temp, y); code.psubusb(temp, y);
code.psubusb(y, x); code.psubusb(y, x);
code.por(temp, y); code.por(temp, y);
break; break;
case 16: }
case 16: {
const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
code.movdqa(temp, x); code.movdqa(temp, x);
code.psubusw(temp, y); code.psubusw(temp, y);
code.psubusw(y, x); code.psubusw(y, x);
code.por(temp, y); code.por(temp, y);
break; break;
}
case 32: case 32:
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
code.movdqa(temp, x); code.movdqa(temp, x);
code.pminud(x, y); code.pminud(x, y);
code.pmaxud(temp, y); code.pmaxud(temp, y);
code.psubd(temp, x); code.psubd(temp, x);
} else { } else {
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
code.movdqa(temp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); code.movdqa(temp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
code.pxor(x, temp); code.pxor(x, temp);
code.pxor(y, temp); code.pxor(y, temp);