emit_x64_packed: Do not use XmmBConst here
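Broadcasting is inappropriate here: these constants should populate only the low 32 bits of the XMM register, not be replicated across every lane.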
This commit is contained in:
parent
2d4602a651
commit
fbdcfeab99
1 changed file with 5 additions and 5 deletions
|
@@ -94,8 +94,8 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
// !(b <= a+b) == b > a+b
|
// !(b <= a+b) == b > a+b
|
||||||
code.movdqa(tmp_a, xmm_a);
|
code.movdqa(tmp_a, xmm_a);
|
||||||
code.movdqa(tmp_b, xmm_b);
|
code.movdqa(tmp_b, xmm_b);
|
||||||
code.paddw(tmp_a, code.XmmBConst<16>(xword, 0x8000));
|
code.paddw(tmp_a, code.XmmConst(xword, 0x80008000, 0));
|
||||||
code.paddw(tmp_b, code.XmmBConst<16>(xword, 0x8000));
|
code.paddw(tmp_b, code.XmmConst(xword, 0x80008000, 0));
|
||||||
code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
|
code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
|
ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
|
||||||
|
@@ -217,8 +217,8 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
// (a >= b) == !(b > a)
|
// (a >= b) == !(b > a)
|
||||||
code.pcmpeqb(ones, ones);
|
code.pcmpeqb(ones, ones);
|
||||||
code.paddw(xmm_a, code.XmmBConst<16>(xword, 0x8000));
|
code.paddw(xmm_a, code.XmmConst(xword, 0x80008000, 0));
|
||||||
code.paddw(xmm_b, code.XmmBConst<16>(xword, 0x8000));
|
code.paddw(xmm_b, code.XmmConst(xword, 0x80008000, 0));
|
||||||
code.movdqa(xmm_ge, xmm_b);
|
code.movdqa(xmm_ge, xmm_b);
|
||||||
code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
|
code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
|
||||||
code.pxor(xmm_ge, ones);
|
code.pxor(xmm_ge, ones);
|
||||||
|
@@ -654,7 +654,7 @@ void EmitX64::EmitPackedAbsDiffSumS8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
// TODO: Optimize with zero-extension detection
|
// TODO: Optimize with zero-extension detection
|
||||||
code.movaps(tmp, code.XmmBConst<8>(xword, 0xFF));
|
code.movaps(tmp, code.XmmConst(xword, 0x0000'0000'ffff'ffff, 0));
|
||||||
code.pand(xmm_a, tmp);
|
code.pand(xmm_a, tmp);
|
||||||
code.pand(xmm_b, tmp);
|
code.pand(xmm_b, tmp);
|
||||||
code.psadbw(xmm_a, xmm_b);
|
code.psadbw(xmm_a, xmm_b);
|
||||||
|
|
Loading…
Reference in a new issue