emit_x64_vector: Add SSE4.1 implementation of VUZP1.8H

This commit is contained in:
zmt00 2023-12-09 15:46:10 -08:00 committed by merry
parent a43c176fc3
commit 73a75b5034

View file

@@ -1079,6 +1079,14 @@ void EmitX64::EmitVectorDeinterleaveEven16(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
if (code.HasHostFeature(HostFeature::SSE41)) {
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
code.pxor(zero, zero);
code.pblendw(lhs, zero, 0b10101010);
code.pblendw(rhs, zero, 0b10101010);
code.packusdw(lhs, rhs);
} else {
code.pslld(lhs, 16);
code.psrad(lhs, 16);
@@ -1086,6 +1094,7 @@ void EmitX64::EmitVectorDeinterleaveEven16(EmitContext& ctx, IR::Inst* inst) {
code.psrad(rhs, 16);
code.packssdw(lhs, rhs);
}
ctx.reg_alloc.DefineValue(inst, lhs);
}