emit_x64_vector: Add SSSE3 implementation of VUZP{1,2}.4H

This commit is contained in:
zmt00 2023-12-09 15:43:08 -08:00 committed by merry
parent 7ef11ee311
commit a43c176fc3

View file

@ -1129,6 +1129,13 @@ void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst)
void EmitX64::EmitVectorDeinterleaveEvenLower16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEvenLower16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
if (code.HasHostFeature(HostFeature::SSSE3)) {
const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]);
code.punpcklwd(lhs, rhs);
code.pshufb(lhs, code.MConst(xword, 0x0B0A'0302'0908'0100, 0x8080'8080'8080'8080));
} else {
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
code.pslld(lhs, 16); code.pslld(lhs, 16);
@ -1140,6 +1147,7 @@ void EmitX64::EmitVectorDeinterleaveEvenLower16(EmitContext& ctx, IR::Inst* inst
code.packssdw(lhs, rhs); code.packssdw(lhs, rhs);
code.pshufd(lhs, lhs, 0b11011000); code.pshufd(lhs, lhs, 0b11011000);
code.movq(lhs, lhs); code.movq(lhs, lhs);
}
ctx.reg_alloc.DefineValue(inst, lhs); ctx.reg_alloc.DefineValue(inst, lhs);
} }
@ -1221,6 +1229,13 @@ void EmitX64::EmitVectorDeinterleaveOddLower8(EmitContext& ctx, IR::Inst* inst)
void EmitX64::EmitVectorDeinterleaveOddLower16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveOddLower16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
if (code.HasHostFeature(HostFeature::SSSE3)) {
const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]);
code.punpcklwd(lhs, rhs);
code.pshufb(lhs, code.MConst(xword, 0x0F0E'0706'0D0C'0504, 0x8080'8080'8080'8080));
} else {
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
code.psrad(lhs, 16); code.psrad(lhs, 16);
@ -1228,6 +1243,7 @@ void EmitX64::EmitVectorDeinterleaveOddLower16(EmitContext& ctx, IR::Inst* inst)
code.packssdw(lhs, rhs); code.packssdw(lhs, rhs);
code.pshufd(lhs, lhs, 0b11011000); code.pshufd(lhs, lhs, 0b11011000);
code.movq(lhs, lhs); code.movq(lhs, lhs);
}
ctx.reg_alloc.DefineValue(inst, lhs); ctx.reg_alloc.DefineValue(inst, lhs);
} }