A64: Implement UADDLP
This commit is contained in:
parent
5563bbbd79
commit
70ff2d73b5
6 changed files with 104 additions and 8 deletions
|
@ -1735,6 +1735,66 @@ void EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.DefineValue(inst, a);
|
||||
}
|
||||
|
||||
// Pairwise addition of adjacent signed 32-bit elements, widening each sum
// to 64 bits (the 32->64 case of SADDLP-style widening).
// result.q[i] = sx64(a.d[2i]) + sx64(a.d[2i+1])
void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Xmm sign = ctx.reg_alloc.ScratchXmm();
    Xbyak::Xmm upper = ctx.reg_alloc.ScratchXmm();
    Xbyak::Xmm even = ctx.reg_alloc.ScratchXmm();

    // NOTE: A 64-bit arithmetic right shift only exists as the AVX-512
    // vpsraq instruction; there is no SSE psraq, so the psllq/psraq trick
    // used by the unsigned variants (with logical shifts) is unavailable
    // here. Instead, sign-extend each 32-bit lane to 64 bits by
    // interleaving it with its sign mask, then regroup and add.
    code.movdqa(sign, a);
    code.psrad(sign, 31);           // sign  = per-dword sign mask
    code.movdqa(upper, a);
    code.punpckldq(a, sign);        // a     = { sx64(a0), sx64(a1) }
    code.punpckhdq(upper, sign);    // upper = { sx64(a2), sx64(a3) }
    code.movdqa(even, a);
    code.punpcklqdq(even, upper);   // even  = { sx64(a0), sx64(a2) }
    code.punpckhqdq(a, upper);      // a     = { sx64(a1), sx64(a3) }
    code.paddq(a, even);            // a     = { a0+a1, a2+a3 }

    ctx.reg_alloc.DefineValue(inst, a);
}
|
||||
|
||||
// Pairwise addition of adjacent unsigned 8-bit elements, widening each
// sum to 16 bits: result.w[i] = zx16(a.b[2i]) + zx16(a.b[2i+1]).
void EmitX64::EmitVectorPairedAddUnsignedWiden8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Xmm odd = ctx.reg_alloc.ScratchXmm();

    code.movdqa(odd, a);
    code.psrlw(odd, 8);   // odd = zero-extended odd-indexed bytes
    code.psllw(a, 8);     // discard odd bytes ...
    code.psrlw(a, 8);     // ... leaving zero-extended even-indexed bytes
    code.paddw(a, odd);   // 16-bit pairwise sums

    ctx.reg_alloc.DefineValue(inst, a);
}
|
||||
|
||||
// Pairwise addition of adjacent unsigned 16-bit elements, widening each
// sum to 32 bits: result.d[i] = zx32(a.w[2i]) + zx32(a.w[2i+1]).
void EmitX64::EmitVectorPairedAddUnsignedWiden16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Xmm odd = ctx.reg_alloc.ScratchXmm();

    code.movdqa(odd, a);
    code.psrld(odd, 16);  // odd = zero-extended odd-indexed words
    code.pslld(a, 16);    // discard odd words ...
    code.psrld(a, 16);    // ... leaving zero-extended even-indexed words
    code.paddd(a, odd);   // 32-bit pairwise sums

    ctx.reg_alloc.DefineValue(inst, a);
}
|
||||
|
||||
// Pairwise addition of adjacent unsigned 32-bit elements, widening each
// sum to 64 bits: result.q[i] = zx64(a.d[2i]) + zx64(a.d[2i+1]).
void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Xmm odd = ctx.reg_alloc.ScratchXmm();

    code.movdqa(odd, a);
    code.psrlq(odd, 32);  // odd = zero-extended odd-indexed dwords
    code.psllq(a, 32);    // discard odd dwords ...
    code.psrlq(a, 32);    // ... leaving zero-extended even-indexed dwords
    code.paddq(a, odd);   // 64-bit pairwise sums

    ctx.reg_alloc.DefineValue(inst, a);
}
|
||||
|
||||
void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
|
||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512_BITALG)) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
|
|
@ -614,7 +614,7 @@ INST(FABS_2, "FABS (vector)", "0Q001
|
|||
//INST(FRECPE_3, "FRECPE", "0Q00111011111001110110nnnnnddddd")
|
||||
//INST(FRECPE_4, "FRECPE", "0Q0011101z100001110110nnnnnddddd")
|
||||
INST(REV32_asimd, "REV32 (vector)", "0Q101110zz100000000010nnnnnddddd")
|
||||
//INST(UADDLP, "UADDLP", "0Q101110zz100000001010nnnnnddddd")
|
||||
INST(UADDLP, "UADDLP", "0Q101110zz100000001010nnnnnddddd")
|
||||
//INST(USQADD_2, "USQADD", "0Q101110zz100000001110nnnnnddddd")
|
||||
//INST(CLZ_asimd, "CLZ (vector)", "0Q101110zz100000010010nnnnnddddd")
|
||||
//INST(UADALP, "UADALP", "0Q101110zz100000011010nnnnnddddd")
|
||||
|
|
|
@ -365,6 +365,25 @@ bool TranslatorVisitor::REV64_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::UADDLP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||
if (size == 0b11) {
|
||||
return ReservedValue();
|
||||
}
|
||||
|
||||
const size_t esize = 8 << size.ZeroExtend();
|
||||
const size_t datasize = Q ? 128 : 64;
|
||||
|
||||
const IR::U128 operand = V(datasize, Vn);
|
||||
IR::U128 result = ir.VectorPairedAddUnsignedWiden(esize, operand);
|
||||
|
||||
if (datasize == 64) {
|
||||
result = ir.VectorZeroUpper(result);
|
||||
}
|
||||
|
||||
V(datasize, Vd, result);
|
||||
return true;
|
||||
}
|
||||
|
||||
// SCVTF (vector, integer): converts each signed integer element to
// floating-point. Thin forwarder to the shared conversion helper.
bool TranslatorVisitor::SCVTF_int_4(bool Q, bool sz, Vec Vn, Vec Vd) {
    return IntegerConvertToFloat(*this, Q, sz, Vn, Vd, Signedness::Signed);
}
|
||||
|
|
|
@ -1140,6 +1140,21 @@ U128 IREmitter::VectorOr(const U128& a, const U128& b) {
|
|||
return Inst<U128>(Opcode::VectorOr, a, b);
|
||||
}
|
||||
|
||||
// Pairwise addition of adjacent same-width elements drawn from the
// concatenation of two full vectors. Dispatches on element size to the
// matching IR opcode; any other esize is a caller bug.
U128 IREmitter::VectorPairedAdd(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorPairedAdd8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorPairedAdd16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorPairedAdd32, a, b);
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::VectorPairedAdd64, a, b);
    }
    UNREACHABLE();
    return {};
}
|
||||
|
||||
U128 IREmitter::VectorPairedAddLower(size_t esize, const U128& a, const U128& b) {
|
||||
switch (esize) {
|
||||
case 8:
|
||||
|
@ -1153,16 +1168,14 @@ U128 IREmitter::VectorPairedAddLower(size_t esize, const U128& a, const U128& b)
|
|||
return {};
|
||||
}
|
||||
|
||||
// Pairwise addition of adjacent unsigned elements of width original_esize,
// widening each sum to double the element width. original_esize is the
// SOURCE element size (8, 16 or 32); any other value is a caller bug.
U128 IREmitter::VectorPairedAddUnsignedWiden(size_t original_esize, const U128& a) {
    switch (original_esize) {
    case 8:
        return Inst<U128>(Opcode::VectorPairedAddUnsignedWiden8, a);
    case 16:
        return Inst<U128>(Opcode::VectorPairedAddUnsignedWiden16, a);
    case 32:
        return Inst<U128>(Opcode::VectorPairedAddUnsignedWiden32, a);
    }
    UNREACHABLE();
    return {};
}
||||
|
|
|
@ -236,6 +236,7 @@ public:
|
|||
U128 VectorOr(const U128& a, const U128& b);
|
||||
U128 VectorPairedAdd(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorPairedAddLower(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorPairedAddUnsignedWiden(size_t original_esize, const U128& a);
|
||||
U128 VectorPopulationCount(const U128& a);
|
||||
U128 VectorReverseBits(const U128& a);
|
||||
U128 VectorRotateLeft(size_t esize, const U128& a, u8 amount);
|
||||
|
|
|
@ -319,6 +319,9 @@ OPCODE(VectorOr, T::U128, T::U128, T::U
|
|||
OPCODE(VectorPairedAddLower8, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedAddLower16, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedAddLower32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedAddUnsignedWiden8, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedAddUnsignedWiden16, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedAddUnsignedWiden32, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedAdd8, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedAdd16, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedAdd32, T::U128, T::U128, T::U128 )
|
||||
|
|
Loading…
Reference in a new issue