A64: Implement SQXTN (vector)

This commit is contained in:
MerryMage 2018-07-24 17:59:14 +01:00
parent 8ef114d48f
commit 3874cb37e3
8 changed files with 139 additions and 38 deletions

View file

@ -2193,6 +2193,73 @@ void EmitX64::EmitVectorSignedAbsoluteDifference32(EmitContext& ctx, IR::Inst* i
EmitVectorSignedAbsoluteDifference(32, ctx, inst, code);
}
/// Emits x64 code that narrows each signed element of the vector argument to half
/// its width with signed saturation, and accumulates the saturation (QC) flag.
///
/// @param original_esize Width in bits of the source elements; only 16 and 32 are
///                       handled here, anything else hits UNREACHABLE().
/// @param code           Code emitter the instructions are written to.
/// @param ctx            Emit context providing the register allocator.
/// @param inst           IR instruction being lowered; its value is defined as the
///                       register holding the narrowed elements.
///
/// Strategy: pack with signed saturation, widen the packed result back to the
/// original element width, and compare against the source. Any lane that does not
/// round-trip must have been saturated.
static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(args[0]);
    const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm();
    const Xbyak::Xmm reconstructed = ctx.reg_alloc.ScratchXmm();
    const Xbyak::Xmm sign = ctx.reg_alloc.ScratchXmm();

    code.movdqa(dest, src);

    switch (original_esize) {
    case 16:
        // Saturating pack of words to bytes.
        code.packsswb(dest, dest);

        // Per-element sign of the source (0 or -1), packed down to bytes.
        code.movdqa(sign, src);
        code.psraw(sign, 15);
        code.packsswb(sign, sign);

        // Interleave narrowed bytes with the sign bytes to rebuild 16-bit elements.
        code.movdqa(reconstructed, dest);
        code.punpcklbw(reconstructed, sign);
        break;
    case 32:
        // Saturating pack of doublewords to words.
        code.packssdw(dest, dest);

        // The sign is taken from the narrowed words, then interleaved to rebuild
        // 32-bit elements.
        code.movdqa(reconstructed, dest);
        code.movdqa(sign, dest);
        code.psraw(sign, 15);
        code.punpcklwd(reconstructed, sign);
        break;
    default:
        UNREACHABLE();
        break;
    }

    // Both branches below must leave ZF set exactly when reconstructed == src,
    // so that the SETNZ that follows yields 1 only if some lane saturated.
    const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        // XOR is all-zero iff the vectors are identical; PTEST sets ZF in that case.
        code.pxor(reconstructed, src);
        code.ptest(reconstructed, reconstructed);
    } else {
        // PCMPEQD turns every matching doubleword into all-ones and MOVMSKPS
        // gathers the four sign bits, so the mask is 0b1111 iff every lane matched.
        // Comparing against 0 here would invert the flag.
        code.pcmpeqd(reconstructed, src);
        code.movmskps(bit, reconstructed);
        code.cmp(bit, 0b1111);
    }

    // Sticky-OR the saturation indicator into the fpsr_qc byte addressed off r15.
    code.setnz(bit.cvt8());
    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit.cvt8());

    ctx.reg_alloc.DefineValue(inst, dest);
}
// Lowers VectorSignedSaturatedNarrowToSigned16 by forwarding to the shared
// emitter with a source element width of 16 bits.
void EmitX64::EmitVectorSignedSaturatedNarrowToSigned16(EmitContext& ctx, IR::Inst* inst) {
EmitVectorSignedSaturatedNarrowToSigned(16, code, ctx, inst);
}
// Lowers VectorSignedSaturatedNarrowToSigned32 by forwarding to the shared
// emitter with a source element width of 32 bits.
void EmitX64::EmitVectorSignedSaturatedNarrowToSigned32(EmitContext& ctx, IR::Inst* inst) {
EmitVectorSignedSaturatedNarrowToSigned(32, code, ctx, inst);
}
// Lowers VectorSignedSaturatedNarrowToSigned64 through the generic fallback:
// each s64 element of the input is clamped to the s32 range and stored as s32.
// The lambda returns true if any element had to be clamped, which the fallback
// helper receives as the saturation indicator.
// Only the first a.size() elements of `result` are written.
void EmitX64::EmitVectorSignedSaturatedNarrowToSigned64(EmitContext& ctx, IR::Inst* inst) {
    EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<s32>& result, const VectorArray<s64>& a) {
        // The bounds are built as s64 values on purpose. With a 32-bit int the
        // literal 0x80000000 has type unsigned int, so a bare `-0x80000000`
        // wraps to +2147483648 and would hand std::clamp a lower bound greater
        // than its upper bound.
        constexpr s64 lower_bound = -static_cast<s64>(0x80000000);
        constexpr s64 upper_bound = static_cast<s64>(0x7FFFFFFF);

        bool qc_flag = false;
        for (size_t i = 0; i < a.size(); ++i) {
            const s64 saturated = std::clamp<s64>(a[i], lower_bound, upper_bound);
            result[i] = static_cast<s32>(saturated);
            qc_flag |= saturated != a[i];
        }
        return qc_flag;
    });
}
static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(args[0]);

View file

@ -579,7 +579,7 @@ INST(CMEQ_zero_2, "CMEQ (zero)", "0Q001
INST(CMLT_2, "CMLT (zero)", "0Q001110zz100000101010nnnnnddddd")
INST(ABS_2, "ABS", "0Q001110zz100000101110nnnnnddddd")
INST(XTN, "XTN, XTN2", "0Q001110zz100001001010nnnnnddddd")
//INST(SQXTN_2, "SQXTN, SQXTN2", "0Q001110zz100001010010nnnnnddddd")
INST(SQXTN_2, "SQXTN, SQXTN2", "0Q001110zz100001010010nnnnnddddd")
//INST(FCVTN, "FCVTN, FCVTN2", "0Q0011100z100001011010nnnnnddddd")
//INST(FCVTL, "FCVTL, FCVTL2", "0Q0011100z100001011110nnnnnddddd")
//INST(FRINTN_1, "FRINTN (vector)", "0Q00111001111001100010nnnnnddddd")

View file

@ -515,35 +515,20 @@ struct TranslatorVisitor final {
// Data Processing - FP and SIMD - Scalar two-register misc
bool SUQADD_1(Imm<2> size, Vec Vn, Vec Vd);
bool SUQADD_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool SQABS_1(Imm<2> size, Vec Vn, Vec Vd);
bool SQABS_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool CMGT_zero_1(Imm<2> size, Vec Vn, Vec Vd);
bool CMGT_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool CMEQ_zero_1(Imm<2> size, Vec Vn, Vec Vd);
bool CMEQ_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool CMLT_1(Imm<2> size, Vec Vn, Vec Vd);
bool CMLT_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool ABS_1(Imm<2> size, Vec Vn, Vec Vd);
bool ABS_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool SQXTN_1(Imm<2> size, Vec Vn, Reg Rd);
bool SQXTN_2(bool Q, Imm<2> size, Vec Vn, Reg Rd);
bool USQADD_1(Imm<2> size, Vec Vn, Vec Vd);
bool USQADD_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool SQNEG_1(Imm<2> size, Vec Vn, Vec Vd);
bool SQNEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool CMGE_zero_1(Imm<2> size, Vec Vn, Vec Vd);
bool CMGE_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool CMLE_1(Imm<2> size, Vec Vn, Vec Vd);
bool CMLE_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool NEG_1(Imm<2> size, Vec Vn, Vec Vd);
bool NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool SQXTUN_1(Imm<2> size, Vec Vn, Reg Rd);
bool SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool UQXTN_1(Imm<2> size, Vec Vn, Reg Rd);
bool UQXTN_2(bool Q, Imm<2> size, Vec Vn, Reg Rd);
bool FCVTXN_1(bool sz, Vec Vn, Reg Rd);
bool FCVTXN_2(bool Q, bool sz, Vec Vn, Reg Rd);
// Data Processing - FP and SIMD - SIMD Scalar pairwise
bool ADDP_pair(Imm<2> size, Vec Vn, Vec Vd);
@ -704,28 +689,6 @@ struct TranslatorVisitor final {
bool FMINNMP_vec_1(bool Q, Vec Vm, Vec Vn, Vec Vd);
bool FMINP_vec_1(bool Q, Vec Vm, Vec Vn, Vec Vd);
// Data Processing - FP and SIMD - SIMD Two-register misc
bool FRINTN_1(bool Q, Vec Vn, Vec Vd);
bool FRINTN_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FRINTM_1(bool Q, Vec Vn, Vec Vd);
bool FRINTM_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FABS_1(bool Q, Vec Vn, Vec Vd);
bool FABS_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FRINTP_1(bool Q, Vec Vn, Vec Vd);
bool FRINTP_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FRINTZ_1(bool Q, Vec Vn, Vec Vd);
bool FRINTZ_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FRINTA_1(bool Q, Vec Vn, Vec Vd);
bool FRINTA_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FRINTX_1(bool Q, Vec Vn, Vec Vd);
bool FRINTX_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FNEG_1(bool Q, Vec Vn, Vec Vd);
bool FNEG_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FRINTI_1(bool Q, Vec Vn, Vec Vd);
bool FRINTI_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FSQRT_1(bool Q, Vec Vn, Vec Vd);
bool FSQRT_2(bool Q, bool sz, Vec Vn, Vec Vd);
// Data Processing - FP and SIMD - SIMD Three same extra
bool SDOT_vec(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
bool UDOT_vec(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
@ -751,6 +714,41 @@ struct TranslatorVisitor final {
bool NOT(bool Q, Vec Vn, Vec Vd);
bool RBIT_asimd(bool Q, Vec Vn, Vec Vd);
bool URSQRTE(bool Q, bool sz, Vec Vn, Vec Vd);
bool SUQADD_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool SQABS_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool CMGT_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool CMEQ_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool CMLT_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool ABS_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool SQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool USQADD_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool SQNEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool CMGE_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool CMLE_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
bool UQXTN_2(bool Q, Imm<2> size, Vec Vn, Reg Rd);
bool FCVTXN_2(bool Q, bool sz, Vec Vn, Reg Rd);
bool FRINTN_1(bool Q, Vec Vn, Vec Vd);
bool FRINTN_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FRINTM_1(bool Q, Vec Vn, Vec Vd);
bool FRINTM_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FABS_1(bool Q, Vec Vn, Vec Vd);
bool FABS_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FRINTP_1(bool Q, Vec Vn, Vec Vd);
bool FRINTP_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FRINTZ_1(bool Q, Vec Vn, Vec Vd);
bool FRINTZ_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FRINTA_1(bool Q, Vec Vn, Vec Vd);
bool FRINTA_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FRINTX_1(bool Q, Vec Vn, Vec Vd);
bool FRINTX_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FNEG_1(bool Q, Vec Vn, Vec Vd);
bool FNEG_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FRINTI_1(bool Q, Vec Vn, Vec Vd);
bool FRINTI_2(bool Q, bool sz, Vec Vn, Vec Vd);
bool FSQRT_1(bool Q, Vec Vn, Vec Vd);
bool FSQRT_2(bool Q, bool sz, Vec Vn, Vec Vd);
// Data Processing - FP and SIMD - SIMD across lanes
bool SADDLV(bool Q, Imm<2> size, Vec Vn, Vec Vd);

View file

@ -291,6 +291,22 @@ bool TranslatorVisitor::SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
return true;
}
bool TranslatorVisitor::SQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
if (size == 0b11) {
return ReservedValue();
}
const size_t esize = 8 << size.ZeroExtend<size_t>();
const size_t datasize = 64;
const size_t part = Q ? 1 : 0;
const IR::U128 operand = V(2 * datasize, Vn);
const IR::U128 result = ir.VectorSignedSaturatedNarrowToSigned(2 * esize, operand);
Vpart(datasize, Vd, part, result);
return true;
}
bool TranslatorVisitor::NOT(bool Q, Vec Vn, Vec Vd) {
const size_t datasize = Q ? 128 : 64;

View file

@ -1292,6 +1292,19 @@ U128 IREmitter::VectorSignedAbsoluteDifference(size_t esize, const U128& a, cons
return {};
}
// Emits the signed-saturating-narrow IR instruction matching the width of the
// source elements. Supported widths are 16, 32 and 64 bits; any other value is
// a caller error and reaches UNREACHABLE().
U128 IREmitter::VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a) {
    if (original_esize == 16) {
        return Inst<U128>(Opcode::VectorSignedSaturatedNarrowToSigned16, a);
    }
    if (original_esize == 32) {
        return Inst<U128>(Opcode::VectorSignedSaturatedNarrowToSigned32, a);
    }
    if (original_esize == 64) {
        return Inst<U128>(Opcode::VectorSignedSaturatedNarrowToSigned64, a);
    }

    UNREACHABLE();
    return {};
}
U128 IREmitter::VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a) {
switch (original_esize) {
case 16:

View file

@ -249,6 +249,7 @@ public:
U128 VectorShuffleWords(const U128& a, u8 mask);
U128 VectorSignExtend(size_t original_esize, const U128& a);
U128 VectorSignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
U128 VectorSub(size_t esize, const U128& a, const U128& b);
U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);

View file

@ -341,6 +341,9 @@ bool Inst::ReadsFromFPSRCumulativeSaturationBit() const {
bool Inst::WritesToFPSRCumulativeSaturationBit() const {
switch (op) {
case Opcode::VectorSignedSaturatedNarrowToSigned16:
case Opcode::VectorSignedSaturatedNarrowToSigned32:
case Opcode::VectorSignedSaturatedNarrowToSigned64:
case Opcode::VectorSignedSaturatedNarrowToUnsigned16:
case Opcode::VectorSignedSaturatedNarrowToUnsigned32:
case Opcode::VectorSignedSaturatedNarrowToUnsigned64:

View file

@ -347,6 +347,9 @@ OPCODE(VectorSignExtend64, T::U128, T::U128
OPCODE(VectorSignedAbsoluteDifference8, T::U128, T::U128, T::U128 )
OPCODE(VectorSignedAbsoluteDifference16, T::U128, T::U128, T::U128 )
OPCODE(VectorSignedAbsoluteDifference32, T::U128, T::U128, T::U128 )
OPCODE(VectorSignedSaturatedNarrowToSigned16, T::U128, T::U128 )
OPCODE(VectorSignedSaturatedNarrowToSigned32, T::U128, T::U128 )
OPCODE(VectorSignedSaturatedNarrowToSigned64, T::U128, T::U128 )
OPCODE(VectorSignedSaturatedNarrowToUnsigned16, T::U128, T::U128 )
OPCODE(VectorSignedSaturatedNarrowToUnsigned32, T::U128, T::U128 )
OPCODE(VectorSignedSaturatedNarrowToUnsigned64, T::U128, T::U128 )