A64: Implement SADALP and UADALP

While we're at it, we can share the implementation of SADDLP and UADDLP with
these instructions, since the only difference is that SADALP and UADALP
accumulate into the destination at the end of the operation.
This commit is contained in:
Lioncash 2018-08-17 18:12:36 -04:00 committed by MerryMage
parent 29f8b30634
commit cb5e5c5d49
2 changed files with 46 additions and 33 deletions

View file

@ -572,7 +572,7 @@ INST(SADDLP, "SADDLP", "0Q001
//INST(SUQADD_2, "SUQADD", "0Q001110zz100000001110nnnnnddddd")
//INST(CLS_asimd, "CLS (vector)", "0Q001110zz100000010010nnnnnddddd")
INST(CNT, "CNT", "0Q001110zz100000010110nnnnnddddd")
//INST(SADALP, "SADALP", "0Q001110zz100000011010nnnnnddddd")
INST(SADALP, "SADALP", "0Q001110zz100000011010nnnnnddddd")
//INST(SQABS_2, "SQABS", "0Q001110zz100000011110nnnnnddddd")
INST(CMGT_zero_2, "CMGT (zero)", "0Q001110zz100000100010nnnnnddddd")
INST(CMEQ_zero_2, "CMEQ (zero)", "0Q001110zz100000100110nnnnnddddd")
@ -617,7 +617,7 @@ INST(REV32_asimd, "REV32 (vector)", "0Q101
INST(UADDLP, "UADDLP", "0Q101110zz100000001010nnnnnddddd")
//INST(USQADD_2, "USQADD", "0Q101110zz100000001110nnnnnddddd")
//INST(CLZ_asimd, "CLZ (vector)", "0Q101110zz100000010010nnnnnddddd")
//INST(UADALP, "UADALP", "0Q101110zz100000011010nnnnnddddd")
INST(UADALP, "UADALP", "0Q101110zz100000011010nnnnnddddd")
//INST(SQNEG_2, "SQNEG", "0Q101110zz100000011110nnnnnddddd")
INST(CMGE_zero_2, "CMGE (zero)", "0Q101110zz100000100010nnnnnddddd")
INST(CMLE_2, "CMLE (zero)", "0Q101110zz100000100110nnnnnddddd")

View file

@ -156,6 +156,41 @@ bool SaturatedNarrow(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vn, Vec Vd,
return true;
}
// Selects what PairedAddLong does with the widened pairwise sums before
// writing them to the destination register.
enum class PairedAddLongExtraBehavior {
// Write the pairwise sums to the destination unchanged.
None,
// Add the pairwise sums to the destination register's current contents first.
Accumulate,
};
// Common translation for the pairwise add-long family of instructions.
//
// Adjacent pairs of esize-bit elements of Vn are summed into elements of twice
// that width, using the signed or unsigned widening operation chosen by `sign`.
// When `behavior` is Accumulate, the current value of Vd is added to those sums
// (at the widened element size) before the result is written back to Vd.
//
// Returns the result of ReservedValue() for the size == 0b11 encoding, and
// true otherwise.
bool PairedAddLong(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vn, Vec Vd, Signedness sign,
                   PairedAddLongExtraBehavior behavior) {
    // size == 0b11 is rejected up front, before any IR is emitted.
    if (size == 0b11) {
        return v.ReservedValue();
    }

    const size_t datasize = Q ? 128 : 64;
    const size_t esize = 8 << size.ZeroExtend();
    const size_t wide_esize = esize * 2;

    const IR::U128 source = v.V(datasize, Vn);
    const IR::U128 pair_sums = sign == Signedness::Signed
                                   ? v.ir.VectorPairedAddSignedWiden(esize, source)
                                   : v.ir.VectorPairedAddUnsignedWiden(esize, source);

    IR::U128 total = pair_sums;
    if (behavior == PairedAddLongExtraBehavior::Accumulate) {
        // The destination is read only for the accumulating forms.
        const IR::U128 accumulator = v.V(datasize, Vd);
        total = v.ir.VectorAdd(wide_esize, accumulator, pair_sums);
    }

    // For the 64-bit form (Q clear) the result goes through VectorZeroUpper
    // before being stored.
    if (!Q) {
        total = v.ir.VectorZeroUpper(total);
    }

    v.V(datasize, Vd, total);
    return true;
}
} // Anonymous namespace
bool TranslatorVisitor::CNT(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
@ -564,42 +599,20 @@ bool TranslatorVisitor::REV64_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
return true;
}
bool TranslatorVisitor::UADDLP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
if (size == 0b11) {
return ReservedValue();
}
const size_t esize = 8 << size.ZeroExtend();
const size_t datasize = Q ? 128 : 64;
const IR::U128 operand = V(datasize, Vn);
IR::U128 result = ir.VectorPairedAddUnsignedWiden(esize, operand);
if (datasize == 64) {
result = ir.VectorZeroUpper(result);
}
V(datasize, Vd, result);
return true;
// SADALP: delegates to PairedAddLong with signed widening and the Accumulate
// behavior, so the pairwise sums are added to the existing contents of Vd.
bool TranslatorVisitor::SADALP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
return PairedAddLong(*this, Q, size, Vn, Vd, Signedness::Signed, PairedAddLongExtraBehavior::Accumulate);
}
bool TranslatorVisitor::SADDLP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
if (size == 0b11) {
return ReservedValue();
}
return PairedAddLong(*this, Q, size, Vn, Vd, Signedness::Signed, PairedAddLongExtraBehavior::None);
}
const size_t esize = 8 << size.ZeroExtend();
const size_t datasize = Q ? 128 : 64;
// UADALP: delegates to PairedAddLong with unsigned widening and the Accumulate
// behavior, so the pairwise sums are added to the existing contents of Vd.
bool TranslatorVisitor::UADALP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
return PairedAddLong(*this, Q, size, Vn, Vd, Signedness::Unsigned, PairedAddLongExtraBehavior::Accumulate);
}
const IR::U128 operand = V(datasize, Vn);
IR::U128 result = ir.VectorPairedAddSignedWiden(esize, operand);
if (datasize == 64) {
result = ir.VectorZeroUpper(result);
}
V(datasize, Vd, result);
return true;
// UADDLP: delegates to PairedAddLong with unsigned widening and no extra
// behavior, so the pairwise sums are written to Vd without accumulation.
bool TranslatorVisitor::UADDLP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
return PairedAddLong(*this, Q, size, Vn, Vd, Signedness::Unsigned, PairedAddLongExtraBehavior::None);
}
bool TranslatorVisitor::SCVTF_int_4(bool Q, bool sz, Vec Vn, Vec Vd) {