A64: Implement SADALP and UADALP
While we're at it, we can join the code for SADDLP and UADDLP with these instructions, since the only difference is that we do an accumulate at the end of the operation.
This commit is contained in:
parent
29f8b30634
commit
cb5e5c5d49
2 changed files with 46 additions and 33 deletions
|
@ -572,7 +572,7 @@ INST(SADDLP, "SADDLP", "0Q001
|
||||||
//INST(SUQADD_2, "SUQADD", "0Q001110zz100000001110nnnnnddddd")
|
//INST(SUQADD_2, "SUQADD", "0Q001110zz100000001110nnnnnddddd")
|
||||||
//INST(CLS_asimd, "CLS (vector)", "0Q001110zz100000010010nnnnnddddd")
|
//INST(CLS_asimd, "CLS (vector)", "0Q001110zz100000010010nnnnnddddd")
|
||||||
INST(CNT, "CNT", "0Q001110zz100000010110nnnnnddddd")
|
INST(CNT, "CNT", "0Q001110zz100000010110nnnnnddddd")
|
||||||
//INST(SADALP, "SADALP", "0Q001110zz100000011010nnnnnddddd")
|
INST(SADALP, "SADALP", "0Q001110zz100000011010nnnnnddddd")
|
||||||
//INST(SQABS_2, "SQABS", "0Q001110zz100000011110nnnnnddddd")
|
//INST(SQABS_2, "SQABS", "0Q001110zz100000011110nnnnnddddd")
|
||||||
INST(CMGT_zero_2, "CMGT (zero)", "0Q001110zz100000100010nnnnnddddd")
|
INST(CMGT_zero_2, "CMGT (zero)", "0Q001110zz100000100010nnnnnddddd")
|
||||||
INST(CMEQ_zero_2, "CMEQ (zero)", "0Q001110zz100000100110nnnnnddddd")
|
INST(CMEQ_zero_2, "CMEQ (zero)", "0Q001110zz100000100110nnnnnddddd")
|
||||||
|
@ -617,7 +617,7 @@ INST(REV32_asimd, "REV32 (vector)", "0Q101
|
||||||
INST(UADDLP, "UADDLP", "0Q101110zz100000001010nnnnnddddd")
|
INST(UADDLP, "UADDLP", "0Q101110zz100000001010nnnnnddddd")
|
||||||
//INST(USQADD_2, "USQADD", "0Q101110zz100000001110nnnnnddddd")
|
//INST(USQADD_2, "USQADD", "0Q101110zz100000001110nnnnnddddd")
|
||||||
//INST(CLZ_asimd, "CLZ (vector)", "0Q101110zz100000010010nnnnnddddd")
|
//INST(CLZ_asimd, "CLZ (vector)", "0Q101110zz100000010010nnnnnddddd")
|
||||||
//INST(UADALP, "UADALP", "0Q101110zz100000011010nnnnnddddd")
|
INST(UADALP, "UADALP", "0Q101110zz100000011010nnnnnddddd")
|
||||||
//INST(SQNEG_2, "SQNEG", "0Q101110zz100000011110nnnnnddddd")
|
//INST(SQNEG_2, "SQNEG", "0Q101110zz100000011110nnnnnddddd")
|
||||||
INST(CMGE_zero_2, "CMGE (zero)", "0Q101110zz100000100010nnnnnddddd")
|
INST(CMGE_zero_2, "CMGE (zero)", "0Q101110zz100000100010nnnnnddddd")
|
||||||
INST(CMLE_2, "CMLE (zero)", "0Q101110zz100000100110nnnnnddddd")
|
INST(CMLE_2, "CMLE (zero)", "0Q101110zz100000100110nnnnnddddd")
|
||||||
|
|
|
@ -156,6 +156,41 @@ bool SaturatedNarrow(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vn, Vec Vd,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Selects the optional extra step performed by PairedAddLong after the
// widening pairwise add.
enum class PairedAddLongExtraBehavior {
|
||||||
|
// No extra step: the pairwise-add result is used as-is.
None,
|
||||||
|
// The pairwise-add result is added to the current value of the destination vector.
Accumulate,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Shared translation helper for the paired add-long instructions
// (SADDLP/UADDLP, and SADALP/UADALP when accumulating).
//
// v        - translator visitor used for register access and IR emission.
// Q        - selects a 128-bit (true) or 64-bit (false) data size.
// size     - element size field; source elements are 8 << size bits wide.
// Vn, Vd   - source and destination vector registers.
// sign     - chooses the signed or unsigned widening pairwise add.
// behavior - Accumulate additionally adds the result to the current Vd value.
//
// Returns v.ReservedValue() when size == 0b11, otherwise true.
bool PairedAddLong(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vn, Vec Vd, Signedness sign,
|
||||||
|
PairedAddLongExtraBehavior behavior) {
|
||||||
|
// The size encoding 0b11 is rejected up front.
if (size == 0b11) {
|
||||||
|
return v.ReservedValue();
|
||||||
|
}
|
||||||
|

||||||
|
const size_t esize = 8 << size.ZeroExtend();
|
||||||
|
const size_t datasize = Q ? 128 : 64;
|
||||||
|

||||||
|
const IR::U128 operand = v.V(datasize, Vn);
|
||||||
|
// Pick the signed or unsigned widening pairwise-add IR operation.
IR::U128 result = [&] {
|
||||||
|
if (sign == Signedness::Signed) {
|
||||||
|
return v.ir.VectorPairedAddSignedWiden(esize, operand);
|
||||||
|
}
|
||||||
|

||||||
|
return v.ir.VectorPairedAddUnsignedWiden(esize, operand);
|
||||||
|
}();
|
||||||
|

||||||
|
// Accumulating forms add the existing destination contents, using the
// doubled element size for the addition.
if (behavior == PairedAddLongExtraBehavior::Accumulate) {
|
||||||
|
result = v.ir.VectorAdd(esize * 2, v.V(datasize, Vd), result);
|
||||||
|
}
|
||||||
|

||||||
|
// For the 64-bit form, VectorZeroUpper is applied before the write-back.
if (datasize == 64) {
|
||||||
|
result = v.ir.VectorZeroUpper(result);
|
||||||
|
}
|
||||||
|

||||||
|
v.V(datasize, Vd, result);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
bool TranslatorVisitor::CNT(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
bool TranslatorVisitor::CNT(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||||
|
@ -564,42 +599,20 @@ bool TranslatorVisitor::REV64_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TranslatorVisitor::UADDLP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
bool TranslatorVisitor::SADALP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||||
if (size == 0b11) {
|
return PairedAddLong(*this, Q, size, Vn, Vd, Signedness::Signed, PairedAddLongExtraBehavior::Accumulate);
|
||||||
return ReservedValue();
|
|
||||||
}
|
|
||||||
|
|
||||||
const size_t esize = 8 << size.ZeroExtend();
|
|
||||||
const size_t datasize = Q ? 128 : 64;
|
|
||||||
|
|
||||||
const IR::U128 operand = V(datasize, Vn);
|
|
||||||
IR::U128 result = ir.VectorPairedAddUnsignedWiden(esize, operand);
|
|
||||||
|
|
||||||
if (datasize == 64) {
|
|
||||||
result = ir.VectorZeroUpper(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
V(datasize, Vd, result);
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TranslatorVisitor::SADDLP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
bool TranslatorVisitor::SADDLP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||||
if (size == 0b11) {
|
return PairedAddLong(*this, Q, size, Vn, Vd, Signedness::Signed, PairedAddLongExtraBehavior::None);
|
||||||
return ReservedValue();
|
}
|
||||||
}
|
|
||||||
|
|
||||||
const size_t esize = 8 << size.ZeroExtend();
|
bool TranslatorVisitor::UADALP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||||
const size_t datasize = Q ? 128 : 64;
|
return PairedAddLong(*this, Q, size, Vn, Vd, Signedness::Unsigned, PairedAddLongExtraBehavior::Accumulate);
|
||||||
|
}
|
||||||
|
|
||||||
const IR::U128 operand = V(datasize, Vn);
|
bool TranslatorVisitor::UADDLP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||||
IR::U128 result = ir.VectorPairedAddSignedWiden(esize, operand);
|
return PairedAddLong(*this, Q, size, Vn, Vd, Signedness::Unsigned, PairedAddLongExtraBehavior::None);
|
||||||
|
|
||||||
if (datasize == 64) {
|
|
||||||
result = ir.VectorZeroUpper(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
V(datasize, Vd, result);
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TranslatorVisitor::SCVTF_int_4(bool Q, bool sz, Vec Vn, Vec Vd) {
|
bool TranslatorVisitor::SCVTF_int_4(bool Q, bool sz, Vec Vn, Vec Vd) {
|
||||||
|
|
Loading…
Reference in a new issue