A64: Implement TRN2
This commit is contained in:
parent
30e158f8d0
commit
3b6db59850
2 changed files with 54 additions and 17 deletions
|
@ -518,7 +518,7 @@ INST(USRA_1, "USRA", "01111
|
|||
// Decoder-table entries. Each INST(...) supplies an identifier, a mnemonic string and a
// 32-character bit pattern for one instruction encoding.
// NOTE(review): the pattern letters presumably correspond to the arguments of the matching
// TranslatorVisitor method (Q -> Q, zz -> size, mmmmm -> Vm, nnnnn -> Vn, ddddd -> Vd) —
// confirm against the decoder implementation.
// NOTE(review): this is a rendered diff hunk; the commented-out TRN2 entry appears to be the
// pre-change line and the live TRN2 entry its replacement.
INST(TRN1, "TRN1", "0Q001110zz0mmmmm001010nnnnnddddd")
|
||||
INST(ZIP1, "ZIP1", "0Q001110zz0mmmmm001110nnnnnddddd")
|
||||
//INST(UZP2, "UZP2", "0Q001110zz0mmmmm010110nnnnnddddd")
|
||||
//INST(TRN2, "TRN2", "0Q001110zz0mmmmm011010nnnnnddddd")
|
||||
INST(TRN2, "TRN2", "0Q001110zz0mmmmm011010nnnnnddddd")
|
||||
INST(ZIP2, "ZIP2", "0Q001110zz0mmmmm011110nnnnnddddd")
|
||||
|
||||
// Data Processing - FP and SIMD - SIMD Extract
|
||||
|
|
|
@ -4,43 +4,80 @@
|
|||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include <tuple>
|
||||
#include "frontend/A64/translate/impl/impl.h"
|
||||
|
||||
namespace Dynarmic::A64 {
|
||||
|
||||
bool TranslatorVisitor::TRN1(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
|
||||
if (!Q && size == 0b11) {
|
||||
return ReservedValue();
|
||||
}
|
||||
// Selects which transpose variant VectorTranspose emits.
enum class Transposition {
|
||||
// Passed by TranslatorVisitor::TRN1.
TRN1,
|
||||
// Passed by TranslatorVisitor::TRN2.
TRN2,
|
||||
};
|
||||
|
||||
// Shared translation helper for TRN1 and TRN2: reads Vn and Vm through the visitor `v`,
// combines alternating elements of the two according to `type`, and writes the result to Vd.
//
// NOTE(review): this span is a rendered diff. Lines that call bare `V(...)` / `ir.` appear to
// be the pre-change TRN1 body, and lines that go through `v.` the post-change helper, so
// several names are declared twice here — confirm against the real file before editing.
static void VectorTranspose(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd,
|
||||
Transposition type) {
|
||||
// Q selects a 128-bit operation; otherwise 64 bits are used.
const size_t datasize = Q ? 128 : 64;
|
||||
const size_t esize = 8 << size.ZeroExtend();
|
||||
// Element width in bits, derived from the 2-bit size field (8 << size).
const u8 esize = static_cast<u8>(8 << size.ZeroExtend());
|
||||
|
||||
const IR::U128 m = V(datasize, Vm);
|
||||
const IR::U128 n = V(datasize, Vn);
|
||||
const IR::U128 m = v.V(datasize, Vm);
|
||||
const IR::U128 n = v.V(datasize, Vn);
|
||||
|
||||
const IR::U128 result = [&] {
|
||||
switch (esize) {
|
||||
case 8:
|
||||
case 16:
|
||||
case 32: {
|
||||
// Create a mask of elements we care about (e.g. for 8-bit 0x00FF00FF00FF00FF)
|
||||
const u64 mask_element = Common::Ones<u64>(esize);
|
||||
const u64 mask_value = Common::Replicate<u64>(mask_element, esize * 2);
|
||||
// Create a mask of elements we care about (e.g. for 8-bit: 0x00FF00FF00FF00FF for TRN1
|
||||
// and 0xFF00FF00FF00FF00 for TRN2)
|
||||
const u64 mask_element = [&] {
|
||||
// TRN1 selects the low element of each element pair (no shift); TRN2 selects the high one.
const size_t shift = type == Transposition::TRN1 ? 0 : esize;
|
||||
return Common::Ones<u64>(esize) << shift;
|
||||
}();
|
||||
// Width of one element pair; the mask pattern repeats at this period.
const size_t doubled_esize = esize * 2;
|
||||
const u64 mask_value = Common::Replicate<u64>(mask_element, doubled_esize);
|
||||
|
||||
const IR::U128 mask = ir.VectorBroadcast(64, I(64, mask_value));
|
||||
const IR::U128 anded_m = ir.VectorAnd(m, mask);
|
||||
const IR::U128 anded_n = ir.VectorAnd(n, mask);
|
||||
return ir.VectorOr(ir.VectorLogicalShiftLeft(esize * 2, anded_m, esize), anded_n);
|
||||
const IR::U128 mask = v.ir.VectorBroadcast(64, v.I(64, mask_value));
|
||||
const IR::U128 anded_m = v.ir.VectorAnd(m, mask);
|
||||
const IR::U128 anded_n = v.ir.VectorAnd(n, mask);
|
||||
|
||||
// TRN1: Vm's masked elements are shifted up by one element within each pair and merged
// with Vn's masked elements.
if (type == Transposition::TRN1) {
|
||||
return v.ir.VectorOr(v.ir.VectorLogicalShiftLeft(doubled_esize, anded_m, esize), anded_n);
|
||||
}
|
||||
|
||||
// TRN2: Vn's masked elements are shifted down by one element within each pair and merged
// with Vm's masked elements.
return v.ir.VectorOr(v.ir.VectorLogicalShiftRight(doubled_esize, anded_n, esize), anded_m);
|
||||
}
|
||||
case 64: {
|
||||
default:
|
||||
return ir.VectorSetElement(esize, n, 1, ir.VectorGetElement(esize, m, 0));
|
||||
// For 64-bit elements a single element is copied from `src` into `dst`:
// TRN1 copies element 0 of m into element 1 of n; TRN2 copies element 1 of n into
// element 0 of m.
const auto [src, src_idx, dst, dst_idx] = [type, m, n] {
|
||||
if (type == Transposition::TRN1) {
|
||||
return std::make_tuple(m, 0, n, 1);
|
||||
}
|
||||
return std::make_tuple(n, 1, m, 0);
|
||||
}();
|
||||
|
||||
return v.ir.VectorSetElement(esize, dst, dst_idx, v.ir.VectorGetElement(esize, src, src_idx));
|
||||
}
|
||||
}
|
||||
}();
|
||||
|
||||
V(datasize, Vd, result);
|
||||
// Write the combined vector back to the destination register.
v.V(datasize, Vd, result);
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::TRN1(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
|
||||
if (!Q && size == 0b11) {
|
||||
return ReservedValue();
|
||||
}
|
||||
|
||||
VectorTranspose(*this, Q, size, Vm, Vn, Vd, Transposition::TRN1);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::TRN2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
|
||||
if (!Q && size == 0b11) {
|
||||
return ReservedValue();
|
||||
}
|
||||
|
||||
VectorTranspose(*this, Q, size, Vm, Vn, Vd, Transposition::TRN2);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue