A64: Implement MUL (by element)
This commit is contained in:
parent
a86d4093cd
commit
1651e60462
3 changed files with 45 additions and 26 deletions
|
@ -823,7 +823,7 @@ INST(USHLL, "USHLL, USHLL2", "0Q101
|
||||||
//INST(SQDMLAL_elt_2, "SQDMLAL, SQDMLAL2 (by element)", "0Q001111zzLMmmmm0011H0nnnnnddddd")
|
//INST(SQDMLAL_elt_2, "SQDMLAL, SQDMLAL2 (by element)", "0Q001111zzLMmmmm0011H0nnnnnddddd")
|
||||||
//INST(SMLSL_elt, "SMLSL, SMLSL2 (by element)", "0Q001111zzLMmmmm0110H0nnnnnddddd")
|
//INST(SMLSL_elt, "SMLSL, SMLSL2 (by element)", "0Q001111zzLMmmmm0110H0nnnnnddddd")
|
||||||
//INST(SQDMLSL_elt_2, "SQDMLSL, SQDMLSL2 (by element)", "0Q001111zzLMmmmm0111H0nnnnnddddd")
|
//INST(SQDMLSL_elt_2, "SQDMLSL, SQDMLSL2 (by element)", "0Q001111zzLMmmmm0111H0nnnnnddddd")
|
||||||
//INST(MUL_elt, "MUL (by element)", "0Q001111zzLMmmmm1000H0nnnnnddddd")
|
INST(MUL_elt, "MUL (by element)", "0Q001111zzLMmmmm1000H0nnnnnddddd")
|
||||||
//INST(SMULL_elt, "SMULL, SMULL2 (by element)", "0Q001111zzLMmmmm1010H0nnnnnddddd")
|
//INST(SMULL_elt, "SMULL, SMULL2 (by element)", "0Q001111zzLMmmmm1010H0nnnnnddddd")
|
||||||
//INST(SQDMULL_elt_2, "SQDMULL, SQDMULL2 (by element)", "0Q001111zzLMmmmm1011H0nnnnnddddd")
|
//INST(SQDMULL_elt_2, "SQDMULL, SQDMULL2 (by element)", "0Q001111zzLMmmmm1011H0nnnnnddddd")
|
||||||
//INST(SQDMULH_elt_2, "SQDMULH (by element)", "0Q001111zzLMmmmm1100H0nnnnnddddd")
|
//INST(SQDMULH_elt_2, "SQDMULH (by element)", "0Q001111zzLMmmmm1100H0nnnnnddddd")
|
||||||
|
|
|
@ -901,7 +901,7 @@ struct TranslatorVisitor final {
|
||||||
// Data Processing - FP and SIMD - SIMD vector x indexed element
|
// Data Processing - FP and SIMD - SIMD vector x indexed element
|
||||||
bool SMLAL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
bool SMLAL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
||||||
bool SMLSL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
bool SMLSL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
||||||
bool MUL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
bool MUL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd);
|
||||||
bool SMULL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
bool SMULL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
||||||
bool SDOT_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
bool SDOT_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
||||||
bool FMLAL_elt_1(bool Q, bool sz, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
bool FMLAL_elt_1(bool Q, bool sz, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
||||||
|
|
|
@ -4,40 +4,59 @@
|
||||||
* General Public License version 2 or any later version.
|
* General Public License version 2 or any later version.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
#include "frontend/A64/translate/impl/impl.h"
|
#include "frontend/A64/translate/impl/impl.h"
|
||||||
|
|
||||||
namespace Dynarmic::A64 {
|
namespace Dynarmic::A64 {
|
||||||
|
namespace {
|
||||||
|
// Decodes the (element index, Vm register) pair of a "by element" encoding.
//
// size == 0b01: the index is concatenate(H, L, M) and the register number is
//               Vmlo on its own.
// otherwise:    the index is concatenate(H, L) and M is concatenated in front
//               of Vmlo to form the register number.
//
// The visible callers (MLA_elt and MUL_elt) reject every size other than 0b01
// and 0b10 before this helper is reached, so "otherwise" means size == 0b10
// in practice.
//
// Returns {index, Vm}.
std::pair<size_t, Vec> Combine(Imm<2> size, Imm<1> H, Imm<1> L, Imm<1> M, Imm<4> Vmlo) {
    if (size == 0b01) {
        return {concatenate(H, L, M).ZeroExtend(), Vmlo.ZeroExtend<Vec>()};
    }

    return {concatenate(H, L).ZeroExtend(), concatenate(M, Vmlo).ZeroExtend<Vec>()};
}
|
||||||
|
|
||||||
|
// Selects what MultiplyByElement does with the product before writing Vd.
enum class ExtraBehavior {
    None,        // Write the product as-is.
    Accumulate,  // Add the product to the previous contents of Vd.
};
|
||||||
|
|
||||||
|
// Shared translation for the vector "multiply by element" instructions.
//
// Multiplies every esize-bit lane of Vn by one selected lane of Vm and writes
// the result to Vd. With ExtraBehavior::Accumulate the product is first added
// to the value previously held in Vd.
//
// v              - visitor used to read/write vector registers and emit IR.
// Q              - selects a 128-bit (true) or 64-bit (false) operation.
// size           - element size selector; esize is 8 << size.
// L, M, Vmlo, H  - raw encoding fields, decoded into (index, Vm) by Combine.
// Vn, Vd         - source and destination vector registers.
// extra_behavior - whether to accumulate into Vd.
//
// This helper does not validate size; callers are expected to have rejected
// unsupported encodings already.
void MultiplyByElement(TranslatorVisitor& v, bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd,
                       ExtraBehavior extra_behavior) {
    const auto [index, Vm] = Combine(size, H, L, M, Vmlo);
    // Width, in bits, of the Vm read that the indexed element is taken from.
    const size_t idxdsize = H == 1 ? 128 : 64;
    const size_t esize = 8 << size.ZeroExtend();
    const size_t datasize = Q ? 128 : 64;

    const IR::U128 operand1 = v.V(datasize, Vn);
    // Take element `index` of Vm and broadcast it to form the second operand.
    const IR::U128 operand2 = v.ir.VectorBroadcast(esize, v.ir.VectorGetElement(esize, v.V(idxdsize, Vm), index));
    // Vd is read unconditionally; the value is only consumed when accumulating.
    const IR::U128 operand3 = v.V(datasize, Vd);

    IR::U128 result = v.ir.VectorMultiply(esize, operand1, operand2);
    if (extra_behavior == ExtraBehavior::Accumulate) {
        result = v.ir.VectorAdd(esize, operand3, result);
    }

    v.V(datasize, Vd, result);
}
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
bool TranslatorVisitor::MLA_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) {
|
bool TranslatorVisitor::MLA_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) {
|
||||||
const size_t idxdsize = H == 1 ? 128 : 64;
|
if (size != 0b01 && size != 0b10) {
|
||||||
|
|
||||||
size_t index;
|
|
||||||
Imm<1> Vmhi{0};
|
|
||||||
switch (size.ZeroExtend()) {
|
|
||||||
case 0b01:
|
|
||||||
index = concatenate(H, L, M).ZeroExtend();
|
|
||||||
break;
|
|
||||||
case 0b10:
|
|
||||||
index = concatenate(H, L).ZeroExtend();
|
|
||||||
Vmhi = M;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return UnallocatedEncoding();
|
return UnallocatedEncoding();
|
||||||
}
|
}
|
||||||
|
|
||||||
const Vec Vm = concatenate(Vmhi, Vmlo).ZeroExtend<Vec>();
|
MultiplyByElement(*this, Q, size, L, M, Vmlo, H, Vn, Vd, ExtraBehavior::Accumulate);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
const size_t esize = 8 << size.ZeroExtend();
|
bool TranslatorVisitor::MUL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) {
|
||||||
const size_t datasize = Q ? 128 : 64;
|
if (size != 0b01 && size != 0b10) {
|
||||||
|
return UnallocatedEncoding();
|
||||||
const IR::U128 operand1 = V(datasize, Vn);
|
}
|
||||||
const IR::U128 operand2 = ir.VectorBroadcast(esize, ir.VectorGetElement(esize, V(idxdsize, Vm), index));
|
|
||||||
const IR::U128 operand3 = V(datasize, Vd);
|
|
||||||
|
|
||||||
const IR::U128 product = ir.VectorMultiply(esize, operand1, operand2);
|
|
||||||
const IR::U128 result = ir.VectorAdd(esize, operand3, product);
|
|
||||||
V(datasize, Vd, result);
|
|
||||||
|
|
||||||
|
MultiplyByElement(*this, Q, size, L, M, Vmlo, H, Vn, Vd, ExtraBehavior::None);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue