Merge pull request #455 from lioncash/sqrdmulh-scalar
A64: Implement SQRDMULH and SQDMULL's scalar indexed variants
This commit is contained in:
commit
c1ce94872d
3 changed files with 49 additions and 13 deletions
|
@ -535,9 +535,9 @@ INST(FCVTZU_fix_1, "FCVTZU (vector, fixed-point)", "01111
|
|||
// Data Processing - FP and SIMD - SIMD Scalar x indexed element
|
||||
//INST(SQDMLAL_elt_1, "SQDMLAL, SQDMLAL2 (by element)", "01011111zzLMmmmm0011H0nnnnnddddd")
|
||||
//INST(SQDMLSL_elt_1, "SQDMLSL, SQDMLSL2 (by element)", "01011111zzLMmmmm0111H0nnnnnddddd")
|
||||
//INST(SQDMULL_elt_1, "SQDMULL, SQDMULL2 (by element)", "01011111zzLMmmmm1011H0nnnnnddddd")
|
||||
INST(SQDMULL_elt_1, "SQDMULL, SQDMULL2 (by element)", "01011111zzLMmmmm1011H0nnnnnddddd")
|
||||
INST(SQDMULH_elt_1, "SQDMULH (by element)", "01011111zzLMmmmm1100H0nnnnnddddd")
|
||||
//INST(SQRDMULH_elt_1, "SQRDMULH (by element)", "01011111zzLMmmmm1101H0nnnnnddddd")
|
||||
INST(SQRDMULH_elt_1, "SQRDMULH (by element)", "01011111zzLMmmmm1101H0nnnnnddddd")
|
||||
//INST(FMLA_elt_1, "FMLA (by element)", "0101111100LMmmmm0001H0nnnnnddddd")
|
||||
INST(FMLA_elt_2, "FMLA (by element)", "010111111zLMmmmm0001H0nnnnnddddd")
|
||||
//INST(FMLS_elt_1, "FMLS (by element)", "0101111100LMmmmm0101H0nnnnnddddd")
|
||||
|
|
|
@ -642,9 +642,9 @@ struct TranslatorVisitor final {
|
|||
bool FCVTZU_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
|
||||
|
||||
// Data Processing - FP and SIMD - SIMD Scalar x indexed element
|
||||
bool SQDMLAL_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Reg Rn, Vec Vd);
|
||||
bool SQDMLSL_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Reg Rn, Vec Vd);
|
||||
bool SQDMULL_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Reg Rn, Vec Vd);
|
||||
bool SQDMLAL_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd);
|
||||
bool SQDMLSL_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd);
|
||||
bool SQDMULL_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd);
|
||||
bool SQDMULH_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd);
|
||||
bool SQRDMULH_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd);
|
||||
bool FMLA_elt_1(Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd);
|
||||
|
|
|
@ -9,6 +9,14 @@
|
|||
|
||||
namespace Dynarmic::A64 {
|
||||
namespace {
|
||||
// Assembles the element index and the Vm register number from the split
// encoding fields of a "by element" instruction.
//
//   size == 0b01 : index = H:L:M, Vm = Vmlo (zero-extended)
//   otherwise    : index = H:L,   Vm = M:Vmlo
//
// Returns the pair {index, Vm}.
std::pair<size_t, Vec> Combine(Imm<2> size, Imm<1> H, Imm<1> L, Imm<1> M, Imm<4> Vmlo) {
    if (size == 0b01) {
        // M is consumed by the index here, so only Vmlo selects the register.
        const size_t element_index = concatenate(H, L, M).ZeroExtend();
        const Vec reg = Vmlo.ZeroExtend<Vec>();
        return {element_index, reg};
    }

    // M becomes the top bit of the register number instead.
    const size_t element_index = concatenate(H, L).ZeroExtend();
    const Vec reg = concatenate(M, Vmlo).ZeroExtend<Vec>();
    return {element_index, reg};
}
|
||||
|
||||
enum class ExtraBehavior {
|
||||
None,
|
||||
Accumulate,
|
||||
|
@ -74,14 +82,7 @@ bool TranslatorVisitor::SQDMULH_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vm
|
|||
}
|
||||
|
||||
const size_t esize = 8 << size.ZeroExtend();
|
||||
const auto [index, Vmhi] = [=] {
|
||||
if (size == 0b01) {
|
||||
return std::make_pair(concatenate(H, L, M).ZeroExtend(), Imm<1>{0});
|
||||
}
|
||||
|
||||
return std::make_pair(concatenate(H, L).ZeroExtend(), M);
|
||||
}();
|
||||
const Vec Vm = concatenate(Vmhi, Vmlo).ZeroExtend<Vec>();
|
||||
const auto [index, Vm] = Combine(size, H, L, M, Vmlo);
|
||||
|
||||
const IR::UAny operand1 = V_scalar(esize, Vn);
|
||||
const IR::UAny operand2 = ir.VectorGetElement(esize, V(128, Vm), index);
|
||||
|
@ -93,4 +94,39 @@ bool TranslatorVisitor::SQDMULH_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vm
|
|||
return true;
|
||||
}
|
||||
|
||||
// Translates the scalar SQRDMULH (by element) encoding.
//
// Element 0 of Vn is zero-extended to a full vector and multiplied (signed
// saturating doubling multiply) by a broadcast of the selected element of Vm.
// The most significant bit of each lower-half product element is then added
// to the upper half, and the sum is written to Vd.
//
// Returns the result of UnallocatedEncoding() for size 0b00 / 0b11,
// otherwise true.
bool TranslatorVisitor::SQRDMULH_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) {
    if (size == 0b00 || size == 0b11) {
        return UnallocatedEncoding();
    }

    const size_t esize = 8 << size.ZeroExtend();
    const auto [index, Vm] = Combine(size, H, L, M, Vmlo);

    // Left-hand operand: element 0 of Vn, with all other lanes zero.
    const IR::UAny n_element = ir.VectorGetElement(esize, V(128, Vn), 0);
    const IR::U128 multiplicand = ir.ZeroExtendToQuad(n_element);

    // Right-hand operand: the indexed element of Vm replicated to every lane.
    const IR::UAny m_element = ir.VectorGetElement(esize, V(128, Vm), index);
    const IR::U128 multiplier = ir.VectorBroadcast(esize, m_element);

    const IR::UpperAndLower product = ir.VectorSignedSaturatedDoublingMultiply(esize, multiplicand, multiplier);

    // Shifting by (esize - 1) leaves only the top bit of each lower-half element.
    const u8 top_bit_shift = static_cast<u8>(esize - 1);
    const IR::U128 lower_top_bit = ir.VectorLogicalShiftRight(esize, product.lower, top_bit_shift);
    const IR::U128 result = ir.VectorAdd(esize, product.upper, lower_top_bit);

    V(128, Vd, result);
    return true;
}
|
||||
|
||||
// Translates the scalar SQDMULL (by element) encoding.
//
// Element 0 of Vn is zero-extended to a full vector and combined with a
// broadcast of the selected element of Vm through
// VectorSignedSaturatedDoublingMultiplyLong; the result is written to Vd.
//
// Returns the result of UnallocatedEncoding() for size 0b00 / 0b11,
// otherwise true.
bool TranslatorVisitor::SQDMULL_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) {
    if (size == 0b00 || size == 0b11) {
        return UnallocatedEncoding();
    }

    const size_t esize = 8 << size.ZeroExtend();
    const auto [index, Vm] = Combine(size, H, L, M, Vmlo);

    // Left-hand operand: element 0 of Vn, with all other lanes zero.
    const IR::UAny n_element = ir.VectorGetElement(esize, V(128, Vn), 0);
    const IR::U128 multiplicand = ir.ZeroExtendToQuad(n_element);

    // Right-hand operand: the indexed element of Vm replicated to every lane.
    const IR::UAny m_element = ir.VectorGetElement(esize, V(128, Vm), index);
    const IR::U128 multiplier = ir.VectorBroadcast(esize, m_element);

    const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyLong(esize, multiplicand, multiplier);

    V(128, Vd, result);
    return true;
}
|
||||
|
||||
} // namespace Dynarmic::A64
|
||||
|
|
Loading…
Reference in a new issue