From 43344c5400e8fba315bc7d37bcd425c1b1d983d1 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 29 Jul 2018 19:00:18 -0400 Subject: [PATCH] A64: Implement SMAXV, SMINV, UMAXV, and UMINV --- src/frontend/A64/decoder/a64.inc | 8 +- .../A64/translate/impl/simd_across_lanes.cpp | 79 +++++++++++++++++++ 2 files changed, 83 insertions(+), 4 deletions(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index ac2b50f6..acc62190 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -660,8 +660,8 @@ INST(FRSQRTE_4, "FRSQRTE", "0Q101 // Data Processing - FP and SIMD - SIMD across lanes INST(SADDLV, "SADDLV", "0Q001110zz110000001110nnnnnddddd") -//INST(SMAXV, "SMAXV", "0Q001110zz110000101010nnnnnddddd") -//INST(SMINV, "SMINV", "0Q001110zz110001101010nnnnnddddd") +INST(SMAXV, "SMAXV", "0Q001110zz110000101010nnnnnddddd") +INST(SMINV, "SMINV", "0Q001110zz110001101010nnnnnddddd") INST(ADDV, "ADDV", "0Q001110zz110001101110nnnnnddddd") //INST(FMAXNMV_1, "FMAXNMV", "0Q00111000110000110010nnnnnddddd") INST(FMAXNMV_2, "FMAXNMV", "0Q1011100z110000110010nnnnnddddd") @@ -672,8 +672,8 @@ INST(FMINNMV_2, "FMINNMV", "0Q101 //INST(FMINV_1, "FMINV", "0Q00111010110000111110nnnnnddddd") INST(FMINV_2, "FMINV", "0Q1011101z110000111110nnnnnddddd") INST(UADDLV, "UADDLV", "0Q101110zz110000001110nnnnnddddd") -//INST(UMAXV, "UMAXV", "0Q101110zz110000101010nnnnnddddd") -//INST(UMINV, "UMINV", "0Q101110zz110001101010nnnnnddddd") +INST(UMAXV, "UMAXV", "0Q101110zz110000101010nnnnnddddd") +INST(UMINV, "UMINV", "0Q101110zz110001101010nnnnnddddd") // Data Processing - FP and SIMD - SIMD three different INST(SADDL, "SADDL, SADDL2", "0Q001110zz1mmmmm000000nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/simd_across_lanes.cpp b/src/frontend/A64/translate/impl/simd_across_lanes.cpp index 771c1f57..0e8b4d71 100644 --- a/src/frontend/A64/translate/impl/simd_across_lanes.cpp +++ b/src/frontend/A64/translate/impl/simd_across_lanes.cpp @@ -103,6 +103,69 @@ bool FPMinMax(TranslatorVisitor& v, bool Q, bool sz, Vec Vn, Vec Vd, MinMaxOpera v.V_scalar(esize, Vd, result); return true; } + +enum class ScalarMinMaxOperation { + Max, + Min, +}; + +bool ScalarMinMax(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vn, Vec Vd, + ScalarMinMaxOperation operation, Signedness sign) { + if ((size == 0b10 && !Q) || size == 0b11) { + return v.ReservedValue(); + } + + const size_t esize = 8 << size.ZeroExtend(); + const size_t datasize = Q ? 128 : 64; + const size_t elements = datasize / esize; + + const auto get_element = [&](IR::U128 vec, size_t element) { + const auto vec_element = v.ir.VectorGetElement(esize, vec, element); + + if (sign == Signedness::Signed) { + return v.ir.SignExtendToWord(vec_element); + } + + return v.ir.ZeroExtendToWord(vec_element); + }; + + const auto op_func = [&](const auto& a, const auto& b) { + switch (operation) { + case ScalarMinMaxOperation::Max: + if (sign == Signedness::Signed) { + return v.ir.MaxSigned(a, b); + } + return v.ir.MaxUnsigned(a, b); + + case ScalarMinMaxOperation::Min: + if (sign == Signedness::Signed) { + return v.ir.MinSigned(a, b); + } + return v.ir.MinUnsigned(a, b); + + default: + UNREACHABLE(); + return IR::U32U64{}; + } + }; + + const IR::U128 operand = v.V(datasize, Vn); + + IR::U32 value = get_element(operand, 0); + for (size_t i = 1; i < elements; i++) { + value = op_func(value, get_element(operand, i)); + } + + if (size == 0b00) { + v.V(datasize, Vd, v.ir.ZeroExtendToQuad(v.ir.LeastSignificantByte(value))); + } else if (size == 0b01) { + v.V(datasize, Vd, v.ir.ZeroExtendToQuad(v.ir.LeastSignificantHalf(value))); + } else { + v.V(datasize, Vd, v.ir.ZeroExtendToQuad(value)); + } + + return true; +} } // Anonymous namespace bool TranslatorVisitor::ADDV(bool Q, Imm<2> size, Vec Vn, Vec Vd) { @@ -156,7 +219,23 @@ bool TranslatorVisitor::SADDLV(bool Q, Imm<2> size, Vec Vn, Vec Vd) { return LongAdd(*this, Q, size, Vn, Vd, Signedness::Signed); } +bool TranslatorVisitor::SMAXV(bool Q, Imm<2> size, Vec Vn, Vec Vd) { + return ScalarMinMax(*this, Q, size, Vn, Vd, ScalarMinMaxOperation::Max, Signedness::Signed); +} + +bool TranslatorVisitor::SMINV(bool Q, Imm<2> size, Vec Vn, Vec Vd) { + return ScalarMinMax(*this, Q, size, Vn, Vd, ScalarMinMaxOperation::Min, Signedness::Signed); +} + bool TranslatorVisitor::UADDLV(bool Q, Imm<2> size, Vec Vn, Vec Vd) { return LongAdd(*this, Q, size, Vn, Vd, Signedness::Unsigned); } + +bool TranslatorVisitor::UMAXV(bool Q, Imm<2> size, Vec Vn, Vec Vd) { + return ScalarMinMax(*this, Q, size, Vn, Vd, ScalarMinMaxOperation::Max, Signedness::Unsigned); +} + +bool TranslatorVisitor::UMINV(bool Q, Imm<2> size, Vec Vn, Vec Vd) { + return ScalarMinMax(*this, Q, size, Vn, Vd, ScalarMinMaxOperation::Min, Signedness::Unsigned); +} } // namespace Dynarmic::A64