From dd7433f9d373052522bc97c15cf2c87b7752af11 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Thu, 7 Mar 2019 23:59:07 -0500
Subject: [PATCH 1/2] A64: Amend prototypes of some SIMD scalar shift by
 immediate opcodes

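These opcodes write their result to a vector (SIMD) register, so the
destination parameter is a Vec (Vd) rather than a general-purpose Reg (Rd).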
---
 src/frontend/A64/translate/impl/impl.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/frontend/A64/translate/impl/impl.h b/src/frontend/A64/translate/impl/impl.h
index dd764d5d..1f502aa1 100644
--- a/src/frontend/A64/translate/impl/impl.h
+++ b/src/frontend/A64/translate/impl/impl.h
@@ -622,8 +622,8 @@ struct TranslatorVisitor final {
     bool SRSRA_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
     bool SHL_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
     bool SQSHL_imm_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
-    bool SQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd);
-    bool SQRSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd);
+    bool SQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
+    bool SQRSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
     bool SCVTF_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
     bool FCVTZS_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
     bool USHR_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
@@ -634,10 +634,10 @@ struct TranslatorVisitor final {
     bool SLI_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
     bool SQSHLU_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
     bool UQSHL_imm_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
-    bool SQSHRUN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd);
-    bool SQRSHRUN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd);
-    bool UQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd);
-    bool UQRSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd);
+    bool SQSHRUN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
+    bool SQRSHRUN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
+    bool UQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
+    bool UQRSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
     bool UCVTF_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
     bool FCVTZU_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
 

From 126c29a9e9a5c41ceb9a263be55ee8287273d554 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Fri, 8 Mar 2019 00:19:48 -0500
Subject: [PATCH 2/2] A64: Implement SQSHRN, SQSHRUN, and UQSHRN's scalar
 variants

For the time being, the scalar variants can simply be implemented in terms
of the vector variants, operating on element 0 only.
---
 src/frontend/A64/decoder/a64.inc              |  6 +-
 .../impl/simd_scalar_shift_by_immediate.cpp   | 63 +++++++++++++++++++
 2 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index 6a3036df..6bef2c70 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -513,7 +513,7 @@ INST(SRSHR_1,                "SRSHR",                                     "01011
 INST(SRSRA_1,                "SRSRA",                                     "010111110IIIIiii001101nnnnnddddd")
 INST(SHL_1,                  "SHL",                                       "010111110IIIIiii010101nnnnnddddd")
 //INST(SQSHL_imm_1,            "SQSHL (immediate)",                         "010111110IIIIiii011101nnnnnddddd")
-//INST(SQSHRN_1,               "SQSHRN, SQSHRN2",                           "010111110IIIIiii100101nnnnnddddd")
+INST(SQSHRN_1,               "SQSHRN, SQSHRN2",                           "010111110IIIIiii100101nnnnnddddd")
 //INST(SQRSHRN_1,              "SQRSHRN, SQRSHRN2",                         "010111110IIIIiii100111nnnnnddddd")
 INST(SCVTF_fix_1,            "SCVTF (vector, fixed-point)",               "010111110IIIIiii111001nnnnnddddd")
 INST(FCVTZS_fix_1,           "FCVTZS (vector, fixed-point)",              "010111110IIIIiii111111nnnnnddddd")
@@ -525,9 +525,9 @@ INST(SRI_1,                  "SRI",                                       "01111
 INST(SLI_1,                  "SLI",                                       "011111110IIIIiii010101nnnnnddddd")
 //INST(SQSHLU_1,               "SQSHLU",                                    "011111110IIIIiii011001nnnnnddddd")
 //INST(UQSHL_imm_1,            "UQSHL (immediate)",                         "011111110IIIIiii011101nnnnnddddd")
-//INST(SQSHRUN_1,              "SQSHRUN, SQSHRUN2",                         "011111110IIIIiii100001nnnnnddddd")
+INST(SQSHRUN_1,              "SQSHRUN, SQSHRUN2",                         "011111110IIIIiii100001nnnnnddddd")
 //INST(SQRSHRUN_1,             "SQRSHRUN, SQRSHRUN2",                       "011111110IIIIiii100011nnnnnddddd")
-//INST(UQSHRN_1,               "UQSHRN, UQSHRN2",                           "011111110IIIIiii100101nnnnnddddd")
+INST(UQSHRN_1,               "UQSHRN, UQSHRN2",                           "011111110IIIIiii100101nnnnnddddd")
 //INST(UQRSHRN_1,              "UQRSHRN, UQRSHRN2",                         "011111110IIIIiii100111nnnnnddddd")
 INST(UCVTF_fix_1,            "UCVTF (vector, fixed-point)",               "011111110IIIIiii111001nnnnnddddd")
 INST(FCVTZU_fix_1,           "FCVTZU (vector, fixed-point)",              "011111110IIIIiii111111nnnnnddddd")
diff --git a/src/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp b/src/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp
index a379d140..1f4ae50a 100644
--- a/src/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp
+++ b/src/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp
@@ -9,6 +9,12 @@
 
 namespace Dynarmic::A64 {
 namespace {
+enum class Narrowing {  // Selects how ShiftRightNarrowing narrows the shifted double-width value.
+    Truncation,  // Narrowed with VectorNarrow.
+    SaturateToUnsigned,  // Narrowed with VectorSignedSaturatedNarrowToUnsigned (signed input) or VectorUnsignedSaturatedNarrow (unsigned input).
+    SaturateToSigned,  // Narrowed with VectorSignedSaturatedNarrowToSigned; ShiftRightNarrowing asserts the input is signed.
+};
+
 enum class ShiftExtraBehavior {
     None,
     Accumulate,
@@ -127,6 +133,51 @@ bool ShiftAndInsert(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn, Vec
     return true;
 }
 
+bool ShiftRightNarrowing(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd,
+                         Narrowing narrowing, Signedness signedness) {  // Scalar shift-right-and-narrow helper: element 0 of Vn -> shift -> narrow -> scalar write to Vd.
+    if (immh == 0b0000) {  // immh has no set bit, so no element size can be derived from it.
+        return v.UnallocatedEncoding();
+    }
+
+    if (immh.Bit<3>()) {  // Would yield esize 64 / source_esize 128 (assuming HighestSetBit returns the bit index); rejected.
+        return v.UnallocatedEncoding();
+    }
+
+    const size_t esize = 8 << Common::HighestSetBit(immh.ZeroExtend());  // Destination (narrow) element size.
+    const size_t source_esize = 2 * esize;  // The source element is twice as wide as the destination.
+    const u8 shift_amount = static_cast<u8>(source_esize - concatenate(immh, immb).ZeroExtend());  // shift = source_esize - immh:immb.
+
+    const IR::U128 operand = v.ir.ZeroExtendToQuad(v.ir.VectorGetElement(source_esize, v.V(128, Vn), 0));  // Only element 0 of Vn is used; it is zero-extended to a full quad.
+
+    IR::U128 wide_result = [&] {  // Shift at source width: arithmetic for signed operands, logical otherwise.
+        if (signedness == Signedness::Signed) {
+            return v.ir.VectorArithmeticShiftRight(source_esize, operand, shift_amount);
+        }
+        return v.ir.VectorLogicalShiftRight(source_esize, operand, shift_amount);
+    }();
+
+    const IR::U128 result = [&] {  // Narrow the shifted value as requested by `narrowing`.
+        switch (narrowing) {
+        case Narrowing::Truncation:
+            return v.ir.VectorNarrow(source_esize, wide_result);
+        case Narrowing::SaturateToUnsigned:
+            if (signedness == Signedness::Signed) {  // Signed input saturating to an unsigned result.
+                return v.ir.VectorSignedSaturatedNarrowToUnsigned(source_esize, wide_result);
+            }
+            return v.ir.VectorUnsignedSaturatedNarrow(source_esize, wide_result);
+        case Narrowing::SaturateToSigned:
+            ASSERT(signedness == Signedness::Signed);  // Callers must pair this mode with signed operands.
+            return v.ir.VectorSignedSaturatedNarrowToSigned(source_esize, wide_result);
+        }
+        UNREACHABLE();  // Every enumerator returns inside the switch above.
+        return IR::U128{};  // NOTE(review): presumably a dummy return for compilers that do not treat UNREACHABLE() as noreturn - confirm.
+    }();
+
+    const IR::UAny segment = v.ir.VectorGetElement(esize, result, 0);  // Element 0 of the narrowed vector.
+    v.V_scalar(esize, Vd, segment);  // Written to Vd through the scalar accessor at destination width.
+    return true;
+}
+
 bool ScalarFPConvertWithRound(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd, Signedness sign, FloatConversionDirection direction, FP::RoundingMode rounding_mode) {
     const u32 immh_value = immh.ZeroExtend();
 
@@ -202,6 +253,14 @@ bool TranslatorVisitor::SRI_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
     return ShiftAndInsert(*this, immh, immb, Vn, Vd, ShiftDirection::Right);
 }
 
+bool TranslatorVisitor::SQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {  // Scalar SQSHRN: signed shift right, then saturating narrow to signed.
+    return ShiftRightNarrowing(*this, immh, immb, Vn, Vd, Narrowing::SaturateToSigned, Signedness::Signed);
+}
+
+bool TranslatorVisitor::SQSHRUN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {  // Scalar SQSHRUN: signed shift right, then saturating narrow to unsigned.
+    return ShiftRightNarrowing(*this, immh, immb, Vn, Vd, Narrowing::SaturateToUnsigned, Signedness::Signed);
+}
+
 bool TranslatorVisitor::SRSHR_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
     return RoundingShiftRight(*this, immh, immb, Vn, Vd, ShiftExtraBehavior::None, Signedness::Signed);
 }
@@ -233,6 +292,10 @@ bool TranslatorVisitor::SHL_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
     return true;
 }
 
+bool TranslatorVisitor::UQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {  // Scalar UQSHRN: unsigned (logical) shift right, then unsigned saturating narrow.
+    return ShiftRightNarrowing(*this, immh, immb, Vn, Vd, Narrowing::SaturateToUnsigned, Signedness::Unsigned);
+}
+
 bool TranslatorVisitor::URSHR_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
     return RoundingShiftRight(*this, immh, immb, Vn, Vd, ShiftExtraBehavior::None, Signedness::Unsigned);
 }