From dd7433f9d373052522bc97c15cf2c87b7752af11 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 7 Mar 2019 23:59:07 -0500 Subject: [PATCH 1/2] A64: Amend prototypes of some SIMD scalar shift by immediate opcodes These take a vector for a destination. --- src/frontend/A64/translate/impl/impl.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/frontend/A64/translate/impl/impl.h b/src/frontend/A64/translate/impl/impl.h index dd764d5d..1f502aa1 100644 --- a/src/frontend/A64/translate/impl/impl.h +++ b/src/frontend/A64/translate/impl/impl.h @@ -622,8 +622,8 @@ struct TranslatorVisitor final { bool SRSRA_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); bool SHL_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); bool SQSHL_imm_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); - bool SQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd); - bool SQRSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd); + bool SQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); + bool SQRSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); bool SCVTF_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); bool FCVTZS_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); bool USHR_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); @@ -634,10 +634,10 @@ struct TranslatorVisitor final { bool SLI_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); bool SQSHLU_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); bool UQSHL_imm_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); - bool SQSHRUN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd); - bool SQRSHRUN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd); - bool UQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd); - bool UQRSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Reg Rd); + bool SQSHRUN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); + bool SQRSHRUN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); + bool UQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); + bool UQRSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); bool UCVTF_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); bool 
FCVTZU_fix_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); From 126c29a9e9a5c41ceb9a263be55ee8287273d554 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 8 Mar 2019 00:19:48 -0500 Subject: [PATCH 2/2] A64: Implement SQSHRN, SQSHRUN, and UQSHRN's scalar variants These can just be implemented in terms of the vector variants for the time being. --- src/frontend/A64/decoder/a64.inc | 6 +- .../impl/simd_scalar_shift_by_immediate.cpp | 63 +++++++++++++++++++ 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 6a3036df..6bef2c70 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -513,7 +513,7 @@ INST(SRSHR_1, "SRSHR", "01011 INST(SRSRA_1, "SRSRA", "010111110IIIIiii001101nnnnnddddd") INST(SHL_1, "SHL", "010111110IIIIiii010101nnnnnddddd") //INST(SQSHL_imm_1, "SQSHL (immediate)", "010111110IIIIiii011101nnnnnddddd") -//INST(SQSHRN_1, "SQSHRN, SQSHRN2", "010111110IIIIiii100101nnnnnddddd") +INST(SQSHRN_1, "SQSHRN, SQSHRN2", "010111110IIIIiii100101nnnnnddddd") //INST(SQRSHRN_1, "SQRSHRN, SQRSHRN2", "010111110IIIIiii100111nnnnnddddd") INST(SCVTF_fix_1, "SCVTF (vector, fixed-point)", "010111110IIIIiii111001nnnnnddddd") INST(FCVTZS_fix_1, "FCVTZS (vector, fixed-point)", "010111110IIIIiii111111nnnnnddddd") @@ -525,9 +525,9 @@ INST(SRI_1, "SRI", "01111 INST(SLI_1, "SLI", "011111110IIIIiii010101nnnnnddddd") //INST(SQSHLU_1, "SQSHLU", "011111110IIIIiii011001nnnnnddddd") //INST(UQSHL_imm_1, "UQSHL (immediate)", "011111110IIIIiii011101nnnnnddddd") -//INST(SQSHRUN_1, "SQSHRUN, SQSHRUN2", "011111110IIIIiii100001nnnnnddddd") +INST(SQSHRUN_1, "SQSHRUN, SQSHRUN2", "011111110IIIIiii100001nnnnnddddd") //INST(SQRSHRUN_1, "SQRSHRUN, SQRSHRUN2", "011111110IIIIiii100011nnnnnddddd") -//INST(UQSHRN_1, "UQSHRN, UQSHRN2", "011111110IIIIiii100101nnnnnddddd") +INST(UQSHRN_1, "UQSHRN, UQSHRN2", "011111110IIIIiii100101nnnnnddddd") //INST(UQRSHRN_1, "UQRSHRN, UQRSHRN2", 
"011111110IIIIiii100111nnnnnddddd") INST(UCVTF_fix_1, "UCVTF (vector, fixed-point)", "011111110IIIIiii111001nnnnnddddd") INST(FCVTZU_fix_1, "FCVTZU (vector, fixed-point)", "011111110IIIIiii111111nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp b/src/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp index a379d140..1f4ae50a 100644 --- a/src/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp +++ b/src/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp @@ -9,6 +9,12 @@ namespace Dynarmic::A64 { namespace { +enum class Narrowing { + Truncation, + SaturateToUnsigned, + SaturateToSigned, +}; + enum class ShiftExtraBehavior { None, Accumulate, @@ -127,6 +133,51 @@ bool ShiftAndInsert(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn, Vec return true; } +bool ShiftRightNarrowing(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd, + Narrowing narrowing, Signedness signedness) { + if (immh == 0b0000) { + return v.UnallocatedEncoding(); + } + + if (immh.Bit<3>()) { + return v.UnallocatedEncoding(); + } + + const size_t esize = 8 << Common::HighestSetBit(immh.ZeroExtend()); + const size_t source_esize = 2 * esize; + const u8 shift_amount = static_cast<u8>(source_esize - concatenate(immh, immb).ZeroExtend()); + + const IR::U128 operand = v.ir.ZeroExtendToQuad(v.ir.VectorGetElement(source_esize, v.V(128, Vn), 0)); + + IR::U128 wide_result = [&] { + if (signedness == Signedness::Signed) { + return v.ir.VectorArithmeticShiftRight(source_esize, operand, shift_amount); + } + return v.ir.VectorLogicalShiftRight(source_esize, operand, shift_amount); + }(); + + const IR::U128 result = [&] { + switch (narrowing) { + case Narrowing::Truncation: + return v.ir.VectorNarrow(source_esize, wide_result); + case Narrowing::SaturateToUnsigned: + if (signedness == Signedness::Signed) { + return v.ir.VectorSignedSaturatedNarrowToUnsigned(source_esize, wide_result); + } + return 
v.ir.VectorUnsignedSaturatedNarrow(source_esize, wide_result); + case Narrowing::SaturateToSigned: + ASSERT(signedness == Signedness::Signed); + return v.ir.VectorSignedSaturatedNarrowToSigned(source_esize, wide_result); + } + UNREACHABLE(); + return IR::U128{}; + }(); + + const IR::UAny segment = v.ir.VectorGetElement(esize, result, 0); + v.V_scalar(esize, Vd, segment); + return true; +} + bool ScalarFPConvertWithRound(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd, Signedness sign, FloatConversionDirection direction, FP::RoundingMode rounding_mode) { const u32 immh_value = immh.ZeroExtend(); @@ -202,6 +253,14 @@ bool TranslatorVisitor::SRI_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) { return ShiftAndInsert(*this, immh, immb, Vn, Vd, ShiftDirection::Right); } +bool TranslatorVisitor::SQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) { + return ShiftRightNarrowing(*this, immh, immb, Vn, Vd, Narrowing::SaturateToSigned, Signedness::Signed); +} + +bool TranslatorVisitor::SQSHRUN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) { + return ShiftRightNarrowing(*this, immh, immb, Vn, Vd, Narrowing::SaturateToUnsigned, Signedness::Signed); +} + bool TranslatorVisitor::SRSHR_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) { return RoundingShiftRight(*this, immh, immb, Vn, Vd, ShiftExtraBehavior::None, Signedness::Signed); } @@ -233,6 +292,10 @@ bool TranslatorVisitor::SHL_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) { return true; } +bool TranslatorVisitor::UQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) { + return ShiftRightNarrowing(*this, immh, immb, Vn, Vd, Narrowing::SaturateToUnsigned, Signedness::Unsigned); +} + bool TranslatorVisitor::URSHR_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) { return RoundingShiftRight(*this, immh, immb, Vn, Vd, ShiftExtraBehavior::None, Signedness::Unsigned); }