From 5cf1478620492e738f5f1e5903b25f1081afac5d Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 4 Mar 2019 12:43:24 -0500 Subject: [PATCH 1/2] frontend/ir: Add opcodes for vector square roots --- src/backend/x64/emit_x64_vector_floating_point.cpp | 12 ++++++++++++ src/frontend/ir/ir_emitter.cpp | 11 +++++++++++ src/frontend/ir/ir_emitter.h | 1 + src/frontend/ir/microinstruction.cpp | 2 ++ src/frontend/ir/opcodes.inc | 2 ++ 5 files changed, 28 insertions(+) diff --git a/src/backend/x64/emit_x64_vector_floating_point.cpp b/src/backend/x64/emit_x64_vector_floating_point.cpp index 0cc3b0b5..f0f9520f 100644 --- a/src/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/backend/x64/emit_x64_vector_floating_point.cpp @@ -1300,6 +1300,18 @@ void EmitX64::EmitFPVectorRSqrtStepFused64(EmitContext& ctx, IR::Inst* inst) { EmitRSqrtStepFused<64>(code, ctx, inst); } +void EmitX64::EmitFPVectorSqrt32(EmitContext& ctx, IR::Inst* inst) { + EmitTwoOpVectorOperation<32, DefaultIndexer>(code, ctx, inst, [this](const Xbyak::Xmm& result, const Xbyak::Xmm& operand) { + code.sqrtps(result, operand); + }); +} + +void EmitX64::EmitFPVectorSqrt64(EmitContext& ctx, IR::Inst* inst) { + EmitTwoOpVectorOperation<64, DefaultIndexer>(code, ctx, inst, [this](const Xbyak::Xmm& result, const Xbyak::Xmm& operand) { + code.sqrtpd(result, operand); + }); +} + void EmitX64::EmitFPVectorSub32(EmitContext& ctx, IR::Inst* inst) { EmitThreeOpVectorOperation<32, DefaultIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::subps); } diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 6452e7f7..c9302970 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -2243,6 +2243,17 @@ U128 IREmitter::FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& return {}; } +U128 IREmitter::FPVectorSqrt(size_t esize, const U128& a) { + switch (esize) { + case 32: + return Inst(Opcode::FPVectorSqrt32, a); + case 64: + return Inst(Opcode::FPVectorSqrt64, a); + } + UNREACHABLE(); + return {}; +} + U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b) { switch (esize) { case 32: diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index f58c6efa..99d0169b 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -344,6 +344,7 @@ public: U128 FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact); U128 FPVectorRSqrtEstimate(size_t esize, const U128& a); U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b); + U128 FPVectorSqrt(size_t esize, const U128& a); U128 FPVectorSub(size_t esize, const U128& a, const U128& b); U128 FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding); U128 FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding); diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp index 36acf6dc..c3849bab 100644 --- a/src/frontend/ir/microinstruction.cpp +++ b/src/frontend/ir/microinstruction.cpp @@ -333,6 +333,8 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const { case Opcode::FPVectorRSqrtEstimate64: case Opcode::FPVectorRSqrtStepFused32: case Opcode::FPVectorRSqrtStepFused64: + case Opcode::FPVectorSqrt32: + case Opcode::FPVectorSqrt64: case Opcode::FPVectorSub32: case Opcode::FPVectorSub64: return true; diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 7306e3d0..256eeba0 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -564,6 +564,8 @@ OPCODE(FPVectorRSqrtEstimate32, U128, U128 OPCODE(FPVectorRSqrtEstimate64, U128, U128 ) OPCODE(FPVectorRSqrtStepFused32, U128, U128, U128 ) OPCODE(FPVectorRSqrtStepFused64, U128, U128, U128 ) +OPCODE(FPVectorSqrt32, U128, U128 ) +OPCODE(FPVectorSqrt64, U128, U128 ) OPCODE(FPVectorSub32, U128, U128, U128 ) OPCODE(FPVectorSub64, U128, U128, U128 ) OPCODE(FPVectorToSignedFixed32, U128, U128, U8, U8 ) From c99d4b762e9a4a17eef93d6225aaf44ed8faa9ad Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 4 Mar 2019 13:00:21 -0500 Subject: [PATCH 2/2] A64: Implement single and double-precision vector variant of FSQRT --- src/frontend/A64/decoder/a64.inc | 2 +- .../translate/impl/simd_two_register_misc.cpp | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 9186a024..e84957ea 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -697,7 +697,7 @@ INST(URSQRTE, "URSQRTE", "0Q101 //INST(FRSQRTE_3, "FRSQRTE", "0Q10111011111001110110nnnnnddddd") INST(FRSQRTE_4, "FRSQRTE", "0Q1011101z100001110110nnnnnddddd") //INST(FSQRT_1, "FSQRT (vector)", "0Q10111011111001111110nnnnnddddd") -//INST(FSQRT_2, "FSQRT (vector)", "0Q1011101z100001111110nnnnnddddd") +INST(FSQRT_2, "FSQRT (vector)", "0Q1011101z100001111110nnnnnddddd") //INST(FRINT32X_1, "FRINT32X (vector)", "0Q1011100z100001111110nnnnnddddd") // ARMv8.5 //INST(FRINT64X_1, "FRINT64X (vector)", "0Q1011100z100001111010nnnnnddddd") // ARMv8.5 //INST(FRINT32Z_1, "FRINT32Z (vector)", "0Q0011100z100001111010nnnnnddddd") // ARMv8.5 diff --git a/src/frontend/A64/translate/impl/simd_two_register_misc.cpp b/src/frontend/A64/translate/impl/simd_two_register_misc.cpp index d1b5ab50..2df3b011 100644 --- a/src/frontend/A64/translate/impl/simd_two_register_misc.cpp +++ b/src/frontend/A64/translate/impl/simd_two_register_misc.cpp @@ -447,7 +447,6 @@ bool TranslatorVisitor::FRINTI_2(bool Q, bool sz, Vec Vn, Vec Vd) { return FloatRoundToIntegral(*this, Q, sz, Vn, Vd,ir.current_location->FPCR().RMode(), false); } - bool TranslatorVisitor::FRECPE_4(bool Q, bool sz, Vec Vn, Vec Vd) { if (sz && !Q) { return ReservedValue(); @@ -463,6 +462,21 @@ bool TranslatorVisitor::FRECPE_4(bool Q, bool sz, Vec Vn, Vec Vd) { return true; } +bool TranslatorVisitor::FSQRT_2(bool Q, bool sz, Vec Vn, Vec Vd) { + if (sz && !Q) { + return ReservedValue(); + } + + const size_t datasize = Q ? 128 : 64; + const size_t esize = sz ? 64 : 32; + + const IR::U128 operand = V(datasize, Vn); + const IR::U128 result = ir.FPVectorSqrt(esize, operand); + + V(datasize, Vd, result); + return true; +} + bool TranslatorVisitor::FRSQRTE_4(bool Q, bool sz, Vec Vn, Vec Vd) { if (sz && !Q) { return ReservedValue();