From 3df0eb30beac54539304582edc4dcf2a1130d3f7 Mon Sep 17 00:00:00 2001 From: Merry Date: Sat, 6 Aug 2022 17:46:05 +0100 Subject: [PATCH] emit_arm64_vector: Implement Saturated Accumulate --- .../backend/arm64/emit_arm64_vector.cpp | 61 +++++++++---------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_vector.cpp b/src/dynarmic/backend/arm64/emit_arm64_vector.cpp index ed1750a4..bfb17053 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_vector.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_vector.cpp @@ -198,6 +198,27 @@ static void EmitThreeOpArrangedLower(oaknut::CodeGenerator& code, EmitContext& c }); } +template +static void EmitSaturatedAccumulate(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto Qaccumulator = ctx.reg_alloc.ReadWriteQ(args[0], inst); + auto Qoperand = ctx.reg_alloc.ReadQ(args[1]); + RegAlloc::Realize(Qaccumulator, Qoperand); + ctx.fpsr.Load(); + + if constexpr (size == 8) { + emit(Qaccumulator->B16(), Qoperand->B16()); + } else if constexpr (size == 16) { + emit(Qaccumulator->H8(), Qoperand->H8()); + } else if constexpr (size == 32) { + emit(Qaccumulator->S4(), Qoperand->S4()); + } else if constexpr (size == 64) { + emit(Qaccumulator->D2(), Qoperand->D2()); + } else { + static_assert(size == 8 || size == 16 || size == 32 || size == 64); + } +} + template static void EmitImmShift(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -1358,34 +1379,22 @@ void EmitIR(oaknut::CodeGenerator& code, template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitSaturatedAccumulate<8>(code, ctx, inst, [&](auto Vaccumulator, auto Voperand) { code.SUQADD(Vaccumulator, Voperand); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitSaturatedAccumulate<16>(code, ctx, inst, [&](auto Vaccumulator, auto Voperand) { code.SUQADD(Vaccumulator, Voperand); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitSaturatedAccumulate<32>(code, ctx, inst, [&](auto Vaccumulator, auto Voperand) { code.SUQADD(Vaccumulator, Voperand); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitSaturatedAccumulate<64>(code, ctx, inst, [&](auto Vaccumulator, auto Voperand) { code.SUQADD(Vaccumulator, Voperand); }); } template<> @@ -1732,34 +1741,22 @@ void EmitIR(oaknut::CodeGenerator& template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitSaturatedAccumulate<8>(code, ctx, inst, [&](auto Vaccumulator, auto Voperand) { code.USQADD(Vaccumulator, Voperand); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitSaturatedAccumulate<16>(code, ctx, inst, [&](auto Vaccumulator, auto Voperand) { code.USQADD(Vaccumulator, Voperand); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitSaturatedAccumulate<32>(code, ctx, inst, [&](auto Vaccumulator, auto Voperand) { code.USQADD(Vaccumulator, Voperand); }); } template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { - (void)code; - (void)ctx; - (void)inst; - ASSERT_FALSE("Unimplemented"); + EmitSaturatedAccumulate<64>(code, ctx, inst, [&](auto Vaccumulator, auto Voperand) { code.USQADD(Vaccumulator, Voperand); }); } template<>