From f58e247ef3e029ffd61bc217a6b09eff384befdb Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sat, 20 Jun 2020 13:55:28 +0100
Subject: [PATCH] A32: Implement ASIMD VPADD (floating-point)

VPADD (floating-point) only exists in a doubleword form; the encoding
with Q == 1 is UNDEFINED and is rejected by the translator.

---
 src/backend/x64/emit_x64_vector_floating_point.cpp   |  8 ++++----
 src/frontend/A32/decoder/asimd.inc                   |  2 +-
 src/frontend/A32/translate/impl/asimd_three_same.cpp | 12 ++++++++++++
 src/frontend/A32/translate/impl/translate_arm.h      |  1 +
 src/frontend/ir/ir_emitter.cpp                       | 12 ++++++------
 src/frontend/ir/ir_emitter.h                         |  4 ++--
 src/frontend/ir/opcodes.inc                          |  8 ++++----
 tests/A32/fuzz_arm.cpp                               |  2 ++
 8 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/src/backend/x64/emit_x64_vector_floating_point.cpp b/src/backend/x64/emit_x64_vector_floating_point.cpp
index aa353e23..b72414fa 100644
--- a/src/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend/x64/emit_x64_vector_floating_point.cpp
@@ -1117,15 +1117,15 @@ void EmitX64::EmitFPVectorNeg64(EmitContext& ctx, IR::Inst* inst) {
 }
 
 void EmitX64::EmitFPVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
-    EmitThreeOpVectorOperation<32, PairedIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::haddps);
+    EmitThreeOpVectorOperation<32, PairedIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::haddps);
 }
 
 void EmitX64::EmitFPVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
-    EmitThreeOpVectorOperation<64, PairedIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::haddpd);
+    EmitThreeOpVectorOperation<64, PairedIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::haddpd);
 }
 
 void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
-    EmitThreeOpVectorOperation<32, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
+    EmitThreeOpVectorOperation<32, PairedLowerIndexer, FpcrControlledArgument::Present>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
         const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
         code.xorps(zero, zero);
         code.punpcklqdq(result, xmm_b);
@@ -1134,7 +1134,7 @@ void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
 }
 
 void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) {
-    EmitThreeOpVectorOperation<64, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
+    EmitThreeOpVectorOperation<64, PairedLowerIndexer, FpcrControlledArgument::Present>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
         const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
         code.xorps(zero, zero);
         code.punpcklqdq(result, xmm_b);
diff --git a/src/frontend/A32/decoder/asimd.inc b/src/frontend/A32/decoder/asimd.inc
index f87c4f59..4962a884 100644
--- a/src/frontend/A32/decoder/asimd.inc
+++ b/src/frontend/A32/decoder/asimd.inc
@@ -35,7 +35,7 @@ INST(asimd_VMUL, "VMUL", "1111001P0Dzznnnndddd100
 //INST(asimd_VFMA, "VFMA/VFMS", "111100100-CC--------1100---1----") // ASIMD
 INST(asimd_VADD_float, "VADD (floating-point)", "111100100D0znnnndddd1101NQM0mmmm") // ASIMD
 INST(asimd_VSUB_float, "VSUB (floating-point)", "111100100D1znnnndddd1101NQM0mmmm") // ASIMD
-//INST(asimd_VPADD_float, "VPADD (floating-point)", "111100110-0C--------1101---0----") // ASIMD
+INST(asimd_VPADD_float, "VPADD (floating-point)", "111100110D0znnnndddd1101NQM0mmmm") // ASIMD
 //INST(asimd_VABD_float, "VABD (floating-point)", "111100110-1C--------1101---0----") // ASIMD
 //INST(asimd_VMLA_float, "VMLA (floating-point)", "111100100-CC--------1101---1----") // ASIMD
 INST(asimd_VMUL_float, "VMUL (floating-point)", "111100110D0znnnndddd1101NQM1mmmm") // ASIMD
diff --git a/src/frontend/A32/translate/impl/asimd_three_same.cpp b/src/frontend/A32/translate/impl/asimd_three_same.cpp
index 75d87db3..19c20215 100644
--- a/src/frontend/A32/translate/impl/asimd_three_same.cpp
+++ b/src/frontend/A32/translate/impl/asimd_three_same.cpp
@@ -368,6 +368,18 @@ bool ArmTranslatorVisitor::asimd_VSUB_float(bool D, bool sz, size_t Vn, size_t V
     });
 }
 
+bool ArmTranslatorVisitor::asimd_VPADD_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
+    // VPADD (floating-point) only has a doubleword form: the ARM ARM specifies
+    // that the encoding with Q == 1 is UNDEFINED.
+    if (Q) {
+        return UndefinedInstruction();
+    }
+
+    return FloatingPointInstruction(*this, D, sz, Vn, Vd, N, Q, M, Vm, [this](const auto&, const auto& reg_n, const auto& reg_m) {
+        return ir.FPVectorPairedAddLower(32, reg_n, reg_m, false);
+    });
+}
+
 bool ArmTranslatorVisitor::asimd_VMUL_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
     return FloatingPointInstruction(*this, D, sz, Vn, Vd, N, Q, M, Vm, [this](const auto&, const auto& reg_n, const auto& reg_m) {
         return ir.FPVectorMul(32, reg_n, reg_m, false);
diff --git a/src/frontend/A32/translate/impl/translate_arm.h b/src/frontend/A32/translate/impl/translate_arm.h
index b07b3e6f..dbd3ffcb 100644
--- a/src/frontend/A32/translate/impl/translate_arm.h
+++ b/src/frontend/A32/translate/impl/translate_arm.h
@@ -464,6 +464,7 @@ struct ArmTranslatorVisitor final {
     bool asimd_VMUL(bool P, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
     bool asimd_VADD_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
     bool asimd_VSUB_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
+    bool asimd_VPADD_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
     bool asimd_VMUL_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
     bool asimd_VMAX_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
     bool asimd_VMIN_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 1eaf36b4..04c03970 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -2420,22 +2420,22 @@ U128 IREmitter::FPVectorNeg(size_t esize, const U128& a) {
     UNREACHABLE();
 }
 
-U128 IREmitter::FPVectorPairedAdd(size_t esize, const U128& a, const U128& b) {
+U128 IREmitter::FPVectorPairedAdd(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) {
     switch (esize) {
     case 32:
-        return Inst(Opcode::FPVectorPairedAdd32, a, b);
+        return Inst(Opcode::FPVectorPairedAdd32, a, b, Imm1(fpcr_controlled));
     case 64:
-        return Inst(Opcode::FPVectorPairedAdd64, a, b);
+        return Inst(Opcode::FPVectorPairedAdd64, a, b, Imm1(fpcr_controlled));
     }
     UNREACHABLE();
 }
 
-U128 IREmitter::FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b) {
+U128 IREmitter::FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) {
     switch (esize) {
     case 32:
-        return Inst(Opcode::FPVectorPairedAddLower32, a, b);
+        return Inst(Opcode::FPVectorPairedAddLower32, a, b, Imm1(fpcr_controlled));
     case 64:
-        return Inst(Opcode::FPVectorPairedAddLower64, a, b);
+        return Inst(Opcode::FPVectorPairedAddLower64, a, b, Imm1(fpcr_controlled));
     }
     UNREACHABLE();
 }
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index f2243688..f3ad48d6 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -358,8 +358,8 @@ public:
     U128 FPVectorMulAdd(size_t esize, const U128& addend, const U128& op1, const U128& op2);
     U128 FPVectorMulX(size_t esize, const U128& a, const U128& b);
     U128 FPVectorNeg(size_t esize, const U128& a);
-    U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b);
-    U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b);
+    U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
+    U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
     U128 FPVectorRecipEstimate(size_t esize, const U128& a);
     U128 FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b);
     U128 FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 0eacd22b..0098f1d2 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -609,10 +609,10 @@ OPCODE(FPVectorMulX64, U128, U128
 OPCODE(FPVectorNeg16, U128, U128 )
 OPCODE(FPVectorNeg32, U128, U128 )
 OPCODE(FPVectorNeg64, U128, U128 )
-OPCODE(FPVectorPairedAdd32, U128, U128, U128 )
-OPCODE(FPVectorPairedAdd64, U128, U128, U128 )
-OPCODE(FPVectorPairedAddLower32, U128, U128, U128 )
-OPCODE(FPVectorPairedAddLower64, U128, U128, U128 )
+OPCODE(FPVectorPairedAdd32, U128, U128, U128, U1 )
+OPCODE(FPVectorPairedAdd64, U128, U128, U128, U1 )
+OPCODE(FPVectorPairedAddLower32, U128, U128, U128, U1 )
+OPCODE(FPVectorPairedAddLower64, U128, U128, U128, U1 )
 OPCODE(FPVectorRecipEstimate16, U128, U128 )
 OPCODE(FPVectorRecipEstimate32, U128, U128 )
 OPCODE(FPVectorRecipEstimate64, U128, U128 )
diff --git a/tests/A32/fuzz_arm.cpp b/tests/A32/fuzz_arm.cpp
index 8a5565fa..8517e808 100644
--- a/tests/A32/fuzz_arm.cpp
+++ b/tests/A32/fuzz_arm.cpp
@@ -110,6 +110,8 @@ u32 GenRandomInst(u32 pc, bool is_last_inst) {
         "arm_UDF",
         // FPSCR is inaccurate
         "vfp_VMRS",
+        // Unimplemented in Unicorn
+        "asimd_VPADD_float",
     };
 
     for (const auto& [fn, bitstring] : list) {