A32: Implement ASIMD VPADD (floating-point)

This commit is contained in:
MerryMage 2020-06-20 13:55:28 +01:00
parent e006f0a205
commit f58e247ef3
8 changed files with 27 additions and 17 deletions

View file

@ -1117,15 +1117,15 @@ void EmitX64::EmitFPVectorNeg64(EmitContext& ctx, IR::Inst* inst) {
} }
void EmitX64::EmitFPVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
EmitThreeOpVectorOperation<32, PairedIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::haddps); EmitThreeOpVectorOperation<32, PairedIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::haddps);
} }
void EmitX64::EmitFPVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
EmitThreeOpVectorOperation<64, PairedIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::haddpd); EmitThreeOpVectorOperation<64, PairedIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::haddpd);
} }
void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
EmitThreeOpVectorOperation<32, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) { EmitThreeOpVectorOperation<32, PairedLowerIndexer, FpcrControlledArgument::Present>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
code.xorps(zero, zero); code.xorps(zero, zero);
code.punpcklqdq(result, xmm_b); code.punpcklqdq(result, xmm_b);
@ -1134,7 +1134,7 @@ void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
} }
void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) {
EmitThreeOpVectorOperation<64, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) { EmitThreeOpVectorOperation<64, PairedLowerIndexer, FpcrControlledArgument::Present>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
code.xorps(zero, zero); code.xorps(zero, zero);
code.punpcklqdq(result, xmm_b); code.punpcklqdq(result, xmm_b);

View file

@ -35,7 +35,7 @@ INST(asimd_VMUL, "VMUL", "1111001P0Dzznnnndddd100
//INST(asimd_VFMA, "VFMA/VFMS", "111100100-CC--------1100---1----") // ASIMD //INST(asimd_VFMA, "VFMA/VFMS", "111100100-CC--------1100---1----") // ASIMD
INST(asimd_VADD_float, "VADD (floating-point)", "111100100D0znnnndddd1101NQM0mmmm") // ASIMD INST(asimd_VADD_float, "VADD (floating-point)", "111100100D0znnnndddd1101NQM0mmmm") // ASIMD
INST(asimd_VSUB_float, "VSUB (floating-point)", "111100100D1znnnndddd1101NQM0mmmm") // ASIMD INST(asimd_VSUB_float, "VSUB (floating-point)", "111100100D1znnnndddd1101NQM0mmmm") // ASIMD
//INST(asimd_VPADD_float, "VPADD (floating-point)", "111100110-0C--------1101---0----") // ASIMD INST(asimd_VPADD_float, "VPADD (floating-point)", "111100110D0znnnndddd1101NQM0mmmm") // ASIMD
//INST(asimd_VABD_float, "VABD (floating-point)", "111100110-1C--------1101---0----") // ASIMD //INST(asimd_VABD_float, "VABD (floating-point)", "111100110-1C--------1101---0----") // ASIMD
//INST(asimd_VMLA_float, "VMLA (floating-point)", "111100100-CC--------1101---1----") // ASIMD //INST(asimd_VMLA_float, "VMLA (floating-point)", "111100100-CC--------1101---1----") // ASIMD
INST(asimd_VMUL_float, "VMUL (floating-point)", "111100110D0znnnndddd1101NQM1mmmm") // ASIMD INST(asimd_VMUL_float, "VMUL (floating-point)", "111100110D0znnnndddd1101NQM1mmmm") // ASIMD

View file

@ -368,6 +368,13 @@ bool ArmTranslatorVisitor::asimd_VSUB_float(bool D, bool sz, size_t Vn, size_t V
}); });
} }
bool ArmTranslatorVisitor::asimd_VPADD_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
return FloatingPointInstruction(*this, D, sz, Vn, Vd, N, Q, M, Vm, [this, Q](const auto&, const auto& reg_n, const auto& reg_m) {
return Q ? ir.FPVectorPairedAdd(32, reg_n, reg_m, false)
: ir.FPVectorPairedAddLower(32, reg_n, reg_m, false);
});
}
bool ArmTranslatorVisitor::asimd_VMUL_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) { bool ArmTranslatorVisitor::asimd_VMUL_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
return FloatingPointInstruction(*this, D, sz, Vn, Vd, N, Q, M, Vm, [this](const auto&, const auto& reg_n, const auto& reg_m) { return FloatingPointInstruction(*this, D, sz, Vn, Vd, N, Q, M, Vm, [this](const auto&, const auto& reg_n, const auto& reg_m) {
return ir.FPVectorMul(32, reg_n, reg_m, false); return ir.FPVectorMul(32, reg_n, reg_m, false);

View file

@ -464,6 +464,7 @@ struct ArmTranslatorVisitor final {
bool asimd_VMUL(bool P, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm); bool asimd_VMUL(bool P, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
bool asimd_VADD_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm); bool asimd_VADD_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
bool asimd_VSUB_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm); bool asimd_VSUB_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
bool asimd_VPADD_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
bool asimd_VMUL_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm); bool asimd_VMUL_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
bool asimd_VMAX_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm); bool asimd_VMAX_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
bool asimd_VMIN_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm); bool asimd_VMIN_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);

View file

@ -2420,22 +2420,22 @@ U128 IREmitter::FPVectorNeg(size_t esize, const U128& a) {
UNREACHABLE(); UNREACHABLE();
} }
U128 IREmitter::FPVectorPairedAdd(size_t esize, const U128& a, const U128& b) { U128 IREmitter::FPVectorPairedAdd(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) {
switch (esize) { switch (esize) {
case 32: case 32:
return Inst<U128>(Opcode::FPVectorPairedAdd32, a, b); return Inst<U128>(Opcode::FPVectorPairedAdd32, a, b, Imm1(fpcr_controlled));
case 64: case 64:
return Inst<U128>(Opcode::FPVectorPairedAdd64, a, b); return Inst<U128>(Opcode::FPVectorPairedAdd64, a, b, Imm1(fpcr_controlled));
} }
UNREACHABLE(); UNREACHABLE();
} }
U128 IREmitter::FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b) { U128 IREmitter::FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) {
switch (esize) { switch (esize) {
case 32: case 32:
return Inst<U128>(Opcode::FPVectorPairedAddLower32, a, b); return Inst<U128>(Opcode::FPVectorPairedAddLower32, a, b, Imm1(fpcr_controlled));
case 64: case 64:
return Inst<U128>(Opcode::FPVectorPairedAddLower64, a, b); return Inst<U128>(Opcode::FPVectorPairedAddLower64, a, b, Imm1(fpcr_controlled));
} }
UNREACHABLE(); UNREACHABLE();
} }

View file

@ -358,8 +358,8 @@ public:
U128 FPVectorMulAdd(size_t esize, const U128& addend, const U128& op1, const U128& op2); U128 FPVectorMulAdd(size_t esize, const U128& addend, const U128& op1, const U128& op2);
U128 FPVectorMulX(size_t esize, const U128& a, const U128& b); U128 FPVectorMulX(size_t esize, const U128& a, const U128& b);
U128 FPVectorNeg(size_t esize, const U128& a); U128 FPVectorNeg(size_t esize, const U128& a);
U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b); U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b); U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
U128 FPVectorRecipEstimate(size_t esize, const U128& a); U128 FPVectorRecipEstimate(size_t esize, const U128& a);
U128 FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b); U128 FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b);
U128 FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact); U128 FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact);

View file

@ -609,10 +609,10 @@ OPCODE(FPVectorMulX64, U128, U128
OPCODE(FPVectorNeg16, U128, U128 ) OPCODE(FPVectorNeg16, U128, U128 )
OPCODE(FPVectorNeg32, U128, U128 ) OPCODE(FPVectorNeg32, U128, U128 )
OPCODE(FPVectorNeg64, U128, U128 ) OPCODE(FPVectorNeg64, U128, U128 )
OPCODE(FPVectorPairedAdd32, U128, U128, U128 ) OPCODE(FPVectorPairedAdd32, U128, U128, U128, U1 )
OPCODE(FPVectorPairedAdd64, U128, U128, U128 ) OPCODE(FPVectorPairedAdd64, U128, U128, U128, U1 )
OPCODE(FPVectorPairedAddLower32, U128, U128, U128 ) OPCODE(FPVectorPairedAddLower32, U128, U128, U128, U1 )
OPCODE(FPVectorPairedAddLower64, U128, U128, U128 ) OPCODE(FPVectorPairedAddLower64, U128, U128, U128, U1 )
OPCODE(FPVectorRecipEstimate16, U128, U128 ) OPCODE(FPVectorRecipEstimate16, U128, U128 )
OPCODE(FPVectorRecipEstimate32, U128, U128 ) OPCODE(FPVectorRecipEstimate32, U128, U128 )
OPCODE(FPVectorRecipEstimate64, U128, U128 ) OPCODE(FPVectorRecipEstimate64, U128, U128 )

View file

@ -110,6 +110,8 @@ u32 GenRandomInst(u32 pc, bool is_last_inst) {
"arm_UDF", "arm_UDF",
// FPSCR is inaccurate // FPSCR is inaccurate
"vfp_VMRS", "vfp_VMRS",
// Unimplemented in Unicorn
"asimd_VPADD_float",
}; };
for (const auto& [fn, bitstring] : list) { for (const auto& [fn, bitstring] : list) {