A32: Implement ASIMD VPADD (floating-point)

This commit is contained in:
MerryMage 2020-06-20 13:55:28 +01:00
parent e006f0a205
commit f58e247ef3
8 changed files with 27 additions and 17 deletions

View file

@ -1117,15 +1117,15 @@ void EmitX64::EmitFPVectorNeg64(EmitContext& ctx, IR::Inst* inst) {
}
void EmitX64::EmitFPVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
EmitThreeOpVectorOperation<32, PairedIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::haddps);
EmitThreeOpVectorOperation<32, PairedIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::haddps);
}
void EmitX64::EmitFPVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
EmitThreeOpVectorOperation<64, PairedIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::haddpd);
EmitThreeOpVectorOperation<64, PairedIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::haddpd);
}
void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
EmitThreeOpVectorOperation<32, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
EmitThreeOpVectorOperation<32, PairedLowerIndexer, FpcrControlledArgument::Present>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
code.xorps(zero, zero);
code.punpcklqdq(result, xmm_b);
@ -1134,7 +1134,7 @@ void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
}
void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) {
EmitThreeOpVectorOperation<64, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
EmitThreeOpVectorOperation<64, PairedLowerIndexer, FpcrControlledArgument::Present>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
code.xorps(zero, zero);
code.punpcklqdq(result, xmm_b);

View file

@ -35,7 +35,7 @@ INST(asimd_VMUL, "VMUL", "1111001P0Dzznnnndddd100
//INST(asimd_VFMA, "VFMA/VFMS", "111100100-CC--------1100---1----") // ASIMD
INST(asimd_VADD_float, "VADD (floating-point)", "111100100D0znnnndddd1101NQM0mmmm") // ASIMD
INST(asimd_VSUB_float, "VSUB (floating-point)", "111100100D1znnnndddd1101NQM0mmmm") // ASIMD
//INST(asimd_VPADD_float, "VPADD (floating-point)", "111100110-0C--------1101---0----") // ASIMD
INST(asimd_VPADD_float, "VPADD (floating-point)", "111100110D0znnnndddd1101NQM0mmmm") // ASIMD
//INST(asimd_VABD_float, "VABD (floating-point)", "111100110-1C--------1101---0----") // ASIMD
//INST(asimd_VMLA_float, "VMLA (floating-point)", "111100100-CC--------1101---1----") // ASIMD
INST(asimd_VMUL_float, "VMUL (floating-point)", "111100110D0znnnndddd1101NQM1mmmm") // ASIMD

View file

@ -368,6 +368,13 @@ bool ArmTranslatorVisitor::asimd_VSUB_float(bool D, bool sz, size_t Vn, size_t V
});
}
bool ArmTranslatorVisitor::asimd_VPADD_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
return FloatingPointInstruction(*this, D, sz, Vn, Vd, N, Q, M, Vm, [this, Q](const auto&, const auto& reg_n, const auto& reg_m) {
return Q ? ir.FPVectorPairedAdd(32, reg_n, reg_m, false)
: ir.FPVectorPairedAddLower(32, reg_n, reg_m, false);
});
}
bool ArmTranslatorVisitor::asimd_VMUL_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
return FloatingPointInstruction(*this, D, sz, Vn, Vd, N, Q, M, Vm, [this](const auto&, const auto& reg_n, const auto& reg_m) {
return ir.FPVectorMul(32, reg_n, reg_m, false);

View file

@ -464,6 +464,7 @@ struct ArmTranslatorVisitor final {
bool asimd_VMUL(bool P, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
bool asimd_VADD_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
bool asimd_VSUB_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
bool asimd_VPADD_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
bool asimd_VMUL_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
bool asimd_VMAX_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
bool asimd_VMIN_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);

View file

@ -2420,22 +2420,22 @@ U128 IREmitter::FPVectorNeg(size_t esize, const U128& a) {
UNREACHABLE();
}
U128 IREmitter::FPVectorPairedAdd(size_t esize, const U128& a, const U128& b) {
U128 IREmitter::FPVectorPairedAdd(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) {
switch (esize) {
case 32:
return Inst<U128>(Opcode::FPVectorPairedAdd32, a, b);
return Inst<U128>(Opcode::FPVectorPairedAdd32, a, b, Imm1(fpcr_controlled));
case 64:
return Inst<U128>(Opcode::FPVectorPairedAdd64, a, b);
return Inst<U128>(Opcode::FPVectorPairedAdd64, a, b, Imm1(fpcr_controlled));
}
UNREACHABLE();
}
U128 IREmitter::FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b) {
U128 IREmitter::FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) {
switch (esize) {
case 32:
return Inst<U128>(Opcode::FPVectorPairedAddLower32, a, b);
return Inst<U128>(Opcode::FPVectorPairedAddLower32, a, b, Imm1(fpcr_controlled));
case 64:
return Inst<U128>(Opcode::FPVectorPairedAddLower64, a, b);
return Inst<U128>(Opcode::FPVectorPairedAddLower64, a, b, Imm1(fpcr_controlled));
}
UNREACHABLE();
}

View file

@ -358,8 +358,8 @@ public:
U128 FPVectorMulAdd(size_t esize, const U128& addend, const U128& op1, const U128& op2);
U128 FPVectorMulX(size_t esize, const U128& a, const U128& b);
U128 FPVectorNeg(size_t esize, const U128& a);
U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b);
U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b);
U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
U128 FPVectorRecipEstimate(size_t esize, const U128& a);
U128 FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b);
U128 FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact);

View file

@ -609,10 +609,10 @@ OPCODE(FPVectorMulX64, U128, U128
OPCODE(FPVectorNeg16, U128, U128 )
OPCODE(FPVectorNeg32, U128, U128 )
OPCODE(FPVectorNeg64, U128, U128 )
OPCODE(FPVectorPairedAdd32, U128, U128, U128 )
OPCODE(FPVectorPairedAdd64, U128, U128, U128 )
OPCODE(FPVectorPairedAddLower32, U128, U128, U128 )
OPCODE(FPVectorPairedAddLower64, U128, U128, U128 )
OPCODE(FPVectorPairedAdd32, U128, U128, U128, U1 )
OPCODE(FPVectorPairedAdd64, U128, U128, U128, U1 )
OPCODE(FPVectorPairedAddLower32, U128, U128, U128, U1 )
OPCODE(FPVectorPairedAddLower64, U128, U128, U128, U1 )
OPCODE(FPVectorRecipEstimate16, U128, U128 )
OPCODE(FPVectorRecipEstimate32, U128, U128 )
OPCODE(FPVectorRecipEstimate64, U128, U128 )

View file

@ -110,6 +110,8 @@ u32 GenRandomInst(u32 pc, bool is_last_inst) {
"arm_UDF",
// FPSCR is inaccurate
"vfp_VMRS",
// Unimplemented in Unicorn
"asimd_VPADD_float",
};
for (const auto& [fn, bitstring] : list) {