VFP: Implement VMUL

This commit is contained in:
MerryMage 2016-08-07 10:21:14 +01:00
parent 97b5fa173f
commit 12e7f2c359
8 changed files with 93 additions and 73 deletions

View file

@ -1090,7 +1090,49 @@ static void DefaultNaN64(XEmitter* code, Routines* routines, X64Reg xmm_value) {
code->SetJumpTarget(fixup); code->SetJumpTarget(fixup);
} }
void EmitX64::EmitFPAbs32(IR::Block& block, IR::Inst* inst) { static void FPOp32(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) {
// FPOp32: shared emitter for two-operand single-precision FP instructions.
// `fn` is the XEmitter member that emits the arithmetic instruction itself;
// callers in this file pass &XEmitter::ADDSS, &XEmitter::MULSS and &XEmitter::SUBSS.
// The two IR arguments of `inst` are the operands of the operation.
IR::Value a = inst->GetArg(0);
IR::Value b = inst->GetArg(1);
// `result` is requested as a use+def of `a` for `inst`; it is passed to `fn`
// as the first (destination) argument below.
X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm);
// NOTE(review): `operand` comes from UseRegister but is also handed to
// DenormalsAreZero32 below. If that helper rewrites the register in place,
// confirm the allocator permits modifying a use-only register.
X64Reg operand = reg_alloc.UseRegister(b, any_xmm);
// General-purpose temporary handed to the denormal/flush helpers.
X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
// When FPSCR.FTZ is set for this block, both inputs are run through
// DenormalsAreZero32 before the operation is emitted.
if (block.location.FPSCR_FTZ()) {
DenormalsAreZero32(code, result, gpr_scratch);
DenormalsAreZero32(code, operand, gpr_scratch);
}
// Emit the operation; `result` is the first argument and `operand` the second.
(code->*fn)(result, R(operand));
// When FPSCR.FTZ is set, the output is additionally run through FlushToZero32.
if (block.location.FPSCR_FTZ()) {
FlushToZero32(code, result, gpr_scratch);
}
// When FPSCR.DN is set, the result is post-processed by DefaultNaN32
// (presumably canonicalising NaN results -- confirm against the helper).
if (block.location.FPSCR_DN()) {
DefaultNaN32(code, routines, result);
}
}
// FPOp64: shared emitter for two-operand double-precision FP instructions.
// `fn` is the XEmitter member that emits the arithmetic instruction itself;
// callers in this file pass &XEmitter::ADDSD, &XEmitter::MULSD and &XEmitter::SUBSD.
static void FPOp64(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) {
// The two IR arguments of `inst` are the operands of the operation.
IR::Value a = inst->GetArg(0);
IR::Value b = inst->GetArg(1);
// `result` is requested as a use+def of `a` for `inst`; it is passed to `fn`
// as the first (destination) argument below.
X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm);
// NOTE(review): `operand` comes from UseRegister but is also handed to
// DenormalsAreZero64 below. If that helper rewrites the register in place,
// confirm the allocator permits modifying a use-only register.
X64Reg operand = reg_alloc.UseRegister(b, any_xmm);
// General-purpose temporary handed to the denormal/flush helpers.
X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
// When FPSCR.FTZ is set for this block, both inputs are run through
// DenormalsAreZero64 before the operation is emitted.
if (block.location.FPSCR_FTZ()) {
DenormalsAreZero64(code, routines, result, gpr_scratch);
DenormalsAreZero64(code, routines, operand, gpr_scratch);
}
// Emit the operation; `result` is the first argument and `operand` the second.
(code->*fn)(result, R(operand));
// When FPSCR.FTZ is set, the output is additionally run through FlushToZero64.
if (block.location.FPSCR_FTZ()) {
FlushToZero64(code, routines, result, gpr_scratch);
}
// When FPSCR.DN is set, the result is post-processed by DefaultNaN64
// (presumably canonicalising NaN results -- confirm against the helper).
if (block.location.FPSCR_DN()) {
DefaultNaN64(code, routines, result);
}
}
void EmitX64::EmitFPAbs32(IR::Block&, IR::Inst* inst) {
IR::Value a = inst->GetArg(0); IR::Value a = inst->GetArg(0);
X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm); X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm);
@ -1098,7 +1140,7 @@ void EmitX64::EmitFPAbs32(IR::Block& block, IR::Inst* inst) {
code->PAND(result, routines->MFloatNonSignMask32()); code->PAND(result, routines->MFloatNonSignMask32());
} }
void EmitX64::EmitFPAbs64(IR::Block& block, IR::Inst* inst) { void EmitX64::EmitFPAbs64(IR::Block&, IR::Inst* inst) {
IR::Value a = inst->GetArg(0); IR::Value a = inst->GetArg(0);
X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm); X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm);
@ -1107,87 +1149,27 @@ void EmitX64::EmitFPAbs64(IR::Block& block, IR::Inst* inst) {
} }
void EmitX64::EmitFPAdd32(IR::Block& block, IR::Inst* inst) { void EmitX64::EmitFPAdd32(IR::Block& block, IR::Inst* inst) {
IR::Value a = inst->GetArg(0); FPOp32(code, routines, reg_alloc, block, inst, &XEmitter::ADDSS);
IR::Value b = inst->GetArg(1);
X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm);
X64Reg operand = reg_alloc.UseRegister(b, any_xmm);
X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
if (block.location.FPSCR_FTZ()) {
DenormalsAreZero32(code, result, gpr_scratch);
DenormalsAreZero32(code, operand, gpr_scratch);
}
code->ADDSS(result, R(operand));
if (block.location.FPSCR_FTZ()) {
FlushToZero32(code, result, gpr_scratch);
}
if (block.location.FPSCR_DN()) {
DefaultNaN32(code, routines, result);
}
} }
void EmitX64::EmitFPAdd64(IR::Block& block, IR::Inst* inst) { void EmitX64::EmitFPAdd64(IR::Block& block, IR::Inst* inst) {
IR::Value a = inst->GetArg(0); FPOp64(code, routines, reg_alloc, block, inst, &XEmitter::ADDSD);
IR::Value b = inst->GetArg(1); }
X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm); void EmitX64::EmitFPMul32(IR::Block& block, IR::Inst* inst) {
X64Reg operand = reg_alloc.UseRegister(b, any_xmm); FPOp32(code, routines, reg_alloc, block, inst, &XEmitter::MULSS);
X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); }
if (block.location.FPSCR_FTZ()) { void EmitX64::EmitFPMul64(IR::Block& block, IR::Inst* inst) {
DenormalsAreZero64(code, routines, result, gpr_scratch); FPOp64(code, routines, reg_alloc, block, inst, &XEmitter::MULSD);
DenormalsAreZero64(code, routines, operand, gpr_scratch);
}
code->ADDSD(result, R(operand));
if (block.location.FPSCR_FTZ()) {
FlushToZero64(code, routines, result, gpr_scratch);
}
if (block.location.FPSCR_DN()) {
DefaultNaN64(code, routines, result);
}
} }
void EmitX64::EmitFPSub32(IR::Block& block, IR::Inst* inst) { void EmitX64::EmitFPSub32(IR::Block& block, IR::Inst* inst) {
IR::Value a = inst->GetArg(0); FPOp32(code, routines, reg_alloc, block, inst, &XEmitter::SUBSS);
IR::Value b = inst->GetArg(1);
X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm);
X64Reg operand = reg_alloc.UseRegister(b, any_xmm);
X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
if (block.location.FPSCR_FTZ()) {
DenormalsAreZero32(code, result, gpr_scratch);
DenormalsAreZero32(code, operand, gpr_scratch);
}
code->SUBSS(result, R(operand));
if (block.location.FPSCR_FTZ()) {
FlushToZero32(code, result, gpr_scratch);
}
if (block.location.FPSCR_DN()) {
DefaultNaN32(code, routines, result);
}
} }
void EmitX64::EmitFPSub64(IR::Block& block, IR::Inst* inst) { void EmitX64::EmitFPSub64(IR::Block& block, IR::Inst* inst) {
IR::Value a = inst->GetArg(0); FPOp64(code, routines, reg_alloc, block, inst, &XEmitter::SUBSD);
IR::Value b = inst->GetArg(1);
X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm);
X64Reg operand = reg_alloc.UseRegister(b, any_xmm);
X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
if (block.location.FPSCR_FTZ()) {
DenormalsAreZero64(code, routines, result, gpr_scratch);
DenormalsAreZero64(code, routines, operand, gpr_scratch);
}
code->SUBSD(result, R(operand));
if (block.location.FPSCR_FTZ()) {
FlushToZero64(code, routines, result, gpr_scratch);
}
if (block.location.FPSCR_DN()) {
DefaultNaN64(code, routines, result);
}
} }
void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) { void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) {

View file

@ -69,7 +69,7 @@ boost::optional<const VFP2Matcher<V>&> DecodeVFP2(u32 instruction) {
// VNMLA // VNMLA
// VNMLS // VNMLS
// VNMUL // VNMUL
// VMUL INST(&V::vfp2_VMUL, "VMUL", "cccc11100D10nnnndddd101zN0M0mmmm"),
INST(&V::vfp2_VADD, "VADD", "cccc11100D11nnnndddd101zN0M0mmmm"), INST(&V::vfp2_VADD, "VADD", "cccc11100D11nnnndddd101zN0M0mmmm"),
INST(&V::vfp2_VSUB, "VSUB", "cccc11100D11nnnndddd101zN1M0mmmm"), INST(&V::vfp2_VSUB, "VSUB", "cccc11100D11nnnndddd101zN1M0mmmm"),
// VDIV // VDIV

View file

@ -560,6 +560,10 @@ public:
std::string arm_SRS() { return "ice"; } std::string arm_SRS() { return "ice"; }
// Floating point arithmetic instructions // Floating point arithmetic instructions
std::string vfp2_VMUL(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm) {
    // Disassembles VMUL as "vmul<cond>.<f32|f64> <Vd>, <Vn>, <Vm>".
    // `sz` selects the precision suffix and is forwarded to FPRegStr for each register name.
    const char* const precision = sz ? "f64" : "f32";
    const auto dest = FPRegStr(sz, Vd, D);
    const auto lhs = FPRegStr(sz, Vn, N);
    const auto rhs = FPRegStr(sz, Vm, M);
    return Common::StringFromFormat("vmul%s.%s %s, %s, %s", CondToString(cond), precision, dest.c_str(), lhs.c_str(), rhs.c_str());
}
std::string vfp2_VADD(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm) { std::string vfp2_VADD(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm) {
return Common::StringFromFormat("vadd%s.%s %s, %s, %s", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D).c_str(), FPRegStr(sz, Vn, N).c_str(), FPRegStr(sz, Vm, M).c_str()); return Common::StringFromFormat("vadd%s.%s %s, %s, %s", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D).c_str(), FPRegStr(sz, Vn, N).c_str(), FPRegStr(sz, Vm, M).c_str());
} }

View file

@ -292,6 +292,16 @@ IR::Value IREmitter::FPAdd64(const IR::Value& a, const IR::Value& b, bool fpscr_
return Inst(IR::Opcode::FPAdd64, {a, b}); return Inst(IR::Opcode::FPAdd64, {a, b});
} }
IR::Value IREmitter::FPMul32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) {
    // Appends an FPMul32 instruction taking `a` and `b` as its arguments.
    // Callers must pass fpscr_controlled == true; anything else trips the assertion.
    ASSERT(fpscr_controlled);
    const IR::Opcode opcode = IR::Opcode::FPMul32;
    return Inst(opcode, {a, b});
}
IR::Value IREmitter::FPMul64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) {
    // Appends an FPMul64 instruction taking `a` and `b` as its arguments.
    // Callers must pass fpscr_controlled == true; anything else trips the assertion.
    ASSERT(fpscr_controlled);
    const IR::Opcode opcode = IR::Opcode::FPMul64;
    return Inst(opcode, {a, b});
}
IR::Value IREmitter::FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) { IR::Value IREmitter::FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) {
ASSERT(fpscr_controlled); ASSERT(fpscr_controlled);
return Inst(IR::Opcode::FPSub32, {a, b}); return Inst(IR::Opcode::FPSub32, {a, b});

View file

@ -96,6 +96,8 @@ public:
IR::Value FPAbs64(const IR::Value& a); IR::Value FPAbs64(const IR::Value& a);
IR::Value FPAdd32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled); IR::Value FPAdd32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
IR::Value FPAdd64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled); IR::Value FPAdd64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
// Floating-point multiply of `a` by `b`, single (32) and double (64) precision.
// The out-of-line definitions ASSERT that fpscr_controlled is true.
IR::Value FPMul32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
IR::Value FPMul64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
IR::Value FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled); IR::Value FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
IR::Value FPSub64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled); IR::Value FPSub64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);

View file

@ -64,6 +64,8 @@ OPCODE(FPAbs32, T::F32, T::F32
OPCODE(FPAbs64, T::F64, T::F64 ) OPCODE(FPAbs64, T::F64, T::F64 )
OPCODE(FPAdd32, T::F32, T::F32, T::F32 ) OPCODE(FPAdd32, T::F32, T::F32, T::F32 )
OPCODE(FPAdd64, T::F64, T::F64, T::F64 ) OPCODE(FPAdd64, T::F64, T::F64, T::F64 )
// Floating-point multiply, single and double precision.
// NOTE(review): by analogy with the neighbouring FPAbs/FPAdd entries, the first
// type appears to be the result type and the remaining ones the argument types
// -- confirm against the OPCODE macro definition.
OPCODE(FPMul32, T::F32, T::F32, T::F32 )
OPCODE(FPMul64, T::F64, T::F64, T::F64 )
OPCODE(FPSub32, T::F32, T::F32, T::F32 ) OPCODE(FPSub32, T::F32, T::F32, T::F32 )
OPCODE(FPSub64, T::F64, T::F64, T::F64 ) OPCODE(FPSub64, T::F64, T::F64, T::F64 )

View file

@ -321,6 +321,7 @@ struct ArmTranslatorVisitor final {
// Floating-point three-register data processing instructions // Floating-point three-register data processing instructions
bool vfp2_VADD(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm); bool vfp2_VADD(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm);
bool vfp2_VSUB(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm); bool vfp2_VSUB(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm);
// VMUL.{F32,F64}: scalar floating-point multiply. The definition defers to the
// interpreter unless FPSCR Len and Stride are both 1.
bool vfp2_VMUL(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm);
// Floating-point misc instructions // Floating-point misc instructions
bool vfp2_VABS(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm); bool vfp2_VABS(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);

View file

@ -55,6 +55,25 @@ bool ArmTranslatorVisitor::vfp2_VSUB(Cond cond, bool D, size_t Vn, size_t Vd, bo
return true; return true;
} }
bool ArmTranslatorVisitor::vfp2_VMUL(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm) {
    // Only the scalar form is translated here; any other FPSCR Len/Stride
    // configuration is handed to the interpreter.
    const bool scalar_mode = ir.current_location.FPSCR_Len() == 1 && ir.current_location.FPSCR_Stride() == 1;
    if (!scalar_mode) {
        return InterpretThisInstruction(); // TODO: Vectorised floating point instructions
    }

    // Resolve the destination and the two source registers; `sz` selects
    // between the single- and double-precision register views.
    const ExtReg dest_reg = ToExtReg(sz, Vd, D);
    const ExtReg lhs_reg = ToExtReg(sz, Vn, N);
    const ExtReg rhs_reg = ToExtReg(sz, Vm, M);

    // VMUL.{F32,F64} <{S,D}d>, <{S,D}n>, <{S,D}m>
    if (!ConditionPassed(cond)) {
        return true;
    }

    const auto lhs = ir.GetExtendedRegister(lhs_reg);
    const auto rhs = ir.GetExtendedRegister(rhs_reg);
    if (sz) {
        auto product = ir.FPMul64(lhs, rhs, true);
        ir.SetExtendedRegister(dest_reg, product);
    } else {
        auto product = ir.FPMul32(lhs, rhs, true);
        ir.SetExtendedRegister(dest_reg, product);
    }
    return true;
}
bool ArmTranslatorVisitor::vfp2_VABS(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) { bool ArmTranslatorVisitor::vfp2_VABS(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) {
if (ir.current_location.FPSCR_Len() != 1 || ir.current_location.FPSCR_Stride() != 1) if (ir.current_location.FPSCR_Len() != 1 || ir.current_location.FPSCR_Stride() != 1)
return InterpretThisInstruction(); // TODO: Vectorised floating point instructions return InterpretThisInstruction(); // TODO: Vectorised floating point instructions