From 12e7f2c3597b48b1c1215691b8282f0310b4ee55 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 7 Aug 2016 10:21:14 +0100 Subject: [PATCH] VFP: Implement VMUL --- src/backend_x64/emit_x64.cpp | 126 ++++++++---------- src/frontend/decoder/vfp2.h | 2 +- .../disassembler/disassembler_arm.cpp | 4 + src/frontend/ir/ir_emitter.cpp | 10 ++ src/frontend/ir/ir_emitter.h | 2 + src/frontend/ir/opcodes.inc | 2 + .../translate/translate_arm/translate_arm.h | 1 + src/frontend/translate/translate_arm/vfp2.cpp | 19 +++ 8 files changed, 93 insertions(+), 73 deletions(-) diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 01c12dbc..b7c342b9 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -1090,7 +1090,49 @@ static void DefaultNaN64(XEmitter* code, Routines* routines, X64Reg xmm_value) { code->SetJumpTarget(fixup); } -void EmitX64::EmitFPAbs32(IR::Block& block, IR::Inst* inst) { +static void FPOp32(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) { + IR::Value a = inst->GetArg(0); + IR::Value b = inst->GetArg(1); + + X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm); + X64Reg operand = reg_alloc.UseRegister(b, any_xmm); + X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); + + if (block.location.FPSCR_FTZ()) { + DenormalsAreZero32(code, result, gpr_scratch); + DenormalsAreZero32(code, operand, gpr_scratch); + } + (code->*fn)(result, R(operand)); + if (block.location.FPSCR_FTZ()) { + FlushToZero32(code, result, gpr_scratch); + } + if (block.location.FPSCR_DN()) { + DefaultNaN32(code, routines, result); + } +} + +static void FPOp64(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) { + IR::Value a = inst->GetArg(0); + IR::Value b = inst->GetArg(1); + + X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm); + X64Reg operand = 
reg_alloc.UseRegister(b, any_xmm); + X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); + + if (block.location.FPSCR_FTZ()) { + DenormalsAreZero64(code, routines, result, gpr_scratch); + DenormalsAreZero64(code, routines, operand, gpr_scratch); + } + (code->*fn)(result, R(operand)); + if (block.location.FPSCR_FTZ()) { + FlushToZero64(code, routines, result, gpr_scratch); + } + if (block.location.FPSCR_DN()) { + DefaultNaN64(code, routines, result); + } +} + +void EmitX64::EmitFPAbs32(IR::Block&, IR::Inst* inst) { IR::Value a = inst->GetArg(0); X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm); @@ -1098,7 +1140,7 @@ void EmitX64::EmitFPAbs32(IR::Block& block, IR::Inst* inst) { code->PAND(result, routines->MFloatNonSignMask32()); } -void EmitX64::EmitFPAbs64(IR::Block& block, IR::Inst* inst) { +void EmitX64::EmitFPAbs64(IR::Block&, IR::Inst* inst) { IR::Value a = inst->GetArg(0); X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm); @@ -1107,87 +1149,27 @@ void EmitX64::EmitFPAbs64(IR::Block& block, IR::Inst* inst) { } void EmitX64::EmitFPAdd32(IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm); - X64Reg operand = reg_alloc.UseRegister(b, any_xmm); - X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); - - if (block.location.FPSCR_FTZ()) { - DenormalsAreZero32(code, result, gpr_scratch); - DenormalsAreZero32(code, operand, gpr_scratch); - } - code->ADDSS(result, R(operand)); - if (block.location.FPSCR_FTZ()) { - FlushToZero32(code, result, gpr_scratch); - } - if (block.location.FPSCR_DN()) { - DefaultNaN32(code, routines, result); - } + FPOp32(code, routines, reg_alloc, block, inst, &XEmitter::ADDSS); } void EmitX64::EmitFPAdd64(IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); + FPOp64(code, routines, reg_alloc, block, inst, &XEmitter::ADDSD); +} - X64Reg result = 
reg_alloc.UseDefRegister(a, inst, any_xmm); - X64Reg operand = reg_alloc.UseRegister(b, any_xmm); - X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); +void EmitX64::EmitFPMul32(IR::Block& block, IR::Inst* inst) { + FPOp32(code, routines, reg_alloc, block, inst, &XEmitter::MULSS); +} - if (block.location.FPSCR_FTZ()) { - DenormalsAreZero64(code, routines, result, gpr_scratch); - DenormalsAreZero64(code, routines, operand, gpr_scratch); - } - code->ADDSD(result, R(operand)); - if (block.location.FPSCR_FTZ()) { - FlushToZero64(code, routines, result, gpr_scratch); - } - if (block.location.FPSCR_DN()) { - DefaultNaN64(code, routines, result); - } +void EmitX64::EmitFPMul64(IR::Block& block, IR::Inst* inst) { + FPOp64(code, routines, reg_alloc, block, inst, &XEmitter::MULSD); } void EmitX64::EmitFPSub32(IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm); - X64Reg operand = reg_alloc.UseRegister(b, any_xmm); - X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); - - if (block.location.FPSCR_FTZ()) { - DenormalsAreZero32(code, result, gpr_scratch); - DenormalsAreZero32(code, operand, gpr_scratch); - } - code->SUBSS(result, R(operand)); - if (block.location.FPSCR_FTZ()) { - FlushToZero32(code, result, gpr_scratch); - } - if (block.location.FPSCR_DN()) { - DefaultNaN32(code, routines, result); - } + FPOp32(code, routines, reg_alloc, block, inst, &XEmitter::SUBSS); } void EmitX64::EmitFPSub64(IR::Block& block, IR::Inst* inst) { - IR::Value a = inst->GetArg(0); - IR::Value b = inst->GetArg(1); - - X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm); - X64Reg operand = reg_alloc.UseRegister(b, any_xmm); - X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); - - if (block.location.FPSCR_FTZ()) { - DenormalsAreZero64(code, routines, result, gpr_scratch); - DenormalsAreZero64(code, routines, operand, gpr_scratch); - } - code->SUBSD(result, 
R(operand)); - if (block.location.FPSCR_FTZ()) { - FlushToZero64(code, routines, result, gpr_scratch); - } - if (block.location.FPSCR_DN()) { - DefaultNaN64(code, routines, result); - } + FPOp64(code, routines, reg_alloc, block, inst, &XEmitter::SUBSD); } void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) { diff --git a/src/frontend/decoder/vfp2.h b/src/frontend/decoder/vfp2.h index 48d49b6c..3db59308 100644 --- a/src/frontend/decoder/vfp2.h +++ b/src/frontend/decoder/vfp2.h @@ -69,7 +69,7 @@ boost::optional<const VFP2Matcher<V>&> DecodeVFP2(u32 instruction) { // VNMLA // VNMLS // VNMUL - // VMUL + INST(&V::vfp2_VMUL, "VMUL", "cccc11100D10nnnndddd101zN0M0mmmm"), INST(&V::vfp2_VADD, "VADD", "cccc11100D11nnnndddd101zN0M0mmmm"), INST(&V::vfp2_VSUB, "VSUB", "cccc11100D11nnnndddd101zN1M0mmmm"), // VDIV diff --git a/src/frontend/disassembler/disassembler_arm.cpp b/src/frontend/disassembler/disassembler_arm.cpp index 6ce15a65..766fde5a 100644 --- a/src/frontend/disassembler/disassembler_arm.cpp +++ b/src/frontend/disassembler/disassembler_arm.cpp @@ -560,6 +560,10 @@ public: std::string arm_SRS() { return "ice"; } // Floating point arithmetic instructions + std::string vfp2_VMUL(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm) { + return Common::StringFromFormat("vmul%s.%s %s, %s, %s", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D).c_str(), FPRegStr(sz, Vn, N).c_str(), FPRegStr(sz, Vm, M).c_str()); + } + std::string vfp2_VADD(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm) { return Common::StringFromFormat("vadd%s.%s %s, %s, %s", CondToString(cond), sz ? 
"f64" : "f32", FPRegStr(sz, Vd, D).c_str(), FPRegStr(sz, Vn, N).c_str(), FPRegStr(sz, Vm, M).c_str()); } diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index e2cdd3ee..b923dfc8 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -292,6 +292,16 @@ IR::Value IREmitter::FPAdd64(const IR::Value& a, const IR::Value& b, bool fpscr_ return Inst(IR::Opcode::FPAdd64, {a, b}); } +IR::Value IREmitter::FPMul32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) { + ASSERT(fpscr_controlled); + return Inst(IR::Opcode::FPMul32, {a, b}); +} + +IR::Value IREmitter::FPMul64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) { + ASSERT(fpscr_controlled); + return Inst(IR::Opcode::FPMul64, {a, b}); +} + IR::Value IREmitter::FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) { ASSERT(fpscr_controlled); return Inst(IR::Opcode::FPSub32, {a, b}); diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 29298734..858b9896 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -96,6 +96,8 @@ public: IR::Value FPAbs64(const IR::Value& a); IR::Value FPAdd32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled); IR::Value FPAdd64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled); + IR::Value FPMul32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled); + IR::Value FPMul64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled); IR::Value FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled); IR::Value FPSub64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 7f32e794..ddb58c5f 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -64,6 +64,8 @@ OPCODE(FPAbs32, T::F32, T::F32 OPCODE(FPAbs64, T::F64, T::F64 ) OPCODE(FPAdd32, T::F32, T::F32, T::F32 ) OPCODE(FPAdd64, T::F64, 
T::F64, T::F64 ) +OPCODE(FPMul32, T::F32, T::F32, T::F32 ) +OPCODE(FPMul64, T::F64, T::F64, T::F64 ) OPCODE(FPSub32, T::F32, T::F32, T::F32 ) OPCODE(FPSub64, T::F64, T::F64, T::F64 ) diff --git a/src/frontend/translate/translate_arm/translate_arm.h b/src/frontend/translate/translate_arm/translate_arm.h index a1a8d9ab..722c3d8b 100644 --- a/src/frontend/translate/translate_arm/translate_arm.h +++ b/src/frontend/translate/translate_arm/translate_arm.h @@ -321,6 +321,7 @@ struct ArmTranslatorVisitor final { // Floating-point three-register data processing instructions bool vfp2_VADD(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm); bool vfp2_VSUB(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm); + bool vfp2_VMUL(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm); // Floating-point misc instructions bool vfp2_VABS(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm); diff --git a/src/frontend/translate/translate_arm/vfp2.cpp b/src/frontend/translate/translate_arm/vfp2.cpp index ed723314..bb8439a7 100644 --- a/src/frontend/translate/translate_arm/vfp2.cpp +++ b/src/frontend/translate/translate_arm/vfp2.cpp @@ -55,6 +55,25 @@ bool ArmTranslatorVisitor::vfp2_VSUB(Cond cond, bool D, size_t Vn, size_t Vd, bo return true; } +bool ArmTranslatorVisitor::vfp2_VMUL(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm) { + if (ir.current_location.FPSCR_Len() != 1 || ir.current_location.FPSCR_Stride() != 1) + return InterpretThisInstruction(); // TODO: Vectorised floating point instructions + + ExtReg d = ToExtReg(sz, Vd, D); + ExtReg n = ToExtReg(sz, Vn, N); + ExtReg m = ToExtReg(sz, Vm, M); + // VMUL.{F32,F64} <{S,D}d>, <{S,D}n>, <{S,D}m> + if (ConditionPassed(cond)) { + auto a = ir.GetExtendedRegister(n); + auto b = ir.GetExtendedRegister(m); + auto result = sz + ? 
ir.FPMul64(a, b, true) + : ir.FPMul32(a, b, true); + ir.SetExtendedRegister(d, result); + } + return true; +} + bool ArmTranslatorVisitor::vfp2_VABS(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) { if (ir.current_location.FPSCR_Len() != 1 || ir.current_location.FPSCR_Stride() != 1) return InterpretThisInstruction(); // TODO: Vectorised floating point instructions