diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp
index d6edd91b..4a95edb5 100644
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -1090,7 +1090,7 @@ static void DefaultNaN64(XEmitter* code, Routines* routines, X64Reg xmm_value) {
     code->SetJumpTarget(fixup);
 }
 
-static void FPOp32(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) {
+static void FPThreeOp32(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) {
     IR::Value a = inst->GetArg(0);
     IR::Value b = inst->GetArg(1);
 
@@ -1111,7 +1111,7 @@ static void FPOp32(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::
     }
 }
 
-static void FPOp64(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) {
+static void FPThreeOp64(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) {
     IR::Value a = inst->GetArg(0);
     IR::Value b = inst->GetArg(1);
 
@@ -1132,6 +1132,42 @@ static void FPOp64(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::
     }
 }
 
+static void FPTwoOp32(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) {
+    IR::Value a = inst->GetArg(0);
+
+    X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm);
+    X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
+
+    if (block.location.FPSCR_FTZ()) {
+        DenormalsAreZero32(code, result, gpr_scratch);
+    }
+    (code->*fn)(result, R(result));
+    if (block.location.FPSCR_FTZ()) {
+        FlushToZero32(code, result, gpr_scratch);
+    }
+    if (block.location.FPSCR_DN()) {
+        DefaultNaN32(code, routines, result);
+    }
+}
+
+static void FPTwoOp64(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) {
+    IR::Value a = inst->GetArg(0);
+
+    X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm);
+    X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
+
+    if (block.location.FPSCR_FTZ()) {
+        DenormalsAreZero64(code, routines, result, gpr_scratch);
+    }
+    (code->*fn)(result, R(result));
+    if (block.location.FPSCR_FTZ()) {
+        FlushToZero64(code, routines, result, gpr_scratch);
+    }
+    if (block.location.FPSCR_DN()) {
+        DefaultNaN64(code, routines, result);
+    }
+}
+
 void EmitX64::EmitFPAbs32(IR::Block&, IR::Inst* inst) {
     IR::Value a = inst->GetArg(0);
 
@@ -1165,35 +1201,43 @@ void EmitX64::EmitFPNeg64(IR::Block&, IR::Inst* inst) {
 }
 
 void EmitX64::EmitFPAdd32(IR::Block& block, IR::Inst* inst) {
-    FPOp32(code, routines, reg_alloc, block, inst, &XEmitter::ADDSS);
+    FPThreeOp32(code, routines, reg_alloc, block, inst, &XEmitter::ADDSS);
 }
 
 void EmitX64::EmitFPAdd64(IR::Block& block, IR::Inst* inst) {
-    FPOp64(code, routines, reg_alloc, block, inst, &XEmitter::ADDSD);
+    FPThreeOp64(code, routines, reg_alloc, block, inst, &XEmitter::ADDSD);
 }
 
 void EmitX64::EmitFPDiv32(IR::Block& block, IR::Inst* inst) {
-    FPOp32(code, routines, reg_alloc, block, inst, &XEmitter::DIVSS);
+    FPThreeOp32(code, routines, reg_alloc, block, inst, &XEmitter::DIVSS);
 }
 
 void EmitX64::EmitFPDiv64(IR::Block& block, IR::Inst* inst) {
-    FPOp64(code, routines, reg_alloc, block, inst, &XEmitter::DIVSD);
+    FPThreeOp64(code, routines, reg_alloc, block, inst, &XEmitter::DIVSD);
 }
 
 void EmitX64::EmitFPMul32(IR::Block& block, IR::Inst* inst) {
-    FPOp32(code, routines, reg_alloc, block, inst, &XEmitter::MULSS);
+    FPThreeOp32(code, routines, reg_alloc, block, inst, &XEmitter::MULSS);
 }
 
 void EmitX64::EmitFPMul64(IR::Block& block, IR::Inst* inst) {
-    FPOp64(code, routines, reg_alloc, block, inst, &XEmitter::MULSD);
+    FPThreeOp64(code, routines, reg_alloc, block, inst, &XEmitter::MULSD);
+}
+
+void EmitX64::EmitFPSqrt32(IR::Block& block, IR::Inst* inst) {
+    FPTwoOp32(code, routines, reg_alloc, block, inst, &XEmitter::SQRTSS);
+}
+
+void EmitX64::EmitFPSqrt64(IR::Block& block, IR::Inst* inst) {
+    FPTwoOp64(code, routines, reg_alloc, block, inst, &XEmitter::SQRTSD);
 }
 
 void EmitX64::EmitFPSub32(IR::Block& block, IR::Inst* inst) {
-    FPOp32(code, routines, reg_alloc, block, inst, &XEmitter::SUBSS);
+    FPThreeOp32(code, routines, reg_alloc, block, inst, &XEmitter::SUBSS);
 }
 
 void EmitX64::EmitFPSub64(IR::Block& block, IR::Inst* inst) {
-    FPOp64(code, routines, reg_alloc, block, inst, &XEmitter::SUBSD);
+    FPThreeOp64(code, routines, reg_alloc, block, inst, &XEmitter::SUBSD);
 }
 
 void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) {
diff --git a/src/frontend/decoder/vfp2.h b/src/frontend/decoder/vfp2.h
index 0feb013c..523a235e 100644
--- a/src/frontend/decoder/vfp2.h
+++ b/src/frontend/decoder/vfp2.h
@@ -79,7 +79,7 @@ boost::optional<const VFP2Matcher<V>&> DecodeVFP2(u32 instruction) {
     // VMOV_reg
     INST(&V::vfp2_VABS, "VABS", "cccc11101D110000dddd101z11M0mmmm"),
     INST(&V::vfp2_VNEG, "VNEG", "cccc11101D110001dddd101z01M0mmmm"),
-    // VSQRT
+    INST(&V::vfp2_VSQRT, "VSQRT", "cccc11101D110001dddd101z11M0mmmm"),
     // VCMP
     // VCMPE
     // VCVT
diff --git a/src/frontend/disassembler/disassembler_arm.cpp b/src/frontend/disassembler/disassembler_arm.cpp
index 66bf19ea..fdd135cd 100644
--- a/src/frontend/disassembler/disassembler_arm.cpp
+++ b/src/frontend/disassembler/disassembler_arm.cpp
@@ -603,6 +603,10 @@ public:
     std::string vfp2_VNEG(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) {
         return Common::StringFromFormat("vneg%s.%s %s, %s", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D).c_str(), FPRegStr(sz, Vm, M).c_str());
     }
+
+    std::string vfp2_VSQRT(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) {
+        return Common::StringFromFormat("vsqrt%s.%s %s, %s", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D).c_str(), FPRegStr(sz, Vm, M).c_str());
+    }
 };
 
 std::string DisassembleArm(u32 instruction) {
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 40c6e890..cdc5ca97 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -320,6 +320,13 @@ IR::Value IREmitter::FPNeg64(const IR::Value& a) {
     return Inst(IR::Opcode::FPNeg64, {a});
 }
 
+IR::Value IREmitter::FPSqrt32(const IR::Value& a) {
+    return Inst(IR::Opcode::FPSqrt32, {a});
+}
+
+IR::Value IREmitter::FPSqrt64(const IR::Value& a) {
+    return Inst(IR::Opcode::FPSqrt64, {a});
+}
 IR::Value IREmitter::FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) {
     ASSERT(fpscr_controlled);
     return Inst(IR::Opcode::FPSub32, {a, b});
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 4960690f..f64ad122 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -102,6 +102,8 @@ public:
     IR::Value FPMul64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
     IR::Value FPNeg32(const IR::Value& a);
     IR::Value FPNeg64(const IR::Value& a);
+    IR::Value FPSqrt32(const IR::Value& a);
+    IR::Value FPSqrt64(const IR::Value& a);
     IR::Value FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
     IR::Value FPSub64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
 
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 007082eb..891683c8 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -70,6 +70,8 @@ OPCODE(FPMul32, T::F32, T::F32, T::F32
 OPCODE(FPMul64,         T::F64,         T::F64, T::F64          )
 OPCODE(FPNeg32,         T::F32,         T::F32                  )
 OPCODE(FPNeg64,         T::F64,         T::F64                  )
+OPCODE(FPSqrt32,        T::F32,         T::F32                  )
+OPCODE(FPSqrt64,        T::F64,         T::F64                  )
 OPCODE(FPSub32,         T::F32,         T::F32, T::F32          )
 OPCODE(FPSub64,         T::F64,         T::F64, T::F64          )
 
diff --git a/src/frontend/translate/translate_arm/translate_arm.h b/src/frontend/translate/translate_arm/translate_arm.h
index 5555e7c5..a1bb3dc6 100644
--- a/src/frontend/translate/translate_arm/translate_arm.h
+++ b/src/frontend/translate/translate_arm/translate_arm.h
@@ -332,6 +332,7 @@ struct ArmTranslatorVisitor final {
     // Floating-point misc instructions
     bool vfp2_VABS(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
     bool vfp2_VNEG(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
+    bool vfp2_VSQRT(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
 };
 
 } // namespace Arm
diff --git a/src/frontend/translate/translate_arm/vfp2.cpp b/src/frontend/translate/translate_arm/vfp2.cpp
index 52877abe..4e43fadb 100644
--- a/src/frontend/translate/translate_arm/vfp2.cpp
+++ b/src/frontend/translate/translate_arm/vfp2.cpp
@@ -226,5 +226,22 @@ bool ArmTranslatorVisitor::vfp2_VNEG(Cond cond, bool D, size_t Vd, bool sz, bool
     return true;
 }
 
+bool ArmTranslatorVisitor::vfp2_VSQRT(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) {
+    if (ir.current_location.FPSCR_Len() != 1 || ir.current_location.FPSCR_Stride() != 1)
+        return InterpretThisInstruction(); // TODO: Vectorised floating point instructions
+
+    ExtReg d = ToExtReg(sz, Vd, D);
+    ExtReg m = ToExtReg(sz, Vm, M);
+    // VSQRT.{F32,F64} <{S,D}d>, <{S,D}m>
+    if (ConditionPassed(cond)) {
+        auto a = ir.GetExtendedRegister(m);
+        auto result = sz
+                      ? ir.FPSqrt64(a)
+                      : ir.FPSqrt32(a);
+        ir.SetExtendedRegister(d, result);
+    }
+    return true;
+}
+
 } // namespace Arm
 } // namespace Dynarmic