diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp
index f9de17a8..13b5f04d 100644
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -1762,6 +1762,90 @@ void EmitX64::EmitFPSub64(IR::Block& block, IR::Inst* inst) {
     FPThreeOp64(code, reg_alloc, block, inst, &Xbyak::CodeGenerator::subsd);
 }
 
+// Runtime helper called from emitted code: maps the AH byte produced by LAHF
+// after a (U)COMISS/(U)COMISD onto the guest FPSCR NZCV field (bits 31:28).
+// AH holds SF:ZF:0:AF:0:PF:1:CF; only ZF, PF and CF vary after such a compare.
+static void SetFpscrNzcvFromLahf(JitState* jit_state, u8 lahf) {
+    switch (lahf) {
+    case 0b01000111: // ZF=1 PF=1 CF=1 (unordered) -> NZCV = 0011
+        jit_state->FPSCR_nzcv = 0x30000000;
+        return;
+    case 0b00000010: // ZF=0 PF=0 CF=0 (a > b) -> NZCV = 0010
+        jit_state->FPSCR_nzcv = 0x20000000;
+        return;
+    case 0b00000011: // CF=1 (a < b) -> NZCV = 1000
+        jit_state->FPSCR_nzcv = 0x80000000;
+        return;
+    case 0b01000010: // ZF=1 (a == b) -> NZCV = 0110
+        jit_state->FPSCR_nzcv = 0x60000000;
+        return;
+    }
+    // No other flag combination is expected here.
+    ASSERT_MSG(false, "SetFpscrNzcvFromLahf: unexpected LAHF value");
+}
+
+// Emits a single-precision compare of args 0 and 1 whose outcome is stored in
+// FPSCR NZCV by SetFpscrNzcvFromLahf. Arg 2 (quiet) picks UCOMISS over COMISS.
+void EmitX64::EmitFPCompare32(IR::Block& block, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0);
+    IR::Value b = inst->GetArg(1);
+    bool quiet = inst->GetArg(2).GetU1();
+
+    Xbyak::Xmm reg_a = reg_alloc.UseXmm(a);
+    Xbyak::Xmm reg_b = reg_alloc.UseXmm(b);
+
+    if (quiet) {
+        code->ucomiss(reg_a, reg_b);
+    } else {
+        code->comiss(reg_a, reg_b);
+    }
+
+    reg_alloc.EndOfAllocScope();
+    reg_alloc.HostCall();
+
+    // NOTE(review): assumes nothing emitted by HostCall() above clobbers EFLAGS
+    // before LAHF captures the compare result - confirm.
+    code->lahf();
+    code->mov(code->ABI_PARAM1, code->r15); // r15 is handed over as the JitState*
+    // AH is not encodable in an instruction that needs a REX prefix (e.g. with
+    // SIL as destination), so zero-extend it through EAX, which LAHF has already
+    // clobbered, and pass the value as a full 32-bit argument.
+    code->movzx(code->eax, code->ah);
+    code->mov(code->ABI_PARAM2.cvt32(), code->eax);
+    code->CallFunction(&SetFpscrNzcvFromLahf);
+}
+
+// Emits a double-precision compare of args 0 and 1 whose outcome is stored in
+// FPSCR NZCV by SetFpscrNzcvFromLahf. Arg 2 (quiet) picks UCOMISD over COMISD.
+void EmitX64::EmitFPCompare64(IR::Block& block, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0);
+    IR::Value b = inst->GetArg(1);
+    bool quiet = inst->GetArg(2).GetU1();
+
+    Xbyak::Xmm reg_a = reg_alloc.UseXmm(a);
+    Xbyak::Xmm reg_b = reg_alloc.UseXmm(b);
+
+    if (quiet) {
+        code->ucomisd(reg_a, reg_b);
+    } else {
+        code->comisd(reg_a, reg_b);
+    }
+
+    reg_alloc.EndOfAllocScope();
+    reg_alloc.HostCall();
+
+    // NOTE(review): assumes nothing emitted by HostCall() above clobbers EFLAGS
+    // before LAHF captures the compare result - confirm.
+    code->lahf();
+    code->mov(code->ABI_PARAM1, code->r15); // r15 is handed over as the JitState*
+    // AH is not encodable in an instruction that needs a REX prefix (e.g. with
+    // SIL as destination), so zero-extend it through EAX, which LAHF has already
+    // clobbered, and pass the value as a full 32-bit argument.
+    code->movzx(code->eax, code->ah);
+    code->mov(code->ABI_PARAM2.cvt32(), code->eax);
+    code->CallFunction(&SetFpscrNzcvFromLahf);
+}
+
 void EmitX64::EmitFPSingleToDouble(IR::Block& block, IR::Inst* inst) {
     IR::Value a = inst->GetArg(0);
 
diff --git a/src/frontend/decoder/vfp2.h
b/src/frontend/decoder/vfp2.h
index b44f285e..fd9162e0 100644
--- a/src/frontend/decoder/vfp2.h
+++ b/src/frontend/decoder/vfp2.h
@@ -58,8 +58,8 @@ boost::optional&> DecodeVFP2(u32 instruction) {
     INST(&V::vfp2_VCVT_to_float, "VCVT (to float)", "cccc11101D111000dddd101zs1M0mmmm"),
     INST(&V::vfp2_VCVT_to_u32, "VCVT (to u32)", "cccc11101D111100dddd101zr1M0mmmm"),
     INST(&V::vfp2_VCVT_to_s32, "VCVT (to s32)", "cccc11101D111101dddd101zr1M0mmmm"),
-    // VCMP
-    // VCMPE
+    INST(&V::vfp2_VCMP, "VCMP", "cccc11101D110100dddd101zE1M0mmmm"), // E=1 is the VCMPE form
+    INST(&V::vfp2_VCMP_zero, "VCMP (with zero)", "cccc11101D110101dddd101zE1000000"), // low bits are fixed: no M/Vm fields
 
     // Floating-point system register access
     INST(&V::vfp2_VMSR, "VMSR", "cccc111011100001tttt101000010000"),
diff --git a/src/frontend/disassembler/disassembler_arm.cpp b/src/frontend/disassembler/disassembler_arm.cpp
index cf8d07ba..47cbfcff 100644
--- a/src/frontend/disassembler/disassembler_arm.cpp
+++ b/src/frontend/disassembler/disassembler_arm.cpp
@@ -949,6 +949,14 @@ public:
         return fmt::format("vcvt{}{}.s32.{} {}, {}", round_towards_zero ? "" : "r", CondToString(cond), sz ? "f64" : "f32", FPRegStr(false, Vd, D), FPRegStr(sz, Vm, M));
     }
 
+    std::string vfp2_VCMP(Cond cond, bool D, size_t Vd, bool sz, bool E, bool M, size_t Vm) { // formats "vcmp[e]<cond>.<f32|f64> <d>, <m>"
+        return fmt::format("vcmp{}{}.{} {}, {}", E ? "e" : "", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D), FPRegStr(sz, Vm, M));
+    }
+
+    std::string vfp2_VCMP_zero(Cond cond, bool D, size_t Vd, bool sz, bool E) { // formats "vcmp[e]<cond>.<f32|f64> <d>, #0.0"
+        return fmt::format("vcmp{}{}.{} {}, #0.0", E ? "e" : "", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D));
+    }
+
     std::string vfp2_VMSR(Cond cond, Reg t) {
         return fmt::format("vmsr{} fpscr, {}", CondToString(cond), t);
     }
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 349c73a5..e557df1a 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -414,6 +414,16 @@ Value IREmitter::FPAdd64(const Value& a, const Value& b, bool fpscr_controlled)
     return Inst(Opcode::FPAdd64, {a, b});
 }
 
+void IREmitter::FPCompare32(const Value& a, const Value& b, bool quiet, bool fpscr_controlled) { // emits FPCompare32; no IR value is returned
+    ASSERT(fpscr_controlled); // callers must pass fpscr_controlled == true
+    Inst(Opcode::FPCompare32, {a, b, Imm1(quiet)}); // quiet travels as the third argument, a U1 immediate
+}
+
+void IREmitter::FPCompare64(const Value& a, const Value& b, bool quiet, bool fpscr_controlled) { // emits FPCompare64; no IR value is returned
+    ASSERT(fpscr_controlled); // callers must pass fpscr_controlled == true
+    Inst(Opcode::FPCompare64, {a, b, Imm1(quiet)}); // quiet travels as the third argument, a U1 immediate
+}
+
 Value IREmitter::FPDiv32(const Value& a, const Value& b, bool fpscr_controlled) {
     ASSERT(fpscr_controlled);
     return Inst(Opcode::FPDiv32, {a, b});
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index a5625329..5f8f7034 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -145,6 +145,8 @@ public:
     Value FPAbs64(const Value& a);
     Value FPAdd32(const Value& a, const Value& b, bool fpscr_controlled);
     Value FPAdd64(const Value& a, const Value& b, bool fpscr_controlled);
+    void FPCompare32(const Value& a, const Value& b, bool quiet, bool fpscr_controlled); // returns void, unlike the neighbouring FP emitters
+    void FPCompare64(const Value& a, const Value& b, bool quiet, bool fpscr_controlled); // returns void, unlike the neighbouring FP emitters
     Value FPDiv32(const Value& a, const Value& b, bool fpscr_controlled);
     Value FPDiv64(const Value& a, const Value& b, bool fpscr_controlled);
     Value FPMul32(const Value& a, const Value& b, bool fpscr_controlled);
diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp
index eabfa83e..95ea1260 100644
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -156,6 +156,8 @@ bool Inst::ReadsFromFPSCR()
const {
     case Opcode::FPAbs64:
     case Opcode::FPAdd32:
     case Opcode::FPAdd64:
+    case Opcode::FPCompare32: // NOTE(review): presumably listed because the compare is FPSCR-controlled - confirm
+    case Opcode::FPCompare64:
     case Opcode::FPDiv32:
     case Opcode::FPDiv64:
     case Opcode::FPMul32:
@@ -181,6 +183,8 @@ bool Inst::WritesToFPSCR() const {
     case Opcode::FPAbs64:
     case Opcode::FPAdd32:
     case Opcode::FPAdd64:
+    case Opcode::FPCompare32: // the x64 backend stores the compare outcome into FPSCR_nzcv
+    case Opcode::FPCompare64:
     case Opcode::FPDiv32:
     case Opcode::FPDiv64:
     case Opcode::FPMul32:
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index ee8dc1fa..1d0eace1 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -95,6 +95,8 @@ OPCODE(FPAbs32, T::F32, T::F32
 OPCODE(FPAbs64, T::F64, T::F64 )
 OPCODE(FPAdd32, T::F32, T::F32, T::F32 )
 OPCODE(FPAdd64, T::F64, T::F64, T::F64 )
+OPCODE(FPCompare32, T::Void, T::F32, T::F32, T::U1 ) // Void result; the U1 argument is the quiet flag
+OPCODE(FPCompare64, T::Void, T::F64, T::F64, T::U1 ) // Void result; the U1 argument is the quiet flag
 OPCODE(FPDiv32, T::F32, T::F32, T::F32 )
 OPCODE(FPDiv64, T::F64, T::F64, T::F64 )
 OPCODE(FPMul32, T::F32, T::F32, T::F32 )
diff --git a/src/frontend/translate/translate_arm/translate_arm.h b/src/frontend/translate/translate_arm/translate_arm.h
index 6b7d3631..664a8345 100644
--- a/src/frontend/translate/translate_arm/translate_arm.h
+++ b/src/frontend/translate/translate_arm/translate_arm.h
@@ -365,6 +365,8 @@ struct ArmTranslatorVisitor final {
     bool vfp2_VCVT_to_float(Cond cond, bool D, size_t Vd, bool sz, bool is_signed, bool M, size_t Vm);
     bool vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm);
     bool vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm);
+    bool vfp2_VCMP(Cond cond, bool D, size_t Vd, bool sz, bool E, bool M, size_t Vm); // VCMP{E}, register operand form
+    bool vfp2_VCMP_zero(Cond cond, bool D, size_t Vd, bool sz, bool E); // VCMP{E} against #0.0
 
     // Floating-point system register access
     bool vfp2_VMSR(Cond cond, Reg t);
diff --git a/src/frontend/translate/translate_arm/vfp2.cpp b/src/frontend/translate/translate_arm/vfp2.cpp
index
07439d56..ea7ce5c0 100644
--- a/src/frontend/translate/translate_arm/vfp2.cpp
+++ b/src/frontend/translate/translate_arm/vfp2.cpp
@@ -425,6 +425,50 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool s
     return true;
 }
 
+// Translates VCMP{E} (register form) into an FPCompare32/FPCompare64 IR instruction.
+bool ArmTranslatorVisitor::vfp2_VCMP(Cond cond, bool D, size_t Vd, bool sz, bool E, bool M, size_t Vm) {
+    ExtReg d = ToExtReg(sz, Vd, D);
+    ExtReg m = ToExtReg(sz, Vm, M);
+    // E == 1 selects VCMPE, which must signal Invalid Operation for quiet NaNs
+    // as well; only the E == 0 form is the quiet comparison.
+    bool quiet = !E;
+    // VCMP{E}.F32 <Sd>, <Sm>
+    // VCMP{E}.F64 <Dd>, <Dm>
+    if (ConditionPassed(cond)) {
+        auto a = ir.GetExtendedRegister(d);
+        auto b = ir.GetExtendedRegister(m);
+        if (sz) {
+            ir.FPCompare64(a, b, quiet, true);
+        } else {
+            ir.FPCompare32(a, b, quiet, true);
+        }
+    }
+    return true;
+}
+
+// Translates VCMP{E} (compare with #0.0) into an FPCompare32/FPCompare64 IR instruction.
+bool ArmTranslatorVisitor::vfp2_VCMP_zero(Cond cond, bool D, size_t Vd, bool sz, bool E) {
+    ExtReg d = ToExtReg(sz, Vd, D);
+    // E == 1 selects VCMPE, which must signal Invalid Operation for quiet NaNs
+    // as well; only the E == 0 form is the quiet comparison.
+    bool quiet = !E;
+    // VCMP{E}.F32 <Sd>, #0.0
+    // VCMP{E}.F64 <Dd>, #0.0
+    if (ConditionPassed(cond)) {
+        auto a = ir.GetExtendedRegister(d);
+        // NOTE(review): presumably a bit-pattern transfer, so an all-zero immediate yields +0.0 - confirm.
+        auto b = sz
+                 ? ir.TransferToFP64(ir.Imm64(0))
+                 : ir.TransferToFP32(ir.Imm32(0));
+        if (sz) {
+            ir.FPCompare64(a, b, quiet, true);
+        } else {
+            ir.FPCompare32(a, b, quiet, true);
+        }
+    }
+    return true;
+}
+
 bool ArmTranslatorVisitor::vfp2_VMSR(Cond cond, Reg t) {
     if (t == Reg::PC)
         return UnpredictableInstruction();