diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 5e7cc5a7..0e3827ad 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -24,6 +24,8 @@ add_library(dynarmic
     common/fp/info.h
     common/fp/mantissa_util.h
     common/fp/op.h
+    common/fp/op/FPCompare.cpp
+    common/fp/op/FPCompare.h
     common/fp/op/FPConvert.cpp
     common/fp/op/FPConvert.h
     common/fp/op/FPMulAdd.cpp
diff --git a/src/backend/x64/emit_x64_vector_floating_point.cpp b/src/backend/x64/emit_x64_vector_floating_point.cpp
index 41489bc0..2e5f2b0d 100644
--- a/src/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend/x64/emit_x64_vector_floating_point.cpp
@@ -540,6 +540,16 @@ void EmitX64::EmitFPVectorDiv64(EmitContext& ctx, IR::Inst* inst) {
     EmitThreeOpVectorOperation<64, DefaultIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::divpd);
 }
 
+// Lane-wise half-precision equality, evaluated through the software FPCompareEQ fallback.
+// Each 16-bit lane of the result is 0xFFFF when the operands compare equal and 0 otherwise.
+void EmitX64::EmitFPVectorEqual16(EmitContext& ctx, IR::Inst* inst) {
+    EmitThreeOpFallback(code, ctx, inst, [](VectorArray<u16>& result, const VectorArray<u16>& op1, const VectorArray<u16>& op2, FP::FPCR fpcr, FP::FPSR& fpsr) {
+        for (size_t i = 0; i < result.size(); i++) {
+            result[i] = FP::FPCompareEQ(op1[i], op2[i], fpcr, fpsr) ? 0xFFFF : 0;
+        }
+    });
+}
+
 void EmitX64::EmitFPVectorEqual32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
diff --git a/src/common/fp/op.h b/src/common/fp/op.h
index b8ee1441..b1bd0ee0 100644
--- a/src/common/fp/op.h
+++ b/src/common/fp/op.h
@@ -6,6 +6,7 @@
 
 #pragma once
 
+#include "common/fp/op/FPCompare.h"
 #include "common/fp/op/FPConvert.h"
 #include "common/fp/op/FPMulAdd.h"
 #include "common/fp/op/FPRecipEstimate.h"
diff --git a/src/common/fp/op/FPCompare.cpp b/src/common/fp/op/FPCompare.cpp
new file mode 100644
index 00000000..7e16dd60
--- /dev/null
+++ b/src/common/fp/op/FPCompare.cpp
@@ -0,0 +1,41 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2019 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include "common/fp/fpcr.h"
+#include "common/fp/fpsr.h"
+#include "common/fp/op/FPCompare.h"
+#include "common/fp/process_exception.h"
+#include "common/fp/unpacked.h"
+
+namespace Dynarmic::FP {
+// Equality of two packed floating-point values; a signalling NaN operand raises InvalidOp.
+template <typename FPT>
+bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr) {
+    const auto unpacked1 = FPUnpack(lhs, fpcr, fpsr);
+    const auto unpacked2 = FPUnpack(rhs, fpcr, fpsr);
+    const auto type1 = std::get<FPType>(unpacked1);
+    const auto type2 = std::get<FPType>(unpacked2);
+    const auto& value1 = std::get<FPUnpacked>(unpacked1);
+    const auto& value2 = std::get<FPUnpacked>(unpacked2);
+
+    if (type1 == FPType::QNaN || type1 == FPType::SNaN ||
+        type2 == FPType::QNaN || type2 == FPType::SNaN) {
+        if (type1 == FPType::SNaN || type2 == FPType::SNaN) {
+            FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
+        }
+
+        // Comparisons against NaN are never equal.
+        return false;
+    }
+    // Two zeros are always equal, even when their unpacked values differ.
+    return value1 == value2 || (type1 == FPType::Zero && type2 == FPType::Zero);
+}
+
+template bool FPCompareEQ<u16>(u16 lhs, u16 rhs, FPCR fpcr, FPSR& fpsr);
+template bool FPCompareEQ<u32>(u32 lhs, u32 rhs, FPCR fpcr, FPSR& fpsr);
+template bool FPCompareEQ<u64>(u64 lhs, u64 rhs, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP
diff --git a/src/common/fp/op/FPCompare.h b/src/common/fp/op/FPCompare.h
new file mode 100644
index 00000000..33802848
--- /dev/null
+++ b/src/common/fp/op/FPCompare.h
@@ -0,0 +1,17 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2019 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+namespace Dynarmic::FP {
+
+class FPCR;
+class FPSR;
+// Returns whether lhs and rhs compare equal; instantiated for u16, u32 and u64 in FPCompare.cpp.
+template <typename FPT>
+bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index 23625be7..f743d370 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -382,7 +382,7 @@ INST(DUP_elt_1, "DUP (element)", "01011
 // Data Processing - FP and SIMD - Scalar three
 //INST(FMULX_vec_1, "FMULX", "01011110010mmmmm000111nnnnnddddd")
 INST(FMULX_vec_2, "FMULX", "010111100z1mmmmm110111nnnnnddddd")
-//INST(FCMEQ_reg_1, "FCMEQ (register)", "01011110010mmmmm001001nnnnnddddd")
+INST(FCMEQ_reg_1, "FCMEQ (register)", "01011110010mmmmm001001nnnnnddddd")
 INST(FCMEQ_reg_2, "FCMEQ (register)", "010111100z1mmmmm111001nnnnnddddd")
 INST(FRECPS_1, "FRECPS", "01011110010mmmmm001111nnnnnddddd")
 INST(FRECPS_2, "FRECPS", "010111100z1mmmmm111111nnnnnddddd")
@@ -410,7 +410,7 @@ INST(FCVTAS_2, "FCVTAS (vector)", "01011
 INST(SCVTF_int_2, "SCVTF (vector, integer)", "010111100z100001110110nnnnnddddd")
 //INST(FCMGT_zero_1, "FCMGT (zero)", "0101111011111000110010nnnnnddddd")
 INST(FCMGT_zero_2, "FCMGT (zero)", "010111101z100000110010nnnnnddddd")
-//INST(FCMEQ_zero_1, "FCMEQ (zero)", "0101111011111000110110nnnnnddddd")
+INST(FCMEQ_zero_1, "FCMEQ (zero)", "0101111011111000110110nnnnnddddd")
 INST(FCMEQ_zero_2, "FCMEQ (zero)", "010111101z100000110110nnnnnddddd")
 //INST(FCMLT_1, "FCMLT (zero)", "0101111011111000111010nnnnnddddd")
 INST(FCMLT_2, "FCMLT (zero)", "010111101z100000111010nnnnnddddd")
@@ -574,7 +574,7 @@ INST(INS_elt, "INS (element)", "01101
 
 // Data Processing - FP and SIMD - SIMD Three same
 //INST(FMULX_vec_3, "FMULX", "0Q001110010mmmmm000111nnnnnddddd")
-//INST(FCMEQ_reg_3, "FCMEQ (register)", "0Q001110010mmmmm001001nnnnnddddd")
+INST(FCMEQ_reg_3, "FCMEQ (register)", "0Q001110010mmmmm001001nnnnnddddd")
 INST(FRECPS_3, "FRECPS", "0Q001110010mmmmm001111nnnnnddddd")
 INST(FRSQRTS_3, "FRSQRTS", "0Q001110110mmmmm001111nnnnnddddd")
 //INST(FCMGE_reg_3, "FCMGE (register)", "0Q101110010mmmmm001001nnnnnddddd")
@@ -635,7 +635,7 @@ INST(FCVTAS_4, "FCVTAS (vector)", "0Q001
 INST(SCVTF_int_4, "SCVTF (vector, integer)", "0Q0011100z100001110110nnnnnddddd")
 //INST(FCMGT_zero_3, "FCMGT (zero)", "0Q00111011111000110010nnnnnddddd")
 INST(FCMGT_zero_4, "FCMGT (zero)", "0Q0011101z100000110010nnnnnddddd")
-//INST(FCMEQ_zero_3, "FCMEQ (zero)", "0Q00111011111000110110nnnnnddddd")
+INST(FCMEQ_zero_3, "FCMEQ (zero)", "0Q00111011111000110110nnnnnddddd")
 INST(FCMEQ_zero_4, "FCMEQ (zero)", "0Q0011101z100000110110nnnnnddddd")
 //INST(FCMLT_3, "FCMLT (zero)", "0Q00111011111000111010nnnnnddddd")
 INST(FCMLT_4, "FCMLT (zero)", "0Q0011101z100000111010nnnnnddddd")
diff --git a/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp b/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp
index 68c15735..7e3c6764 100644
--- a/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp
+++ b/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp
@@ -346,6 +346,17 @@ bool TranslatorVisitor::FACGT_2(bool sz, Vec Vm, Vec Vn, Vec Vd) {
     return ScalarFPCompareRegister(*this, sz, Vm, Vn, Vd, FPComparisonType::AbsoluteGT);
 }
 
+bool TranslatorVisitor::FCMEQ_reg_1(Vec Vm, Vec Vn, Vec Vd) {
+    // Only element 0 takes part: FPVectorEqual16 evaluates every lane and accumulates FPSR
+    // flags, so the upper lanes of Vn/Vm must not be fed to it.
+    const IR::U128 lhs = ir.ZeroExtendToQuad(V_scalar(16, Vn));
+    const IR::U128 rhs = ir.ZeroExtendToQuad(V_scalar(16, Vm));
+    const IR::U128 result = ir.FPVectorEqual(16, lhs, rhs);
+
+    V_scalar(16, Vd, ir.VectorGetElement(16, result, 0));
+    return true;
+}
+
 bool TranslatorVisitor::FCMEQ_reg_2(bool sz, Vec Vm, Vec Vn, Vec Vd) {
     return ScalarFPCompareRegister(*this, sz, Vm, Vn, Vd, FPComparisonType::EQ);
 }
diff --git a/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp b/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp
index 17bad70b..692008a3 100644
--- a/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp
+++ b/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp
@@ -100,6 +100,16 @@ bool TranslatorVisitor::ABS_1(Imm<2> size, Vec Vn, Vec Vd) {
     return true;
 }
 
+bool TranslatorVisitor::FCMEQ_zero_1(Vec Vn, Vec Vd) {
+    // Scalar half-precision compare against zero; only element 0 of the vector result is kept.
+    const IR::U128 operand = ir.ZeroExtendToQuad(V_scalar(16, Vn));
+    const IR::U128 zero = ir.ZeroVector();
+    const IR::U128 result = ir.FPVectorEqual(16, operand, zero);
+
+    V_scalar(16, Vd, ir.VectorGetElement(16, result, 0));
+    return true;
+}
+
 bool TranslatorVisitor::FCMEQ_zero_2(bool sz, Vec Vn, Vec Vd) {
     return ScalarFPCompareAgainstZero(*this, sz, Vn, Vd, ComparisonType::EQ);
 }
diff --git a/src/frontend/A64/translate/impl/simd_three_same.cpp b/src/frontend/A64/translate/impl/simd_three_same.cpp
index bf9c2cb8..0268ea14 100644
--- a/src/frontend/A64/translate/impl/simd_three_same.cpp
+++ b/src/frontend/A64/translate/impl/simd_three_same.cpp
@@ -753,6 +753,18 @@ bool TranslatorVisitor::FMLS_vec_2(bool Q, bool sz, Vec Vm, Vec Vn, Vec Vd) {
     return true;
 }
 
+bool TranslatorVisitor::FCMEQ_reg_3(bool Q, Vec Vm, Vec Vn, Vec Vd) {
+    // Vector half-precision compare of Vn against Vm; Q selects a 128-bit or 64-bit datasize.
+    const size_t datasize = Q ? 128 : 64;
+
+    const IR::U128 lhs = V(datasize, Vn);
+    const IR::U128 rhs = V(datasize, Vm);
+    const IR::U128 result = ir.FPVectorEqual(16, lhs, rhs);
+
+    V(datasize, Vd, result);
+    return true;
+}
+
 bool TranslatorVisitor::FCMEQ_reg_4(bool Q, bool sz, Vec Vm, Vec Vn, Vec Vd) {
     return FPCompareRegister(*this, Q, sz, Vm, Vn, Vd, ComparisonType::EQ);
 }
diff --git a/src/frontend/A64/translate/impl/simd_two_register_misc.cpp b/src/frontend/A64/translate/impl/simd_two_register_misc.cpp
index e8726c56..b8eccf4d 100644
--- a/src/frontend/A64/translate/impl/simd_two_register_misc.cpp
+++ b/src/frontend/A64/translate/impl/simd_two_register_misc.cpp
@@ -332,6 +332,18 @@ bool TranslatorVisitor::FABS_2(bool Q, bool sz, Vec Vn, Vec Vd) {
     return true;
 }
 
+bool TranslatorVisitor::FCMEQ_zero_3(bool Q, Vec Vn, Vec Vd) {
+    // Vector half-precision compare of Vn against zero; Q selects a 128-bit or 64-bit datasize.
+    const size_t datasize = Q ? 128 : 64;
+
+    const IR::U128 operand = V(datasize, Vn);
+    const IR::U128 zero = ir.ZeroVector();
+    const IR::U128 result = ir.FPVectorEqual(16, operand, zero);
+
+    V(datasize, Vd, result);
+    return true;
+}
+
 bool TranslatorVisitor::FCMEQ_zero_4(bool Q, bool sz, Vec Vn, Vec Vd) {
     return FPCompareAgainstZero(*this, Q, sz, Vn, Vd, ComparisonType::EQ);
 }
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index db923f94..b6c93a3f 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -2276,6 +2276,8 @@ U128 IREmitter::FPVectorDiv(size_t esize, const U128& a, const U128& b) {
 
 U128 IREmitter::FPVectorEqual(size_t esize, const U128& a, const U128& b) {
     switch (esize) {
+    case 16:
+        return Inst<U128>(Opcode::FPVectorEqual16, a, b);
     case 32:
         return Inst<U128>(Opcode::FPVectorEqual32, a, b);
     case 64:
diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp
index 2eb720df..e6a9c4ff 100644
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -339,6 +339,7 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
     case Opcode::FPVectorAdd64:
     case Opcode::FPVectorDiv32:
     case Opcode::FPVectorDiv64:
+    case Opcode::FPVectorEqual16:
     case Opcode::FPVectorEqual32:
     case Opcode::FPVectorEqual64:
     case Opcode::FPVectorFromSignedFixed32:
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 3b9246fa..8df11d9e 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -554,6 +554,7 @@ OPCODE(FPVectorAdd32, U128, U128
 OPCODE(FPVectorAdd64, U128, U128, U128 )
 OPCODE(FPVectorDiv32, U128, U128, U128 )
 OPCODE(FPVectorDiv64, U128, U128, U128 )
+OPCODE(FPVectorEqual16, U128, U128, U128 )
 OPCODE(FPVectorEqual32, U128, U128, U128 )
 OPCODE(FPVectorEqual64, U128, U128, U128 )
 OPCODE(FPVectorFromSignedFixed32, U128, U128, U8, U8 )