Merge pull request #503 from lioncash/cmp

A64: Implement half-precision variants of FCMEQ
This commit is contained in:
Merry 2019-05-30 00:58:57 +01:00 committed by MerryMage
commit 1c97edac77
13 changed files with 117 additions and 4 deletions

View file

@ -24,6 +24,8 @@ add_library(dynarmic
common/fp/info.h
common/fp/mantissa_util.h
common/fp/op.h
common/fp/op/FPCompare.cpp
common/fp/op/FPCompare.h
common/fp/op/FPConvert.cpp
common/fp/op/FPConvert.h
common/fp/op/FPMulAdd.cpp

View file

@ -540,6 +540,14 @@ void EmitX64::EmitFPVectorDiv64(EmitContext& ctx, IR::Inst* inst) {
EmitThreeOpVectorOperation<64, DefaultIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::divpd);
}
// Emits FPVectorEqual16 through the generic three-operand fallback: every
// 16-bit lane is compared in software with FP::FPCompareEQ, and the lane is
// set to all-ones when the operands compare equal, or to zero otherwise.
void EmitX64::EmitFPVectorEqual16(EmitContext& ctx, IR::Inst* inst) {
    EmitThreeOpFallback(code, ctx, inst, [](VectorArray<u16>& out, const VectorArray<u16>& lhs, const VectorArray<u16>& rhs, FP::FPCR fpcr, FP::FPSR& fpsr) {
        for (size_t lane = 0; lane < out.size(); ++lane) {
            // fpsr is shared across lanes, so any exception raised by a lane accumulates.
            const bool lanes_equal = FP::FPCompareEQ(lhs[lane], rhs[lane], fpcr, fpsr);
            out[lane] = lanes_equal ? 0xFFFF : 0;
        }
    });
}
void EmitX64::EmitFPVectorEqual32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

View file

@ -6,6 +6,7 @@
#pragma once
#include "common/fp/op/FPCompare.h"
#include "common/fp/op/FPConvert.h"
#include "common/fp/op/FPMulAdd.h"
#include "common/fp/op/FPRecipEstimate.h"

View file

@ -0,0 +1,41 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2019 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/op/FPCompare.h"
#include "common/fp/process_exception.h"
#include "common/fp/unpacked.h"
namespace Dynarmic::FP {
template <typename FPT>
bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr) {
const auto unpacked1 = FPUnpack(lhs, fpcr, fpsr);
const auto unpacked2 = FPUnpack(rhs, fpcr, fpsr);
const auto type1 = std::get<FPType>(unpacked1);
const auto type2 = std::get<FPType>(unpacked2);
const auto& value1 = std::get<FPUnpacked>(unpacked1);
const auto& value2 = std::get<FPUnpacked>(unpacked2);
if (type1 == FPType::QNaN || type1 == FPType::SNaN ||
type2 == FPType::QNaN || type2 == FPType::SNaN) {
if (type1 == FPType::SNaN || type2 == FPType::SNaN) {
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
}
// Comparisons against NaN are never equal.
return false;
}
return value1 == value2 || (type1 == FPType::Zero && type2 == FPType::Zero);
}
template bool FPCompareEQ<u16>(u16 lhs, u16 rhs, FPCR fpcr, FPSR& fpsr);
template bool FPCompareEQ<u32>(u32 lhs, u32 rhs, FPCR fpcr, FPSR& fpsr);
template bool FPCompareEQ<u64>(u64 lhs, u64 rhs, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP

View file

@ -0,0 +1,17 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2019 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
namespace Dynarmic::FP {
// Forward declarations only: this header just declares FPCompareEQ, so the
// full definitions of FPCR and FPSR are not needed here.
class FPCR;
class FPSR;
/// Software floating-point equality comparison on raw operand bits.
///
/// Returns false if either operand is a NaN (processing an InvalidOp
/// exception when either is a signalling NaN); otherwise returns true when
/// the unpacked values are equal or both operands are zeros.
///
/// The definition lives in FPCompare.cpp, which explicitly instantiates the
/// template for u16, u32 and u64.
///
/// @param lhs  Raw bits of the left-hand operand.
/// @param rhs  Raw bits of the right-hand operand.
/// @param fpcr Floating-point control state used while unpacking.
/// @param fpsr Floating-point status state; may be updated.
template <typename FPT>
bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP

View file

@ -382,7 +382,7 @@ INST(DUP_elt_1, "DUP (element)", "01011
// Data Processing - FP and SIMD - Scalar three
//INST(FMULX_vec_1, "FMULX", "01011110010mmmmm000111nnnnnddddd")
INST(FMULX_vec_2, "FMULX", "010111100z1mmmmm110111nnnnnddddd")
//INST(FCMEQ_reg_1, "FCMEQ (register)", "01011110010mmmmm001001nnnnnddddd")
INST(FCMEQ_reg_1, "FCMEQ (register)", "01011110010mmmmm001001nnnnnddddd")
INST(FCMEQ_reg_2, "FCMEQ (register)", "010111100z1mmmmm111001nnnnnddddd")
INST(FRECPS_1, "FRECPS", "01011110010mmmmm001111nnnnnddddd")
INST(FRECPS_2, "FRECPS", "010111100z1mmmmm111111nnnnnddddd")
@ -410,7 +410,7 @@ INST(FCVTAS_2, "FCVTAS (vector)", "01011
INST(SCVTF_int_2, "SCVTF (vector, integer)", "010111100z100001110110nnnnnddddd")
//INST(FCMGT_zero_1, "FCMGT (zero)", "0101111011111000110010nnnnnddddd")
INST(FCMGT_zero_2, "FCMGT (zero)", "010111101z100000110010nnnnnddddd")
//INST(FCMEQ_zero_1, "FCMEQ (zero)", "0101111011111000110110nnnnnddddd")
INST(FCMEQ_zero_1, "FCMEQ (zero)", "0101111011111000110110nnnnnddddd")
INST(FCMEQ_zero_2, "FCMEQ (zero)", "010111101z100000110110nnnnnddddd")
//INST(FCMLT_1, "FCMLT (zero)", "0101111011111000111010nnnnnddddd")
INST(FCMLT_2, "FCMLT (zero)", "010111101z100000111010nnnnnddddd")
@ -574,7 +574,7 @@ INST(INS_elt, "INS (element)", "01101
// Data Processing - FP and SIMD - SIMD Three same
//INST(FMULX_vec_3, "FMULX", "0Q001110010mmmmm000111nnnnnddddd")
//INST(FCMEQ_reg_3, "FCMEQ (register)", "0Q001110010mmmmm001001nnnnnddddd")
INST(FCMEQ_reg_3, "FCMEQ (register)", "0Q001110010mmmmm001001nnnnnddddd")
INST(FRECPS_3, "FRECPS", "0Q001110010mmmmm001111nnnnnddddd")
INST(FRSQRTS_3, "FRSQRTS", "0Q001110110mmmmm001111nnnnnddddd")
//INST(FCMGE_reg_3, "FCMGE (register)", "0Q101110010mmmmm001001nnnnnddddd")
@ -635,7 +635,7 @@ INST(FCVTAS_4, "FCVTAS (vector)", "0Q001
INST(SCVTF_int_4, "SCVTF (vector, integer)", "0Q0011100z100001110110nnnnnddddd")
//INST(FCMGT_zero_3, "FCMGT (zero)", "0Q00111011111000110010nnnnnddddd")
INST(FCMGT_zero_4, "FCMGT (zero)", "0Q0011101z100000110010nnnnnddddd")
//INST(FCMEQ_zero_3, "FCMEQ (zero)", "0Q00111011111000110110nnnnnddddd")
INST(FCMEQ_zero_3, "FCMEQ (zero)", "0Q00111011111000110110nnnnnddddd")
INST(FCMEQ_zero_4, "FCMEQ (zero)", "0Q0011101z100000110110nnnnnddddd")
//INST(FCMLT_3, "FCMLT (zero)", "0Q00111011111000111010nnnnnddddd")
INST(FCMLT_4, "FCMLT (zero)", "0Q0011101z100000111010nnnnnddddd")

View file

@ -346,6 +346,15 @@ bool TranslatorVisitor::FACGT_2(bool sz, Vec Vm, Vec Vn, Vec Vd) {
return ScalarFPCompareRegister(*this, sz, Vm, Vn, Vd, FPComparisonType::AbsoluteGT);
}
// FCMEQ (register), scalar half-precision variant.
//
// Compares the lowest 16-bit element of Vn with the lowest 16-bit element of
// Vm and writes the resulting 16-bit mask (element 0 of the vector compare)
// to Vd as a scalar.
//
// Only the scalar element may take part in the comparison. Each operand is
// therefore read as a 16-bit scalar and zero-extended to a full vector, the
// same way FCMEQ_zero_1 prepares its operand. Reading the whole 128-bit
// registers would feed the unused upper lanes through the vector compare,
// where a signalling NaN in one of those lanes could raise a spurious
// InvalidOp in FPSR.
bool TranslatorVisitor::FCMEQ_reg_1(Vec Vm, Vec Vn, Vec Vd) {
    const IR::U128 lhs = ir.ZeroExtendToQuad(V_scalar(16, Vn));
    const IR::U128 rhs = ir.ZeroExtendToQuad(V_scalar(16, Vm));
    const IR::U128 result = ir.FPVectorEqual(16, lhs, rhs);

    V_scalar(16, Vd, ir.VectorGetElement(16, result, 0));
    return true;
}
// FCMEQ (register), scalar variant carrying the sz bit: delegates to the
// shared scalar register-compare helper, selecting the EQ comparison.
bool TranslatorVisitor::FCMEQ_reg_2(bool sz, Vec Vm, Vec Vn, Vec Vd) {
    const auto comparison = FPComparisonType::EQ;
    return ScalarFPCompareRegister(*this, sz, Vm, Vn, Vd, comparison);
}

View file

@ -100,6 +100,15 @@ bool TranslatorVisitor::ABS_1(Imm<2> size, Vec Vn, Vec Vd) {
return true;
}
// FCMEQ (zero), scalar half-precision variant.
//
// Reads the 16-bit scalar from Vn, zero-extends it to a full vector, compares
// it lane-wise against an all-zero vector, and writes element 0 of the
// comparison result to Vd as a 16-bit scalar.
bool TranslatorVisitor::FCMEQ_zero_1(Vec Vn, Vec Vd) {
    const IR::U128 widened = ir.ZeroExtendToQuad(V_scalar(16, Vn));
    const IR::U128 zeros = ir.ZeroVector();
    const IR::U128 lane_masks = ir.FPVectorEqual(16, widened, zeros);

    const auto scalar_mask = ir.VectorGetElement(16, lane_masks, 0);
    V_scalar(16, Vd, scalar_mask);
    return true;
}
// FCMEQ (zero), scalar variant carrying the sz bit: delegates to the shared
// scalar compare-against-zero helper, selecting the EQ comparison.
bool TranslatorVisitor::FCMEQ_zero_2(bool sz, Vec Vn, Vec Vd) {
    const auto comparison = ComparisonType::EQ;
    return ScalarFPCompareAgainstZero(*this, sz, Vn, Vd, comparison);
}

View file

@ -753,6 +753,17 @@ bool TranslatorVisitor::FMLS_vec_2(bool Q, bool sz, Vec Vm, Vec Vn, Vec Vd) {
return true;
}
bool TranslatorVisitor::FCMEQ_reg_3(bool Q, Vec Vm, Vec Vn, Vec Vd) {
const size_t datasize = Q ? 128 : 64;
const IR::U128 lhs = V(datasize, Vn);
const IR::U128 rhs = V(datasize, Vm);
const IR::U128 result = ir.FPVectorEqual(16, lhs, rhs);
V(datasize, Vd, result);
return true;
}
// FCMEQ (register), vector variant carrying the sz bit: delegates to the
// shared vector register-compare helper, selecting the EQ comparison.
bool TranslatorVisitor::FCMEQ_reg_4(bool Q, bool sz, Vec Vm, Vec Vn, Vec Vd) {
    const auto comparison = ComparisonType::EQ;
    return FPCompareRegister(*this, Q, sz, Vm, Vn, Vd, comparison);
}

View file

@ -332,6 +332,17 @@ bool TranslatorVisitor::FABS_2(bool Q, bool sz, Vec Vn, Vec Vd) {
return true;
}
bool TranslatorVisitor::FCMEQ_zero_3(bool Q, Vec Vn, Vec Vd) {
const size_t datasize = Q ? 128 : 64;
const IR::U128 operand = V(datasize, Vn);
const IR::U128 zero = ir.ZeroVector();
const IR::U128 result = ir.FPVectorEqual(16, operand, zero);
V(datasize, Vd, result);
return true;
}
// FCMEQ (zero), vector variant carrying the sz bit: delegates to the shared
// vector compare-against-zero helper, selecting the EQ comparison.
bool TranslatorVisitor::FCMEQ_zero_4(bool Q, bool sz, Vec Vn, Vec Vd) {
    const auto comparison = ComparisonType::EQ;
    return FPCompareAgainstZero(*this, Q, sz, Vn, Vd, comparison);
}

View file

@ -2276,6 +2276,8 @@ U128 IREmitter::FPVectorDiv(size_t esize, const U128& a, const U128& b) {
U128 IREmitter::FPVectorEqual(size_t esize, const U128& a, const U128& b) {
switch (esize) {
case 16:
return Inst<U128>(Opcode::FPVectorEqual16, a, b);
case 32:
return Inst<U128>(Opcode::FPVectorEqual32, a, b);
case 64:

View file

@ -339,6 +339,7 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
case Opcode::FPVectorAdd64:
case Opcode::FPVectorDiv32:
case Opcode::FPVectorDiv64:
case Opcode::FPVectorEqual16:
case Opcode::FPVectorEqual32:
case Opcode::FPVectorEqual64:
case Opcode::FPVectorFromSignedFixed32:

View file

@ -554,6 +554,7 @@ OPCODE(FPVectorAdd32, U128, U128
OPCODE(FPVectorAdd64, U128, U128, U128 )
OPCODE(FPVectorDiv32, U128, U128, U128 )
OPCODE(FPVectorDiv64, U128, U128, U128 )
OPCODE(FPVectorEqual16, U128, U128, U128 )
OPCODE(FPVectorEqual32, U128, U128, U128 )
OPCODE(FPVectorEqual64, U128, U128, U128 )
OPCODE(FPVectorFromSignedFixed32, U128, U128, U8, U8 )