Merge pull request #477 from lioncash/rsqrt
A64: Handle half-precision variants of FRSQRTE
This commit is contained in:
commit
554c8c27c6
10 changed files with 53 additions and 10 deletions
|
@ -920,6 +920,10 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
||||||
code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
|
code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 16-bit entry point for the scalar reciprocal-square-root-estimate emitter.
// It only forwards to the shared EmitFPRSqrtEstimate<FPT> helper with
// FPT = u16, mirroring the 32-bit sibling shown just below in this diff.
// NOTE(review): only the tail of the shared helper is visible in this diff
// (a CallFunction on FP::FPRSqrtEstimate<FPT>); confirm the rest of the helper
// has no 32/64-bit-only assumptions.
void EmitX64::EmitFPRSqrtEstimate16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitFPRSqrtEstimate<u16>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPRSqrtEstimate32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPRSqrtEstimate32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitFPRSqrtEstimate<u32>(code, ctx, inst);
|
EmitFPRSqrtEstimate<u32>(code, ctx, inst);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1241,6 +1241,10 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 16-bit-element entry point for the vector reciprocal-square-root-estimate
// emitter. It delegates to the shared EmitRSqrtEstimate<FPT> helper with
// FPT = u16, mirroring the 32-bit sibling shown just below in this diff.
// NOTE(review): the body of EmitRSqrtEstimate is outside this hunk; verify it
// handles a 16-bit element type.
void EmitX64::EmitFPVectorRSqrtEstimate16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitRSqrtEstimate<u16>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPVectorRSqrtEstimate32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPVectorRSqrtEstimate32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitRSqrtEstimate<u32>(code, ctx, inst);
|
EmitRSqrtEstimate<u32>(code, ctx, inst);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@ namespace Dynarmic::FP {
|
||||||
|
|
||||||
template<typename FPT>
|
template<typename FPT>
|
||||||
FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
|
FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
|
||||||
auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);
|
const auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);
|
||||||
|
|
||||||
if (type == FPType::SNaN || type == FPType::QNaN) {
|
if (type == FPType::SNaN || type == FPType::QNaN) {
|
||||||
return FPProcessNaN(type, op, fpcr, fpsr);
|
return FPProcessNaN(type, op, fpcr, fpsr);
|
||||||
|
@ -27,16 +27,16 @@ FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
|
||||||
|
|
||||||
if (type == FPType::Zero) {
|
if (type == FPType::Zero) {
|
||||||
FPProcessException(FPExc::DivideByZero, fpcr, fpsr);
|
FPProcessException(FPExc::DivideByZero, fpcr, fpsr);
|
||||||
return FPInfo<FPT>::Infinity(sign);
|
return FPT(FPInfo<FPT>::Infinity(sign));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sign) {
|
if (sign) {
|
||||||
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
|
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
|
||||||
return FPInfo<FPT>::DefaultNaN();
|
return FPT(FPInfo<FPT>::DefaultNaN());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type == FPType::Infinity) {
|
if (type == FPType::Infinity) {
|
||||||
return FPInfo<FPT>::Zero(false);
|
return FPT(FPInfo<FPT>::Zero(false));
|
||||||
}
|
}
|
||||||
|
|
||||||
const int result_exponent = (-(value.exponent + 1)) >> 1;
|
const int result_exponent = (-(value.exponent + 1)) >> 1;
|
||||||
|
@ -50,6 +50,7 @@ FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
|
||||||
return (bits_exponent << FPInfo<FPT>::explicit_mantissa_width) | (bits_mantissa & FPInfo<FPT>::mantissa_mask);
|
return (bits_exponent << FPInfo<FPT>::explicit_mantissa_width) | (bits_mantissa & FPInfo<FPT>::mantissa_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template u16 FPRSqrtEstimate<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
|
||||||
template u32 FPRSqrtEstimate<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
|
template u32 FPRSqrtEstimate<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
|
||||||
template u64 FPRSqrtEstimate<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
|
template u64 FPRSqrtEstimate<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
|
||||||
|
|
||||||
|
|
|
@ -438,7 +438,7 @@ INST(FCMLE_2, "FCMLE (zero)", "01111
|
||||||
INST(FCVTPU_2, "FCVTPU (vector)", "011111101z100001101010nnnnnddddd")
|
INST(FCVTPU_2, "FCVTPU (vector)", "011111101z100001101010nnnnnddddd")
|
||||||
//INST(FCVTZU_int_1, "FCVTZU (vector, integer)", "0111111011111001101110nnnnnddddd")
|
//INST(FCVTZU_int_1, "FCVTZU (vector, integer)", "0111111011111001101110nnnnnddddd")
|
||||||
INST(FCVTZU_int_2, "FCVTZU (vector, integer)", "011111101z100001101110nnnnnddddd")
|
INST(FCVTZU_int_2, "FCVTZU (vector, integer)", "011111101z100001101110nnnnnddddd")
|
||||||
//INST(FRSQRTE_1, "FRSQRTE", "0111111011111001110110nnnnnddddd")
|
INST(FRSQRTE_1, "FRSQRTE", "0111111011111001110110nnnnnddddd")
|
||||||
INST(FRSQRTE_2, "FRSQRTE", "011111101z100001110110nnnnnddddd")
|
INST(FRSQRTE_2, "FRSQRTE", "011111101z100001110110nnnnnddddd")
|
||||||
|
|
||||||
// Data Processing - FP and SIMD - Scalar three same extra
|
// Data Processing - FP and SIMD - Scalar three same extra
|
||||||
|
@ -692,7 +692,7 @@ INST(FCVTPU_4, "FCVTPU (vector)", "0Q101
|
||||||
//INST(FCVTZU_int_3, "FCVTZU (vector, integer)", "0Q10111011111001101110nnnnnddddd")
|
//INST(FCVTZU_int_3, "FCVTZU (vector, integer)", "0Q10111011111001101110nnnnnddddd")
|
||||||
INST(FCVTZU_int_4, "FCVTZU (vector, integer)", "0Q1011101z100001101110nnnnnddddd")
|
INST(FCVTZU_int_4, "FCVTZU (vector, integer)", "0Q1011101z100001101110nnnnnddddd")
|
||||||
INST(URSQRTE, "URSQRTE", "0Q1011101z100001110010nnnnnddddd")
|
INST(URSQRTE, "URSQRTE", "0Q1011101z100001110010nnnnnddddd")
|
||||||
//INST(FRSQRTE_3, "FRSQRTE", "0Q10111011111001110110nnnnnddddd")
|
INST(FRSQRTE_3, "FRSQRTE", "0Q10111011111001110110nnnnnddddd")
|
||||||
INST(FRSQRTE_4, "FRSQRTE", "0Q1011101z100001110110nnnnnddddd")
|
INST(FRSQRTE_4, "FRSQRTE", "0Q1011101z100001110110nnnnnddddd")
|
||||||
//INST(FSQRT_1, "FSQRT (vector)", "0Q10111011111001111110nnnnnddddd")
|
//INST(FSQRT_1, "FSQRT (vector)", "0Q10111011111001111110nnnnnddddd")
|
||||||
INST(FSQRT_2, "FSQRT (vector)", "0Q1011101z100001111110nnnnnddddd")
|
INST(FSQRT_2, "FSQRT (vector)", "0Q1011101z100001111110nnnnnddddd")
|
||||||
|
|
|
@ -200,6 +200,16 @@ bool TranslatorVisitor::FRECPX_2(bool sz, Vec Vn, Vec Vd) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Translates the FRSQRTE_1 decoder entry, which this commit enables in the
// scalar decoder table. Unlike FRSQRTE_2 it takes no `sz` argument: the
// element size is fixed at 16 bits.
// Vn is the source vector register and Vd the destination vector register.
// Returns true unconditionally.
bool TranslatorVisitor::FRSQRTE_1(Vec Vn, Vec Vd) {
|
||||||
|
const size_t esize = 16;
|
||||||
|
|
||||||
|
// Read the 16-bit scalar from Vn and build the estimate IR value.
// The U16 operand makes IREmitter::FPRSqrtEstimate pick the
// FPRSqrtEstimate16 opcode.
const IR::U16 operand = V_scalar(esize, Vn);
|
||||||
|
const IR::U16 result = ir.FPRSqrtEstimate(operand);
|
||||||
|
|
||||||
|
// Write the 16-bit result back as a scalar into Vd.
V_scalar(esize, Vd, result);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool TranslatorVisitor::FRSQRTE_2(bool sz, Vec Vn, Vec Vd) {
|
bool TranslatorVisitor::FRSQRTE_2(bool sz, Vec Vn, Vec Vd) {
|
||||||
const size_t esize = sz ? 64 : 32;
|
const size_t esize = sz ? 64 : 32;
|
||||||
|
|
||||||
|
|
|
@ -548,6 +548,17 @@ bool TranslatorVisitor::FSQRT_2(bool Q, bool sz, Vec Vn, Vec Vd) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Translates the FRSQRTE_3 decoder entry, which this commit enables in the
// vector decoder table. The element size is fixed at 16 bits.
// Q selects the operand width: 128 bits when set, 64 bits otherwise.
// Vn is the source vector register and Vd the destination vector register.
// Returns true unconditionally. Unlike FRSQRTE_4 there is no `sz` argument,
// so there is no reserved sz/Q combination to reject here.
bool TranslatorVisitor::FRSQRTE_3(bool Q, Vec Vn, Vec Vd) {
|
||||||
|
const size_t datasize = Q ? 128 : 64;
|
||||||
|
const size_t esize = 16;
|
||||||
|
|
||||||
|
// Load `datasize` bits from Vn. With esize == 16,
// IREmitter::FPVectorRSqrtEstimate picks the FPVectorRSqrtEstimate16 opcode.
const IR::U128 operand = V(datasize, Vn);
|
||||||
|
const IR::U128 result = ir.FPVectorRSqrtEstimate(esize, operand);
|
||||||
|
|
||||||
|
// Store `datasize` bits of the result into Vd.
V(datasize, Vd, result);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool TranslatorVisitor::FRSQRTE_4(bool Q, bool sz, Vec Vn, Vec Vd) {
|
bool TranslatorVisitor::FRSQRTE_4(bool Q, bool sz, Vec Vn, Vec Vd) {
|
||||||
if (sz && !Q) {
|
if (sz && !Q) {
|
||||||
return ReservedValue();
|
return ReservedValue();
|
||||||
|
|
|
@ -1967,11 +1967,18 @@ U16U32U64 IREmitter::FPRoundInt(const U16U32U64& a, FP::RoundingMode rounding, b
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
U32U64 IREmitter::FPRSqrtEstimate(const U32U64& a) {
|
U16U32U64 IREmitter::FPRSqrtEstimate(const U16U32U64& a) {
|
||||||
if (a.GetType() == Type::U32) {
|
switch (a.GetType()) {
|
||||||
|
case Type::U16:
|
||||||
|
return Inst<U16>(Opcode::FPRSqrtEstimate16, a);
|
||||||
|
case Type::U32:
|
||||||
return Inst<U32>(Opcode::FPRSqrtEstimate32, a);
|
return Inst<U32>(Opcode::FPRSqrtEstimate32, a);
|
||||||
|
case Type::U64:
|
||||||
|
return Inst<U64>(Opcode::FPRSqrtEstimate64, a);
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
return U16U32U64{};
|
||||||
}
|
}
|
||||||
return Inst<U64>(Opcode::FPRSqrtEstimate64, a);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
U32U64 IREmitter::FPRSqrtStepFused(const U32U64& a, const U32U64& b) {
|
U32U64 IREmitter::FPRSqrtStepFused(const U32U64& a, const U32U64& b) {
|
||||||
|
@ -2295,6 +2302,8 @@ U128 IREmitter::FPVectorRoundInt(size_t esize, const U128& operand, FP::Rounding
|
||||||
|
|
||||||
U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) {
|
U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
|
case 16:
|
||||||
|
return Inst<U128>(Opcode::FPVectorRSqrtEstimate16, a);
|
||||||
case 32:
|
case 32:
|
||||||
return Inst<U128>(Opcode::FPVectorRSqrtEstimate32, a);
|
return Inst<U128>(Opcode::FPVectorRSqrtEstimate32, a);
|
||||||
case 64:
|
case 64:
|
||||||
|
|
|
@ -309,7 +309,7 @@ public:
|
||||||
U16U32U64 FPRecipExponent(const U16U32U64& a);
|
U16U32U64 FPRecipExponent(const U16U32U64& a);
|
||||||
U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b);
|
U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b);
|
||||||
U16U32U64 FPRoundInt(const U16U32U64& a, FP::RoundingMode rounding, bool exact);
|
U16U32U64 FPRoundInt(const U16U32U64& a, FP::RoundingMode rounding, bool exact);
|
||||||
U32U64 FPRSqrtEstimate(const U32U64& a);
|
U16U32U64 FPRSqrtEstimate(const U16U32U64& a);
|
||||||
U32U64 FPRSqrtStepFused(const U32U64& a, const U32U64& b);
|
U32U64 FPRSqrtStepFused(const U32U64& a, const U32U64& b);
|
||||||
U32U64 FPSqrt(const U32U64& a);
|
U32U64 FPSqrt(const U32U64& a);
|
||||||
U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpcr_controlled);
|
U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpcr_controlled);
|
||||||
|
|
|
@ -282,6 +282,7 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
|
||||||
case Opcode::FPRoundInt16:
|
case Opcode::FPRoundInt16:
|
||||||
case Opcode::FPRoundInt32:
|
case Opcode::FPRoundInt32:
|
||||||
case Opcode::FPRoundInt64:
|
case Opcode::FPRoundInt64:
|
||||||
|
case Opcode::FPRSqrtEstimate16:
|
||||||
case Opcode::FPRSqrtEstimate32:
|
case Opcode::FPRSqrtEstimate32:
|
||||||
case Opcode::FPRSqrtEstimate64:
|
case Opcode::FPRSqrtEstimate64:
|
||||||
case Opcode::FPRSqrtStepFused32:
|
case Opcode::FPRSqrtStepFused32:
|
||||||
|
@ -342,6 +343,7 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
|
||||||
case Opcode::FPVectorRoundInt16:
|
case Opcode::FPVectorRoundInt16:
|
||||||
case Opcode::FPVectorRoundInt32:
|
case Opcode::FPVectorRoundInt32:
|
||||||
case Opcode::FPVectorRoundInt64:
|
case Opcode::FPVectorRoundInt64:
|
||||||
|
case Opcode::FPVectorRSqrtEstimate16:
|
||||||
case Opcode::FPVectorRSqrtEstimate32:
|
case Opcode::FPVectorRSqrtEstimate32:
|
||||||
case Opcode::FPVectorRSqrtEstimate64:
|
case Opcode::FPVectorRSqrtEstimate64:
|
||||||
case Opcode::FPVectorRSqrtStepFused32:
|
case Opcode::FPVectorRSqrtStepFused32:
|
||||||
|
|
|
@ -501,6 +501,7 @@ OPCODE(FPRecipStepFused64, U64, U64,
|
||||||
OPCODE(FPRoundInt16, U16, U16, U8, U1 )
|
OPCODE(FPRoundInt16, U16, U16, U8, U1 )
|
||||||
OPCODE(FPRoundInt32, U32, U32, U8, U1 )
|
OPCODE(FPRoundInt32, U32, U32, U8, U1 )
|
||||||
OPCODE(FPRoundInt64, U64, U64, U8, U1 )
|
OPCODE(FPRoundInt64, U64, U64, U8, U1 )
|
||||||
|
OPCODE(FPRSqrtEstimate16, U16, U16 )
|
||||||
OPCODE(FPRSqrtEstimate32, U32, U32 )
|
OPCODE(FPRSqrtEstimate32, U32, U32 )
|
||||||
OPCODE(FPRSqrtEstimate64, U64, U64 )
|
OPCODE(FPRSqrtEstimate64, U64, U64 )
|
||||||
OPCODE(FPRSqrtStepFused32, U32, U32, U32 )
|
OPCODE(FPRSqrtStepFused32, U32, U32, U32 )
|
||||||
|
@ -577,6 +578,7 @@ OPCODE(FPVectorRecipStepFused64, U128, U128
|
||||||
OPCODE(FPVectorRoundInt16, U128, U128, U8, U1 )
|
OPCODE(FPVectorRoundInt16, U128, U128, U8, U1 )
|
||||||
OPCODE(FPVectorRoundInt32, U128, U128, U8, U1 )
|
OPCODE(FPVectorRoundInt32, U128, U128, U8, U1 )
|
||||||
OPCODE(FPVectorRoundInt64, U128, U128, U8, U1 )
|
OPCODE(FPVectorRoundInt64, U128, U128, U8, U1 )
|
||||||
|
OPCODE(FPVectorRSqrtEstimate16, U128, U128 )
|
||||||
OPCODE(FPVectorRSqrtEstimate32, U128, U128 )
|
OPCODE(FPVectorRSqrtEstimate32, U128, U128 )
|
||||||
OPCODE(FPVectorRSqrtEstimate64, U128, U128 )
|
OPCODE(FPVectorRSqrtEstimate64, U128, U128 )
|
||||||
OPCODE(FPVectorRSqrtStepFused32, U128, U128, U128 )
|
OPCODE(FPVectorRSqrtStepFused32, U128, U128, U128 )
|
||||||
|
|
Loading…
Reference in a new issue