frontend/ir_emitter: Add half-precision variant of FPVectorRoundInt
This commit is contained in:
parent
ad0c698f89
commit
5b4673da4b
3 changed files with 34 additions and 22 deletions
|
@@ -1160,28 +1160,30 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
|
const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
|
||||||
const bool exact = inst->GetArg(2).GetU1();
|
const bool exact = inst->GetArg(2).GetU1();
|
||||||
|
|
||||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero && !exact) {
|
if constexpr (fsize != 16) {
|
||||||
const u8 round_imm = [&]() -> u8 {
|
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero && !exact) {
|
||||||
switch (rounding) {
|
const u8 round_imm = [&]() -> u8 {
|
||||||
case FP::RoundingMode::ToNearest_TieEven:
|
switch (rounding) {
|
||||||
return 0b00;
|
case FP::RoundingMode::ToNearest_TieEven:
|
||||||
case FP::RoundingMode::TowardsPlusInfinity:
|
return 0b00;
|
||||||
return 0b10;
|
case FP::RoundingMode::TowardsPlusInfinity:
|
||||||
case FP::RoundingMode::TowardsMinusInfinity:
|
return 0b10;
|
||||||
return 0b01;
|
case FP::RoundingMode::TowardsMinusInfinity:
|
||||||
case FP::RoundingMode::TowardsZero:
|
return 0b01;
|
||||||
return 0b11;
|
case FP::RoundingMode::TowardsZero:
|
||||||
default:
|
return 0b11;
|
||||||
UNREACHABLE();
|
default:
|
||||||
}
|
UNREACHABLE();
|
||||||
return 0;
|
}
|
||||||
}();
|
return 0;
|
||||||
|
}();
|
||||||
|
|
||||||
EmitTwoOpVectorOperation<fsize, DefaultIndexer>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a){
|
EmitTwoOpVectorOperation<fsize, DefaultIndexer>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a){
|
||||||
FCODE(roundp)(result, xmm_a, round_imm);
|
FCODE(roundp)(result, xmm_a, round_imm);
|
||||||
});
|
});
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
using rounding_list = mp::list<
|
using rounding_list = mp::list<
|
||||||
|
@@ -1218,6 +1220,10 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitTwoOpFallback(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
|
EmitTwoOpFallback(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void EmitX64::EmitFPVectorRoundInt16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitFPVectorRoundInt<16>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPVectorRoundInt32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPVectorRoundInt32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitFPVectorRoundInt<32>(code, ctx, inst);
|
EmitFPVectorRoundInt<32>(code, ctx, inst);
|
||||||
}
|
}
|
||||||
|
|
|
@@ -2278,11 +2278,16 @@ U128 IREmitter::FPVectorRecipStepFused(size_t esize, const U128& a, const U128&
|
||||||
}
|
}
|
||||||
|
|
||||||
U128 IREmitter::FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact) {
|
U128 IREmitter::FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact) {
|
||||||
|
const IR::U8 rounding_imm = Imm8(static_cast<u8>(rounding));
|
||||||
|
const IR::U1 exact_imm = Imm1(exact);
|
||||||
|
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
|
case 16:
|
||||||
|
return Inst<U128>(Opcode::FPVectorRoundInt16, operand, rounding_imm, exact_imm);
|
||||||
case 32:
|
case 32:
|
||||||
return Inst<U128>(Opcode::FPVectorRoundInt32, operand, Imm8(static_cast<u8>(rounding)), Imm1(exact));
|
return Inst<U128>(Opcode::FPVectorRoundInt32, operand, rounding_imm, exact_imm);
|
||||||
case 64:
|
case 64:
|
||||||
return Inst<U128>(Opcode::FPVectorRoundInt64, operand, Imm8(static_cast<u8>(rounding)), Imm1(exact));
|
return Inst<U128>(Opcode::FPVectorRoundInt64, operand, rounding_imm, exact_imm);
|
||||||
}
|
}
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
return {};
|
return {};
|
||||||
|
|
|
@@ -574,6 +574,7 @@ OPCODE(FPVectorRecipEstimate32, U128, U128
|
||||||
OPCODE(FPVectorRecipEstimate64, U128, U128 )
|
OPCODE(FPVectorRecipEstimate64, U128, U128 )
|
||||||
OPCODE(FPVectorRecipStepFused32, U128, U128, U128 )
|
OPCODE(FPVectorRecipStepFused32, U128, U128, U128 )
|
||||||
OPCODE(FPVectorRecipStepFused64, U128, U128, U128 )
|
OPCODE(FPVectorRecipStepFused64, U128, U128, U128 )
|
||||||
|
OPCODE(FPVectorRoundInt16, U128, U128, U8, U1 )
|
||||||
OPCODE(FPVectorRoundInt32, U128, U128, U8, U1 )
|
OPCODE(FPVectorRoundInt32, U128, U128, U8, U1 )
|
||||||
OPCODE(FPVectorRoundInt64, U128, U128, U8, U1 )
|
OPCODE(FPVectorRoundInt64, U128, U128, U8, U1 )
|
||||||
OPCODE(FPVectorRSqrtEstimate32, U128, U128 )
|
OPCODE(FPVectorRSqrtEstimate32, U128, U128 )
|
||||||
|
|
Loading…
Reference in a new issue