IR: Initial implementation of FPVectorRoundInt
This commit is contained in:
parent
f2393488fe
commit
f976c47008
4 changed files with 64 additions and 0 deletions
|
@ -24,6 +24,7 @@
|
|||
#include "common/mp/list.h"
|
||||
#include "common/mp/lut.h"
|
||||
#include "common/mp/to_tuple.h"
|
||||
#include "common/mp/vlift.h"
|
||||
#include "common/mp/vllift.h"
|
||||
#include "frontend/ir/basic_block.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
|
@ -728,6 +729,55 @@ void EmitX64::EmitFPVectorRecipStepFused64(EmitContext& ctx, IR::Inst* inst) {
|
|||
EmitRecipStepFused<u64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<size_t fsize>
|
||||
void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
using FPT = mp::unsigned_integer_of_size<fsize>;
|
||||
|
||||
const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
|
||||
const bool exact = inst->GetArg(2).GetU1();
|
||||
|
||||
using rounding_list = mp::list<
|
||||
std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieEven>,
|
||||
std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsPlusInfinity>,
|
||||
std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsMinusInfinity>,
|
||||
std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsZero>,
|
||||
std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieAwayFromZero>
|
||||
>;
|
||||
using exact_list = mp::list<mp::vlift<true>, mp::vlift<false>>;
|
||||
|
||||
using key_type = std::tuple<FP::RoundingMode, bool>;
|
||||
using value_type = void(*)(VectorArray<FPT>&, const VectorArray<FPT>&, FP::FPCR, FP::FPSR&);
|
||||
|
||||
static const auto lut = mp::GenerateLookupTableFromList<key_type, value_type>(
|
||||
[](auto arg) {
|
||||
return std::pair<key_type, value_type>{
|
||||
mp::to_tuple<decltype(arg)>,
|
||||
static_cast<value_type>(
|
||||
[](VectorArray<FPT>& output, const VectorArray<FPT>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
||||
constexpr FP::RoundingMode rounding_mode = std::get<0>(mp::to_tuple<decltype(arg)>);
|
||||
constexpr bool exact = std::get<1>(mp::to_tuple<decltype(arg)>);
|
||||
|
||||
for (size_t i = 0; i < output.size(); ++i) {
|
||||
output[i] = static_cast<FPT>(FP::FPRoundInt<FPT>(input[i], fpcr, rounding_mode, exact, fpsr));
|
||||
}
|
||||
}
|
||||
)
|
||||
};
|
||||
},
|
||||
mp::cartesian_product<rounding_list, exact_list>{}
|
||||
);
|
||||
|
||||
EmitTwoOpFallback(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
|
||||
}
|
||||
|
||||
// 32-bit element variant: forwards to the shared EmitFPVectorRoundInt helper template.
void EmitX64::EmitFPVectorRoundInt32(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 32;
    EmitFPVectorRoundInt<element_bits>(code, ctx, inst);
}
|
||||
|
||||
// 64-bit element variant: forwards to the shared EmitFPVectorRoundInt helper template.
void EmitX64::EmitFPVectorRoundInt64(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t element_bits = 64;
    EmitFPVectorRoundInt<element_bits>(code, ctx, inst);
}
|
||||
|
||||
template<typename FPT>
|
||||
static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
||||
|
|
|
@ -1932,6 +1932,17 @@ U128 IREmitter::FPVectorRecipStepFused(size_t esize, const U128& a, const U128&
|
|||
return {};
|
||||
}
|
||||
|
||||
// Builds an FPVectorRoundInt32 or FPVectorRoundInt64 instruction, chosen by esize.
//
// esize:    element width in bits; only 32 and 64 are handled.
// operand:  the vector to round.
// rounding: encoded into the instruction as an 8-bit immediate.
// exact:    encoded into the instruction as a 1-bit immediate.
//
// Any other esize falls through to UNREACHABLE().
U128 IREmitter::FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact) {
    const u8 rounding_bits = static_cast<u8>(rounding);

    if (esize == 32) {
        return Inst<U128>(Opcode::FPVectorRoundInt32, operand, Imm8(rounding_bits), Imm1(exact));
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::FPVectorRoundInt64, operand, Imm8(rounding_bits), Imm1(exact));
    }

    UNREACHABLE();
    return {};
}
|
||||
|
||||
U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) {
|
||||
switch (esize) {
|
||||
case 32:
|
||||
|
|
|
@ -320,6 +320,7 @@ public:
|
|||
U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b);
|
||||
U128 FPVectorRecipEstimate(size_t esize, const U128& a);
|
||||
U128 FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b);
|
||||
U128 FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact);
|
||||
U128 FPVectorRSqrtEstimate(size_t esize, const U128& a);
|
||||
U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b);
|
||||
U128 FPVectorS32ToSingle(const U128& a);
|
||||
|
|
|
@ -491,6 +491,8 @@ OPCODE(FPVectorRecipEstimate32, T::U128, T::U128
|
|||
OPCODE(FPVectorRecipEstimate64, T::U128, T::U128 )
|
||||
OPCODE(FPVectorRecipStepFused32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorRecipStepFused64, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorRoundInt32, T::U128, T::U128, T::U8, T::U1 )
|
||||
OPCODE(FPVectorRoundInt64, T::U128, T::U128, T::U8, T::U1 )
|
||||
OPCODE(FPVectorRSqrtEstimate32, T::U128, T::U128 )
|
||||
OPCODE(FPVectorRSqrtEstimate64, T::U128, T::U128 )
|
||||
OPCODE(FPVectorRSqrtStepFused32, T::U128, T::U128, T::U128 )
|
||||
|
|
Loading…
Reference in a new issue