IR: Initial implementation of FPVectorRoundInt

This commit is contained in:
MerryMage 2018-07-30 13:31:51 +01:00
parent f2393488fe
commit f976c47008
4 changed files with 64 additions and 0 deletions

View file

@ -24,6 +24,7 @@
#include "common/mp/list.h"
#include "common/mp/lut.h"
#include "common/mp/to_tuple.h"
#include "common/mp/vlift.h"
#include "common/mp/vllift.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
@ -728,6 +729,55 @@ void EmitX64::EmitFPVectorRecipStepFused64(EmitContext& ctx, IR::Inst* inst) {
EmitRecipStepFused<u64>(code, ctx, inst);
}
/// Emits a fallback call that runs FP::FPRoundInt over every element of a vector operand.
///
/// The rounding mode (instruction argument 1) and the exact flag (instruction argument 2)
/// are immediates. One host function is instantiated per (rounding mode, exact) pair so
/// that both values are compile-time constants inside the per-element loop; the function
/// matching this instruction's immediates is looked up and handed to EmitTwoOpFallback.
///
/// Declared static so this file-local helper has internal linkage.
///
/// @tparam fsize Element width in bits, used to select the unsigned element type.
template<size_t fsize>
static void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    using FPT = mp::unsigned_integer_of_size<fsize>;

    const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
    const bool exact = inst->GetArg(2).GetU1();

    // Only these rounding modes receive an entry in the lookup table below.
    using rounding_list = mp::list<
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieEven>,
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsPlusInfinity>,
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsMinusInfinity>,
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsZero>,
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieAwayFromZero>
    >;

    using exact_list = mp::list<mp::vlift<true>, mp::vlift<false>>;

    using key_type = std::tuple<FP::RoundingMode, bool>;
    using value_type = void(*)(VectorArray<FPT>&, const VectorArray<FPT>&, FP::FPCR, FP::FPSR&);

    // Built once per instantiation: maps every (rounding mode, exact) combination to a
    // captureless lambda converted to a plain function pointer.
    static const auto lut = mp::GenerateLookupTableFromList<key_type, value_type>(
        [](auto arg) {
            return std::pair<key_type, value_type>{
                mp::to_tuple<decltype(arg)>,
                static_cast<value_type>(
                    [](VectorArray<FPT>& output, const VectorArray<FPT>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {
                        // `arg` is only named inside decltype, so no capture is required.
                        constexpr FP::RoundingMode rounding_mode = std::get<0>(mp::to_tuple<decltype(arg)>);
                        constexpr bool is_exact = std::get<1>(mp::to_tuple<decltype(arg)>);

                        for (size_t i = 0; i < output.size(); ++i) {
                            output[i] = static_cast<FPT>(FP::FPRoundInt<FPT>(input[i], fpcr, rounding_mode, is_exact, fpsr));
                        }
                    }
                )
            };
        },
        mp::cartesian_product<rounding_list, exact_list>{}
    );

    EmitTwoOpFallback(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
}
// Handles the FPVectorRoundInt32 opcode by forwarding to the EmitFPVectorRoundInt
// helper template instantiated with a 32-bit element size.
void EmitX64::EmitFPVectorRoundInt32(EmitContext& ctx, IR::Inst* inst) {
EmitFPVectorRoundInt<32>(code, ctx, inst);
}
// Handles the FPVectorRoundInt64 opcode by forwarding to the EmitFPVectorRoundInt
// helper template instantiated with a 64-bit element size.
void EmitX64::EmitFPVectorRoundInt64(EmitContext& ctx, IR::Inst* inst) {
EmitFPVectorRoundInt<64>(code, ctx, inst);
}
template<typename FPT>
static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {

View file

@ -1932,6 +1932,17 @@ U128 IREmitter::FPVectorRecipStepFused(size_t esize, const U128& a, const U128&
return {};
}
/// Emits the FPVectorRoundInt instruction for the given element size.
///
/// @param esize    Element size in bits; 32 and 64 are the handled values.
/// @param operand  Vector operand passed through to the instruction.
/// @param rounding Rounding mode, encoded as an 8-bit immediate.
/// @param exact    Exactness flag, encoded as a 1-bit immediate.
/// @return The U128 result of the emitted instruction.
U128 IREmitter::FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact) {
    // Both element sizes take the same operand list; only the opcode differs.
    const auto emit_with = [&](Opcode opcode) {
        return Inst<U128>(opcode, operand, Imm8(static_cast<u8>(rounding)), Imm1(exact));
    };

    if (esize == 32) {
        return emit_with(Opcode::FPVectorRoundInt32);
    }
    if (esize == 64) {
        return emit_with(Opcode::FPVectorRoundInt64);
    }

    // Any other element size is not handled.
    UNREACHABLE();
    return {};
}
U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) {
switch (esize) {
case 32:

View file

@ -320,6 +320,7 @@ public:
U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b);
U128 FPVectorRecipEstimate(size_t esize, const U128& a);
U128 FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b);
/// Emits FPVectorRoundInt32 or FPVectorRoundInt64 (chosen by esize) on operand,
/// passing rounding and exact as immediate arguments.
U128 FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact);
U128 FPVectorRSqrtEstimate(size_t esize, const U128& a);
U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b);
U128 FPVectorS32ToSingle(const U128& a);

View file

@ -491,6 +491,8 @@ OPCODE(FPVectorRecipEstimate32, T::U128, T::U128
OPCODE(FPVectorRecipEstimate64, T::U128, T::U128 )
OPCODE(FPVectorRecipStepFused32, T::U128, T::U128, T::U128 )
OPCODE(FPVectorRecipStepFused64, T::U128, T::U128, T::U128 )
// FPVectorRoundInt operands: vector, FP::RoundingMode encoded as U8, exact flag as U1.
OPCODE(FPVectorRoundInt32, T::U128, T::U128, T::U8, T::U1 )
OPCODE(FPVectorRoundInt64, T::U128, T::U128, T::U8, T::U1 )
OPCODE(FPVectorRSqrtEstimate32, T::U128, T::U128 )
OPCODE(FPVectorRSqrtEstimate64, T::U128, T::U128 )
OPCODE(FPVectorRSqrtStepFused32, T::U128, T::U128, T::U128 )