ir: Add opcode for performing polynomial multiplication

This commit is contained in:
Lioncash 2018-07-26 03:40:09 -04:00 committed by MerryMage
parent dd4ac86f8e
commit affa312d1d
4 changed files with 28 additions and 0 deletions

View file

@ -5,6 +5,7 @@
*/
#include <algorithm>
#include <bitset>
#include <functional>
#include <type_traits>
@ -1865,6 +1866,27 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* ins
ctx.reg_alloc.DefineValue(inst, a);
}
// Polynomial (carry-less) multiplication of lhs by rhs.
//
// For every set bit of lhs, a copy of rhs shifted left by that bit's position
// is XORed into the accumulator. The result is returned as T, so any bits that
// would fall beyond the width of T are discarded.
template <typename T>
static T PolynomialMultiply(T lhs, T rhs) {
    // Number of bit positions of lhs to examine, as reported by Common::BitSize.
    constexpr size_t width = Common::BitSize<T>();
    const std::bitset<width> multiplier_bits(lhs);

    T product = 0;
    for (size_t bit = 0; bit < width; ++bit) {
        if (!multiplier_bits[bit]) {
            continue;
        }
        // The shift may be evaluated in a promoted (wider) type; the cast
        // narrows the partial product back down to T.
        product = static_cast<T>(product ^ (rhs << bit));
    }
    return product;
}
// Emits VectorPolynomialMultiply8 via the generic two-argument fallback:
// every pair of u8 elements taken from a and b is combined with
// PolynomialMultiply<u8>, and the products are written to result.
void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) {
    const auto multiply_lanes = [](VectorArray<u8>& result, const VectorArray<u8>& a, const VectorArray<u8>& b) {
        std::transform(a.begin(), a.end(), b.begin(), result.begin(),
                       [](u8 lhs, u8 rhs) { return PolynomialMultiply<u8>(lhs, rhs); });
    };

    EmitTwoArgumentFallback(code, ctx, inst, multiply_lanes);
}
void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512_BITALG)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

View file

@ -1194,6 +1194,10 @@ U128 IREmitter::VectorPairedAddUnsignedWiden(size_t original_esize, const U128&
return {};
}
// Emits a VectorPolynomialMultiply8 instruction taking a and b as operands
// and returns its U128 result.
U128 IREmitter::VectorPolynomialMultiply(const U128& a, const U128& b) {
    // Only the 8-bit variant of the opcode is emitted here.
    const auto opcode = Opcode::VectorPolynomialMultiply8;
    return Inst<U128>(opcode, a, b);
}
// Emits a VectorPopulationCount instruction taking a as its only operand
// and returns its U128 result.
U128 IREmitter::VectorPopulationCount(const U128& a) {
    const auto opcode = Opcode::VectorPopulationCount;
    return Inst<U128>(opcode, a);
}

View file

@ -238,6 +238,7 @@ public:
U128 VectorPairedAddLower(size_t esize, const U128& a, const U128& b);
U128 VectorPairedAddSignedWiden(size_t original_esize, const U128& a);
U128 VectorPairedAddUnsignedWiden(size_t original_esize, const U128& a);
// Emits Opcode::VectorPolynomialMultiply8 with operands a and b.
U128 VectorPolynomialMultiply(const U128& a, const U128& b);
// Emits Opcode::VectorPopulationCount with operand a.
U128 VectorPopulationCount(const U128& a);
U128 VectorReverseBits(const U128& a);
U128 VectorRotateLeft(size_t esize, const U128& a, u8 amount);

View file

@ -330,6 +330,7 @@ OPCODE(VectorPairedAdd8, T::U128, T::U128,
OPCODE(VectorPairedAdd16, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAdd32, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAdd64, T::U128, T::U128, T::U128 )
// Polynomial multiplication of two U128 operands; the x64 backend evaluates it per 8-bit element.
OPCODE(VectorPolynomialMultiply8, T::U128, T::U128, T::U128 )
OPCODE(VectorPopulationCount, T::U128, T::U128 )
OPCODE(VectorReverseBits, T::U128, T::U128 )
OPCODE(VectorRoundingHalvingAddS8, T::U128, T::U128, T::U128 )