ir: Add opcode for performing polynomial multiplication
This commit is contained in:
parent
dd4ac86f8e
commit
affa312d1d
4 changed files with 28 additions and 0 deletions
|
@ -5,6 +5,7 @@
|
|||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <bitset>
|
||||
#include <functional>
|
||||
#include <type_traits>
|
||||
|
||||
|
@ -1865,6 +1866,27 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* ins
|
|||
ctx.reg_alloc.DefineValue(inst, a);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static T PolynomialMultiply(T lhs, T rhs) {
|
||||
constexpr size_t bit_size = Common::BitSize<T>();
|
||||
const std::bitset<bit_size> operand(lhs);
|
||||
|
||||
T res = 0;
|
||||
for (size_t i = 0; i < bit_size; i++) {
|
||||
if (operand[i]) {
|
||||
res ^= rhs << i;
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
// Emits the VectorPolynomialMultiply8 IR instruction through
// EmitTwoArgumentFallback, supplying a callback that fills `dst` with the
// element-wise PolynomialMultiply<u8> of the two input vectors.
void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) {
    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u8>& dst, const VectorArray<u8>& lhs, const VectorArray<u8>& rhs) {
        // One polynomial product per pair of corresponding u8 elements.
        const auto multiply_lane = [](u8 x, u8 y) { return PolynomialMultiply<u8>(x, y); };
        std::transform(lhs.begin(), lhs.end(), rhs.begin(), dst.begin(), multiply_lane);
    });
}
|
||||
|
||||
void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
|
||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512_BITALG)) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
|
|
@ -1194,6 +1194,10 @@ U128 IREmitter::VectorPairedAddUnsignedWiden(size_t original_esize, const U128&
|
|||
return {};
|
||||
}
|
||||
|
||||
// Builds an Opcode::VectorPolynomialMultiply8 instruction with operands
// `a` and `b` via Inst<U128> and returns the resulting U128 value.
// NOTE(review): the "8" suffix suggests 8-bit elements - confirm against the
// backend implementation.
U128 IREmitter::VectorPolynomialMultiply(const U128& a, const U128& b) {
    U128 product = Inst<U128>(Opcode::VectorPolynomialMultiply8, a, b);
    return product;
}
|
||||
|
||||
// Builds an Opcode::VectorPopulationCount instruction with operand `a` via
// Inst<U128> and returns the resulting U128 value.
U128 IREmitter::VectorPopulationCount(const U128& a) {
    U128 count = Inst<U128>(Opcode::VectorPopulationCount, a);
    return count;
}
|
||||
|
|
|
@ -238,6 +238,7 @@ public:
|
|||
U128 VectorPairedAddLower(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorPairedAddSignedWiden(size_t original_esize, const U128& a);
|
||||
U128 VectorPairedAddUnsignedWiden(size_t original_esize, const U128& a);
|
||||
// Polynomial multiplication of vectors `a` and `b`; returns the U128 result.
// NOTE(review): unlike neighbouring declarations this takes no esize
// parameter - presumably only one element size is supported; confirm.
U128 VectorPolynomialMultiply(const U128& a, const U128& b);
|
||||
U128 VectorPopulationCount(const U128& a);
|
||||
U128 VectorReverseBits(const U128& a);
|
||||
U128 VectorRotateLeft(size_t esize, const U128& a, u8 amount);
|
||||
|
|
|
@ -330,6 +330,7 @@ OPCODE(VectorPairedAdd8, T::U128, T::U128,
|
|||
OPCODE(VectorPairedAdd16, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedAdd32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedAdd64, T::U128, T::U128, T::U128 )
|
||||
// Polynomial multiply opcode. NOTE(review): the type list appears to be the
// result type followed by the two operand types (all U128) - confirm against
// the OPCODE macro definition.
OPCODE(VectorPolynomialMultiply8, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPopulationCount, T::U128, T::U128 )
|
||||
OPCODE(VectorReverseBits, T::U128, T::U128 )
|
||||
OPCODE(VectorRoundingHalvingAddS8, T::U128, T::U128, T::U128 )
|
||||
|
|
Loading…
Reference in a new issue