IR: Remove VectorShuffleWords

Introduce VectorRotateWholeVectorRight
This commit is contained in:
Merry 2022-08-06 18:40:28 +01:00 committed by merry
parent 8fb37e0e4f
commit f92cb5e66f
9 changed files with 41 additions and 40 deletions

View file

@ -1321,6 +1321,15 @@ void EmitIR<IR::Opcode::VectorReduceAdd64>(oaknut::CodeGenerator& code, EmitCont
EmitReduce<64>(code, ctx, inst, [&](auto& Dresult, auto Voperand) { code.ADDP(Dresult, Voperand); });
}
// Lowers VectorRotateWholeVectorRight: rotates the whole vector operand right by an
// immediate number of bits. The rotation is emitted as a single EXT that takes the same
// register for both source operands, so the amount has to be a whole number of bytes.
template<>
void EmitIR<IR::Opcode::VectorRotateWholeVectorRight>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitImmShift<8>(code, ctx, inst, [&](auto Vresult, auto Voperand, u8 shift_amount) {
        // EXT is addressed in bytes, so sub-byte rotations cannot be expressed.
        ASSERT(shift_amount % 8 == 0);

        // Wrap the amount to the 128-bit vector width first, then convert bits to bytes.
        const u8 wrapped_bits = shift_amount % 128;
        const u8 byte_offset = wrapped_bits / 8;

        code.EXT(Vresult, Voperand, Voperand, byte_offset);
    });
}
template<>
void EmitIR<IR::Opcode::VectorRoundingHalvingAddS8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
EmitThreeOpArranged<8>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.SRHADD(Vresult, Va, Vb); });
@ -1391,14 +1400,6 @@ void EmitIR<IR::Opcode::VectorRoundingShiftLeftU64>(oaknut::CodeGenerator& code,
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.URSHL(Vresult, Va, Vb); });
}
// Placeholder lowering for VectorShuffleWords: none of the arguments are used and
// reaching this function always trips the assertion below.
template<>
void EmitIR<IR::Opcode::VectorShuffleWords>([[maybe_unused]] oaknut::CodeGenerator& code, [[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst) {
    ASSERT_FALSE("Unimplemented");
}
template<>
void EmitIR<IR::Opcode::VectorSignExtend8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
EmitTwoOpArrangedWiden<8>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.SXTL(Vresult, Voperand); });

View file

@ -4,6 +4,7 @@
*/
#include <algorithm>
#include <bit>
#include <bitset>
#include <cstdlib>
#include <type_traits>
@ -3320,6 +3321,20 @@ void EmitX64::EmitVectorReduceAdd64(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, data);
}
// Lowers VectorRotateWholeVectorRight: rotates the whole XMM operand right by an immediate
// number of bits, which must be a multiple of 32. The rotation is emitted as one pshufd
// whose selector is derived from the rotation amount.
void EmitX64::EmitVectorRotateWholeVectorRight(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
    const Xbyak::Xmm destination = ctx.reg_alloc.ScratchXmm();

    const u8 rotate_bits = args[1].GetImmediateU8();
    ASSERT(rotate_bits % 32 == 0);

    // 0b11100100 is the pshufd selector that leaves every 32-bit word in place
    // (word i is taken from word i). Each selector field is 2 bits wide, so rotating
    // the selector right by 2 bits per word rotates the vector right by one word.
    const int selector_rotation = rotate_bits / 32 * 2;
    const u8 selector = std::rotr<u8>(0b11100100, selector_rotation);

    code.pshufd(destination, source, selector);

    ctx.reg_alloc.DefineValue(inst, destination);
}
static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -3501,22 +3516,6 @@ void EmitX64::EmitVectorRoundingShiftLeftU64(EmitContext& ctx, IR::Inst* inst) {
});
}
// Shared emitter for shuffle-style instructions that take an 8-bit immediate.
// `fn` is the Xbyak::CodeGenerator member function to emit; it is invoked as
// fn(destination, source, immediate), where the immediate comes from the IR
// instruction's second argument and the destination becomes the instruction's value.
static void VectorShuffleImpl(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx&, const Xbyak::Operand&, u8)) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
    const Xbyak::Xmm destination = ctx.reg_alloc.ScratchXmm();
    const u8 selector = args[1].GetImmediateU8();

    (code.*fn)(destination, source, selector);

    ctx.reg_alloc.DefineValue(inst, destination);
}
// Lowers VectorShuffleWords by delegating to VectorShuffleImpl with pshufd as the
// instruction to emit; the shuffle mask is read from the IR instruction by the helper.
void EmitX64::EmitVectorShuffleWords(EmitContext& ctx, IR::Inst* inst) {
VectorShuffleImpl(code, ctx, inst, &Xbyak::CodeGenerator::pshufd);
}
void EmitX64::EmitVectorSignExtend8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::SSE41)) {

View file

@ -103,7 +103,7 @@ bool TranslatorVisitor::ZIP2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
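// TODO: Urgh. This interleaves the lower halves, then moves the upper 64 bits of that result down and zeroes the rest; find a more direct way.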
const IR::U128 interleaved = ir.VectorInterleaveLower(esize, operand1, operand2);
return ir.VectorZeroUpper(ir.VectorShuffleWords(interleaved, 0b01001110));
return ir.VectorZeroUpper(ir.VectorRotateWholeVectorRight(interleaved, 64));
}();
V(datasize, Vd, result);

View file

@ -39,7 +39,7 @@ IR::U128 SHA1HashUpdate(IREmitter& ir, Vec Vm, Vec Vn, Vec Vd, SHA1HashUpdateFun
// Move each 32-bit element to the left once
// e.g. [3, 2, 1, 0], becomes [2, 1, 0, 3]
const IR::U128 shuffled_x = ir.VectorShuffleWords(x, 0b10010011);
const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96);
x = ir.VectorSetElement(32, shuffled_x, 0, y);
y = high_x;
}
@ -91,7 +91,7 @@ bool TranslatorVisitor::SHA1SU1(Vec Vn, Vec Vd) {
const IR::U128 n = ir.GetQ(Vn);
// Shuffle down the whole vector and zero out the top 32 bits
const IR::U128 shuffled_n = ir.VectorSetElement(32, ir.VectorShuffleWords(n, 0b00111001), 3, ir.Imm32(0));
const IR::U128 shuffled_n = ir.VectorSetElement(32, ir.VectorRotateWholeVectorRight(n, 32), 3, ir.Imm32(0));
const IR::U128 t = ir.VectorEor(d, shuffled_n);
const IR::U128 rotated_t = ir.VectorRotateLeft(32, t, 1);

View file

@ -141,7 +141,7 @@ IR::U128 SM4Hash(IREmitter& ir, Vec Vn, Vec Vd, SM4RotationType type) {
const IR::U32 intval_low_word = ir.VectorGetElement(32, intval_vec, 0);
const IR::U32 round_result_low_word = ir.VectorGetElement(32, roundresult, 0);
const IR::U32 intval = SM4Rotation(ir, intval_low_word, round_result_low_word, type);
roundresult = ir.VectorShuffleWords(roundresult, 0b00111001);
roundresult = ir.VectorRotateWholeVectorRight(roundresult, 32);
roundresult = ir.VectorSetElement(32, roundresult, 3, intval);
}
@ -235,7 +235,7 @@ bool TranslatorVisitor::SM3PARTW1(Vec Vm, Vec Vn, Vec Vd) {
const IR::U128 result_low_three_words = [&] {
// Move the top-most 3 words down one element (i.e. [3, 2, 1, 0] -> [0, 3, 2, 1])
const IR::U128 shuffled_m = ir.VectorShuffleWords(m, 0b00111001);
const IR::U128 shuffled_m = ir.VectorRotateWholeVectorRight(m, 32);
// We treat the uppermost word as junk data and don't touch/use it explicitly for now.
// Given we don't do anything with it yet, the fact we EOR into it doesn't matter.

View file

@ -1695,6 +1695,11 @@ U128 IREmitter::VectorRotateRight(size_t esize, const U128& a, u8 amount) {
VectorLogicalShiftLeft(esize, a, static_cast<u8>(esize - amount)));
}
// Emits a VectorRotateWholeVectorRight instruction on `a`.
// `amount` is the rotation in bits and must be a multiple of 32; it is attached to the
// instruction as an 8-bit immediate.
U128 IREmitter::VectorRotateWholeVectorRight(const U128& a, u8 amount) {
    // Only rotations by whole 32-bit words are accepted.
    ASSERT(amount % 32 == 0);

    const auto rotation = Imm8(amount);
    return Inst<U128>(Opcode::VectorRotateWholeVectorRight, a, rotation);
}
U128 IREmitter::VectorRoundingHalvingAddSigned(size_t esize, const U128& a, const U128& b) {
switch (esize) {
case 8:
@ -1751,10 +1756,6 @@ U128 IREmitter::VectorRoundingShiftLeftUnsigned(size_t esize, const U128& a, con
UNREACHABLE();
}
// Emits a VectorShuffleWords instruction on `a`; `mask` is forwarded unchanged as the
// instruction's second argument.
U128 IREmitter::VectorShuffleWords(const U128& a, u8 mask) {
    U128 shuffled = Inst<U128>(Opcode::VectorShuffleWords, a, mask);
    return shuffled;
}
U128 IREmitter::VectorSignExtend(size_t original_esize, const U128& a) {
switch (original_esize) {
case 8:

View file

@ -291,11 +291,11 @@ public:
U128 VectorReduceAdd(size_t esize, const U128& a);
U128 VectorRotateLeft(size_t esize, const U128& a, u8 amount);
U128 VectorRotateRight(size_t esize, const U128& a, u8 amount);
U128 VectorRotateWholeVectorRight(const U128& a, u8 amount);
U128 VectorRoundingHalvingAddSigned(size_t esize, const U128& a, const U128& b);
U128 VectorRoundingHalvingAddUnsigned(size_t esize, const U128& a, const U128& b);
U128 VectorRoundingShiftLeftSigned(size_t esize, const U128& a, const U128& b);
U128 VectorRoundingShiftLeftUnsigned(size_t esize, const U128& a, const U128& b);
U128 VectorShuffleWords(const U128& a, u8 mask);
U128 VectorSignExtend(size_t original_esize, const U128& a);
U128 VectorSignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
UpperAndLower VectorSignedMultiply(size_t esize, const U128& a, const U128& b);

View file

@ -465,6 +465,7 @@ OPCODE(VectorReduceAdd8, U128, U128
OPCODE(VectorReduceAdd16, U128, U128 )
OPCODE(VectorReduceAdd32, U128, U128 )
OPCODE(VectorReduceAdd64, U128, U128 )
OPCODE(VectorRotateWholeVectorRight, U128, U128, U8 )
OPCODE(VectorRoundingHalvingAddS8, U128, U128, U128 )
OPCODE(VectorRoundingHalvingAddS16, U128, U128, U128 )
OPCODE(VectorRoundingHalvingAddS32, U128, U128, U128 )
@ -479,7 +480,6 @@ OPCODE(VectorRoundingShiftLeftU8, U128, U128
OPCODE(VectorRoundingShiftLeftU16, U128, U128, U128 )
OPCODE(VectorRoundingShiftLeftU32, U128, U128, U128 )
OPCODE(VectorRoundingShiftLeftU64, U128, U128, U128 )
OPCODE(VectorShuffleWords, U128, U128, U8 )
OPCODE(VectorSignExtend8, U128, U128 )
OPCODE(VectorSignExtend16, U128, U128 )
OPCODE(VectorSignExtend32, U128, U128 )

View file

@ -45,7 +45,7 @@ void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
const IR::U128 T0 = ir.VectorExtract(y, z, 32);
const IR::U128 lower_half = [&] {
const IR::U128 T = ir.VectorShuffleWords(z, 0b01001110);
const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64);
const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17);
const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19);
const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10);
@ -61,8 +61,8 @@ void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3));
// Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2]
const IR::U128 shuffled_d = ir.VectorShuffleWords(x, 0b01001110);
const IR::U128 shuffled_T0 = ir.VectorShuffleWords(T0, 0b01001110);
const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64);
const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64);
const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0));
return ir.VectorGetElement(64, tmp5, 0);
@ -128,8 +128,8 @@ void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
const IR::U32 new_low_y = ir.Add(t, high_x);
// Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3]
const IR::U128 shuffled_x = ir.VectorShuffleWords(x, 0b10010011);
const IR::U128 shuffled_y = ir.VectorShuffleWords(y, 0b10010011);
const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96);
const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96);
x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x);
y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y);