IR: Remove VectorShuffleHighHalfwords and VectorShuffleLowHalfwords
This commit is contained in:
parent
c6667997bc
commit
9313f5ea88
7 changed files with 191 additions and 119 deletions
|
@ -1146,6 +1146,54 @@ void EmitIR<IR::Opcode::VectorReverseBits>(oaknut::CodeGenerator& code, EmitCont
|
|||
EmitTwoOpArranged<8>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.RBIT(Vresult, Voperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorReverseElementsInHalfGroups8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorReverseElementsInWordGroups8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorReverseElementsInWordGroups16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorReverseElementsInLongGroups8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorReverseElementsInLongGroups16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorReverseElementsInLongGroups32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorReduceAdd8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitReduce<8>(code, ctx, inst, [&](auto& Bresult, auto Voperand) { code.ADDV(Bresult, Voperand); });
|
||||
|
@ -1236,22 +1284,6 @@ void EmitIR<IR::Opcode::VectorRoundingShiftLeftU64>(oaknut::CodeGenerator& code,
|
|||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.URSHL(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorShuffleHighHalfwords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorShuffleLowHalfwords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorShuffleWords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
|
|
|
@ -3023,6 +3023,89 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.DefineValue(inst, data);
|
||||
}
|
||||
|
||||
// Swaps the two bytes of every 16-bit lane of the operand vector.
//
// SSE2 has no per-byte shuffle, so the swap is built from two 16-bit lane
// shifts: one copy moves each low byte up, the other moves each high byte
// down, and the two are OR-ed together.
void EmitX64::EmitVectorReverseElementsInHalfGroups8(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm swapped = ctx.reg_alloc.UseScratchXmm(operands[0]);
    const Xbyak::Xmm raised = ctx.reg_alloc.ScratchXmm();

    code.movdqa(raised, swapped);  // keep a second copy of the input
    code.psllw(raised, 8);         // low byte of each lane -> high byte
    code.psrlw(swapped, 8);        // high byte of each lane -> low byte
    code.por(swapped, raised);     // merge both halves

    ctx.reg_alloc.DefineValue(inst, swapped);
}
|
||||
|
||||
// Reverses the four bytes of every 32-bit lane of the operand vector.
//
// Done in two stages: first swap the bytes inside each 16-bit lane
// (shift/shift/or), then swap adjacent 16-bit lanes within each 32-bit lane
// using the halfword shuffles on the low and high 64 bits.
void EmitX64::EmitVectorReverseElementsInWordGroups8(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm reversed = ctx.reg_alloc.UseScratchXmm(operands[0]);
    const Xbyak::Xmm raised = ctx.reg_alloc.ScratchXmm();

    // TODO: PSHUFB

    // Stage 1: byte swap within each halfword.
    code.movdqa(raised, reversed);
    code.psllw(raised, 8);
    code.psrlw(reversed, 8);
    code.por(reversed, raised);

    // Stage 2: halfword order [3, 2, 1, 0] -> [2, 3, 0, 1] in both 64-bit halves.
    code.pshuflw(reversed, reversed, 0b10110001);
    code.pshufhw(reversed, reversed, 0b10110001);

    ctx.reg_alloc.DefineValue(inst, reversed);
}
|
||||
|
||||
// Swaps the two 16-bit halves of every 32-bit lane of the operand vector.
//
// The shuffle mask 0b10110001 exchanges adjacent halfwords; it is applied to
// the low 64 bits (pshuflw) and then to the high 64 bits (pshufhw).
void EmitX64::EmitVectorReverseElementsInWordGroups16(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm reversed = ctx.reg_alloc.UseScratchXmm(operands[0]);

    code.pshuflw(reversed, reversed, 0b10110001);
    code.pshufhw(reversed, reversed, 0b10110001);

    ctx.reg_alloc.DefineValue(inst, reversed);
}
|
||||
|
||||
// Reverses the eight bytes of every 64-bit lane of the operand vector.
//
// Done in two stages: first swap the bytes inside each 16-bit lane
// (shift/shift/or), then reverse the order of the four 16-bit lanes inside
// each 64-bit half with the halfword shuffles.
void EmitX64::EmitVectorReverseElementsInLongGroups8(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm reversed = ctx.reg_alloc.UseScratchXmm(operands[0]);
    const Xbyak::Xmm raised = ctx.reg_alloc.ScratchXmm();

    // TODO: PSHUFB

    // Stage 1: byte swap within each halfword.
    code.movdqa(raised, reversed);
    code.psllw(raised, 8);
    code.psrlw(reversed, 8);
    code.por(reversed, raised);

    // Stage 2: halfword order [3, 2, 1, 0] -> [0, 1, 2, 3] in both 64-bit halves.
    code.pshuflw(reversed, reversed, 0b00011011);
    code.pshufhw(reversed, reversed, 0b00011011);

    ctx.reg_alloc.DefineValue(inst, reversed);
}
|
||||
|
||||
// Reverses the order of the four 16-bit elements in every 64-bit lane of the
// operand vector.
//
// The shuffle mask 0b00011011 selects halfwords 3, 2, 1, 0 in that order; it
// is applied to the low 64 bits (pshuflw) and then the high 64 bits (pshufhw).
void EmitX64::EmitVectorReverseElementsInLongGroups16(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm reversed = ctx.reg_alloc.UseScratchXmm(operands[0]);

    code.pshuflw(reversed, reversed, 0b00011011);
    code.pshufhw(reversed, reversed, 0b00011011);

    ctx.reg_alloc.DefineValue(inst, reversed);
}
|
||||
|
||||
// Swaps the two 32-bit elements of every 64-bit lane of the operand vector.
//
// A single doubleword shuffle covers the whole register: mask 0b10110001
// selects source dwords 1, 0, 3, 2 for destination dwords 0, 1, 2, 3. This
// produces the same result as the former pshuflw/pshufhw pair with mask
// 0b01001110 while emitting one instruction instead of two.
void EmitX64::EmitVectorReverseElementsInLongGroups32(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);

    code.pshufd(data, data, 0b10110001);

    ctx.reg_alloc.DefineValue(inst, data);
}
|
||||
|
||||
void EmitX64::EmitVectorReduceAdd8(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
|
@ -3308,14 +3391,6 @@ static void VectorShuffleImpl(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
|
|||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
// Emits the shuffle by handing Xbyak's pshufhw emitter to the shared
// VectorShuffleImpl helper.
void EmitX64::EmitVectorShuffleHighHalfwords(EmitContext& ctx, IR::Inst* inst) {
    VectorShuffleImpl(code,
                      ctx,
                      inst,
                      &Xbyak::CodeGenerator::pshufhw);
}
|
||||
|
||||
// Emits the shuffle by handing Xbyak's pshuflw emitter to the shared
// VectorShuffleImpl helper.
void EmitX64::EmitVectorShuffleLowHalfwords(EmitContext& ctx, IR::Inst* inst) {
    VectorShuffleImpl(code,
                      ctx,
                      inst,
                      &Xbyak::CodeGenerator::pshuflw);
}
|
||||
|
||||
// Emits the shuffle by handing Xbyak's pshufd emitter to the shared
// VectorShuffleImpl helper.
void EmitX64::EmitVectorShuffleWords(EmitContext& ctx, IR::Inst* inst) {
    VectorShuffleImpl(code,
                      ctx,
                      inst,
                      &Xbyak::CodeGenerator::pshufd);
}
|
||||
|
|
|
@ -117,47 +117,18 @@ bool TranslatorVisitor::asimd_VREV(bool D, size_t sz, size_t Vd, size_t op, bool
|
|||
const auto m = ToVector(Q, Vm, M);
|
||||
const auto result = [this, m, op, sz] {
|
||||
const auto reg_m = ir.GetVector(m);
|
||||
const size_t esize = 16U << sz;
|
||||
const auto shift = static_cast<u8>(8U << sz);
|
||||
const size_t esize = 8 << sz;
|
||||
|
||||
// 64-bit regions
|
||||
if (op == 0b00) {
|
||||
IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, reg_m, shift),
|
||||
ir.VectorLogicalShiftLeft(esize, reg_m, shift));
|
||||
|
||||
switch (sz) {
|
||||
case 0: // 8-bit elements
|
||||
result = ir.VectorShuffleLowHalfwords(result, 0b00011011);
|
||||
result = ir.VectorShuffleHighHalfwords(result, 0b00011011);
|
||||
break;
|
||||
case 1: // 16-bit elements
|
||||
result = ir.VectorShuffleLowHalfwords(result, 0b01001110);
|
||||
result = ir.VectorShuffleHighHalfwords(result, 0b01001110);
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
switch (op) {
|
||||
case 0b00:
|
||||
return ir.VectorReverseElementsInLongGroups(esize, reg_m);
|
||||
case 0b01:
|
||||
return ir.VectorReverseElementsInWordGroups(esize, reg_m);
|
||||
case 0b10:
|
||||
return ir.VectorReverseElementsInHalfGroups(esize, reg_m);
|
||||
}
|
||||
|
||||
// 32-bit regions
|
||||
if (op == 0b01) {
|
||||
IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, reg_m, shift),
|
||||
ir.VectorLogicalShiftLeft(esize, reg_m, shift));
|
||||
|
||||
// If dealing with 8-bit elements we'll need to shuffle the bytes in each halfword
|
||||
// e.g. Assume the following numbers point out bytes in a 32-bit word, we're essentially
|
||||
// changing [3, 2, 1, 0] to [2, 3, 0, 1]
|
||||
if (sz == 0) {
|
||||
result = ir.VectorShuffleLowHalfwords(result, 0b10110001);
|
||||
result = ir.VectorShuffleHighHalfwords(result, 0b10110001);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// 16-bit regions
|
||||
return ir.VectorOr(ir.VectorLogicalShiftRight(esize, reg_m, 8),
|
||||
ir.VectorLogicalShiftLeft(esize, reg_m, 8));
|
||||
UNREACHABLE();
|
||||
}();
|
||||
|
||||
ir.SetVector(d, result);
|
||||
|
|
|
@ -673,81 +673,45 @@ bool TranslatorVisitor::RBIT_asimd(bool Q, Vec Vn, Vec Vd) {
|
|||
}
|
||||
|
||||
bool TranslatorVisitor::REV16_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||
if (size != 0) {
|
||||
if (size > 0) {
|
||||
return UnallocatedEncoding();
|
||||
}
|
||||
|
||||
const size_t datasize = Q ? 128 : 64;
|
||||
constexpr size_t esize = 16;
|
||||
constexpr size_t esize = 8;
|
||||
|
||||
const IR::U128 data = V(datasize, Vn);
|
||||
const IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, data, 8),
|
||||
ir.VectorLogicalShiftLeft(esize, data, 8));
|
||||
const IR::U128 result = ir.VectorReverseElementsInHalfGroups(esize, data);
|
||||
|
||||
V(datasize, Vd, result);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::REV32_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||
const u32 zext_size = size.ZeroExtend();
|
||||
|
||||
if (zext_size > 1) {
|
||||
if (size > 1) {
|
||||
return UnallocatedEncoding();
|
||||
}
|
||||
|
||||
const size_t datasize = Q ? 128 : 64;
|
||||
const size_t esize = 16 << zext_size;
|
||||
const u8 shift = static_cast<u8>(8 << zext_size);
|
||||
const size_t esize = 8 << size.ZeroExtend();
|
||||
|
||||
const IR::U128 data = V(datasize, Vn);
|
||||
|
||||
// TODO: Consider factoring byte swapping code out into its own opcode.
|
||||
// Technically the rest of the following code can be a PSHUFB
|
||||
// in the presence of SSSE3.
|
||||
IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, data, shift),
|
||||
ir.VectorLogicalShiftLeft(esize, data, shift));
|
||||
|
||||
// If dealing with 8-bit elements we'll need to shuffle the bytes in each halfword
|
||||
// e.g. Assume the following numbers point out bytes in a 32-bit word, we're essentially
|
||||
// changing [3, 2, 1, 0] to [2, 3, 0, 1]
|
||||
if (zext_size == 0) {
|
||||
result = ir.VectorShuffleLowHalfwords(result, 0b10110001);
|
||||
result = ir.VectorShuffleHighHalfwords(result, 0b10110001);
|
||||
}
|
||||
const IR::U128 result = ir.VectorReverseElementsInWordGroups(esize, data);
|
||||
|
||||
V(datasize, Vd, result);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::REV64_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||
const u32 zext_size = size.ZeroExtend();
|
||||
|
||||
if (zext_size >= 3) {
|
||||
if (size > 2) {
|
||||
return UnallocatedEncoding();
|
||||
}
|
||||
|
||||
const size_t datasize = Q ? 128 : 64;
|
||||
const size_t esize = 16 << zext_size;
|
||||
const u8 shift = static_cast<u8>(8 << zext_size);
|
||||
const size_t esize = 8 << size.ZeroExtend();
|
||||
|
||||
const IR::U128 data = V(datasize, Vn);
|
||||
|
||||
// TODO: Consider factoring byte swapping code out into its own opcode.
|
||||
// Technically the rest of the following code can be a PSHUFB
|
||||
// in the presence of SSSE3.
|
||||
IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, data, shift),
|
||||
ir.VectorLogicalShiftLeft(esize, data, shift));
|
||||
|
||||
switch (zext_size) {
|
||||
case 0: // 8-bit elements
|
||||
result = ir.VectorShuffleLowHalfwords(result, 0b00011011);
|
||||
result = ir.VectorShuffleHighHalfwords(result, 0b00011011);
|
||||
break;
|
||||
case 1: // 16-bit elements
|
||||
result = ir.VectorShuffleLowHalfwords(result, 0b01001110);
|
||||
result = ir.VectorShuffleHighHalfwords(result, 0b01001110);
|
||||
break;
|
||||
}
|
||||
const IR::U128 result = ir.VectorReverseElementsInLongGroups(esize, data);
|
||||
|
||||
V(datasize, Vd, result);
|
||||
return true;
|
||||
|
|
|
@ -1573,6 +1573,39 @@ U128 IREmitter::VectorReverseBits(const U128& a) {
|
|||
return Inst<U128>(Opcode::VectorReverseBits, a);
|
||||
}
|
||||
|
||||
// Builds the IR instruction that reverses esize-bit elements within each
// 16-bit group of `a`. Only esize == 8 has an opcode; any other value hits
// UNREACHABLE().
U128 IREmitter::VectorReverseElementsInHalfGroups(size_t esize, const U128& a) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorReverseElementsInHalfGroups8, a);
    }
    UNREACHABLE();
}
|
||||
|
||||
// Builds the IR instruction that reverses esize-bit elements within each
// 32-bit group of `a`. Supported element sizes are 8 and 16; any other value
// hits UNREACHABLE().
U128 IREmitter::VectorReverseElementsInWordGroups(size_t esize, const U128& a) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorReverseElementsInWordGroups8, a);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorReverseElementsInWordGroups16, a);
    }
    UNREACHABLE();
}
|
||||
|
||||
// Builds the IR instruction that reverses esize-bit elements within each
// 64-bit group of `a`. Supported element sizes are 8, 16 and 32; any other
// value hits UNREACHABLE().
U128 IREmitter::VectorReverseElementsInLongGroups(size_t esize, const U128& a) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorReverseElementsInLongGroups8, a);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorReverseElementsInLongGroups16, a);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorReverseElementsInLongGroups32, a);
    }
    UNREACHABLE();
}
|
||||
|
||||
U128 IREmitter::VectorReduceAdd(size_t esize, const U128& a) {
|
||||
switch (esize) {
|
||||
case 8:
|
||||
|
@ -1666,14 +1699,6 @@ U128 IREmitter::VectorRoundingShiftLeftUnsigned(size_t esize, const U128& a, con
|
|||
UNREACHABLE();
|
||||
}
|
||||
|
||||
// Builds a VectorShuffleHighHalfwords IR instruction from vector `a` and the
// 8-bit immediate `mask`.
U128 IREmitter::VectorShuffleHighHalfwords(const U128& a, u8 mask) {
    constexpr Opcode shuffle_op = Opcode::VectorShuffleHighHalfwords;
    return Inst<U128>(shuffle_op, a, mask);
}
|
||||
|
||||
// Builds a VectorShuffleLowHalfwords IR instruction from vector `a` and the
// 8-bit immediate `mask`.
U128 IREmitter::VectorShuffleLowHalfwords(const U128& a, u8 mask) {
    constexpr Opcode shuffle_op = Opcode::VectorShuffleLowHalfwords;
    return Inst<U128>(shuffle_op, a, mask);
}
|
||||
|
||||
// Builds a VectorShuffleWords IR instruction from vector `a` and the 8-bit
// immediate `mask`.
U128 IREmitter::VectorShuffleWords(const U128& a, u8 mask) {
    constexpr Opcode shuffle_op = Opcode::VectorShuffleWords;
    return Inst<U128>(shuffle_op, a, mask);
}
|
||||
|
|
|
@ -281,6 +281,9 @@ public:
|
|||
U128 VectorPolynomialMultiplyLong(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorPopulationCount(const U128& a);
|
||||
U128 VectorReverseBits(const U128& a);
|
||||
U128 VectorReverseElementsInHalfGroups(size_t esize, const U128& a);
|
||||
U128 VectorReverseElementsInWordGroups(size_t esize, const U128& a);
|
||||
U128 VectorReverseElementsInLongGroups(size_t esize, const U128& a);
|
||||
U128 VectorReduceAdd(size_t esize, const U128& a);
|
||||
U128 VectorRotateLeft(size_t esize, const U128& a, u8 amount);
|
||||
U128 VectorRotateRight(size_t esize, const U128& a, u8 amount);
|
||||
|
@ -288,8 +291,6 @@ public:
|
|||
U128 VectorRoundingHalvingAddUnsigned(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorRoundingShiftLeftSigned(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorRoundingShiftLeftUnsigned(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorShuffleHighHalfwords(const U128& a, u8 mask);
|
||||
U128 VectorShuffleLowHalfwords(const U128& a, u8 mask);
|
||||
U128 VectorShuffleWords(const U128& a, u8 mask);
|
||||
U128 VectorSignExtend(size_t original_esize, const U128& a);
|
||||
U128 VectorSignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
|
||||
|
|
|
@ -443,6 +443,12 @@ OPCODE(VectorPolynomialMultiplyLong8, U128, U128
|
|||
OPCODE(VectorPolynomialMultiplyLong64, U128, U128, U128 )
|
||||
OPCODE(VectorPopulationCount, U128, U128 )
|
||||
OPCODE(VectorReverseBits, U128, U128 )
|
||||
OPCODE(VectorReverseElementsInHalfGroups8, U128, U128 )
|
||||
OPCODE(VectorReverseElementsInWordGroups8, U128, U128 )
|
||||
OPCODE(VectorReverseElementsInWordGroups16, U128, U128 )
|
||||
OPCODE(VectorReverseElementsInLongGroups8, U128, U128 )
|
||||
OPCODE(VectorReverseElementsInLongGroups16, U128, U128 )
|
||||
OPCODE(VectorReverseElementsInLongGroups32, U128, U128 )
|
||||
OPCODE(VectorReduceAdd8, U128, U128 )
|
||||
OPCODE(VectorReduceAdd16, U128, U128 )
|
||||
OPCODE(VectorReduceAdd32, U128, U128 )
|
||||
|
@ -461,8 +467,6 @@ OPCODE(VectorRoundingShiftLeftU8, U128, U128
|
|||
OPCODE(VectorRoundingShiftLeftU16, U128, U128, U128 )
|
||||
OPCODE(VectorRoundingShiftLeftU32, U128, U128, U128 )
|
||||
OPCODE(VectorRoundingShiftLeftU64, U128, U128, U128 )
|
||||
OPCODE(VectorShuffleHighHalfwords, U128, U128, U8 )
|
||||
OPCODE(VectorShuffleLowHalfwords, U128, U128, U8 )
|
||||
OPCODE(VectorShuffleWords, U128, U128, U8 )
|
||||
OPCODE(VectorSignExtend8, U128, U128 )
|
||||
OPCODE(VectorSignExtend16, U128, U128 )
|
||||
|
|
Loading…
Reference in a new issue