IR: Implement Vector{Max,Min}{Signed,Unsigned}

This commit is contained in:
parent adb7f5f86f
commit 47c0ad0fc8

4 changed files with 195 additions and 0 deletions
@@ -4,6 +4,8 @@
 * General Public License version 2 or any later version.
 */

#include <algorithm>

#include "backend_x64/abi.h"
#include "backend_x64/block_of_code.h"
#include "backend_x64/emit_x64.h"
@@ -650,6 +652,119 @@ void EmitX64::EmitVectorLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) {
    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitX64::EmitVectorMaxS8(EmitContext& ctx, IR::Inst* inst) {
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsb);
        return;
    }

    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s8, 16>& result, const std::array<s8, 16>& a, const std::array<s8, 16>& b){
        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); });
    });
}

void EmitX64::EmitVectorMaxS16(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsw);
}

void EmitX64::EmitVectorMaxS32(EmitContext& ctx, IR::Inst* inst) {
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsd);
        return;
    }

    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s32, 4>& result, const std::array<s32, 4>& a, const std::array<s32, 4>& b){
        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); });
    });
}

void EmitX64::EmitVectorMaxS64(EmitContext& ctx, IR::Inst* inst) {
    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s64, 2>& result, const std::array<s64, 2>& a, const std::array<s64, 2>& b){
        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); });
    });
}

void EmitX64::EmitVectorMaxU8(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxub);
}

void EmitX64::EmitVectorMaxU16(EmitContext& ctx, IR::Inst* inst) {
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxuw);
        return;
    }

    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u16, 8>& result, const std::array<u16, 8>& a, const std::array<u16, 8>& b){
        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); });
    });
}

void EmitX64::EmitVectorMaxU32(EmitContext& ctx, IR::Inst* inst) {
    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u32, 4>& result, const std::array<u32, 4>& a, const std::array<u32, 4>& b){
        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); });
    });
}

void EmitX64::EmitVectorMaxU64(EmitContext& ctx, IR::Inst* inst) {
    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& result, const std::array<u64, 2>& a, const std::array<u64, 2>& b){
        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); });
    });
}

void EmitX64::EmitVectorMinS8(EmitContext& ctx, IR::Inst* inst) {
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminsb);
        return;
    }

    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s8, 16>& result, const std::array<s8, 16>& a, const std::array<s8, 16>& b){
        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
    });
}

void EmitX64::EmitVectorMinS16(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminsw);
}

void EmitX64::EmitVectorMinS32(EmitContext& ctx, IR::Inst* inst) {
    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s32, 4>& result, const std::array<s32, 4>& a, const std::array<s32, 4>& b){
        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
    });
}

void EmitX64::EmitVectorMinS64(EmitContext& ctx, IR::Inst* inst) {
    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s64, 2>& result, const std::array<s64, 2>& a, const std::array<s64, 2>& b){
        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
    });
}

void EmitX64::EmitVectorMinU8(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminub);
}

void EmitX64::EmitVectorMinU16(EmitContext& ctx, IR::Inst* inst) {
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminuw);
        return;
    }

    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u16, 8>& result, const std::array<u16, 8>& a, const std::array<u16, 8>& b){
        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
    });
}

void EmitX64::EmitVectorMinU32(EmitContext& ctx, IR::Inst* inst) {
    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u32, 4>& result, const std::array<u32, 4>& a, const std::array<u32, 4>& b){
        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
    });
}

void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) {
    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& result, const std::array<u64, 2>& a, const std::array<u64, 2>& b){
        std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
    });
}

void EmitX64::EmitVectorMultiply8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
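Note (commentary, not part of the commit): the new emitters either call a native SSE instruction directly (pmaxsw, pmaxub, pminsw, pminub are baseline SSE2), gate the SSE4.1-only forms (pmaxsb, pmaxsd, pmaxuw, pminsb, pminuw) behind code.DoesCpuSupport, or use EmitTwoArgumentFallback, which applies a scalar lambda across the lanes of the two source vectors. A rough standalone sketch of what that scalar fallback computes per lane, using only the standard library (VectorMaxFallback and its parameters are illustrative names, not dynarmic API):

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <cstdint>

    // Element-wise max over a 128-bit vector viewed as N lanes of type T
    // (e.g. T = std::int8_t, N = 16 corresponds to the MaxS8 fallback).
    template <typename T, std::size_t N>
    std::array<T, N> VectorMaxFallback(const std::array<T, N>& a, const std::array<T, N>& b) {
        std::array<T, N> result{};
        std::transform(a.begin(), a.end(), b.begin(), result.begin(),
                       [](T x, T y) { return std::max(x, y); });
        return result;
    }

    // Usage sketch: auto r = VectorMaxFallback<std::int8_t, 16>(lhs, rhs);

The min variants are identical with std::min; where a suitable SSE instruction exists, a single instruction performs the same 16/8/4/2-lane comparison in hardware.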
@@ -932,6 +932,66 @@ U128 IREmitter::VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_amount) {
    return {};
}

U128 IREmitter::VectorMaxSigned(size_t esize, const U128& a, const U128& b) {
    switch (esize) {
    case 8:
        return Inst<U128>(Opcode::VectorMaxS8, a, b);
    case 16:
        return Inst<U128>(Opcode::VectorMaxS16, a, b);
    case 32:
        return Inst<U128>(Opcode::VectorMaxS32, a, b);
    case 64:
        return Inst<U128>(Opcode::VectorMaxS64, a, b);
    }
    UNREACHABLE();
    return {};
}

U128 IREmitter::VectorMaxUnsigned(size_t esize, const U128& a, const U128& b) {
    switch (esize) {
    case 8:
        return Inst<U128>(Opcode::VectorMaxU8, a, b);
    case 16:
        return Inst<U128>(Opcode::VectorMaxU16, a, b);
    case 32:
        return Inst<U128>(Opcode::VectorMaxU32, a, b);
    case 64:
        return Inst<U128>(Opcode::VectorMaxU64, a, b);
    }
    UNREACHABLE();
    return {};
}

U128 IREmitter::VectorMinSigned(size_t esize, const U128& a, const U128& b) {
    switch (esize) {
    case 8:
        return Inst<U128>(Opcode::VectorMinS8, a, b);
    case 16:
        return Inst<U128>(Opcode::VectorMinS16, a, b);
    case 32:
        return Inst<U128>(Opcode::VectorMinS32, a, b);
    case 64:
        return Inst<U128>(Opcode::VectorMinS64, a, b);
    }
    UNREACHABLE();
    return {};
}

U128 IREmitter::VectorMinUnsigned(size_t esize, const U128& a, const U128& b) {
    switch (esize) {
    case 8:
        return Inst<U128>(Opcode::VectorMinU8, a, b);
    case 16:
        return Inst<U128>(Opcode::VectorMinU16, a, b);
    case 32:
        return Inst<U128>(Opcode::VectorMinU32, a, b);
    case 64:
        return Inst<U128>(Opcode::VectorMinU64, a, b);
    }
    UNREACHABLE();
    return {};
}

U128 IREmitter::VectorMultiply(size_t esize, const U128& a, const U128& b) {
    switch (esize) {
    case 8:
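Note (commentary, not part of the commit): these IREmitter helpers only select an opcode by element size; separate signed and unsigned opcodes exist because the same lane bits order differently under the two interpretations. A tiny standalone illustration in plain C++:

    #include <algorithm>
    #include <cstdint>

    // 0x80 is 128 in an unsigned 8-bit lane but -128 in a signed one,
    // so signed and unsigned max pick opposite operands for the same bits.
    static_assert(std::max<std::uint8_t>(0x80, 0x01) == 0x80);
    static_assert(std::max<std::int8_t>(-128, 1) == 1);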
@@ -219,6 +219,10 @@ public:
    U128 VectorInterleaveLower(size_t esize, const U128& a, const U128& b);
    U128 VectorLogicalShiftLeft(size_t esize, const U128& a, u8 shift_amount);
    U128 VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_amount);
    U128 VectorMaxSigned(size_t esize, const U128& a, const U128& b);
    U128 VectorMaxUnsigned(size_t esize, const U128& a, const U128& b);
    U128 VectorMinSigned(size_t esize, const U128& a, const U128& b);
    U128 VectorMinUnsigned(size_t esize, const U128& a, const U128& b);
    U128 VectorMultiply(size_t esize, const U128& a, const U128& b);
    U128 VectorNarrow(size_t original_esize, const U128& a);
    U128 VectorNot(const U128& a);
@@ -242,6 +242,22 @@ OPCODE(VectorLogicalShiftRight8,   T::U128, T::U128, T::U8   )
OPCODE(VectorLogicalShiftRight16,  T::U128, T::U128, T::U8   )
OPCODE(VectorLogicalShiftRight32,  T::U128, T::U128, T::U8   )
OPCODE(VectorLogicalShiftRight64,  T::U128, T::U128, T::U8   )
OPCODE(VectorMaxS8,                T::U128, T::U128, T::U128 )
OPCODE(VectorMaxS16,               T::U128, T::U128, T::U128 )
OPCODE(VectorMaxS32,               T::U128, T::U128, T::U128 )
OPCODE(VectorMaxS64,               T::U128, T::U128, T::U128 )
OPCODE(VectorMaxU8,                T::U128, T::U128, T::U128 )
OPCODE(VectorMaxU16,               T::U128, T::U128, T::U128 )
OPCODE(VectorMaxU32,               T::U128, T::U128, T::U128 )
OPCODE(VectorMaxU64,               T::U128, T::U128, T::U128 )
OPCODE(VectorMinS8,                T::U128, T::U128, T::U128 )
OPCODE(VectorMinS16,               T::U128, T::U128, T::U128 )
OPCODE(VectorMinS32,               T::U128, T::U128, T::U128 )
OPCODE(VectorMinS64,               T::U128, T::U128, T::U128 )
OPCODE(VectorMinU8,                T::U128, T::U128, T::U128 )
OPCODE(VectorMinU16,               T::U128, T::U128, T::U128 )
OPCODE(VectorMinU32,               T::U128, T::U128, T::U128 )
OPCODE(VectorMinU64,               T::U128, T::U128, T::U128 )
OPCODE(VectorMultiply8,            T::U128, T::U128, T::U128 )
OPCODE(VectorMultiply16,           T::U128, T::U128, T::U128 )
OPCODE(VectorMultiply32,           T::U128, T::U128, T::U128 )