emit_x64_vector{_floating_point}: Add helper alias for sizing arrays relative to vector width

Avoids needing to remember to specify the proper size of the arrays: all
that's needed is to specify the element type of the array, and the size
will automatically be deduced from it. This helps prevent oversized or
undersized arrays from being specified.
This commit is contained in:
Lioncash 2018-07-15 20:16:11 -04:00 committed by MerryMage
parent 58f3399032
commit f939bd0228
3 changed files with 34 additions and 26 deletions

View file

@ -6,6 +6,7 @@
#pragma once #pragma once
#include <type_traits>
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
@ -16,6 +17,7 @@
#include "backend_x64/reg_alloc.h" #include "backend_x64/reg_alloc.h"
#include "common/address_range.h" #include "common/address_range.h"
#include "common/bit_util.h"
#include "common/fp/rounding_mode.h" #include "common/fp/rounding_mode.h"
#include "frontend/ir/location_descriptor.h" #include "frontend/ir/location_descriptor.h"
#include "frontend/ir/terminal.h" #include "frontend/ir/terminal.h"
@ -29,6 +31,14 @@ namespace Dynarmic::BackendX64 {
class BlockOfCode; class BlockOfCode;
using A64FullVectorWidth = std::integral_constant<size_t, 128>;
// Array alias that always sizes itself according to the given type T
// relative to the size of a vector register. e.g. T = u32 would result
// in a std::array<u32, 4>.
template <typename T>
using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>;
struct EmitContext { struct EmitContext {
EmitContext(RegAlloc& reg_alloc, IR::Block& block); EmitContext(RegAlloc& reg_alloc, IR::Block& block);

View file

@ -752,7 +752,7 @@ void EmitX64::EmitVectorGreaterS64(EmitContext& ctx, IR::Inst* inst) {
return; return;
} }
EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& result, const std::array<s64, 2>& a, const std::array<s64, 2>& b){ EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u64>& result, const VectorArray<s64>& a, const VectorArray<s64>& b) {
for (size_t i = 0; i < result.size(); ++i) { for (size_t i = 0; i < result.size(); ++i) {
result[i] = (a[i] > b[i]) ? ~u64(0) : 0; result[i] = (a[i] > b[i]) ? ~u64(0) : 0;
} }
@ -1140,49 +1140,49 @@ static constexpr T LogicalVShift(T x, T y) {
} }
void EmitX64::EmitVectorLogicalVShiftS8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalVShiftS8(EmitContext& ctx, IR::Inst* inst) {
EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s8, 16>& result, const std::array<s8, 16>& a, const std::array<s8, 16>& b) { EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s8>& result, const VectorArray<s8>& a, const VectorArray<s8>& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<s8>); std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<s8>);
}); });
} }
void EmitX64::EmitVectorLogicalVShiftS16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalVShiftS16(EmitContext& ctx, IR::Inst* inst) {
EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s16, 8>& result, const std::array<s16, 8>& a, const std::array<s16, 8>& b){ EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s16>& result, const VectorArray<s16>& a, const VectorArray<s16>& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<s16>); std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<s16>);
}); });
} }
void EmitX64::EmitVectorLogicalVShiftS32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalVShiftS32(EmitContext& ctx, IR::Inst* inst) {
EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s32, 4>& result, const std::array<s32, 4>& a, const std::array<s32, 4>& b){ EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s32>& result, const VectorArray<s32>& a, const VectorArray<s32>& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<s32>); std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<s32>);
}); });
} }
void EmitX64::EmitVectorLogicalVShiftS64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalVShiftS64(EmitContext& ctx, IR::Inst* inst) {
EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s64, 2>& result, const std::array<s64, 2>& a, const std::array<s64, 2>& b){ EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s64>& result, const VectorArray<s64>& a, const VectorArray<s64>& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<s64>); std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<s64>);
}); });
} }
void EmitX64::EmitVectorLogicalVShiftU8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalVShiftU8(EmitContext& ctx, IR::Inst* inst) {
EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u8, 16>& result, const std::array<u8, 16>& a, const std::array<u8, 16>& b) { EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u8>& result, const VectorArray<u8>& a, const VectorArray<u8>& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<u8>); std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<u8>);
}); });
} }
void EmitX64::EmitVectorLogicalVShiftU16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalVShiftU16(EmitContext& ctx, IR::Inst* inst) {
EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u16, 8>& result, const std::array<u16, 8>& a, const std::array<u16, 8>& b){ EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u16>& result, const VectorArray<u16>& a, const VectorArray<u16>& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<u16>); std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<u16>);
}); });
} }
void EmitX64::EmitVectorLogicalVShiftU32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalVShiftU32(EmitContext& ctx, IR::Inst* inst) {
EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u32, 4>& result, const std::array<u32, 4>& a, const std::array<u32, 4>& b){ EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u32>& result, const VectorArray<u32>& a, const VectorArray<u32>& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<u32>); std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<u32>);
}); });
} }
void EmitX64::EmitVectorLogicalVShiftU64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalVShiftU64(EmitContext& ctx, IR::Inst* inst) {
EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& result, const std::array<u64, 2>& a, const std::array<u64, 2>& b){ EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u64>& result, const VectorArray<u64>& a, const VectorArray<u64>& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<u64>); std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift<u64>);
}); });
} }
@ -1239,7 +1239,7 @@ void EmitX64::EmitVectorMaxS64(EmitContext& ctx, IR::Inst* inst) {
return; return;
} }
EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s64, 2>& result, const std::array<s64, 2>& a, const std::array<s64, 2>& b){ EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s64>& result, const VectorArray<s64>& a, const VectorArray<s64>& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); }); std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); });
}); });
} }
@ -1297,7 +1297,7 @@ void EmitX64::EmitVectorMaxU64(EmitContext& ctx, IR::Inst* inst) {
return; return;
} }
EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& result, const std::array<u64, 2>& a, const std::array<u64, 2>& b){ EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u64>& result, const VectorArray<u64>& a, const VectorArray<u64>& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); }); std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); });
}); });
} }
@ -1354,7 +1354,7 @@ void EmitX64::EmitVectorMinS64(EmitContext& ctx, IR::Inst* inst) {
return; return;
} }
EmitTwoArgumentFallback(code, ctx, inst, [](std::array<s64, 2>& result, const std::array<s64, 2>& a, const std::array<s64, 2>& b){ EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s64>& result, const VectorArray<s64>& a, const VectorArray<s64>& b){
std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
}); });
} }
@ -1417,7 +1417,7 @@ void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) {
return; return;
} }
EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& result, const std::array<u64, 2>& a, const std::array<u64, 2>& b){ EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u64>& result, const VectorArray<u64>& a, const VectorArray<u64>& b){
std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); });
}); });
} }
@ -1878,7 +1878,7 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
return; return;
} }
EmitOneArgumentFallback(code, ctx, inst, [](std::array<u8, 16>& result, const std::array<u8, 16>& a){ EmitOneArgumentFallback(code, ctx, inst, [](VectorArray<u8>& result, const VectorArray<u8>& a) {
std::transform(a.begin(), a.end(), result.begin(), [](u8 val) { std::transform(a.begin(), a.end(), result.begin(), [](u8 val) {
return static_cast<u8>(Common::BitCount(val)); return static_cast<u8>(Common::BitCount(val));
}); });
@ -2105,7 +2105,7 @@ void EmitX64::EmitVectorSignExtend32(EmitContext& ctx, IR::Inst* inst) {
return; return;
} }
EmitOneArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& result, const std::array<u32, 4>& a){ EmitOneArgumentFallback(code, ctx, inst, [](VectorArray<u64>& result, const VectorArray<u32>& a) {
for (size_t i = 0; i < result.size(); ++i) { for (size_t i = 0; i < result.size(); ++i) {
result[i] = Common::SignExtend<32, u64>(a[i]); result[i] = Common::SignExtend<32, u64>(a[i]);
} }
@ -2113,7 +2113,7 @@ void EmitX64::EmitVectorSignExtend32(EmitContext& ctx, IR::Inst* inst) {
} }
void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) {
EmitOneArgumentFallback(code, ctx, inst, [](std::array<u64, 2>& result, const std::array<u64, 2>& a){ EmitOneArgumentFallback(code, ctx, inst, [](VectorArray<u64>& result, const VectorArray<u64>& a) {
result[1] = (a[0] >> 63) ? ~u64(0) : 0; result[1] = (a[0] >> 63) ? ~u64(0) : 0;
result[0] = a[0]; result[0] = a[0];
}); });

View file

@ -64,10 +64,8 @@ static void HandleNaNs(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& xm
code.movaps(xword[code.ABI_PARAM2], xmm_a); code.movaps(xword[code.ABI_PARAM2], xmm_a);
code.movaps(xword[code.ABI_PARAM3], xmm_b); code.movaps(xword[code.ABI_PARAM3], xmm_b);
using Elements = std::integral_constant<size_t, 128 / Common::BitSize<T>()>; code.CallFunction(static_cast<void(*)(VectorArray<T>&, const VectorArray<T>&, const VectorArray<T>&)>(
using RegArray = std::array<T, Elements::value>; [](VectorArray<T>& result, const VectorArray<T>& a, const VectorArray<T>& b) {
code.CallFunction(static_cast<void(*)(RegArray&, const RegArray&, const RegArray&)>(
[](RegArray& result, const RegArray& a, const RegArray& b) {
for (size_t i = 0; i < result.size(); ++i) { for (size_t i = 0; i < result.size(); ++i) {
auto [first, second] = IndexFunction(i, a, b); auto [first, second] = IndexFunction(i, a, b);
if (auto r = FP::ProcessNaNs(first, second)) { if (auto r = FP::ProcessNaNs(first, second)) {
@ -87,26 +85,26 @@ static void HandleNaNs(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& xm
code.SwitchToNearCode(); code.SwitchToNearCode();
} }
static std::tuple<u32, u32> DefaultIndexFunction32(size_t i, const std::array<u32, 4>& a, const std::array<u32, 4>& b) { static std::tuple<u32, u32> DefaultIndexFunction32(size_t i, const VectorArray<u32>& a, const VectorArray<u32>& b) {
return std::make_tuple(a[i], b[i]); return std::make_tuple(a[i], b[i]);
} }
static std::tuple<u64, u64> DefaultIndexFunction64(size_t i, const std::array<u64, 2>& a, const std::array<u64, 2>& b) { static std::tuple<u64, u64> DefaultIndexFunction64(size_t i, const VectorArray<u64>& a, const VectorArray<u64>& b) {
return std::make_tuple(a[i], b[i]); return std::make_tuple(a[i], b[i]);
} }
static std::tuple<u32, u32> PairedIndexFunction32(size_t i, const std::array<u32, 4>& a, const std::array<u32, 4>& b) { static std::tuple<u32, u32> PairedIndexFunction32(size_t i, const VectorArray<u32>& a, const VectorArray<u32>& b) {
if (i < 2) { if (i < 2) {
return std::make_tuple(a[2 * i], a[2 * i + 1]); return std::make_tuple(a[2 * i], a[2 * i + 1]);
} }
return std::make_tuple(b[2 * (i - 2)], b[2 * (i - 2) + 1]); return std::make_tuple(b[2 * (i - 2)], b[2 * (i - 2) + 1]);
} }
static std::tuple<u64, u64> PairedIndexFunction64(size_t i, const std::array<u64, 2>& a, const std::array<u64, 2>& b) { static std::tuple<u64, u64> PairedIndexFunction64(size_t i, const VectorArray<u64>& a, const VectorArray<u64>& b) {
return i == 0 ? std::make_tuple(a[0], a[1]) : std::make_tuple(b[0], b[1]); return i == 0 ? std::make_tuple(a[0], a[1]) : std::make_tuple(b[0], b[1]);
} }
static std::tuple<u32, u32> PairedLowerIndexFunction32(size_t i, const std::array<u32, 4>& a, const std::array<u32, 4>& b) { static std::tuple<u32, u32> PairedLowerIndexFunction32(size_t i, const VectorArray<u32>& a, const VectorArray<u32>& b) {
switch (i) { switch (i) {
case 0: case 0:
return std::make_tuple(a[0], a[1]); return std::make_tuple(a[0], a[1]);
@ -117,7 +115,7 @@ static std::tuple<u32, u32> PairedLowerIndexFunction32(size_t i, const std::arra
} }
} }
static std::tuple<u64, u64> PairedLowerIndexFunction64(size_t i, const std::array<u64, 2>& a, const std::array<u64, 2>& b) { static std::tuple<u64, u64> PairedLowerIndexFunction64(size_t i, const VectorArray<u64>& a, const VectorArray<u64>& b) {
return i == 0 ? std::make_tuple(a[0], b[0]) : std::make_tuple(u64(0), u64(0)); return i == 0 ? std::make_tuple(a[0], b[0]) : std::make_tuple(u64(0), u64(0));
} }