IR: Add VectorMultiply{Signed,Unsigned}Widen instructions

Polyfill for x86-64 backend
Merry 2022-08-02 11:03:54 +01:00 committed by merry
parent bbf0179d30
commit 61d509dda2
11 changed files with 180 additions and 15 deletions
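
The new IR opcodes take two 128-bit vectors, widen the elements in each operand's low half to twice their width (with sign- or zero-extension), and multiply element-wise to fill the full 128-bit result; this matches the extend-then-multiply sequence the polyfill below emits. A scalar model of the signed 8-bit case, for illustration only (nothing here is code from the commit):

#include <array>
#include <cstddef>
#include <cstdint>

// Illustrative model of VectorMultiplySignedWiden8: the eight 8-bit lanes in
// each operand's low 64 bits are sign-extended to 16 bits and multiplied
// lane-wise, producing eight 16-bit lanes (a full 128-bit result).
std::array<int16_t, 8> MultiplySignedWiden8(const std::array<int8_t, 8>& n,
                                            const std::array<int8_t, 8>& m) {
    std::array<int16_t, 8> result{};
    for (std::size_t i = 0; i < 8; ++i) {
        result[i] = static_cast<int16_t>(static_cast<int16_t>(n[i]) * static_cast<int16_t>(m[i]));
    }
    return result;
}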


@@ -1001,6 +1001,54 @@ void EmitIR<IR::Opcode::VectorMultiply64>(oaknut::CodeGenerator& code, EmitConte
    ASSERT_FALSE("Unimplemented");
}
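
// The ARM64 backend gains only stub emitters for the new opcodes in this
// commit; lowering is handled by the polyfill pass enabled in the x64
// interfaces below.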
template<>
void EmitIR<IR::Opcode::VectorMultiplySignedWiden8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    (void)code;
    (void)ctx;
    (void)inst;
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::VectorMultiplySignedWiden16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    (void)code;
    (void)ctx;
    (void)inst;
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::VectorMultiplySignedWiden32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    (void)code;
    (void)ctx;
    (void)inst;
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::VectorMultiplyUnsignedWiden8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    (void)code;
    (void)ctx;
    (void)inst;
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::VectorMultiplyUnsignedWiden16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    (void)code;
    (void)ctx;
    (void)inst;
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::VectorMultiplyUnsignedWiden32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    (void)code;
    (void)ctx;
    (void)inst;
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::VectorNarrow16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    (void)code;


@@ -55,6 +55,7 @@ static std::function<void(BlockOfCode&)> GenRCP(const A32::UserConfig& conf) {
static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code) {
    return Optimization::PolyfillOptions{
        .sha256 = !code.HasHostFeature(HostFeature::SHA),
        .vector_multiply_widen = true,
    };
}


@@ -51,6 +51,7 @@ static std::function<void(BlockOfCode&)> GenRCP(const A64::UserConfig& conf) {
static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code) {
    return Optimization::PolyfillOptions{
        .sha256 = !code.HasHostFeature(HostFeature::SHA),
        .vector_multiply_widen = true,
    };
}


@@ -2221,6 +2221,30 @@ void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) {
    ctx.reg_alloc.DefineValue(inst, tmp2);
}
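
// The x64 interfaces enable the widening-multiply polyfill unconditionally,
// so these opcodes are always rewritten before reaching the emitter; hitting
// one of these handlers would indicate a bug.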
void EmitX64::EmitVectorMultiplySignedWiden8(EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unexpected VectorMultiplySignedWiden8");
}

void EmitX64::EmitVectorMultiplySignedWiden16(EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unexpected VectorMultiplySignedWiden16");
}

void EmitX64::EmitVectorMultiplySignedWiden32(EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unexpected VectorMultiplySignedWiden32");
}

void EmitX64::EmitVectorMultiplyUnsignedWiden8(EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unexpected VectorMultiplyUnsignedWiden8");
}

void EmitX64::EmitVectorMultiplyUnsignedWiden16(EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unexpected VectorMultiplyUnsignedWiden16");
}

void EmitX64::EmitVectorMultiplyUnsignedWiden32(EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unexpected VectorMultiplyUnsignedWiden32");
}

void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);


@@ -909,11 +909,30 @@ bool TranslatorVisitor::asimd_VABDL(bool U, bool D, size_t sz, size_t Vn, size_t
 }

 bool TranslatorVisitor::asimd_VMLAL(bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool op, bool N, bool M, size_t Vm) {
-    return WideInstruction(*this, U, D, sz, Vn, Vd, N, M, Vm, WidenBehaviour::Both, [this, op](size_t esize, const auto& reg_d, const auto& reg_n, const auto& reg_m) {
-        const auto multiply = ir.VectorMultiply(esize, reg_n, reg_m);
-        return op ? ir.VectorSub(esize, reg_d, multiply)
-                  : ir.VectorAdd(esize, reg_d, multiply);
-    });
+    const size_t esize = 8U << sz;
+
+    if (sz == 0b11) {
+        return DecodeError();
+    }
+
+    if (mcl::bit::get_bit<0>(Vd)) {
+        return UndefinedInstruction();
+    }
+
+    const auto d = ToVector(true, Vd, D);
+    const auto m = ToVector(false, Vm, M);
+    const auto n = ToVector(false, Vn, N);
+
+    const auto reg_d = ir.GetVector(d);
+    const auto reg_m = ir.GetVector(m);
+    const auto reg_n = ir.GetVector(n);
+    const auto multiply = U ? ir.VectorMultiplyUnsignedWiden(esize, reg_n, reg_m)
+                            : ir.VectorMultiplySignedWiden(esize, reg_n, reg_m);
+    const auto result = op ? ir.VectorSub(esize * 2, reg_d, multiply)
+                           : ir.VectorAdd(esize * 2, reg_d, multiply);
+
+    ir.SetVector(d, result);
+    return true;
 }

 bool TranslatorVisitor::asimd_VMULL(bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool P, bool N, bool M, size_t Vm) {
@@ -930,14 +949,11 @@ bool TranslatorVisitor::asimd_VMULL(bool U, bool D, size_t sz, size_t Vn, size_t
     const auto m = ToVector(false, Vm, M);
     const auto n = ToVector(false, Vn, N);

-    const auto extend_reg = [&](const auto& reg) {
-        return U ? ir.VectorZeroExtend(esize, reg) : ir.VectorSignExtend(esize, reg);
-    };
-
     const auto reg_n = ir.GetVector(n);
     const auto reg_m = ir.GetVector(m);
     const auto result = P ? ir.VectorPolynomialMultiplyLong(esize, reg_n, reg_m)
-                          : ir.VectorMultiply(2 * esize, extend_reg(reg_n), extend_reg(reg_m));
+                        : U ? ir.VectorMultiplyUnsignedWiden(esize, reg_n, reg_m)
+                            : ir.VectorMultiplySignedWiden(esize, reg_n, reg_m);

     ir.SetVector(d, result);
     return true;


@@ -85,11 +85,10 @@ bool ScalarMultiplyLong(TranslatorVisitor& v, bool U, bool D, size_t sz, size_t
     const auto [m, index] = GetScalarLocation(esize, M, Vm);
     const auto scalar = v.ir.VectorGetElement(esize, v.ir.GetVector(m), index);
-    const auto ext_scalar = U ? (esize == 16 ? IR::U32U64{v.ir.ZeroExtendToWord(scalar)} : IR::U32U64{v.ir.ZeroExtendToLong(scalar)})
-                              : (esize == 16 ? IR::U32U64{v.ir.SignExtendToWord(scalar)} : IR::U32U64{v.ir.SignExtendToLong(scalar)});
-    const auto reg_n = U ? v.ir.VectorZeroExtend(esize, v.ir.GetVector(n)) : v.ir.VectorSignExtend(esize, v.ir.GetVector(n));
-    const auto reg_m = v.ir.VectorBroadcast(esize * 2, ext_scalar);
-    const auto addend = v.ir.VectorMultiply(esize * 2, reg_n, reg_m);
+    const auto reg_n = v.ir.GetVector(n);
+    const auto reg_m = v.ir.VectorBroadcast(esize, scalar);
+    const auto addend = U ? v.ir.VectorMultiplyUnsignedWiden(esize, reg_n, reg_m)
+                          : v.ir.VectorMultiplySignedWiden(esize, reg_n, reg_m);

     const auto result = [&] {
         switch (multiply) {
         case MultiplyBehavior::Multiply:


@@ -1404,6 +1404,30 @@ U128 IREmitter::VectorMultiply(size_t esize, const U128& a, const U128& b) {
    UNREACHABLE();
}

U128 IREmitter::VectorMultiplySignedWiden(size_t esize, const U128& a, const U128& b) {
    switch (esize) {
    case 8:
        return Inst<U128>(Opcode::VectorMultiplySignedWiden8, a, b);
    case 16:
        return Inst<U128>(Opcode::VectorMultiplySignedWiden16, a, b);
    case 32:
        return Inst<U128>(Opcode::VectorMultiplySignedWiden32, a, b);
    }
    UNREACHABLE();
}

U128 IREmitter::VectorMultiplyUnsignedWiden(size_t esize, const U128& a, const U128& b) {
    switch (esize) {
    case 8:
        return Inst<U128>(Opcode::VectorMultiplyUnsignedWiden8, a, b);
    case 16:
        return Inst<U128>(Opcode::VectorMultiplyUnsignedWiden16, a, b);
    case 32:
        return Inst<U128>(Opcode::VectorMultiplyUnsignedWiden32, a, b);
    }
    UNREACHABLE();
}

U128 IREmitter::VectorNarrow(size_t original_esize, const U128& a) {
    switch (original_esize) {
    case 16:


@@ -264,6 +264,8 @@ public:
    U128 VectorMinSigned(size_t esize, const U128& a, const U128& b);
    U128 VectorMinUnsigned(size_t esize, const U128& a, const U128& b);
    U128 VectorMultiply(size_t esize, const U128& a, const U128& b);
    U128 VectorMultiplySignedWiden(size_t esize, const U128& a, const U128& b);
    U128 VectorMultiplyUnsignedWiden(size_t esize, const U128& a, const U128& b);
    U128 VectorNarrow(size_t original_esize, const U128& a);
    U128 VectorNot(const U128& a);
    U128 VectorOr(const U128& a, const U128& b);


@@ -402,6 +402,12 @@ OPCODE(VectorMultiply8, U128, U128, U128 )
OPCODE(VectorMultiply16, U128, U128, U128 )
OPCODE(VectorMultiply32, U128, U128, U128 )
OPCODE(VectorMultiply64, U128, U128, U128 )
OPCODE(VectorMultiplySignedWiden8, U128, U128, U128 )
OPCODE(VectorMultiplySignedWiden16, U128, U128, U128 )
OPCODE(VectorMultiplySignedWiden32, U128, U128, U128 )
OPCODE(VectorMultiplyUnsignedWiden8, U128, U128, U128 )
OPCODE(VectorMultiplyUnsignedWiden16, U128, U128, U128 )
OPCODE(VectorMultiplyUnsignedWiden32, U128, U128, U128 )
OPCODE(VectorNarrow16, U128, U128 )
OPCODE(VectorNarrow32, U128, U128 )
OPCODE(VectorNarrow64, U128, U128 )


@@ -22,6 +22,7 @@ namespace Dynarmic::Optimization {
struct PolyfillOptions {
    bool sha256 = false;
    bool vector_multiply_widen = false;

    bool operator==(const PolyfillOptions&) const = default;
};
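
A minimal usage sketch (hypothetical driver code; PolyfillOptions and PolyfillPass are the real entry points from this commit, while the surrounding setup is assumed):

// A backend without native widening multiplies sets the new flag so the
// opcodes are rewritten before code emission, as the x64 interfaces above
// do unconditionally. `block` is assumed to be an IR::Block in scope.
Dynarmic::Optimization::PolyfillOptions polyfill{
    .vector_multiply_widen = true,
};
Dynarmic::Optimization::PolyfillPass(block, polyfill);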


@@ -138,6 +138,19 @@ void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
    inst.ReplaceUsesWith(part1 ? x : y);
}

template<size_t esize, bool is_signed>
void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
    IR::U128 n = (IR::U128)inst.GetArg(0);
    IR::U128 m = (IR::U128)inst.GetArg(1);

    const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n);
    const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m);

    const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m);

    inst.ReplaceUsesWith(result);
}
} // namespace
void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
@@ -166,6 +179,36 @@ void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
                PolyfillSHA256Hash(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplySignedWiden8:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<8, true>(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplySignedWiden16:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<16, true>(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplySignedWiden32:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<32, true>(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplyUnsignedWiden8:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<8, false>(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplyUnsignedWiden16:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<16, false>(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplyUnsignedWiden32:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<32, false>(ir, inst);
            }
            break;
        default:
            break;
        }
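
As a sanity check on the lowering, here is a scalar model of one lane in the unsigned 8-bit case (illustrative only, not part of the commit): the direct widening multiply and the polyfilled extend-then-multiply agree for all inputs, because zero-extending to 16 bits before multiplying preserves the full 8x8 -> 16-bit product.

#include <cstdint>

// One lane of VectorMultiplyUnsignedWiden8 versus its polyfilled form.
bool LaneLoweringMatches(uint8_t n, uint8_t m) {
    const uint16_t direct = static_cast<uint16_t>(n * m);            // widening multiply lane
    const uint16_t wide_n = static_cast<uint16_t>(n);                // VectorZeroExtend(8, ...) lane
    const uint16_t wide_m = static_cast<uint16_t>(m);                // VectorZeroExtend(8, ...) lane
    const uint16_t lowered = static_cast<uint16_t>(wide_n * wide_m); // VectorMultiply(16, ...) lane
    return direct == lowered;                                        // always true
}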