A64: Implement SQXTUN
This commit is contained in:
parent
6918ef7360
commit
f020dbe4ed
11 changed files with 604 additions and 457 deletions
|
@ -69,6 +69,30 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
|
||||||
ctx.reg_alloc.DefineValue(inst, xmm0);
|
ctx.reg_alloc.DefineValue(inst, xmm0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename Lambda>
|
||||||
|
static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
|
||||||
|
const auto fn = static_cast<mp::equivalent_function_type_t<Lambda>*>(lambda);
|
||||||
|
constexpr u32 stack_space = 2 * 16;
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
|
ctx.reg_alloc.EndOfAllocScope();
|
||||||
|
|
||||||
|
ctx.reg_alloc.HostCall(nullptr);
|
||||||
|
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||||
|
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
|
||||||
|
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
|
||||||
|
|
||||||
|
code.movaps(xword[code.ABI_PARAM2], arg1);
|
||||||
|
code.CallFunction(fn);
|
||||||
|
code.movaps(xmm0, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
|
||||||
|
|
||||||
|
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||||
|
|
||||||
|
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, xmm0);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename Lambda>
|
template <typename Lambda>
|
||||||
static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
|
static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
|
||||||
const auto fn = static_cast<mp::equivalent_function_type_t<Lambda>*>(lambda);
|
const auto fn = static_cast<mp::equivalent_function_type_t<Lambda>*>(lambda);
|
||||||
|
@ -2169,6 +2193,70 @@ void EmitX64::EmitVectorSignedAbsoluteDifference32(EmitContext& ctx, IR::Inst* i
|
||||||
EmitVectorSignedAbsoluteDifference(32, ctx, inst, code);
|
EmitVectorSignedAbsoluteDifference(32, ctx, inst, code);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
|
const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm();
|
||||||
|
const Xbyak::Xmm reconstructed = ctx.reg_alloc.ScratchXmm();
|
||||||
|
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
|
code.movdqa(dest, src);
|
||||||
|
code.pxor(zero, zero);
|
||||||
|
|
||||||
|
switch (original_esize) {
|
||||||
|
case 16:
|
||||||
|
code.packuswb(dest, dest);
|
||||||
|
code.movdqa(reconstructed, dest);
|
||||||
|
code.punpcklbw(reconstructed, zero);
|
||||||
|
break;
|
||||||
|
case 32:
|
||||||
|
code.packusdw(dest, dest);
|
||||||
|
code.movdqa(reconstructed, dest);
|
||||||
|
code.punpcklwd(reconstructed, zero);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
|
|
||||||
|
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
||||||
|
code.pxor(reconstructed, src);
|
||||||
|
code.ptest(reconstructed, reconstructed);
|
||||||
|
} else {
|
||||||
|
code.pcmpeqd(reconstructed, src);
|
||||||
|
code.movmskps(bit, reconstructed);
|
||||||
|
code.cmp(bit, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
code.setnz(bit.cvt8());
|
||||||
|
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit.cvt8());
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, dest);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Signed saturating narrow to unsigned for 16-bit source elements; delegates to the
// shared emitter with the source element width.
void EmitX64::EmitVectorSignedSaturatedNarrowToUnsigned16(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t source_esize = 16;
    EmitVectorSignedSaturatedNarrowToUnsigned(source_esize, code, ctx, inst);
}
|
||||||
|
|
||||||
|
// Signed saturating narrow to unsigned for 32-bit source elements.
//
// The vectorised emitter relies on packusdw for this width, which is an SSE4.1
// instruction, so it is only used when the host reports SSE4.1. Otherwise the operation is
// performed by a host call, in the same manner as the 64-bit variant.
void EmitX64::EmitVectorSignedSaturatedNarrowToUnsigned32(EmitContext& ctx, IR::Inst* inst) {
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        EmitVectorSignedSaturatedNarrowToUnsigned(32, code, ctx, inst);
        return;
    }

    EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<u16>& result, const VectorArray<s32>& a) {
        bool qc_flag = false;
        // Elements beyond the narrowed lanes are left as zero.
        result.fill(0);
        for (size_t i = 0; i < a.size(); ++i) {
            const s32 saturated = std::clamp<s32>(a[i], 0, 0xFFFF);
            result[i] = static_cast<u16>(saturated);
            // A lane saturated iff clamping changed its value.
            qc_flag |= saturated != a[i];
        }
        return qc_flag;
    });
}
|
||||||
|
|
||||||
|
// Signed saturating narrow to unsigned for 64-bit source elements, performed by a host
// call. The callback clamps each s64 lane to [0, 0xFFFFFFFF], stores it as a u32, leaves
// the remaining result lanes zero, and returns whether any lane was changed by clamping.
void EmitX64::EmitVectorSignedSaturatedNarrowToUnsigned64(EmitContext& ctx, IR::Inst* inst) {
    EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<u32>& result, const VectorArray<s64>& a) {
        result.fill(0);

        bool any_lane_saturated = false;
        for (size_t lane = 0; lane < a.size(); ++lane) {
            const s64 input = a[lane];
            const s64 clamped = std::clamp<s64>(input, 0, 0xFFFFFFFF);
            result[lane] = static_cast<u32>(clamped);
            if (clamped != input) {
                any_lane_saturated = true;
            }
        }
        return any_lane_saturated;
    });
}
|
||||||
|
|
||||||
void EmitX64::EmitVectorSub8(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitVectorSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubb);
|
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubb);
|
||||||
}
|
}
|
||||||
|
|
|
@ -622,7 +622,7 @@ INST(UADDLP, "UADDLP", "0Q101
|
||||||
INST(CMGE_zero_2, "CMGE (zero)", "0Q101110zz100000100010nnnnnddddd")
|
INST(CMGE_zero_2, "CMGE (zero)", "0Q101110zz100000100010nnnnnddddd")
|
||||||
INST(CMLE_2, "CMLE (zero)", "0Q101110zz100000100110nnnnnddddd")
|
INST(CMLE_2, "CMLE (zero)", "0Q101110zz100000100110nnnnnddddd")
|
||||||
INST(NEG_2, "NEG (vector)", "0Q101110zz100000101110nnnnnddddd")
|
INST(NEG_2, "NEG (vector)", "0Q101110zz100000101110nnnnnddddd")
|
||||||
//INST(SQXTUN_2, "SQXTUN, SQXTUN2", "0Q101110zz100001001010nnnnnddddd")
|
INST(SQXTUN_2, "SQXTUN, SQXTUN2", "0Q101110zz100001001010nnnnnddddd")
|
||||||
INST(SHLL, "SHLL, SHLL2", "0Q101110zz100001001110nnnnnddddd")
|
INST(SHLL, "SHLL, SHLL2", "0Q101110zz100001001110nnnnnddddd")
|
||||||
//INST(UQXTN_2, "UQXTN, UQXTN2", "0Q101110zz100001010010nnnnnddddd")
|
//INST(UQXTN_2, "UQXTN, UQXTN2", "0Q101110zz100001010010nnnnnddddd")
|
||||||
//INST(FCVTXN_2, "FCVTXN, FCVTXN2", "0Q1011100z100001011010nnnnnddddd")
|
//INST(FCVTXN_2, "FCVTXN, FCVTXN2", "0Q1011100z100001011010nnnnnddddd")
|
||||||
|
|
|
@ -539,7 +539,7 @@ struct TranslatorVisitor final {
|
||||||
bool NEG_1(Imm<2> size, Vec Vn, Vec Vd);
|
bool NEG_1(Imm<2> size, Vec Vn, Vec Vd);
|
||||||
bool NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
bool NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||||
bool SQXTUN_1(Imm<2> size, Vec Vn, Reg Rd);
|
bool SQXTUN_1(Imm<2> size, Vec Vn, Reg Rd);
|
||||||
bool SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Reg Rd);
|
bool SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd);
|
||||||
bool UQXTN_1(Imm<2> size, Vec Vn, Reg Rd);
|
bool UQXTN_1(Imm<2> size, Vec Vn, Reg Rd);
|
||||||
bool UQXTN_2(bool Q, Imm<2> size, Vec Vn, Reg Rd);
|
bool UQXTN_2(bool Q, Imm<2> size, Vec Vn, Reg Rd);
|
||||||
bool FCVTXN_1(bool sz, Vec Vn, Reg Rd);
|
bool FCVTXN_1(bool sz, Vec Vn, Reg Rd);
|
||||||
|
|
|
@ -275,6 +275,22 @@ bool TranslatorVisitor::NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool TranslatorVisitor::SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
|
||||||
|
if (size == 0b11) {
|
||||||
|
return ReservedValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t esize = 8 << size.ZeroExtend<size_t>();
|
||||||
|
const size_t datasize = 64;
|
||||||
|
const size_t part = Q ? 1 : 0;
|
||||||
|
|
||||||
|
const IR::U128 operand = V(2 * datasize, Vn);
|
||||||
|
const IR::U128 result = ir.VectorSignedSaturatedNarrowToUnsigned(2 * esize, operand);
|
||||||
|
|
||||||
|
Vpart(datasize, Vd, part, result);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool TranslatorVisitor::NOT(bool Q, Vec Vn, Vec Vd) {
|
bool TranslatorVisitor::NOT(bool Q, Vec Vn, Vec Vd) {
|
||||||
const size_t datasize = Q ? 128 : 64;
|
const size_t datasize = Q ? 128 : 64;
|
||||||
|
|
||||||
|
|
|
@ -1292,6 +1292,19 @@ U128 IREmitter::VectorSignedAbsoluteDifference(size_t esize, const U128& a, cons
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the VectorSignedSaturatedNarrowToUnsigned opcode matching the source element
// width (16, 32 or 64 bits). Any other width hits UNREACHABLE().
U128 IREmitter::VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a) {
    if (original_esize == 16) {
        return Inst<U128>(Opcode::VectorSignedSaturatedNarrowToUnsigned16, a);
    }
    if (original_esize == 32) {
        return Inst<U128>(Opcode::VectorSignedSaturatedNarrowToUnsigned32, a);
    }
    if (original_esize == 64) {
        return Inst<U128>(Opcode::VectorSignedSaturatedNarrowToUnsigned64, a);
    }

    UNREACHABLE();
    return {};
}
|
||||||
|
|
||||||
U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
|
U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 8:
|
case 8:
|
||||||
|
|
|
@ -249,6 +249,7 @@ public:
|
||||||
U128 VectorShuffleWords(const U128& a, u8 mask);
|
U128 VectorShuffleWords(const U128& a, u8 mask);
|
||||||
U128 VectorSignExtend(size_t original_esize, const U128& a);
|
U128 VectorSignExtend(size_t original_esize, const U128& a);
|
||||||
U128 VectorSignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
|
U128 VectorSignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
|
||||||
|
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
|
||||||
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
|
U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorZeroExtend(size_t original_esize, const U128& a);
|
U128 VectorZeroExtend(size_t original_esize, const U128& a);
|
||||||
|
|
|
@ -340,7 +340,15 @@ bool Inst::ReadsFromFPSRCumulativeSaturationBit() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Inst::WritesToFPSRCumulativeSaturationBit() const {
|
bool Inst::WritesToFPSRCumulativeSaturationBit() const {
|
||||||
|
switch (op) {
|
||||||
|
case Opcode::VectorSignedSaturatedNarrowToUnsigned16:
|
||||||
|
case Opcode::VectorSignedSaturatedNarrowToUnsigned32:
|
||||||
|
case Opcode::VectorSignedSaturatedNarrowToUnsigned64:
|
||||||
|
return true;
|
||||||
|
|
||||||
|
default:
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Inst::CausesCPUException() const {
|
bool Inst::CausesCPUException() const {
|
||||||
|
|
|
@ -347,6 +347,9 @@ OPCODE(VectorSignExtend64, T::U128, T::U128
|
||||||
OPCODE(VectorSignedAbsoluteDifference8, T::U128, T::U128, T::U128 )
|
OPCODE(VectorSignedAbsoluteDifference8, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorSignedAbsoluteDifference16, T::U128, T::U128, T::U128 )
|
OPCODE(VectorSignedAbsoluteDifference16, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorSignedAbsoluteDifference32, T::U128, T::U128, T::U128 )
|
OPCODE(VectorSignedAbsoluteDifference32, T::U128, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorSignedSaturatedNarrowToUnsigned16, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorSignedSaturatedNarrowToUnsigned32, T::U128, T::U128 )
|
||||||
|
OPCODE(VectorSignedSaturatedNarrowToUnsigned64, T::U128, T::U128 )
|
||||||
OPCODE(VectorSub8, T::U128, T::U128, T::U128 )
|
OPCODE(VectorSub8, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorSub16, T::U128, T::U128, T::U128 )
|
OPCODE(VectorSub16, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(VectorSub32, T::U128, T::U128, T::U128 )
|
OPCODE(VectorSub32, T::U128, T::U128, T::U128 )
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
|
|
||||||
#include <catch.hpp>
|
#include <catch.hpp>
|
||||||
|
|
||||||
|
#include "common/fp/fpsr.h"
|
||||||
#include "common/llvm_disassemble.h"
|
#include "common/llvm_disassemble.h"
|
||||||
#include "common/scope_exit.h"
|
#include "common/scope_exit.h"
|
||||||
#include "frontend/A64/decoder/a64.h"
|
#include "frontend/A64/decoder/a64.h"
|
||||||
|
@ -171,6 +172,7 @@ static void RunTestInstance(const Unicorn::RegisterArray& regs, const Unicorn::V
|
||||||
jit.SetPC(instructions_offset * 4);
|
jit.SetPC(instructions_offset * 4);
|
||||||
jit.SetSP(0x08000000);
|
jit.SetSP(0x08000000);
|
||||||
jit.SetFpcr(fpcr);
|
jit.SetFpcr(fpcr);
|
||||||
|
jit.SetFpsr(0);
|
||||||
jit.SetPstate(pstate);
|
jit.SetPstate(pstate);
|
||||||
jit.ClearCache();
|
jit.ClearCache();
|
||||||
uni.SetRegisters(regs);
|
uni.SetRegisters(regs);
|
||||||
|
@ -178,6 +180,7 @@ static void RunTestInstance(const Unicorn::RegisterArray& regs, const Unicorn::V
|
||||||
uni.SetPC(instructions_offset * 4);
|
uni.SetPC(instructions_offset * 4);
|
||||||
uni.SetSP(0x08000000);
|
uni.SetSP(0x08000000);
|
||||||
uni.SetFpcr(fpcr);
|
uni.SetFpcr(fpcr);
|
||||||
|
uni.SetFpsr(0);
|
||||||
uni.SetPstate(pstate);
|
uni.SetPstate(pstate);
|
||||||
uni.ClearPageCache();
|
uni.ClearPageCache();
|
||||||
|
|
||||||
|
@ -213,6 +216,7 @@ static void RunTestInstance(const Unicorn::RegisterArray& regs, const Unicorn::V
|
||||||
fmt::print("sp : {:016x} {:016x} {}\n", uni.GetSP(), jit.GetSP(), uni.GetSP() != jit.GetSP() ? "*" : "");
|
fmt::print("sp : {:016x} {:016x} {}\n", uni.GetSP(), jit.GetSP(), uni.GetSP() != jit.GetSP() ? "*" : "");
|
||||||
fmt::print("pc : {:016x} {:016x} {}\n", uni.GetPC(), jit.GetPC(), uni.GetPC() != jit.GetPC() ? "*" : "");
|
fmt::print("pc : {:016x} {:016x} {}\n", uni.GetPC(), jit.GetPC(), uni.GetPC() != jit.GetPC() ? "*" : "");
|
||||||
fmt::print("p : {:08x} {:08x} {}\n", uni.GetPstate(), jit.GetPstate(), (uni.GetPstate() & 0xF0000000) != (jit.GetPstate() & 0xF0000000) ? "*" : "");
|
fmt::print("p : {:08x} {:08x} {}\n", uni.GetPstate(), jit.GetPstate(), (uni.GetPstate() & 0xF0000000) != (jit.GetPstate() & 0xF0000000) ? "*" : "");
|
||||||
|
fmt::print("qc : {:08x} {:08x} {}\n", uni.GetFpsr(), jit.GetFpsr(), FP::FPSR{uni.GetFpsr()}.QC() != FP::FPSR{jit.GetFpsr()}.QC() ? "*" : "");
|
||||||
fmt::print("\n");
|
fmt::print("\n");
|
||||||
|
|
||||||
fmt::print("Modified memory:\n");
|
fmt::print("Modified memory:\n");
|
||||||
|
@ -255,6 +259,7 @@ static void RunTestInstance(const Unicorn::RegisterArray& regs, const Unicorn::V
|
||||||
REQUIRE((uni.GetPstate() & 0xF0000000) == (jit.GetPstate() & 0xF0000000));
|
REQUIRE((uni.GetPstate() & 0xF0000000) == (jit.GetPstate() & 0xF0000000));
|
||||||
REQUIRE(uni_env.modified_memory == jit_env.modified_memory);
|
REQUIRE(uni_env.modified_memory == jit_env.modified_memory);
|
||||||
REQUIRE(uni_env.interrupts.empty());
|
REQUIRE(uni_env.interrupts.empty());
|
||||||
|
REQUIRE(FP::FPSR{uni.GetFpsr()}.QC() == FP::FPSR{jit.GetFpsr()}.QC());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("A64: Single random instruction", "[a64]") {
|
TEST_CASE("A64: Single random instruction", "[a64]") {
|
||||||
|
|
|
@ -128,6 +128,16 @@ void Unicorn::SetFpcr(u32 value) {
|
||||||
CHECKED(uc_reg_write(uc, UC_ARM64_REG_FPCR, &value));
|
CHECKED(uc_reg_write(uc, UC_ARM64_REG_FPCR, &value));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reads UC_ARM64_REG_FPSR from the Unicorn instance and returns its value.
u32 Unicorn::GetFpsr() const {
    u32 value;
    u32* const destination = &value;
    CHECKED(uc_reg_read(uc, UC_ARM64_REG_FPSR, destination));
    return value;
}
|
||||||
|
|
||||||
|
// Writes `value` to UC_ARM64_REG_FPSR in the Unicorn instance.
void Unicorn::SetFpsr(u32 value) {
    const u32* const source = &value;
    CHECKED(uc_reg_write(uc, UC_ARM64_REG_FPSR, source));
}
|
||||||
|
|
||||||
u32 Unicorn::GetPstate() const {
|
u32 Unicorn::GetPstate() const {
|
||||||
u32 pstate;
|
u32 pstate;
|
||||||
CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &pstate));
|
CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &pstate));
|
||||||
|
|
|
@ -46,6 +46,9 @@ public:
|
||||||
u32 GetFpcr() const;
|
u32 GetFpcr() const;
|
||||||
void SetFpcr(u32 value);
|
void SetFpcr(u32 value);
|
||||||
|
|
||||||
|
u32 GetFpsr() const;
|
||||||
|
void SetFpsr(u32 value);
|
||||||
|
|
||||||
u32 GetPstate() const;
|
u32 GetPstate() const;
|
||||||
void SetPstate(u32 value);
|
void SetPstate(u32 value);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue