A64: Implement ADD (vector, vector)

This commit is contained in:
MerryMage 2018-01-21 17:45:43 +00:00
parent 896cf44f96
commit a63fc6c89b
12 changed files with 195 additions and 3 deletions

View file

@ -73,6 +73,7 @@ add_library(dynarmic
frontend/A64/translate/impl/load_store_register_immediate.cpp
frontend/A64/translate/impl/load_store_register_pair.cpp
frontend/A64/translate/impl/move_wide.cpp
frontend/A64/translate/impl/simd_three_same.cpp
frontend/A64/translate/translate.cpp
frontend/A64/translate/translate.h
frontend/A64/types.cpp

View file

@ -156,6 +156,24 @@ void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result);
}
// Emits host code for A64GetD: reads the 64-bit (D) view of a guest vector
// register out of the JIT state and binds it as the value of `inst`.
void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {
    // Vector registers sit in A64JitState::vec with a stride of two u64s each.
    const size_t index = static_cast<size_t>(inst->GetArg(0).GetA64VecRef());
    const size_t offset = offsetof(A64JitState, vec) + sizeof(u64) * 2 * index;

    const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm();
    // A movq load fills only the low quadword of dest and zeroes the upper one.
    code->movq(dest, qword[r15 + offset]);
    ctx.reg_alloc.DefineValue(inst, dest);
}
// Emits host code for A64GetQ: reads the full 128-bit (Q) view of a guest
// vector register out of the JIT state and binds it as the value of `inst`.
void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {
    // Vector registers sit in A64JitState::vec with a stride of two u64s each.
    const size_t index = static_cast<size_t>(inst->GetArg(0).GetA64VecRef());
    const size_t offset = offsetof(A64JitState, vec) + sizeof(u64) * 2 * index;

    const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm();
    // NOTE(review): movaps needs a 16-byte-aligned memory operand; the alignment
    // of A64JitState::vec is not visible from here - confirm.
    code->movaps(dest, code->xword[r15 + offset]);
    ctx.reg_alloc.DefineValue(inst, dest);
}
void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) {
Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
code->mov(result, qword[r15 + offsetof(A64JitState, sp)]);
@ -191,6 +209,25 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) {
}
}
// Emits host code for A64SetD: writes the low 64 bits of the source value to a
// guest vector register and clears the register's upper 64 bits.
void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    // Vector registers sit in A64JitState::vec with a stride of two u64s each.
    const size_t index = static_cast<size_t>(inst->GetArg(0).GetA64VecRef());
    const size_t offset = offsetof(A64JitState, vec) + sizeof(u64) * 2 * index;

    // Scratch use: the register is modified below before being stored.
    const Xbyak::Xmm value = ctx.reg_alloc.UseScratchXmm(args[1]);
    // Register-to-register movq keeps the low quadword and zeroes the upper one,
    // so the full-width store that follows also clears bits 64..127 in memory.
    code->movq(value, value);
    code->movaps(code->xword[r15 + offset], value);
}
// Emits host code for A64SetQ: writes all 128 bits of the source value to a
// guest vector register in the JIT state.
void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    // Vector registers sit in A64JitState::vec with a stride of two u64s each.
    const size_t index = static_cast<size_t>(inst->GetArg(0).GetA64VecRef());
    const size_t offset = offsetof(A64JitState, vec) + sizeof(u64) * 2 * index;

    // Read-only use: the source register is stored unmodified.
    const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[1]);
    code->movaps(code->xword[r15 + offset], value);
}
void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto addr = qword[r15 + offsetof(A64JitState, sp)];

View file

@ -2168,6 +2168,37 @@ void EmitX64<JST>::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
}
}
// Shared emitter for two-operand vector instructions.
//   code  Code generator the instruction is emitted through.
//   ctx   Emit context providing the register allocator.
//   inst  IR instruction whose two arguments are the operands.
//   fn    Xbyak member function emitting the instruction; called as fn(first, second).
// The first operand's register is defined as the result of `inst`.
static void EmitVectorOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    // The first operand is acquired as scratch because it also receives the result.
    const Xbyak::Xmm accumulator = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[1]);

    (code->*fn)(accumulator, operand);

    ctx.reg_alloc.DefineValue(inst, accumulator);
}
// Emits VectorAdd8 by routing paddb through the shared two-operand emitter.
template <typename JST>
void EmitX64<JST>::EmitVectorAdd8(EmitContext& ctx, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp add_bytes = &Xbyak::CodeGenerator::paddb;
    EmitVectorOperation(code, ctx, inst, add_bytes);
}
// Emits VectorAdd16 by routing paddw through the shared two-operand emitter.
template <typename JST>
void EmitX64<JST>::EmitVectorAdd16(EmitContext& ctx, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp add_halfwords = &Xbyak::CodeGenerator::paddw;
    EmitVectorOperation(code, ctx, inst, add_halfwords);
}
// Emits VectorAdd32 by routing paddd through the shared two-operand emitter.
template <typename JST>
void EmitX64<JST>::EmitVectorAdd32(EmitContext& ctx, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp add_words = &Xbyak::CodeGenerator::paddd;
    EmitVectorOperation(code, ctx, inst, add_words);
}
// Emits VectorAdd64 by routing paddq through the shared two-operand emitter.
template <typename JST>
void EmitX64<JST>::EmitVectorAdd64(EmitContext& ctx, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp add_doublewords = &Xbyak::CodeGenerator::paddq;
    EmitVectorOperation(code, ctx, inst, add_doublewords);
}
template <typename JST>
static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
Xbyak::Label end;

View file

@ -563,7 +563,7 @@ INST(CSEL, "CSEL", "z0011
//INST(SQRSHL_1, "SQRSHL", "01011110zz1mmmmm010111nnnnnddddd")
//INST(SQRSHL_2, "SQRSHL", "0Q001110zz1mmmmm010111nnnnnddddd")
//INST(ADD_1, "ADD (vector)", "01011110zz1mmmmm100001nnnnnddddd")
//INST(ADD_2, "ADD (vector)", "0Q001110zz1mmmmm100001nnnnnddddd")
INST(ADD_vector, "ADD (vector)", "0Q001110zz1mmmmm100001nnnnnddddd")
//INST(CMTST_1, "CMTST", "01011110zz1mmmmm100011nnnnnddddd")
//INST(CMTST_2, "CMTST", "0Q001110zz1mmmmm100011nnnnnddddd")
//INST(SQDMULH_vec_1, "SQDMULH (vector)", "01011110zz1mmmmm101101nnnnnddddd")

View file

@ -86,6 +86,14 @@ IR::U64 IREmitter::GetX(Reg reg) {
return Inst<IR::U64>(Opcode::A64GetX, IR::Value(reg));
}
// Appends an A64GetD instruction reading vector register `vec`; returns its U128 result.
IR::U128 IREmitter::GetD(Vec vec) {
    const IR::Value source(vec);
    return Inst<IR::U128>(Opcode::A64GetD, source);
}
// Appends an A64GetQ instruction reading vector register `vec`; returns its U128 result.
IR::U128 IREmitter::GetQ(Vec vec) {
    const IR::Value source(vec);
    return Inst<IR::U128>(Opcode::A64GetQ, source);
}
// Appends an A64GetSP instruction (no operands); returns its U64 result.
IR::U64 IREmitter::GetSP() {
    constexpr auto opcode = Opcode::A64GetSP;
    return Inst<IR::U64>(opcode);
}
@ -102,6 +110,14 @@ void IREmitter::SetX(const Reg reg, const IR::U64& value) {
Inst(Opcode::A64SetX, IR::Value(reg), value);
}
// Appends an A64SetD instruction writing `value` to vector register `vec`.
void IREmitter::SetD(const Vec vec, const IR::U128& value) {
    const IR::Value destination(vec);
    Inst(Opcode::A64SetD, destination, value);
}
// Appends an A64SetQ instruction writing `value` to vector register `vec`.
void IREmitter::SetQ(const Vec vec, const IR::U128& value) {
    const IR::Value destination(vec);
    Inst(Opcode::A64SetQ, destination, value);
}
// Appends an A64SetSP instruction whose only operand is `value`.
void IREmitter::SetSP(const IR::U64& value) {
    constexpr auto opcode = Opcode::A64SetSP;
    Inst(opcode, value);
}

View file

@ -51,9 +51,13 @@ public:
// Guest register reads. Each returns the IR value that represents the register contents.
IR::U32 GetW(Reg source_reg);
IR::U64 GetX(Reg source_reg);
// Vector register reads: both the D and the Q accessor are typed as U128.
IR::U128 GetD(Vec source_vec);
IR::U128 GetQ(Vec source_vec);
IR::U64 GetSP();
// Guest register writes.
void SetW(Reg dest_reg, const IR::U32& value);
void SetX(Reg dest_reg, const IR::U64& value);
// Vector register writes: both the D and the Q accessor take a U128 value.
void SetD(Vec dest_vec, const IR::U128& value);
void SetQ(Vec dest_vec, const IR::U128& value);
void SetSP(const IR::U64& value);
void SetPC(const IR::U64& value);
};

View file

@ -119,7 +119,31 @@ void TranslatorVisitor::SP(size_t bitsize, IR::U32U64 value) {
ir.SetSP(value);
break;
default:
ASSERT_MSG(false, "SP - : Invalid bitsize");
ASSERT_MSG(false, "SP - set : Invalid bitsize");
}
}
// Reads guest vector register `vec` as an IR value.
//   bitsize  Width of the view to read: 64 selects ir.GetD, 128 selects ir.GetQ.
//   vec      Vector register to read.
// Returns the U128 produced by the selected accessor.
// Any other bitsize is a translator bug and trips the assertion.
IR::U128 TranslatorVisitor::V(size_t bitsize, Vec vec) {
    // Validate up front so that no control path can reach the end of this
    // non-void function without returning. Should ASSERT_MSG ever return, an
    // invalid width is read through the Q accessor instead of invoking
    // undefined behaviour.
    ASSERT_MSG(bitsize == 64 || bitsize == 128, "V - get : Invalid bitsize");
    return bitsize == 64 ? ir.GetD(vec) : ir.GetQ(vec);
}
// Writes `value` to guest vector register `vec`.
//   bitsize  Width of the view to write: 64 selects ir.SetD, 128 selects ir.SetQ.
//   vec      Vector register to write.
//   value    U128 value to store.
// Any other bitsize trips the assertion.
void TranslatorVisitor::V(size_t bitsize, Vec vec, IR::U128 value) {
    if (bitsize == 64) {
        ir.SetD(vec, value);
    } else if (bitsize == 128) {
        ir.SetQ(vec, value);
    } else {
        ASSERT_MSG(false, "V - Set : Invalid bitsize");
    }
}

View file

@ -48,6 +48,9 @@ struct TranslatorVisitor final {
IR::U32U64 SP(size_t bitsize);
void SP(size_t bitsize, IR::U32U64 value);
// Vector register accessors. bitsize selects the view: 64 (D) or 128 (Q);
// other widths are rejected by an assertion in the definitions.
IR::U128 V(size_t bitsize, Vec vec);
void V(size_t bitsize, Vec vec, IR::U128 value);
IR::UAny Mem(IR::U64 address, size_t size, AccType acctype);
void Mem(IR::U64 address, size_t size, AccType acctype, IR::UAny value);
@ -611,7 +614,7 @@ struct TranslatorVisitor final {
bool SQRSHL_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
bool SQRSHL_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
bool ADD_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
bool ADD_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
bool ADD_vector(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
bool CMTST_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
bool CMTST_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
bool SQDMULH_vec_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd);

View file

@ -0,0 +1,39 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "frontend/A64/translate/impl/impl.h"
namespace Dynarmic {
namespace A64 {
// Translates ADD (vector): Vd = Vn + Vm, lane by lane.
//   Q     Selects a 128-bit (true) or 64-bit (false) operation.
//   size  Element size selector; lanes are 8 << size bits wide.
//   Vm    Second source register.
//   Vn    First source register.
//   Vd    Destination register.
// Returns the result of ReservedValue() for the reserved encoding, true otherwise.
bool TranslatorVisitor::ADD_vector(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
    // 64-bit lanes are only accepted for the 128-bit form.
    if (size == 0b11 && !Q) {
        return ReservedValue();
    }

    const size_t datasize = Q ? 128 : 64;
    const size_t esize = 8 << size.ZeroExtend<size_t>();

    // Read Vn before Vm so the register reads are emitted in the same order as before.
    const auto lhs = V(datasize, Vn);
    const auto rhs = V(datasize, Vm);

    const auto sum = [&] {
        if (esize == 8) {
            return ir.VectorAdd8(lhs, rhs);
        }
        if (esize == 16) {
            return ir.VectorAdd16(lhs, rhs);
        }
        if (esize == 32) {
            return ir.VectorAdd32(lhs, rhs);
        }
        // Remaining case: 64-bit lanes.
        return ir.VectorAdd64(lhs, rhs);
    }();

    V(datasize, Vd, sum);
    return true;
}
} // namespace A64
} // namespace Dynarmic

View file

@ -604,6 +604,22 @@ U32 IREmitter::PackedSelect(const U32& ge, const U32& a, const U32& b) {
return Inst<U32>(Opcode::PackedSelect, ge, a, b);
}
// Appends a VectorAdd8 instruction over operands `a` and `b`; returns its U128 result.
U128 IREmitter::VectorAdd8(const U128& a, const U128& b) {
    constexpr auto opcode = Opcode::VectorAdd8;
    return Inst<U128>(opcode, a, b);
}
// Appends a VectorAdd16 instruction over operands `a` and `b`; returns its U128 result.
U128 IREmitter::VectorAdd16(const U128& a, const U128& b) {
    constexpr auto opcode = Opcode::VectorAdd16;
    return Inst<U128>(opcode, a, b);
}
// Appends a VectorAdd32 instruction over operands `a` and `b`; returns its U128 result.
U128 IREmitter::VectorAdd32(const U128& a, const U128& b) {
    constexpr auto opcode = Opcode::VectorAdd32;
    return Inst<U128>(opcode, a, b);
}
// Appends a VectorAdd64 instruction over operands `a` and `b`; returns its U128 result.
U128 IREmitter::VectorAdd64(const U128& a, const U128& b) {
    constexpr auto opcode = Opcode::VectorAdd64;
    return Inst<U128>(opcode, a, b);
}
// Appends an FPAbs32 instruction over operand `a`; returns its U32 result.
U32 IREmitter::FPAbs32(const U32& a) {
    constexpr auto opcode = Opcode::FPAbs32;
    return Inst<U32>(opcode, a);
}

View file

@ -177,6 +177,11 @@ public:
U32 PackedAbsDiffSumS8(const U32& a, const U32& b);
U32 PackedSelect(const U32& ge, const U32& a, const U32& b);
U128 VectorAdd8(const U128& a, const U128& b);
U128 VectorAdd16(const U128& a, const U128& b);
U128 VectorAdd32(const U128& a, const U128& b);
U128 VectorAdd64(const U128& a, const U128& b);
U32 FPAbs32(const U32& a);
U64 FPAbs64(const U64& a);
U32 FPAdd32(const U32& a, const U32& b, bool fpscr_controlled);

View file

@ -40,9 +40,19 @@ A64OPC(GetCFlag, T::U1,
A64OPC(SetNZCV, T::Void, T::NZCVFlags )
A64OPC(GetW, T::U32, T::A64Reg )
A64OPC(GetX, T::U64, T::A64Reg )
//A64OPC(GetB, T::U128, T::A64Vec )
//A64OPC(GetH, T::U128, T::A64Vec )
//A64OPC(GetS, T::U128, T::A64Vec )
// Vector register reads. Both are typed U128; in the x64 backend GetD loads only the
// low 64 bits (upper half zero) while GetQ loads all 128 bits.
A64OPC(GetD, T::U128, T::A64Vec )
A64OPC(GetQ, T::U128, T::A64Vec )
A64OPC(GetSP, T::U64, )
A64OPC(SetW, T::Void, T::A64Reg, T::U32 )
A64OPC(SetX, T::Void, T::A64Reg, T::U64 )
//A64OPC(SetB, T::Void, T::A64Vec, T::U8 )
//A64OPC(SetH, T::Void, T::A64Vec, T::U16 )
//A64OPC(SetS, T::Void, T::A64Vec, T::U32 )
// Vector register writes. Both take a U128; in the x64 backend SetD keeps the low
// 64 bits and zeroes the upper half of the register, while SetQ stores all 128 bits.
A64OPC(SetD, T::Void, T::A64Vec, T::U128 )
A64OPC(SetQ, T::Void, T::A64Vec, T::U128 )
A64OPC(SetSP, T::Void, T::U64 )
A64OPC(SetPC, T::Void, T::U64 )
A64OPC(CallSupervisor, T::Void, T::U32 )
@ -149,6 +159,12 @@ OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32
OPCODE(PackedAbsDiffSumS8, T::U32, T::U32, T::U32 )
OPCODE(PackedSelect, T::U32, T::U32, T::U32, T::U32 )
// Vector instructions
// VectorAddN takes two U128 operands and yields a U128; the x64 backend lowers
// these to paddb/paddw/paddd/paddq for N = 8/16/32/64 respectively.
OPCODE(VectorAdd8, T::U128, T::U128, T::U128 )
OPCODE(VectorAdd16, T::U128, T::U128, T::U128 )
OPCODE(VectorAdd32, T::U128, T::U128, T::U128 )
OPCODE(VectorAdd64, T::U128, T::U128, T::U128 )
// Floating-point operations
OPCODE(FPAbs32, T::U32, T::U32 )
OPCODE(FPAbs64, T::U64, T::U64 )