A64: Implement ADD (vector, vector)
This commit is contained in:
parent
896cf44f96
commit
a63fc6c89b
12 changed files with 195 additions and 3 deletions
|
@ -73,6 +73,7 @@ add_library(dynarmic
|
|||
frontend/A64/translate/impl/load_store_register_immediate.cpp
|
||||
frontend/A64/translate/impl/load_store_register_pair.cpp
|
||||
frontend/A64/translate/impl/move_wide.cpp
|
||||
frontend/A64/translate/impl/simd_three_same.cpp
|
||||
frontend/A64/translate/translate.cpp
|
||||
frontend/A64/translate/translate.h
|
||||
frontend/A64/types.cpp
|
||||
|
|
|
@ -156,6 +156,24 @@ void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
// Emits x64 code for the A64GetD IR instruction.
// Reads the low 64 bits of the guest vector register named by argument 0 from
// the JIT state (addressed relative to r15) into a fresh scratch XMM register
// and defines that register as the result of `inst`.
void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {
    const A64::Vec source_vec = inst->GetArg(0).GetA64VecRef();
    // Each vector register occupies two u64 slots inside A64JitState::vec.
    const size_t slot_offset = offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(source_vec);

    const Xbyak::Xmm loaded = ctx.reg_alloc.ScratchXmm();
    // MOVQ from memory fills the low quadword and clears the upper quadword.
    code->movq(loaded, qword[r15 + slot_offset]);
    ctx.reg_alloc.DefineValue(inst, loaded);
}
|
||||
|
||||
// Emits x64 code for the A64GetQ IR instruction.
// Reads the full 128 bits of the guest vector register named by argument 0
// from the JIT state (addressed relative to r15) into a fresh scratch XMM
// register and defines that register as the result of `inst`.
void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {
    const A64::Vec source_vec = inst->GetArg(0).GetA64VecRef();
    // Each vector register occupies two u64 slots inside A64JitState::vec.
    const size_t slot_offset = offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(source_vec);

    const Xbyak::Xmm loaded = ctx.reg_alloc.ScratchXmm();
    // NOTE(review): MOVAPS needs a 16-byte aligned operand; presumably
    // A64JitState::vec is aligned accordingly - confirm.
    code->movaps(loaded, code->xword[r15 + slot_offset]);
    ctx.reg_alloc.DefineValue(inst, loaded);
}
|
||||
|
||||
void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
|
||||
code->mov(result, qword[r15 + offsetof(A64JitState, sp)]);
|
||||
|
@ -191,6 +209,25 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) {
|
|||
}
|
||||
}
|
||||
|
||||
// Emits x64 code for the A64SetD IR instruction.
// Argument 0 names the destination guest vector register, argument 1 is the
// value. The low 64 bits of the value are kept, the upper 64 bits are zeroed,
// and the whole 128-bit slot in the JIT state is overwritten.
void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const A64::Vec dest_vec = inst->GetArg(0).GetA64VecRef();
    // Each vector register occupies two u64 slots inside A64JitState::vec.
    const size_t slot_offset = offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(dest_vec);

    // A scratch register is required because the value is modified in place.
    const Xbyak::Xmm value = ctx.reg_alloc.UseScratchXmm(args[1]);
    // Register-to-register MOVQ clears bits 64..127 of the destination.
    code->movq(value, value);
    // NOTE(review): MOVAPS needs a 16-byte aligned operand; presumably
    // A64JitState::vec is aligned accordingly - confirm.
    code->movaps(code->xword[r15 + slot_offset], value);
}
|
||||
|
||||
// Emits x64 code for the A64SetQ IR instruction.
// Argument 0 names the destination guest vector register, argument 1 is the
// 128-bit value, which is stored unmodified into the JIT state.
void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const A64::Vec dest_vec = inst->GetArg(0).GetA64VecRef();
    // Each vector register occupies two u64 slots inside A64JitState::vec.
    const size_t slot_offset = offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(dest_vec);

    // The value is only read, so a non-scratch use is sufficient.
    const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[1]);
    // NOTE(review): MOVAPS needs a 16-byte aligned operand; presumably
    // A64JitState::vec is aligned accordingly - confirm.
    code->movaps(code->xword[r15 + slot_offset], value);
}
|
||||
|
||||
void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto addr = qword[r15 + offsetof(A64JitState, sp)];
|
||||
|
|
|
@ -2168,6 +2168,37 @@ void EmitX64<JST>::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
|
|||
}
|
||||
}
|
||||
|
||||
// Shared emitter for two-operand vector instructions of the form `dst = dst OP src`.
//   code - code generator on which the instruction is emitted
//   ctx  - emit context providing the register allocator
//   inst - IR instruction whose two arguments are the operands
//   fn   - Xbyak member function that emits the desired SSE instruction
// Argument 0 is taken in a scratch register because the instruction
// overwrites it; that register then becomes the result of `inst`.
static void EmitVectorOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm accumulator = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[1]);

    (code->*fn)(accumulator, operand);

    ctx.reg_alloc.DefineValue(inst, accumulator);
}
|
||||
|
||||
template <typename JST>
|
||||
void EmitX64<JST>::EmitVectorAdd8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddb);
|
||||
}
|
||||
|
||||
template <typename JST>
|
||||
void EmitX64<JST>::EmitVectorAdd16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddw);
|
||||
}
|
||||
|
||||
template <typename JST>
|
||||
void EmitX64<JST>::EmitVectorAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddd);
|
||||
}
|
||||
|
||||
template <typename JST>
|
||||
void EmitX64<JST>::EmitVectorAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddq);
|
||||
}
|
||||
|
||||
template <typename JST>
|
||||
static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
|
||||
Xbyak::Label end;
|
||||
|
|
|
@ -563,7 +563,7 @@ INST(CSEL, "CSEL", "z0011
|
|||
//INST(SQRSHL_1, "SQRSHL", "01011110zz1mmmmm010111nnnnnddddd")
|
||||
//INST(SQRSHL_2, "SQRSHL", "0Q001110zz1mmmmm010111nnnnnddddd")
|
||||
//INST(ADD_1, "ADD (vector)", "01011110zz1mmmmm100001nnnnnddddd")
|
||||
//INST(ADD_2, "ADD (vector)", "0Q001110zz1mmmmm100001nnnnnddddd")
|
||||
INST(ADD_vector, "ADD (vector)", "0Q001110zz1mmmmm100001nnnnnddddd")
|
||||
//INST(CMTST_1, "CMTST", "01011110zz1mmmmm100011nnnnnddddd")
|
||||
//INST(CMTST_2, "CMTST", "0Q001110zz1mmmmm100011nnnnnddddd")
|
||||
//INST(SQDMULH_vec_1, "SQDMULH (vector)", "01011110zz1mmmmm101101nnnnnddddd")
|
||||
|
|
|
@ -86,6 +86,14 @@ IR::U64 IREmitter::GetX(Reg reg) {
|
|||
return Inst<IR::U64>(Opcode::A64GetX, IR::Value(reg));
|
||||
}
|
||||
|
||||
// Appends an A64GetD instruction that reads vector register `vec` and returns
// its result as a 128-bit IR value.
IR::U128 IREmitter::GetD(Vec vec) {
    const IR::Value vec_operand(vec);
    return Inst<IR::U128>(Opcode::A64GetD, vec_operand);
}
|
||||
|
||||
// Appends an A64GetQ instruction that reads vector register `vec` and returns
// its result as a 128-bit IR value.
IR::U128 IREmitter::GetQ(Vec vec) {
    const IR::Value vec_operand(vec);
    return Inst<IR::U128>(Opcode::A64GetQ, vec_operand);
}
|
||||
|
||||
// Appends an A64GetSP instruction and returns its result as a 64-bit IR value.
IR::U64 IREmitter::GetSP() {
    IR::U64 stack_pointer = Inst<IR::U64>(Opcode::A64GetSP);
    return stack_pointer;
}
|
||||
|
@ -102,6 +110,14 @@ void IREmitter::SetX(const Reg reg, const IR::U64& value) {
|
|||
Inst(Opcode::A64SetX, IR::Value(reg), value);
|
||||
}
|
||||
|
||||
// Appends an A64SetD instruction that writes `value` to vector register `vec`.
void IREmitter::SetD(const Vec vec, const IR::U128& value) {
    const IR::Value vec_operand(vec);
    Inst(Opcode::A64SetD, vec_operand, value);
}
|
||||
|
||||
// Appends an A64SetQ instruction that writes `value` to vector register `vec`.
void IREmitter::SetQ(const Vec vec, const IR::U128& value) {
    const IR::Value vec_operand(vec);
    Inst(Opcode::A64SetQ, vec_operand, value);
}
|
||||
|
||||
// Appends an A64SetSP instruction whose single operand is `value`.
void IREmitter::SetSP(const IR::U64& value) {
    constexpr Opcode set_sp = Opcode::A64SetSP;
    Inst(set_sp, value);
}
|
||||
|
|
|
@ -51,9 +51,13 @@ public:
|
|||
|
||||
IR::U32 GetW(Reg source_reg);
|
||||
IR::U64 GetX(Reg source_reg);
|
||||
IR::U128 GetD(Vec source_vec);
|
||||
IR::U128 GetQ(Vec source_vec);
|
||||
IR::U64 GetSP();
|
||||
void SetW(Reg dest_reg, const IR::U32& value);
|
||||
void SetX(Reg dest_reg, const IR::U64& value);
|
||||
void SetD(Vec dest_vec, const IR::U128& value);
|
||||
void SetQ(Vec dest_vec, const IR::U128& value);
|
||||
void SetSP(const IR::U64& value);
|
||||
void SetPC(const IR::U64& value);
|
||||
};
|
||||
|
|
|
@ -119,7 +119,31 @@ void TranslatorVisitor::SP(size_t bitsize, IR::U32U64 value) {
|
|||
ir.SetSP(value);
|
||||
break;
|
||||
default:
|
||||
ASSERT_MSG(false, "SP - : Invalid bitsize");
|
||||
ASSERT_MSG(false, "SP - set : Invalid bitsize");
|
||||
}
|
||||
}
|
||||
|
||||
// Reads vector register `vec` at the requested width.
//   bitsize == 64  -> ir.GetD(vec)
//   bitsize == 128 -> ir.GetQ(vec)
// Any other width trips the assertion below.
IR::U128 TranslatorVisitor::V(size_t bitsize, Vec vec) {
    if (bitsize == 64) {
        return ir.GetD(vec);
    }
    if (bitsize == 128) {
        return ir.GetQ(vec);
    }
    // NOTE(review): nothing is returned past this point; this relies on
    // ASSERT_MSG(false, ...) never returning - confirm.
    ASSERT_MSG(false, "V - get : Invalid bitsize");
}
|
||||
|
||||
void TranslatorVisitor::V(size_t bitsize, Vec vec, IR::U128 value) {
|
||||
switch (bitsize) {
|
||||
case 64:
|
||||
ir.SetD(vec, value);
|
||||
return;
|
||||
case 128:
|
||||
ir.SetQ(vec, value);
|
||||
return;
|
||||
default:
|
||||
ASSERT_MSG(false, "V - Set : Invalid bitsize");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -48,6 +48,9 @@ struct TranslatorVisitor final {
|
|||
IR::U32U64 SP(size_t bitsize);
|
||||
void SP(size_t bitsize, IR::U32U64 value);
|
||||
|
||||
IR::U128 V(size_t bitsize, Vec vec);
|
||||
void V(size_t bitsize, Vec vec, IR::U128 value);
|
||||
|
||||
IR::UAny Mem(IR::U64 address, size_t size, AccType acctype);
|
||||
void Mem(IR::U64 address, size_t size, AccType acctype, IR::UAny value);
|
||||
|
||||
|
@ -611,7 +614,7 @@ struct TranslatorVisitor final {
|
|||
bool SQRSHL_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
|
||||
bool SQRSHL_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
|
||||
bool ADD_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
|
||||
bool ADD_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
|
||||
bool ADD_vector(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
|
||||
bool CMTST_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
|
||||
bool CMTST_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
|
||||
bool SQDMULH_vec_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd);
|
||||
|
|
39
src/frontend/A64/translate/impl/simd_three_same.cpp
Normal file
39
src/frontend/A64/translate/impl/simd_three_same.cpp
Normal file
|
@ -0,0 +1,39 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include "frontend/A64/translate/impl/impl.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace A64 {
|
||||
|
||||
bool TranslatorVisitor::ADD_vector(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
|
||||
if (size == 0b11 && !Q) return ReservedValue();
|
||||
const size_t esize = 8 << size.ZeroExtend<size_t>();
|
||||
const size_t datasize = Q ? 128 : 64;
|
||||
|
||||
auto operand1 = V(datasize, Vn);
|
||||
auto operand2 = V(datasize, Vm);
|
||||
|
||||
auto result = [&]{
|
||||
switch (esize) {
|
||||
case 8:
|
||||
return ir.VectorAdd8(operand1, operand2);
|
||||
case 16:
|
||||
return ir.VectorAdd16(operand1, operand2);
|
||||
case 32:
|
||||
return ir.VectorAdd32(operand1, operand2);
|
||||
default:
|
||||
return ir.VectorAdd64(operand1, operand2);
|
||||
}
|
||||
}();
|
||||
|
||||
V(datasize, Vd, result);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace A64
|
||||
} // namespace Dynarmic
|
|
@ -604,6 +604,22 @@ U32 IREmitter::PackedSelect(const U32& ge, const U32& a, const U32& b) {
|
|||
return Inst<U32>(Opcode::PackedSelect, ge, a, b);
|
||||
}
|
||||
|
||||
// Appends a VectorAdd8 instruction with operands `a` and `b` and returns its
// 128-bit result.
U128 IREmitter::VectorAdd8(const U128& a, const U128& b) {
    U128 sum = Inst<U128>(Opcode::VectorAdd8, a, b);
    return sum;
}
|
||||
|
||||
// Appends a VectorAdd16 instruction with operands `a` and `b` and returns its
// 128-bit result.
U128 IREmitter::VectorAdd16(const U128& a, const U128& b) {
    U128 sum = Inst<U128>(Opcode::VectorAdd16, a, b);
    return sum;
}
|
||||
|
||||
// Appends a VectorAdd32 instruction with operands `a` and `b` and returns its
// 128-bit result.
U128 IREmitter::VectorAdd32(const U128& a, const U128& b) {
    U128 sum = Inst<U128>(Opcode::VectorAdd32, a, b);
    return sum;
}
|
||||
|
||||
// Appends a VectorAdd64 instruction with operands `a` and `b` and returns its
// 128-bit result.
U128 IREmitter::VectorAdd64(const U128& a, const U128& b) {
    U128 sum = Inst<U128>(Opcode::VectorAdd64, a, b);
    return sum;
}
|
||||
|
||||
// Appends an FPAbs32 instruction with operand `a` and returns its 32-bit result.
U32 IREmitter::FPAbs32(const U32& a) {
    U32 magnitude = Inst<U32>(Opcode::FPAbs32, a);
    return magnitude;
}
|
||||
|
|
|
@ -177,6 +177,11 @@ public:
|
|||
U32 PackedAbsDiffSumS8(const U32& a, const U32& b);
|
||||
U32 PackedSelect(const U32& ge, const U32& a, const U32& b);
|
||||
|
||||
U128 VectorAdd8(const U128& a, const U128& b);
|
||||
U128 VectorAdd16(const U128& a, const U128& b);
|
||||
U128 VectorAdd32(const U128& a, const U128& b);
|
||||
U128 VectorAdd64(const U128& a, const U128& b);
|
||||
|
||||
U32 FPAbs32(const U32& a);
|
||||
U64 FPAbs64(const U64& a);
|
||||
U32 FPAdd32(const U32& a, const U32& b, bool fpscr_controlled);
|
||||
|
|
|
@ -40,9 +40,19 @@ A64OPC(GetCFlag, T::U1,
|
|||
A64OPC(SetNZCV, T::Void, T::NZCVFlags )
|
||||
A64OPC(GetW, T::U32, T::A64Reg )
|
||||
A64OPC(GetX, T::U64, T::A64Reg )
|
||||
//A64OPC(GetB, T::U128, T::A64Vec )
|
||||
//A64OPC(GetH, T::U128, T::A64Vec )
|
||||
//A64OPC(GetS, T::U128, T::A64Vec )
|
||||
A64OPC(GetD, T::U128, T::A64Vec )
|
||||
A64OPC(GetQ, T::U128, T::A64Vec )
|
||||
A64OPC(GetSP, T::U64, )
|
||||
A64OPC(SetW, T::Void, T::A64Reg, T::U32 )
|
||||
A64OPC(SetX, T::Void, T::A64Reg, T::U64 )
|
||||
//A64OPC(SetB, T::Void, T::A64Vec, T::U8 )
|
||||
//A64OPC(SetH, T::Void, T::A64Vec, T::U16 )
|
||||
//A64OPC(SetS, T::Void, T::A64Vec, T::U32 )
|
||||
A64OPC(SetD, T::Void, T::A64Vec, T::U128 )
|
||||
A64OPC(SetQ, T::Void, T::A64Vec, T::U128 )
|
||||
A64OPC(SetSP, T::Void, T::U64 )
|
||||
A64OPC(SetPC, T::Void, T::U64 )
|
||||
A64OPC(CallSupervisor, T::Void, T::U32 )
|
||||
|
@ -149,6 +159,12 @@ OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32
|
|||
OPCODE(PackedAbsDiffSumS8, T::U32, T::U32, T::U32 )
|
||||
OPCODE(PackedSelect, T::U32, T::U32, T::U32, T::U32 )
|
||||
|
||||
// Vector instructions
|
||||
OPCODE(VectorAdd8, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorAdd16, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorAdd32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorAdd64, T::U128, T::U128, T::U128 )
|
||||
|
||||
// Floating-point operations
|
||||
OPCODE(FPAbs32, T::U32, T::U32 )
|
||||
OPCODE(FPAbs64, T::U64, T::U64 )
|
||||
|
|
Loading…
Reference in a new issue