diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d5a69974..a2ad95bd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -73,6 +73,7 @@ add_library(dynarmic frontend/A64/translate/impl/load_store_register_immediate.cpp frontend/A64/translate/impl/load_store_register_pair.cpp frontend/A64/translate/impl/move_wide.cpp + frontend/A64/translate/impl/simd_three_same.cpp frontend/A64/translate/translate.cpp frontend/A64/translate/translate.h frontend/A64/types.cpp diff --git a/src/backend_x64/a64_emit_x64.cpp b/src/backend_x64/a64_emit_x64.cpp index dc569d8a..cc2238e5 100644 --- a/src/backend_x64/a64_emit_x64.cpp +++ b/src/backend_x64/a64_emit_x64.cpp @@ -156,6 +156,24 @@ void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } +void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) { + A64::Vec vec = inst->GetArg(0).GetA64VecRef(); + auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; + + Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + code->movq(result, addr); + ctx.reg_alloc.DefineValue(inst, result); +} + +void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) { + A64::Vec vec = inst->GetArg(0).GetA64VecRef(); + auto addr = code->xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; + + Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + code->movaps(result, addr); + ctx.reg_alloc.DefineValue(inst, result); +} + void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); code->mov(result, qword[r15 + offsetof(A64JitState, sp)]); @@ -191,6 +209,25 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) { } } +void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + A64::Vec vec = inst->GetArg(0).GetA64VecRef(); + auto addr = code->xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) 
* 2 * static_cast<size_t>(vec)]; + + Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]); + code->movq(to_store, to_store); + code->movaps(addr, to_store); +} + +void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + A64::Vec vec = inst->GetArg(0).GetA64VecRef(); + auto addr = code->xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; + + Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); + code->movaps(addr, to_store); +} + void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto addr = qword[r15 + offsetof(A64JitState, sp)]; diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 30f9d3f1..d9987b12 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -2168,6 +2168,37 @@ void EmitX64<JST>::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) { } } +static void EmitVectorOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + + (code->*fn)(xmm_a, xmm_b); + + ctx.reg_alloc.DefineValue(inst, xmm_a); +} + +template <typename JST> +void EmitX64<JST>::EmitVectorAdd8(EmitContext& ctx, IR::Inst* inst) { + EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddb); +} + +template <typename JST> +void EmitX64<JST>::EmitVectorAdd16(EmitContext& ctx, IR::Inst* inst) { + EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddw); +} + +template <typename JST> +void EmitX64<JST>::EmitVectorAdd32(EmitContext& ctx, IR::Inst* inst) { + EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddd); +} + +template <typename JST> +void EmitX64<JST>::EmitVectorAdd64(EmitContext& ctx, IR::Inst* inst) { + EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddq); +} + template <typename JST> static void 
DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { Xbyak::Label end; diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 3977c365..6bf98af3 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -563,7 +563,7 @@ INST(CSEL, "CSEL", "z0011 //INST(SQRSHL_1, "SQRSHL", "01011110zz1mmmmm010111nnnnnddddd") //INST(SQRSHL_2, "SQRSHL", "0Q001110zz1mmmmm010111nnnnnddddd") //INST(ADD_1, "ADD (vector)", "01011110zz1mmmmm100001nnnnnddddd") -//INST(ADD_2, "ADD (vector)", "0Q001110zz1mmmmm100001nnnnnddddd") +INST(ADD_vector, "ADD (vector)", "0Q001110zz1mmmmm100001nnnnnddddd") //INST(CMTST_1, "CMTST", "01011110zz1mmmmm100011nnnnnddddd") //INST(CMTST_2, "CMTST", "0Q001110zz1mmmmm100011nnnnnddddd") //INST(SQDMULH_vec_1, "SQDMULH (vector)", "01011110zz1mmmmm101101nnnnnddddd") diff --git a/src/frontend/A64/ir_emitter.cpp b/src/frontend/A64/ir_emitter.cpp index cfc15cdf..ee65cff0 100644 --- a/src/frontend/A64/ir_emitter.cpp +++ b/src/frontend/A64/ir_emitter.cpp @@ -86,6 +86,14 @@ IR::U64 IREmitter::GetX(Reg reg) { return Inst(Opcode::A64GetX, IR::Value(reg)); } +IR::U128 IREmitter::GetD(Vec vec) { + return Inst(Opcode::A64GetD, IR::Value(vec)); +} + +IR::U128 IREmitter::GetQ(Vec vec) { + return Inst(Opcode::A64GetQ, IR::Value(vec)); +} + IR::U64 IREmitter::GetSP() { return Inst(Opcode::A64GetSP); } @@ -102,6 +110,14 @@ void IREmitter::SetX(const Reg reg, const IR::U64& value) { Inst(Opcode::A64SetX, IR::Value(reg), value); } +void IREmitter::SetD(const Vec vec, const IR::U128& value) { + Inst(Opcode::A64SetD, IR::Value(vec), value); +} + +void IREmitter::SetQ(const Vec vec, const IR::U128& value) { + Inst(Opcode::A64SetQ, IR::Value(vec), value); +} + void IREmitter::SetSP(const IR::U64& value) { Inst(Opcode::A64SetSP, value); } diff --git a/src/frontend/A64/ir_emitter.h b/src/frontend/A64/ir_emitter.h index ff0a3dfe..24eb7a57 100644 --- a/src/frontend/A64/ir_emitter.h +++ 
b/src/frontend/A64/ir_emitter.h @@ -51,9 +51,13 @@ public: IR::U32 GetW(Reg source_reg); IR::U64 GetX(Reg source_reg); + IR::U128 GetD(Vec source_vec); + IR::U128 GetQ(Vec source_vec); IR::U64 GetSP(); void SetW(Reg dest_reg, const IR::U32& value); void SetX(Reg dest_reg, const IR::U64& value); + void SetD(Vec dest_vec, const IR::U128& value); + void SetQ(Vec dest_vec, const IR::U128& value); void SetSP(const IR::U64& value); void SetPC(const IR::U64& value); }; diff --git a/src/frontend/A64/translate/impl/impl.cpp b/src/frontend/A64/translate/impl/impl.cpp index b66386a0..1fd5d8c2 100644 --- a/src/frontend/A64/translate/impl/impl.cpp +++ b/src/frontend/A64/translate/impl/impl.cpp @@ -119,7 +119,31 @@ void TranslatorVisitor::SP(size_t bitsize, IR::U32U64 value) { ir.SetSP(value); break; default: - ASSERT_MSG(false, "SP - : Invalid bitsize"); + ASSERT_MSG(false, "SP - set : Invalid bitsize"); + } +} + +IR::U128 TranslatorVisitor::V(size_t bitsize, Vec vec) { + switch (bitsize) { + case 64: + return ir.GetD(vec); + case 128: + return ir.GetQ(vec); + default: + ASSERT_MSG(false, "V - get : Invalid bitsize"); + } +} + +void TranslatorVisitor::V(size_t bitsize, Vec vec, IR::U128 value) { + switch (bitsize) { + case 64: + ir.SetD(vec, value); + return; + case 128: + ir.SetQ(vec, value); + return; + default: + ASSERT_MSG(false, "V - Set : Invalid bitsize"); } } diff --git a/src/frontend/A64/translate/impl/impl.h b/src/frontend/A64/translate/impl/impl.h index 066b086a..b838e423 100644 --- a/src/frontend/A64/translate/impl/impl.h +++ b/src/frontend/A64/translate/impl/impl.h @@ -48,6 +48,9 @@ struct TranslatorVisitor final { IR::U32U64 SP(size_t bitsize); void SP(size_t bitsize, IR::U32U64 value); + IR::U128 V(size_t bitsize, Vec vec); + void V(size_t bitsize, Vec vec, IR::U128 value); + IR::UAny Mem(IR::U64 address, size_t size, AccType acctype); void Mem(IR::U64 address, size_t size, AccType acctype, IR::UAny value); @@ -611,7 +614,7 @@ struct TranslatorVisitor final { 
bool SQRSHL_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd); bool SQRSHL_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd); bool ADD_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd); - bool ADD_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd); + bool ADD_vector(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd); bool CMTST_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd); bool CMTST_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd); bool SQDMULH_vec_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd); diff --git a/src/frontend/A64/translate/impl/simd_three_same.cpp b/src/frontend/A64/translate/impl/simd_three_same.cpp new file mode 100644 index 00000000..09f23ce9 --- /dev/null +++ b/src/frontend/A64/translate/impl/simd_three_same.cpp @@ -0,0 +1,39 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#include "frontend/A64/translate/impl/impl.h" + +namespace Dynarmic { +namespace A64 { + +bool TranslatorVisitor::ADD_vector(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { + if (size == 0b11 && !Q) return ReservedValue(); + const size_t esize = 8 << size.ZeroExtend(); + const size_t datasize = Q ? 
128 : 64; + + auto operand1 = V(datasize, Vn); + auto operand2 = V(datasize, Vm); + + auto result = [&]{ + switch (esize) { + case 8: + return ir.VectorAdd8(operand1, operand2); + case 16: + return ir.VectorAdd16(operand1, operand2); + case 32: + return ir.VectorAdd32(operand1, operand2); + default: + return ir.VectorAdd64(operand1, operand2); + } + }(); + + V(datasize, Vd, result); + + return true; +} + +} // namespace A64 +} // namespace Dynarmic diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 5166100f..5d28143f 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -604,6 +604,22 @@ U32 IREmitter::PackedSelect(const U32& ge, const U32& a, const U32& b) { return Inst(Opcode::PackedSelect, ge, a, b); } +U128 IREmitter::VectorAdd8(const U128& a, const U128& b) { + return Inst(Opcode::VectorAdd8, a, b); +} + +U128 IREmitter::VectorAdd16(const U128& a, const U128& b) { + return Inst(Opcode::VectorAdd16, a, b); +} + +U128 IREmitter::VectorAdd32(const U128& a, const U128& b) { + return Inst(Opcode::VectorAdd32, a, b); +} + +U128 IREmitter::VectorAdd64(const U128& a, const U128& b) { + return Inst(Opcode::VectorAdd64, a, b); +} + U32 IREmitter::FPAbs32(const U32& a) { return Inst(Opcode::FPAbs32, a); } diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 1362924f..0c8549b4 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -177,6 +177,11 @@ public: U32 PackedAbsDiffSumS8(const U32& a, const U32& b); U32 PackedSelect(const U32& ge, const U32& a, const U32& b); + U128 VectorAdd8(const U128& a, const U128& b); + U128 VectorAdd16(const U128& a, const U128& b); + U128 VectorAdd32(const U128& a, const U128& b); + U128 VectorAdd64(const U128& a, const U128& b); + U32 FPAbs32(const U32& a); U64 FPAbs64(const U64& a); U32 FPAdd32(const U32& a, const U32& b, bool fpscr_controlled); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 
fd0e1ee6..690b70f0 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -40,9 +40,19 @@ A64OPC(GetCFlag, T::U1, A64OPC(SetNZCV, T::Void, T::NZCVFlags ) A64OPC(GetW, T::U32, T::A64Reg ) A64OPC(GetX, T::U64, T::A64Reg ) +//A64OPC(GetB, T::U128, T::A64Vec ) +//A64OPC(GetH, T::U128, T::A64Vec ) +//A64OPC(GetS, T::U128, T::A64Vec ) +A64OPC(GetD, T::U128, T::A64Vec ) +A64OPC(GetQ, T::U128, T::A64Vec ) A64OPC(GetSP, T::U64, ) A64OPC(SetW, T::Void, T::A64Reg, T::U32 ) A64OPC(SetX, T::Void, T::A64Reg, T::U64 ) +//A64OPC(SetB, T::Void, T::A64Vec, T::U8 ) +//A64OPC(SetH, T::Void, T::A64Vec, T::U16 ) +//A64OPC(SetS, T::Void, T::A64Vec, T::U32 ) +A64OPC(SetD, T::Void, T::A64Vec, T::U128 ) +A64OPC(SetQ, T::Void, T::A64Vec, T::U128 ) A64OPC(SetSP, T::Void, T::U64 ) A64OPC(SetPC, T::Void, T::U64 ) A64OPC(CallSupervisor, T::Void, T::U32 ) @@ -149,6 +159,12 @@ OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 OPCODE(PackedAbsDiffSumS8, T::U32, T::U32, T::U32 ) OPCODE(PackedSelect, T::U32, T::U32, T::U32, T::U32 ) +// Vector instructions +OPCODE(VectorAdd8, T::U128, T::U128, T::U128 ) +OPCODE(VectorAdd16, T::U128, T::U128, T::U128 ) +OPCODE(VectorAdd32, T::U128, T::U128, T::U128 ) +OPCODE(VectorAdd64, T::U128, T::U128, T::U128 ) + // Floating-point operations OPCODE(FPAbs32, T::U32, T::U32 ) OPCODE(FPAbs64, T::U64, T::U64 )