From 1a0bc5ba91caa7109f210068dc39693f97e7f669 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sat, 16 May 2020 12:30:09 +0100
Subject: [PATCH] A32/ASIMD: ARMv8: Implement VLD{1-4} (multiple)

Add a decoder entry and translator for the Advanced SIMD
"load multiple structures" encodings (VLD1-VLD4).

* asimd.inc: enable v8_VLD_multiple and route the two unallocated
  type patterns to arm_UDF; the store, all-lanes and single-lane
  forms are listed but left commented out.
* IREmitter: add ReadMemory/WriteMemory helpers that dispatch on a
  bit size of 8, 16, 32 or 64 and assert on anything else.
* translate_arm.cpp: try DecodeASIMD after DecodeVFP and before
  DecodeArm, in both the block and single-instruction translators.
* fuzz_arm.cpp: include asimd.inc in the instruction list.

---
 src/CMakeLists.txt                            |   1 +
 src/frontend/A32/decoder/asimd.inc            |  11 ++
 src/frontend/A32/ir_emitter.cpp               |  28 ++++
 src/frontend/A32/ir_emitter.h                 |   2 +
 .../impl/asimd_load_store_structures.cpp      | 153 ++++++++++++++++++
 .../A32/translate/impl/translate_arm.h        |   3 +
 src/frontend/A32/translate/translate_arm.cpp  |   5 +
 tests/A32/fuzz_arm.cpp                        |   1 +
 8 files changed, 204 insertions(+)
 create mode 100644 src/frontend/A32/translate/impl/asimd_load_store_structures.cpp

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 7b9baebb..b44cb85e 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -122,6 +122,7 @@ if ("A32" IN_LIST DYNARMIC_FRONTENDS)
         frontend/A32/location_descriptor.cpp
         frontend/A32/location_descriptor.h
         frontend/A32/PSR.h
+        frontend/A32/translate/impl/asimd_load_store_structures.cpp
         frontend/A32/translate/impl/barrier.cpp
         frontend/A32/translate/impl/branch.cpp
         frontend/A32/translate/impl/coprocessor.cpp
diff --git a/src/frontend/A32/decoder/asimd.inc b/src/frontend/A32/decoder/asimd.inc
index 9f9e0c87..8be4528f 100644
--- a/src/frontend/A32/decoder/asimd.inc
+++ b/src/frontend/A32/decoder/asimd.inc
@@ -119,3 +119,14 @@
 //INST(asimd_VBIC_imm, "VBIC (immediate)", "1111001a1-000bcd----10x10-11efgh") // ASIMD
 //INST(asimd_VMVN_imm, "VMVN (immediate)", "1111001a1-000bcd----110x0-11efgh") // ASIMD
 //INST(asimd_VMOV_imm, "VMOV (immediate)", "1111001a1-000bcd----11100-11efgh") // ASIMD
+
+// Advanced SIMD load/store structures
+//INST(v8_VST_multiple, "VST{1-4} (multiple)", "111101000D00nnnnddddxxxxzzaammmm") // v8
+INST(v8_VLD_multiple, "VLD{1-4} (multiple)", "111101000D10nnnnddddxxxxzzaammmm") // v8
+INST(arm_UDF, "UNALLOCATED", "111101000--0--------1011--------") // v8
+INST(arm_UDF, "UNALLOCATED", "111101000--0--------11----------") // v8
+//INST(arm_UDF, "UNALLOCATED", "111101001-00--------11----------") // v8
+//INST(v8_VLD_all_lanes, "VLD{1-4} (all lanes)", "111101001D10nnnndddd11nnzzTammmm") // v8
+//INST(arm_UDF, "UNALLOCATED", "111101001-10--------1110---1----") // v8
+//INST(v8_VST_single, "VST{1-4} (single)", "111101001D00nnnnddddzzNNaaaammmm") // v8
+//INST(v8_VLD_single, "VLD{1-4} (single)", "111101001D10nnnnddddzzNNaaaammmm") // v8
diff --git a/src/frontend/A32/ir_emitter.cpp b/src/frontend/A32/ir_emitter.cpp
index e8b21197..83ddff34 100644
--- a/src/frontend/A32/ir_emitter.cpp
+++ b/src/frontend/A32/ir_emitter.cpp
@@ -183,6 +183,20 @@ void IREmitter::SetExclusive(const IR::U32& vaddr, size_t byte_size) {
     Inst(Opcode::A32SetExclusive, vaddr, Imm8(u8(byte_size)));
 }
 
+IR::UAny IREmitter::ReadMemory(size_t bitsize, const IR::U32& vaddr) {
+    switch (bitsize) {
+    case 8:
+        return ReadMemory8(vaddr);
+    case 16:
+        return ReadMemory16(vaddr);
+    case 32:
+        return ReadMemory32(vaddr);
+    case 64:
+        return ReadMemory64(vaddr);
+    }
+    ASSERT_FALSE("Invalid bitsize");
+}
+
 IR::U8 IREmitter::ReadMemory8(const IR::U32& vaddr) {
     return Inst(Opcode::A32ReadMemory8, vaddr);
 }
@@ -202,6 +216,20 @@ IR::U64 IREmitter::ReadMemory64(const IR::U32& vaddr) {
     return current_location.EFlag() ? ByteReverseDual(value) : value;
 }
 
+void IREmitter::WriteMemory(size_t bitsize, const IR::U32& vaddr, const IR::UAny& value) {
+    switch (bitsize) {
+    case 8:
+        return WriteMemory8(vaddr, value);
+    case 16:
+        return WriteMemory16(vaddr, value);
+    case 32:
+        return WriteMemory32(vaddr, value);
+    case 64:
+        return WriteMemory64(vaddr, value);
+    }
+    ASSERT_FALSE("Invalid bitsize");
+}
+
 void IREmitter::WriteMemory8(const IR::U32& vaddr, const IR::U8& value) {
     Inst(Opcode::A32WriteMemory8, vaddr, value);
 }
diff --git a/src/frontend/A32/ir_emitter.h b/src/frontend/A32/ir_emitter.h
index e6b2192e..ff625842 100644
--- a/src/frontend/A32/ir_emitter.h
+++ b/src/frontend/A32/ir_emitter.h
@@ -70,10 +70,12 @@ public:
     void ClearExclusive();
     void SetExclusive(const IR::U32& vaddr, size_t byte_size);
 
+    IR::UAny ReadMemory(size_t bitsize, const IR::U32& vaddr);
     IR::U8 ReadMemory8(const IR::U32& vaddr);
     IR::U16 ReadMemory16(const IR::U32& vaddr);
     IR::U32 ReadMemory32(const IR::U32& vaddr);
     IR::U64 ReadMemory64(const IR::U32& vaddr);
+    void WriteMemory(size_t bitsize, const IR::U32& vaddr, const IR::UAny& value);
     void WriteMemory8(const IR::U32& vaddr, const IR::U8& value);
     void WriteMemory16(const IR::U32& vaddr, const IR::U16& value);
     void WriteMemory32(const IR::U32& vaddr, const IR::U32& value);
diff --git a/src/frontend/A32/translate/impl/asimd_load_store_structures.cpp b/src/frontend/A32/translate/impl/asimd_load_store_structures.cpp
new file mode 100644
index 00000000..00be7239
--- /dev/null
+++ b/src/frontend/A32/translate/impl/asimd_load_store_structures.cpp
@@ -0,0 +1,153 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2020 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#include "frontend/A32/translate/impl/translate_arm.h"
+
+#include "common/bit_util.h"
+
+namespace Dynarmic::A32 {
+
+static ExtReg ToExtRegD(size_t base, bool bit) {
+    return static_cast<ExtReg>(static_cast<size_t>(ExtReg::D0) + base + (bit ? 16 : 0));
+}
+
+bool ArmTranslatorVisitor::v8_VLD_multiple(bool D, Reg n, size_t Vd, Imm<4> type, size_t size, size_t align, Reg m) {
+    size_t nelem, regs, inc;
+    switch (type.ZeroExtend()) {
+    case 0b0111: // VLD1 A1
+        nelem = 1;
+        regs = 1;
+        inc = 0;
+        if (Common::Bit<1>(align)) {
+            return UndefinedInstruction();
+        }
+        break;
+    case 0b1010: // VLD1 A2
+        nelem = 1;
+        regs = 2;
+        inc = 0;
+        if (align == 0b11) {
+            return UndefinedInstruction();
+        }
+        break;
+    case 0b0110: // VLD1 A3
+        nelem = 1;
+        regs = 3;
+        inc = 0;
+        if (Common::Bit<1>(align)) {
+            return UndefinedInstruction();
+        }
+        break;
+    case 0b0010: // VLD1 A4
+        nelem = 1;
+        regs = 4;
+        inc = 0;
+        break;
+    case 0b1000: // VLD2 A1
+        nelem = 2;
+        regs = 1;
+        inc = 1;
+        if (size == 0b11 || align == 0b11) {
+            return UndefinedInstruction();
+        }
+        break;
+    case 0b1001: // VLD2 A1
+        nelem = 2;
+        regs = 1;
+        inc = 2;
+        if (size == 0b11 || align == 0b11) {
+            return UndefinedInstruction();
+        }
+        break;
+    case 0b0011: // VLD2 A2
+        nelem = 2;
+        regs = 2;
+        inc = 2;
+        if (size == 0b11) {
+            return UndefinedInstruction();
+        }
+        break;
+    case 0b0100: // VLD3
+        nelem = 3;
+        regs = 1;
+        inc = 1;
+        if (size == 0b11 || Common::Bit<1>(align)) {
+            return UndefinedInstruction();
+        }
+        break;
+    case 0b0101: // VLD3
+        nelem = 3;
+        regs = 1;
+        inc = 2;
+        if (size == 0b11 || Common::Bit<1>(align)) {
+            return UndefinedInstruction();
+        }
+        break;
+    case 0b0000: // VLD4
+        nelem = 4;
+        regs = 1;
+        inc = 1;
+        if (size == 0b11) {
+            return UndefinedInstruction();
+        }
+        break;
+    case 0b0001: // VLD4
+        nelem = 4;
+        regs = 1;
+        inc = 2;
+        if (size == 0b11) {
+            return UndefinedInstruction();
+        }
+        break;
+    default:
+        ASSERT_FALSE("Decode error");
+    }
+
+    const ExtReg d = ToExtRegD(Vd, D);
+    const size_t d_last = RegNumber(d) + inc * (nelem - 1);
+    if (n == Reg::R15 || d_last + regs > 32) {
+        return UnpredictableInstruction();
+    }
+
+    [[maybe_unused]] const size_t alignment = align == 0 ? 1 : 4 << align;
+    const size_t ebytes = static_cast<size_t>(1) << size;
+    const size_t elements = 8 / ebytes;
+
+    const bool wback = m != Reg::R15;
+    const bool register_index = m != Reg::R15 && m != Reg::R13;
+
+    for (size_t r = 0; r < regs; r++) {
+        for (size_t i = 0; i < nelem; i++) {
+            const ExtReg ext_reg = d + i * inc + r;
+            ir.SetExtendedRegister(ext_reg, ir.Imm64(0));
+        }
+    }
+
+    IR::U32 address = ir.GetRegister(n);
+    for (size_t r = 0; r < regs; r++) {
+        for (size_t e = 0; e < elements; e++) {
+            for (size_t i = 0; i < nelem; i++) {
+                const ExtReg ext_reg = d + i * inc + r;
+                const IR::U64 element = ir.ZeroExtendToLong(ir.ReadMemory(ebytes * 8, address));
+                const IR::U64 shifted_element = ir.LogicalShiftLeft(element, ir.Imm8(static_cast<u8>(e * ebytes * 8)));
+                ir.SetExtendedRegister(ext_reg, ir.Or(ir.GetExtendedRegister(ext_reg), shifted_element));
+
+                address = ir.Add(address, ir.Imm32(static_cast<u32>(ebytes)));
+            }
+        }
+    }
+
+    if (wback) {
+        if (register_index) {
+            ir.SetRegister(n, ir.Add(ir.GetRegister(n), ir.GetRegister(m)));
+        } else {
+            ir.SetRegister(n, ir.Add(ir.GetRegister(n), ir.Imm32(static_cast<u32>(8 * nelem * regs))));
+        }
+    }
+
+    return true;
+}
+
+} // namespace Dynarmic::A32
diff --git a/src/frontend/A32/translate/impl/translate_arm.h b/src/frontend/A32/translate/impl/translate_arm.h
index 6100fb42..5851a3fa 100644
--- a/src/frontend/A32/translate/impl/translate_arm.h
+++ b/src/frontend/A32/translate/impl/translate_arm.h
@@ -428,6 +428,9 @@ struct ArmTranslatorVisitor final {
     bool vfp_VSTM_a2(Cond cond, bool p, bool u, bool D, bool w, Reg n, size_t Vd, Imm<8> imm8);
     bool vfp_VLDM_a1(Cond cond, bool p, bool u, bool D, bool w, Reg n, size_t Vd, Imm<8> imm8);
     bool vfp_VLDM_a2(Cond cond, bool p, bool u, bool D, bool w, Reg n, size_t Vd, Imm<8> imm8);
+
+    // Advanced SIMD load/store structures
+    bool v8_VLD_multiple(bool D, Reg n, size_t Vd, Imm<4> type, size_t sz, size_t align, Reg m);
 };
 
 } // namespace Dynarmic::A32
diff --git a/src/frontend/A32/translate/translate_arm.cpp b/src/frontend/A32/translate/translate_arm.cpp
index caff8703..7239042b 100644
--- a/src/frontend/A32/translate/translate_arm.cpp
+++ b/src/frontend/A32/translate/translate_arm.cpp
@@ -9,6 +9,7 @@
 
 #include "common/assert.h"
 #include "frontend/A32/decoder/arm.h"
+#include "frontend/A32/decoder/asimd.h"
 #include "frontend/A32/decoder/vfp.h"
 #include "frontend/A32/location_descriptor.h"
 #include "frontend/A32/translate/impl/translate_arm.h"
@@ -41,6 +42,8 @@ IR::Block TranslateArm(LocationDescriptor descriptor, MemoryReadCodeFuncType mem
 
         if (const auto vfp_decoder = DecodeVFP<ArmTranslatorVisitor>(arm_instruction)) {
             should_continue = vfp_decoder->get().call(visitor, arm_instruction);
+        } else if (const auto asimd_decoder = DecodeASIMD<ArmTranslatorVisitor>(arm_instruction)) {
+            should_continue = asimd_decoder->get().call(visitor, arm_instruction);
         } else if (const auto decoder = DecodeArm<ArmTranslatorVisitor>(arm_instruction)) {
             should_continue = decoder->get().call(visitor, arm_instruction);
         } else {
@@ -80,6 +83,8 @@ bool TranslateSingleArmInstruction(IR::Block& block, LocationDescriptor descript
     bool should_continue = true;
     if (const auto vfp_decoder = DecodeVFP<ArmTranslatorVisitor>(arm_instruction)) {
         should_continue = vfp_decoder->get().call(visitor, arm_instruction);
+    } else if (const auto asimd_decoder = DecodeASIMD<ArmTranslatorVisitor>(arm_instruction)) {
+        should_continue = asimd_decoder->get().call(visitor, arm_instruction);
     } else if (const auto decoder = DecodeArm<ArmTranslatorVisitor>(arm_instruction)) {
         should_continue = decoder->get().call(visitor, arm_instruction);
     } else {
diff --git a/tests/A32/fuzz_arm.cpp b/tests/A32/fuzz_arm.cpp
index 928f8706..c8990618 100644
--- a/tests/A32/fuzz_arm.cpp
+++ b/tests/A32/fuzz_arm.cpp
@@ -77,6 +77,7 @@ u32 GenRandomInst(u32 pc, bool is_last_inst) {
         const std::vector<std::tuple<std::string, const char*>> list {
 #define INST(fn, name, bitstring) {#fn, bitstring},
 #include "frontend/A32/decoder/arm.inc"
+#include "frontend/A32/decoder/asimd.inc"
 #include "frontend/A32/decoder/vfp.inc"
 #undef INST
         };