A64: Implement load/store single structure instructions

Implements LD{1, 2, 3, 4}, LD{1, 2, 3, 4}R, and ST{1, 2, 3, 4} single
structure variants.
This commit is contained in:
Lioncash 2018-07-04 14:05:53 -04:00 committed by MerryMage
parent b6e223fc58
commit 593eca7fb1
4 changed files with 256 additions and 40 deletions

View file

@ -100,6 +100,7 @@ add_library(dynarmic
frontend/A64/translate/impl/load_store_register_pair.cpp
frontend/A64/translate/impl/load_store_register_register_offset.cpp
frontend/A64/translate/impl/load_store_register_unprivileged.cpp
frontend/A64/translate/impl/load_store_single_structure.cpp
frontend/A64/translate/impl/move_wide.cpp
frontend/A64/translate/impl/simd_aes.cpp
frontend/A64/translate/impl/simd_copy.cpp

View file

@ -109,30 +109,30 @@ INST(LDx_mult_1, "LDx (multiple structures)", "0Q001
INST(LDx_mult_2, "LDx (multiple structures)", "0Q001100110mmmmmoooozznnnnnttttt")
// Loads and stores - Advanced SIMD Load/Store single structures
//INST(ST1_sngl_1, "ST1 (single structure)", "0Q00110100000000--0Szznnnnnttttt")
//INST(ST1_sngl_2, "ST1 (single structure)", "0Q001101100mmmmm--0Szznnnnnttttt")
//INST(ST3_sngl_1, "ST3 (single structure)", "0Q00110100000000--1Szznnnnnttttt")
//INST(ST3_sngl_2, "ST3 (single structure)", "0Q001101100mmmmm--1Szznnnnnttttt")
//INST(ST2_sngl_1, "ST2 (single structure)", "0Q00110100100000--0Szznnnnnttttt")
//INST(ST2_sngl_2, "ST2 (single structure)", "0Q001101101mmmmm--0Szznnnnnttttt")
//INST(ST4_sngl_1, "ST4 (single structure)", "0Q00110100100000--1Szznnnnnttttt")
//INST(ST4_sngl_2, "ST4 (single structure)", "0Q001101101mmmmm--1Szznnnnnttttt")
//INST(LD1_sngl_1, "LD1 (single structure)", "0Q00110101000000--0Szznnnnnttttt")
//INST(LD1_sngl_2, "LD1 (single structure)", "0Q001101110mmmmm--0Szznnnnnttttt")
//INST(LD3_sngl_1, "LD3 (single structure)", "0Q00110101000000--1Szznnnnnttttt")
//INST(LD3_sngl_2, "LD3 (single structure)", "0Q001101110mmmmm--1Szznnnnnttttt")
//INST(LD1R_1, "LD1R", "0Q001101010000001100zznnnnnttttt")
//INST(LD1R_2, "LD1R", "0Q001101110mmmmm1100zznnnnnttttt")
//INST(LD3R_1, "LD3R", "0Q001101010000001110zznnnnnttttt")
//INST(LD3R_2, "LD3R", "0Q001101110mmmmm1110zznnnnnttttt")
//INST(LD2_sngl_1, "LD2 (single structure)", "0Q00110101100000--0Szznnnnnttttt")
//INST(LD2_sngl_2, "LD2 (single structure)", "0Q001101111mmmmm--0Szznnnnnttttt")
//INST(LD4_sngl_1, "LD4 (single structure)", "0Q00110101100000--1Szznnnnnttttt")
//INST(LD4_sngl_2, "LD4 (single structure)", "0Q001101111mmmmm--1Szznnnnnttttt")
//INST(LD2R_1, "LD2R", "0Q001101011000001100zznnnnnttttt")
//INST(LD2R_2, "LD2R", "0Q001101111mmmmm1100zznnnnnttttt")
//INST(LD4R_1, "LD4R", "0Q001101011000001110zznnnnnttttt")
//INST(LD4R_2, "LD4R", "0Q001101111mmmmm1110zznnnnnttttt")
INST(ST1_sngl_1, "ST1 (single structure)", "0Q00110100000000oo0Szznnnnnttttt")
INST(ST1_sngl_2, "ST1 (single structure)", "0Q001101100mmmmmoo0Szznnnnnttttt")
INST(ST3_sngl_1, "ST3 (single structure)", "0Q00110100000000oo1Szznnnnnttttt")
INST(ST3_sngl_2, "ST3 (single structure)", "0Q001101100mmmmmoo1Szznnnnnttttt")
INST(ST2_sngl_1, "ST2 (single structure)", "0Q00110100100000oo0Szznnnnnttttt")
INST(ST2_sngl_2, "ST2 (single structure)", "0Q001101101mmmmmoo0Szznnnnnttttt")
INST(ST4_sngl_1, "ST4 (single structure)", "0Q00110100100000oo1Szznnnnnttttt")
INST(ST4_sngl_2, "ST4 (single structure)", "0Q001101101mmmmmoo1Szznnnnnttttt")
INST(LD1_sngl_1, "LD1 (single structure)", "0Q00110101000000oo0Szznnnnnttttt")
INST(LD1_sngl_2, "LD1 (single structure)", "0Q001101110mmmmmoo0Szznnnnnttttt")
INST(LD3_sngl_1, "LD3 (single structure)", "0Q00110101000000oo1Szznnnnnttttt")
INST(LD3_sngl_2, "LD3 (single structure)", "0Q001101110mmmmmoo1Szznnnnnttttt")
INST(LD1R_1, "LD1R", "0Q001101010000001100zznnnnnttttt")
INST(LD1R_2, "LD1R", "0Q001101110mmmmm1100zznnnnnttttt")
INST(LD3R_1, "LD3R", "0Q001101010000001110zznnnnnttttt")
INST(LD3R_2, "LD3R", "0Q001101110mmmmm1110zznnnnnttttt")
INST(LD2_sngl_1, "LD2 (single structure)", "0Q00110101100000oo0Szznnnnnttttt")
INST(LD2_sngl_2, "LD2 (single structure)", "0Q001101111mmmmmoo0Szznnnnnttttt")
INST(LD4_sngl_1, "LD4 (single structure)", "0Q00110101100000oo1Szznnnnnttttt")
INST(LD4_sngl_2, "LD4 (single structure)", "0Q001101111mmmmmoo1Szznnnnnttttt")
INST(LD2R_1, "LD2R", "0Q001101011000001100zznnnnnttttt")
INST(LD2R_2, "LD2R", "0Q001101111mmmmm1100zznnnnnttttt")
INST(LD4R_1, "LD4R", "0Q001101011000001110zznnnnnttttt")
INST(LD4R_2, "LD4R", "0Q001101111mmmmm1110zznnnnnttttt")
// Loads and stores - Load/Store Exclusive
INST(STXR, "STXRB, STXRH, STXR", "zz001000000sssss011111nnnnnttttt")

View file

@ -182,26 +182,26 @@ struct TranslatorVisitor final {
bool LDx_mult_2(bool Q, Reg Rm, Imm<4> opcode, Imm<2> size, Reg Rn, Vec Vt);
// Loads and stores - Advanced SIMD Load/Store single structures
bool ST1_sngl_1(bool Q, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool ST1_sngl_2(bool Q, Reg Rm, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool ST3_sngl_1(bool Q, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool ST3_sngl_2(bool Q, Reg Rm, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool ST2_sngl_1(bool Q, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool ST2_sngl_2(bool Q, Reg Rm, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool ST4_sngl_1(bool Q, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool ST4_sngl_2(bool Q, Reg Rm, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD1_sngl_1(bool Q, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD1_sngl_2(bool Q, Reg Rm, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD3_sngl_1(bool Q, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD3_sngl_2(bool Q, Reg Rm, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool ST1_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool ST1_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool ST3_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool ST3_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool ST2_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool ST2_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool ST4_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool ST4_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD1_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD1_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD3_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD3_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD1R_1(bool Q, Imm<2> size, Reg Rn, Vec Vt);
bool LD1R_2(bool Q, Reg Rm, Imm<2> size, Reg Rn, Vec Vt);
bool LD3R_1(bool Q, Imm<2> size, Reg Rn, Vec Vt);
bool LD3R_2(bool Q, Reg Rm, Imm<2> size, Reg Rn, Vec Vt);
bool LD2_sngl_1(bool Q, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD2_sngl_2(bool Q, Reg Rm, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD4_sngl_1(bool Q, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD4_sngl_2(bool Q, Reg Rm, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD2_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD2_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD4_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD4_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt);
bool LD2R_1(bool Q, Imm<2> size, Reg Rn, Vec Vt);
bool LD2R_2(bool Q, Reg Rm, Imm<2> size, Reg Rn, Vec Vt);
bool LD4R_1(bool Q, Imm<2> size, Reg Rn, Vec Vt);

View file

@ -0,0 +1,215 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <boost/optional.hpp>
#include "frontend/A64/translate/impl/impl.h"
namespace Dynarmic::A64 {
static bool SharedDecodeAndOperation(TranslatorVisitor& tv, IREmitter& ir, bool wback, MemOp memop,
bool Q, bool S, bool R, bool replicate, boost::optional<Reg> Rm,
Imm<3> opcode, Imm<2> size, Reg Rn, Vec Vt) {
const size_t selem = (opcode.Bit<0>() << 1 | u32{R}) + 1;
size_t scale = opcode.Bits<1, 2>();
size_t index = 0;
switch (scale) {
case 0:
index = Q << 3 | S << 2 | size.ZeroExtend();
break;
case 1:
if (size.Bit<0>()) {
return tv.UnallocatedEncoding();
}
index = Q << 2 | S << 1 | u32{size.Bit<1>()};
break;
case 2:
if (size.Bit<1>()) {
return tv.UnallocatedEncoding();
}
if (size.Bit<0>()) {
if (S) {
return tv.UnallocatedEncoding();
}
index = Q;
scale = 3;
} else {
index = Q << 1 | u32{S};
}
break;
case 3:
if (memop == MemOp::STORE || S) {
return tv.UnallocatedEncoding();
}
scale = size.ZeroExtend();
break;
}
const size_t datasize = Q ? 128 : 64;
const size_t esize = 8 << scale;
const size_t ebytes = esize / 8;
IR::U64 address;
if (Rn == Reg::SP)
// TODO: Check SP Alignment
address = tv.SP(64);
else
address = tv.X(64, Rn);
IR::U64 offs = ir.Imm64(0);
if (replicate) {
for (size_t s = 0; s < selem; s++) {
const Vec tt = static_cast<Vec>((VecNumber(Vt) + s) % 32);
const IR::UAnyU128 element = tv.Mem(ir.Add(address, offs), ebytes, AccType::VEC);
const IR::U128 broadcasted_element = ir.VectorBroadcast(esize, element);
tv.V(datasize, tt, broadcasted_element);
offs = ir.Add(offs, ir.Imm64(ebytes));
}
} else {
for (size_t s = 0; s < selem; s++) {
const Vec tt = static_cast<Vec>((VecNumber(Vt) + s) % 32);
const IR::U128 rval = tv.V(128, tt);
if (memop == MemOp::LOAD) {
const IR::UAny elem = tv.Mem(ir.Add(address, offs), ebytes, AccType::VEC);
const IR::U128 vec = ir.VectorSetElement(esize, rval, index, elem);
tv.V(128, tt, vec);
} else {
const IR::UAny elem = ir.VectorGetElement(esize, rval, index);
tv.Mem(ir.Add(address, offs), ebytes, AccType::VEC, elem);
}
offs = ir.Add(offs, ir.Imm64(ebytes));
}
}
if (wback) {
if (*Rm != Reg::SP)
offs = tv.X(64, *Rm);
if (Rn == Reg::SP)
tv.SP(64, ir.Add(address, offs));
else
tv.X(64, Rn, ir.Add(address, offs));
}
return true;
}
bool TranslatorVisitor::LD1_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, false, MemOp::LOAD, Q, S, false, false, {},
Imm<3>{upper_opcode.ZeroExtend() << 1}, size, Rn, Vt);
}
bool TranslatorVisitor::LD1_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, true, MemOp::LOAD, Q, S, false, false, Rm,
Imm<3>{upper_opcode.ZeroExtend() << 1}, size, Rn, Vt);
}
bool TranslatorVisitor::LD1R_1(bool Q, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, false, MemOp::LOAD, Q, false, false, true, {}, Imm<3>{0b110}, size, Rn, Vt);
}
bool TranslatorVisitor::LD1R_2(bool Q, Reg Rm, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, true, MemOp::LOAD, Q, false, false, true, Rm, Imm<3>{0b110}, size, Rn, Vt);
}
bool TranslatorVisitor::LD2_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, false, MemOp::LOAD, Q, S, true, false, {},
Imm<3>{upper_opcode.ZeroExtend() << 1}, size, Rn, Vt);
}
bool TranslatorVisitor::LD2_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, true, MemOp::LOAD, Q, S, true, false, Rm,
Imm<3>{upper_opcode.ZeroExtend() << 1}, size, Rn, Vt);
}
bool TranslatorVisitor::LD2R_1(bool Q, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, false, MemOp::LOAD, Q, false, true, true, {}, Imm<3>{0b110}, size, Rn, Vt);
}
bool TranslatorVisitor::LD2R_2(bool Q, Reg Rm, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, true, MemOp::LOAD, Q, false, true, true, Rm, Imm<3>{0b110}, size, Rn, Vt);
}
bool TranslatorVisitor::LD3_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, false, MemOp::LOAD, Q, S, false, false, {},
Imm<3>{(upper_opcode.ZeroExtend() << 1) | 1}, size, Rn, Vt);
}
bool TranslatorVisitor::LD3_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, true, MemOp::LOAD, Q, S, false, false, Rm,
Imm<3>{(upper_opcode.ZeroExtend() << 1) | 1}, size, Rn, Vt);
}
bool TranslatorVisitor::LD3R_1(bool Q, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, false, MemOp::LOAD, Q, false, false, true, {}, Imm<3>{0b111}, size, Rn, Vt);
}
bool TranslatorVisitor::LD3R_2(bool Q, Reg Rm, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, true, MemOp::LOAD, Q, false, false, true, Rm, Imm<3>{0b111}, size, Rn, Vt);
}
bool TranslatorVisitor::LD4_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, false, MemOp::LOAD, Q, S, true, false, {},
Imm<3>{(upper_opcode.ZeroExtend() << 1) | 1}, size, Rn, Vt);
}
bool TranslatorVisitor::LD4_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, true, MemOp::LOAD, Q, S, true, false, Rm,
Imm<3>{(upper_opcode.ZeroExtend() << 1) | 1}, size, Rn, Vt);
}
bool TranslatorVisitor::LD4R_1(bool Q, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, false, MemOp::LOAD, Q, false, true, true, {}, Imm<3>{0b111}, size, Rn, Vt);
}
bool TranslatorVisitor::LD4R_2(bool Q, Reg Rm, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, true, MemOp::LOAD, Q, false, true, true, Rm, Imm<3>{0b111}, size, Rn, Vt);
}
bool TranslatorVisitor::ST1_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, false, MemOp::STORE, Q, S, false, false, {},
Imm<3>{upper_opcode.ZeroExtend() << 1}, size, Rn, Vt);
}
bool TranslatorVisitor::ST1_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, true, MemOp::STORE, Q, S, false, false, Rm,
Imm<3>{upper_opcode.ZeroExtend() << 1}, size, Rn, Vt);
}
bool TranslatorVisitor::ST2_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, false, MemOp::STORE, Q, S, true, false, {},
Imm<3>{upper_opcode.ZeroExtend() << 1}, size, Rn, Vt);
}
bool TranslatorVisitor::ST2_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, true, MemOp::STORE, Q, S, true, false, Rm,
Imm<3>{upper_opcode.ZeroExtend() << 1}, size, Rn, Vt);
}
bool TranslatorVisitor::ST3_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, false, MemOp::STORE, Q, S, false, false, {},
Imm<3>{(upper_opcode.ZeroExtend() << 1) | 1}, size, Rn, Vt);
}
bool TranslatorVisitor::ST3_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, true, MemOp::STORE, Q, S, false, false, Rm,
Imm<3>{(upper_opcode.ZeroExtend() << 1) | 1}, size, Rn, Vt);
}
bool TranslatorVisitor::ST4_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, false, MemOp::STORE, Q, S, true, false, {},
Imm<3>{(upper_opcode.ZeroExtend() << 1) | 1}, size, Rn, Vt);
}
bool TranslatorVisitor::ST4_sngl_2(bool Q, Reg Rm, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
return SharedDecodeAndOperation(*this, ir, true, MemOp::STORE, Q, S, true, false, Rm,
Imm<3>{(upper_opcode.ZeroExtend() << 1) | 1}, size, Rn, Vt);
}
} // namespace Dynarmic::A64