diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 1cb37bfa..c03ffd28 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -225,8 +225,8 @@ INST(LDTRSW, "LDTRSW", "10111 // Loads and stores - Load/Store register (register offset) INST(STRx_reg, "STRx (register)", "zz111000o01mmmmmxxxS10nnnnnttttt") INST(LDRx_reg, "LDRx (register)", "zz111000o11mmmmmxxxS10nnnnnttttt") -//INST(STR_reg_fpsimd, "STR (register, SIMD&FP)", "zz111100-01mmmmmxxxS10nnnnnttttt") -//INST(LDR_reg_fpsimd, "LDR (register, SIMD&FP)", "zz111100-11mmmmmxxxS10nnnnnttttt") +INST(STR_reg_fpsimd, "STR (register, SIMD&FP)", "zz111100o01mmmmmxxxS10nnnnnttttt") +INST(LDR_reg_fpsimd, "LDR (register, SIMD&FP)", "zz111100o11mmmmmxxxS10nnnnnttttt") // Loads and stores - Load/Store register (pointer authentication) //INST(LDRA, "LDRAA, LDRAB", "11111000MS1iiiiiiiiiW1nnnnnttttt") diff --git a/src/frontend/A64/translate/impl/impl.cpp b/src/frontend/A64/translate/impl/impl.cpp index 92e7e079..3b373da8 100644 --- a/src/frontend/A64/translate/impl/impl.cpp +++ b/src/frontend/A64/translate/impl/impl.cpp @@ -210,12 +210,18 @@ void TranslatorVisitor::V(size_t bitsize, Vec vec, IR::U128 value) { } } -IR::UAny TranslatorVisitor::V_scalar(size_t bitsize, Vec vec) { +IR::UAnyU128 TranslatorVisitor::V_scalar(size_t bitsize, Vec vec) { + if (bitsize == 128) { + return V(128, vec); + } // TODO: Optimize return ir.VectorGetElement(bitsize, ir.GetQ(vec), 0); } -void TranslatorVisitor::V_scalar(size_t /*bitsize*/, Vec vec, IR::UAny value) { +void TranslatorVisitor::V_scalar(size_t bitsize, Vec vec, IR::UAnyU128 value) { + if (bitsize == 128) { + return V(128, vec, value); + } // TODO: Optimize ir.SetQ(vec, ir.ZeroExtendToQuad(value)); } diff --git a/src/frontend/A64/translate/impl/impl.h b/src/frontend/A64/translate/impl/impl.h index b46b660d..8419d095 100644 --- a/src/frontend/A64/translate/impl/impl.h +++ b/src/frontend/A64/translate/impl/impl.h @@ -52,8 
+52,8 @@ struct TranslatorVisitor final { IR::U128 V(size_t bitsize, Vec vec); void V(size_t bitsize, Vec vec, IR::U128 value); - IR::UAny V_scalar(size_t bitsize, Vec vec); - void V_scalar(size_t bitsize, Vec vec, IR::UAny value); + IR::UAnyU128 V_scalar(size_t bitsize, Vec vec); + void V_scalar(size_t bitsize, Vec vec, IR::UAnyU128 value); IR::U128 Vpart(size_t bitsize, Vec vec, size_t part); void Vpart(size_t bitsize, Vec vec, size_t part, IR::U128 value); @@ -287,8 +287,8 @@ struct TranslatorVisitor final { // Loads and stores - Load/Store register (register offset) bool STRx_reg(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3> option, bool S, Reg Rn, Reg Rt); bool LDRx_reg(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3> option, bool S, Reg Rn, Reg Rt); - bool STR_reg_fpsimd(Imm<2> size, Reg Rm, Imm<3> option, bool S, Reg Rn, Vec Vt); - bool LDR_reg_fpsimd(Imm<2> size, Reg Rm, Imm<3> option, bool S, Reg Rn, Vec Vt); + bool STR_reg_fpsimd(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3> option, bool S, Reg Rn, Vec Vt); + bool LDR_reg_fpsimd(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3> option, bool S, Reg Rn, Vec Vt); // Loads and stores - Load/Store register (pointer authentication) bool LDRA(bool M, bool S, Imm<9> imm9, bool W, Reg Rn, Reg Rt); diff --git a/src/frontend/A64/translate/impl/load_store_register_register_offset.cpp b/src/frontend/A64/translate/impl/load_store_register_register_offset.cpp index 08742c58..e99d9388 100644 --- a/src/frontend/A64/translate/impl/load_store_register_register_offset.cpp +++ b/src/frontend/A64/translate/impl/load_store_register_register_offset.cpp @@ -8,7 +8,7 @@ namespace Dynarmic::A64 { -static bool SharedDecodeAndOperation(TranslatorVisitor& tv, IREmitter& ir, size_t scale, u8 shift, Imm<2> size, Imm<1> opc_1, Imm<1> opc_0, Reg Rm, Imm<3> option, Reg Rn, Reg Rt) { +static bool RegSharedDecodeAndOperation(TranslatorVisitor& tv, IREmitter& ir, size_t scale, u8 shift, Imm<2> size, Imm<1> opc_1, Imm<1> opc_0, Reg Rm, Imm<3> option, Reg Rn, Reg Rt) 
{ // Shared Decode const AccType acctype = AccType::NORMAL; @@ -81,7 +81,7 @@ bool TranslatorVisitor::STRx_reg(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3> optio if (!option.Bit<1>()) { return UnallocatedEncoding(); } - return SharedDecodeAndOperation(*this, ir, scale, shift, size, opc_1, opc_0, Rm, option, Rn, Rt); + return RegSharedDecodeAndOperation(*this, ir, scale, shift, size, opc_1, opc_0, Rm, option, Rn, Rt); } bool TranslatorVisitor::LDRx_reg(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3> option, bool S, Reg Rn, Reg Rt) { @@ -91,7 +91,71 @@ bool TranslatorVisitor::LDRx_reg(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3> optio if (!option.Bit<1>()) { return UnallocatedEncoding(); } - return SharedDecodeAndOperation(*this, ir, scale, shift, size, opc_1, opc_0, Rm, option, Rn, Rt); + return RegSharedDecodeAndOperation(*this, ir, scale, shift, size, opc_1, opc_0, Rm, option, Rn, Rt); +} + +static bool VecSharedDecodeAndOperation(TranslatorVisitor& tv, IREmitter& ir, size_t scale, u8 shift, Imm<1> opc_0, Reg Rm, Imm<3> option, Reg Rn, Vec Vt) { + // Shared Decode + + const AccType acctype = AccType::VEC; + const MemOp memop = opc_0 == 1 ? 
MemOp::LOAD : MemOp::STORE; + const size_t datasize = 8 << scale; + + // Operation + + const IR::U64 offset = tv.ExtendReg(64, Rm, option, shift); + + IR::U64 address; + if (Rn == Reg::SP) { + // TODO: Check SP alignment + address = tv.SP(64); + } else { + address = tv.X(64, Rn); + } + address = ir.Add(address, offset); + + switch (memop) { + case MemOp::STORE: { + const IR::UAnyU128 data = tv.V_scalar(datasize, Vt); + tv.Mem(address, datasize / 8, acctype, data); + break; + } + case MemOp::LOAD: { + const IR::UAnyU128 data = tv.Mem(address, datasize / 8, acctype); + tv.V_scalar(datasize, Vt, data); + break; + } + default: + UNREACHABLE(); + } + + return true; +} + +bool TranslatorVisitor::STR_reg_fpsimd(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3> option, bool S, Reg Rn, Vec Vt) { + const Imm<1> opc_0{0}; + const size_t scale = concatenate(opc_1, size).ZeroExtend(); + if (scale > 4) { + return UnallocatedEncoding(); + } + const u8 shift = S ? static_cast<u8>(scale) : 0; + if (!option.Bit<1>()) { + return UnallocatedEncoding(); + } + return VecSharedDecodeAndOperation(*this, ir, scale, shift, opc_0, Rm, option, Rn, Vt); +} + +bool TranslatorVisitor::LDR_reg_fpsimd(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3> option, bool S, Reg Rn, Vec Vt) { + const Imm<1> opc_0{1}; + const size_t scale = concatenate(opc_1, size).ZeroExtend(); + if (scale > 4) { + return UnallocatedEncoding(); + } + const u8 shift = S ? static_cast<u8>(scale) : 0; + if (!option.Bit<1>()) { + return UnallocatedEncoding(); + } + return VecSharedDecodeAndOperation(*this, ir, scale, shift, opc_0, Rm, option, Rn, Vt); } } // namespace Dynarmic::A64