From b7a2c1a7df8201f6a63f5f797770183e99159914 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Tue, 13 Feb 2018 00:19:04 +0000
Subject: [PATCH 1/3] A64: Implement STXRB, STXRH, STXR, STLXRB, STLXRH, STLXR, LDXRB, LDXRH, LDXR, LDAXRB, LDAXRH, LDAXR

---
 src/backend_x64/a64_emit_x64.cpp | 83 +++++++++++++++++--
 src/backend_x64/a64_emit_x64.h | 1 +
 src/backend_x64/a64_jitstate.h | 5 ++
 src/frontend/A64/decoder/a64.inc | 8 +-
 src/frontend/A64/ir_emitter.cpp | 29 +++++++
 src/frontend/A64/ir_emitter.h | 7 ++
 src/frontend/A64/translate/impl/impl.cpp | 18 ++++
 src/frontend/A64/translate/impl/impl.h | 3 +-
 .../translate/impl/load_store_exclusive.cpp | 75 +++++++++++++++++
 src/frontend/ir/microinstruction.cpp | 7 ++
 src/frontend/ir/opcodes.inc | 7 ++
 11 files changed, 233 insertions(+), 10 deletions(-)

diff --git a/src/backend_x64/a64_emit_x64.cpp b/src/backend_x64/a64_emit_x64.cpp
index c253e6c9..ab920c98 100644
--- a/src/backend_x64/a64_emit_x64.cpp
+++ b/src/backend_x64/a64_emit_x64.cpp
@@ -224,7 +224,7 @@ void A64EmitX64::GenFastmemFallbacks() {
             ABI_PopCallerSaveRegistersAndAdjustStack(code);
             code.ret();
 
-            if (vaddr_idx == value_idx || value_idx == 4 || value_idx == 15) {
+            if (value_idx == 4 || value_idx == 15) {
                 continue;
             }
 
@@ -249,13 +249,18 @@ void A64EmitX64::GenFastmemFallbacks() {
             ABI_PushCallerSaveRegistersAndAdjustStack(code);
             if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) {
                 code.xchg(code.ABI_PARAM2, code.ABI_PARAM3);
-            } else {
-                if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
-                    code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
-                }
+            } else if (vaddr_idx == code.ABI_PARAM3.getIdx()) {
+                code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
                 if (value_idx != code.ABI_PARAM3.getIdx()) {
                     code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx});
                 }
+            } else {
+                if (value_idx != code.ABI_PARAM3.getIdx()) {
+                    code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx});
+                }
+                if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
+                    code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
+                }
             }
             callback.EmitCall(code);
             ABI_PopCallerSaveRegistersAndAdjustStack(code);
@@ -480,6 +485,19 @@ void A64EmitX64::EmitA64GetTPIDRRO(A64EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
+void A64EmitX64::EmitA64ClearExclusive(A64EmitContext&, IR::Inst*) {
+    code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
+}
+
+void A64EmitX64::EmitA64SetExclusive(A64EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ASSERT(args[1].IsImmediate());
+    Xbyak::Reg32 address = ctx.reg_alloc.UseGpr(args[0]).cvt32();
+
+    code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
+    code.mov(dword[r15 + offsetof(A64JitState, exclusive_address)], address);
+}
+
 static Xbyak::RegExp EmitVAddrLookup(const A64::UserConfig& conf, BlockOfCode& code, A64EmitContext& ctx, Xbyak::Label& abort, Xbyak::Reg64 vaddr, boost::optional<Xbyak::Reg64> arg_scratch = {}) {
     constexpr size_t PAGE_BITS = 12;
     constexpr size_t PAGE_SIZE = 1 << PAGE_BITS;
@@ -722,6 +740,61 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
     code.CallFunction(memory_write_128);
 }
 
+void A64EmitX64::EmitExclusiveWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize, Xbyak::Reg64 vaddr, size_t value_idx) {
+    Xbyak::Label end;
+    Xbyak::Reg32 passed = ctx.reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
+
+    code.mov(passed, u32(1));
+    code.cmp(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
+    code.je(end);
+    code.mov(tmp, vaddr);
+    code.xor_(tmp, dword[r15 + offsetof(A64JitState, exclusive_address)]);
+    code.test(tmp, A64JitState::RESERVATION_GRANULE_MASK);
+    code.jne(end);
+    code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
+    code.call(write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value_idx)]);
+    code.xor_(passed, passed);
+    code.L(end);
+
+    ctx.reg_alloc.DefineValue(inst, passed);
+}
+
+void A64EmitX64::EmitA64ExclusiveWriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+    Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
+    EmitExclusiveWrite(ctx, inst, 8, vaddr, value.getIdx());
+}
+
+void A64EmitX64::EmitA64ExclusiveWriteMemory16(A64EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+    Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
+    EmitExclusiveWrite(ctx, inst, 16, vaddr, value.getIdx());
+}
+
+void A64EmitX64::EmitA64ExclusiveWriteMemory32(A64EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+    Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
+    EmitExclusiveWrite(ctx, inst, 32, vaddr, value.getIdx());
+}
+
+void A64EmitX64::EmitA64ExclusiveWriteMemory64(A64EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+    Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
+    EmitExclusiveWrite(ctx, inst, 64, vaddr, value.getIdx());
+}
+
+void A64EmitX64::EmitA64ExclusiveWriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+    Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[1]);
+    EmitExclusiveWrite(ctx, inst, 128, vaddr, value.getIdx());
+}
+
 void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor) {
     code.SwitchMxcsrOnExit();
     DEVIRT(conf.callbacks, &A64::UserCallbacks::InterpreterFallback).EmitCall(code, [&](RegList param) {
diff --git a/src/backend_x64/a64_emit_x64.h b/src/backend_x64/a64_emit_x64.h
index 415958e5..9ba3e5d4 100644
--- a/src/backend_x64/a64_emit_x64.h
+++ b/src/backend_x64/a64_emit_x64.h
@@ -57,6 +57,7 @@ protected:
     void EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
     void EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
+    void EmitExclusiveWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize, Xbyak::Reg64 vaddr, size_t value_idx);
 
     // Microinstruction emitters
 #define OPCODE(...)
diff --git a/src/backend_x64/a64_jitstate.h b/src/backend_x64/a64_jitstate.h
index 16297191..84b79e19 100644
--- a/src/backend_x64/a64_jitstate.h
+++ b/src/backend_x64/a64_jitstate.h
@@ -56,6 +56,11 @@ struct A64JitState {
     bool halt_requested = false;
     bool check_bit = false;
 
+    // Exclusive state
+    static constexpr u32 RESERVATION_GRANULE_MASK = 0xFFFFFFF8;
+    u32 exclusive_state = 0;
+    u32 exclusive_address = 0;
+
     static constexpr size_t RSBSize = 8; // MUST be a power of 2.
     static constexpr size_t RSBPtrMask = RSBSize - 1;
     u32 rsb_ptr = 0;
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index f3fdbf4f..d6345a63 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -135,12 +135,12 @@ INST(LDx_mult_2, "LDx (multiple structures)", "0Q001
 //INST(LD4R_2, "LD4R", "0Q001101111mmmmm1110zznnnnnttttt")
 
 // Loads and stores - Load/Store Exclusive
-//INST(STXR, "STXRB, STXRH, STXR", "zz001000000sssss011111nnnnnttttt")
-//INST(STLXR, "STLXRB, STLXRH, STLXR", "zz001000000sssss111111nnnnnttttt")
+INST(STXR, "STXRB, STXRH, STXR", "zz001000000sssss011111nnnnnttttt")
+INST(STLXR, "STLXRB, STLXRH, STLXR", "zz001000000sssss111111nnnnnttttt")
 //INST(STXP, "STXP", "1z001000001sssss0uuuuunnnnnttttt")
 //INST(STLXP, "STLXP", "1z001000001sssss1uuuuunnnnnttttt")
-//INST(LDXR, "LDXRB, LDXRH, LDXR", "zz00100001011111011111nnnnnttttt")
-//INST(LDAXRB, "LDAXRB", "zz00100001011111111111nnnnnttttt")
+INST(LDXR, "LDXRB, LDXRH, LDXR", "zz00100001011111011111nnnnnttttt")
+INST(LDAXR, "LDAXRB, LDAXRH, LDAXR", "zz00100001011111111111nnnnnttttt")
 //INST(LDXP, "LDXP", "1z001000011111110uuuuunnnnnttttt")
 //INST(LDAXP, "LDAXP", "1z001000011111111uuuuunnnnnttttt")
 INST(STLLR, "STLLRB, STLLRH, STLLR", "zz00100010011111011111nnnnnttttt")
diff --git a/src/frontend/A64/ir_emitter.cpp b/src/frontend/A64/ir_emitter.cpp
index 659ff131..204d76f1 100644
--- a/src/frontend/A64/ir_emitter.cpp
+++ b/src/frontend/A64/ir_emitter.cpp
@@ -61,6 +61,15 @@ IR::U64 IREmitter::GetTPIDRRO() {
     return Inst<IR::U64>(Opcode::A64GetTPIDRRO);
 }
 
+void IREmitter::ClearExclusive() {
+    Inst(Opcode::A64ClearExclusive);
+}
+
+void IREmitter::SetExclusive(const IR::U64& vaddr, size_t byte_size) {
+    ASSERT(byte_size == 1 || byte_size == 2 || byte_size == 4 || byte_size == 8 || byte_size == 16);
+    Inst(Opcode::A64SetExclusive, vaddr, Imm8(u8(byte_size)));
+}
+
 IR::U8 IREmitter::ReadMemory8(const IR::U64& vaddr) {
     return Inst<IR::U8>(Opcode::A64ReadMemory8, vaddr);
 }
@@ -101,6 +110,26 @@ void IREmitter::WriteMemory128(const IR::U64& vaddr, const IR::U128& value) {
     Inst(Opcode::A64WriteMemory128, vaddr, value);
 }
 
+IR::U32 IREmitter::ExclusiveWriteMemory8(const IR::U64& vaddr, const IR::U8& value) {
+    return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory8, vaddr, value);
+}
+
+IR::U32 IREmitter::ExclusiveWriteMemory16(const IR::U64& vaddr, const IR::U16& value) {
+    return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory16, vaddr, value);
+}
+
+IR::U32 IREmitter::ExclusiveWriteMemory32(const IR::U64& vaddr, const IR::U32& value) {
+    return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory32, vaddr, value);
+}
+
+IR::U32 IREmitter::ExclusiveWriteMemory64(const IR::U64& vaddr, const IR::U64& value) {
+    return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory64, vaddr, value);
+}
+
+IR::U32 IREmitter::ExclusiveWriteMemory128(const IR::U64& vaddr, const IR::U128& value) {
+    return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory128, vaddr, value);
+}
+
 IR::U32 IREmitter::GetW(Reg reg) {
     if (reg == Reg::ZR)
         return Imm32(0);
diff --git a/src/frontend/A64/ir_emitter.h b/src/frontend/A64/ir_emitter.h
index 68784c6e..24d05f1b 100644
--- a/src/frontend/A64/ir_emitter.h
+++ b/src/frontend/A64/ir_emitter.h
@@ -47,6 +47,8 @@ public:
     IR::U32 GetDCZID();
     IR::U64 GetTPIDRRO();
 
+    void ClearExclusive();
+    void SetExclusive(const IR::U64& vaddr, size_t byte_size);
     IR::U8 ReadMemory8(const IR::U64& vaddr);
     IR::U16 ReadMemory16(const IR::U64& vaddr);
     IR::U32 ReadMemory32(const IR::U64& vaddr);
@@ -57,6 +59,11 @@ public:
     void WriteMemory32(const IR::U64& vaddr, const IR::U32& value);
     void WriteMemory64(const IR::U64& vaddr, const IR::U64& value);
     void WriteMemory128(const IR::U64& vaddr, const IR::U128& value);
+    IR::U32 ExclusiveWriteMemory8(const IR::U64& vaddr, const IR::U8& value);
+    IR::U32 ExclusiveWriteMemory16(const IR::U64& vaddr, const IR::U16& value);
+    IR::U32 ExclusiveWriteMemory32(const IR::U64& vaddr, const IR::U32& value);
+    IR::U32 ExclusiveWriteMemory64(const IR::U64& vaddr, const IR::U64& value);
+    IR::U32 ExclusiveWriteMemory128(const IR::U64& vaddr, const IR::U128& value);
 
     IR::U32 GetW(Reg source_reg);
     IR::U64 GetX(Reg source_reg);
diff --git a/src/frontend/A64/translate/impl/impl.cpp b/src/frontend/A64/translate/impl/impl.cpp
index bb7440e0..98261947 100644
--- a/src/frontend/A64/translate/impl/impl.cpp
+++ b/src/frontend/A64/translate/impl/impl.cpp
@@ -308,6 +308,24 @@ void TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, AccType /*acctype*
     }
 }
 
+IR::U32 TranslatorVisitor::ExclusiveMem(IR::U64 address, size_t bytesize, AccType /*acctype*/, IR::UAnyU128 value) {
+    switch (bytesize) {
+    case 1:
+        return ir.ExclusiveWriteMemory8(address, value);
+    case 2:
+        return ir.ExclusiveWriteMemory16(address, value);
+    case 4:
+        return ir.ExclusiveWriteMemory32(address, value);
+    case 8:
+        return ir.ExclusiveWriteMemory64(address, value);
+    case 16:
+        return ir.ExclusiveWriteMemory128(address, value);
+    default:
+        ASSERT_MSG(false, "Invalid bytesize parameter {}", bytesize);
+        return {};
+    }
+}
+
 IR::U32U64 TranslatorVisitor::SignExtend(IR::UAny value, size_t to_size) {
     switch (to_size) {
     case 32:
diff --git a/src/frontend/A64/translate/impl/impl.h b/src/frontend/A64/translate/impl/impl.h
index 26aa7659..c9085ffe 100644
--- a/src/frontend/A64/translate/impl/impl.h
+++ b/src/frontend/A64/translate/impl/impl.h
@@ -63,6 +63,7 @@ struct TranslatorVisitor final {
 
     IR::UAnyU128 Mem(IR::U64 address, size_t size, AccType acctype);
     void Mem(IR::U64 address, size_t size, AccType acctype, IR::UAnyU128 value);
+    IR::U32 ExclusiveMem(IR::U64 address, size_t size, AccType acctype, IR::UAnyU128 value);
 
     IR::U32U64 SignExtend(IR::UAny value, size_t to_size);
     IR::U32U64 ZeroExtend(IR::UAny value, size_t to_size);
@@ -211,7 +212,7 @@ struct TranslatorVisitor final {
     bool STXP(Imm<1> size, Reg Rs, Reg Rt2, Reg Rn, Reg Rt);
     bool STLXP(Imm<1> size, Reg Rs, Reg Rt2, Reg Rn, Reg Rt);
     bool LDXR(Imm<2> size, Reg Rn, Reg Rt);
-    bool LDAXRB(Imm<2> size, Reg Rn, Reg Rt);
+    bool LDAXR(Imm<2> size, Reg Rn, Reg Rt);
     bool LDXP(Imm<1> size, Reg Rt2, Reg Rn, Reg Rt);
     bool LDAXP(Imm<1> size, Reg Rt2, Reg Rn, Reg Rt);
     bool STLLR(Imm<2> size, Reg Rn, Reg Rt);
diff --git a/src/frontend/A64/translate/impl/load_store_exclusive.cpp b/src/frontend/A64/translate/impl/load_store_exclusive.cpp
index 70129ebf..3b4563c5 100644
--- a/src/frontend/A64/translate/impl/load_store_exclusive.cpp
+++ b/src/frontend/A64/translate/impl/load_store_exclusive.cpp
@@ -4,10 +4,85 @@
  * General Public License version 2 or any later version.
  */
 
+#include <boost/optional.hpp>
+
 #include "frontend/A64/translate/impl/impl.h"
 
 namespace Dynarmic::A64 {
 
+static bool ExclusiveSharedDecodeAndOperation(TranslatorVisitor& tv, IREmitter& ir, size_t size, bool L, bool o0, boost::optional<Reg> Rs, Reg Rn, Reg Rt) {
+    // Shared Decode
+
+    const AccType acctype = o0 ? AccType::ORDERED : AccType::ATOMIC;
+    const MemOp memop = L ? MemOp::LOAD : MemOp::STORE;
+    const size_t elsize = 8 << size;
+    const size_t regsize = elsize == 64 ? 64 : 32;
+    const size_t datasize = elsize;
+
+    // Operation
+
+    const size_t dbytes = datasize / 8;
+
+    if (memop == MemOp::STORE && *Rs == Rn && Rn != Reg::R31) {
+        return tv.UnpredictableInstruction();
+    }
+
+    IR::U64 address;
+    if (Rn == Reg::SP) {
+        // TODO: Check SP Alignment
+        address = tv.SP(64);
+    } else {
+        address = tv.X(64, Rn);
+    }
+
+    switch (memop) {
+    case MemOp::STORE: {
+        IR::UAny data = tv.X(datasize, Rt);
+        IR::U32 status = tv.ExclusiveMem(address, dbytes, acctype, data);
+        tv.X(32, *Rs, status);
+        break;
+    }
+    case MemOp::LOAD: {
+        ir.SetExclusive(address, dbytes);
+        IR::UAny data = tv.Mem(address, dbytes, acctype);
+        tv.X(regsize, Rt, tv.ZeroExtend(data, regsize));
+        break;
+    }
+    default:
+        UNREACHABLE();
+    }
+
+    return true;
+}
+
+bool TranslatorVisitor::STXR(Imm<2> sz, Reg Rs, Reg Rn, Reg Rt) {
+    const size_t size = sz.ZeroExtend();
+    const bool L = 0;
+    const bool o0 = 0;
+    return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, Rs, Rn, Rt);
+}
+
+bool TranslatorVisitor::STLXR(Imm<2> sz, Reg Rs, Reg Rn, Reg Rt) {
+    const size_t size = sz.ZeroExtend();
+    const bool L = 0;
+    const bool o0 = 1;
+    return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, Rs, Rn, Rt);
+}
+
+bool TranslatorVisitor::LDXR(Imm<2> sz, Reg Rn, Reg Rt) {
+    const size_t size = sz.ZeroExtend();
+    const bool L = 1;
+    const bool o0 = 0;
+    return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, {}, Rn, Rt);
+}
+
+bool TranslatorVisitor::LDAXR(Imm<2> sz, Reg Rn, Reg Rt) {
+    const size_t size = sz.ZeroExtend();
+    const bool L = 1;
+    const bool o0 = 1;
+    return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, {}, Rn, Rt);
+}
+
 static bool OrderedSharedDecodeAndOperation(TranslatorVisitor& tv, size_t size, bool L, bool o0, Reg Rn, Reg Rt) {
     // Shared Decode
diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp
index dcdf2b68..7ccd2711 100644
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -89,6 +89,11 @@ bool Inst::IsExclusiveMemoryWrite() const {
     case Opcode::A32ExclusiveWriteMemory16:
     case Opcode::A32ExclusiveWriteMemory32:
     case Opcode::A32ExclusiveWriteMemory64:
+    case Opcode::A64ExclusiveWriteMemory8:
+    case Opcode::A64ExclusiveWriteMemory16:
+    case Opcode::A64ExclusiveWriteMemory32:
+    case Opcode::A64ExclusiveWriteMemory64:
+    case Opcode::A64ExclusiveWriteMemory128:
         return true;
 
     default:
@@ -249,6 +254,8 @@ bool Inst::CausesCPUException() const {
 
 bool Inst::AltersExclusiveState() const {
     return op == Opcode::A32ClearExclusive ||
            op == Opcode::A32SetExclusive ||
+           op == Opcode::A64ClearExclusive ||
+           op == Opcode::A64SetExclusive ||
            IsExclusiveMemoryWrite();
 }
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 461f663d..3796fbca 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -325,6 +325,8 @@ A32OPC(ExclusiveWriteMemory32, T::U32, T::U32, T::U32
 A32OPC(ExclusiveWriteMemory64, T::U32, T::U32, T::U32, T::U32 )
 
 // A64 Memory access
+A64OPC(ClearExclusive, T::Void, )
+A64OPC(SetExclusive, T::Void, T::U64, T::U8 )
 A64OPC(ReadMemory8, T::U8, T::U64 )
 A64OPC(ReadMemory16, T::U16, T::U64 )
 A64OPC(ReadMemory32, T::U32, T::U64 )
@@ -335,6 +337,11 @@ A64OPC(WriteMemory16, T::Void, T::U64, T::U16
 A64OPC(WriteMemory32, T::Void, T::U64, T::U32 )
 A64OPC(WriteMemory64, T::Void, T::U64, T::U64 )
 A64OPC(WriteMemory128, T::Void, T::U64, T::U128 )
+A64OPC(ExclusiveWriteMemory8, T::U32, T::U64, T::U8 )
+A64OPC(ExclusiveWriteMemory16, T::U32, T::U64, T::U16 )
+A64OPC(ExclusiveWriteMemory32, T::U32, T::U64, T::U32 )
+A64OPC(ExclusiveWriteMemory64, T::U32, T::U64, T::U64 )
+A64OPC(ExclusiveWriteMemory128, T::U32, T::U64, T::U128 )
 
 // Coprocessor
 A32OPC(CoprocInternalOperation, T::Void, T::CoprocInfo )

From 2a6619d59c2c22d92ff8eac6a26b632545d769dd Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Tue, 13 Feb 2018 12:23:04 +0000
Subject: [PATCH 2/3] A64: Implement CLREX

---
 src/frontend/A64/decoder/a64.inc | 2 +-
 src/frontend/A64/translate/impl/system.cpp | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index d6345a63..4ce46a1d 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -61,7 +61,7 @@ INST(SEVL, "SEVL", "11010
 //INST(AUTIB_2, "AUTIB, AUTIB1716, AUTIBSP, AUTIBZ, AUTIZB", "1101010100000011001000-111-11111")
 //INST(ESB, "ESB", "11010101000000110010001000011111")
 //INST(PSB, "PSB CSYNC", "11010101000000110010001000111111")
-//INST(CLREX, "CLREX", "11010101000000110011MMMM01011111")
+INST(CLREX, "CLREX", "11010101000000110011MMMM01011111")
 INST(DSB, "DSB", "11010101000000110011MMMM10011111")
 INST(DMB, "DMB", "11010101000000110011MMMM10111111")
 //INST(ISB, "ISB", "11010101000000110011MMMM11011111")
diff --git a/src/frontend/A64/translate/impl/system.cpp b/src/frontend/A64/translate/impl/system.cpp
index f6ee1bcd..2bfa0f82 100644
--- a/src/frontend/A64/translate/impl/system.cpp
+++ b/src/frontend/A64/translate/impl/system.cpp
@@ -36,6 +36,11 @@ bool TranslatorVisitor::SEVL() {
     return true;
 }
 
+bool TranslatorVisitor::CLREX(Imm<4> /*CRm*/) {
+    ir.ClearExclusive();
+    return true;
+}
+
 bool TranslatorVisitor::DSB(Imm<4> /*CRm*/) {
     ir.DataSynchronizationBarrier();
     return true;

From 8698f057d06f675e58236c3002b140e06e71dd4c Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Tue, 13 Feb 2018 12:50:50 +0000
Subject: [PATCH 3/3] A64: Implement STXP, STLXP, LDXP, LDAXP

---
 src/backend_x64/emit_x64_data_processing.cpp | 19 +++++
 src/frontend/A64/decoder/a64.inc | 8 +-
 .../translate/impl/load_store_exclusive.cpp | 75 ++++++++++++++++---
 src/frontend/ir/ir_emitter.cpp | 4 +
 src/frontend/ir/ir_emitter.h | 1 +
 src/frontend/ir/opcodes.inc | 1 +
 6 files changed, 94 insertions(+), 14 deletions(-)

diff --git a/src/backend_x64/emit_x64_data_processing.cpp b/src/backend_x64/emit_x64_data_processing.cpp
index 489d60b1..8746ca09 100644
--- a/src/backend_x64/emit_x64_data_processing.cpp
+++ b/src/backend_x64/emit_x64_data_processing.cpp
@@ -28,6 +28,25 @@ void EmitX64::EmitPack2x32To1x64(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, lo);
 }
 
+void EmitX64::EmitPack2x64To1x128(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 lo = ctx.reg_alloc.UseGpr(args[0]);
+    Xbyak::Reg64 hi = ctx.reg_alloc.UseGpr(args[1]);
+    Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        code.movq(result, lo);
+        code.pinsrq(result, hi, 1);
+    } else {
+        Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+        code.movq(result, lo);
+        code.movq(tmp, hi);
+        code.punpcklqdq(result, tmp);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
 void EmitX64::EmitLeastSignificantWord(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.DefineValue(inst, args[0]);
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index 4ce46a1d..0a44228b 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -137,12 +137,12 @@ INST(LDx_mult_2, "LDx (multiple structures)", "0Q001
 // Loads and stores - Load/Store Exclusive
 INST(STXR, "STXRB, STXRH, STXR", "zz001000000sssss011111nnnnnttttt")
 INST(STLXR, "STLXRB, STLXRH, STLXR", "zz001000000sssss111111nnnnnttttt")
-//INST(STXP, "STXP", "1z001000001sssss0uuuuunnnnnttttt")
-//INST(STLXP, "STLXP", "1z001000001sssss1uuuuunnnnnttttt")
+INST(STXP, "STXP", "1z001000001sssss0uuuuunnnnnttttt")
+INST(STLXP, "STLXP", "1z001000001sssss1uuuuunnnnnttttt")
 INST(LDXR, "LDXRB, LDXRH, LDXR", "zz00100001011111011111nnnnnttttt")
 INST(LDAXR, "LDAXRB, LDAXRH, LDAXR", "zz00100001011111111111nnnnnttttt")
-//INST(LDXP, "LDXP", "1z001000011111110uuuuunnnnnttttt")
-//INST(LDAXP, "LDAXP", "1z001000011111111uuuuunnnnnttttt")
+INST(LDXP, "LDXP", "1z001000011111110uuuuunnnnnttttt")
+INST(LDAXP, "LDAXP", "1z001000011111111uuuuunnnnnttttt")
 INST(STLLR, "STLLRB, STLLRH, STLLR", "zz00100010011111011111nnnnnttttt")
 INST(STLR, "STLRB, STLRH, STLR", "zz00100010011111111111nnnnnttttt")
 INST(LDLAR, "LDLARB, LDLARH, LDLAR", "zz00100011011111011111nnnnnttttt")
diff --git a/src/frontend/A64/translate/impl/load_store_exclusive.cpp b/src/frontend/A64/translate/impl/load_store_exclusive.cpp
index 3b4563c5..78316fac 100644
--- a/src/frontend/A64/translate/impl/load_store_exclusive.cpp
+++ b/src/frontend/A64/translate/impl/load_store_exclusive.cpp
@@ -10,20 +10,24 @@
 
 namespace Dynarmic::A64 {
 
-static bool ExclusiveSharedDecodeAndOperation(TranslatorVisitor& tv, IREmitter& ir, size_t size, bool L, bool o0, boost::optional<Reg> Rs, Reg Rn, Reg Rt) {
+static bool ExclusiveSharedDecodeAndOperation(TranslatorVisitor& tv, IREmitter& ir, bool pair, size_t size, bool L, bool o0, boost::optional<Reg> Rs, boost::optional<Reg> Rt2, Reg Rn, Reg Rt) {
     // Shared Decode
 
     const AccType acctype = o0 ? AccType::ORDERED : AccType::ATOMIC;
     const MemOp memop = L ? MemOp::LOAD : MemOp::STORE;
     const size_t elsize = 8 << size;
     const size_t regsize = elsize == 64 ? 64 : 32;
-    const size_t datasize = elsize;
+    const size_t datasize = pair ? elsize * 2 : elsize;
 
     // Operation
 
     const size_t dbytes = datasize / 8;
 
-    if (memop == MemOp::STORE && *Rs == Rn && Rn != Reg::R31) {
+    if (memop == MemOp::LOAD && pair && Rt == *Rt2) {
+        return tv.UnpredictableInstruction();
+    } else if (memop == MemOp::STORE && (*Rs == Rt || (pair && *Rs == *Rt2))) {
+        return tv.UnpredictableInstruction();
+    } else if (memop == MemOp::STORE && *Rs == Rn && Rn != Reg::R31) {
         return tv.UnpredictableInstruction();
     }
 
@@ -37,15 +41,30 @@ static bool ExclusiveSharedDecodeAndOperation(TranslatorVisitor& tv, IREmitter&
 
     switch (memop) {
     case MemOp::STORE: {
-        IR::UAny data = tv.X(datasize, Rt);
+        IR::UAnyU128 data;
+        if (pair && elsize == 64) {
+            data = ir.Pack2x64To1x128(tv.X(64, Rt), tv.X(64, *Rt2));
+        } else if (pair && elsize == 32) {
+            data = ir.Pack2x32To1x64(tv.X(32, Rt), tv.X(32, *Rt2));
+        } else {
+            data = tv.X(datasize, Rt);
+        }
         IR::U32 status = tv.ExclusiveMem(address, dbytes, acctype, data);
         tv.X(32, *Rs, status);
         break;
     }
     case MemOp::LOAD: {
         ir.SetExclusive(address, dbytes);
-        IR::UAny data = tv.Mem(address, dbytes, acctype);
-        tv.X(regsize, Rt, tv.ZeroExtend(data, regsize));
+        IR::UAnyU128 data = tv.Mem(address, dbytes, acctype);
+        if (pair && elsize == 64) {
+            tv.X(64, Rt, ir.VectorGetElement(64, data, 0));
+            tv.X(64, *Rt2, ir.VectorGetElement(64, data, 1));
+        } else if (pair && elsize == 32) {
+            tv.X(32, Rt, ir.LeastSignificantWord(data));
+            tv.X(32, *Rt2, ir.MostSignificantWord(data).result);
+        } else {
+            tv.X(regsize, Rt, tv.ZeroExtend(data, regsize));
+        }
         break;
     }
     default:
@@ -56,31 +75,67 @@ static bool ExclusiveSharedDecodeAndOperation(TranslatorVisitor& tv, IREmitter&
 }
 
 bool TranslatorVisitor::STXR(Imm<2> sz, Reg Rs, Reg Rn, Reg Rt) {
+    const bool pair = false;
     const size_t size = sz.ZeroExtend();
     const bool L = 0;
     const bool o0 = 0;
-    return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, Rs, Rn, Rt);
+    return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, Rs, {}, Rn, Rt);
 }
 
 bool TranslatorVisitor::STLXR(Imm<2> sz, Reg Rs, Reg Rn, Reg Rt) {
+    const bool pair = false;
     const size_t size = sz.ZeroExtend();
     const bool L = 0;
     const bool o0 = 1;
-    return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, Rs, Rn, Rt);
+    return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, Rs, {}, Rn, Rt);
 }
 
+bool TranslatorVisitor::STXP(Imm<1> sz, Reg Rs, Reg Rt2, Reg Rn, Reg Rt) {
+    const bool pair = true;
+    const size_t size = concatenate(Imm<1>{1}, sz).ZeroExtend();
+    const bool L = 0;
+    const bool o0 = 0;
+    return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, Rs, Rt2, Rn, Rt);
+}
+
+bool TranslatorVisitor::STLXP(Imm<1> sz, Reg Rs, Reg Rt2, Reg Rn, Reg Rt) {
+    const bool pair = true;
+    const size_t size = concatenate(Imm<1>{1}, sz).ZeroExtend();
+    const bool L = 0;
+    const bool o0 = 1;
+    return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, Rs, Rt2, Rn, Rt);
+}
+
 bool TranslatorVisitor::LDXR(Imm<2> sz, Reg Rn, Reg Rt) {
+    const bool pair = false;
     const size_t size = sz.ZeroExtend();
     const bool L = 1;
     const bool o0 = 0;
-    return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, {}, Rn, Rt);
+    return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, {}, {}, Rn, Rt);
 }
 
 bool TranslatorVisitor::LDAXR(Imm<2> sz, Reg Rn, Reg Rt) {
+    const bool pair = false;
     const size_t size = sz.ZeroExtend();
     const bool L = 1;
     const bool o0 = 1;
-    return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, {}, Rn, Rt);
+    return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, {}, {}, Rn, Rt);
 }
 
+bool TranslatorVisitor::LDXP(Imm<1> sz, Reg Rt2, Reg Rn, Reg Rt) {
+    const bool pair = true;
+    const size_t size = concatenate(Imm<1>{1}, sz).ZeroExtend();
+    const bool L = 1;
+    const bool o0 = 0;
+    return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, {}, Rt2, Rn, Rt);
+}
+
+bool TranslatorVisitor::LDAXP(Imm<1> sz, Reg Rt2, Reg Rn, Reg Rt) {
+    const bool pair = true;
+    const size_t size = concatenate(Imm<1>{1}, sz).ZeroExtend();
+    const bool L = 1;
+    const bool o0 = 1;
+    return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, {}, Rt2, Rn, Rt);
+}
+
 static bool OrderedSharedDecodeAndOperation(TranslatorVisitor& tv, size_t size, bool L, bool o0, Reg Rn, Reg Rt) {
     // Shared Decode
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 4d33f33a..342815b1 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -38,6 +38,10 @@ U64 IREmitter::Pack2x32To1x64(const U32& lo, const U32& hi) {
     return Inst<U64>(Opcode::Pack2x32To1x64, lo, hi);
 }
 
+U128 IREmitter::Pack2x64To1x128(const U64& lo, const U64& hi) {
+    return Inst<U128>(Opcode::Pack2x64To1x128, lo, hi);
+}
+
 U32 IREmitter::LeastSignificantWord(const U64& value) {
     return Inst<U32>(Opcode::LeastSignificantWord, value);
 }
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 3fd1f569..1f94edff 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -68,6 +68,7 @@ public:
     void PushRSB(const LocationDescriptor& return_location);
 
     U64 Pack2x32To1x64(const U32& lo, const U32& hi);
+    U128 Pack2x64To1x128(const U64& lo, const U64& hi);
     U32 LeastSignificantWord(const U64& value);
     ResultAndCarry<U32> MostSignificantWord(const U64& value);
     U16 LeastSignificantHalf(U32U64 value);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 3796fbca..21a7003a 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -77,6 +77,7 @@ OPCODE(NZCVFromPackedFlags, T::NZCVFlags, T::U32
 
 // Calculations
 OPCODE(Pack2x32To1x64, T::U64, T::U32, T::U32 )
+OPCODE(Pack2x64To1x128, T::U128, T::U64, T::U64 )
 OPCODE(LeastSignificantWord, T::U32, T::U64 )
 OPCODE(MostSignificantWord, T::U32, T::U64 )
 OPCODE(LeastSignificantHalf, T::U16, T::U32 )
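
Editor's note: for readers following these patches, the stand-alone C++ sketch below summarises the exclusive-monitor model they implement. It is an illustration only, not dynarmic code: the field and mask names are taken from A64JitState in the first patch, but the ExclusiveMonitor struct and free functions are hypothetical, and in the JIT the actual store goes through the generated write fallbacks rather than a callback.

// Illustrative sketch of the check emitted by EmitExclusiveWrite (not part of the patches).
#include <cstdint>
#include <functional>

struct ExclusiveMonitor {
    static constexpr std::uint32_t RESERVATION_GRANULE_MASK = 0xFFFFFFF8; // 8-byte granule
    std::uint32_t exclusive_state = 0;   // 1 = monitor open (a load-exclusive has run)
    std::uint32_t exclusive_address = 0; // low 32 bits of the monitored address
};

// LDXR/LDAXR and IR::SetExclusive: open the monitor for this address.
inline void SetExclusive(ExclusiveMonitor& m, std::uint64_t vaddr) {
    m.exclusive_state = 1;
    m.exclusive_address = static_cast<std::uint32_t>(vaddr);
}

// CLREX and IR::ClearExclusive: close the monitor without storing.
inline void ClearExclusive(ExclusiveMonitor& m) {
    m.exclusive_state = 0;
}

// STXR/STLXR and the A64ExclusiveWriteMemory* opcodes: returns the status value
// written to Rs (0 = store performed, 1 = store refused), mirroring the
// passed/je/jne sequence in EmitExclusiveWrite.
inline std::uint32_t ExclusiveWrite(ExclusiveMonitor& m, std::uint64_t vaddr,
                                    const std::function<void()>& do_write) {
    if (m.exclusive_state == 0)
        return 1; // no open monitor: fail
    if ((static_cast<std::uint32_t>(vaddr) ^ m.exclusive_address) & ExclusiveMonitor::RESERVATION_GRANULE_MASK)
        return 1; // different reservation granule: fail
    m.exclusive_state = 0; // the monitor is consumed once the check passes
    do_write();            // perform the memory write (the write fallback in the JIT)
    return 0;              // success
}

The pair forms added in the third patch (STXP/STLXP, LDXP/LDAXP) reuse exactly this check; they differ only in packing two registers into one wider value (Pack2x64To1x128 or Pack2x32To1x64) before the exclusive store and splitting the loaded value again afterwards.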