From 2068658a82eb7fff72db040fd141357bc788dd1e Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Thu, 27 Feb 2020 13:01:39 -0400
Subject: [PATCH 1/5] A64 Interface: Allow changing processor id.

This commit allows the JIT to be used per guest thread and change its
core when the thread is migrated.
---
 include/dynarmic/A64/a64.h        | 2 ++
 src/backend/x64/a64_emit_x64.h    | 6 +++++-
 src/backend/x64/a64_interface.cpp | 9 +++++++++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/include/dynarmic/A64/a64.h b/include/dynarmic/A64/a64.h
index f51f6fad..5a84e4e0 100644
--- a/include/dynarmic/A64/a64.h
+++ b/include/dynarmic/A64/a64.h
@@ -105,6 +105,8 @@ public:
     /// Modify PSTATE
     void SetPstate(std::uint32_t value);
 
+    void ChangeProcessorID(std::size_t new_processor);
+
     /// Clears exclusive state for this core.
     void ClearExclusiveState();
 
diff --git a/src/backend/x64/a64_emit_x64.h b/src/backend/x64/a64_emit_x64.h
index c8fd2566..71dbbf2f 100644
--- a/src/backend/x64/a64_emit_x64.h
+++ b/src/backend/x64/a64_emit_x64.h
@@ -47,8 +47,12 @@ public:
 
     void InvalidateCacheRanges(const boost::icl::interval_set<u64>& ranges);
 
+    void ChangeProcessorID(size_t value) {
+        conf.processor_id = value;
+    }
+
 protected:
-    const A64::UserConfig conf;
+    A64::UserConfig conf;
     A64::Jit* jit_interface;
     BlockRangeInformation<u64> block_ranges;
 
diff --git a/src/backend/x64/a64_interface.cpp b/src/backend/x64/a64_interface.cpp
index ee1763a4..2986fcbe 100644
--- a/src/backend/x64/a64_interface.cpp
+++ b/src/backend/x64/a64_interface.cpp
@@ -185,6 +185,11 @@ public:
         jit_state.SetPstate(value);
     }
 
+    void ChangeProcessorID(size_t value) {
+        conf.processor_id = value;
+        emitter.ChangeProcessorID(value);
+    }
+
     void ClearExclusiveState() {
         jit_state.exclusive_state = 0;
     }
@@ -379,6 +384,10 @@ void Jit::SetPstate(u32 value) {
     impl->SetPstate(value);
 }
 
+void Jit::ChangeProcessorID(size_t new_processor) {
+    impl->ChangeProcessorID(new_processor);
+}
+
 void Jit::ClearExclusiveState() {
     impl->ClearExclusiveState();
 }

From b5d8b24a3c4a19a6f70cd5da37615605804b54f6 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Sat, 7 Mar 2020 15:07:29 -0400
Subject: [PATCH 2/5] Exclusive Monitor: Allow clearing a single processor.

---
 include/dynarmic/A64/exclusive_monitor.h  | 4 +++-
 src/backend/x64/a64_exclusive_monitor.cpp | 7 +++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/dynarmic/A64/exclusive_monitor.h b/include/dynarmic/A64/exclusive_monitor.h
index 0e0c3b11..6e2d167b 100644
--- a/include/dynarmic/A64/exclusive_monitor.h
+++ b/include/dynarmic/A64/exclusive_monitor.h
@@ -46,6 +46,8 @@ public:
     /// Unmark everything.
     void Clear();
+    /// Unmark processor id
+    void Clear(size_t processor_id);
 
 private:
     bool CheckAndClear(size_t processor_id, VAddr address, size_t size);
@@ -53,7 +55,7 @@ private:
     void Lock();
     void Unlock();
 
-    static constexpr VAddr RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFF0ull;
+    static constexpr VAddr RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFFFull;
     static constexpr VAddr INVALID_EXCLUSIVE_ADDRESS = 0xDEAD'DEAD'DEAD'DEADull;
     std::atomic_flag is_locked;
     std::vector<VAddr> exclusive_addresses;
diff --git a/src/backend/x64/a64_exclusive_monitor.cpp b/src/backend/x64/a64_exclusive_monitor.cpp
index bd0582a0..0744c4e1 100644
--- a/src/backend/x64/a64_exclusive_monitor.cpp
+++ b/src/backend/x64/a64_exclusive_monitor.cpp
@@ -60,5 +60,12 @@ void ExclusiveMonitor::Clear() {
     Unlock();
 }
 
+void ExclusiveMonitor::Clear(size_t processor_id) {
+    Lock();
+    exclusive_addresses[processor_id] = INVALID_EXCLUSIVE_ADDRESS;
+    Unlock();
+}
+
+
 } // namespace A64
 } // namespace Dynarmic

From 97b9d3e05833e0a1378c2ecf97960071f5fa992a Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Tue, 10 Mar 2020 18:07:44 -0400
Subject: [PATCH 3/5] Exclusive Monitor: Rework exclusive monitor interface.

---
 include/dynarmic/A64/config.h               |   7 +
 include/dynarmic/A64/exclusive_monitor.h    |  34 ++-
 src/backend/x64/a64_emit_x64.cpp            | 266 ++++++++++--------
 src/backend/x64/a64_exclusive_monitor.cpp   |  17 +-
 src/backend/x64/a64_jitstate.h              |   1 -
 src/frontend/A64/ir_emitter.cpp             |  25 +-
 src/frontend/A64/ir_emitter.h               |   6 +-
 src/frontend/A64/translate/impl/impl.cpp    |  19 +-
 src/frontend/A64/translate/impl/impl.h      |   1 +
 .../translate/impl/load_store_exclusive.cpp |   3 +-
 src/frontend/ir/microinstruction.cpp        |  18 +-
 src/frontend/ir/microinstruction.h          |   2 +
 src/frontend/ir/opcodes.inc                 |   6 +-
 tests/A64/a64.cpp                           |   3 -
 tests/A64/testenv.h                         |  21 ++
 15 files changed, 278 insertions(+), 151 deletions(-)

diff --git a/include/dynarmic/A64/config.h b/include/dynarmic/A64/config.h
index 476fcc48..7c7fe3bf 100644
--- a/include/dynarmic/A64/config.h
+++ b/include/dynarmic/A64/config.h
@@ -83,6 +83,13 @@ struct UserCallbacks {
     virtual void MemoryWrite64(VAddr vaddr, std::uint64_t value) = 0;
     virtual void MemoryWrite128(VAddr vaddr, Vector value) = 0;
 
+    // Writes through these callbacks may not be aligned.
+    virtual bool MemoryWriteExclusive8(VAddr vaddr, std::uint8_t value, std::uint8_t expected) = 0;
+    virtual bool MemoryWriteExclusive16(VAddr vaddr, std::uint16_t value, std::uint16_t expected) = 0;
+    virtual bool MemoryWriteExclusive32(VAddr vaddr, std::uint32_t value, std::uint32_t expected) = 0;
+    virtual bool MemoryWriteExclusive64(VAddr vaddr, std::uint64_t value, std::uint64_t expected) = 0;
+    virtual bool MemoryWriteExclusive128(VAddr vaddr, Vector value, Vector expected) = 0;
+
     // If this callback returns true, the JIT will assume MemoryRead* callbacks will always
     // return the same value at any point in time for this vaddr. The JIT may use this information
     // in optimizations.
diff --git a/include/dynarmic/A64/exclusive_monitor.h b/include/dynarmic/A64/exclusive_monitor.h
index 6e2d167b..3033d541 100644
--- a/include/dynarmic/A64/exclusive_monitor.h
+++ b/include/dynarmic/A64/exclusive_monitor.h
@@ -6,14 +6,17 @@
 #pragma once
 
 #include <atomic>
+#include <array>
 #include <cstddef>
 #include <cstdint>
+#include <cstring>
 #include <vector>
 
 namespace Dynarmic {
 namespace A64 {
 
 using VAddr = std::uint64_t;
+using Vector = std::array<std::uint64_t, 2>;
 
 class ExclusiveMonitor {
 public:
@@ -26,31 +29,45 @@ public:
 
     /// Marks a region containing [address, address+size) to be exclusive to
     /// processor processor_id.
-    void Mark(size_t processor_id, VAddr address, size_t size);
+    template <typename T, typename Function>
+    T ReadAndMark(size_t processor_id, VAddr address, Function op) {
+        static_assert(std::is_trivially_copyable_v<T>);
+        const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
+
+        Lock();
+        exclusive_addresses[processor_id] = masked_address;
+        const T value = op();
+        std::memcpy(exclusive_values[processor_id].data(), &value, sizeof(T));
+        Unlock();
+        return value;
+    }
 
     /// Checks to see if processor processor_id has exclusive access to the
     /// specified region. If it does, executes the operation then clears
     /// the exclusive state for processors if their exclusive region(s)
     /// contain [address, address+size).
-    template <typename Function>
-    bool DoExclusiveOperation(size_t processor_id, VAddr address, size_t size, Function op) {
-        if (!CheckAndClear(processor_id, address, size)) {
+    template <typename T, typename Function>
+    bool DoExclusiveOperation(size_t processor_id, VAddr address, Function op) {
+        static_assert(std::is_trivially_copyable_v<T>);
+        if (!CheckAndClear(processor_id, address)) {
             return false;
         }
 
-        op();
+        T saved_value;
+        std::memcpy(&saved_value, exclusive_values[processor_id].data(), sizeof(T));
+        const bool result = op(saved_value);
 
         Unlock();
-        return true;
+        return result;
     }
 
     /// Unmark everything.
     void Clear();
     /// Unmark processor id
-    void Clear(size_t processor_id);
+    void ClearProcessor(size_t processor_id);
 
 private:
-    bool CheckAndClear(size_t processor_id, VAddr address, size_t size);
+    bool CheckAndClear(size_t processor_id, VAddr address);
 
     void Lock();
     void Unlock();
@@ -59,6 +76,7 @@ private:
     static constexpr VAddr INVALID_EXCLUSIVE_ADDRESS = 0xDEAD'DEAD'DEAD'DEADull;
     std::atomic_flag is_locked;
     std::vector<VAddr> exclusive_addresses;
+    std::vector<Vector> exclusive_values;
 };
 
 } // namespace A64
diff --git a/src/backend/x64/a64_emit_x64.cpp b/src/backend/x64/a64_emit_x64.cpp
index d8019531..b5980039 100644
--- a/src/backend/x64/a64_emit_x64.cpp
+++ b/src/backend/x64/a64_emit_x64.cpp
@@ -703,30 +703,6 @@ void A64EmitX64::EmitA64ClearExclusive(A64EmitContext&, IR::Inst*) {
     code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
 }
 
-void A64EmitX64::EmitA64SetExclusive(A64EmitContext& ctx, IR::Inst* inst) {
-    if (conf.global_monitor) {
-        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-        ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
-
-        code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
-        code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
-        code.CallLambda(
-            [](A64::UserConfig& conf, u64 vaddr, u8 size) {
-                conf.global_monitor->Mark(conf.processor_id, vaddr, size);
-            }
-        );
-
-        return;
-    }
-
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ASSERT(args[1].IsImmediate());
-    const Xbyak::Reg64 address = ctx.reg_alloc.UseGpr(args[0]);
-
-    code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
-    code.mov(qword[r15 + offsetof(A64JitState, exclusive_address)], address);
-}
-
 namespace {
 
 constexpr size_t page_bits = 12;
@@ -951,6 +927,89 @@ void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, xmm1);
 }
 
+void A64EmitX64::EmitA64ExclusiveReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {
+    ASSERT(conf.global_monitor != nullptr);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.HostCall(inst, {}, args[0]);
+    code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
+    code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
+    code.CallLambda(
+        [](A64::UserConfig& conf, u64 vaddr) -> u8 {
+            return conf.global_monitor->ReadAndMark<u8>(conf.processor_id, vaddr, [&]() -> u8 {
+                return conf.callbacks->MemoryRead8(vaddr);
+            });
+        }
+    );
+}
+
+void A64EmitX64::EmitA64ExclusiveReadMemory16(A64EmitContext& ctx, IR::Inst* inst) {
+    ASSERT(conf.global_monitor != nullptr);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.HostCall(inst, {}, args[0]);
+    code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
+    code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
+    code.CallLambda(
+        [](A64::UserConfig& conf, u64 vaddr) -> u16 {
+            return conf.global_monitor->ReadAndMark<u16>(conf.processor_id, vaddr, [&]() -> u16 {
+                return conf.callbacks->MemoryRead16(vaddr);
+            });
+        }
+    );
+}
+
+void A64EmitX64::EmitA64ExclusiveReadMemory32(A64EmitContext& ctx, IR::Inst* inst) {
+    ASSERT(conf.global_monitor != nullptr);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.HostCall(inst, {}, args[0]);
+    code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
+    code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
+    code.CallLambda(
+        [](A64::UserConfig& conf, u64 vaddr) -> u32 {
+            return conf.global_monitor->ReadAndMark<u32>(conf.processor_id, vaddr, [&]() -> u32 {
+                return conf.callbacks->MemoryRead32(vaddr);
+            });
+        }
+    );
+}
+
+void A64EmitX64::EmitA64ExclusiveReadMemory64(A64EmitContext& ctx, IR::Inst* inst) {
+    ASSERT(conf.global_monitor != nullptr);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.HostCall(inst, {}, args[0]);
+    code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
+    code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
+    code.CallLambda(
+        [](A64::UserConfig& conf, u64 vaddr) -> u64 {
+            return conf.global_monitor->ReadAndMark<u64>(conf.processor_id, vaddr, [&]() -> u64 {
+                return conf.callbacks->MemoryRead64(vaddr);
+            });
+        }
+    );
+}
+
+void A64EmitX64::EmitA64ExclusiveReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
+    ASSERT(conf.global_monitor != nullptr);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    ctx.reg_alloc.EndOfAllocScope();
+    ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
+
+    code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
+    code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
+    code.sub(rsp, 16 + ABI_SHADOW_SPACE);
+    code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
+    code.CallLambda(
+        [](A64::UserConfig& conf, u64 vaddr, A64::Vector& ret) {
+            ret = conf.global_monitor->ReadAndMark<A64::Vector>(conf.processor_id, vaddr, [&]() -> A64::Vector {
+                return conf.callbacks->MemoryRead128(vaddr);
+            });
+        }
+    );
+    code.movups(result, xword[rsp + ABI_SHADOW_SPACE]);
+    code.add(rsp, 16 + ABI_SHADOW_SPACE);
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
 void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
     if (conf.page_table) {
         EmitDirectPageTableMemoryWrite(ctx, inst, 8);
@@ -1024,105 +1083,84 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
 }
 
 void A64EmitX64::EmitExclusiveWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize) {
-    if (conf.global_monitor) {
-        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ASSERT(conf.global_monitor != nullptr);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-        if (bitsize != 128) {
-            ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]);
-        } else {
-            ctx.reg_alloc.Use(args[0], ABI_PARAM2);
-            ctx.reg_alloc.Use(args[1], HostLoc::XMM1);
-            ctx.reg_alloc.EndOfAllocScope();
-            ctx.reg_alloc.HostCall(inst);
-        }
-
-        Xbyak::Label end;
-
-        code.mov(code.ABI_RETURN, u32(1));
-        code.cmp(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
-        code.je(end);
-        code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
-        switch (bitsize) {
-        case 8:
-            code.CallLambda(
-                [](A64::UserConfig& conf, u64 vaddr, u8 value) -> u32 {
-                    return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, 1, [&]{
-                        conf.callbacks->MemoryWrite8(vaddr, value);
-                    }) ? 0 : 1;
-                }
-            );
-            break;
-        case 16:
-            code.CallLambda(
-                [](A64::UserConfig& conf, u64 vaddr, u16 value) -> u32 {
-                    return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, 2, [&]{
-                        conf.callbacks->MemoryWrite16(vaddr, value);
-                    }) ? 0 : 1;
-                }
-            );
-            break;
-        case 32:
-            code.CallLambda(
-                [](A64::UserConfig& conf, u64 vaddr, u32 value) -> u32 {
-                    return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, 4, [&]{
-                        conf.callbacks->MemoryWrite32(vaddr, value);
-                    }) ? 0 : 1;
-                }
-            );
-            break;
-        case 64:
-            code.CallLambda(
-                [](A64::UserConfig& conf, u64 vaddr, u64 value) -> u32 {
-                    return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, 8, [&]{
-                        conf.callbacks->MemoryWrite64(vaddr, value);
-                    }) ? 0 : 1;
-                }
-            );
-            break;
-        case 128:
-            code.sub(rsp, 16 + ABI_SHADOW_SPACE);
-            code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
-            code.movaps(xword[code.ABI_PARAM3], xmm1);
-            code.CallLambda(
-                [](A64::UserConfig& conf, u64 vaddr, A64::Vector& value) -> u32 {
-                    return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, 16, [&]{
-                        conf.callbacks->MemoryWrite128(vaddr, value);
-                    }) ? 0 : 1;
-                }
-            );
-            code.add(rsp, 16 + ABI_SHADOW_SPACE);
-            break;
-        default:
-            UNREACHABLE();
-        }
-        code.L(end);
-
-        return;
+    if (bitsize != 128) {
+        ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]);
+    } else {
+        ctx.reg_alloc.Use(args[0], ABI_PARAM2);
+        ctx.reg_alloc.Use(args[1], HostLoc::XMM1);
+        ctx.reg_alloc.EndOfAllocScope();
+        ctx.reg_alloc.HostCall(inst);
     }
 
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
-    const int value_idx = bitsize != 128
-                        ? ctx.reg_alloc.UseGpr(args[1]).getIdx()
-                        : ctx.reg_alloc.UseXmm(args[1]).getIdx();
-
     Xbyak::Label end;
-    const Xbyak::Reg32 passed = ctx.reg_alloc.ScratchGpr().cvt32();
-    const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
 
-    code.mov(passed, u32(1));
+    code.mov(code.ABI_RETURN, u32(1));
     code.cmp(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
     code.je(end);
-    code.mov(tmp, vaddr);
-    code.xor_(tmp, qword[r15 + offsetof(A64JitState, exclusive_address)]);
-    code.test(tmp, static_cast<u32>(A64JitState::RESERVATION_GRANULE_MASK & 0xFFFF'FFFF));
-    code.jne(end);
     code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
-    code.call(write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value_idx)]);
-    code.xor_(passed, passed);
+    code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
+    switch (bitsize) {
+    case 8:
+        code.CallLambda(
+            [](A64::UserConfig& conf, u64 vaddr, u8 value) -> u32 {
+                return conf.global_monitor->DoExclusiveOperation<u8>(conf.processor_id, vaddr,
+                    [&](u8 expected) -> bool {
+                        return conf.callbacks->MemoryWriteExclusive8(vaddr, value, expected);
+                    }) ? 0 : 1;
+            }
+        );
+        break;
+    case 16:
+        code.CallLambda(
+            [](A64::UserConfig& conf, u64 vaddr, u16 value) -> u32 {
+                return conf.global_monitor->DoExclusiveOperation<u16>(conf.processor_id, vaddr,
+                    [&](u16 expected) -> bool {
+                        return conf.callbacks->MemoryWriteExclusive16(vaddr, value, expected);
+                    }) ? 0 : 1;
+            }
+        );
+        break;
+    case 32:
+        code.CallLambda(
+            [](A64::UserConfig& conf, u64 vaddr, u32 value) -> u32 {
+                return conf.global_monitor->DoExclusiveOperation<u32>(conf.processor_id, vaddr,
+                    [&](u32 expected) -> bool {
+                        return conf.callbacks->MemoryWriteExclusive32(vaddr, value, expected);
+                    }) ? 0 : 1;
+            }
+        );
+        break;
+    case 64:
+        code.CallLambda(
+            [](A64::UserConfig& conf, u64 vaddr, u64 value) -> u32 {
+                return conf.global_monitor->DoExclusiveOperation<u64>(conf.processor_id, vaddr,
+                    [&](u64 expected) -> bool {
+                        return conf.callbacks->MemoryWriteExclusive64(vaddr, value, expected);
+                    }) ? 0 : 1;
+            }
+        );
+        break;
+    case 128:
+        code.sub(rsp, 16 + ABI_SHADOW_SPACE);
+        code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
+        code.movaps(xword[code.ABI_PARAM3], xmm1);
+        code.CallLambda(
+            [](A64::UserConfig& conf, u64 vaddr, A64::Vector& value) -> u32 {
+                return conf.global_monitor->DoExclusiveOperation<A64::Vector>(conf.processor_id, vaddr,
+                    [&](A64::Vector expected) -> bool {
+                        return conf.callbacks->MemoryWriteExclusive128(vaddr, value, expected);
+                    }) ? 0 : 1;
+            }
+        );
+        code.add(rsp, 16 + ABI_SHADOW_SPACE);
+        break;
+    default:
+        UNREACHABLE();
+    }
     code.L(end);
-
-    ctx.reg_alloc.DefineValue(inst, passed);
 }
 
 void A64EmitX64::EmitA64ExclusiveWriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
diff --git a/src/backend/x64/a64_exclusive_monitor.cpp b/src/backend/x64/a64_exclusive_monitor.cpp
index 0744c4e1..a1a2d22f 100644
--- a/src/backend/x64/a64_exclusive_monitor.cpp
+++ b/src/backend/x64/a64_exclusive_monitor.cpp
@@ -11,7 +11,8 @@
 namespace Dynarmic {
 namespace A64 {
 
-ExclusiveMonitor::ExclusiveMonitor(size_t processor_count) : exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS) {
+ExclusiveMonitor::ExclusiveMonitor(size_t processor_count) :
+    exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) {
     Unlock();
 }
 
@@ -19,15 +20,6 @@ size_t ExclusiveMonitor::GetProcessorCount() const {
     return exclusive_addresses.size();
 }
 
-void ExclusiveMonitor::Mark(size_t processor_id, VAddr address, size_t size) {
-    ASSERT(size <= 16);
-    const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
-
-    Lock();
-    exclusive_addresses[processor_id] = masked_address;
-    Unlock();
-}
-
 void ExclusiveMonitor::Lock() {
     while (is_locked.test_and_set(std::memory_order_acquire)) {}
 }
@@ -36,8 +28,7 @@ void ExclusiveMonitor::Unlock() {
     is_locked.clear(std::memory_order_release);
 }
 
-bool ExclusiveMonitor::CheckAndClear(size_t processor_id, VAddr address, size_t size) {
-    ASSERT(size <= 16);
+bool ExclusiveMonitor::CheckAndClear(size_t processor_id, VAddr address) {
     const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
 
     Lock();
@@ -60,7 +51,7 @@ void ExclusiveMonitor::Clear() {
     Unlock();
 }
 
-void ExclusiveMonitor::Clear(size_t processor_id) {
+void ExclusiveMonitor::ClearProcessor(size_t processor_id) {
     Lock();
     exclusive_addresses[processor_id] = INVALID_EXCLUSIVE_ADDRESS;
     Unlock();
diff --git a/src/backend/x64/a64_jitstate.h b/src/backend/x64/a64_jitstate.h
index a9972eca..eb5574b6 100644
--- a/src/backend/x64/a64_jitstate.h
+++ b/src/backend/x64/a64_jitstate.h
@@ -59,7 +59,6 @@ struct A64JitState {
     // Exclusive state
     static constexpr u64 RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFF0ull;
     u8 exclusive_state = 0;
-    u64 exclusive_address = 0;
 
     static constexpr size_t RSBSize = 8; // MUST be a power of 2.
     static constexpr size_t RSBPtrMask = RSBSize - 1;
diff --git a/src/frontend/A64/ir_emitter.cpp b/src/frontend/A64/ir_emitter.cpp
index 6e87efdf..cfd0acaa 100644
--- a/src/frontend/A64/ir_emitter.cpp
+++ b/src/frontend/A64/ir_emitter.cpp
@@ -100,11 +100,6 @@ void IREmitter::ClearExclusive() {
     Inst(Opcode::A64ClearExclusive);
 }
 
-void IREmitter::SetExclusive(const IR::U64& vaddr, size_t byte_size) {
-    ASSERT(byte_size == 1 || byte_size == 2 || byte_size == 4 || byte_size == 8 || byte_size == 16);
-    Inst(Opcode::A64SetExclusive, vaddr, Imm8(u8(byte_size)));
-}
-
 IR::U8 IREmitter::ReadMemory8(const IR::U64& vaddr) {
     return Inst<IR::U8>(Opcode::A64ReadMemory8, vaddr);
 }
@@ -125,6 +120,26 @@ IR::U128 IREmitter::ReadMemory128(const IR::U64& vaddr) {
     return Inst<IR::U128>(Opcode::A64ReadMemory128, vaddr);
 }
 
+IR::U8 IREmitter::ExclusiveReadMemory8(const IR::U64& vaddr) {
+    return Inst<IR::U8>(Opcode::A64ExclusiveReadMemory8, vaddr);
+}
+
+IR::U16 IREmitter::ExclusiveReadMemory16(const IR::U64& vaddr) {
+    return Inst<IR::U16>(Opcode::A64ExclusiveReadMemory16, vaddr);
+}
+
+IR::U32 IREmitter::ExclusiveReadMemory32(const IR::U64& vaddr) {
+    return Inst<IR::U32>(Opcode::A64ExclusiveReadMemory32, vaddr);
+}
+
+IR::U64 IREmitter::ExclusiveReadMemory64(const IR::U64& vaddr) {
+    return Inst<IR::U64>(Opcode::A64ExclusiveReadMemory64, vaddr);
+}
+
+IR::U128 IREmitter::ExclusiveReadMemory128(const IR::U64& vaddr) {
+    return Inst<IR::U128>(Opcode::A64ExclusiveReadMemory128, vaddr);
+}
+
 void IREmitter::WriteMemory8(const IR::U64& vaddr, const IR::U8& value) {
     Inst(Opcode::A64WriteMemory8, vaddr, value);
 }
diff --git a/src/frontend/A64/ir_emitter.h b/src/frontend/A64/ir_emitter.h
index f4837d47..8d463b7b 100644
--- a/src/frontend/A64/ir_emitter.h
+++ b/src/frontend/A64/ir_emitter.h
@@ -54,12 +54,16 @@ public:
     void SetTPIDR(const IR::U64& value);
 
     void ClearExclusive();
-    void SetExclusive(const IR::U64& vaddr, size_t byte_size);
     IR::U8 ReadMemory8(const IR::U64& vaddr);
     IR::U16 ReadMemory16(const IR::U64& vaddr);
     IR::U32 ReadMemory32(const IR::U64& vaddr);
     IR::U64 ReadMemory64(const IR::U64& vaddr);
     IR::U128 ReadMemory128(const IR::U64& vaddr);
+    IR::U8 ExclusiveReadMemory8(const IR::U64& vaddr);
+    IR::U16 ExclusiveReadMemory16(const IR::U64& vaddr);
+    IR::U32 ExclusiveReadMemory32(const IR::U64& vaddr);
+    IR::U64 ExclusiveReadMemory64(const IR::U64& vaddr);
+    IR::U128 ExclusiveReadMemory128(const IR::U64& vaddr);
     void WriteMemory8(const IR::U64& vaddr, const IR::U8& value);
     void WriteMemory16(const IR::U64& vaddr, const IR::U16& value);
     void WriteMemory32(const IR::U64& vaddr, const IR::U32& value);
diff --git a/src/frontend/A64/translate/impl/impl.cpp b/src/frontend/A64/translate/impl/impl.cpp
index bb57d677..65d464bb 100644
--- a/src/frontend/A64/translate/impl/impl.cpp
+++ b/src/frontend/A64/translate/impl/impl.cpp
@@ -308,7 +308,24 @@ void TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, IR::AccType /*acc_type*/, IR::UAnyU128 value) {
     }
 }
 
-IR::U32 TranslatorVisitor::ExclusiveMem(IR::U64 address, size_t bytesize, IR::AccType /*acc_type*/, IR::UAnyU128 value) {
+IR::UAnyU128 TranslatorVisitor::ExclusiveMem(IR::U64 address, size_t bytesize, IR::AccType /*acctype*/) {
+    switch (bytesize) {
+    case 1:
+        return ir.ExclusiveReadMemory8(address);
+    case 2:
+        return ir.ExclusiveReadMemory16(address);
+    case 4:
+        return ir.ExclusiveReadMemory32(address);
+    case 8:
+        return ir.ExclusiveReadMemory64(address);
+    case 16:
+        return ir.ExclusiveReadMemory128(address);
+    default:
+        ASSERT_FALSE("Invalid bytesize parameter {}", bytesize);
+    }
+}
+
+IR::U32 TranslatorVisitor::ExclusiveMem(IR::U64 address, size_t bytesize, IR::AccType /*acctype*/, IR::UAnyU128 value) {
     switch (bytesize) {
     case 1:
         return ir.ExclusiveWriteMemory8(address, value);
diff --git a/src/frontend/A64/translate/impl/impl.h b/src/frontend/A64/translate/impl/impl.h
index b3dfb5f0..c199bbd1 100644
--- a/src/frontend/A64/translate/impl/impl.h
+++ b/src/frontend/A64/translate/impl/impl.h
@@ -57,6 +57,7 @@ struct TranslatorVisitor final {
 
     IR::UAnyU128 Mem(IR::U64 address, size_t size, IR::AccType acctype);
     void Mem(IR::U64 address, size_t size, IR::AccType acctype, IR::UAnyU128 value);
+    IR::UAnyU128 ExclusiveMem(IR::U64 address, size_t size, IR::AccType acctype);
    IR::U32 ExclusiveMem(IR::U64 address, size_t size, IR::AccType acctype, IR::UAnyU128 value);
 
     IR::U32U64 SignExtend(IR::UAny value, size_t to_size);
diff --git a/src/frontend/A64/translate/impl/load_store_exclusive.cpp b/src/frontend/A64/translate/impl/load_store_exclusive.cpp
index b1ce1d11..9c92b025 100644
--- a/src/frontend/A64/translate/impl/load_store_exclusive.cpp
+++ b/src/frontend/A64/translate/impl/load_store_exclusive.cpp
@@ -56,8 +56,7 @@ static bool ExclusiveSharedDecodeAndOperation(TranslatorVisitor& v, bool pair, size_t size, bool L, bool o0, std::optional<Reg> Rs, std::optional<Reg> Rt2, Reg Rn, Reg Rt) {
         break;
     }
     case IR::MemOp::LOAD: {
-        v.ir.SetExclusive(address, dbytes);
-        const IR::UAnyU128 data = v.Mem(address, dbytes, acctype);
+        const IR::UAnyU128 data = v.ExclusiveMem(address, dbytes, acctype);
         if (pair && elsize == 64) {
             v.X(64, Rt, v.ir.VectorGetElement(64, data, 0));
             v.X(64, *Rt2, v.ir.VectorGetElement(64, data, 1));
diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp
index 146da373..7aa002b0 100644
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -99,6 +99,20 @@ bool Inst::IsSharedMemoryReadOrWrite() const {
     return IsSharedMemoryRead() || IsSharedMemoryWrite();
 }
 
+bool Inst::IsExclusiveMemoryRead() const {
+    switch (op) {
+    case Opcode::A64ExclusiveReadMemory8:
+    case Opcode::A64ExclusiveReadMemory16:
+    case Opcode::A64ExclusiveReadMemory32:
+    case Opcode::A64ExclusiveReadMemory64:
+    case Opcode::A64ExclusiveReadMemory128:
+        return true;
+
+    default:
+        return false;
+    }
+}
+
 bool Inst::IsExclusiveMemoryWrite() const {
     switch (op) {
     case Opcode::A32ExclusiveWriteMemory8:
@@ -118,7 +132,7 @@ bool Inst::IsExclusiveMemoryWrite() const {
 }
 
 bool Inst::IsMemoryRead() const {
-    return IsSharedMemoryRead();
+    return IsSharedMemoryRead() || IsExclusiveMemoryRead();
 }
 
 bool Inst::IsMemoryWrite() const {
@@ -457,7 +471,7 @@ bool Inst::AltersExclusiveState() const {
     return op == Opcode::A32ClearExclusive ||
            op == Opcode::A32SetExclusive ||
            op == Opcode::A64ClearExclusive ||
-           op == Opcode::A64SetExclusive ||
+           IsExclusiveMemoryRead() ||
            IsExclusiveMemoryWrite();
 }
 
diff --git a/src/frontend/ir/microinstruction.h b/src/frontend/ir/microinstruction.h
index 0facfa0b..83f90fe9 100644
--- a/src/frontend/ir/microinstruction.h
+++ b/src/frontend/ir/microinstruction.h
@@ -44,6 +44,8 @@ public:
     bool IsSharedMemoryWrite() const;
     /// Determines whether or not this instruction performs a shared memory read or write.
     bool IsSharedMemoryReadOrWrite() const;
+    /// Determines whether or not this instruction performs an atomic memory read.
+    bool IsExclusiveMemoryRead() const;
     /// Determines whether or not this instruction performs an atomic memory write.
     bool IsExclusiveMemoryWrite() const;
 
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 3a101084..d6fc7a48 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -637,12 +637,16 @@ A32OPC(ExclusiveWriteMemory64, U32, U32, U32, U32 )
 
 // A64 Memory access
 A64OPC(ClearExclusive,         Void,              )
-A64OPC(SetExclusive,           Void, U64,  U8     )
 A64OPC(ReadMemory8,            U8,   U64          )
 A64OPC(ReadMemory16,           U16,  U64          )
 A64OPC(ReadMemory32,           U32,  U64          )
 A64OPC(ReadMemory64,           U64,  U64          )
 A64OPC(ReadMemory128,          U128, U64          )
+A64OPC(ExclusiveReadMemory8,   U8,   U64          )
+A64OPC(ExclusiveReadMemory16,  U16,  U64          )
+A64OPC(ExclusiveReadMemory32,  U32,  U64          )
+A64OPC(ExclusiveReadMemory64,  U64,  U64          )
+A64OPC(ExclusiveReadMemory128, U128, U64          )
 A64OPC(WriteMemory8,           Void, U64,  U8     )
 A64OPC(WriteMemory16,          Void, U64,  U16    )
 A64OPC(WriteMemory32,          Void, U64,  U32    )
diff --git a/tests/A64/a64.cpp b/tests/A64/a64.cpp
index 4a971054..d7fef1fa 100644
--- a/tests/A64/a64.cpp
+++ b/tests/A64/a64.cpp
@@ -287,9 +287,6 @@ TEST_CASE("A64: 128-bit exclusive read/write", "[a64]") {
     conf.callbacks = &env;
     conf.processor_id = 0;
 
-    SECTION("Local Monitor Only") {
-        conf.global_monitor = nullptr;
-    }
     SECTION("Global Monitor") {
         conf.global_monitor = &monitor;
     }
diff --git a/tests/A64/testenv.h b/tests/A64/testenv.h
index c917f596..6f416888 100644
--- a/tests/A64/testenv.h
+++ b/tests/A64/testenv.h
@@ -84,6 +84,27 @@ public:
         MemoryWrite64(vaddr + 8, value[1]);
     }
 
+    bool MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, [[maybe_unused]] std::uint8_t expected) override {
+        MemoryWrite8(vaddr, value);
+        return true;
+    }
+    bool MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, [[maybe_unused]] std::uint16_t expected) override {
+        MemoryWrite16(vaddr, value);
+        return true;
+    }
+    bool MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, [[maybe_unused]] std::uint32_t expected) override {
+        MemoryWrite32(vaddr, value);
+        return true;
+    }
+    bool MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, [[maybe_unused]] std::uint64_t expected) override {
+        MemoryWrite64(vaddr, value);
+        return true;
+    }
+    bool MemoryWriteExclusive128(u64 vaddr, Vector value, [[maybe_unused]] Vector expected) override {
+        MemoryWrite128(vaddr, value);
+        return true;
+    }
+
     void InterpreterFallback(u64 pc, size_t num_instructions) override { ASSERT_MSG(false, "InterpreterFallback({:016x}, {})", pc, num_instructions); }
 
     void CallSVC(std::uint32_t swi) override { ASSERT_MSG(false, "CallSVC({})", swi); }

From 41521ed856d496326bb3ea8a199d32b24887f621 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Sat, 21 Mar 2020 03:36:01 -0400
Subject: [PATCH 4/5] User Config: Add option to specify wall clock CNTPCT.

---
 include/dynarmic/A64/config.h              | 4 ++++
 src/backend/x64/a64_emit_x64.cpp           | 4 +++-
 src/backend/x64/a64_interface.cpp          | 3 ++-
 src/frontend/A64/translate/impl/system.cpp | 2 +-
 src/frontend/A64/translate/translate.h     | 4 ++++
 5 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/include/dynarmic/A64/config.h b/include/dynarmic/A64/config.h
index 7c7fe3bf..b4c56ec0 100644
--- a/include/dynarmic/A64/config.h
+++ b/include/dynarmic/A64/config.h
@@ -200,6 +200,10 @@ struct UserConfig {
     /// definite behaviour for some unpredictable instructions.
     bool define_unpredictable_behaviour = false;
 
+    /// This tells the translator a wall clock will be used, thus allowing it
+    /// to avoid writing certain unnecessary code only needed for cycle timers.
+    bool wall_clock_cntpct = false;
+
     /// This enables the fast dispatcher.
     bool enable_fast_dispatch = true;
 
diff --git a/src/backend/x64/a64_emit_x64.cpp b/src/backend/x64/a64_emit_x64.cpp
index b5980039..34fa7e54 100644
--- a/src/backend/x64/a64_emit_x64.cpp
+++ b/src/backend/x64/a64_emit_x64.cpp
@@ -651,7 +651,9 @@ void A64EmitX64::EmitA64GetCNTFRQ(A64EmitContext& ctx, IR::Inst* inst) {
 
 void A64EmitX64::EmitA64GetCNTPCT(A64EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.HostCall(inst);
-    code.UpdateTicks();
+    if (!conf.wall_clock_cntpct) {
+        code.UpdateTicks();
+    }
     Devirtualize<&A64::UserCallbacks::GetCNTPCT>(conf.callbacks).EmitCall(code);
 }
 
diff --git a/src/backend/x64/a64_interface.cpp b/src/backend/x64/a64_interface.cpp
index 2986fcbe..cf58f281 100644
--- a/src/backend/x64/a64_interface.cpp
+++ b/src/backend/x64/a64_interface.cpp
@@ -233,7 +233,8 @@ private:
 
         // JIT Compile
        const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
-        IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code, {conf.define_unpredictable_behaviour});
+        IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code,
+                                            {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
         Optimization::A64CallbackConfigPass(ir_block, conf);
         if (conf.enable_optimizations) {
             Optimization::A64GetSetElimination(ir_block);
diff --git a/src/frontend/A64/translate/impl/system.cpp b/src/frontend/A64/translate/impl/system.cpp
index 1bac82c2..d82abafe 100644
--- a/src/frontend/A64/translate/impl/system.cpp
+++ b/src/frontend/A64/translate/impl/system.cpp
@@ -120,7 +120,7 @@ bool TranslatorVisitor::MRS(Imm<1> o0, Imm<3> op1, Imm<4> CRn, Imm<4> CRm, Imm<3> op2, Reg Rt) {
         return true;
     case SystemRegisterEncoding::CNTPCT_EL0:
         // HACK: Ensure that this is the first instruction in the block it's emitted in, so the cycle count is most up-to-date.
-        if (!ir.block.empty()) {
+        if (!ir.block.empty() && !options.wall_clock_cntpct) {
             ir.block.CycleCount()--;
             ir.SetTerm(IR::Term::LinkBlock{*ir.current_location});
             return false;
diff --git a/src/frontend/A64/translate/translate.h b/src/frontend/A64/translate/translate.h
index 73cf3bbe..355b94c7 100644
--- a/src/frontend/A64/translate/translate.h
+++ b/src/frontend/A64/translate/translate.h
@@ -26,6 +26,10 @@ struct TranslationOptions {
     /// If this is true, we define some behaviour for some instructions.
     bool define_unpredictable_behaviour = false;
 
+    /// This tells the translator a wall clock will be used, thus allowing it
+    /// to avoid writing certain unnecessary code only needed for cycle timers.
+    bool wall_clock_cntpct = false;
+
     /// This changes what IR we emit when we translate a hint instruction.
     /// If this is false, we treat the instruction as a NOP.
     /// If this is true, we emit an ExceptionRaised instruction.

From d7abae1e3122192a99af019ea83788221e3d64b6 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Sun, 5 Apr 2020 12:11:36 -0400
Subject: [PATCH 5/5] A64: Implement Exceptional Exit.

---
 include/dynarmic/A64/a64.h        |  6 ++++++
 src/backend/x64/a64_interface.cpp | 13 +++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/include/dynarmic/A64/a64.h b/include/dynarmic/A64/a64.h
index 5a84e4e0..2e9fe15a 100644
--- a/include/dynarmic/A64/a64.h
+++ b/include/dynarmic/A64/a64.h
@@ -60,6 +60,12 @@ public:
      */
     void HaltExecution();
 
+    /**
+     * Exits execution from a callback; the callback must rewind the stack or
+     * never return to dynarmic from its current stack.
+ */ + void ExceptionalExit(); + /// Read Stack Pointer std::uint64_t GetSP() const; /// Modify Stack Pointer diff --git a/src/backend/x64/a64_interface.cpp b/src/backend/x64/a64_interface.cpp index cf58f281..05a76478 100644 --- a/src/backend/x64/a64_interface.cpp +++ b/src/backend/x64/a64_interface.cpp @@ -83,6 +83,15 @@ public: PerformRequestedCacheInvalidation(); } + void ExceptionalExit() { + if (!conf.wall_clock_cntpct) { + const s64 ticks = jit_state.cycles_to_run - jit_state.cycles_remaining; + conf.callbacks->AddTicks(ticks); + } + PerformRequestedCacheInvalidation(); + is_executing = false; + } + void ClearCache() { invalidate_entire_cache = true; RequestCacheInvalidation(); @@ -313,6 +322,10 @@ void Jit::HaltExecution() { impl->HaltExecution(); } +void Jit::ExceptionalExit() { + impl->ExceptionalExit(); +} + u64 Jit::GetSP() const { return impl->GetSP(); }