forked from suyu/suyu
Merge pull request #6162 from degasus/no_spin_loops
video_core: Avoid spin loops.
This commit is contained in:
commit
d6e5e053a6
6 changed files with 64 additions and 33 deletions
|
@ -83,11 +83,15 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
T PopWait() {
|
||||
void Wait() {
|
||||
if (Empty()) {
|
||||
std::unique_lock lock{cv_mutex};
|
||||
cv.wait(lock, [this]() { return !Empty(); });
|
||||
}
|
||||
}
|
||||
|
||||
T PopWait() {
|
||||
Wait();
|
||||
T t;
|
||||
Pop(t);
|
||||
return t;
|
||||
|
@ -156,6 +160,10 @@ public:
|
|||
return spsc_queue.Pop(t);
|
||||
}
|
||||
|
||||
void Wait() {
|
||||
spsc_queue.Wait();
|
||||
}
|
||||
|
||||
T PopWait() {
|
||||
return spsc_queue.PopWait();
|
||||
}
|
||||
|
|
|
@ -296,7 +296,7 @@ struct System::Impl {
|
|||
exit_lock = false;
|
||||
|
||||
if (gpu_core) {
|
||||
gpu_core->WaitIdle();
|
||||
gpu_core->ShutDown();
|
||||
}
|
||||
|
||||
services.reset();
|
||||
|
|
|
@ -517,8 +517,8 @@ void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
|
|||
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
|
||||
}
|
||||
|
||||
void GPU::WaitIdle() const {
|
||||
gpu_thread.WaitIdle();
|
||||
void GPU::ShutDown() {
|
||||
gpu_thread.ShutDown();
|
||||
}
|
||||
|
||||
void GPU::OnCommandListEnd() {
|
||||
|
|
|
@ -219,8 +219,8 @@ public:
|
|||
return *shader_notify;
|
||||
}
|
||||
|
||||
// Waits for the GPU to finish working
|
||||
void WaitIdle() const;
|
||||
// Stops the GPU execution and waits for the GPU to finish working
|
||||
void ShutDown();
|
||||
|
||||
/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
|
||||
void WaitFence(u32 syncpoint_id, u32 value);
|
||||
|
|
|
@ -29,8 +29,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
|
|||
system.RegisterHostThread();
|
||||
|
||||
// Wait for first GPU command before acquiring the window context
|
||||
while (state.queue.Empty())
|
||||
;
|
||||
state.queue.Wait();
|
||||
|
||||
// If emulation was stopped during disk shader loading, abort before trying to acquire context
|
||||
if (!state.is_running) {
|
||||
|
@ -57,11 +56,17 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
|
|||
} else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
||||
rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
|
||||
} else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
|
||||
return;
|
||||
ASSERT(state.is_running == false);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
state.signaled_fence.store(next.fence);
|
||||
if (next.block) {
|
||||
// We have to lock the write_lock to ensure that the condition_variable wait does not get a
|
||||
// race between the check and the lock itself.
|
||||
std::lock_guard lk(state.write_lock);
|
||||
state.cv.notify_all();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -69,13 +74,7 @@ ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
|
|||
: system{system_}, is_async{is_async_} {}
|
||||
|
||||
ThreadManager::~ThreadManager() {
|
||||
if (!thread.joinable()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Notify GPU thread that a shutdown is pending
|
||||
PushCommand(EndProcessingCommand());
|
||||
thread.join();
|
||||
ShutDown();
|
||||
}
|
||||
|
||||
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
|
||||
|
@ -112,9 +111,8 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
|
|||
case Settings::GPUAccuracy::Extreme: {
|
||||
auto& gpu = system.GPU();
|
||||
u64 fence = gpu.RequestFlush(addr, size);
|
||||
PushCommand(GPUTickCommand());
|
||||
while (fence > gpu.CurrentFlushRequestFence()) {
|
||||
}
|
||||
PushCommand(GPUTickCommand(), true);
|
||||
ASSERT(fence <= gpu.CurrentFlushRequestFence());
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -131,23 +129,45 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
|||
rasterizer->OnCPUWrite(addr, size);
|
||||
}
|
||||
|
||||
void ThreadManager::WaitIdle() const {
|
||||
while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) &&
|
||||
system.IsPoweredOn()) {
|
||||
void ThreadManager::ShutDown() {
|
||||
if (!state.is_running) {
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard lk(state.write_lock);
|
||||
state.is_running = false;
|
||||
state.cv.notify_all();
|
||||
}
|
||||
|
||||
if (!thread.joinable()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Notify GPU thread that a shutdown is pending
|
||||
PushCommand(EndProcessingCommand());
|
||||
thread.join();
|
||||
}
|
||||
|
||||
void ThreadManager::OnCommandListEnd() {
|
||||
PushCommand(OnCommandListEndCommand());
|
||||
}
|
||||
|
||||
u64 ThreadManager::PushCommand(CommandData&& command_data) {
|
||||
const u64 fence{++state.last_fence};
|
||||
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
|
||||
|
||||
u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
|
||||
if (!is_async) {
|
||||
// In synchronous GPU mode, block the caller until the command has executed
|
||||
WaitIdle();
|
||||
block = true;
|
||||
}
|
||||
|
||||
std::unique_lock lk(state.write_lock);
|
||||
const u64 fence{++state.last_fence};
|
||||
state.queue.Push(CommandDataContainer(std::move(command_data), fence, block));
|
||||
|
||||
if (block) {
|
||||
state.cv.wait(lk, [this, fence] {
|
||||
return fence <= state.signaled_fence.load(std::memory_order_relaxed) ||
|
||||
!state.is_running;
|
||||
});
|
||||
}
|
||||
|
||||
return fence;
|
||||
|
|
|
@ -90,21 +90,24 @@ using CommandData =
|
|||
struct CommandDataContainer {
|
||||
CommandDataContainer() = default;
|
||||
|
||||
explicit CommandDataContainer(CommandData&& data_, u64 next_fence_)
|
||||
: data{std::move(data_)}, fence{next_fence_} {}
|
||||
explicit CommandDataContainer(CommandData&& data_, u64 next_fence_, bool block_)
|
||||
: data{std::move(data_)}, fence{next_fence_}, block(block_) {}
|
||||
|
||||
CommandData data;
|
||||
u64 fence{};
|
||||
bool block{};
|
||||
};
|
||||
|
||||
/// Struct used to synchronize the GPU thread
|
||||
struct SynchState final {
|
||||
std::atomic_bool is_running{true};
|
||||
|
||||
using CommandQueue = Common::MPSCQueue<CommandDataContainer>;
|
||||
using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
|
||||
std::mutex write_lock;
|
||||
CommandQueue queue;
|
||||
u64 last_fence{};
|
||||
std::atomic<u64> signaled_fence{};
|
||||
std::condition_variable cv;
|
||||
};
|
||||
|
||||
/// Class used to manage the GPU thread
|
||||
|
@ -132,14 +135,14 @@ public:
|
|||
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size);
|
||||
|
||||
// Wait until the gpu thread is idle.
|
||||
void WaitIdle() const;
|
||||
// Stops the GPU execution and waits for the GPU to finish working
|
||||
void ShutDown();
|
||||
|
||||
void OnCommandListEnd();
|
||||
|
||||
private:
|
||||
/// Pushes a command to be executed by the GPU thread
|
||||
u64 PushCommand(CommandData&& command_data);
|
||||
u64 PushCommand(CommandData&& command_data, bool block = false);
|
||||
|
||||
Core::System& system;
|
||||
const bool is_async;
|
||||
|
|
Loading…
Reference in a new issue