1
0
Fork 0
forked from suyu/suyu

Merge pull request #2055 from bunnei/gpu-thread

Asynchronous GPU command processing
This commit is contained in:
bunnei 2019-03-07 10:41:53 -05:00 committed by GitHub
commit 4f352833a5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
26 changed files with 529 additions and 52 deletions

View file

@ -36,7 +36,8 @@
#include "frontend/applets/software_keyboard.h" #include "frontend/applets/software_keyboard.h"
#include "frontend/applets/web_browser.h" #include "frontend/applets/web_browser.h"
#include "video_core/debug_utils/debug_utils.h" #include "video_core/debug_utils/debug_utils.h"
#include "video_core/gpu.h" #include "video_core/gpu_asynch.h"
#include "video_core/gpu_synch.h"
#include "video_core/renderer_base.h" #include "video_core/renderer_base.h"
#include "video_core/video_core.h" #include "video_core/video_core.h"
@ -129,10 +130,16 @@ struct System::Impl {
return ResultStatus::ErrorVideoCore; return ResultStatus::ErrorVideoCore;
} }
gpu_core = std::make_unique<Tegra::GPU>(system, renderer->Rasterizer()); is_powered_on = true;
if (Settings::values.use_asynchronous_gpu_emulation) {
gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
} else {
gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
}
cpu_core_manager.Initialize(system); cpu_core_manager.Initialize(system);
is_powered_on = true;
LOG_DEBUG(Core, "Initialized OK"); LOG_DEBUG(Core, "Initialized OK");
// Reset counters and set time origin to current frame // Reset counters and set time origin to current frame

View file

@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
auto& instance = Core::System::GetInstance(); auto& instance = Core::System::GetInstance();
instance.GetPerfStats().EndGameFrame(); instance.GetPerfStats().EndGameFrame();
instance.Renderer().SwapBuffers(framebuffer); instance.GPU().SwapBuffers(framebuffer);
} }
} // namespace Service::Nvidia::Devices } // namespace Service::Nvidia::Devices

View file

@ -178,7 +178,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
auto& gpu = system_instance.GPU(); auto& gpu = system_instance.GPU();
auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
ASSERT(cpu_addr); ASSERT(cpu_addr);
system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size); gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);

View file

@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
return 0; return 0;
} }
static void PushGPUEntries(Tegra::CommandList&& entries) {
if (entries.empty()) {
return;
}
auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
dma_pusher.Push(std::move(entries));
dma_pusher.DispatchCalls();
}
u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
if (input.size() < sizeof(IoctlSubmitGpfifo)) { if (input.size() < sizeof(IoctlSubmitGpfifo)) {
UNIMPLEMENTED(); UNIMPLEMENTED();
@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
params.num_entries * sizeof(Tegra::CommandListHeader)); params.num_entries * sizeof(Tegra::CommandListHeader));
PushGPUEntries(std::move(entries)); Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
params.fence_out.id = 0; params.fence_out.id = 0;
params.fence_out.value = 0; params.fence_out.value = 0;
@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
Memory::ReadBlock(params.address, entries.data(), Memory::ReadBlock(params.address, entries.data(),
params.num_entries * sizeof(Tegra::CommandListHeader)); params.num_entries * sizeof(Tegra::CommandListHeader));
PushGPUEntries(std::move(entries)); Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
params.fence_out.id = 0; params.fence_out.id = 0;
params.fence_out.value = 0; params.fence_out.value = 0;

View file

@ -186,7 +186,7 @@ void NVFlinger::Compose() {
// There was no queued buffer to draw, render previous frame // There was no queued buffer to draw, render previous frame
system_instance.GetPerfStats().EndGameFrame(); system_instance.GetPerfStats().EndGameFrame();
system_instance.Renderer().SwapBuffers({}); system_instance.GPU().SwapBuffers({});
continue; continue;
} }

View file

@ -356,16 +356,16 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
const VAddr overlap_end = std::min(end, region_end); const VAddr overlap_end = std::min(end, region_end);
const VAddr overlap_size = overlap_end - overlap_start; const VAddr overlap_size = overlap_end - overlap_start;
auto& rasterizer = system_instance.Renderer().Rasterizer(); auto& gpu = system_instance.GPU();
switch (mode) { switch (mode) {
case FlushMode::Flush: case FlushMode::Flush:
rasterizer.FlushRegion(overlap_start, overlap_size); gpu.FlushRegion(overlap_start, overlap_size);
break; break;
case FlushMode::Invalidate: case FlushMode::Invalidate:
rasterizer.InvalidateRegion(overlap_start, overlap_size); gpu.InvalidateRegion(overlap_start, overlap_size);
break; break;
case FlushMode::FlushAndInvalidate: case FlushMode::FlushAndInvalidate:
rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size); gpu.FlushAndInvalidateRegion(overlap_start, overlap_size);
break; break;
} }
}; };

View file

@ -393,6 +393,7 @@ struct Values {
u16 frame_limit; u16 frame_limit;
bool use_disk_shader_cache; bool use_disk_shader_cache;
bool use_accurate_gpu_emulation; bool use_accurate_gpu_emulation;
bool use_asynchronous_gpu_emulation;
float bg_red; float bg_red;
float bg_green; float bg_green;

View file

@ -162,6 +162,8 @@ TelemetrySession::TelemetrySession() {
Settings::values.use_disk_shader_cache); Settings::values.use_disk_shader_cache);
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation", AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
Settings::values.use_accurate_gpu_emulation); Settings::values.use_accurate_gpu_emulation);
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation);
AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
Settings::values.use_docked_mode); Settings::values.use_docked_mode);
} }

View file

@ -17,6 +17,12 @@ add_library(video_core STATIC
engines/shader_header.h engines/shader_header.h
gpu.cpp gpu.cpp
gpu.h gpu.h
gpu_asynch.cpp
gpu_asynch.h
gpu_synch.cpp
gpu_synch.h
gpu_thread.cpp
gpu_thread.h
macro_interpreter.cpp macro_interpreter.cpp
macro_interpreter.h macro_interpreter.h
memory_manager.cpp memory_manager.cpp

View file

@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) {
// We have to invalidate the destination region to evict any outdated surfaces from the cache. // We have to invalidate the destination region to evict any outdated surfaces from the cache.
// We do this before actually writing the new data because the destination address might contain // We do this before actually writing the new data because the destination address might contain
// a dirty surface that will have to be written back to memory. // a dirty surface that will have to be written back to memory.
rasterizer.InvalidateRegion(*dest_address, sizeof(u32)); Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
Memory::Write32(*dest_address, data); Memory::Write32(*dest_address, data);
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();

View file

@ -92,12 +92,12 @@ void MaxwellDMA::HandleCopy() {
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
// copying. // copying.
rasterizer.FlushRegion(*source_cpu, src_size); Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
// We have to invalidate the destination region to evict any outdated surfaces from the // We have to invalidate the destination region to evict any outdated surfaces from the
// cache. We do this before actually writing the new data because the destination address // cache. We do this before actually writing the new data because the destination address
// might contain a dirty surface that will have to be written back to memory. // might contain a dirty surface that will have to be written back to memory.
rasterizer.InvalidateRegion(*dest_cpu, dst_size); Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
}; };
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {

View file

@ -12,7 +12,7 @@
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h" #include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
#include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h"
namespace Tegra { namespace Tegra {
@ -28,7 +28,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
UNREACHABLE(); UNREACHABLE();
} }
GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) { GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
auto& rasterizer{renderer.Rasterizer()};
memory_manager = std::make_unique<Tegra::MemoryManager>(); memory_manager = std::make_unique<Tegra::MemoryManager>();
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);

View file

@ -16,8 +16,8 @@ class System;
} }
namespace VideoCore { namespace VideoCore {
class RasterizerInterface; class RendererBase;
} } // namespace VideoCore
namespace Tegra { namespace Tegra {
@ -119,9 +119,10 @@ enum class EngineID {
MAXWELL_DMA_COPY_A = 0xB0B5, MAXWELL_DMA_COPY_A = 0xB0B5,
}; };
class GPU final { class GPU {
public: public:
explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer); explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
~GPU(); ~GPU();
struct MethodCall { struct MethodCall {
@ -200,8 +201,42 @@ public:
}; };
} regs{}; } regs{};
/// Push GPU command entries to be processed
virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
/// Swap buffers (render frame)
virtual void SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
private: private:
void ProcessBindMethod(const MethodCall& method_call);
void ProcessSemaphoreTriggerMethod();
void ProcessSemaphoreRelease();
void ProcessSemaphoreAcquire();
/// Calls a GPU puller method.
void CallPullerMethod(const MethodCall& method_call);
/// Calls a GPU engine method.
void CallEngineMethod(const MethodCall& method_call);
/// Determines where the method should be executed.
bool ExecuteMethodOnEngine(const MethodCall& method_call);
protected:
std::unique_ptr<Tegra::DmaPusher> dma_pusher; std::unique_ptr<Tegra::DmaPusher> dma_pusher;
VideoCore::RendererBase& renderer;
private:
std::unique_ptr<Tegra::MemoryManager> memory_manager; std::unique_ptr<Tegra::MemoryManager> memory_manager;
/// Mapping of command subchannels to their bound engine ids. /// Mapping of command subchannels to their bound engine ids.
@ -217,18 +252,6 @@ private:
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
/// Inline memory engine /// Inline memory engine
std::unique_ptr<Engines::KeplerMemory> kepler_memory; std::unique_ptr<Engines::KeplerMemory> kepler_memory;
void ProcessBindMethod(const MethodCall& method_call);
void ProcessSemaphoreTriggerMethod();
void ProcessSemaphoreRelease();
void ProcessSemaphoreAcquire();
// Calls a GPU puller method.
void CallPullerMethod(const MethodCall& method_call);
// Calls a GPU engine method.
void CallEngineMethod(const MethodCall& method_call);
// Determines where the method should be executed.
bool ExecuteMethodOnEngine(const MethodCall& method_call);
}; };
#define ASSERT_REG_POSITION(field_name, position) \ #define ASSERT_REG_POSITION(field_name, position) \

View file

@ -0,0 +1,37 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/gpu_asynch.h"
#include "video_core/gpu_thread.h"
#include "video_core/renderer_base.h"
namespace VideoCommon {
GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
: Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
GPUAsynch::~GPUAsynch() = default;
void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
gpu_thread.SubmitList(std::move(entries));
}
void GPUAsynch::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
gpu_thread.SwapBuffers(std::move(framebuffer));
}
void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size);
}
void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
gpu_thread.InvalidateRegion(addr, size);
}
void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size);
}
} // namespace VideoCommon

View file

@ -0,0 +1,37 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace VideoCommon {
namespace GPUThread {
class ThreadManager;
} // namespace GPUThread
/// Implementation of GPU interface that runs the GPU asynchronously
class GPUAsynch : public Tegra::GPU {
public:
explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
~GPUAsynch();
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
private:
GPUThread::ThreadManager gpu_thread;
};
} // namespace VideoCommon

View file

@ -0,0 +1,37 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/gpu_synch.h"
#include "video_core/renderer_base.h"
namespace VideoCommon {
GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
: Tegra::GPU(system, renderer) {}
GPUSynch::~GPUSynch() = default;
void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
dma_pusher->Push(std::move(entries));
dma_pusher->DispatchCalls();
}
void GPUSynch::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
renderer.SwapBuffers(std::move(framebuffer));
}
void GPUSynch::FlushRegion(VAddr addr, u64 size) {
renderer.Rasterizer().FlushRegion(addr, size);
}
void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
renderer.Rasterizer().InvalidateRegion(addr, size);
}
void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
}
} // namespace VideoCommon

View file

@ -0,0 +1,29 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/gpu.h"
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU synchronously
class GPUSynch : public Tegra::GPU {
public:
explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
~GPUSynch();
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
};
} // namespace VideoCommon

View file

@ -0,0 +1,152 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/frontend/scope_acquire_window_context.h"
#include "core/settings.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
#include "video_core/renderer_base.h"
namespace VideoCommon::GPUThread {
/// Executes a single GPU thread command
static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
Tegra::DmaPusher& dma_pusher) {
if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
dma_pusher.Push(std::move(submit_list->entries));
dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
renderer.SwapBuffers(data->framebuffer);
} else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
} else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
} else {
UNREACHABLE();
}
}
/// Runs the GPU thread
static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
SynchState& state) {
MicroProfileOnThreadCreate("GpuThread");
auto WaitForWakeup = [&]() {
std::unique_lock<std::mutex> lock{state.signal_mutex};
state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
};
// Wait for first GPU command before acquiring the window context
WaitForWakeup();
// If emulation was stopped during disk shader loading, abort before trying to acquire context
if (!state.is_running) {
return;
}
Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
while (state.is_running) {
if (!state.is_running) {
return;
}
{
// Thread has been woken up, so make the previous write queue the next read queue
std::lock_guard<std::mutex> lock{state.signal_mutex};
std::swap(state.push_queue, state.pop_queue);
}
// Execute all of the GPU commands
while (!state.pop_queue->empty()) {
ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
state.pop_queue->pop();
}
state.UpdateIdleState();
// Signal that the GPU thread has finished processing commands
if (state.is_idle) {
state.idle_condition.notify_one();
}
// Wait for CPU thread to send more GPU commands
WaitForWakeup();
}
}
ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
: renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
std::ref(dma_pusher), std::ref(state)},
thread_id{thread.get_id()} {}
ThreadManager::~ThreadManager() {
{
// Notify GPU thread that a shutdown is pending
std::lock_guard<std::mutex> lock{state.signal_mutex};
state.is_running = false;
}
state.signal_condition.notify_one();
thread.join();
}
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
if (entries.empty()) {
return;
}
PushCommand(SubmitListCommand(std::move(entries)), false, false);
}
void ThreadManager::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
}
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
// Block the CPU when using accurate emulation
PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
}
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
PushCommand(InvalidateRegionCommand(addr, size), true, true);
}
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
InvalidateRegion(addr, size);
}
void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
{
std::lock_guard<std::mutex> lock{state.signal_mutex};
if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
// Execute the command synchronously on the current thread
ExecuteCommand(&command_data, renderer, dma_pusher);
return;
}
// Push the command to the GPU thread
state.UpdateIdleState();
state.push_queue->emplace(command_data);
}
// Signal the GPU thread that commands are pending
state.signal_condition.notify_one();
if (wait_for_idle) {
// Wait for the GPU to be idle (all commands to be executed)
std::unique_lock<std::mutex> lock{state.idle_mutex};
state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
}
}
} // namespace VideoCommon::GPUThread

136
src/video_core/gpu_thread.h Normal file
View file

@ -0,0 +1,136 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <atomic>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <optional>
#include <thread>
#include <variant>
namespace Tegra {
struct FramebufferConfig;
class DmaPusher;
} // namespace Tegra
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace VideoCommon::GPUThread {
/// Command to signal to the GPU thread that a command list is ready for processing
struct SubmitListCommand final {
explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
Tegra::CommandList entries;
};
/// Command to signal to the GPU thread that a swap buffers is pending
struct SwapBuffersCommand final {
explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
: framebuffer{std::move(framebuffer)} {}
std::optional<const Tegra::FramebufferConfig> framebuffer;
};
/// Command to signal to the GPU thread to flush a region
struct FlushRegionCommand final {
explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
const VAddr addr;
const u64 size;
};
/// Command to signal to the GPU thread to invalidate a region
struct InvalidateRegionCommand final {
explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
const VAddr addr;
const u64 size;
};
/// Command to signal to the GPU thread to flush and invalidate a region
struct FlushAndInvalidateRegionCommand final {
explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
: addr{addr}, size{size} {}
const VAddr addr;
const u64 size;
};
using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
/// Struct used to synchronize the GPU thread
struct SynchState final {
std::atomic<bool> is_running{true};
std::atomic<bool> is_idle{true};
std::condition_variable signal_condition;
std::mutex signal_mutex;
std::condition_variable idle_condition;
std::mutex idle_mutex;
// We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
// one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
// empty. This allows for efficient thread-safe access, as it does not require any copies.
using CommandQueue = std::queue<CommandData>;
std::array<CommandQueue, 2> command_queues;
CommandQueue* push_queue{&command_queues[0]};
CommandQueue* pop_queue{&command_queues[1]};
void UpdateIdleState() {
std::lock_guard<std::mutex> lock{idle_mutex};
is_idle = command_queues[0].empty() && command_queues[1].empty();
}
};
/// Class used to manage the GPU thread
class ThreadManager final {
public:
explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
~ThreadManager();
/// Push GPU command entries to be processed
void SubmitList(Tegra::CommandList&& entries);
/// Swap buffers (render frame)
void SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be invalidated
void InvalidateRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(VAddr addr, u64 size);
/// Waits the caller until the GPU thread is idle, used for synchronization
void WaitForIdle();
private:
/// Pushes a command to be executed by the GPU thread
void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
/// Returns true if this is called by the GPU thread
bool IsGpuThread() const {
return std::this_thread::get_id() == thread_id;
}
private:
SynchState state;
std::thread thread;
std::thread::id thread_id;
VideoCore::RendererBase& renderer;
Tegra::DmaPusher& dma_pusher;
};
} // namespace VideoCommon::GPUThread

View file

@ -749,11 +749,7 @@ void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushRegion(addr, size);
if (Settings::values.use_accurate_gpu_emulation) {
// Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
res_cache.FlushRegion(addr, size);
}
} }
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {

View file

@ -20,10 +20,7 @@
EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {}
void EmuThread::run() { void EmuThread::run() {
if (!Settings::values.use_multi_core) { render_window->MakeCurrent();
// Single core mode must acquire OpenGL context for entire emulation session
render_window->MakeCurrent();
}
MicroProfileOnThreadCreate("EmuThread"); MicroProfileOnThreadCreate("EmuThread");
@ -38,6 +35,11 @@ void EmuThread::run() {
emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
if (Settings::values.use_asynchronous_gpu_emulation) {
// Release OpenGL context for the GPU thread
render_window->DoneCurrent();
}
// holds whether the cpu was running during the last iteration, // holds whether the cpu was running during the last iteration,
// so that the DebugModeLeft signal can be emitted before the // so that the DebugModeLeft signal can be emitted before the
// next execution step // next execution step

View file

@ -374,6 +374,8 @@ void Config::ReadValues() {
qt_config->value("use_disk_shader_cache", false).toBool(); qt_config->value("use_disk_shader_cache", false).toBool();
Settings::values.use_accurate_gpu_emulation = Settings::values.use_accurate_gpu_emulation =
qt_config->value("use_accurate_gpu_emulation", false).toBool(); qt_config->value("use_accurate_gpu_emulation", false).toBool();
Settings::values.use_asynchronous_gpu_emulation =
qt_config->value("use_asynchronous_gpu_emulation", false).toBool();
Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat();
Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat();
@ -633,6 +635,8 @@ void Config::SaveValues() {
qt_config->setValue("frame_limit", Settings::values.frame_limit); qt_config->setValue("frame_limit", Settings::values.frame_limit);
qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache); qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache);
qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation); qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation);
qt_config->setValue("use_asynchronous_gpu_emulation",
Settings::values.use_asynchronous_gpu_emulation);
// Cast to double because Qt's written float values are not human-readable // Cast to double because Qt's written float values are not human-readable
qt_config->setValue("bg_red", (double)Settings::values.bg_red); qt_config->setValue("bg_red", (double)Settings::values.bg_red);

View file

@ -75,6 +75,8 @@ void ConfigureGraphics::setConfiguration() {
ui->frame_limit->setValue(Settings::values.frame_limit); ui->frame_limit->setValue(Settings::values.frame_limit);
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
Settings::values.bg_blue)); Settings::values.bg_blue));
} }
@ -86,6 +88,8 @@ void ConfigureGraphics::applyConfiguration() {
Settings::values.frame_limit = ui->frame_limit->value(); Settings::values.frame_limit = ui->frame_limit->value();
Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
Settings::values.use_asynchronous_gpu_emulation =
ui->use_asynchronous_gpu_emulation->isChecked();
Settings::values.bg_red = static_cast<float>(bg_color.redF()); Settings::values.bg_red = static_cast<float>(bg_color.redF());
Settings::values.bg_green = static_cast<float>(bg_color.greenF()); Settings::values.bg_green = static_cast<float>(bg_color.greenF());
Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); Settings::values.bg_blue = static_cast<float>(bg_color.blueF());

View file

@ -63,6 +63,13 @@
</property> </property>
</widget> </widget>
</item> </item>
<item>
<widget class="QCheckBox" name="use_asynchronous_gpu_emulation">
<property name="text">
<string>Use asynchronous GPU emulation</string>
</property>
</widget>
</item>
<item> <item>
<layout class="QHBoxLayout" name="horizontalLayout"> <layout class="QHBoxLayout" name="horizontalLayout">
<item> <item>

View file

@ -354,6 +354,8 @@ void Config::ReadValues() {
sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
Settings::values.use_accurate_gpu_emulation = Settings::values.use_accurate_gpu_emulation =
sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
Settings::values.use_asynchronous_gpu_emulation =
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0);
Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0);

View file

@ -118,6 +118,10 @@ use_disk_shader_cache =
# 0 (default): Off (fast), 1 : On (slow) # 0 (default): Off (fast), 1 : On (slow)
use_accurate_gpu_emulation = use_accurate_gpu_emulation =
# Whether to use asynchronous GPU emulation
# 0 : Off (slow), 1 (default): On (fast)
use_asynchronous_gpu_emulation =
# The clear color for the renderer. What shows up on the sides of the bottom screen. # The clear color for the renderer. What shows up on the sides of the bottom screen.
# Must be in range of 0.0-1.0. Defaults to 1.0 for all. # Must be in range of 0.0-1.0. Defaults to 1.0 for all.
bg_red = bg_red =