video_core: gpu: Refactor out synchronous/asynchronous GPU implementations.

- The GPU now always runs through a GPU thread, even when synchronous GPU emulation is selected.
2020-12-11 22:26:14 -08:00 · 2020-12-11 22:26:14 -08:00 · 14c825bd1c
commit 14c825bd1c
parent 5d4715cc6a
10 changed files with 130 additions and 289 deletions
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@ -47,6 +47,7 @@ add_library(video_core STATIC
    engines/shader_bytecode.h
    engines/shader_header.h
    engines/shader_type.h
+    framebuffer_config.h
    macro/macro.cpp
    macro/macro.h
    macro/macro_hle.cpp
@ -58,10 +59,6 @@ add_library(video_core STATIC
    fence_manager.h
    gpu.cpp
    gpu.h
-    gpu_asynch.cpp
-    gpu_asynch.h
-    gpu_synch.cpp
-    gpu_synch.h
    gpu_thread.cpp
    gpu_thread.h
    guest_driver.cpp
--- a/src/video_core/framebuffer_config.h
+++ b/src/video_core/framebuffer_config.h
@ -0,0 +1,31 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+// Included directly so this header is self-contained: FramebufferConfig names
+// u32/VAddr, Common::Rectangle and the NVFlinger BufferQueue transform flags.
+#include "common/common_types.h"
+#include "common/math_util.h"
+#include "core/hle/service/nvflinger/buffer_queue.h"
+
+namespace Tegra {
+
+/**
+ * Struct describing framebuffer configuration.
+ *
+ * Every member declared with a `{}` initializer is value-initialized, so a
+ * default-constructed FramebufferConfig holds zeroed scalars. Note that a
+ * value-initialized pixel_format is 0, which matches none of the named
+ * PixelFormat enumerators below.
+ */
+struct FramebufferConfig {
+    /// Pixel layouts a framebuffer may be described with (explicit u32 values).
+    enum class PixelFormat : u32 {
+        A8B8G8R8_UNORM = 1,
+        RGB565_UNORM = 4,
+        B8G8R8A8_UNORM = 5,
+    };
+
+    VAddr address{};
+    u32 offset{};
+    u32 width{};
+    u32 height{};
+    u32 stride{};
+    PixelFormat pixel_format{};
+
+    /// Alias for the transform flag type declared by the NVFlinger buffer queue.
+    using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
+    TransformFlags transform_flags{};
+    Common::Rectangle<int> crop_rect;
+};
+
+} // namespace Tegra
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@ -10,6 +10,7 @@
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
 #include "core/frontend/emu_window.h"
+#include "core/hardware_interrupt_manager.h"
 #include "core/memory.h"
 #include "core/settings.h"
 #include "video_core/engines/fermi_2d.h"
@ -36,7 +37,8 @@ GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
      kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
      maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
      kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
-      shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {}
+      shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
+      gpu_thread{system_} {}

 GPU::~GPU() = default;

@ -198,10 +200,6 @@ void GPU::SyncGuestHost() {
    renderer->Rasterizer().SyncGuestHost();
 }

-void GPU::OnCommandListEnd() {
-    renderer->Rasterizer().ReleaseFences();
-}
-
 enum class GpuSemaphoreOperation {
    AcquireEqual = 0x1,
    WriteLong = 0x2,
@ -461,4 +459,72 @@ void GPU::ProcessSemaphoreAcquire() {
    }
 }

+void GPU::Start() {
+    // Launch the GPU thread first, handing it the renderer, the renderer's own
+    // context and both command pushers.
+    auto&& renderer_context = renderer->Context();
+    gpu_thread.StartThread(*renderer, renderer_context, *dma_pusher, *cdma_pusher);
+
+    // Afterwards create a shared context from the render window, keep it as
+    // cpu_context and make it current.
+    cpu_context = renderer->GetRenderWindow().CreateSharedContext();
+    cpu_context->MakeCurrent();
+}
+
+// Makes cpu_context (the shared context assigned in Start()) current.
+// NOTE(review): cpu_context is only assigned in Start() and is dereferenced
+// here unchecked; presumably Start() must have run first - confirm with callers.
+void GPU::ObtainContext() {
+    cpu_context->MakeCurrent();
+}
+
+// Calls DoneCurrent() on cpu_context (the shared context assigned in Start()).
+// NOTE(review): cpu_context is dereferenced unchecked; presumably Start() must
+// have run first - confirm with callers.
+void GPU::ReleaseContext() {
+    cpu_context->DoneCurrent();
+}
+
+// Submits the command list to the GPU thread manager. The list is passed on
+// with std::move, so the caller should treat `entries` as moved-from afterwards.
+void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
+    gpu_thread.SubmitList(std::move(entries));
+}
+
+// Pushes a command-header list to the CDMA pusher and dispatches it immediately
+// on the calling thread.
+//
+// - Does nothing when nvdec emulation is disabled (use_nvdec is false).
+// - Does nothing for an empty list.
+// - A first entry whose raw value is 0xDEADB33F resets cdma_pusher and returns.
+// - Otherwise cdma_pusher is (re)created on demand, then the list is pushed with
+//   std::move (leaving the caller's `entries` moved-from) and dispatched.
+void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
+    if (!use_nvdec) {
+        return;
+    }
+    // An empty list carries no work, and returning here keeps the entries[0]
+    // read below in bounds.
+    if (entries.empty()) {
+        return;
+    }
+    // This condition fires when a video stream ends, clear all intermediary data
+    if (entries[0].raw == 0xDEADB33F) {
+        cdma_pusher.reset();
+        return;
+    }
+    // The pusher may have been reset by a previous end-of-stream marker.
+    if (!cdma_pusher) {
+        cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
+    }
+
+    // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
+    // TODO(ameerj): RE proper async nvdec operation
+    // gpu_thread.SubmitCommandBuffer(std::move(entries));
+
+    cdma_pusher->Push(std::move(entries));
+    cdma_pusher->DispatchCalls();
+}
+
+// Forwards the swap request, with the framebuffer pointer unchanged, to the
+// GPU thread manager.
+// NOTE(review): the pointer is passed on without a null check; presumably the
+// thread manager defines what nullptr means - confirm.
+void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+    gpu_thread.SwapBuffers(framebuffer);
+}
+
+// Forwards a flush request for `size` bytes starting at `addr` to the GPU
+// thread manager.
+void GPU::FlushRegion(VAddr addr, u64 size) {
+    gpu_thread.FlushRegion(addr, size);
+}
+
+// Forwards an invalidate request for `size` bytes starting at `addr` to the
+// GPU thread manager.
+void GPU::InvalidateRegion(VAddr addr, u64 size) {
+    gpu_thread.InvalidateRegion(addr, size);
+}
+
+// Forwards a combined flush-and-invalidate request for `size` bytes starting
+// at `addr` to the GPU thread manager.
+void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+    gpu_thread.FlushAndInvalidateRegion(addr, size);
+}
+
+// Raises a GPU syncpoint interrupt for (syncpoint_id, value) through the
+// system's interrupt manager.
+void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
+    system.InterruptManager().GPUInterruptSyncpt(syncpoint_id, value);
+}
+
+// Delegates the idle wait to the GPU thread manager.
+void GPU::WaitIdle() const {
+    gpu_thread.WaitIdle();
+}
+
+// Forwards the end-of-command-list notification to the GPU thread manager.
+void GPU::OnCommandListEnd() {
+    gpu_thread.OnCommandListEnd();
+}
+
 } // namespace Tegra
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@ -15,6 +15,8 @@
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "video_core/cdma_pusher.h"
 #include "video_core/dma_pusher.h"
+#include "video_core/framebuffer_config.h"
+#include "video_core/gpu_thread.h"

 using CacheAddr = std::uintptr_t;
 [[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) {
@ -101,28 +103,6 @@ enum class DepthFormat : u32 {
 struct CommandListHeader;
 class DebugContext;

-/**
- * Struct describing framebuffer configuration
- */
-struct FramebufferConfig {
-    enum class PixelFormat : u32 {
-        A8B8G8R8_UNORM = 1,
-        RGB565_UNORM = 4,
-        B8G8R8A8_UNORM = 5,
-    };
-
-    VAddr address;
-    u32 offset;
-    u32 width;
-    u32 height;
-    u32 stride;
-    PixelFormat pixel_format;
-
-    using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
-    TransformFlags transform_flags;
-    Common::Rectangle<int> crop_rect;
-};
-
 namespace Engines {
 class Fermi2D;
 class Maxwell3D;
@ -141,7 +121,7 @@ enum class EngineID {

 class MemoryManager;

-class GPU {
+class GPU final {
 public:
    struct MethodCall {
        u32 method{};
@ -159,7 +139,7 @@ public:
    };

    explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_);
-    virtual ~GPU();
+    ~GPU();

    /// Binds a renderer to the GPU.
    void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
@ -176,7 +156,7 @@ public:
    /// Synchronizes CPU writes with Host GPU memory.
    void SyncGuestHost();
    /// Signal the ending of command list.
-    virtual void OnCommandListEnd();
+    void OnCommandListEnd();

    /// Request a host GPU memory flush from the CPU.
    [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
@ -240,7 +220,7 @@ public:
    }

    // Waits for the GPU to finish working
-    virtual void WaitIdle() const = 0;
+    void WaitIdle() const;

    /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
    void WaitFence(u32 syncpoint_id, u32 value);
@ -330,34 +310,34 @@ public:
    /// Performs any additional setup necessary in order to begin GPU emulation.
    /// This can be used to launch any necessary threads and register any necessary
    /// core timing events.
-    virtual void Start() = 0;
+    void Start();

    /// Obtain the CPU Context
-    virtual void ObtainContext() = 0;
+    void ObtainContext();

    /// Release the CPU Context
-    virtual void ReleaseContext() = 0;
+    void ReleaseContext();

    /// Push GPU command entries to be processed
-    virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
+    void PushGPUEntries(Tegra::CommandList&& entries);

    /// Push GPU command buffer entries to be processed
-    virtual void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) = 0;
+    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries);

    /// Swap buffers (render frame)
-    virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
+    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);

    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(VAddr addr, u64 size) = 0;
+    void FlushRegion(VAddr addr, u64 size);

    /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+    void InvalidateRegion(VAddr addr, u64 size);

    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size);

 protected:
-    virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
+    void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const;

 private:
    void ProcessBindMethod(const MethodCall& method_call);
@ -426,6 +406,9 @@ private:
    u64 last_flush_fence{};
    std::mutex flush_request_mutex;

+    VideoCommon::GPUThread::ThreadManager gpu_thread;
+    std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
+
    const bool is_async;
 };

--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@ -1,86 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "core/core.h"
-#include "core/hardware_interrupt_manager.h"
-#include "video_core/gpu_asynch.h"
-#include "video_core/gpu_thread.h"
-#include "video_core/renderer_base.h"
-
-namespace VideoCommon {
-
-GPUAsynch::GPUAsynch(Core::System& system_, bool use_nvdec_)
-    : GPU{system_, true, use_nvdec_}, gpu_thread{system_} {}
-
-GPUAsynch::~GPUAsynch() = default;
-
-void GPUAsynch::Start() {
-    gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
-    cpu_context = renderer->GetRenderWindow().CreateSharedContext();
-    cpu_context->MakeCurrent();
-}
-
-void GPUAsynch::ObtainContext() {
-    cpu_context->MakeCurrent();
-}
-
-void GPUAsynch::ReleaseContext() {
-    cpu_context->DoneCurrent();
-}
-
-void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
-    gpu_thread.SubmitList(std::move(entries));
-}
-
-void GPUAsynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
-    if (!use_nvdec) {
-        return;
-    }
-    // This condition fires when a video stream ends, clear all intermediary data
-    if (entries[0].raw == 0xDEADB33F) {
-        cdma_pusher.reset();
-        return;
-    }
-    if (!cdma_pusher) {
-        cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
-    }
-
-    // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
-    // TODO(ameerj): RE proper async nvdec operation
-    // gpu_thread.SubmitCommandBuffer(std::move(entries));
-
-    cdma_pusher->Push(std::move(entries));
-    cdma_pusher->DispatchCalls();
-}
-
-void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
-    gpu_thread.SwapBuffers(framebuffer);
-}
-
-void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
-    gpu_thread.FlushRegion(addr, size);
-}
-
-void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
-    gpu_thread.InvalidateRegion(addr, size);
-}
-
-void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
-    gpu_thread.FlushAndInvalidateRegion(addr, size);
-}
-
-void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
-    auto& interrupt_manager = system.InterruptManager();
-    interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
-}
-
-void GPUAsynch::WaitIdle() const {
-    gpu_thread.WaitIdle();
-}
-
-void GPUAsynch::OnCommandListEnd() {
-    gpu_thread.OnCommandListEnd();
-}
-
-} // namespace VideoCommon
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@ -1,47 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "video_core/gpu.h"
-#include "video_core/gpu_thread.h"
-
-namespace Core::Frontend {
-class GraphicsContext;
-}
-
-namespace VideoCore {
-class RendererBase;
-} // namespace VideoCore
-
-namespace VideoCommon {
-
-/// Implementation of GPU interface that runs the GPU asynchronously
-class GPUAsynch final : public Tegra::GPU {
-public:
-    explicit GPUAsynch(Core::System& system_, bool use_nvdec_);
-    ~GPUAsynch() override;
-
-    void Start() override;
-    void ObtainContext() override;
-    void ReleaseContext() override;
-    void PushGPUEntries(Tegra::CommandList&& entries) override;
-    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
-    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    void FlushRegion(VAddr addr, u64 size) override;
-    void InvalidateRegion(VAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
-    void WaitIdle() const override;
-
-    void OnCommandListEnd() override;
-
-protected:
-    void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
-
-private:
-    GPUThread::ThreadManager gpu_thread;
-    std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
-};
-
-} // namespace VideoCommon
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@ -1,61 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "video_core/gpu_synch.h"
-#include "video_core/renderer_base.h"
-
-namespace VideoCommon {
-
-GPUSynch::GPUSynch(Core::System& system_, bool use_nvdec_) : GPU{system_, false, use_nvdec_} {}
-
-GPUSynch::~GPUSynch() = default;
-
-void GPUSynch::Start() {}
-
-void GPUSynch::ObtainContext() {
-    renderer->Context().MakeCurrent();
-}
-
-void GPUSynch::ReleaseContext() {
-    renderer->Context().DoneCurrent();
-}
-
-void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
-    dma_pusher->Push(std::move(entries));
-    dma_pusher->DispatchCalls();
-}
-
-void GPUSynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
-    if (!use_nvdec) {
-        return;
-    }
-    // This condition fires when a video stream ends, clears all intermediary data
-    if (entries[0].raw == 0xDEADB33F) {
-        cdma_pusher.reset();
-        return;
-    }
-    if (!cdma_pusher) {
-        cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
-    }
-    cdma_pusher->Push(std::move(entries));
-    cdma_pusher->DispatchCalls();
-}
-
-void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
-    renderer->SwapBuffers(framebuffer);
-}
-
-void GPUSynch::FlushRegion(VAddr addr, u64 size) {
-    renderer->Rasterizer().FlushRegion(addr, size);
-}
-
-void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
-    renderer->Rasterizer().InvalidateRegion(addr, size);
-}
-
-void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
-    renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
-}
-
-} // namespace VideoCommon
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@ -1,41 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "video_core/gpu.h"
-
-namespace Core::Frontend {
-class GraphicsContext;
-}
-
-namespace VideoCore {
-class RendererBase;
-} // namespace VideoCore
-
-namespace VideoCommon {
-
-/// Implementation of GPU interface that runs the GPU synchronously
-class GPUSynch final : public Tegra::GPU {
-public:
-    explicit GPUSynch(Core::System& system_, bool use_nvdec_);
-    ~GPUSynch() override;
-
-    void Start() override;
-    void ObtainContext() override;
-    void ReleaseContext() override;
-    void PushGPUEntries(Tegra::CommandList&& entries) override;
-    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
-    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    void FlushRegion(VAddr addr, u64 size) override;
-    void InvalidateRegion(VAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
-    void WaitIdle() const override {}
-
-protected:
-    void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
-                             [[maybe_unused]] u32 value) const override {}
-};
-
-} // namespace VideoCommon
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@ -10,8 +10,9 @@
 #include <optional>
 #include <thread>
 #include <variant>
+
 #include "common/threadsafe_queue.h"
-#include "video_core/gpu.h"
+#include "video_core/framebuffer_config.h"

 namespace Tegra {
 struct FramebufferConfig;
@ -25,6 +26,10 @@ class GraphicsContext;
 class System;
 } // namespace Core

+// Forward declaration.
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
 namespace VideoCommon::GPUThread {

 /// Command to signal to the GPU thread that processing has ended
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@ -7,8 +7,6 @@
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/settings.h"
-#include "video_core/gpu_asynch.h"
-#include "video_core/gpu_synch.h"
 #include "video_core/renderer_base.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
@ -39,13 +37,9 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
 namespace VideoCore {

 std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
-    std::unique_ptr<Tegra::GPU> gpu;
    const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
-    if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
-        gpu = std::make_unique<VideoCommon::GPUAsynch>(system, use_nvdec);
-    } else {
-        gpu = std::make_unique<VideoCommon::GPUSynch>(system, use_nvdec);
-    }
+    std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>(
+        system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec);

    auto context = emu_window.CreateSharedContext();
    const auto scope = context->Acquire();