From c03b8c4c192b10fad93ded9060ff1313bab93d95 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 6 Apr 2019 17:59:56 -0300 Subject: [PATCH 1/2] gl_shader_cache: Use shared contexts to build shaders in parallel --- .../renderer_opengl/gl_rasterizer.cpp | 8 +- .../renderer_opengl/gl_rasterizer.h | 3 +- .../renderer_opengl/gl_shader_cache.cpp | 120 ++++++++++++------ .../renderer_opengl/gl_shader_cache.h | 14 +- .../renderer_opengl/renderer_opengl.cpp | 6 +- .../renderer_opengl/renderer_opengl.h | 3 +- src/yuzu/bootmanager.cpp | 18 +-- 7 files changed, 114 insertions(+), 58 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index dbd8049f55..f9b6dfeea4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -98,9 +98,11 @@ struct FramebufferCacheKey { } }; -RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) - : res_cache{*this}, shader_cache{*this, system, device}, global_cache{*this}, system{system}, - screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { +RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, + ScreenInfo& info) + : res_cache{*this}, shader_cache{*this, system, emu_window, device}, + global_cache{*this}, system{system}, screen_info{info}, + buffer_cache(*this, STREAM_BUFFER_SIZE) { OpenGLState::ApplyDefaultState(); shader_program_manager = std::make_unique(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 71b9c5ead9..d78094138c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -48,7 +48,8 @@ struct FramebufferCacheKey; class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: - explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info); + explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, + ScreenInfo& info); ~RasterizerOpenGL() override; void DrawArrays() override; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f700dc89a9..9d3f96f9ce 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -2,10 +2,14 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include +#include #include #include "common/assert.h" #include "common/hash.h" +#include "common/scope_exit.h" #include "core/core.h" +#include "core/frontend/emu_window.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" @@ -344,8 +348,8 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, } ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, - const Device& device) - : RasterizerCache{rasterizer}, device{device}, disk_cache{system} {} + Core::Frontend::EmuWindow& emu_window, const Device& device) + : RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {} void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { @@ -353,62 +357,106 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, if (!transferable) { return; } - const auto [raws, usages] = *transferable; + const auto [raws, shader_usages] = *transferable; auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); const auto supported_formats{GetSupportedFormats()}; - const auto unspecialized{ + const auto unspecialized_shaders{ GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)}; - if (stop_loading) + if (stop_loading) { return; + } // Track if precompiled cache was altered during loading to know if we have to serialize the // virtual precompiled cache file back to the hard drive bool precompiled_cache_altered = false; - // Build shaders - if (callback) - callback(VideoCore::LoadCallbackStage::Build, 0, usages.size()); - for (std::size_t i = 0; i < usages.size(); ++i) { - if (stop_loading) - return; + // Inform the frontend about shader build initialization + if (callback) { + callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size()); + } - const auto& usage{usages[i]}; - LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier, - i + 1, usages.size()); + std::mutex mutex; + std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex + std::atomic_bool compilation_failed = false; - const auto& unspec{unspecialized.at(usage.unique_identifier)}; - const auto dump_it = dumps.find(usage); + const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, + std::size_t end) { + context->MakeCurrent(); + SCOPE_EXIT({ return context->DoneCurrent(); }); - CachedProgram shader; - if (dump_it != dumps.end()) { - // If the shader is dumped, attempt to load it with - shader = GeneratePrecompiledProgram(dump_it->second, supported_formats); - if (!shader) { - // Invalidate the precompiled cache if a shader dumped shader was rejected - disk_cache.InvalidatePrecompiled(); - precompiled_cache_altered = true; - dumps.clear(); + for (std::size_t i = begin; i < end; ++i) { + if (stop_loading || compilation_failed) { + return; } - } - if (!shader) { - shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type, - usage.bindings, usage.primitive, true); - } - precompiled_programs.insert({usage, std::move(shader)}); + const auto& usage{shader_usages[i]}; + LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})", + usage.unique_identifier, i, shader_usages.size()); - if (callback) - callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size()); + const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; + const auto dump{dumps.find(usage)}; + + CachedProgram shader; + if (dump != dumps.end()) { + // If the shader is dumped, attempt to load it with + shader = GeneratePrecompiledProgram(dump->second, supported_formats); + if (!shader) { + compilation_failed = true; + return; + } + } + if (!shader) { + shader = SpecializeShader(unspecialized.code, unspecialized.entries, + unspecialized.program_type, usage.bindings, + usage.primitive, true); + } + + std::scoped_lock lock(mutex); + if (callback) { + callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, + shader_usages.size()); + } + + precompiled_programs.emplace(usage, std::move(shader)); + } + }; + + const std::size_t num_workers{std::thread::hardware_concurrency() + 1}; + const std::size_t bucket_size{shader_usages.size() / num_workers}; + std::vector> contexts(num_workers); + std::vector threads(num_workers); + for (std::size_t i = 0; i < num_workers; ++i) { + const bool is_last_worker = i + 1 == num_workers; + const std::size_t start{bucket_size * i}; + const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size}; + + // On some platforms the shared context has to be created from the GUI thread + contexts[i] = emu_window.CreateSharedContext(); + threads[i] = std::thread(Worker, contexts[i].get(), start, end); + } + for (auto& thread : threads) { + thread.join(); + } + + if (compilation_failed) { + // Invalidate the precompiled cache if a shader dumped shader was rejected + disk_cache.InvalidatePrecompiled(); + dumps.clear(); + precompiled_cache_altered = true; + return; + } + if (stop_loading) { + return; } // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before // precompiling them - for (std::size_t i = 0; i < usages.size(); ++i) { - const auto& usage{usages[i]}; + for (std::size_t i = 0; i < shader_usages.size(); ++i) { + const auto& usage{shader_usages[i]}; if (dumps.find(usage) == dumps.end()) { - const auto& program = precompiled_programs.at(usage); + const auto& program{precompiled_programs.at(usage)}; disk_cache.SaveDump(usage, program->handle); precompiled_cache_altered = true; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 31b9799870..64e5a5594d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -22,7 +22,11 @@ namespace Core { class System; -} // namespace Core +} + +namespace Core::Frontend { +class EmuWindow; +} namespace OpenGL { @@ -111,7 +115,7 @@ private: class ShaderCacheOpenGL final : public RasterizerCache { public: explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, - const Device& device); + Core::Frontend::EmuWindow& emu_window, const Device& device); /// Loads disk cache for the current game void LoadDiskCache(const std::atomic_bool& stop_loading, @@ -133,13 +137,13 @@ private: CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, const std::set& supported_formats); + Core::Frontend::EmuWindow& emu_window; const Device& device; - - std::array last_shaders; - ShaderDiskCacheOpenGL disk_cache; + PrecompiledShaders precompiled_shaders; PrecompiledPrograms precompiled_programs; + std::array last_shaders; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index d69cba9c3b..3451d321d8 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -97,8 +97,8 @@ static std::array MakeOrthographicMatrix(const float width, cons return matrix; } -RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system) - : VideoCore::RendererBase{window}, system{system} {} +RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) + : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {} RendererOpenGL::~RendererOpenGL() = default; @@ -265,7 +265,7 @@ void RendererOpenGL::CreateRasterizer() { } // Initialize sRGB Usage OpenGLState::ClearsRGBUsed(); - rasterizer = std::make_unique(system, screen_info); + rasterizer = std::make_unique(system, emu_window, screen_info); } void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 6cbf9d2cba..4aebf2321b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -45,7 +45,7 @@ struct ScreenInfo { class RendererOpenGL : public VideoCore::RendererBase { public: - explicit RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system); + explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); ~RendererOpenGL() override; /// Swap buffers (render frame) @@ -77,6 +77,7 @@ private: void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, const TextureInfo& texture); + Core::Frontend::EmuWindow& emu_window; Core::System& system; OpenGLState state; diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index c2783d6845..eeee603d1e 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -91,25 +91,25 @@ void EmuThread::run() { class GGLContext : public Core::Frontend::GraphicsContext { public: - explicit GGLContext(QOpenGLContext* shared_context) - : context{std::make_unique(shared_context)} { - surface.setFormat(shared_context->format()); - surface.create(); + explicit GGLContext(QOpenGLContext* shared_context) : shared_context{shared_context} { + context.setFormat(shared_context->format()); + context.setShareContext(shared_context); + context.create(); } void MakeCurrent() override { - context->makeCurrent(&surface); + context.makeCurrent(shared_context->surface()); } void DoneCurrent() override { - context->doneCurrent(); + context.doneCurrent(); } void SwapBuffers() override {} private: - std::unique_ptr context; - QOffscreenSurface surface; + QOpenGLContext* shared_context; + QOpenGLContext context; }; // This class overrides paintEvent and resizeEvent to prevent the GUI thread from stealing GL @@ -358,7 +358,7 @@ void GRenderWindow::OnClientAreaResized(unsigned width, unsigned height) { } std::unique_ptr GRenderWindow::CreateSharedContext() const { - return std::make_unique(shared_context.get()); + return std::make_unique(context.get()); } void GRenderWindow::InitRenderTarget() { From 69215b5a550ef8b2f3a2854bc99af03bcd31a6c7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 25 Apr 2019 20:10:20 -0300 Subject: [PATCH 2/2] gl_shader_cache: Fix clang strict standard build issues --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 7 ++++--- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 8 +++----- src/video_core/renderer_opengl/gl_shader_disk_cache.h | 7 ++++++- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9d3f96f9ce..7ee1c99c0c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -382,7 +382,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, std::atomic_bool compilation_failed = false; const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, - std::size_t end) { + std::size_t end, const std::vector& shader_usages, + const ShaderDumpsMap& dumps) { context->MakeCurrent(); SCOPE_EXIT({ return context->DoneCurrent(); }); @@ -422,7 +423,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } }; - const std::size_t num_workers{std::thread::hardware_concurrency() + 1}; + const auto num_workers{static_cast(std::thread::hardware_concurrency() + 1)}; const std::size_t bucket_size{shader_usages.size() / num_workers}; std::vector> contexts(num_workers); std::vector threads(num_workers); @@ -433,7 +434,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, // On some platforms the shared context has to be created from the GUI thread contexts[i] = emu_window.CreateSharedContext(); - threads[i] = std::thread(Worker, contexts[i].get(), start, end); + threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps); } for (auto& thread : threads) { thread.join(); diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index fba9c594af..ee4a45ca22 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -183,8 +183,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() { return {{raws, usages}}; } -std::pair, - std::unordered_map> +std::pair, ShaderDumpsMap> ShaderDiskCacheOpenGL::LoadPrecompiled() { if (!IsUsable()) return {}; @@ -208,8 +207,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { return *result; } -std::optional, - std::unordered_map>> +std::optional, ShaderDumpsMap>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { // Read compressed file from disk and decompress to virtual precompiled cache file std::vector compressed(file.GetSize()); @@ -230,7 +228,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { } std::unordered_map decompiled; - std::unordered_map dumps; + ShaderDumpsMap dumps; while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { PrecompiledEntryKind kind{}; if (!LoadObjectFromPrecompiled(kind)) { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 2da0a4a232..ecd72ba58e 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -33,6 +33,11 @@ namespace OpenGL { using ProgramCode = std::vector; using Maxwell = Tegra::Engines::Maxwell3D::Regs; +struct ShaderDiskCacheUsage; +struct ShaderDiskCacheDump; + +using ShaderDumpsMap = std::unordered_map; + /// Allocated bindings used by an OpenGL shader program struct BaseBindings { u32 cbuf{}; @@ -294,4 +299,4 @@ private: bool tried_to_load{}; }; -} // namespace OpenGL \ No newline at end of file +} // namespace OpenGL