From ac8835659ead30d289ff8b907a2295d87790670f Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 25 Apr 2021 01:04:49 -0300
Subject: [PATCH] vulkan: Defer descriptor set work to the Vulkan thread

Move descriptor lookup and update code to a separate thread. Deferring
this work takes it off the main GPU thread and allows descriptor
layouts to be created on another thread. This slightly reduces the
workload of the main thread when new pipelines are encountered.
---
 .../renderer_vulkan/vk_compute_pass.cpp       | 45 +++++++++----------
 .../renderer_vulkan/vk_compute_pass.h         |  8 ++--
 .../renderer_vulkan/vk_compute_pipeline.cpp   | 36 ++++++++-------
 .../renderer_vulkan/vk_compute_pipeline.h     |  1 +
 .../renderer_vulkan/vk_graphics_pipeline.cpp  | 40 ++++++++---------
 .../renderer_vulkan/vk_graphics_pipeline.h    |  5 ++-
 .../renderer_vulkan/vk_update_descriptor.cpp  |  9 ----
 .../renderer_vulkan/vk_update_descriptor.h    |  4 +-
 8 files changed, 69 insertions(+), 79 deletions(-)

diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index e2f3d16bfd..7e5ba283b8 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -172,11 +172,12 @@ struct AstcPushConstants {
 };
 } // Anonymous namespace
 
-ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool,
+ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
                          vk::Span<VkDescriptorSetLayoutBinding> bindings,
                          vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
                          const DescriptorBankInfo& bank_info,
-                         vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code) {
+                         vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code)
+    : device{device_} {
     descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .pNext = nullptr,
@@ -237,15 +238,6 @@ ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool,
 
 ComputePass::~ComputePass() = default;
 
-VkDescriptorSet ComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue) {
-    if (!descriptor_template) {
-        return nullptr;
-    }
-    const VkDescriptorSet set = descriptor_allocator.Commit();
-    update_descriptor_queue.Send(descriptor_template.address(), set);
-    return set;
-}
-
 Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool,
                      StagingBufferPool& staging_buffer_pool_,
                      VKUpdateDescriptorQueue& update_descriptor_queue_)
@@ -265,10 +257,11 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
     update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
-    const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
+    const void* const descriptor_data{update_descriptor_queue.UpdateData()};
+    const VkBuffer buffer{staging.buffer};
 
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([this, buffer = staging.buffer, set, num_vertices](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) {
         static constexpr u32 DISPATCH_SIZE = 1024;
         static constexpr VkMemoryBarrier WRITE_BARRIER{
             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@@ -276,6 +269,8 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
             .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
         };
+        const VkDescriptorSet set = descriptor_allocator.Commit();
+        device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
         cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
@@ -321,10 +316,10 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
     update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
-    const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
+    const void* const descriptor_data{update_descriptor_queue.UpdateData()};
 
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([this, buffer = staging.buffer, set, num_tri_vertices, base_vertex,
+    scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex,
                       index_shift](vk::CommandBuffer cmdbuf) {
         static constexpr u32 DISPATCH_SIZE = 1024;
         static constexpr VkMemoryBarrier WRITE_BARRIER{
@@ -333,7 +328,9 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
             .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
         };
-        const std::array push_constants = {base_vertex, index_shift};
+        const std::array push_constants{base_vertex, index_shift};
+        const VkDescriptorSet set = descriptor_allocator.Commit();
+        device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
         cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
@@ -353,7 +350,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
     : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS,
                   ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO,
                   COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
-      device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
+      scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
       update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {}
 
 ASTCDecoderPass::~ASTCDecoderPass() = default;
@@ -451,16 +448,14 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
         update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES),
                                           sizeof(SWIZZLE_TABLE));
         update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
-
-        const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
-        const VkPipelineLayout vk_layout = *layout;
+        const void* const descriptor_data{update_descriptor_queue.UpdateData()};
 
         // To unswizzle the ASTC data
         const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
         ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
         ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
-        scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z,
-                          block_dims, params, set](vk::CommandBuffer cmdbuf) {
+        scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
+                          params, descriptor_data](vk::CommandBuffer cmdbuf) {
             const AstcPushConstants uniforms{
                 .blocks_dims = block_dims,
                 .bytes_per_block_log2 = params.bytes_per_block_log2,
@@ -470,8 +465,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
                 .block_height = params.block_height,
                 .block_height_mask = params.block_height_mask,
             };
-            cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {});
-            cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
+            const VkDescriptorSet set = descriptor_allocator.Commit();
+            device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
+            cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
+            cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
             cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z);
         });
     }
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 54c1ac4cbd..114aef2bd7 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -36,15 +36,14 @@ public:
     ~ComputePass();
 
 protected:
-    VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue);
-
+    const Device& device;
     vk::DescriptorUpdateTemplateKHR descriptor_template;
     vk::PipelineLayout layout;
     vk::Pipeline pipeline;
-
-private:
     vk::DescriptorSetLayout descriptor_set_layout;
     DescriptorAllocator descriptor_allocator;
+
+private:
     vk::ShaderModule module;
 };
 
@@ -99,7 +98,6 @@ public:
 private:
     void MakeDataBuffer();
 
-    const Device& device;
     VKScheduler& scheduler;
     StagingBufferPool& staging_buffer_pool;
     VKUpdateDescriptorQueue& update_descriptor_queue;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 54a57c3587..feaace0c56 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -18,21 +18,22 @@
 
 namespace Vulkan {
 
-ComputePipeline::ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
+ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
                                  VKUpdateDescriptorQueue& update_descriptor_queue_,
                                  Common::ThreadWorker* thread_worker, const Shader::Info& info_,
                                  vk::ShaderModule spv_module_)
-    : update_descriptor_queue{update_descriptor_queue_}, info{info_},
+    : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_},
       spv_module(std::move(spv_module_)) {
-    DescriptorLayoutBuilder builder{device.GetLogical()};
-    builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
+    auto func{[this, &descriptor_pool] {
+        DescriptorLayoutBuilder builder{device.GetLogical()};
+        builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
 
-    descriptor_set_layout = builder.CreateDescriptorSetLayout();
-    pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
-    descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout);
-    descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
+        descriptor_set_layout = builder.CreateDescriptorSetLayout();
+        pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
+        descriptor_update_template =
+            builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout);
+        descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
 
-    auto func{[this, &device] {
         const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
             .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
             .pNext = nullptr,
@@ -166,15 +167,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
             build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
         });
     }
-    scheduler.Record([this](vk::CommandBuffer cmdbuf) {
+    const void* const descriptor_data{update_descriptor_queue.UpdateData()};
+    scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
-    });
-    if (!descriptor_set_layout) {
-        return;
-    }
-    const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
-    update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set);
-    scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) {
+
+        if (!descriptor_set_layout) {
+            return;
+        }
+        const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
+        const vk::Device& dev{device.GetLogical()};
+        dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
                                   descriptor_set, nullptr);
     });
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 0d4cd37be5..a560e382e2 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -40,6 +40,7 @@ public:
                    VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache);
 
 private:
+    const Device& device;
     VKUpdateDescriptorQueue& update_descriptor_queue;
     Shader::Info info;
 
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 0526c197af..76080bde1a 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -205,31 +205,31 @@ ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& m
 GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
                                    Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_,
                                    BufferCache& buffer_cache_, TextureCache& texture_cache_,
-                                   const Device& device, DescriptorPool& descriptor_pool,
+                                   const Device& device_, DescriptorPool& descriptor_pool,
                                    VKUpdateDescriptorQueue& update_descriptor_queue_,
                                    Common::ThreadWorker* worker_thread,
                                    RenderPassCache& render_pass_cache,
                                    const GraphicsPipelineCacheKey& key_,
                                    std::array<vk::ShaderModule, NUM_STAGES> stages,
                                    const std::array<const Shader::Info*, NUM_STAGES>& infos)
-    : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_},
-      buffer_cache{buffer_cache_}, scheduler{scheduler_},
+    : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_},
+      texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_},
       update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} {
     std::ranges::transform(infos, stage_infos.begin(),
                            [](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
 
-    DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
-    descriptor_set_layout = builder.CreateDescriptorSetLayout();
-    descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos);
+    auto func{[this, &render_pass_cache, &descriptor_pool] {
+        DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
+        descriptor_set_layout = builder.CreateDescriptorSetLayout();
+        descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos);
 
-    auto func{[this, &device, &render_pass_cache, builder] {
         const VkDescriptorSetLayout set_layout{*descriptor_set_layout};
         pipeline_layout = builder.CreatePipelineLayout(set_layout);
         descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout);
 
         const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))};
         Validate();
-        MakePipeline(device, render_pass);
+        MakePipeline(render_pass);
 
         std::lock_guard lock{build_mutex};
         is_built = true;
@@ -440,24 +440,22 @@ void GraphicsPipeline::ConfigureDraw() {
             build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
         });
     }
-    if (scheduler.UpdateGraphicsPipeline(this)) {
-        scheduler.Record([this](vk::CommandBuffer cmdbuf) {
-            cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
-        });
-    }
-    if (!descriptor_set_layout) {
-        return;
-    }
-    const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
-    update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set);
-
-    scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) {
+    const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
+    const void* const descriptor_data{update_descriptor_queue.UpdateData()};
+    scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) {
+        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+        if (!descriptor_set_layout) {
+            return;
+        }
+        const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
+        const vk::Device& dev{device.GetLogical()};
+        dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
                                   descriptor_set, nullptr);
     });
 }
 
-void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) {
+void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
     FixedPipelineState::DynamicState dynamic{};
     if (!device.IsExtExtendedDynamicStateSupported()) {
         dynamic = key.state.dynamic_state;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 454fc049ee..85e21f611f 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -109,19 +109,20 @@ private:
 
     void ConfigureDraw();
 
-    void MakePipeline(const Device& device, VkRenderPass render_pass);
+    void MakePipeline(VkRenderPass render_pass);
 
     void Validate();
 
     const GraphicsPipelineCacheKey key;
     Tegra::Engines::Maxwell3D& maxwell3d;
     Tegra::MemoryManager& gpu_memory;
+    const Device& device;
     TextureCache& texture_cache;
     BufferCache& buffer_cache;
     VKScheduler& scheduler;
     VKUpdateDescriptorQueue& update_descriptor_queue;
 
-    void (*configure_func)(GraphicsPipeline*, bool);
+    void (*configure_func)(GraphicsPipeline*, bool){};
 
     std::vector<GraphicsPipelineCacheKey> transition_keys;
     std::vector<GraphicsPipeline*> transitions;
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index bea9b80123..ce3427c9bc 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -36,13 +36,4 @@ void VKUpdateDescriptorQueue::Acquire() {
     upload_start = payload_cursor;
 }
 
-void VKUpdateDescriptorQueue::Send(const VkDescriptorUpdateTemplateKHR* update_template,
-                                   VkDescriptorSet set) {
-    const void* const data = upload_start;
-    const vk::Device* const logical = &device.GetLogical();
-    scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
-        logical->UpdateDescriptorSet(set, *update_template, data);
-    });
-}
-
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index 82bc9920c1..d7de4c4908 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -39,7 +39,9 @@ public:
 
     void Acquire();
 
-    void Send(const VkDescriptorUpdateTemplateKHR* update_template, VkDescriptorSet set);
+    const DescriptorUpdateEntry* UpdateData() const noexcept {
+        return upload_start;
+    }
 
     void AddSampledImage(VkImageView image_view, VkSampler sampler) {
         *(payload_cursor++) = VkDescriptorImageInfo{