3
0
Fork 0
forked from suyu/suyu

vulkan: Defer descriptor set work to the Vulkan thread

Move descriptor lookup and update code to a separate thread. Delaying
this removes work from the main GPU thread and allows creating
descriptor layouts on another thread. This reduces a bit the workload
of the main thread when new pipelines are encountered.
This commit is contained in:
ReinUsesLisp 2021-04-25 01:04:49 -03:00 committed by ameerj
parent 2f3c3dfc10
commit ac8835659e
8 changed files with 69 additions and 79 deletions

View file

@ -172,11 +172,12 @@ struct AstcPushConstants {
};
} // Anonymous namespace
ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool,
ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
const DescriptorBankInfo& bank_info,
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code) {
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code)
: device{device_} {
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
@ -237,15 +238,6 @@ ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool,
ComputePass::~ComputePass() = default;
VkDescriptorSet ComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue) {
if (!descriptor_template) {
return nullptr;
}
const VkDescriptorSet set = descriptor_allocator.Commit();
update_descriptor_queue.Send(descriptor_template.address(), set);
return set;
}
Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
@ -265,10 +257,11 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
const VkBuffer buffer{staging.buffer};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([this, buffer = staging.buffer, set, num_vertices](vk::CommandBuffer cmdbuf) {
scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) {
static constexpr u32 DISPATCH_SIZE = 1024;
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@ -276,6 +269,8 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
};
const VkDescriptorSet set = descriptor_allocator.Commit();
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
@ -321,10 +316,10 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([this, buffer = staging.buffer, set, num_tri_vertices, base_vertex,
scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex,
index_shift](vk::CommandBuffer cmdbuf) {
static constexpr u32 DISPATCH_SIZE = 1024;
static constexpr VkMemoryBarrier WRITE_BARRIER{
@ -333,7 +328,9 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
};
const std::array push_constants = {base_vertex, index_shift};
const std::array push_constants{base_vertex, index_shift};
const VkDescriptorSet set = descriptor_allocator.Commit();
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
@ -353,7 +350,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
: ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS,
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO,
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {}
ASTCDecoderPass::~ASTCDecoderPass() = default;
@ -451,16 +448,14 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES),
sizeof(SWIZZLE_TABLE));
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
const VkPipelineLayout vk_layout = *layout;
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
// To unswizzle the ASTC data
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z,
block_dims, params, set](vk::CommandBuffer cmdbuf) {
scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
params, descriptor_data](vk::CommandBuffer cmdbuf) {
const AstcPushConstants uniforms{
.blocks_dims = block_dims,
.bytes_per_block_log2 = params.bytes_per_block_log2,
@ -470,8 +465,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
.block_height = params.block_height,
.block_height_mask = params.block_height_mask,
};
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {});
cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
const VkDescriptorSet set = descriptor_allocator.Commit();
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z);
});
}

View file

@ -36,15 +36,14 @@ public:
~ComputePass();
protected:
VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue);
const Device& device;
vk::DescriptorUpdateTemplateKHR descriptor_template;
vk::PipelineLayout layout;
vk::Pipeline pipeline;
private:
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
private:
vk::ShaderModule module;
};
@ -99,7 +98,6 @@ public:
private:
void MakeDataBuffer();
const Device& device;
VKScheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;

View file

@ -18,21 +18,22 @@
namespace Vulkan {
ComputePipeline::ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue_,
Common::ThreadWorker* thread_worker, const Shader::Info& info_,
vk::ShaderModule spv_module_)
: update_descriptor_queue{update_descriptor_queue_}, info{info_},
: device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_},
spv_module(std::move(spv_module_)) {
DescriptorLayoutBuilder builder{device.GetLogical()};
builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
auto func{[this, &descriptor_pool] {
DescriptorLayoutBuilder builder{device.GetLogical()};
builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
descriptor_set_layout = builder.CreateDescriptorSetLayout();
pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout);
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
descriptor_set_layout = builder.CreateDescriptorSetLayout();
pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
descriptor_update_template =
builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout);
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
auto func{[this, &device] {
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
.pNext = nullptr,
@ -166,15 +167,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
});
}
scheduler.Record([this](vk::CommandBuffer cmdbuf) {
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
});
if (!descriptor_set_layout) {
return;
}
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set);
scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) {
if (!descriptor_set_layout) {
return;
}
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
const vk::Device& dev{device.GetLogical()};
dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
descriptor_set, nullptr);
});

View file

@ -40,6 +40,7 @@ public:
VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache);
private:
const Device& device;
VKUpdateDescriptorQueue& update_descriptor_queue;
Shader::Info info;

View file

@ -205,31 +205,31 @@ ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& m
GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_,
BufferCache& buffer_cache_, TextureCache& texture_cache_,
const Device& device, DescriptorPool& descriptor_pool,
const Device& device_, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue_,
Common::ThreadWorker* worker_thread,
RenderPassCache& render_pass_cache,
const GraphicsPipelineCacheKey& key_,
std::array<vk::ShaderModule, NUM_STAGES> stages,
const std::array<const Shader::Info*, NUM_STAGES>& infos)
: key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_},
buffer_cache{buffer_cache_}, scheduler{scheduler_},
: key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_},
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_},
update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} {
std::ranges::transform(infos, stage_infos.begin(),
[](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
descriptor_set_layout = builder.CreateDescriptorSetLayout();
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos);
auto func{[this, &render_pass_cache, &descriptor_pool] {
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
descriptor_set_layout = builder.CreateDescriptorSetLayout();
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos);
auto func{[this, &device, &render_pass_cache, builder] {
const VkDescriptorSetLayout set_layout{*descriptor_set_layout};
pipeline_layout = builder.CreatePipelineLayout(set_layout);
descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout);
const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))};
Validate();
MakePipeline(device, render_pass);
MakePipeline(render_pass);
std::lock_guard lock{build_mutex};
is_built = true;
@ -440,24 +440,22 @@ void GraphicsPipeline::ConfigureDraw() {
build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
});
}
if (scheduler.UpdateGraphicsPipeline(this)) {
scheduler.Record([this](vk::CommandBuffer cmdbuf) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
});
}
if (!descriptor_set_layout) {
return;
}
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set);
scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) {
const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
if (!descriptor_set_layout) {
return;
}
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
const vk::Device& dev{device.GetLogical()};
dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
descriptor_set, nullptr);
});
}
void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) {
void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
FixedPipelineState::DynamicState dynamic{};
if (!device.IsExtExtendedDynamicStateSupported()) {
dynamic = key.state.dynamic_state;

View file

@ -109,19 +109,20 @@ private:
void ConfigureDraw();
void MakePipeline(const Device& device, VkRenderPass render_pass);
void MakePipeline(VkRenderPass render_pass);
void Validate();
const GraphicsPipelineCacheKey key;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::MemoryManager& gpu_memory;
const Device& device;
TextureCache& texture_cache;
BufferCache& buffer_cache;
VKScheduler& scheduler;
VKUpdateDescriptorQueue& update_descriptor_queue;
void (*configure_func)(GraphicsPipeline*, bool);
void (*configure_func)(GraphicsPipeline*, bool){};
std::vector<GraphicsPipelineCacheKey> transition_keys;
std::vector<GraphicsPipeline*> transitions;

View file

@ -36,13 +36,4 @@ void VKUpdateDescriptorQueue::Acquire() {
upload_start = payload_cursor;
}
void VKUpdateDescriptorQueue::Send(const VkDescriptorUpdateTemplateKHR* update_template,
VkDescriptorSet set) {
const void* const data = upload_start;
const vk::Device* const logical = &device.GetLogical();
scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
logical->UpdateDescriptorSet(set, *update_template, data);
});
}
} // namespace Vulkan

View file

@ -39,7 +39,9 @@ public:
void Acquire();
void Send(const VkDescriptorUpdateTemplateKHR* update_template, VkDescriptorSet set);
const DescriptorUpdateEntry* UpdateData() const noexcept {
return upload_start;
}
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
*(payload_cursor++) = VkDescriptorImageInfo{