From 1e9213632a709716e20d2b8690f8fe31654496ba Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 Feb 2020 02:35:16 -0300 Subject: [PATCH] vk_shader_decompiler: Implement indexed textures Implement accessing textures through an index. It uses the same interface as OpenGL, the main difference is that Vulkan bindings are forced to be arrayed (the binding index doesn't change for stacked textures in SPIR-V). --- .../renderer_vulkan/vk_compute_pipeline.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 101 +++++++++++------- .../renderer_vulkan/vk_pipeline_cache.h | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 21 ++-- .../renderer_vulkan/vk_shader_decompiler.cpp | 26 +++-- 6 files changed, 100 insertions(+), 55 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 9d5b8de7a9..60f57d83e7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -73,7 +73,7 @@ UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate std::vector template_entries; u32 binding = 0; u32 offset = 0; - FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries); + FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); if (template_entries.empty()) { // If the shader doesn't use descriptor sets, skip template creation. return UniqueDescriptorUpdateTemplate{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index b155dfb493..6a02403c1e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -97,8 +97,7 @@ UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplat u32 offset = 0; for (const auto& stage : program) { if (stage) { - FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset, - template_entries); + FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); } } if (template_entries.empty()) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7ddf7d3ee6..696e4b2911 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -36,6 +36,13 @@ using Tegra::Engines::ShaderType; namespace { +// C++20's using enum +constexpr auto eUniformBuffer = vk::DescriptorType::eUniformBuffer; +constexpr auto eStorageBuffer = vk::DescriptorType::eStorageBuffer; +constexpr auto eUniformTexelBuffer = vk::DescriptorType::eUniformTexelBuffer; +constexpr auto eCombinedImageSampler = vk::DescriptorType::eCombinedImageSampler; +constexpr auto eStorageImage = vk::DescriptorType::eStorageImage; + constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ VideoCommon::Shader::CompileDepth::FullDecompile}; @@ -119,23 +126,32 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { } } +template +void AddBindings(std::vector& bindings, u32& binding, + vk::ShaderStageFlags stage_flags, const Container& container) { + const u32 num_entries = static_cast(std::size(container)); + for (std::size_t i = 0; i < num_entries; ++i) { + u32 count = 1; + if constexpr (descriptor_type == eCombinedImageSampler) { + // Combined image samplers can be arrayed. + count = container[i].Size(); + } + bindings.emplace_back(binding++, descriptor_type, count, stage_flags, nullptr); + } +} + u32 FillDescriptorLayout(const ShaderEntries& entries, std::vector& bindings, Maxwell::ShaderProgram program_type, u32 base_binding) { const ShaderType stage = GetStageFromProgram(program_type); - const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage); + const vk::ShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); u32 binding = base_binding; - const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) { - for (std::size_t i = 0; i < num_entries; ++i) { - bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr); - } - }; - AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); - AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); - AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); - AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); - AddBindings(vk::DescriptorType::eStorageImage, entries.images.size()); + AddBindings(bindings, binding, flags, entries.const_buffers); + AddBindings(bindings, binding, flags, entries.global_buffers); + AddBindings(bindings, binding, flags, entries.texel_buffers); + AddBindings(bindings, binding, flags, entries.samplers); + AddBindings(bindings, binding, flags, entries.images); return binding; } @@ -361,32 +377,45 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { return {std::move(program), std::move(bindings)}; } -void FillDescriptorUpdateTemplateEntries( - const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, - std::vector& template_entries) { - static constexpr auto entry_size = static_cast(sizeof(DescriptorUpdateEntry)); - const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) { - const u32 count = static_cast(count_); - if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer && - device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { - // Nvidia has a bug where updating multiple uniform texels at once causes the driver to - // crash. - for (u32 i = 0; i < count; ++i) { - template_entries.emplace_back(binding + i, 0, 1, descriptor_type, - offset + i * entry_size, entry_size); - } - } else if (count != 0) { - template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size); - } - offset += count * entry_size; - binding += count; - }; +template +void AddEntry(std::vector& template_entries, u32& binding, + u32& offset, const Container& container) { + static constexpr u32 entry_size = static_cast(sizeof(DescriptorUpdateEntry)); + const u32 count = static_cast(std::size(container)); - AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); - AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); - AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); - AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); - AddEntry(vk::DescriptorType::eStorageImage, entries.images.size()); + if constexpr (descriptor_type == eCombinedImageSampler) { + for (u32 i = 0; i < count; ++i) { + const u32 num_samplers = container[i].Size(); + template_entries.emplace_back(binding, 0, num_samplers, descriptor_type, offset, + entry_size); + ++binding; + offset += num_samplers * entry_size; + } + return; + } + + if constexpr (descriptor_type == eUniformTexelBuffer) { + // Nvidia has a bug where updating multiple uniform texels at once causes the driver to + // crash. + for (u32 i = 0; i < count; ++i) { + template_entries.emplace_back(binding + i, 0, 1, descriptor_type, + offset + i * entry_size, entry_size); + } + } else if (count > 0) { + template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size); + } + offset += count * entry_size; + binding += count; +} + +void FillDescriptorUpdateTemplateEntries( + const ShaderEntries& entries, u32& binding, u32& offset, + std::vector& template_entries) { + AddEntry(template_entries, offset, binding, entries.const_buffers); + AddEntry(template_entries, offset, binding, entries.global_buffers); + AddEntry(template_entries, offset, binding, entries.texel_buffers); + AddEntry(template_entries, offset, binding, entries.samplers); + AddEntry(template_entries, offset, binding, entries.images); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 8678fc9c3c..92a670cc76 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -194,7 +194,7 @@ private: }; void FillDescriptorUpdateTemplateEntries( - const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, + const ShaderEntries& entries, u32& binding, u32& offset, std::vector& template_entries); } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 31c078f6ab..ad837dd4ac 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -105,17 +105,20 @@ void TransitionImages(const std::vector& views, vk::PipelineStageFlag template Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - std::size_t stage) { + std::size_t stage, std::size_t index = 0) { const auto stage_type = static_cast(stage); if (entry.IsBindless()) { const Tegra::Texture::TextureHandle tex_handle = engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset()); return engine.GetTextureInfo(tex_handle); } + const auto& gpu_profile = engine.AccessGuestDriverProfile(); + const u32 entry_offset = static_cast(index * gpu_profile.GetTextureHandlerSize()); + const u32 offset = entry.GetOffset() + entry_offset; if constexpr (std::is_same_v) { - return engine.GetStageTexture(stage_type, entry.GetOffset()); + return engine.GetStageTexture(stage_type, offset); } else { - return engine.GetTexture(entry.GetOffset()); + return engine.GetTexture(offset); } } @@ -835,8 +838,10 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std:: MICROPROFILE_SCOPE(Vulkan_Textures); const auto& gpu = system.GPU().Maxwell3D(); for (const auto& entry : entries.samplers) { - const auto texture = GetTextureInfo(gpu, entry, stage); - SetupTexture(texture, entry); + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(gpu, entry, stage, i); + SetupTexture(texture, entry); + } } } @@ -885,8 +890,10 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); const auto& gpu = system.GPU().KeplerCompute(); for (const auto& entry : entries.samplers) { - const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex); - SetupTexture(texture, entry); + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i); + SetupTexture(texture, entry); + } } } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 2da622d151..9841f0dd13 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -69,8 +69,9 @@ struct TexelBuffer { struct SampledImage { Id image_type{}; - Id sampled_image_type{}; - Id sampler{}; + Id sampler_type{}; + Id sampler_pointer_type{}; + Id variable{}; }; struct StorageImage { @@ -833,16 +834,20 @@ private: constexpr int sampled = 1; constexpr auto format = spv::ImageFormat::Unknown; const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); - const Id sampled_image_type = TypeSampledImage(image_type); - const Id pointer_type = - TypePointer(spv::StorageClass::UniformConstant, sampled_image_type); + const Id sampler_type = TypeSampledImage(image_type); + const Id sampler_pointer_type = + TypePointer(spv::StorageClass::UniformConstant, sampler_type); + const Id type = sampler.IsIndexed() + ? TypeArray(sampler_type, Constant(t_uint, sampler.Size())) + : sampler_type; + const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type); const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.GetIndex()))); Decorate(id, spv::Decoration::Binding, binding++); Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - sampled_images.emplace(sampler.GetIndex(), - SampledImage{image_type, sampled_image_type, id}); + sampled_images.emplace(sampler.GetIndex(), SampledImage{image_type, sampler_type, + sampler_pointer_type, id}); } return binding; } @@ -1525,7 +1530,12 @@ private: ASSERT(!meta.sampler.IsBuffer()); const auto& entry = sampled_images.at(meta.sampler.GetIndex()); - return OpLoad(entry.sampled_image_type, entry.sampler); + Id sampler = entry.variable; + if (meta.sampler.IsIndexed()) { + const Id index = AsInt(Visit(meta.index)); + sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index); + } + return OpLoad(entry.sampler_type, sampler); } Id GetTextureImage(Operation operation) {