From 9c548146cad6245d926853703cf4d838799d0c1b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 6 Jan 2020 21:10:56 -0300 Subject: [PATCH 1/6] vk_rasterizer: Add placeholder --- src/video_core/CMakeLists.txt | 1 + src/video_core/renderer_vulkan/vk_rasterizer.h | 13 +++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 src/video_core/renderer_vulkan/vk_rasterizer.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c80171fe60..aa008576df 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -163,6 +163,7 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_image.h renderer_vulkan/vk_memory_manager.cpp renderer_vulkan/vk_memory_manager.h + renderer_vulkan/vk_rasterizer.h renderer_vulkan/vk_renderpass_cache.cpp renderer_vulkan/vk_renderpass_cache.h renderer_vulkan/vk_resource_manager.cpp diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h new file mode 100644 index 0000000000..fc324952bb --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -0,0 +1,13 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/rasterizer_interface.h" + +namespace Vulkan { + +class RasterizerVulkan : public VideoCore::RasterizerInterface {}; + +} // namespace Vulkan From 3142f1b597436d0bd1de12c1769da897976c6b32 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 6 Jan 2020 21:11:23 -0300 Subject: [PATCH 2/6] fixed_pipeline_state: Add depth clamp --- .../renderer_vulkan/fixed_pipeline_state.cpp | 18 ++++++++++++------ .../renderer_vulkan/fixed_pipeline_state.h | 10 ++++++---- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 5a490f6efb..4e3ff231ea 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -109,6 +109,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) const auto topology = static_cast(regs.draw.topology.Value()); const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]]; + const auto& clip = regs.view_volume_clip_control; + const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1; + Maxwell::Cull::FrontFace front_face = regs.cull.front_face; if (regs.screen_y_control.triangle_rast_flip != 0 && regs.viewport_transform[0].scale_y > 0.0f) { @@ -119,8 +122,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) } const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; - return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, gl_ndc, - regs.cull.cull_face, front_face); + return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, + depth_clamp_enabled, gl_ndc, regs.cull.cull_face, + front_face); } } // Anonymous namespace @@ -222,15 +226,17 @@ bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept { return static_cast(cull_enable) ^ (static_cast(depth_bias_enable) << 1) ^ - (static_cast(ndc_minus_one_to_one) << 2) ^ + (static_cast(depth_clamp_enable) << 2) ^ + (static_cast(ndc_minus_one_to_one) << 3) ^ (static_cast(cull_face) << 24) ^ (static_cast(front_face) << 48); } bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept { - return std::tie(cull_enable, depth_bias_enable, ndc_minus_one_to_one, cull_face, front_face) == - std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.ndc_minus_one_to_one, rhs.cull_face, - rhs.front_face); + return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable, ndc_minus_one_to_one, + cull_face, front_face) == + std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.depth_clamp_enable, + rhs.ndc_minus_one_to_one, rhs.cull_face, rhs.front_face); } std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 04152c0d4e..87056ef371 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -170,15 +170,17 @@ struct FixedPipelineState { }; struct Rasterizer { - constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool ndc_minus_one_to_one, - Maxwell::Cull::CullFace cull_face, Maxwell::Cull::FrontFace front_face) + constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable, + bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face, + Maxwell::Cull::FrontFace front_face) : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable}, - ndc_minus_one_to_one{ndc_minus_one_to_one}, cull_face{cull_face}, front_face{ - front_face} {} + depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one}, + cull_face{cull_face}, front_face{front_face} {} Rasterizer() = default; bool cull_enable; bool depth_bias_enable; + bool depth_clamp_enable; bool ndc_minus_one_to_one; Maxwell::Cull::CullFace cull_face; Maxwell::Cull::FrontFace front_face; From b392a5986ea305d805d259e414f8fb5ecc0f80b4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 6 Jan 2020 21:18:38 -0300 Subject: [PATCH 3/6] vk_pipeline_cache: Add file and define descriptor update template filler This function allows us to share code between compute and graphics pipelines compilation. --- src/video_core/CMakeLists.txt | 2 + .../renderer_vulkan/vk_pipeline_cache.cpp | 43 +++++++++++++++++++ .../renderer_vulkan/vk_pipeline_cache.h | 22 ++++++++++ 3 files changed, 67 insertions(+) create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_cache.cpp create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_cache.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index aa008576df..efdd2c9029 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -163,6 +163,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_image.h renderer_vulkan/vk_memory_manager.cpp renderer_vulkan/vk_memory_manager.h + renderer_vulkan/vk_pipeline_cache.cpp + renderer_vulkan/vk_pipeline_cache.h renderer_vulkan/vk_rasterizer.h renderer_vulkan/vk_renderpass_cache.cpp renderer_vulkan/vk_renderpass_cache.h diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp new file mode 100644 index 0000000000..9bc027cbf8 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -0,0 +1,43 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" + +namespace Vulkan { + +void FillDescriptorUpdateTemplateEntries( + const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, + std::vector& template_entries) { + static constexpr auto entry_size = static_cast(sizeof(DescriptorUpdateEntry)); + const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) { + const u32 count = static_cast(count_); + if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer && + device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { + // Nvidia has a bug where updating multiple uniform texels at once causes the driver to + // crash. + for (u32 i = 0; i < count; ++i) { + template_entries.emplace_back(binding + i, 0, 1, descriptor_type, + offset + i * entry_size, entry_size); + } + } else if (count != 0) { + template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size); + } + offset += count * entry_size; + binding += count; + }; + + AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); + AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); + AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); + AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); + AddEntry(vk::DescriptorType::eStorageImage, entries.images.size()); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h new file mode 100644 index 0000000000..532ee45cc6 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -0,0 +1,22 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_shader_decompiler.h" +#include "video_core/shader/shader_ir.h" + +namespace Vulkan { + +class VKDevice; + +void FillDescriptorUpdateTemplateEntries( + const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, + std::vector& template_entries); + +} // namespace Vulkan From dc96a59fa08c3e1f501964847f87d37f3d6dd035 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 6 Jan 2020 21:25:14 -0300 Subject: [PATCH 4/6] vk_compute_pipeline: Initial implementation This abstraction represents a Vulkan compute pipeline. --- src/video_core/CMakeLists.txt | 2 + .../renderer_vulkan/vk_compute_pipeline.cpp | 112 ++++++++++++++++++ .../renderer_vulkan/vk_compute_pipeline.h | 66 +++++++++++ .../renderer_vulkan/vk_pipeline_cache.h | 39 ++++++ 4 files changed, 219 insertions(+) create mode 100644 src/video_core/renderer_vulkan/vk_compute_pipeline.cpp create mode 100644 src/video_core/renderer_vulkan/vk_compute_pipeline.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index efdd2c9029..61ac0f23a1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -155,6 +155,8 @@ if (ENABLE_VULKAN) renderer_vulkan/maxwell_to_vk.h renderer_vulkan/vk_buffer_cache.cpp renderer_vulkan/vk_buffer_cache.h + renderer_vulkan/vk_compute_pipeline.cpp + renderer_vulkan/vk_compute_pipeline.h renderer_vulkan/vk_descriptor_pool.cpp renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_device.cpp diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp new file mode 100644 index 0000000000..9d5b8de7a9 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -0,0 +1,112 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_compute_pipeline.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_decompiler.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" + +namespace Vulkan { + +VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& scheduler, + VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + const SPIRVShader& shader) + : device{device}, scheduler{scheduler}, entries{shader.entries}, + descriptor_set_layout{CreateDescriptorSetLayout()}, + descriptor_allocator{descriptor_pool, *descriptor_set_layout}, + update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, + descriptor_template{CreateDescriptorUpdateTemplate()}, + shader_module{CreateShaderModule(shader.code)}, pipeline{CreatePipeline()} {} + +VKComputePipeline::~VKComputePipeline() = default; + +vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() { + if (!descriptor_template) { + return {}; + } + const auto set = descriptor_allocator.Commit(scheduler.GetFence()); + update_descriptor_queue.Send(*descriptor_template, set); + return set; +} + +UniqueDescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { + std::vector bindings; + u32 binding = 0; + const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) { + // TODO(Rodrigo): Maybe make individual bindings here? + for (u32 bindpoint = 0; bindpoint < static_cast(num_entries); ++bindpoint) { + bindings.emplace_back(binding++, descriptor_type, 1, vk::ShaderStageFlagBits::eCompute, + nullptr); + } + }; + AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); + AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); + AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); + AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); + AddBindings(vk::DescriptorType::eStorageImage, entries.images.size()); + + const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci( + {}, static_cast(bindings.size()), bindings.data()); + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld); +} + +UniquePipelineLayout VKComputePipeline::CreatePipelineLayout() const { + const vk::PipelineLayoutCreateInfo layout_ci({}, 1, &*descriptor_set_layout, 0, nullptr); + const auto dev = device.GetLogical(); + return dev.createPipelineLayoutUnique(layout_ci, nullptr, device.GetDispatchLoader()); +} + +UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate() const { + std::vector template_entries; + u32 binding = 0; + u32 offset = 0; + FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries); + if (template_entries.empty()) { + // If the shader doesn't use descriptor sets, skip template creation. + return UniqueDescriptorUpdateTemplate{}; + } + + const vk::DescriptorUpdateTemplateCreateInfo template_ci( + {}, static_cast(template_entries.size()), template_entries.data(), + vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout, + vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET); + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld); +} + +UniqueShaderModule VKComputePipeline::CreateShaderModule(const std::vector& code) const { + const vk::ShaderModuleCreateInfo module_ci({}, code.size() * sizeof(u32), code.data()); + const auto dev = device.GetLogical(); + return dev.createShaderModuleUnique(module_ci, nullptr, device.GetDispatchLoader()); +} + +UniquePipeline VKComputePipeline::CreatePipeline() const { + vk::PipelineShaderStageCreateInfo shader_stage_ci({}, vk::ShaderStageFlagBits::eCompute, + *shader_module, "main", nullptr); + vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; + subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; + if (entries.uses_warps && device.IsGuestWarpSizeSupported(vk::ShaderStageFlagBits::eCompute)) { + shader_stage_ci.pNext = &subgroup_size_ci; + } + + const vk::ComputePipelineCreateInfo create_info({}, shader_stage_ci, *layout, {}, 0); + const auto dev = device.GetLogical(); + return dev.createComputePipelineUnique({}, create_info, nullptr, device.GetDispatchLoader()); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h new file mode 100644 index 0000000000..22235c6c97 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -0,0 +1,66 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_shader_decompiler.h" + +namespace Vulkan { + +class VKDevice; +class VKScheduler; +class VKUpdateDescriptorQueue; + +class VKComputePipeline final { +public: + explicit VKComputePipeline(const VKDevice& device, VKScheduler& scheduler, + VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + const SPIRVShader& shader); + ~VKComputePipeline(); + + vk::DescriptorSet CommitDescriptorSet(); + + vk::Pipeline GetHandle() const { + return *pipeline; + } + + vk::PipelineLayout GetLayout() const { + return *layout; + } + + const ShaderEntries& GetEntries() { + return entries; + } + +private: + UniqueDescriptorSetLayout CreateDescriptorSetLayout() const; + + UniquePipelineLayout CreatePipelineLayout() const; + + UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate() const; + + UniqueShaderModule CreateShaderModule(const std::vector& code) const; + + UniquePipeline CreatePipeline() const; + + const VKDevice& device; + VKScheduler& scheduler; + ShaderEntries entries; + + UniqueDescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; + VKUpdateDescriptorQueue& update_descriptor_queue; + UniquePipelineLayout layout; + UniqueDescriptorUpdateTemplate descriptor_template; + UniqueShaderModule shader_module; + UniquePipeline pipeline; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 532ee45cc6..33b1a1d231 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -4,9 +4,12 @@ #pragma once +#include +#include #include #include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/shader/shader_ir.h" @@ -15,6 +18,42 @@ namespace Vulkan { class VKDevice; +struct ComputePipelineCacheKey { + GPUVAddr shader{}; + u32 shared_memory_size{}; + std::array workgroup_size{}; + + std::size_t Hash() const noexcept { + return static_cast(shader) ^ + ((static_cast(shared_memory_size) >> 7) << 40) ^ + static_cast(workgroup_size[0]) ^ + (static_cast(workgroup_size[1]) << 16) ^ + (static_cast(workgroup_size[2]) << 24); + } + + bool operator==(const ComputePipelineCacheKey& rhs) const noexcept { + return std::tie(shader, shared_memory_size, workgroup_size) == + std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size); + } +}; + +} // namespace Vulkan + +namespace std { + +template <> +struct hash { + std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std + +namespace Vulkan { + +class VKDevice; + void FillDescriptorUpdateTemplateEntries( const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, std::vector& template_entries); From 2effdeb9243b5ca80a696b9bc003fb0179e0b6bd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 6 Jan 2020 21:29:13 -0300 Subject: [PATCH 5/6] vk_graphics_pipeline: Initial implementation This abstractio represents the state of the 3D engine at a given draw. Instead of changing individual bits of the pipeline how it's done in APIs like D3D11, OpenGL and NVN; on Vulkan we are forced to put everything together into a single, immutable object. It takes advantage of the few dynamic states Vulkan offers. --- src/video_core/CMakeLists.txt | 2 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 271 ++++++++++++++++++ .../renderer_vulkan/vk_graphics_pipeline.h | 90 ++++++ .../renderer_vulkan/vk_pipeline_cache.h | 32 +++ 4 files changed, 395 insertions(+) create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 61ac0f23a1..caf03c2aed 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -161,6 +161,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_device.cpp renderer_vulkan/vk_device.h + renderer_vulkan/vk_graphics_pipeline.cpp + renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_image.cpp renderer_vulkan/vk_image.h renderer_vulkan/vk_memory_manager.cpp diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp new file mode 100644 index 0000000000..2e0536bf6d --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -0,0 +1,271 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include "common/assert.h" +#include "common/common_types.h" +#include "common/microprofile.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" + +namespace Vulkan { + +MICROPROFILE_DECLARE(Vulkan_PipelineCache); + +namespace { + +vk::StencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) { + return vk::StencilOpState(MaxwellToVK::StencilOp(face.action_stencil_fail), + MaxwellToVK::StencilOp(face.action_depth_pass), + MaxwellToVK::StencilOp(face.action_depth_fail), + MaxwellToVK::ComparisonOp(face.test_func), 0, 0, 0); +} + +bool SupportsPrimitiveRestart(vk::PrimitiveTopology topology) { + static constexpr std::array unsupported_topologies = { + vk::PrimitiveTopology::ePointList, + vk::PrimitiveTopology::eLineList, + vk::PrimitiveTopology::eTriangleList, + vk::PrimitiveTopology::eLineListWithAdjacency, + vk::PrimitiveTopology::eTriangleListWithAdjacency, + vk::PrimitiveTopology::ePatchList}; + return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), + topology) == std::end(unsupported_topologies); +} + +} // Anonymous namespace + +VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler, + VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + VKRenderPassCache& renderpass_cache, + const GraphicsPipelineCacheKey& key, + const std::vector& bindings, + const SPIRVProgram& program) + : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()}, + descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, + descriptor_allocator{descriptor_pool, *descriptor_set_layout}, + update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, + descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules( + program)}, + renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline( + key.renderpass_params, + program)} {} + +VKGraphicsPipeline::~VKGraphicsPipeline() = default; + +vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { + if (!descriptor_template) { + return {}; + } + const auto set = descriptor_allocator.Commit(scheduler.GetFence()); + update_descriptor_queue.Send(*descriptor_template, set); + return set; +} + +UniqueDescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( + const std::vector& bindings) const { + const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci( + {}, static_cast(bindings.size()), bindings.data()); + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld); +} + +UniquePipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { + const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, 0, + nullptr); + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + return dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld); +} + +UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplate( + const SPIRVProgram& program) const { + std::vector template_entries; + u32 binding = 0; + u32 offset = 0; + for (const auto& stage : program) { + if (stage) { + FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset, + template_entries); + } + } + if (template_entries.empty()) { + // If the shader doesn't use descriptor sets, skip template creation. + return UniqueDescriptorUpdateTemplate{}; + } + + const vk::DescriptorUpdateTemplateCreateInfo template_ci( + {}, static_cast(template_entries.size()), template_entries.data(), + vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout, + vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET); + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld); +} + +std::vector VKGraphicsPipeline::CreateShaderModules( + const SPIRVProgram& program) const { + std::vector modules; + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { + const auto& stage = program[i]; + if (!stage) { + continue; + } + const vk::ShaderModuleCreateInfo module_ci({}, stage->code.size() * sizeof(u32), + stage->code.data()); + modules.emplace_back(dev.createShaderModuleUnique(module_ci, nullptr, dld)); + } + return modules; +} + +UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, + const SPIRVProgram& program) const { + const auto& vi = fixed_state.vertex_input; + const auto& ia = fixed_state.input_assembly; + const auto& ds = fixed_state.depth_stencil; + const auto& cd = fixed_state.color_blending; + const auto& ts = fixed_state.tessellation; + const auto& rs = fixed_state.rasterizer; + + std::vector vertex_bindings; + std::vector vertex_binding_divisors; + for (std::size_t i = 0; i < vi.num_bindings; ++i) { + const auto& binding = vi.bindings[i]; + const bool instanced = binding.divisor != 0; + const auto rate = instanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex; + vertex_bindings.emplace_back(binding.index, binding.stride, rate); + if (instanced) { + vertex_binding_divisors.emplace_back(binding.index, binding.divisor); + } + } + + std::vector vertex_attributes; + const auto& input_attributes = program[0]->entries.attributes; + for (std::size_t i = 0; i < vi.num_attributes; ++i) { + const auto& attribute = vi.attributes[i]; + if (input_attributes.find(attribute.index) == input_attributes.end()) { + // Skip attributes not used by the vertex shaders. + continue; + } + vertex_attributes.emplace_back(attribute.index, attribute.buffer, + MaxwellToVK::VertexFormat(attribute.type, attribute.size), + attribute.offset); + } + + vk::PipelineVertexInputStateCreateInfo vertex_input_ci( + {}, static_cast(vertex_bindings.size()), vertex_bindings.data(), + static_cast(vertex_attributes.size()), vertex_attributes.data()); + + const vk::PipelineVertexInputDivisorStateCreateInfoEXT vertex_input_divisor_ci( + static_cast(vertex_binding_divisors.size()), vertex_binding_divisors.data()); + if (!vertex_binding_divisors.empty()) { + vertex_input_ci.pNext = &vertex_input_divisor_ci; + } + + const auto primitive_topology = MaxwellToVK::PrimitiveTopology(device, ia.topology); + const vk::PipelineInputAssemblyStateCreateInfo input_assembly_ci( + {}, primitive_topology, + ia.primitive_restart_enable && SupportsPrimitiveRestart(primitive_topology)); + + const vk::PipelineTessellationStateCreateInfo tessellation_ci({}, ts.patch_control_points); + + const vk::PipelineViewportStateCreateInfo viewport_ci({}, Maxwell::NumViewports, nullptr, + Maxwell::NumViewports, nullptr); + + // TODO(Rodrigo): Find out what's the default register value for front face + const vk::PipelineRasterizationStateCreateInfo rasterizer_ci( + {}, rs.depth_clamp_enable, false, vk::PolygonMode::eFill, + rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : vk::CullModeFlagBits::eNone, + rs.cull_enable ? MaxwellToVK::FrontFace(rs.front_face) : vk::FrontFace::eCounterClockwise, + rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f); + + const vk::PipelineMultisampleStateCreateInfo multisampling_ci( + {}, vk::SampleCountFlagBits::e1, false, 0.0f, nullptr, false, false); + + const vk::CompareOp depth_test_compare = ds.depth_test_enable + ? MaxwellToVK::ComparisonOp(ds.depth_test_function) + : vk::CompareOp::eAlways; + + const vk::PipelineDepthStencilStateCreateInfo depth_stencil_ci( + {}, ds.depth_test_enable, ds.depth_write_enable, depth_test_compare, ds.depth_bounds_enable, + ds.stencil_enable, GetStencilFaceState(ds.front_stencil), + GetStencilFaceState(ds.back_stencil), 0.0f, 0.0f); + + std::array cb_attachments; + const std::size_t num_attachments = + std::min(cd.attachments_count, renderpass_params.color_attachments.size()); + for (std::size_t i = 0; i < num_attachments; ++i) { + constexpr std::array component_table{ + vk::ColorComponentFlagBits::eR, vk::ColorComponentFlagBits::eG, + vk::ColorComponentFlagBits::eB, vk::ColorComponentFlagBits::eA}; + const auto& blend = cd.attachments[i]; + + vk::ColorComponentFlags color_components{}; + for (std::size_t j = 0; j < component_table.size(); ++j) { + if (blend.components[j]) + color_components |= component_table[j]; + } + + cb_attachments[i] = vk::PipelineColorBlendAttachmentState( + blend.enable, MaxwellToVK::BlendFactor(blend.src_rgb_func), + MaxwellToVK::BlendFactor(blend.dst_rgb_func), + MaxwellToVK::BlendEquation(blend.rgb_equation), + MaxwellToVK::BlendFactor(blend.src_a_func), MaxwellToVK::BlendFactor(blend.dst_a_func), + MaxwellToVK::BlendEquation(blend.a_equation), color_components); + } + const vk::PipelineColorBlendStateCreateInfo color_blending_ci({}, false, vk::LogicOp::eCopy, + static_cast(num_attachments), + cb_attachments.data(), {}); + + constexpr std::array dynamic_states = { + vk::DynamicState::eViewport, vk::DynamicState::eScissor, + vk::DynamicState::eDepthBias, vk::DynamicState::eBlendConstants, + vk::DynamicState::eDepthBounds, vk::DynamicState::eStencilCompareMask, + vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilReference}; + const vk::PipelineDynamicStateCreateInfo dynamic_state_ci( + {}, static_cast(dynamic_states.size()), dynamic_states.data()); + + vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; + subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; + + std::vector shader_stages; + std::size_t module_index = 0; + for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + if (!program[stage]) { + continue; + } + const auto stage_enum = static_cast(stage); + const auto vk_stage = MaxwellToVK::ShaderStage(stage_enum); + auto& stage_ci = shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags{}, vk_stage, + *modules[module_index++], "main", nullptr); + if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(vk_stage)) { + stage_ci.pNext = &subgroup_size_ci; + } + } + + const vk::GraphicsPipelineCreateInfo create_info( + {}, static_cast(shader_stages.size()), shader_stages.data(), &vertex_input_ci, + &input_assembly_ci, &tessellation_ci, &viewport_ci, &rasterizer_ci, &multisampling_ci, + &depth_stencil_ci, &color_blending_ci, &dynamic_state_ci, *layout, renderpass, 0, {}, 0); + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + return dev.createGraphicsPipelineUnique(nullptr, create_info, nullptr, dld); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h new file mode 100644 index 0000000000..4f5e4ea2d4 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -0,0 +1,90 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include + +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_shader_decompiler.h" + +namespace Vulkan { + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +struct GraphicsPipelineCacheKey; + +class VKDescriptorPool; +class VKDevice; +class VKRenderPassCache; +class VKScheduler; +class VKUpdateDescriptorQueue; + +using SPIRVProgram = std::array, Maxwell::MaxShaderStage>; + +class VKGraphicsPipeline final { +public: + explicit VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler, + VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + VKRenderPassCache& renderpass_cache, + const GraphicsPipelineCacheKey& key, + const std::vector& bindings, + const SPIRVProgram& program); + ~VKGraphicsPipeline(); + + vk::DescriptorSet CommitDescriptorSet(); + + vk::Pipeline GetHandle() const { + return *pipeline; + } + + vk::PipelineLayout GetLayout() const { + return *layout; + } + + vk::RenderPass GetRenderPass() const { + return renderpass; + } + +private: + UniqueDescriptorSetLayout CreateDescriptorSetLayout( + const std::vector& bindings) const; + + UniquePipelineLayout CreatePipelineLayout() const; + + UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate( + const SPIRVProgram& program) const; + + std::vector CreateShaderModules(const SPIRVProgram& program) const; + + UniquePipeline CreatePipeline(const RenderPassParams& renderpass_params, + const SPIRVProgram& program) const; + + const VKDevice& device; + VKScheduler& scheduler; + const FixedPipelineState fixed_state; + const u64 hash; + + UniqueDescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; + VKUpdateDescriptorQueue& update_descriptor_queue; + UniquePipelineLayout layout; + UniqueDescriptorUpdateTemplate descriptor_template; + std::vector modules; + + vk::RenderPass renderpass; + UniquePipeline pipeline; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 33b1a1d231..e49ed135d8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -8,9 +8,12 @@ #include #include +#include + #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/shader/shader_ir.h" @@ -18,6 +21,28 @@ namespace Vulkan { class VKDevice; +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +struct GraphicsPipelineCacheKey { + FixedPipelineState fixed_state; + std::array shaders; + RenderPassParams renderpass_params; + + std::size_t Hash() const noexcept { + std::size_t hash = fixed_state.Hash(); + for (const auto& shader : shaders) { + boost::hash_combine(hash, shader); + } + boost::hash_combine(hash, renderpass_params.Hash()); + return hash; + } + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { + return std::tie(fixed_state, shaders, renderpass_params) == + std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params); + } +}; + struct ComputePipelineCacheKey { GPUVAddr shader{}; u32 shared_memory_size{}; @@ -41,6 +66,13 @@ struct ComputePipelineCacheKey { namespace std { +template <> +struct hash { + std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; + template <> struct hash { std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { From 6888d776fffb3d5e105eddc271a2d6231abf0922 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 6 Jan 2020 21:55:06 -0300 Subject: [PATCH 6/6] vk_pipeline_cache: Initial implementation Given a pipeline key, this cache returns a pipeline abstraction (for graphics or compute). --- .../renderer_vulkan/vk_pipeline_cache.cpp | 352 ++++++++++++++++++ .../renderer_vulkan/vk_pipeline_cache.h | 109 +++++- 2 files changed, 460 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9bc027cbf8..48e23d4cd5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -2,16 +2,368 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include +#include #include +#include "common/microprofile.h" +#include "core/core.h" +#include "core/memory.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_compute_pipeline.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/shader/compiler_settings.h" namespace Vulkan { +MICROPROFILE_DECLARE(Vulkan_PipelineCache); + +using Tegra::Engines::ShaderType; + +namespace { + +constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ + VideoCommon::Shader::CompileDepth::FullDecompile}; + +/// Gets the address for the specified shader stage program +GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { + const auto& gpu{system.GPU().Maxwell3D()}; + const auto& shader_config{gpu.regs.shader_config[static_cast(program)]}; + return gpu.regs.code_address.CodeAddress() + shader_config.offset; +} + +/// Gets if the current instruction offset is a scheduler instruction +constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { + // Sched instructions appear once every 4 instructions. + constexpr std::size_t SchedPeriod = 4; + const std::size_t absolute_offset = offset - main_offset; + return (absolute_offset % SchedPeriod) == 0; +} + +/// Calculates the size of a program stream +std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) { + const std::size_t start_offset = is_compute ? 0 : 10; + // This is the encoded version of BRA that jumps to itself. All Nvidia + // shaders end with one. + constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; + constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; + std::size_t offset = start_offset; + while (offset < program.size()) { + const u64 instruction = program[offset]; + if (!IsSchedInstruction(offset, start_offset)) { + if ((instruction & mask) == self_jumping_branch) { + // End on Maxwell's "nop" instruction + break; + } + if (instruction == 0) { + break; + } + } + ++offset; + } + // The last instruction is included in the program size + return std::min(offset + 1, program.size()); +} + +/// Gets the shader program code from memory for the specified address +ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, + const u8* host_ptr, bool is_compute) { + ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); + ASSERT_OR_EXECUTE(host_ptr != nullptr, { + std::fill(program_code.begin(), program_code.end(), 0); + return program_code; + }); + memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), + program_code.size() * sizeof(u64)); + program_code.resize(CalculateProgramSize(program_code, is_compute)); + return program_code; +} + +constexpr std::size_t GetStageFromProgram(std::size_t program) { + return program == 0 ? 0 : program - 1; +} + +constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { + return static_cast(GetStageFromProgram(static_cast(program))); +} + +ShaderType GetShaderType(Maxwell::ShaderProgram program) { + switch (program) { + case Maxwell::ShaderProgram::VertexB: + return ShaderType::Vertex; + case Maxwell::ShaderProgram::TesselationControl: + return ShaderType::TesselationControl; + case Maxwell::ShaderProgram::TesselationEval: + return ShaderType::TesselationEval; + case Maxwell::ShaderProgram::Geometry: + return ShaderType::Geometry; + case Maxwell::ShaderProgram::Fragment: + return ShaderType::Fragment; + default: + UNIMPLEMENTED_MSG("program={}", static_cast(program)); + return ShaderType::Vertex; + } +} + +u32 FillDescriptorLayout(const ShaderEntries& entries, + std::vector& bindings, + Maxwell::ShaderProgram program_type, u32 base_binding) { + const ShaderType stage = GetStageFromProgram(program_type); + const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage); + + u32 binding = base_binding; + const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) { + for (std::size_t i = 0; i < num_entries; ++i) { + bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr); + } + }; + AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); + AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); + AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); + AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); + AddBindings(vk::DescriptorType::eStorageImage, entries.images.size()); + return binding; +} + +} // Anonymous namespace + +CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, + GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + ProgramCode program_code, u32 main_offset) + : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, + program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)}, + shader_ir{this->program_code, main_offset, compiler_settings, locker}, + entries{GenerateShaderEntries(shader_ir)} {} + +CachedShader::~CachedShader() = default; + +Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine( + Core::System& system, Tegra::Engines::ShaderType stage) { + if (stage == Tegra::Engines::ShaderType::Compute) { + return system.GPU().KeplerCompute(); + } else { + return system.GPU().Maxwell3D(); + } +} + +VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, + const VKDevice& device, VKScheduler& scheduler, + VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue) + : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, + descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, + renderpass_cache(device) {} + +VKPipelineCache::~VKPipelineCache() = default; + +std::array VKPipelineCache::GetShaders() { + const auto& gpu = system.GPU().Maxwell3D(); + auto& dirty = system.GPU().Maxwell3D().dirty.shaders; + if (!dirty) { + return last_shaders; + } + dirty = false; + + std::array shaders; + for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + const auto& shader_config = gpu.regs.shader_config[index]; + const auto program{static_cast(index)}; + + // Skip stages that are not enabled + if (!gpu.regs.IsShaderConfigEnabled(index)) { + continue; + } + + auto& memory_manager{system.GPU().MemoryManager()}; + const GPUVAddr program_addr{GetShaderAddress(system, program)}; + const auto host_ptr{memory_manager.GetPointer(program_addr)}; + auto shader = TryGet(host_ptr); + if (!shader) { + // No shader found - create a new one + constexpr u32 stage_offset = 10; + const auto stage = static_cast(index == 0 ? 0 : index - 1); + auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); + + const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); + ASSERT(cpu_addr); + + shader = std::make_shared(system, stage, program_addr, *cpu_addr, + host_ptr, std::move(code), stage_offset); + Register(shader); + } + shaders[index] = std::move(shader); + } + return last_shaders = shaders; +} + +VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) { + MICROPROFILE_SCOPE(Vulkan_PipelineCache); + + if (last_graphics_pipeline && last_graphics_key == key) { + return *last_graphics_pipeline; + } + last_graphics_key = key; + + const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); + auto& entry = pair->second; + if (is_cache_miss) { + LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); + const auto [program, bindings] = DecompileShaders(key); + entry = std::make_unique(device, scheduler, descriptor_pool, + update_descriptor_queue, renderpass_cache, key, + bindings, program); + } + return *(last_graphics_pipeline = entry.get()); +} + +VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { + MICROPROFILE_SCOPE(Vulkan_PipelineCache); + + const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); + auto& entry = pair->second; + if (!is_cache_miss) { + return *entry; + } + LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); + + auto& memory_manager = system.GPU().MemoryManager(); + const auto program_addr = key.shader; + const auto host_ptr = memory_manager.GetPointer(program_addr); + + auto shader = TryGet(host_ptr); + if (!shader) { + // No shader found - create a new one + const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); + ASSERT(cpu_addr); + + auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); + constexpr u32 kernel_main_offset = 0; + shader = std::make_shared(system, Tegra::Engines::ShaderType::Compute, + program_addr, *cpu_addr, host_ptr, std::move(code), + kernel_main_offset); + Register(shader); + } + + Specialization specialization; + specialization.workgroup_size = key.workgroup_size; + specialization.shared_memory_size = key.shared_memory_size; + + const SPIRVShader spirv_shader{ + Decompile(device, shader->GetIR(), ShaderType::Compute, specialization), + shader->GetEntries()}; + entry = std::make_unique(device, scheduler, descriptor_pool, + update_descriptor_queue, spirv_shader); + return *entry; +} + +void VKPipelineCache::Unregister(const Shader& shader) { + bool finished = false; + const auto Finish = [&] { + // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and + // flush. + if (finished) { + return; + } + finished = true; + scheduler.Finish(); + }; + + const GPUVAddr invalidated_addr = shader->GetGpuAddr(); + for (auto it = graphics_cache.begin(); it != graphics_cache.end();) { + auto& entry = it->first; + if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) == + entry.shaders.end()) { + ++it; + continue; + } + Finish(); + it = graphics_cache.erase(it); + } + for (auto it = compute_cache.begin(); it != compute_cache.end();) { + auto& entry = it->first; + if (entry.shader != invalidated_addr) { + ++it; + continue; + } + Finish(); + it = compute_cache.erase(it); + } + + RasterizerCache::Unregister(shader); +} + +std::pair> +VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { + const auto& fixed_state = key.fixed_state; + auto& memory_manager = system.GPU().MemoryManager(); + const auto& gpu = system.GPU().Maxwell3D(); + + Specialization specialization; + specialization.primitive_topology = fixed_state.input_assembly.topology; + if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) { + ASSERT(fixed_state.input_assembly.point_size != 0.0f); + specialization.point_size = fixed_state.input_assembly.point_size; + } + for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { + specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; + } + specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; + specialization.tessellation.primitive = fixed_state.tessellation.primitive; + specialization.tessellation.spacing = fixed_state.tessellation.spacing; + specialization.tessellation.clockwise = fixed_state.tessellation.clockwise; + for (const auto& rt : key.renderpass_params.color_attachments) { + specialization.enabled_rendertargets.set(rt.index); + } + + SPIRVProgram program; + std::vector bindings; + + for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + const auto program_enum = static_cast(index); + + // Skip stages that are not enabled + if (!gpu.regs.IsShaderConfigEnabled(index)) { + continue; + } + + const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + const auto shader = TryGet(host_ptr); + ASSERT(shader); + + const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 + const auto program_type = GetShaderType(program_enum); + const auto& entries = shader->GetEntries(); + program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization), + entries}; + + if (program_enum == Maxwell::ShaderProgram::VertexA) { + // VertexB was combined with VertexA, so we skip the VertexB iteration + ++index; + } + + const u32 old_binding = specialization.base_binding; + specialization.base_binding = + FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding); + ASSERT(old_binding + entries.NumBindings() == specialization.base_binding); + } + return {std::move(program), std::move(bindings)}; +} + void FillDescriptorUpdateTemplateEntries( const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, std::vector& template_entries) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e49ed135d8..8678fc9c3c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -6,23 +6,49 @@ #include #include +#include +#include +#include +#include +#include #include #include #include "common/common_types.h" +#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/rasterizer_cache.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" +#include "video_core/shader/const_buffer_locker.h" #include "video_core/shader/shader_ir.h" +#include "video_core/surface.h" + +namespace Core { +class System; +} namespace Vulkan { +class RasterizerVulkan; +class VKComputePipeline; +class VKDescriptorPool; class VKDevice; +class VKFence; +class VKScheduler; +class VKUpdateDescriptorQueue; +class CachedShader; +using Shader = std::shared_ptr; using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using ProgramCode = std::vector; + struct GraphicsPipelineCacheKey { FixedPipelineState fixed_state; std::array shaders; @@ -84,7 +110,88 @@ struct hash { namespace Vulkan { -class VKDevice; +class CachedShader final : public RasterizerCacheObject { +public: + explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, + VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset); + ~CachedShader(); + + GPUVAddr GetGpuAddr() const { + return gpu_addr; + } + + VAddr GetCpuAddr() const override { + return cpu_addr; + } + + std::size_t GetSizeInBytes() const override { + return program_code.size() * sizeof(u64); + } + + VideoCommon::Shader::ShaderIR& GetIR() { + return shader_ir; + } + + const VideoCommon::Shader::ShaderIR& GetIR() const { + return shader_ir; + } + + const ShaderEntries& GetEntries() const { + return entries; + } + +private: + static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system, + Tegra::Engines::ShaderType stage); + + GPUVAddr gpu_addr{}; + VAddr cpu_addr{}; + ProgramCode program_code; + VideoCommon::Shader::ConstBufferLocker locker; + VideoCommon::Shader::ShaderIR shader_ir; + ShaderEntries entries; +}; + +class VKPipelineCache final : public RasterizerCache { +public: + explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, + const VKDevice& device, VKScheduler& scheduler, + VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue); + ~VKPipelineCache(); + + std::array GetShaders(); + + VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); + + VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); + +protected: + void Unregister(const Shader& shader) override; + + void FlushObjectInner(const Shader& object) override {} + +private: + std::pair> DecompileShaders( + const GraphicsPipelineCacheKey& key); + + Core::System& system; + const VKDevice& device; + VKScheduler& scheduler; + VKDescriptorPool& descriptor_pool; + VKUpdateDescriptorQueue& update_descriptor_queue; + + VKRenderPassCache renderpass_cache; + + std::array last_shaders; + + GraphicsPipelineCacheKey last_graphics_key; + VKGraphicsPipeline* last_graphics_pipeline = nullptr; + + std::unordered_map> + graphics_cache; + std::unordered_map> compute_cache; +}; void FillDescriptorUpdateTemplateEntries( const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,