From 3dcaa84ba442ac173c8b5241049296d8fe8a3fd7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 2 Mar 2020 18:54:08 -0300 Subject: [PATCH 01/14] shader/transform_feedback: Add host API friendly TFB builder --- CMakeModules/GenerateSCMRev.cmake | 2 + src/common/CMakeLists.txt | 2 + src/video_core/CMakeLists.txt | 2 + src/video_core/shader/transform_feedback.cpp | 114 +++++++++++++++++++ src/video_core/shader/transform_feedback.h | 22 ++++ 5 files changed, 142 insertions(+) create mode 100644 src/video_core/shader/transform_feedback.cpp create mode 100644 src/video_core/shader/transform_feedback.h diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 8c13a94fb4..83e4e9df2e 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -102,6 +102,8 @@ set(HASH_FILES "${VIDEO_CORE}/shader/shader_ir.cpp" "${VIDEO_CORE}/shader/shader_ir.h" "${VIDEO_CORE}/shader/track.cpp" + "${VIDEO_CORE}/shader/transform_feedback.cpp" + "${VIDEO_CORE}/shader/transform_feedback.h" ) set(COMBINED "") foreach (F IN LISTS HASH_FILES) diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 1f621fb1fb..fbebed7159 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -83,6 +83,8 @@ add_custom_command(OUTPUT scm_rev.cpp "${VIDEO_CORE}/shader/shader_ir.cpp" "${VIDEO_CORE}/shader/shader_ir.h" "${VIDEO_CORE}/shader/track.cpp" + "${VIDEO_CORE}/shader/transform_feedback.cpp" + "${VIDEO_CORE}/shader/transform_feedback.h" # and also check that the scm_rev files haven't changed "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 0101e5f0e1..91df062d7c 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -129,6 +129,8 @@ add_library(video_core STATIC shader/shader_ir.cpp shader/shader_ir.h shader/track.cpp + shader/transform_feedback.cpp + shader/transform_feedback.h surface.cpp surface.h texture_cache/format_lookup_table.cpp diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp new file mode 100644 index 0000000000..db86c940f6 --- /dev/null +++ b/src/video_core/shader/transform_feedback.cpp @@ -0,0 +1,114 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/shader/registry.h" +#include "video_core/shader/transform_feedback.h" + +namespace VideoCommon::Shader { + +namespace { + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 + +/// Attribute offsets that describe a vector +constexpr std::array VECTORS = { + 28, // gl_Position + 32, // Generic 0 + 36, // Generic 1 + 40, // Generic 2 + 44, // Generic 3 + 48, // Generic 4 + 52, // Generic 5 + 56, // Generic 6 + 60, // Generic 7 + 64, // Generic 8 + 68, // Generic 9 + 72, // Generic 10 + 76, // Generic 11 + 80, // Generic 12 + 84, // Generic 13 + 88, // Generic 14 + 92, // Generic 15 + 96, // Generic 16 + 100, // Generic 17 + 104, // Generic 18 + 108, // Generic 19 + 112, // Generic 20 + 116, // Generic 21 + 120, // Generic 22 + 124, // Generic 23 + 128, // Generic 24 + 132, // Generic 25 + 136, // Generic 26 + 140, // Generic 27 + 144, // Generic 28 + 148, // Generic 29 + 152, // Generic 30 + 156, // Generic 31 + 160, // gl_FrontColor + 164, // gl_FrontSecondaryColor + 160, // gl_BackColor + 164, // gl_BackSecondaryColor + 192, // gl_TexCoord[0] + 196, // gl_TexCoord[1] + 200, // gl_TexCoord[2] + 204, // gl_TexCoord[3] + 208, // gl_TexCoord[4] + 212, // gl_TexCoord[5] + 216, // gl_TexCoord[6] + 220, // gl_TexCoord[7] +}; +} // namespace + +std::unordered_map BuildTransformFeedback(const GraphicsInfo& info) { + + std::unordered_map tfb; + + for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { + const auto& locations = info.tfb_varying_locs[buffer]; + const auto& layout = info.tfb_layouts[buffer]; + const std::size_t varying_count = layout.varying_count; + + std::size_t highest = 0; + + for (std::size_t offset = 0; offset < varying_count; ++offset) { + const std::size_t base_offset = offset; + const u8 location = locations[offset]; + + VaryingTFB varying; + varying.buffer = layout.stream; + varying.offset = offset * sizeof(u32); + varying.components = 1; + + if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { + UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); + + const u8 base_index = location / 4; + while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { + ++offset; + ++varying.components; + } + } + + [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; + UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); + + highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); + } + + UNIMPLEMENTED_IF(highest != layout.stride); + } + return tfb; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h new file mode 100644 index 0000000000..8a8235019a --- /dev/null +++ b/src/video_core/shader/transform_feedback.h @@ -0,0 +1,22 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" +#include "video_core/shader/registry.h" + +namespace VideoCommon::Shader { + +struct VaryingTFB { + std::size_t buffer; + std::size_t offset; + std::size_t components; +}; + +std::unordered_map BuildTransformFeedback(const GraphicsInfo& info); + +} // namespace VideoCommon::Shader From 4d711dface5dfc76d5ae0d62c635ec9ba6bd4293 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 2 Mar 2020 18:55:39 -0300 Subject: [PATCH 02/14] gl_shader_decompiler: Decorate output attributes with XFB layout We sometimes have to slice attributes in different parts. This is needed for example in instances where the game feedbacks 3 components but writes 4 from the shader (something that is possible with GL_NV_transform_feedback). --- .../renderer_opengl/gl_shader_decompiler.cpp | 134 ++++++++++++++---- 1 file changed, 105 insertions(+), 29 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 19d6f3dcb4..021edf1f69 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -23,6 +23,7 @@ #include "video_core/shader/ast.h" #include "video_core/shader/node.h" #include "video_core/shader/shader_ir.h" +#include "video_core/shader/transform_feedback.h" namespace OpenGL { @@ -36,6 +37,7 @@ using Tegra::Shader::IpaInterpMode; using Tegra::Shader::IpaMode; using Tegra::Shader::IpaSampleMode; using Tegra::Shader::Register; +using VideoCommon::Shader::BuildTransformFeedback; using VideoCommon::Shader::Registry; using namespace std::string_literals; @@ -49,6 +51,11 @@ class ExprDecompiler; enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; +constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; + +constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; +constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; + struct TextureOffset {}; struct TextureDerivates {}; using TextureArgument = std::pair; @@ -390,12 +397,19 @@ std::string FlowStackTopName(MetaStackClass stack) { return stage == ShaderType::Vertex; } +struct GenericVaryingDescription { + std::string name; + u8 first_element = 0; + bool is_scalar = false; +}; + class GLSLDecompiler final { public: explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, ShaderType stage, std::string_view identifier, std::string_view suffix) : device{device}, ir{ir}, registry{registry}, stage{stage}, - identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {} + identifier{identifier}, suffix{suffix}, header{ir.GetHeader()}, + transform_feedback{BuildTransformFeedback(registry.GetGraphicsInfo())} {} void Decompile() { DeclareHeader(); @@ -403,17 +417,17 @@ public: DeclareGeometry(); DeclareFragment(); DeclareCompute(); - DeclareRegisters(); - DeclareCustomVariables(); - DeclarePredicates(); - DeclareLocalMemory(); - DeclareInternalFlags(); DeclareInputAttributes(); DeclareOutputAttributes(); - DeclareConstantBuffers(); - DeclareGlobalMemory(); - DeclareSamplers(); DeclareImages(); + DeclareSamplers(); + DeclareGlobalMemory(); + DeclareConstantBuffers(); + DeclareLocalMemory(); + DeclareRegisters(); + DeclarePredicates(); + DeclareInternalFlags(); + DeclareCustomVariables(); DeclarePhysicalAttributeReader(); code.AddLine("void main() {{"); @@ -485,7 +499,7 @@ private: if (!identifier.empty()) { code.AddLine("// {}", identifier); } - code.AddLine("#version 430 core"); + code.AddLine("#version 440 core"); code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); if (device.HasShaderBallot()) { code.AddLine("#extension GL_ARB_shader_ballot : require"); @@ -570,7 +584,13 @@ private: code.AddLine("out gl_PerVertex {{"); ++code.scope; - code.AddLine("vec4 gl_Position;"); + auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); + if (!pos_xfb.empty()) { + pos_xfb = fmt::format("layout ({}) ", pos_xfb); + } + const char* pos_type = + FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); + code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); for (const auto attribute : ir.GetOutputAttributes()) { if (attribute == Attribute::Index::ClipDistances0123 || @@ -703,7 +723,7 @@ private: void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { const u32 location{GetGenericAttributeIndex(index)}; - std::string name{GetInputAttribute(index)}; + std::string name{GetGenericInputAttribute(index)}; if (stage == ShaderType::Geometry) { name = "gs_" + name + "[]"; } @@ -740,9 +760,58 @@ private: } } + std::optional GetNumComponents(Attribute::Index index, u8 element = 0) const { + const u8 location = static_cast(index) * 4 + element; + const auto it = transform_feedback.find(location); + if (it == transform_feedback.end()) { + return {}; + } + return it->second.components; + } + + std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { + const u8 location = static_cast(index) * 4 + element; + const auto it = transform_feedback.find(location); + if (it == transform_feedback.end()) { + return {}; + } + + const VaryingTFB& tfb = it->second; + return fmt::format("xfb_buffer = {}, xfb_offset = {}", tfb.buffer, tfb.offset); + } + void DeclareOutputAttribute(Attribute::Index index) { - const u32 location{GetGenericAttributeIndex(index)}; - code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); + static constexpr std::string_view swizzle = "xyzw"; + u8 element = 0; + while (element < 4) { + auto xfb = GetTransformFeedbackDecoration(index, element); + if (!xfb.empty()) { + xfb = fmt::format(", {}", xfb); + } + const std::size_t remainder = 4 - element; + const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); + const char* const type = FLOAT_TYPES.at(num_components - 1); + + const u32 location = GetGenericAttributeIndex(index); + + GenericVaryingDescription description; + description.first_element = static_cast(element); + description.is_scalar = num_components == 1; + description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); + if (element != 0 || num_components != 4) { + const std::string_view name_swizzle = swizzle.substr(element, num_components); + description.name = fmt::format("{}_{}", description.name, name_swizzle); + } + for (std::size_t i = 0; i < num_components; ++i) { + const u8 offset = static_cast(location * 4 + element + i); + varying_description.insert({offset, description}); + } + + code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, + xfb, type, description.name); + + element += static_cast(num_components); + } } void DeclareConstantBuffers() { @@ -1095,7 +1164,7 @@ private: return {"0", Type::Int}; default: if (IsGenericAttribute(attribute)) { - return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element), + return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), Type::Float}; } break; @@ -1164,8 +1233,7 @@ private: return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}}; default: if (IsGenericAttribute(attribute)) { - return { - {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}}; + return {{GetGenericOutputAttribute(attribute, abuf->GetElement()), Type::Float}}; } UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast(attribute)); return {}; @@ -2376,27 +2444,34 @@ private: static_assert(operation_decompilers.size() == static_cast(OperationCode::Amount)); std::string GetRegister(u32 index) const { - return GetDeclarationWithSuffix(index, "gpr"); + return AppendSuffix(index, "gpr"); } std::string GetCustomVariable(u32 index) const { - return GetDeclarationWithSuffix(index, "custom_var"); + return AppendSuffix(index, "custom_var"); } std::string GetPredicate(Tegra::Shader::Pred pred) const { - return GetDeclarationWithSuffix(static_cast(pred), "pred"); + return AppendSuffix(static_cast(pred), "pred"); } - std::string GetInputAttribute(Attribute::Index attribute) const { - return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr"); + std::string GetGenericInputAttribute(Attribute::Index attribute) const { + return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); } - std::string GetOutputAttribute(Attribute::Index attribute) const { - return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr"); + std::unordered_map varying_description; + + std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { + const u8 offset = static_cast(GetGenericAttributeIndex(attribute) * 4 + element); + const auto& description = varying_description.at(offset); + if (description.is_scalar) { + return description.name; + } + return fmt::format("{}[{}]", description.name, element - description.first_element); } std::string GetConstBuffer(u32 index) const { - return GetDeclarationWithSuffix(index, "cbuf"); + return AppendSuffix(index, "cbuf"); } std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { @@ -2409,7 +2484,7 @@ private: } std::string GetConstBufferBlock(u32 index) const { - return GetDeclarationWithSuffix(index, "cbuf_block"); + return AppendSuffix(index, "cbuf_block"); } std::string GetLocalMemory() const { @@ -2434,14 +2509,14 @@ private: } std::string GetSampler(const Sampler& sampler) const { - return GetDeclarationWithSuffix(static_cast(sampler.GetIndex()), "sampler"); + return AppendSuffix(static_cast(sampler.GetIndex()), "sampler"); } std::string GetImage(const Image& image) const { - return GetDeclarationWithSuffix(static_cast(image.GetIndex()), "image"); + return AppendSuffix(static_cast(image.GetIndex()), "image"); } - std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { + std::string AppendSuffix(u32 index, std::string_view name) const { if (suffix.empty()) { return fmt::format("{}{}", name, index); } else { @@ -2477,6 +2552,7 @@ private: const std::string_view identifier; const std::string_view suffix; const Header header; + const std::unordered_map transform_feedback; ShaderWriter code; From 8e9f23f393763a6d76605206eeb20f6f8885d9a9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 2 Mar 2020 19:31:26 -0300 Subject: [PATCH 03/14] gl_rasterizer: Implement transform feedback bindings --- src/video_core/engines/maxwell_3d.h | 9 +++ .../renderer_opengl/gl_rasterizer.cpp | 70 +++++++++++++++++-- .../renderer_opengl/gl_rasterizer.h | 14 +++- 3 files changed, 83 insertions(+), 10 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 8752a1cfbf..ba9c765939 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -634,6 +634,11 @@ public: u32 address_low; s32 buffer_size; s32 buffer_offset; + + GPUVAddr Address() const { + return static_cast((static_cast(address_high) << 32) | + address_low); + } }; static_assert(sizeof(TransformFeedbackBinding) == 32); @@ -652,6 +657,10 @@ public: return shader_config[index].enable != 0; } + bool IsShaderConfigEnabled(Regs::ShaderProgram type) const { + return IsShaderConfigEnabled(static_cast(type)); + } + union { struct { INSERT_UNION_PADDING_WORDS(0x45); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8a2db8e369..1af4268a4f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -496,7 +496,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncCullMode(); SyncPrimitiveRestart(); SyncScissorTest(); - SyncTransformFeedback(); SyncPointState(); SyncPolygonOffset(); SyncAlphaTest(); @@ -569,7 +568,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { glTextureBarrier(); } - ++num_queued_commands; + BeginTransformFeedback(primitive_mode); const GLuint base_instance = static_cast(gpu.regs.vb_base_instance); const GLsizei num_instances = @@ -608,6 +607,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { num_instances, base_instance); } } + + EndTransformFeedback(); + + ++num_queued_commands; } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { @@ -1290,11 +1293,6 @@ void RasterizerOpenGL::SyncScissorTest() { } } -void RasterizerOpenGL::SyncTransformFeedback() { - const auto& regs = system.GPU().Maxwell3D().regs; - UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented"); -} - void RasterizerOpenGL::SyncPointState() { auto& gpu = system.GPU().Maxwell3D(); auto& flags = gpu.dirty.flags; @@ -1370,4 +1368,62 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); } +void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { + const auto& regs = system.GPU().Maxwell3D().regs; + if (regs.tfb_enabled == 0) { + return; + } + + UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || + regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || + regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); + + for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { + const auto& binding = regs.tfb_bindings[index]; + if (!binding.buffer_enable) { + if (enabled_transform_feedback_buffers[index]) { + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast(index), 0, 0, + 0); + } + enabled_transform_feedback_buffers[index] = false; + continue; + } + enabled_transform_feedback_buffers[index] = true; + + auto& tfb_buffer = transform_feedback_buffers[index]; + tfb_buffer.Create(); + + const GLuint handle = tfb_buffer.handle; + const std::size_t size = binding.buffer_size; + glNamedBufferData(handle, static_cast(size), nullptr, GL_STREAM_COPY); + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast(index), handle, 0, + static_cast(size)); + } + + glBeginTransformFeedback(GL_POINTS); +} + +void RasterizerOpenGL::EndTransformFeedback() { + const auto& regs = system.GPU().Maxwell3D().regs; + if (regs.tfb_enabled == 0) { + return; + } + + glEndTransformFeedback(); + + for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { + const auto& binding = regs.tfb_bindings[index]; + if (!binding.buffer_enable) { + continue; + } + UNIMPLEMENTED_IF(binding.buffer_offset != 0); + + const GLuint handle = transform_feedback_buffers[index].handle; + const GPUVAddr gpu_addr = binding.Address(); + const std::size_t size = binding.buffer_size; + const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); + glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast(size)); + } +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index e6424f5d2d..2d3be24370 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -168,9 +168,6 @@ private: /// Syncs the scissor test state to match the guest state void SyncScissorTest(); - /// Syncs the transform feedback state to match the guest state - void SyncTransformFeedback(); - /// Syncs the point state to match the guest state void SyncPointState(); @@ -192,6 +189,12 @@ private: /// Syncs the framebuffer sRGB state to match the guest state void SyncFramebufferSRGB(); + /// Begin a transform feedback + void BeginTransformFeedback(GLenum primitive_mode); + + /// End a transform feedback + void EndTransformFeedback(); + /// Check for extension that are not strictly required but are needed for correct emulation void CheckExtensions(); @@ -229,6 +232,11 @@ private: BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; + std::array + transform_feedback_buffers; + std::bitset + enabled_transform_feedback_buffers; + /// Number of commands queued to the OpenGL driver. Reseted on flush. std::size_t num_queued_commands = 0; From 7acebd7eb67032e51dd2985d847e40c216944f92 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 2 Mar 2020 21:36:25 -0300 Subject: [PATCH 04/14] vk_shader_decompiler: Use registry for specialization --- .../renderer_vulkan/vk_pipeline_cache.cpp | 17 ++++------ .../renderer_vulkan/vk_pipeline_cache.h | 4 +++ .../renderer_vulkan/vk_shader_decompiler.cpp | 34 ++++++++++++------- .../renderer_vulkan/vk_shader_decompiler.h | 13 +++---- 4 files changed, 37 insertions(+), 31 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ebf85f311c..056ef495c6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -273,9 +273,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach specialization.workgroup_size = key.workgroup_size; specialization.shared_memory_size = key.shared_memory_size; - const SPIRVShader spirv_shader{ - Decompile(device, shader->GetIR(), ShaderType::Compute, specialization), - shader->GetEntries()}; + const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, + shader->GetRegistry(), specialization), + shader->GetEntries()}; entry = std::make_unique(device, scheduler, descriptor_pool, update_descriptor_queue, spirv_shader); return *entry; @@ -324,8 +324,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { const auto& gpu = system.GPU().Maxwell3D(); Specialization specialization; - specialization.primitive_topology = fixed_state.input_assembly.topology; - if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) { + if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) { ASSERT(fixed_state.input_assembly.point_size != 0.0f); specialization.point_size = fixed_state.input_assembly.point_size; } @@ -333,9 +332,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; } specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; - specialization.tessellation.primitive = fixed_state.tessellation.primitive; - specialization.tessellation.spacing = fixed_state.tessellation.spacing; - specialization.tessellation.clockwise = fixed_state.tessellation.clockwise; SPIRVProgram program; std::vector bindings; @@ -356,8 +352,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 const auto program_type = GetShaderType(program_enum); const auto& entries = shader->GetEntries(); - program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization), - entries}; + program[stage] = { + Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), + entries}; if (program_enum == Maxwell::ShaderProgram::VertexA) { // VertexB was combined with VertexA, so we skip the VertexB iteration diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e292526bbe..21340c9a4b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -132,6 +132,10 @@ public: return shader_ir; } + const VideoCommon::Shader::Registry& GetRegistry() const { + return registry; + } + const VideoCommon::Shader::ShaderIR& GetIR() const { return shader_ir; } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index cfcca5af09..699a538d61 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -24,6 +24,7 @@ #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/shader/node.h" #include "video_core/shader/shader_ir.h" +#include "video_core/shader/transform_feedback.h" namespace Vulkan { @@ -266,9 +267,10 @@ bool IsPrecise(Operation operand) { class SPIRVDecompiler final : public Sirit::Module { public: explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, - const Specialization& specialization) + const Registry& registry, const Specialization& specialization) : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, - specialization{specialization} { + registry{registry}, specialization{specialization}, + transform_feedback{BuildTransformFeedback(registry.GetGraphicsInfo())} { AddCapability(spv::Capability::Shader); AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); AddCapability(spv::Capability::ImageQuery); @@ -318,25 +320,29 @@ public: AddExecutionMode(main, spv::ExecutionMode::OutputVertices, header.common2.threads_per_input_primitive); break; - case ShaderType::TesselationEval: + case ShaderType::TesselationEval: { + const auto& info = registry.GetGraphicsInfo(); AddCapability(spv::Capability::Tessellation); AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive)); - AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing)); - AddExecutionMode(main, specialization.tessellation.clockwise + AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive)); + AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing)); + AddExecutionMode(main, info.tessellation_clockwise ? spv::ExecutionMode::VertexOrderCw : spv::ExecutionMode::VertexOrderCcw); break; - case ShaderType::Geometry: + } + case ShaderType::Geometry: { + const auto& info = registry.GetGraphicsInfo(); AddCapability(spv::Capability::Geometry); AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology)); + AddExecutionMode(main, GetExecutionMode(info.primitive_topology)); AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); AddExecutionMode(main, spv::ExecutionMode::OutputVertices, header.common4.max_output_vertices); // TODO(Rodrigo): Where can we get this info from? AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); break; + } case ShaderType::Fragment: AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); @@ -545,7 +551,8 @@ private: if (stage != ShaderType::Geometry) { return; } - const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology); + const auto& info = registry.GetGraphicsInfo(); + const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology); DeclareInputVertexArray(num_input); DeclareOutputVertex(); } @@ -898,7 +905,7 @@ private: u32 GetNumInputVertices() const { switch (stage) { case ShaderType::Geometry: - return GetNumPrimitiveTopologyVertices(specialization.primitive_topology); + return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology); case ShaderType::TesselationControl: case ShaderType::TesselationEval: return NumInputPatches; @@ -2495,7 +2502,9 @@ private: const ShaderIR& ir; const ShaderType stage; const Tegra::Shader::Header header; + const Registry& registry; const Specialization& specialization; + const std::unordered_map transform_feedback; const Id t_void = Name(TypeVoid(), "void"); @@ -2870,8 +2879,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { } std::vector Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, - ShaderType stage, const Specialization& specialization) { - return SPIRVDecompiler(device, ir, stage, specialization).Assemble(); + ShaderType stage, const VideoCommon::Shader::Registry& registry, + const Specialization& specialization) { + return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index f5dc14d9eb..ffea4709e5 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -15,6 +15,7 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace Vulkan { @@ -91,17 +92,9 @@ struct Specialization final { u32 shared_memory_size{}; // Graphics specific - Maxwell::PrimitiveTopology primitive_topology{}; std::optional point_size{}; std::array attribute_types{}; bool ndc_minus_one_to_one{}; - - // Tessellation specific - struct { - Maxwell::TessellationPrimitive primitive{}; - Maxwell::TessellationSpacing spacing{}; - bool clockwise{}; - } tessellation; }; // Old gcc versions don't consider this trivially copyable. // static_assert(std::is_trivially_copyable_v); @@ -114,6 +107,8 @@ struct SPIRVShader { ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); std::vector Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage, const Specialization& specialization); + Tegra::Engines::ShaderType stage, + const VideoCommon::Shader::Registry& registry, + const Specialization& specialization); } // namespace Vulkan From ae6189d7c2d5b5bf7daa4cc5a3ec34805cec7b7e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 6 Mar 2020 05:03:13 -0300 Subject: [PATCH 05/14] shader/transform_feedback: Expose buffer stride --- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 3 ++- src/video_core/shader/transform_feedback.cpp | 1 + src/video_core/shader/transform_feedback.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 021edf1f69..175145cc17 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -777,7 +777,8 @@ private: } const VaryingTFB& tfb = it->second; - return fmt::format("xfb_buffer = {}, xfb_offset = {}", tfb.buffer, tfb.offset); + return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, + tfb.offset, tfb.stride); } void DeclareOutputAttribute(Attribute::Index index) { diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp index db86c940f6..22a933761f 100644 --- a/src/video_core/shader/transform_feedback.cpp +++ b/src/video_core/shader/transform_feedback.cpp @@ -87,6 +87,7 @@ std::unordered_map BuildTransformFeedback(const GraphicsInfo& in VaryingTFB varying; varying.buffer = layout.stream; + varying.stride = layout.stride; varying.offset = offset * sizeof(u32); varying.components = 1; diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h index 8a8235019a..77d05f64c7 100644 --- a/src/video_core/shader/transform_feedback.h +++ b/src/video_core/shader/transform_feedback.h @@ -13,6 +13,7 @@ namespace VideoCommon::Shader { struct VaryingTFB { std::size_t buffer; + std::size_t stride; std::size_t offset; std::size_t components; }; From c320702092c81c8acc9b5ff0c18f0613a5f64f04 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 6 Mar 2020 05:06:02 -0300 Subject: [PATCH 06/14] vk_device: Shrink formatless capability name size --- src/video_core/renderer_vulkan/vk_device.cpp | 6 +-- src/video_core/renderer_vulkan/vk_device.h | 39 +++++++++---------- .../renderer_vulkan/vk_shader_decompiler.cpp | 4 +- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 886bde3b94..ddcdb05e6f 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan features.occlusionQueryPrecise = true; features.fragmentStoresAndAtomics = true; features.shaderImageGatherExtended = true; - features.shaderStorageImageReadWithoutFormat = - is_shader_storage_img_read_without_format_supported; + features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported; features.shaderStorageImageWriteWithoutFormat = true; features.textureCompressionASTC_LDR = is_optimal_astc_supported; @@ -467,8 +466,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { const auto supported_features{physical.getFeatures(dldi)}; - is_shader_storage_img_read_without_format_supported = - supported_features.shaderStorageImageReadWithoutFormat; + is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 2c27ad730a..8c4ccfefd6 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -122,11 +122,6 @@ public: return properties.limits.maxPushConstantsSize; } - /// Returns true if Shader storage Image Read Without Format supported. - bool IsShaderStorageImageReadWithoutFormatSupported() const { - return is_shader_storage_img_read_without_format_supported; - } - /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; @@ -147,6 +142,11 @@ public: return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; } + /// Returns true if formatless image load is supported. + bool IsFormatlessImageLoadSupported() const { + return is_formatless_image_load_supported; + } + /// Returns true if the device supports VK_EXT_scalar_block_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; @@ -214,26 +214,25 @@ private: static std::unordered_map GetFormatProperties( const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); - const vk::PhysicalDevice physical; ///< Physical device. - vk::DispatchLoaderDynamic dld; ///< Device function pointers. - vk::PhysicalDeviceProperties properties; ///< Device properties. - UniqueDevice logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 graphics_family{}; ///< Main graphics queue family index. - u32 present_family{}; ///< Main present queue family index. - vk::DriverIdKHR driver_id{}; ///< Driver ID. - vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. - bool is_optimal_astc_supported{}; ///< Support for native ASTC. - bool is_float16_supported{}; ///< Support for float16 arithmetics. - bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. + const vk::PhysicalDevice physical; ///< Physical device. + vk::DispatchLoaderDynamic dld; ///< Device function pointers. + vk::PhysicalDeviceProperties properties; ///< Device properties. + UniqueDevice logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 graphics_family{}; ///< Main graphics queue family index. + u32 present_family{}; ///< Main present queue family index. + vk::DriverIdKHR driver_id{}; ///< Driver ID. + vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed + bool is_optimal_astc_supported{}; ///< Support for native ASTC. + bool is_float16_supported{}; ///< Support for float16 arithmetics. + bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. + bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. - bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage - ///< image read without format // Telemetry parameters std::string vendor_name; ///< Device's driver name. diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 699a538d61..802fe87479 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -298,7 +298,7 @@ public: } } - if (device.IsShaderStorageImageReadWithoutFormatSupported()) { + if (device.IsFormatlessImageLoadSupported()) { AddCapability(spv::Capability::StorageImageReadWithoutFormat); } @@ -1800,7 +1800,7 @@ private: } Expression ImageLoad(Operation operation) { - if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { + if (!device.IsFormatlessImageLoadSupported()) { return {v_float_zero, Type::Float}; } From 8d5bdcb17b732124e478f067bd449f76a46c547a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 6 Mar 2020 05:09:06 -0300 Subject: [PATCH 07/14] vk_device: Enable VK_EXT_transform_feedback when available --- src/video_core/renderer_vulkan/vk_device.cpp | 41 ++++++++++++++++---- src/video_core/renderer_vulkan/vk_device.h | 6 +++ 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index ddcdb05e6f..3847bd722c 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -147,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); } + vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; + if (ext_transform_feedback) { + transform_feedback.transformFeedback = true; + transform_feedback.geometryStreams = true; + SetNext(next, transform_feedback); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks"); + } + if (!ext_depth_range_unrestricted) { LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } @@ -384,7 +393,7 @@ std::vector VKDevice::LoadExtensions(const vk::DispatchLoaderDynami } }; - extensions.reserve(14); + extensions.reserve(15); extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); @@ -396,18 +405,22 @@ std::vector VKDevice::LoadExtensions(const vk::DispatchLoaderDynami [[maybe_unused]] const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); - bool khr_shader_float16_int8{}; - bool ext_subgroup_size_control{}; + bool has_khr_shader_float16_int8{}; + bool has_ext_subgroup_size_control{}; + bool has_ext_transform_feedback{}; for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { Test(extension, khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); - Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); + Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, + false); Test(extension, ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); Test(extension, ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); - Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, + Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, + false); + Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); if (Settings::values.renderer_debug) { Test(extension, nv_device_diagnostic_checkpoints, @@ -415,13 +428,13 @@ std::vector VKDevice::LoadExtensions(const vk::DispatchLoaderDynami } } - if (khr_shader_float16_int8) { + if (has_khr_shader_float16_int8) { is_float16_supported = GetFeatures(physical, dldi).shaderFloat16; extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); } - if (ext_subgroup_size_control) { + if (has_ext_subgroup_size_control) { const auto features = GetFeatures(physical, dldi); const auto properties = @@ -438,6 +451,20 @@ std::vector VKDevice::LoadExtensions(const vk::DispatchLoaderDynami is_warp_potentially_bigger = true; } + if (has_ext_transform_feedback) { + const auto features = + GetFeatures(physical, dldi); + const auto properties = + GetProperties(physical, dldi); + + if (features.transformFeedback && features.geometryStreams && + properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers && + properties.transformFeedbackQueries && properties.transformFeedbackDraw) { + extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); + ext_transform_feedback = true; + } + } + return extensions; } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 8c4ccfefd6..6e656517f6 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -167,6 +167,11 @@ public: return ext_shader_viewport_index_layer; } + /// Returns true if the device supports VK_EXT_transform_feedback. + bool IsExtTransformFeedbackSupported() const { + return ext_transform_feedback; + } + /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. bool IsNvDeviceDiagnosticCheckpoints() const { return nv_device_diagnostic_checkpoints; @@ -232,6 +237,7 @@ private: bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. + bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. // Telemetry parameters From b67360c0f86c8acc1e56547382f07f35039fbcbf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 6 Mar 2020 05:10:39 -0300 Subject: [PATCH 08/14] vk_shader_decompiler: Add XFB decorations to generic varyings --- .../renderer_vulkan/vk_shader_decompiler.cpp | 105 +++++++++++++++--- 1 file changed, 89 insertions(+), 16 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 802fe87479..3117a8d74b 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -5,7 +5,9 @@ #include #include #include +#include #include +#include #include #include @@ -94,6 +96,12 @@ struct VertexIndices { std::optional clip_distances; }; +struct GenericVaryingDescription { + Id id = nullptr; + u32 first_element = 0; + bool is_scalar = false; +}; + spv::Dim GetSamplerDim(const Sampler& sampler) { ASSERT(!sampler.IsBuffer()); switch (sampler.GetType()) { @@ -288,6 +296,15 @@ public: AddExtension("SPV_KHR_variable_pointers"); AddExtension("SPV_KHR_shader_draw_parameters"); + if (!transform_feedback.empty()) { + if (device.IsExtTransformFeedbackSupported()) { + AddCapability(spv::Capability::TransformFeedback); + } else { + LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not " + "supported on this device"); + } + } + if (ir.UsesLayer() || ir.UsesViewportIndex()) { if (ir.UsesViewportIndex()) { AddCapability(spv::Capability::MultiViewport); @@ -406,7 +423,7 @@ private: // Clear Position to avoid reading trash on the Z conversion. const auto position_index = out_indices.position.value(); const Id position = AccessElement(t_out_float4, out_vertex, position_index); - OpStore(position, v_varying_default); + OpStore(position, ConstantNull(t_float4)); if (specialization.point_size) { const u32 point_size_index = out_indices.point_size.value(); @@ -749,13 +766,35 @@ private: } void DeclareOutputAttributes() { + if (stage == ShaderType::Compute || stage == ShaderType::Fragment) { + return; + } + + UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex); for (const auto index : ir.GetOutputAttributes()) { if (!IsGenericAttribute(index)) { continue; } - const u32 location = GetGenericAttributeLocation(index); - Id type = t_float4; - Id varying_default = v_varying_default; + DeclareOutputAttribute(index); + } + } + + void DeclareOutputAttribute(Attribute::Index index) { + static constexpr std::string_view swizzle = "xyzw"; + + const u32 location = GetGenericAttributeLocation(index); + u8 element = 0; + while (element < 4) { + const std::size_t remainder = 4 - element; + + std::size_t num_components = remainder; + const std::optional tfb = GetTransformFeedbackInfo(index, element); + if (tfb) { + num_components = tfb->components; + } + + Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); + Id varying_default = ConstantNull(type); if (IsOutputAttributeArray()) { const u32 num = GetNumOutputVertices(); type = TypeArray(type, Constant(t_uint, num)); @@ -767,15 +806,47 @@ private: } type = TypePointer(spv::StorageClass::Output, type); + std::string name = fmt::format("out_attr{}", location); + if (num_components < 4 || element > 0) { + name = fmt::format("{}_{}", name, swizzle.substr(element, num_components)); + } + const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); - Name(AddGlobalVariable(id), fmt::format("out_attr{}", location)); - output_attributes.emplace(index, id); + Name(AddGlobalVariable(id), name); + + GenericVaryingDescription description; + description.id = id; + description.first_element = element; + description.is_scalar = num_components == 1; + for (u32 i = 0; i < num_components; ++i) { + const u8 offset = static_cast(static_cast(index) * 4 + element + i); + output_attributes.emplace(offset, description); + } interfaces.push_back(id); Decorate(id, spv::Decoration::Location, location); + if (element > 0) { + Decorate(id, spv::Decoration::Component, static_cast(element)); + } + if (tfb && device.IsExtTransformFeedbackSupported()) { + Decorate(id, spv::Decoration::XfbBuffer, static_cast(tfb->buffer)); + Decorate(id, spv::Decoration::XfbStride, static_cast(tfb->stride)); + Decorate(id, spv::Decoration::Offset, static_cast(tfb->offset)); + } + + element += static_cast(num_components); } } + std::optional GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { + const u8 location = static_cast(index) * 4 + element; + const auto it = transform_feedback.find(location); + if (it == transform_feedback.end()) { + return {}; + } + return it->second; + } + u32 DeclareConstantBuffers(u32 binding) { for (const auto& [index, size] : ir.GetConstantBuffers()) { const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo @@ -1353,8 +1424,14 @@ private: } default: if (IsGenericAttribute(attribute)) { - const Id composite = output_attributes.at(attribute); - return {ArrayPass(t_out_float, composite, {element}), Type::Float}; + const u8 offset = static_cast(static_cast(attribute) * 4 + element); + const GenericVaryingDescription description = output_attributes.at(offset); + const Id composite = description.id; + std::vector indices; + if (!description.is_scalar) { + indices.push_back(element - description.first_element); + } + return {ArrayPass(t_out_float, composite, indices), Type::Float}; } UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast(attribute)); @@ -2265,11 +2342,11 @@ private: std::array GetTypeVectorDefinitionLut(Type type) const { switch (type) { case Type::Float: - return {nullptr, t_float2, t_float3, t_float4}; + return {t_float, t_float2, t_float3, t_float4}; case Type::Int: - return {nullptr, t_int2, t_int3, t_int4}; + return {t_int, t_int2, t_int3, t_int4}; case Type::Uint: - return {nullptr, t_uint2, t_uint3, t_uint4}; + return {t_uint, t_uint2, t_uint3, t_uint4}; default: UNIMPLEMENTED(); return {}; @@ -2573,10 +2650,6 @@ private: const Id v_float_zero = Constant(t_float, 0.0f); const Id v_float_one = Constant(t_float, 1.0f); - // Nvidia uses these defaults for varyings (e.g. position and generic attributes) - const Id v_varying_default = - ConstantComposite(t_float4, v_float_zero, v_float_zero, v_float_zero, v_float_one); - const Id v_true = ConstantTrue(t_bool); const Id v_false = ConstantFalse(t_bool); @@ -2593,7 +2666,7 @@ private: Id shared_memory{}; std::array internal_flags{}; std::map input_attributes; - std::map output_attributes; + std::unordered_map output_attributes; std::map constant_buffers; std::map global_buffers; std::map texel_buffers; From 2fae1e6205c16903625c993c8d46a6c0905a6d41 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 6 Mar 2020 05:11:18 -0300 Subject: [PATCH 09/14] vk_rasterizer: Implement transform feedback binding zero --- .../renderer_vulkan/vk_rasterizer.cpp | 42 +++++++++++++++++++ .../renderer_vulkan/vk_rasterizer.h | 4 ++ 2 files changed, 46 insertions(+) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 2bcb17b564..f889019c1d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -347,6 +347,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); } + BeginTransformFeedback(); + const auto pipeline_layout = pipeline.GetLayout(); const auto descriptor_set = pipeline.CommitDescriptorSet(); scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { @@ -356,6 +358,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { } draw_params.Draw(cmdbuf, dld); }); + + EndTransformFeedback(); } void RasterizerVulkan::Clear() { @@ -738,6 +742,44 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateStencilFaces(regs); } +void RasterizerVulkan::BeginTransformFeedback() { + const auto& regs = system.GPU().Maxwell3D().regs; + if (regs.tfb_enabled == 0) { + return; + } + + UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || + regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || + regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); + + UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); + UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable); + UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable); + + const auto& binding = regs.tfb_bindings[0]; + UNIMPLEMENTED_IF(binding.buffer_enable == 0); + UNIMPLEMENTED_IF(binding.buffer_offset != 0); + + const GPUVAddr gpu_addr = binding.Address(); + const std::size_t size = binding.buffer_size; + const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); + + scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) { + cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld); + cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld); + }); +} + +void RasterizerVulkan::EndTransformFeedback() { + const auto& regs = system.GPU().Maxwell3D().regs; + if (regs.tfb_enabled == 0) { + return; + } + + scheduler.Record( + [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); }); +} + void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, BufferBindings& buffer_bindings) { const auto& regs = system.GPU().Maxwell3D().regs; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 96ea05f0a8..b2e73d98da 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -169,6 +169,10 @@ private: void UpdateDynamicStates(); + void BeginTransformFeedback(); + + void EndTransformFeedback(); + bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, From 47459f6a36f5bd539b6aedf9b07ffbebb0a2be32 Mon Sep 17 00:00:00 2001 From: Rodrigo Locatti Date: Mon, 9 Mar 2020 20:08:48 -0300 Subject: [PATCH 10/14] vk_shader_decompiler: Fix implicit type conversion Co-Authored-By: Mat M. --- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 3117a8d74b..ed78bba023 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -839,7 +839,7 @@ private: } std::optional GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { - const u8 location = static_cast(index) * 4 + element; + const u8 location = static_cast(static_cast(index) * 4 + element); const auto it = transform_feedback.find(location); if (it == transform_feedback.end()) { return {}; From 4bc4851d457c6f14feca665d4d729b9df444ec05 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 9 Mar 2020 20:46:16 -0300 Subject: [PATCH 11/14] gl_shader_decompiler: Fix implicit conversion errors --- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 175145cc17..973d3fd11b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -761,7 +761,7 @@ private: } std::optional GetNumComponents(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast(index) * 4 + element; + const u8 location = static_cast(static_cast(index) * 4 + element); const auto it = transform_feedback.find(location); if (it == transform_feedback.end()) { return {}; @@ -770,7 +770,7 @@ private: } std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast(index) * 4 + element; + const u8 location = static_cast(static_cast(index) * 4 + element); const auto it = transform_feedback.find(location); if (it == transform_feedback.end()) { return {}; @@ -811,7 +811,7 @@ private: code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, xfb, type, description.name); - element += static_cast(num_components); + element = static_cast(static_cast(element) + num_components); } } From afebdda2031ca0a39f2816cf56388c04c63ed336 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 11 Mar 2020 01:08:28 -0300 Subject: [PATCH 12/14] maxwell_3d: Add padding words to XFB entries Use INSERT_UNION_PADDING_WORDS instead of alignas to ensure a size requirement. --- src/video_core/engines/maxwell_3d.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ba9c765939..8a9e9992e1 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -628,12 +628,13 @@ public: float depth_range_far; }; - struct alignas(32) TransformFeedbackBinding { + struct TransformFeedbackBinding { u32 buffer_enable; u32 address_high; u32 address_low; s32 buffer_size; s32 buffer_offset; + INSERT_UNION_PADDING_WORDS(3); GPUVAddr Address() const { return static_cast((static_cast(address_high) << 32) | @@ -642,10 +643,11 @@ public: }; static_assert(sizeof(TransformFeedbackBinding) == 32); - struct alignas(16) TransformFeedbackLayout { + struct TransformFeedbackLayout { u32 stream; u32 varying_count; u32 stride; + INSERT_UNION_PADDING_WORDS(1); }; static_assert(sizeof(TransformFeedbackLayout) == 16); From 62560f1e6356747e7a2723eab12528e657a76a4f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 12 Mar 2020 02:35:31 -0300 Subject: [PATCH 13/14] vk_shader_decompiler: Fix default varying regression --- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index ed78bba023..7d51bf9af7 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -423,7 +423,7 @@ private: // Clear Position to avoid reading trash on the Z conversion. const auto position_index = out_indices.position.value(); const Id position = AccessElement(t_out_float4, out_vertex, position_index); - OpStore(position, ConstantNull(t_float4)); + OpStore(position, v_varying_default); if (specialization.point_size) { const u32 point_size_index = out_indices.point_size.value(); @@ -794,7 +794,7 @@ private: } Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); - Id varying_default = ConstantNull(type); + Id varying_default = v_varying_default; if (IsOutputAttributeArray()) { const u32 num = GetNumOutputVertices(); type = TypeArray(type, Constant(t_uint, num)); @@ -2650,6 +2650,10 @@ private: const Id v_float_zero = Constant(t_float, 0.0f); const Id v_float_one = Constant(t_float, 1.0f); + // Nvidia uses these defaults for varyings (e.g. position and generic attributes) + const Id v_varying_default = + ConstantComposite(t_float4, v_float_zero, v_float_zero, v_float_zero, v_float_one); + const Id v_true = ConstantTrue(t_bool); const Id v_false = ConstantFalse(t_bool); From 69c7a01f88a1839a3d950cab968accfa5100ea18 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 12 Mar 2020 03:27:29 -0300 Subject: [PATCH 14/14] vk/gl_shader_decompiler: Silence assertion on compute --- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 9 ++++++--- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 973d3fd11b..3adf7f0cb7 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -408,8 +408,11 @@ public: explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, ShaderType stage, std::string_view identifier, std::string_view suffix) : device{device}, ir{ir}, registry{registry}, stage{stage}, - identifier{identifier}, suffix{suffix}, header{ir.GetHeader()}, - transform_feedback{BuildTransformFeedback(registry.GetGraphicsInfo())} {} + identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} { + if (stage != ShaderType::Compute) { + transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); + } + } void Decompile() { DeclareHeader(); @@ -2553,7 +2556,7 @@ private: const std::string_view identifier; const std::string_view suffix; const Header header; - const std::unordered_map transform_feedback; + std::unordered_map transform_feedback; ShaderWriter code; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 7d51bf9af7..b2c2980512 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -277,8 +277,11 @@ public: explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, const Registry& registry, const Specialization& specialization) : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, - registry{registry}, specialization{specialization}, - transform_feedback{BuildTransformFeedback(registry.GetGraphicsInfo())} { + registry{registry}, specialization{specialization} { + if (stage != ShaderType::Compute) { + transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); + } + AddCapability(spv::Capability::Shader); AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); AddCapability(spv::Capability::ImageQuery); @@ -2581,7 +2584,7 @@ private: const Tegra::Shader::Header header; const Registry& registry; const Specialization& specialization; - const std::unordered_map transform_feedback; + std::unordered_map transform_feedback; const Id t_void = Name(TypeVoid(), "void");