From ace6c2318be5e8c5b2ad5f489d8144f28505d5f9 Mon Sep 17 00:00:00 2001 From: Kelebek1 Date: Sun, 4 Jun 2023 21:09:57 +0100 Subject: [PATCH] Combine vertex/transform feedback buffer binding into a single call --- src/video_core/buffer_cache/buffer_cache.h | 82 ++++++++++++++----- .../buffer_cache/buffer_cache_base.h | 11 ++- .../renderer_opengl/gl_buffer_cache.cpp | 18 ++++ .../renderer_opengl/gl_buffer_cache.h | 4 +- .../renderer_vulkan/vk_buffer_cache.cpp | 54 +++++++++++- .../renderer_vulkan/vk_buffer_cache.h | 3 + 6 files changed, 148 insertions(+), 24 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 2f281b3705..251a4a8804 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -715,20 +715,38 @@ void BufferCache

::BindHostIndexBuffer() { template void BufferCache

::BindHostVertexBuffers() { + HostBindings host_bindings; + bool any_valid{false}; auto& flags = maxwell3d->dirty.flags; for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { - const Binding& binding = channel_state->vertex_buffers[index]; - Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer, binding.buffer_id); - SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); if (!flags[Dirty::VertexBuffer0 + index]) { continue; } - flags[Dirty::VertexBuffer0 + index] = false; + host_bindings.min_index = std::min(host_bindings.min_index, index); + host_bindings.max_index = std::max(host_bindings.max_index, index); + any_valid = true; + } - const u32 stride = maxwell3d->regs.vertex_streams[index].stride; - const u32 offset = buffer.Offset(binding.cpu_addr); - runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride); + if (any_valid) { + host_bindings.max_index++; + for (u32 index = host_bindings.min_index; index < host_bindings.max_index; index++) { + flags[Dirty::VertexBuffer0 + index] = false; + + const Binding& binding = channel_state->vertex_buffers[index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + + TouchBuffer(buffer, binding.buffer_id); + SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); + + const u32 stride = maxwell3d->regs.vertex_streams[index].stride; + const u32 offset = buffer.Offset(binding.cpu_addr); + + host_bindings.buffers.push_back(reinterpret_cast(&buffer)); + host_bindings.offsets.push_back(offset); + host_bindings.sizes.push_back(binding.size); + host_bindings.strides.push_back(stride); + } + runtime.BindVertexBuffers(host_bindings); } } @@ -882,15 +900,25 @@ void BufferCache

::BindHostTransformFeedbackBuffers() { if (maxwell3d->regs.transform_feedback_enabled == 0) { return; } + HostBindings host_bindings; for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { const Binding& binding = channel_state->transform_feedback_buffers[index]; + if (maxwell3d->regs.transform_feedback.controls[index].varying_count == 0 && + maxwell3d->regs.transform_feedback.controls[index].stride == 0) { + break; + } Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); - runtime.BindTransformFeedbackBuffer(index, buffer, offset, size); + host_bindings.buffers.push_back(reinterpret_cast(&buffer)); + host_bindings.offsets.push_back(offset); + host_bindings.sizes.push_back(binding.size); + } + if (host_bindings.buffers.size() > 0) { + runtime.BindTransformFeedbackBuffers(host_bindings); } } @@ -1616,6 +1644,8 @@ void BufferCache

::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si template void BufferCache

::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { + bool dirty_index{false}; + boost::container::small_vector dirty_vertex_buffers; const auto scalar_replace = [buffer_id](Binding& binding) { if (binding.buffer_id == buffer_id) { binding.buffer_id = BufferId{}; @@ -1624,8 +1654,19 @@ void BufferCache

::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { const auto replace = [scalar_replace](std::span bindings) { std::ranges::for_each(bindings, scalar_replace); }; - scalar_replace(channel_state->index_buffer); - replace(channel_state->vertex_buffers); + + if (channel_state->index_buffer.buffer_id == buffer_id) { + channel_state->index_buffer.buffer_id = BufferId{}; + dirty_index = true; + } + + for (u32 index = 0; index < channel_state->vertex_buffers.size(); index++) { + auto& binding = channel_state->vertex_buffers[index]; + if (binding.buffer_id == buffer_id) { + binding.buffer_id = BufferId{}; + dirty_vertex_buffers.push_back(index); + } + } std::ranges::for_each(channel_state->uniform_buffers, replace); std::ranges::for_each(channel_state->storage_buffers, replace); replace(channel_state->transform_feedback_buffers); @@ -1642,20 +1683,21 @@ void BufferCache

::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); slot_buffers.erase(buffer_id); - NotifyBufferDeletion(); -} - -template -void BufferCache

::NotifyBufferDeletion() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { channel_state->dirty_uniform_buffers.fill(~u32{0}); channel_state->uniform_buffer_binding_sizes.fill({}); } + auto& flags = maxwell3d->dirty.flags; - flags[Dirty::IndexBuffer] = true; - flags[Dirty::VertexBuffers] = true; - for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { - flags[Dirty::VertexBuffer0 + index] = true; + if (dirty_index) { + flags[Dirty::IndexBuffer] = true; + } + + if (dirty_vertex_buffers.size() > 0) { + flags[Dirty::VertexBuffers] = true; + for (auto index : dirty_vertex_buffers) { + flags[Dirty::VertexBuffer0 + index] = true; + } } channel_state->has_deleted_buffers = true; } diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 60a1f285ed..cf359e2413 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -105,6 +105,15 @@ static constexpr Binding NULL_BINDING{ .buffer_id = NULL_BUFFER_ID, }; +struct HostBindings { + boost::container::small_vector buffers; + boost::container::small_vector offsets; + boost::container::small_vector sizes; + boost::container::small_vector strides; + u32 min_index{NUM_VERTEX_BUFFERS}; + u32 max_index{0}; +}; + class BufferCacheChannelInfo : public ChannelInfo { public: BufferCacheChannelInfo() = delete; @@ -519,8 +528,6 @@ private: void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); - void NotifyBufferDeletion(); - [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, bool is_written) const; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index c419714d48..0cc546a3a0 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -232,6 +232,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, } } +void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bindings) { + for (u32 index = 0; index < bindings.buffers.size(); index++) { + BindVertexBuffer( + bindings.min_index + index, *reinterpret_cast(bindings.buffers[index]), + static_cast(bindings.offsets[index]), static_cast(bindings.sizes[index]), + static_cast(bindings.strides[index])); + } +} + void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size) { if (use_assembly_shaders) { @@ -320,6 +329,15 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, static_cast(offset), static_cast(size)); } +void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings) { + for (u32 index = 0; index < bindings.buffers.size(); index++) { + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, + reinterpret_cast(bindings.buffers[index])->Handle(), + static_cast(bindings.offsets[index]), + static_cast(bindings.sizes[index])); + } +} + void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) { *texture_handles++ = buffer.View(offset, size, format); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a24991585f..e4e0002848 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -7,7 +7,7 @@ #include #include "common/common_types.h" -#include "video_core/buffer_cache/buffer_cache.h" +#include "video_core/buffer_cache/buffer_cache_base.h" #include "video_core/buffer_cache/memory_tracker_base.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_device.h" @@ -87,6 +87,7 @@ public: void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride); + void BindVertexBuffers(VideoCommon::HostBindings& bindings); void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size); @@ -99,6 +100,7 @@ public: bool is_written); void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); + void BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings); void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, VideoCore::Surface::PixelFormat format); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index daa128399f..d72d99899b 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -7,7 +7,6 @@ #include #include -#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -502,6 +501,40 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset } } +void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bindings) { + boost::container::small_vector buffer_handles; + for (u32 index = 0; index < bindings.buffers.size(); index++) { + auto& buffer = *reinterpret_cast(bindings.buffers[index]); + auto handle = buffer.Handle(); + if (handle == VK_NULL_HANDLE) { + bindings.offsets[index] = 0; + bindings.sizes[index] = VK_WHOLE_SIZE; + if (!device.HasNullDescriptor()) { + ReserveNullBuffer(); + handle = *null_buffer; + } + } + buffer_handles.push_back(handle); + } + if (device.IsExtExtendedDynamicStateSupported()) { + scheduler.Record([bindings = bindings, + buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers2EXT( + bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(), + reinterpret_cast(bindings.offsets.data()), + reinterpret_cast(bindings.sizes.data()), + reinterpret_cast(bindings.strides.data())); + }); + } else { + scheduler.Record([bindings = bindings, + buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers( + bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(), + reinterpret_cast(bindings.offsets.data())); + }); + } +} + void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size) { if (!device.IsExtTransformFeedbackSupported()) { @@ -523,6 +556,25 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, }); } +void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings) { + if (!device.IsExtTransformFeedbackSupported()) { + // Already logged in the rasterizer + return; + } + boost::container::small_vector buffer_handles; + for (u32 index = 0; index < bindings.buffers.size(); index++) { + auto& buffer = *reinterpret_cast(bindings.buffers[index]); + buffer_handles.push_back(buffer.Handle()); + } + scheduler.Record( + [bindings = bindings, buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { + cmdbuf.BindTransformFeedbackBuffersEXT( + 0, static_cast(buffer_handles.size()), buffer_handles.data(), + reinterpret_cast(bindings.offsets.data()), + reinterpret_cast(bindings.sizes.data())); + }); +} + void BufferCacheRuntime::ReserveNullBuffer() { if (null_buffer) { return; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 92b4f78596..92d3e9f323 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -18,6 +18,7 @@ namespace Vulkan { class Device; class DescriptorPool; class Scheduler; +struct HostVertexBinding; class BufferCacheRuntime; @@ -96,8 +97,10 @@ public: void BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count); void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride); + void BindVertexBuffers(VideoCommon::HostBindings& bindings); void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size); + void BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings); std::span BindMappedUniformBuffer([[maybe_unused]] size_t stage, [[maybe_unused]] u32 binding_index, u32 size) {