From 3fbee093b2bf3b4c15dbc5bb48a3bc768ecedbc9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 12 Mar 2023 21:43:31 +0100 Subject: [PATCH 1/5] TextureCache: refactor DMA downloads to allow multiple buffers. --- .../renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_texture_cache.cpp | 41 ++++++++++-------- .../renderer_opengl/gl_texture_cache.h | 3 +- .../renderer_vulkan/vk_rasterizer.cpp | 2 +- .../renderer_vulkan/vk_texture_cache.cpp | 43 ++++++++++++------- .../renderer_vulkan/vk_texture_cache.h | 7 +-- src/video_core/texture_cache/texture_cache.h | 12 +++++- .../texture_cache/texture_cache_base.h | 6 ++- 8 files changed, 75 insertions(+), 41 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 90e35e307d..2de5335840 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1299,7 +1299,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, if constexpr (IS_IMAGE_UPLOAD) { image->UploadMemory(buffer->Handle(), offset, copy_span); } else { - image->DownloadMemory(buffer->Handle(), offset, copy_span); + texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span); } return true; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 0b9c4a904d..670d8cafd9 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -801,32 +801,34 @@ void Image::UploadMemory(const ImageBufferMap& map, UploadMemory(map.buffer, map.offset, copies); } -void Image::DownloadMemory(GLuint buffer_handle, size_t buffer_offset, +void Image::DownloadMemory(std::span buffer_handles, size_t buffer_offset, std::span copies) { const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); if (is_rescaled) { ScaleDown(); } glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API - glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle); - glPixelStorei(GL_PACK_ALIGNMENT, 1); + for (auto buffer_handle : buffer_handles) { + glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle); + glPixelStorei(GL_PACK_ALIGNMENT, 1); - u32 current_row_length = std::numeric_limits::max(); - u32 current_image_height = std::numeric_limits::max(); + u32 current_row_length = std::numeric_limits::max(); + u32 current_image_height = std::numeric_limits::max(); - for (const VideoCommon::BufferImageCopy& copy : copies) { - if (copy.image_subresource.base_level >= gl_num_levels) { - continue; + for (const VideoCommon::BufferImageCopy& copy : copies) { + if (copy.image_subresource.base_level >= gl_num_levels) { + continue; + } + if (current_row_length != copy.buffer_row_length) { + current_row_length = copy.buffer_row_length; + glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); + } + if (current_image_height != copy.buffer_image_height) { + current_image_height = copy.buffer_image_height; + glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); + } + CopyImageToBuffer(copy, buffer_offset); } - if (current_row_length != copy.buffer_row_length) { - current_row_length = copy.buffer_row_length; - glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); - } - if (current_image_height != copy.buffer_image_height) { - current_image_height = copy.buffer_image_height; - glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); - } - CopyImageToBuffer(copy, buffer_offset); } if (is_rescaled) { ScaleUp(true); @@ -835,7 +837,10 @@ void Image::DownloadMemory(GLuint buffer_handle, size_t buffer_offset, void Image::DownloadMemory(ImageBufferMap& map, std::span copies) { - DownloadMemory(map.buffer, map.offset, copies); + std::array buffers{ + map.buffer, + }; + DownloadMemory(buffers, map.offset, copies); } GLuint Image::StorageHandle() noexcept { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 911e4607aa..67d6910b41 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -212,7 +212,7 @@ public: void UploadMemory(const ImageBufferMap& map, std::span copies); - void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, + void DownloadMemory(std::span buffer_handle, size_t buffer_offset, std::span copies); void DownloadMemory(ImageBufferMap& map, std::span copies); @@ -376,6 +376,7 @@ struct TextureCacheParams { using Sampler = OpenGL::Sampler; using Framebuffer = OpenGL::Framebuffer; using AsyncBuffer = u32; + using BufferType = GLuint; }; using TextureCache = VideoCommon::TextureCache; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 673ab478e3..8fc783cc01 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -793,7 +793,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, if constexpr (IS_IMAGE_UPLOAD) { image->UploadMemory(buffer->Handle(), offset, copy_span); } else { - image->DownloadMemory(buffer->Handle(), offset, copy_span); + texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span); } return true; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index ae15f69765..e4d077e63f 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1,10 +1,11 @@ -// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #include #include #include #include +#include #include "common/bit_cast.h" #include "common/bit_util.h" @@ -1341,16 +1342,20 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span buffers_span, VkDeviceSize offset, std::span copies) { const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); if (is_rescaled) { ScaleDown(); } + boost::container::small_vector buffers_vector{}; + for (auto& buffer : buffers_span) { + buffers_vector.push_back(buffer); + } std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); scheduler->RequestOutsideRenderPassOperationContext(); - scheduler->Record([buffer, image = *original_image, aspect_mask = aspect_mask, - vk_copies](vk::CommandBuffer cmdbuf) { + scheduler->Record([buffers = std::move(buffers_vector), image = *original_image, + aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { const VkImageMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, @@ -1369,6 +1374,20 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset, .layerCount = VK_REMAINING_ARRAY_LAYERS, }, }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, read_barrier); + + for (auto buffer : buffers) { + cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, + vk_copies); + } + + const VkMemoryBarrier memory_write_barrier{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + }; const VkImageMemoryBarrier image_write_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, @@ -1387,15 +1406,6 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset, .layerCount = VK_REMAINING_ARRAY_LAYERS, }, }; - const VkMemoryBarrier memory_write_barrier{ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, - .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, - }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, read_barrier); - cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, memory_write_barrier, nullptr, image_write_barrier); }); @@ -1405,7 +1415,10 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset, } void Image::DownloadMemory(const StagingBufferRef& map, std::span copies) { - DownloadMemory(map.buffer, map.offset, copies); + std::array buffers{ + map.buffer, + }; + DownloadMemory(buffers, map.offset, copies); } bool Image::IsRescaled() const noexcept { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index d5ee23f8dd..4224761883 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once @@ -138,7 +138,7 @@ public: void UploadMemory(const StagingBufferRef& map, std::span copies); - void DownloadMemory(VkBuffer buffer, VkDeviceSize offset, + void DownloadMemory(std::span buffers, VkDeviceSize offset, std::span copies); void DownloadMemory(const StagingBufferRef& map, @@ -371,6 +371,7 @@ struct TextureCacheParams { using Sampler = Vulkan::Sampler; using Framebuffer = Vulkan::Framebuffer; using AsyncBuffer = Vulkan::StagingBufferRef; + using BufferType = VkBuffer; }; using TextureCache = VideoCommon::TextureCache; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ed5c768d8f..2cd5aa31e5 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #pragma once @@ -833,6 +833,16 @@ std::pair::Image*, BufferImageCopy> TextureCache

::Dm return {image, copy}; } +template +void TextureCache

::DownloadImageIntoBuffer( + typename TextureCache

::Image* image, typename TextureCache

::BufferType buffer, + size_t buffer_offset, std::span copies) { + std::array buffers{ + buffer, + }; + image->DownloadMemory(buffers, buffer_offset, copies); +} + template void TextureCache

::RefreshContents(Image& image, ImageId image_id) { if (False(image.flags & ImageFlagBits::CpuModified)) { diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 5a5b4179c5..51f44aed5a 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #pragma once @@ -119,6 +119,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches copies); + /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); From e3a2ca96bd2350471ebb6c2907c67b10254a4f7e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 14 Apr 2023 18:07:38 +0200 Subject: [PATCH 2/5] Accelerate DMA: Use texture cache async downloads to perform the copies to host. WIP --- .../renderer_opengl/gl_rasterizer.cpp | 6 +- .../renderer_vulkan/vk_rasterizer.cpp | 6 +- .../renderer_vulkan/vk_texture_cache.cpp | 21 ++-- .../renderer_vulkan/vk_texture_cache.h | 2 +- src/video_core/texture_cache/texture_cache.h | 118 +++++++++++++----- .../texture_cache/texture_cache_base.h | 23 +++- 6 files changed, 123 insertions(+), 53 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2de5335840..4993d47098 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1287,8 +1287,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, } const u32 buffer_size = static_cast(buffer_operand.pitch * buffer_operand.height); static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; - const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing - : VideoCommon::ObtainBufferOperation::MarkAsWritten; + const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; const auto [buffer, offset] = buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); @@ -1299,7 +1298,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, if constexpr (IS_IMAGE_UPLOAD) { image->UploadMemory(buffer->Handle(), offset, copy_span); } else { - texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span); + texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span, + buffer_operand.address, buffer_size); } return true; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 8fc783cc01..2559a3aa71 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -781,8 +781,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, } const u32 buffer_size = static_cast(buffer_operand.pitch * buffer_operand.height); static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; - const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing - : VideoCommon::ObtainBufferOperation::MarkAsWritten; + const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; const auto [buffer, offset] = buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); @@ -793,7 +792,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, if constexpr (IS_IMAGE_UPLOAD) { image->UploadMemory(buffer->Handle(), offset, copy_span); } else { - texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span); + texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span, + buffer_operand.address, buffer_size); } return true; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index e4d077e63f..da3841bb34 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1342,17 +1342,19 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span buffers_span, VkDeviceSize offset, +void Image::DownloadMemory(std::span buffers_span, std::span offsets_span, std::span copies) { const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); if (is_rescaled) { ScaleDown(); } boost::container::small_vector buffers_vector{}; - for (auto& buffer : buffers_span) { - buffers_vector.push_back(buffer); + boost::container::small_vector, 1> vk_copies; + for (size_t index = 0; index < buffers_span.size(); index++) { + buffers_vector.emplace_back(buffers_span[index]); + vk_copies.emplace_back( + TransformBufferImageCopies(copies, offsets_span[index], aspect_mask)); } - std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); scheduler->RequestOutsideRenderPassOperationContext(); scheduler->Record([buffers = std::move(buffers_vector), image = *original_image, aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { @@ -1377,9 +1379,9 @@ void Image::DownloadMemory(std::span buffers_span, VkDeviceSize offset cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, read_barrier); - for (auto buffer : buffers) { - cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, - vk_copies); + for (size_t index = 0; index < buffers.size(); index++) { + cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index], + vk_copies[index]); } const VkMemoryBarrier memory_write_barrier{ @@ -1418,7 +1420,10 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span copies); - void DownloadMemory(std::span buffers, VkDeviceSize offset, + void DownloadMemory(std::span buffers, std::span offsets, std::span copies); void DownloadMemory(const StagingBufferRef& map, diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2cd5aa31e5..63b8b5af56 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -661,27 +661,40 @@ template void TextureCache

::CommitAsyncFlushes() { // This is intentionally passing the value by copy if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - const std::span download_ids = uncommitted_downloads; + auto& download_ids = uncommitted_downloads; if (download_ids.empty()) { committed_downloads.emplace_back(std::move(uncommitted_downloads)); uncommitted_downloads.clear(); - async_buffers.emplace_back(std::optional{}); + async_buffers.emplace_back(std::move(uncommitted_async_buffers)); + uncommitted_async_buffers.clear(); return; } size_t total_size_bytes = 0; - for (const ImageId image_id : download_ids) { - total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + size_t last_async_buffer_id = uncommitted_async_buffers.size(); + bool any_none_dma = false; + for (PendingDownload& download_info : download_ids) { + if (download_info.is_swizzle) { + total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes; + any_none_dma = true; + download_info.async_buffer_id = last_async_buffer_id; + } } - auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); - for (const ImageId image_id : download_ids) { - Image& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(download_map, copies); - download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); + if (any_none_dma) { + auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); + for (const PendingDownload& download_info : download_ids) { + if (download_info.is_swizzle) { + Image& image = slot_images[download_info.object_id]; + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(download_map, copies); + download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); + } + } + uncommitted_async_buffers.emplace_back(download_map); } - async_buffers.emplace_back(download_map); } committed_downloads.emplace_back(std::move(uncommitted_downloads)); + async_buffers.emplace_back(std::move(uncommitted_async_buffers)); + uncommitted_async_buffers.clear(); uncommitted_downloads.clear(); } @@ -691,39 +704,57 @@ void TextureCache

::PopAsyncFlushes() { return; } if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - const std::span download_ids = committed_downloads.front(); + const auto& download_ids = committed_downloads.front(); if (download_ids.empty()) { committed_downloads.pop_front(); async_buffers.pop_front(); return; } - auto download_map = *async_buffers.front(); - std::span download_span = download_map.mapped_span; + auto download_map = std::move(async_buffers.front()); for (size_t i = download_ids.size(); i > 0; i--) { - const ImageBase& image = slot_images[download_ids[i - 1]]; - const auto copies = FullDownloadCopies(image.info); - download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); - std::span download_span_alt = download_span.subspan(download_map.offset); - SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt, - swizzle_data_buffer); + auto& download_info = download_ids[i - 1]; + auto& download_buffer = download_map[download_info.async_buffer_id]; + if (download_info.is_swizzle) { + const ImageBase& image = slot_images[download_info.object_id]; + const auto copies = FullDownloadCopies(image.info); + download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); + std::span download_span = + download_buffer.mapped_span.subspan(download_buffer.offset); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, + swizzle_data_buffer); + } else { + const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id]; + std::span download_span = + download_buffer.mapped_span.subspan(download_buffer.offset); + gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(), + buffer_info.size); + slot_buffer_downloads.erase(download_info.object_id); + } + } + for (auto& download_buffer : download_map) { + runtime.FreeDeferredStagingBuffer(download_buffer); } - runtime.FreeDeferredStagingBuffer(download_map); committed_downloads.pop_front(); async_buffers.pop_front(); } else { - const std::span download_ids = committed_downloads.front(); + const auto& download_ids = committed_downloads.front(); if (download_ids.empty()) { committed_downloads.pop_front(); return; } size_t total_size_bytes = 0; - for (const ImageId image_id : download_ids) { - total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + for (const PendingDownload& download_info : download_ids) { + if (download_info.is_swizzle) { + total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes; + } } auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); const size_t original_offset = download_map.offset; - for (const ImageId image_id : download_ids) { - Image& image = slot_images[image_id]; + for (const PendingDownload& download_info : download_ids) { + if (download_info.is_swizzle) { + continue; + } + Image& image = slot_images[download_info.object_id]; const auto copies = FullDownloadCopies(image.info); image.DownloadMemory(download_map, copies); download_map.offset += image.unswizzled_size_bytes; @@ -732,8 +763,11 @@ void TextureCache

::PopAsyncFlushes() { runtime.Finish(); download_map.offset = original_offset; std::span download_span = download_map.mapped_span; - for (const ImageId image_id : download_ids) { - const ImageBase& image = slot_images[image_id]; + for (const PendingDownload& download_info : download_ids) { + if (download_info.is_swizzle) { + continue; + } + const ImageBase& image = slot_images[download_info.object_id]; const auto copies = FullDownloadCopies(image.info); SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, swizzle_data_buffer); @@ -836,11 +870,27 @@ std::pair::Image*, BufferImageCopy> TextureCache

::Dm template void TextureCache

::DownloadImageIntoBuffer( typename TextureCache

::Image* image, typename TextureCache

::BufferType buffer, - size_t buffer_offset, std::span copies) { - std::array buffers{ - buffer, - }; - image->DownloadMemory(buffers, buffer_offset, copies); + size_t buffer_offset, std::span copies, GPUVAddr address, size_t size) { + if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { + auto slot = slot_buffer_downloads.insert(address, size); + uncommitted_downloads.emplace_back(false, uncommitted_async_buffers.size(), slot); + auto download_map = runtime.DownloadStagingBuffer(size, true); + uncommitted_async_buffers.emplace_back(download_map); + std::array buffers{ + buffer, + download_map.buffer, + }; + std::array buffer_offsets{ + buffer_offset, + download_map.offset, + }; + image->DownloadMemory(buffers, buffer_offsets, copies); + } else { + std::array buffers{ + buffer, + }; + image->DownloadMemory(buffers, buffer_offset, copies); + } } template @@ -2219,7 +2269,7 @@ void TextureCache

::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) if (new_id) { const ImageViewBase& old_view = slot_image_views[new_id]; if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { - uncommitted_downloads.push_back(old_view.image_id); + uncommitted_downloads.emplace_back(true, 0, old_view.image_id); } } *old_id = new_id; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 51f44aed5a..d5bba33793 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -217,7 +217,8 @@ public: const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image); void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset, - std::span copies); + std::span copies, + GPUVAddr address = 0, size_t size = 0); /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); @@ -428,17 +429,31 @@ private: u64 critical_memory; size_t critical_gc; + struct BufferDownload { + GPUVAddr address; + size_t size; + }; + + struct PendingDownload { + bool is_swizzle; + size_t async_buffer_id; + SlotId object_id; + }; + SlotVector slot_images; SlotVector slot_map_views; SlotVector slot_image_views; SlotVector slot_image_allocs; SlotVector slot_samplers; SlotVector slot_framebuffers; + SlotVector slot_buffer_downloads; // TODO: This data structure is not optimal and it should be reworked - std::vector uncommitted_downloads; - std::deque> committed_downloads; - std::deque> async_buffers; + + std::vector uncommitted_downloads; + std::deque> committed_downloads; + std::vector uncommitted_async_buffers; + std::deque> async_buffers; struct LRUItemParams { using ObjectType = ImageId; From 56c9730a160b4123c327b26c940f53a2b6e6e8d3 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 22 Apr 2023 14:04:59 +0200 Subject: [PATCH 3/5] Maxwell3D: only update parameters on High --- src/video_core/engines/maxwell_3d.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 0932fadc2d..2f986097f9 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -223,6 +223,9 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool } void Maxwell3D::RefreshParametersImpl() { + if (!Settings::IsGPULevelHigh()) { + return; + } size_t current_index = 0; for (auto& segment : macro_segments) { if (segment.first == 0) { From 58d1c7c77af1a68efa363c322fc5b26bff37ef00 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 22 Apr 2023 15:27:58 +0200 Subject: [PATCH 4/5] Address Feedback & Clang Format --- src/video_core/texture_cache/texture_cache.h | 26 ++++++++++--------- .../texture_cache/texture_cache_base.h | 5 ---- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 63b8b5af56..543ceb5aab 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include "common/alignment.h" #include "common/settings.h" @@ -17,15 +18,10 @@ namespace VideoCommon { -using Tegra::Texture::SwizzleSource; -using Tegra::Texture::TextureType; using Tegra::Texture::TICEntry; using Tegra::Texture::TSCEntry; using VideoCore::Surface::GetFormatType; -using VideoCore::Surface::IsCopyCompatible; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::PixelFormatFromDepthFormat; -using VideoCore::Surface::PixelFormatFromRenderTargetFormat; using VideoCore::Surface::SurfaceType; using namespace Common::Literals; @@ -674,7 +670,8 @@ void TextureCache

::CommitAsyncFlushes() { bool any_none_dma = false; for (PendingDownload& download_info : download_ids) { if (download_info.is_swizzle) { - total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes; + total_size_bytes += + Common::AlignUp(slot_images[download_info.object_id].unswizzled_size_bytes, 64); any_none_dma = true; download_info.async_buffer_id = last_async_buffer_id; } @@ -868,12 +865,16 @@ std::pair::Image*, BufferImageCopy> TextureCache

::Dm } template -void TextureCache

::DownloadImageIntoBuffer( - typename TextureCache

::Image* image, typename TextureCache

::BufferType buffer, - size_t buffer_offset, std::span copies, GPUVAddr address, size_t size) { +void TextureCache

::DownloadImageIntoBuffer(typename TextureCache

::Image* image, + typename TextureCache

::BufferType buffer, + size_t buffer_offset, + std::span copies, + GPUVAddr address, size_t size) { if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - auto slot = slot_buffer_downloads.insert(address, size); - uncommitted_downloads.emplace_back(false, uncommitted_async_buffers.size(), slot); + const BufferDownload new_buffer_download{address, size}; + auto slot = slot_buffer_downloads.insert(new_buffer_download); + const PendingDownload new_download{false, uncommitted_async_buffers.size(), slot}; + uncommitted_downloads.emplace_back(new_download); auto download_map = runtime.DownloadStagingBuffer(size, true); uncommitted_async_buffers.emplace_back(download_map); std::array buffers{ @@ -2269,7 +2270,8 @@ void TextureCache

::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) if (new_id) { const ImageViewBase& old_view = slot_image_views[new_id]; if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { - uncommitted_downloads.emplace_back(true, 0, old_view.image_id); + const PendingDownload new_download{true, 0, old_view.image_id}; + uncommitted_downloads.emplace_back(new_download); } } *old_id = new_id; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index d5bba33793..bb9ddb70e4 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -40,14 +40,9 @@ struct ChannelState; namespace VideoCommon { -using Tegra::Texture::SwizzleSource; using Tegra::Texture::TICEntry; using Tegra::Texture::TSCEntry; -using VideoCore::Surface::GetFormatType; -using VideoCore::Surface::IsCopyCompatible; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::PixelFormatFromDepthFormat; -using VideoCore::Surface::PixelFormatFromRenderTargetFormat; using namespace Common::Literals; struct ImageViewInOut { From 4bc5469f52157cd18e697120df40e40e32365e89 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 23 Apr 2023 21:37:13 +0200 Subject: [PATCH 5/5] Texture Cache: Release stagging buffers on tick frame --- .../renderer_opengl/gl_texture_cache.cpp | 19 ++++++++++------ .../renderer_opengl/gl_texture_cache.h | 5 ++++- .../renderer_vulkan/vk_texture_cache.cpp | 13 ++++++++++- .../renderer_vulkan/vk_texture_cache.h | 5 ++++- src/video_core/texture_cache/texture_cache.h | 22 +++++++++++-------- .../texture_cache/texture_cache_base.h | 1 + 6 files changed, 46 insertions(+), 19 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 670d8cafd9..032a8ebc54 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -801,14 +801,22 @@ void Image::UploadMemory(const ImageBufferMap& map, UploadMemory(map.buffer, map.offset, copies); } -void Image::DownloadMemory(std::span buffer_handles, size_t buffer_offset, +void Image::DownloadMemory(GLuint buffer_handle, size_t buffer_offset, + std::span copies) { + std::array buffer_handles{buffer_handle}; + std::array buffer_offsets{buffer_offset}; + DownloadMemory(buffer_handles, buffer_offsets, copies); +} + +void Image::DownloadMemory(std::span buffer_handles, std::span buffer_offsets, std::span copies) { const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); if (is_rescaled) { ScaleDown(); } glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API - for (auto buffer_handle : buffer_handles) { + for (size_t i = 0; i < buffer_handles.size(); i++) { + auto& buffer_handle = buffer_handles[i]; glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle); glPixelStorei(GL_PACK_ALIGNMENT, 1); @@ -827,7 +835,7 @@ void Image::DownloadMemory(std::span buffer_handles, size_t buffer_offse current_image_height = copy.buffer_image_height; glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); } - CopyImageToBuffer(copy, buffer_offset); + CopyImageToBuffer(copy, buffer_offsets[i]); } } if (is_rescaled) { @@ -837,10 +845,7 @@ void Image::DownloadMemory(std::span buffer_handles, size_t buffer_offse void Image::DownloadMemory(ImageBufferMap& map, std::span copies) { - std::array buffers{ - map.buffer, - }; - DownloadMemory(buffers, map.offset, copies); + DownloadMemory(map.buffer, map.offset, copies); } GLuint Image::StorageHandle() noexcept { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 67d6910b41..0dd039ed2a 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -212,7 +212,10 @@ public: void UploadMemory(const ImageBufferMap& map, std::span copies); - void DownloadMemory(std::span buffer_handle, size_t buffer_offset, + void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, + std::span copies); + + void DownloadMemory(std::span buffer_handle, std::span buffer_offset, std::span copies); void DownloadMemory(ImageBufferMap& map, std::span copies); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index da3841bb34..d0a7d8f35e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #include @@ -1342,6 +1342,17 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { + std::array buffer_handles{ + buffer, + }; + std::array buffer_offsets{ + offset, + }; + DownloadMemory(buffer_handles, buffer_offsets, copies); +} + void Image::DownloadMemory(std::span buffers_span, std::span offsets_span, std::span copies) { const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index bdaf43ba4b..c656c53865 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #pragma once @@ -138,6 +138,9 @@ public: void UploadMemory(const StagingBufferRef& map, std::span copies); + void DownloadMemory(VkBuffer buffer, VkDeviceSize offset, + std::span copies); + void DownloadMemory(std::span buffers, std::span offsets, std::span copies); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 543ceb5aab..e601f84468 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -139,6 +139,13 @@ void TextureCache

::TickFrame() { runtime.TickFrame(); critical_gc = 0; ++frame_tick; + + if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { + for (auto& buffer : async_buffers_death_ring) { + runtime.FreeDeferredStagingBuffer(buffer); + } + async_buffers_death_ring.clear(); + } } template @@ -688,10 +695,10 @@ void TextureCache

::CommitAsyncFlushes() { } uncommitted_async_buffers.emplace_back(download_map); } + async_buffers.emplace_back(std::move(uncommitted_async_buffers)); + uncommitted_async_buffers.clear(); } committed_downloads.emplace_back(std::move(uncommitted_downloads)); - async_buffers.emplace_back(std::move(uncommitted_async_buffers)); - uncommitted_async_buffers.clear(); uncommitted_downloads.clear(); } @@ -729,7 +736,7 @@ void TextureCache

::PopAsyncFlushes() { } } for (auto& download_buffer : download_map) { - runtime.FreeDeferredStagingBuffer(download_buffer); + async_buffers_death_ring.emplace_back(download_buffer); } committed_downloads.pop_front(); async_buffers.pop_front(); @@ -748,7 +755,7 @@ void TextureCache

::PopAsyncFlushes() { auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); const size_t original_offset = download_map.offset; for (const PendingDownload& download_info : download_ids) { - if (download_info.is_swizzle) { + if (!download_info.is_swizzle) { continue; } Image& image = slot_images[download_info.object_id]; @@ -761,7 +768,7 @@ void TextureCache

::PopAsyncFlushes() { download_map.offset = original_offset; std::span download_span = download_map.mapped_span; for (const PendingDownload& download_info : download_ids) { - if (download_info.is_swizzle) { + if (!download_info.is_swizzle) { continue; } const ImageBase& image = slot_images[download_info.object_id]; @@ -887,10 +894,7 @@ void TextureCache

::DownloadImageIntoBuffer(typename TextureCache

::Image* i }; image->DownloadMemory(buffers, buffer_offsets, copies); } else { - std::array buffers{ - buffer, - }; - image->DownloadMemory(buffers, buffer_offset, copies); + image->DownloadMemory(buffer, buffer_offset, copies); } } diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index bb9ddb70e4..758b7e212f 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -449,6 +449,7 @@ private: std::deque> committed_downloads; std::vector uncommitted_async_buffers; std::deque> async_buffers; + std::deque async_buffers_death_ring; struct LRUItemParams { using ObjectType = ImageId;