From 7f737b022aaead6cb85504d738b769ebcd557018 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 15 Sep 2021 01:02:28 -0400 Subject: [PATCH 1/3] util_shaders: Unify BGRA copy passes --- .../renderer_opengl/gl_texture_cache.cpp | 2 +- .../renderer_opengl/gl_texture_cache.h | 8 ++ .../renderer_opengl/maxwell_to_gl.h | 4 +- .../renderer_opengl/util_shaders.cpp | 85 +++++-------------- src/video_core/renderer_opengl/util_shaders.h | 19 +---- 5 files changed, 36 insertions(+), 82 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index b0aee6cc12..3ec78d866f 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -461,7 +461,7 @@ bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) { if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { return false; } - if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) { + if (IsPixelFormatBGR(dst.info.format) != IsPixelFormatBGR(src.info.format)) { return false; } return true; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 4a4f6301c3..990a8ddcb5 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -162,6 +162,14 @@ public: return texture.handle; } + GLuint GlFormat() const noexcept { + return gl_format; + } + + GLuint GlType() const noexcept { + return gl_type; + } + private: void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 672f94bfc3..39158aa3ea 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -52,7 +52,7 @@ constexpr std::array FORMAT_TAB {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, // B8G8R8A8_UNORM {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT @@ -81,7 +81,7 @@ constexpr std::array FORMAT_TAB {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, // B8G8R8A8_SRGB {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 333f35a1c6..2cb6e19b30 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -257,38 +257,31 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span copies) { - static constexpr GLuint BINDING_INPUT_IMAGE = 0; - static constexpr GLuint BINDING_OUTPUT_IMAGE = 1; static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; - const u32 bytes_per_block = BytesPerBlock(dst_image.info.format); - switch (bytes_per_block) { - case 2: - // BGR565 copy - for (const ImageCopy& copy : copies) { - ASSERT(copy.src_offset == zero_offset); - ASSERT(copy.dst_offset == zero_offset); - bgr_copy_pass.Execute(dst_image, src_image, copy); + for (const ImageCopy& copy : copies) { + ASSERT(copy.src_offset == zero_offset); + ASSERT(copy.dst_offset == zero_offset); + + if (bgr_pbo_size < dst_image.unswizzled_size_bytes) { + bgr_pbo.Create(); + bgr_pbo_size = dst_image.unswizzled_size_bytes; + glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY); } - break; - case 4: { - // BGRA8 copy - program_manager.BindComputeProgram(copy_bgra_program.handle); - constexpr GLenum FORMAT = GL_RGBA8; - for (const ImageCopy& copy : copies) { - ASSERT(copy.src_offset == zero_offset); - ASSERT(copy.dst_offset == zero_offset); - glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(), - copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, FORMAT); - glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(), - copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, FORMAT); - glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); - } - program_manager.RestoreGuestCompute(); - break; - } - default: - UNREACHABLE(); - break; + // Copy from source to PBO + glPixelStorei(GL_PACK_ALIGNMENT, 1); + glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); + glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle); + glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, + copy.src_subresource.num_layers, src_image.GlFormat(), + src_image.GlType(), static_cast(bgr_pbo_size), nullptr); + + // Copy from PBO to destination in reverse order + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr_pbo.handle); + glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, + copy.dst_subresource.num_layers, dst_image.GlFormat(), + dst_image.GlType(), nullptr); } } @@ -309,36 +302,4 @@ GLenum StoreFormat(u32 bytes_per_block) { return GL_R8UI; } -void Bgr565CopyPass::Execute(const Image& dst_image, const Image& src_image, - const ImageCopy& copy) { - if (CopyBufferCreationNeeded(copy)) { - CreateNewCopyBuffer(copy, GL_TEXTURE_2D_ARRAY, GL_RGB565); - } - // Copy from source to PBO - glPixelStorei(GL_PACK_ALIGNMENT, 1); - glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); - glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr16_pbo.handle); - glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, - copy.src_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, - static_cast(bgr16_pbo_size), nullptr); - - // Copy from PBO to destination in reverse order - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr16_pbo.handle); - glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, - copy.dst_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, - nullptr); -} - -bool Bgr565CopyPass::CopyBufferCreationNeeded(const ImageCopy& copy) { - return bgr16_pbo_size < NumPixelsInCopy(copy) * sizeof(u16); -} - -void Bgr565CopyPass::CreateNewCopyBuffer(const ImageCopy& copy, GLenum target, GLuint format) { - bgr16_pbo.Create(); - bgr16_pbo_size = NumPixelsInCopy(copy) * sizeof(u16); - glNamedBufferData(bgr16_pbo.handle, bgr16_pbo_size, nullptr, GL_STREAM_COPY); -} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index ef881e35f6..b474480cb6 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -19,22 +19,6 @@ class ProgramManager; struct ImageBufferMap; -class Bgr565CopyPass { -public: - Bgr565CopyPass() = default; - ~Bgr565CopyPass() = default; - - void Execute(const Image& dst_image, const Image& src_image, - const VideoCommon::ImageCopy& copy); - -private: - [[nodiscard]] bool CopyBufferCreationNeeded(const VideoCommon::ImageCopy& copy); - void CreateNewCopyBuffer(const VideoCommon::ImageCopy& copy, GLenum target, GLuint format); - - OGLBuffer bgr16_pbo; - size_t bgr16_pbo_size{}; -}; - class UtilShaders { public: explicit UtilShaders(ProgramManager& program_manager); @@ -70,7 +54,8 @@ private: OGLProgram copy_bgra_program; OGLProgram copy_bc4_program; - Bgr565CopyPass bgr_copy_pass; + OGLBuffer bgr_pbo; + size_t bgr_pbo_size{}; }; GLenum StoreFormat(u32 bytes_per_block); From ab808fe7cf66c143e9323bb51922989d5bb37752 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 15 Sep 2021 21:30:35 -0400 Subject: [PATCH 2/3] gl_texture_cache: Migrate BGRCopyPass from util_shaders The BGR copies no longer use shaders. --- .../renderer_opengl/gl_texture_cache.cpp | 35 ++++++++++++++++++- .../renderer_opengl/gl_texture_cache.h | 14 ++++++++ .../renderer_opengl/util_shaders.cpp | 35 ------------------- src/video_core/renderer_opengl/util_shaders.h | 6 ---- 4 files changed, 48 insertions(+), 42 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 3ec78d866f..54dae2c41b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -473,7 +473,7 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, ASSERT(src.info.type == ImageType::e3D); util_shaders.CopyBC4(dst, src, copies); } else if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) { - util_shaders.CopyBGR(dst, src, copies); + bgr_copy_pass.CopyBGR(dst, src, copies); } else { UNREACHABLE(); } @@ -1112,4 +1112,37 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span copies) { + static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; + const u32 requested_pbo_size = + std::max(src_image.unswizzled_size_bytes, dst_image.unswizzled_size_bytes); + + if (bgr_pbo_size < requested_pbo_size) { + bgr_pbo.Create(); + bgr_pbo_size = requested_pbo_size; + glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY); + } + for (const ImageCopy& copy : copies) { + ASSERT(copy.src_offset == zero_offset); + ASSERT(copy.dst_offset == zero_offset); + + // Copy from source to PBO + glPixelStorei(GL_PACK_ALIGNMENT, 1); + glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); + glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle); + glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, + copy.src_subresource.num_layers, src_image.GlFormat(), + src_image.GlType(), static_cast(bgr_pbo_size), nullptr); + + // Copy from PBO to destination in desired GL format + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr_pbo.handle); + glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, + copy.dst_subresource.num_layers, dst_image.GlFormat(), + dst_image.GlType(), nullptr); + } +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 990a8ddcb5..c498a8a8f6 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -47,6 +47,19 @@ struct FormatProperties { bool is_compressed; }; +class BGRCopyPass { +public: + BGRCopyPass() = default; + ~BGRCopyPass() = default; + + void CopyBGR(Image& dst_image, Image& src_image, + std::span copies); + +private: + OGLBuffer bgr_pbo; + size_t bgr_pbo_size{}; +}; + class TextureCacheRuntime { friend Framebuffer; friend Image; @@ -118,6 +131,7 @@ private: const Device& device; StateTracker& state_tracker; UtilShaders util_shaders; + BGRCopyPass bgr_copy_pass; std::array, 3> format_properties; bool has_broken_texture_view_formats = false; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 2cb6e19b30..b4083cc36b 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -44,11 +44,6 @@ namespace { OGLProgram MakeProgram(std::string_view source) { return CreateProgram(source, GL_COMPUTE_SHADER); } - -size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { - return static_cast(copy.extent.width * copy.extent.height * - copy.src_subresource.num_layers); -} } // Anonymous namespace UtilShaders::UtilShaders(ProgramManager& program_manager_) @@ -255,36 +250,6 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span copies) { - static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; - for (const ImageCopy& copy : copies) { - ASSERT(copy.src_offset == zero_offset); - ASSERT(copy.dst_offset == zero_offset); - - if (bgr_pbo_size < dst_image.unswizzled_size_bytes) { - bgr_pbo.Create(); - bgr_pbo_size = dst_image.unswizzled_size_bytes; - glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY); - } - // Copy from source to PBO - glPixelStorei(GL_PACK_ALIGNMENT, 1); - glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); - glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle); - glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, - copy.src_subresource.num_layers, src_image.GlFormat(), - src_image.GlType(), static_cast(bgr_pbo_size), nullptr); - - // Copy from PBO to destination in reverse order - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr_pbo.handle); - glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, - copy.dst_subresource.num_layers, dst_image.GlFormat(), - dst_image.GlType(), nullptr); - } -} - GLenum StoreFormat(u32 bytes_per_block) { switch (bytes_per_block) { case 1: diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index b474480cb6..aaa5fba3a7 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -39,9 +39,6 @@ public: void CopyBC4(Image& dst_image, Image& src_image, std::span copies); - void CopyBGR(Image& dst_image, Image& src_image, - std::span copies); - private: ProgramManager& program_manager; @@ -53,9 +50,6 @@ private: OGLProgram pitch_unswizzle_program; OGLProgram copy_bgra_program; OGLProgram copy_bc4_program; - - OGLBuffer bgr_pbo; - size_t bgr_pbo_size{}; }; GLenum StoreFormat(u32 bytes_per_block); From 22162f906b73b6b1a43be1df9d828746b8dbd216 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 15 Sep 2021 21:33:43 -0400 Subject: [PATCH 3/3] host_shaders: Remove opengl_copy_bgra.comp --- src/video_core/host_shaders/CMakeLists.txt | 1 - src/video_core/host_shaders/opengl_copy_bgra.comp | 15 --------------- src/video_core/renderer_opengl/util_shaders.cpp | 2 -- src/video_core/renderer_opengl/util_shaders.h | 1 - 4 files changed, 19 deletions(-) delete mode 100644 src/video_core/host_shaders/opengl_copy_bgra.comp diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index c9cff74503..20d748c12d 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -6,7 +6,6 @@ set(SHADER_FILES convert_float_to_depth.frag full_screen_triangle.vert opengl_copy_bc4.comp - opengl_copy_bgra.comp opengl_present.frag opengl_present.vert pitch_unswizzle.comp diff --git a/src/video_core/host_shaders/opengl_copy_bgra.comp b/src/video_core/host_shaders/opengl_copy_bgra.comp deleted file mode 100644 index 2571a4abf1..0000000000 --- a/src/video_core/host_shaders/opengl_copy_bgra.comp +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#version 430 core - -layout (local_size_x = 4, local_size_y = 4) in; - -layout(binding = 0, rgba8) readonly uniform image2DArray bgr_input; -layout(binding = 1, rgba8) writeonly uniform image2DArray bgr_output; - -void main() { - vec4 color = imageLoad(bgr_input, ivec3(gl_GlobalInvocationID)); - imageStore(bgr_output, ivec3(gl_GlobalInvocationID), color.bgra); -} diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index b4083cc36b..897c380b3b 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -14,7 +14,6 @@ #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" #include "video_core/host_shaders/opengl_copy_bc4_comp.h" -#include "video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -51,7 +50,6 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), - copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)), copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); swizzle_table_buffer.Create(); diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index aaa5fba3a7..5de95ea7ac 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -48,7 +48,6 @@ private: OGLProgram block_linear_unswizzle_2d_program; OGLProgram block_linear_unswizzle_3d_program; OGLProgram pitch_unswizzle_program; - OGLProgram copy_bgra_program; OGLProgram copy_bc4_program; };