From 31b125ef578dd5df4e289d1057154dd34f73cb19 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 19 Jun 2021 00:55:13 -0400 Subject: [PATCH 1/3] astc: Various robustness enhancements for the gpu decoder These changes should help in reducing crashes/driver panics that may occur due to synchronization issues between the shader completion and later access of the decoded texture. --- src/video_core/host_shaders/astc_decoder.comp | 15 +++---- .../renderer_opengl/util_shaders.cpp | 5 +-- .../renderer_vulkan/vk_compute_pass.cpp | 39 ++++--------------- src/video_core/textures/astc.cpp | 2 + src/video_core/textures/astc.h | 2 - 5 files changed, 16 insertions(+), 47 deletions(-) diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index eaba1b1033..71327e233c 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -14,9 +14,8 @@ #define BINDING_6_TO_8_BUFFER 2 #define BINDING_7_TO_8_BUFFER 3 #define BINDING_8_TO_8_BUFFER 4 -#define BINDING_BYTE_TO_16_BUFFER 5 -#define BINDING_SWIZZLE_BUFFER 6 -#define BINDING_OUTPUT_IMAGE 7 +#define BINDING_SWIZZLE_BUFFER 5 +#define BINDING_OUTPUT_IMAGE 6 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv @@ -29,7 +28,6 @@ #define BINDING_6_TO_8_BUFFER 3 #define BINDING_7_TO_8_BUFFER 4 #define BINDING_8_TO_8_BUFFER 5 -#define BINDING_BYTE_TO_16_BUFFER 6 #define BINDING_OUTPUT_IMAGE 0 #endif @@ -86,9 +84,6 @@ layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_ layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 { uint REPLICATE_8_BIT_TO_8_TABLE[]; }; -layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BYTE_TO_16 { - uint REPLICATE_BYTE_TO_16_TABLE[]; -}; layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; @@ -207,8 +202,7 @@ uint Replicate(uint val, uint num_bits, uint
to_bit) { } uvec4 ReplicateByteTo16(uvec4 value) { - return uvec4(REPLICATE_BYTE_TO_16_TABLE[value.x], REPLICATE_BYTE_TO_16_TABLE[value.y], - REPLICATE_BYTE_TO_16_TABLE[value.z], REPLICATE_BYTE_TO_16_TABLE[value.w]); + return value * 0x101; } uint ReplicateBitTo7(uint value) { @@ -1327,6 +1321,9 @@ void main() { offset += swizzle; const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1)); + if (any(greaterThanEqual(coord, imageSize(dest_image)))) { + return; + } uint block_index = pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 47fddcb6e8..d57998cdc7 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -83,7 +83,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_6_TO_8_BUFFER = 3; static constexpr GLuint BINDING_7_TO_8_BUFFER = 4; static constexpr GLuint BINDING_8_TO_8_BUFFER = 5; - static constexpr GLuint BINDING_BYTE_TO_16_BUFFER = 6; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; @@ -105,9 +104,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle, offsetof(AstcBufferData, replicate_8_to_8), sizeof(AstcBufferData::replicate_8_to_8)); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_BYTE_TO_16_BUFFER, astc_buffer.handle, - offsetof(AstcBufferData, replicate_byte_to_16), - sizeof(AstcBufferData::replicate_byte_to_16)); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(1, tile_size.width, tile_size.height); @@ -137,6 +133,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); } + glMemoryBarrier(GL_ALL_BARRIER_BITS); 
program_manager.RestoreGuestCompute(); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index e11406e58f..123bed7946 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -40,9 +40,9 @@ constexpr u32 ASTC_BINDING_ENC_BUFFER = 1; constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2; constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3; constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4; -constexpr u32 ASTC_BINDING_BYTE_TO_16_BUFFER = 5; -constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 6; -constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 7; +constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 5; +constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 6; +constexpr size_t ASTC_NUM_BINDINGS = 7; VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { return { @@ -71,7 +71,7 @@ std::array BuildInputOutputDescriptorSetBinding }}; } -std::array BuildASTCDescriptorSetBindings() { +std::array BuildASTCDescriptorSetBindings() { return {{ { .binding = ASTC_BINDING_INPUT_BUFFER, @@ -108,13 +108,6 @@ std::array BuildASTCDescriptorSetBindings() { .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, .pImmutableSamplers = nullptr, }, - { - .binding = ASTC_BINDING_BYTE_TO_16_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, { .binding = ASTC_BINDING_SWIZZLE_BUFFER, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, @@ -143,7 +136,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { }; } -std::array BuildASTCPassDescriptorUpdateTemplateEntry() { +std::array +BuildASTCPassDescriptorUpdateTemplateEntry() { return {{ { .dstBinding = ASTC_BINDING_INPUT_BUFFER, @@ -185,14 +179,6 @@ std::array BuildASTCPassDescriptorUpdateT .offset = ASTC_BINDING_8_TO_8_BUFFER * sizeof(DescriptorUpdateEntry), .stride = sizeof(DescriptorUpdateEntry), }, - { - 
.dstBinding = ASTC_BINDING_BYTE_TO_16_BUFFER, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = ASTC_BINDING_BYTE_TO_16_BUFFER * sizeof(DescriptorUpdateEntry), - .stride = sizeof(DescriptorUpdateEntry), - }, { .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, .dstArrayElement = 0, @@ -222,15 +208,6 @@ struct AstcPushConstants { u32 block_height_mask; }; -struct AstcBufferData { - decltype(SWIZZLE_TABLE) swizzle_table_buffer = SWIZZLE_TABLE; - decltype(EncodingsValues) encoding_values = EncodingsValues; - decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE; - decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE; - decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; - decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; -} constexpr ASTC_BUFFER_DATA; - } // Anonymous namespace VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, @@ -517,9 +494,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, sizeof(AstcBufferData::replicate_7_to_8)); update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8), sizeof(AstcBufferData::replicate_8_to_8)); - update_descriptor_queue.AddBuffer(*data_buffer, - offsetof(AstcBufferData, replicate_byte_to_16), - sizeof(AstcBufferData::replicate_byte_to_16)); update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData), sizeof(SWIZZLE_TABLE)); update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); @@ -569,6 +543,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier); }); + scheduler.Finish(); } } // namespace Vulkan diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 
9b2177ebdf..b6e2022f21 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -551,6 +551,8 @@ static void FillError(std::span outBuf, u32 blockWidth, u32 blockHeight) { } } } + +static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable(); static constexpr u32 ReplicateByteTo16(std::size_t value) { return REPLICATE_BYTE_TO_16_TABLE[value]; } diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index c1c37dfe7c..441e8eb048 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -116,7 +116,6 @@ constexpr auto MakeReplicateTable() { return table; } -constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable(); constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable(); constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable(); constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable(); @@ -126,7 +125,6 @@ struct AstcBufferData { decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE; decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE; decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; - decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; } constexpr ASTC_BUFFER_DATA; void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth, From ace20ba4a4774ae3c42f2ef5566c7113f3b980b3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 19 Jun 2021 10:56:13 -0400 Subject: [PATCH 2/3] astc_decoder.comp: Remove unnecessary LUT SSBOs We can move them to instead be compile time constants within the shader. 
--- src/video_core/host_shaders/astc_decoder.comp | 35 ++++----- .../renderer_opengl/util_shaders.cpp | 21 +----- .../renderer_vulkan/vk_compute_pass.cpp | 74 +++---------------- src/video_core/textures/astc.cpp | 8 +- src/video_core/textures/astc.h | 9 +-- 5 files changed, 34 insertions(+), 113 deletions(-) diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 71327e233c..c37f15bfd8 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -11,11 +11,8 @@ #define UNIFORM(n) #define BINDING_INPUT_BUFFER 0 #define BINDING_ENC_BUFFER 1 -#define BINDING_6_TO_8_BUFFER 2 -#define BINDING_7_TO_8_BUFFER 3 -#define BINDING_8_TO_8_BUFFER 4 -#define BINDING_SWIZZLE_BUFFER 5 -#define BINDING_OUTPUT_IMAGE 6 +#define BINDING_SWIZZLE_BUFFER 2 +#define BINDING_OUTPUT_IMAGE 3 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv @@ -25,9 +22,6 @@ #define BINDING_SWIZZLE_BUFFER 0 #define BINDING_INPUT_BUFFER 1 #define BINDING_ENC_BUFFER 2 -#define BINDING_6_TO_8_BUFFER 3 -#define BINDING_7_TO_8_BUFFER 4 -#define BINDING_8_TO_8_BUFFER 5 #define BINDING_OUTPUT_IMAGE 0 #endif @@ -74,16 +68,6 @@ layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues { EncodingData encoding_values[]; }; -// ASTC Precompiled tables -layout(binding = BINDING_6_TO_8_BUFFER, std430) readonly buffer REPLICATE_6_BIT_TO_8 { - uint REPLICATE_6_BIT_TO_8_TABLE[]; -}; -layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_TO_8 { - uint REPLICATE_7_BIT_TO_8_TABLE[]; -}; -layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 { - uint REPLICATE_8_BIT_TO_8_TABLE[]; -}; layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; @@ -134,6 +118,19 @@ const uint REPLICATE_4_BIT_TO_6_TABLE[16] = const uint REPLICATE_5_BIT_TO_6_TABLE[32] 
= uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63); +const uint REPLICATE_6_BIT_TO_8_TABLE[64] = + uint[](0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 65, 69, 73, 77, 81, 85, 89, + 93, 97, 101, 105, 109, 113, 117, 121, 125, 130, 134, 138, 142, 146, 150, 154, 158, 162, + 166, 170, 174, 178, 182, 186, 190, 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, + 239, 243, 247, 251, 255); +const uint REPLICATE_7_BIT_TO_8_TABLE[128] = + uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, + 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, + 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, + 129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159, 161, 163, + 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, + 201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, + 237, 239, 241, 243, 245, 247, 249, 251, 253, 255); // Input ASTC texture globals uint current_index = 0; @@ -230,7 +227,7 @@ uint FastReplicateTo8(uint value, uint num_bits) { case 7: return REPLICATE_7_BIT_TO_8_TABLE[value]; case 8: - return REPLICATE_8_BIT_TO_8_TABLE[value]; + return value; } return Replicate(value, num_bits, 8); } diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index d57998cdc7..7e32f49caa 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -69,7 +69,8 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) swizzle_table_buffer.Create(); astc_buffer.Create(); glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); - glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_BUFFER_DATA), &ASTC_BUFFER_DATA, 0); + 
glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_ENCODINGS_VALUES), &ASTC_ENCODINGS_VALUES, + 0); } UtilShaders::~UtilShaders() = default; @@ -79,11 +80,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_ENC_BUFFER = 2; - - static constexpr GLuint BINDING_6_TO_8_BUFFER = 3; - static constexpr GLuint BINDING_7_TO_8_BUFFER = 4; - static constexpr GLuint BINDING_8_TO_8_BUFFER = 5; - static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; const Extent2D tile_size{ @@ -92,18 +88,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, }; program_manager.BindHostCompute(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle, - offsetof(AstcBufferData, encoding_values), - sizeof(AstcBufferData::encoding_values)); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_6_TO_8_BUFFER, astc_buffer.handle, - offsetof(AstcBufferData, replicate_6_to_8), - sizeof(AstcBufferData::replicate_6_to_8)); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_7_TO_8_BUFFER, astc_buffer.handle, - offsetof(AstcBufferData, replicate_7_to_8), - sizeof(AstcBufferData::replicate_7_to_8)); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle, - offsetof(AstcBufferData, replicate_8_to_8), - sizeof(AstcBufferData::replicate_8_to_8)); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(1, tile_size.width, tile_size.height); diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 123bed7946..205cd3b052 100644 --- 
a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -30,19 +30,16 @@ namespace Vulkan { using Tegra::Texture::SWIZZLE_TABLE; -using Tegra::Texture::ASTC::EncodingsValues; +using Tegra::Texture::ASTC::ASTC_ENCODINGS_VALUES; using namespace Tegra::Texture::ASTC; namespace { constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; constexpr u32 ASTC_BINDING_ENC_BUFFER = 1; -constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2; -constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3; -constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4; -constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 5; -constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 6; -constexpr size_t ASTC_NUM_BINDINGS = 7; +constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2; +constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; +constexpr size_t ASTC_NUM_BINDINGS = 4; VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { return { @@ -87,27 +84,6 @@ std::array BuildASTCDescriptorS .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, .pImmutableSamplers = nullptr, }, - { - .binding = ASTC_BINDING_6_TO_8_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_7_TO_8_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_8_TO_8_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, { .binding = ASTC_BINDING_SWIZZLE_BUFFER, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, @@ -155,30 +131,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() { .offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry), .stride = sizeof(DescriptorUpdateEntry), }, - { - .dstBinding = ASTC_BINDING_6_TO_8_BUFFER, - 
.dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = ASTC_BINDING_6_TO_8_BUFFER * sizeof(DescriptorUpdateEntry), - .stride = sizeof(DescriptorUpdateEntry), - }, - { - .dstBinding = ASTC_BINDING_7_TO_8_BUFFER, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = ASTC_BINDING_7_TO_8_BUFFER * sizeof(DescriptorUpdateEntry), - .stride = sizeof(DescriptorUpdateEntry), - }, - { - .dstBinding = ASTC_BINDING_8_TO_8_BUFFER, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = ASTC_BINDING_8_TO_8_BUFFER * sizeof(DescriptorUpdateEntry), - .stride = sizeof(DescriptorUpdateEntry), - }, { .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, .dstArrayElement = 0, @@ -400,7 +352,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, ASTCDecoderPass::~ASTCDecoderPass() = default; void ASTCDecoderPass::MakeDataBuffer() { - constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_BUFFER_DATA) + sizeof(SWIZZLE_TABLE); + constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_ENCODINGS_VALUES) + sizeof(SWIZZLE_TABLE); data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -414,9 +366,10 @@ void ASTCDecoderPass::MakeDataBuffer() { data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload); const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload); - std::memcpy(staging_ref.mapped_span.data(), &ASTC_BUFFER_DATA, sizeof(ASTC_BUFFER_DATA)); + std::memcpy(staging_ref.mapped_span.data(), &ASTC_ENCODINGS_VALUES, + sizeof(ASTC_ENCODINGS_VALUES)); // Tack on the swizzle table at the end of the buffer - std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_BUFFER_DATA), &SWIZZLE_TABLE, + std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_ENCODINGS_VALUES), 
&SWIZZLE_TABLE, sizeof(SWIZZLE_TABLE)); scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer, @@ -486,15 +439,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); - update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values), - sizeof(AstcBufferData::encoding_values)); - update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_6_to_8), - sizeof(AstcBufferData::replicate_6_to_8)); - update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_7_to_8), - sizeof(AstcBufferData::replicate_7_to_8)); - update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8), - sizeof(AstcBufferData::replicate_8_to_8)); - update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData), + update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(ASTC_ENCODINGS_VALUES)); + update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), sizeof(SWIZZLE_TABLE)); update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index b6e2022f21..7b756ba418 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -269,7 +269,7 @@ static void DecodeQuintBlock(InputBitStream& bits, IntegerEncodedVector& result, static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange, u32 nValues) { // Determine encoding parameters - IntegerEncodedValue val = EncodingsValues[maxRange]; + IntegerEncodedValue val = ASTC_ENCODINGS_VALUES[maxRange]; // Start decoding u32 nValsDecoded = 0; @@ -310,7 +310,7 @@ struct TexelWeightParams { nIdxs *= 2; } - return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs); + return 
ASTC_ENCODINGS_VALUES[m_MaxWeight].GetBitLength(nIdxs); } u32 GetNumWeightValues() const { @@ -755,12 +755,12 @@ static void DecodeColorValues(u32* out, std::span data, const u32* modes, co // figure out the max value for each of them... u32 range = 256; while (--range > 0) { - IntegerEncodedValue val = EncodingsValues[range]; + IntegerEncodedValue val = ASTC_ENCODINGS_VALUES[range]; u32 bitLength = val.GetBitLength(nValues); if (bitLength <= nBitsForColorData) { // Find the smallest possible range that matches the given encoding while (--range > 0) { - IntegerEncodedValue newval = EncodingsValues[range]; + IntegerEncodedValue newval = ASTC_ENCODINGS_VALUES[range]; if (!newval.MatchesEncoding(val)) { break; } diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 441e8eb048..0229ae1220 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -77,7 +77,7 @@ constexpr std::array MakeEncodedValues() { return encodings; } -constexpr std::array EncodingsValues = MakeEncodedValues(); +constexpr std::array ASTC_ENCODINGS_VALUES = MakeEncodedValues(); // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] // is the same as [(num_bits - 1):0] and repeats all the way down. 
@@ -120,13 +120,6 @@ constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable(); constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable(); constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable(); -struct AstcBufferData { - decltype(EncodingsValues) encoding_values = EncodingsValues; - decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE; - decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE; - decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; -} constexpr ASTC_BUFFER_DATA; - void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth, uint32_t block_width, uint32_t block_height, std::span output); From 851c76233db1d6fab507b0ab3423284a79829ede Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 19 Jun 2021 11:16:25 -0400 Subject: [PATCH 3/3] util_shaders: Specify ASTC decoder memory barrier bits --- src/video_core/renderer_opengl/util_shaders.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 7e32f49caa..abaf1ee6a7 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -118,7 +118,12 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); } - glMemoryBarrier(GL_ALL_BARRIER_BITS); + // Precautionary barrier to ensure the compute shader is done decoding prior to texture access. 
+ // GL_TEXTURE_FETCH_BARRIER_BIT and GL_SHADER_IMAGE_ACCESS_BARRIER_BIT are used in a separate + // glMemoryBarrier call by the texture cache runtime + glMemoryBarrier(GL_UNIFORM_BARRIER_BIT | GL_COMMAND_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT | + GL_TEXTURE_UPDATE_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT | + GL_SHADER_STORAGE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); program_manager.RestoreGuestCompute(); }