From 8ce158bce6912b2263f1724e6c09d8b517ef18c3 Mon Sep 17 00:00:00 2001 From: Ameer J <52414509+ameerj@users.noreply.github.com> Date: Wed, 2 Aug 2023 18:15:59 -0400 Subject: [PATCH] Revert "vulkan dims specialization" This reverts commit e6243058f2269bd79ac8479d58e55feec2611e9d. --- src/video_core/host_shaders/CMakeLists.txt | 57 ---------- src/video_core/host_shaders/astc_decoder.comp | 42 +++---- .../host_shaders/astc_decoder_spv_includes.h | 20 ---- .../renderer_vulkan/vk_compute_pass.cpp | 104 ++---------------- .../renderer_vulkan/vk_compute_pass.h | 2 - 5 files changed, 27 insertions(+), 198 deletions(-) delete mode 100644 src/video_core/host_shaders/astc_decoder_spv_includes.h diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 20e8388eed..e61d9af806 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -13,11 +13,6 @@ set(GLSL_INCLUDES ${FIDELITYFX_FILES} ) -set(ASTC_INCLUDES - # astc_decoder_glsl_includes.h - astc_decoder_spv_includes.h -) - set(SHADER_FILES astc_decoder.comp blit_color_float.frag @@ -100,60 +95,9 @@ if (NOT GLSLANG_ERROR STREQUAL "") set(QUIET_FLAG "") endif() -macro(ASTC_GEN) - # paired list of valid astc block dimensions - set(ASTC_WIDTHS 4 5 5 6 6 6 8 8 8 10 10 10 10 12 12) - set(ASTC_HEIGHTS 4 4 5 4 5 6 5 6 8 5 6 8 10 10 12) - list(LENGTH ASTC_WIDTHS NUM_ASTC_FORMATS) - math(EXPR NUM_ASTC_FORMATS "${NUM_ASTC_FORMATS}-1") - foreach(i RANGE ${NUM_ASTC_FORMATS}) - list(GET ASTC_WIDTHS ${i} ASTC_WIDTH) - list(GET ASTC_HEIGHTS ${i} ASTC_HEIGHT) - - # Vulkan SPIR-V Specialization - - string(TOUPPER ${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_SPV SPIRV_VARIABLE_NAME) - set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_spv.h) - add_custom_command( - OUTPUT - ${SPIRV_HEADER_FILE} - COMMAND - ${GLSLANGVALIDATOR} -V -DBLOCK_WIDTH=${ASTC_WIDTH} -DBLOCK_HEIGHT=${ASTC_HEIGHT} ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} - MAIN_DEPENDENCY - ${SOURCE_FILE} - ) - set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE}) - - # GLSL Specialization - # Disabled as there was no noticeable performance uplift specializing the shaders for OGL - - # set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}.h) - # set(SHADER_DEFINES "#define BLOCK_WIDTH ${ASTC_WIDTH}" "#define BLOCK_HEIGHT ${ASTC_HEIGHT}") - # set(DEFINES_LINE_NUMBER 14) - # string(TOUPPER ${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT} GLSL_VARIABLE_NAME) - # add_custom_command( - # OUTPUT - # ${SOURCE_HEADER_FILE} - # COMMAND - # ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} "${SHADER_DEFINES}" ${DEFINES_LINE_NUMBER} ${GLSL_VARIABLE_NAME} - # MAIN_DEPENDENCY - # ${SOURCE_FILE} - # DEPENDS - # ${INPUT_FILE} - # ${SOURCE_FILE} - # ) - # set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) - endforeach() -endmacro() - foreach(FILENAME IN ITEMS ${SHADER_FILES}) string(REPLACE "." "_" SHADER_NAME ${FILENAME}) set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) - - if (${FILENAME} MATCHES "astc_decoder.comp") - ASTC_GEN() - endif() - # Skip generating source headers on Vulkan exclusive files if (NOT ${FILENAME} MATCHES "vulkan.*") set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) @@ -207,7 +151,6 @@ endforeach() set(SHADER_SOURCES ${SHADER_FILES}) list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) -list(APPEND SHADER_SOURCES ${ASTC_INCLUDES}) add_custom_target(host_shaders DEPENDS diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index b4bb8299f7..a33c916aca 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -24,9 +24,7 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; BEGIN_PUSH_CONSTANTS -#ifndef BLOCK_WIDTH UNIFORM(1) uvec2 block_dims; -#endif UNIFORM(2) uint layer_stride; UNIFORM(3) uint block_size; UNIFORM(4) uint x_shift; @@ -77,15 +75,7 @@ int color_bitsread = 0; // At most will require BLOCK_WIDTH x BLOCK_HEIGHT x 2 in dual plane mode // So the maximum would be 144 (12 x 12) elements, x 2 for two planes #define DIVCEIL(number, divisor) (number + divisor - 1) / divisor - -#ifndef BLOCK_WIDTH -#define BLOCK_WIDTH block_dims.x -#define BLOCK_HEIGHT block_dims.y #define ARRAY_NUM_ELEMENTS 144 -#else -#define ARRAY_NUM_ELEMENTS BLOCK_WIDTH * BLOCK_HEIGHT -#endif - #define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4) uvec4 result_vector[VECTOR_ARRAY_SIZE]; @@ -275,7 +265,7 @@ uint Hash52(uint p) { } uint Select2DPartition(uint seed, uint x, uint y, uint partition_count) { - if ((BLOCK_WIDTH * BLOCK_HEIGHT) < 32) { + if ((block_dims.y * block_dims.x) < 32) { x <<= 1; y <<= 1; } @@ -888,8 +878,8 @@ uint UnquantizeTexelWeight(EncodingData val) { uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE]; void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { - const uint Ds = uint((BLOCK_WIDTH * 0.5f + 1024) / (BLOCK_WIDTH - 1)); - const uint Dt = uint((BLOCK_HEIGHT * 0.5f + 1024) / (BLOCK_HEIGHT - 1)); + const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); + const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); const uint num_planes = is_dual_plane ? 2 : 1; const uint area = size.x * size.y; const uint loop_count = min(result_index, area * num_planes); @@ -900,8 +890,8 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { UnquantizeTexelWeight(GetEncodingFromVector(itr)); } for (uint plane = 0; plane < num_planes; ++plane) { - for (uint t = 0; t < BLOCK_HEIGHT; t++) { - for (uint s = 0; s < BLOCK_WIDTH; s++) { + for (uint t = 0; t < block_dims.y; t++) { + for (uint s = 0; s < block_dims.x; s++) { const uint cs = Ds * s; const uint ct = Dt * t; const uint gs = (cs * (size.x - 1) + 32) >> 6; @@ -944,7 +934,7 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { VectorIndicesFromBase(offset_base); p.w = result_vector[array_index][vector_index]; } - const uint offset = (t * BLOCK_WIDTH + s) + ARRAY_NUM_ELEMENTS * plane; + const uint offset = (t * block_dims.x + s) + ARRAY_NUM_ELEMENTS * plane; const uint array_index = offset / 4; const uint vector_index = offset % 4; unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4; @@ -986,8 +976,8 @@ int FindLayout(uint mode) { void FillError(ivec3 coord) { - for (uint j = 0; j < BLOCK_HEIGHT; j++) { - for (uint i = 0; i < BLOCK_WIDTH; i++) { + for (uint j = 0; j < block_dims.y; j++) { + for (uint i = 0; i < block_dims.x; i++) { imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0)); } } @@ -1003,8 +993,8 @@ void FillVoidExtentLDR(ivec3 coord) { const float r = float(r_u) / 65535.0f; const float g = float(g_u) / 65535.0f; const float b = float(b_u) / 65535.0f; - for (uint j = 0; j < BLOCK_HEIGHT; j++) { - for (uint i = 0; i < BLOCK_WIDTH; i++) { + for (uint j = 0; j < block_dims.y; j++) { + for (uint i = 0; i < block_dims.x; i++) { imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a)); } } @@ -1099,7 +1089,7 @@ void DecompressBlock(ivec3 coord) { return; } const uvec2 size_params = DecodeBlockSize(mode); - if ((size_params.x > BLOCK_WIDTH) || (size_params.y > BLOCK_HEIGHT)) { + if ((size_params.x > block_dims.x) || (size_params.y > block_dims.y)) { FillError(coord); return; } @@ -1228,21 +1218,21 @@ void DecompressBlock(ivec3 coord) { DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane)); UnquantizeTexelWeights(size_params, dual_plane); - for (uint j = 0; j < BLOCK_HEIGHT; j++) { - for (uint i = 0; i < BLOCK_WIDTH; i++) { + for (uint j = 0; j < block_dims.y; j++) { + for (uint i = 0; i < block_dims.x; i++) { uint local_partition = 0; if (num_partitions > 1) { local_partition = Select2DPartition(partition_index, i, j, num_partitions); } const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); - const uint weight_offset = (j * BLOCK_WIDTH + i); + const uint weight_offset = (j * block_dims.x + i); const uint array_index = weight_offset / 4; const uint vector_index = weight_offset % 4; const uint primary_weight = unquantized_texel_weights[array_index][vector_index]; uvec4 weight_vec = uvec4(primary_weight); if (dual_plane) { - const uint secondary_weight_offset = (j * BLOCK_WIDTH + i) + ARRAY_NUM_ELEMENTS; + const uint secondary_weight_offset = (j * block_dims.x + i) + ARRAY_NUM_ELEMENTS; const uint secondary_array_index = secondary_weight_offset / 4; const uint secondary_vector_index = secondary_weight_offset % 4; const uint secondary_weight = @@ -1280,7 +1270,7 @@ void main() { offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; offset += swizzle; - const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(BLOCK_WIDTH, BLOCK_HEIGHT, 1)); + const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1)); if (any(greaterThanEqual(coord, imageSize(dest_image)))) { return; } diff --git a/src/video_core/host_shaders/astc_decoder_spv_includes.h b/src/video_core/host_shaders/astc_decoder_spv_includes.h deleted file mode 100644 index 44ee50c5fe..0000000000 --- a/src/video_core/host_shaders/astc_decoder_spv_includes.h +++ /dev/null @@ -1,20 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include "video_core/host_shaders/astc_decoder_comp_10x10_spv.h" -#include "video_core/host_shaders/astc_decoder_comp_10x5_spv.h" -#include "video_core/host_shaders/astc_decoder_comp_10x6_spv.h" -#include "video_core/host_shaders/astc_decoder_comp_10x8_spv.h" -#include "video_core/host_shaders/astc_decoder_comp_12x10_spv.h" -#include "video_core/host_shaders/astc_decoder_comp_12x12_spv.h" -#include "video_core/host_shaders/astc_decoder_comp_4x4_spv.h" -#include "video_core/host_shaders/astc_decoder_comp_5x4_spv.h" -#include "video_core/host_shaders/astc_decoder_comp_5x5_spv.h" -#include "video_core/host_shaders/astc_decoder_comp_6x5_spv.h" -#include "video_core/host_shaders/astc_decoder_comp_6x6_spv.h" -#include "video_core/host_shaders/astc_decoder_comp_8x5_spv.h" -#include "video_core/host_shaders/astc_decoder_comp_8x6_spv.h" -#include "video_core/host_shaders/astc_decoder_comp_8x8_spv.h" -#include "video_core/host_shaders/astc_decoder_comp_spv.h" diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index f223422521..54ee030ce4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -11,7 +11,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/div_ceil.h" -#include "video_core/host_shaders/astc_decoder_spv_includes.h" +#include "video_core/host_shaders/astc_decoder_comp_spv.h" #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" @@ -124,62 +124,13 @@ constexpr std::array }}; struct AstcPushConstants { + std::array blocks_dims; u32 layer_stride; u32 block_size; u32 x_shift; u32 block_height; u32 block_height_mask; }; - -size_t AstcFormatIndex(VideoCore::Surface::PixelFormat format) { - switch (format) { - case VideoCore::Surface::PixelFormat::ASTC_2D_4X4_SRGB: - case VideoCore::Surface::PixelFormat::ASTC_2D_4X4_UNORM: - return 0; - case VideoCore::Surface::PixelFormat::ASTC_2D_5X4_SRGB: - case VideoCore::Surface::PixelFormat::ASTC_2D_5X4_UNORM: - return 1; - case VideoCore::Surface::PixelFormat::ASTC_2D_5X5_SRGB: - case VideoCore::Surface::PixelFormat::ASTC_2D_5X5_UNORM: - return 2; - case VideoCore::Surface::PixelFormat::ASTC_2D_6X5_SRGB: - case VideoCore::Surface::PixelFormat::ASTC_2D_6X5_UNORM: - return 3; - case VideoCore::Surface::PixelFormat::ASTC_2D_6X6_SRGB: - case VideoCore::Surface::PixelFormat::ASTC_2D_6X6_UNORM: - return 4; - case VideoCore::Surface::PixelFormat::ASTC_2D_8X5_SRGB: - case VideoCore::Surface::PixelFormat::ASTC_2D_8X5_UNORM: - return 5; - case VideoCore::Surface::PixelFormat::ASTC_2D_8X6_SRGB: - case VideoCore::Surface::PixelFormat::ASTC_2D_8X6_UNORM: - return 6; - case VideoCore::Surface::PixelFormat::ASTC_2D_8X8_SRGB: - case VideoCore::Surface::PixelFormat::ASTC_2D_8X8_UNORM: - return 7; - case VideoCore::Surface::PixelFormat::ASTC_2D_10X5_SRGB: - case VideoCore::Surface::PixelFormat::ASTC_2D_10X5_UNORM: - return 8; - case VideoCore::Surface::PixelFormat::ASTC_2D_10X6_SRGB: - case VideoCore::Surface::PixelFormat::ASTC_2D_10X6_UNORM: - return 9; - case VideoCore::Surface::PixelFormat::ASTC_2D_10X8_SRGB: - case VideoCore::Surface::PixelFormat::ASTC_2D_10X8_UNORM: - return 10; - case VideoCore::Surface::PixelFormat::ASTC_2D_10X10_SRGB: - case VideoCore::Surface::PixelFormat::ASTC_2D_10X10_UNORM: - return 11; - case VideoCore::Surface::PixelFormat::ASTC_2D_12X10_SRGB: - case VideoCore::Surface::PixelFormat::ASTC_2D_12X10_UNORM: - return 12; - case VideoCore::Surface::PixelFormat::ASTC_2D_12X12_SRGB: - case VideoCore::Surface::PixelFormat::ASTC_2D_12X12_UNORM: - return 13; - default: - UNREACHABLE(); - return 0; - } -} } // Anonymous namespace ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, @@ -361,53 +312,19 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, COMPUTE_PUSH_CONSTANT_RANGE, ASTC_DECODER_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_}, memory_allocator{ - memory_allocator_} { - // These must match the order found in AstcFormatIndex - static constexpr std::array, 14> ASTC_SHADERS{ - ASTC_DECODER_COMP_4X4_SPV, ASTC_DECODER_COMP_5X4_SPV, ASTC_DECODER_COMP_5X5_SPV, - ASTC_DECODER_COMP_6X5_SPV, ASTC_DECODER_COMP_6X6_SPV, ASTC_DECODER_COMP_8X5_SPV, - ASTC_DECODER_COMP_8X6_SPV, ASTC_DECODER_COMP_8X8_SPV, ASTC_DECODER_COMP_10X5_SPV, - ASTC_DECODER_COMP_10X6_SPV, ASTC_DECODER_COMP_10X8_SPV, ASTC_DECODER_COMP_10X10_SPV, - ASTC_DECODER_COMP_12X10_SPV, ASTC_DECODER_COMP_12X12_SPV, - }; - for (size_t index = 0; index < ASTC_SHADERS.size(); ++index) { - const auto& code = ASTC_SHADERS[index]; - const auto module_ = device.GetLogical().CreateShaderModule({ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = static_cast(code.size_bytes()), - .pCode = code.data(), - }); - device.SaveShader(code); - astc_pipelines[index] = device.GetLogical().CreateComputePipeline({ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *module_, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *layout, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }); - } -} + memory_allocator_} {} ASTCDecoderPass::~ASTCDecoderPass() = default; void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, std::span swizzles) { using namespace VideoCommon::Accelerated; - + const std::array block_dims{ + VideoCore::Surface::DefaultBlockWidth(image.info.format), + VideoCore::Surface::DefaultBlockHeight(image.info.format), + }; scheduler.RequestOutsideRenderPassOperationContext(); - const VkPipeline vk_pipeline = *astc_pipelines[AstcFormatIndex(image.info.format)]; + const VkPipeline vk_pipeline = *pipeline; const VkImageAspectFlags aspect_mask = image.AspectMask(); const VkImage vk_image = image.Handle(); const bool is_initialized = image.ExchangeInitialization(); @@ -454,9 +371,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, ASSERT(params.origin == (std::array{0, 0, 0})); ASSERT(params.destination == (std::array{0, 0, 0})); ASSERT(params.bytes_per_block_log2 == 4); - scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, params, - descriptor_data](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, + params, descriptor_data](vk::CommandBuffer cmdbuf) { const AstcPushConstants uniforms{ + .blocks_dims = block_dims, .layer_stride = params.layer_stride, .block_size = params.block_size, .x_shift = params.x_shift, diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 1f264bea6b..dd39273763 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -95,8 +95,6 @@ public: std::span swizzles); private: - std::array astc_pipelines; - Scheduler& scheduler; StagingBufferPool& staging_buffer_pool; ComputePassDescriptorQueue& compute_pass_descriptor_queue;