1
0
Fork 0
forked from suyu/suyu

astc_decoder.comp: Remove unnecessary LUT SSBOs

We can move them to instead be compile time constants within the shader.
This commit is contained in:
ameerj 2021-06-19 10:56:13 -04:00
parent 31b125ef57
commit ace20ba4a4
5 changed files with 34 additions and 113 deletions

View file

@ -11,11 +11,8 @@
#define UNIFORM(n) #define UNIFORM(n)
#define BINDING_INPUT_BUFFER 0 #define BINDING_INPUT_BUFFER 0
#define BINDING_ENC_BUFFER 1 #define BINDING_ENC_BUFFER 1
#define BINDING_6_TO_8_BUFFER 2 #define BINDING_SWIZZLE_BUFFER 2
#define BINDING_7_TO_8_BUFFER 3 #define BINDING_OUTPUT_IMAGE 3
#define BINDING_8_TO_8_BUFFER 4
#define BINDING_SWIZZLE_BUFFER 5
#define BINDING_OUTPUT_IMAGE 6
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
@ -25,9 +22,6 @@
#define BINDING_SWIZZLE_BUFFER 0 #define BINDING_SWIZZLE_BUFFER 0
#define BINDING_INPUT_BUFFER 1 #define BINDING_INPUT_BUFFER 1
#define BINDING_ENC_BUFFER 2 #define BINDING_ENC_BUFFER 2
#define BINDING_6_TO_8_BUFFER 3
#define BINDING_7_TO_8_BUFFER 4
#define BINDING_8_TO_8_BUFFER 5
#define BINDING_OUTPUT_IMAGE 0 #define BINDING_OUTPUT_IMAGE 0
#endif #endif
@ -74,16 +68,6 @@ layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 {
layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues { layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues {
EncodingData encoding_values[]; EncodingData encoding_values[];
}; };
// ASTC Precompiled tables
layout(binding = BINDING_6_TO_8_BUFFER, std430) readonly buffer REPLICATE_6_BIT_TO_8 {
uint REPLICATE_6_BIT_TO_8_TABLE[];
};
layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_TO_8 {
uint REPLICATE_7_BIT_TO_8_TABLE[];
};
layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 {
uint REPLICATE_8_BIT_TO_8_TABLE[];
};
layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
@ -134,6 +118,19 @@ const uint REPLICATE_4_BIT_TO_6_TABLE[16] =
const uint REPLICATE_5_BIT_TO_6_TABLE[32] = const uint REPLICATE_5_BIT_TO_6_TABLE[32] =
uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 33, 35, 37, 39, 41, 43, 45, uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 33, 35, 37, 39, 41, 43, 45,
47, 49, 51, 53, 55, 57, 59, 61, 63); 47, 49, 51, 53, 55, 57, 59, 61, 63);
const uint REPLICATE_6_BIT_TO_8_TABLE[64] =
uint[](0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 65, 69, 73, 77, 81, 85, 89,
93, 97, 101, 105, 109, 113, 117, 121, 125, 130, 134, 138, 142, 146, 150, 154, 158, 162,
166, 170, 174, 178, 182, 186, 190, 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235,
239, 243, 247, 251, 255);
const uint REPLICATE_7_BIT_TO_8_TABLE[128] =
uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44,
46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88,
90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126,
129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159, 161, 163,
165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199,
201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235,
237, 239, 241, 243, 245, 247, 249, 251, 253, 255);
// Input ASTC texture globals // Input ASTC texture globals
uint current_index = 0; uint current_index = 0;
@ -230,7 +227,7 @@ uint FastReplicateTo8(uint value, uint num_bits) {
case 7: case 7:
return REPLICATE_7_BIT_TO_8_TABLE[value]; return REPLICATE_7_BIT_TO_8_TABLE[value];
case 8: case 8:
return REPLICATE_8_BIT_TO_8_TABLE[value]; return value;
} }
return Replicate(value, num_bits, 8); return Replicate(value, num_bits, 8);
} }

View file

@ -69,7 +69,8 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
swizzle_table_buffer.Create(); swizzle_table_buffer.Create();
astc_buffer.Create(); astc_buffer.Create();
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_BUFFER_DATA), &ASTC_BUFFER_DATA, 0); glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_ENCODINGS_VALUES), &ASTC_ENCODINGS_VALUES,
0);
} }
UtilShaders::~UtilShaders() = default; UtilShaders::~UtilShaders() = default;
@ -79,11 +80,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_ENC_BUFFER = 2; static constexpr GLuint BINDING_ENC_BUFFER = 2;
static constexpr GLuint BINDING_6_TO_8_BUFFER = 3;
static constexpr GLuint BINDING_7_TO_8_BUFFER = 4;
static constexpr GLuint BINDING_8_TO_8_BUFFER = 5;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
const Extent2D tile_size{ const Extent2D tile_size{
@ -92,18 +88,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
}; };
program_manager.BindHostCompute(astc_decoder_program.handle); program_manager.BindHostCompute(astc_decoder_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle, glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
offsetof(AstcBufferData, encoding_values),
sizeof(AstcBufferData::encoding_values));
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_6_TO_8_BUFFER, astc_buffer.handle,
offsetof(AstcBufferData, replicate_6_to_8),
sizeof(AstcBufferData::replicate_6_to_8));
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_7_TO_8_BUFFER, astc_buffer.handle,
offsetof(AstcBufferData, replicate_7_to_8),
sizeof(AstcBufferData::replicate_7_to_8));
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle,
offsetof(AstcBufferData, replicate_8_to_8),
sizeof(AstcBufferData::replicate_8_to_8));
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(1, tile_size.width, tile_size.height); glUniform2ui(1, tile_size.width, tile_size.height);

View file

@ -30,19 +30,16 @@
namespace Vulkan { namespace Vulkan {
using Tegra::Texture::SWIZZLE_TABLE; using Tegra::Texture::SWIZZLE_TABLE;
using Tegra::Texture::ASTC::EncodingsValues; using Tegra::Texture::ASTC::ASTC_ENCODINGS_VALUES;
using namespace Tegra::Texture::ASTC; using namespace Tegra::Texture::ASTC;
namespace { namespace {
constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
constexpr u32 ASTC_BINDING_ENC_BUFFER = 1; constexpr u32 ASTC_BINDING_ENC_BUFFER = 1;
constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2; constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2;
constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3; constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3;
constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4; constexpr size_t ASTC_NUM_BINDINGS = 4;
constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 5;
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 6;
constexpr size_t ASTC_NUM_BINDINGS = 7;
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
return { return {
@ -87,27 +84,6 @@ std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorS
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr, .pImmutableSamplers = nullptr,
}, },
{
.binding = ASTC_BINDING_6_TO_8_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = ASTC_BINDING_7_TO_8_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = ASTC_BINDING_8_TO_8_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{ {
.binding = ASTC_BINDING_SWIZZLE_BUFFER, .binding = ASTC_BINDING_SWIZZLE_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
@ -155,30 +131,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() {
.offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry), .offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry),
.stride = sizeof(DescriptorUpdateEntry), .stride = sizeof(DescriptorUpdateEntry),
}, },
{
.dstBinding = ASTC_BINDING_6_TO_8_BUFFER,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.offset = ASTC_BINDING_6_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
.stride = sizeof(DescriptorUpdateEntry),
},
{
.dstBinding = ASTC_BINDING_7_TO_8_BUFFER,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.offset = ASTC_BINDING_7_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
.stride = sizeof(DescriptorUpdateEntry),
},
{
.dstBinding = ASTC_BINDING_8_TO_8_BUFFER,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.offset = ASTC_BINDING_8_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
.stride = sizeof(DescriptorUpdateEntry),
},
{ {
.dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
.dstArrayElement = 0, .dstArrayElement = 0,
@ -400,7 +352,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
ASTCDecoderPass::~ASTCDecoderPass() = default; ASTCDecoderPass::~ASTCDecoderPass() = default;
void ASTCDecoderPass::MakeDataBuffer() { void ASTCDecoderPass::MakeDataBuffer() {
constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_BUFFER_DATA) + sizeof(SWIZZLE_TABLE); constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_ENCODINGS_VALUES) + sizeof(SWIZZLE_TABLE);
data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,
@ -414,9 +366,10 @@ void ASTCDecoderPass::MakeDataBuffer() {
data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload); data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload);
const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload); const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload);
std::memcpy(staging_ref.mapped_span.data(), &ASTC_BUFFER_DATA, sizeof(ASTC_BUFFER_DATA)); std::memcpy(staging_ref.mapped_span.data(), &ASTC_ENCODINGS_VALUES,
sizeof(ASTC_ENCODINGS_VALUES));
// Tack on the swizzle table at the end of the buffer // Tack on the swizzle table at the end of the buffer
std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_BUFFER_DATA), &SWIZZLE_TABLE, std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_ENCODINGS_VALUES), &SWIZZLE_TABLE,
sizeof(SWIZZLE_TABLE)); sizeof(SWIZZLE_TABLE));
scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer, scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer,
@ -486,15 +439,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
update_descriptor_queue.Acquire(); update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(map.buffer, input_offset, update_descriptor_queue.AddBuffer(map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset); image.guest_size_bytes - swizzle.buffer_offset);
update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values), update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(ASTC_ENCODINGS_VALUES));
sizeof(AstcBufferData::encoding_values)); update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES),
update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_6_to_8),
sizeof(AstcBufferData::replicate_6_to_8));
update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_7_to_8),
sizeof(AstcBufferData::replicate_7_to_8));
update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8),
sizeof(AstcBufferData::replicate_8_to_8));
update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData),
sizeof(SWIZZLE_TABLE)); sizeof(SWIZZLE_TABLE));
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));

View file

@ -269,7 +269,7 @@ static void DecodeQuintBlock(InputBitStream& bits, IntegerEncodedVector& result,
static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange, static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange,
u32 nValues) { u32 nValues) {
// Determine encoding parameters // Determine encoding parameters
IntegerEncodedValue val = EncodingsValues[maxRange]; IntegerEncodedValue val = ASTC_ENCODINGS_VALUES[maxRange];
// Start decoding // Start decoding
u32 nValsDecoded = 0; u32 nValsDecoded = 0;
@ -310,7 +310,7 @@ struct TexelWeightParams {
nIdxs *= 2; nIdxs *= 2;
} }
return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs); return ASTC_ENCODINGS_VALUES[m_MaxWeight].GetBitLength(nIdxs);
} }
u32 GetNumWeightValues() const { u32 GetNumWeightValues() const {
@ -755,12 +755,12 @@ static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, co
// figure out the max value for each of them... // figure out the max value for each of them...
u32 range = 256; u32 range = 256;
while (--range > 0) { while (--range > 0) {
IntegerEncodedValue val = EncodingsValues[range]; IntegerEncodedValue val = ASTC_ENCODINGS_VALUES[range];
u32 bitLength = val.GetBitLength(nValues); u32 bitLength = val.GetBitLength(nValues);
if (bitLength <= nBitsForColorData) { if (bitLength <= nBitsForColorData) {
// Find the smallest possible range that matches the given encoding // Find the smallest possible range that matches the given encoding
while (--range > 0) { while (--range > 0) {
IntegerEncodedValue newval = EncodingsValues[range]; IntegerEncodedValue newval = ASTC_ENCODINGS_VALUES[range];
if (!newval.MatchesEncoding(val)) { if (!newval.MatchesEncoding(val)) {
break; break;
} }

View file

@ -77,7 +77,7 @@ constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
return encodings; return encodings;
} }
constexpr std::array<IntegerEncodedValue, 256> EncodingsValues = MakeEncodedValues(); constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
// is the same as [(num_bits - 1):0] and repeats all the way down. // is the same as [(num_bits - 1):0] and repeats all the way down.
@ -120,13 +120,6 @@ constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
struct AstcBufferData {
decltype(EncodingsValues) encoding_values = EncodingsValues;
decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
} constexpr ASTC_BUFFER_DATA;
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);