forked from suyu/suyu
Decoders: Improve overall speed.
This commit is contained in:
parent
f5fd6b5c86
commit
98317f2b77
1 changed file with 11 additions and 4 deletions
|
@ -35,7 +35,7 @@ void incrpdep(u32& value) {
|
||||||
|
|
||||||
template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
|
template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
|
||||||
void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, u32 depth,
|
void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, u32 depth,
|
||||||
u32 block_height, u32 block_depth, u32 stride_alignment) {
|
u32 block_height, u32 block_depth, u32 stride) {
|
||||||
// The origin of the transformation can be configured here, leave it as zero as the current API
|
// The origin of the transformation can be configured here, leave it as zero as the current API
|
||||||
// doesn't expose it.
|
// doesn't expose it.
|
||||||
static constexpr u32 origin_x = 0;
|
static constexpr u32 origin_x = 0;
|
||||||
|
@ -45,7 +45,6 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32
|
||||||
// We can configure here a custom pitch
|
// We can configure here a custom pitch
|
||||||
// As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch.
|
// As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch.
|
||||||
const u32 pitch = width * BYTES_PER_PIXEL;
|
const u32 pitch = width * BYTES_PER_PIXEL;
|
||||||
const u32 stride = Common::AlignUpLog2(width, stride_alignment) * BYTES_PER_PIXEL;
|
|
||||||
|
|
||||||
const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
|
const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
|
||||||
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
|
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
|
||||||
|
@ -179,15 +178,23 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe
|
||||||
void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
|
void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
|
||||||
u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
|
u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
|
||||||
u32 stride_alignment) {
|
u32 stride_alignment) {
|
||||||
|
const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
|
||||||
|
const u32 new_bpp = std::min(4U, static_cast<u32>(std::countr_zero(width * bytes_per_pixel)));
|
||||||
|
width = (width * bytes_per_pixel) >> new_bpp;
|
||||||
|
bytes_per_pixel = 1U << new_bpp;
|
||||||
Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
|
Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
|
||||||
stride_alignment);
|
stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
|
void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
|
||||||
u32 height, u32 depth, u32 block_height, u32 block_depth,
|
u32 height, u32 depth, u32 block_height, u32 block_depth,
|
||||||
u32 stride_alignment) {
|
u32 stride_alignment) {
|
||||||
|
const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
|
||||||
|
const u32 new_bpp = std::min(4U, static_cast<u32>(std::countr_zero(width * bytes_per_pixel)));
|
||||||
|
width = (width * bytes_per_pixel) >> new_bpp;
|
||||||
|
bytes_per_pixel = 1U << new_bpp;
|
||||||
Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
|
Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
|
||||||
stride_alignment);
|
stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
|
void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
|
||||||
|
|
Loading…
Reference in a new issue