Implemented Tile Width Spacing

This commit is contained in:
FernandoS27 2018-11-16 13:01:54 -04:00
parent f9a211220c
commit ddfbe0b58d
8 changed files with 55 additions and 36 deletions

View file

@ -68,13 +68,13 @@ void Fermi2D::HandleSurfaceCopy() {
Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth, Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer, src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
dst_buffer, true, regs.src.BlockHeight(), dst_buffer, true, regs.src.BlockHeight(),
regs.src.BlockDepth()); regs.src.BlockDepth(), 0);
} else { } else {
// If the input is linear and the output is tiled, swizzle the input and copy it over. // If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth, Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer, src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer,
src_buffer, false, regs.dst.BlockHeight(), src_buffer, false, regs.dst.BlockHeight(),
regs.dst.BlockDepth()); regs.dst.BlockDepth(), 0);
} }
} }
} }

View file

@ -16,12 +16,12 @@ namespace VideoCore {
using Surface::GetBytesPerPixel; using Surface::GetBytesPerPixel;
using Surface::PixelFormat; using Surface::PixelFormat;
using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr); using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, std::size_t, VAddr);
using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
template <bool morton_to_linear, PixelFormat format> template <bool morton_to_linear, PixelFormat format>
static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
u8* buffer, std::size_t buffer_size, VAddr addr) { u32 tile_width_spacing, u8* buffer, std::size_t buffer_size, VAddr addr) {
constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
@ -31,12 +31,13 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
if constexpr (morton_to_linear) { if constexpr (morton_to_linear) {
Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
stride, height, depth, block_height, block_depth); stride, height, depth, block_height, block_depth,
tile_width_spacing);
} else { } else {
Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x, Tegra::Texture::CopySwizzledData(
(height + tile_size_y - 1) / tile_size_y, depth, (stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y,
bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false,
buffer, false, block_height, block_depth); block_height, block_depth, tile_width_spacing);
} }
} }
@ -325,11 +326,11 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
} }
void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer, u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
std::size_t buffer_size, VAddr addr) { u8* buffer, std::size_t buffer_size, VAddr addr) {
GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, buffer, GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
buffer_size, addr); tile_width_spacing, buffer, buffer_size, addr);
} }
void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel, void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,

View file

@ -12,8 +12,8 @@ namespace VideoCore {
enum class MortonSwizzleMode { MortonToLinear, LinearToMorton }; enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer, u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
std::size_t buffer_size, VAddr addr); u8* buffer, std::size_t buffer_size, VAddr addr);
void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel, void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
u8* morton_data, u8* linear_data, bool morton_to_linear); u8* morton_data, u8* linear_data, bool morton_to_linear);

View file

@ -97,6 +97,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
params.srgb_conversion); params.srgb_conversion);
@ -162,6 +163,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.block_width = 1 << config.memory_layout.block_width; params.block_width = 1 << config.memory_layout.block_width;
params.block_height = 1 << config.memory_layout.block_height; params.block_height = 1 << config.memory_layout.block_height;
params.block_depth = 1 << config.memory_layout.block_depth; params.block_depth = 1 << config.memory_layout.block_depth;
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
@ -197,6 +199,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.block_width = 1 << std::min(block_width, 5U); params.block_width = 1 << std::min(block_width, 5U);
params.block_height = 1 << std::min(block_height, 5U); params.block_height = 1 << std::min(block_height, 5U);
params.block_depth = 1 << std::min(block_depth, 5U); params.block_depth = 1 << std::min(block_depth, 5U);
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromDepthFormat(format); params.pixel_format = PixelFormatFromDepthFormat(format);
params.component_type = ComponentTypeFromDepthFormat(format); params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format); params.type = GetFormatType(params.pixel_format);
@ -223,6 +226,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0,
params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0,
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
@ -387,8 +391,8 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
for (u32 i = 0; i < params.depth; i++) {
    // One slice is processed per iteration, so the depth argument is 1. MortonSwizzle takes
    // (..., block_depth, depth, tile_width_spacing, buffer, ...): depth must come before the
    // tile width spacing, otherwise the spacing is used as the depth and the spacing itself
    // is silently forced to 1.
    MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
                  params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
                  params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
                  gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
    // Advance to the next slice in guest memory and in the host-side buffer.
    offset += layer_size;
    offset_gl += gl_size;
}
@ -396,8 +400,8 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
const u64 offset = params.GetMipmapLevelOffset(mip_level); const u64 offset = params.GetMipmapLevelOffset(mip_level);
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
params.MipBlockHeight(mip_level), params.MipHeight(mip_level), params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
params.MipBlockDepth(mip_level), depth, gl_buffer.data(), gl_buffer.size(), params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
params.addr + offset); gl_buffer.data(), gl_buffer.size(), params.addr + offset);
} }
} }

View file

@ -196,9 +196,15 @@ struct SurfaceParams {
/// Checks if surfaces are compatible for caching /// Checks if surfaces are compatible for caching
bool IsCompatibleSurface(const SurfaceParams& other) const { bool IsCompatibleSurface(const SurfaceParams& other) const {
return std::tie(pixel_format, type, width, height, target, depth) == if (std::tie(pixel_format, type, width, height, target, depth, is_tiled) ==
std::tie(other.pixel_format, other.type, other.width, other.height, other.target, std::tie(other.pixel_format, other.type, other.width, other.height, other.target,
other.depth); other.depth, other.is_tiled)) {
if (!is_tiled)
return true;
return std::tie(block_height, block_depth, tile_width_spacing) ==
std::tie(other.block_height, other.block_depth, other.tile_width_spacing);
}
return false;
} }
/// Initializes parameters for caching, should be called after everything has been initialized /// Initializes parameters for caching, should be called after everything has been initialized
@ -208,6 +214,7 @@ struct SurfaceParams {
u32 block_width; u32 block_width;
u32 block_height; u32 block_height;
u32 block_depth; u32 block_depth;
u32 tile_width_spacing;
PixelFormat pixel_format; PixelFormat pixel_format;
ComponentType component_type; ComponentType component_type;
SurfaceType type; SurfaceType type;

View file

@ -127,7 +127,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
template <bool fast> template <bool fast>
void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel, const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel,
const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth) { const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth,
const u32 width_spacing) {
auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
const u32 stride_x = width * out_bytes_per_pixel; const u32 stride_x = width * out_bytes_per_pixel;
const u32 layer_z = height * stride_x; const u32 layer_z = height * stride_x;
@ -137,7 +138,8 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
const u32 block_x_elements = gob_elements_x; const u32 block_x_elements = gob_elements_x;
const u32 block_y_elements = gob_elements_y * block_height; const u32 block_y_elements = gob_elements_y * block_height;
const u32 block_z_elements = gob_elements_z * block_depth; const u32 block_z_elements = gob_elements_z * block_depth;
const u32 blocks_on_x = div_ceil(width, block_x_elements); const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing);
const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements);
const u32 blocks_on_y = div_ceil(height, block_y_elements); const u32 blocks_on_y = div_ceil(height, block_y_elements);
const u32 blocks_on_z = div_ceil(depth, block_z_elements); const u32 blocks_on_z = div_ceil(depth, block_z_elements);
const u32 xy_block_size = gob_size * block_height; const u32 xy_block_size = gob_size * block_height;
@ -169,13 +171,15 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data,
bool unswizzle, u32 block_height, u32 block_depth) { bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) {
SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth); bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth,
width_spacing);
} else { } else {
SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth); bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth,
width_spacing);
} }
} }
@ -228,19 +232,19 @@ u32 BytesPerPixel(TextureFormat format) {
/// Unswizzles the texture stored at guest address `address` into `unswizzled_data`, keeping the
/// same bytes per pixel on both sides. The width and height are first rounded up to whole tiles
/// of tile_size_x by tile_size_y before being handed to CopySwizzledData.
void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
                      u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
                      u32 block_depth, u32 width_spacing) {
    const u32 tiles_x = (width + tile_size_x - 1) / tile_size_x;
    const u32 tiles_y = (height + tile_size_y - 1) / tile_size_y;
    u8* const swizzled_data = Memory::GetPointer(address);
    CopySwizzledData(tiles_x, tiles_y, depth, bytes_per_pixel, bytes_per_pixel, swizzled_data,
                     unswizzled_data, true, block_height, block_depth, width_spacing);
}
std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth) { u32 block_height, u32 block_depth, u32 width_spacing) {
std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
width, height, depth, block_height, block_depth); width, height, depth, block_height, block_depth, width_spacing);
return unswizzled_data; return unswizzled_data;
} }

View file

@ -22,19 +22,20 @@ inline std::size_t GetGOBSize() {
void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height = TICEntry::DefaultBlockHeight, u32 block_height = TICEntry::DefaultBlockHeight,
u32 block_depth = TICEntry::DefaultBlockHeight); u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
/** /**
* Unswizzles a swizzled texture without changing its format. * Unswizzles a swizzled texture without changing its format.
*/ */
std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height = TICEntry::DefaultBlockHeight, u32 block_height = TICEntry::DefaultBlockHeight,
u32 block_depth = TICEntry::DefaultBlockHeight); u32 block_depth = TICEntry::DefaultBlockHeight,
u32 width_spacing = 0);
/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. /// Copies texture data from a buffer and performs swizzling/unswizzling as necessary.
void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
bool unswizzle, u32 block_height, u32 block_depth); bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
/** /**
* Decodes an unswizzled texture into a A8R8G8B8 texture. * Decodes an unswizzled texture into a A8R8G8B8 texture.

View file

@ -166,6 +166,8 @@ struct TICEntry {
BitField<3, 3, u32> block_height; BitField<3, 3, u32> block_height;
BitField<6, 3, u32> block_depth; BitField<6, 3, u32> block_depth;
BitField<10, 3, u32> tile_width_spacing;
// High 16 bits of the pitch value // High 16 bits of the pitch value
BitField<0, 16, u32> pitch_high; BitField<0, 16, u32> pitch_high;
BitField<26, 1, u32> use_header_opt_control; BitField<26, 1, u32> use_header_opt_control;