From db5f2bfa7ef522a56101776248e7cd0daea6d266 Mon Sep 17 00:00:00 2001 From: Subv Date: Sun, 15 Apr 2018 19:52:25 -0500 Subject: [PATCH 1/5] GPU/TIC: Added the pitch and block height fields to the TIC structure. --- src/video_core/textures/texture.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 9d443ea90c..58cbb2115d 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -4,6 +4,7 @@ #pragma once +#include "common/assert.h" #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" @@ -57,6 +58,8 @@ union TextureHandle { static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); struct TICEntry { + static constexpr u32 DefaultBlockHeight = 16; + union { u32 raw; BitField<0, 7, TextureFormat> format; @@ -70,7 +73,12 @@ struct TICEntry { BitField<0, 16, u32> address_high; BitField<21, 3, TICHeaderVersion> header_version; }; - INSERT_PADDING_BYTES(4); + union { + BitField<3, 3, u32> block_height; + + // High 16 bits of the pitch value + BitField<0, 16, u32> pitch_high; + }; union { BitField<0, 16, u32> width_minus_1; BitField<23, 4, TextureType> texture_type; @@ -82,6 +90,13 @@ struct TICEntry { return static_cast((static_cast(address_high) << 32) | address_low); } + u32 Pitch() const { + ASSERT(header_version == TICHeaderVersion::Pitch || + header_version == TICHeaderVersion::PitchColorKey); + // The pitch value is 21 bits, and is 32B aligned. + return pitch_high << 5; + } + u32 Width() const { return width_minus_1 + 1; } From 6b63aaa5b4f55621117e27c6b80979908c255e75 Mon Sep 17 00:00:00 2001 From: Subv Date: Sun, 15 Apr 2018 19:53:15 -0500 Subject: [PATCH 2/5] GPU: Allow using a configurable block height when unswizzling textures. --- .../renderer_opengl/gl_rasterizer_cache.cpp | 11 ++++++++++- src/video_core/textures/decoders.cpp | 9 ++++----- src/video_core/textures/decoders.h | 3 ++- src/video_core/textures/texture.h | 7 +++++++ 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 213b20a21a..9d005936d3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1041,9 +1041,18 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu params.height = config.tic.Height(); params.is_tiled = config.tic.IsTiled(); params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); + + if (config.tic.IsTiled()) { + params.block_height = config.tic.BlockHeight(); + } else { + // Use the texture-provided stride value if the texture isn't tiled. + params.stride = params.PixelsInBytes(config.tic.Pitch()); + } + params.UpdateParams(); - if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0) { + if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0 || + params.stride != params.width) { Surface src_surface; MathUtil::Rectangle rect; std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true); diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 2e87281ebc..9c2a10d2e9 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -56,23 +56,22 @@ u32 BytesPerPixel(TextureFormat format) { } } -std::vector UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height) { +std::vector UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, + u32 block_height) { u8* data = Memory::GetPointer(address); u32 bytes_per_pixel = BytesPerPixel(format); - static constexpr u32 DefaultBlockHeight = 16; - std::vector unswizzled_data(width * height * bytes_per_pixel); switch (format) { case TextureFormat::DXT1: // In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values. CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data, - unswizzled_data.data(), true, DefaultBlockHeight); + unswizzled_data.data(), true, block_height); break; case TextureFormat::A8R8G8B8: CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, - unswizzled_data.data(), true, DefaultBlockHeight); + unswizzled_data.data(), true, block_height); break; default: UNIMPLEMENTED_MSG("Format not implemented"); diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 0c21694ff2..a700911cf7 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -14,7 +14,8 @@ namespace Texture { /** * Unswizzles a swizzled texture without changing its format. */ -std::vector UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height); +std::vector UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, + u32 block_height = TICEntry::DefaultBlockHeight); /** * Decodes an unswizzled texture into a A8R8G8B8 texture. diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 58cbb2115d..09d2317e00 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -105,6 +105,13 @@ struct TICEntry { return height_minus_1 + 1; } + u32 BlockHeight() const { + ASSERT(header_version == TICHeaderVersion::BlockLinear || + header_version == TICHeaderVersion::BlockLinearColorKey); + // The block height is stored in log2 format. + return 1 << block_height; + } + bool IsTiled() const { return header_version == TICHeaderVersion::BlockLinear || header_version == TICHeaderVersion::BlockLinearColorKey; From ac09b5a2e945f587a8b3b712f54b76d46edb0c2f Mon Sep 17 00:00:00 2001 From: Subv Date: Sun, 15 Apr 2018 19:54:38 -0500 Subject: [PATCH 3/5] GLCache: Added a function to convert cached PixelFormats back to texture formats. TODO: The way we handle cached formats must change, framebuffer and texture formats are too different to keep them in the same place. --- src/video_core/renderer_opengl/gl_rasterizer_cache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 3293905d6e..0b2e3ffef0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -115,6 +115,18 @@ struct SurfaceParams { } } + static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) { + // TODO(Subv): Properly implement this + switch (format) { + case PixelFormat::RGBA8: + return Tegra::Texture::TextureFormat::A8R8G8B8; + case PixelFormat::DXT1: + return Tegra::Texture::TextureFormat::DXT1; + default: + UNREACHABLE(); + } + } + static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { SurfaceType a_type = GetFormatType(pixel_format_a); SurfaceType b_type = GetFormatType(pixel_format_b); From a3e82e8e1f5cb39246f30cac045db8e243f0daee Mon Sep 17 00:00:00 2001 From: Subv Date: Sun, 15 Apr 2018 19:55:39 -0500 Subject: [PATCH 4/5] GLCache: Take into account the texture's block height when caching and unswizzling. --- .../renderer_opengl/gl_rasterizer.cpp | 3 +- .../renderer_opengl/gl_rasterizer_cache.cpp | 82 +++++++++---------- .../renderer_opengl/gl_rasterizer_cache.h | 1 + 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7b6240e656..9522a35ea4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -523,7 +523,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu src_params.width = std::min(framebuffer.width, pixel_stride); src_params.height = framebuffer.height; src_params.stride = pixel_stride; - src_params.is_tiled = false; + src_params.is_tiled = true; + src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); src_params.UpdateParams(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 9d005936d3..a92773f11b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -102,39 +102,36 @@ static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { } template -void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) { +void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr base, VAddr start, + VAddr end) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); - // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the - // configuration for this and perform more generic un/swizzle - LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); - VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, - Memory::GetPointer(base), gl_buffer, morton_to_gl); + if (morton_to_gl) { + auto data = Tegra::Texture::UnswizzleTexture( + base, SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, + block_height); + std::memcpy(gl_buffer, data.data(), data.size()); + } else { + // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check + // the configuration for this and perform more generic un/swizzle + LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); + VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, + Memory::GetPointer(base), gl_buffer, morton_to_gl); + } } -template <> -void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, - VAddr start, VAddr end) { - constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::DXT1) / 8; - constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::DXT1); - - // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the - // configuration for this and perform more generic un/swizzle - LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); - auto data = - Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::DXT1, stride, height); - std::memcpy(gl_buffer, data.data(), data.size()); -} - -static constexpr std::array morton_to_gl_fns = { - MortonCopy, - MortonCopy, +static constexpr std::array morton_to_gl_fns = + { + MortonCopy, + MortonCopy, }; -static constexpr std::array gl_to_morton_fns = { - MortonCopy, - MortonCopy, +static constexpr std::array gl_to_morton_fns = + { + MortonCopy, + // TODO(Subv): Swizzling the DXT1 format is not yet supported + nullptr, }; // Allocate an uninitialized texture of appropriate size and format for the surface @@ -311,15 +308,16 @@ MathUtil::Rectangle SurfaceParams::GetScaledSubRect(const SurfaceParams& su bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { return std::tie(other_surface.addr, other_surface.width, other_surface.height, - other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) == - std::tie(addr, width, height, stride, pixel_format, is_tiled) && + other_surface.stride, other_surface.block_height, other_surface.pixel_format, + other_surface.is_tiled) == + std::tie(addr, width, height, stride, block_height, pixel_format, is_tiled) && pixel_format != PixelFormat::Invalid; } bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { return sub_surface.addr >= addr && sub_surface.end <= end && sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && - sub_surface.is_tiled == is_tiled && + sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height && (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && GetSubRect(sub_surface).left + sub_surface.width <= stride; @@ -328,7 +326,8 @@ bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && addr <= expanded_surface.end && expanded_surface.addr <= end && - is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride && + is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height && + stride == expanded_surface.stride && (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % BytesInPixels(stride * (is_tiled ? 8 : 1)) == 0; @@ -339,6 +338,9 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { end < texcopy_params.end) { return false; } + if (texcopy_params.block_height != block_height) + return false; + if (texcopy_params.width != texcopy_params.stride) { const u32 tile_stride = static_cast(BytesInPixels(stride * (is_tiled ? 8 : 1))); return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && @@ -481,18 +483,13 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { const u64 start_offset = load_start - addr; if (!is_tiled) { - ASSERT(type == SurfaceType::Color); const u32 bytes_per_pixel{GetFormatBpp() >> 3}; - // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check - // the configuration for this and perform more generic un/swizzle - LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); - VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4, - texture_src_data + start_offset, &gl_buffer[start_offset], - true); + std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, + bytes_per_pixel * width * height); } else { - morton_to_gl_fns[static_cast(pixel_format)](stride, height, &gl_buffer[0], addr, - load_start, load_end); + morton_to_gl_fns[static_cast(pixel_format)]( + stride, block_height, height, &gl_buffer[0], addr, load_start, load_end); } } @@ -533,11 +530,10 @@ void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) { if (backup_bytes) std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); } else if (!is_tiled) { - ASSERT(type == SurfaceType::Color); std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); } else { - gl_to_morton_fns[static_cast(pixel_format)](stride, height, &gl_buffer[0], addr, - flush_start, flush_end); + gl_to_morton_fns[static_cast(pixel_format)]( + stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end); } } @@ -1103,6 +1099,8 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( color_params.res_scale = resolution_scale_factor; color_params.width = config.width; color_params.height = config.height; + // TODO(Subv): Can framebuffers use a different block height? + color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; SurfaceParams depth_params = color_params; color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address()); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 0b2e3ffef0..26d6c3061a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -225,6 +225,7 @@ struct SurfaceParams { u32 width = 0; u32 height = 0; u32 stride = 0; + u32 block_height = 0; u16 res_scale = 1; bool is_tiled = false; From 48d4efbd696d1dbd5330d74e69a52f8e508d279d Mon Sep 17 00:00:00 2001 From: Subv Date: Sun, 15 Apr 2018 19:56:07 -0500 Subject: [PATCH 5/5] GPU: Pitch textures are now supported, don't assert when encountering them. --- src/video_core/engines/maxwell_3d.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index a2f1626024..2a3ff234ab 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -218,8 +218,9 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { Texture::TICEntry tic_entry; Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); - ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear, - "TIC versions other than BlockLinear are unimplemented"); + ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || + tic_entry.header_version == Texture::TICHeaderVersion::Pitch, + "TIC versions other than BlockLinear or Pitch are unimplemented"); ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) || (tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap),