forked from suyu/suyu
c9454c8422
Set Pixel Format to Z32 if its R32F and depth compare enabled, and Implement format ZF32_X24S8
1358 lines
62 KiB
C++
1358 lines
62 KiB
C++
// Copyright 2018 yuzu Emulator Project
|
|
// Licensed under GPLv2 or any later version
|
|
// Refer to the license.txt file included.
|
|
|
|
#include <algorithm>
|
|
#include <optional>
|
|
#include <glad/glad.h>
|
|
|
|
#include "common/alignment.h"
|
|
#include "common/assert.h"
|
|
#include "common/logging/log.h"
|
|
#include "common/microprofile.h"
|
|
#include "common/scope_exit.h"
|
|
#include "core/core.h"
|
|
#include "core/hle/kernel/process.h"
|
|
#include "core/settings.h"
|
|
#include "video_core/engines/maxwell_3d.h"
|
|
#include "video_core/memory_manager.h"
|
|
#include "video_core/morton.h"
|
|
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
|
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
|
|
#include "video_core/renderer_opengl/utils.h"
|
|
#include "video_core/surface.h"
|
|
#include "video_core/textures/convert.h"
|
|
#include "video_core/textures/decoders.h"
|
|
|
|
namespace OpenGL {
|
|
|
|
using VideoCore::MortonSwizzle;
|
|
using VideoCore::MortonSwizzleMode;
|
|
using VideoCore::Surface::ComponentTypeFromDepthFormat;
|
|
using VideoCore::Surface::ComponentTypeFromRenderTarget;
|
|
using VideoCore::Surface::ComponentTypeFromTexture;
|
|
using VideoCore::Surface::PixelFormatFromDepthFormat;
|
|
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
|
|
using VideoCore::Surface::PixelFormatFromTextureFormat;
|
|
using VideoCore::Surface::SurfaceTargetFromTextureType;
|
|
|
|
struct FormatTuple {
|
|
GLint internal_format;
|
|
GLenum format;
|
|
GLenum type;
|
|
ComponentType component_type;
|
|
bool compressed;
|
|
};
|
|
|
|
/// Sets the default sampling state on a texture object: linear min/mag filtering,
/// clamp-to-edge wrapping on S and T, and a maximum level of max_mip_level - 1.
/// Textures with exactly one level additionally receive a LOD bias of 1000.
static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
    // Filtering
    glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);

    // Wrapping
    glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);

    // Mip range
    glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);

    const bool has_mipmaps = max_mip_level != 1;
    if (has_mipmaps) {
        return;
    }
    glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0);
}
|
|
|
|
/// Fills in the address- and size-related fields used by the cache.
/// @param gpu_addr_ GPU virtual address of the surface; stored in gpu_addr and resolved to
///                  host_ptr through the GPU memory manager.
void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) {
    auto& gpu_memory{Core::System::GetInstance().GPU().MemoryManager()};

    gpu_addr = gpu_addr_;
    host_ptr = gpu_memory.GetPointer(gpu_addr_);
    size_in_bytes = SizeInBytesRaw();

    if (!IsPixelFormatASTC(pixel_format)) {
        size_in_bytes_gl = SizeInBytesGL();
        return;
    }

    // ASTC is decompressed in software and emulated as RGBA8, i.e. four bytes per texel.
    size_in_bytes_gl = width * height * depth * 4;
}
|
|
|
|
/// Computes the size in bytes of a single mip level.
/// @param mip_level    Level whose size is requested.
/// @param force_gl     When true the level is always sized as non-tiled.
/// @param layer_only   When true the depth is treated as 1 before the mip shift.
/// @param uncompressed When true width/height are used as-is instead of being divided
///                     (rounding up) by the format's default block dimensions.
/// @return Result of Tegra::Texture::CalculateSize for the derived dimensions.
std::size_t SurfaceParams::InnerMipmapMemorySize(u32 mip_level, bool force_gl, bool layer_only,
                                                 bool uncompressed) const {
    const u32 block_w{GetDefaultBlockWidth(pixel_format)};
    const u32 block_h{GetDefaultBlockHeight(pixel_format)};
    const u32 bpp{GetBytesPerPixel(pixel_format)};

    u32 level_width = MipWidth(mip_level);
    u32 level_height = MipHeight(mip_level);
    if (!uncompressed) {
        // Convert texel dimensions into a block count, never dropping below one block.
        level_width = std::max(1U, (level_width + block_w - 1) / block_w);
        level_height = std::max(1U, (level_height + block_h - 1) / block_h);
    }

    const u32 base_depth = layer_only ? 1U : depth;
    const u32 level_depth = std::max(1U, base_depth >> mip_level);

    const u32 level_block_height = MipBlockHeight(mip_level);
    const u32 level_block_depth = MipBlockDepth(mip_level);

    const bool size_as_tiled = force_gl ? false : is_tiled;
    return Tegra::Texture::CalculateSize(size_as_tiled, bpp, level_width, level_height,
                                         level_depth, level_block_height, level_block_depth);
}
|
|
|
|
std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
|
|
bool uncompressed) const {
|
|
std::size_t block_size_bytes = Tegra::Texture::GetGOBSize() * block_height * block_depth;
|
|
std::size_t size = 0;
|
|
for (u32 i = 0; i < max_mip_level; i++) {
|
|
size += InnerMipmapMemorySize(i, force_gl, layer_only, uncompressed);
|
|
}
|
|
if (!force_gl && is_tiled) {
|
|
size = Common::AlignUp(size, block_size_bytes);
|
|
}
|
|
return size;
|
|
}
|
|
|
|
/// Builds the surface parameters for a texture that is sampled by a shader.
/// @param config Texture descriptor pair: `tic` supplies layout, format, dimensions and
///               address; `tsc` supplies the depth-compare flag.
/// @param entry  Shader sampler entry; only used to know whether the shader samples the
///               texture as an array.
/// @return Parameters with identity SurfaceClass::Uploaded and cache fields initialized from
///         the texture address.
/*static*/ SurfaceParams SurfaceParams::CreateForTexture(
    const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) {
    SurfaceParams params{};
    params.is_tiled = config.tic.IsTiled();
    // Each assignment is its own statement (previously chained with the comma operator).
    params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0;
    params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0;
    params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0;
    params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
    params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
    params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
                                                       params.srgb_conversion);

    if (config.tsc.depth_compare_enabled) {
        // Some titles create a single-channel color texture (e.g. normalized 16-bit 'R16U')
        // with depth_compare enabled, then attempt to sample from it via a shadow sampler.
        // Convert 16-bit red formats to Z16 and R32F to Z32F, which also causes GetFormatType
        // to properly return 'Depth' below.
        if (GetFormatType(params.pixel_format) == SurfaceType::ColorTexture) {
            switch (params.pixel_format) {
            case PixelFormat::R16S:
            case PixelFormat::R16U:
            case PixelFormat::R16F:
                params.pixel_format = PixelFormat::Z16;
                break;
            case PixelFormat::R32F:
                params.pixel_format = PixelFormat::Z32F;
                break;
            default:
                LOG_WARNING(HW_GPU, "Color texture format being used with depth compare: {}",
                            static_cast<u32>(params.pixel_format));
                break;
            }
        }
    }

    // NOTE(review): component_type is still derived from r_type after the conversion above,
    // while GetFormatTuple asserts it equals the table entry's component type (Z16 is listed
    // as UNorm) - confirm R16S/R16F sources do not trip that assertion.
    params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
    params.type = GetFormatType(params.pixel_format);
    UNIMPLEMENTED_IF(params.type == SurfaceType::ColorTexture && config.tsc.depth_compare_enabled);

    // Width and height are rounded up to the format's compression factor; the original height
    // is kept separately in unaligned_height.
    params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
    params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
    if (!params.is_tiled) {
        params.pitch = config.tic.Pitch();
    }
    params.unaligned_height = config.tic.Height();
    params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
    params.identity = SurfaceClass::Uploaded;

    switch (params.target) {
    case SurfaceTarget::Texture1D:
    case SurfaceTarget::Texture2D:
        params.depth = 1;
        break;
    case SurfaceTarget::TextureCubemap:
        // Six faces per cube.
        params.depth = config.tic.Depth() * 6;
        break;
    case SurfaceTarget::Texture3D:
        params.depth = config.tic.Depth();
        break;
    case SurfaceTarget::Texture2DArray:
        params.depth = config.tic.Depth();
        if (!entry.IsArray()) {
            // TODO(bunnei): We have seen games re-use a Texture2D as Texture2DArray with depth of
            // one, but sample the texture in the shader as if it were not an array texture. This
            // probably is valid on hardware, but we still need to write a test to confirm this. In
            // emulation, the workaround here is to continue to treat this as a Texture2D. An
            // example game that does this is Super Mario Odyssey (in Cloud Kingdom).
            ASSERT(params.depth == 1);
            params.target = SurfaceTarget::Texture2D;
        }
        break;
    case SurfaceTarget::TextureCubeArray:
        params.depth = config.tic.Depth() * 6;
        if (!entry.IsArray()) {
            // Same workaround as above: a single-cube array sampled as a plain cubemap.
            ASSERT(params.depth == 6);
            params.target = SurfaceTarget::TextureCubemap;
        }
        break;
    default:
        LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target));
        UNREACHABLE();
        params.depth = 1;
        break;
    }

    params.is_layered = SurfaceTargetIsLayered(params.target);
    params.is_array = SurfaceTargetIsArray(params.target);
    params.max_mip_level = config.tic.max_mip_level + 1;
    params.rt = {};

    params.InitCacheParameters(config.tic.Address());

    return params;
}
|
|
|
|
/// Builds the surface parameters for the color render target at the given index, reading its
/// configuration from the Maxwell3D `rt` registers.
/// @param index Index into the Maxwell3D render target register array.
/// @return Parameters for a single-level, non-layered Texture2D with identity
///         SurfaceClass::RenderTarget.
/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) {
    const auto& rt_regs = Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index];
    SurfaceParams params{};

    // Memory layout: block dimensions are stored as shift amounts in the registers.
    params.is_tiled =
        rt_regs.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
    params.block_width = 1 << rt_regs.memory_layout.block_width;
    params.block_height = 1 << rt_regs.memory_layout.block_height;
    params.block_depth = 1 << rt_regs.memory_layout.block_depth;
    params.tile_width_spacing = 1;

    // Format description
    params.pixel_format = PixelFormatFromRenderTargetFormat(rt_regs.format);
    const bool is_srgb_target = rt_regs.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
                                rt_regs.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
    params.srgb_conversion = is_srgb_target;
    params.component_type = ComponentTypeFromRenderTarget(rt_regs.format);
    params.type = GetFormatType(params.pixel_format);

    // Dimensions: for non-tiled targets the width register is used as the pitch and the
    // width in texels is derived from it.
    if (!params.is_tiled) {
        params.pitch = rt_regs.width;
        const u32 bytes_per_pixel = params.GetFormatBpp() / 8;
        params.width = params.pitch / bytes_per_pixel;
    } else {
        params.width = rt_regs.width;
    }
    params.height = rt_regs.height;
    params.unaligned_height = rt_regs.height;

    // Fixed shape of a render target surface
    params.target = SurfaceTarget::Texture2D;
    params.identity = SurfaceClass::RenderTarget;
    params.depth = 1;
    params.max_mip_level = 1;
    params.is_layered = false;

    // Render target specific parameters, not used for caching
    params.rt.index = static_cast<u32>(index);
    params.rt.array_mode = rt_regs.array_mode;
    params.rt.layer_stride = rt_regs.layer_stride;
    params.rt.volume = rt_regs.volume;
    params.rt.base_layer = rt_regs.base_layer;

    params.InitCacheParameters(rt_regs.Address());

    return params;
}
|
|
|
|
/// Builds the surface parameters for the depth (zeta) buffer.
/// @param zeta_width   Width of the depth buffer.
/// @param zeta_height  Height of the depth buffer (also stored as unaligned_height).
/// @param zeta_address GPU virtual address of the depth buffer.
/// @param format       Guest depth format; determines pixel format and component type.
/// @param block_width  Shift amount for the block width, clamped to at most 5.
/// @param block_height Shift amount for the block height, clamped to at most 5.
/// @param block_depth  Shift amount for the block depth, clamped to at most 5.
/// @param type         Memory layout; BlockLinear marks the surface as tiled.
/// @return Parameters for a single-level, non-layered Texture2D with identity
///         SurfaceClass::DepthBuffer.
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
    u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
    u32 block_width, u32 block_height, u32 block_depth,
    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
    // Turns a (clamped) shift amount into a block dimension.
    const auto block_dimension = [](u32 shift) { return 1 << std::min(shift, 5U); };

    SurfaceParams params{};

    // Memory layout
    params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
    params.block_width = block_dimension(block_width);
    params.block_height = block_dimension(block_height);
    params.block_depth = block_dimension(block_depth);
    params.tile_width_spacing = 1;

    // Format description
    params.pixel_format = PixelFormatFromDepthFormat(format);
    params.component_type = ComponentTypeFromDepthFormat(format);
    params.type = GetFormatType(params.pixel_format);
    params.srgb_conversion = false;

    // Dimensions
    params.width = zeta_width;
    params.height = zeta_height;
    params.unaligned_height = zeta_height;

    // Fixed shape of a depth buffer surface
    params.target = SurfaceTarget::Texture2D;
    params.identity = SurfaceClass::DepthBuffer;
    params.depth = 1;
    params.max_mip_level = 1;
    params.is_layered = false;
    params.rt = {};

    params.InitCacheParameters(zeta_address);

    return params;
}
|
|
|
|
/// Builds the surface parameters for a source or destination of a Fermi2D copy.
/// @param config Fermi2D surface registers (layout, format, dimensions, address).
/// @return Parameters for a single-level Texture2D with identity SurfaceClass::Copy.
/*static*/ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
    const Tegra::Engines::Fermi2D::Regs::Surface& config) {
    SurfaceParams params{};

    params.is_tiled = !config.linear;
    // Block dimensions are capped at 32 for tiled surfaces and zero for linear ones.
    // Each assignment is its own statement (previously chained with the comma operator).
    params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0;
    params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0;
    params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0;
    params.tile_width_spacing = 1;
    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
    params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
                             config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
    params.component_type = ComponentTypeFromRenderTarget(config.format);
    params.type = GetFormatType(params.pixel_format);
    params.width = config.width;
    if (!params.is_tiled) {
        // Linear surfaces: the pitch is derived from the width and the format's byte size.
        const u32 bpp = params.GetFormatBpp() / 8;
        params.pitch = config.width * bpp;
    }
    params.height = config.height;
    params.unaligned_height = config.height;
    params.target = SurfaceTarget::Texture2D;
    params.identity = SurfaceClass::Copy;
    params.depth = 1;
    params.max_mip_level = 1;
    params.rt = {};

    params.InitCacheParameters(config.Address());

    return params;
}
|
|
|
|
static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
|
|
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
|
|
{GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
|
|
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI
|
|
{GL_RGB8, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
|
|
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
|
|
false}, // A2B10G10R10U
|
|
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U
|
|
{GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U
|
|
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI
|
|
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F
|
|
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U
|
|
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI
|
|
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
|
|
false}, // R11FG11FB10F
|
|
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
|
|
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
|
true}, // DXT1
|
|
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
|
true}, // DXT23
|
|
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
|
true}, // DXT45
|
|
{GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
|
|
{GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
|
true}, // DXN2UNORM
|
|
{GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
|
|
{GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
|
true}, // BC7U
|
|
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
|
|
true}, // BC6H_UF16
|
|
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
|
|
true}, // BC6H_SF16
|
|
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
|
|
{GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
|
|
{GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F
|
|
{GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F
|
|
{GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F
|
|
{GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F
|
|
{GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U
|
|
{GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S
|
|
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI
|
|
{GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I
|
|
{GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16
|
|
{GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F
|
|
{GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI
|
|
{GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I
|
|
{GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S
|
|
{GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F
|
|
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm,
|
|
false}, // RGBA8_SRGB
|
|
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
|
|
{GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
|
|
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
|
|
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
|
|
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
|
|
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5
|
|
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4
|
|
{GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
|
|
// Compressed sRGB formats
|
|
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
|
true}, // DXT1_SRGB
|
|
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
|
true}, // DXT23_SRGB
|
|
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
|
true}, // DXT45_SRGB
|
|
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
|
|
true}, // BC7U_SRGB
|
|
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
|
|
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
|
|
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
|
|
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB
|
|
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5
|
|
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
|
|
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8
|
|
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB
|
|
|
|
// Depth formats
|
|
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
|
|
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
|
|
false}, // Z16
|
|
|
|
// DepthStencil formats
|
|
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
|
|
false}, // Z24S8
|
|
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
|
|
false}, // S8Z24
|
|
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV,
|
|
ComponentType::Float, false}, // Z32FS8
|
|
}};
|
|
|
|
/// Translates a surface target into the matching OpenGL texture target.
/// Targets without a mapping are logged as critical, hit UNREACHABLE() and yield a
/// value-initialized GLenum.
static GLenum SurfaceTargetToGL(SurfaceTarget target) {
    switch (target) {
    // Non-array targets
    case SurfaceTarget::Texture1D:
        return GL_TEXTURE_1D;
    case SurfaceTarget::Texture2D:
        return GL_TEXTURE_2D;
    case SurfaceTarget::Texture3D:
        return GL_TEXTURE_3D;
    // Array targets
    case SurfaceTarget::Texture1DArray:
        return GL_TEXTURE_1D_ARRAY;
    case SurfaceTarget::Texture2DArray:
        return GL_TEXTURE_2D_ARRAY;
    // Cube targets
    case SurfaceTarget::TextureCubemap:
        return GL_TEXTURE_CUBE_MAP;
    case SurfaceTarget::TextureCubeArray:
        return GL_TEXTURE_CUBE_MAP_ARRAY;
    }

    // Only reached when the value matches none of the enumerators handled above.
    LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast<u32>(target));
    UNREACHABLE();
    return GLenum{};
}
|
|
|
|
/// Looks up the OpenGL format description of a pixel format.
/// Asserts that the pixel format indexes inside the table and that the requested component
/// type equals the one stored in the table entry.
/// @return Reference to the table entry for pixel_format.
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
    const auto table_index = static_cast<unsigned int>(pixel_format);
    ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());

    const FormatTuple& entry = tex_format_tuples[table_index];
    ASSERT(component_type == entry.component_type);

    return entry;
}
|
|
|
|
/// Returns the discrepant array target
|
|
constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) {
|
|
switch (target) {
|
|
case SurfaceTarget::Texture1D:
|
|
return GL_TEXTURE_1D_ARRAY;
|
|
case SurfaceTarget::Texture2D:
|
|
return GL_TEXTURE_2D_ARRAY;
|
|
case SurfaceTarget::Texture3D:
|
|
return GL_NONE;
|
|
case SurfaceTarget::Texture1DArray:
|
|
return GL_TEXTURE_1D;
|
|
case SurfaceTarget::Texture2DArray:
|
|
return GL_TEXTURE_2D;
|
|
case SurfaceTarget::TextureCubemap:
|
|
return GL_TEXTURE_CUBE_MAP_ARRAY;
|
|
case SurfaceTarget::TextureCubeArray:
|
|
return GL_TEXTURE_CUBE_MAP;
|
|
}
|
|
return GL_NONE;
|
|
}
|
|
|
|
/// Returns the rectangle covering the given mip level, built as {0, height, width, 0} where
/// height is derived from unaligned_height and width is MipWidth(mip_level).
Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
    u32 rect_height{std::max(1U, unaligned_height >> mip_level)};

    const bool is_astc = IsPixelFormatASTC(pixel_format);
    if (is_astc) {
        // ASTC formats must stop at the ASTC block size boundary
        const auto block_rows = GetASTCBlockSize(pixel_format).second;
        rect_height = Common::AlignDown(rect_height, block_rows);
    }

    return {0, rect_height, MipWidth(mip_level), 0};
}
|
|
|
|
void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
|
|
std::vector<u8>& gl_buffer, u32 mip_level) {
|
|
u32 depth = params.MipDepth(mip_level);
|
|
if (params.target == SurfaceTarget::Texture2D) {
|
|
// TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
|
|
depth = 1U;
|
|
}
|
|
if (params.is_layered) {
|
|
u64 offset = params.GetMipmapLevelOffset(mip_level);
|
|
u64 offset_gl = 0;
|
|
const u64 layer_size = params.LayerMemorySize();
|
|
const u64 gl_size = params.LayerSizeGL(mip_level);
|
|
for (u32 i = 0; i < params.depth; i++) {
|
|
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
|
|
params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
|
|
params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
|
|
gl_buffer.data() + offset_gl, params.host_ptr + offset);
|
|
offset += layer_size;
|
|
offset_gl += gl_size;
|
|
}
|
|
} else {
|
|
const u64 offset = params.GetMipmapLevelOffset(mip_level);
|
|
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
|
|
params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
|
|
params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
|
|
gl_buffer.data(), params.host_ptr + offset);
|
|
}
|
|
}
|
|
|
|
void RasterizerCacheOpenGL::FastCopySurface(const Surface& src_surface,
|
|
const Surface& dst_surface) {
|
|
const auto& src_params{src_surface->GetSurfaceParams()};
|
|
const auto& dst_params{dst_surface->GetSurfaceParams()};
|
|
|
|
const u32 width{std::min(src_params.width, dst_params.width)};
|
|
const u32 height{std::min(src_params.height, dst_params.height)};
|
|
|
|
glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0,
|
|
0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0,
|
|
0, 0, width, height, 1);
|
|
|
|
dst_surface->MarkAsModified(true, *this);
|
|
}
|
|
|
|
MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
|
|
void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
|
|
const GLuint copy_pbo_handle, const GLenum src_attachment,
|
|
const GLenum dst_attachment,
|
|
const std::size_t cubemap_face) {
|
|
MICROPROFILE_SCOPE(OpenGL_CopySurface);
|
|
ASSERT_MSG(dst_attachment == 0, "Unimplemented");
|
|
|
|
const auto& src_params{src_surface->GetSurfaceParams()};
|
|
const auto& dst_params{dst_surface->GetSurfaceParams()};
|
|
|
|
const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
|
|
const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
|
|
|
|
const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes);
|
|
|
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
|
|
glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_COPY);
|
|
if (source_format.compressed) {
|
|
glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment,
|
|
static_cast<GLsizei>(src_params.size_in_bytes), nullptr);
|
|
} else {
|
|
glGetTextureImage(src_surface->Texture().handle, src_attachment, source_format.format,
|
|
source_format.type, static_cast<GLsizei>(src_params.size_in_bytes),
|
|
nullptr);
|
|
}
|
|
// If the new texture is bigger than the previous one, we need to fill in the rest with data
|
|
// from the CPU.
|
|
if (src_params.size_in_bytes < dst_params.size_in_bytes) {
|
|
// Upload the rest of the memory.
|
|
if (dst_params.is_tiled) {
|
|
// TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest
|
|
// of the data in this case. Games like Super Mario Odyssey seem to hit this case
|
|
// when drawing, it re-uses the memory of a previous texture as a bigger framebuffer
|
|
// but it doesn't clear it beforehand, the texture is already full of zeros.
|
|
LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
|
|
"reinterpretation but the texture is tiled.");
|
|
}
|
|
const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
|
|
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
|
|
glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
|
|
memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
|
|
}
|
|
|
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
|
|
|
const GLsizei width{static_cast<GLsizei>(
|
|
std::min(src_params.GetRect().GetWidth(), dst_params.GetRect().GetWidth()))};
|
|
const GLsizei height{static_cast<GLsizei>(
|
|
std::min(src_params.GetRect().GetHeight(), dst_params.GetRect().GetHeight()))};
|
|
|
|
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
|
|
if (dest_format.compressed) {
|
|
LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!");
|
|
UNREACHABLE();
|
|
} else {
|
|
switch (dst_params.target) {
|
|
case SurfaceTarget::Texture1D:
|
|
glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format,
|
|
dest_format.type, nullptr);
|
|
break;
|
|
case SurfaceTarget::Texture2D:
|
|
glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height,
|
|
dest_format.format, dest_format.type, nullptr);
|
|
break;
|
|
case SurfaceTarget::Texture3D:
|
|
case SurfaceTarget::Texture2DArray:
|
|
case SurfaceTarget::TextureCubeArray:
|
|
glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height,
|
|
static_cast<GLsizei>(dst_params.depth), dest_format.format,
|
|
dest_format.type, nullptr);
|
|
break;
|
|
case SurfaceTarget::TextureCubemap:
|
|
glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0,
|
|
static_cast<GLint>(cubemap_face), width, height, 1,
|
|
dest_format.format, dest_format.type, nullptr);
|
|
break;
|
|
default:
|
|
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
|
|
static_cast<u32>(dst_params.target));
|
|
UNREACHABLE();
|
|
}
|
|
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
|
}
|
|
|
|
dst_surface->MarkAsModified(true, *this);
|
|
}
|
|
|
|
/// Creates the OpenGL texture backing a cached surface.
/// Resolves the CPU address of the surface, creates the texture object for the GL target,
/// allocates immutable storage for every mip level, applies the default texture parameters
/// and labels the GL object.
/// @param params Description of the surface; copied into the object.
CachedSurface::CachedSurface(const SurfaceParams& params)
    : RasterizerCacheObject{params.host_ptr}, params{params},
      gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} {

    // Translate the GPU address; a surface without a CPU mapping is a programming error.
    const auto optional_cpu_addr{
        Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)};
    ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid");
    cpu_addr = *optional_cpu_addr;

    texture.Create(gl_target);

    // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
    // alternatives. This signals a bug on those functions.
    const auto base_width = static_cast<GLsizei>(params.MipWidth(0));
    const auto base_height = static_cast<GLsizei>(params.MipHeight(0));
    memory_size = params.MemorySize();
    reinterpreted = false;

    const auto& tuple = GetFormatTuple(params.pixel_format, params.component_type);
    gl_internal_format = tuple.internal_format;

    // Allocate storage with the dimensionality the target requires.
    switch (params.target) {
    case SurfaceTarget::Texture1D:
        glTextureStorage1D(texture.handle, params.max_mip_level, tuple.internal_format,
                           base_width);
        break;
    case SurfaceTarget::Texture2D:
    case SurfaceTarget::TextureCubemap:
        glTextureStorage2D(texture.handle, params.max_mip_level, tuple.internal_format,
                           base_width, base_height);
        break;
    case SurfaceTarget::Texture3D:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubeArray:
        glTextureStorage3D(texture.handle, params.max_mip_level, tuple.internal_format,
                           base_width, base_height, params.depth);
        break;
    default:
        LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                     static_cast<u32>(params.target));
        UNREACHABLE();
        // Fall back to two-dimensional storage if execution continues past UNREACHABLE().
        glTextureStorage2D(texture.handle, params.max_mip_level, tuple.internal_format,
                           base_width, base_height);
    }

    ApplyTextureDefaults(texture.handle, params.max_mip_level);

    OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
}
|
|
|
|
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
|
|
void CachedSurface::LoadGLBuffer() {
|
|
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
|
|
gl_buffer.resize(params.max_mip_level);
|
|
for (u32 i = 0; i < params.max_mip_level; i++)
|
|
gl_buffer[i].resize(params.GetMipmapSizeGL(i));
|
|
if (params.is_tiled) {
|
|
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
|
|
params.block_width, static_cast<u32>(params.target));
|
|
for (u32 i = 0; i < params.max_mip_level; i++)
|
|
SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
|
|
} else {
|
|
const u32 bpp = params.GetFormatBpp() / 8;
|
|
const u32 copy_size = params.width * bpp;
|
|
if (params.pitch == copy_size) {
|
|
std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
|
|
} else {
|
|
const u8* start{params.host_ptr};
|
|
u8* write_to = gl_buffer[0].data();
|
|
for (u32 h = params.height; h > 0; h--) {
|
|
std::memcpy(write_to, start, copy_size);
|
|
start += params.pitch;
|
|
write_to += copy_size;
|
|
}
|
|
}
|
|
}
|
|
for (u32 i = 0; i < params.max_mip_level; i++) {
|
|
const u32 width = params.MipWidth(i);
|
|
const u32 height = params.MipHeight(i);
|
|
const u32 depth = params.MipDepth(i);
|
|
if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
|
|
// Reserve size for RGBA8 conversion
|
|
constexpr std::size_t rgba_bpp = 4;
|
|
gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
|
|
}
|
|
Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
|
|
height, depth, true, true);
|
|
}
|
|
}
|
|
|
|
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
|
|
void CachedSurface::FlushGLBuffer() {
|
|
MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
|
|
|
|
ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");
|
|
|
|
// OpenGL temporary buffer needs to be big enough to store raw texture size
|
|
gl_buffer.resize(1);
|
|
gl_buffer[0].resize(GetSizeInBytes());
|
|
|
|
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
|
|
const u32 align = std::clamp(params.RowAlign(0), 1U, 8U);
|
|
glPixelStorei(GL_PACK_ALIGNMENT, align);
|
|
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
|
|
ASSERT(!tuple.compressed);
|
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
|
glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
|
|
static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
|
|
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
|
Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
|
|
params.height, params.depth, true, true);
|
|
if (params.is_tiled) {
|
|
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
|
|
params.block_width, static_cast<u32>(params.target));
|
|
|
|
SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
|
|
} else {
|
|
const u32 bpp = params.GetFormatBpp() / 8;
|
|
const u32 copy_size = params.width * bpp;
|
|
if (params.pitch == copy_size) {
|
|
std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
|
|
} else {
|
|
u8* start{params.host_ptr};
|
|
const u8* read_to = gl_buffer[0].data();
|
|
for (u32 h = params.height; h > 0; h--) {
|
|
std::memcpy(start, read_to, copy_size);
|
|
start += params.pitch;
|
|
read_to += copy_size;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Uploads a single mipmap level from the staging buffer gl_buffer[mip_map] into the OpenGL
/// texture, using the compressed or uncompressed upload call that matches params.target.
///
/// @param mip_map        Mipmap level to upload.
/// @param read_fb_handle Not used by this function.
/// @param draw_fb_handle Not used by this function.
void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
                                          GLuint draw_fb_handle) {
    const auto& rect{params.GetRect(mip_map)};

    // Load data from memory to the surface
    const auto x0 = static_cast<GLint>(rect.left);
    const auto y0 = static_cast<GLint>(rect.bottom);
    auto buffer_offset =
        static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
                                 static_cast<std::size_t>(x0)) *
        GetBytesPerPixel(params.pixel_format);

    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);

    const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U);
    glPixelStorei(GL_UNPACK_ALIGNMENT, align);
    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));

    const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
    if (tuple.compressed) {
        // Compressed uploads always cover the whole mip level, starting at the origin
        const auto mip_width = static_cast<GLsizei>(params.MipWidth(mip_map));
        const auto mip_height = static_cast<GLsizei>(params.MipHeight(mip_map));

        switch (params.target) {
        case SurfaceTarget::Texture2D:
            glCompressedTextureSubImage2D(texture.handle, mip_map, 0, 0, mip_width, mip_height,
                                          tuple.internal_format, image_size,
                                          &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture3D:
            glCompressedTextureSubImage3D(texture.handle, mip_map, 0, 0, 0, mip_width, mip_height,
                                          static_cast<GLsizei>(params.MipDepth(mip_map)),
                                          tuple.internal_format, image_size,
                                          &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
            glCompressedTextureSubImage3D(texture.handle, mip_map, 0, 0, 0, mip_width, mip_height,
                                          static_cast<GLsizei>(params.depth),
                                          tuple.internal_format, image_size,
                                          &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::TextureCubemap: {
            // Faces are stored back to back in the staging buffer; upload them one at a time
            const auto layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
            for (std::size_t face = 0; face < params.depth; ++face) {
                glCompressedTextureSubImage3D(texture.handle, mip_map, 0, 0,
                                              static_cast<GLint>(face), mip_width, mip_height, 1,
                                              tuple.internal_format, layer_size,
                                              &gl_buffer[mip_map][buffer_offset]);
                buffer_offset += layer_size;
            }
            break;
        }
        default:
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(params.target));
            UNREACHABLE();
            // Fallback kept from the original code: treat the surface as a 2D texture
            glCompressedTextureSubImage2D(texture.handle, mip_map, 0, 0, mip_width, mip_height,
                                          tuple.internal_format,
                                          static_cast<GLsizei>(params.size_in_bytes_gl),
                                          &gl_buffer[mip_map][buffer_offset]);
        }
    } else {
        const auto rect_width = static_cast<GLsizei>(rect.GetWidth());
        const auto rect_height = static_cast<GLsizei>(rect.GetHeight());

        switch (params.target) {
        case SurfaceTarget::Texture1D:
            glTextureSubImage1D(texture.handle, mip_map, x0, rect_width, tuple.format, tuple.type,
                                &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture2D:
            glTextureSubImage2D(texture.handle, mip_map, x0, y0, rect_width, rect_height,
                                tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture3D:
            glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0, rect_width, rect_height,
                                static_cast<GLsizei>(params.MipDepth(mip_map)), tuple.format,
                                tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
            glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0, rect_width, rect_height,
                                static_cast<GLsizei>(params.depth), tuple.format, tuple.type,
                                &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::TextureCubemap: {
            // One upload per face; each face advances the source offset by one layer
            for (std::size_t face = 0; face < params.depth; ++face) {
                glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
                                    rect_width, rect_height, 1, tuple.format, tuple.type,
                                    &gl_buffer[mip_map][buffer_offset]);
                buffer_offset += params.LayerSizeGL(mip_map);
            }
            break;
        }
        default:
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(params.target));
            UNREACHABLE();
            // Fallback kept from the original code: treat the surface as a 2D texture
            glTextureSubImage2D(texture.handle, mip_map, x0, y0, rect_width, rect_height,
                                tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
        }
    }

    // Restore the default row length so later uploads are not affected
    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
|
|
|
|
void CachedSurface::EnsureTextureDiscrepantView() {
|
|
if (discrepant_view.handle != 0)
|
|
return;
|
|
|
|
const GLenum target{GetArrayDiscrepantTarget(params.target)};
|
|
ASSERT(target != GL_NONE);
|
|
|
|
const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
|
|
constexpr GLuint min_layer = 0;
|
|
constexpr GLuint min_level = 0;
|
|
|
|
glGenTextures(1, &discrepant_view.handle);
|
|
glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level,
|
|
params.max_mip_level, min_layer, num_layers);
|
|
ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level);
|
|
glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
|
|
reinterpret_cast<const GLint*>(swizzle.data()));
|
|
}
|
|
|
|
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
|
|
void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
|
|
MICROPROFILE_SCOPE(OpenGL_TextureUL);
|
|
|
|
for (u32 i = 0; i < params.max_mip_level; i++)
|
|
UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
|
|
}
|
|
|
|
/// Translates the four guest swizzle sources to OpenGL and, when they differ from the cached
/// `swizzle`, stores them and applies them to the texture and, if it exists, discrepant_view.
void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
                                  Tegra::Texture::SwizzleSource swizzle_y,
                                  Tegra::Texture::SwizzleSource swizzle_z,
                                  Tegra::Texture::SwizzleSource swizzle_w) {
    const GLenum gl_x = MaxwellToGL::SwizzleSource(swizzle_x);
    const GLenum gl_y = MaxwellToGL::SwizzleSource(swizzle_y);
    const GLenum gl_z = MaxwellToGL::SwizzleSource(swizzle_z);
    const GLenum gl_w = MaxwellToGL::SwizzleSource(swizzle_w);

    // Nothing to do when the requested swizzle is already in effect
    const bool unchanged =
        swizzle[0] == gl_x && swizzle[1] == gl_y && swizzle[2] == gl_z && swizzle[3] == gl_w;
    if (unchanged) {
        return;
    }

    swizzle = {gl_x, gl_y, gl_z, gl_w};

    const auto gl_swizzle = reinterpret_cast<const GLint*>(swizzle.data());
    glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle);

    const bool has_view = discrepant_view.handle != 0;
    if (has_view) {
        glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle);
    }
}
|
|
|
|
/// Builds the cache on top of the given rasterizer and creates the GL objects it owns:
/// the read and draw framebuffers and the copy PBO.
RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
    : RasterizerCache{rasterizer} {
    // Framebuffers (read first, then draw)
    read_framebuffer.Create();
    draw_framebuffer.Create();

    // Pixel buffer object handed to CopySurface
    copy_pbo.Create();
}
|
|
|
|
/// Returns the surface for a sampled texture: derives the surface parameters from the texture
/// info and sampler entry, then looks them up (or creates them) through GetSurface.
Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
                                                 const GLShader::SamplerEntry& entry) {
    const SurfaceParams texture_params{SurfaceParams::CreateForTexture(config, entry)};
    return GetSurface(texture_params);
}
|
|
|
|
/// Returns the surface backing the current depth (zeta) buffer.
///
/// While the zeta dirty flag is clear the previously returned surface is reused. Otherwise the
/// flag is cleared and the surface is re-resolved from the Maxwell3D registers; an empty Surface
/// is returned (and remembered) when the zeta address is zero or zeta is disabled.
///
/// @param preserve_contents Forwarded to GetSurface.
Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
    auto& maxwell3d{Core::System::GetInstance().GPU().Maxwell3D()};
    const auto& regs{maxwell3d.regs};

    // Fast path: registers untouched since the last lookup
    if (!maxwell3d.dirty_flags.zeta_buffer) {
        return last_depth_buffer;
    }
    maxwell3d.dirty_flags.zeta_buffer = false;

    const bool zeta_unavailable = !regs.zeta.Address() || !regs.zeta_enable;
    if (zeta_unavailable) {
        last_depth_buffer = {};
        return last_depth_buffer;
    }

    const auto& layout{regs.zeta.memory_layout};
    SurfaceParams zeta_params{SurfaceParams::CreateForDepthBuffer(
        regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format,
        layout.block_width, layout.block_height, layout.block_depth, layout.type)};

    last_depth_buffer = GetSurface(zeta_params, preserve_contents);
    return last_depth_buffer;
}
|
|
|
|
/// Returns the surface backing render target `index`.
///
/// While the render target's dirty flag is clear the cached entry of current_color_buffers is
/// reused. Otherwise the flag is reset and the surface is re-resolved; an empty Surface is
/// stored and returned when the index is not below rt_control.count, the target address is zero
/// or its format is NONE.
///
/// @param index             Render target index; must be below Maxwell3D::Regs::NumRenderTargets.
/// @param preserve_contents Forwarded to GetSurface.
Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) {
    // Validate the index before it is used to access any per-render-target state
    ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);

    auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
    const auto& regs{gpu.regs};

    if (!gpu.dirty_flags.color_buffer[index]) {
        return current_color_buffers[index];
    }
    gpu.dirty_flags.color_buffer.reset(index);

    if (index >= regs.rt_control.count) {
        return current_color_buffers[index] = {};
    }

    if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
        return current_color_buffers[index] = {};
    }

    const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};

    return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
}
|
|
|
|
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
|
|
surface->LoadGLBuffer();
|
|
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
|
|
surface->MarkAsModified(false, *this);
|
|
surface->MarkForReload(false);
|
|
}
|
|
|
|
/// Looks up the surface registered at params.host_ptr, or creates and registers a new one.
///
/// - Invalid params yield an empty Surface.
/// - A compatible cached surface is returned as-is (reloaded first if it must be).
/// - An incompatible cached surface is either recreated from its contents
///   (preserve_contents == true) or unregistered and replaced by a new surface.
///
/// @param params            Parameters of the requested surface.
/// @param preserve_contents Whether existing data matters; a newly created surface is loaded
///                          only when this is true.
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
    if (!params.IsValid()) {
        return {};
    }

    // Look up surface in the cache based on address
    Surface cached{TryGet(params.host_ptr)};
    if (cached) {
        const bool compatible = cached->GetSurfaceParams().IsCompatibleSurface(params);
        if (compatible) {
            // Cache hit: only re-read guest memory when the surface is out of sync
            if (cached->MustReload()) {
                LoadSurface(cached);
            }
            return cached;
        }

        if (preserve_contents) {
            // Parameters changed but the old data matters: build the replacement from the
            // old surface and swap the cache registration
            Surface recreated{RecreateSurface(cached, params)};
            Unregister(cached);
            Register(recreated);
            if (recreated->IsUploaded()) {
                RegisterReinterpretSurface(recreated);
            }
            return recreated;
        }

        // Old contents are irrelevant: drop the old surface first to prevent collisions
        Unregister(cached);
    }

    // No usable cached surface - get a new one
    Surface fresh = GetUncachedSurface(params);
    Register(fresh);

    // Only load surface from memory if we care about the contents
    if (preserve_contents) {
        LoadSurface(fresh);
    }

    return fresh;
}
|
|
|
|
/// Returns the reserved surface matching `params` when there is one; otherwise creates a new
/// CachedSurface, reserves it and returns it.
Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) {
    Surface reserved{TryGetReservedSurface(params)};
    if (reserved) {
        return reserved;
    }

    // Nothing reserved for these parameters: create a surface and remember it
    Surface created = std::make_shared<CachedSurface>(params);
    ReserveSurface(created);
    return created;
}
|
|
|
|
void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
|
|
const Surface& dst_surface) {
|
|
const auto& init_params{src_surface->GetSurfaceParams()};
|
|
const auto& dst_params{dst_surface->GetSurfaceParams()};
|
|
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
|
|
GPUVAddr address{init_params.gpu_addr};
|
|
const std::size_t layer_size{dst_params.LayerMemorySize()};
|
|
for (u32 layer = 0; layer < dst_params.depth; layer++) {
|
|
for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
|
|
const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
|
|
const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
|
|
if (!copy) {
|
|
continue;
|
|
}
|
|
const auto& src_params{copy->GetSurfaceParams()};
|
|
const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
|
|
const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};
|
|
|
|
glCopyImageSubData(copy->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0,
|
|
0, 0, dst_surface->Texture().handle,
|
|
SurfaceTargetToGL(dst_params.target), mipmap, 0, 0, layer, width,
|
|
height, 1);
|
|
}
|
|
address += layer_size;
|
|
}
|
|
|
|
dst_surface->MarkAsModified(true, *this);
|
|
}
|
|
|
|
static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
|
|
const Common::Rectangle<u32>& src_rect,
|
|
const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
|
|
GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
|
|
std::size_t cubemap_face = 0) {
|
|
|
|
const auto& src_params{src_surface->GetSurfaceParams()};
|
|
const auto& dst_params{dst_surface->GetSurfaceParams()};
|
|
|
|
OpenGLState prev_state{OpenGLState::GetCurState()};
|
|
SCOPE_EXIT({ prev_state.Apply(); });
|
|
|
|
OpenGLState state;
|
|
state.draw.read_framebuffer = read_fb_handle;
|
|
state.draw.draw_framebuffer = draw_fb_handle;
|
|
state.Apply();
|
|
|
|
u32 buffers{};
|
|
|
|
if (src_params.type == SurfaceType::ColorTexture) {
|
|
switch (src_params.target) {
|
|
case SurfaceTarget::Texture2D:
|
|
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
|
|
GL_TEXTURE_2D, src_surface->Texture().handle, 0);
|
|
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
|
|
0, 0);
|
|
break;
|
|
case SurfaceTarget::TextureCubemap:
|
|
glFramebufferTexture2D(
|
|
GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
|
|
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
|
|
src_surface->Texture().handle, 0);
|
|
glFramebufferTexture2D(
|
|
GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
|
|
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
|
|
break;
|
|
case SurfaceTarget::Texture2DArray:
|
|
glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
|
|
src_surface->Texture().handle, 0, 0);
|
|
glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
|
|
break;
|
|
case SurfaceTarget::Texture3D:
|
|
glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
|
|
SurfaceTargetToGL(src_params.target),
|
|
src_surface->Texture().handle, 0, 0);
|
|
glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
|
|
SurfaceTargetToGL(src_params.target), 0, 0, 0);
|
|
break;
|
|
default:
|
|
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
|
|
GL_TEXTURE_2D, src_surface->Texture().handle, 0);
|
|
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
|
|
0, 0);
|
|
break;
|
|
}
|
|
|
|
switch (dst_params.target) {
|
|
case SurfaceTarget::Texture2D:
|
|
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
|
|
GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
|
|
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
|
|
0, 0);
|
|
break;
|
|
case SurfaceTarget::TextureCubemap:
|
|
glFramebufferTexture2D(
|
|
GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
|
|
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
|
|
dst_surface->Texture().handle, 0);
|
|
glFramebufferTexture2D(
|
|
GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
|
|
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
|
|
break;
|
|
case SurfaceTarget::Texture2DArray:
|
|
glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
|
|
dst_surface->Texture().handle, 0, 0);
|
|
glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
|
|
break;
|
|
|
|
case SurfaceTarget::Texture3D:
|
|
glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
|
|
SurfaceTargetToGL(dst_params.target),
|
|
dst_surface->Texture().handle, 0, 0);
|
|
glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
|
|
SurfaceTargetToGL(dst_params.target), 0, 0, 0);
|
|
break;
|
|
default:
|
|
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
|
|
GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
|
|
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
|
|
0, 0);
|
|
break;
|
|
}
|
|
|
|
buffers = GL_COLOR_BUFFER_BIT;
|
|
} else if (src_params.type == SurfaceType::Depth) {
|
|
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
|
|
GL_TEXTURE_2D, 0, 0);
|
|
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
|
|
src_surface->Texture().handle, 0);
|
|
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
|
|
|
|
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
|
|
GL_TEXTURE_2D, 0, 0);
|
|
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
|
|
dst_surface->Texture().handle, 0);
|
|
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
|
|
|
|
buffers = GL_DEPTH_BUFFER_BIT;
|
|
} else if (src_params.type == SurfaceType::DepthStencil) {
|
|
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
|
|
GL_TEXTURE_2D, 0, 0);
|
|
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
|
|
src_surface->Texture().handle, 0);
|
|
|
|
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
|
|
GL_TEXTURE_2D, 0, 0);
|
|
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
|
|
dst_surface->Texture().handle, 0);
|
|
|
|
buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
|
|
}
|
|
|
|
glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
|
|
dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
|
|
buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
|
|
|
|
return true;
|
|
}
|
|
|
|
void RasterizerCacheOpenGL::FermiCopySurface(
|
|
const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
|
|
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
|
|
const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
|
|
|
|
const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
|
|
const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
|
|
|
|
ASSERT(src_params.pixel_format == dst_params.pixel_format);
|
|
ASSERT(src_params.block_height == dst_params.block_height);
|
|
ASSERT(src_params.is_tiled == dst_params.is_tiled);
|
|
ASSERT(src_params.depth == dst_params.depth);
|
|
ASSERT(src_params.target == dst_params.target);
|
|
ASSERT(src_params.rt.index == dst_params.rt.index);
|
|
|
|
auto src_surface = GetSurface(src_params, true);
|
|
auto dst_surface = GetSurface(dst_params, true);
|
|
|
|
BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle,
|
|
draw_framebuffer.handle);
|
|
|
|
dst_surface->MarkAsModified(true, *this);
|
|
}
|
|
|
|
void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
|
|
const Surface& dst_surface) {
|
|
const auto& src_params{src_surface->GetSurfaceParams()};
|
|
const auto& dst_params{dst_surface->GetSurfaceParams()};
|
|
|
|
// Flush enough memory for both the source and destination surface
|
|
FlushRegion(ToCacheAddr(src_params.host_ptr),
|
|
std::max(src_params.MemorySize(), dst_params.MemorySize()));
|
|
|
|
LoadSurface(dst_surface);
|
|
}
|
|
|
|
/// Creates a surface with `new_params` and fills it from `old_surface`.
///
/// Copy strategy, in order:
/// - accurate GPU emulation enabled: AccurateCopySurface;
/// - same target, both depths equal to 1 and compatible formats: FastCopySurface;
/// - otherwise chosen by the new target (CopySurface for 2D, AccurateCopySurface for 3D,
///   FastLayeredCopySurface or AccurateCopySurface for cubemaps and arrays).
///
/// Formats are "compatible" when both have the same bits per pixel and neither is compressed.
///
/// @return The newly obtained surface (not yet registered in the cache by this function).
Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
                                               const SurfaceParams& new_params) {
    // Borrow the old parameters; old_surface outlives this call, so no copy is needed
    const auto& old_params{old_surface->GetSurfaceParams()};

    // Get a new surface with the new parameters, and copy the previous surface into it
    Surface new_surface{GetUncachedSurface(new_params)};

    // With use_accurate_gpu_emulation enabled, do an accurate surface copy
    if (Settings::values.use_accurate_gpu_emulation) {
        AccurateCopySurface(old_surface, new_surface);
        return new_surface;
    }

    const bool old_compressed =
        GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed;
    const bool new_compressed =
        GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed;
    const bool compatible_formats =
        GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) &&
        !(old_compressed || new_compressed);

    // For compatible surfaces, we can just do fast glCopyImageSubData based copy
    if (old_params.target == new_params.target && old_params.depth == new_params.depth &&
        old_params.depth == 1 && compatible_formats) {
        FastCopySurface(old_surface, new_surface);
        return new_surface;
    }

    switch (new_params.target) {
    case SurfaceTarget::Texture2D:
        CopySurface(old_surface, new_surface, copy_pbo.handle);
        break;
    case SurfaceTarget::Texture3D:
        AccurateCopySurface(old_surface, new_surface);
        break;
    case SurfaceTarget::TextureCubemap:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubeArray:
        if (compatible_formats) {
            FastLayeredCopySurface(old_surface, new_surface);
        } else {
            AccurateCopySurface(old_surface, new_surface);
        }
        break;
    default:
        LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                     static_cast<u32>(new_params.target));
        UNREACHABLE();
    }

    return new_surface;
}
|
|
|
|
/// Returns the cached surface registered at `host_ptr`, as reported by TryGet.
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
    Surface found = TryGet(host_ptr);
    return found;
}
|
|
|
|
void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
|
|
const auto& surface_reserve_key{SurfaceReserveKey::Create(surface->GetSurfaceParams())};
|
|
surface_reserve[surface_reserve_key] = surface;
|
|
}
|
|
|
|
/// Looks up surface_reserve with the key derived from `params`.
/// @return The reserved surface, or an empty Surface when nothing is stored under that key.
Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params) {
    const auto& key{SurfaceReserveKey::Create(params)};
    const auto entry{surface_reserve.find(key)};
    if (entry == surface_reserve.end()) {
        return {};
    }
    return entry->second;
}
|
|
|
|
static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
|
|
u32 height) {
|
|
for (u32 i = 0; i < params.max_mip_level; i++) {
|
|
if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
|
|
return {i};
|
|
}
|
|
}
|
|
return {};
|
|
}
|
|
|
|
/// Finds the layer of `params` whose mipmap `mipmap` starts exactly at GPU address `addr`.
///
/// Candidate addresses start at params.gpu_addr + GetMipmapLevelOffset(mipmap) and advance by
/// LayerMemorySize() per layer, for params.depth layers.
///
/// @param addr   GPU address to look for.
/// @param params Layered surface being searched (taken by reference to avoid a copy).
/// @param mipmap Mipmap level whose per-layer addresses are compared.
/// @return The matching layer index, or std::nullopt when no layer starts at `addr`.
static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams& params,
                                           u32 mipmap) {
    const std::size_t layer_size{params.LayerMemorySize()};
    GPUVAddr candidate{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
    for (u32 layer = 0; layer < params.depth; ++layer) {
        if (candidate == addr) {
            return layer;
        }
        candidate += layer_size;
    }
    return std::nullopt;
}
|
|
|
|
/// Tries to copy `render_surface` into the layer/mipmap of `blitted_surface` that it exactly
/// overlays in memory.
///
/// The copy happens only when a mipmap level of the blitted surface matches the render surface's
/// size in bytes and height, the render surface's dimensions equal that level's (GOB-aligned)
/// width and height, its block height is at least the level's block height, and some layer of
/// that level starts at the render surface's GPU address. On success the blitted surface is
/// marked as modified.
///
/// @param cache           Cache passed to MarkAsModified.
/// @param render_surface  Source of the copy.
/// @param blitted_surface Destination of the copy.
/// @return true when the copy was performed, false otherwise.
static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface& render_surface,
                                       const Surface& blitted_surface) {
    const auto& dst_params = blitted_surface->GetSurfaceParams();
    const auto& src_params = render_surface->GetSurfaceParams();

    const std::size_t src_memory_size = src_params.size_in_bytes;
    const std::optional<u32> level =
        TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
    if (!level.has_value()) {
        return false;
    }

    const bool fits_level = src_params.width == dst_params.MipWidthGobAligned(*level) &&
                            src_params.height == dst_params.MipHeight(*level) &&
                            src_params.block_height >= dst_params.MipBlockHeight(*level);
    if (!fits_level) {
        return false;
    }

    const std::optional<u32> slot = TryFindBestLayer(src_params.gpu_addr, dst_params, *level);
    if (!slot.has_value()) {
        return false;
    }

    // Copy level 0 of the render surface into layer `slot`, mipmap `level` of the destination
    glCopyImageSubData(render_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0,
                       0, 0, 0, blitted_surface->Texture().handle,
                       SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
                       dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
    blitted_surface->MarkAsModified(true, cache);
    return true;
}
|
|
|
|
static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
|
|
const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
|
|
const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
|
|
if (bound2 > bound1)
|
|
return true;
|
|
const auto& dst_params = blitted_surface->GetSurfaceParams();
|
|
const auto& src_params = render_surface->GetSurfaceParams();
|
|
return (dst_params.component_type != src_params.component_type);
|
|
}
|
|
|
|
static bool IsReinterpretInvalidSecond(const Surface render_surface,
|
|
const Surface blitted_surface) {
|
|
const auto& dst_params = blitted_surface->GetSurfaceParams();
|
|
const auto& src_params = render_surface->GetSurfaceParams();
|
|
return (dst_params.height > src_params.height && dst_params.width > src_params.width);
|
|
}
|
|
|
|
/// Reconciles `intersect` with `triggering_surface`, which overlaps it in memory.
///
/// - If the reinterpretation is invalid (either check), `intersect` is unregistered and false is
///   returned.
/// - If the triggering surface fits a layer/mipmap of `intersect`, it is copied there directly.
/// - Otherwise both surfaces are flushed and `intersect` is marked for reload.
///
/// @return true unless `intersect` had to be unregistered.
bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
                                                      Surface intersect) {
    if (IsReinterpretInvalid(triggering_surface, intersect)) {
        Unregister(intersect);
        return false;
    }

    // Fast path: the copy was done on the GPU, nothing else to do
    if (LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
        return true;
    }

    if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
        Unregister(intersect);
        return false;
    }

    // Slow path: go through guest memory
    FlushObject(intersect);
    FlushObject(triggering_surface);
    intersect->MarkForReload(true);
    return true;
}
|
|
|
|
void RasterizerCacheOpenGL::SignalPreDrawCall() {
|
|
if (texception && GLAD_GL_ARB_texture_barrier) {
|
|
glTextureBarrier();
|
|
}
|
|
texception = false;
|
|
}
|
|
|
|
void RasterizerCacheOpenGL::SignalPostDrawCall() {
|
|
for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
|
|
if (current_color_buffers[i] != nullptr) {
|
|
Surface intersect =
|
|
CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr());
|
|
if (intersect != nullptr) {
|
|
PartialReinterpretSurface(current_color_buffers[i], intersect);
|
|
texception = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace OpenGL
|