suyu/src/video_core/texture_cache/texture_cache.h
Fernando Sahmkow cc81c0ce64 Texture_Cache: Redo invalid Surfaces handling.
This commit aims to redo the full setup of invalid textures and
guarantee correct behavior across backends in the case of finding one by
using black dummy textures that match the target of the expected
texture.
2019-11-20 14:59:35 -04:00

1049 lines
43 KiB
C++

// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <array>
#include <memory>
#include <mutex>
#include <set>
#include <tuple>
#include <unordered_map>
#include <vector>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "core/core.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/copy_params.h"
#include "video_core/texture_cache/format_lookup_table.h"
#include "video_core/texture_cache/surface_base.h"
#include "video_core/texture_cache/surface_params.h"
#include "video_core/texture_cache/surface_view.h"
namespace Tegra::Texture {
struct FullTextureInfo;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace VideoCommon {
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
template <typename TSurface, typename TView>
class TextureCache {
using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
using IntervalType = typename IntervalMap::interval_type;
public:
void InvalidateRegion(CacheAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
for (const auto& surface : GetSurfacesInRegion(addr, size)) {
Unregister(surface);
}
}
/**
* Guarantees that rendertargets don't unregister themselves if the
* collide. Protection is currently only done on 3D slices.
*/
void GuardRenderTargets(bool new_guard) {
guard_render_targets = new_guard;
}
void GuardSamplers(bool new_guard) {
guard_samplers = new_guard;
}
void FlushRegion(CacheAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
auto surfaces = GetSurfacesInRegion(addr, size);
if (surfaces.empty()) {
return;
}
std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) {
return a->GetModificationTick() < b->GetModificationTick();
});
for (const auto& surface : surfaces) {
FlushSurface(surface);
}
}
TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
const VideoCommon::Shader::Sampler& entry) {
std::lock_guard lock{mutex};
const auto gpu_addr{tic.Address()};
if (!gpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
const auto cache_addr{ToCacheAddr(host_ptr)};
if (!cache_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
if (guard_samplers) {
sampled_textures.push_back(surface);
}
return view;
}
TView GetImageSurface(const Tegra::Texture::TICEntry& tic,
const VideoCommon::Shader::Image& entry) {
std::lock_guard lock{mutex};
const auto gpu_addr{tic.Address()};
if (!gpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
const auto cache_addr{ToCacheAddr(host_ptr)};
if (!cache_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
if (guard_samplers) {
sampled_textures.push_back(surface);
}
return view;
}
bool TextureBarrier() {
const bool any_rt =
std::any_of(sampled_textures.begin(), sampled_textures.end(),
[](const auto& surface) { return surface->IsRenderTarget(); });
sampled_textures.clear();
return any_rt;
}
TView GetDepthBufferSurface(bool preserve_contents) {
std::lock_guard lock{mutex};
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.depth_buffer) {
return depth_buffer.view;
}
maxwell3d.dirty.depth_buffer = false;
const auto& regs{maxwell3d.regs};
const auto gpu_addr{regs.zeta.Address()};
if (!gpu_addr || !regs.zeta_enable) {
SetEmptyDepthBuffer();
return {};
}
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
const auto cache_addr{ToCacheAddr(host_ptr)};
if (!cache_addr) {
SetEmptyDepthBuffer();
return {};
}
const auto depth_params{SurfaceParams::CreateForDepthBuffer(
system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true);
if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
depth_buffer.target = surface_view.first;
depth_buffer.view = surface_view.second;
if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
return surface_view.second;
}
TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
std::lock_guard lock{mutex};
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.render_target[index]) {
return render_targets[index].view;
}
maxwell3d.dirty.render_target[index] = false;
const auto& regs{maxwell3d.regs};
if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
SetEmptyColorBuffer(index);
return {};
}
const auto& config{regs.rt[index]};
const auto gpu_addr{config.Address()};
if (!gpu_addr) {
SetEmptyColorBuffer(index);
return {};
}
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
const auto cache_addr{ToCacheAddr(host_ptr)};
if (!cache_addr) {
SetEmptyColorBuffer(index);
return {};
}
auto surface_view =
GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index),
preserve_contents, true);
if (render_targets[index].target)
render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
render_targets[index].target = surface_view.first;
render_targets[index].view = surface_view.second;
if (render_targets[index].target)
render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
return surface_view.second;
}
void MarkColorBufferInUse(std::size_t index) {
if (auto& render_target = render_targets[index].target) {
render_target->MarkAsModified(true, Tick());
}
}
void MarkDepthBufferInUse() {
if (depth_buffer.target) {
depth_buffer.target->MarkAsModified(true, Tick());
}
}
void SetEmptyDepthBuffer() {
if (depth_buffer.target == nullptr) {
return;
}
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
depth_buffer.target = nullptr;
depth_buffer.view = nullptr;
}
void SetEmptyColorBuffer(std::size_t index) {
if (render_targets[index].target == nullptr) {
return;
}
render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
render_targets[index].target = nullptr;
render_targets[index].view = nullptr;
}
void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
const Tegra::Engines::Fermi2D::Config& copy_config) {
std::lock_guard lock{mutex};
SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
const GPUVAddr src_gpu_addr = src_config.Address();
const GPUVAddr dst_gpu_addr = dst_config.Address();
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)};
const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)};
const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)};
const auto src_cache_addr{ToCacheAddr(src_host_ptr)};
std::pair<TSurface, TView> dst_surface =
GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false);
std::pair<TSurface, TView> src_surface =
GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false);
ImageBlit(src_surface.second, dst_surface.second, copy_config);
dst_surface.first->MarkAsModified(true, Tick());
}
TSurface TryFindFramebufferSurface(const u8* host_ptr) {
const CacheAddr cache_addr = ToCacheAddr(host_ptr);
if (!cache_addr) {
return nullptr;
}
const CacheAddr page = cache_addr >> registry_page_bits;
std::vector<TSurface>& list = registry[page];
for (auto& surface : list) {
if (surface->GetCacheAddr() == cache_addr) {
return surface;
}
}
return nullptr;
}
u64 Tick() {
return ++ticks;
}
protected:
TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
: system{system}, rasterizer{rasterizer} {
for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
SetEmptyColorBuffer(i);
}
SetEmptyDepthBuffer();
staging_cache.SetSize(2);
const auto make_siblings = [this](PixelFormat a, PixelFormat b) {
siblings_table[static_cast<std::size_t>(a)] = b;
siblings_table[static_cast<std::size_t>(b)] = a;
};
std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
make_siblings(PixelFormat::Z16, PixelFormat::R16U);
make_siblings(PixelFormat::Z32F, PixelFormat::R32F);
make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F);
sampled_textures.reserve(64);
}
~TextureCache() = default;
virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0;
virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface,
const CopyParams& copy_params) = 0;
virtual void ImageBlit(TView& src_view, TView& dst_view,
const Tegra::Engines::Fermi2D::Config& copy_config) = 0;
// Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
// and reading it from a separate buffer.
virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
void ManageRenderTargetUnregister(TSurface& surface) {
auto& maxwell3d = system.GPU().Maxwell3D();
const u32 index = surface->GetRenderTarget();
if (index == DEPTH_RT) {
maxwell3d.dirty.depth_buffer = true;
} else {
maxwell3d.dirty.render_target[index] = true;
}
maxwell3d.dirty.render_settings = true;
}
void Register(TSurface surface) {
const GPUVAddr gpu_addr = surface->GetGpuAddr();
const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
const std::size_t size = surface->GetSizeInBytes();
const std::optional<VAddr> cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_ptr || !cpu_addr) {
LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
gpu_addr);
return;
}
const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
surface->MarkAsContinuous(continuous);
surface->SetCacheAddr(cache_ptr);
surface->SetCpuAddr(*cpu_addr);
RegisterInnerCache(surface);
surface->MarkAsRegistered(true);
rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
}
void Unregister(TSurface surface) {
if (guard_render_targets && surface->IsProtected()) {
return;
}
if (!guard_render_targets && surface->IsRenderTarget()) {
ManageRenderTargetUnregister(surface);
}
const std::size_t size = surface->GetSizeInBytes();
const VAddr cpu_addr = surface->GetCpuAddr();
rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
UnregisterInnerCache(surface);
surface->MarkAsRegistered(false);
ReserveSurface(surface->GetSurfaceParams(), surface);
}
TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
if (const auto surface = TryGetReservedSurface(params); surface) {
surface->SetGpuAddr(gpu_addr);
return surface;
}
// No reserved surface available, create a new one and reserve it
auto new_surface{CreateSurface(gpu_addr, params)};
return new_surface;
}
Core::System& system;
private:
enum class RecycleStrategy : u32 {
Ignore = 0,
Flush = 1,
BufferCopy = 3,
};
enum class DeductionType : u32 {
DeductionComplete,
DeductionIncomplete,
DeductionFailed,
};
struct Deduction {
DeductionType type{DeductionType::DeductionFailed};
TSurface surface{};
bool Failed() const {
return type == DeductionType::DeductionFailed;
}
bool Incomplete() const {
return type == DeductionType::DeductionIncomplete;
}
bool IsDepth() const {
return surface->GetSurfaceParams().IsPixelFormatZeta();
}
};
/**
* Takes care of selecting a proper strategy to deal with a texture recycle.
*
* @param overlaps The overlapping surfaces registered in the cache.
* @param params The parameters on the new surface.
* @param gpu_addr The starting address of the new surface.
* @param untopological Indicates to the recycler that the texture has no way
* to match the overlaps due to topological reasons.
**/
RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
if (Settings::values.use_accurate_gpu_emulation) {
return RecycleStrategy::Flush;
}
// 3D Textures decision
if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
}
for (const auto& s : overlaps) {
const auto& s_params = s->GetSurfaceParams();
if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
}
}
// Untopological decision
if (untopological == MatchTopologyResult::CompressUnmatch) {
return RecycleStrategy::Flush;
}
if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) {
return RecycleStrategy::Flush;
}
return RecycleStrategy::Ignore;
}
/**
* Used to decide what to do with textures we can't resolve in the cache It has 2 implemented
* strategies: Ignore and Flush.
*
* - Ignore: Just unregisters all the overlaps and loads the new texture.
* - Flush: Flushes all the overlaps into memory and loads the new surface from that data.
*
* @param overlaps The overlapping surfaces registered in the cache.
* @param params The parameters for the new surface.
* @param gpu_addr The starting address of the new surface.
* @param preserve_contents Indicates that the new surface should be loaded from memory or left
* blank.
* @param untopological Indicates to the recycler that the texture has no way to match the
* overlaps due to topological reasons.
**/
std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
const SurfaceParams& params, const GPUVAddr gpu_addr,
const bool preserve_contents,
const MatchTopologyResult untopological) {
const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation;
for (auto& surface : overlaps) {
Unregister(surface);
}
switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
case RecycleStrategy::Ignore: {
return InitializeSurface(gpu_addr, params, do_load);
}
case RecycleStrategy::Flush: {
std::sort(overlaps.begin(), overlaps.end(),
[](const TSurface& a, const TSurface& b) -> bool {
return a->GetModificationTick() < b->GetModificationTick();
});
for (auto& surface : overlaps) {
FlushSurface(surface);
}
return InitializeSurface(gpu_addr, params, preserve_contents);
}
case RecycleStrategy::BufferCopy: {
auto new_surface = GetUncachedSurface(gpu_addr, params);
BufferCopy(overlaps[0], new_surface);
return {new_surface, new_surface->GetMainView()};
}
default: {
UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
return InitializeSurface(gpu_addr, params, do_load);
}
}
}
/**
* Takes a single surface and recreates into another that may differ in
* format, target or width alignment.
*
* @param current_surface The registered surface in the cache which we want to convert.
* @param params The new surface params which we'll use to recreate the surface.
* @param is_render Whether or not the surface is a render target.
**/
std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
bool is_render) {
const auto gpu_addr = current_surface->GetGpuAddr();
const auto& cr_params = current_surface->GetSurfaceParams();
TSurface new_surface;
if (cr_params.pixel_format != params.pixel_format && !is_render &&
GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
SurfaceParams new_params = params;
new_params.pixel_format = cr_params.pixel_format;
new_params.type = cr_params.type;
new_surface = GetUncachedSurface(gpu_addr, new_params);
} else {
new_surface = GetUncachedSurface(gpu_addr, params);
}
const auto& final_params = new_surface->GetSurfaceParams();
if (cr_params.type != final_params.type) {
BufferCopy(current_surface, new_surface);
} else {
std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
for (auto& brick : bricks) {
ImageCopy(current_surface, new_surface, brick);
}
}
Unregister(current_surface);
Register(new_surface);
new_surface->MarkAsModified(current_surface->IsModified(), Tick());
return {new_surface, new_surface->GetMainView()};
}
/**
* Takes a single surface and checks with the new surface's params if it's an exact
* match, we return the main view of the registered surface. If its formats don't
* match, we rebuild the surface. We call this last method a `Mirage`. If formats
* match but the targets don't, we create an overview View of the registered surface.
*
* @param current_surface The registered surface in the cache which we want to convert.
* @param params The new surface params which we want to check.
* @param is_render Whether or not the surface is a render target.
**/
std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
const SurfaceParams& params, bool is_render) {
const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
const bool matches_target = current_surface->MatchTarget(params.target);
const auto match_check = [&]() -> std::pair<TSurface, TView> {
if (matches_target) {
return {current_surface, current_surface->GetMainView()};
}
return {current_surface, current_surface->EmplaceOverview(params)};
};
if (!is_mirage) {
return match_check();
}
if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) {
return match_check();
}
return RebuildSurface(current_surface, params, is_render);
}
/**
* Unlike RebuildSurface where we know whether or not registered surfaces match the candidate
* in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps
* of the new surface, if they all match we end up recreating a surface for them,
* else we return nothing.
*
* @param overlaps The overlapping surfaces registered in the cache.
* @param params The parameters on the new surface.
* @param gpu_addr The starting address of the new surface.
**/
std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
const SurfaceParams& params,
const GPUVAddr gpu_addr) {
if (params.target == SurfaceTarget::Texture3D) {
return {};
}
bool modified = false;
TSurface new_surface = GetUncachedSurface(gpu_addr, params);
u32 passed_tests = 0;
for (auto& surface : overlaps) {
const SurfaceParams& src_params = surface->GetSurfaceParams();
if (src_params.is_layered || src_params.num_levels > 1) {
// We send this cases to recycle as they are more complex to handle
return {};
}
const std::size_t candidate_size = surface->GetSizeInBytes();
auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
if (!mipmap_layer) {
continue;
}
const auto [layer, mipmap] = *mipmap_layer;
if (new_surface->GetMipmapSize(mipmap) != candidate_size) {
continue;
}
modified |= surface->IsModified();
// Now we got all the data set up
const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1);
passed_tests++;
ImageCopy(surface, new_surface, copy_params);
}
if (passed_tests == 0) {
return {};
// In Accurate GPU all tests should pass, else we recycle
} else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) {
return {};
}
for (const auto& surface : overlaps) {
Unregister(surface);
}
new_surface->MarkAsModified(modified, Tick());
Register(new_surface);
return {{new_surface, new_surface->GetMainView()}};
}
/**
* Gets the starting address and parameters of a candidate surface and tries
* to find a matching surface within the cache. This is done in 3 big steps:
*
* 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
*
* 2. Check if there are any overlaps at all, if there are none, we just load the texture from
* memory else we move to step 3.
*
* 3. Consists of figuring out the relationship between the candidate texture and the
* overlaps. We divide the scenarios depending if there's 1 or many overlaps. If
* there's many, we just try to reconstruct a new surface out of them based on the
* candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we
* have to check if the candidate is a view (layer/mipmap) of the overlap or if the
* registered surface is a mipmap/layer of the candidate. In this last case we reconstruct
* a new surface.
*
* @param gpu_addr The starting address of the candidate surface.
* @param params The parameters on the candidate surface.
* @param preserve_contents Indicates that the new surface should be loaded from memory or
* left blank.
* @param is_render Whether or not the surface is a render target.
**/
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr,
const SurfaceParams& params, bool preserve_contents,
bool is_render) {
// Step 1
// Check Level 1 Cache for a fast structural match. If candidate surface
// matches at certain level we are pretty much done.
if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
TSurface& current_surface = iter->second;
const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
std::vector<TSurface> overlaps{current_surface};
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
topological_result);
}
const auto struct_result = current_surface->MatchesStructure(params);
if (struct_result != MatchStructureResult::None &&
(params.target != SurfaceTarget::Texture3D ||
current_surface->MatchTarget(params.target))) {
if (struct_result == MatchStructureResult::FullMatch) {
return ManageStructuralMatch(current_surface, params, is_render);
} else {
return RebuildSurface(current_surface, params, is_render);
}
}
}
// Step 2
// Obtain all possible overlaps in the memory region
const std::size_t candidate_size = params.GetGuestSizeInBytes();
auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
// If none are found, we are done. we just load the surface and create it.
if (overlaps.empty()) {
return InitializeSurface(gpu_addr, params, preserve_contents);
}
// Step 3
// Now we need to figure the relationship between the texture and its overlaps
// we do a topological test to ensure we can find some relationship. If it fails
// immediately recycle the texture
for (const auto& surface : overlaps) {
const auto topological_result = surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
topological_result);
}
}
// Split cases between 1 overlap or many.
if (overlaps.size() == 1) {
TSurface current_surface = overlaps[0];
// First check if the surface is within the overlap. If not, it means
// two things either the candidate surface is a supertexture of the overlap
// or they don't match in any known way.
if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
if (current_surface->GetGpuAddr() == gpu_addr) {
std::optional<std::pair<TSurface, TView>> view =
TryReconstructSurface(overlaps, params, gpu_addr);
if (view) {
return *view;
}
}
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
}
// Now we check if the candidate is a mipmap/layer of the overlap
std::optional<TView> view =
current_surface->EmplaceView(params, gpu_addr, candidate_size);
if (view) {
const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
if (is_mirage) {
// On a mirage view, we need to recreate the surface under this new view
// and then obtain a view again.
SurfaceParams new_params = current_surface->GetSurfaceParams();
const u32 wh = SurfaceParams::ConvertWidth(
new_params.width, new_params.pixel_format, params.pixel_format);
const u32 hh = SurfaceParams::ConvertHeight(
new_params.height, new_params.pixel_format, params.pixel_format);
new_params.width = wh;
new_params.height = hh;
new_params.pixel_format = params.pixel_format;
std::pair<TSurface, TView> pair =
RebuildSurface(current_surface, new_params, is_render);
std::optional<TView> mirage_view =
pair.first->EmplaceView(params, gpu_addr, candidate_size);
if (mirage_view)
return {pair.first, *mirage_view};
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
}
return {current_surface, *view};
}
} else {
// If there are many overlaps, odds are they are subtextures of the candidate
// surface. We try to construct a new surface based on the candidate parameters,
// using the overlaps. If a single overlap fails, this will fail.
std::optional<std::pair<TSurface, TView>> view =
TryReconstructSurface(overlaps, params, gpu_addr);
if (view) {
return *view;
}
}
// We failed all the tests, recycle the overlaps into a new texture.
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
}
/**
* Gets the starting address and parameters of a candidate surface and tries to find a
* matching surface within the cache that's similar to it. If there are many textures
* or the texture found if entirely incompatible, it will fail. If no texture is found, the
* blit will be unsuccessful.
*
* @param gpu_addr The starting address of the candidate surface.
* @param params The parameters on the candidate surface.
**/
Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
const auto cache_addr{ToCacheAddr(host_ptr)};
if (!cache_addr) {
Deduction result{};
result.type = DeductionType::DeductionFailed;
return result;
}
if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
TSurface& current_surface = iter->second;
const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
Deduction result{};
result.type = DeductionType::DeductionFailed;
return result;
}
const auto struct_result = current_surface->MatchesStructure(params);
if (struct_result != MatchStructureResult::None &&
current_surface->MatchTarget(params.target)) {
Deduction result{};
result.type = DeductionType::DeductionComplete;
result.surface = current_surface;
return result;
}
}
const std::size_t candidate_size = params.GetGuestSizeInBytes();
auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
if (overlaps.empty()) {
Deduction result{};
result.type = DeductionType::DeductionIncomplete;
return result;
}
if (overlaps.size() > 1) {
Deduction result{};
result.type = DeductionType::DeductionFailed;
return result;
} else {
Deduction result{};
result.type = DeductionType::DeductionComplete;
result.surface = overlaps[0];
return result;
}
}
/**
* Gets a null surface based on a target texture.
* @param target The target of the null surface.
*/
TView GetNullSurface(SurfaceTarget target) {
const u32 i_target = static_cast<u32>(target);
if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) {
return it->second->GetMainView();
}
SurfaceParams params{};
params.target = target;
params.is_tiled = false;
params.srgb_conversion = false;
params.is_layered = false;
params.block_width = 0;
params.block_height = 0;
params.block_depth = 0;
params.tile_width_spacing = 1;
params.width = 1;
params.height = 1;
params.depth = 1;
params.pitch = 4;
params.num_levels = 1;
params.emulated_levels = 1;
params.pixel_format = VideoCore::Surface::PixelFormat::RGBA16F;
params.type = VideoCore::Surface::SurfaceType::ColorTexture;
auto surface = CreateSurface(0ULL, params);
invalid_memory.clear();
invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
surface->UploadTexture(invalid_memory);
surface->MarkAsModified(false, Tick());
invalid_cache.emplace(i_target, surface);
return surface->GetMainView();
}
/**
* Gets the a source and destination starting address and parameters,
* and tries to deduce if they are supposed to be depth textures. If so, their
* parameters are modified and fixed into so.
*
* @param src_params The parameters of the candidate surface.
* @param dst_params The parameters of the destination surface.
* @param src_gpu_addr The starting address of the candidate surface.
* @param dst_gpu_addr The starting address of the destination surface.
**/
void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
auto deduced_dst = DeduceSurface(src_gpu_addr, src_params);
if (deduced_src.Failed() || deduced_dst.Failed()) {
return;
}
const bool incomplete_src = deduced_src.Incomplete();
const bool incomplete_dst = deduced_dst.Incomplete();
if (incomplete_src && incomplete_dst) {
return;
}
const bool any_incomplete = incomplete_src || incomplete_dst;
if (!any_incomplete) {
if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) {
return;
}
} else {
if (incomplete_src && !(deduced_dst.IsDepth())) {
return;
}
if (incomplete_dst && !(deduced_src.IsDepth())) {
return;
}
}
const auto inherit_format = [](SurfaceParams& to, TSurface from) {
const SurfaceParams& params = from->GetSurfaceParams();
to.pixel_format = params.pixel_format;
to.type = params.type;
};
// Now we got the cases where one or both is Depth and the other is not known
if (!incomplete_src) {
inherit_format(src_params, deduced_src.surface);
} else {
inherit_format(src_params, deduced_dst.surface);
}
if (!incomplete_dst) {
inherit_format(dst_params, deduced_dst.surface);
} else {
inherit_format(dst_params, deduced_src.surface);
}
}
std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
bool preserve_contents) {
auto new_surface{GetUncachedSurface(gpu_addr, params)};
Register(new_surface);
if (preserve_contents) {
LoadSurface(new_surface);
}
return {new_surface, new_surface->GetMainView()};
}
void LoadSurface(const TSurface& surface) {
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache);
surface->UploadTexture(staging_cache.GetBuffer(0));
surface->MarkAsModified(false, Tick());
}
void FlushSurface(const TSurface& surface) {
if (!surface->IsModified()) {
return;
}
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
surface->DownloadTexture(staging_cache.GetBuffer(0));
surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache);
surface->MarkAsModified(false, Tick());
}
void RegisterInnerCache(TSurface& surface) {
const CacheAddr cache_addr = surface->GetCacheAddr();
CacheAddr start = cache_addr >> registry_page_bits;
const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
l1_cache[cache_addr] = surface;
while (start <= end) {
registry[start].push_back(surface);
start++;
}
}
void UnregisterInnerCache(TSurface& surface) {
const CacheAddr cache_addr = surface->GetCacheAddr();
CacheAddr start = cache_addr >> registry_page_bits;
const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
l1_cache.erase(cache_addr);
while (start <= end) {
auto& reg{registry[start]};
reg.erase(std::find(reg.begin(), reg.end(), surface));
start++;
}
}
std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) {
if (size == 0) {
return {};
}
const CacheAddr cache_addr_end = cache_addr + size;
CacheAddr start = cache_addr >> registry_page_bits;
const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits;
std::vector<TSurface> surfaces;
while (start <= end) {
std::vector<TSurface>& list = registry[start];
for (auto& surface : list) {
if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) {
surface->MarkAsPicked(true);
surfaces.push_back(surface);
}
}
start++;
}
for (auto& surface : surfaces) {
surface->MarkAsPicked(false);
}
return surfaces;
}
void ReserveSurface(const SurfaceParams& params, TSurface surface) {
surface_reserve[params].push_back(std::move(surface));
}
TSurface TryGetReservedSurface(const SurfaceParams& params) {
auto search{surface_reserve.find(params)};
if (search == surface_reserve.end()) {
return {};
}
for (auto& surface : search->second) {
if (!surface->IsRegistered()) {
return surface;
}
}
return {};
}
constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
return siblings_table[static_cast<std::size_t>(format)];
}
struct FramebufferTargetInfo {
TSurface target;
TView view;
};
VideoCore::RasterizerInterface& rasterizer;
FormatLookupTable format_lookup_table;
u64 ticks{};
// Guards the cache for protection conflicts.
bool guard_render_targets{};
bool guard_samplers{};
// The siblings table is for formats that can inter exchange with one another
// without causing issues. This is only valid when a conflict occurs on a non
// rendering use.
std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;
// The internal Cache is different for the Texture Cache. It's based on buckets
// of 1MB. This fits better for the purpose of this cache as textures are normaly
// large in size.
static constexpr u64 registry_page_bits{20};
static constexpr u64 registry_page_size{1 << registry_page_bits};
std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
static constexpr u32 DEPTH_RT = 8;
static constexpr u32 NO_RT = 0xFFFFFFFF;
// The L1 Cache is used for fast texture lookup before checking the overlaps
// This avoids calculating size and other stuffs.
std::unordered_map<CacheAddr, TSurface> l1_cache;
/// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
/// previously been used. This is to prevent surfaces from being constantly created and
/// destroyed when used with different surface parameters.
std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve;
std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
render_targets;
FramebufferTargetInfo depth_buffer;
std::vector<TSurface> sampled_textures;
/// This cache stores null surfaces in order to be used as a placeholder
/// for invalid texture calls.
std::unordered_map<u32, TSurface> invalid_cache;
std::vector<u8> invalid_memory;
StagingCache staging_cache;
std::recursive_mutex mutex;
};
} // namespace VideoCommon