2019-04-24 21:35:54 +02:00
|
|
|
// Copyright 2019 yuzu Emulator Project
|
|
|
|
// Licensed under GPLv2 or any later version
|
|
|
|
// Refer to the license.txt file included.
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2019-06-29 23:54:13 +02:00
|
|
|
#include <algorithm>
|
|
|
|
#include <array>
|
2019-04-24 21:35:54 +02:00
|
|
|
#include <memory>
|
2019-05-11 05:50:01 +02:00
|
|
|
#include <mutex>
|
2019-04-24 21:35:54 +02:00
|
|
|
#include <set>
|
|
|
|
#include <tuple>
|
|
|
|
#include <unordered_map>
|
2019-05-10 06:10:16 +02:00
|
|
|
#include <vector>
|
2019-04-24 21:35:54 +02:00
|
|
|
|
|
|
|
#include <boost/icl/interval_map.hpp>
|
|
|
|
#include <boost/range/iterator_range.hpp>
|
|
|
|
|
|
|
|
#include "common/assert.h"
|
|
|
|
#include "common/common_types.h"
|
2019-05-08 05:13:05 +02:00
|
|
|
#include "common/math_util.h"
|
2019-06-24 07:15:57 +02:00
|
|
|
#include "core/core.h"
|
2019-04-24 21:35:54 +02:00
|
|
|
#include "core/memory.h"
|
2019-06-21 03:22:20 +02:00
|
|
|
#include "core/settings.h"
|
2019-04-24 21:35:54 +02:00
|
|
|
#include "video_core/engines/fermi_2d.h"
|
|
|
|
#include "video_core/engines/maxwell_3d.h"
|
|
|
|
#include "video_core/gpu.h"
|
|
|
|
#include "video_core/memory_manager.h"
|
|
|
|
#include "video_core/rasterizer_interface.h"
|
|
|
|
#include "video_core/surface.h"
|
2019-05-07 16:57:16 +02:00
|
|
|
#include "video_core/texture_cache/copy_params.h"
|
2019-04-24 21:35:54 +02:00
|
|
|
#include "video_core/texture_cache/surface_base.h"
|
|
|
|
#include "video_core/texture_cache/surface_params.h"
|
|
|
|
#include "video_core/texture_cache/surface_view.h"
|
|
|
|
|
|
|
|
namespace Tegra::Texture {
|
|
|
|
struct FullTextureInfo;
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace VideoCore {
|
|
|
|
class RasterizerInterface;
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace VideoCommon {
|
|
|
|
|
2019-06-13 15:46:36 +02:00
|
|
|
using VideoCore::Surface::PixelFormat;
|
|
|
|
|
2019-05-07 16:57:16 +02:00
|
|
|
using VideoCore::Surface::SurfaceTarget;
|
|
|
|
using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
|
|
|
|
|
2019-04-25 18:41:57 +02:00
|
|
|
template <typename TSurface, typename TView>
|
2019-04-24 21:35:54 +02:00
|
|
|
class TextureCache {
|
2019-05-07 16:57:16 +02:00
|
|
|
using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
|
2019-04-24 21:35:54 +02:00
|
|
|
using IntervalType = typename IntervalMap::interval_type;
|
|
|
|
|
|
|
|
public:
|
|
|
|
/// Unregisters every cached surface overlapping [addr, addr + size).
/// Called when the guest invalidates a memory region.
void InvalidateRegion(CacheAddr addr, std::size_t size) {
    std::lock_guard lock{mutex};

    const auto overlapping_surfaces = GetSurfacesInRegion(addr, size);
    for (const auto& overlapping_surface : overlapping_surfaces) {
        Unregister(overlapping_surface);
    }
}
|
|
|
|
|
2019-06-26 00:36:19 +02:00
|
|
|
/**
 * `Guard` guarantees that render targets don't unregister themselves if they
 * collide. Protection is currently only done on 3D slices.
 */
|
2019-06-15 19:22:57 +02:00
|
|
|
/// Enables or disables the render-target unregister guard. While enabled,
/// protected surfaces (bound render targets) are not unregistered even when
/// an overlapping surface collides with them.
void GuardRenderTargets(bool new_guard) {
    guard_render_targets = new_guard;
}
|
|
|
|
|
|
|
|
/// Enables or disables sampled-surface tracking. While enabled, surfaces
/// returned by GetTextureSurface/GetImageSurface are recorded in
/// sampled_textures for later TextureBarrier checks.
void GuardSamplers(bool new_guard) {
    guard_samplers = new_guard;
}
|
|
|
|
|
2019-05-08 16:32:30 +02:00
|
|
|
/// Writes back to guest memory every surface overlapping [addr, addr + size),
/// flushing in modification-tick order so the newest data lands last.
void FlushRegion(CacheAddr addr, std::size_t size) {
    std::lock_guard lock{mutex};

    auto overlapping = GetSurfacesInRegion(addr, size);
    if (overlapping.empty()) {
        return;
    }
    const auto by_modification_tick = [](const TSurface& lhs, const TSurface& rhs) {
        return lhs->GetModificationTick() < rhs->GetModificationTick();
    };
    std::sort(overlapping.begin(), overlapping.end(), by_modification_tick);
    for (const auto& candidate : overlapping) {
        FlushSurface(candidate);
    }
}
|
|
|
|
|
2019-07-12 07:01:27 +02:00
|
|
|
/// Returns a cached view for the texture described by the given TIC entry,
/// or a null view when the entry's address is unmapped.
TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
                        const VideoCommon::Shader::Sampler& entry) {
    std::lock_guard lock{mutex};
    const auto texture_addr{tic.Address()};
    if (!texture_addr) {
        // Unmapped texture: hand back a null view.
        return {};
    }
    const auto surface_params{SurfaceParams::CreateForTexture(tic, entry)};
    const auto [surface, view] = GetSurface(texture_addr, surface_params, true, false);
    if (guard_samplers) {
        // Remember sampled surfaces so TextureBarrier can detect render feedback.
        sampled_textures.push_back(surface);
    }
    return view;
}
|
|
|
|
|
2019-07-12 02:59:59 +02:00
|
|
|
/// Returns a cached view for the shader image described by the given TIC
/// entry, or a null view when the entry's address is unmapped.
TView GetImageSurface(const Tegra::Texture::TICEntry& tic,
                      const VideoCommon::Shader::Image& entry) {
    std::lock_guard lock{mutex};
    const auto image_addr{tic.Address()};
    if (!image_addr) {
        // Unmapped image: hand back a null view.
        return {};
    }
    const auto image_params{SurfaceParams::CreateForImage(tic, entry)};
    const auto [surface, view] = GetSurface(image_addr, image_params, true, false);
    if (guard_samplers) {
        // Track sampled surfaces for TextureBarrier feedback detection.
        sampled_textures.push_back(surface);
    }
    return view;
}
|
|
|
|
|
|
|
|
/// Returns true when any surface sampled since the last call is also bound as
/// a render target, in which case the backend must emit a texture barrier.
/// Clears the sampled-surface list either way.
bool TextureBarrier() {
    bool any_rt = false;
    for (const auto& surface : sampled_textures) {
        if (surface->IsRenderTarget()) {
            any_rt = true;
            break;
        }
    }
    sampled_textures.clear();
    return any_rt;
}
|
|
|
|
|
2019-05-07 16:57:16 +02:00
|
|
|
/// Returns a view of the currently bound depth buffer, creating or fetching
/// the backing surface from the cache when the depth-buffer state is dirty.
/// @param preserve_contents, tells if a newly created surface must be loaded
/// from guest memory.
TView GetDepthBufferSurface(bool preserve_contents) {
    std::lock_guard lock{mutex};
    auto& maxwell3d = system.GPU().Maxwell3D();

    if (!maxwell3d.dirty.depth_buffer) {
        // Fast path: the depth-buffer registers did not change.
        return depth_buffer.view;
    }
    maxwell3d.dirty.depth_buffer = false;

    const auto& regs{maxwell3d.regs};
    const auto gpu_addr{regs.zeta.Address()};
    if (!gpu_addr || !regs.zeta_enable) {
        SetEmptyDepthBuffer();
        return {};
    }
    const auto depth_params{SurfaceParams::CreateForDepthBuffer(
        system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
        regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
        regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
    auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
    // Unmark the previous depth target before binding the new one.
    if (depth_buffer.target) {
        depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
    }
    depth_buffer.target = surface_view.first;
    depth_buffer.view = surface_view.second;
    if (depth_buffer.target) {
        depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
    }
    return surface_view.second;
}
|
|
|
|
|
2019-05-07 16:57:16 +02:00
|
|
|
/// Returns a view of the color buffer bound at render-target slot `index`,
/// creating or fetching the backing surface when the target state is dirty.
/// @param index, render-target slot; must be < NumRenderTargets.
/// @param preserve_contents, tells if a newly created surface must be loaded
/// from guest memory.
TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
    std::lock_guard lock{mutex};
    ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
    auto& maxwell3d = system.GPU().Maxwell3D();
    if (!maxwell3d.dirty.render_target[index]) {
        // Fast path: this render target's registers did not change.
        return render_targets[index].view;
    }
    maxwell3d.dirty.render_target[index] = false;

    const auto& regs{maxwell3d.regs};
    if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
        regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
        SetEmptyColorBuffer(index);
        return {};
    }

    const auto& config{regs.rt[index]};
    const auto gpu_addr{config.Address()};
    if (!gpu_addr) {
        SetEmptyColorBuffer(index);
        return {};
    }

    auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
                                   preserve_contents, true);
    // Unmark the previous color target before binding the new one.
    if (render_targets[index].target) {
        render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
    }
    render_targets[index].target = surface_view.first;
    render_targets[index].view = surface_view.second;
    if (render_targets[index].target) {
        render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
    }
    return surface_view.second;
}
|
|
|
|
|
|
|
|
void MarkColorBufferInUse(std::size_t index) {
|
2019-06-29 22:29:39 +02:00
|
|
|
if (auto& render_target = render_targets[index].target) {
|
|
|
|
render_target->MarkAsModified(true, Tick());
|
|
|
|
}
|
2019-04-24 21:35:54 +02:00
|
|
|
}
|
|
|
|
|
2019-05-07 16:57:16 +02:00
|
|
|
void MarkDepthBufferInUse() {
|
2019-06-29 22:29:39 +02:00
|
|
|
if (depth_buffer.target) {
|
2019-05-07 16:57:16 +02:00
|
|
|
depth_buffer.target->MarkAsModified(true, Tick());
|
2019-06-29 22:29:39 +02:00
|
|
|
}
|
2019-04-24 21:35:54 +02:00
|
|
|
}
|
|
|
|
|
2019-05-07 16:57:16 +02:00
|
|
|
void SetEmptyDepthBuffer() {
|
2019-06-26 00:36:19 +02:00
|
|
|
if (depth_buffer.target == nullptr) {
|
|
|
|
return;
|
2019-05-07 16:57:16 +02:00
|
|
|
}
|
2019-07-14 23:42:39 +02:00
|
|
|
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
|
2019-06-26 00:36:19 +02:00
|
|
|
depth_buffer.target = nullptr;
|
|
|
|
depth_buffer.view = nullptr;
|
2019-05-07 16:57:16 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void SetEmptyColorBuffer(std::size_t index) {
|
2019-06-26 00:36:19 +02:00
|
|
|
if (render_targets[index].target == nullptr) {
|
|
|
|
return;
|
2019-05-07 16:57:16 +02:00
|
|
|
}
|
2019-07-14 23:42:39 +02:00
|
|
|
render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
|
2019-06-26 00:36:19 +02:00
|
|
|
render_targets[index].target = nullptr;
|
|
|
|
render_targets[index].view = nullptr;
|
2019-05-07 16:57:16 +02:00
|
|
|
}
|
|
|
|
|
2019-05-08 05:13:05 +02:00
|
|
|
/// Performs a Fermi 2D engine surface-to-surface blit through the cache.
/// @param src_config, source surface registers of the 2D engine.
/// @param dst_config, destination surface registers of the 2D engine.
/// @param copy_config, blit configuration passed through to the backend.
void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
                 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
                 const Tegra::Engines::Fermi2D::Config& copy_config) {
    std::lock_guard lock{mutex};
    SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
    SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
    const GPUVAddr src_gpu_addr = src_config.Address();
    const GPUVAddr dst_gpu_addr = dst_config.Address();
    // May adjust src/dst params in place before the surfaces are fetched.
    DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
    std::pair<TSurface, TView> dst_surface = GetSurface(dst_gpu_addr, dst_params, true, false);
    std::pair<TSurface, TView> src_surface = GetSurface(src_gpu_addr, src_params, true, false);
    ImageBlit(src_surface.second, dst_surface.second, copy_config);
    // The destination now holds GPU-side data newer than guest memory.
    dst_surface.first->MarkAsModified(true, Tick());
}
|
|
|
|
|
2019-05-07 23:30:36 +02:00
|
|
|
/// Looks up a registered surface whose cache address exactly matches the
/// given host pointer; returns nullptr when none exists or the pointer does
/// not map to a cache address.
TSurface TryFindFramebufferSurface(const u8* host_ptr) {
    const CacheAddr cache_addr = ToCacheAddr(host_ptr);
    if (!cache_addr) {
        return nullptr;
    }
    const CacheAddr page = cache_addr >> registry_page_bits;
    std::vector<TSurface>& list = registry[page];
    const auto it =
        std::find_if(list.begin(), list.end(), [cache_addr](const TSurface& surface) {
            return surface->GetCacheAddr() == cache_addr;
        });
    return it != list.end() ? *it : nullptr;
}
|
|
|
|
|
|
|
|
/// Advances and returns the monotonic counter used to order surface
/// modifications.
u64 Tick() {
    return ++ticks;
}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
/// Initializes the cache: clears all render-target bindings, sizes the
/// staging cache, and builds the table of "sibling" pixel formats (pairs of
/// depth/color formats registered as interchangeable for non-render reuse).
TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
    : system{system}, rasterizer{rasterizer} {
    for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
        SetEmptyColorBuffer(i);
    }

    SetEmptyDepthBuffer();
    staging_cache.SetSize(2);

    // Register a pair of formats as siblings of each other.
    const auto make_siblings = [this](PixelFormat a, PixelFormat b) {
        siblings_table[static_cast<std::size_t>(a)] = b;
        siblings_table[static_cast<std::size_t>(b)] = a;
    };
    std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
    make_siblings(PixelFormat::Z16, PixelFormat::R16U);
    make_siblings(PixelFormat::Z32F, PixelFormat::R32F);
    make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F);

    sampled_textures.reserve(64);
}
|
2019-04-24 21:35:54 +02:00
|
|
|
|
|
|
|
~TextureCache() = default;
|
|
|
|
|
2019-05-07 16:57:16 +02:00
|
|
|
virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0;
|
2019-04-24 21:35:54 +02:00
|
|
|
|
2019-06-02 04:15:55 +02:00
|
|
|
virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface,
|
2019-05-07 16:57:16 +02:00
|
|
|
const CopyParams& copy_params) = 0;
|
2019-04-24 21:35:54 +02:00
|
|
|
|
2019-06-02 04:15:55 +02:00
|
|
|
virtual void ImageBlit(TView& src_view, TView& dst_view,
|
2019-05-18 10:57:49 +02:00
|
|
|
const Tegra::Engines::Fermi2D::Config& copy_config) = 0;
|
2019-05-08 05:13:05 +02:00
|
|
|
|
2019-06-02 01:12:00 +02:00
|
|
|
// Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
// and reading it from a separate buffer.
|
2019-06-02 04:15:55 +02:00
|
|
|
virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
|
2019-06-02 01:12:00 +02:00
|
|
|
|
2019-07-11 21:15:21 +02:00
|
|
|
/// Flags the Maxwell3D dirty bits for the render target this surface was
/// bound to, so the binding is re-resolved after the surface is unregistered.
void ManageRenderTargetUnregister(TSurface& surface) {
    auto& maxwell3d = system.GPU().Maxwell3D();
    const u32 index = surface->GetRenderTarget();
    if (index == DEPTH_RT) {
        maxwell3d.dirty.depth_buffer = true;
    } else {
        maxwell3d.dirty.render_target[index] = true;
    }
    maxwell3d.dirty.render_settings = true;
}
|
|
|
|
|
2019-05-07 16:57:16 +02:00
|
|
|
/// Registers a surface in the cache: resolves its host and CPU addresses,
/// inserts it into the inner cache structures and bumps the rasterizer's
/// cached-page counters. Refuses surfaces whose GPU address is unmapped.
void Register(TSurface surface) {
    const GPUVAddr gpu_addr = surface->GetGpuAddr();
    const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
    const std::size_t size = surface->GetSizeInBytes();
    const std::optional<VAddr> cpu_addr =
        system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
    if (!cache_ptr || !cpu_addr) {
        // No host backing for this GPU address: do not track the surface.
        LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
                     gpu_addr);
        return;
    }
    // Record whether the GPU block maps to contiguous host memory.
    const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
    surface->MarkAsContinuous(continuous);
    surface->SetCacheAddr(cache_ptr);
    surface->SetCpuAddr(*cpu_addr);
    RegisterInnerCache(surface);
    surface->MarkAsRegistered(true);
    rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
}
|
|
|
|
|
2019-05-11 05:42:08 +02:00
|
|
|
/// Removes a surface from the cache, undoing Register. The surface is placed
/// in the reserve so a later request with identical params can reuse it.
/// Protected render targets are skipped while the render-target guard is on.
void Unregister(TSurface surface) {
    if (guard_render_targets && surface->IsProtected()) {
        return;
    }
    if (!guard_render_targets && surface->IsRenderTarget()) {
        // Dirty the engine state so the render target gets rebound later.
        ManageRenderTargetUnregister(surface);
    }
    const std::size_t size = surface->GetSizeInBytes();
    const VAddr cpu_addr = surface->GetCpuAddr();
    rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
    UnregisterInnerCache(surface);
    surface->MarkAsRegistered(false);
    ReserveSurface(surface->GetSurfaceParams(), surface);
}
|
|
|
|
|
2019-05-07 16:57:16 +02:00
|
|
|
/// Returns a surface for the given parameters without touching the registry:
/// reuses a previously reserved surface when one matches, otherwise asks the
/// backend to create a fresh one.
TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
    const auto reserved = TryGetReservedSurface(params);
    if (!reserved) {
        // No reserved surface available, create a new one and reserve it
        return CreateSurface(gpu_addr, params);
    }
    reserved->SetGpuAddr(gpu_addr);
    return reserved;
}
|
|
|
|
|
2019-05-18 10:57:49 +02:00
|
|
|
std::pair<TSurface, TView> GetFermiSurface(
|
|
|
|
const Tegra::Engines::Fermi2D::Regs::Surface& config) {
|
2019-05-08 05:13:05 +02:00
|
|
|
SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config);
|
|
|
|
const GPUVAddr gpu_addr = config.Address();
|
2019-06-13 15:46:36 +02:00
|
|
|
return GetSurface(gpu_addr, params, true, false);
|
2019-05-08 05:13:05 +02:00
|
|
|
}
|
|
|
|
|
2019-04-24 21:35:54 +02:00
|
|
|
Core::System& system;
|
|
|
|
|
|
|
|
private:
|
2019-05-07 16:57:16 +02:00
|
|
|
/// Strategies for handling overlaps that cannot be matched to a candidate
/// surface (selected by PickStrategy, executed by RecycleSurface).
enum class RecycleStrategy : u32 {
    Ignore = 0,     ///< Drop the overlaps; reload the new surface as needed.
    Flush = 1,      ///< Flush overlaps to memory first, then reload from it.
    BufferCopy = 3, ///< Deoptimize: raw buffer copy from the first overlap.
};

/// Outcome classification for surface deduction (see DeduceSurface users).
enum class DeductionType : u32 {
    DeductionComplete,
    DeductionIncomplete,
    DeductionFailed,
};

/// Result of a surface deduction: the outcome type plus the deduced surface,
/// when one was found.
struct Deduction {
    DeductionType type{DeductionType::DeductionFailed};
    TSurface surface{};

    bool Failed() const {
        return type == DeductionType::DeductionFailed;
    }

    bool Incomplete() const {
        return type == DeductionType::DeductionIncomplete;
    }

    // NOTE: dereferences `surface`; only call when a surface was deduced.
    bool IsDepth() const {
        return surface->GetSurfaceParams().IsPixelFormatZeta();
    }
};
|
|
|
|
|
2019-05-14 04:59:18 +02:00
|
|
|
/**
 * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle.
 * @param overlaps, the overlapping surfaces registered in the cache.
 * @param params, the parameters of the new surface.
 * @param gpu_addr, the starting address of the new surface.
 * @param untopological, tells the recycler that the texture has no way to match the overlaps
 * due to topological reasons.
 **/
|
2019-05-07 16:57:16 +02:00
|
|
|
/// Selects the recycle strategy for a failed surface match: Flush under
/// accurate GPU emulation, for 3D textures (candidate or overlaps), and for
/// the topology mismatches listed below; Ignore otherwise.
RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
                             const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
    if (Settings::values.use_accurate_gpu_emulation) {
        return RecycleStrategy::Flush;
    }
    // 3D Textures decision
    const auto is_3d_surface = [](const SurfaceParams& p) {
        return p.block_depth > 1 || p.target == SurfaceTarget::Texture3D;
    };
    if (is_3d_surface(params)) {
        return RecycleStrategy::Flush;
    }
    if (std::any_of(overlaps.begin(), overlaps.end(), [&is_3d_surface](const TSurface& s) {
            return is_3d_surface(s->GetSurfaceParams());
        })) {
        return RecycleStrategy::Flush;
    }
    // Untopological decision
    if (untopological == MatchTopologyResult::CompressUnmatch ||
        (untopological == MatchTopologyResult::FullMatch && !params.is_tiled)) {
        return RecycleStrategy::Flush;
    }
    return RecycleStrategy::Ignore;
}
|
2019-04-24 21:35:54 +02:00
|
|
|
|
2019-05-14 04:59:18 +02:00
|
|
|
/**
|
|
|
|
* `RecycleSurface` es a method we use to decide what to do with textures we can't resolve in
|
|
|
|
*the cache It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the
|
|
|
|
*overlaps and loads the new texture. Flush, flushes all the overlaps into memory and loads the
|
|
|
|
*new surface from that data.
|
|
|
|
* @param overlaps, the overlapping surfaces registered in the cache.
|
|
|
|
* @param params, the paremeters on the new surface.
|
|
|
|
* @param gpu_addr, the starting address of the new surface.
|
|
|
|
* @param preserve_contents, tells if the new surface should be loaded from meory or left blank
|
|
|
|
* @param untopological, tells the recycler that the texture has no way to match the overlaps
|
|
|
|
* due to topological reasons.
|
|
|
|
**/
|
2019-05-07 16:57:16 +02:00
|
|
|
std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
                                          const SurfaceParams& params, const GPUVAddr gpu_addr,
                                          const bool preserve_contents,
                                          const MatchTopologyResult untopological) {
    // Only load from memory when the caller wants contents preserved and we
    // are under accurate GPU emulation.
    const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation;
    for (auto& surface : overlaps) {
        Unregister(surface);
    }
    switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
    case RecycleStrategy::Ignore: {
        return InitializeSurface(gpu_addr, params, do_load);
    }
    case RecycleStrategy::Flush: {
        // Flush oldest-first so guest memory ends up with the newest data.
        std::sort(overlaps.begin(), overlaps.end(),
                  [](const TSurface& a, const TSurface& b) -> bool {
                      return a->GetModificationTick() < b->GetModificationTick();
                  });
        for (auto& surface : overlaps) {
            FlushSurface(surface);
        }
        return InitializeSurface(gpu_addr, params, preserve_contents);
    }
    case RecycleStrategy::BufferCopy: {
        auto new_surface = GetUncachedSurface(gpu_addr, params);
        BufferCopy(overlaps[0], new_surface);
        return {new_surface, new_surface->GetMainView()};
    }
    default: {
        UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
        return InitializeSurface(gpu_addr, params, do_load);
    }
    }
}
|
2019-04-24 21:35:54 +02:00
|
|
|
|
2019-05-14 04:59:18 +02:00
|
|
|
/**
 * `RebuildSurface` takes a single surface and recreates it into another that
 * may differ in format, target or width alignment.
 * @param current_surface, the registered surface in the cache which we want to convert.
 * @param params, the new surface params which we'll use to recreate the surface.
 **/
|
2019-06-13 16:39:45 +02:00
|
|
|
std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
                                          bool is_render) {
    const auto gpu_addr = current_surface->GetGpuAddr();
    const auto& cr_params = current_surface->GetSurfaceParams();
    TSurface new_surface;
    // When not rendering and the requested format is a sibling of the current
    // one, keep the current pixel format so data can be copied unconverted.
    if (cr_params.pixel_format != params.pixel_format && !is_render &&
        GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
        SurfaceParams new_params = params;
        new_params.pixel_format = cr_params.pixel_format;
        new_params.component_type = cr_params.component_type;
        new_params.type = cr_params.type;
        new_surface = GetUncachedSurface(gpu_addr, new_params);
    } else {
        new_surface = GetUncachedSurface(gpu_addr, params);
    }
    const auto& final_params = new_surface->GetSurfaceParams();
    if (cr_params.type != final_params.type ||
        (cr_params.component_type != final_params.component_type)) {
        // Incompatible type/component type: move data via a raw buffer copy.
        BufferCopy(current_surface, new_surface);
    } else {
        // Compatible: image-copy each region produced by BreakDown.
        std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
        for (auto& brick : bricks) {
            ImageCopy(current_surface, new_surface, brick);
        }
    }
    Unregister(current_surface);
    Register(new_surface);
    // Propagate the modified flag so pending flushes still happen.
    new_surface->MarkAsModified(current_surface->IsModified(), Tick());
    return {new_surface, new_surface->GetMainView()};
}
|
2019-04-24 21:35:54 +02:00
|
|
|
|
2019-05-14 04:59:18 +02:00
|
|
|
/**
 * `ManageStructuralMatch` takes a single surface and checks with the new surface's
 * params if it's an exact match: we return the main view of the registered surface. If its
 * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats
 * match but the targets don't, we create an overview View of the registered surface.
 * @param current_surface, the registered surface in the cache which we want to convert.
 * @param params, the new surface params which we want to check.
 **/
|
2019-05-07 16:57:16 +02:00
|
|
|
std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
|
2019-06-13 15:46:36 +02:00
|
|
|
const SurfaceParams& params, bool is_render) {
|
2019-05-07 16:57:16 +02:00
|
|
|
const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
|
2019-06-13 15:46:36 +02:00
|
|
|
const bool matches_target = current_surface->MatchTarget(params.target);
|
2019-06-30 01:47:46 +02:00
|
|
|
const auto match_check = [&]() -> std::pair<TSurface, TView> {
|
2019-06-13 15:46:36 +02:00
|
|
|
if (matches_target) {
|
|
|
|
return {current_surface, current_surface->GetMainView()};
|
|
|
|
}
|
|
|
|
return {current_surface, current_surface->EmplaceOverview(params)};
|
2019-06-30 01:47:46 +02:00
|
|
|
};
|
2019-06-26 00:36:19 +02:00
|
|
|
if (!is_mirage) {
|
|
|
|
return match_check();
|
|
|
|
}
|
2019-06-30 01:47:46 +02:00
|
|
|
if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) {
|
2019-06-26 00:36:19 +02:00
|
|
|
return match_check();
|
2019-04-24 21:35:54 +02:00
|
|
|
}
|
2019-06-26 00:36:19 +02:00
|
|
|
return RebuildSurface(current_surface, params, is_render);
|
2019-05-07 16:57:16 +02:00
|
|
|
}
|
2019-04-24 21:35:54 +02:00
|
|
|
|
2019-05-14 04:59:18 +02:00
|
|
|
/**
 * `TryReconstructSurface`: unlike `RebuildSurface`, where we know the registered surface
 * matches the candidate in some way, we have no guarantees here. We try to see if the overlaps
 * are sublayers/mipmaps of the new surface; if they all match, we end up recreating a surface
 * for them, otherwise we return nothing.
 * @param overlaps, the overlapping surfaces registered in the cache.
 * @param params, the parameters of the new surface.
 * @param gpu_addr, the starting address of the new surface.
 **/
|
|
|
|
std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
                                                                const SurfaceParams& params,
                                                                const GPUVAddr gpu_addr) {
    if (params.target == SurfaceTarget::Texture3D) {
        return {};
    }
    bool modified = false;
    TSurface new_surface = GetUncachedSurface(gpu_addr, params);
    u32 passed_tests = 0;
    for (auto& surface : overlaps) {
        const SurfaceParams& src_params = surface->GetSurfaceParams();
        if (src_params.is_layered || src_params.num_levels > 1) {
            // We send these cases to recycle as they are more complex to handle
            return {};
        }
        const std::size_t candidate_size = surface->GetSizeInBytes();
        // Locate which layer/mipmap of the new surface this overlap maps to.
        auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
        if (!mipmap_layer) {
            continue;
        }
        const auto [layer, mipmap] = *mipmap_layer;
        if (new_surface->GetMipmapSize(mipmap) != candidate_size) {
            continue;
        }
        modified |= surface->IsModified();
        // Now we got all the data set up
        const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
        const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
        const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1);
        passed_tests++;
        ImageCopy(surface, new_surface, copy_params);
    }
    if (passed_tests == 0) {
        return {};
        // In Accurate GPU all tests should pass, else we recycle
    } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) {
        return {};
    }
    for (auto surface : overlaps) {
        Unregister(surface);
    }
    new_surface->MarkAsModified(modified, Tick());
    Register(new_surface);
    return {{new_surface, new_surface->GetMainView()}};
}
|
|
|
|
|
2019-05-14 04:59:18 +02:00
|
|
|
/**
 * `GetSurface` gets the starting address and parameters of a candidate surface and tries
 * to find a matching surface within the cache. This is done in 3 big steps. The first is to
 * check the 1st Level Cache in order to find an exact match; if we fail, we move to step 2.
 * Step 2 is checking if there are any overlaps at all; if none, we just load the texture from
 * memory, else we move to step 3. Step 3 consists of figuring out the relationship between the
 * candidate texture and the overlaps. We divide the scenarios depending on whether there is 1
 * or many overlaps. If there are many, we just try to reconstruct a new surface out of them
 * based on the candidate's parameters; if we fail, we recycle. When there's only 1 overlap,
 * then we have to check if the candidate is a view (layer/mipmap) of the overlap or if the
 * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct a
 * new surface.
 * @param gpu_addr, the starting address of the candidate surface.
 * @param params, the parameters of the candidate surface.
 * @param preserve_contents, tells if the new surface should be loaded from memory or left
 * blank.
 **/
|
2019-05-07 16:57:16 +02:00
|
|
|
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
|
2019-06-13 15:46:36 +02:00
|
|
|
bool preserve_contents, bool is_render) {
|
2019-06-25 23:26:00 +02:00
|
|
|
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
|
2019-05-07 16:57:16 +02:00
|
|
|
const auto cache_addr{ToCacheAddr(host_ptr)};
|
2019-05-10 23:59:18 +02:00
|
|
|
|
2019-05-21 14:36:00 +02:00
|
|
|
// Step 0: guarantee a valid surface
|
|
|
|
if (!cache_addr) {
|
|
|
|
// Return a null surface if it's invalid
|
|
|
|
SurfaceParams new_params = params;
|
|
|
|
new_params.width = 1;
|
|
|
|
new_params.height = 1;
|
|
|
|
new_params.depth = 1;
|
|
|
|
new_params.block_height = 0;
|
|
|
|
new_params.block_depth = 0;
|
|
|
|
return InitializeSurface(gpu_addr, new_params, false);
|
|
|
|
}
|
|
|
|
|
2019-05-14 04:59:18 +02:00
|
|
|
// Step 1
|
|
|
|
// Check Level 1 Cache for a fast structural match. If candidate surface
|
|
|
|
// matches at certain level we are pretty much done.
|
2019-06-30 00:52:37 +02:00
|
|
|
if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
|
2019-06-02 04:15:55 +02:00
|
|
|
TSurface& current_surface = iter->second;
|
2019-06-26 00:36:19 +02:00
|
|
|
const auto topological_result = current_surface->MatchesTopology(params);
|
2019-05-24 21:34:31 +02:00
|
|
|
if (topological_result != MatchTopologyResult::FullMatch) {
|
2019-05-10 23:59:18 +02:00
|
|
|
std::vector<TSurface> overlaps{current_surface};
|
2019-06-02 01:12:00 +02:00
|
|
|
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
|
|
|
topological_result);
|
2019-05-10 23:59:18 +02:00
|
|
|
}
|
2019-06-26 00:36:19 +02:00
|
|
|
const auto struct_result = current_surface->MatchesStructure(params);
|
|
|
|
if (struct_result != MatchStructureResult::None &&
|
2019-05-10 23:59:18 +02:00
|
|
|
(params.target != SurfaceTarget::Texture3D ||
|
|
|
|
current_surface->MatchTarget(params.target))) {
|
2019-06-26 00:36:19 +02:00
|
|
|
if (struct_result == MatchStructureResult::FullMatch) {
|
2019-06-13 15:46:36 +02:00
|
|
|
return ManageStructuralMatch(current_surface, params, is_render);
|
2019-05-10 23:59:18 +02:00
|
|
|
} else {
|
2019-06-13 16:39:45 +02:00
|
|
|
return RebuildSurface(current_surface, params, is_render);
|
2019-05-10 23:59:18 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-14 04:59:18 +02:00
|
|
|
// Step 2
|
|
|
|
// Obtain all possible overlaps in the memory region
|
2019-05-07 16:57:16 +02:00
|
|
|
const std::size_t candidate_size = params.GetGuestSizeInBytes();
|
2019-05-07 23:30:36 +02:00
|
|
|
auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
|
2019-05-08 23:45:59 +02:00
|
|
|
|
2019-05-14 04:59:18 +02:00
|
|
|
// If none are found, we are done. we just load the surface and create it.
|
2019-05-07 16:57:16 +02:00
|
|
|
if (overlaps.empty()) {
|
|
|
|
return InitializeSurface(gpu_addr, params, preserve_contents);
|
|
|
|
}
|
|
|
|
|
2019-05-14 04:59:18 +02:00
|
|
|
// Step 3
|
|
|
|
// Now we need to figure the relationship between the texture and its overlaps
|
|
|
|
// we do a topological test to ensure we can find some relationship. If it fails
|
|
|
|
// inmediatly recycle the texture
|
2019-06-26 00:36:19 +02:00
|
|
|
for (const auto& surface : overlaps) {
|
|
|
|
const auto topological_result = surface->MatchesTopology(params);
|
2019-05-24 21:34:31 +02:00
|
|
|
if (topological_result != MatchTopologyResult::FullMatch) {
|
2019-06-02 01:12:00 +02:00
|
|
|
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
|
|
|
topological_result);
|
2019-05-07 16:57:16 +02:00
|
|
|
}
|
2019-04-24 21:35:54 +02:00
|
|
|
}
|
|
|
|
|
2019-05-14 04:59:18 +02:00
|
|
|
// Split cases between 1 overlap or many.
|
2019-05-07 16:57:16 +02:00
|
|
|
if (overlaps.size() == 1) {
|
|
|
|
TSurface current_surface = overlaps[0];
|
2019-05-14 04:59:18 +02:00
|
|
|
// First check if the surface is within the overlap. If not, it means
|
|
|
|
// two things either the candidate surface is a supertexture of the overlap
|
|
|
|
// or they don't match in any known way.
|
2019-05-08 23:45:59 +02:00
|
|
|
if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
|
2019-05-14 03:35:32 +02:00
|
|
|
if (current_surface->GetGpuAddr() == gpu_addr) {
|
|
|
|
std::optional<std::pair<TSurface, TView>> view =
|
2019-05-14 04:59:18 +02:00
|
|
|
TryReconstructSurface(overlaps, params, gpu_addr);
|
2019-06-26 00:36:19 +02:00
|
|
|
if (view) {
|
2019-05-14 03:35:32 +02:00
|
|
|
return *view;
|
|
|
|
}
|
|
|
|
}
|
2019-06-02 01:12:00 +02:00
|
|
|
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
|
|
|
MatchTopologyResult::FullMatch);
|
2019-05-07 16:57:16 +02:00
|
|
|
}
|
2019-05-14 04:59:18 +02:00
|
|
|
// Now we check if the candidate is a mipmap/layer of the overlap
|
2019-05-09 00:27:29 +02:00
|
|
|
std::optional<TView> view =
|
|
|
|
current_surface->EmplaceView(params, gpu_addr, candidate_size);
|
2019-06-26 00:36:19 +02:00
|
|
|
if (view) {
|
2019-05-07 16:57:16 +02:00
|
|
|
const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
|
|
|
|
if (is_mirage) {
|
2019-05-18 10:57:49 +02:00
|
|
|
// On a mirage view, we need to recreate the surface under this new view
|
|
|
|
// and then obtain a view again.
|
|
|
|
SurfaceParams new_params = current_surface->GetSurfaceParams();
|
|
|
|
const u32 wh = SurfaceParams::ConvertWidth(
|
|
|
|
new_params.width, new_params.pixel_format, params.pixel_format);
|
|
|
|
const u32 hh = SurfaceParams::ConvertHeight(
|
|
|
|
new_params.height, new_params.pixel_format, params.pixel_format);
|
|
|
|
new_params.width = wh;
|
|
|
|
new_params.height = hh;
|
|
|
|
new_params.pixel_format = params.pixel_format;
|
2019-06-13 16:39:45 +02:00
|
|
|
std::pair<TSurface, TView> pair =
|
|
|
|
RebuildSurface(current_surface, new_params, is_render);
|
2019-05-18 10:57:49 +02:00
|
|
|
std::optional<TView> mirage_view =
|
|
|
|
pair.first->EmplaceView(params, gpu_addr, candidate_size);
|
|
|
|
if (mirage_view)
|
|
|
|
return {pair.first, *mirage_view};
|
2019-06-02 01:12:00 +02:00
|
|
|
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
|
|
|
MatchTopologyResult::FullMatch);
|
2019-05-07 16:57:16 +02:00
|
|
|
}
|
|
|
|
return {current_surface, *view};
|
|
|
|
}
|
|
|
|
} else {
|
2019-05-14 04:59:18 +02:00
|
|
|
// If there are many overlaps, odds are they are subtextures of the candidate
|
|
|
|
// surface. We try to construct a new surface based on the candidate parameters,
|
|
|
|
// using the overlaps. If a single overlap fails, this will fail.
|
2019-05-07 16:57:16 +02:00
|
|
|
std::optional<std::pair<TSurface, TView>> view =
|
2019-05-14 04:59:18 +02:00
|
|
|
TryReconstructSurface(overlaps, params, gpu_addr);
|
2019-06-26 00:36:19 +02:00
|
|
|
if (view) {
|
2019-05-07 16:57:16 +02:00
|
|
|
return *view;
|
|
|
|
}
|
|
|
|
}
|
2019-05-14 04:59:18 +02:00
|
|
|
// We failed all the tests, recycle the overlaps into a new texture.
|
2019-06-02 01:12:00 +02:00
|
|
|
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
|
|
|
MatchTopologyResult::FullMatch);
|
2019-04-24 21:35:54 +02:00
|
|
|
}
|
|
|
|
|
2019-09-27 23:54:58 +02:00
|
|
|
/**
|
|
|
|
* `DeduceSurface` gets the starting address and parameters of a candidate surface and tries
|
|
|
|
* to find a matching surface within the cache that's similar to it. If there are many textures
|
|
|
|
* or the texture found if entirely incompatible, it will fail. If no texture is found, the
|
|
|
|
* blit will be unsuccessful.
|
|
|
|
* @param gpu_addr, the starting address of the candidate surface.
|
|
|
|
* @param params, the paremeters on the candidate surface.
|
|
|
|
**/
|
|
|
|
Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
|
|
|
|
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
|
|
|
|
const auto cache_addr{ToCacheAddr(host_ptr)};
|
|
|
|
|
|
|
|
if (!cache_addr) {
|
|
|
|
Deduction result{};
|
|
|
|
result.type = DeductionType::DeductionFailed;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
|
|
|
|
TSurface& current_surface = iter->second;
|
|
|
|
const auto topological_result = current_surface->MatchesTopology(params);
|
|
|
|
if (topological_result != MatchTopologyResult::FullMatch) {
|
|
|
|
Deduction result{};
|
|
|
|
result.type = DeductionType::DeductionFailed;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
const auto struct_result = current_surface->MatchesStructure(params);
|
|
|
|
if (struct_result != MatchStructureResult::None &&
|
|
|
|
current_surface->MatchTarget(params.target)) {
|
|
|
|
Deduction result{};
|
|
|
|
result.type = DeductionType::DeductionComplete;
|
|
|
|
result.surface = current_surface;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
const std::size_t candidate_size = params.GetGuestSizeInBytes();
|
|
|
|
auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
|
|
|
|
|
|
|
|
if (overlaps.empty()) {
|
|
|
|
Deduction result{};
|
|
|
|
result.type = DeductionType::DeductionIncomplete;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (overlaps.size() > 1) {
|
|
|
|
Deduction result{};
|
|
|
|
result.type = DeductionType::DeductionFailed;
|
|
|
|
return result;
|
|
|
|
} else {
|
|
|
|
Deduction result{};
|
|
|
|
result.type = DeductionType::DeductionComplete;
|
|
|
|
result.surface = overlaps[0];
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* `DeduceBestBlit` gets the a source and destination starting address and parameters,
|
|
|
|
* and tries to deduce if they are supposed to be depth textures. If so, their
|
|
|
|
* parameters are modified and fixed into so.
|
|
|
|
* @param gpu_addr, the starting address of the candidate surface.
|
|
|
|
* @param params, the parameters on the candidate surface.
|
|
|
|
**/
|
|
|
|
void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
|
|
|
|
const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
|
|
|
|
auto deduc_src = DeduceSurface(src_gpu_addr, src_params);
|
|
|
|
auto deduc_dst = DeduceSurface(src_gpu_addr, src_params);
|
|
|
|
if (deduc_src.Failed() || deduc_dst.Failed()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
const bool incomplete_src = deduc_src.Incomplete();
|
|
|
|
const bool incomplete_dst = deduc_dst.Incomplete();
|
|
|
|
|
|
|
|
if (incomplete_src && incomplete_dst) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
const bool any_incomplete = incomplete_src || incomplete_dst;
|
|
|
|
|
|
|
|
if (!any_incomplete && !(deduc_src.IsDepth() && deduc_dst.IsDepth())) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (incomplete_src && !(deduc_dst.IsDepth())) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (incomplete_dst && !(deduc_src.IsDepth())) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
const auto inherit_format = ([](SurfaceParams& to, TSurface from) {
|
|
|
|
const SurfaceParams& params = from->GetSurfaceParams();
|
|
|
|
to.pixel_format = params.pixel_format;
|
|
|
|
to.component_type = params.component_type;
|
|
|
|
to.type = params.type;
|
|
|
|
});
|
|
|
|
// Now we got the cases where one or both is Depth and the other is not known
|
|
|
|
if (!incomplete_src) {
|
|
|
|
inherit_format(src_params, deduc_src.surface);
|
|
|
|
} else {
|
|
|
|
inherit_format(src_params, deduc_dst.surface);
|
|
|
|
}
|
|
|
|
if (!incomplete_dst) {
|
|
|
|
inherit_format(dst_params, deduc_dst.surface);
|
|
|
|
} else {
|
|
|
|
inherit_format(dst_params, deduc_src.surface);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-07 16:57:16 +02:00
|
|
|
/// Creates a brand new surface for the given address and parameters, registers it in
/// the cache and, when requested, fills it with the current guest memory contents.
std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
                                             bool preserve_contents) {
    TSurface surface = GetUncachedSurface(gpu_addr, params);
    Register(surface);
    if (preserve_contents) {
        // Seed the host texture with whatever the guest wrote at this address.
        LoadSurface(surface);
    }
    return {surface, surface->GetMainView()};
}
|
|
|
|
|
2019-05-07 16:57:16 +02:00
|
|
|
void LoadSurface(const TSurface& surface) {
|
2019-05-21 17:24:20 +02:00
|
|
|
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
|
2019-06-25 23:26:00 +02:00
|
|
|
surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache);
|
2019-05-21 17:24:20 +02:00
|
|
|
surface->UploadTexture(staging_cache.GetBuffer(0));
|
2019-05-07 16:57:16 +02:00
|
|
|
surface->MarkAsModified(false, Tick());
|
2019-04-24 21:35:54 +02:00
|
|
|
}
|
|
|
|
|
2019-05-07 16:57:16 +02:00
|
|
|
void FlushSurface(const TSurface& surface) {
|
2019-04-24 21:35:54 +02:00
|
|
|
if (!surface->IsModified()) {
|
2019-04-25 18:41:57 +02:00
|
|
|
return;
|
2019-04-24 21:35:54 +02:00
|
|
|
}
|
2019-05-21 17:24:20 +02:00
|
|
|
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
|
|
|
|
surface->DownloadTexture(staging_cache.GetBuffer(0));
|
2019-06-25 23:26:00 +02:00
|
|
|
surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache);
|
2019-05-07 16:57:16 +02:00
|
|
|
surface->MarkAsModified(false, Tick());
|
2019-04-24 21:35:54 +02:00
|
|
|
}
|
|
|
|
|
2019-05-07 16:57:16 +02:00
|
|
|
/// Inserts the surface into the L1 cache (keyed by starting address) and into every
/// 1MB registry bucket its memory range touches.
void RegisterInnerCache(TSurface& surface) {
    const CacheAddr cache_addr = surface->GetCacheAddr();
    const CacheAddr first_page = cache_addr >> registry_page_bits;
    const CacheAddr last_page = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
    l1_cache[cache_addr] = surface;
    for (CacheAddr page = first_page; page <= last_page; ++page) {
        registry[page].push_back(surface);
    }
}
|
|
|
|
|
|
|
|
/// Removes the surface from the L1 cache and from every registry bucket that
/// RegisterInnerCache put it in.
void UnregisterInnerCache(TSurface& surface) {
    const CacheAddr cache_addr = surface->GetCacheAddr();
    const CacheAddr first_page = cache_addr >> registry_page_bits;
    const CacheAddr last_page = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
    l1_cache.erase(cache_addr);
    for (CacheAddr page = first_page; page <= last_page; ++page) {
        auto& bucket = registry[page];
        bucket.erase(std::find(bucket.begin(), bucket.end(), surface));
    }
}
|
|
|
|
|
2019-05-07 23:30:36 +02:00
|
|
|
std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) {
|
2019-05-07 16:57:16 +02:00
|
|
|
if (size == 0) {
|
|
|
|
return {};
|
|
|
|
}
|
2019-05-07 23:30:36 +02:00
|
|
|
const CacheAddr cache_addr_end = cache_addr + size;
|
|
|
|
CacheAddr start = cache_addr >> registry_page_bits;
|
|
|
|
const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits;
|
2019-05-07 16:57:16 +02:00
|
|
|
std::vector<TSurface> surfaces;
|
|
|
|
while (start <= end) {
|
2019-05-10 06:10:16 +02:00
|
|
|
std::vector<TSurface>& list = registry[start];
|
2019-06-26 00:36:19 +02:00
|
|
|
for (auto& surface : list) {
|
|
|
|
if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) {
|
|
|
|
surface->MarkAsPicked(true);
|
|
|
|
surfaces.push_back(surface);
|
2019-05-07 16:57:16 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
start++;
|
|
|
|
}
|
2019-06-26 00:36:19 +02:00
|
|
|
for (auto& surface : surfaces) {
|
|
|
|
surface->MarkAsPicked(false);
|
2019-05-07 16:57:16 +02:00
|
|
|
}
|
|
|
|
return surfaces;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Stores an unregistered surface keyed by its parameters so it can be resurrected
/// later by TryGetReservedSurface instead of allocating a brand new one.
void ReserveSurface(const SurfaceParams& params, TSurface surface) {
    surface_reserve[params].push_back(std::move(surface));
}
|
|
|
|
|
2019-05-07 16:57:16 +02:00
|
|
|
/// Looks for a previously reserved surface with identical parameters that is not
/// currently registered; returns a null surface when none is available.
TSurface TryGetReservedSurface(const SurfaceParams& params) {
    const auto search = surface_reserve.find(params);
    if (search == surface_reserve.end()) {
        return {};
    }
    auto& candidates = search->second;
    const auto it =
        std::find_if(candidates.begin(), candidates.end(),
                     [](const TSurface& surface) { return !surface->IsRegistered(); });
    return it != candidates.end() ? *it : TSurface{};
}
|
|
|
|
|
2019-06-30 01:47:46 +02:00
|
|
|
/// Returns the interchangeable "sibling" pixel format for the given format, straight
/// from siblings_table. NOTE(review): the table is populated outside this view —
/// confirm there what value marks "no sibling".
constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
    return siblings_table[static_cast<std::size_t>(format)];
}
|
|
|
|
|
2019-05-11 04:26:46 +02:00
|
|
|
/// Pairs a cached surface with the concrete view currently bound to a framebuffer
/// slot (used for both color render targets and the depth buffer).
struct FramebufferTargetInfo {
    TSurface target;
    TView view;
};
|
|
|
|
|
2019-04-24 21:35:54 +02:00
|
|
|
VideoCore::RasterizerInterface& rasterizer;

// Counter passed as the timestamp to MarkAsModified; presumably advanced by Tick() —
// confirm against the Tick() definition outside this view.
u64 ticks{};

// Guards the cache for protection conflicts.
bool guard_render_targets{};
bool guard_samplers{};

// The siblings table is for formats that can inter exchange with one another
// without causing issues. This is only valid when a conflict occurs on a non
// rendering use.
std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;

// The internal Cache is different for the Texture Cache. It's based on buckets
// of 1MB. This fits better for the purpose of this cache as textures are normally
// large in size.
static constexpr u64 registry_page_bits{20};
static constexpr u64 registry_page_size{1 << registry_page_bits};
std::unordered_map<CacheAddr, std::vector<TSurface>> registry;

// NOTE(review): DEPTH_RT appears to be the pseudo-index for the depth target and
// NO_RT a "no render target" sentinel — confirm at the usage sites.
static constexpr u32 DEPTH_RT = 8;
static constexpr u32 NO_RT = 0xFFFFFFFF;

// The L1 Cache is used for fast texture lookup before checking the overlaps.
// This avoids calculating size and other data.
std::unordered_map<CacheAddr, TSurface> l1_cache;

/// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
/// previously been used. This is to prevent surfaces from being constantly created and
/// destroyed when used with different surface parameters.
std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve;

// Currently bound color render targets and depth buffer (see FramebufferTargetInfo).
std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
    render_targets;
FramebufferTargetInfo depth_buffer;

// NOTE(review): appears to track the surfaces sampled since the last sync, in tandem
// with guard_samplers — confirm at the usage sites outside this view.
std::vector<TSurface> sampled_textures;

// Reusable staging buffers for guest<->host texture transfers (see Load/FlushSurface).
StagingCache staging_cache;

// Recursive: cache entry points may re-enter on the same thread while holding the lock.
std::recursive_mutex mutex;
};
|
|
|
|
|
|
|
|
} // namespace VideoCommon
|