1
0
Fork 0
forked from suyu/suyu

Change texture_cache caching from GPUAddr to CacheAddr

This also reverts the changes that made invalidation and flushing go through
the GPU address.
This commit is contained in:
Fernando Sahmkow 2019-05-07 17:30:36 -04:00 committed by ReinUsesLisp
parent b711cdce78
commit d86f9cd709
7 changed files with 60 additions and 101 deletions

View file

@ -69,7 +69,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
const u64 aligned_size{Common::AlignUp(size, page_size)}; const u64 aligned_size{Common::AlignUp(size, page_size)};
const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
rasterizer.FlushAndInvalidateRegionEx(gpu_addr, cache_addr, aligned_size); rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
UnmapRange(gpu_addr, aligned_size); UnmapRange(gpu_addr, aligned_size);
return gpu_addr; return gpu_addr;

View file

@ -49,10 +49,6 @@ public:
/// and invalidated /// and invalidated
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated
virtual void FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) = 0;
/// Attempt to use a faster method to perform a surface copy /// Attempt to use a faster method to perform a surface copy
virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Tegra::Engines::Fermi2D::Regs::Surface& dst,

View file

@ -737,27 +737,11 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
buffer_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size);
} }
void RasterizerOpenGL::InvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (!addr || !size) {
return;
}
texture_cache.InvalidateRegionEx(gpu_addr, size);
shader_cache.InvalidateRegion(addr, size);
global_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
FlushRegion(addr, size); FlushRegion(addr, size);
InvalidateRegion(addr, size); InvalidateRegion(addr, size);
} }
void RasterizerOpenGL::FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) {
FlushRegion(addr, size);
InvalidateRegionEx(gpu_addr, addr, size);
}
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& src_rect,

View file

@ -64,9 +64,7 @@ public:
void FlushAll() override; void FlushAll() override;
void FlushRegion(CacheAddr addr, u64 size) override; void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override;
void InvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size);
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& src_rect,

View file

@ -25,7 +25,6 @@ SurfaceBaseImpl::SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams&
u32 offset = 0; u32 offset = 0;
mipmap_offsets.resize(params.num_levels); mipmap_offsets.resize(params.num_levels);
mipmap_sizes.resize(params.num_levels); mipmap_sizes.resize(params.num_levels);
gpu_addr_end = gpu_addr + memory_size;
for (u32 i = 0; i < params.num_levels; i++) { for (u32 i = 0; i < params.num_levels; i++) {
mipmap_offsets[i] = offset; mipmap_offsets[i] = offset;
mipmap_sizes[i] = params.GetGuestMipmapSize(i); mipmap_sizes[i] = params.GetGuestMipmapSize(i);
@ -99,8 +98,10 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
} }
} }
void SurfaceBaseImpl::FlushBuffer(std::vector<u8>& staging_buffer) { void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
std::vector<u8>& staging_buffer) {
MICROPROFILE_SCOPE(GPU_Flush_Texture); MICROPROFILE_SCOPE(GPU_Flush_Texture);
auto host_ptr = memory_manager.GetPointer(gpu_addr);
if (params.is_tiled) { if (params.is_tiled) {
ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width);
for (u32 level = 0; level < params.num_levels; ++level) { for (u32 level = 0; level < params.num_levels; ++level) {

View file

@ -45,40 +45,40 @@ class SurfaceBaseImpl {
public: public:
void LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector<u8>& staging_buffer); void LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector<u8>& staging_buffer);
void FlushBuffer(std::vector<u8>& staging_buffer); void FlushBuffer(Tegra::MemoryManager& memory_manager, std::vector<u8>& staging_buffer);
GPUVAddr GetGpuAddr() const { GPUVAddr GetGpuAddr() const {
return gpu_addr; return gpu_addr;
} }
GPUVAddr GetGpuAddrEnd() const { bool Overlaps(const CacheAddr start, const CacheAddr end) const {
return gpu_addr_end; return (cache_addr < end) && (cache_addr_end > start);
}
bool Overlaps(const GPUVAddr start, const GPUVAddr end) const {
return (gpu_addr < end) && (gpu_addr_end > start);
} }
// Use only when recycling a surface // Use only when recycling a surface
void SetGpuAddr(const GPUVAddr new_addr) { void SetGpuAddr(const GPUVAddr new_addr) {
gpu_addr = new_addr; gpu_addr = new_addr;
gpu_addr_end = new_addr + memory_size;
} }
VAddr GetCpuAddr() const { VAddr GetCpuAddr() const {
return gpu_addr; return cpu_addr;
} }
void SetCpuAddr(const VAddr new_addr) { void SetCpuAddr(const VAddr new_addr) {
cpu_addr = new_addr; cpu_addr = new_addr;
} }
u8* GetHostPtr() const { CacheAddr GetCacheAddr() const {
return host_ptr; return cache_addr;
} }
void SetHostPtr(u8* new_addr) { CacheAddr GetCacheAddrEnd() const {
host_ptr = new_addr; return cache_addr_end;
}
void SetCacheAddr(const CacheAddr new_addr) {
cache_addr = new_addr;
cache_addr_end = new_addr + memory_size;
} }
const SurfaceParams& GetSurfaceParams() const { const SurfaceParams& GetSurfaceParams() const {
@ -201,13 +201,13 @@ protected:
const SurfaceParams params; const SurfaceParams params;
GPUVAddr gpu_addr{}; GPUVAddr gpu_addr{};
GPUVAddr gpu_addr_end{};
std::vector<u32> mipmap_sizes; std::vector<u32> mipmap_sizes;
std::vector<u32> mipmap_offsets; std::vector<u32> mipmap_offsets;
const std::size_t layer_size; const std::size_t layer_size;
const std::size_t memory_size; const std::size_t memory_size;
const std::size_t host_memory_size; const std::size_t host_memory_size;
u8* host_ptr; CacheAddr cache_addr;
CacheAddr cache_addr_end{};
VAddr cpu_addr; VAddr cpu_addr;
private: private:

View file

@ -60,12 +60,6 @@ public:
} }
} }
void InvalidateRegionEx(GPUVAddr addr, std::size_t size) {
for (const auto& surface : GetSurfacesInRegionInner(addr, size)) {
Unregister(surface);
}
}
TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
const VideoCommon::Shader::Sampler& entry) { const VideoCommon::Shader::Sampler& entry) {
const auto gpu_addr{config.tic.Address()}; const auto gpu_addr{config.tic.Address()};
@ -154,9 +148,19 @@ public:
return GetSurface(gpu_addr, params, true).second; return GetSurface(gpu_addr, params, true).second;
} }
TSurface TryFindFramebufferSurface(const u8* host_ptr) const { TSurface TryFindFramebufferSurface(const u8* host_ptr) {
const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; const CacheAddr cache_addr = ToCacheAddr(host_ptr);
return it != registered_surfaces.end() ? *it->second.begin() : nullptr; if (!cache_addr) {
return nullptr;
}
const CacheAddr page = cache_addr >> registry_page_bits;
std::list<TSurface>& list = registry[page];
for (auto& s : list) {
if (s->GetCacheAddr() == cache_addr) {
return s;
}
}
return nullptr;
} }
u64 Tick() { u64 Tick() {
@ -181,30 +185,28 @@ protected:
void Register(TSurface surface) { void Register(TSurface surface) {
const GPUVAddr gpu_addr = surface->GetGpuAddr(); const GPUVAddr gpu_addr = surface->GetGpuAddr();
u8* host_ptr = memory_manager->GetPointer(gpu_addr); const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr));
const std::size_t size = surface->GetSizeInBytes(); const std::size_t size = surface->GetSizeInBytes();
const std::optional<VAddr> cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr); const std::optional<VAddr> cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr);
if (!host_ptr || !cpu_addr) { if (!cache_ptr || !cpu_addr) {
LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
gpu_addr); gpu_addr);
return; return;
} }
surface->SetHostPtr(host_ptr); surface->SetCacheAddr(cache_ptr);
surface->SetCpuAddr(*cpu_addr); surface->SetCpuAddr(*cpu_addr);
registered_surfaces.add({GetInterval(host_ptr, size), {surface}});
rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
RegisterInnerCache(surface); RegisterInnerCache(surface);
surface->MarkAsRegistered(true); surface->MarkAsRegistered(true);
rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
} }
void Unregister(TSurface surface) { void Unregister(TSurface surface) {
if (surface->IsProtected()) if (surface->IsProtected())
return; return;
const GPUVAddr gpu_addr = surface->GetGpuAddr(); const GPUVAddr gpu_addr = surface->GetGpuAddr();
const void* host_ptr = surface->GetHostPtr(); const CacheAddr cache_ptr = surface->GetCacheAddr();
const std::size_t size = surface->GetSizeInBytes(); const std::size_t size = surface->GetSizeInBytes();
const VAddr cpu_addr = surface->GetCpuAddr(); const VAddr cpu_addr = surface->GetCpuAddr();
registered_surfaces.erase(GetInterval(host_ptr, size));
rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
UnregisterInnerCache(surface); UnregisterInnerCache(surface);
surface->MarkAsRegistered(false); surface->MarkAsRegistered(false);
@ -280,7 +282,7 @@ private:
} }
} }
std::pair<TSurface, TView> RebuildMirage(TSurface current_surface, std::pair<TSurface, TView> RebuildSurface(TSurface current_surface,
const SurfaceParams& params) { const SurfaceParams& params) {
const auto gpu_addr = current_surface->GetGpuAddr(); const auto gpu_addr = current_surface->GetGpuAddr();
TSurface new_surface = GetUncachedSurface(gpu_addr, params); TSurface new_surface = GetUncachedSurface(gpu_addr, params);
@ -297,7 +299,7 @@ private:
const SurfaceParams& params) { const SurfaceParams& params) {
const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
if (is_mirage) { if (is_mirage) {
return RebuildMirage(current_surface, params); return RebuildSurface(current_surface, params);
} }
const bool matches_target = current_surface->MatchTarget(params.target); const bool matches_target = current_surface->MatchTarget(params.target);
if (matches_target) { if (matches_target) {
@ -356,7 +358,7 @@ private:
const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto host_ptr{memory_manager->GetPointer(gpu_addr)};
const auto cache_addr{ToCacheAddr(host_ptr)}; const auto cache_addr{ToCacheAddr(host_ptr)};
const std::size_t candidate_size = params.GetGuestSizeInBytes(); const std::size_t candidate_size = params.GetGuestSizeInBytes();
auto overlaps{GetSurfacesInRegionInner(gpu_addr, candidate_size)}; auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
if (overlaps.empty()) { if (overlaps.empty()) {
return InitializeSurface(gpu_addr, params, preserve_contents); return InitializeSurface(gpu_addr, params, preserve_contents);
} }
@ -378,7 +380,7 @@ private:
if (s_result == MatchStructureResult::FullMatch) { if (s_result == MatchStructureResult::FullMatch) {
return ManageStructuralMatch(current_surface, params); return ManageStructuralMatch(current_surface, params);
} else { } else {
return RebuildMirage(current_surface, params); return RebuildSurface(current_surface, params);
} }
} }
if (current_surface->GetSizeInBytes() <= candidate_size) { if (current_surface->GetSizeInBytes() <= candidate_size) {
@ -429,58 +431,40 @@ private:
} }
staging_buffer.resize(surface->GetHostSizeInBytes()); staging_buffer.resize(surface->GetHostSizeInBytes());
surface->DownloadTexture(staging_buffer); surface->DownloadTexture(staging_buffer);
surface->FlushBuffer(staging_buffer); surface->FlushBuffer(*memory_manager, staging_buffer);
surface->MarkAsModified(false, Tick()); surface->MarkAsModified(false, Tick());
} }
std::vector<TSurface> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const {
if (size == 0) {
return {};
}
const IntervalType interval{cache_addr, cache_addr + size};
std::vector<TSurface> surfaces;
for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) {
for (auto& s : pair.second) {
if (!s || !s->IsRegistered()) {
continue;
}
surfaces.push_back(s);
}
}
return surfaces;
}
void RegisterInnerCache(TSurface& surface) { void RegisterInnerCache(TSurface& surface) {
GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; CacheAddr start = surface->GetCacheAddr() >> registry_page_bits;
const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
while (start <= end) { while (start <= end) {
inner_cache[start].push_back(surface); registry[start].push_back(surface);
start++; start++;
} }
} }
void UnregisterInnerCache(TSurface& surface) { void UnregisterInnerCache(TSurface& surface) {
GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; CacheAddr start = surface->GetCacheAddr() >> registry_page_bits;
const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
while (start <= end) { while (start <= end) {
inner_cache[start].remove(surface); registry[start].remove(surface);
start++; start++;
} }
} }
std::vector<TSurface> GetSurfacesInRegionInner(const GPUVAddr gpu_addr, const std::size_t size) { std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) {
if (size == 0) { if (size == 0) {
return {}; return {};
} }
const GPUVAddr gpu_addr_end = gpu_addr + size; const CacheAddr cache_addr_end = cache_addr + size;
GPUVAddr start = gpu_addr >> inner_cache_page_bits; CacheAddr start = cache_addr >> registry_page_bits;
const GPUVAddr end = (gpu_addr_end - 1) >> inner_cache_page_bits; const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits;
std::vector<TSurface> surfaces; std::vector<TSurface> surfaces;
while (start <= end) { while (start <= end) {
std::list<TSurface>& list = inner_cache[start]; std::list<TSurface>& list = registry[start];
for (auto& s : list) { for (auto& s : list) {
if (!s->IsPicked() && s->Overlaps(gpu_addr, gpu_addr_end)) { if (!s->IsPicked() && s->Overlaps(cache_addr, cache_addr_end)) {
s->MarkAsPicked(true); s->MarkAsPicked(true);
surfaces.push_back(s); surfaces.push_back(s);
} }
@ -510,11 +494,6 @@ private:
return {}; return {};
} }
IntervalType GetInterval(const void* host_ptr, const std::size_t size) const {
const CacheAddr addr = ToCacheAddr(host_ptr);
return IntervalType::right_open(addr, addr + size);
}
struct RenderInfo { struct RenderInfo {
RenderTargetConfig config; RenderTargetConfig config;
TSurface target; TSurface target;
@ -531,11 +510,12 @@ private:
u64 ticks{}; u64 ticks{};
IntervalMap registered_surfaces; // The internal Cache is different for the Texture Cache. It's based on buckets
// of 1MB. This fits better for the purpose of this cache as textures are normally
static constexpr u64 inner_cache_page_bits{20}; // large in size.
static constexpr u64 inner_cache_page_size{1 << inner_cache_page_bits}; static constexpr u64 registry_page_bits{20};
std::unordered_map<GPUVAddr, std::list<TSurface>> inner_cache; static constexpr u64 registry_page_size{1 << registry_page_bits};
std::unordered_map<CacheAddr, std::list<TSurface>> registry;
/// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
/// previously been used. This is to prevent surfaces from being constantly created and /// previously been used. This is to prevent surfaces from being constantly created and