Texture Cache: Initial Implementation of Sparse Textures.
This commit is contained in:
parent
eb0e10cff2
commit
38165fb7e3
12 changed files with 310 additions and 23 deletions
|
@ -127,8 +127,13 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
|
||||||
|
|
||||||
//// Lock the new page
|
//// Lock the new page
|
||||||
// TryLockPage(page_entry, size);
|
// TryLockPage(page_entry, size);
|
||||||
|
auto& current_page = page_table[PageEntryIndex(gpu_addr)];
|
||||||
|
if (current_page.IsValid() != page_entry.IsValid() ||
|
||||||
|
current_page.ToAddress() != page_entry.ToAddress()) {
|
||||||
|
rasterizer->ModifyGPUMemory(gpu_addr, size);
|
||||||
|
}
|
||||||
|
|
||||||
page_table[PageEntryIndex(gpu_addr)] = page_entry;
|
current_page = page_entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
|
std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
|
||||||
|
|
|
@ -87,6 +87,9 @@ public:
|
||||||
/// Unmap memory range
|
/// Unmap memory range
|
||||||
virtual void UnmapMemory(VAddr addr, u64 size) = 0;
|
virtual void UnmapMemory(VAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
|
/// Unmap memory range
|
||||||
|
virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||||
/// and invalidated
|
/// and invalidated
|
||||||
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
|
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
|
||||||
|
|
|
@ -611,6 +611,13 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
|
||||||
shader_cache.OnCPUWrite(addr, size);
|
shader_cache.OnCPUWrite(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) {
|
||||||
|
{
|
||||||
|
std::scoped_lock lock{texture_cache.mutex};
|
||||||
|
texture_cache.UnmapGPUMemory(addr, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
|
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
|
||||||
if (!gpu.IsAsync()) {
|
if (!gpu.IsAsync()) {
|
||||||
gpu_memory.Write<u32>(addr, value);
|
gpu_memory.Write<u32>(addr, value);
|
||||||
|
|
|
@ -80,6 +80,7 @@ public:
|
||||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||||
void SyncGuestHost() override;
|
void SyncGuestHost() override;
|
||||||
void UnmapMemory(VAddr addr, u64 size) override;
|
void UnmapMemory(VAddr addr, u64 size) override;
|
||||||
|
void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
|
||||||
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
||||||
void SignalSyncPoint(u32 value) override;
|
void SignalSyncPoint(u32 value) override;
|
||||||
void ReleaseFences() override;
|
void ReleaseFences() override;
|
||||||
|
|
|
@ -557,6 +557,13 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
|
||||||
pipeline_cache.OnCPUWrite(addr, size);
|
pipeline_cache.OnCPUWrite(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
|
||||||
|
{
|
||||||
|
std::scoped_lock lock{texture_cache.mutex};
|
||||||
|
texture_cache.UnmapGPUMemory(addr, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
|
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
|
||||||
if (!gpu.IsAsync()) {
|
if (!gpu.IsAsync()) {
|
||||||
gpu_memory.Write<u32>(addr, value);
|
gpu_memory.Write<u32>(addr, value);
|
||||||
|
|
|
@ -72,6 +72,7 @@ public:
|
||||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||||
void SyncGuestHost() override;
|
void SyncGuestHost() override;
|
||||||
void UnmapMemory(VAddr addr, u64 size) override;
|
void UnmapMemory(VAddr addr, u64 size) override;
|
||||||
|
void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
|
||||||
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
||||||
void SignalSyncPoint(u32 value) override;
|
void SignalSyncPoint(u32 value) override;
|
||||||
void ReleaseFences() override;
|
void ReleaseFences() override;
|
||||||
|
|
|
@ -69,6 +69,9 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_)
|
||||||
|
: gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {}
|
||||||
|
|
||||||
std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
|
std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
|
||||||
if (other_addr < gpu_addr) {
|
if (other_addr < gpu_addr) {
|
||||||
// Subresource address can't be lower than the base
|
// Subresource address can't be lower than the base
|
||||||
|
|
|
@ -57,6 +57,12 @@ struct ImageBase {
|
||||||
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
|
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
|
||||||
|
const VAddr overlap_end = overlap_gpu_addr + overlap_size;
|
||||||
|
const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes;
|
||||||
|
return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
|
||||||
|
}
|
||||||
|
|
||||||
void CheckBadOverlapState();
|
void CheckBadOverlapState();
|
||||||
void CheckAliasState();
|
void CheckAliasState();
|
||||||
|
|
||||||
|
@ -84,6 +90,8 @@ struct ImageBase {
|
||||||
|
|
||||||
std::vector<AliasedImage> aliased_images;
|
std::vector<AliasedImage> aliased_images;
|
||||||
std::vector<ImageId> overlapping_images;
|
std::vector<ImageId> overlapping_images;
|
||||||
|
ImageMapId map_view_id{};
|
||||||
|
bool is_sparse{};
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ImageAllocBase {
|
struct ImageAllocBase {
|
||||||
|
|
|
@ -152,6 +152,9 @@ public:
|
||||||
/// Remove images in a region
|
/// Remove images in a region
|
||||||
void UnmapMemory(VAddr cpu_addr, size_t size);
|
void UnmapMemory(VAddr cpu_addr, size_t size);
|
||||||
|
|
||||||
|
/// Remove images in a region
|
||||||
|
void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
|
||||||
|
|
||||||
/// Blit an image with the given parameters
|
/// Blit an image with the given parameters
|
||||||
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
||||||
const Tegra::Engines::Fermi2D::Surface& src,
|
const Tegra::Engines::Fermi2D::Surface& src,
|
||||||
|
@ -190,7 +193,22 @@ public:
|
||||||
private:
|
private:
|
||||||
/// Iterate over all page indices in a range
|
/// Iterate over all page indices in a range
|
||||||
template <typename Func>
|
template <typename Func>
|
||||||
static void ForEachPage(VAddr addr, size_t size, Func&& func) {
|
static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
|
||||||
|
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
|
||||||
|
const u64 page_end = (addr + size - 1) >> PAGE_BITS;
|
||||||
|
for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
|
||||||
|
if constexpr (RETURNS_BOOL) {
|
||||||
|
if (func(page)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
func(page);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Func>
|
||||||
|
static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
|
||||||
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
|
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
|
||||||
const u64 page_end = (addr + size - 1) >> PAGE_BITS;
|
const u64 page_end = (addr + size - 1) >> PAGE_BITS;
|
||||||
for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
|
for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
|
||||||
|
@ -269,6 +287,13 @@ private:
|
||||||
template <typename Func>
|
template <typename Func>
|
||||||
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
|
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
|
||||||
|
|
||||||
|
template <typename Func>
|
||||||
|
void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
|
||||||
|
|
||||||
|
/// Iterates over all the images in a region calling func
|
||||||
|
template <typename Func>
|
||||||
|
void ForEachSparseSegment(ImageBase& image, Func&& func);
|
||||||
|
|
||||||
/// Find or create an image view in the given image with the passed parameters
|
/// Find or create an image view in the given image with the passed parameters
|
||||||
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
|
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
|
||||||
|
|
||||||
|
@ -340,7 +365,8 @@ private:
|
||||||
std::unordered_map<TSCEntry, SamplerId> samplers;
|
std::unordered_map<TSCEntry, SamplerId> samplers;
|
||||||
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
|
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
|
||||||
|
|
||||||
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
|
std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
|
||||||
|
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
|
||||||
|
|
||||||
bool has_deleted_images = false;
|
bool has_deleted_images = false;
|
||||||
u64 total_used_memory = 0;
|
u64 total_used_memory = 0;
|
||||||
|
@ -349,6 +375,7 @@ private:
|
||||||
u64 critical_memory;
|
u64 critical_memory;
|
||||||
|
|
||||||
SlotVector<Image> slot_images;
|
SlotVector<Image> slot_images;
|
||||||
|
SlotVector<ImageMapView> slot_map_views;
|
||||||
SlotVector<ImageView> slot_image_views;
|
SlotVector<ImageView> slot_image_views;
|
||||||
SlotVector<ImageAlloc> slot_image_allocs;
|
SlotVector<ImageAlloc> slot_image_allocs;
|
||||||
SlotVector<Sampler> slot_samplers;
|
SlotVector<Sampler> slot_samplers;
|
||||||
|
@ -702,6 +729,21 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
|
||||||
|
std::vector<ImageId> deleted_images;
|
||||||
|
ForEachImageInRegionGPU(gpu_addr, size,
|
||||||
|
[&](ImageId id, Image&) { deleted_images.push_back(id); });
|
||||||
|
for (const ImageId id : deleted_images) {
|
||||||
|
Image& image = slot_images[id];
|
||||||
|
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||||
|
UntrackImage(image);
|
||||||
|
}
|
||||||
|
UnregisterImage(id);
|
||||||
|
DeleteImage(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
||||||
const Tegra::Engines::Fermi2D::Surface& src,
|
const Tegra::Engines::Fermi2D::Surface& src,
|
||||||
|
@ -833,9 +875,10 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_ad
|
||||||
if (it == page_table.end()) {
|
if (it == page_table.end()) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
const auto& image_ids = it->second;
|
const auto& image_map_ids = it->second;
|
||||||
for (const ImageId image_id : image_ids) {
|
for (const ImageMapId map_id : image_map_ids) {
|
||||||
const ImageBase& image = slot_images[image_id];
|
const ImageMapView& map = slot_map_views[map_id];
|
||||||
|
const ImageBase& image = slot_images[map.image_id];
|
||||||
if (image.cpu_addr != cpu_addr) {
|
if (image.cpu_addr != cpu_addr) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -958,7 +1001,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
|
ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
|
||||||
if (!IsValidAddress(gpu_memory, config)) {
|
if (!IsValidEntry(gpu_memory, config)) {
|
||||||
return NULL_IMAGE_VIEW_ID;
|
return NULL_IMAGE_VIEW_ID;
|
||||||
}
|
}
|
||||||
const auto [pair, is_new] = image_views.try_emplace(config);
|
const auto [pair, is_new] = image_views.try_emplace(config);
|
||||||
|
@ -1026,7 +1069,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
|
ForEachImageInRegionGPU(gpu_addr, CalculateGuestSizeInBytes(info), lambda);
|
||||||
return image_id;
|
return image_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1056,7 +1099,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
std::vector<ImageId> left_aliased_ids;
|
std::vector<ImageId> left_aliased_ids;
|
||||||
std::vector<ImageId> right_aliased_ids;
|
std::vector<ImageId> right_aliased_ids;
|
||||||
std::vector<ImageId> bad_overlap_ids;
|
std::vector<ImageId> bad_overlap_ids;
|
||||||
ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
|
ForEachImageInRegionGPU(gpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
|
||||||
if (info.type == ImageType::Linear) {
|
if (info.type == ImageType::Linear) {
|
||||||
if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
|
if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
|
||||||
// Alias linear images with the same pitch
|
// Alias linear images with the same pitch
|
||||||
|
@ -1091,6 +1134,24 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
|
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
|
||||||
Image& new_image = slot_images[new_image_id];
|
Image& new_image = slot_images[new_image_id];
|
||||||
|
|
||||||
|
new_image.is_sparse = false;
|
||||||
|
if (new_image.info.type != ImageType::Linear && new_image.info.type != ImageType::Buffer) {
|
||||||
|
const LevelArray offsets = CalculateMipLevelOffsets(new_image.info);
|
||||||
|
size_t level;
|
||||||
|
const size_t levels = static_cast<size_t>(new_image.info.resources.levels);
|
||||||
|
VAddr n_cpu_addr = new_image.cpu_addr;
|
||||||
|
GPUVAddr n_gpu_addr = new_image.gpu_addr;
|
||||||
|
for (level = 0; level < levels; level++) {
|
||||||
|
n_gpu_addr += offsets[level];
|
||||||
|
n_cpu_addr += offsets[level];
|
||||||
|
std::optional<VAddr> cpu_addr_opt = gpu_memory.GpuToCpuAddress(n_gpu_addr);
|
||||||
|
if (!cpu_addr_opt || *cpu_addr_opt == 0 || n_cpu_addr != *cpu_addr_opt) {
|
||||||
|
new_image.is_sparse = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: Only upload what we need
|
// TODO: Only upload what we need
|
||||||
RefreshContents(new_image);
|
RefreshContents(new_image);
|
||||||
|
|
||||||
|
@ -1239,7 +1300,8 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
|
||||||
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
|
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
|
||||||
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
||||||
boost::container::small_vector<ImageId, 32> images;
|
boost::container::small_vector<ImageId, 32> images;
|
||||||
ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
|
boost::container::small_vector<ImageMapId, 32> maps;
|
||||||
|
ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) {
|
||||||
const auto it = page_table.find(page);
|
const auto it = page_table.find(page);
|
||||||
if (it == page_table.end()) {
|
if (it == page_table.end()) {
|
||||||
if constexpr (BOOL_BREAK) {
|
if constexpr (BOOL_BREAK) {
|
||||||
|
@ -1248,12 +1310,63 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for (const ImageMapId map_id : it->second) {
|
||||||
|
ImageMapView& map = slot_map_views[map_id];
|
||||||
|
if (map.picked) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!map.Overlaps(cpu_addr, size)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
map.picked = true;
|
||||||
|
maps.push_back(map_id);
|
||||||
|
Image& image = slot_images[map.image_id];
|
||||||
|
if (True(image.flags & ImageFlagBits::Picked)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
image.flags |= ImageFlagBits::Picked;
|
||||||
|
images.push_back(map.image_id);
|
||||||
|
if constexpr (BOOL_BREAK) {
|
||||||
|
if (func(map.image_id, image)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
func(map.image_id, image);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if constexpr (BOOL_BREAK) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
for (const ImageId image_id : images) {
|
||||||
|
slot_images[image_id].flags &= ~ImageFlagBits::Picked;
|
||||||
|
}
|
||||||
|
for (const ImageMapId map_id : maps) {
|
||||||
|
slot_map_views[map_id].picked = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
template <typename Func>
|
||||||
|
void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
|
||||||
|
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
|
||||||
|
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
||||||
|
boost::container::small_vector<ImageId, 8> images;
|
||||||
|
ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
|
||||||
|
const auto it = gpu_page_table.find(page);
|
||||||
|
if (it == gpu_page_table.end()) {
|
||||||
|
if constexpr (BOOL_BREAK) {
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
for (const ImageId image_id : it->second) {
|
for (const ImageId image_id : it->second) {
|
||||||
Image& image = slot_images[image_id];
|
Image& image = slot_images[image_id];
|
||||||
if (True(image.flags & ImageFlagBits::Picked)) {
|
if (True(image.flags & ImageFlagBits::Picked)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!image.Overlaps(cpu_addr, size)) {
|
if (!image.OverlapsGPU(gpu_addr, size)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
image.flags |= ImageFlagBits::Picked;
|
image.flags |= ImageFlagBits::Picked;
|
||||||
|
@ -1275,6 +1388,30 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
template <typename Func>
|
||||||
|
void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
|
||||||
|
using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
|
||||||
|
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
||||||
|
GPUVAddr gpu_addr = image.gpu_addr;
|
||||||
|
const size_t levels = image.info.resources.levels;
|
||||||
|
const auto mipmap_sizes = CalculateMipLevelSizes(image.info);
|
||||||
|
for (size_t level = 0; level < levels; level++) {
|
||||||
|
const size_t size = mipmap_sizes[level];
|
||||||
|
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||||
|
if (cpu_addr && *cpu_addr != 0) {
|
||||||
|
if constexpr (BOOL_BREAK) {
|
||||||
|
if (func(gpu_addr, *cpu_addr, size)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
func(gpu_addr, *cpu_addr, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gpu_addr += size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
|
ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
|
||||||
Image& image = slot_images[image_id];
|
Image& image = slot_images[image_id];
|
||||||
|
@ -1292,8 +1429,6 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
|
||||||
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
|
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
|
||||||
"Trying to register an already registered image");
|
"Trying to register an already registered image");
|
||||||
image.flags |= ImageFlagBits::Registered;
|
image.flags |= ImageFlagBits::Registered;
|
||||||
ForEachPage(image.cpu_addr, image.guest_size_bytes,
|
|
||||||
[this, image_id](u64 page) { page_table[page].push_back(image_id); });
|
|
||||||
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
|
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
|
||||||
if ((IsPixelFormatASTC(image.info.format) &&
|
if ((IsPixelFormatASTC(image.info.format) &&
|
||||||
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
|
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
|
||||||
|
@ -1301,6 +1436,21 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
|
||||||
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
||||||
}
|
}
|
||||||
total_used_memory += Common::AlignUp(tentative_size, 1024);
|
total_used_memory += Common::AlignUp(tentative_size, 1024);
|
||||||
|
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
|
||||||
|
[this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
|
||||||
|
if (!image.is_sparse) {
|
||||||
|
auto map_id =
|
||||||
|
slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
|
||||||
|
ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
|
||||||
|
[this, map_id](u64 page) { page_table[page].push_back(map_id); });
|
||||||
|
image.map_view_id = map_id;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ForEachSparseSegment(image, [this, image_id](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
|
||||||
|
auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
|
||||||
|
ForEachCPUPage(cpu_addr, size,
|
||||||
|
[this, map_id](u64 page) { page_table[page].push_back(map_id); });
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -1317,9 +1467,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
|
||||||
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
||||||
}
|
}
|
||||||
total_used_memory -= Common::AlignUp(tentative_size, 1024);
|
total_used_memory -= Common::AlignUp(tentative_size, 1024);
|
||||||
ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
|
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
|
||||||
const auto page_it = page_table.find(page);
|
const auto page_it = gpu_page_table.find(page);
|
||||||
if (page_it == page_table.end()) {
|
if (page_it == gpu_page_table.end()) {
|
||||||
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
|
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1331,20 +1481,84 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
|
||||||
}
|
}
|
||||||
image_ids.erase(vector_it);
|
image_ids.erase(vector_it);
|
||||||
});
|
});
|
||||||
|
if (!image.is_sparse) {
|
||||||
|
const auto map_id = image.map_view_id;
|
||||||
|
ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
|
||||||
|
const auto page_it = page_table.find(page);
|
||||||
|
if (page_it == page_table.end()) {
|
||||||
|
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::vector<ImageMapId>& image_map_ids = page_it->second;
|
||||||
|
const auto vector_it = std::ranges::find(image_map_ids, map_id);
|
||||||
|
if (vector_it == image_map_ids.end()) {
|
||||||
|
UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
|
||||||
|
page << PAGE_BITS);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
image_map_ids.erase(vector_it);
|
||||||
|
});
|
||||||
|
slot_map_views.erase(map_id);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
boost::container::small_vector<ImageMapId, 8> maps_to_delete;
|
||||||
|
ForEachSparseSegment(
|
||||||
|
image, [this, image_id, &maps_to_delete]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr,
|
||||||
|
size_t size) {
|
||||||
|
ForEachCPUPage(cpu_addr, size, [this, image_id, &maps_to_delete](u64 page) {
|
||||||
|
const auto page_it = page_table.find(page);
|
||||||
|
if (page_it == page_table.end()) {
|
||||||
|
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::vector<ImageMapId>& image_map_ids = page_it->second;
|
||||||
|
auto vector_it = image_map_ids.begin();
|
||||||
|
while (vector_it != image_map_ids.end()) {
|
||||||
|
ImageMapView& map = slot_map_views[*vector_it];
|
||||||
|
if (map.image_id != image_id) {
|
||||||
|
vector_it++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!map.picked) {
|
||||||
|
maps_to_delete.push_back(*vector_it);
|
||||||
|
map.picked = true;
|
||||||
|
}
|
||||||
|
vector_it = image_map_ids.erase(vector_it);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
for (const ImageMapId map_id : maps_to_delete) {
|
||||||
|
slot_map_views.erase(map_id);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::TrackImage(ImageBase& image) {
|
void TextureCache<P>::TrackImage(ImageBase& image) {
|
||||||
ASSERT(False(image.flags & ImageFlagBits::Tracked));
|
ASSERT(False(image.flags & ImageFlagBits::Tracked));
|
||||||
image.flags |= ImageFlagBits::Tracked;
|
image.flags |= ImageFlagBits::Tracked;
|
||||||
|
if (!image.is_sparse) {
|
||||||
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
|
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ForEachSparseSegment(image,
|
||||||
|
[this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
|
||||||
|
rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::UntrackImage(ImageBase& image) {
|
void TextureCache<P>::UntrackImage(ImageBase& image) {
|
||||||
ASSERT(True(image.flags & ImageFlagBits::Tracked));
|
ASSERT(True(image.flags & ImageFlagBits::Tracked));
|
||||||
image.flags &= ~ImageFlagBits::Tracked;
|
image.flags &= ~ImageFlagBits::Tracked;
|
||||||
|
if (!image.is_sparse) {
|
||||||
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
|
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ForEachSparseSegment(image,
|
||||||
|
[this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
|
||||||
|
rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
|
|
@ -16,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14;
|
||||||
constexpr SlotId CORRUPT_ID{0xfffffffe};
|
constexpr SlotId CORRUPT_ID{0xfffffffe};
|
||||||
|
|
||||||
using ImageId = SlotId;
|
using ImageId = SlotId;
|
||||||
|
using ImageMapId = SlotId;
|
||||||
using ImageViewId = SlotId;
|
using ImageViewId = SlotId;
|
||||||
using ImageAllocId = SlotId;
|
using ImageAllocId = SlotId;
|
||||||
using SamplerId = SlotId;
|
using SamplerId = SlotId;
|
||||||
|
|
|
@ -664,6 +664,16 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
|
||||||
return offsets;
|
return offsets;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
|
||||||
|
const u32 num_levels = info.resources.levels;
|
||||||
|
const LevelInfo level_info = MakeLevelInfo(info);
|
||||||
|
LevelArray sizes{};
|
||||||
|
for (u32 level = 0; level < num_levels; ++level) {
|
||||||
|
sizes[level] = CalculateLevelSize(level_info, level);
|
||||||
|
}
|
||||||
|
return sizes;
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
|
std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
|
||||||
ASSERT(info.type == ImageType::e3D);
|
ASSERT(info.type == ImageType::e3D);
|
||||||
std::vector<u32> offsets;
|
std::vector<u32> offsets;
|
||||||
|
@ -776,14 +786,37 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
|
||||||
return copies;
|
return copies;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
|
bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr) {
|
||||||
if (config.Address() == 0) {
|
if (gpu_addr == 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (config.Address() > (u64(1) << 48)) {
|
if (gpu_addr > (u64(1) << 48)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return gpu_memory.GpuToCpuAddress(config.Address()).has_value();
|
const auto cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||||
|
return cpu_addr.has_value() && *cpu_addr != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
|
||||||
|
const GPUVAddr gpu_addr = config.Address();
|
||||||
|
if (IsValidAddress(gpu_memory, gpu_addr)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (!config.IsBlockLinear()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const size_t levels = config.max_mip_level + 1;
|
||||||
|
if (levels <= 1) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const ImageInfo info{config};
|
||||||
|
const LevelArray offsets = CalculateMipLevelOffsets(info);
|
||||||
|
for (size_t level = 1; level < levels; level++) {
|
||||||
|
if (IsValidAddress(gpu_memory, static_cast<GPUVAddr>(gpu_addr + offsets[level]))) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
|
std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
|
||||||
|
|
|
@ -40,6 +40,8 @@ struct OverlapResult {
|
||||||
|
|
||||||
[[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept;
|
[[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept;
|
||||||
|
|
||||||
|
[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
|
||||||
|
|
||||||
[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
|
[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
|
||||||
|
|
||||||
[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
|
[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
|
||||||
|
@ -55,7 +57,9 @@ struct OverlapResult {
|
||||||
const ImageInfo& src,
|
const ImageInfo& src,
|
||||||
SubresourceBase base);
|
SubresourceBase base);
|
||||||
|
|
||||||
[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
|
[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr);
|
||||||
|
|
||||||
|
[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
|
||||||
|
|
||||||
[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
|
[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
|
||||||
GPUVAddr gpu_addr, const ImageInfo& info,
|
GPUVAddr gpu_addr, const ImageInfo& info,
|
||||||
|
|
Loading…
Reference in a new issue