Texture Cache: Address feedback.
This commit is contained in:
parent
fd98fcf7f0
commit
8f9f142956
5 changed files with 37 additions and 18 deletions
|
@ -80,7 +80,6 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
|
||||||
rasterizer->UnmapMemory(*cpu_addr, map.second);
|
rasterizer->UnmapMemory(*cpu_addr, map.second);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
|
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -114,22 +114,22 @@ public:
|
||||||
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* IsGranularRange checks if a gpu region can be simply read with a pointer.
|
* Checks if a gpu region can be simply read with a pointer.
|
||||||
*/
|
*/
|
||||||
[[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
|
[[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* IsContinousRange checks if a gpu region is mapped by a single range of cpu addresses.
|
* Checks if a gpu region is mapped by a single range of cpu addresses.
|
||||||
*/
|
*/
|
||||||
[[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
|
[[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* IsFullyMappedRange checks if a gpu region is mapped entirely.
|
* Checks if a gpu region is mapped entirely.
|
||||||
*/
|
*/
|
||||||
[[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const;
|
[[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GetSubmappedRange returns a vector with all the subranges of cpu addresses mapped beneath.
|
* Returns a vector with all the subranges of cpu addresses mapped beneath.
|
||||||
* If the region is continuous, a single pair will be returned. If it's unmapped, an empty vector
|
* If the region is continuous, a single pair will be returned. If it's unmapped, an empty vector
|
||||||
* will be returned.
|
* will be returned.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -87,7 +87,7 @@ public:
|
||||||
/// Unmap memory range
|
/// Unmap memory range
|
||||||
virtual void UnmapMemory(VAddr addr, u64 size) = 0;
|
virtual void UnmapMemory(VAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
/// Unmap memory range
|
/// Remap GPU memory range. This means the underlying backing memory changed.
|
||||||
virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
|
virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||||
|
|
|
@ -26,11 +26,12 @@ enum class ImageFlagBits : u32 {
|
||||||
Registered = 1 << 6, ///< True when the image is registered
|
Registered = 1 << 6, ///< True when the image is registered
|
||||||
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
|
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
|
||||||
Remapped = 1 << 8, ///< Image has been remapped.
|
Remapped = 1 << 8, ///< Image has been remapped.
|
||||||
|
Sparse = 1 << 9, ///< Image has non-continuous submemory.
|
||||||
|
|
||||||
// Garbage Collection Flags
|
// Garbage Collection Flags
|
||||||
BadOverlap = 1 << 9, ///< This image overlaps other but doesn't fit, has higher
|
BadOverlap = 1 << 10,///< This image overlaps other but doesn't fit, has higher
|
||||||
///< garbage collection priority
|
///< garbage collection priority
|
||||||
Alias = 1 << 10, ///< This image has aliases and has priority on garbage
|
Alias = 1 << 11, ///< This image has aliases and has priority on garbage
|
||||||
///< collection
|
///< collection
|
||||||
};
|
};
|
||||||
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
|
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
|
||||||
|
@ -92,7 +93,28 @@ struct ImageBase {
|
||||||
std::vector<AliasedImage> aliased_images;
|
std::vector<AliasedImage> aliased_images;
|
||||||
std::vector<ImageId> overlapping_images;
|
std::vector<ImageId> overlapping_images;
|
||||||
ImageMapId map_view_id{};
|
ImageMapId map_view_id{};
|
||||||
bool is_sparse{};
|
};
|
||||||
|
|
||||||
|
struct ImageMapView {
|
||||||
|
explicit ImageMapView(GPUVAddr gpu_addr, VAddr cpu_addr, size_t size, ImageId image_id);
|
||||||
|
|
||||||
|
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
|
||||||
|
const VAddr overlap_end = overlap_cpu_addr + overlap_size;
|
||||||
|
const VAddr cpu_addr_end = cpu_addr + size;
|
||||||
|
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
|
||||||
|
const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size;
|
||||||
|
const GPUVAddr gpu_addr_end = gpu_addr + size;
|
||||||
|
return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
GPUVAddr gpu_addr;
|
||||||
|
VAddr cpu_addr;
|
||||||
|
size_t size;
|
||||||
|
ImageId image_id;
|
||||||
|
bool picked{};
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ImageAllocBase {
|
struct ImageAllocBase {
|
||||||
|
|
|
@ -156,9 +156,6 @@ public:
|
||||||
/// Remove images in a region
|
/// Remove images in a region
|
||||||
void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
|
void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
|
||||||
|
|
||||||
/// Used when GPU memory changes layout on sparse textures.
|
|
||||||
// void CheckRemaps();
|
|
||||||
|
|
||||||
/// Blit an image with the given parameters
|
/// Blit an image with the given parameters
|
||||||
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
||||||
const Tegra::Engines::Fermi2D::Surface& src,
|
const Tegra::Engines::Fermi2D::Surface& src,
|
||||||
|
@ -1179,8 +1176,9 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
|
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
|
||||||
Image& new_image = slot_images[new_image_id];
|
Image& new_image = slot_images[new_image_id];
|
||||||
|
|
||||||
new_image.is_sparse =
|
if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
|
||||||
!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes);
|
new_image.flags |= ImageFlagBits::Sparse;
|
||||||
|
}
|
||||||
|
|
||||||
for (const ImageId overlap_id : ignore_textures) {
|
for (const ImageId overlap_id : ignore_textures) {
|
||||||
Image& overlap = slot_images[overlap_id];
|
Image& overlap = slot_images[overlap_id];
|
||||||
|
@ -1519,7 +1517,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
|
||||||
total_used_memory += Common::AlignUp(tentative_size, 1024);
|
total_used_memory += Common::AlignUp(tentative_size, 1024);
|
||||||
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
|
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
|
||||||
[this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
|
[this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
|
||||||
if (!image.is_sparse) {
|
if (False(image.flags & ImageFlagBits::Sparse)) {
|
||||||
auto map_id =
|
auto map_id =
|
||||||
slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
|
slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
|
||||||
ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
|
ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
|
||||||
|
@ -1574,7 +1572,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
|
||||||
};
|
};
|
||||||
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
|
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
|
||||||
[this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
|
[this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
|
||||||
if (!image.is_sparse) {
|
if (False(image.flags & ImageFlagBits::Sparse)) {
|
||||||
const auto map_id = image.map_view_id;
|
const auto map_id = image.map_view_id;
|
||||||
ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
|
ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
|
||||||
const auto page_it = page_table.find(page);
|
const auto page_it = page_table.find(page);
|
||||||
|
@ -1633,7 +1631,7 @@ template <class P>
|
||||||
void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
|
void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
|
||||||
ASSERT(False(image.flags & ImageFlagBits::Tracked));
|
ASSERT(False(image.flags & ImageFlagBits::Tracked));
|
||||||
image.flags |= ImageFlagBits::Tracked;
|
image.flags |= ImageFlagBits::Tracked;
|
||||||
if (!image.is_sparse) {
|
if (False(image.flags & ImageFlagBits::Sparse)) {
|
||||||
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
|
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1659,7 +1657,7 @@ template <class P>
|
||||||
void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
|
void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
|
||||||
ASSERT(True(image.flags & ImageFlagBits::Tracked));
|
ASSERT(True(image.flags & ImageFlagBits::Tracked));
|
||||||
image.flags &= ~ImageFlagBits::Tracked;
|
image.flags &= ~ImageFlagBits::Tracked;
|
||||||
if (!image.is_sparse) {
|
if (False(image.flags & ImageFlagBits::Sparse)) {
|
||||||
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
|
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue