forked from suyu/suyu
Buffer Cache: Tune to the levels of the new GC.
This commit is contained in:
parent
ecb3342145
commit
5e982a7812
6 changed files with 78 additions and 6 deletions
|
@ -76,8 +76,9 @@ class BufferCache {
|
|||
|
||||
static constexpr BufferId NULL_BUFFER_ID{0};
|
||||
|
||||
static constexpr u64 EXPECTED_MEMORY = 512_MiB;
|
||||
static constexpr u64 CRITICAL_MEMORY = 1_GiB;
|
||||
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
|
||||
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
|
||||
static constexpr s64 TARGET_THRESHOLD = 4_GiB;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
|
@ -436,6 +437,8 @@ private:
|
|||
Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
|
||||
u64 frame_tick = 0;
|
||||
u64 total_used_memory = 0;
|
||||
u64 minimum_memory = 0;
|
||||
u64 critical_memory = 0;
|
||||
|
||||
std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
|
||||
};
|
||||
|
@ -451,11 +454,30 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
|||
// Ensure the first slot is used for the null buffer
|
||||
void(slot_buffers.insert(runtime, NullBufferParams{}));
|
||||
common_ranges.clear();
|
||||
|
||||
if (!runtime.CanReportMemoryUsage()) {
|
||||
minimum_memory = DEFAULT_EXPECTED_MEMORY;
|
||||
critical_memory = DEFAULT_CRITICAL_MEMORY;
|
||||
return;
|
||||
}
|
||||
|
||||
const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
|
||||
const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB;
|
||||
const s64 min_spacing_critical = device_memory - 1_GiB;
|
||||
const s64 mem_tresshold = std::min(device_memory, TARGET_THRESHOLD);
|
||||
const s64 min_vacancy_expected = (6 * mem_tresshold) / 10;
|
||||
const s64 min_vacancy_critical = (3 * mem_tresshold) / 10;
|
||||
minimum_memory = static_cast<u64>(
|
||||
std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
|
||||
DEFAULT_EXPECTED_MEMORY));
|
||||
critical_memory = static_cast<u64>(
|
||||
std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
|
||||
DEFAULT_CRITICAL_MEMORY));
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::RunGarbageCollector() {
|
||||
const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;
|
||||
const bool aggressive_gc = total_used_memory >= critical_memory;
|
||||
const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
|
||||
int num_iterations = aggressive_gc ? 64 : 32;
|
||||
const auto clean_up = [this, &num_iterations](BufferId buffer_id) {
|
||||
|
@ -486,7 +508,11 @@ void BufferCache<P>::TickFrame() {
|
|||
const bool skip_preferred = hits * 256 < shots * 251;
|
||||
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
|
||||
|
||||
if (total_used_memory >= EXPECTED_MEMORY) {
|
||||
// If we can obtain the memory info, use it instead of the estimate.
|
||||
if (runtime.CanReportMemoryUsage()) {
|
||||
total_used_memory = runtime.GetDeviceMemoryUsage();
|
||||
}
|
||||
if (total_used_memory >= minimum_memory) {
|
||||
RunGarbageCollector();
|
||||
}
|
||||
++frame_tick;
|
||||
|
|
|
@ -135,6 +135,24 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
|
|||
buffer.Create();
|
||||
glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
|
||||
}
|
||||
|
||||
device_access_memory = []() -> u64 {
|
||||
if (GLAD_GL_NVX_gpu_memory_info) {
|
||||
GLint cur_avail_mem_kb = 0;
|
||||
glGetIntegerv(GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX, &cur_avail_mem_kb);
|
||||
return static_cast<u64>(cur_avail_mem_kb) * 1_KiB;
|
||||
}
|
||||
return 2_GiB; // Return minimum requirements
|
||||
}();
|
||||
}
|
||||
|
||||
/// Returns an estimate of the device memory currently in use.
///
/// GL_NVX_gpu_memory_info only reports how much video memory is still *available*, so the
/// usage is derived by subtracting that figure from the total cached in device_access_memory
/// (which the constructor fills from GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX).
/// Callers compare the result against usage thresholds, so returning the raw "available"
/// value would invert the garbage-collection trigger.
///
/// @return Used memory (total minus currently available, both scaled by 1_KiB), or a fixed
///         2_GiB when the extension is not present.
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
    if (GLAD_GL_NVX_gpu_memory_info) {
        GLint cur_avail_mem_kb = 0;
        glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &cur_avail_mem_kb);
        const u64 available_memory = static_cast<u64>(cur_avail_mem_kb) * 1_KiB;
        // Clamp to zero instead of wrapping around if the driver ever reports more free
        // memory than the total that was cached at construction time.
        return available_memory < device_access_memory ? device_access_memory - available_memory
                                                       : 0;
    }
    return 2_GiB;
}
|
||||
|
||||
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
|
||||
|
|
|
@ -151,6 +151,16 @@ public:
|
|||
use_storage_buffers = use_storage_buffers_;
|
||||
}
|
||||
|
||||
/// Returns the value cached in device_access_memory.
u64 GetDeviceLocalMemory() const {
    const u64 cached_memory = device_access_memory;
    return cached_memory;
}
|
||||
|
||||
u64 GetDeviceMemoryUsage() const;
|
||||
|
||||
bool CanReportMemoryUsage() const {
|
||||
return GLAD_GL_NVX_gpu_memory_info;
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr std::array PABO_LUT{
|
||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
|
@ -184,6 +194,8 @@ private:
|
|||
std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms;
|
||||
|
||||
u32 index_buffer_offset = 0;
|
||||
|
||||
u64 device_access_memory;
|
||||
};
|
||||
|
||||
struct BufferCacheParams {
|
||||
|
|
|
@ -141,6 +141,18 @@ StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
|
|||
return staging_pool.Request(size, MemoryUsage::Download);
|
||||
}
|
||||
|
||||
/// Forwards the query to the `device` member and returns its answer unchanged.
u64 BufferCacheRuntime::GetDeviceLocalMemory() const {
    const u64 local_memory = device.GetDeviceLocalMemory();
    return local_memory;
}
|
||||
|
||||
/// Forwards the memory-usage query to the `device` member and returns its answer unchanged.
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
    const u64 used_memory = device.GetDeviceMemoryUsage();
    return used_memory;
}
|
||||
|
||||
bool BufferCacheRuntime::CanReportMemoryUsage() const {
|
||||
return device.CanReportMemoryUsage();
|
||||
}
|
||||
|
||||
// Delegates to scheduler.Finish(); this method performs no other work itself.
void BufferCacheRuntime::Finish() {
|
||||
scheduler.Finish();
|
||||
}
|
||||
|
|
|
@ -65,6 +65,12 @@ public:
|
|||
|
||||
void Finish();
|
||||
|
||||
u64 GetDeviceLocalMemory() const;
|
||||
|
||||
u64 GetDeviceMemoryUsage() const;
|
||||
|
||||
bool CanReportMemoryUsage() const;
|
||||
|
||||
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
|
||||
|
||||
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
|
||||
|
|
|
@ -60,8 +60,6 @@ class TextureCache {
|
|||
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
|
||||
|
||||
static constexpr s64 TARGET_THRESHOLD = 4_GiB;
|
||||
static constexpr s64 MIN_VACANCY_EXPECTED = (6 * TARGET_THRESHOLD) / 10;
|
||||
static constexpr s64 MIN_VACANCY_CRITICAL = (3 * TARGET_THRESHOLD) / 10;
|
||||
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
|
||||
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
|
||||
static constexpr size_t GC_EMERGENCY_COUNTS = 2;
|
||||
|
|
Loading…
Reference in a new issue