Merge pull request #12056 from ameerj/opengl-neglect

OpenGL: Implement async downloads in buffer and texture caches
commit 1c21d6c2c2
liamwhite, 2023-12-01 09:16:56 -05:00, committed by GitHub
6 changed files with 74 additions and 44 deletions

src/video_core/renderer_opengl/gl_buffer_cache.cpp

@@ -146,8 +146,12 @@ StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) {
     return staging_buffer_pool.RequestUploadBuffer(size);
 }

-StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
-    return staging_buffer_pool.RequestDownloadBuffer(size);
+StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
+    return staging_buffer_pool.RequestDownloadBuffer(size, deferred);
+}
+
+void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferMap& buffer) {
+    staging_buffer_pool.FreeDeferredStagingBuffer(buffer);
 }

 u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
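
In short: a download can now be requested as deferred, in which case the caller keeps ownership of the staging map until it explicitly returns it to the pool. A rough lifecycle sketch, assuming a runtime object exposing the API above; IssueCopy and ReadResult are hypothetical stand-ins for the real cache code, not functions from this PR:

```cpp
// Hypothetical call sequence illustrating the deferred-download lifecycle.
StagingBufferMap map = runtime.DownloadStagingBuffer(num_bytes, /*deferred=*/true);
IssueCopy(map.buffer);        // GPU copies into the staging buffer; map.sync guards it
// ... frames later, once the fence behind map.sync has signaled ...
ReadResult(map.mapped_span);  // read back through the persistent mapping
runtime.FreeDeferredStagingBuffer(map);  // the pool slot becomes reusable again
```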

src/video_core/renderer_opengl/gl_buffer_cache.h

@@ -66,7 +66,9 @@ public:
     [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size);

-    [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
+    [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size, bool deferred = false);
+
+    void FreeDeferredStagingBuffer(StagingBufferMap& buffer);

     bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) {
         return false;
@@ -246,7 +248,7 @@ struct BufferCacheParams {
     static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
     static constexpr bool USE_MEMORY_MAPS = true;
     static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
-    static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
+    static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
     // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
     static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
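
IMPLEMENTS_ASYNC_DOWNLOADS is one of the compile-time capability flags that the shared VideoCommon cache templates branch on. A minimal sketch of that pattern, illustrative only and not the actual VideoCommon code:

```cpp
// Illustrative pattern: a cache template can gate the async download path on a
// Params flag with zero runtime cost. P stands in for BufferCacheParams.
template <class P>
void CommitDownloads(typename P::Runtime& runtime) {
    if constexpr (P::IMPLEMENTS_ASYNC_DOWNLOADS) {
        // Request deferred staging maps, enqueue copies, poll fences later.
    } else {
        // Synchronous fallback: wait for the GPU, then read back immediately.
    }
}
```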

src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp

@@ -28,63 +28,69 @@ StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)

 StagingBuffers::~StagingBuffers() = default;

-StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) {
+StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence,
+                                            bool deferred) {
     MICROPROFILE_SCOPE(OpenGL_BufferRequest);
     const size_t index = RequestBuffer(requested_size);
-    OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
-    sync_indices[index] = insert_fence ? ++current_sync_index : 0;
+    OGLSync* const sync = insert_fence ? &allocs[index].sync : nullptr;
+    allocs[index].sync_index = insert_fence ? ++current_sync_index : 0;
+    allocs[index].deferred = deferred;
     return StagingBufferMap{
-        .mapped_span = std::span(maps[index], requested_size),
+        .mapped_span = std::span(allocs[index].map, requested_size),
         .sync = sync,
-        .buffer = buffers[index].handle,
+        .buffer = allocs[index].buffer.handle,
+        .index = index,
     };
 }

+void StagingBuffers::FreeDeferredStagingBuffer(size_t index) {
+    ASSERT(allocs[index].deferred);
+    allocs[index].deferred = false;
+}
+
 size_t StagingBuffers::RequestBuffer(size_t requested_size) {
     if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
         return *index;
     }

-    OGLBuffer& buffer = buffers.emplace_back();
-    buffer.Create();
+    StagingBufferAlloc alloc;
+    alloc.buffer.Create();
     const auto next_pow2_size = Common::NextPow2(requested_size);
-    glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr,
+    glNamedBufferStorage(alloc.buffer.handle, next_pow2_size, nullptr,
                          storage_flags | GL_MAP_PERSISTENT_BIT);
-    maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size,
-                                                          map_flags | GL_MAP_PERSISTENT_BIT)));
-    syncs.emplace_back();
-    sync_indices.emplace_back();
-    sizes.push_back(next_pow2_size);
-
-    ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
-           maps.size() == sizes.size());
-
-    return buffers.size() - 1;
+    alloc.map = static_cast<u8*>(glMapNamedBufferRange(alloc.buffer.handle, 0, next_pow2_size,
+                                                       map_flags | GL_MAP_PERSISTENT_BIT));
+    alloc.size = next_pow2_size;
+    allocs.emplace_back(std::move(alloc));
+    return allocs.size() - 1;
 }

 std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) {
     size_t known_unsignaled_index = current_sync_index + 1;
     size_t smallest_buffer = std::numeric_limits<size_t>::max();
     std::optional<size_t> found;
-    const size_t num_buffers = sizes.size();
+    const size_t num_buffers = allocs.size();
     for (size_t index = 0; index < num_buffers; ++index) {
-        const size_t buffer_size = sizes[index];
+        StagingBufferAlloc& alloc = allocs[index];
+        const size_t buffer_size = alloc.size;
         if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
             continue;
         }
-        if (syncs[index].handle != 0) {
-            if (sync_indices[index] >= known_unsignaled_index) {
+        if (alloc.deferred) {
+            continue;
+        }
+        if (alloc.sync.handle != 0) {
+            if (alloc.sync_index >= known_unsignaled_index) {
                 // This fence is later than a fence that is known to not be signaled
                 continue;
             }
-            if (!syncs[index].IsSignaled()) {
+            if (!alloc.sync.IsSignaled()) {
                 // Since this fence hasn't been signaled, it's safe to assume all later
                 // fences haven't been signaled either
-                known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]);
+                known_unsignaled_index = std::min(known_unsignaled_index, alloc.sync_index);
                 continue;
             }
-            syncs[index].Release();
+            alloc.sync.Release();
         }
         smallest_buffer = buffer_size;
         found = index;
@@ -143,8 +149,12 @@ StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) {
     return upload_buffers.RequestMap(size, true);
 }

-StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) {
-    return download_buffers.RequestMap(size, false);
+StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size, bool deferred) {
+    return download_buffers.RequestMap(size, false, deferred);
+}
+
+void StagingBufferPool::FreeDeferredStagingBuffer(StagingBufferMap& buffer) {
+    download_buffers.FreeDeferredStagingBuffer(buffer.index);
 }

 } // namespace OpenGL
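
Two things happen in this file: the five parallel vectors are folded into one std::vector<StagingBufferAlloc> (which also makes the old length-consistency ASSERT in RequestBuffer unnecessary), and FindBuffer learns to skip allocations that an in-flight deferred download still owns. The reuse scan itself is unchanged: it picks the smallest free buffer that fits, and because fences are created in increasing sync_index order, one unsignaled fence rules out every later one without another driver query. A standalone, GL-free sketch of that scan, with a simplified Alloc standing in for StagingBufferAlloc:

```cpp
#include <algorithm>
#include <cstddef>
#include <limits>
#include <optional>
#include <vector>

// Simplified stand-in for StagingBufferAlloc: sync_index == 0 means no fence
// is pending, and `signaled` models OGLSync::IsSignaled().
struct Alloc {
    std::size_t size = 0;
    std::size_t sync_index = 0;
    bool signaled = false;
    bool deferred = false;
};

std::optional<std::size_t> FindBuffer(std::vector<Alloc>& allocs,
                                      std::size_t requested_size,
                                      std::size_t current_sync_index) {
    std::size_t known_unsignaled = current_sync_index + 1;
    std::size_t smallest = std::numeric_limits<std::size_t>::max();
    std::optional<std::size_t> found;
    for (std::size_t i = 0; i < allocs.size(); ++i) {
        Alloc& alloc = allocs[i];
        if (alloc.size < requested_size || alloc.size >= smallest) {
            continue; // too small, or a tighter fit was already found
        }
        if (alloc.deferred) {
            continue; // owned by an in-flight async download; not reusable yet
        }
        if (alloc.sync_index != 0) {
            if (alloc.sync_index >= known_unsignaled) {
                continue; // at least as late as a known-unsignaled fence
            }
            if (!alloc.signaled) {
                // Fences signal in order, so every higher sync_index is
                // unsignaled too and can be skipped without querying.
                known_unsignaled = std::min(known_unsignaled, alloc.sync_index);
                continue;
            }
            alloc.sync_index = 0; // fence consumed; models sync.Release()
        }
        smallest = alloc.size; // best-fit (smallest sufficient) candidate so far
        found = i;
    }
    return found;
}
```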

src/video_core/renderer_opengl/gl_staging_buffer_pool.h

@@ -26,23 +26,30 @@ struct StagingBufferMap {
     size_t offset = 0;
     OGLSync* sync;
     GLuint buffer;
+    size_t index;
 };

 struct StagingBuffers {
     explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
     ~StagingBuffers();

-    StagingBufferMap RequestMap(size_t requested_size, bool insert_fence);
+    StagingBufferMap RequestMap(size_t requested_size, bool insert_fence, bool deferred = false);
+
+    void FreeDeferredStagingBuffer(size_t index);

     size_t RequestBuffer(size_t requested_size);

     std::optional<size_t> FindBuffer(size_t requested_size);

-    std::vector<OGLSync> syncs;
-    std::vector<OGLBuffer> buffers;
-    std::vector<u8*> maps;
-    std::vector<size_t> sizes;
-    std::vector<size_t> sync_indices;
+    struct StagingBufferAlloc {
+        OGLSync sync;
+        OGLBuffer buffer;
+        u8* map;
+        size_t size;
+        size_t sync_index;
+        bool deferred;
+    };
+    std::vector<StagingBufferAlloc> allocs;
     GLenum storage_flags;
     GLenum map_flags;
     size_t current_sync_index = 0;
@@ -85,7 +92,8 @@ public:
     ~StagingBufferPool() = default;

     StagingBufferMap RequestUploadBuffer(size_t size);
-    StagingBufferMap RequestDownloadBuffer(size_t size);
+    StagingBufferMap RequestDownloadBuffer(size_t size, bool deferred = false);
+    void FreeDeferredStagingBuffer(StagingBufferMap& buffer);

 private:
     StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
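
StagingBufferMap gains an index field so a deferred map can be traced back to its pool slot when it is freed, with no search. A tiny self-contained model of that round trip, using simplified types rather than the real headers:

```cpp
#include <cstddef>
#include <vector>

struct AllocSketch {
    bool deferred = false; // slot reserved by an in-flight download
};

struct MapSketch {
    std::size_t index = 0; // pool slot this map was served from
};

void FreeDeferred(std::vector<AllocSketch>& allocs, const MapSketch& map) {
    allocs[map.index].deferred = false; // slot becomes reusable again
}
```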

src/video_core/renderer_opengl/gl_texture_cache.cpp

@@ -557,8 +557,12 @@ StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
     return staging_buffer_pool.RequestUploadBuffer(size);
 }

-StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
-    return staging_buffer_pool.RequestDownloadBuffer(size);
+StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
+    return staging_buffer_pool.RequestDownloadBuffer(size, deferred);
+}
+
+void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferMap& buffer) {
+    staging_buffer_pool.FreeDeferredStagingBuffer(buffer);
 }

 u64 TextureCacheRuntime::GetDeviceMemoryUsage() const {

src/video_core/renderer_opengl/gl_texture_cache.h

@@ -74,7 +74,9 @@ public:
     StagingBufferMap UploadStagingBuffer(size_t size);

-    StagingBufferMap DownloadStagingBuffer(size_t size);
+    StagingBufferMap DownloadStagingBuffer(size_t size, bool deferred = false);
+
+    void FreeDeferredStagingBuffer(StagingBufferMap& buffer);

     u64 GetDeviceLocalMemory() const {
         return device_access_memory;
@@ -359,7 +361,7 @@ struct TextureCacheParams {
     static constexpr bool FRAMEBUFFER_BLITS = true;
     static constexpr bool HAS_EMULATED_COPIES = true;
     static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
-    static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
+    static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;

     using Runtime = OpenGL::TextureCacheRuntime;
     using Image = OpenGL::Image;
@@ -367,7 +369,7 @@ struct TextureCacheParams {
     using ImageView = OpenGL::ImageView;
     using Sampler = OpenGL::Sampler;
     using Framebuffer = OpenGL::Framebuffer;
-    using AsyncBuffer = u32;
+    using AsyncBuffer = OpenGL::StagingBufferMap;
     using BufferType = GLuint;
 };
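
AsyncBuffer is the handle type the shared texture cache stores for each in-flight download; it was a placeholder u32 while the OpenGL backend lacked the feature. Using the full StagingBufferMap lets OpenGL carry the mapped span, sync object, and pool index through the pending-download queue. A generic sketch of how such a queue might hold them, illustrative only and not the VideoCommon implementation:

```cpp
#include <utility>
#include <vector>

// Illustrative only: a cache template stays backend-agnostic by storing
// whatever AsyncBuffer type the Params expose (OpenGL::StagingBufferMap here).
template <class Params>
class PendingDownloads {
public:
    void Push(typename Params::AsyncBuffer buffer) {
        pending_.push_back(std::move(buffer));
    }

    template <class Consume>
    void Drain(Consume&& consume) {
        // A real cache would first check each buffer's fence for completion.
        for (auto& buffer : pending_) {
            consume(buffer);
        }
        pending_.clear();
    }

private:
    std::vector<typename Params::AsyncBuffer> pending_;
};
```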