gl_buffer_cache: Copy to buffers created as STREAM_READ before downloading
After marking buffers as resident, Nvidia's driver seems to take a slow path. To work around this issue, copy the data to a STREAM_READ buffer and then call GetNamedBufferSubData on it. This is a temporary solution until we have asynchronous flushing.
This commit is contained in:
parent
c4fe83a7bc
commit
6481d91e4a
5 changed files with 24 additions and 18 deletions
|
@ -322,8 +322,7 @@ protected:
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr,
|
MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
|
||||||
std::size_t size) {
|
|
||||||
const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
|
const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
|
||||||
if (overlaps.empty()) {
|
if (overlaps.empty()) {
|
||||||
auto& memory_manager = system.GPU().MemoryManager();
|
auto& memory_manager = system.GPU().MemoryManager();
|
||||||
|
@ -377,8 +376,7 @@ private:
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UpdateBlock(const Buffer* block, VAddr start, VAddr end,
|
void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) {
|
||||||
const VectorMapInterval& overlaps) {
|
|
||||||
const IntervalType base_interval{start, end};
|
const IntervalType base_interval{start, end};
|
||||||
IntervalSet interval_set{};
|
IntervalSet interval_set{};
|
||||||
interval_set.add(base_interval);
|
interval_set.add(base_interval);
|
||||||
|
|
|
@ -34,20 +34,27 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
|
||||||
|
|
||||||
Buffer::~Buffer() = default;
|
Buffer::~Buffer() = default;
|
||||||
|
|
||||||
void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
|
void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) {
|
||||||
glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
|
glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
|
||||||
data);
|
data);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
|
void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
|
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
|
||||||
|
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
|
||||||
|
const GLintptr gl_offset = static_cast<GLintptr>(offset);
|
||||||
|
if (read_buffer.handle == 0) {
|
||||||
|
read_buffer.Create();
|
||||||
|
glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
|
||||||
|
GL_STREAM_READ);
|
||||||
|
}
|
||||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||||
glGetNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
|
glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
|
||||||
data);
|
glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||||
std::size_t size) const {
|
std::size_t size) {
|
||||||
glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
|
glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
|
||||||
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
|
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,12 +28,12 @@ public:
|
||||||
explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
|
explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
|
||||||
~Buffer();
|
~Buffer();
|
||||||
|
|
||||||
void Upload(std::size_t offset, std::size_t size, const u8* data) const;
|
void Upload(std::size_t offset, std::size_t size, const u8* data);
|
||||||
|
|
||||||
void Download(std::size_t offset, std::size_t size, u8* data) const;
|
void Download(std::size_t offset, std::size_t size, u8* data);
|
||||||
|
|
||||||
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||||
std::size_t size) const;
|
std::size_t size);
|
||||||
|
|
||||||
GLuint Handle() const noexcept {
|
GLuint Handle() const noexcept {
|
||||||
return gl_buffer.handle;
|
return gl_buffer.handle;
|
||||||
|
@ -45,6 +45,7 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
OGLBuffer gl_buffer;
|
OGLBuffer gl_buffer;
|
||||||
|
OGLBuffer read_buffer;
|
||||||
u64 gpu_address = 0;
|
u64 gpu_address = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -56,7 +56,7 @@ Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKSchedu
|
||||||
|
|
||||||
Buffer::~Buffer() = default;
|
Buffer::~Buffer() = default;
|
||||||
|
|
||||||
void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
|
void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) {
|
||||||
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
||||||
std::memcpy(staging.commit->Map(size), data, size);
|
std::memcpy(staging.commit->Map(size), data, size);
|
||||||
|
|
||||||
|
@ -81,7 +81,7 @@ void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
|
void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
|
||||||
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
||||||
scheduler.RequestOutsideRenderPassOperationContext();
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
|
|
||||||
|
@ -110,7 +110,7 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||||
std::size_t size) const {
|
std::size_t size) {
|
||||||
scheduler.RequestOutsideRenderPassOperationContext();
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
|
|
||||||
const VkBuffer dst_buffer = Handle();
|
const VkBuffer dst_buffer = Handle();
|
||||||
|
|
|
@ -29,12 +29,12 @@ public:
|
||||||
VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size);
|
VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size);
|
||||||
~Buffer();
|
~Buffer();
|
||||||
|
|
||||||
void Upload(std::size_t offset, std::size_t size, const u8* data) const;
|
void Upload(std::size_t offset, std::size_t size, const u8* data);
|
||||||
|
|
||||||
void Download(std::size_t offset, std::size_t size, u8* data) const;
|
void Download(std::size_t offset, std::size_t size, u8* data);
|
||||||
|
|
||||||
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||||
std::size_t size) const;
|
std::size_t size);
|
||||||
|
|
||||||
VkBuffer Handle() const {
|
VkBuffer Handle() const {
|
||||||
return *buffer.handle;
|
return *buffer.handle;
|
||||||
|
|
Loading…
Reference in a new issue