forked from suyu/suyu
gl_buffer_cache: Drop interop based parameter buffer workarounds
Sacrifice runtime performance to avoid generating kernel exceptions on Windows due to our abusive aliasing of interop buffer objects.
This commit is contained in:
parent
2b95c137ff
commit
3da87d3f12
3 changed files with 45 additions and 65 deletions
|
@ -91,7 +91,7 @@ class BufferCache {
|
||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static constexpr size_t SKIP_CACHE_SIZE = 4096;
|
static constexpr u32 SKIP_CACHE_SIZE = 4096;
|
||||||
|
|
||||||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||||
|
@ -671,7 +671,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||||
const VAddr cpu_addr = binding.cpu_addr;
|
const VAddr cpu_addr = binding.cpu_addr;
|
||||||
const u32 size = binding.size;
|
const u32 size = binding.size;
|
||||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
if (size <= runtime.SkipCacheSize() && !buffer.IsRegionGpuModified(cpu_addr, size)) {
|
if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) {
|
||||||
if constexpr (IS_OPENGL) {
|
if constexpr (IS_OPENGL) {
|
||||||
if (runtime.HasFastBufferSubData()) {
|
if (runtime.HasFastBufferSubData()) {
|
||||||
// Fast path for Nvidia
|
// Fast path for Nvidia
|
||||||
|
|
|
@ -36,13 +36,8 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast
|
||||||
buffer.Create();
|
buffer.Create();
|
||||||
const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
|
const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
|
||||||
glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
|
glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
|
||||||
if (runtime.device.UseAssemblyShaders()) {
|
|
||||||
CreateMemoryObjects(runtime);
|
|
||||||
glNamedBufferStorageMemEXT(buffer.handle, SizeBytes(), memory_commit.ExportOpenGLHandle(),
|
|
||||||
memory_commit.Offset());
|
|
||||||
} else {
|
|
||||||
glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
|
glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
|
||||||
}
|
|
||||||
if (runtime.has_unified_vertex_buffers) {
|
if (runtime.has_unified_vertex_buffers) {
|
||||||
glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
|
glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
|
||||||
}
|
}
|
||||||
|
@ -71,61 +66,33 @@ void Buffer::MakeResident(GLenum access) noexcept {
|
||||||
glMakeNamedBufferResidentNV(buffer.handle, access);
|
glMakeNamedBufferResidentNV(buffer.handle, access);
|
||||||
}
|
}
|
||||||
|
|
||||||
GLuint Buffer::SubBuffer(u32 offset) {
|
|
||||||
if (offset == 0) {
|
|
||||||
return buffer.handle;
|
|
||||||
}
|
|
||||||
for (const auto& [sub_buffer, sub_offset] : subs) {
|
|
||||||
if (sub_offset == offset) {
|
|
||||||
return sub_buffer.handle;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
OGLBuffer sub_buffer;
|
|
||||||
sub_buffer.Create();
|
|
||||||
glNamedBufferStorageMemEXT(sub_buffer.handle, SizeBytes() - offset,
|
|
||||||
memory_commit.ExportOpenGLHandle(), memory_commit.Offset() + offset);
|
|
||||||
return subs.emplace_back(std::move(sub_buffer), offset).first.handle;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Buffer::CreateMemoryObjects(BufferCacheRuntime& runtime) {
|
|
||||||
auto& allocator = runtime.vulkan_memory_allocator;
|
|
||||||
auto& device = runtime.vulkan_device->GetLogical();
|
|
||||||
auto vulkan_buffer = device.CreateBuffer(VkBufferCreateInfo{
|
|
||||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
|
||||||
.pNext = nullptr,
|
|
||||||
.flags = 0,
|
|
||||||
.size = SizeBytes(),
|
|
||||||
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
|
||||||
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
|
|
||||||
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
|
|
||||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
|
|
||||||
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
|
|
||||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
|
||||||
.queueFamilyIndexCount = 0,
|
|
||||||
.pQueueFamilyIndices = nullptr,
|
|
||||||
});
|
|
||||||
const VkMemoryRequirements requirements = device.GetBufferMemoryRequirements(*vulkan_buffer);
|
|
||||||
memory_commit = allocator->Commit(requirements, Vulkan::MemoryUsage::DeviceLocal);
|
|
||||||
}
|
|
||||||
|
|
||||||
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_,
|
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_,
|
||||||
Vulkan::MemoryAllocator* vulkan_memory_allocator_)
|
Vulkan::MemoryAllocator* vulkan_memory_allocator_)
|
||||||
: device{device_}, vulkan_device{vulkan_device_},
|
: device{device_}, vulkan_device{vulkan_device_},
|
||||||
vulkan_memory_allocator{vulkan_memory_allocator_},
|
vulkan_memory_allocator{vulkan_memory_allocator_},
|
||||||
stream_buffer{device.HasFastBufferSubData() ? std::nullopt
|
has_fast_buffer_sub_data{device.HasFastBufferSubData()},
|
||||||
: std::make_optional<StreamBuffer>()} {
|
use_assembly_shaders{device.UseAssemblyShaders()},
|
||||||
|
has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
|
||||||
|
stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
|
||||||
GLint gl_max_attributes;
|
GLint gl_max_attributes;
|
||||||
glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
|
glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
|
||||||
max_attributes = static_cast<u32>(gl_max_attributes);
|
max_attributes = static_cast<u32>(gl_max_attributes);
|
||||||
use_assembly_shaders = device.UseAssemblyShaders();
|
|
||||||
has_unified_vertex_buffers = device.HasVertexBufferUnifiedMemory();
|
|
||||||
|
|
||||||
for (auto& stage_uniforms : fast_uniforms) {
|
for (auto& stage_uniforms : fast_uniforms) {
|
||||||
for (OGLBuffer& buffer : stage_uniforms) {
|
for (OGLBuffer& buffer : stage_uniforms) {
|
||||||
buffer.Create();
|
buffer.Create();
|
||||||
glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
|
glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for (auto& stage_uniforms : copy_uniforms) {
|
||||||
|
for (OGLBuffer& buffer : stage_uniforms) {
|
||||||
|
buffer.Create();
|
||||||
|
glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (OGLBuffer& buffer : copy_compute_uniforms) {
|
||||||
|
buffer.Create();
|
||||||
|
glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
|
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
|
||||||
|
@ -167,8 +134,14 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset,
|
||||||
void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
|
void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
|
||||||
u32 offset, u32 size) {
|
u32 offset, u32 size) {
|
||||||
if (use_assembly_shaders) {
|
if (use_assembly_shaders) {
|
||||||
const GLuint sub_buffer = buffer.SubBuffer(offset);
|
GLuint handle;
|
||||||
glBindBufferRangeNV(PABO_LUT[stage], binding_index, sub_buffer, 0,
|
if (offset != 0) {
|
||||||
|
handle = copy_uniforms[stage][binding_index].handle;
|
||||||
|
glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
|
||||||
|
} else {
|
||||||
|
handle = buffer.Handle();
|
||||||
|
}
|
||||||
|
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
|
||||||
static_cast<GLsizeiptr>(size));
|
static_cast<GLsizeiptr>(size));
|
||||||
} else {
|
} else {
|
||||||
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
|
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
|
||||||
|
@ -181,8 +154,15 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff
|
||||||
void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
|
void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
|
||||||
u32 size) {
|
u32 size) {
|
||||||
if (use_assembly_shaders) {
|
if (use_assembly_shaders) {
|
||||||
glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index,
|
GLuint handle;
|
||||||
buffer.SubBuffer(offset), 0, static_cast<GLsizeiptr>(size));
|
if (offset != 0) {
|
||||||
|
handle = copy_compute_uniforms[binding_index].handle;
|
||||||
|
glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
|
||||||
|
} else {
|
||||||
|
handle = buffer.Handle();
|
||||||
|
}
|
||||||
|
glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0,
|
||||||
|
static_cast<GLsizeiptr>(size));
|
||||||
} else {
|
} else {
|
||||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
|
glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
|
||||||
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
||||||
|
|
|
@ -39,8 +39,6 @@ public:
|
||||||
|
|
||||||
void MakeResident(GLenum access) noexcept;
|
void MakeResident(GLenum access) noexcept;
|
||||||
|
|
||||||
[[nodiscard]] GLuint SubBuffer(u32 offset);
|
|
||||||
|
|
||||||
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
|
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
|
||||||
return address;
|
return address;
|
||||||
}
|
}
|
||||||
|
@ -50,13 +48,9 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void CreateMemoryObjects(BufferCacheRuntime& runtime);
|
|
||||||
|
|
||||||
GLuint64EXT address = 0;
|
GLuint64EXT address = 0;
|
||||||
Vulkan::MemoryCommit memory_commit;
|
|
||||||
OGLBuffer buffer;
|
OGLBuffer buffer;
|
||||||
GLenum current_residency_access = GL_NONE;
|
GLenum current_residency_access = GL_NONE;
|
||||||
std::vector<std::pair<OGLBuffer, u32>> subs;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class BufferCacheRuntime {
|
class BufferCacheRuntime {
|
||||||
|
@ -127,7 +121,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] bool HasFastBufferSubData() const noexcept {
|
[[nodiscard]] bool HasFastBufferSubData() const noexcept {
|
||||||
return device.HasFastBufferSubData();
|
return has_fast_buffer_sub_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -140,16 +134,22 @@ private:
|
||||||
const Device& device;
|
const Device& device;
|
||||||
const Vulkan::Device* vulkan_device;
|
const Vulkan::Device* vulkan_device;
|
||||||
Vulkan::MemoryAllocator* vulkan_memory_allocator;
|
Vulkan::MemoryAllocator* vulkan_memory_allocator;
|
||||||
std::optional<StreamBuffer> stream_buffer;
|
|
||||||
|
bool has_fast_buffer_sub_data = false;
|
||||||
|
bool use_assembly_shaders = false;
|
||||||
|
bool has_unified_vertex_buffers = false;
|
||||||
|
|
||||||
u32 max_attributes = 0;
|
u32 max_attributes = 0;
|
||||||
|
|
||||||
bool use_assembly_shaders = false;
|
std::optional<StreamBuffer> stream_buffer;
|
||||||
bool has_unified_vertex_buffers = false;
|
|
||||||
|
|
||||||
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
|
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
|
||||||
VideoCommon::NUM_STAGES>
|
VideoCommon::NUM_STAGES>
|
||||||
fast_uniforms;
|
fast_uniforms;
|
||||||
|
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
|
||||||
|
VideoCommon::NUM_STAGES>
|
||||||
|
copy_uniforms;
|
||||||
|
std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms;
|
||||||
|
|
||||||
u32 index_buffer_offset = 0;
|
u32 index_buffer_offset = 0;
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue