Merge pull request #2512 from ReinUsesLisp/comp-indexing
gl_shader_decompiler: Pessimize uniform buffer access on AMD's proprietary driver
This commit is contained in:
commit
0bcc305797
3 changed files with 80 additions and 3 deletions
|
@ -2,11 +2,14 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <array>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <glad/glad.h>
|
#include <glad/glad.h>
|
||||||
|
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
|
#include "common/scope_exit.h"
|
||||||
#include "video_core/renderer_opengl/gl_device.h"
|
#include "video_core/renderer_opengl/gl_device.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
|
@ -24,6 +27,7 @@ Device::Device() {
|
||||||
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
|
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
|
||||||
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
|
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
|
||||||
has_variable_aoffi = TestVariableAoffi();
|
has_variable_aoffi = TestVariableAoffi();
|
||||||
|
has_component_indexing_bug = TestComponentIndexingBug();
|
||||||
}
|
}
|
||||||
|
|
||||||
Device::Device(std::nullptr_t) {
|
Device::Device(std::nullptr_t) {
|
||||||
|
@ -31,6 +35,7 @@ Device::Device(std::nullptr_t) {
|
||||||
max_vertex_attributes = 16;
|
max_vertex_attributes = 16;
|
||||||
max_varyings = 15;
|
max_varyings = 15;
|
||||||
has_variable_aoffi = true;
|
has_variable_aoffi = true;
|
||||||
|
has_component_indexing_bug = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Device::TestVariableAoffi() {
|
bool Device::TestVariableAoffi() {
|
||||||
|
@ -52,4 +57,53 @@ void main() {
|
||||||
return supported;
|
return supported;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Device::TestComponentIndexingBug() {
|
||||||
|
constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}";
|
||||||
|
const GLchar* COMPONENT_TEST = R"(#version 430 core
|
||||||
|
layout (std430, binding = 0) buffer OutputBuffer {
|
||||||
|
uint output_value;
|
||||||
|
};
|
||||||
|
layout (std140, binding = 0) uniform InputBuffer {
|
||||||
|
uvec4 input_value[4096];
|
||||||
|
};
|
||||||
|
layout (location = 0) uniform uint idx;
|
||||||
|
void main() {
|
||||||
|
output_value = input_value[idx >> 2][idx & 3];
|
||||||
|
})";
|
||||||
|
const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)};
|
||||||
|
SCOPE_EXIT({ glDeleteProgram(shader); });
|
||||||
|
glUseProgram(shader);
|
||||||
|
|
||||||
|
OGLVertexArray vao;
|
||||||
|
vao.Create();
|
||||||
|
glBindVertexArray(vao.handle);
|
||||||
|
|
||||||
|
constexpr std::array<GLuint, 8> values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432};
|
||||||
|
OGLBuffer ubo;
|
||||||
|
ubo.Create();
|
||||||
|
glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW);
|
||||||
|
glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle);
|
||||||
|
|
||||||
|
OGLBuffer ssbo;
|
||||||
|
ssbo.Create();
|
||||||
|
glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT);
|
||||||
|
|
||||||
|
for (GLuint index = 4; index < 8; ++index) {
|
||||||
|
glInvalidateBufferData(ssbo.handle);
|
||||||
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle);
|
||||||
|
|
||||||
|
glProgramUniform1ui(shader, 0, index);
|
||||||
|
glDrawArrays(GL_POINTS, 0, 1);
|
||||||
|
|
||||||
|
GLuint result;
|
||||||
|
glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result);
|
||||||
|
if (result != values.at(index)) {
|
||||||
|
LOG_INFO(Render_OpenGL, log_message, true);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LOG_INFO(Render_OpenGL, log_message, false);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -30,13 +30,19 @@ public:
|
||||||
return has_variable_aoffi;
|
return has_variable_aoffi;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool HasComponentIndexingBug() const {
|
||||||
|
return has_component_indexing_bug;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static bool TestVariableAoffi();
|
static bool TestVariableAoffi();
|
||||||
|
static bool TestComponentIndexingBug();
|
||||||
|
|
||||||
std::size_t uniform_buffer_alignment{};
|
std::size_t uniform_buffer_alignment{};
|
||||||
u32 max_vertex_attributes{};
|
u32 max_vertex_attributes{};
|
||||||
u32 max_varyings{};
|
u32 max_varyings{};
|
||||||
bool has_variable_aoffi{};
|
bool has_variable_aoffi{};
|
||||||
|
bool has_component_indexing_bug{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -577,11 +577,28 @@ private:
|
||||||
if (std::holds_alternative<OperationNode>(*offset)) {
|
if (std::holds_alternative<OperationNode>(*offset)) {
|
||||||
// Indirect access
|
// Indirect access
|
||||||
const std::string final_offset = code.GenerateTemporary();
|
const std::string final_offset = code.GenerateTemporary();
|
||||||
code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset));
|
code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset));
|
||||||
return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
|
|
||||||
|
if (!device.HasComponentIndexingBug()) {
|
||||||
|
return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
|
||||||
final_offset, final_offset);
|
final_offset, final_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AMD's proprietary GLSL compiler emits incorrect code for variable component access.
|
||||||
|
// To work around this driver bug, generate four ifs, one per component.
|
||||||
|
const std::string pack = code.GenerateTemporary();
|
||||||
|
code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
|
||||||
|
final_offset);
|
||||||
|
|
||||||
|
const std::string result = code.GenerateTemporary();
|
||||||
|
code.AddLine("float {};", result);
|
||||||
|
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
|
||||||
|
code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
|
||||||
|
pack, GetSwizzle(swizzle));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
UNREACHABLE_MSG("Unmanaged offset node type");
|
UNREACHABLE_MSG("Unmanaged offset node type");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue