1
0
Fork 0
forked from suyu/suyu

renderer_opengl: Use ARB_separate_shader_objects

Ensures that states set for a particular stage are not attached to other
stages which may not need them.
This commit is contained in:
ReinUsesLisp 2021-07-14 03:48:30 -03:00 committed by ameerj
parent fc7bed21b5
commit e1ed218b41
9 changed files with 154 additions and 116 deletions

View file

@ -46,17 +46,13 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac
kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} {
switch (device.GetShaderBackend()) { switch (device.GetShaderBackend()) {
case Settings::ShaderBackend::GLSL: case Settings::ShaderBackend::GLSL:
source_program.handle = glCreateProgram(); source_program = CreateProgram(code, GL_COMPUTE_SHADER);
AttachShader(GL_COMPUTE_SHADER, source_program.handle, code);
LinkProgram(source_program.handle);
break; break;
case Settings::ShaderBackend::GLASM: case Settings::ShaderBackend::GLASM:
assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
break; break;
case Settings::ShaderBackend::SPIRV: case Settings::ShaderBackend::SPIRV:
source_program.handle = glCreateProgram(); source_program = CreateProgram(code_v, GL_COMPUTE_SHADER);
AttachShader(GL_COMPUTE_SHADER, source_program.handle, code_v);
LinkProgram(source_program.handle);
break; break;
} }
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
@ -154,7 +150,7 @@ void ComputePipeline::Configure() {
if (assembly_program.handle != 0) { if (assembly_program.handle != 0) {
program_manager.BindComputeAssemblyProgram(assembly_program.handle); program_manager.BindComputeAssemblyProgram(assembly_program.handle);
} else { } else {
program_manager.BindProgram(source_program.handle); program_manager.BindComputeProgram(source_program.handle);
} }
buffer_cache.UnbindComputeTextureBuffers(); buffer_cache.UnbindComputeTextureBuffers();
size_t texbuf_index{}; size_t texbuf_index{};

View file

@ -237,43 +237,31 @@ GraphicsPipeline::GraphicsPipeline(
if (key.xfb_enabled && device.UseAssemblyShaders()) { if (key.xfb_enabled && device.UseAssemblyShaders()) {
GenerateTransformFeedbackState(); GenerateTransformFeedbackState();
} }
auto func{ auto func{[this, device, sources, sources_spirv,
[this, device, sources, sources_spirv, shader_notify](ShaderContext::Context*) mutable { shader_notify](ShaderContext::Context*) mutable {
if (!device.UseAssemblyShaders()) {
program.handle = glCreateProgram();
}
for (size_t stage = 0; stage < 5; ++stage) { for (size_t stage = 0; stage < 5; ++stage) {
switch (device.GetShaderBackend()) { switch (device.GetShaderBackend()) {
case Settings::ShaderBackend::GLSL: { case Settings::ShaderBackend::GLSL:
const auto code{sources[stage]}; if (!sources[stage].empty()) {
if (code.empty()) { source_programs[stage] = CreateProgram(sources[stage], Stage(stage));
continue;
} }
AttachShader(Stage(stage), program.handle, code); break;
} break; case Settings::ShaderBackend::GLASM:
case Settings::ShaderBackend::GLASM: { if (!sources[stage].empty()) {
const auto code{sources[stage]}; assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
if (code.empty()) {
continue;
} }
assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); break;
} break; case Settings::ShaderBackend::SPIRV:
case Settings::ShaderBackend::SPIRV: { if (!sources_spirv[stage].empty()) {
const auto code{sources_spirv[stage]}; source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage));
if (code.empty()) {
continue;
} }
AttachShader(Stage(stage), program.handle, code); break;
} break;
} }
} }
if (!device.UseAssemblyShaders()) {
LinkProgram(program.handle);
}
if (shader_notify) { if (shader_notify) {
shader_notify->MarkShaderComplete(); shader_notify->MarkShaderComplete();
} }
is_built.store(true, std::memory_order_relaxed); is_built = true;
}}; }};
if (thread_worker) { if (thread_worker) {
thread_worker->QueueWork(std::move(func)); thread_worker->QueueWork(std::move(func));
@ -449,7 +437,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if (assembly_programs[0].handle != 0) { if (assembly_programs[0].handle != 0) {
program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask);
} else { } else {
program_manager.BindProgram(program.handle); program_manager.BindSourcePrograms(source_programs);
} }
const ImageId* views_it{image_view_ids.data()}; const ImageId* views_it{image_view_ids.data()};
GLsizei texture_binding = 0; GLsizei texture_binding = 0;

View file

@ -129,7 +129,7 @@ private:
void (*configure_func)(GraphicsPipeline*, bool){}; void (*configure_func)(GraphicsPipeline*, bool){};
OGLProgram program; std::array<OGLProgram, 5> source_programs;
std::array<OGLAssemblyProgram, 5> assembly_programs; std::array<OGLAssemblyProgram, 5> assembly_programs;
u32 enabled_stages_mask{}; u32 enabled_stages_mask{};

View file

@ -24,34 +24,68 @@ class ProgramManager {
public: public:
explicit ProgramManager(const Device& device) { explicit ProgramManager(const Device& device) {
glCreateProgramPipelines(1, &pipeline.handle);
if (device.UseAssemblyShaders()) { if (device.UseAssemblyShaders()) {
glEnable(GL_COMPUTE_PROGRAM_NV); glEnable(GL_COMPUTE_PROGRAM_NV);
} }
} }
void BindProgram(GLuint program) { void BindComputeProgram(GLuint program) {
if (current_source_program == program) {
return;
}
current_source_program = program;
glUseProgram(program); glUseProgram(program);
is_compute_bound = true;
} }
void BindComputeAssemblyProgram(GLuint program) { void BindComputeAssemblyProgram(GLuint program) {
if (current_compute_assembly_program != program) { if (current_assembly_compute_program != program) {
current_compute_assembly_program = program; current_assembly_compute_program = program;
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
} }
if (current_source_program != 0) { UnbindPipeline();
current_source_program = 0;
glUseProgram(0);
} }
/// Routes one source program per graphics stage into the program pipeline object.
/// A stage is only re-attached when its handle differs from the cached one; afterwards the
/// pipeline is (re)bound through BindPipeline().
/// @param programs Per-stage programs, indexed in the same order as the stage bit table below.
void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
    // Pipeline stage bit for each stage index.
    static constexpr std::array<GLenum, 5> stage_bits{
        GL_VERTEX_SHADER_BIT,   GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
        GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
    };
    for (size_t index = 0; index != NUM_STAGES; ++index) {
        const GLuint wanted = programs[index].handle;
        GLuint& cached = current_programs[index];
        if (cached == wanted) {
            // Already attached to this stage, skip the redundant GL call.
            continue;
        }
        cached = wanted;
        glUseProgramStages(pipeline.handle, stage_bits[index], wanted);
    }
    BindPipeline();
}
/// Sets up the program pipeline for presentation: attaches the given vertex and fragment
/// programs, clears the tessellation and geometry stages, disables any enabled assembly
/// program stages and finally binds the pipeline.
/// @param vertex   Program handle to use for the vertex stage.
/// @param fragment Program handle to use for the fragment stage.
void BindPresentPrograms(GLuint vertex, GLuint fragment) {
// Slot 0 caches the vertex stage program; only touch GL when it changes.
if (current_programs[0] != vertex) {
current_programs[0] = vertex;
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
}
// Slot 4 caches the fragment stage program.
if (current_programs[4] != fragment) {
current_programs[4] = fragment;
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
}
// The remaining stages are always reset to program 0 here, without consulting the cache.
glUseProgramStages(
pipeline.handle,
GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
// Keep the cache in sync with the reset above.
current_programs[1] = 0;
current_programs[2] = 0;
current_programs[3] = 0;
// A non-zero mask means assembly program stages were enabled earlier; turn all of them off.
if (current_stage_mask != 0) {
current_stage_mask = 0;
for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
glDisable(program_type);
}
}
BindPipeline();
} }
void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs, void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
u32 stage_mask) { u32 stage_mask) {
const u32 changed_mask = current_assembly_mask ^ stage_mask; const u32 changed_mask = current_stage_mask ^ stage_mask;
current_assembly_mask = stage_mask; current_stage_mask = stage_mask;
if (changed_mask != 0) { if (changed_mask != 0) {
for (size_t stage = 0; stage < NUM_STAGES; ++stage) { for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
@ -65,25 +99,47 @@ public:
} }
} }
for (size_t stage = 0; stage < NUM_STAGES; ++stage) { for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (current_assembly_programs[stage] != programs[stage].handle) { if (current_programs[stage] != programs[stage].handle) {
current_assembly_programs[stage] = programs[stage].handle; current_programs[stage] = programs[stage].handle;
glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
} }
} }
if (current_source_program != 0) { UnbindPipeline();
current_source_program = 0;
glUseProgram(0);
}
} }
void RestoreGuestCompute() {} void RestoreGuestCompute() {}
private: private:
GLuint current_source_program = 0; void BindPipeline() {
if (!is_pipeline_bound) {
is_pipeline_bound = true;
glBindProgramPipeline(pipeline.handle);
}
UnbindCompute();
}
u32 current_assembly_mask = 0; void UnbindPipeline() {
std::array<GLuint, NUM_STAGES> current_assembly_programs{}; if (is_pipeline_bound) {
GLuint current_compute_assembly_program = 0; is_pipeline_bound = false;
glBindProgramPipeline(0);
}
UnbindCompute();
}
/// Resets the program set through glUseProgram when the compute flag marks one as active.
/// Does nothing if no compute program is flagged as bound.
void UnbindCompute() {
    if (!is_compute_bound) {
        return;
    }
    glUseProgram(0);
    is_compute_bound = false;
}
OGLPipeline pipeline;
bool is_pipeline_bound{};
bool is_compute_bound{};
u32 current_stage_mask = 0;
std::array<GLuint, NUM_STAGES> current_programs{};
GLuint current_assembly_compute_program = 0;
}; };
} // namespace OpenGL } // namespace OpenGL

View file

@ -13,6 +13,33 @@
namespace OpenGL { namespace OpenGL {
/// Creates a program object flagged as separable, attaches a single shader to it and links it.
/// When Settings::values.renderer_debug is set, the link status and info log are queried and a
/// non-empty log is reported as an error (link failed) or a warning (link succeeded).
/// @param shader Handle of the shader object to attach before linking.
/// @return The program object holding the newly created GL program handle.
static OGLProgram LinkSeparableProgram(GLuint shader) {
    OGLProgram program;
    program.handle = glCreateProgram();
    // The separable flag is set before the link call below.
    glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
    glAttachShader(program.handle, shader);
    // NOTE(review): the shader is left attached after linking; confirm whether it should be
    // detached here so the caller's shader object can be released.
    glLinkProgram(program.handle);
    if (!Settings::values.renderer_debug) {
        return program;
    }
    GLint link_status{};
    glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status);
    GLint log_length{};
    glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length);
    if (log_length <= 0) {
        return program;
    }
    std::string log(static_cast<size_t>(log_length), '\0');
    glGetProgramInfoLog(program.handle, log_length, nullptr, log.data());
    // GL_INFO_LOG_LENGTH counts the null terminator, so cut the text at the first NUL to keep
    // stray '\0' bytes out of the log output.
    if (const auto terminator = log.find('\0'); terminator != std::string::npos) {
        log.resize(terminator);
    }
    if (log.empty()) {
        // A terminator-only log carries no message worth reporting.
        return program;
    }
    if (link_status == GL_FALSE) {
        LOG_ERROR(Render_OpenGL, "{}", log);
    } else {
        LOG_WARNING(Render_OpenGL, "{}", log);
    }
    return program;
}
static void LogShader(GLuint shader, std::string_view code = {}) { static void LogShader(GLuint shader, std::string_view code = {}) {
GLint shader_status{}; GLint shader_status{};
glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status);
@ -36,7 +63,7 @@ static void LogShader(GLuint shader, std::string_view code = {}) {
} }
} }
void AttachShader(GLenum stage, GLuint program, std::string_view code) { OGLProgram CreateProgram(std::string_view code, GLenum stage) {
OGLShader shader; OGLShader shader;
shader.handle = glCreateShader(stage); shader.handle = glCreateShader(stage);
@ -44,45 +71,23 @@ void AttachShader(GLenum stage, GLuint program, std::string_view code) {
const GLchar* const code_ptr = code.data(); const GLchar* const code_ptr = code.data();
glShaderSource(shader.handle, 1, &code_ptr, &length); glShaderSource(shader.handle, 1, &code_ptr, &length);
glCompileShader(shader.handle); glCompileShader(shader.handle);
glAttachShader(program, shader.handle);
if (Settings::values.renderer_debug) { if (Settings::values.renderer_debug) {
LogShader(shader.handle, code); LogShader(shader.handle, code);
} }
return LinkSeparableProgram(shader.handle);
} }
void AttachShader(GLenum stage, GLuint program, std::span<const u32> code) { OGLProgram CreateProgram(std::span<const u32> code, GLenum stage) {
OGLShader shader; OGLShader shader;
shader.handle = glCreateShader(stage); shader.handle = glCreateShader(stage);
glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(),
static_cast<GLsizei>(code.size_bytes())); static_cast<GLsizei>(code.size_bytes()));
glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr);
glAttachShader(program, shader.handle);
if (Settings::values.renderer_debug) { if (Settings::values.renderer_debug) {
LogShader(shader.handle); LogShader(shader.handle);
} }
} return LinkSeparableProgram(shader.handle);
void LinkProgram(GLuint program) {
glLinkProgram(program);
if (!Settings::values.renderer_debug) {
return;
}
GLint link_status{};
glGetProgramiv(program, GL_LINK_STATUS, &link_status);
GLint log_length{};
glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);
if (log_length == 0) {
return;
}
std::string log(log_length, 0);
glGetProgramInfoLog(program, log_length, nullptr, log.data());
if (link_status == GL_FALSE) {
LOG_ERROR(Render_OpenGL, "{}", log);
} else {
LOG_WARNING(Render_OpenGL, "{}", log);
}
} }
OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) {

View file

@ -17,11 +17,9 @@
namespace OpenGL { namespace OpenGL {
void AttachShader(GLenum stage, GLuint program, std::string_view code); OGLProgram CreateProgram(std::string_view code, GLenum stage);
void AttachShader(GLenum stage, GLuint program, std::span<const u32> code); OGLProgram CreateProgram(std::span<const u32> code, GLenum stage);
void LinkProgram(GLuint program);
OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target); OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target);

View file

@ -251,10 +251,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
void RendererOpenGL::InitOpenGLObjects() { void RendererOpenGL::InitOpenGLObjects() {
// Create shader programs // Create shader programs
present_program.handle = glCreateProgram(); present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
AttachShader(GL_VERTEX_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_VERT); present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
AttachShader(GL_FRAGMENT_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_FRAG);
LinkProgram(present_program.handle);
// Generate presentation sampler // Generate presentation sampler
present_sampler.Create(); present_sampler.Create();
@ -340,8 +338,9 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
// Set projection matrix // Set projection matrix
const std::array ortho_matrix = const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
program_manager.BindProgram(present_program.handle); program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle);
glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
ortho_matrix.data());
const auto& texcoords = screen_info.display_texcoords; const auto& texcoords = screen_info.display_texcoords;
auto left = texcoords.left; auto left = texcoords.left;

View file

@ -110,7 +110,8 @@ private:
// OpenGL object IDs // OpenGL object IDs
OGLSampler present_sampler; OGLSampler present_sampler;
OGLBuffer vertex_buffer; OGLBuffer vertex_buffer;
OGLProgram present_program; OGLProgram present_vertex;
OGLProgram present_fragment;
OGLFramebuffer screenshot_framebuffer; OGLFramebuffer screenshot_framebuffer;
// GPU address of the vertex buffer // GPU address of the vertex buffer

View file

@ -42,12 +42,7 @@ using VideoCore::Surface::BytesPerBlock;
namespace { namespace {
OGLProgram MakeProgram(std::string_view source) { OGLProgram MakeProgram(std::string_view source) {
OGLProgram program; return CreateProgram(source, GL_COMPUTE_SHADER);
OGLShader shader;
program.handle = glCreateProgram();
AttachShader(GL_COMPUTE_SHADER, program.handle, source);
LinkProgram(program.handle);
return program;
} }
size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
@ -84,7 +79,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
.width = VideoCore::Surface::DefaultBlockWidth(image.info.format), .width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
.height = VideoCore::Surface::DefaultBlockHeight(image.info.format), .height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
}; };
program_manager.BindProgram(astc_decoder_program.handle); program_manager.BindComputeProgram(astc_decoder_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
@ -132,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
program_manager.BindProgram(block_linear_unswizzle_2d_program.handle); program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
@ -171,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
program_manager.BindProgram(block_linear_unswizzle_3d_program.handle); program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
@ -220,7 +215,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
"Non-power of two images are not implemented"); "Non-power of two images are not implemented");
program_manager.BindProgram(pitch_unswizzle_program.handle); program_manager.BindComputeProgram(pitch_unswizzle_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0);
@ -248,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
static constexpr GLuint LOC_SRC_OFFSET = 0; static constexpr GLuint LOC_SRC_OFFSET = 0;
static constexpr GLuint LOC_DST_OFFSET = 1; static constexpr GLuint LOC_DST_OFFSET = 1;
program_manager.BindProgram(copy_bc4_program.handle); program_manager.BindComputeProgram(copy_bc4_program.handle);
for (const ImageCopy& copy : copies) { for (const ImageCopy& copy : copies) {
ASSERT(copy.src_subresource.base_layer == 0); ASSERT(copy.src_subresource.base_layer == 0);
@ -284,7 +279,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
break; break;
case 4: { case 4: {
// BGRA8 copy // BGRA8 copy
program_manager.BindProgram(copy_bgra_program.handle); program_manager.BindComputeProgram(copy_bgra_program.handle);
constexpr GLenum FORMAT = GL_RGBA8; constexpr GLenum FORMAT = GL_RGBA8;
for (const ImageCopy& copy : copies) { for (const ImageCopy& copy : copies) {
ASSERT(copy.src_offset == zero_offset); ASSERT(copy.src_offset == zero_offset);