From e3f4233cefff611e03a2031c6194a118d946a5d9 Mon Sep 17 00:00:00 2001 From: Subv Date: Sat, 25 Jul 2015 20:13:11 -0500 Subject: [PATCH] Initial implementation of fragment shader generation with caching. --- src/video_core/pica.h | 4 + .../renderer_opengl/gl_rasterizer.cpp | 373 +++++++----------- .../renderer_opengl/gl_rasterizer.h | 110 ++++-- .../renderer_opengl/gl_shader_util.cpp | 349 ++++++++++++++++ .../renderer_opengl/gl_shader_util.h | 6 + src/video_core/renderer_opengl/gl_shaders.h | 8 +- src/video_core/renderer_opengl/gl_state.h | 1 + 7 files changed, 579 insertions(+), 272 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index ff81b409db..18fdc8c855 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -317,6 +317,7 @@ struct Regs { }; union { + u32 source_raw; BitField< 0, 4, Source> color_source1; BitField< 4, 4, Source> color_source2; BitField< 8, 4, Source> color_source3; @@ -326,6 +327,7 @@ struct Regs { }; union { + u32 modifier_raw; BitField< 0, 4, ColorModifier> color_modifier1; BitField< 4, 4, ColorModifier> color_modifier2; BitField< 8, 4, ColorModifier> color_modifier3; @@ -335,6 +337,7 @@ struct Regs { }; union { + u32 op_raw; BitField< 0, 4, Operation> color_op; BitField<16, 4, Operation> alpha_op; }; @@ -348,6 +351,7 @@ struct Regs { }; union { + u32 scale_raw; BitField< 0, 2, u32> color_scale; BitField<16, 2, u32> alpha_scale; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a613fe1366..45329d561f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -8,6 +8,7 @@ #include #include "common/color.h" +#include "common/file_util.h" #include "common/math_util.h" #include "common/microprofile.h" #include "common/profiler.h" @@ -38,36 +39,6 @@ RasterizerOpenGL::RasterizerOpenGL() : last_fb_color_addr(0), last_fb_depth_addr RasterizerOpenGL::~RasterizerOpenGL() { } void 
RasterizerOpenGL::InitObjects() { - // Create the hardware shader program and get attrib/uniform locations - shader.Create(GLShaders::g_vertex_shader_hw, GLShaders::g_fragment_shader_hw); - attrib_position = glGetAttribLocation(shader.handle, "vert_position"); - attrib_color = glGetAttribLocation(shader.handle, "vert_color"); - attrib_texcoords = glGetAttribLocation(shader.handle, "vert_texcoords"); - - uniform_alphatest_enabled = glGetUniformLocation(shader.handle, "alphatest_enabled"); - uniform_alphatest_func = glGetUniformLocation(shader.handle, "alphatest_func"); - uniform_alphatest_ref = glGetUniformLocation(shader.handle, "alphatest_ref"); - - uniform_tex = glGetUniformLocation(shader.handle, "tex"); - - uniform_tev_combiner_buffer_color = glGetUniformLocation(shader.handle, "tev_combiner_buffer_color"); - - const auto tev_stages = Pica::g_state.regs.GetTevStages(); - for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { - auto& uniform_tev_cfg = uniform_tev_cfgs[tev_stage_index]; - - std::string tev_ref_str = "tev_cfgs[" + std::to_string(tev_stage_index) + "]"; - uniform_tev_cfg.enabled = glGetUniformLocation(shader.handle, (tev_ref_str + ".enabled").c_str()); - uniform_tev_cfg.color_sources = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_sources").c_str()); - uniform_tev_cfg.alpha_sources = glGetUniformLocation(shader.handle, (tev_ref_str + ".alpha_sources").c_str()); - uniform_tev_cfg.color_modifiers = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_modifiers").c_str()); - uniform_tev_cfg.alpha_modifiers = glGetUniformLocation(shader.handle, (tev_ref_str + ".alpha_modifiers").c_str()); - uniform_tev_cfg.color_alpha_op = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_alpha_op").c_str()); - uniform_tev_cfg.color_alpha_multiplier = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_alpha_multiplier").c_str()); - uniform_tev_cfg.const_color = glGetUniformLocation(shader.handle, 
(tev_ref_str + ".const_color").c_str()); - uniform_tev_cfg.updates_combiner_buffer_color_alpha = glGetUniformLocation(shader.handle, (tev_ref_str + ".updates_combiner_buffer_color_alpha").c_str()); - } - // Create sampler objects for (size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); @@ -78,29 +49,25 @@ void RasterizerOpenGL::InitObjects() { vertex_buffer.Create(); vertex_array.Create(); - // Update OpenGL state state.draw.vertex_array = vertex_array.handle; state.draw.vertex_buffer = vertex_buffer.handle; - state.draw.shader_program = shader.handle; - state.Apply(); - // Set the texture samplers to correspond to different texture units - glUniform1i(uniform_tex, 0); - glUniform1i(uniform_tex + 1, 1); - glUniform1i(uniform_tex + 2, 2); - // Set vertex attributes - glVertexAttribPointer(attrib_position, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); - glVertexAttribPointer(attrib_color, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, color)); - glVertexAttribPointer(attrib_texcoords, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0)); - glVertexAttribPointer(attrib_texcoords + 1, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord1)); - glVertexAttribPointer(attrib_texcoords + 2, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord2)); - glEnableVertexAttribArray(attrib_position); - glEnableVertexAttribArray(attrib_color); - glEnableVertexAttribArray(attrib_texcoords); - glEnableVertexAttribArray(attrib_texcoords + 1); - glEnableVertexAttribArray(attrib_texcoords + 2); + glVertexAttribPointer(ShaderUtil::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); + glEnableVertexAttribArray(ShaderUtil::ATTRIBUTE_POSITION); + + glVertexAttribPointer(ShaderUtil::ATTRIBUTE_COLOR, 4, GL_FLOAT, 
GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, color)); + glEnableVertexAttribArray(ShaderUtil::ATTRIBUTE_COLOR); + + glVertexAttribPointer(ShaderUtil::ATTRIBUTE_TEXCOORDS, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0)); + glVertexAttribPointer(ShaderUtil::ATTRIBUTE_TEXCOORDS + 1, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord1)); + glVertexAttribPointer(ShaderUtil::ATTRIBUTE_TEXCOORDS + 2, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord2)); + glEnableVertexAttribArray(ShaderUtil::ATTRIBUTE_TEXCOORDS); + glEnableVertexAttribArray(ShaderUtil::ATTRIBUTE_TEXCOORDS + 1); + glEnableVertexAttribArray(ShaderUtil::ATTRIBUTE_TEXCOORDS + 2); + + RegenerateShaders(); // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation fb_color_texture.texture.Create(); @@ -156,55 +123,11 @@ void RasterizerOpenGL::Reset() { SyncBlendEnabled(); SyncBlendFuncs(); SyncBlendColor(); - SyncAlphaTest(); SyncLogicOp(); SyncStencilTest(); SyncDepthTest(); - // TEV stage 0 - SyncTevSources(0, regs.tev_stage0); - SyncTevModifiers(0, regs.tev_stage0); - SyncTevOps(0, regs.tev_stage0); - SyncTevColor(0, regs.tev_stage0); - SyncTevMultipliers(0, regs.tev_stage0); - - // TEV stage 1 - SyncTevSources(1, regs.tev_stage1); - SyncTevModifiers(1, regs.tev_stage1); - SyncTevOps(1, regs.tev_stage1); - SyncTevColor(1, regs.tev_stage1); - SyncTevMultipliers(1, regs.tev_stage1); - - // TEV stage 2 - SyncTevSources(2, regs.tev_stage2); - SyncTevModifiers(2, regs.tev_stage2); - SyncTevOps(2, regs.tev_stage2); - SyncTevColor(2, regs.tev_stage2); - SyncTevMultipliers(2, regs.tev_stage2); - - // TEV stage 3 - SyncTevSources(3, regs.tev_stage3); - SyncTevModifiers(3, regs.tev_stage3); - SyncTevOps(3, regs.tev_stage3); - SyncTevColor(3, regs.tev_stage3); - SyncTevMultipliers(3, regs.tev_stage3); - - // TEV 
stage 4 - SyncTevSources(4, regs.tev_stage4); - SyncTevModifiers(4, regs.tev_stage4); - SyncTevOps(4, regs.tev_stage4); - SyncTevColor(4, regs.tev_stage4); - SyncTevMultipliers(4, regs.tev_stage4); - - // TEV stage 5 - SyncTevSources(5, regs.tev_stage5); - SyncTevModifiers(5, regs.tev_stage5); - SyncTevOps(5, regs.tev_stage5); - SyncTevColor(5, regs.tev_stage5); - SyncTevMultipliers(5, regs.tev_stage5); - - SyncCombinerColor(); - SyncCombinerWriteFlags(); + RegenerateShaders(); res_cache.FullFlush(); } @@ -217,10 +140,88 @@ void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0, vertex_batch.emplace_back(v2); } +namespace ShaderCache { +extern std::string GenerateFragmentShader(const ShaderCacheKey& config); +} + +void RasterizerOpenGL::RegenerateShaders() { + const auto& regs = Pica::g_state.regs; + + ShaderCacheKey config; + config.alpha_test_func = regs.output_merger.alpha_test.enable ? + regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; + config.tev_stages = regs.GetTevStages(); + for (auto& tev : config.tev_stages) { + tev.const_r = 0; + tev.const_g = 0; + tev.const_b = 0; + tev.const_a = 0; + } + config.combiner_buffer_input = + regs.tev_combiner_buffer_input.update_mask_rgb.Value() | + regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; + + auto cached_shader = shader_cache.find(config); + if (cached_shader != shader_cache.end()) { + current_shader = &cached_shader->second; + state.draw.shader_program = current_shader->shader.handle; + state.Apply(); + } else { + LOG_CRITICAL(Render_OpenGL, "Creating new shader: %08X", hash(config)); + + TEVShader shader; + + std::string fragShader = ShaderCache::GenerateFragmentShader(config); + shader.shader.Create(GLShaders::g_vertex_shader_hw, fragShader.c_str()); + + shader.uniform_alphatest_ref = glGetUniformLocation(shader.shader.handle, "alphatest_ref"); + shader.uniform_tex = glGetUniformLocation(shader.shader.handle, "tex"); + 
shader.uniform_tev_combiner_buffer_color = glGetUniformLocation(shader.shader.handle, "tev_combiner_buffer_color"); + shader.uniform_tev_const_colors = glGetUniformLocation(shader.shader.handle, "const_color"); + + current_shader = &shader_cache.emplace(config, std::move(shader)).first->second; + + state.draw.shader_program = current_shader->shader.handle; + state.Apply(); + + // Set the texture samplers to correspond to different texture units + if (shader.uniform_tex != -1) { + glUniform1i(shader.uniform_tex, 0); + glUniform1i(shader.uniform_tex + 1, 1); + glUniform1i(shader.uniform_tex + 2, 2); + } + } + + + // Sync alpha reference + if (current_shader->uniform_alphatest_ref != -1) + glUniform1f(current_shader->uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f); + + // Sync combiner buffer color + if (current_shader->uniform_tev_combiner_buffer_color != -1) { + auto combiner_color = PicaToGL::ColorRGBA8(Pica::g_state.regs.tev_combiner_buffer_color.raw); + glUniform4fv(current_shader->uniform_tev_combiner_buffer_color, 1, combiner_color.data()); + } + + // Sync TEV const colors + if (current_shader->uniform_tev_const_colors != -1) { + auto& tev_stages = Pica::g_state.regs.GetTevStages(); + for (int tev_index = 0; tev_index < tev_stages.size(); ++tev_index) { + auto const_color = PicaToGL::ColorRGBA8(tev_stages[tev_index].const_color); + glUniform4fv(current_shader->uniform_tev_const_colors + tev_index, 1, const_color.data()); + } + } +} + void RasterizerOpenGL::DrawTriangles() { SyncFramebuffer(); SyncDrawState(); + if (state.draw.shader_dirty) { + RegenerateShaders(); + state.draw.shader_dirty = false; + } + glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW); glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); @@ -272,6 +273,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { // Alpha test case PICA_REG_INDEX(output_merger.alpha_test): SyncAlphaTest(); + 
state.draw.shader_dirty = true; break; // Stencil test @@ -290,117 +292,57 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncLogicOp(); break; - // TEV stage 0 + // TEV stages case PICA_REG_INDEX(tev_stage0.color_source1): - SyncTevSources(0, regs.tev_stage0); - break; case PICA_REG_INDEX(tev_stage0.color_modifier1): - SyncTevModifiers(0, regs.tev_stage0); - break; case PICA_REG_INDEX(tev_stage0.color_op): - SyncTevOps(0, regs.tev_stage0); + case PICA_REG_INDEX(tev_stage0.color_scale): + case PICA_REG_INDEX(tev_stage1.color_source1): + case PICA_REG_INDEX(tev_stage1.color_modifier1): + case PICA_REG_INDEX(tev_stage1.color_op): + case PICA_REG_INDEX(tev_stage1.color_scale): + case PICA_REG_INDEX(tev_stage2.color_source1): + case PICA_REG_INDEX(tev_stage2.color_modifier1): + case PICA_REG_INDEX(tev_stage2.color_op): + case PICA_REG_INDEX(tev_stage2.color_scale): + case PICA_REG_INDEX(tev_stage3.color_source1): + case PICA_REG_INDEX(tev_stage3.color_modifier1): + case PICA_REG_INDEX(tev_stage3.color_op): + case PICA_REG_INDEX(tev_stage3.color_scale): + case PICA_REG_INDEX(tev_stage4.color_source1): + case PICA_REG_INDEX(tev_stage4.color_modifier1): + case PICA_REG_INDEX(tev_stage4.color_op): + case PICA_REG_INDEX(tev_stage4.color_scale): + case PICA_REG_INDEX(tev_stage5.color_source1): + case PICA_REG_INDEX(tev_stage5.color_modifier1): + case PICA_REG_INDEX(tev_stage5.color_op): + case PICA_REG_INDEX(tev_stage5.color_scale): + case PICA_REG_INDEX(tev_combiner_buffer_input): + state.draw.shader_dirty = true; break; case PICA_REG_INDEX(tev_stage0.const_r): - SyncTevColor(0, regs.tev_stage0); - break; - case PICA_REG_INDEX(tev_stage0.color_scale): - SyncTevMultipliers(0, regs.tev_stage0); - break; - - // TEV stage 1 - case PICA_REG_INDEX(tev_stage1.color_source1): - SyncTevSources(1, regs.tev_stage1); - break; - case PICA_REG_INDEX(tev_stage1.color_modifier1): - SyncTevModifiers(1, regs.tev_stage1); - break; - case PICA_REG_INDEX(tev_stage1.color_op): - 
SyncTevOps(1, regs.tev_stage1); + SyncTevConstColor(0, regs.tev_stage0); break; case PICA_REG_INDEX(tev_stage1.const_r): - SyncTevColor(1, regs.tev_stage1); - break; - case PICA_REG_INDEX(tev_stage1.color_scale): - SyncTevMultipliers(1, regs.tev_stage1); - break; - - // TEV stage 2 - case PICA_REG_INDEX(tev_stage2.color_source1): - SyncTevSources(2, regs.tev_stage2); - break; - case PICA_REG_INDEX(tev_stage2.color_modifier1): - SyncTevModifiers(2, regs.tev_stage2); - break; - case PICA_REG_INDEX(tev_stage2.color_op): - SyncTevOps(2, regs.tev_stage2); + SyncTevConstColor(1, regs.tev_stage1); break; case PICA_REG_INDEX(tev_stage2.const_r): - SyncTevColor(2, regs.tev_stage2); - break; - case PICA_REG_INDEX(tev_stage2.color_scale): - SyncTevMultipliers(2, regs.tev_stage2); - break; - - // TEV stage 3 - case PICA_REG_INDEX(tev_stage3.color_source1): - SyncTevSources(3, regs.tev_stage3); - break; - case PICA_REG_INDEX(tev_stage3.color_modifier1): - SyncTevModifiers(3, regs.tev_stage3); - break; - case PICA_REG_INDEX(tev_stage3.color_op): - SyncTevOps(3, regs.tev_stage3); + SyncTevConstColor(2, regs.tev_stage2); break; case PICA_REG_INDEX(tev_stage3.const_r): - SyncTevColor(3, regs.tev_stage3); - break; - case PICA_REG_INDEX(tev_stage3.color_scale): - SyncTevMultipliers(3, regs.tev_stage3); - break; - - // TEV stage 4 - case PICA_REG_INDEX(tev_stage4.color_source1): - SyncTevSources(4, regs.tev_stage4); - break; - case PICA_REG_INDEX(tev_stage4.color_modifier1): - SyncTevModifiers(4, regs.tev_stage4); - break; - case PICA_REG_INDEX(tev_stage4.color_op): - SyncTevOps(4, regs.tev_stage4); + SyncTevConstColor(3, regs.tev_stage3); break; case PICA_REG_INDEX(tev_stage4.const_r): - SyncTevColor(4, regs.tev_stage4); - break; - case PICA_REG_INDEX(tev_stage4.color_scale): - SyncTevMultipliers(4, regs.tev_stage4); - break; - - // TEV stage 5 - case PICA_REG_INDEX(tev_stage5.color_source1): - SyncTevSources(5, regs.tev_stage5); - break; - case
PICA_REG_INDEX(tev_stage5.color_modifier1): - SyncTevModifiers(5, regs.tev_stage5); - break; - case PICA_REG_INDEX(tev_stage5.color_op): - SyncTevOps(5, regs.tev_stage5); + SyncTevConstColor(4, regs.tev_stage4); break; case PICA_REG_INDEX(tev_stage5.const_r): - SyncTevColor(5, regs.tev_stage5); - break; - case PICA_REG_INDEX(tev_stage5.color_scale): - SyncTevMultipliers(5, regs.tev_stage5); + SyncTevConstColor(5, regs.tev_stage5); break; // TEV combiner buffer color case PICA_REG_INDEX(tev_combiner_buffer_color): SyncCombinerColor(); break; - - // TEV combiner buffer write flags - case PICA_REG_INDEX(tev_combiner_buffer_input): - SyncCombinerWriteFlags(); - break; } } @@ -712,9 +654,8 @@ void RasterizerOpenGL::SyncBlendColor() { void RasterizerOpenGL::SyncAlphaTest() { const auto& regs = Pica::g_state.regs; - glUniform1i(uniform_alphatest_enabled, regs.output_merger.alpha_test.enable); - glUniform1i(uniform_alphatest_func, (GLint)regs.output_merger.alpha_test.func.Value()); - glUniform1f(uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f); + if (current_shader->uniform_alphatest_ref != -1) + glUniform1f(current_shader->uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f); } void RasterizerOpenGL::SyncLogicOp() { @@ -744,55 +685,17 @@ void RasterizerOpenGL::SyncDepthTest() { state.depth.write_mask = regs.output_merger.depth_write_enable ?
GL_TRUE : GL_FALSE; } -void RasterizerOpenGL::SyncTevSources(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { - GLint color_srcs[3] = { (GLint)config.color_source1.Value(), - (GLint)config.color_source2.Value(), - (GLint)config.color_source3.Value() }; - GLint alpha_srcs[3] = { (GLint)config.alpha_source1.Value(), - (GLint)config.alpha_source2.Value(), - (GLint)config.alpha_source3.Value() }; - - glUniform3iv(uniform_tev_cfgs[stage_index].color_sources, 1, color_srcs); - glUniform3iv(uniform_tev_cfgs[stage_index].alpha_sources, 1, alpha_srcs); -} - -void RasterizerOpenGL::SyncTevModifiers(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { - GLint color_mods[3] = { (GLint)config.color_modifier1.Value(), - (GLint)config.color_modifier2.Value(), - (GLint)config.color_modifier3.Value() }; - GLint alpha_mods[3] = { (GLint)config.alpha_modifier1.Value(), - (GLint)config.alpha_modifier2.Value(), - (GLint)config.alpha_modifier3.Value() }; - - glUniform3iv(uniform_tev_cfgs[stage_index].color_modifiers, 1, color_mods); - glUniform3iv(uniform_tev_cfgs[stage_index].alpha_modifiers, 1, alpha_mods); -} - -void RasterizerOpenGL::SyncTevOps(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { - glUniform2i(uniform_tev_cfgs[stage_index].color_alpha_op, (GLint)config.color_op.Value(), (GLint)config.alpha_op.Value()); -} - -void RasterizerOpenGL::SyncTevColor(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { - auto const_color = PicaToGL::ColorRGBA8(config.const_color); - glUniform4fv(uniform_tev_cfgs[stage_index].const_color, 1, const_color.data()); -} - -void RasterizerOpenGL::SyncTevMultipliers(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { - glUniform2i(uniform_tev_cfgs[stage_index].color_alpha_multiplier, config.GetColorMultiplier(), config.GetAlphaMultiplier()); -} - void RasterizerOpenGL::SyncCombinerColor() { - auto combiner_color = 
PicaToGL::ColorRGBA8(Pica::g_state.regs.tev_combiner_buffer_color.raw); - glUniform4fv(uniform_tev_combiner_buffer_color, 1, combiner_color.data()); + if (current_shader->uniform_tev_combiner_buffer_color != -1) { + auto combiner_color = PicaToGL::ColorRGBA8(Pica::g_state.regs.tev_combiner_buffer_color.raw); + glUniform4fv(current_shader->uniform_tev_combiner_buffer_color, 1, combiner_color.data()); + } } -void RasterizerOpenGL::SyncCombinerWriteFlags() { - const auto& regs = Pica::g_state.regs; - const auto tev_stages = regs.GetTevStages(); - for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { - glUniform2i(uniform_tev_cfgs[tev_stage_index].updates_combiner_buffer_color_alpha, - regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index), - regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)); +void RasterizerOpenGL::SyncTevConstColor(int stage_index, const Pica::Regs::TevStageConfig& tev_stage) { + if (current_shader->uniform_tev_const_colors != -1) { + auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color); + glUniform4fv(current_shader->uniform_tev_const_colors + stage_index, 1, const_color.data()); } } @@ -824,12 +727,6 @@ void RasterizerOpenGL::SyncDrawState() { } } - // Skip processing TEV stages that simply pass the previous stage results through - const auto tev_stages = regs.GetTevStages(); - for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { - glUniform1i(uniform_tev_cfgs[tev_stage_index].enabled, !IsPassThroughTevStage(tev_stages[tev_stage_index])); - } - state.Apply(); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 1fe3078468..19e8db69a1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include "common/common_types.h" @@ -13,6 
+14,60 @@ #include "video_core/renderer_opengl/gl_state.h" #include "video_core/shader/shader_interpreter.h" +template <typename T> +inline size_t hash(const T& o) { + return std::hash<T>()(o); +} + +template <typename T> +inline size_t combine_hash(const T& o) { + return hash(o); +} + +template <typename T, typename... Args> +inline size_t combine_hash(const T& o, const Args&... args) { + return combine_hash(args...) * 31 + hash(o); /* polynomial mix: the result depends on argument order, unlike a plain weighted sum */ +} + +struct ShaderCacheKey { + using Regs = Pica::Regs; + + bool operator ==(const ShaderCacheKey& o) const { + /* Compare the fields that feed the hash directly; equal hashes alone do not imply equal keys. */ if (alpha_test_func != o.alpha_test_func || combiner_buffer_input != o.combiner_buffer_input) return false; for (size_t i = 0; i < tev_stages.size(); ++i) { const auto& a = tev_stages[i]; const auto& b = o.tev_stages[i]; if (a.source_raw != b.source_raw || a.modifier_raw != b.modifier_raw || a.op_raw != b.op_raw || a.scale_raw != b.scale_raw) return false; } return true; + }; + + Regs::CompareFunc alpha_test_func; + std::array<Regs::TevStageConfig, 6> tev_stages; + u8 combiner_buffer_input; + + bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { + return (stage_index < 4) && (combiner_buffer_input & (1 << stage_index)); + } + + bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { + return (stage_index < 4) && ((combiner_buffer_input >> 4) & (1 << stage_index)); + } +}; + +namespace std { + +template<> struct hash<::Pica::Regs::TevStageConfig> { + size_t operator()(const ::Pica::Regs::TevStageConfig& o) const { + return ::combine_hash( + ::hash(o.source_raw), ::hash(o.modifier_raw), + ::hash(o.op_raw), ::hash(o.scale_raw)); + } +}; + +template<> struct hash<::ShaderCacheKey> { + size_t operator()(const ::ShaderCacheKey& o) const { + return ::combine_hash(o.alpha_test_func, o.combiner_buffer_input, + o.tev_stages[0], o.tev_stages[1], o.tev_stages[2], + o.tev_stages[3], o.tev_stages[4], o.tev_stages[5]); + } +}; +} + class RasterizerOpenGL : public HWRasterizer { public: @@ -33,6 +88,8 @@ public: /// Draw the current batch of triangles void DrawTriangles() override; + void RegenerateShaders(); + /// Commit the rasterizer's framebuffer contents immediately to the current 3DS memory framebuffer void CommitFramebuffer() override; @@ -59,6 +116,22 @@ private: GLuint updates_combiner_buffer_color_alpha; }; + struct TEVShader { + OGLShader shader; + + // Hardware fragment shader + GLuint uniform_alphatest_ref; + GLuint
uniform_tex; + GLuint uniform_tev_combiner_buffer_color; + GLuint uniform_tev_const_colors; + + TEVShader() = default; + TEVShader(TEVShader&& o) : shader(std::move(o.shader)), + uniform_alphatest_ref(o.uniform_alphatest_ref), uniform_tex(o.uniform_tex), + uniform_tev_combiner_buffer_color(o.uniform_tev_combiner_buffer_color), + uniform_tev_const_colors(o.uniform_tev_const_colors) {} + }; + /// Structure used for storing information about color textures struct TextureInfo { OGLTexture texture; @@ -156,27 +229,12 @@ private: /// Syncs the depth test states to match the PICA register void SyncDepthTest(); - /// Syncs the specified TEV stage's color and alpha sources to match the PICA register - void SyncTevSources(unsigned stage_index, const Pica::Regs::TevStageConfig& config); - - /// Syncs the specified TEV stage's color and alpha modifiers to match the PICA register - void SyncTevModifiers(unsigned stage_index, const Pica::Regs::TevStageConfig& config); - - /// Syncs the specified TEV stage's color and alpha combiner operations to match the PICA register - void SyncTevOps(unsigned stage_index, const Pica::Regs::TevStageConfig& config); - - /// Syncs the specified TEV stage's constant color to match the PICA register - void SyncTevColor(unsigned stage_index, const Pica::Regs::TevStageConfig& config); - - /// Syncs the specified TEV stage's color and alpha multipliers to match the PICA register - void SyncTevMultipliers(unsigned stage_index, const Pica::Regs::TevStageConfig& config); + /// Syncs the TEV constant color to match the PICA register + void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage); /// Syncs the TEV combiner color buffer to match the PICA register void SyncCombinerColor(); - /// Syncs the TEV combiner write flags to match the PICA register - void SyncCombinerWriteFlags(); - /// Syncs the remaining OpenGL drawing state to match the current PICA state void SyncDrawState(); @@ -213,21 +271,11 @@ private: std::array 
texture_samplers; TextureInfo fb_color_texture; DepthTextureInfo fb_depth_texture; - OGLShader shader; + + std::unordered_map shader_cache; + TEVShader* current_shader = nullptr; + OGLVertexArray vertex_array; OGLBuffer vertex_buffer; OGLFramebuffer framebuffer; - - // Hardware vertex shader - GLuint attrib_position; - GLuint attrib_color; - GLuint attrib_texcoords; - - // Hardware fragment shader - GLuint uniform_alphatest_enabled; - GLuint uniform_alphatest_func; - GLuint uniform_alphatest_ref; - GLuint uniform_tex; - GLuint uniform_tev_combiner_buffer_color; - TEVConfigUniforms uniform_tev_cfgs[6]; }; diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 4cf246c061..ee32f6a311 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -2,6 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. + +#include "gl_shader_util.h" +#include "gl_rasterizer.h" +#include "common/logging/log.h" + +#include "video_core/pica.h" + #include #include @@ -65,6 +72,13 @@ GLuint LoadShaders(const char* vertex_shader, const char* fragment_shader) { GLuint program_id = glCreateProgram(); glAttachShader(program_id, vertex_shader_id); glAttachShader(program_id, fragment_shader_id); + + glBindAttribLocation(program_id, Attributes::ATTRIBUTE_POSITION, "vert_position"); + glBindAttribLocation(program_id, Attributes::ATTRIBUTE_COLOR, "vert_color"); + glBindAttribLocation(program_id, Attributes::ATTRIBUTE_TEXCOORDS + 0, "vert_texcoords0"); + glBindAttribLocation(program_id, Attributes::ATTRIBUTE_TEXCOORDS + 1, "vert_texcoords1"); + glBindAttribLocation(program_id, Attributes::ATTRIBUTE_TEXCOORDS + 2, "vert_texcoords2"); + glLinkProgram(program_id); // Check the program @@ -88,3 +102,338 @@ GLuint LoadShaders(const char* vertex_shader, const char* fragment_shader) { } } + +namespace ShaderCache +{ + +static bool 
IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) { + return (stage.color_op == Pica::Regs::TevStageConfig::Operation::Replace && + stage.alpha_op == Pica::Regs::TevStageConfig::Operation::Replace && + stage.color_source1 == Pica::Regs::TevStageConfig::Source::Previous && + stage.alpha_source1 == Pica::Regs::TevStageConfig::Source::Previous && + stage.color_modifier1 == Pica::Regs::TevStageConfig::ColorModifier::SourceColor && + stage.alpha_modifier1 == Pica::Regs::TevStageConfig::AlphaModifier::SourceAlpha && + stage.GetColorMultiplier() == 1 && + stage.GetAlphaMultiplier() == 1); +} + +void AppendSource(std::string& shader, Pica::Regs::TevStageConfig::Source source, const std::string& index_name) { + using Source = Pica::Regs::TevStageConfig::Source; + switch (source) { + case Source::PrimaryColor: + shader += "o[2]"; + break; + case Source::PrimaryFragmentColor: + // HACK: Until we implement fragment lighting, use primary_color + shader += "o[2]"; + break; + case Source::SecondaryFragmentColor: + // HACK: Until we implement fragment lighting, use zero + shader += "vec4(0.0, 0.0, 0.0, 0.0)"; + break; + case Source::Texture0: + shader += "texture(tex[0], o[3].xy)"; + break; + case Source::Texture1: + shader += "texture(tex[1], o[3].zw)"; + break; + case Source::Texture2: // TODO: Unverified + shader += "texture(tex[2], o[5].zw)"; + break; + case Source::PreviousBuffer: + shader += "g_combiner_buffer"; + break; + case Source::Constant: + shader += "const_color[" + index_name + "]"; + break; + case Source::Previous: + shader += "g_last_tex_env_out"; + break; + default: + shader += "vec4(0.0)"; + LOG_CRITICAL(Render_OpenGL, "Unknown source op %u", source); + break; + } +} + +void AppendColorModifier(std::string& shader, Pica::Regs::TevStageConfig::ColorModifier modifier, Pica::Regs::TevStageConfig::Source source, const std::string& index_name) { + using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier; + switch (modifier) { + case 
ColorModifier::SourceColor: + AppendSource(shader, source, index_name); + shader += ".rgb"; + break; + case ColorModifier::OneMinusSourceColor: + shader += "vec3(1.0) - "; + AppendSource(shader, source, index_name); + shader += ".rgb"; + break; + case ColorModifier::SourceAlpha: + AppendSource(shader, source, index_name); + shader += ".aaa"; + break; + case ColorModifier::OneMinusSourceAlpha: + shader += "vec3(1.0) - "; + AppendSource(shader, source, index_name); + shader += ".aaa"; + break; + case ColorModifier::SourceRed: + AppendSource(shader, source, index_name); + shader += ".rrr"; + break; + case ColorModifier::OneMinusSourceRed: + shader += "vec3(1.0) - "; + AppendSource(shader, source, index_name); + shader += ".rrr"; + break; + case ColorModifier::SourceGreen: + AppendSource(shader, source, index_name); + shader += ".ggg"; + break; + case ColorModifier::OneMinusSourceGreen: + shader += "vec3(1.0) - "; + AppendSource(shader, source, index_name); + shader += ".ggg"; + break; + case ColorModifier::SourceBlue: + AppendSource(shader, source, index_name); + shader += ".bbb"; + break; + case ColorModifier::OneMinusSourceBlue: + shader += "vec3(1.0) - "; + AppendSource(shader, source, index_name); + shader += ".bbb"; + break; + default: + shader += "vec3(0.0)"; + LOG_CRITICAL(Render_OpenGL, "Unknown color modifier op %u", modifier); + break; + } +} + +void AppendAlphaModifier(std::string& shader, Pica::Regs::TevStageConfig::AlphaModifier modifier, Pica::Regs::TevStageConfig::Source source, const std::string& index_name) { + using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier; + switch (modifier) { + case AlphaModifier::SourceAlpha: + AppendSource(shader, source, index_name); + shader += ".a"; + break; + case AlphaModifier::OneMinusSourceAlpha: + shader += "1.0 - "; + AppendSource(shader, source, index_name); + shader += ".a"; + break; + case AlphaModifier::SourceRed: + AppendSource(shader, source, index_name); + shader += ".r"; + break; + case 
AlphaModifier::OneMinusSourceRed: + shader += "1.0 - "; + AppendSource(shader, source, index_name); + shader += ".r"; + break; + case AlphaModifier::SourceGreen: + AppendSource(shader, source, index_name); + shader += ".g"; + break; + case AlphaModifier::OneMinusSourceGreen: + shader += "1.0 - "; + AppendSource(shader, source, index_name); + shader += ".g"; + break; + case AlphaModifier::SourceBlue: + AppendSource(shader, source, index_name); + shader += ".b"; + break; + case AlphaModifier::OneMinusSourceBlue: + shader += "1.0 - "; + AppendSource(shader, source, index_name); + shader += ".b"; + break; + default: + shader += "0.0"; /* alpha terms are scalar floats in the generated GLSL */ + LOG_CRITICAL(Render_OpenGL, "Unknown alpha modifier op %u", modifier); + break; + } +} + +void AppendColorCombiner(std::string& shader, Pica::Regs::TevStageConfig::Operation operation, const std::string& variable_name) { + using Operation = Pica::Regs::TevStageConfig::Operation; + + switch (operation) { + case Operation::Replace: + shader += variable_name + "[0]"; + break; + case Operation::Modulate: + shader += variable_name + "[0] * " + variable_name + "[1]"; + break; + case Operation::Add: + shader += "min(" + variable_name + "[0] + " + variable_name + "[1], 1.0)"; + break; + case Operation::AddSigned: + shader += "clamp(" + variable_name + "[0] + " + variable_name + "[1] - vec3(0.5), 0.0, 1.0)"; + break; + case Operation::Lerp: + shader += variable_name + "[0] * " + variable_name + "[2] + " + variable_name + "[1] * (vec3(1.0) - " + variable_name + "[2])"; + break; + case Operation::Subtract: + shader += "max(" + variable_name + "[0] - " + variable_name + "[1], 0.0)"; + break; + case Operation::MultiplyThenAdd: + shader += "min(" + variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2], 1.0)"; + break; + case Operation::AddThenMultiply: + shader += "min(" + variable_name + "[0] + " + variable_name + "[1], 1.0) * " + variable_name + "[2]"; + break; + default: + shader += "vec3(0.0)"; /* the color combiner result initialises a vec3 */ + LOG_CRITICAL(Render_OpenGL,
"Unknown color comb op %u", operation); + break; + } +} + +void AppendAlphaCombiner(std::string& shader, Pica::Regs::TevStageConfig::Operation operation, const std::string& variable_name) { + using Operation = Pica::Regs::TevStageConfig::Operation; + switch (operation) { + case Operation::Replace: + shader += variable_name + "[0]"; + break; + case Operation::Modulate: + shader += variable_name + "[0] * " + variable_name + "[1]"; + break; + case Operation::Add: + shader += "min(" + variable_name + "[0] + " + variable_name + "[1], 1.0)"; + break; + case Operation::AddSigned: + shader += "clamp(" + variable_name + "[0] + " + variable_name + "[1] - 0.5, 0.0, 1.0)"; + break; + case Operation::Lerp: + shader += variable_name + "[0] * " + variable_name + "[2] + " + variable_name + "[1] * (1.0 - " + variable_name + "[2])"; + break; + case Operation::Subtract: + shader += "max(" + variable_name + "[0] - " + variable_name + "[1], 0.0)"; + break; + case Operation::MultiplyThenAdd: + shader += "min(" + variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2], 1.0)"; + break; + case Operation::AddThenMultiply: + shader += "min(" + variable_name + "[0] + " + variable_name + "[1], 1.0) * " + variable_name + "[2]"; + break; + default: + shader += "0.0"; + LOG_CRITICAL(Render_OpenGL, "Unknown alpha combiner op %u", operation); + break; + } +} + +void AppendAlphaTestCondition(std::string& shader, Pica::Regs::CompareFunc func) { + using CompareFunc = Pica::Regs::CompareFunc; + switch (func) { + case CompareFunc::Never: + shader += "true"; + break; + case CompareFunc::Always: + shader += "false"; + break; + case CompareFunc::Equal: + shader += "g_last_tex_env_out.a != alphatest_ref"; + break; + case CompareFunc::NotEqual: + shader += "g_last_tex_env_out.a == alphatest_ref"; + break; + case CompareFunc::LessThan: + shader += "g_last_tex_env_out.a >= alphatest_ref"; + break; + case CompareFunc::LessThanOrEqual: + shader += "g_last_tex_env_out.a > alphatest_ref"; + 
break;
+    case CompareFunc::GreaterThan:
+        shader += "g_last_tex_env_out.a <= alphatest_ref";
+        break;
+    case CompareFunc::GreaterThanOrEqual:
+        shader += "g_last_tex_env_out.a < alphatest_ref";
+        break;
+    default:
+        shader += "false";
+        LOG_CRITICAL(Render_OpenGL, "Unknown alpha test condition %u", func);
+        break;
+    }
+}
+/// Builds the GLSL 1.50 fragment shader source for `config`: one combiner block per non-pass-through TEV stage, then the alpha test, then the final colour write.
+std::string GenerateFragmentShader(const ShaderCacheKey& config) {
+    std::string shader = R"(
+#version 150 core
+
+#define NUM_VTX_ATTR 7
+#define NUM_TEV_STAGES 6
+
+in vec4 o[NUM_VTX_ATTR];
+out vec4 color;
+
+uniform float alphatest_ref;
+uniform vec4 const_color[NUM_TEV_STAGES];
+uniform sampler2D tex[3];
+
+uniform vec4 tev_combiner_buffer_color;
+
+void main(void) {
+    vec4 g_combiner_buffer = tev_combiner_buffer_color;
+    vec4 g_last_tex_env_out = vec4(0.0, 0.0, 0.0, 0.0);
+)";
+
+    // Do not do any sort of processing if it's obvious we're not going to pass the alpha test
+    if (config.alpha_test_func == Pica::Regs::CompareFunc::Never) {
+        shader += "discard;\n}"; // the prologue above leaves main() open, so the early exit must close it too
+        return shader;
+    }
+
+    auto& tev_stages = config.tev_stages;
+    for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) {
+        auto& tev_stage = tev_stages[tev_stage_index];
+        if (!IsPassThroughTevStage(tev_stage)) {
+            std::string index_name = std::to_string(tev_stage_index);
+            // Per-stage locals are suffixed with the stage index so every stage declares distinct GLSL names.
+            shader += "vec3 color_results_" + index_name + "[3] = vec3[3](";
+            AppendColorModifier(shader, tev_stage.color_modifier1, tev_stage.color_source1, index_name);
+            shader += ", ";
+            AppendColorModifier(shader, tev_stage.color_modifier2, tev_stage.color_source2, index_name);
+            shader += ", ";
+            AppendColorModifier(shader, tev_stage.color_modifier3, tev_stage.color_source3, index_name);
+            shader += ");\n";
+
+            shader += "vec3 color_output_" + index_name + " = ";
+            AppendColorCombiner(shader, tev_stage.color_op, "color_results_" + index_name);
+            shader += ";\n";
+
+            shader += "float alpha_results_" + index_name + "[3] = float[3](";
+            AppendAlphaModifier(shader, tev_stage.alpha_modifier1, tev_stage.alpha_source1, index_name);
+            shader += ", ";
+            AppendAlphaModifier(shader, tev_stage.alpha_modifier2, tev_stage.alpha_source2, index_name);
+            shader += ", ";
+            AppendAlphaModifier(shader, tev_stage.alpha_modifier3, tev_stage.alpha_source3, index_name);
+            shader += ");\n";
+
+            shader += "float alpha_output_" + index_name + " = ";
+            AppendAlphaCombiner(shader, tev_stage.alpha_op, "alpha_results_" + index_name);
+            shader += ";\n";
+            // The multipliers are appended as "<n>.0" float literals; the scaled result is clamped to at most 1.0.
+            shader += "g_last_tex_env_out = vec4(min(color_output_" + index_name + " * " + std::to_string(tev_stage.GetColorMultiplier()) + ".0, 1.0), min(alpha_output_" + index_name + " * " + std::to_string(tev_stage.GetAlphaMultiplier()) + ".0, 1.0));\n";
+        }
+
+        if (config.TevStageUpdatesCombinerBufferColor(tev_stage_index))
+            shader += "g_combiner_buffer.rgb = g_last_tex_env_out.rgb;\n";
+
+        if (config.TevStageUpdatesCombinerBufferAlpha(tev_stage_index))
+            shader += "g_combiner_buffer.a = g_last_tex_env_out.a;\n";
+    }
+    // CompareFunc::Always can never discard, so no test is emitted for it.
+    if (config.alpha_test_func != Pica::Regs::CompareFunc::Always) {
+        shader += "if (";
+        AppendAlphaTestCondition(shader, config.alpha_test_func);
+        shader += ") {\n discard;\n }\n";
+    }
+
+    shader += "color = g_last_tex_env_out;\n}";
+    return shader;
+}
+}
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index c9d7cc380c..ca62c83ba1 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -8,6 +8,12 @@ namespace ShaderUtil { +enum Attributes { + ATTRIBUTE_POSITION = 0, + ATTRIBUTE_COLOR = 1, + ATTRIBUTE_TEXCOORDS = 2, +}; + GLuint LoadShaders(const char* vertex_file_path, const char* fragment_file_path); } diff --git a/src/video_core/renderer_opengl/gl_shaders.h b/src/video_core/renderer_opengl/gl_shaders.h index a8cb2f5950..2ba2c6b0fe 100644 --- a/src/video_core/renderer_opengl/gl_shaders.h +++ b/src/video_core/renderer_opengl/gl_shaders.h @@ -49,14 +49,16 @@ const char 
g_vertex_shader_hw[] = R"( in vec4 vert_position; in vec4 vert_color; -in vec2 vert_texcoords[3]; +in vec2 vert_texcoords0; +in vec2 vert_texcoords1; +in vec2 vert_texcoords2; out vec4 o[NUM_VTX_ATTR]; void main() { o[2] = vert_color; - o[3] = vec4(vert_texcoords[0].xy, vert_texcoords[1].xy); - o[5] = vec4(0.0, 0.0, vert_texcoords[2].xy); + o[3] = vec4(vert_texcoords0.xy, vert_texcoords1.xy); + o[5] = vec4(0.0, 0.0, vert_texcoords2.xy); gl_Position = vec4(vert_position.x, -vert_position.y, -vert_position.z, vert_position.w); } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 6ecbedbb45..668b042593 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -65,6 +65,7 @@ public: GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING GLuint shader_program; // GL_CURRENT_PROGRAM + bool shader_dirty; } draw; OpenGLState();