From b624a9520546410445c1320fac163759c9be6750 Mon Sep 17 00:00:00 2001 From: wwylele <wwylele@gmail.com> Date: Wed, 19 Apr 2017 23:48:10 +0300 Subject: [PATCH 1/2] rasterizer: implement combiner operation 7 (Dot3_RGBA) --- src/video_core/regs_texturing.h | 2 +- .../renderer_opengl/gl_shader_gen.cpp | 29 ++++++++++++------- src/video_core/swrasterizer/rasterizer.cpp | 23 ++++++++++----- src/video_core/swrasterizer/texturing.cpp | 3 +- 4 files changed, 38 insertions(+), 19 deletions(-) diff --git a/src/video_core/regs_texturing.h b/src/video_core/regs_texturing.h index be8bc68266..0b62da1457 100644 --- a/src/video_core/regs_texturing.h +++ b/src/video_core/regs_texturing.h @@ -199,7 +199,7 @@ struct TexturingRegs { Lerp = 4, Subtract = 5, Dot3_RGB = 6, - + Dot3_RGBA = 7, MultiplyThenAdd = 8, AddThenMultiply = 9, }; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 7abdeba052..8be4171fcf 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -230,6 +230,7 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper variable_name + "[2]"; break; case Operation::Dot3_RGB: + case Operation::Dot3_RGBA: out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name + "[1] - vec3(0.5)) * 4.0)"; break; @@ -329,17 +330,25 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi AppendColorCombiner(out, stage.color_op, "color_results_" + index_name); out += ";\n"; - out += "float alpha_results_" + index_name + "[3] = float[3]("; - AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, index_name); - out += ", "; - AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, index_name); - out += ", "; - AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, index_name); - out += ");\n"; + if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) { + // result of Dot3_RGBA operation is also placed to the alpha component + out += "float alpha_output_" + index_name + " = color_output_" + index_name + "[0];\n"; + } else { + out += "float alpha_results_" + index_name + "[3] = float[3]("; + AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, + index_name); + out += ", "; + AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, + index_name); + out += ", "; + AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, + index_name); + out += ");\n"; - out += "float alpha_output_" + index_name + " = "; - AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name); - out += ";\n"; + out += "float alpha_output_" + index_name + " = "; + AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name); + out += ";\n"; + } out += "last_tex_env_out = vec4(" "clamp(color_output_" + diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 7557fcb892..cb1b90a817 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -403,13 +403,22 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve }; auto color_output = ColorCombine(tev_stage.color_op, color_result); - // alpha combiner - std::array<u8, 3> alpha_result = {{ - GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)), - GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)), - GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)), - }}; - auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); + u8 alpha_output; + if (tev_stage.color_op == TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) { + // result of Dot3_RGBA operation is also placed to the alpha component + alpha_output = color_output.x; + } else { + // alpha combiner + std::array<u8, 3> alpha_result = {{ + GetAlphaModifier(tev_stage.alpha_modifier1, + GetSource(tev_stage.alpha_source1)), + GetAlphaModifier(tev_stage.alpha_modifier2, + GetSource(tev_stage.alpha_source2)), + GetAlphaModifier(tev_stage.alpha_modifier3, + GetSource(tev_stage.alpha_source3)), + }}; + alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); + } combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); diff --git a/src/video_core/swrasterizer/texturing.cpp b/src/video_core/swrasterizer/texturing.cpp index eb18e4ba48..aeb6aeb8cb 100644 --- a/src/video_core/swrasterizer/texturing.cpp +++ b/src/video_core/swrasterizer/texturing.cpp @@ -169,7 +169,8 @@ Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> i result = (result * input[2].Cast<int>()) / 255; return result.Cast<u8>(); } - case Operation::Dot3_RGB: { + case Operation::Dot3_RGB: + case Operation::Dot3_RGBA: { // Not fully accurate. Worst case scenario seems to yield a +/-3 error. Some HW results // indicate that the per-component computation can't have a higher precision than 1/256, // while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give From 2c2e872b318dee94057461a24c5915e8e0ebc844 Mon Sep 17 00:00:00 2001 From: wwylele <wwylele@gmail.com> Date: Thu, 20 Apr 2017 22:56:07 +0300 Subject: [PATCH 2/2] gl_shader_gen: remove TODO about Lerp behaviour verification. The implementation is verified against hardware --- src/video_core/renderer_opengl/gl_shader_gen.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 8be4171fcf..193cb13bd7 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -214,8 +214,6 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper out += variable_name + "[0] + " + variable_name + "[1] - vec3(0.5)"; break; case Operation::Lerp: - // TODO(bunnei): Verify if HW actually does this per-component, otherwise we can just use - // builtin lerp out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name + "[1] * (vec3(1.0) - " + variable_name + "[2])"; break;