From b624a9520546410445c1320fac163759c9be6750 Mon Sep 17 00:00:00 2001
From: wwylele <wwylele@gmail.com>
Date: Wed, 19 Apr 2017 23:48:10 +0300
Subject: [PATCH 1/2] rasterizer: implement combiner operation 7 (Dot3_RGBA)

---
 src/video_core/regs_texturing.h               |  2 +-
 .../renderer_opengl/gl_shader_gen.cpp         | 29 ++++++++++++-------
 src/video_core/swrasterizer/rasterizer.cpp    | 23 ++++++++++-----
 src/video_core/swrasterizer/texturing.cpp     |  3 +-
 4 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/src/video_core/regs_texturing.h b/src/video_core/regs_texturing.h
index be8bc68266..0b62da1457 100644
--- a/src/video_core/regs_texturing.h
+++ b/src/video_core/regs_texturing.h
@@ -199,7 +199,7 @@ struct TexturingRegs {
             Lerp = 4,
             Subtract = 5,
             Dot3_RGB = 6,
-
+            Dot3_RGBA = 7,
             MultiplyThenAdd = 8,
             AddThenMultiply = 9,
         };
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 7abdeba052..8be4171fcf 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -230,6 +230,7 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
                variable_name + "[2]";
         break;
     case Operation::Dot3_RGB:
+    case Operation::Dot3_RGBA:
         out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name +
                "[1] - vec3(0.5)) * 4.0)";
         break;
@@ -329,17 +330,25 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
         AppendColorCombiner(out, stage.color_op, "color_results_" + index_name);
         out += ";\n";
 
-        out += "float alpha_results_" + index_name + "[3] = float[3](";
-        AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, index_name);
-        out += ", ";
-        AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, index_name);
-        out += ", ";
-        AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, index_name);
-        out += ");\n";
+        if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
+            // the Dot3_RGBA result is also written to the alpha component
+            out += "float alpha_output_" + index_name + " = color_output_" + index_name + "[0];\n";
+        } else {
+            out += "float alpha_results_" + index_name + "[3] = float[3](";
+            AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1,
+                                index_name);
+            out += ", ";
+            AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2,
+                                index_name);
+            out += ", ";
+            AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3,
+                                index_name);
+            out += ");\n";
 
-        out += "float alpha_output_" + index_name + " = ";
-        AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name);
-        out += ";\n";
+            out += "float alpha_output_" + index_name + " = ";
+            AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name);
+            out += ";\n";
+        }
 
         out += "last_tex_env_out = vec4("
                "clamp(color_output_" +
diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp
index 7557fcb892..cb1b90a817 100644
--- a/src/video_core/swrasterizer/rasterizer.cpp
+++ b/src/video_core/swrasterizer/rasterizer.cpp
@@ -403,13 +403,22 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
                 };
                 auto color_output = ColorCombine(tev_stage.color_op, color_result);
 
-                // alpha combiner
-                std::array<u8, 3> alpha_result = {{
-                    GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)),
-                    GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)),
-                    GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)),
-                }};
-                auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
+                u8 alpha_output;
+                if (tev_stage.color_op == TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) {
+                    // the Dot3_RGBA result is also written to the alpha component
+                    alpha_output = color_output.x;
+                } else {
+                    // alpha combiner
+                    std::array<u8, 3> alpha_result = {{
+                        GetAlphaModifier(tev_stage.alpha_modifier1,
+                                         GetSource(tev_stage.alpha_source1)),
+                        GetAlphaModifier(tev_stage.alpha_modifier2,
+                                         GetSource(tev_stage.alpha_source2)),
+                        GetAlphaModifier(tev_stage.alpha_modifier3,
+                                         GetSource(tev_stage.alpha_source3)),
+                    }};
+                    alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
+                }
 
                 combiner_output[0] =
                     std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier());
diff --git a/src/video_core/swrasterizer/texturing.cpp b/src/video_core/swrasterizer/texturing.cpp
index eb18e4ba48..aeb6aeb8cb 100644
--- a/src/video_core/swrasterizer/texturing.cpp
+++ b/src/video_core/swrasterizer/texturing.cpp
@@ -169,7 +169,8 @@ Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> i
         result = (result * input[2].Cast<int>()) / 255;
         return result.Cast<u8>();
     }
-    case Operation::Dot3_RGB: {
+    case Operation::Dot3_RGB:
+    case Operation::Dot3_RGBA: {
         // Not fully accurate.  Worst case scenario seems to yield a +/-3 error.  Some HW results
         // indicate that the per-component computation can't have a higher precision than 1/256,
         // while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give

From 2c2e872b318dee94057461a24c5915e8e0ebc844 Mon Sep 17 00:00:00 2001
From: wwylele <wwylele@gmail.com>
Date: Thu, 20 Apr 2017 22:56:07 +0300
Subject: [PATCH 2/2] gl_shader_gen: remove TODO about Lerp behaviour
 verification. The implementation is verified against hardware

---
 src/video_core/renderer_opengl/gl_shader_gen.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 8be4171fcf..193cb13bd7 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -214,8 +214,6 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
         out += variable_name + "[0] + " + variable_name + "[1] - vec3(0.5)";
         break;
     case Operation::Lerp:
-        // TODO(bunnei): Verify if HW actually does this per-component, otherwise we can just use
-        // builtin lerp
         out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name +
                "[1] * (vec3(1.0) - " + variable_name + "[2])";
         break;