From c900c092e329d2e78b9663c1c3fe401da7faaea5 Mon Sep 17 00:00:00 2001
From: Jannik Vogel <email@jannikvogel.de>
Date: Mon, 23 May 2016 16:03:11 +0200
Subject: [PATCH 1/4] OpenGL: Avoid undefined behaviour for
 UNIFORM_BLOCK_DATA_SIZE

---
 src/video_core/renderer_opengl/gl_rasterizer.h   | 10 ++++++----
 src/video_core/renderer_opengl/gl_shader_gen.cpp |  4 ++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index bb7f201614..bdc7bd0f24 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -316,16 +316,18 @@ private:
         GLfloat dist_atten_scale;
     };
 
-    /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned
+    /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
+    // NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
+    //       the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
+    //       Not following that rule will cause problems on some AMD drivers.
     struct UniformData {
-        // A vec4 color for each of the six tev stages
-        GLvec4 const_color[6];
-        GLvec4 tev_combiner_buffer_color;
         GLint alphatest_ref;
         GLfloat depth_scale;
         GLfloat depth_offset;
         alignas(16) GLvec3 lighting_global_ambient;
         LightSrc light_src[8];
+        alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages
+        alignas(16) GLvec4 tev_combiner_buffer_color;
     };
 
     static_assert(sizeof(UniformData) == 0x390, "The size of the UniformData structure has changed, update the structure in the shader");
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 8332e722db..ea7ab2883a 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -552,13 +552,13 @@ struct LightSrc {
 };
 
 layout (std140) uniform shader_data {
-    vec4 const_color[NUM_TEV_STAGES];
-    vec4 tev_combiner_buffer_color;
     int alphatest_ref;
     float depth_scale;
     float depth_offset;
     vec3 lighting_global_ambient;
     LightSrc light_src[NUM_LIGHTS];
+    vec4 const_color[NUM_TEV_STAGES];
+    vec4 tev_combiner_buffer_color;
 };
 
 uniform sampler2D tex[3];

From 57855a1701474c65b8dd95d0c312d02fae8fe1a6 Mon Sep 17 00:00:00 2001
From: Jannik Vogel <email@jannikvogel.de>
Date: Wed, 11 May 2016 13:39:28 +0200
Subject: [PATCH 2/4] Pica: Add fog state

---
 src/video_core/command_processor.cpp | 14 ++++++++
 src/video_core/pica.h                | 53 +++++++++++++++++++++-------
 src/video_core/pica_state.h          | 16 +++++++--
 3 files changed, 69 insertions(+), 14 deletions(-)

diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 19e03adf46..689859049d 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -423,6 +423,20 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
             break;
         }
 
+        case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8):
+        case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9):
+        case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea):
+        case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb):
+        case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec):
+        case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed):
+        case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee):
+        case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef):
+        {
+            g_state.fog.lut[regs.fog_lut_offset % 128].raw = value;
+            regs.fog_lut_offset.Assign(regs.fog_lut_offset + 1);
+            break;
+        }
+
         default:
             break;
     }
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 544ea037f2..09702d46ad 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -401,22 +401,47 @@ struct Regs {
     TevStageConfig tev_stage3;
     INSERT_PADDING_WORDS(0x3);
 
+    enum class FogMode : u32 {
+        None = 0,
+        Fog  = 5, // mode tested by the fog combiner in the software rasterizer and GL shader generator
+        Gas  = 7, // NOTE(review): no consumer of this mode is visible in this patch series - confirm
+    };
+
     union {
-        // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in
-        // these masks are set
-        BitField< 8, 4, u32> update_mask_rgb;
-        BitField<12, 4, u32> update_mask_a;
+        BitField<0, 3, FogMode> fog_mode;
+        BitField<16, 1, u32> fog_flip;
 
-        bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
-            return (stage_index < 4) && (update_mask_rgb & (1 << stage_index));
-        }
+        union {
+            // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in
+            // these masks are set
+            BitField< 8, 4, u32> update_mask_rgb;
+            BitField<12, 4, u32> update_mask_a;
 
-        bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
-            return (stage_index < 4) && (update_mask_a & (1 << stage_index));
-        }
-    } tev_combiner_buffer_input;
+            bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
+                return (stage_index < 4) && (update_mask_rgb & (1 << stage_index));
+            }
+
+            bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
+                return (stage_index < 4) && (update_mask_a & (1 << stage_index));
+            }
+        } tev_combiner_buffer_input;
+    };
+
+    union {
+        u32 raw;
+        BitField< 0, 8, u32> r;
+        BitField< 8, 8, u32> g;
+        BitField<16, 8, u32> b;
+    } fog_color;
+
+    INSERT_PADDING_WORDS(0x4);
+
+    BitField<0, 16, u32> fog_lut_offset;
+
+    INSERT_PADDING_WORDS(0x1);
+
+    u32 fog_lut_data[8];
 
-    INSERT_PADDING_WORDS(0xf);
     TevStageConfig tev_stage4;
     INSERT_PADDING_WORDS(0x3);
     TevStageConfig tev_stage5;
@@ -1318,6 +1343,10 @@ ASSERT_REG_POSITION(tev_stage1, 0xc8);
 ASSERT_REG_POSITION(tev_stage2, 0xd0);
 ASSERT_REG_POSITION(tev_stage3, 0xd8);
 ASSERT_REG_POSITION(tev_combiner_buffer_input, 0xe0);
+ASSERT_REG_POSITION(fog_mode, 0xe0);
+ASSERT_REG_POSITION(fog_color, 0xe1);
+ASSERT_REG_POSITION(fog_lut_offset, 0xe6);
+ASSERT_REG_POSITION(fog_lut_data, 0xe8);
 ASSERT_REG_POSITION(tev_stage4, 0xf0);
 ASSERT_REG_POSITION(tev_stage5, 0xf8);
 ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd);
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index 495174c250..01f4285a88 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -33,10 +33,10 @@ struct State {
             u32 raw;
 
             // LUT value, encoded as 12-bit fixed point, with 12 fraction bits
-            BitField< 0, 12, u32> value;
+            BitField< 0, 12, u32> value; // 0.0.12 fixed point
 
             // Used by HW for efficient interpolation, Citra does not use these
-            BitField<12, 12, u32> difference;
+            BitField<12, 12, s32> difference; // 1.0.11 fixed point
 
             float ToFloat() {
                 return static_cast<float>(value) / 4095.f;
@@ -46,6 +46,18 @@ struct State {
         std::array<std::array<LutEntry, 256>, 24> luts;
     } lighting;
 
+    struct {
+        union LutEntry {
+            // Raw 32-bit word, as stored by the fog_lut_data register writes
+            u32 raw;
+
+            BitField< 0, 13, s32> difference; // bits 0-12: signed interpolation delta, 1.1.11 fixed point
+            BitField<13, 11, u32> value; // bits 13-23: unsigned entry value, 0.0.11 fixed point
+        };
+        // Fog LUT: 128 entries, looked up by the rasterizers with depth scaled by 128
+        std::array<LutEntry, 128> lut;
+    } fog;
+
     /// Current Pica command list
     struct {
         const u32* head_ptr;

From ebee2513a9389e6e79baebcd8431e1a3495d58a6 Mon Sep 17 00:00:00 2001
From: Jannik Vogel <email@jannikvogel.de>
Date: Wed, 11 May 2016 13:39:56 +0200
Subject: [PATCH 3/4] Rasterizer: Implement fog

---
 src/video_core/rasterizer.cpp | 73 +++++++++++++++++++++++++----------
 1 file changed, 52 insertions(+), 21 deletions(-)

diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 65168f05ab..a84170094c 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -398,6 +398,26 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
                                                 float24::FromFloat32(static_cast<float>(w2)));
             float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates);
 
+            // interpolated_z = z / w
+            float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 +
+                                           v1.screenpos[2].ToFloat32() * w1 +
+                                           v2.screenpos[2].ToFloat32() * w2) / wsum;
+
+            // Not fully accurate. About 3 bits in precision are missing.
+            // Z-Buffer (z / w * scale + offset)
+            float depth_scale = float24::FromRaw(regs.viewport_depth_range).ToFloat32();
+            float depth_offset = float24::FromRaw(regs.viewport_depth_near_plane).ToFloat32();
+            float depth = interpolated_z_over_w * depth_scale + depth_offset;
+
+            // Potentially switch to W-Buffer
+            if (regs.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
+                // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w)
+                depth *= interpolated_w_inverse.ToFloat32() * wsum;
+            }
+
+            // Clamp the result
+            depth = MathUtil::Clamp(depth, 0.0f, 1.0f);
+
             // Perspective correct attribute interpolation:
             // Attribute values cannot be calculated by simple linear interpolation since
             // they are not linear in screen space. For example, when interpolating a
@@ -833,6 +853,38 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
                     continue;
             }
 
+            // Apply fog combiner
+            // Not fully accurate. We'd have to know what data type is used to
+            // store the depth etc. Using float for now until we know more
+            // about Pica datatypes
+            if (regs.fog_mode == Regs::FogMode::Fog) {
+                const Math::Vec3<u8> fog_color = {
+                    static_cast<u8>(regs.fog_color.r.Value()),
+                    static_cast<u8>(regs.fog_color.g.Value()),
+                    static_cast<u8>(regs.fog_color.b.Value()),
+                };
+
+                // Get index into fog LUT
+                float fog_index;
+                if (g_state.regs.fog_flip) {
+                    fog_index = (1.0f - depth) * 128.0f;
+                } else {
+                    fog_index = depth * 128.0f;
+                }
+
+                // Generate clamped fog factor from LUT for given fog index
+                float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f);
+                float fog_f = fog_index - fog_i;
+                const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)];
+                float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) / 2047.0f; // This is signed fixed point 1.11
+                fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f);
+
+                // Blend the fog
+                for (unsigned i = 0; i < 3; i++) {
+                    combiner_output[i] = fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i];
+                }
+            }
+
             u8 old_stencil = 0;
 
             auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) {
@@ -887,27 +939,6 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
                 }
             }
 
-            // interpolated_z = z / w
-            float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 +
-                                           v1.screenpos[2].ToFloat32() * w1 +
-                                           v2.screenpos[2].ToFloat32() * w2) / wsum;
-
-            // Not fully accurate. About 3 bits in precision are missing.
-            // Z-Buffer (z / w * scale + offset)
-            float depth_scale = float24::FromRaw(regs.viewport_depth_range).ToFloat32();
-            float depth_offset = float24::FromRaw(regs.viewport_depth_near_plane).ToFloat32();
-            float depth = interpolated_z_over_w * depth_scale + depth_offset;
-
-            // Potentially switch to W-Buffer
-            if (regs.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
-
-                // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w)
-                depth *= interpolated_w_inverse.ToFloat32() * wsum;
-            }
-
-            // Clamp the result
-            depth = MathUtil::Clamp(depth, 0.0f, 1.0f);
-
             // Convert float to integer
             unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format);
             u32 z = (u32)(depth * ((1 << num_bits) - 1));

From a12571c709c5af840bb89c43344bd982a496f21a Mon Sep 17 00:00:00 2001
From: Jannik Vogel <email@jannikvogel.de>
Date: Sat, 21 May 2016 01:04:57 +0200
Subject: [PATCH 4/4] OpenGL: Implement fog

---
 .../renderer_opengl/gl_rasterizer.cpp         | 68 +++++++++++++++++++
 .../renderer_opengl/gl_rasterizer.h           | 17 ++++-
 .../renderer_opengl/gl_shader_gen.cpp         | 34 ++++++++--
 src/video_core/renderer_opengl/gl_state.cpp   |  8 +++
 src/video_core/renderer_opengl/gl_state.h     |  4 ++
 5 files changed, 124 insertions(+), 7 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 931c34a379..328a4f66be 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -62,6 +62,8 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
         uniform_block_data.lut_dirty[index] = true;
     }
 
+    uniform_block_data.fog_lut_dirty = true;
+
     // Set vertex attributes
     glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
     glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION);
@@ -102,6 +104,18 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
         glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
     }
 
+    // Setup the LUT for the fog
+    {
+        fog_lut.Create();
+        state.fog_lut.texture_1d = fog_lut.handle;
+    }
+    state.Apply();
+
+    glActiveTexture(GL_TEXTURE9);
+    glTexImage1D(GL_TEXTURE_1D, 0, GL_R32UI, 128, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, nullptr);
+    glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+    glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+
     // Sync fixed function OpenGL state
     SyncCullMode();
     SyncBlendEnabled();
@@ -215,6 +229,12 @@ void RasterizerOpenGL::DrawTriangles() {
         }
     }
 
+    // Sync the fog lut
+    if (uniform_block_data.fog_lut_dirty) {
+        SyncFogLUT();
+        uniform_block_data.fog_lut_dirty = false;
+    }
+
     // Sync the uniform data
     if (uniform_block_data.dirty) {
         glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW);
@@ -280,6 +300,21 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
         SyncBlendColor();
         break;
 
+    // Fog state
+    case PICA_REG_INDEX(fog_color):
+        SyncFogColor();
+        break;
+    case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8):
+    case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9):
+    case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea):
+    case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb):
+    case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec):
+    case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed):
+    case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee):
+    case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef):
+        uniform_block_data.fog_lut_dirty = true;
+        break;
+
     // Alpha test
     case PICA_REG_INDEX(output_merger.alpha_test):
         SyncAlphaTest();
@@ -329,6 +364,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
         break;
 
     // TEV stages
+    // (This also syncs fog_mode and fog_flip, which share register 0xe0 with tev_combiner_buffer_input)
     case PICA_REG_INDEX(tev_stage0.color_source1):
     case PICA_REG_INDEX(tev_stage0.color_modifier1):
     case PICA_REG_INDEX(tev_stage0.color_op):
@@ -950,9 +986,15 @@ void RasterizerOpenGL::SetShader() {
         uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]");
         if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); }
 
+        GLint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut"); // signed: -1 means not found
+        if (uniform_fog_lut != -1) { glUniform1i(uniform_fog_lut, 9); } // fog LUT is bound on GL_TEXTURE9
+
         current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get();
 
         unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data");
+        GLint block_size;
+        glGetActiveUniformBlockiv(current_shader->shader.handle, block_index, GL_UNIFORM_BLOCK_DATA_SIZE, &block_size);
+        ASSERT_MSG(block_size == sizeof(UniformData), "Uniform block size did not match!");
         glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
 
         // Update uniforms
@@ -974,6 +1016,8 @@ void RasterizerOpenGL::SetShader() {
             SyncLightDistanceAttenuationBias(light_index);
             SyncLightDistanceAttenuationScale(light_index);
         }
+
+        SyncFogColor();
     }
 }
 
@@ -1040,6 +1084,30 @@ void RasterizerOpenGL::SyncBlendColor() {
     state.blend.color.alpha = blend_color[3];
 }
 
+void RasterizerOpenGL::SyncFogColor() {
+    // Normalize the 8-bit PICA fog color channels to [0, 1] and mark the uniform block dirty
+    const auto& pica_fog_color = Pica::g_state.regs.fog_color;
+    const GLfloat red = pica_fog_color.r.Value() / 255.0f;
+    const GLfloat green = pica_fog_color.g.Value() / 255.0f;
+    const GLfloat blue = pica_fog_color.b.Value() / 255.0f;
+    uniform_block_data.data.fog_color = {red, green, blue};
+    uniform_block_data.dirty = true;
+}
+
+void RasterizerOpenGL::SyncFogLUT() {
+    // Gather the raw 32-bit fog LUT words from the PICA state
+    const auto& pica_lut = Pica::g_state.fog.lut;
+    std::array<GLuint, 128> packed_entries;
+    for (unsigned i = 0; i < packed_entries.size(); ++i) {
+        packed_entries[i] = pica_lut[i].raw;
+    }
+    // Nothing to upload when the cached copy already matches
+    if (packed_entries == fog_lut_data) return;
+    fog_lut_data = packed_entries;
+    glActiveTexture(GL_TEXTURE9);
+    glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RED_INTEGER, GL_UNSIGNED_INT, fog_lut_data.data());
+}
+
 void RasterizerOpenGL::SyncAlphaTest() {
     const auto& regs = Pica::g_state.regs;
     if (regs.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index bdc7bd0f24..42482df4b6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -76,6 +76,9 @@ union PicaShaderConfig {
             state.tev_stages[i].scales_raw = tev_stage.scales_raw;
         }
 
+        state.fog_mode = regs.fog_mode;
+        state.fog_flip = regs.fog_flip;
+
         state.combiner_buffer_input =
             regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
             regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
@@ -168,13 +171,14 @@ union PicaShaderConfig {
     };
 
     struct State {
-
         Pica::Regs::CompareFunc alpha_test_func;
         Pica::Regs::TextureConfig::TextureType texture0_type;
         std::array<TevStageConfigRaw, 6> tev_stages;
         u8 combiner_buffer_input;
 
         Pica::Regs::DepthBuffering depthmap_enable;
+        Pica::Regs::FogMode fog_mode;
+        bool fog_flip;
 
         struct {
             struct {
@@ -324,13 +328,14 @@ private:
         GLint alphatest_ref;
         GLfloat depth_scale;
         GLfloat depth_offset;
+        alignas(16) GLvec3 fog_color;
         alignas(16) GLvec3 lighting_global_ambient;
         LightSrc light_src[8];
         alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages
         alignas(16) GLvec4 tev_combiner_buffer_color;
     };
 
-    static_assert(sizeof(UniformData) == 0x390, "The size of the UniformData structure has changed, update the structure in the shader");
+    static_assert(sizeof(UniformData) == 0x3A0, "The size of the UniformData structure has changed, update the structure in the shader");
     static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
 
     /// Sets the OpenGL shader in accordance with the current PICA register state
@@ -354,6 +359,10 @@ private:
     /// Syncs the blend color to match the PICA register
     void SyncBlendColor();
 
+    /// Syncs the fog color uniform and the fog LUT texture to match the PICA registers
+    void SyncFogColor();
+    void SyncFogLUT();
+
     /// Syncs the alpha test states to match the PICA register
     void SyncAlphaTest();
 
@@ -421,6 +430,7 @@ private:
     struct {
         UniformData data;
         bool lut_dirty[6];
+        bool fog_lut_dirty;
         bool dirty;
     } uniform_block_data = {};
 
@@ -432,4 +442,7 @@ private:
 
     std::array<OGLTexture, 6> lighting_luts;
     std::array<std::array<GLvec4, 256>, 6> lighting_lut_data{};
+
+    OGLTexture fog_lut;
+    std::array<GLuint, 128> fog_lut_data{};
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index ea7ab2883a..3bace7f01d 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -555,6 +555,7 @@ layout (std140) uniform shader_data {
     int alphatest_ref;
     float depth_scale;
     float depth_offset;
+    vec3 fog_color;
     vec3 lighting_global_ambient;
     LightSrc light_src[NUM_LIGHTS];
     vec4 const_color[NUM_TEV_STAGES];
@@ -563,6 +564,7 @@ layout (std140) uniform shader_data {
 
 uniform sampler2D tex[3];
 uniform sampler1D lut[6];
+uniform usampler1D fog_lut;
 
 // Rotate the vector v by the quaternion q
 vec3 quaternion_rotate(vec4 q, vec3 v) {
@@ -580,6 +582,12 @@ vec4 secondary_fragment_color = vec4(0.0);
         return out;
     }
 
+    out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n";
+    out += "float depth = z_over_w * depth_scale + depth_offset;\n";
+    if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
+        out += "depth /= gl_FragCoord.w;\n";
+    }
+
     if (state.lighting.enable)
         WriteLighting(out, config);
 
@@ -596,14 +604,30 @@ vec4 secondary_fragment_color = vec4(0.0);
         out += ") discard;\n";
     }
 
-    out += "color = last_tex_env_out;\n";
+    // Append fog combiner
+    if (state.fog_mode == Regs::FogMode::Fog) {
+        // Get index into fog LUT
+        if (state.fog_flip) {
+            out += "float fog_index = (1.0 - depth) * 128.0;\n";
+        } else {
+            out += "float fog_index = depth * 128.0;\n";
+        }
 
-    out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n";
-    out += "float depth = z_over_w * depth_scale + depth_offset;\n";
-    if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
-        out += "depth /= gl_FragCoord.w;\n";
+        // Generate clamped fog factor from LUT for given fog index
+        out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n";
+        out += "float fog_f = fog_index - fog_i;\n";
+        out += "uint fog_lut_entry = texelFetch(fog_lut, int(fog_i), 0).r;\n";
+        out += "float fog_lut_entry_difference = float(int((fog_lut_entry & 0x1FFFU) << 19U) >> 19);\n"; // Extract signed difference
+        out += "float fog_lut_entry_value = float((fog_lut_entry >> 13U) & 0x7FFU);\n";
+        out += "float fog_factor = (fog_lut_entry_value + fog_lut_entry_difference * fog_f) / 2047.0;\n";
+        out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n";
+
+        // Blend the fog
+        out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n";
     }
+
     out += "gl_FragDepth = depth;\n";
+    out += "color = last_tex_env_out;\n";
 
     out += "}";
 
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index fa141fc9ad..13ee986b9f 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -58,6 +58,8 @@ OpenGLState::OpenGLState() {
         lut.texture_1d = 0;
     }
 
+    fog_lut.texture_1d = 0;
+
     draw.read_framebuffer = 0;
     draw.draw_framebuffer = 0;
     draw.vertex_array = 0;
@@ -195,6 +197,12 @@ void OpenGLState::Apply() const {
         }
     }
 
+    // Fog LUT
+    if (fog_lut.texture_1d != cur_state.fog_lut.texture_1d) {
+        glActiveTexture(GL_TEXTURE9);
+        glBindTexture(GL_TEXTURE_1D, fog_lut.texture_1d);
+    }
+
     // Framebuffer
     if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
         glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 2287270549..13c71b0a60 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -67,6 +67,10 @@ public:
         GLuint texture_1d; // GL_TEXTURE_BINDING_1D
     } lighting_luts[6];
 
+    struct {
+        GLuint texture_1d; // GL_TEXTURE_BINDING_1D
+    } fog_lut;
+
     struct {
         GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
         GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING