From ab60414122184851415a27ae8bcacb4aab0504b6 Mon Sep 17 00:00:00 2001 From: wwylele Date: Fri, 16 Jun 2017 14:00:15 +0300 Subject: [PATCH] gl_rasterizer/lighting: fix LUT interpolation --- src/video_core/pica_state.h | 12 +- src/video_core/regs_lighting.h | 2 + .../renderer_opengl/gl_rasterizer.cpp | 71 ++++------- .../renderer_opengl/gl_rasterizer.h | 7 +- .../renderer_opengl/gl_shader_gen.cpp | 110 ++++++++++-------- src/video_core/renderer_opengl/gl_state.cpp | 12 +- src/video_core/renderer_opengl/gl_state.h | 4 +- 7 files changed, 102 insertions(+), 116 deletions(-) diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index f46db09fba..3b00df0b3d 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h @@ -87,12 +87,18 @@ struct State { // LUT value, encoded as 12-bit fixed point, with 12 fraction bits BitField<0, 12, u32> value; // 0.0.12 fixed point - // Used by HW for efficient interpolation, Citra does not use these - BitField<12, 12, s32> difference; // 1.0.11 fixed point + // Used for efficient interpolation. + BitField<12, 11, u32> difference; // 0.0.11 fixed point + BitField<23, 1, u32> neg_difference; - float ToFloat() { + float ToFloat() const { return static_cast(value) / 4095.f; } + + float DiffToFloat() const { + float diff = static_cast(difference) / 2047.f; + return neg_difference ? 
-diff : diff; + } }; std::array, 24> luts; diff --git a/src/video_core/regs_lighting.h b/src/video_core/regs_lighting.h index 7221d16886..b89709cfe6 100644 --- a/src/video_core/regs_lighting.h +++ b/src/video_core/regs_lighting.h @@ -26,6 +26,8 @@ struct LightingRegs { DistanceAttenuation = 16, }; + static constexpr unsigned NumLightingSampler = 24; + static LightingSampler SpotlightAttenuationSampler(unsigned index) { return static_cast( static_cast(LightingSampler::SpotlightAttenuation) + index); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e6cccebf65..c73e1d6e2b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -49,9 +49,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { uniform_block_data.dirty = true; - for (unsigned index = 0; index < lighting_luts.size(); index++) { - uniform_block_data.lut_dirty[index] = true; - } + uniform_block_data.lut_dirty.fill(true); uniform_block_data.fog_lut_dirty = true; @@ -96,18 +94,16 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { framebuffer.Create(); // Allocate and bind lighting lut textures - for (size_t i = 0; i < lighting_luts.size(); ++i) { - lighting_luts[i].Create(); - state.lighting_luts[i].texture_1d = lighting_luts[i].handle; - } + lighting_lut.Create(); + state.lighting_lut.texture_buffer = lighting_lut.handle; state.Apply(); - - for (size_t i = 0; i < lighting_luts.size(); ++i) { - glActiveTexture(static_cast(GL_TEXTURE3 + i)); - glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); - glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - } + lighting_lut_buffer.Create(); + glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle); + glBufferData(GL_TEXTURE_BUFFER, + sizeof(GLfloat) * 2 * 256 * Pica::LightingRegs::NumLightingSampler, 
nullptr, + GL_DYNAMIC_DRAW); + glActiveTexture(GL_TEXTURE15); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, lighting_lut_buffer.handle); // Setup the LUT for the fog { @@ -313,7 +309,7 @@ void RasterizerOpenGL::DrawTriangles() { } // Sync the lighting luts - for (unsigned index = 0; index < lighting_luts.size(); index++) { + for (unsigned index = 0; index < uniform_block_data.lut_dirty.size(); index++) { if (uniform_block_data.lut_dirty[index]) { SyncLightingLUT(index); uniform_block_data.lut_dirty[index] = false; @@ -851,7 +847,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): { auto& lut_config = regs.lighting.lut_config; - uniform_block_data.lut_dirty[lut_config.type / 4] = true; + uniform_block_data.lut_dirty[lut_config.type] = true; break; } } @@ -1201,29 +1197,9 @@ void RasterizerOpenGL::SetShader() { } // Set the texture samplers to correspond to different lookup table texture units - GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]"); + GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lighting_lut"); if (uniform_lut != -1) { - glUniform1i(uniform_lut, 3); - } - uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]"); - if (uniform_lut != -1) { - glUniform1i(uniform_lut, 4); - } - uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]"); - if (uniform_lut != -1) { - glUniform1i(uniform_lut, 5); - } - uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]"); - if (uniform_lut != -1) { - glUniform1i(uniform_lut, 6); - } - uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]"); - if (uniform_lut != -1) { - glUniform1i(uniform_lut, 7); - } - uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]"); - if (uniform_lut != -1) { - glUniform1i(uniform_lut, 8); + glUniform1i(uniform_lut, 15); } GLuint uniform_fog_lut = 
glGetUniformLocation(shader->shader.handle, "fog_lut"); @@ -1571,20 +1547,17 @@ void RasterizerOpenGL::SyncGlobalAmbient() { } void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) { - std::array new_data; - - for (unsigned offset = 0; offset < new_data.size(); ++offset) { - new_data[offset][0] = Pica::g_state.lighting.luts[(lut_index * 4) + 0][offset].ToFloat(); - new_data[offset][1] = Pica::g_state.lighting.luts[(lut_index * 4) + 1][offset].ToFloat(); - new_data[offset][2] = Pica::g_state.lighting.luts[(lut_index * 4) + 2][offset].ToFloat(); - new_data[offset][3] = Pica::g_state.lighting.luts[(lut_index * 4) + 3][offset].ToFloat(); - } + std::array new_data; + const auto& source_lut = Pica::g_state.lighting.luts[lut_index]; + std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), [](const auto& entry) { + return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; + }); if (new_data != lighting_lut_data[lut_index]) { lighting_lut_data[lut_index] = new_data; - glActiveTexture(GL_TEXTURE3 + lut_index); - glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, - lighting_lut_data[lut_index].data()); + glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle); + glBufferSubData(GL_TEXTURE_BUFFER, lut_index * new_data.size() * sizeof(GLvec2), + new_data.size() * sizeof(GLvec2), new_data.data()); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d9a3e9d1cc..79acd4230a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -263,7 +263,7 @@ private: struct { UniformData data; - bool lut_dirty[6]; + std::array lut_dirty; bool fog_lut_dirty; bool proctex_noise_lut_dirty; bool proctex_color_map_dirty; @@ -279,8 +279,9 @@ private: OGLBuffer uniform_buffer; OGLFramebuffer framebuffer; - std::array lighting_luts; - std::array, 6> lighting_lut_data{}; + OGLBuffer lighting_lut_buffer; + OGLTexture lighting_lut; + std::array, 
Pica::LightingRegs::NumLightingSampler> lighting_lut_data{}; OGLTexture fog_lut; std::array fog_lut_data{}; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 540cbb9d0a..0c7c4dd5cb 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -562,9 +562,9 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { out += "vec3 normal = quaternion_rotate(normalized_normquat, surface_normal);\n"; out += "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n"; - // Gets the index into the specified lookup table for specular lighting - auto GetLutIndex = [&lighting](unsigned light_num, LightingRegs::LightingLutInput input, - bool abs) { + // Samples the specified lookup table for specular lighting + auto GetLutValue = [&lighting](LightingRegs::LightingSampler sampler, unsigned light_num, + LightingRegs::LightingLutInput input, bool abs) { std::string index; switch (input) { case LightingRegs::LightingLutInput::NH: @@ -610,22 +610,18 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { break; } + std::string sampler_string = std::to_string(static_cast(sampler)); + if (abs) { // LUT index is in the range of (0.0, 1.0) index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.0)"; + return "LookupLightingLUTUnsigned(" + sampler_string + ", " + index + ")"; } else { // LUT index is in the range of (-1.0, 1.0) - index = "((" + index + " < 0) ? 
" + index + " + 2.0 : " + index + ") / 2.0"; + return "LookupLightingLUTSigned(" + sampler_string + ", " + index + ")"; } - return "(OFFSET_256 + SCALE_256 * clamp(" + index + ", 0.0, 1.0))"; - }; - - // Gets the lighting lookup table value given the specified sampler and index - auto GetLutValue = [](LightingRegs::LightingSampler sampler, std::string lut_index) { - return std::string("texture(lut[" + std::to_string((unsigned)sampler / 4) + "], " + - lut_index + ")[" + std::to_string((unsigned)sampler & 3) + "]"); }; // Write the code to emulate each enabled light @@ -653,21 +649,21 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { if (light_config.spot_atten_enable && LightingRegs::IsLightingSamplerSupported( lighting.config, LightingRegs::LightingSampler::SpotlightAttenuation)) { - std::string index = - GetLutIndex(light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input); - auto sampler = LightingRegs::SpotlightAttenuationSampler(light_config.num); - spot_atten = "(" + std::to_string(lighting.lut_sp.scale) + " * " + - GetLutValue(sampler, index) + ")"; + std::string value = + GetLutValue(LightingRegs::SpotlightAttenuationSampler(light_config.num), + light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input); + spot_atten = "(" + std::to_string(lighting.lut_sp.scale) + " * " + value + ")"; } // If enabled, compute distance attenuation value std::string dist_atten = "1.0"; if (light_config.dist_atten_enable) { - std::string index = "(" + light_src + ".dist_atten_scale * length(-view - " + - light_src + ".position) + " + light_src + ".dist_atten_bias)"; - index = "(OFFSET_256 + SCALE_256 * clamp(" + index + ", 0.0, 1.0))"; + std::string index = "clamp(" + light_src + ".dist_atten_scale * length(-view - " + + light_src + ".position) + " + light_src + + ".dist_atten_bias, 0.0, 1.0)"; auto sampler = LightingRegs::DistanceAttenuationSampler(light_config.num); - dist_atten = GetLutValue(sampler, index); + dist_atten = 
"LookupLightingLUTUnsigned(" + + std::to_string(static_cast(sampler)) + "," + index + ")"; } // If enabled, clamp specular component if lighting result is negative @@ -686,10 +682,10 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { LightingRegs::IsLightingSamplerSupported( lighting.config, LightingRegs::LightingSampler::Distribution0)) { // Lookup specular "distribution 0" LUT value - std::string index = - GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input); - d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + - GetLutValue(LightingRegs::LightingSampler::Distribution0, index) + ")"; + std::string value = + GetLutValue(LightingRegs::LightingSampler::Distribution0, light_config.num, + lighting.lut_d0.type, lighting.lut_d0.abs_input); + d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + value + ")"; } std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; if (light_config.geometric_factor_0) { @@ -700,10 +696,10 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { if (lighting.lut_rr.enable && LightingRegs::IsLightingSamplerSupported(lighting.config, LightingRegs::LightingSampler::ReflectRed)) { - std::string index = - GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input); - std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + - GetLutValue(LightingRegs::LightingSampler::ReflectRed, index) + ")"; + std::string value = + GetLutValue(LightingRegs::LightingSampler::ReflectRed, light_config.num, + lighting.lut_rr.type, lighting.lut_rr.abs_input); + value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + value + ")"; out += "refl_value.r = " + value + ";\n"; } else { out += "refl_value.r = 1.0;\n"; @@ -713,11 +709,10 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { if (lighting.lut_rg.enable && 
LightingRegs::IsLightingSamplerSupported(lighting.config, LightingRegs::LightingSampler::ReflectGreen)) { - std::string index = - GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input); - std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + - GetLutValue(LightingRegs::LightingSampler::ReflectGreen, index) + - ")"; + std::string value = + GetLutValue(LightingRegs::LightingSampler::ReflectGreen, light_config.num, + lighting.lut_rg.type, lighting.lut_rg.abs_input); + value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + value + ")"; out += "refl_value.g = " + value + ";\n"; } else { out += "refl_value.g = refl_value.r;\n"; @@ -727,11 +722,10 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { if (lighting.lut_rb.enable && LightingRegs::IsLightingSamplerSupported(lighting.config, LightingRegs::LightingSampler::ReflectBlue)) { - std::string index = - GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input); - std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + - GetLutValue(LightingRegs::LightingSampler::ReflectBlue, index) + - ")"; + std::string value = + GetLutValue(LightingRegs::LightingSampler::ReflectBlue, light_config.num, + lighting.lut_rb.type, lighting.lut_rb.abs_input); + value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + value + ")"; out += "refl_value.b = " + value + ";\n"; } else { out += "refl_value.b = refl_value.r;\n"; @@ -743,10 +737,10 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { LightingRegs::IsLightingSamplerSupported( lighting.config, LightingRegs::LightingSampler::Distribution1)) { // Lookup specular "distribution 1" LUT value - std::string index = - GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input); - d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + - GetLutValue(LightingRegs::LightingSampler::Distribution1, index) + ")"; + 
std::string value = + GetLutValue(LightingRegs::LightingSampler::Distribution1, light_config.num, + lighting.lut_d1.type, lighting.lut_d1.abs_input); + d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + value + ")"; } std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; @@ -759,10 +753,10 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { LightingRegs::IsLightingSamplerSupported(lighting.config, LightingRegs::LightingSampler::Fresnel)) { // Lookup fresnel LUT value - std::string index = - GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input); - std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + - GetLutValue(LightingRegs::LightingSampler::Fresnel, index) + ")"; + std::string value = + GetLutValue(LightingRegs::LightingSampler::Fresnel, light_config.num, + lighting.lut_fr.type, lighting.lut_fr.abs_input); + value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + value + ")"; // Enabled for difffuse lighting alpha component if (lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::PrimaryAlpha || @@ -1016,10 +1010,6 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) { #define NUM_TEV_STAGES 6 #define NUM_LIGHTS 8 -// Texture coordinate offsets and scales -#define OFFSET_256 (0.5 / 256.0) -#define SCALE_256 (255.0 / 256.0) - in vec4 primary_color; in vec2 texcoord[3]; in float texcoord0_w; @@ -1061,7 +1051,7 @@ layout (std140) uniform shader_data { }; uniform sampler2D tex[3]; -uniform sampler1D lut[6]; +uniform samplerBuffer lighting_lut; uniform usampler1D fog_lut; uniform sampler1D proctex_noise_lut; uniform sampler1D proctex_color_map; @@ -1074,6 +1064,24 @@ vec3 quaternion_rotate(vec4 q, vec3 v) { return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); } +float LookupLightingLUT(int lut_index, int index, float delta) { + vec2 entry = texelFetch(lighting_lut, lut_index * 256 + index).rg; 
+ return entry.r + entry.g * delta; +} + +float LookupLightingLUTUnsigned(int lut_index, float pos) { + int index = clamp(int(pos * 256.0), 0, 255); + float delta = pos * 256.0 - index; + return LookupLightingLUT(lut_index, index, delta); +} + +float LookupLightingLUTSigned(int lut_index, float pos) { + int index = clamp(int(floor(pos * 128.0)), -128, 127); + float delta = pos * 128.0 - index; + if (index < 0) index += 256; + return LookupLightingLUT(lut_index, index, delta); +} + )"; if (config.state.proctex.enable) diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index bf837a7fba..40d7cee969 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -52,9 +52,7 @@ OpenGLState::OpenGLState() { texture_unit.sampler = 0; } - for (auto& lut : lighting_luts) { - lut.texture_1d = 0; - } + lighting_lut.texture_buffer = 0; fog_lut.texture_1d = 0; @@ -194,11 +192,9 @@ void OpenGLState::Apply() const { } // Lighting LUTs - for (unsigned i = 0; i < ARRAY_SIZE(lighting_luts); ++i) { - if (lighting_luts[i].texture_1d != cur_state.lighting_luts[i].texture_1d) { - glActiveTexture(GL_TEXTURE3 + i); - glBindTexture(GL_TEXTURE_1D, lighting_luts[i].texture_1d); - } + if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) { + glActiveTexture(GL_TEXTURE15); + glBindTexture(GL_TEXTURE_BUFFER, lighting_lut.texture_buffer); } // Fog LUT diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 7dcc03bd5c..d524d06250 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -64,8 +64,8 @@ public: } texture_units[3]; struct { - GLuint texture_1d; // GL_TEXTURE_BINDING_1D - } lighting_luts[6]; + GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER + } lighting_lut; struct { GLuint texture_1d; // GL_TEXTURE_BINDING_1D