forked from suyu/suyu
Merge pull request #2697 from wwylele/proctex
Implemented Procedural Texture (Texture Unit 3)
This commit is contained in:
commit
bae3799bd5
15 changed files with 1048 additions and 11 deletions
|
@ -652,6 +652,16 @@ static inline decltype((X{} * int{} + X{} * int{}) / base) LerpInt(const X& begi
|
|||
return (begin * (base - t) + end * t) / base;
|
||||
}
|
||||
|
||||
// Bilinear interpolation of four values. The weight s blends the pairs x00-x01 and x10-x11;
// the weight t then blends those two intermediate results.
template <typename X>
inline auto BilinearInterp(const X& x00, const X& x01, const X& x10, const X& x11, const float s,
                           const float t) {
    return Lerp(Lerp(x00, x01, s), Lerp(x10, x11, s), t);
}
|
||||
|
||||
// Utility vector factories
|
||||
template <typename T>
|
||||
static inline Vec2<T> MakeVec(const T& x, const T& y) {
|
||||
|
|
|
@ -15,6 +15,7 @@ set(SRCS
|
|||
shader/shader_interpreter.cpp
|
||||
swrasterizer/clipper.cpp
|
||||
swrasterizer/framebuffer.cpp
|
||||
swrasterizer/proctex.cpp
|
||||
swrasterizer/rasterizer.cpp
|
||||
swrasterizer/swrasterizer.cpp
|
||||
swrasterizer/texturing.cpp
|
||||
|
@ -54,6 +55,7 @@ set(HEADERS
|
|||
shader/shader_interpreter.h
|
||||
swrasterizer/clipper.h
|
||||
swrasterizer/framebuffer.h
|
||||
swrasterizer/proctex.h
|
||||
swrasterizer/rasterizer.h
|
||||
swrasterizer/swrasterizer.h
|
||||
swrasterizer/texturing.h
|
||||
|
|
|
@ -556,6 +556,37 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||
break;
|
||||
}
|
||||
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[0], 0xb0):
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[1], 0xb1):
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[2], 0xb2):
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[3], 0xb3):
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[4], 0xb4):
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[5], 0xb5):
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[6], 0xb6):
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[7], 0xb7): {
|
||||
auto& index = regs.texturing.proctex_lut_config.index;
|
||||
auto& pt = g_state.proctex;
|
||||
|
||||
switch (regs.texturing.proctex_lut_config.ref_table.Value()) {
|
||||
case TexturingRegs::ProcTexLutTable::Noise:
|
||||
pt.noise_table[index % pt.noise_table.size()].raw = value;
|
||||
break;
|
||||
case TexturingRegs::ProcTexLutTable::ColorMap:
|
||||
pt.color_map_table[index % pt.color_map_table.size()].raw = value;
|
||||
break;
|
||||
case TexturingRegs::ProcTexLutTable::AlphaMap:
|
||||
pt.alpha_map_table[index % pt.alpha_map_table.size()].raw = value;
|
||||
break;
|
||||
case TexturingRegs::ProcTexLutTable::Color:
|
||||
pt.color_table[index % pt.color_table.size()].raw = value;
|
||||
break;
|
||||
case TexturingRegs::ProcTexLutTable::ColorDiff:
|
||||
pt.color_diff_table[index % pt.color_diff_table.size()].raw = value;
|
||||
break;
|
||||
}
|
||||
index.Assign(index + 1);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <array>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "video_core/primitive_assembly.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
|
@ -25,6 +26,59 @@ struct State {
|
|||
|
||||
Shader::AttributeBuffer input_default_attributes;
|
||||
|
||||
struct ProcTex {
|
||||
union ValueEntry {
|
||||
u32 raw;
|
||||
|
||||
// LUT value, encoded as 12-bit fixed point, with 12 fraction bits
|
||||
BitField<0, 12, u32> value; // 0.0.12 fixed point
|
||||
|
||||
// Difference between two entry values. Used for efficient interpolation.
|
||||
// 0.0.12 fixed point with two's complement. The range is [-0.5, 0.5).
|
||||
// Note: the type of this is different from the one of lighting LUT
|
||||
BitField<12, 12, s32> difference;
|
||||
|
||||
float ToFloat() const {
|
||||
return static_cast<float>(value) / 4095.f;
|
||||
}
|
||||
|
||||
float DiffToFloat() const {
|
||||
return static_cast<float>(difference) / 4095.f;
|
||||
}
|
||||
};
|
||||
|
||||
union ColorEntry {
|
||||
u32 raw;
|
||||
BitField<0, 8, u32> r;
|
||||
BitField<8, 8, u32> g;
|
||||
BitField<16, 8, u32> b;
|
||||
BitField<24, 8, u32> a;
|
||||
|
||||
Math::Vec4<u8> ToVector() const {
|
||||
return {static_cast<u8>(r), static_cast<u8>(g), static_cast<u8>(b),
|
||||
static_cast<u8>(a)};
|
||||
}
|
||||
};
|
||||
|
||||
union ColorDifferenceEntry {
|
||||
u32 raw;
|
||||
BitField<0, 8, s32> r; // half of the difference between two ColorEntry
|
||||
BitField<8, 8, s32> g;
|
||||
BitField<16, 8, s32> b;
|
||||
BitField<24, 8, s32> a;
|
||||
|
||||
Math::Vec4<s32> ToVector() const {
|
||||
return Math::Vec4<s32>{r, g, b, a} * 2;
|
||||
}
|
||||
};
|
||||
|
||||
std::array<ValueEntry, 128> noise_table;
|
||||
std::array<ValueEntry, 128> color_map_table;
|
||||
std::array<ValueEntry, 128> alpha_map_table;
|
||||
std::array<ColorEntry, 256> color_table;
|
||||
std::array<ColorDifferenceEntry, 256> color_diff_table;
|
||||
} proctex;
|
||||
|
||||
struct {
|
||||
union LutEntry {
|
||||
// Used for raw access
|
||||
|
|
|
@ -101,6 +101,13 @@ ASSERT_REG_POSITION(texturing.texture1, 0x91);
|
|||
ASSERT_REG_POSITION(texturing.texture1_format, 0x96);
|
||||
ASSERT_REG_POSITION(texturing.texture2, 0x99);
|
||||
ASSERT_REG_POSITION(texturing.texture2_format, 0x9e);
|
||||
ASSERT_REG_POSITION(texturing.proctex, 0xa8);
|
||||
ASSERT_REG_POSITION(texturing.proctex_noise_u, 0xa9);
|
||||
ASSERT_REG_POSITION(texturing.proctex_noise_v, 0xaa);
|
||||
ASSERT_REG_POSITION(texturing.proctex_noise_frequency, 0xab);
|
||||
ASSERT_REG_POSITION(texturing.proctex_lut, 0xac);
|
||||
ASSERT_REG_POSITION(texturing.proctex_lut_offset, 0xad);
|
||||
ASSERT_REG_POSITION(texturing.proctex_lut_config, 0xaf);
|
||||
ASSERT_REG_POSITION(texturing.tev_stage0, 0xc0);
|
||||
ASSERT_REG_POSITION(texturing.tev_stage1, 0xc8);
|
||||
ASSERT_REG_POSITION(texturing.tev_stage2, 0xd0);
|
||||
|
|
|
@ -127,8 +127,8 @@ struct TexturingRegs {
|
|||
BitField<0, 1, u32> texture0_enable;
|
||||
BitField<1, 1, u32> texture1_enable;
|
||||
BitField<2, 1, u32> texture2_enable;
|
||||
BitField<8, 2, u32> texture3_coordinates; // TODO: unimplemented
|
||||
BitField<10, 1, u32> texture3_enable; // TODO: unimplemented
|
||||
BitField<8, 2, u32> texture3_coordinates;
|
||||
BitField<10, 1, u32> texture3_enable;
|
||||
BitField<13, 1, u32> texture2_use_coord1;
|
||||
BitField<16, 1, u32> clear_texture_cache; // TODO: unimplemented
|
||||
} main_config;
|
||||
|
@ -142,7 +142,7 @@ struct TexturingRegs {
|
|||
INSERT_PADDING_WORDS(0x2);
|
||||
TextureConfig texture2;
|
||||
BitField<0, 4, TextureFormat> texture2_format;
|
||||
INSERT_PADDING_WORDS(0x21);
|
||||
INSERT_PADDING_WORDS(0x9);
|
||||
|
||||
struct FullTextureConfig {
|
||||
const bool enabled;
|
||||
|
@ -157,6 +157,96 @@ struct TexturingRegs {
|
|||
}};
|
||||
}
|
||||
|
||||
// 0xa8-0xad: ProcTex Config
|
||||
// Clamp mode of one procedural texture coordinate, selected separately for u and v.
// The per-value notes describe what the GLSL generator emits for a coordinate x.
enum class ProcTexClamp : u32 {
    ToZero = 0,            // x > 1.0 becomes 0
    ToEdge = 1,            // min(x, 1.0)
    SymmetricalRepeat = 2, // fract(x)
    MirroredRepeat = 3,    // fract(x) when int(x) is even, 1.0 - fract(x) otherwise
    Pulse = 4,             // 1.0 when x > 0.5, else 0.0
};
|
||||
|
||||
// Selects how the clamped u and v coordinates are combined into the single value that is
// looked up in the color map or alpha map.
enum class ProcTexCombiner : u32 {
    U = 0,        // u
    U2 = 1,       // u * u
    V = 2,        // v
    V2 = 3,       // v * v
    Add = 4,      // (u + v) / 2
    Add2 = 5,     // (u * u + v * v) / 2
    SqrtAdd2 = 6, // sqrt(u * u + v * v)
    Min = 7,      // min(u, v)
    Max = 8,      // max(u, v)
    // Average of Max and SqrtAdd2.
    // NOTE(review): the GLSL generator emits the average of (u + v) / 2 and
    // sqrt(u * u + v * v) for this mode - confirm which description is intended.
    RMax = 9,
};
|
||||
|
||||
// Shift mode of one procedural texture coordinate. The GLSL generator derives the shift from
// the integer part n of the other coordinate.
enum class ProcTexShift : u32 {
    None = 0, // no shift
    Odd = 1,  // offset * ((n / 2) % 2)
    Even = 2, // offset * (((n + 1) / 2) % 2)
};
|
||||
|
||||
union {
|
||||
BitField<0, 3, ProcTexClamp> u_clamp;
|
||||
BitField<3, 3, ProcTexClamp> v_clamp;
|
||||
BitField<6, 4, ProcTexCombiner> color_combiner;
|
||||
BitField<10, 4, ProcTexCombiner> alpha_combiner;
|
||||
BitField<14, 1, u32> separate_alpha;
|
||||
BitField<15, 1, u32> noise_enable;
|
||||
BitField<16, 2, ProcTexShift> u_shift;
|
||||
BitField<18, 2, ProcTexShift> v_shift;
|
||||
BitField<20, 8, u32> bias_low; // float16 TODO: unimplemented
|
||||
} proctex;
|
||||
|
||||
union ProcTexNoiseConfig {
|
||||
BitField<0, 16, s32> amplitude; // fixed1.3.12
|
||||
BitField<16, 16, u32> phase; // float16
|
||||
};
|
||||
|
||||
ProcTexNoiseConfig proctex_noise_u;
|
||||
ProcTexNoiseConfig proctex_noise_v;
|
||||
|
||||
union {
|
||||
BitField<0, 16, u32> u; // float16
|
||||
BitField<16, 16, u32> v; // float16
|
||||
} proctex_noise_frequency;
|
||||
|
||||
// Filter mode applied when sampling the procedural texture color table.
enum class ProcTexFilter : u32 {
    Nearest = 0,
    Linear = 1,
    NearestMipmapNearest = 2,
    LinearMipmapNearest = 3,
    NearestMipmapLinear = 4,
    LinearMipmapLinear = 5,
};
|
||||
|
||||
union {
|
||||
BitField<0, 3, ProcTexFilter> filter;
|
||||
BitField<11, 8, u32> width;
|
||||
BitField<19, 8, u32> bias_high; // TODO: unimplemented
|
||||
} proctex_lut;
|
||||
|
||||
BitField<0, 8, u32> proctex_lut_offset;
|
||||
|
||||
INSERT_PADDING_WORDS(0x1);
|
||||
|
||||
// 0xaf-0xb7: ProcTex LUT
|
||||
// Identifies the lookup table that receives words written to proctex_lut_data.
// The value 1 has no enumerator here.
enum class ProcTexLutTable : u32 {
    Noise = 0,
    ColorMap = 2,
    AlphaMap = 3,
    Color = 4,
    ColorDiff = 5,
};
|
||||
|
||||
union {
|
||||
BitField<0, 8, u32> index;
|
||||
BitField<8, 4, ProcTexLutTable> ref_table;
|
||||
} proctex_lut_config;
|
||||
|
||||
u32 proctex_lut_data[8];
|
||||
|
||||
INSERT_PADDING_WORDS(0x8);
|
||||
|
||||
// 0xc0-0xff: Texture Combiner (akin to glTexEnv)
|
||||
struct TevStageConfig {
|
||||
enum class Source : u32 {
|
||||
|
|
|
@ -55,6 +55,12 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
|
|||
|
||||
uniform_block_data.fog_lut_dirty = true;
|
||||
|
||||
uniform_block_data.proctex_noise_lut_dirty = true;
|
||||
uniform_block_data.proctex_color_map_dirty = true;
|
||||
uniform_block_data.proctex_alpha_map_dirty = true;
|
||||
uniform_block_data.proctex_lut_dirty = true;
|
||||
uniform_block_data.proctex_diff_lut_dirty = true;
|
||||
|
||||
// Set vertex attributes
|
||||
glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE,
|
||||
sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
|
||||
|
@ -115,6 +121,51 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
|
|||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
|
||||
// Setup the noise LUT for proctex
|
||||
proctex_noise_lut.Create();
|
||||
state.proctex_noise_lut.texture_1d = proctex_noise_lut.handle;
|
||||
state.Apply();
|
||||
glActiveTexture(GL_TEXTURE10);
|
||||
glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
|
||||
// Setup the color map for proctex
|
||||
proctex_color_map.Create();
|
||||
state.proctex_color_map.texture_1d = proctex_color_map.handle;
|
||||
state.Apply();
|
||||
glActiveTexture(GL_TEXTURE11);
|
||||
glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
|
||||
// Setup the alpha map for proctex
|
||||
proctex_alpha_map.Create();
|
||||
state.proctex_alpha_map.texture_1d = proctex_alpha_map.handle;
|
||||
state.Apply();
|
||||
glActiveTexture(GL_TEXTURE12);
|
||||
glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
|
||||
// Setup the LUT for proctex
|
||||
proctex_lut.Create();
|
||||
state.proctex_lut.texture_1d = proctex_lut.handle;
|
||||
state.Apply();
|
||||
glActiveTexture(GL_TEXTURE13);
|
||||
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
|
||||
// Setup the difference LUT for proctex
|
||||
proctex_diff_lut.Create();
|
||||
state.proctex_diff_lut.texture_1d = proctex_diff_lut.handle;
|
||||
state.Apply();
|
||||
glActiveTexture(GL_TEXTURE14);
|
||||
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
|
||||
// Sync fixed function OpenGL state
|
||||
SyncCullMode();
|
||||
SyncBlendEnabled();
|
||||
|
@ -272,6 +323,36 @@ void RasterizerOpenGL::DrawTriangles() {
|
|||
uniform_block_data.fog_lut_dirty = false;
|
||||
}
|
||||
|
||||
// Sync the proctex noise lut
|
||||
if (uniform_block_data.proctex_noise_lut_dirty) {
|
||||
SyncProcTexNoiseLUT();
|
||||
uniform_block_data.proctex_noise_lut_dirty = false;
|
||||
}
|
||||
|
||||
// Sync the proctex color map
|
||||
if (uniform_block_data.proctex_color_map_dirty) {
|
||||
SyncProcTexColorMap();
|
||||
uniform_block_data.proctex_color_map_dirty = false;
|
||||
}
|
||||
|
||||
// Sync the proctex alpha map
|
||||
if (uniform_block_data.proctex_alpha_map_dirty) {
|
||||
SyncProcTexAlphaMap();
|
||||
uniform_block_data.proctex_alpha_map_dirty = false;
|
||||
}
|
||||
|
||||
// Sync the proctex lut
|
||||
if (uniform_block_data.proctex_lut_dirty) {
|
||||
SyncProcTexLUT();
|
||||
uniform_block_data.proctex_lut_dirty = false;
|
||||
}
|
||||
|
||||
// Sync the proctex difference lut
|
||||
if (uniform_block_data.proctex_diff_lut_dirty) {
|
||||
SyncProcTexDiffLUT();
|
||||
uniform_block_data.proctex_diff_lut_dirty = false;
|
||||
}
|
||||
|
||||
// Sync the uniform data
|
||||
if (uniform_block_data.dirty) {
|
||||
glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data,
|
||||
|
@ -354,6 +435,47 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
|
|||
uniform_block_data.fog_lut_dirty = true;
|
||||
break;
|
||||
|
||||
// ProcTex state
|
||||
case PICA_REG_INDEX(texturing.proctex):
|
||||
case PICA_REG_INDEX(texturing.proctex_lut):
|
||||
case PICA_REG_INDEX(texturing.proctex_lut_offset):
|
||||
shader_dirty = true;
|
||||
break;
|
||||
|
||||
case PICA_REG_INDEX(texturing.proctex_noise_u):
|
||||
case PICA_REG_INDEX(texturing.proctex_noise_v):
|
||||
case PICA_REG_INDEX(texturing.proctex_noise_frequency):
|
||||
SyncProcTexNoise();
|
||||
break;
|
||||
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[0], 0xb0):
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[1], 0xb1):
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[2], 0xb2):
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[3], 0xb3):
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[4], 0xb4):
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[5], 0xb5):
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[6], 0xb6):
|
||||
case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[7], 0xb7):
|
||||
using Pica::TexturingRegs;
|
||||
switch (regs.texturing.proctex_lut_config.ref_table.Value()) {
|
||||
case TexturingRegs::ProcTexLutTable::Noise:
|
||||
uniform_block_data.proctex_noise_lut_dirty = true;
|
||||
break;
|
||||
case TexturingRegs::ProcTexLutTable::ColorMap:
|
||||
uniform_block_data.proctex_color_map_dirty = true;
|
||||
break;
|
||||
case TexturingRegs::ProcTexLutTable::AlphaMap:
|
||||
uniform_block_data.proctex_alpha_map_dirty = true;
|
||||
break;
|
||||
case TexturingRegs::ProcTexLutTable::Color:
|
||||
uniform_block_data.proctex_lut_dirty = true;
|
||||
break;
|
||||
case TexturingRegs::ProcTexLutTable::ColorDiff:
|
||||
uniform_block_data.proctex_diff_lut_dirty = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
// Alpha test
|
||||
case PICA_REG_INDEX(framebuffer.output_merger.alpha_test):
|
||||
SyncAlphaTest();
|
||||
|
@ -1072,6 +1194,35 @@ void RasterizerOpenGL::SetShader() {
|
|||
glUniform1i(uniform_fog_lut, 9);
|
||||
}
|
||||
|
||||
GLuint uniform_proctex_noise_lut =
|
||||
glGetUniformLocation(shader->shader.handle, "proctex_noise_lut");
|
||||
if (uniform_proctex_noise_lut != -1) {
|
||||
glUniform1i(uniform_proctex_noise_lut, 10);
|
||||
}
|
||||
|
||||
GLuint uniform_proctex_color_map =
|
||||
glGetUniformLocation(shader->shader.handle, "proctex_color_map");
|
||||
if (uniform_proctex_color_map != -1) {
|
||||
glUniform1i(uniform_proctex_color_map, 11);
|
||||
}
|
||||
|
||||
GLuint uniform_proctex_alpha_map =
|
||||
glGetUniformLocation(shader->shader.handle, "proctex_alpha_map");
|
||||
if (uniform_proctex_alpha_map != -1) {
|
||||
glUniform1i(uniform_proctex_alpha_map, 12);
|
||||
}
|
||||
|
||||
GLuint uniform_proctex_lut = glGetUniformLocation(shader->shader.handle, "proctex_lut");
|
||||
if (uniform_proctex_lut != -1) {
|
||||
glUniform1i(uniform_proctex_lut, 13);
|
||||
}
|
||||
|
||||
GLuint uniform_proctex_diff_lut =
|
||||
glGetUniformLocation(shader->shader.handle, "proctex_diff_lut");
|
||||
if (uniform_proctex_diff_lut != -1) {
|
||||
glUniform1i(uniform_proctex_diff_lut, 14);
|
||||
}
|
||||
|
||||
current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get();
|
||||
|
||||
GLuint block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data");
|
||||
|
@ -1105,6 +1256,7 @@ void RasterizerOpenGL::SetShader() {
|
|||
}
|
||||
|
||||
SyncFogColor();
|
||||
SyncProcTexNoise();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1204,6 +1356,86 @@ void RasterizerOpenGL::SyncFogLUT() {
|
|||
}
|
||||
}
|
||||
|
||||
/// Copies the procedural texture noise registers (frequency, amplitude and phase for u and v)
/// into the uniform block and flags the block as dirty.
void RasterizerOpenGL::SyncProcTexNoise() {
    const auto& texturing = Pica::g_state.regs.texturing;
    const auto& noise_u = texturing.proctex_noise_u;
    const auto& noise_v = texturing.proctex_noise_v;
    const auto& frequency = texturing.proctex_noise_frequency;
    auto& uniforms = uniform_block_data.data;

    // Frequency is held as raw float16 bits
    uniforms.proctex_noise_f = {
        Pica::float16::FromRaw(frequency.u).ToFloat32(),
        Pica::float16::FromRaw(frequency.v).ToFloat32(),
    };

    // Amplitude is a signed fixed point value, scaled by 1 / 4095
    uniforms.proctex_noise_a = {
        noise_u.amplitude / 4095.0f,
        noise_v.amplitude / 4095.0f,
    };

    // Phase is held as raw float16 bits
    uniforms.proctex_noise_p = {
        Pica::float16::FromRaw(noise_u.phase).ToFloat32(),
        Pica::float16::FromRaw(noise_v.phase).ToFloat32(),
    };

    uniform_block_data.dirty = true;
}
|
||||
|
||||
// helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap
|
||||
static void SyncProcTexValueLUT(const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut,
|
||||
std::array<GLvec2, 128>& lut_data, GLenum texture) {
|
||||
std::array<GLvec2, 128> new_data;
|
||||
std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) {
|
||||
return GLvec2{entry.ToFloat(), entry.DiffToFloat()};
|
||||
});
|
||||
|
||||
if (new_data != lut_data) {
|
||||
lut_data = new_data;
|
||||
glActiveTexture(texture);
|
||||
glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RG, GL_FLOAT, lut_data.data());
|
||||
}
|
||||
}
|
||||
|
||||
/// Syncs the procedural texture noise table to the texture on unit 10.
void RasterizerOpenGL::SyncProcTexNoiseLUT() {
    const auto& noise_table = Pica::g_state.proctex.noise_table;
    SyncProcTexValueLUT(noise_table, proctex_noise_lut_data, GL_TEXTURE10);
}
|
||||
|
||||
/// Syncs the procedural texture color map to the texture on unit 11.
void RasterizerOpenGL::SyncProcTexColorMap() {
    const auto& color_map = Pica::g_state.proctex.color_map_table;
    SyncProcTexValueLUT(color_map, proctex_color_map_data, GL_TEXTURE11);
}
|
||||
|
||||
/// Syncs the procedural texture alpha map to the texture on unit 12.
void RasterizerOpenGL::SyncProcTexAlphaMap() {
    const auto& alpha_map = Pica::g_state.proctex.alpha_map_table;
    SyncProcTexValueLUT(alpha_map, proctex_alpha_map_data, GL_TEXTURE12);
}
|
||||
|
||||
void RasterizerOpenGL::SyncProcTexLUT() {
|
||||
std::array<GLvec4, 256> new_data;
|
||||
|
||||
std::transform(Pica::g_state.proctex.color_table.begin(),
|
||||
Pica::g_state.proctex.color_table.end(), new_data.begin(),
|
||||
[](const auto& entry) {
|
||||
auto rgba = entry.ToVector() / 255.0f;
|
||||
return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
|
||||
});
|
||||
|
||||
if (new_data != proctex_lut_data) {
|
||||
proctex_lut_data = new_data;
|
||||
glActiveTexture(GL_TEXTURE13);
|
||||
glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, proctex_lut_data.data());
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncProcTexDiffLUT() {
|
||||
std::array<GLvec4, 256> new_data;
|
||||
|
||||
std::transform(Pica::g_state.proctex.color_diff_table.begin(),
|
||||
Pica::g_state.proctex.color_diff_table.end(), new_data.begin(),
|
||||
[](const auto& entry) {
|
||||
auto rgba = entry.ToVector() / 255.0f;
|
||||
return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
|
||||
});
|
||||
|
||||
if (new_data != proctex_diff_lut_data) {
|
||||
proctex_diff_lut_data = new_data;
|
||||
glActiveTexture(GL_TEXTURE14);
|
||||
glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, proctex_diff_lut_data.data());
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncAlphaTest() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) {
|
||||
|
|
|
@ -143,6 +143,9 @@ private:
|
|||
GLint scissor_x2;
|
||||
GLint scissor_y2;
|
||||
alignas(16) GLvec3 fog_color;
|
||||
alignas(8) GLvec2 proctex_noise_f;
|
||||
alignas(8) GLvec2 proctex_noise_a;
|
||||
alignas(8) GLvec2 proctex_noise_p;
|
||||
alignas(16) GLvec3 lighting_global_ambient;
|
||||
LightSrc light_src[8];
|
||||
alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages
|
||||
|
@ -150,7 +153,7 @@ private:
|
|||
};
|
||||
|
||||
static_assert(
|
||||
sizeof(UniformData) == 0x3C0,
|
||||
sizeof(UniformData) == 0x3E0,
|
||||
"The size of the UniformData structure has changed, update the structure in the shader");
|
||||
static_assert(sizeof(UniformData) < 16384,
|
||||
"UniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
|
@ -180,6 +183,16 @@ private:
|
|||
void SyncFogColor();
|
||||
void SyncFogLUT();
|
||||
|
||||
/// Sync the procedural texture noise configuration to match the PICA register
|
||||
void SyncProcTexNoise();
|
||||
|
||||
/// Sync the procedural texture lookup tables
|
||||
void SyncProcTexNoiseLUT();
|
||||
void SyncProcTexColorMap();
|
||||
void SyncProcTexAlphaMap();
|
||||
void SyncProcTexLUT();
|
||||
void SyncProcTexDiffLUT();
|
||||
|
||||
/// Syncs the alpha test states to match the PICA register
|
||||
void SyncAlphaTest();
|
||||
|
||||
|
@ -248,6 +261,11 @@ private:
|
|||
UniformData data;
|
||||
bool lut_dirty[6];
|
||||
bool fog_lut_dirty;
|
||||
bool proctex_noise_lut_dirty;
|
||||
bool proctex_color_map_dirty;
|
||||
bool proctex_alpha_map_dirty;
|
||||
bool proctex_lut_dirty;
|
||||
bool proctex_diff_lut_dirty;
|
||||
bool dirty;
|
||||
} uniform_block_data = {};
|
||||
|
||||
|
@ -262,4 +280,19 @@ private:
|
|||
|
||||
OGLTexture fog_lut;
|
||||
std::array<GLuint, 128> fog_lut_data{};
|
||||
|
||||
OGLTexture proctex_noise_lut;
|
||||
std::array<GLvec2, 128> proctex_noise_lut_data{};
|
||||
|
||||
OGLTexture proctex_color_map;
|
||||
std::array<GLvec2, 128> proctex_color_map_data{};
|
||||
|
||||
OGLTexture proctex_alpha_map;
|
||||
std::array<GLvec2, 128> proctex_alpha_map_data{};
|
||||
|
||||
OGLTexture proctex_lut;
|
||||
std::array<GLvec4, 256> proctex_lut_data{};
|
||||
|
||||
OGLTexture proctex_diff_lut;
|
||||
std::array<GLvec4, 256> proctex_diff_lut_data{};
|
||||
};
|
||||
|
|
|
@ -114,6 +114,22 @@ PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) {
|
|||
state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0;
|
||||
state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0;
|
||||
|
||||
state.proctex.enable = regs.texturing.main_config.texture3_enable;
|
||||
if (state.proctex.enable) {
|
||||
state.proctex.coord = regs.texturing.main_config.texture3_coordinates;
|
||||
state.proctex.u_clamp = regs.texturing.proctex.u_clamp;
|
||||
state.proctex.v_clamp = regs.texturing.proctex.v_clamp;
|
||||
state.proctex.color_combiner = regs.texturing.proctex.color_combiner;
|
||||
state.proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner;
|
||||
state.proctex.separate_alpha = regs.texturing.proctex.separate_alpha;
|
||||
state.proctex.noise_enable = regs.texturing.proctex.noise_enable;
|
||||
state.proctex.u_shift = regs.texturing.proctex.u_shift;
|
||||
state.proctex.v_shift = regs.texturing.proctex.v_shift;
|
||||
state.proctex.lut_width = regs.texturing.proctex_lut.width;
|
||||
state.proctex.lut_offset = regs.texturing.proctex_lut_offset;
|
||||
state.proctex.lut_filter = regs.texturing.proctex_lut.filter;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -132,8 +148,7 @@ static std::string TexCoord(const PicaShaderConfig& config, int texture_unit) {
|
|||
if (texture_unit == 2 && config.state.texture2_use_coord1) {
|
||||
return "texcoord[1]";
|
||||
}
|
||||
// TODO: if texture unit 3 (procedural texture) implementation also uses this function,
|
||||
// config.state.texture3_coordinates should be respected here.
|
||||
|
||||
return "texcoord[" + std::to_string(texture_unit) + "]";
|
||||
}
|
||||
|
||||
|
@ -175,6 +190,14 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config,
|
|||
case Source::Texture2:
|
||||
out += "texture(tex[2], " + TexCoord(config, 2) + ")";
|
||||
break;
|
||||
case Source::Texture3:
|
||||
if (config.state.proctex.enable) {
|
||||
out += "ProcTex()";
|
||||
} else {
|
||||
LOG_ERROR(Render_OpenGL, "Using Texture3 without enabling it");
|
||||
out += "vec4(0.0)";
|
||||
}
|
||||
break;
|
||||
case Source::PreviousBuffer:
|
||||
out += "combiner_buffer";
|
||||
break;
|
||||
|
@ -483,9 +506,18 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
|
|||
if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) {
|
||||
// Bump mapping is enabled using a normal map, read perturbation vector from the selected
|
||||
// texture
|
||||
std::string bump_selector = std::to_string(lighting.bump_selector);
|
||||
out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], " +
|
||||
TexCoord(config, lighting.bump_selector) + ").rgb - 1.0;\n";
|
||||
if (lighting.bump_selector == 3) {
|
||||
if (config.state.proctex.enable) {
|
||||
out += "vec3 surface_normal = 2.0 * ProcTex().rgb - 1.0;\n";
|
||||
} else {
|
||||
LOG_ERROR(Render_OpenGL, "Using Texture3 without enabling it");
|
||||
out += "vec3 surface_normal = vec3(-1.0);\n";
|
||||
}
|
||||
} else {
|
||||
std::string bump_selector = std::to_string(lighting.bump_selector);
|
||||
out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], " +
|
||||
TexCoord(config, lighting.bump_selector) + ").rgb - 1.0;\n";
|
||||
}
|
||||
|
||||
// Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher
|
||||
// precision result
|
||||
|
@ -693,6 +725,221 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
|
|||
out += "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n";
|
||||
}
|
||||
|
||||
using ProcTexClamp = TexturingRegs::ProcTexClamp;
|
||||
using ProcTexShift = TexturingRegs::ProcTexShift;
|
||||
using ProcTexCombiner = TexturingRegs::ProcTexCombiner;
|
||||
using ProcTexFilter = TexturingRegs::ProcTexFilter;
|
||||
|
||||
void AppendProcTexShiftOffset(std::string& out, const std::string& v, ProcTexShift mode,
|
||||
ProcTexClamp clamp_mode) {
|
||||
std::string offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? "1.0" : "0.5";
|
||||
switch (mode) {
|
||||
case ProcTexShift::None:
|
||||
out += "0";
|
||||
break;
|
||||
case ProcTexShift::Odd:
|
||||
out += offset + " * ((int(" + v + ") / 2) % 2)";
|
||||
break;
|
||||
case ProcTexShift::Even:
|
||||
out += offset + " * (((int(" + v + ") + 1) / 2) % 2)";
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unknown shift mode %u", static_cast<u32>(mode));
|
||||
out += "0";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void AppendProcTexClamp(std::string& out, const std::string& var, ProcTexClamp mode) {
|
||||
switch (mode) {
|
||||
case ProcTexClamp::ToZero:
|
||||
out += var + " = " + var + " > 1.0 ? 0 : " + var + ";\n";
|
||||
break;
|
||||
case ProcTexClamp::ToEdge:
|
||||
out += var + " = " + "min(" + var + ", 1.0);\n";
|
||||
break;
|
||||
case ProcTexClamp::SymmetricalRepeat:
|
||||
out += var + " = " + "fract(" + var + ");\n";
|
||||
break;
|
||||
case ProcTexClamp::MirroredRepeat: {
|
||||
out +=
|
||||
var + " = int(" + var + ") % 2 == 0 ? fract(" + var + ") : 1.0 - fract(" + var + ");\n";
|
||||
break;
|
||||
}
|
||||
case ProcTexClamp::Pulse:
|
||||
out += var + " = " + var + " > 0.5 ? 1.0 : 0.0;\n";
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unknown clamp mode %u", static_cast<u32>(mode));
|
||||
out += var + " = " + "min(" + var + ", 1.0);\n";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner,
|
||||
const std::string& map_lut) {
|
||||
std::string combined;
|
||||
switch (combiner) {
|
||||
case ProcTexCombiner::U:
|
||||
combined = "u";
|
||||
break;
|
||||
case ProcTexCombiner::U2:
|
||||
combined = "(u * u)";
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::V:
|
||||
combined = "v";
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::V2:
|
||||
combined = "(v * v)";
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::Add:
|
||||
combined = "((u + v) * 0.5)";
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::Add2:
|
||||
combined = "((u * u + v * v) * 0.5)";
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::SqrtAdd2:
|
||||
combined = "min(sqrt(u * u + v * v), 1.0)";
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::Min:
|
||||
combined = "min(u, v)";
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::Max:
|
||||
combined = "max(u, v)";
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::RMax:
|
||||
combined = "min(((u + v) * 0.5 + sqrt(u * u + v * v)) * 0.5, 1.0)";
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unknown combiner %u", static_cast<u32>(combiner));
|
||||
combined = "0.0";
|
||||
break;
|
||||
}
|
||||
out += "ProcTexLookupLUT(" + map_lut + ", " + combined + ")";
|
||||
}
|
||||
|
||||
void AppendProcTexSampler(std::string& out, const PicaShaderConfig& config) {
|
||||
// LUT sampling utility
|
||||
// For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and
|
||||
// coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using
|
||||
// value entries and difference entries.
|
||||
out += R"(
|
||||
float ProcTexLookupLUT(sampler1D lut, float coord) {
|
||||
coord *= 128;
|
||||
float index_i = clamp(floor(coord), 0.0, 127.0);
|
||||
float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be
|
||||
// extracted as index_i = 127.0 and index_f = 1.0
|
||||
vec2 entry = texelFetch(lut, int(index_i), 0).rg;
|
||||
return clamp(entry.r + entry.g * index_f, 0.0, 1.0);
|
||||
}
|
||||
)";
|
||||
|
||||
// Noise utility
|
||||
if (config.state.proctex.noise_enable) {
|
||||
// See swrasterizer/proctex.cpp for more information about these functions
|
||||
out += R"(
|
||||
int ProcTexNoiseRand1D(int v) {
|
||||
const int table[] = int[](0,4,10,8,4,9,7,12,5,15,13,14,11,15,2,11);
|
||||
return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF];
|
||||
}
|
||||
|
||||
float ProcTexNoiseRand2D(vec2 point) {
|
||||
const int table[] = int[](10,2,15,8,0,7,4,5,5,13,2,6,13,9,3,14);
|
||||
int u2 = ProcTexNoiseRand1D(int(point.x));
|
||||
int v2 = ProcTexNoiseRand1D(int(point.y));
|
||||
v2 += ((u2 & 3) == 1) ? 4 : 0;
|
||||
v2 ^= (u2 & 1) * 6;
|
||||
v2 += 10 + u2;
|
||||
v2 &= 0xF;
|
||||
v2 ^= table[u2];
|
||||
return -1.0 + float(v2) * 2.0/ 15.0;
|
||||
}
|
||||
|
||||
float ProcTexNoiseCoef(vec2 x) {
|
||||
vec2 grid = 9.0 * proctex_noise_f * abs(x + proctex_noise_p);
|
||||
vec2 point = floor(grid);
|
||||
vec2 frac = grid - point;
|
||||
|
||||
float g0 = ProcTexNoiseRand2D(point) * (frac.x + frac.y);
|
||||
float g1 = ProcTexNoiseRand2D(point + vec2(1.0, 0.0)) * (frac.x + frac.y - 1.0);
|
||||
float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0);
|
||||
float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0);
|
||||
|
||||
float x_noise = ProcTexLookupLUT(proctex_noise_lut, frac.x);
|
||||
float y_noise = ProcTexLookupLUT(proctex_noise_lut, frac.y);
|
||||
float x0 = mix(g0, g1, x_noise);
|
||||
float x1 = mix(g2, g3, x_noise);
|
||||
return mix(x0, x1, y_noise);
|
||||
}
|
||||
)";
|
||||
}
|
||||
|
||||
out += "vec4 ProcTex() {\n";
|
||||
out += "vec2 uv = abs(texcoord[" + std::to_string(config.state.proctex.coord) + "]);\n";
|
||||
|
||||
// Get shift offset before noise generation
|
||||
out += "float u_shift = ";
|
||||
AppendProcTexShiftOffset(out, "uv.y", config.state.proctex.u_shift,
|
||||
config.state.proctex.u_clamp);
|
||||
out += ";\n";
|
||||
out += "float v_shift = ";
|
||||
AppendProcTexShiftOffset(out, "uv.x", config.state.proctex.v_shift,
|
||||
config.state.proctex.v_clamp);
|
||||
out += ";\n";
|
||||
|
||||
// Generate noise
|
||||
if (config.state.proctex.noise_enable) {
|
||||
out += "uv += proctex_noise_a * ProcTexNoiseCoef(uv);\n";
|
||||
out += "uv = abs(uv);\n";
|
||||
}
|
||||
|
||||
// Shift
|
||||
out += "float u = uv.x + u_shift;\n";
|
||||
out += "float v = uv.y + v_shift;\n";
|
||||
|
||||
// Clamp
|
||||
AppendProcTexClamp(out, "u", config.state.proctex.u_clamp);
|
||||
AppendProcTexClamp(out, "v", config.state.proctex.v_clamp);
|
||||
|
||||
// Combine and map
|
||||
out += "float lut_coord = ";
|
||||
AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, "proctex_color_map");
|
||||
out += ";\n";
|
||||
|
||||
// Look up color
|
||||
// For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1]
|
||||
out += "lut_coord *= " + std::to_string(config.state.proctex.lut_width - 1) + ";\n";
|
||||
// TODO(wwylele): implement mipmap
|
||||
switch (config.state.proctex.lut_filter) {
|
||||
case ProcTexFilter::Linear:
|
||||
case ProcTexFilter::LinearMipmapLinear:
|
||||
case ProcTexFilter::LinearMipmapNearest:
|
||||
out += "int lut_index_i = int(lut_coord) + " +
|
||||
std::to_string(config.state.proctex.lut_offset) + ";\n";
|
||||
out += "float lut_index_f = fract(lut_coord);\n";
|
||||
out += "vec4 final_color = texelFetch(proctex_lut, lut_index_i, 0) + lut_index_f * "
|
||||
"texelFetch(proctex_diff_lut, lut_index_i, 0);\n";
|
||||
break;
|
||||
case ProcTexFilter::Nearest:
|
||||
case ProcTexFilter::NearestMipmapLinear:
|
||||
case ProcTexFilter::NearestMipmapNearest:
|
||||
out += "lut_coord += " + std::to_string(config.state.proctex.lut_offset) + ";\n";
|
||||
out += "vec4 final_color = texelFetch(proctex_lut, int(round(lut_coord)), 0);\n";
|
||||
break;
|
||||
}
|
||||
|
||||
if (config.state.proctex.separate_alpha) {
|
||||
// Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It
|
||||
// uses the output of CombineAndMap directly instead.
|
||||
out += "float final_alpha = ";
|
||||
AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, "proctex_alpha_map");
|
||||
out += ";\n";
|
||||
out += "return vec4(final_color.xyz, final_alpha);\n}\n";
|
||||
} else {
|
||||
out += "return final_color;\n}\n";
|
||||
}
|
||||
}
|
||||
|
||||
std::string GenerateFragmentShader(const PicaShaderConfig& config) {
|
||||
const auto& state = config.state;
|
||||
|
||||
|
@ -735,6 +982,9 @@ layout (std140) uniform shader_data {
|
|||
int scissor_x2;
|
||||
int scissor_y2;
|
||||
vec3 fog_color;
|
||||
vec2 proctex_noise_f;
|
||||
vec2 proctex_noise_a;
|
||||
vec2 proctex_noise_p;
|
||||
vec3 lighting_global_ambient;
|
||||
LightSrc light_src[NUM_LIGHTS];
|
||||
vec4 const_color[NUM_TEV_STAGES];
|
||||
|
@ -744,12 +994,21 @@ layout (std140) uniform shader_data {
|
|||
uniform sampler2D tex[3];
|
||||
uniform sampler1D lut[6];
|
||||
uniform usampler1D fog_lut;
|
||||
uniform sampler1D proctex_noise_lut;
|
||||
uniform sampler1D proctex_color_map;
|
||||
uniform sampler1D proctex_alpha_map;
|
||||
uniform sampler1D proctex_lut;
|
||||
uniform sampler1D proctex_diff_lut;
|
||||
|
||||
// Rotate the vector v by the quaternion q
|
||||
vec3 quaternion_rotate(vec4 q, vec3 v) {
|
||||
return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v);
|
||||
}
|
||||
})";
|
||||
|
||||
if (config.state.proctex.enable)
|
||||
AppendProcTexSampler(out, config);
|
||||
|
||||
out += R"(
|
||||
void main() {
|
||||
vec4 primary_fragment_color = vec4(0.0);
|
||||
vec4 secondary_fragment_color = vec4(0.0);
|
||||
|
|
|
@ -113,6 +113,19 @@ union PicaShaderConfig {
|
|||
} lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb;
|
||||
} lighting;
|
||||
|
||||
struct {
|
||||
bool enable;
|
||||
u32 coord;
|
||||
Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
|
||||
Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
|
||||
bool separate_alpha;
|
||||
bool noise_enable;
|
||||
Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
|
||||
u32 lut_width;
|
||||
u32 lut_offset;
|
||||
Pica::TexturingRegs::ProcTexFilter lut_filter;
|
||||
} proctex;
|
||||
|
||||
} state;
|
||||
};
|
||||
#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
|
||||
|
|
|
@ -58,6 +58,12 @@ OpenGLState::OpenGLState() {
|
|||
|
||||
fog_lut.texture_1d = 0;
|
||||
|
||||
proctex_lut.texture_1d = 0;
|
||||
proctex_diff_lut.texture_1d = 0;
|
||||
proctex_color_map.texture_1d = 0;
|
||||
proctex_alpha_map.texture_1d = 0;
|
||||
proctex_noise_lut.texture_1d = 0;
|
||||
|
||||
draw.read_framebuffer = 0;
|
||||
draw.draw_framebuffer = 0;
|
||||
draw.vertex_array = 0;
|
||||
|
@ -201,6 +207,36 @@ void OpenGLState::Apply() const {
|
|||
glBindTexture(GL_TEXTURE_1D, fog_lut.texture_1d);
|
||||
}
|
||||
|
||||
// ProcTex Noise LUT
|
||||
if (proctex_noise_lut.texture_1d != cur_state.proctex_noise_lut.texture_1d) {
|
||||
glActiveTexture(GL_TEXTURE10);
|
||||
glBindTexture(GL_TEXTURE_1D, proctex_noise_lut.texture_1d);
|
||||
}
|
||||
|
||||
// ProcTex Color Map
|
||||
if (proctex_color_map.texture_1d != cur_state.proctex_color_map.texture_1d) {
|
||||
glActiveTexture(GL_TEXTURE11);
|
||||
glBindTexture(GL_TEXTURE_1D, proctex_color_map.texture_1d);
|
||||
}
|
||||
|
||||
// ProcTex Alpha Map
|
||||
if (proctex_alpha_map.texture_1d != cur_state.proctex_alpha_map.texture_1d) {
|
||||
glActiveTexture(GL_TEXTURE12);
|
||||
glBindTexture(GL_TEXTURE_1D, proctex_alpha_map.texture_1d);
|
||||
}
|
||||
|
||||
// ProcTex LUT
|
||||
if (proctex_lut.texture_1d != cur_state.proctex_lut.texture_1d) {
|
||||
glActiveTexture(GL_TEXTURE13);
|
||||
glBindTexture(GL_TEXTURE_1D, proctex_lut.texture_1d);
|
||||
}
|
||||
|
||||
// ProcTex Diff LUT
|
||||
if (proctex_diff_lut.texture_1d != cur_state.proctex_diff_lut.texture_1d) {
|
||||
glActiveTexture(GL_TEXTURE14);
|
||||
glBindTexture(GL_TEXTURE_1D, proctex_diff_lut.texture_1d);
|
||||
}
|
||||
|
||||
// Framebuffer
|
||||
if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
|
||||
|
|
|
@ -71,6 +71,26 @@ public:
|
|||
GLuint texture_1d; // GL_TEXTURE_BINDING_1D
|
||||
} fog_lut;
|
||||
|
||||
struct {
|
||||
GLuint texture_1d; // GL_TEXTURE_BINDING_1D
|
||||
} proctex_noise_lut;
|
||||
|
||||
struct {
|
||||
GLuint texture_1d; // GL_TEXTURE_BINDING_1D
|
||||
} proctex_color_map;
|
||||
|
||||
struct {
|
||||
GLuint texture_1d; // GL_TEXTURE_BINDING_1D
|
||||
} proctex_alpha_map;
|
||||
|
||||
struct {
|
||||
GLuint texture_1d; // GL_TEXTURE_BINDING_1D
|
||||
} proctex_lut;
|
||||
|
||||
struct {
|
||||
GLuint texture_1d; // GL_TEXTURE_BINDING_1D
|
||||
} proctex_diff_lut;
|
||||
|
||||
struct {
|
||||
GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
|
||||
GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
|
||||
|
|
223
src/video_core/swrasterizer/proctex.cpp
Normal file
223
src/video_core/swrasterizer/proctex.cpp
Normal file
|
@ -0,0 +1,223 @@
|
|||
// Copyright 2017 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include "common/math_util.h"
|
||||
#include "video_core/swrasterizer/proctex.h"
|
||||
|
||||
namespace Pica {
|
||||
namespace Rasterizer {
|
||||
|
||||
using ProcTexClamp = TexturingRegs::ProcTexClamp;
|
||||
using ProcTexShift = TexturingRegs::ProcTexShift;
|
||||
using ProcTexCombiner = TexturingRegs::ProcTexCombiner;
|
||||
using ProcTexFilter = TexturingRegs::ProcTexFilter;
|
||||
|
||||
/// Samples a 128-entry value/difference table at the given coordinate and returns the
/// interpolated value, limited to the range [0, 1].
static float LookupLUT(const std::array<State::ProcTex::ValueEntry, 128>& lut, float coord) {
    // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and
    // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using
    // value entries and difference entries.
    coord *= 128;
    // Keep the index inside the table on both ends; the generated GLSL ProcTexLookupLUT clamps the
    // index to [0, 127] in the same way.
    const int index_int = std::max(std::min(static_cast<int>(coord), 127), 0);
    const float frac = coord - index_int;
    const float value = lut[index_int].ToFloat() + frac * lut[index_int].DiffToFloat();
    // The value plus a scaled difference can leave [0, 1]. The hardware renderer's shader clamps
    // the result, and callers convert it to u8 or use it to index the color table, so clamp here
    // as well.
    return std::min(std::max(value, 0.0f), 1.0f);
}
|
||||
|
||||
// These functions are used to generate random noise for procedural texture. Their results are
|
||||
// verified against real hardware, but it's not known if the algorithm is the same as hardware.
|
||||
/// Maps an integer to a 4-bit pseudo random value. The value is built from v modulo 9 and then
/// XORed with a table entry selected by the low four bits of v / 9.
static unsigned int NoiseRand1D(unsigned int v) {
    static constexpr std::array<unsigned int, 16> xor_table{
        {0, 4, 10, 8, 4, 9, 7, 12, 5, 15, 13, 14, 11, 15, 2, 11}};
    const unsigned int position = v % 9;
    const unsigned int block = (v / 9) & 0xF;
    const unsigned int scrambled = ((position + 2) * 3) & 0xF;
    return scrambled ^ xor_table[block];
}
|
||||
|
||||
static float NoiseRand2D(unsigned int x, unsigned int y) {
|
||||
static constexpr std::array<unsigned int, 16> table{
|
||||
{10, 2, 15, 8, 0, 7, 4, 5, 5, 13, 2, 6, 13, 9, 3, 14}};
|
||||
unsigned int u2 = NoiseRand1D(x);
|
||||
unsigned int v2 = NoiseRand1D(y);
|
||||
v2 += ((u2 & 3) == 1) ? 4 : 0;
|
||||
v2 ^= (u2 & 1) * 6;
|
||||
v2 += 10 + u2;
|
||||
v2 &= 0xF;
|
||||
v2 ^= table[u2];
|
||||
return -1.0f + v2 * 2.0f / 15.0f;
|
||||
}
|
||||
|
||||
static float NoiseCoef(float u, float v, TexturingRegs regs, State::ProcTex state) {
|
||||
const float freq_u = float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32();
|
||||
const float freq_v = float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32();
|
||||
const float phase_u = float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32();
|
||||
const float phase_v = float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32();
|
||||
const float x = 9 * freq_u * std::abs(u + phase_u);
|
||||
const float y = 9 * freq_v * std::abs(v + phase_v);
|
||||
const int x_int = static_cast<int>(x);
|
||||
const int y_int = static_cast<int>(y);
|
||||
const float x_frac = x - x_int;
|
||||
const float y_frac = y - y_int;
|
||||
|
||||
const float g0 = NoiseRand2D(x_int, y_int) * (x_frac + y_frac);
|
||||
const float g1 = NoiseRand2D(x_int + 1, y_int) * (x_frac + y_frac - 1);
|
||||
const float g2 = NoiseRand2D(x_int, y_int + 1) * (x_frac + y_frac - 1);
|
||||
const float g3 = NoiseRand2D(x_int + 1, y_int + 1) * (x_frac + y_frac - 2);
|
||||
const float x_noise = LookupLUT(state.noise_table, x_frac);
|
||||
const float y_noise = LookupLUT(state.noise_table, y_frac);
|
||||
return Math::BilinearInterp(g0, g1, g2, g3, x_noise, y_noise);
|
||||
}
|
||||
|
||||
static float GetShiftOffset(float v, ProcTexShift mode, ProcTexClamp clamp_mode) {
|
||||
const float offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? 1 : 0.5f;
|
||||
switch (mode) {
|
||||
case ProcTexShift::None:
|
||||
return 0;
|
||||
case ProcTexShift::Odd:
|
||||
return offset * (((int)v / 2) % 2);
|
||||
case ProcTexShift::Even:
|
||||
return offset * ((((int)v + 1) / 2) % 2);
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unknown shift mode %u", static_cast<u32>(mode));
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
static void ClampCoord(float& coord, ProcTexClamp mode) {
|
||||
switch (mode) {
|
||||
case ProcTexClamp::ToZero:
|
||||
if (coord > 1.0f)
|
||||
coord = 0.0f;
|
||||
break;
|
||||
case ProcTexClamp::ToEdge:
|
||||
coord = std::min(coord, 1.0f);
|
||||
break;
|
||||
case ProcTexClamp::SymmetricalRepeat:
|
||||
coord = coord - std::floor(coord);
|
||||
break;
|
||||
case ProcTexClamp::MirroredRepeat: {
|
||||
int integer = static_cast<int>(coord);
|
||||
float frac = coord - integer;
|
||||
coord = (integer % 2) == 0 ? frac : (1.0f - frac);
|
||||
break;
|
||||
}
|
||||
case ProcTexClamp::Pulse:
|
||||
if (coord <= 0.5f)
|
||||
coord = 0.0f;
|
||||
else
|
||||
coord = 1.0f;
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unknown clamp mode %u", static_cast<u32>(mode));
|
||||
coord = std::min(coord, 1.0f);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
float CombineAndMap(float u, float v, ProcTexCombiner combiner,
|
||||
const std::array<State::ProcTex::ValueEntry, 128>& map_table) {
|
||||
float f;
|
||||
switch (combiner) {
|
||||
case ProcTexCombiner::U:
|
||||
f = u;
|
||||
break;
|
||||
case ProcTexCombiner::U2:
|
||||
f = u * u;
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::V:
|
||||
f = v;
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::V2:
|
||||
f = v * v;
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::Add:
|
||||
f = (u + v) * 0.5f;
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::Add2:
|
||||
f = (u * u + v * v) * 0.5f;
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::SqrtAdd2:
|
||||
f = std::min(std::sqrt(u * u + v * v), 1.0f);
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::Min:
|
||||
f = std::min(u, v);
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::Max:
|
||||
f = std::max(u, v);
|
||||
break;
|
||||
case TexturingRegs::ProcTexCombiner::RMax:
|
||||
f = std::min(((u + v) * 0.5f + std::sqrt(u * u + v * v)) * 0.5f, 1.0f);
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unknown combiner %u", static_cast<u32>(combiner));
|
||||
f = 0.0f;
|
||||
break;
|
||||
}
|
||||
return LookupLUT(map_table, f);
|
||||
}
|
||||
|
||||
/// Generates the procedural texture color for the coordinates (u, v).
/// The stages run in this order: take absolute coordinates, compute the shift offsets, apply
/// noise if enabled, shift, clamp, combine and map u/v to a LUT coordinate, then look up the
/// color table. In separate alpha mode the alpha channel comes from its own combiner and map
/// instead of the color table.
Math::Vec4<u8> ProcTex(float u, float v, TexturingRegs regs, State::ProcTex state) {
    u = std::abs(u);
    v = std::abs(v);

    // Get shift offset before noise generation
    // Note that the u shift is derived from v and the v shift from u.
    const float u_shift = GetShiftOffset(v, regs.proctex.u_shift, regs.proctex.u_clamp);
    const float v_shift = GetShiftOffset(u, regs.proctex.v_shift, regs.proctex.v_clamp);

    // Generate noise
    if (regs.proctex.noise_enable) {
        float noise = NoiseCoef(u, v, regs, state);
        u += noise * regs.proctex_noise_u.amplitude / 4095.0f;
        v += noise * regs.proctex_noise_v.amplitude / 4095.0f;
        u = std::abs(u);
        v = std::abs(v);
    }

    // Shift
    u += u_shift;
    v += v_shift;

    // Clamp
    ClampCoord(u, regs.proctex.u_clamp);
    ClampCoord(v, regs.proctex.v_clamp);

    // Combine and map
    const float lut_coord = CombineAndMap(u, v, regs.proctex.color_combiner, state.color_map_table);

    // Look up the color
    // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1]
    const u32 offset = regs.proctex_lut_offset;
    const u32 width = regs.proctex_lut.width;
    const float index = offset + (lut_coord * (width - 1));
    // Zero-initialized so that an unhandled filter value still returns a defined color
    Math::Vec4<u8> final_color{};
    const ProcTexFilter filter = regs.proctex_lut.filter;
    // TODO(wwylele): implement mipmap
    switch (filter) {
    case ProcTexFilter::Linear:
    case ProcTexFilter::LinearMipmapLinear:
    case ProcTexFilter::LinearMipmapNearest: {
        const int index_int = static_cast<int>(index);
        const float frac = index - index_int;
        const auto color_value = state.color_table[index_int].ToVector().Cast<float>();
        const auto color_diff = state.color_diff_table[index_int].ToVector().Cast<float>();
        final_color = (color_value + frac * color_diff).Cast<u8>();
        break;
    }
    case ProcTexFilter::Nearest:
    case ProcTexFilter::NearestMipmapLinear:
    case ProcTexFilter::NearestMipmapNearest:
        final_color = state.color_table[static_cast<int>(std::round(index))].ToVector();
        break;
    default:
        LOG_CRITICAL(HW_GPU, "Unknown filter %u", static_cast<u32>(filter));
        break;
    }

    if (regs.proctex.separate_alpha) {
        // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It
        // uses the output of CombineAndMap directly instead.
        const float final_alpha =
            CombineAndMap(u, v, regs.proctex.alpha_combiner, state.alpha_map_table);
        return Math::MakeVec<u8>(final_color.rgb(), static_cast<u8>(final_alpha * 255));
    } else {
        return final_color;
    }
}
|
||||
|
||||
} // namespace Rasterizer
|
||||
} // namespace Pica
|
16
src/video_core/swrasterizer/proctex.h
Normal file
16
src/video_core/swrasterizer/proctex.h
Normal file
|
@ -0,0 +1,16 @@
|
|||
// Copyright 2017 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
// Guard against multiple inclusion; this header declares a function and pulls in other headers.
#pragma once

#include "common/common_types.h"
#include "common/vector_math.h"
#include "video_core/pica_state.h"

namespace Pica {
namespace Rasterizer {

/// Generates procedural texture color for the given coordinates
Math::Vec4<u8> ProcTex(float u, float v, TexturingRegs regs, State::ProcTex state);

} // namespace Rasterizer
} // namespace Pica
|
|
@ -23,6 +23,7 @@
|
|||
#include "video_core/regs_texturing.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
#include "video_core/swrasterizer/framebuffer.h"
|
||||
#include "video_core/swrasterizer/proctex.h"
|
||||
#include "video_core/swrasterizer/rasterizer.h"
|
||||
#include "video_core/swrasterizer/texturing.h"
|
||||
#include "video_core/texture/texture_decode.h"
|
||||
|
@ -268,7 +269,7 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
|
|||
uv[2].u() = GetInterpolatedAttribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u());
|
||||
uv[2].v() = GetInterpolatedAttribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v());
|
||||
|
||||
Math::Vec4<u8> texture_color[3]{};
|
||||
Math::Vec4<u8> texture_color[4]{};
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
const auto& texture = textures[i];
|
||||
if (!texture.enabled)
|
||||
|
@ -334,6 +335,13 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
|
|||
}
|
||||
}
|
||||
|
||||
// sample procedural texture
|
||||
if (regs.texturing.main_config.texture3_enable) {
|
||||
const auto& proctex_uv = uv[regs.texturing.main_config.texture3_coordinates];
|
||||
texture_color[3] = ProcTex(proctex_uv.u().ToFloat32(), proctex_uv.v().ToFloat32(),
|
||||
g_state.regs.texturing, g_state.proctex);
|
||||
}
|
||||
|
||||
// Texture environment - consists of 6 stages of color and alpha combining.
|
||||
//
|
||||
// Color combiners take three input color values from some source (e.g. interpolated
|
||||
|
@ -376,6 +384,9 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
|
|||
case Source::Texture2:
|
||||
return texture_color[2];
|
||||
|
||||
case Source::Texture3:
|
||||
return texture_color[3];
|
||||
|
||||
case Source::PreviousBuffer:
|
||||
return combiner_buffer;
|
||||
|
||||
|
|
Loading…
Reference in a new issue