1
0
Fork 0
forked from suyu/suyu

Merge pull request #929 from neobrain/geoshader_definitions

Pica/Shader: Add geometry shader definitions.
This commit is contained in:
Tony Wasserka 2015-07-21 15:24:48 +02:00
commit aa6dfdb827
6 changed files with 169 additions and 156 deletions

View file

@ -259,7 +259,7 @@ void GraphicsVertexShaderModel::OnUpdate()
for (auto pattern : Pica::g_state.vs.swizzle_data) for (auto pattern : Pica::g_state.vs.swizzle_data)
info.swizzle_info.push_back({pattern}); info.swizzle_info.push_back({pattern});
info.labels.insert({ Pica::g_state.regs.vs_main_offset, "main" }); info.labels.insert({ Pica::g_state.regs.vs.main_offset, "main" });
endResetModel(); endResetModel();
} }

View file

@ -45,7 +45,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
if (GPU::g_skip_frame && id != PICA_REG_INDEX(trigger_irq)) if (GPU::g_skip_frame && id != PICA_REG_INDEX(trigger_irq))
return; return;
// TODO: Figure out how register masking acts on e.g. vs_uniform_setup.set_value // TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value
u32 old_value = regs[id]; u32 old_value = regs[id];
regs[id] = (old_value & ~mask) | (value & mask); regs[id] = (old_value & ~mask) | (value & mask);
@ -282,7 +282,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
&geometry_dumper, _1, _2, _3)); &geometry_dumper, _1, _2, _3));
// Send to vertex shader // Send to vertex shader
VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes()); VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes(), g_state.regs.vs, g_state.vs);
if (is_indexed) { if (is_indexed) {
// TODO: Add processed vertex to vertex cache! // TODO: Add processed vertex to vertex cache!
@ -321,35 +321,35 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
break; break;
} }
case PICA_REG_INDEX(vs_bool_uniforms): case PICA_REG_INDEX(vs.bool_uniforms):
for (unsigned i = 0; i < 16; ++i) for (unsigned i = 0; i < 16; ++i)
g_state.vs.uniforms.b[i] = (regs.vs_bool_uniforms.Value() & (1 << i)) != 0; g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0;
break; break;
case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1): case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1):
case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[1], 0x2b2): case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2):
case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[2], 0x2b3): case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3):
case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4): case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4):
{ {
int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1)); int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1));
auto values = regs.vs_int_uniforms[index]; auto values = regs.vs.int_uniforms[index];
g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w); g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w);
LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x",
index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value()); index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value());
break; break;
} }
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1):
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2):
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3):
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[3], 0x2c4): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[3], 0x2c4):
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[4], 0x2c5): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[4], 0x2c5):
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[5], 0x2c6): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6):
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7):
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8):
{ {
auto& uniform_setup = regs.vs_uniform_setup; auto& uniform_setup = regs.vs.uniform_setup;
// TODO: Does actual hardware indeed keep an intermediate buffer or does // TODO: Does actual hardware indeed keep an intermediate buffer or does
// it directly write the values? // it directly write the values?
@ -392,32 +392,32 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
} }
// Load shader program code // Load shader program code
case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc):
case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd):
case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[2], 0x2ce): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce):
case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[3], 0x2cf): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[3], 0x2cf):
case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[4], 0x2d0): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[4], 0x2d0):
case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[5], 0x2d1): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1):
case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2):
case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3):
{ {
g_state.vs.program_code[regs.vs_program.offset] = value; g_state.vs.program_code[regs.vs.program.offset] = value;
regs.vs_program.offset++; regs.vs.program.offset++;
break; break;
} }
// Load swizzle pattern data // Load swizzle pattern data
case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[0], 0x2d6): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6):
case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[1], 0x2d7): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7):
case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[2], 0x2d8): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8):
case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[3], 0x2d9): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[3], 0x2d9):
case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[4], 0x2da): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[4], 0x2da):
case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[5], 0x2db): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db):
case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc):
case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd):
{ {
g_state.vs.swizzle_data[regs.vs_swizzle_patterns.offset] = value; g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value;
regs.vs_swizzle_patterns.offset++; regs.vs.swizzle_patterns.offset++;
break; break;
} }

View file

@ -788,25 +788,27 @@ struct Regs {
List = 0, List = 0,
Strip = 1, Strip = 1,
Fan = 2, Fan = 2,
ListIndexed = 3, // TODO: No idea if this is correct Shader = 3, // Programmable setup unit implemented in a geometry shader
}; };
BitField<8, 2, TriangleTopology> triangle_topology; BitField<8, 2, TriangleTopology> triangle_topology;
INSERT_PADDING_WORDS(0x51); INSERT_PADDING_WORDS(0x21);
struct ShaderConfig {
BitField<0, 16, u32> bool_uniforms;
BitField<0, 16, u32> vs_bool_uniforms;
union { union {
BitField< 0, 8, u32> x; BitField< 0, 8, u32> x;
BitField< 8, 8, u32> y; BitField< 8, 8, u32> y;
BitField<16, 8, u32> z; BitField<16, 8, u32> z;
BitField<24, 8, u32> w; BitField<24, 8, u32> w;
} vs_int_uniforms[4]; } int_uniforms[4];
INSERT_PADDING_WORDS(0x5); INSERT_PADDING_WORDS(0x5);
// Offset to shader program entry point (in words) // Offset to shader program entry point (in words)
BitField<0, 16, u32> vs_main_offset; BitField<0, 16, u32> main_offset;
union { union {
BitField< 0, 4, u64> attribute0_register; BitField< 0, 4, u64> attribute0_register;
@ -835,8 +837,9 @@ struct Regs {
}; };
return (int)fields[attribute_index]; return (int)fields[attribute_index];
} }
} vs_input_register_map; } input_register_map;
// OUTMAP_MASK, 0x28E, CODETRANSFER_END
INSERT_PADDING_WORDS(0x3); INSERT_PADDING_WORDS(0x3);
struct { struct {
@ -853,16 +856,16 @@ struct Regs {
union { union {
// Index of the next uniform to write to // Index of the next uniform to write to
// TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices
// TODO: Maybe the uppermost index is for the geometry shader? Investigate!
BitField<0, 7, u32> index; BitField<0, 7, u32> index;
BitField<31, 1, Format> format; BitField<31, 1, Format> format;
}; };
// Writing to these registers sets the "current" uniform. // Writing to these registers sets the current uniform.
// TODO: It's not clear how the hardware stores what the "current" uniform is.
u32 set_value[8]; u32 set_value[8];
} vs_uniform_setup; } uniform_setup;
INSERT_PADDING_WORDS(0x2); INSERT_PADDING_WORDS(0x2);
@ -872,9 +875,8 @@ struct Regs {
u32 offset; u32 offset;
// Writing to these registers sets the "current" word in the shader program. // Writing to these registers sets the "current" word in the shader program.
// TODO: It's not clear how the hardware stores what the "current" word is.
u32 set_word[8]; u32 set_word[8];
} vs_program; } program;
INSERT_PADDING_WORDS(0x1); INSERT_PADDING_WORDS(0x1);
@ -885,12 +887,17 @@ struct Regs {
// Incremented with each instruction write. // Incremented with each instruction write.
u32 offset; u32 offset;
// Writing to these registers sets the "current" swizzle pattern in the table. // Writing to these registers sets the current swizzle pattern in the table.
// TODO: It's not clear how the hardware stores what the "current" swizzle pattern is.
u32 set_word[8]; u32 set_word[8];
} vs_swizzle_patterns; } swizzle_patterns;
INSERT_PADDING_WORDS(0x22); INSERT_PADDING_WORDS(0x2);
};
ShaderConfig gs;
ShaderConfig vs;
INSERT_PADDING_WORDS(0x20);
// Map register indices to names readable by humans // Map register indices to names readable by humans
// Used for debugging purposes, so performance is not an issue here // Used for debugging purposes, so performance is not an issue here
@ -937,13 +944,20 @@ struct Regs {
ADD_FIELD(vs_default_attributes_setup); ADD_FIELD(vs_default_attributes_setup);
ADD_FIELD(command_buffer); ADD_FIELD(command_buffer);
ADD_FIELD(triangle_topology); ADD_FIELD(triangle_topology);
ADD_FIELD(vs_bool_uniforms); ADD_FIELD(gs.bool_uniforms);
ADD_FIELD(vs_int_uniforms); ADD_FIELD(gs.int_uniforms);
ADD_FIELD(vs_main_offset); ADD_FIELD(gs.main_offset);
ADD_FIELD(vs_input_register_map); ADD_FIELD(gs.input_register_map);
ADD_FIELD(vs_uniform_setup); ADD_FIELD(gs.uniform_setup);
ADD_FIELD(vs_program); ADD_FIELD(gs.program);
ADD_FIELD(vs_swizzle_patterns); ADD_FIELD(gs.swizzle_patterns);
ADD_FIELD(vs.bool_uniforms);
ADD_FIELD(vs.int_uniforms);
ADD_FIELD(vs.main_offset);
ADD_FIELD(vs.input_register_map);
ADD_FIELD(vs.uniform_setup);
ADD_FIELD(vs.program);
ADD_FIELD(vs.swizzle_patterns);
#undef ADD_FIELD #undef ADD_FIELD
@ -1015,17 +1029,14 @@ ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232); ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232);
ASSERT_REG_POSITION(command_buffer, 0x238); ASSERT_REG_POSITION(command_buffer, 0x238);
ASSERT_REG_POSITION(triangle_topology, 0x25e); ASSERT_REG_POSITION(triangle_topology, 0x25e);
ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0); ASSERT_REG_POSITION(gs, 0x280);
ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1); ASSERT_REG_POSITION(vs, 0x2b0);
ASSERT_REG_POSITION(vs_main_offset, 0x2ba);
ASSERT_REG_POSITION(vs_input_register_map, 0x2bb);
ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0);
ASSERT_REG_POSITION(vs_program, 0x2cb);
ASSERT_REG_POSITION(vs_swizzle_patterns, 0x2d5);
#undef ASSERT_REG_POSITION #undef ASSERT_REG_POSITION
#endif // !defined(_MSC_VER) #endif // !defined(_MSC_VER)
static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig structure has incorrect size");
// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be");
static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be");
@ -1135,7 +1146,7 @@ struct State {
Regs regs; Regs regs;
/// Vertex shader memory /// Vertex shader memory
struct { struct ShaderSetup {
struct { struct {
Math::Vec4<float24> f[96]; Math::Vec4<float24> f[96];
std::array<bool, 16> b; std::array<bool, 16> b;
@ -1146,7 +1157,10 @@ struct State {
std::array<u32, 1024> program_code; std::array<u32, 1024> program_code;
std::array<u32, 1024> swizzle_data; std::array<u32, 1024> swizzle_data;
} vs; };
ShaderSetup vs;
ShaderSetup gs;
/// Current Pica command list /// Current Pica command list
struct { struct {

View file

@ -20,8 +20,9 @@ template<typename VertexType>
void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler) void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler)
{ {
switch (topology) { switch (topology) {
// TODO: Figure out what's different with TriangleTopology::Shader.
case Regs::TriangleTopology::List: case Regs::TriangleTopology::List:
case Regs::TriangleTopology::ListIndexed: case Regs::TriangleTopology::Shader:
if (buffer_index < 2) { if (buffer_index < 2) {
buffer[buffer_index++] = vtx; buffer[buffer_index++] = vtx;
} else { } else {

View file

@ -546,20 +546,18 @@ static void ProcessShaderCode(VertexShaderState& state) {
static Common::Profiling::TimingCategory shader_category("Vertex Shader"); static Common::Profiling::TimingCategory shader_category("Vertex Shader");
OutputVertex RunShader(const InputVertex& input, int num_attributes) { OutputVertex RunShader(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) {
Common::Profiling::ScopeTimer timer(shader_category); Common::Profiling::ScopeTimer timer(shader_category);
const auto& regs = g_state.regs;
const auto& vs = g_state.vs;
VertexShaderState state; VertexShaderState state;
const u32* main = &vs.program_code[regs.vs_main_offset]; const u32* main = &setup.program_code[config.main_offset];
state.program_counter = (u32*)main; state.program_counter = (u32*)main;
state.debug.max_offset = 0; state.debug.max_offset = 0;
state.debug.max_opdesc_id = 0; state.debug.max_opdesc_id = 0;
// Setup input register table // Setup input register table
const auto& attribute_register_map = regs.vs_input_register_map; const auto& attribute_register_map = config.input_register_map;
float24 dummy_register; float24 dummy_register;
boost::fill(state.input_register_table, &dummy_register); boost::fill(state.input_register_table, &dummy_register);
@ -584,16 +582,16 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
state.conditional_code[1] = false; state.conditional_code[1] = false;
ProcessShaderCode(state); ProcessShaderCode(state);
DebugUtils::DumpShader(vs.program_code.data(), state.debug.max_offset, vs.swizzle_data.data(), DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(),
state.debug.max_opdesc_id, regs.vs_main_offset, state.debug.max_opdesc_id, config.main_offset,
regs.vs_output_attributes); g_state.regs.vs_output_attributes); // TODO: Don't hardcode VS here
// Setup output data // Setup output data
OutputVertex ret; OutputVertex ret;
// TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
// figure out what those circumstances are and enable the remaining outputs then. // figure out what those circumstances are and enable the remaining outputs then.
for (int i = 0; i < 7; ++i) { for (int i = 0; i < 7; ++i) {
const auto& output_register_map = regs.vs_output_attributes[i]; const auto& output_register_map = g_state.regs.vs_output_attributes[i]; // TODO: Don't hardcode VS here
u32 semantics[4] = { u32 semantics[4] = {
output_register_map.map_x, output_register_map.map_y, output_register_map.map_x, output_register_map.map_y,

View file

@ -65,7 +65,7 @@ struct OutputVertex {
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
OutputVertex RunShader(const InputVertex& input, int num_attributes); OutputVertex RunShader(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup);
} // namespace } // namespace