1
0
Fork 0
forked from suyu/suyu

Merge pull request #2476 from yuriks/shader-refactor3

Oh No! More shader changes!
This commit is contained in:
Yuri Kunde Schlesner 2017-02-04 13:02:48 -08:00 committed by GitHub
commit 97e06b0a0d
20 changed files with 184 additions and 180 deletions

View file

@ -71,8 +71,8 @@ void GraphicsTracingWidget::StartRecording() {
std::array<u32, 4 * 16> default_attributes; std::array<u32, 4 * 16> default_attributes;
for (unsigned i = 0; i < 16; ++i) { for (unsigned i = 0; i < 16; ++i) {
for (unsigned comp = 0; comp < 3; ++comp) { for (unsigned comp = 0; comp < 3; ++comp) {
default_attributes[4 * i + comp] = default_attributes[4 * i + comp] = nihstro::to_float24(
nihstro::to_float24(Pica::g_state.vs_default_attributes[i][comp].ToFloat32()); Pica::g_state.input_default_attributes.attr[i][comp].ToFloat32());
} }
} }

View file

@ -511,7 +511,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
auto& shader_config = Pica::g_state.regs.vs; auto& shader_config = Pica::g_state.regs.vs;
for (auto instr : shader_setup.program_code) for (auto instr : shader_setup.program_code)
info.code.push_back({instr}); info.code.push_back({instr});
int num_attributes = Pica::g_state.regs.vertex_attributes.GetNumTotalAttributes(); int num_attributes = shader_config.max_input_attribute_index + 1;
for (auto pattern : shader_setup.swizzle_data) for (auto pattern : shader_setup.swizzle_data)
info.swizzle_info.push_back({pattern}); info.swizzle_info.push_back({pattern});
@ -522,11 +522,11 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
// Generate debug information // Generate debug information
Pica::Shader::InterpreterEngine shader_engine; Pica::Shader::InterpreterEngine shader_engine;
shader_engine.SetupBatch(shader_setup, entry_point); shader_engine.SetupBatch(shader_setup, entry_point);
debug_data = shader_engine.ProduceDebugInfo(shader_setup, input_vertex, num_attributes); debug_data = shader_engine.ProduceDebugInfo(shader_setup, input_vertex, shader_config);
// Reload widget state // Reload widget state
for (int attr = 0; attr < num_attributes; ++attr) { for (int attr = 0; attr < num_attributes; ++attr) {
unsigned source_attr = shader_config.input_register_map.GetRegisterForAttribute(attr); unsigned source_attr = shader_config.GetRegisterForAttribute(attr);
input_data_mapping[attr]->setText(QString("-> v%1").arg(source_attr)); input_data_mapping[attr]->setText(QString("-> v%1").arg(source_attr));
input_data_container[attr]->setVisible(true); input_data_container[attr]->setVisible(true);
} }

View file

@ -82,7 +82,7 @@ private:
nihstro::ShaderInfo info; nihstro::ShaderInfo info;
Pica::Shader::DebugData<true> debug_data; Pica::Shader::DebugData<true> debug_data;
Pica::Shader::InputVertex input_vertex; Pica::Shader::AttributeBuffer input_vertex;
friend class GraphicsVertexShaderModel; friend class GraphicsVertexShaderModel;
}; };

View file

@ -121,22 +121,19 @@ public:
class Iterator { class Iterator {
public: public:
Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {} Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {}
Iterator(IntTy val, int bit) : m_val(val), m_bit(bit) {} Iterator(IntTy val) : m_val(val), m_bit(0) {}
Iterator& operator=(Iterator other) { Iterator& operator=(Iterator other) {
new (this) Iterator(other); new (this) Iterator(other);
return *this; return *this;
} }
int operator*() { int operator*() {
return m_bit; return m_bit + ComputeLsb();
} }
Iterator& operator++() { Iterator& operator++() {
if (m_val == 0) { int lsb = ComputeLsb();
m_bit = -1; m_val >>= lsb + 1;
} else { m_bit += lsb + 1;
int bit = LeastSignificantSetBit(m_val); m_has_lsb = false;
m_val &= ~(1 << bit);
m_bit = bit;
}
return *this; return *this;
} }
Iterator operator++(int _) { Iterator operator++(int _) {
@ -145,15 +142,24 @@ public:
return other; return other;
} }
bool operator==(Iterator other) const { bool operator==(Iterator other) const {
return m_bit == other.m_bit; return m_val == other.m_val;
} }
bool operator!=(Iterator other) const { bool operator!=(Iterator other) const {
return m_bit != other.m_bit; return m_val != other.m_val;
} }
private: private:
int ComputeLsb() {
if (!m_has_lsb) {
m_lsb = LeastSignificantSetBit(m_val);
m_has_lsb = true;
}
return m_lsb;
}
IntTy m_val; IntTy m_val;
int m_bit; int m_bit;
int m_lsb = -1;
bool m_has_lsb = false;
}; };
BitSet() : m_val(0) {} BitSet() : m_val(0) {}
@ -221,11 +227,10 @@ public:
} }
Iterator begin() const { Iterator begin() const {
Iterator it(m_val, 0); return Iterator(m_val);
return ++it;
} }
Iterator end() const { Iterator end() const {
return Iterator(m_val, -1); return Iterator(0);
} }
IntTy m_val; IntTy m_val;

View file

@ -18,6 +18,8 @@
#include "video_core/rasterizer.h" #include "video_core/rasterizer.h"
#include "video_core/shader/shader.h" #include "video_core/shader/shader.h"
using Pica::Rasterizer::Vertex;
namespace Pica { namespace Pica {
namespace Clipper { namespace Clipper {
@ -29,20 +31,20 @@ public:
float24::FromFloat32(0), float24::FromFloat32(0))) float24::FromFloat32(0), float24::FromFloat32(0)))
: coeffs(coeffs), bias(bias) {} : coeffs(coeffs), bias(bias) {}
bool IsInside(const OutputVertex& vertex) const { bool IsInside(const Vertex& vertex) const {
return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0); return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0);
} }
bool IsOutSide(const OutputVertex& vertex) const { bool IsOutSide(const Vertex& vertex) const {
return !IsInside(vertex); return !IsInside(vertex);
} }
OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const { Vertex GetIntersection(const Vertex& v0, const Vertex& v1) const {
float24 dp = Math::Dot(v0.pos + bias, coeffs); float24 dp = Math::Dot(v0.pos + bias, coeffs);
float24 dp_prev = Math::Dot(v1.pos + bias, coeffs); float24 dp_prev = Math::Dot(v1.pos + bias, coeffs);
float24 factor = dp_prev / (dp_prev - dp); float24 factor = dp_prev / (dp_prev - dp);
return OutputVertex::Lerp(factor, v0, v1); return Vertex::Lerp(factor, v0, v1);
} }
private: private:
@ -51,7 +53,7 @@ private:
Math::Vec4<float24> bias; Math::Vec4<float24> bias;
}; };
static void InitScreenCoordinates(OutputVertex& vtx) { static void InitScreenCoordinates(Vertex& vtx) {
struct { struct {
float24 halfsize_x; float24 halfsize_x;
float24 offset_x; float24 offset_x;
@ -91,8 +93,8 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
// introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a
// fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9. // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9.
static const size_t MAX_VERTICES = 9; static const size_t MAX_VERTICES = 9;
static_vector<OutputVertex, MAX_VERTICES> buffer_a = {v0, v1, v2}; static_vector<Vertex, MAX_VERTICES> buffer_a = {v0, v1, v2};
static_vector<OutputVertex, MAX_VERTICES> buffer_b; static_vector<Vertex, MAX_VERTICES> buffer_b;
auto* output_list = &buffer_a; auto* output_list = &buffer_a;
auto* input_list = &buffer_b; auto* input_list = &buffer_b;
@ -123,7 +125,7 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
std::swap(input_list, output_list); std::swap(input_list, output_list);
output_list->clear(); output_list->clear();
const OutputVertex* reference_vertex = &input_list->back(); const Vertex* reference_vertex = &input_list->back();
for (const auto& vertex : *input_list) { for (const auto& vertex : *input_list) {
// NOTE: This algorithm changes vertex order in some cases! // NOTE: This algorithm changes vertex order in some cases!
@ -148,9 +150,9 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
InitScreenCoordinates((*output_list)[1]); InitScreenCoordinates((*output_list)[1]);
for (size_t i = 0; i < output_list->size() - 2; i++) { for (size_t i = 0; i < output_list->size() - 2; i++) {
OutputVertex& vtx0 = (*output_list)[0]; Vertex& vtx0 = (*output_list)[0];
OutputVertex& vtx1 = (*output_list)[i + 1]; Vertex& vtx1 = (*output_list)[i + 1];
OutputVertex& vtx2 = (*output_list)[i + 2]; Vertex& vtx2 = (*output_list)[i + 2];
InitScreenCoordinates(vtx2); InitScreenCoordinates(vtx2);

View file

@ -125,20 +125,21 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
// TODO: Verify that this actually modifies the register! // TODO: Verify that this actually modifies the register!
if (setup.index < 15) { if (setup.index < 15) {
g_state.vs_default_attributes[setup.index] = attribute; g_state.input_default_attributes.attr[setup.index] = attribute;
setup.index++; setup.index++;
} else { } else {
// Put each attribute into an immediate input buffer. // Put each attribute into an immediate input buffer. When all specified immediate
// When all specified immediate attributes are present, the Vertex Shader is invoked // attributes are present, the Vertex Shader is invoked and everything is sent to
// and everything is // the primitive assembler.
// sent to the primitive assembler.
auto& immediate_input = g_state.immediate.input_vertex; auto& immediate_input = g_state.immediate.input_vertex;
auto& immediate_attribute_id = g_state.immediate.current_attribute; auto& immediate_attribute_id = g_state.immediate.current_attribute;
immediate_input.attr[immediate_attribute_id++] = attribute; immediate_input.attr[immediate_attribute_id] = attribute;
if (immediate_attribute_id >= regs.vs.num_input_attributes + 1) { if (immediate_attribute_id < regs.max_input_attrib_index) {
immediate_attribute_id += 1;
} else {
MICROPROFILE_SCOPE(GPU_Drawing); MICROPROFILE_SCOPE(GPU_Drawing);
immediate_attribute_id = 0; immediate_attribute_id = 0;
@ -150,10 +151,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
static_cast<void*>(&immediate_input)); static_cast<void*>(&immediate_input));
Shader::UnitState shader_unit; Shader::UnitState shader_unit;
shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1); Shader::AttributeBuffer output{};
shader_unit.LoadInput(regs.vs, immediate_input);
shader_engine->Run(g_state.vs, shader_unit); shader_engine->Run(g_state.vs, shader_unit);
auto output_vertex = Shader::OutputVertex::FromRegisters( shader_unit.WriteOutput(regs.vs, output);
shader_unit.registers.output, regs, regs.vs.output_mask);
// Send to renderer // Send to renderer
using Pica::Shader::OutputVertex; using Pica::Shader::OutputVertex;
@ -162,7 +164,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
}; };
g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle); g_state.primitive_assembler.SubmitVertex(
Shader::OutputVertex::FromAttributeBuffer(regs, output), AddTriangle);
} }
} }
} }
@ -280,19 +283,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
if (!vertex_cache_hit) { if (!vertex_cache_hit) {
// Initialize data for the current vertex // Initialize data for the current vertex
Shader::InputVertex input; Shader::AttributeBuffer input, output{};
loader.LoadVertex(base_address, index, vertex, input, memory_accesses); loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
// Send to vertex shader // Send to vertex shader
if (g_debug_context) if (g_debug_context)
g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
(void*)&input); (void*)&input);
shader_unit.LoadInputVertex(input, loader.GetNumTotalAttributes()); shader_unit.LoadInput(regs.vs, input);
shader_engine->Run(g_state.vs, shader_unit); shader_engine->Run(g_state.vs, shader_unit);
shader_unit.WriteOutput(regs.vs, output);
// Retrieve vertex from register data // Retrieve vertex from register data
output_vertex = Shader::OutputVertex::FromRegisters(shader_unit.registers.output, output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs, output);
regs, regs.vs.output_mask);
if (is_indexed) { if (is_indexed) {
vertex_cache[vertex_cache_pos] = output_vertex; vertex_cache[vertex_cache_pos] = output_vertex;

View file

@ -99,7 +99,8 @@ struct Regs {
TEXCOORD1_U = 14, TEXCOORD1_U = 14,
TEXCOORD1_V = 15, TEXCOORD1_V = 15,
// TODO: Not verified TEXCOORD0_W = 16,
VIEW_X = 18, VIEW_X = 18,
VIEW_Y = 19, VIEW_Y = 19,
VIEW_Z = 20, VIEW_Z = 20,
@ -871,7 +872,7 @@ struct Regs {
LightSrc light[8]; LightSrc light[8];
LightColor global_ambient; // Emission + (material.ambient * lighting.ambient) LightColor global_ambient; // Emission + (material.ambient * lighting.ambient)
INSERT_PADDING_WORDS(0x1); INSERT_PADDING_WORDS(0x1);
BitField<0, 3, u32> num_lights; // Number of enabled lights - 1 BitField<0, 3, u32> max_light_index; // Number of enabled lights - 1
union { union {
BitField<2, 2, LightingFresnelSelector> fresnel_selector; BitField<2, 2, LightingFresnelSelector> fresnel_selector;
@ -1048,7 +1049,7 @@ struct Regs {
BitField<48, 12, u64> attribute_mask; BitField<48, 12, u64> attribute_mask;
// number of total attributes minus 1 // number of total attributes minus 1
BitField<60, 4, u64> num_extra_attributes; BitField<60, 4, u64> max_attribute_index;
}; };
inline VertexAttributeFormat GetFormat(int n) const { inline VertexAttributeFormat GetFormat(int n) const {
@ -1079,7 +1080,7 @@ struct Regs {
} }
inline int GetNumTotalAttributes() const { inline int GetNumTotalAttributes() const {
return (int)num_extra_attributes + 1; return (int)max_attribute_index + 1;
} }
// Attribute loaders map the source vertex data to input attributes // Attribute loaders map the source vertex data to input attributes
@ -1179,7 +1180,12 @@ struct Regs {
} }
} command_buffer; } command_buffer;
INSERT_PADDING_WORDS(0x07); INSERT_PADDING_WORDS(4);
/// Number of input attributes to the vertex shader minus 1
BitField<0, 4, u32> max_input_attrib_index;
INSERT_PADDING_WORDS(2);
enum class GPUMode : u32 { enum class GPUMode : u32 {
Drawing = 0, Drawing = 0,
@ -1217,42 +1223,21 @@ struct Regs {
union { union {
// Number of input attributes to shader unit - 1 // Number of input attributes to shader unit - 1
BitField<0, 4, u32> num_input_attributes; BitField<0, 4, u32> max_input_attribute_index;
}; };
// Offset to shader program entry point (in words) // Offset to shader program entry point (in words)
BitField<0, 16, u32> main_offset; BitField<0, 16, u32> main_offset;
union { /// Maps input attributes to registers. 4-bits per attribute, specifying a register index
BitField<0, 4, u64> attribute0_register; u32 input_attribute_to_register_map_low;
BitField<4, 4, u64> attribute1_register; u32 input_attribute_to_register_map_high;
BitField<8, 4, u64> attribute2_register;
BitField<12, 4, u64> attribute3_register;
BitField<16, 4, u64> attribute4_register;
BitField<20, 4, u64> attribute5_register;
BitField<24, 4, u64> attribute6_register;
BitField<28, 4, u64> attribute7_register;
BitField<32, 4, u64> attribute8_register;
BitField<36, 4, u64> attribute9_register;
BitField<40, 4, u64> attribute10_register;
BitField<44, 4, u64> attribute11_register;
BitField<48, 4, u64> attribute12_register;
BitField<52, 4, u64> attribute13_register;
BitField<56, 4, u64> attribute14_register;
BitField<60, 4, u64> attribute15_register;
int GetRegisterForAttribute(int attribute_index) const { unsigned int GetRegisterForAttribute(unsigned int attribute_index) const {
u64 fields[] = { u64 map = ((u64)input_attribute_to_register_map_high << 32) |
attribute0_register, attribute1_register, attribute2_register, (u64)input_attribute_to_register_map_low;
attribute3_register, attribute4_register, attribute5_register, return (map >> (attribute_index * 4)) & 0b1111;
attribute6_register, attribute7_register, attribute8_register,
attribute9_register, attribute10_register, attribute11_register,
attribute12_register, attribute13_register, attribute14_register,
attribute15_register,
};
return (int)fields[attribute_index];
} }
} input_register_map;
BitField<0, 16, u32> output_mask; BitField<0, 16, u32> output_mask;

View file

@ -23,7 +23,7 @@ struct State {
Shader::ShaderSetup vs; Shader::ShaderSetup vs;
Shader::ShaderSetup gs; Shader::ShaderSetup gs;
std::array<Math::Vec4<float24>, 16> vs_default_attributes; Shader::AttributeBuffer input_default_attributes;
struct { struct {
union LutEntry { union LutEntry {
@ -66,7 +66,7 @@ struct State {
/// Struct used to describe immediate mode rendering state /// Struct used to describe immediate mode rendering state
struct ImmediateModeState { struct ImmediateModeState {
// Used to buffer partial vertices for immediate-mode rendering. // Used to buffer partial vertices for immediate-mode rendering.
Shader::InputVertex input_vertex; Shader::AttributeBuffer input_vertex;
// Index of the next attribute to be loaded into `input_vertex`. // Index of the next attribute to be loaded into `input_vertex`.
u32 current_attribute = 0; u32 current_attribute = 0;
} immediate; } immediate;

View file

@ -14,7 +14,7 @@ PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topolo
: topology(topology), buffer_index(0) {} : topology(topology), buffer_index(0) {}
template <typename VertexType> template <typename VertexType>
void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx,
TriangleHandler triangle_handler) { TriangleHandler triangle_handler) {
switch (topology) { switch (topology) {
// TODO: Figure out what's different with TriangleTopology::Shader. // TODO: Figure out what's different with TriangleTopology::Shader.

View file

@ -15,7 +15,8 @@ namespace Pica {
*/ */
template <typename VertexType> template <typename VertexType>
struct PrimitiveAssembler { struct PrimitiveAssembler {
using TriangleHandler = std::function<void(VertexType& v0, VertexType& v1, VertexType& v2)>; using TriangleHandler =
std::function<void(const VertexType& v0, const VertexType& v1, const VertexType& v2)>;
PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List); PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List);
@ -25,7 +26,7 @@ struct PrimitiveAssembler {
* NOTE: We could specify the triangle handler in the constructor, but this way we can * NOTE: We could specify the triangle handler in the constructor, but this way we can
* keep event and handler code next to each other. * keep event and handler code next to each other.
*/ */
void SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler); void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler);
/** /**
* Resets the internal state of the PrimitiveAssembler. * Resets the internal state of the PrimitiveAssembler.

View file

@ -308,8 +308,8 @@ MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 24
* Helper function for ProcessTriangle with the "reversed" flag to allow for implementing * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
* culling via recursion. * culling via recursion.
*/ */
static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Vertex& v2,
const Shader::OutputVertex& v2, bool reversed = false) { bool reversed = false) {
const auto& regs = g_state.regs; const auto& regs = g_state.regs;
MICROPROFILE_SCOPE(GPU_Rasterization); MICROPROFILE_SCOPE(GPU_Rasterization);
@ -1277,8 +1277,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
} }
} }
void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2) {
const Shader::OutputVertex& v2) {
ProcessTriangleInternal(v0, v1, v2); ProcessTriangleInternal(v0, v1, v2);
} }

View file

@ -4,16 +4,44 @@
#pragma once #pragma once
namespace Pica { #include "video_core/shader/shader.h"
namespace Shader { namespace Pica {
struct OutputVertex;
}
namespace Rasterizer { namespace Rasterizer {
void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, struct Vertex : Shader::OutputVertex {
const Shader::OutputVertex& v2); Vertex(const OutputVertex& v) : OutputVertex(v) {}
// Attributes used to store intermediate results
// position after perspective divide
Math::Vec3<float24> screenpos;
// Linear interpolation
// factor: 0=this, 1=vtx
void Lerp(float24 factor, const Vertex& vtx) {
pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor);
// TODO: Should perform perspective correct interpolation here...
tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor);
tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor);
screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);
color = color * factor + vtx.color * (float24::FromFloat32(1) - factor);
}
// Linear interpolation
// factor: 0=v0, 1=v1
static Vertex Lerp(float24 factor, const Vertex& v0, const Vertex& v1) {
Vertex ret = v0;
ret.Lerp(factor, v1);
return ret;
}
};
void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2);
} // namespace Rasterizer } // namespace Rasterizer

View file

@ -467,7 +467,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
// Fragment lighting switches // Fragment lighting switches
case PICA_REG_INDEX(lighting.disable): case PICA_REG_INDEX(lighting.disable):
case PICA_REG_INDEX(lighting.num_lights): case PICA_REG_INDEX(lighting.max_light_index):
case PICA_REG_INDEX(lighting.config0): case PICA_REG_INDEX(lighting.config0):
case PICA_REG_INDEX(lighting.config1): case PICA_REG_INDEX(lighting.config1):
case PICA_REG_INDEX(lighting.abs_lut_input): case PICA_REG_INDEX(lighting.abs_lut_input):

View file

@ -84,7 +84,7 @@ union PicaShaderConfig {
// Fragment lighting // Fragment lighting
state.lighting.enable = !regs.lighting.disable; state.lighting.enable = !regs.lighting.disable;
state.lighting.src_num = regs.lighting.num_lights + 1; state.lighting.src_num = regs.lighting.max_light_index + 1;
for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) { for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {
unsigned num = regs.lighting.light_enable.GetNum(light_index); unsigned num = regs.lighting.light_enable.GetNum(light_index);

View file

@ -4,6 +4,7 @@
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
#include "common/bit_set.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/microprofile.h" #include "common/microprofile.h"
#include "video_core/pica.h" #include "video_core/pica.h"
@ -19,38 +20,32 @@ namespace Pica {
namespace Shader { namespace Shader {
OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, OutputVertex OutputVertex::FromAttributeBuffer(const Regs& regs, AttributeBuffer& input) {
u32 output_mask) {
// Setup output data // Setup output data
OutputVertex ret; union {
// TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to OutputVertex ret{};
// figure out what those circumstances are and enable the remaining outputs then. std::array<float24, 24> vertex_slots;
unsigned index = 0; };
for (unsigned i = 0; i < 7; ++i) { static_assert(sizeof(vertex_slots) == sizeof(ret), "Struct and array have different sizes.");
if (index >= regs.vs_output_total) unsigned int num_attributes = regs.vs_output_total;
break; ASSERT(num_attributes <= 7);
for (unsigned int i = 0; i < num_attributes; ++i) {
const auto& output_register_map = regs.vs_output_attributes[i];
if ((output_mask & (1 << i)) == 0) Regs::VSOutputAttributes::Semantic semantics[4] = {
continue; output_register_map.map_x, output_register_map.map_y, output_register_map.map_z,
output_register_map.map_w};
const auto& output_register_map = regs.vs_output_attributes[index];
u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y,
output_register_map.map_z, output_register_map.map_w};
for (unsigned comp = 0; comp < 4; ++comp) { for (unsigned comp = 0; comp < 4; ++comp) {
float24* out = ((float24*)&ret) + semantics[comp]; Regs::VSOutputAttributes::Semantic semantic = semantics[comp];
if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { float24* out = &vertex_slots[semantic];
*out = output_regs[i][comp]; if (semantic < vertex_slots.size()) {
} else { *out = input.attr[i][comp];
// Zero output so that attributes which aren't output won't have denormals in them, } else if (semantic != Regs::VSOutputAttributes::INVALID) {
// which would slow us down later. LOG_ERROR(HW_GPU, "Invalid/unknown semantic id: %u", (unsigned int)semantic);
memset(out, 0, sizeof(*out));
} }
} }
index++;
} }
// The hardware takes the absolute and saturates vertex colors like this, *before* doing // The hardware takes the absolute and saturates vertex colors like this, *before* doing
@ -71,12 +66,20 @@ OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], co
return ret; return ret;
} }
void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { void UnitState::LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input) {
// Setup input register table const unsigned max_attribute = config.max_input_attribute_index;
const auto& attribute_register_map = g_state.regs.vs.input_register_map;
for (int i = 0; i < num_attributes; i++) for (unsigned attr = 0; attr <= max_attribute; ++attr) {
registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; unsigned reg = config.GetRegisterForAttribute(attr);
registers.input[reg] = input.attr[attr];
}
}
void UnitState::WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output) {
unsigned int output_i = 0;
for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) {
output.attr[output_i++] = registers.output[reg];
}
} }
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));

View file

@ -23,14 +23,11 @@ namespace Pica {
namespace Shader { namespace Shader {
struct InputVertex { struct AttributeBuffer {
alignas(16) Math::Vec4<float24> attr[16]; alignas(16) Math::Vec4<float24> attr[16];
}; };
struct OutputVertex { struct OutputVertex {
OutputVertex() = default;
// VS output attributes
Math::Vec4<float24> pos; Math::Vec4<float24> pos;
Math::Vec4<float24> quat; Math::Vec4<float24> quat;
Math::Vec4<float24> color; Math::Vec4<float24> color;
@ -42,43 +39,22 @@ struct OutputVertex {
INSERT_PADDING_WORDS(1); INSERT_PADDING_WORDS(1);
Math::Vec2<float24> tc2; Math::Vec2<float24> tc2;
// Padding for optimal alignment static OutputVertex FromAttributeBuffer(const Regs& regs, AttributeBuffer& output);
INSERT_PADDING_WORDS(4);
// Attributes used to store intermediate results
// position after perspective divide
Math::Vec3<float24> screenpos;
INSERT_PADDING_WORDS(1);
// Linear interpolation
// factor: 0=this, 1=vtx
void Lerp(float24 factor, const OutputVertex& vtx) {
pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor);
// TODO: Should perform perspective correct interpolation here...
tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor);
tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor);
screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);
color = color * factor + vtx.color * (float24::FromFloat32(1) - factor);
}
// Linear interpolation
// factor: 0=v0, 1=v1
static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) {
OutputVertex ret = v0;
ret.Lerp(factor, v1);
return ret;
}
static OutputVertex FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs,
u32 output_mask);
}; };
#define ASSERT_POS(var, pos) \
static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \
"offset.")
ASSERT_POS(pos, Regs::VSOutputAttributes::POSITION_X);
ASSERT_POS(quat, Regs::VSOutputAttributes::QUATERNION_X);
ASSERT_POS(color, Regs::VSOutputAttributes::COLOR_R);
ASSERT_POS(tc0, Regs::VSOutputAttributes::TEXCOORD0_U);
ASSERT_POS(tc1, Regs::VSOutputAttributes::TEXCOORD1_U);
ASSERT_POS(tc0_w, Regs::VSOutputAttributes::TEXCOORD0_W);
ASSERT_POS(view, Regs::VSOutputAttributes::VIEW_X);
ASSERT_POS(tc2, Regs::VSOutputAttributes::TEXCOORD2_U);
#undef ASSERT_POS
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size");
/** /**
* This structure contains the state information that needs to be unique for a shader unit. The 3DS * This structure contains the state information that needs to be unique for a shader unit. The 3DS
@ -137,10 +113,12 @@ struct UnitState {
/** /**
* Loads the unit state with an input vertex. * Loads the unit state with an input vertex.
* *
* @param input Input vertex into the shader * @param config Shader configuration registers corresponding to the unit.
* @param num_attributes The number of vertex shader attributes to load * @param input Attribute buffer to load into the input registers.
*/ */
void LoadInputVertex(const InputVertex& input, int num_attributes); void LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input);
void WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output);
}; };
struct ShaderSetup { struct ShaderSetup {

View file

@ -668,14 +668,14 @@ void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state) const {
} }
DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup, DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup,
const InputVertex& input, const AttributeBuffer& input,
int num_attributes) const { const Regs::ShaderConfig& config) const {
UnitState state; UnitState state;
DebugData<true> debug_data; DebugData<true> debug_data;
// Setup input register table // Setup input register table
boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero()));
state.LoadInputVertex(input, num_attributes); state.LoadInput(config, input);
RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point); RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point);
return debug_data; return debug_data;
} }

View file

@ -19,12 +19,11 @@ public:
/** /**
* Produce debug information based on the given shader and input vertex * Produce debug information based on the given shader and input vertex
* @param input Input vertex into the shader * @param input Input vertex into the shader
* @param num_attributes The number of vertex shader attributes
* @param config Configuration object for the shader pipeline * @param config Configuration object for the shader pipeline
* @return Debug information for this shader with regards to the given vertex * @return Debug information for this shader with regards to the given vertex
*/ */
DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const InputVertex& input, DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const AttributeBuffer& input,
int num_attributes) const; const Regs::ShaderConfig& config) const;
}; };
} // namespace } // namespace

View file

@ -70,7 +70,8 @@ void VertexLoader::Setup(const Pica::Regs& regs) {
is_setup = true; is_setup = true;
} }
void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, void VertexLoader::LoadVertex(u32 base_address, int index, int vertex,
Shader::AttributeBuffer& input,
DebugUtils::MemoryAccessTracker& memory_accesses) { DebugUtils::MemoryAccessTracker& memory_accesses) {
ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices."); ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices.");
@ -142,7 +143,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I
input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
} else if (vertex_attribute_is_default[i]) { } else if (vertex_attribute_is_default[i]) {
// Load the default attribute if we're configured to do so // Load the default attribute if we're configured to do so
input.attr[i] = g_state.vs_default_attributes[i]; input.attr[i] = g_state.input_default_attributes.attr[i];
LOG_TRACE(HW_GPU, LOG_TRACE(HW_GPU,
"Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", i, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", i,
vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),

View file

@ -11,7 +11,7 @@ class MemoryAccessTracker;
} }
namespace Shader { namespace Shader {
struct InputVertex; struct AttributeBuffer;
} }
class VertexLoader { class VertexLoader {
@ -22,7 +22,7 @@ public:
} }
void Setup(const Pica::Regs& regs); void Setup(const Pica::Regs& regs);
void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, void LoadVertex(u32 base_address, int index, int vertex, Shader::AttributeBuffer& input,
DebugUtils::MemoryAccessTracker& memory_accesses); DebugUtils::MemoryAccessTracker& memory_accesses);
int GetNumTotalAttributes() const { int GetNumTotalAttributes() const {