3
0
Fork 0
forked from suyu/suyu

fixed_pipeline_state: Pack attribute state

Reduce FixedPipelineState's size from 1384 to 664 bytes
This commit is contained in:
ReinUsesLisp 2020-04-17 18:37:27 -03:00
parent 2133482a17
commit ab6704f20c
6 changed files with 85 additions and 101 deletions

View file

@ -1149,7 +1149,7 @@ public:
/// Returns whether the vertex array specified by index is supposed to be /// Returns whether the vertex array specified by index is supposed to be
/// accessed per instance or not. /// accessed per instance or not.
bool IsInstancingEnabled(u32 index) const { bool IsInstancingEnabled(std::size_t index) const {
return is_instanced[index]; return is_instanced[index];
} }
} instanced_arrays; } instanced_arrays;

View file

@ -6,6 +6,7 @@
#include <boost/functional/hash.hpp> #include <boost/functional/hash.hpp>
#include "common/cityhash.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
@ -128,25 +129,6 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
} // Anonymous namespace } // Anonymous namespace
/// Folds the binding's index, stride and divisor into a single hash value.
/// Two bindings that compare equal always produce the same hash.
std::size_t FixedPipelineState::VertexBinding::Hash() const noexcept {
    // The earlier expression, (index << stride) ^ divisor, used the stride as a shift
    // count on a 32-bit operand. A stride of 32 or more made that shift undefined
    // behaviour, and smaller strides still discarded most of the index bits.
    // Widen every field first and shift by fixed amounts below 32 instead.
    return static_cast<std::size_t>(index) ^ (static_cast<std::size_t>(stride) << 8) ^
           (static_cast<std::size_t>(divisor) << 20);
}
/// Two bindings are equal when index, stride and divisor all match.
bool FixedPipelineState::VertexBinding::operator==(const VertexBinding& rhs) const noexcept {
    const bool same_index = index == rhs.index;
    const bool same_stride = stride == rhs.stride;
    const bool same_divisor = divisor == rhs.divisor;
    return same_index && same_stride && same_divisor;
}
/// Combines every attribute field into one hash by XOR-ing each field, widened to
/// std::size_t, at its own fixed bit offset.
std::size_t FixedPipelineState::VertexAttribute::Hash() const noexcept {
    const auto widen = [](auto value) { return static_cast<std::size_t>(value); };

    std::size_t result = widen(index);
    result ^= widen(buffer) << 13;
    result ^= widen(type) << 22;
    result ^= widen(size) << 31;
    result ^= widen(offset) << 36;
    return result;
}
/// Two attributes are equal when index, buffer, type, size and offset all match.
bool FixedPipelineState::VertexAttribute::operator==(const VertexAttribute& rhs) const noexcept {
    if (index != rhs.index || buffer != rhs.buffer) {
        return false;
    }
    if (type != rhs.type || size != rhs.size) {
        return false;
    }
    return offset == rhs.offset;
}
std::size_t FixedPipelineState::StencilFace::Hash() const noexcept { std::size_t FixedPipelineState::StencilFace::Hash() const noexcept {
return static_cast<std::size_t>(action_stencil_fail) ^ return static_cast<std::size_t>(action_stencil_fail) ^
(static_cast<std::size_t>(action_depth_fail) << 4) ^ (static_cast<std::size_t>(action_depth_fail) << 4) ^
@ -182,21 +164,12 @@ bool FixedPipelineState::BlendingAttachment::operator==(const BlendingAttachment
} }
std::size_t FixedPipelineState::VertexInput::Hash() const noexcept { std::size_t FixedPipelineState::VertexInput::Hash() const noexcept {
std::size_t hash = num_bindings ^ (num_attributes << 32); // TODO(Rodrigo): Replace this
for (std::size_t i = 0; i < num_bindings; ++i) { return Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
boost::hash_combine(hash, bindings[i].Hash());
}
for (std::size_t i = 0; i < num_attributes; ++i) {
boost::hash_combine(hash, attributes[i].Hash());
}
return hash;
} }
bool FixedPipelineState::VertexInput::operator==(const VertexInput& rhs) const noexcept { bool FixedPipelineState::VertexInput::operator==(const VertexInput& rhs) const noexcept {
return std::equal(bindings.begin(), bindings.begin() + num_bindings, rhs.bindings.begin(), return std::memcmp(this, &rhs, sizeof *this) == 0;
rhs.bindings.begin() + rhs.num_bindings) &&
std::equal(attributes.begin(), attributes.begin() + num_attributes,
rhs.attributes.begin(), rhs.attributes.begin() + rhs.num_attributes);
} }
std::size_t FixedPipelineState::InputAssembly::Hash() const noexcept { std::size_t FixedPipelineState::InputAssembly::Hash() const noexcept {

View file

@ -7,6 +7,7 @@
#include <array> #include <array>
#include <type_traits> #include <type_traits>
#include "common/bit_field.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
@ -18,48 +19,11 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
// TODO(Rodrigo): Optimize this structure. // TODO(Rodrigo): Optimize this structure.
/// True when T is trivially default-constructible, trivially copyable and has unique
/// object representations, i.e. equal values always have identical bytes. Such a type
/// can be hashed and compared through its raw storage.
template <class T>
inline constexpr bool IsHashable =
    std::is_trivially_default_constructible_v<T> && std::is_trivially_copyable_v<T> &&
    std::has_unique_object_representations_v<T>;
struct FixedPipelineState { struct FixedPipelineState {
using PixelFormat = VideoCore::Surface::PixelFormat;
struct VertexBinding {
constexpr VertexBinding(u32 index, u32 stride, u32 divisor)
: index{index}, stride{stride}, divisor{divisor} {}
VertexBinding() = default;
u32 index;
u32 stride;
u32 divisor;
std::size_t Hash() const noexcept;
bool operator==(const VertexBinding& rhs) const noexcept;
bool operator!=(const VertexBinding& rhs) const noexcept {
return !operator==(rhs);
}
};
struct VertexAttribute {
constexpr VertexAttribute(u32 index, u32 buffer, Maxwell::VertexAttribute::Type type,
Maxwell::VertexAttribute::Size size, u32 offset)
: index{index}, buffer{buffer}, type{type}, size{size}, offset{offset} {}
VertexAttribute() = default;
u32 index;
u32 buffer;
Maxwell::VertexAttribute::Type type;
Maxwell::VertexAttribute::Size size;
u32 offset;
std::size_t Hash() const noexcept;
bool operator==(const VertexAttribute& rhs) const noexcept;
bool operator!=(const VertexAttribute& rhs) const noexcept {
return !operator==(rhs);
}
};
struct StencilFace { struct StencilFace {
constexpr StencilFace(Maxwell::StencilOp action_stencil_fail, constexpr StencilFace(Maxwell::StencilOp action_stencil_fail,
Maxwell::StencilOp action_depth_fail, Maxwell::StencilOp action_depth_fail,
@ -114,10 +78,52 @@ struct FixedPipelineState {
}; };
struct VertexInput { struct VertexInput {
std::size_t num_bindings = 0; union Binding {
std::size_t num_attributes = 0; u16 raw;
std::array<VertexBinding, Maxwell::NumVertexArrays> bindings; BitField<0, 1, u16> enabled;
std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; BitField<1, 12, u16> stride;
};
// Packed description of a single vertex attribute. Every field below is a view
// into the same 32-bit word exposed as `raw`.
// NOTE(review): the bit ranges in the comments assume the template arguments are
// BitField<first bit, bit count, storage type> — confirm against common/bit_field.h.
union Attribute {
// The whole packed word.
u32 raw;
// Bit 0: whether this attribute is in use.
BitField<0, 1, u32> enabled;
// Bits 1-5: vertex buffer the attribute reads from.
BitField<1, 5, u32> buffer;
// Bits 6-19: offset of the attribute.
BitField<6, 14, u32> offset;
// Bits 20-22: raw value of Maxwell::VertexAttribute::Type (see Type()).
BitField<20, 3, u32> type;
// Bits 23-28: raw value of Maxwell::VertexAttribute::Size (see Size()).
BitField<23, 6, u32> size;
// Returns the stored `type` bits converted back to the Maxwell enum.
constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
}
// Returns the stored `size` bits converted back to the Maxwell enum.
constexpr Maxwell::VertexAttribute::Size Size() const noexcept {
return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
}
};
std::array<Binding, Maxwell::NumVertexArrays> bindings;
std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
std::array<Attribute, Maxwell::NumVertexAttributes> attributes;
void SetBinding(std::size_t index, bool enabled, u32 stride, u32 divisor) noexcept {
auto& binding = bindings[index];
binding.raw = 0;
binding.enabled.Assign(enabled ? 1 : 0);
binding.stride.Assign(stride);
binding_divisors[index] = divisor;
}
void SetAttribute(std::size_t index, bool enabled, u32 buffer, u32 offset,
Maxwell::VertexAttribute::Type type,
Maxwell::VertexAttribute::Size size) noexcept {
auto& attribute = attributes[index];
attribute.raw = 0;
attribute.enabled.Assign(enabled ? 1 : 0);
attribute.buffer.Assign(buffer);
attribute.offset.Assign(offset);
attribute.type.Assign(static_cast<u32>(type));
attribute.size.Assign(static_cast<u32>(size));
}
std::size_t Hash() const noexcept; std::size_t Hash() const noexcept;
@ -127,6 +133,7 @@ struct FixedPipelineState {
return !operator==(rhs); return !operator==(rhs);
} }
}; };
static_assert(IsHashable<VertexInput>);
struct InputAssembly { struct InputAssembly {
constexpr InputAssembly(Maxwell::PrimitiveTopology topology, bool primitive_restart_enable, constexpr InputAssembly(Maxwell::PrimitiveTopology topology, bool primitive_restart_enable,
@ -256,8 +263,6 @@ struct FixedPipelineState {
DepthStencil depth_stencil; DepthStencil depth_stencil;
ColorBlending color_blending; ColorBlending color_blending;
}; };
static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexBinding>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexAttribute>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::StencilFace>); static_assert(std::is_trivially_copyable_v<FixedPipelineState::StencilFace>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::BlendingAttachment>); static_assert(std::is_trivially_copyable_v<FixedPipelineState::BlendingAttachment>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexInput>); static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexInput>);

View file

@ -165,35 +165,41 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
std::vector<VkVertexInputBindingDescription> vertex_bindings; std::vector<VkVertexInputBindingDescription> vertex_bindings;
std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
for (std::size_t i = 0; i < vi.num_bindings; ++i) { for (std::size_t index = 0; index < std::size(vi.bindings); ++index) {
const auto& binding = vi.bindings[i]; const auto& binding = vi.bindings[index];
const bool instanced = binding.divisor != 0; if (!binding.enabled) {
continue;
}
const bool instanced = vi.binding_divisors[index] != 0;
const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
auto& vertex_binding = vertex_bindings.emplace_back(); auto& vertex_binding = vertex_bindings.emplace_back();
vertex_binding.binding = binding.index; vertex_binding.binding = static_cast<u32>(index);
vertex_binding.stride = binding.stride; vertex_binding.stride = binding.stride;
vertex_binding.inputRate = rate; vertex_binding.inputRate = rate;
if (instanced) { if (instanced) {
auto& binding_divisor = vertex_binding_divisors.emplace_back(); auto& binding_divisor = vertex_binding_divisors.emplace_back();
binding_divisor.binding = binding.index; binding_divisor.binding = static_cast<u32>(index);
binding_divisor.divisor = binding.divisor; binding_divisor.divisor = vi.binding_divisors[index];
} }
} }
std::vector<VkVertexInputAttributeDescription> vertex_attributes; std::vector<VkVertexInputAttributeDescription> vertex_attributes;
const auto& input_attributes = program[0]->entries.attributes; const auto& input_attributes = program[0]->entries.attributes;
for (std::size_t i = 0; i < vi.num_attributes; ++i) { for (std::size_t index = 0; index < std::size(vi.attributes); ++index) {
const auto& attribute = vi.attributes[i]; const auto& attribute = vi.attributes[index];
if (input_attributes.find(attribute.index) == input_attributes.end()) { if (!attribute.enabled) {
continue;
}
if (input_attributes.find(static_cast<u32>(index)) == input_attributes.end()) {
// Skip attributes not used by the vertex shaders. // Skip attributes not used by the vertex shaders.
continue; continue;
} }
auto& vertex_attribute = vertex_attributes.emplace_back(); auto& vertex_attribute = vertex_attributes.emplace_back();
vertex_attribute.location = attribute.index; vertex_attribute.location = static_cast<u32>(index);
vertex_attribute.binding = attribute.buffer; vertex_attribute.binding = attribute.buffer;
vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.type, attribute.size); vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size());
vertex_attribute.offset = attribute.offset; vertex_attribute.offset = attribute.offset;
} }

View file

@ -334,7 +334,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
specialization.point_size = fixed_state.input_assembly.point_size; specialization.point_size = fixed_state.input_assembly.point_size;
} }
for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type();
} }
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;

View file

@ -806,25 +806,29 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
BufferBindings& buffer_bindings) { BufferBindings& buffer_bindings) {
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = system.GPU().Maxwell3D().regs;
for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexAttributes); ++index) { for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
const auto& attrib = regs.vertex_attrib_format[index]; const auto& attrib = regs.vertex_attrib_format[index];
if (!attrib.IsValid()) { if (!attrib.IsValid()) {
vertex_input.SetAttribute(index, false, 0, 0, {}, {});
continue; continue;
} }
const auto& buffer = regs.vertex_array[attrib.buffer]; [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer];
ASSERT(buffer.IsEnabled()); ASSERT(buffer.IsEnabled());
vertex_input.attributes[vertex_input.num_attributes++] = vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(),
FixedPipelineState::VertexAttribute(index, attrib.buffer, attrib.type, attrib.size, attrib.size.Value());
attrib.offset);
} }
for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexArrays); ++index) { for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
const auto& vertex_array = regs.vertex_array[index]; const auto& vertex_array = regs.vertex_array[index];
if (!vertex_array.IsEnabled()) { if (!vertex_array.IsEnabled()) {
vertex_input.SetBinding(index, false, 0, 0);
continue; continue;
} }
vertex_input.SetBinding(
index, true, vertex_array.stride,
regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0);
const GPUVAddr start{vertex_array.StartAddress()}; const GPUVAddr start{vertex_array.StartAddress()};
const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
@ -832,10 +836,6 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
ASSERT(end > start); ASSERT(end > start);
const std::size_t size{end - start + 1}; const std::size_t size{end - start + 1};
const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
vertex_input.bindings[vertex_input.num_bindings++] = FixedPipelineState::VertexBinding(
index, vertex_array.stride,
regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0);
buffer_bindings.AddVertexBinding(buffer, offset); buffer_bindings.AddVertexBinding(buffer, offset);
} }
} }