forked from suyu/suyu
fixed_pipeline_cache: Use dirty flags to lazily update key
Use dirty flags to avoid building the pipeline key from scratch on each draw call. This saves a bit of unnecessary work on each draw call.
This commit is contained in:
parent
95722823b9
commit
70353649d7
7 changed files with 103 additions and 56 deletions
|
@ -12,14 +12,15 @@
|
|||
#include "common/cityhash.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::size_t POINT = 0;
|
||||
constexpr std::size_t LINE = 1;
|
||||
constexpr std::size_t POLYGON = 2;
|
||||
constexpr size_t POINT = 0;
|
||||
constexpr size_t LINE = 1;
|
||||
constexpr size_t POLYGON = 2;
|
||||
constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
|
||||
POINT, // Points
|
||||
LINE, // Lines
|
||||
|
@ -40,10 +41,14 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
|
|||
|
||||
} // Anonymous namespace
|
||||
|
||||
void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) {
|
||||
const std::array enabled_lut = {regs.polygon_offset_point_enable,
|
||||
void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
|
||||
bool has_extended_dynamic_state) {
|
||||
const Maxwell& regs = maxwell3d.regs;
|
||||
const std::array enabled_lut{
|
||||
regs.polygon_offset_point_enable,
|
||||
regs.polygon_offset_line_enable,
|
||||
regs.polygon_offset_fill_enable};
|
||||
regs.polygon_offset_fill_enable,
|
||||
};
|
||||
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
|
||||
|
||||
raw1 = 0;
|
||||
|
@ -64,18 +69,22 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
|
|||
|
||||
raw2 = 0;
|
||||
const auto test_func =
|
||||
regs.alpha_test_enabled == 1 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
|
||||
regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
|
||||
alpha_test_func.Assign(PackComparisonOp(test_func));
|
||||
early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
|
||||
|
||||
alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
|
||||
point_size = Common::BitCast<u32>(regs.point_size);
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
||||
binding_divisors[index] =
|
||||
regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0;
|
||||
if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) {
|
||||
maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false;
|
||||
for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
||||
const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index);
|
||||
binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0;
|
||||
}
|
||||
|
||||
}
|
||||
if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) {
|
||||
maxwell3d.dirty.flags[Dirty::VertexAttributes] = false;
|
||||
for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
|
||||
const auto& input = regs.vertex_attrib_format[index];
|
||||
auto& attribute = attributes[index];
|
||||
|
@ -85,24 +94,28 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
|
|||
attribute.offset.Assign(input.offset);
|
||||
attribute.type.Assign(static_cast<u32>(input.type.Value()));
|
||||
attribute.size.Assign(static_cast<u32>(input.size.Value()));
|
||||
attribute.binding_index_enabled.Assign(regs.vertex_array[index].IsEnabled() ? 1 : 0);
|
||||
}
|
||||
|
||||
for (std::size_t index = 0; index < std::size(attachments); ++index) {
|
||||
attachments[index].Fill(regs, index);
|
||||
}
|
||||
|
||||
if (maxwell3d.dirty.flags[Dirty::Blending]) {
|
||||
maxwell3d.dirty.flags[Dirty::Blending] = false;
|
||||
for (size_t index = 0; index < attachments.size(); ++index) {
|
||||
attachments[index].Refresh(regs, index);
|
||||
}
|
||||
}
|
||||
if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) {
|
||||
maxwell3d.dirty.flags[Dirty::ViewportSwizzles] = false;
|
||||
const auto& transform = regs.viewport_transform;
|
||||
std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(),
|
||||
[](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); });
|
||||
|
||||
std::ranges::transform(transform, viewport_swizzles.begin(), [](const auto& viewport) {
|
||||
return static_cast<u16>(viewport.swizzle.raw);
|
||||
});
|
||||
}
|
||||
if (!has_extended_dynamic_state) {
|
||||
no_extended_dynamic_state.Assign(1);
|
||||
dynamic_state.Fill(regs);
|
||||
dynamic_state.Refresh(regs);
|
||||
}
|
||||
}
|
||||
|
||||
void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) {
|
||||
void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) {
|
||||
const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
|
||||
|
||||
raw = 0;
|
||||
|
@ -141,7 +154,7 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
|
|||
enable.Assign(1);
|
||||
}
|
||||
|
||||
void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
|
||||
void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
|
||||
u32 packed_front_face = PackFrontFace(regs.front_face);
|
||||
if (regs.screen_y_control.triangle_rast_flip != 0) {
|
||||
// Flip front face
|
||||
|
@ -178,9 +191,9 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
|
|||
});
|
||||
}
|
||||
|
||||
std::size_t FixedPipelineState::Hash() const noexcept {
|
||||
size_t FixedPipelineState::Hash() const noexcept {
|
||||
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
|
||||
return static_cast<std::size_t>(hash);
|
||||
return static_cast<size_t>(hash);
|
||||
}
|
||||
|
||||
bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
|
||||
|
|
|
@ -58,7 +58,7 @@ struct FixedPipelineState {
|
|||
BitField<30, 1, u32> enable;
|
||||
};
|
||||
|
||||
void Fill(const Maxwell& regs, std::size_t index);
|
||||
void Refresh(const Maxwell& regs, size_t index);
|
||||
|
||||
constexpr std::array<bool, 4> Mask() const noexcept {
|
||||
return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
|
||||
|
@ -96,8 +96,6 @@ struct FixedPipelineState {
|
|||
BitField<6, 14, u32> offset;
|
||||
BitField<20, 3, u32> type;
|
||||
BitField<23, 6, u32> size;
|
||||
// Not really an element of a vertex attribute, but it can be packed here
|
||||
BitField<29, 1, u32> binding_index_enabled;
|
||||
|
||||
constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
|
||||
return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
|
||||
|
@ -108,7 +106,7 @@ struct FixedPipelineState {
|
|||
}
|
||||
};
|
||||
|
||||
template <std::size_t Position>
|
||||
template <size_t Position>
|
||||
union StencilFace {
|
||||
BitField<Position + 0, 3, u32> action_stencil_fail;
|
||||
BitField<Position + 3, 3, u32> action_depth_fail;
|
||||
|
@ -152,7 +150,7 @@ struct FixedPipelineState {
|
|||
// Vertex stride is a 12 bits value, we have 4 bits to spare per element
|
||||
std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
|
||||
|
||||
void Fill(const Maxwell& regs);
|
||||
void Refresh(const Maxwell& regs);
|
||||
|
||||
Maxwell::ComparisonOp DepthTestFunc() const noexcept {
|
||||
return UnpackComparisonOp(depth_test_func);
|
||||
|
@ -199,9 +197,9 @@ struct FixedPipelineState {
|
|||
std::array<u16, Maxwell::NumViewports> viewport_swizzles;
|
||||
DynamicState dynamic_state;
|
||||
|
||||
void Fill(const Maxwell& regs, bool has_extended_dynamic_state);
|
||||
void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state);
|
||||
|
||||
std::size_t Hash() const noexcept;
|
||||
size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const FixedPipelineState& rhs) const noexcept;
|
||||
|
||||
|
@ -209,8 +207,8 @@ struct FixedPipelineState {
|
|||
return !operator==(rhs);
|
||||
}
|
||||
|
||||
std::size_t Size() const noexcept {
|
||||
const std::size_t total_size = sizeof *this;
|
||||
size_t Size() const noexcept {
|
||||
const size_t total_size = sizeof *this;
|
||||
return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState));
|
||||
}
|
||||
};
|
||||
|
@ -224,7 +222,7 @@ namespace std {
|
|||
|
||||
template <>
|
||||
struct hash<Vulkan::FixedPipelineState> {
|
||||
std::size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
|
||||
size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
|
|
@ -221,9 +221,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
|
|||
std::vector<VkVertexInputBindingDescription> vertex_bindings;
|
||||
std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
|
||||
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
||||
if (state.attributes[index].binding_index_enabled == 0) {
|
||||
continue;
|
||||
}
|
||||
const bool instanced = state.binding_divisors[index] != 0;
|
||||
const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
|
||||
vertex_bindings.push_back({
|
||||
|
|
|
@ -267,8 +267,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
|||
|
||||
query_cache.UpdateCounters();
|
||||
|
||||
GraphicsPipelineCacheKey key;
|
||||
key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported());
|
||||
graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported());
|
||||
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
|
||||
|
@ -276,14 +275,16 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
|||
texture_cache.UpdateRenderTargets(false);
|
||||
|
||||
const auto shaders = pipeline_cache.GetShaders();
|
||||
key.shaders = GetShaderAddresses(shaders);
|
||||
graphics_key.shaders = GetShaderAddresses(shaders);
|
||||
|
||||
graphics_key.shaders = GetShaderAddresses(shaders);
|
||||
SetupShaderDescriptors(shaders, is_indexed);
|
||||
|
||||
const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
|
||||
key.renderpass = framebuffer->RenderPass();
|
||||
graphics_key.renderpass = framebuffer->RenderPass();
|
||||
|
||||
auto* const pipeline =
|
||||
pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders);
|
||||
VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline(
|
||||
graphics_key, framebuffer->NumColorBuffers(), async_shaders);
|
||||
if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
|
||||
// Async graphics pipeline was not ready.
|
||||
return;
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_fence_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
|
@ -173,6 +174,8 @@ private:
|
|||
VKUpdateDescriptorQueue update_descriptor_queue;
|
||||
BlitImageHelper blit_image;
|
||||
|
||||
GraphicsPipelineCacheKey graphics_key;
|
||||
|
||||
TextureCacheRuntime texture_cache_runtime;
|
||||
TextureCache texture_cache;
|
||||
BufferCacheRuntime buffer_cache_runtime;
|
||||
|
|
|
@ -18,9 +18,7 @@
|
|||
#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
using namespace Dirty;
|
||||
using namespace VideoCommon::Dirty;
|
||||
using Tegra::Engines::Maxwell3D;
|
||||
|
@ -128,6 +126,34 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
|
|||
tables[0][OFF(stencil_enable)] = StencilTestEnable;
|
||||
}
|
||||
|
||||
void SetupDirtyBlending(Tables& tables) {
|
||||
tables[0][OFF(color_mask_common)] = Blending;
|
||||
tables[0][OFF(independent_blend_enable)] = Blending;
|
||||
FillBlock(tables[0], OFF(color_mask), NUM(color_mask), Blending);
|
||||
FillBlock(tables[0], OFF(blend), NUM(blend), Blending);
|
||||
FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending);
|
||||
}
|
||||
|
||||
void SetupDirtyInstanceDivisors(Tables& tables) {
|
||||
static constexpr size_t divisor_offset = 3;
|
||||
for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
|
||||
tables[0][OFF(instanced_arrays) + index] = InstanceDivisors;
|
||||
tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] =
|
||||
InstanceDivisors;
|
||||
}
|
||||
}
|
||||
|
||||
void SetupDirtyVertexAttributes(Tables& tables) {
|
||||
FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes);
|
||||
}
|
||||
|
||||
void SetupDirtyViewportSwizzles(Tables& tables) {
|
||||
static constexpr size_t swizzle_offset = 6;
|
||||
for (size_t index = 0; index < Regs::NumViewports; ++index) {
|
||||
tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
|
||||
ViewportSwizzles;
|
||||
}
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
StateTracker::StateTracker(Tegra::GPU& gpu)
|
||||
|
@ -148,6 +174,10 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
|
|||
SetupDirtyFrontFace(tables);
|
||||
SetupDirtyStencilOp(tables);
|
||||
SetupDirtyStencilTestEnable(tables);
|
||||
SetupDirtyBlending(tables);
|
||||
SetupDirtyInstanceDivisors(tables);
|
||||
SetupDirtyVertexAttributes(tables);
|
||||
SetupDirtyViewportSwizzles(tables);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -35,6 +35,11 @@ enum : u8 {
|
|||
StencilOp,
|
||||
StencilTestEnable,
|
||||
|
||||
Blending,
|
||||
InstanceDivisors,
|
||||
VertexAttributes,
|
||||
ViewportSwizzles,
|
||||
|
||||
Last
|
||||
};
|
||||
static_assert(Last <= std::numeric_limits<u8>::max());
|
||||
|
|
Loading…
Reference in a new issue