From aad0cbf024fb8077a9b375a093c60a7e2ab1db3d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 9 Nov 2022 17:58:10 +0100 Subject: [PATCH] MacroHLE: Add HLE replacement for base vertex and base instance. --- .../spirv/emit_spirv_context_get_set.cpp | 8 ++ .../backend/spirv/spirv_emit_context.cpp | 10 ++ src/shader_recompiler/environment.h | 5 + .../frontend/ir/attribute.cpp | 4 + src/shader_recompiler/frontend/ir/attribute.h | 4 + .../frontend/ir/ir_emitter.cpp | 8 ++ .../frontend/ir/ir_emitter.h | 2 + .../frontend/maxwell/translate_program.cpp | 2 +- .../ir_opt/constant_propagation_pass.cpp | 45 ++++++- src/shader_recompiler/ir_opt/passes.h | 2 +- src/shader_recompiler/shader_info.h | 5 + src/shader_recompiler/varying_state.h | 2 +- src/video_core/engines/maxwell_3d.cpp | 15 ++- src/video_core/engines/maxwell_3d.h | 17 +++ src/video_core/macro/macro_hle.cpp | 115 +++++++++--------- src/video_core/memory_manager.cpp | 10 +- src/video_core/memory_manager.h | 3 +- .../renderer_vulkan/fixed_pipeline_state.cpp | 1 + .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/shader_environment.cpp | 53 ++++++++ src/video_core/shader_environment.h | 21 ++++ 22 files changed, 265 insertions(+), 70 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 73b67f0af6..e4802bf9e7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -339,6 +339,10 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); } + case IR::Attribute::BaseInstance: + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance)); + case IR::Attribute::BaseVertex: + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_vertex)); case IR::Attribute::FrontFace: return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits::max())), @@ -380,6 +384,10 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; return ctx.OpISub(ctx.U32[1], index, base); } + case IR::Attribute::BaseInstance: + return ctx.OpLoad(ctx.U32[1], ctx.base_instance); + case IR::Attribute::BaseVertex: + return ctx.OpLoad(ctx.U32[1], ctx.base_vertex); default: throw NotImplementedException("Read U32 attribute {}", attr); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 41dc6d0319..563a5fc49a 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -1379,18 +1379,28 @@ void EmitContext::DefineInputs(const IR::Program& program) { if (loads[IR::Attribute::InstanceId]) { if (profile.support_vertex_instance_id) { instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); + if (loads[IR::Attribute::BaseInstance]) { + base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); + } } else { instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); } + } else if (loads[IR::Attribute::BaseInstance]) { + base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); } if (loads[IR::Attribute::VertexId]) { if (profile.support_vertex_instance_id) { vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); + if (loads[IR::Attribute::BaseVertex]) { + base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); + } } else { vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); } + } else if (loads[IR::Attribute::BaseVertex]) { + base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); } if (loads[IR::Attribute::FrontFace]) { front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 402f2664f6..b9b4455f69 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -34,6 +34,11 @@ public: [[nodiscard]] virtual std::array WorkgroupSize() const = 0; + [[nodiscard]] virtual bool HasHLEMacroState() const = 0; + + [[nodiscard]] virtual std::optional GetReplaceConstBuffer( + u32 bank, u32 offset) = 0; + virtual void Dump(u64 hash) = 0; [[nodiscard]] const ProgramHeader& SPH() const noexcept { diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 7d3d882e44..73e189a895 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -446,6 +446,10 @@ std::string NameOf(Attribute attribute) { return "ViewportMask"; case Attribute::FrontFace: return "FrontFace"; + case Attribute::BaseInstance: + return "BaseInstance"; + case Attribute::BaseVertex: + return "BaseVertex"; } return fmt::format("", static_cast(attribute)); } diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 6ee3947b12..364d8a9124 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -219,6 +219,10 @@ enum class Attribute : u64 { FixedFncTexture9Q = 231, ViewportMask = 232, FrontFace = 255, + + // Implementation attributes + BaseInstance = 256, + BaseVertex = 257, }; constexpr size_t NUM_GENERICS = 32; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 0cdac0effd..eb2e49a688 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -294,6 +294,14 @@ F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) { return Inst(Opcode::GetAttribute, attribute, vertex); } +U32 IREmitter::GetAttributeU32(IR::Attribute attribute) { + return GetAttributeU32(attribute, Imm32(0)); +} + +U32 IREmitter::GetAttributeU32(IR::Attribute attribute, const U32& vertex) { + return Inst(Opcode::GetAttributeU32, attribute, vertex); +} + void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { Inst(Opcode::SetAttribute, attribute, value, vertex); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 2df992feb6..7aaaa4ab06 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -74,6 +74,8 @@ public: [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); + [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute); + [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, const U32& vertex); void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 3adbd2b166..ac159d24b9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -219,7 +219,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool #include "common/bit_cast.h" +#include "shader_recompiler/environment.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/value.h" @@ -515,6 +516,8 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { case IR::Attribute::PrimitiveId: case IR::Attribute::InstanceId: case IR::Attribute::VertexId: + case IR::Attribute::BaseVertex: + case IR::Attribute::BaseInstance: break; default: return; @@ -644,7 +647,37 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { } } -void ConstantPropagation(IR::Block& block, IR::Inst& inst) { +void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { + const IR::Value bank{inst.Arg(0)}; + const IR::Value offset{inst.Arg(1)}; + if (!bank.IsImmediate() || !offset.IsImmediate()) { + return; + } + const auto bank_value = bank.U32(); + const auto offset_value = offset.U32(); + auto replacement = env.GetReplaceConstBuffer(bank_value, offset_value); + if (!replacement) { + return; + } + const auto new_attribute = [replacement]() { + switch (*replacement) { + case ReplaceConstant::BaseInstance: + return IR::Attribute::BaseInstance; + case ReplaceConstant::BaseVertex: + return IR::Attribute::BaseVertex; + default: + throw NotImplementedException("Not implemented replacement variable {}", *replacement); + } + }(); + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + if (inst.GetOpcode() == IR::Opcode::GetCbufU32) { + inst.ReplaceUsesWith(ir.GetAttributeU32(new_attribute)); + } else { + inst.ReplaceUsesWith(ir.GetAttribute(new_attribute)); + } +} + +void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::GetRegister: return FoldGetRegister(inst); @@ -789,18 +822,24 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { IR::Opcode::CompositeInsertF16x4); case IR::Opcode::FSwizzleAdd: return FoldFSwizzleAdd(block, inst); + case IR::Opcode::GetCbufF32: + case IR::Opcode::GetCbufU32: + if (env.HasHLEMacroState()) { + return FoldConstBuffer(env, block, inst); + } + break; default: break; } } } // Anonymous namespace -void ConstantPropagationPass(IR::Program& program) { +void ConstantPropagationPass(Environment& env, IR::Program& program) { const auto end{program.post_order_blocks.rend()}; for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { IR::Block* const block{*it}; for (IR::Inst& inst : block->Instructions()) { - ConstantPropagation(*block, inst); + ConstantPropagation(env, *block, inst); } } } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 11bfe801a2..1f8f2ba95e 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -13,7 +13,7 @@ struct HostTranslateInfo; namespace Shader::Optimization { void CollectShaderInfoPass(Environment& env, IR::Program& program); -void ConstantPropagationPass(IR::Program& program); +void ConstantPropagationPass(Environment& env, IR::Program& program); void DeadCodeEliminationPass(IR::Program& program); void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Program& program); diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d9c6e92dbe..ea0f483441 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -16,6 +16,11 @@ namespace Shader { +enum class ReplaceConstant : u32 { + BaseInstance, + BaseVertex, +}; + enum class TextureType : u32 { Color1D, ColorArray1D, diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h index 7b28a285f9..18a9aaf50f 100644 --- a/src/shader_recompiler/varying_state.h +++ b/src/shader_recompiler/varying_state.h @@ -11,7 +11,7 @@ namespace Shader { struct VaryingState { - std::bitset<256> mask{}; + std::bitset<512> mask{}; void Set(IR::Attribute attribute, bool state = true) { mask[static_cast(attribute)] = state; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index a0dd7400df..50d8a94b15 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -182,8 +182,14 @@ u32 Maxwell3D::GetMaxCurrentVertices() { size_t Maxwell3D::EstimateIndexBufferSize() { GPUVAddr start_address = regs.index_buffer.StartAddress(); GPUVAddr end_address = regs.index_buffer.EndAddress(); - return std::min(memory_manager.GetMemoryLayoutSize(start_address), - static_cast(end_address - start_address)); + constexpr std::array max_sizes = { + std::numeric_limits::max(), std::numeric_limits::max(), + std::numeric_limits::max(), std::numeric_limits::max()}; + const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); + return std::min( + memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) / + byte_size, + static_cast(end_address - start_address)); } u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { @@ -572,4 +578,9 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const { return regs.reg_array[method]; } +void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) { + const u64 key = (static_cast(bank) << 32) | offset; + replace_table.emplace(key, name); +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cfe1e48832..397e88f675 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3020,6 +3020,23 @@ public: /// Store temporary hw register values, used by some calls to restore state after a operation Regs shadow_state; + // None Engine + enum class EngineHint : u32 { + None = 0x0, + OnHLEMacro = 0x1, + }; + + EngineHint engine_state{EngineHint::None}; + + enum class HLEReplaceName : u32 { + BaseVertex = 0x0, + BaseInstance = 0x1, + }; + + void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name); + + std::unordered_map replace_table; + static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); static_assert(std::is_trivially_copyable_v, "Maxwell3D Regs must be trivially copyable"); diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 93b6d42a47..638247e55d 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -14,26 +14,29 @@ #include "video_core/rasterizer_interface.h" namespace Tegra { + +using Maxwell = Engines::Maxwell3D; + namespace { -bool IsTopologySafe(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { +bool IsTopologySafe(Maxwell::Regs::PrimitiveTopology topology) { switch (topology) { - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineLoop: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Patches: + case Maxwell::Regs::PrimitiveTopology::Points: + case Maxwell::Regs::PrimitiveTopology::Lines: + case Maxwell::Regs::PrimitiveTopology::LineLoop: + case Maxwell::Regs::PrimitiveTopology::LineStrip: + case Maxwell::Regs::PrimitiveTopology::Triangles: + case Maxwell::Regs::PrimitiveTopology::TriangleStrip: + case Maxwell::Regs::PrimitiveTopology::TriangleFan: + case Maxwell::Regs::PrimitiveTopology::LinesAdjacency: + case Maxwell::Regs::PrimitiveTopology::LineStripAdjacency: + case Maxwell::Regs::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::Regs::PrimitiveTopology::TriangleStripAdjacency: + case Maxwell::Regs::PrimitiveTopology::Patches: return true; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Quads: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::QuadStrip: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Polygon: + case Maxwell::Regs::PrimitiveTopology::Quads: + case Maxwell::Regs::PrimitiveTopology::QuadStrip: + case Maxwell::Regs::PrimitiveTopology::Polygon: default: return false; } @@ -82,8 +85,7 @@ public: : HLEMacroImpl(maxwell3d_), extended(extended_) {} void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - auto topology = - static_cast(parameters[0]); + auto topology = static_cast(parameters[0]); if (!IsTopologySafe(topology)) { Fallback(parameters); return; @@ -99,18 +101,16 @@ public: params.stride = 0; if (extended) { - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, parameters[4], true); + maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; + maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); } maxwell3d.draw_manager->DrawArrayIndirect(topology); if (extended) { - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, 0, true); + maxwell3d.engine_state = Maxwell::EngineHint::None; + maxwell3d.replace_table.clear(); } - maxwell3d.regs.vertex_buffer.first = 0; - maxwell3d.regs.vertex_buffer.count = 0; } private: @@ -134,13 +134,18 @@ private: const u32 base_instance = parameters[4]; if (extended) { - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, base_instance, true); + maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; + maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); } maxwell3d.draw_manager->DrawArray( static_cast(parameters[0]), vertex_first, vertex_count, base_instance, instance_count); + + if (extended) { + maxwell3d.engine_state = Maxwell::EngineHint::None; + maxwell3d.replace_table.clear(); + } } bool extended; @@ -151,8 +156,7 @@ public: explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - auto topology = - static_cast(parameters[0]); + auto topology = static_cast(parameters[0]); if (!IsTopologySafe(topology)) { Fallback(parameters); return; @@ -164,16 +168,12 @@ public: minimum_limit = std::max(parameters[3], minimum_limit); } const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); - const u32 base_size = std::max(minimum_limit, estimate); - const u32 element_base = parameters[4]; - const u32 base_instance = parameters[5]; - maxwell3d.regs.index_buffer.first = 0; - maxwell3d.regs.index_buffer.count = base_size; // Use a fixed size, just for mapping + const u32 base_size = std::max(minimum_limit, estimate); maxwell3d.regs.draw.topology.Assign(topology); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, element_base, true); - maxwell3d.CallMethod(0x8e5, base_instance, true); + maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; + maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); + maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); auto& params = maxwell3d.draw_manager->GetIndirectParams(); params.is_indexed = true; params.include_count = false; @@ -184,9 +184,8 @@ public: params.stride = 0; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, 0x0, true); - maxwell3d.CallMethod(0x8e5, 0x0, true); + maxwell3d.engine_state = Maxwell::EngineHint::None; + maxwell3d.replace_table.clear(); } private: @@ -197,18 +196,17 @@ private: const u32 base_instance = parameters[5]; maxwell3d.regs.vertex_id_base = element_base; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, element_base, true); - maxwell3d.CallMethod(0x8e5, base_instance, true); + maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; + maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); + maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); maxwell3d.draw_manager->DrawIndex( static_cast(parameters[0]), parameters[3], parameters[1], element_base, base_instance, instance_count); maxwell3d.regs.vertex_id_base = 0x0; - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, 0x0, true); - maxwell3d.CallMethod(0x8e5, 0x0, true); + maxwell3d.engine_state = Maxwell::EngineHint::None; + maxwell3d.replace_table.clear(); } u32 minimum_limit{1 << 18}; @@ -238,8 +236,7 @@ public: : HLEMacroImpl(maxwell3d_) {} void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - const auto topology = - static_cast(parameters[2]); + const auto topology = static_cast(parameters[2]); if (!IsTopologySafe(topology)) { Fallback(parameters); return; @@ -277,9 +274,6 @@ public: } const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); const u32 base_size = std::max(minimum_limit, estimate); - - maxwell3d.regs.index_buffer.first = 0; - maxwell3d.regs.index_buffer.count = std::max(highest_limit, base_size); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; auto& params = maxwell3d.draw_manager->GetIndirectParams(); params.is_indexed = true; @@ -290,7 +284,12 @@ public: params.max_draw_counts = draw_count; params.stride = stride; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit); + maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; + maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); + maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); + maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); + maxwell3d.engine_state = Maxwell::EngineHint::None; + maxwell3d.replace_table.clear(); } private: @@ -299,9 +298,8 @@ private: // Clean everything. // Clean everything. maxwell3d.regs.vertex_id_base = 0x0; - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, 0x0, true); - maxwell3d.CallMethod(0x8e5, 0x0, true); + maxwell3d.engine_state = Maxwell::EngineHint::None; + maxwell3d.replace_table.clear(); }); maxwell3d.RefreshParameters(); const u32 start_indirect = parameters[0]; @@ -310,8 +308,7 @@ private: // Nothing to do. return; } - const auto topology = - static_cast(parameters[2]); + const auto topology = static_cast(parameters[2]); maxwell3d.regs.draw.topology.Assign(topology); const u32 padding = parameters[3]; const std::size_t max_draws = parameters[4]; @@ -326,9 +323,9 @@ private: const u32 base_vertex = parameters[base + 3]; const u32 base_instance = parameters[base + 4]; maxwell3d.regs.vertex_id_base = base_vertex; - maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, base_vertex, true); - maxwell3d.CallMethod(0x8e5, base_instance, true); + maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; + maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); + maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], base_vertex, base_instance, parameters[base + 1]); diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 8f6c510458..11e7d225ec 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -577,7 +577,7 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const { return range_so_far; } -size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { +size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const { PTEKind base_kind = GetPageKind(gpu_addr); if (base_kind == PTEKind::INVALID) { return 0; @@ -596,6 +596,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { return true; } range_so_far += copy_amount; + if (range_so_far >= max_size) { + result = true; + return true; + } return false; }; auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { @@ -605,6 +609,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { return true; } range_so_far += copy_amount; + if (range_so_far >= max_size) { + result = true; + return true; + } return false; }; auto check_short_pages = [&](std::size_t page_index, std::size_t offset, diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 65f6e8134d..ca22520d75 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -118,7 +118,8 @@ public: PTEKind GetPageKind(GPUVAddr gpu_addr) const; - size_t GetMemoryLayoutSize(GPUVAddr gpu_addr) const; + size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, + size_t max_size = std::numeric_limits::max()) const; private: template diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index e62b36822f..df229f41b8 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -97,6 +97,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0); alpha_to_coverage_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_coverage != 0 ? 1 : 0); alpha_to_one_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_one != 0 ? 1 : 0); + app_stage.Assign(maxwell3d.engine_state); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index ab79fb8f36..03bf64b575 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -197,6 +197,7 @@ struct FixedPipelineState { BitField<14, 1, u32> smooth_lines; BitField<15, 1, u32> alpha_to_coverage_enabled; BitField<16, 1, u32> alpha_to_one_enabled; + BitField<17, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e7262420c9..58b955821e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; -constexpr u32 CACHE_VERSION = 8; +constexpr u32 CACHE_VERSION = 9; template auto MakeSpan(Container& container) { diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 9588107473..99d85bfb3e 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -202,12 +202,15 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { const u64 num_texture_types{static_cast(texture_types.size())}; const u64 num_texture_pixel_formats{static_cast(texture_pixel_formats.size())}; const u64 num_cbuf_values{static_cast(cbuf_values.size())}; + const u64 num_cbuf_replacement_values{static_cast(cbuf_replacements.size())}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) .write(reinterpret_cast(&num_texture_pixel_formats), sizeof(num_texture_pixel_formats)) .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) + .write(reinterpret_cast(&num_cbuf_replacement_values), + sizeof(num_cbuf_replacement_values)) .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) @@ -229,6 +232,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); } + for (const auto& [key, type] : cbuf_replacements) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } if (stage == Shader::Stage::Compute) { file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) .write(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); @@ -318,6 +325,8 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, ASSERT(local_size <= std::numeric_limits::max()); local_memory_size = static_cast(local_size) + sph.common3.shader_local_memory_crs_size; texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot; + has_hle_engine_state = + maxwell3d->engine_state == Tegra::Engines::Maxwell3D::EngineHint::OnHLEMacro; } u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { @@ -331,6 +340,30 @@ u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { return value; } +std::optional GraphicsEnvironment::GetReplaceConstBuffer(u32 bank, + u32 offset) { + if (!has_hle_engine_state) { + return std::nullopt; + } + const u64 key = (static_cast(bank) << 32) | static_cast(offset); + auto it = maxwell3d->replace_table.find(key); + if (it == maxwell3d->replace_table.end()) { + return std::nullopt; + } + const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplaceName name) { + switch (name) { + case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseVertex: + return Shader::ReplaceConstant::BaseVertex; + case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseInstance: + return Shader::ReplaceConstant::BaseInstance; + default: + UNREACHABLE(); + } + }(it->second); + cbuf_replacements.emplace(key, converted_value); + return converted_value; +} + Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { const auto& regs{maxwell3d->regs}; const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; @@ -409,11 +442,14 @@ void FileEnvironment::Deserialize(std::ifstream& file) { u64 num_texture_types{}; u64 num_texture_pixel_formats{}; u64 num_cbuf_values{}; + u64 num_cbuf_replacement_values{}; file.read(reinterpret_cast(&code_size), sizeof(code_size)) .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) .read(reinterpret_cast(&num_texture_pixel_formats), sizeof(num_texture_pixel_formats)) .read(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) + .read(reinterpret_cast(&num_cbuf_replacement_values), + sizeof(num_cbuf_replacement_values)) .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast(&start_address), sizeof(start_address)) @@ -444,6 +480,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) { .read(reinterpret_cast(&value), sizeof(value)); cbuf_values.emplace(key, value); } + for (size_t i = 0; i < num_cbuf_replacement_values; ++i) { + u64 key; + Shader::ReplaceConstant value; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&value), sizeof(value)); + cbuf_replacements.emplace(key, value); + } if (stage == Shader::Stage::Compute) { file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); @@ -512,6 +555,16 @@ std::array FileEnvironment::WorkgroupSize() const { return workgroup_size; } +std::optional FileEnvironment::GetReplaceConstBuffer(u32 bank, + u32 offset) { + const u64 key = (static_cast(bank) << 32) | static_cast(offset); + auto it = cbuf_replacements.find(key); + if (it == cbuf_replacements.end()) { + return std::nullopt; + } + return it->second; +} + void SerializePipeline(std::span key, std::span envs, const std::filesystem::path& filename, u32 cache_version) try { std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 1342fab1e9..d75987a52e 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -60,6 +60,10 @@ public: void Serialize(std::ofstream& file) const; + bool HasHLEMacroState() const override { + return has_hle_engine_state; + } + protected: std::optional TryFindSize(); @@ -73,6 +77,7 @@ protected: std::unordered_map texture_types; std::unordered_map texture_pixel_formats; std::unordered_map cbuf_values; + std::unordered_map cbuf_replacements; u32 local_memory_size{}; u32 texture_bound{}; @@ -89,6 +94,7 @@ protected: u32 viewport_transform_state = 1; bool has_unbound_instructions = false; + bool has_hle_engine_state = false; }; class GraphicsEnvironment final : public GenericEnvironment { @@ -109,6 +115,8 @@ public: u32 ReadViewportTransformState() override; + std::optional GetReplaceConstBuffer(u32 bank, u32 offset) override; + private: Tegra::Engines::Maxwell3D* maxwell3d{}; size_t stage_index{}; @@ -131,6 +139,11 @@ public: u32 ReadViewportTransformState() override; + std::optional GetReplaceConstBuffer( + [[maybe_unused]] u32 bank, [[maybe_unused]] u32 offset) override { + return std::nullopt; + } + private: Tegra::Engines::KeplerCompute* kepler_compute{}; }; @@ -166,6 +179,13 @@ public: [[nodiscard]] std::array WorkgroupSize() const override; + [[nodiscard]] std::optional GetReplaceConstBuffer(u32 bank, + u32 offset) override; + + [[nodiscard]] bool HasHLEMacroState() const override { + return cbuf_replacements.size() != 0; + } + void Dump(u64 hash) override; private: @@ -173,6 +193,7 @@ private: std::unordered_map texture_types; std::unordered_map texture_pixel_formats; std::unordered_map cbuf_values; + std::unordered_map cbuf_replacements; std::array workgroup_size{}; u32 local_memory_size{}; u32 shared_memory_size{};