From 3217400dd1c42cf04c4bf5b25bfc07e67e20773a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Jul 2019 01:21:14 -0300 Subject: [PATCH 01/11] gl_resource_manager: Add managed query class --- .../renderer_opengl/gl_resource_manager.cpp | 17 +++++++++++++ .../renderer_opengl/gl_resource_manager.h | 25 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 5c96c1d462..f0ddfb2764 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -207,4 +207,21 @@ void OGLFramebuffer::Release() { handle = 0; } +void OGLQuery::Create(GLenum target) { + if (handle != 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + glCreateQueries(target, 1, &handle); +} + +void OGLQuery::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteQueries(1, &handle); + handle = 0; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 3a85a1d4c1..514d1d165f 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -266,4 +266,29 @@ public: GLuint handle = 0; }; +class OGLQuery : private NonCopyable { +public: + OGLQuery() = default; + + OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + + ~OGLQuery() { + Release(); + } + + OGLQuery& operator=(OGLQuery&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + + /// Creates a new internal OpenGL resource and stores the handle + void Create(GLenum target); + + /// Deletes the internal OpenGL resource + void Release(); + + GLuint handle = 0; +}; + } // namespace OpenGL From 2b58652f0897053d4da04deb586490220ab5a774 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 
27 Jul 2019 19:40:10 -0300 Subject: [PATCH 02/11] maxwell_3d: Slow implementation of passed samples (query 21) Implements GL_SAMPLES_PASSED by waiting immediately for queries. --- src/video_core/CMakeLists.txt | 2 + src/video_core/engines/maxwell_3d.cpp | 39 ++++++++---- src/video_core/engines/maxwell_3d.h | 38 ++++++++++-- src/video_core/rasterizer_interface.h | 10 ++++ .../renderer_opengl/gl_query_cache.cpp | 59 +++++++++++++++++++ .../renderer_opengl/gl_query_cache.h | 41 +++++++++++++ .../renderer_opengl/gl_rasterizer.cpp | 24 ++++++++ .../renderer_opengl/gl_rasterizer.h | 5 ++ 8 files changed, 201 insertions(+), 17 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_query_cache.cpp create mode 100644 src/video_core/renderer_opengl/gl_query_cache.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index db9332d003..3208f4993d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -74,6 +74,8 @@ add_library(video_core STATIC renderer_opengl/gl_stream_buffer.h renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.h + renderer_opengl/gl_query_cache.cpp + renderer_opengl/gl_query_cache.h renderer_opengl/maxwell_to_gl.h renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.h diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 0b3e8749b8..fe91ff6a04 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -400,6 +400,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { ProcessQueryCondition(); break; } + case MAXWELL3D_REG_INDEX(counter_reset): { + ProcessCounterReset(); + break; + } case MAXWELL3D_REG_INDEX(sync_info): { ProcessSyncPoint(); break; @@ -544,23 +548,23 @@ void Maxwell3D::ProcessQueryGet() { "Units other than CROP are unimplemented"); switch (regs.query.query_get.operation) { - case Regs::QueryOperation::Release: { - const u64 result = 
regs.query.query_sequence; - StampQueryResult(result, regs.query.query_get.short_query == 0); + case Regs::QueryOperation::Release: + StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); break; - } - case Regs::QueryOperation::Acquire: { - // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU - // to write a value that matches the current payload. + case Regs::QueryOperation::Acquire: + // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that + // matches the current payload. UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); break; - } case Regs::QueryOperation::Counter: { - u64 result{}; + u64 result; switch (regs.query.query_get.select) { case Regs::QuerySelect::Zero: result = 0; break; + case Regs::QuerySelect::SamplesPassed: + result = rasterizer.Query(VideoCore::QueryType::SamplesPassed); + break; default: result = 1; UNIMPLEMENTED_MSG("Unimplemented query select type {}", @@ -569,15 +573,13 @@ void Maxwell3D::ProcessQueryGet() { StampQueryResult(result, regs.query.query_get.short_query == 0); break; } - case Regs::QueryOperation::Trap: { + case Regs::QueryOperation::Trap: UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); break; - } - default: { + default: UNIMPLEMENTED_MSG("Unknown query operation"); break; } - } } void Maxwell3D::ProcessQueryCondition() { @@ -619,6 +621,17 @@ void Maxwell3D::ProcessQueryCondition() { } } +void Maxwell3D::ProcessCounterReset() { + switch (regs.counter_reset) { + case Regs::CounterReset::SampleCnt: + rasterizer.ResetCounter(VideoCore::QueryType::SamplesPassed); + break; + default: + UNIMPLEMENTED_MSG("counter_reset={}", static_cast(regs.counter_reset)); + break; + } +} + void Maxwell3D::ProcessSyncPoint() { const u32 sync_point = regs.sync_info.sync_point.Value(); const u32 increment = regs.sync_info.increment.Value(); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0a2af54e50..d21f678ed0 
100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -409,6 +409,27 @@ public: Linear = 1, }; + enum class CounterReset : u32 { + SampleCnt = 0x01, + Unk02 = 0x02, + Unk03 = 0x03, + Unk04 = 0x04, + EmittedPrimitives = 0x10, // Not tested + Unk11 = 0x11, + Unk12 = 0x12, + Unk13 = 0x13, + Unk15 = 0x15, + Unk16 = 0x16, + Unk17 = 0x17, + Unk18 = 0x18, + Unk1A = 0x1A, + Unk1B = 0x1B, + Unk1C = 0x1C, + Unk1D = 0x1D, + Unk1E = 0x1E, + GeneratedPrimitives = 0x1F, + }; + struct Cull { enum class FrontFace : u32 { ClockWise = 0x0900, @@ -857,7 +878,7 @@ public: BitField<7, 1, u32> c7; } clip_distance_enabled; - INSERT_UNION_PADDING_WORDS(0x1); + u32 samplecnt_enable; float point_size; @@ -865,7 +886,11 @@ public: u32 point_sprite_enable; - INSERT_UNION_PADDING_WORDS(0x5); + INSERT_UNION_PADDING_WORDS(0x3); + + CounterReset counter_reset; + + INSERT_UNION_PADDING_WORDS(0x1); u32 zeta_enable; @@ -1412,12 +1437,15 @@ private: /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); - // Writes the query result accordingly + /// Writes the query result accordingly. void StampQueryResult(u64 payload, bool long_query); - // Handles Conditional Rendering + /// Handles conditional rendering. void ProcessQueryCondition(); + /// Handles counter resets. + void ProcessCounterReset(); + /// Handles writes to syncing register. 
void ProcessSyncPoint(); @@ -1499,8 +1527,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB); ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(vb_base_instance, 0x50E); ASSERT_REG_POSITION(clip_distance_enabled, 0x544); +ASSERT_REG_POSITION(samplecnt_enable, 0x545); ASSERT_REG_POSITION(point_size, 0x546); ASSERT_REG_POSITION(point_sprite_enable, 0x548); +ASSERT_REG_POSITION(counter_reset, 0x54C); ASSERT_REG_POSITION(zeta_enable, 0x54E); ASSERT_REG_POSITION(multisample_control, 0x54F); ASSERT_REG_POSITION(condition, 0x554); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index c586cd6fec..2fc6275398 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -17,6 +17,10 @@ class MemoryManager; namespace VideoCore { +enum class QueryType { + SamplesPassed, +}; + enum class LoadCallbackStage { Prepare, Decompile, @@ -41,6 +45,12 @@ public: /// Dispatches a compute shader invocation virtual void DispatchCompute(GPUVAddr code_addr) = 0; + /// Resets the counter of a query + virtual void ResetCounter(QueryType type) = 0; + + /// Returns the value of a GPU query + virtual u64 Query(QueryType type) = 0; + /// Notify rasterizer that all caches should be flushed to Switch memory virtual void FlushAll() = 0; diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp new file mode 100644 index 0000000000..1c7dc999ad --- /dev/null +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -0,0 +1,59 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "video_core/renderer_opengl/gl_query_cache.h" + +namespace OpenGL { + +HostCounter::HostCounter(GLenum target) { + query.Create(target); +} + +HostCounter::~HostCounter() = default; + +void HostCounter::UpdateState(bool enabled) { + if (enabled) { + Enable(); + } else { + Disable(); + } +} + +void HostCounter::Reset() { + counter = 0; + Disable(); +} + +u64 HostCounter::Query() { + if (!is_beginned) { + return counter; + } + Disable(); + u64 value; + glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value); + Enable(); + + counter += value; + return counter; +} + +void HostCounter::Enable() { + if (is_beginned) { + return; + } + is_beginned = true; + glBeginQuery(GL_SAMPLES_PASSED, query.handle); +} + +void HostCounter::Disable() { + if (!is_beginned) { + return; + } + glEndQuery(GL_SAMPLES_PASSED); + is_beginned = false; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h new file mode 100644 index 0000000000..52c6546bf9 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -0,0 +1,41 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { + +class HostCounter final { +public: + explicit HostCounter(GLenum target); + ~HostCounter(); + + /// Enables or disables the counter as required. + void UpdateState(bool enabled); + + /// Resets the counter disabling it if needed. + void Reset(); + + /// Returns the current value of the query. + /// @note It may harm precision of future queries if the counter is not disabled. + u64 Query(); + +private: + /// Enables the counter when disabled. + void Enable(); + + /// Disables the counter when enabled. + void Disable(); + + OGLQuery query; ///< OpenGL query. 
+ u64 counter{}; ///< Added values of the counter. + bool is_beginned{}; ///< True when the OpenGL query is beginned. +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b0eb14c8b5..8d132732a1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -547,6 +547,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); auto& gpu = system.GPU().Maxwell3D(); + const auto& regs = gpu.regs; + samples_passed.UpdateState(regs.samplecnt_enable); + SyncRasterizeEnable(state); SyncColorMask(); SyncFragmentColorClampState(); @@ -709,6 +712,27 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); } +void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { + switch (type) { + case VideoCore::QueryType::SamplesPassed: + samples_passed.Reset(); + break; + default: + UNIMPLEMENTED_MSG("type={}", static_cast(type)); + break; + } +} + +u64 RasterizerOpenGL::Query(VideoCore::QueryType type) { + switch (type) { + case VideoCore::QueryType::SamplesPassed: + return samples_passed.Query(); + default: + UNIMPLEMENTED_MSG("type={}", static_cast(type)); + return 1; + } +} + void RasterizerOpenGL::FlushAll() {} void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 0501f38289..32bcaf8c2e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -24,6 +24,7 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_framebuffer_cache.h" +#include "video_core/renderer_opengl/gl_query_cache.h" #include 
"video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -61,6 +62,8 @@ public: bool DrawMultiBatch(bool is_indexed) override; void Clear() override; void DispatchCompute(GPUVAddr code_addr) override; + void ResetCounter(VideoCore::QueryType type) override; + u64 Query(VideoCore::QueryType type) override; void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; @@ -221,6 +224,8 @@ private: GLintptr SetupIndexBuffer(); void SetupShaders(GLenum primitive_mode); + + HostCounter samples_passed{GL_SAMPLES_PASSED}; }; } // namespace OpenGL From fe1238be7a14b98c1698b5f8398b0efe83ade43a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 26 Nov 2019 18:30:21 -0300 Subject: [PATCH 03/11] gl_rasterizer: Add queued commands counter Keep track of the queued OpenGL commands that can signal a fence if waited on. As a side effect, we avoid calls to glFlush when no commands are queued. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 13 +++++++++++++ src/video_core/renderer_opengl/gl_rasterizer.h | 3 +++ 2 files changed, 16 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8d132732a1..652db705b4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -541,6 +541,8 @@ void RasterizerOpenGL::Clear() { } else if (use_stencil) { glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); } + + ++num_queued_commands; } void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { @@ -641,6 +643,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { glTextureBarrier(); } + ++num_queued_commands; + const GLuint base_instance = static_cast(gpu.regs.vb_base_instance); const GLsizei num_instances = static_cast(is_instanced ? 
gpu.mme_draw.instance_count : 1); @@ -710,6 +714,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { state.ApplyProgramPipeline(); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); + ++num_queued_commands; } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -762,10 +767,18 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { } void RasterizerOpenGL::FlushCommands() { + // Only flush when we have commands queued to OpenGL. + if (num_queued_commands == 0) { + return; + } + num_queued_commands = 0; glFlush(); } void RasterizerOpenGL::TickFrame() { + // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. + num_queued_commands = 0; + buffer_cache.TickFrame(); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 32bcaf8c2e..a9218db228 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -226,6 +226,9 @@ private: void SetupShaders(GLenum primitive_mode); HostCounter samples_passed{GL_SAMPLES_PASSED}; + + /// Number of commands queued to the OpenGL driver. Reseted on flush. 
+ std::size_t num_queued_commands = 0; }; } // namespace OpenGL From ef9920e164e208f03488ed9593ecffeaf4bcac3e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 26 Nov 2019 18:33:30 -0300 Subject: [PATCH 04/11] gl_rasterizer: Sort method declarations --- .../renderer_opengl/gl_rasterizer.h | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a9218db228..857a6c073b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -183,10 +183,23 @@ private: /// Syncs the alpha test state to match the guest state void SyncAlphaTest(); - /// Check for extension that are not strictly required - /// but are needed for correct emulation + /// Check for extension that are not strictly required but are needed for correct emulation void CheckExtensions(); + std::size_t CalculateVertexArraysSize() const; + + std::size_t CalculateIndexBufferSize() const; + + /// Updates and returns a vertex array object representing current vertex format + GLuint SetupVertexFormat(); + + void SetupVertexBuffer(GLuint vao); + void SetupVertexInstances(GLuint vao); + + GLintptr SetupIndexBuffer(); + + void SetupShaders(GLenum primitive_mode); + const Device device; OpenGLState state; @@ -211,20 +224,6 @@ private: BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; - std::size_t CalculateVertexArraysSize() const; - - std::size_t CalculateIndexBufferSize() const; - - /// Updates and returns a vertex array object representing current vertex format - GLuint SetupVertexFormat(); - - void SetupVertexBuffer(GLuint vao); - void SetupVertexInstances(GLuint vao); - - GLintptr SetupIndexBuffer(); - - void SetupShaders(GLenum primitive_mode); - HostCounter samples_passed{GL_SAMPLES_PASSED}; /// Number of commands queued to the 
OpenGL driver. Reseted on flush. From aae8c180cbbf91ba12f53c37e81a97d4b3cc4ccd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 26 Nov 2019 18:52:15 -0300 Subject: [PATCH 05/11] gl_query_cache: Implement host queries using a deferred cache Instead of waiting immediately for executed commands, defer the query until the guest CPU reads it. This way we get closer to what the guest program is doing. To achieve this we have to build a dependency queue, because host APIs (like OpenGL and Vulkan) use ranged queries instead of counters like NVN. Waiting for queries implicitly uses fences and this requires a command being queued, otherwise the driver will lock waiting until a timeout. To fix this when there are no commands queued, we explicitly call glFlush. --- src/video_core/engines/maxwell_3d.cpp | 41 ++-- src/video_core/engines/maxwell_3d.h | 4 + src/video_core/rasterizer_interface.h | 5 +- .../renderer_opengl/gl_query_cache.cpp | 201 +++++++++++++++--- .../renderer_opengl/gl_query_cache.h | 123 +++++++++-- .../renderer_opengl/gl_rasterizer.cpp | 30 +-- .../renderer_opengl/gl_rasterizer.h | 10 +- 7 files changed, 328 insertions(+), 86 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index fe91ff6a04..9add2bc94a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -556,23 +556,13 @@ void Maxwell3D::ProcessQueryGet() { // matches the current payload. 
UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); break; - case Regs::QueryOperation::Counter: { - u64 result; - switch (regs.query.query_get.select) { - case Regs::QuerySelect::Zero: - result = 0; - break; - case Regs::QuerySelect::SamplesPassed: - result = rasterizer.Query(VideoCore::QueryType::SamplesPassed); - break; - default: - result = 1; - UNIMPLEMENTED_MSG("Unimplemented query select type {}", - static_cast(regs.query.query_get.select.Value())); + case Regs::QueryOperation::Counter: + if (const std::optional result = GetQueryResult()) { + // If the query returns an empty optional it means it's cached and deferred. + // In this case we have a non-empty result, so we stamp it immediately. + StampQueryResult(*result, regs.query.query_get.short_query == 0); } - StampQueryResult(result, regs.query.query_get.short_query == 0); break; - } case Regs::QueryOperation::Trap: UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); break; @@ -595,20 +585,20 @@ void Maxwell3D::ProcessQueryCondition() { } case Regs::ConditionMode::ResNonZero: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U; break; } case Regs::ConditionMode::Equal: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode; break; } case Regs::ConditionMode::NotEqual: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode; break; @@ -674,6 +664,21 @@ void Maxwell3D::DrawArrays() { } } +std::optional 
Maxwell3D::GetQueryResult() { + switch (regs.query.query_get.select) { + case Regs::QuerySelect::Zero: + return 0; + case Regs::QuerySelect::SamplesPassed: + // Deferred. + rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed); + return {}; + default: + UNIMPLEMENTED_MSG("Unimplemented query select type {}", + static_cast(regs.query.query_get.select.Value())); + return 1; + } +} + void Maxwell3D::ProcessCBBind(std::size_t stage_index) { // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. auto& shader = state.shader_stages[stage_index]; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d21f678ed0..26939be3f5 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -1462,6 +1463,9 @@ private: // Handles a instance drawcall from MME void StepInstance(MMEDrawMode expected_mode, u32 count); + + /// Returns a query's value or an empty object if the value will be deferred through a cache. 
+ std::optional GetQueryResult(); }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 2fc6275398..a394f2d3e0 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -20,6 +20,7 @@ namespace VideoCore { enum class QueryType { SamplesPassed, }; +constexpr std::size_t NumQueryTypes = 1; enum class LoadCallbackStage { Prepare, @@ -48,8 +49,8 @@ public: /// Resets the counter of a query virtual void ResetCounter(QueryType type) = 0; - /// Returns the value of a GPU query - virtual u64 Query(QueryType type) = 0; + /// Records a GPU query and caches it + virtual void Query(GPUVAddr gpu_addr, QueryType type) = 0; /// Notify rasterizer that all caches should be flushed to Switch memory virtual void FlushAll() = 0; diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index 1c7dc999ad..8f0e8241d6 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -2,58 +2,203 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include +#include +#include +#include + #include +#include "common/assert.h" +#include "core/core.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_query_cache.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" namespace OpenGL { -HostCounter::HostCounter(GLenum target) { - query.Create(target); +using VideoCore::QueryType; + +namespace { + +constexpr std::array QueryTargets = {GL_SAMPLES_PASSED}; + +constexpr GLenum GetTarget(QueryType type) { + return QueryTargets[static_cast(type)]; } -HostCounter::~HostCounter() = default; +} // Anonymous namespace -void HostCounter::UpdateState(bool enabled) { +CounterStream::CounterStream(QueryCache& cache, QueryType type) + : cache{cache}, type{type}, target{GetTarget(type)} {} + +CounterStream::~CounterStream() = default; + +void CounterStream::Update(bool enabled, bool any_command_queued) { if (enabled) { - Enable(); - } else { - Disable(); + if (!current) { + current = cache.GetHostCounter(last, type); + } + return; } + + if (current) { + EndQuery(any_command_queued); + } + last = std::exchange(current, nullptr); } -void HostCounter::Reset() { - counter = 0; - Disable(); +void CounterStream::Reset(bool any_command_queued) { + if (current) { + EndQuery(any_command_queued); + } + current = nullptr; + last = nullptr; } -u64 HostCounter::Query() { - if (!is_beginned) { - return counter; +std::shared_ptr CounterStream::GetCurrent(bool any_command_queued) { + if (!current) { + return nullptr; } - Disable(); - u64 value; - glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value); - Enable(); + EndQuery(any_command_queued); + last = std::move(current); + current = cache.GetHostCounter(last, type); + return last; +} - counter += value; +void CounterStream::EndQuery(bool any_command_queued) { + if (!any_command_queued) { + // There are chances a query waited on without commands (glDraw, glClear, glDispatch). 
Not + // having any of these causes a lock. glFlush is considered a command, so we can safely wait + // for this. Insert to the OpenGL command stream a flush. + glFlush(); + } + glEndQuery(target); +} + +QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer) + : RasterizerCache{rasterizer}, system{system}, + rasterizer{rasterizer}, streams{{CounterStream{*this, QueryType::SamplesPassed}}} {} + +QueryCache::~QueryCache() = default; + +void QueryCache::Query(GPUVAddr gpu_addr, QueryType type) { + auto& memory_manager = system.GPU().MemoryManager(); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + + auto query = TryGet(host_ptr); + if (!query) { + const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT_OR_EXECUTE(cpu_addr, return;); + + query = std::make_shared(type, *cpu_addr, host_ptr); + Register(query); + } + + query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued())); + query->MarkAsModified(true, *this); +} + +void QueryCache::UpdateCounters() { + auto& samples_passed = GetStream(QueryType::SamplesPassed); + + const auto& regs = system.GPU().Maxwell3D().regs; + samples_passed.Update(regs.samplecnt_enable, rasterizer.AnyCommandQueued()); +} + +void QueryCache::ResetCounter(QueryType type) { + GetStream(type).Reset(rasterizer.AnyCommandQueued()); +} + +void QueryCache::Reserve(QueryType type, OGLQuery&& query) { + reserved_queries[static_cast(type)].push_back(std::move(query)); +} + +std::shared_ptr QueryCache::GetHostCounter(std::shared_ptr dependency, + QueryType type) { + const auto type_index = static_cast(type); + auto& reserve = reserved_queries[type_index]; + + if (reserve.empty()) { + return std::make_shared(*this, std::move(dependency), type); + } + + auto counter = std::make_shared(*this, std::move(dependency), type, + std::move(reserve.back())); + reserve.pop_back(); return counter; } -void HostCounter::Enable() { - if (is_beginned) { - return; +void QueryCache::FlushObjectInner(const 
std::shared_ptr& counter_) { + auto& counter = *counter_; + auto& stream = GetStream(counter.GetType()); + + // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. + // To avoid this disable and re-enable keeping the dependency stream. + const bool is_enabled = stream.IsEnabled(); + if (is_enabled) { + stream.Update(false, false); + } + + counter.Flush(); + + if (is_enabled) { + stream.Update(true, false); } - is_beginned = true; - glBeginQuery(GL_SAMPLES_PASSED, query.handle); } -void HostCounter::Disable() { - if (!is_beginned) { - return; +CounterStream& QueryCache::GetStream(QueryType type) { + return streams[static_cast(type)]; +} + +HostCounter::HostCounter(QueryCache& cache, std::shared_ptr dependency, QueryType type) + : cache{cache}, type{type}, dependency{std::move(dependency)} { + const GLenum target = GetTarget(type); + query.Create(target); + glBeginQuery(target, query.handle); +} + +HostCounter::HostCounter(QueryCache& cache, std::shared_ptr dependency, QueryType type, + OGLQuery&& query_) + : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} { + glBeginQuery(GetTarget(type), query.handle); +} + +HostCounter::~HostCounter() { + cache.Reserve(type, std::move(query)); +} + +u64 HostCounter::Query() { + if (query.handle == 0) { + return result; } - glEndQuery(GL_SAMPLES_PASSED); - is_beginned = false; + + glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &result); + + if (dependency) { + result += dependency->Query(); + } + + return result; +} + +CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr) + : RasterizerCacheObject{host_ptr}, type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {} + +CachedQuery::~CachedQuery() = default; + +void CachedQuery::Flush() { + const u64 value = counter->Query(); + std::memcpy(host_ptr, &value, sizeof(value)); +} + +void CachedQuery::SetCounter(std::shared_ptr counter_) { + counter = std::move(counter_); +} + +QueryType 
CachedQuery::GetType() const { + return type; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index 52c6546bf9..91594b1203 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -4,38 +4,131 @@ #pragma once +#include +#include +#include +#include + #include #include "common/common_types.h" +#include "video_core/rasterizer_cache.h" +#include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +namespace Core { +class System; +} + namespace OpenGL { +class CachedQuery; +class HostCounter; +class RasterizerOpenGL; +class QueryCache; + +class CounterStream final { +public: + explicit CounterStream(QueryCache& cache, VideoCore::QueryType type); + ~CounterStream(); + + void Update(bool enabled, bool any_command_queued); + + void Reset(bool any_command_queued); + + std::shared_ptr GetCurrent(bool any_command_queued); + + bool IsEnabled() const { + return current != nullptr; + } + +private: + void EndQuery(bool any_command_queued); + + QueryCache& cache; + + std::shared_ptr current; + std::shared_ptr last; + VideoCore::QueryType type; + GLenum target; +}; + +class QueryCache final : public RasterizerCache> { +public: + explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); + ~QueryCache(); + + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type); + + void UpdateCounters(); + + void ResetCounter(VideoCore::QueryType type); + + void Reserve(VideoCore::QueryType type, OGLQuery&& query); + + std::shared_ptr GetHostCounter(std::shared_ptr dependency, + VideoCore::QueryType type); + +protected: + void FlushObjectInner(const std::shared_ptr& counter) override; + +private: + CounterStream& GetStream(VideoCore::QueryType type); + + Core::System& system; + RasterizerOpenGL& rasterizer; + + std::array streams; + std::array, VideoCore::NumQueryTypes> reserved_queries; +}; 
+ class HostCounter final { public: - explicit HostCounter(GLenum target); + explicit HostCounter(QueryCache& cache, std::shared_ptr dependency, + VideoCore::QueryType type); + explicit HostCounter(QueryCache& cache, std::shared_ptr dependency, + VideoCore::QueryType type, OGLQuery&& query); ~HostCounter(); - /// Enables or disables the counter as required. - void UpdateState(bool enabled); - - /// Resets the counter disabling it if needed. - void Reset(); - /// Returns the current value of the query. - /// @note It may harm precision of future queries if the counter is not disabled. u64 Query(); private: - /// Enables the counter when disabled. - void Enable(); + QueryCache& cache; + VideoCore::QueryType type; - /// Disables the counter when enabled. - void Disable(); + std::shared_ptr dependency; ///< Counter queued before this one. + OGLQuery query; ///< OpenGL query. + u64 result; ///< Added values of the counter. +}; - OGLQuery query; ///< OpenGL query. - u64 counter{}; ///< Added values of the counter. - bool is_beginned{}; ///< True when the OpenGL query is beginned. +class CachedQuery final : public RasterizerCacheObject { +public: + explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr); + ~CachedQuery(); + + /// Writes the counter value to host memory. + void Flush(); + + /// Updates the counter this cached query registered in guest memory will write when requested. + void SetCounter(std::shared_ptr counter); + + /// Returns the query type. + VideoCore::QueryType GetType() const; + + VAddr GetCpuAddr() const override { + return cpu_addr; + } + + std::size_t GetSizeInBytes() const override { + return sizeof(u64); + } + +private: + VideoCore::QueryType type; + VAddr cpu_addr; ///< Guest CPU address. + u8* host_ptr; ///< Writable host pointer. + std::shared_ptr counter; ///< Host counter to query, owns the dependency tree. 
}; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 652db705b4..827f858849 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -25,6 +25,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_gen.h" @@ -92,8 +93,8 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device}, - shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info}, - buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { + shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, + screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { shader_program_manager = std::make_unique(); state.draw.shader_program = 0; state.Apply(); @@ -548,9 +549,9 @@ void RasterizerOpenGL::Clear() { void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); auto& gpu = system.GPU().Maxwell3D(); - const auto& regs = gpu.regs; - samples_passed.UpdateState(regs.samplecnt_enable); + + query_cache.UpdateCounters(); SyncRasterizeEnable(state); SyncColorMask(); @@ -718,24 +719,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { - switch (type) { - case VideoCore::QueryType::SamplesPassed: - samples_passed.Reset(); - break; - default: - UNIMPLEMENTED_MSG("type={}", 
static_cast(type)); - break; - } + query_cache.ResetCounter(type); } -u64 RasterizerOpenGL::Query(VideoCore::QueryType type) { - switch (type) { - case VideoCore::QueryType::SamplesPassed: - return samples_passed.Query(); - default: - UNIMPLEMENTED_MSG("type={}", static_cast(type)); - return 1; - } +void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type) { + query_cache.Query(gpu_addr, type); } void RasterizerOpenGL::FlushAll() {} @@ -747,6 +735,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { } texture_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size); + query_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { @@ -757,6 +746,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { texture_cache.InvalidateRegion(addr, size); shader_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); + query_cache.InvalidateRegion(addr, size); } void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 857a6c073b..4fb6811a7b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -63,7 +63,7 @@ public: void Clear() override; void DispatchCompute(GPUVAddr code_addr) override; void ResetCounter(VideoCore::QueryType type) override; - u64 Query(VideoCore::QueryType type) override; + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type) override; void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; @@ -78,6 +78,11 @@ public: void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; + /// Returns true when there are commands queued to the OpenGL server. 
+ bool AnyCommandQueued() const { + return num_queued_commands > 0; + } + private: /// Configures the color and depth framebuffer states. void ConfigureFramebuffers(); @@ -207,6 +212,7 @@ private: ShaderCacheOpenGL shader_cache; SamplerCacheOpenGL sampler_cache; FramebufferCacheOpenGL framebuffer_cache; + QueryCache query_cache; Core::System& system; ScreenInfo& screen_info; @@ -224,8 +230,6 @@ private: BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; - HostCounter samples_passed{GL_SAMPLES_PASSED}; - /// Number of commands queued to the OpenGL driver. Reseted on flush. std::size_t num_queued_commands = 0; }; From 73d2d3342dc8867d32f08f89b2ca36ff071598dc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 28 Nov 2019 02:15:34 -0300 Subject: [PATCH 06/11] gl_query_cache: Optimize query cache Use a custom cache instead of relying on a ranged cache. --- src/video_core/engines/maxwell_3d.cpp | 11 +- src/video_core/rasterizer_interface.h | 3 +- .../renderer_opengl/gl_query_cache.cpp | 214 +++++++++++++----- .../renderer_opengl/gl_query_cache.h | 61 +++-- .../renderer_opengl/gl_rasterizer.cpp | 5 +- .../renderer_opengl/gl_rasterizer.h | 2 +- 6 files changed, 217 insertions(+), 79 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 9add2bc94a..842cdcbcf7 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -4,6 +4,7 @@ #include #include +#include #include "common/assert.h" #include "core/core.h" #include "core/core_timing.h" @@ -16,6 +17,8 @@ namespace Tegra::Engines { +using VideoCore::QueryType; + /// First register id that is actually a Macro call. 
constexpr u32 MacroRegistersStart = 0xE00; @@ -614,10 +617,11 @@ void Maxwell3D::ProcessQueryCondition() { void Maxwell3D::ProcessCounterReset() { switch (regs.counter_reset) { case Regs::CounterReset::SampleCnt: - rasterizer.ResetCounter(VideoCore::QueryType::SamplesPassed); + rasterizer.ResetCounter(QueryType::SamplesPassed); break; default: - UNIMPLEMENTED_MSG("counter_reset={}", static_cast(regs.counter_reset)); + LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}", + static_cast(regs.counter_reset)); break; } } @@ -670,7 +674,8 @@ std::optional Maxwell3D::GetQueryResult() { return 0; case Regs::QuerySelect::SamplesPassed: // Deferred. - rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed); + rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, + system.GPU().GetTicks()); return {}; default: UNIMPLEMENTED_MSG("Unimplemented query select type {}", diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index a394f2d3e0..e9f1436f07 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -6,6 +6,7 @@ #include #include +#include #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" @@ -50,7 +51,7 @@ public: virtual void ResetCounter(QueryType type) = 0; /// Records a GPU query and caches it - virtual void Query(GPUVAddr gpu_addr, QueryType type) = 0; + virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional timestamp) = 0; /// Notify rasterizer that all caches should be flushed to Switch memory virtual void FlushAll() = 0; diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index 8f0e8241d6..74cb732097 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -2,8 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the 
license.txt file included. +#include #include #include +#include #include #include @@ -22,6 +24,13 @@ using VideoCore::QueryType; namespace { +constexpr std::uintptr_t PAGE_SIZE = 4096; +constexpr int PAGE_SHIFT = 12; + +constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp +constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp +constexpr std::ptrdiff_t TIMESTAMP_OFFSET = 8; + constexpr std::array QueryTargets = {GL_SAMPLES_PASSED}; constexpr GLenum GetTarget(QueryType type) { @@ -37,23 +46,19 @@ CounterStream::~CounterStream() = default; void CounterStream::Update(bool enabled, bool any_command_queued) { if (enabled) { - if (!current) { - current = cache.GetHostCounter(last, type); - } - return; + Enable(); + } else { + Disable(any_command_queued); } - - if (current) { - EndQuery(any_command_queued); - } - last = std::exchange(current, nullptr); } void CounterStream::Reset(bool any_command_queued) { if (current) { EndQuery(any_command_queued); + + // Immediately start a new query to avoid disabling its state. + current = cache.GetHostCounter(nullptr, type); } - current = nullptr; last = nullptr; } @@ -67,6 +72,20 @@ std::shared_ptr CounterStream::GetCurrent(bool any_command_queued) return last; } +void CounterStream::Enable() { + if (current) { + return; + } + current = cache.GetHostCounter(last, type); +} + +void CounterStream::Disable(bool any_command_queued) { + if (current) { + EndQuery(any_command_queued); + } + last = std::exchange(current, nullptr); +} + void CounterStream::EndQuery(bool any_command_queued) { if (!any_command_queued) { // There are chances a query waited on without commands (glDraw, glClear, glDispatch). 
Not @@ -78,26 +97,57 @@ void CounterStream::EndQuery(bool any_command_queued) { } QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer) - : RasterizerCache{rasterizer}, system{system}, - rasterizer{rasterizer}, streams{{CounterStream{*this, QueryType::SamplesPassed}}} {} + : system{system}, rasterizer{rasterizer}, streams{{CounterStream{*this, + QueryType::SamplesPassed}}} {} QueryCache::~QueryCache() = default; -void QueryCache::Query(GPUVAddr gpu_addr, QueryType type) { +void QueryCache::InvalidateRegion(CacheAddr addr, std::size_t size) { + const u64 addr_begin = static_cast(addr); + const u64 addr_end = addr_begin + static_cast(size); + const auto in_range = [addr_begin, addr_end](CachedQuery& query) { + const u64 cache_begin = query.GetCacheAddr(); + const u64 cache_end = cache_begin + query.GetSizeInBytes(); + return cache_begin < addr_end && addr_begin < cache_end; + }; + + const u64 page_end = addr_end >> PAGE_SHIFT; + for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { + const auto& it = cached_queries.find(page); + if (it == std::end(cached_queries)) { + continue; + } + auto& contents = it->second; + for (auto& query : contents) { + if (!in_range(query)) { + continue; + } + rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.GetSizeInBytes(), -1); + Flush(query); + } + contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), + std::end(contents)); + } +} + +void QueryCache::FlushRegion(CacheAddr addr, std::size_t size) { + // We can handle flushes in the same way as invalidations. 
+ InvalidateRegion(addr, size); +} + +void QueryCache::Query(GPUVAddr gpu_addr, QueryType type, std::optional timestamp) { auto& memory_manager = system.GPU().MemoryManager(); const auto host_ptr = memory_manager.GetPointer(gpu_addr); - auto query = TryGet(host_ptr); + CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); if (!query) { const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); ASSERT_OR_EXECUTE(cpu_addr, return;); - query = std::make_shared(type, *cpu_addr, host_ptr); - Register(query); + query = &Register(CachedQuery(type, *cpu_addr, host_ptr)); } - query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued())); - query->MarkAsModified(true, *this); + query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()), timestamp); } void QueryCache::UpdateCounters() { @@ -117,34 +167,54 @@ void QueryCache::Reserve(QueryType type, OGLQuery&& query) { std::shared_ptr QueryCache::GetHostCounter(std::shared_ptr dependency, QueryType type) { - const auto type_index = static_cast(type); - auto& reserve = reserved_queries[type_index]; - + auto& reserve = reserved_queries[static_cast(type)]; + OGLQuery query; if (reserve.empty()) { - return std::make_shared(*this, std::move(dependency), type); + query.Create(GetTarget(type)); + } else { + query = std::move(reserve.back()); + reserve.pop_back(); } - auto counter = std::make_shared(*this, std::move(dependency), type, - std::move(reserve.back())); - reserve.pop_back(); - return counter; + return std::make_shared(*this, std::move(dependency), type, std::move(query)); } -void QueryCache::FlushObjectInner(const std::shared_ptr& counter_) { - auto& counter = *counter_; - auto& stream = GetStream(counter.GetType()); +CachedQuery& QueryCache::Register(CachedQuery&& cached_query) { + const u64 page = static_cast(cached_query.GetCacheAddr()) >> PAGE_SHIFT; + auto& stored_ref = cached_queries[page].emplace_back(std::move(cached_query)); + 
rasterizer.UpdatePagesCachedCount(stored_ref.GetCpuAddr(), stored_ref.GetSizeInBytes(), 1); + return stored_ref; +} + +CachedQuery* QueryCache::TryGet(CacheAddr addr) { + const u64 page = static_cast(addr) >> PAGE_SHIFT; + const auto it = cached_queries.find(page); + if (it == std::end(cached_queries)) { + return nullptr; + } + auto& contents = it->second; + const auto found = + std::find_if(std::begin(contents), std::end(contents), + [addr](const auto& query) { return query.GetCacheAddr() == addr; }); + return found != std::end(contents) ? &*found : nullptr; +} + +void QueryCache::Flush(CachedQuery& cached_query) { + auto& stream = GetStream(cached_query.GetType()); // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. // To avoid this disable and re-enable keeping the dependency stream. - const bool is_enabled = stream.IsEnabled(); - if (is_enabled) { - stream.Update(false, false); + // But we only have to do this if we have pending waits to be done. 
+ const bool slice_counter = stream.IsEnabled() && cached_query.WaitPending(); + const bool any_command_queued = rasterizer.AnyCommandQueued(); + if (slice_counter) { + stream.Update(false, any_command_queued); } - counter.Flush(); + cached_query.Flush(); - if (is_enabled) { - stream.Update(true, false); + if (slice_counter) { + stream.Update(true, any_command_queued); } } @@ -152,13 +222,6 @@ CounterStream& QueryCache::GetStream(QueryType type) { return streams[static_cast(type)]; } -HostCounter::HostCounter(QueryCache& cache, std::shared_ptr dependency, QueryType type) - : cache{cache}, type{type}, dependency{std::move(dependency)} { - const GLenum target = GetTarget(type); - query.Create(target); - glBeginQuery(target, query.handle); -} - HostCounter::HostCounter(QueryCache& cache, std::shared_ptr dependency, QueryType type, OGLQuery&& query_) : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} { @@ -170,35 +233,80 @@ HostCounter::~HostCounter() { } u64 HostCounter::Query() { - if (query.handle == 0) { - return result; + if (result) { + return *result; } - glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &result); - + u64 value; + glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value); if (dependency) { - result += dependency->Query(); + value += dependency->Query(); } - return result; + return *(result = value); +} + +bool HostCounter::WaitPending() const noexcept { + return result.has_value(); } CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {} + : type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {} + +CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept + : type{rhs.type}, cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, + counter{std::move(rhs.counter)}, timestamp{rhs.timestamp} {} CachedQuery::~CachedQuery() = default; +CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { + type = 
rhs.type; + cpu_addr = rhs.cpu_addr; + host_ptr = rhs.host_ptr; + counter = std::move(rhs.counter); + timestamp = rhs.timestamp; + return *this; +} + void CachedQuery::Flush() { - const u64 value = counter->Query(); - std::memcpy(host_ptr, &value, sizeof(value)); + // When counter is nullptr it means that it's just been reseted. We are supposed to write a zero + // in these cases. + const u64 value = counter ? counter->Query() : 0; + std::memcpy(host_ptr, &value, sizeof(u64)); + + if (timestamp) { + std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); + } } -void CachedQuery::SetCounter(std::shared_ptr counter_) { +void CachedQuery::SetCounter(std::shared_ptr counter_, std::optional timestamp_) { + if (counter) { + // If there's an old counter set it means the query is being rewritten by the game. + // To avoid losing the data forever, flush here. + Flush(); + } counter = std::move(counter_); + timestamp = timestamp_; } -QueryType CachedQuery::GetType() const { +bool CachedQuery::WaitPending() const noexcept { + return counter && counter->WaitPending(); +} + +QueryType CachedQuery::GetType() const noexcept { return type; } +VAddr CachedQuery::GetCpuAddr() const noexcept { + return cpu_addr; +} + +CacheAddr CachedQuery::GetCacheAddr() const noexcept { + return ToCacheAddr(host_ptr); +} + +u64 CachedQuery::GetSizeInBytes() const noexcept { + return timestamp ? 
LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index 91594b1203..d9f22b44d7 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -7,12 +7,12 @@ #include #include #include +#include #include #include #include "common/common_types.h" -#include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -43,6 +43,10 @@ public: } private: + void Enable(); + + void Disable(bool any_command_queued); + void EndQuery(bool any_command_queued); QueryCache& cache; @@ -53,12 +57,16 @@ private: GLenum target; }; -class QueryCache final : public RasterizerCache> { +class QueryCache final { public: explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); ~QueryCache(); - void Query(GPUVAddr gpu_addr, VideoCore::QueryType type); + void InvalidateRegion(CacheAddr addr, std::size_t size); + + void FlushRegion(CacheAddr addr, std::size_t size); + + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp); void UpdateCounters(); @@ -69,23 +77,26 @@ public: std::shared_ptr GetHostCounter(std::shared_ptr dependency, VideoCore::QueryType type); -protected: - void FlushObjectInner(const std::shared_ptr& counter) override; - private: + CachedQuery& Register(CachedQuery&& cached_query); + + CachedQuery* TryGet(CacheAddr addr); + + void Flush(CachedQuery& cached_query); + CounterStream& GetStream(VideoCore::QueryType type); Core::System& system; RasterizerOpenGL& rasterizer; + std::unordered_map> cached_queries; + std::array streams; std::array, VideoCore::NumQueryTypes> reserved_queries; }; class HostCounter final { public: - explicit HostCounter(QueryCache& cache, std::shared_ptr dependency, - VideoCore::QueryType type); explicit HostCounter(QueryCache& cache, std::shared_ptr 
dependency, VideoCore::QueryType type, OGLQuery&& query); ~HostCounter(); @@ -93,42 +104,54 @@ public: /// Returns the current value of the query. u64 Query(); + /// Returns true when querying this counter will potentially wait for OpenGL. + bool WaitPending() const noexcept; + private: QueryCache& cache; VideoCore::QueryType type; std::shared_ptr dependency; ///< Counter queued before this one. OGLQuery query; ///< OpenGL query. - u64 result; ///< Added values of the counter. + std::optional result; ///< Added values of the counter. }; -class CachedQuery final : public RasterizerCacheObject { +class CachedQuery final { public: explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr); + CachedQuery(CachedQuery&&) noexcept; + CachedQuery(const CachedQuery&) = delete; ~CachedQuery(); + CachedQuery& operator=(CachedQuery&&) noexcept; + /// Writes the counter value to host memory. void Flush(); /// Updates the counter this cached query registered in guest memory will write when requested. - void SetCounter(std::shared_ptr counter); + void SetCounter(std::shared_ptr counter, std::optional timestamp); + + /// Returns true when a flushing this query will potentially wait for OpenGL. + bool WaitPending() const noexcept; /// Returns the query type. - VideoCore::QueryType GetType() const; + VideoCore::QueryType GetType() const noexcept; - VAddr GetCpuAddr() const override { - return cpu_addr; - } + /// Returns the guest CPU address for this query. + VAddr GetCpuAddr() const noexcept; - std::size_t GetSizeInBytes() const override { - return sizeof(u64); - } + /// Returns the cache address for this query. + CacheAddr GetCacheAddr() const noexcept; + + /// Returns the number of cached bytes. + u64 GetSizeInBytes() const noexcept; private: - VideoCore::QueryType type; + VideoCore::QueryType type; ///< Abstracted query type (e.g. samples passed). VAddr cpu_addr; ///< Guest CPU address. u8* host_ptr; ///< Writable host pointer. 
std::shared_ptr counter; ///< Host counter to query, owns the dependency tree. + std::optional timestamp; ///< Timestamp to flush to guest memory. }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 827f858849..4bdc8db859 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -722,8 +722,9 @@ void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { query_cache.ResetCounter(type); } -void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type) { - query_cache.Query(gpu_addr, type); +void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, + std::optional timestamp) { + query_cache.Query(gpu_addr, type, timestamp); } void RasterizerOpenGL::FlushAll() {} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4fb6811a7b..c772fd4baa 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -63,7 +63,7 @@ public: void Clear() override; void DispatchCompute(GPUVAddr code_addr) override; void ResetCounter(VideoCore::QueryType type) override; - void Query(GPUVAddr gpu_addr, VideoCore::QueryType type) override; + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) override; void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; From c31382ced54c07650ae41fa2f75dc53da894784e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 11 Feb 2020 16:02:41 -0300 Subject: [PATCH 07/11] query_cache: Abstract OpenGL implementation Abstract the current OpenGL implementation into the VideoCommon namespace and reimplement it on top of that. Doing this avoids repeating code and logic in the Vulkan implementation. 
--- src/video_core/CMakeLists.txt | 1 + src/video_core/query_cache.h | 323 ++++++++++++++++++ .../renderer_opengl/gl_query_cache.cpp | 287 +++------------- .../renderer_opengl/gl_query_cache.h | 122 ++----- 4 files changed, 394 insertions(+), 339 deletions(-) create mode 100644 src/video_core/query_cache.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3208f4993d..bb5895e992 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -37,6 +37,7 @@ add_library(video_core STATIC memory_manager.h morton.cpp morton.h + query_cache.h rasterizer_accelerated.cpp rasterizer_accelerated.h rasterizer_cache.cpp diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h new file mode 100644 index 0000000000..4c9151ce8c --- /dev/null +++ b/src/video_core/query_cache.h @@ -0,0 +1,323 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "core/core.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" + +namespace VideoCommon { + +template +class CounterStreamBase { +public: + explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type) + : cache{cache}, type{type} {} + + /// Updates the state of the stream, enabling or disabling as needed. + void Update(bool enabled) { + if (enabled) { + Enable(); + } else { + Disable(); + } + } + + /// Resets the stream to zero. It doesn't disable the query after resetting. + void Reset() { + if (current) { + current->EndQuery(); + + // Immediately start a new query to avoid disabling its state. + current = cache.Counter(nullptr, type); + } + last = nullptr; + } + + /// Returns the current counter slicing as needed. 
+ std::shared_ptr Current() { + if (!current) { + return nullptr; + } + current->EndQuery(); + last = std::move(current); + current = cache.Counter(last, type); + return last; + } + + /// Returns true when the counter stream is enabled. + bool IsEnabled() const { + return static_cast(current); + } + +private: + /// Enables the stream. + void Enable() { + if (current) { + return; + } + current = cache.Counter(last, type); + } + + // Disables the stream. + void Disable() { + if (current) { + current->EndQuery(); + } + last = std::exchange(current, nullptr); + } + + QueryCache& cache; + const VideoCore::QueryType type; + + std::shared_ptr current; + std::shared_ptr last; +}; + +template +class QueryCacheBase { +public: + explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) + : system{system}, rasterizer{rasterizer}, streams{{CounterStream{ + static_cast(*this), + VideoCore::QueryType::SamplesPassed}}} {} + + void InvalidateRegion(CacheAddr addr, std::size_t size) { + FlushAndRemoveRegion(addr, size); + } + + void FlushRegion(CacheAddr addr, std::size_t size) { + FlushAndRemoveRegion(addr, size); + } + + /** + * Records a query in GPU mapped memory, potentially marked with a timestamp. + * @param gpu_addr GPU address to flush to when the mapped memory is read. + * @param type Query type, e.g. SamplesPassed. + * @param timestamp Timestamp, when empty the flushed query is assumed to be short. 
+ */ + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) { + auto& memory_manager = system.GPU().MemoryManager(); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + + CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); + if (!query) { + const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT_OR_EXECUTE(cpu_addr, return;); + + query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); + } + + query->BindCounter(Stream(type).Current(), timestamp); + } + + /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. + void UpdateCounters() { + const auto& regs = system.GPU().Maxwell3D().regs; + Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); + } + + /// Resets a counter to zero. It doesn't disable the query after resetting. + void ResetCounter(VideoCore::QueryType type) { + Stream(type).Reset(); + } + + /// Returns a new host counter. + std::shared_ptr Counter(std::shared_ptr dependency, + VideoCore::QueryType type) { + return std::make_shared(static_cast(*this), std::move(dependency), + type); + } + + /// Returns the counter stream of the specified type. + CounterStream& Stream(VideoCore::QueryType type) { + return streams[static_cast(type)]; + } + +private: + /// Flushes a memory range to guest memory and removes it from the cache. 
+ void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { + const u64 addr_begin = static_cast(addr); + const u64 addr_end = addr_begin + static_cast(size); + const auto in_range = [addr_begin, addr_end](CachedQuery& query) { + const u64 cache_begin = query.CacheAddr(); + const u64 cache_end = cache_begin + query.SizeInBytes(); + return cache_begin < addr_end && addr_begin < cache_end; + }; + + const u64 page_end = addr_end >> PAGE_SHIFT; + for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { + const auto& it = cached_queries.find(page); + if (it == std::end(cached_queries)) { + continue; + } + auto& contents = it->second; + for (auto& query : contents) { + if (!in_range(query)) { + continue; + } + rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); + query.Flush(); + } + contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), + std::end(contents)); + } + } + + /// Registers the passed parameters as cached and returns a pointer to the stored cached query. + CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { + rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); + const u64 page = static_cast(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; + return &cached_queries[page].emplace_back(static_cast(*this), type, cpu_addr, + host_ptr); + } + + /// Tries to get a cached query. Returns nullptr on failure. + CachedQuery* TryGet(CacheAddr addr) { + const u64 page = static_cast(addr) >> PAGE_SHIFT; + const auto it = cached_queries.find(page); + if (it == std::end(cached_queries)) { + return nullptr; + } + auto& contents = it->second; + const auto found = std::find_if(std::begin(contents), std::end(contents), + [addr](auto& query) { return query.CacheAddr() == addr; }); + return found != std::end(contents) ? 
&*found : nullptr; + } + + static constexpr std::uintptr_t PAGE_SIZE = 4096; + static constexpr int PAGE_SHIFT = 12; + + Core::System& system; + VideoCore::RasterizerInterface& rasterizer; + + std::unordered_map> cached_queries; + + std::array streams; +}; + +template +class HostCounterBase { +public: + explicit HostCounterBase(std::shared_ptr dependency) + : dependency{std::move(dependency)} {} + + /// Returns the current value of the query. + u64 Query() { + if (result) { + return *result; + } + + u64 value = BlockingQuery(); + if (dependency) { + value += dependency->Query(); + } + + return *(result = value); + } + + /// Returns true when flushing this query will potentially wait. + bool WaitPending() const noexcept { + return result.has_value(); + } + +protected: + /// Returns the value of query from the backend API blocking as needed. + virtual u64 BlockingQuery() const = 0; + +private: + std::shared_ptr dependency; ///< Counter to add to this value. + std::optional result; ///< Filled with the already returned value. +}; + +template +class CachedQueryBase { +public: + explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr) + : cpu_addr{cpu_addr}, host_ptr{host_ptr} {} + + CachedQueryBase(CachedQueryBase&& rhs) noexcept + : cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, counter{std::move(rhs.counter)}, + timestamp{rhs.timestamp} {} + + CachedQueryBase(const CachedQueryBase&) = delete; + + CachedQueryBase& operator=(CachedQueryBase&& rhs) noexcept { + cpu_addr = rhs.cpu_addr; + host_ptr = rhs.host_ptr; + counter = std::move(rhs.counter); + timestamp = rhs.timestamp; + return *this; + } + + /// Flushes the query to guest memory. + virtual void Flush() { + // When counter is nullptr it means that it's just been reset. We are supposed to write a + // zero in these cases. + const u64 value = counter ? 
counter->Query() : 0; + std::memcpy(host_ptr, &value, sizeof(u64)); + + if (timestamp) { + std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); + } + } + + /// Binds a counter to this query. + void BindCounter(std::shared_ptr counter_, std::optional timestamp_) { + if (counter) { + // If there's an old counter set it means the query is being rewritten by the game. + // To avoid losing the data forever, flush here. + Flush(); + } + counter = std::move(counter_); + timestamp = timestamp_; + } + + VAddr CpuAddr() const noexcept { + return cpu_addr; + } + + CacheAddr CacheAddr() const noexcept { + return ToCacheAddr(host_ptr); + } + + u64 SizeInBytes() const noexcept { + return SizeInBytes(timestamp.has_value()); + } + + static u64 SizeInBytes(bool with_timestamp) { + return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; + } + +protected: + /// Returns true when querying the counter may potentially block. + bool WaitPending() const noexcept { + return counter && counter->WaitPending(); + } + +private: + static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp. + static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp. + static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query. + + VAddr cpu_addr; ///< Guest CPU address. + u8* host_ptr; ///< Writable host pointer. + std::shared_ptr counter; ///< Host counter to query, owns the dependency tree. + std::optional timestamp; ///< Timestamp to flush to guest memory. 
+}; + +} // namespace VideoCommon diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index 74cb732097..7d5a044c71 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -20,211 +20,49 @@ namespace OpenGL { -using VideoCore::QueryType; - namespace { -constexpr std::uintptr_t PAGE_SIZE = 4096; -constexpr int PAGE_SHIFT = 12; - -constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp -constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp -constexpr std::ptrdiff_t TIMESTAMP_OFFSET = 8; - constexpr std::array QueryTargets = {GL_SAMPLES_PASSED}; -constexpr GLenum GetTarget(QueryType type) { +constexpr GLenum GetTarget(VideoCore::QueryType type) { return QueryTargets[static_cast(type)]; } } // Anonymous namespace -CounterStream::CounterStream(QueryCache& cache, QueryType type) - : cache{cache}, type{type}, target{GetTarget(type)} {} - -CounterStream::~CounterStream() = default; - -void CounterStream::Update(bool enabled, bool any_command_queued) { - if (enabled) { - Enable(); - } else { - Disable(any_command_queued); - } -} - -void CounterStream::Reset(bool any_command_queued) { - if (current) { - EndQuery(any_command_queued); - - // Immediately start a new query to avoid disabling its state. 
- current = cache.GetHostCounter(nullptr, type); - } - last = nullptr; -} - -std::shared_ptr CounterStream::GetCurrent(bool any_command_queued) { - if (!current) { - return nullptr; - } - EndQuery(any_command_queued); - last = std::move(current); - current = cache.GetHostCounter(last, type); - return last; -} - -void CounterStream::Enable() { - if (current) { - return; - } - current = cache.GetHostCounter(last, type); -} - -void CounterStream::Disable(bool any_command_queued) { - if (current) { - EndQuery(any_command_queued); - } - last = std::exchange(current, nullptr); -} - -void CounterStream::EndQuery(bool any_command_queued) { - if (!any_command_queued) { - // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not - // having any of these causes a lock. glFlush is considered a command, so we can safely wait - // for this. Insert to the OpenGL command stream a flush. - glFlush(); - } - glEndQuery(target); -} - -QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer) - : system{system}, rasterizer{rasterizer}, streams{{CounterStream{*this, - QueryType::SamplesPassed}}} {} +QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer) + : VideoCommon::QueryCacheBase{system, static_cast( + gl_rasterizer)}, + gl_rasterizer{gl_rasterizer} {} QueryCache::~QueryCache() = default; -void QueryCache::InvalidateRegion(CacheAddr addr, std::size_t size) { - const u64 addr_begin = static_cast(addr); - const u64 addr_end = addr_begin + static_cast(size); - const auto in_range = [addr_begin, addr_end](CachedQuery& query) { - const u64 cache_begin = query.GetCacheAddr(); - const u64 cache_end = cache_begin + query.GetSizeInBytes(); - return cache_begin < addr_end && addr_begin < cache_end; - }; - - const u64 page_end = addr_end >> PAGE_SHIFT; - for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { - const auto& it = cached_queries.find(page); - if (it == std::end(cached_queries)) { - continue; - 
} - auto& contents = it->second; - for (auto& query : contents) { - if (!in_range(query)) { - continue; - } - rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.GetSizeInBytes(), -1); - Flush(query); - } - contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), - std::end(contents)); - } -} - -void QueryCache::FlushRegion(CacheAddr addr, std::size_t size) { - // We can handle flushes in the same way as invalidations. - InvalidateRegion(addr, size); -} - -void QueryCache::Query(GPUVAddr gpu_addr, QueryType type, std::optional timestamp) { - auto& memory_manager = system.GPU().MemoryManager(); - const auto host_ptr = memory_manager.GetPointer(gpu_addr); - - CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); - if (!query) { - const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); - ASSERT_OR_EXECUTE(cpu_addr, return;); - - query = &Register(CachedQuery(type, *cpu_addr, host_ptr)); - } - - query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()), timestamp); -} - -void QueryCache::UpdateCounters() { - auto& samples_passed = GetStream(QueryType::SamplesPassed); - - const auto& regs = system.GPU().Maxwell3D().regs; - samples_passed.Update(regs.samplecnt_enable, rasterizer.AnyCommandQueued()); -} - -void QueryCache::ResetCounter(QueryType type) { - GetStream(type).Reset(rasterizer.AnyCommandQueued()); -} - -void QueryCache::Reserve(QueryType type, OGLQuery&& query) { - reserved_queries[static_cast(type)].push_back(std::move(query)); -} - -std::shared_ptr QueryCache::GetHostCounter(std::shared_ptr dependency, - QueryType type) { - auto& reserve = reserved_queries[static_cast(type)]; +OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) { + auto& reserve = queries_reserve[static_cast(type)]; OGLQuery query; if (reserve.empty()) { query.Create(GetTarget(type)); - } else { - query = std::move(reserve.back()); - reserve.pop_back(); + return query; } - return std::make_shared(*this, std::move(dependency), 
type, std::move(query)); + query = std::move(reserve.back()); + reserve.pop_back(); + return query; } -CachedQuery& QueryCache::Register(CachedQuery&& cached_query) { - const u64 page = static_cast(cached_query.GetCacheAddr()) >> PAGE_SHIFT; - auto& stored_ref = cached_queries[page].emplace_back(std::move(cached_query)); - rasterizer.UpdatePagesCachedCount(stored_ref.GetCpuAddr(), stored_ref.GetSizeInBytes(), 1); - return stored_ref; +void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) { + queries_reserve[static_cast(type)].push_back(std::move(query)); } -CachedQuery* QueryCache::TryGet(CacheAddr addr) { - const u64 page = static_cast(addr) >> PAGE_SHIFT; - const auto it = cached_queries.find(page); - if (it == std::end(cached_queries)) { - return nullptr; - } - auto& contents = it->second; - const auto found = - std::find_if(std::begin(contents), std::end(contents), - [addr](const auto& query) { return query.GetCacheAddr() == addr; }); - return found != std::end(contents) ? &*found : nullptr; +bool QueryCache::AnyCommandQueued() const noexcept { + return gl_rasterizer.AnyCommandQueued(); } -void QueryCache::Flush(CachedQuery& cached_query) { - auto& stream = GetStream(cached_query.GetType()); - - // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. - // To avoid this disable and re-enable keeping the dependency stream. - // But we only have to do this if we have pending waits to be done. 
- const bool slice_counter = stream.IsEnabled() && cached_query.WaitPending(); - const bool any_command_queued = rasterizer.AnyCommandQueued(); - if (slice_counter) { - stream.Update(false, any_command_queued); - } - - cached_query.Flush(); - - if (slice_counter) { - stream.Update(true, any_command_queued); - } -} - -CounterStream& QueryCache::GetStream(QueryType type) { - return streams[static_cast(type)]; -} - -HostCounter::HostCounter(QueryCache& cache, std::shared_ptr dependency, QueryType type, - OGLQuery&& query_) - : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} { +HostCounter::HostCounter(QueryCache& cache, std::shared_ptr dependency, + VideoCore::QueryType type) + : VideoCommon::HostCounterBase{std::move(dependency)}, cache{cache}, + type{type}, query{cache.AllocateQuery(type)} { glBeginQuery(GetTarget(type), query.handle); } @@ -232,81 +70,50 @@ HostCounter::~HostCounter() { cache.Reserve(type, std::move(query)); } -u64 HostCounter::Query() { - if (result) { - return *result; +void HostCounter::EndQuery() { + if (!cache.AnyCommandQueued()) { + // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not + // having any of these causes a lock. glFlush is considered a command, so we can safely wait + // for this. Insert to the OpenGL command stream a flush. 
+ glFlush(); } - - u64 value; - glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value); - if (dependency) { - value += dependency->Query(); - } - - return *(result = value); + glEndQuery(GetTarget(type)); } -bool HostCounter::WaitPending() const noexcept { - return result.has_value(); +u64 HostCounter::BlockingQuery() const { + GLint64 value; + glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value); + return static_cast(value); } -CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr) - : type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {} +CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr) + : VideoCommon::CachedQueryBase{cpu_addr, host_ptr}, cache{&cache}, type{type} {} CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept - : type{rhs.type}, cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, - counter{std::move(rhs.counter)}, timestamp{rhs.timestamp} {} - -CachedQuery::~CachedQuery() = default; + : VideoCommon::CachedQueryBase(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { + VideoCommon::CachedQueryBase::operator=(std::move(rhs)); + cache = rhs.cache; type = rhs.type; - cpu_addr = rhs.cpu_addr; - host_ptr = rhs.host_ptr; - counter = std::move(rhs.counter); - timestamp = rhs.timestamp; return *this; } void CachedQuery::Flush() { - // When counter is nullptr it means that it's just been reseted. We are supposed to write a zero - // in these cases. - const u64 value = counter ? counter->Query() : 0; - std::memcpy(host_ptr, &value, sizeof(u64)); - - if (timestamp) { - std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); + // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. + // To avoid this disable and re-enable keeping the dependency stream. + // But we only have to do this if we have pending waits to be done. 
+ auto& stream = cache->Stream(type); + const bool slice_counter = WaitPending() && stream.IsEnabled(); + if (slice_counter) { + stream.Update(false); } -} -void CachedQuery::SetCounter(std::shared_ptr counter_, std::optional timestamp_) { - if (counter) { - // If there's an old counter set it means the query is being rewritten by the game. - // To avoid losing the data forever, flush here. - Flush(); + VideoCommon::CachedQueryBase::Flush(); + + if (slice_counter) { + stream.Update(true); } - counter = std::move(counter_); - timestamp = timestamp_; -} - -bool CachedQuery::WaitPending() const noexcept { - return counter && counter->WaitPending(); -} - -QueryType CachedQuery::GetType() const noexcept { - return type; -} - -VAddr CachedQuery::GetCpuAddr() const noexcept { - return cpu_addr; -} - -CacheAddr CachedQuery::GetCacheAddr() const noexcept { - return ToCacheAddr(host_ptr); -} - -u64 CachedQuery::GetSizeInBytes() const noexcept { - return timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index d9f22b44d7..20d337f156 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -13,6 +13,7 @@ #include #include "common/common_types.h" +#include "video_core/query_cache.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -24,134 +25,57 @@ namespace OpenGL { class CachedQuery; class HostCounter; -class RasterizerOpenGL; class QueryCache; +class RasterizerOpenGL; -class CounterStream final { -public: - explicit CounterStream(QueryCache& cache, VideoCore::QueryType type); - ~CounterStream(); +using CounterStream = VideoCommon::CounterStreamBase; - void Update(bool enabled, bool any_command_queued); - - void Reset(bool any_command_queued); - - std::shared_ptr GetCurrent(bool any_command_queued); - - bool IsEnabled() const { 
- return current != nullptr; - } - -private: - void Enable(); - - void Disable(bool any_command_queued); - - void EndQuery(bool any_command_queued); - - QueryCache& cache; - - std::shared_ptr current; - std::shared_ptr last; - VideoCore::QueryType type; - GLenum target; -}; - -class QueryCache final { +class QueryCache final + : public VideoCommon::QueryCacheBase { public: explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); ~QueryCache(); - void InvalidateRegion(CacheAddr addr, std::size_t size); - - void FlushRegion(CacheAddr addr, std::size_t size); - - void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp); - - void UpdateCounters(); - - void ResetCounter(VideoCore::QueryType type); + OGLQuery AllocateQuery(VideoCore::QueryType type); void Reserve(VideoCore::QueryType type, OGLQuery&& query); - std::shared_ptr GetHostCounter(std::shared_ptr dependency, - VideoCore::QueryType type); + bool AnyCommandQueued() const noexcept; private: - CachedQuery& Register(CachedQuery&& cached_query); - - CachedQuery* TryGet(CacheAddr addr); - - void Flush(CachedQuery& cached_query); - - CounterStream& GetStream(VideoCore::QueryType type); - - Core::System& system; - RasterizerOpenGL& rasterizer; - - std::unordered_map> cached_queries; - - std::array streams; - std::array, VideoCore::NumQueryTypes> reserved_queries; + RasterizerOpenGL& gl_rasterizer; + std::array, VideoCore::NumQueryTypes> queries_reserve; }; -class HostCounter final { +class HostCounter final : public VideoCommon::HostCounterBase { public: explicit HostCounter(QueryCache& cache, std::shared_ptr dependency, - VideoCore::QueryType type, OGLQuery&& query); + VideoCore::QueryType type); ~HostCounter(); - /// Returns the current value of the query. - u64 Query(); - - /// Returns true when querying this counter will potentially wait for OpenGL. 
- bool WaitPending() const noexcept; + void EndQuery(); private: + u64 BlockingQuery() const override; + QueryCache& cache; VideoCore::QueryType type; - - std::shared_ptr dependency; ///< Counter queued before this one. - OGLQuery query; ///< OpenGL query. - std::optional result; ///< Added values of the counter. + OGLQuery query; }; -class CachedQuery final { +class CachedQuery final : public VideoCommon::CachedQueryBase { public: - explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr); - CachedQuery(CachedQuery&&) noexcept; - CachedQuery(const CachedQuery&) = delete; - ~CachedQuery(); + explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, + u8* host_ptr); + CachedQuery(CachedQuery&& rhs) noexcept; - CachedQuery& operator=(CachedQuery&&) noexcept; + CachedQuery& operator=(CachedQuery&& rhs) noexcept; - /// Writes the counter value to host memory. - void Flush(); - - /// Updates the counter this cached query registered in guest memory will write when requested. - void SetCounter(std::shared_ptr counter, std::optional timestamp); - - /// Returns true when a flushing this query will potentially wait for OpenGL. - bool WaitPending() const noexcept; - - /// Returns the query type. - VideoCore::QueryType GetType() const noexcept; - - /// Returns the guest CPU address for this query. - VAddr GetCpuAddr() const noexcept; - - /// Returns the cache address for this query. - CacheAddr GetCacheAddr() const noexcept; - - /// Returns the number of cached bytes. - u64 GetSizeInBytes() const noexcept; + void Flush() override; private: - VideoCore::QueryType type; ///< Abstracted query type (e.g. samples passed). - VAddr cpu_addr; ///< Guest CPU address. - u8* host_ptr; ///< Writable host pointer. - std::shared_ptr counter; ///< Host counter to query, owns the dependency tree. - std::optional timestamp; ///< Timestamp to flush to guest memory. 
+ QueryCache* cache; + VideoCore::QueryType type; }; } // namespace OpenGL From bcd348f2388cf944f2ac49364a8d13b47cc21456 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 11 Feb 2020 18:59:44 -0300 Subject: [PATCH 08/11] vk_query_cache: Implement generic query cache on Vulkan --- src/video_core/CMakeLists.txt | 2 + src/video_core/query_cache.h | 37 +++++- .../renderer_opengl/gl_query_cache.cpp | 11 +- .../renderer_opengl/gl_query_cache.h | 11 +- src/video_core/renderer_vulkan/vk_device.cpp | 10 +- .../renderer_vulkan/vk_query_cache.cpp | 122 ++++++++++++++++++ .../renderer_vulkan/vk_query_cache.h | 104 +++++++++++++++ .../renderer_vulkan/vk_rasterizer.cpp | 21 ++- .../renderer_vulkan/vk_rasterizer.h | 6 +- .../renderer_vulkan/vk_scheduler.cpp | 8 ++ src/video_core/renderer_vulkan/vk_scheduler.h | 15 +++ 11 files changed, 327 insertions(+), 20 deletions(-) create mode 100644 src/video_core/renderer_vulkan/vk_query_cache.cpp create mode 100644 src/video_core/renderer_vulkan/vk_query_cache.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index bb5895e992..4b0c6346fe 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -180,6 +180,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_memory_manager.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_query_cache.cpp + renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h renderer_vulkan/vk_renderpass_cache.cpp diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 4c9151ce8c..069032121b 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -88,7 +88,8 @@ private: std::shared_ptr last; }; -template +template class QueryCacheBase { public: explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) @@ -127,15 +128,25 @@ public: /// Updates counters from GPU state. 
Expected to be called once per draw, clear or dispatch. void UpdateCounters() { + std::unique_lock lock{mutex}; const auto& regs = system.GPU().Maxwell3D().regs; Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); } /// Resets a counter to zero. It doesn't disable the query after resetting. void ResetCounter(VideoCore::QueryType type) { + std::unique_lock lock{mutex}; Stream(type).Reset(); } + /// Disable all active streams. Expected to be called at the end of a command buffer. + void DisableStreams() { + std::unique_lock lock{mutex}; + for (auto& stream : streams) { + stream.Update(false); + } + } + /// Returns a new host counter. std::shared_ptr Counter(std::shared_ptr dependency, VideoCore::QueryType type) { @@ -148,6 +159,9 @@ public: return streams[static_cast(type)]; } +protected: + std::array query_pools; + private: /// Flushes a memory range to guest memory and removes it from the cache. void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { @@ -213,8 +227,16 @@ private: template class HostCounterBase { public: - explicit HostCounterBase(std::shared_ptr dependency) - : dependency{std::move(dependency)} {} + explicit HostCounterBase(std::shared_ptr dependency_) + : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} { + // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted. + static constexpr u64 depth_threshold = 96; + if (depth > depth_threshold) { + depth = 0; + base_result = dependency->Query(); + dependency = nullptr; + } + } /// Returns the current value of the query. 
u64 Query() { @@ -222,9 +244,10 @@ public: return *result; } - u64 value = BlockingQuery(); + u64 value = BlockingQuery() + base_result; if (dependency) { value += dependency->Query(); + dependency = nullptr; } return *(result = value); @@ -235,6 +258,10 @@ public: return result.has_value(); } + u64 Depth() const noexcept { + return depth; + } + protected: /// Returns the value of query from the backend API blocking as needed. virtual u64 BlockingQuery() const = 0; @@ -242,6 +269,8 @@ protected: private: std::shared_ptr dependency; ///< Counter to add to this value. std::optional result; ///< Filled with the already returned value. + u64 depth; ///< Number of nested dependencies. + u64 base_result = 0; ///< Equivalent to nested dependencies value. }; template diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index 7d5a044c71..f12e9f55f7 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -31,15 +31,16 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { } // Anonymous namespace QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer) - : VideoCommon::QueryCacheBase{system, static_cast( - gl_rasterizer)}, + : VideoCommon::QueryCacheBase< + QueryCache, CachedQuery, CounterStream, HostCounter, + std::vector>{system, + static_cast(gl_rasterizer)}, gl_rasterizer{gl_rasterizer} {} QueryCache::~QueryCache() = default; OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) { - auto& reserve = queries_reserve[static_cast(type)]; + auto& reserve = query_pools[static_cast(type)]; OGLQuery query; if (reserve.empty()) { query.Create(GetTarget(type)); @@ -52,7 +53,7 @@ OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) { } void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) { - queries_reserve[static_cast(type)].push_back(std::move(query)); + 
query_pools[static_cast(type)].push_back(std::move(query)); } bool QueryCache::AnyCommandQueued() const noexcept { diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index 20d337f156..99d187837a 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -6,12 +6,8 @@ #include #include -#include -#include #include -#include - #include "common/common_types.h" #include "video_core/query_cache.h" #include "video_core/rasterizer_interface.h" @@ -30,8 +26,8 @@ class RasterizerOpenGL; using CounterStream = VideoCommon::CounterStreamBase; -class QueryCache final - : public VideoCommon::QueryCacheBase { +class QueryCache final : public VideoCommon::QueryCacheBase> { public: explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); ~QueryCache(); @@ -44,7 +40,6 @@ public: private: RasterizerOpenGL& gl_rasterizer; - std::array, VideoCore::NumQueryTypes> queries_reserve; }; class HostCounter final : public VideoCommon::HostCounterBase { @@ -59,7 +54,7 @@ private: u64 BlockingQuery() const override; QueryCache& cache; - VideoCore::QueryType type; + const VideoCore::QueryType type; OGLQuery query; }; diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 9840f26e5d..588a6835f6 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -104,6 +104,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan features.depthBiasClamp = true; features.geometryShader = true; features.tessellationShader = true; + features.occlusionQueryPrecise = true; features.fragmentStoresAndAtomics = true; features.shaderImageGatherExtended = true; features.shaderStorageImageWriteWithoutFormat = true; @@ -117,6 +118,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan 
bit8_storage.uniformAndStorageBuffer8BitAccess = true; SetNext(next, bit8_storage); + vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; + host_query_reset.hostQueryReset = true; + SetNext(next, host_query_reset); + vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; if (is_float16_supported) { float16_int8.shaderFloat16 = true; @@ -273,6 +278,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, + VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, }; std::bitset available_extensions{}; @@ -340,6 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev std::make_pair(features.depthBiasClamp, "depthBiasClamp"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), + std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, @@ -376,7 +383,7 @@ std::vector VKDevice::LoadExtensions(const vk::DispatchLoaderDynami } }; - extensions.reserve(13); + extensions.reserve(14); extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); @@ -384,6 +391,7 @@ std::vector VKDevice::LoadExtensions(const vk::DispatchLoaderDynami extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME); extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); + extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME); [[maybe_unused]] const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || 
std::getenv("NSIGHT_LAUNCHED"); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp new file mode 100644 index 0000000000..ffbf60dda8 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -0,0 +1,122 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include + +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace Vulkan { + +namespace { + +constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion}; + +constexpr vk::QueryType GetTarget(VideoCore::QueryType type) { + return QUERY_TARGETS[static_cast(type)]; +} + +} // Anonymous namespace + +QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {} + +QueryPool::~QueryPool() = default; + +void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) { + device = &device_; + type = type_; +} + +std::pair QueryPool::Commit(VKFence& fence) { + std::size_t index; + do { + index = CommitResource(fence); + } while (usage[index]); + usage[index] = true; + + return {*pools[index / GROW_STEP], static_cast(index % GROW_STEP)}; +} + +void QueryPool::Allocate(std::size_t begin, std::size_t end) { + usage.resize(end); + + const auto dev = device->GetLogical(); + const u32 size = static_cast(end - begin); + const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {}); + pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader())); +} + +void QueryPool::Reserve(std::pair query) { + const auto it = + std::find_if(std::begin(pools), std::end(pools), + [query_pool = query.first](auto& pool) { return query_pool == *pool; }); + 
ASSERT(it != std::end(pools)); + + const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it); + usage[pool_index * GROW_STEP + static_cast(query.second)] = false; +} + +VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + const VKDevice& device, VKScheduler& scheduler) + : VideoCommon::QueryCacheBase{system, rasterizer}, + device{device}, scheduler{scheduler} { + for (std::size_t i = 0; i < static_cast(VideoCore::NumQueryTypes); ++i) { + query_pools[i].Initialize(device, static_cast(i)); + } +} + +VKQueryCache::~VKQueryCache() = default; + +std::pair VKQueryCache::AllocateQuery(VideoCore::QueryType type) { + return query_pools[static_cast(type)].Commit(scheduler.GetFence()); +} + +void VKQueryCache::Reserve(VideoCore::QueryType type, + std::pair query) { + query_pools[static_cast(type)].Reserve(query); +} + +HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr dependency, + VideoCore::QueryType type) + : VideoCommon::HostCounterBase{std::move(dependency)}, cache{cache}, + type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} { + const auto dev = cache.Device().GetLogical(); + cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) { + dev.resetQueryPoolEXT(query.first, query.second, 1, dld); + cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld); + }); +} + +HostCounter::~HostCounter() { + cache.Reserve(type, query); +} + +void HostCounter::EndQuery() { + cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) { + cmdbuf.endQuery(query.first, query.second, dld); + }); +} + +u64 HostCounter::BlockingQuery() const { + if (ticks >= cache.Scheduler().Ticks()) { + cache.Scheduler().Flush(); + } + + const auto dev = cache.Device().GetLogical(); + const auto& dld = cache.Device().GetDispatchLoader(); + u64 value; + dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value), + 
vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld); + return value; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h new file mode 100644 index 0000000000..c3092ee969 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -0,0 +1,104 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include + +#include "common/common_types.h" +#include "video_core/query_cache.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" + +namespace VideoCore { +class RasterizerInterface; +} + +namespace Vulkan { + +class CachedQuery; +class HostCounter; +class VKDevice; +class VKQueryCache; +class VKScheduler; + +using CounterStream = VideoCommon::CounterStreamBase; + +class QueryPool final : public VKFencedPool { +public: + explicit QueryPool(); + ~QueryPool() override; + + void Initialize(const VKDevice& device, VideoCore::QueryType type); + + std::pair Commit(VKFence& fence); + + void Reserve(std::pair query); + +protected: + void Allocate(std::size_t begin, std::size_t end) override; + +private: + static constexpr std::size_t GROW_STEP = 512; + + const VKDevice* device = nullptr; + VideoCore::QueryType type = {}; + + std::vector pools; + std::vector usage; +}; + +class VKQueryCache final + : public VideoCommon::QueryCacheBase { +public: + explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + const VKDevice& device, VKScheduler& scheduler); + ~VKQueryCache(); + + std::pair AllocateQuery(VideoCore::QueryType type); + + void Reserve(VideoCore::QueryType type, std::pair query); + + const VKDevice& Device() const noexcept { + return device; + } + + VKScheduler& Scheduler() const noexcept { + return scheduler; + } + +private: + 
const VKDevice& device; + VKScheduler& scheduler; +}; + +class HostCounter final : public VideoCommon::HostCounterBase { +public: + explicit HostCounter(VKQueryCache& cache, std::shared_ptr dependency, + VideoCore::QueryType type); + ~HostCounter(); + + void EndQuery(); + +private: + u64 BlockingQuery() const override; + + VKQueryCache& cache; + const VideoCore::QueryType type; + const std::pair query; + const u64 ticks; +}; + +class CachedQuery : public VideoCommon::CachedQueryBase { +public: + explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr) + : VideoCommon::CachedQueryBase{cpu_addr, host_ptr} {} +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index aada38702e..79aa121edc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -289,7 +289,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind staging_pool), pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), - sampler_cache(device) {} + sampler_cache(device), query_cache(system, *this, device, scheduler) { + scheduler.SetQueryCache(query_cache); +} RasterizerVulkan::~RasterizerVulkan() = default; @@ -308,6 +310,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { FlushWork(); + query_cache.UpdateCounters(); + const auto& gpu = system.GPU().Maxwell3D(); GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; @@ -362,6 +366,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { void RasterizerVulkan::Clear() { MICROPROFILE_SCOPE(Vulkan_Clearing); + query_cache.UpdateCounters(); + const auto& gpu = system.GPU().Maxwell3D(); if (!system.GPU().Maxwell3D().ShouldExecute()) { return; @@ -429,6 +435,8 @@ void 
RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { sampled_views.clear(); image_views.clear(); + query_cache.UpdateCounters(); + const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const ComputePipelineCacheKey key{ code_addr, @@ -471,17 +479,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { }); } +void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { + query_cache.ResetCounter(type); +} + +void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, + std::optional timestamp) { + query_cache.Query(gpu_addr, type, timestamp); +} + void RasterizerVulkan::FlushAll() {} void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { texture_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size); + query_cache.FlushRegion(addr, size); } void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { texture_cache.InvalidateRegion(addr, size); pipeline_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); + query_cache.InvalidateRegion(addr, size); } void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 7be71e734c..add1ad88c7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -24,6 +24,7 @@ #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h" @@ -96,7 +97,7 @@ struct ImageView { vk::ImageLayout* layout = nullptr; }; -class RasterizerVulkan : public VideoCore::RasterizerAccelerated { +class RasterizerVulkan final : 
public VideoCore::RasterizerAccelerated { public: explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, VKScreenInfo& screen_info, const VKDevice& device, @@ -108,6 +109,8 @@ public: bool DrawMultiBatch(bool is_indexed) override; void Clear() override; void DispatchCompute(GPUVAddr code_addr) override; + void ResetCounter(VideoCore::QueryType type) override; + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) override; void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; @@ -247,6 +250,7 @@ private: VKPipelineCache pipeline_cache; VKBufferCache buffer_cache; VKSamplerCache sampler_cache; + VKQueryCache query_cache; std::array color_attachments; View zeta_attachment; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index d66133ad1a..92bd6c3444 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -6,6 +6,7 @@ #include "common/microprofile.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { } void VKScheduler::AllocateNewContext() { + ++ticks; + std::unique_lock lock{mutex}; current_fence = next_fence; next_fence = &resource_manager.CommitFence(); @@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() { current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, device.GetDispatchLoader()); + // Enable counters once again. These are disabled when a command buffer is finished. 
+ if (query_cache) { + query_cache->UpdateCounters(); + } } void VKScheduler::InvalidateState() { @@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() { } void VKScheduler::EndPendingOperations() { + query_cache->DisableStreams(); EndRenderPass(); } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index bcdffbba0a..62fd7858b1 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include #include @@ -18,6 +19,7 @@ namespace Vulkan { class VKDevice; class VKFence; +class VKQueryCache; class VKResourceManager; class VKFenceView { @@ -67,6 +69,11 @@ public: /// Binds a pipeline to the current execution context. void BindGraphicsPipeline(vk::Pipeline pipeline); + /// Assigns the query cache. + void SetQueryCache(VKQueryCache& query_cache_) { + query_cache = &query_cache_; + } + /// Returns true when viewports have been set in the current command buffer. bool TouchViewports() { return std::exchange(state.viewports, true); @@ -112,6 +119,11 @@ public: return current_fence; } + /// Returns the current command buffer tick. 
+ u64 Ticks() const { + return ticks; + } + private: class Command { public: @@ -205,6 +217,8 @@ private: const VKDevice& device; VKResourceManager& resource_manager; + VKQueryCache* query_cache = nullptr; + vk::CommandBuffer current_cmdbuf; VKFence* current_fence = nullptr; VKFence* next_fence = nullptr; @@ -227,6 +241,7 @@ private: Common::SPSCQueue> chunk_reserve; std::mutex mutex; std::condition_variable cv; + std::atomic ticks = 0; bool quit = false; }; From cc0694559fd7084221540d595edb228f8267a96c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 13 Feb 2020 14:28:22 -0300 Subject: [PATCH 09/11] query_cache: Add a recursive mutex for concurrent usage --- src/video_core/query_cache.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 069032121b..86f5aade1c 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -98,10 +99,12 @@ public: VideoCore::QueryType::SamplesPassed}}} {} void InvalidateRegion(CacheAddr addr, std::size_t size) { + std::unique_lock lock{mutex}; FlushAndRemoveRegion(addr, size); } void FlushRegion(CacheAddr addr, std::size_t size) { + std::unique_lock lock{mutex}; FlushAndRemoveRegion(addr, size); } @@ -112,6 +115,7 @@ public: * @param timestamp Timestamp, when empty the flushed query is assumed to be short. 
*/ void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) { + std::unique_lock lock{mutex}; auto& memory_manager = system.GPU().MemoryManager(); const auto host_ptr = memory_manager.GetPointer(gpu_addr); @@ -219,6 +223,8 @@ private: Core::System& system; VideoCore::RasterizerInterface& rasterizer; + std::recursive_mutex mutex; + std::unordered_map> cached_queries; std::array streams; From 54a00ee4cfdc9f9cd0985d10c4f3a822a284d997 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 13 Feb 2020 15:31:37 -0300 Subject: [PATCH 10/11] query_cache: Fix ambiguity in CacheAddr getter --- src/video_core/query_cache.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 86f5aade1c..a040858e81 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -172,7 +172,7 @@ private: const u64 addr_begin = static_cast(addr); const u64 addr_end = addr_begin + static_cast(size); const auto in_range = [addr_begin, addr_end](CachedQuery& query) { - const u64 cache_begin = query.CacheAddr(); + const u64 cache_begin = query.GetCacheAddr(); const u64 cache_end = cache_begin + query.SizeInBytes(); return cache_begin < addr_end && addr_begin < cache_end; }; @@ -212,8 +212,9 @@ private: return nullptr; } auto& contents = it->second; - const auto found = std::find_if(std::begin(contents), std::end(contents), - [addr](auto& query) { return query.CacheAddr() == addr; }); + const auto found = + std::find_if(std::begin(contents), std::end(contents), + [addr](auto& query) { return query.GetCacheAddr() == addr; }); return found != std::end(contents) ? 
&*found : nullptr; } @@ -326,7 +327,7 @@ public: return cpu_addr; } - CacheAddr CacheAddr() const noexcept { + CacheAddr GetCacheAddr() const noexcept { return ToCacheAddr(host_ptr); } From 6d3a046caa894997946736d3466855682d770d45 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 13 Feb 2020 21:11:21 -0300 Subject: [PATCH 11/11] query_cache: Address feedback --- src/video_core/query_cache.h | 32 +++++++++---------- .../renderer_opengl/gl_query_cache.h | 2 ++ 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index a040858e81..e66054ed06 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -62,7 +62,7 @@ public: /// Returns true when the counter stream is enabled. bool IsEnabled() const { - return static_cast(current); + return current != nullptr; } private: @@ -163,6 +163,11 @@ public: return streams[static_cast(type)]; } + /// Returns the counter stream of the specified type. + const CounterStream& Stream(VideoCore::QueryType type) const { + return streams[static_cast(type)]; + } + protected: std::array query_pools; @@ -219,7 +224,7 @@ private: } static constexpr std::uintptr_t PAGE_SIZE = 4096; - static constexpr int PAGE_SHIFT = 12; + static constexpr unsigned PAGE_SHIFT = 12; Core::System& system; VideoCore::RasterizerInterface& rasterizer; @@ -237,13 +242,14 @@ public: explicit HostCounterBase(std::shared_ptr dependency_) : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} { // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted. - static constexpr u64 depth_threshold = 96; + constexpr u64 depth_threshold = 96; if (depth > depth_threshold) { depth = 0; base_result = dependency->Query(); dependency = nullptr; } } + virtual ~HostCounterBase() = default; /// Returns the current value of the query. 
u64 Query() { @@ -257,7 +263,8 @@ public: dependency = nullptr; } - return *(result = value); + result = value; + return *result; } /// Returns true when flushing this query will potentially wait. @@ -285,20 +292,13 @@ class CachedQueryBase { public: explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr) : cpu_addr{cpu_addr}, host_ptr{host_ptr} {} + virtual ~CachedQueryBase() = default; - CachedQueryBase(CachedQueryBase&& rhs) noexcept - : cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, counter{std::move(rhs.counter)}, - timestamp{rhs.timestamp} {} - + CachedQueryBase(CachedQueryBase&&) noexcept = default; CachedQueryBase(const CachedQueryBase&) = delete; - CachedQueryBase& operator=(CachedQueryBase&& rhs) noexcept { - cpu_addr = rhs.cpu_addr; - host_ptr = rhs.host_ptr; - counter = std::move(rhs.counter); - timestamp = rhs.timestamp; - return *this; - } + CachedQueryBase& operator=(CachedQueryBase&&) noexcept = default; + CachedQueryBase& operator=(const CachedQueryBase&) = delete; /// Flushes the query to guest memory. virtual void Flush() { @@ -335,7 +335,7 @@ public: return SizeInBytes(timestamp.has_value()); } - static u64 SizeInBytes(bool with_timestamp) { + static constexpr u64 SizeInBytes(bool with_timestamp) noexcept { return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; } diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index 99d187837a..d8e7052a12 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -63,8 +63,10 @@ public: explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr); CachedQuery(CachedQuery&& rhs) noexcept; + CachedQuery(const CachedQuery&) = delete; CachedQuery& operator=(CachedQuery&& rhs) noexcept; + CachedQuery& operator=(const CachedQuery&) = delete; void Flush() override;