From 2b58652f0897053d4da04deb586490220ab5a774 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Jul 2019 19:40:10 -0300 Subject: [PATCH] maxwell_3d: Slow implementation of passed samples (query 21) Implements GL_SAMPLES_PASSED by waiting immediately for queries. --- src/video_core/CMakeLists.txt | 2 + src/video_core/engines/maxwell_3d.cpp | 39 ++++++++---- src/video_core/engines/maxwell_3d.h | 38 ++++++++++-- src/video_core/rasterizer_interface.h | 10 ++++ .../renderer_opengl/gl_query_cache.cpp | 59 +++++++++++++++++++ .../renderer_opengl/gl_query_cache.h | 41 +++++++++++++ .../renderer_opengl/gl_rasterizer.cpp | 24 ++++++++ .../renderer_opengl/gl_rasterizer.h | 5 ++ 8 files changed, 201 insertions(+), 17 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_query_cache.cpp create mode 100644 src/video_core/renderer_opengl/gl_query_cache.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index db9332d003..3208f4993d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -74,6 +74,8 @@ add_library(video_core STATIC renderer_opengl/gl_stream_buffer.h renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.h + renderer_opengl/gl_query_cache.cpp + renderer_opengl/gl_query_cache.h renderer_opengl/maxwell_to_gl.h renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.h diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 0b3e8749b8..fe91ff6a04 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -400,6 +400,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { ProcessQueryCondition(); break; } + case MAXWELL3D_REG_INDEX(counter_reset): { + ProcessCounterReset(); + break; + } case MAXWELL3D_REG_INDEX(sync_info): { ProcessSyncPoint(); break; @@ -544,23 +548,23 @@ void Maxwell3D::ProcessQueryGet() { "Units other than CROP are unimplemented"); switch 
(regs.query.query_get.operation) { - case Regs::QueryOperation::Release: { - const u64 result = regs.query.query_sequence; - StampQueryResult(result, regs.query.query_get.short_query == 0); + case Regs::QueryOperation::Release: + StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); break; - } - case Regs::QueryOperation::Acquire: { - // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU - // to write a value that matches the current payload. + case Regs::QueryOperation::Acquire: + // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that + // matches the current payload. UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); break; - } case Regs::QueryOperation::Counter: { - u64 result{}; + u64 result; switch (regs.query.query_get.select) { case Regs::QuerySelect::Zero: result = 0; break; + case Regs::QuerySelect::SamplesPassed: + result = rasterizer.Query(VideoCore::QueryType::SamplesPassed); + break; default: result = 1; UNIMPLEMENTED_MSG("Unimplemented query select type {}", @@ -569,15 +573,13 @@ void Maxwell3D::ProcessQueryGet() { StampQueryResult(result, regs.query.query_get.short_query == 0); break; } - case Regs::QueryOperation::Trap: { + case Regs::QueryOperation::Trap: UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); break; - } - default: { + default: UNIMPLEMENTED_MSG("Unknown query operation"); break; } - } } void Maxwell3D::ProcessQueryCondition() { @@ -619,6 +621,17 @@ void Maxwell3D::ProcessQueryCondition() { } } +void Maxwell3D::ProcessCounterReset() { + switch (regs.counter_reset) { + case Regs::CounterReset::SampleCnt: + rasterizer.ResetCounter(VideoCore::QueryType::SamplesPassed); + break; + default: + UNIMPLEMENTED_MSG("counter_reset={}", static_cast<u32>(regs.counter_reset)); + break; + } +} + void Maxwell3D::ProcessSyncPoint() { const u32 sync_point = regs.sync_info.sync_point.Value(); const u32 increment = regs.sync_info.increment.Value(); diff --git 
a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0a2af54e50..d21f678ed0 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -409,6 +409,27 @@ public: Linear = 1, }; + enum class CounterReset : u32 { + SampleCnt = 0x01, + Unk02 = 0x02, + Unk03 = 0x03, + Unk04 = 0x04, + EmittedPrimitives = 0x10, // Not tested + Unk11 = 0x11, + Unk12 = 0x12, + Unk13 = 0x13, + Unk15 = 0x15, + Unk16 = 0x16, + Unk17 = 0x17, + Unk18 = 0x18, + Unk1A = 0x1A, + Unk1B = 0x1B, + Unk1C = 0x1C, + Unk1D = 0x1D, + Unk1E = 0x1E, + GeneratedPrimitives = 0x1F, + }; + struct Cull { enum class FrontFace : u32 { ClockWise = 0x0900, @@ -857,7 +878,7 @@ public: BitField<7, 1, u32> c7; } clip_distance_enabled; - INSERT_UNION_PADDING_WORDS(0x1); + u32 samplecnt_enable; float point_size; @@ -865,7 +886,11 @@ public: u32 point_sprite_enable; - INSERT_UNION_PADDING_WORDS(0x5); + INSERT_UNION_PADDING_WORDS(0x3); + + CounterReset counter_reset; + + INSERT_UNION_PADDING_WORDS(0x1); u32 zeta_enable; @@ -1412,12 +1437,15 @@ private: /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); - // Writes the query result accordingly + /// Writes the query result accordingly. void StampQueryResult(u64 payload, bool long_query); - // Handles Conditional Rendering + /// Handles conditional rendering. void ProcessQueryCondition(); + /// Handles counter resets. + void ProcessCounterReset(); + /// Handles writes to syncing register. 
void ProcessSyncPoint(); @@ -1499,8 +1527,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB); ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(vb_base_instance, 0x50E); ASSERT_REG_POSITION(clip_distance_enabled, 0x544); +ASSERT_REG_POSITION(samplecnt_enable, 0x545); ASSERT_REG_POSITION(point_size, 0x546); ASSERT_REG_POSITION(point_sprite_enable, 0x548); +ASSERT_REG_POSITION(counter_reset, 0x54C); ASSERT_REG_POSITION(zeta_enable, 0x54E); ASSERT_REG_POSITION(multisample_control, 0x54F); ASSERT_REG_POSITION(condition, 0x554); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index c586cd6fec..2fc6275398 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -17,6 +17,10 @@ class MemoryManager; namespace VideoCore { +enum class QueryType { + SamplesPassed, +}; + enum class LoadCallbackStage { Prepare, Decompile, @@ -41,6 +45,12 @@ public: /// Dispatches a compute shader invocation virtual void DispatchCompute(GPUVAddr code_addr) = 0; + /// Resets the counter of a query + virtual void ResetCounter(QueryType type) = 0; + + /// Returns the value of a GPU query + virtual u64 Query(QueryType type) = 0; + /// Notify rasterizer that all caches should be flushed to Switch memory virtual void FlushAll() = 0; diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp new file mode 100644 index 0000000000..1c7dc999ad --- /dev/null +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -0,0 +1,59 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <glad/glad.h> + +#include "video_core/renderer_opengl/gl_query_cache.h" + +namespace OpenGL { + +HostCounter::HostCounter(GLenum target) { + query.Create(target); +} + +HostCounter::~HostCounter() = default; + +void HostCounter::UpdateState(bool enabled) { + if (enabled) { + Enable(); + } else { + Disable(); + } +} + +void HostCounter::Reset() { + counter = 0; + Disable(); +} + +u64 HostCounter::Query() { + if (!is_beginned) { + return counter; + } + Disable(); + u64 value; + glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value); + Enable(); + + counter += value; + return counter; +} + +void HostCounter::Enable() { + if (is_beginned) { + return; + } + is_beginned = true; + glBeginQuery(GL_SAMPLES_PASSED, query.handle); +} + +void HostCounter::Disable() { + if (!is_beginned) { + return; + } + glEndQuery(GL_SAMPLES_PASSED); + is_beginned = false; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h new file mode 100644 index 0000000000..52c6546bf9 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -0,0 +1,41 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <glad/glad.h> + +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { + +class HostCounter final { +public: + explicit HostCounter(GLenum target); + ~HostCounter(); + + /// Enables or disables the counter as required. + void UpdateState(bool enabled); + + /// Resets the counter disabling it if needed. + void Reset(); + + /// Returns the current value of the query. + /// @note It may harm precision of future queries if the counter is not disabled. + u64 Query(); + +private: + /// Enables the counter when disabled. + void Enable(); + + /// Disables the counter when enabled. + void Disable(); + + OGLQuery query; ///< OpenGL query. 
+ u64 counter{}; ///< Added values of the counter. + bool is_beginned{}; ///< True when the OpenGL query is beginned. +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b0eb14c8b5..8d132732a1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -547,6 +547,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); auto& gpu = system.GPU().Maxwell3D(); + const auto& regs = gpu.regs; + samples_passed.UpdateState(regs.samplecnt_enable); + SyncRasterizeEnable(state); SyncColorMask(); SyncFragmentColorClampState(); @@ -709,6 +712,27 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); } +void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { + switch (type) { + case VideoCore::QueryType::SamplesPassed: + samples_passed.Reset(); + break; + default: + UNIMPLEMENTED_MSG("type={}", static_cast<u32>(type)); + break; + } +} + +u64 RasterizerOpenGL::Query(VideoCore::QueryType type) { + switch (type) { + case VideoCore::QueryType::SamplesPassed: + return samples_passed.Query(); + default: + UNIMPLEMENTED_MSG("type={}", static_cast<u32>(type)); + return 1; + } +} + void RasterizerOpenGL::FlushAll() {} void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 0501f38289..32bcaf8c2e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -24,6 +24,7 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_framebuffer_cache.h" +#include "video_core/renderer_opengl/gl_query_cache.h" #include 
"video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -61,6 +62,8 @@ public: bool DrawMultiBatch(bool is_indexed) override; void Clear() override; void DispatchCompute(GPUVAddr code_addr) override; + void ResetCounter(VideoCore::QueryType type) override; + u64 Query(VideoCore::QueryType type) override; void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; @@ -221,6 +224,8 @@ private: GLintptr SetupIndexBuffer(); void SetupShaders(GLenum primitive_mode); + + HostCounter samples_passed{GL_SAMPLES_PASSED}; }; } // namespace OpenGL