1
0
Fork 0
forked from suyu/suyu

vk_query_cache: Implement generic query cache on Vulkan

This commit is contained in:
ReinUsesLisp 2020-02-11 18:59:44 -03:00
parent c31382ced5
commit bcd348f238
11 changed files with 327 additions and 20 deletions

View file

@ -180,6 +180,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_memory_manager.h renderer_vulkan/vk_memory_manager.h
renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.cpp
renderer_vulkan/vk_pipeline_cache.h renderer_vulkan/vk_pipeline_cache.h
renderer_vulkan/vk_query_cache.cpp
renderer_vulkan/vk_query_cache.h
renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.cpp
renderer_vulkan/vk_rasterizer.h renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_renderpass_cache.cpp renderer_vulkan/vk_renderpass_cache.cpp

View file

@ -88,7 +88,8 @@ private:
std::shared_ptr<HostCounter> last; std::shared_ptr<HostCounter> last;
}; };
template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter,
class QueryPool>
class QueryCacheBase { class QueryCacheBase {
public: public:
explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
@ -127,15 +128,25 @@ public:
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
void UpdateCounters() { void UpdateCounters() {
std::unique_lock lock{mutex};
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = system.GPU().Maxwell3D().regs;
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
} }
/// Resets a counter to zero. It doesn't disable the query after resetting. /// Resets a counter to zero. It doesn't disable the query after resetting.
void ResetCounter(VideoCore::QueryType type) { void ResetCounter(VideoCore::QueryType type) {
std::unique_lock lock{mutex};
Stream(type).Reset(); Stream(type).Reset();
} }
/// Disable all active streams. Expected to be called at the end of a command buffer.
void DisableStreams() {
std::unique_lock lock{mutex};
for (auto& stream : streams) {
stream.Update(false);
}
}
/// Returns a new host counter. /// Returns a new host counter.
std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency, std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
VideoCore::QueryType type) { VideoCore::QueryType type) {
@ -148,6 +159,9 @@ public:
return streams[static_cast<std::size_t>(type)]; return streams[static_cast<std::size_t>(type)];
} }
protected:
std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
private: private:
/// Flushes a memory range to guest memory and removes it from the cache. /// Flushes a memory range to guest memory and removes it from the cache.
void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
@ -213,8 +227,16 @@ private:
template <class QueryCache, class HostCounter> template <class QueryCache, class HostCounter>
class HostCounterBase { class HostCounterBase {
public: public:
explicit HostCounterBase(std::shared_ptr<HostCounter> dependency) explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_)
: dependency{std::move(dependency)} {} : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} {
// Avoid nesting too many dependencies to avoid a stack overflow when these are deleted.
static constexpr u64 depth_threshold = 96;
if (depth > depth_threshold) {
depth = 0;
base_result = dependency->Query();
dependency = nullptr;
}
}
/// Returns the current value of the query. /// Returns the current value of the query.
u64 Query() { u64 Query() {
@ -222,9 +244,10 @@ public:
return *result; return *result;
} }
u64 value = BlockingQuery(); u64 value = BlockingQuery() + base_result;
if (dependency) { if (dependency) {
value += dependency->Query(); value += dependency->Query();
dependency = nullptr;
} }
return *(result = value); return *(result = value);
@ -235,6 +258,10 @@ public:
return result.has_value(); return result.has_value();
} }
/// Returns how many dependencies are chained behind this counter. The value is zero when the
/// counter has no dependency or when the constructor collapsed an overly long chain.
u64 Depth() const noexcept {
return depth;
}
protected: protected:
/// Returns the value of query from the backend API blocking as needed. /// Returns the value of query from the backend API blocking as needed.
virtual u64 BlockingQuery() const = 0; virtual u64 BlockingQuery() const = 0;
@ -242,6 +269,8 @@ protected:
private: private:
std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
std::optional<u64> result; ///< Filled with the already returned value. std::optional<u64> result; ///< Filled with the already returned value.
u64 depth; ///< Number of nested dependencies.
u64 base_result = 0; ///< Equivalent to nested dependencies value.
}; };
template <class HostCounter> template <class HostCounter>

View file

@ -31,15 +31,16 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
} // Anonymous namespace } // Anonymous namespace
QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer) QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
: VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, : VideoCommon::QueryCacheBase<
HostCounter>{system, static_cast<VideoCore::RasterizerInterface&>( QueryCache, CachedQuery, CounterStream, HostCounter,
gl_rasterizer)}, std::vector<OGLQuery>>{system,
static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)},
gl_rasterizer{gl_rasterizer} {} gl_rasterizer{gl_rasterizer} {}
QueryCache::~QueryCache() = default; QueryCache::~QueryCache() = default;
OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) { OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
auto& reserve = queries_reserve[static_cast<std::size_t>(type)]; auto& reserve = query_pools[static_cast<std::size_t>(type)];
OGLQuery query; OGLQuery query;
if (reserve.empty()) { if (reserve.empty()) {
query.Create(GetTarget(type)); query.Create(GetTarget(type));
@ -52,7 +53,7 @@ OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
} }
void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) { void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
queries_reserve[static_cast<std::size_t>(type)].push_back(std::move(query)); query_pools[static_cast<std::size_t>(type)].push_back(std::move(query));
} }
bool QueryCache::AnyCommandQueued() const noexcept { bool QueryCache::AnyCommandQueued() const noexcept {

View file

@ -6,12 +6,8 @@
#include <array> #include <array>
#include <memory> #include <memory>
#include <optional>
#include <unordered_map>
#include <vector> #include <vector>
#include <glad/glad.h>
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/query_cache.h" #include "video_core/query_cache.h"
#include "video_core/rasterizer_interface.h" #include "video_core/rasterizer_interface.h"
@ -30,8 +26,8 @@ class RasterizerOpenGL;
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryCache final class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { HostCounter, std::vector<OGLQuery>> {
public: public:
explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
~QueryCache(); ~QueryCache();
@ -44,7 +40,6 @@ public:
private: private:
RasterizerOpenGL& gl_rasterizer; RasterizerOpenGL& gl_rasterizer;
std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> queries_reserve;
}; };
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
@ -59,7 +54,7 @@ private:
u64 BlockingQuery() const override; u64 BlockingQuery() const override;
QueryCache& cache; QueryCache& cache;
VideoCore::QueryType type; const VideoCore::QueryType type;
OGLQuery query; OGLQuery query;
}; };

View file

@ -104,6 +104,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
features.depthBiasClamp = true; features.depthBiasClamp = true;
features.geometryShader = true; features.geometryShader = true;
features.tessellationShader = true; features.tessellationShader = true;
features.occlusionQueryPrecise = true;
features.fragmentStoresAndAtomics = true; features.fragmentStoresAndAtomics = true;
features.shaderImageGatherExtended = true; features.shaderImageGatherExtended = true;
features.shaderStorageImageWriteWithoutFormat = true; features.shaderStorageImageWriteWithoutFormat = true;
@ -117,6 +118,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
bit8_storage.uniformAndStorageBuffer8BitAccess = true; bit8_storage.uniformAndStorageBuffer8BitAccess = true;
SetNext(next, bit8_storage); SetNext(next, bit8_storage);
vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset;
host_query_reset.hostQueryReset = true;
SetNext(next, host_query_reset);
vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
if (is_float16_supported) { if (is_float16_supported) {
float16_int8.shaderFloat16 = true; float16_int8.shaderFloat16 = true;
@ -273,6 +278,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
}; };
std::bitset<required_extensions.size()> available_extensions{}; std::bitset<required_extensions.size()> available_extensions{};
@ -340,6 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
std::make_pair(features.depthBiasClamp, "depthBiasClamp"), std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.geometryShader, "geometryShader"),
std::make_pair(features.tessellationShader, "tessellationShader"), std::make_pair(features.tessellationShader, "tessellationShader"),
std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
std::make_pair(features.shaderStorageImageWriteWithoutFormat, std::make_pair(features.shaderStorageImageWriteWithoutFormat,
@ -376,7 +383,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
} }
}; };
extensions.reserve(13); extensions.reserve(14);
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@ -384,6 +391,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME); extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);
[[maybe_unused]] const bool nsight = [[maybe_unused]] const bool nsight =
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");

View file

@ -0,0 +1,122 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
namespace {
constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion};
constexpr vk::QueryType GetTarget(VideoCore::QueryType type) {
return QUERY_TARGETS[static_cast<std::size_t>(type)];
}
} // Anonymous namespace
/// Builds an empty pool, handing GROW_STEP to the VKFencedPool base.
/// NOTE(review): presumably the base grows by that many resources at a time - confirm in
/// VKFencedPool. Initialize must still be called before the pool can allocate.
QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {}
/// Defaulted; the Vulkan pools are held in UniqueQueryPool handles stored in `pools`.
QueryPool::~QueryPool() = default;
/// Binds the pool to a device and to the guest query type it will serve.
/// Allocate dereferences the stored device, so this has to run before the first allocation.
void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) {
    type = type_;
    device = &device_;
}
/// Hands out a free query slot protected by the given fence.
/// @param fence Fence passed on to VKFencedPool::CommitResource.
/// @returns The Vulkan query pool owning the slot and the slot's index inside that pool.
std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) {
    // A resource returned by the fenced pool may still be referenced by a live HostCounter;
    // keep asking until one comes back that is not flagged as in use.
    std::size_t slot = CommitResource(fence);
    while (usage[slot]) {
        slot = CommitResource(fence);
    }
    usage[slot] = true;

    // Slots are numbered across all pools; every pool holds GROW_STEP of them.
    const std::size_t pool_index = slot / GROW_STEP;
    const auto query_index = static_cast<std::uint32_t>(slot % GROW_STEP);
    return {*pools[pool_index], query_index};
}
void QueryPool::Allocate(std::size_t begin, std::size_t end) {
usage.resize(end);
const auto dev = device->GetLogical();
const u32 size = static_cast<u32>(end - begin);
const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {});
pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader()));
}
/// Marks a previously committed query slot as free so Commit can hand it out again.
/// @param query Pool handle and slot index as returned by Commit.
void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) {
    const auto it =
        std::find_if(std::begin(pools), std::end(pools),
                     [query_pool = query.first](const auto& pool) { return query_pool == *pool; });
    ASSERT(it != std::end(pools));
    if (it == std::end(pools)) {
        // The handle does not belong to this pool. If ASSERT does not abort, continuing would
        // derive an index from end() and write outside `usage`, so release nothing instead.
        return;
    }

    const auto pool_index = static_cast<std::size_t>(std::distance(std::begin(pools), it));
    usage[pool_index * GROW_STEP + query.second] = false;
}
/// Creates the Vulkan query cache and binds one QueryPool per guest query type to the device.
VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                           const VKDevice& device, VKScheduler& scheduler)
    : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
                                  QueryPool>{system, rasterizer},
      device{device}, scheduler{scheduler} {
    // query_pools has one entry per query type; the position doubles as the enumerator value.
    std::size_t type_index = 0;
    for (QueryPool& pool : query_pools) {
        pool.Initialize(device, static_cast<VideoCore::QueryType>(type_index));
        ++type_index;
    }
}

VKQueryCache::~VKQueryCache() = default;
/// Commits a query slot of the requested type, tied to the scheduler's current fence.
/// @returns The Vulkan query pool and the slot index inside it.
std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) {
    QueryPool& pool = query_pools[static_cast<std::size_t>(type)];
    return pool.Commit(scheduler.GetFence());
}
/// Gives a query slot back to the pool serving the given query type.
/// @param type  Query type the slot was allocated for.
/// @param query Pool handle and slot index obtained from AllocateQuery.
void VKQueryCache::Reserve(VideoCore::QueryType type,
                           std::pair<vk::QueryPool, std::uint32_t> query) {
    QueryPool& pool = query_pools[static_cast<std::size_t>(type)];
    pool.Reserve(query);
}
/// Allocates a query slot, remembers the scheduler tick it was created on and records the
/// commands that reset the slot and begin a precise query on it.
HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
                         VideoCore::QueryType type)
    : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache},
      type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} {
    // Copy everything the recorded command needs; it runs later, outside this constructor.
    const auto logical = cache.Device().GetLogical();
    const vk::QueryPool pool = query.first;
    const std::uint32_t slot = query.second;
    cache.Scheduler().Record([logical, pool, slot](vk::CommandBuffer cmdbuf, auto& dld) {
        // Host-side reset (VK_EXT_host_query_reset) followed by the begin on the command buffer.
        logical.resetQueryPoolEXT(pool, slot, 1, dld);
        cmdbuf.beginQuery(pool, slot, vk::QueryControlFlagBits::ePrecise, dld);
    });
}
/// Gives the query slot back to the cache, which clears its in-use flag so that
/// QueryPool::Commit may hand it out again.
HostCounter::~HostCounter() {
cache.Reserve(type, query);
}
void HostCounter::EndQuery() {
cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) {
cmdbuf.endQuery(query.first, query.second, dld);
});
}
/// Reads the 64-bit result of this counter's query, waiting until it is available.
/// @returns The query value, or 0 when Vulkan reports a failure.
u64 HostCounter::BlockingQuery() const {
    // The scheduler tick advances every time a new context is allocated. If it has not moved
    // past the tick this counter was created on, the query commands have not been flushed yet.
    if (ticks >= cache.Scheduler().Ticks()) {
        cache.Scheduler().Flush();
    }

    const auto dev = cache.Device().GetLogical();
    const auto& dld = cache.Device().GetDispatchLoader();

    // Start from a defined value: on failure the driver leaves the destination unspecified.
    u64 value = 0;
    const vk::Result result = dev.getQueryPoolResults(
        query.first, query.second, 1, sizeof(value), &value, sizeof(value),
        vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld);
    if (result != vk::Result::eSuccess) {
        // With eWait the call only returns without data on an error (e.g. device loss).
        // Report a neutral count rather than whatever happened to be written.
        return 0;
    }
    return value;
}
} // namespace Vulkan

View file

@ -0,0 +1,104 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/query_cache.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
namespace VideoCore {
class RasterizerInterface;
}
namespace Vulkan {
class CachedQuery;
class HostCounter;
class VKDevice;
class VKQueryCache;
class VKScheduler;
using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
/// Growable set of Vulkan query slots for a single guest query type, built on VKFencedPool.
/// Slots are numbered across all owned Vulkan pools; each pool holds GROW_STEP slots.
class QueryPool final : public VKFencedPool {
public:
explicit QueryPool();
~QueryPool() override;
/// Stores the device and query type. Must be called before any slot is allocated, because
/// Allocate dereferences the stored device pointer.
void Initialize(const VKDevice& device, VideoCore::QueryType type);
/// Commits a free slot guarded by the fence and marks it as in use.
/// @returns The owning Vulkan query pool and the slot's index inside it.
std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence);
/// Marks a slot returned by Commit as free again.
void Reserve(std::pair<vk::QueryPool, std::uint32_t> query);
protected:
/// VKFencedPool hook: creates a Vulkan query pool covering the slots in [begin, end).
void Allocate(std::size_t begin, std::size_t end) override;
private:
/// Value handed to VKFencedPool and used to split a slot number into pool and local index.
static constexpr std::size_t GROW_STEP = 512;
const VKDevice* device = nullptr; ///< Set by Initialize; null until then.
VideoCore::QueryType type = {}; ///< Guest query type served by this pool.
std::vector<UniqueQueryPool> pools; ///< Owned Vulkan query pools, one per Allocate call.
std::vector<bool> usage; ///< Per-slot flag: true while a slot is held by a caller.
};
/// Vulkan specialization of the generic query cache. Owns one QueryPool per guest query type
/// (through the base class) and exposes the device and scheduler to its host counters.
class VKQueryCache final
: public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
QueryPool> {
public:
explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
const VKDevice& device, VKScheduler& scheduler);
~VKQueryCache();
/// Commits a query slot of the given type using the scheduler's current fence.
std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type);
/// Returns a slot obtained from AllocateQuery to the pool of the given type.
void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query);
/// Device the query pools are created on.
const VKDevice& Device() const noexcept {
return device;
}
/// Scheduler used to record query commands. Returned as a mutable reference even from a
/// const cache, since only a reference is stored.
VKScheduler& Scheduler() const noexcept {
return scheduler;
}
private:
const VKDevice& device;
VKScheduler& scheduler;
};
/// A single Vulkan query in flight. Construction allocates a slot and records the begin of the
/// query, EndQuery records its end, and destruction returns the slot to the cache.
class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
public:
/// @param cache      Cache providing the query slot, device and scheduler.
/// @param dependency Counter forwarded to HostCounterBase; may be null.
/// @param type       Guest query type to allocate the slot for.
explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
VideoCore::QueryType type);
~HostCounter();
/// Records the command that ends the query.
void EndQuery();
private:
/// Reads the query result from Vulkan, flushing the scheduler first when needed.
u64 BlockingQuery() const override;
// Declaration order matters: the constructor initializes `query` and `ticks` through `cache`.
VKQueryCache& cache;
const VideoCore::QueryType type;
const std::pair<vk::QueryPool, std::uint32_t> query; ///< Vulkan pool handle and slot index.
const u64 ticks; ///< Scheduler tick observed when the counter was created.
};
/// Vulkan cached query. Adds nothing to CachedQueryBase; the cache and query type arguments
/// are accepted but ignored, and only the guest address and host pointer reach the base.
class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
public:
explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr)
: VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {}
};
} // namespace Vulkan

View file

@ -289,7 +289,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
staging_pool), staging_pool),
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
sampler_cache(device) {} sampler_cache(device), query_cache(system, *this, device, scheduler) {
scheduler.SetQueryCache(query_cache);
}
RasterizerVulkan::~RasterizerVulkan() = default; RasterizerVulkan::~RasterizerVulkan() = default;
@ -308,6 +310,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
FlushWork(); FlushWork();
query_cache.UpdateCounters();
const auto& gpu = system.GPU().Maxwell3D(); const auto& gpu = system.GPU().Maxwell3D();
GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
@ -362,6 +366,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
void RasterizerVulkan::Clear() { void RasterizerVulkan::Clear() {
MICROPROFILE_SCOPE(Vulkan_Clearing); MICROPROFILE_SCOPE(Vulkan_Clearing);
query_cache.UpdateCounters();
const auto& gpu = system.GPU().Maxwell3D(); const auto& gpu = system.GPU().Maxwell3D();
if (!system.GPU().Maxwell3D().ShouldExecute()) { if (!system.GPU().Maxwell3D().ShouldExecute()) {
return; return;
@ -429,6 +435,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
sampled_views.clear(); sampled_views.clear();
image_views.clear(); image_views.clear();
query_cache.UpdateCounters();
const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
const ComputePipelineCacheKey key{ const ComputePipelineCacheKey key{
code_addr, code_addr,
@ -471,17 +479,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
}); });
} }
/// Rasterizer interface override: forwards the counter reset to the Vulkan query cache.
void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
query_cache.ResetCounter(type);
}
/// Rasterizer interface override: forwards a guest query request to the Vulkan query cache
/// with all arguments unchanged.
/// NOTE(review): the meaning of an empty `timestamp` is decided by QueryCacheBase::Query,
/// which is not visible here - confirm there.
void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
std::optional<u64> timestamp) {
query_cache.Query(gpu_addr, type, timestamp);
}
void RasterizerVulkan::FlushAll() {} void RasterizerVulkan::FlushAll() {}
void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
texture_cache.FlushRegion(addr, size); texture_cache.FlushRegion(addr, size);
buffer_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size);
query_cache.FlushRegion(addr, size);
} }
void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
texture_cache.InvalidateRegion(addr, size); texture_cache.InvalidateRegion(addr, size);
pipeline_cache.InvalidateRegion(addr, size); pipeline_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
} }
void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {

View file

@ -24,6 +24,7 @@
#include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_sampler_cache.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h"
@ -96,7 +97,7 @@ struct ImageView {
vk::ImageLayout* layout = nullptr; vk::ImageLayout* layout = nullptr;
}; };
class RasterizerVulkan : public VideoCore::RasterizerAccelerated { class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public: public:
explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
VKScreenInfo& screen_info, const VKDevice& device, VKScreenInfo& screen_info, const VKDevice& device,
@ -108,6 +109,8 @@ public:
bool DrawMultiBatch(bool is_indexed) override; bool DrawMultiBatch(bool is_indexed) override;
void Clear() override; void Clear() override;
void DispatchCompute(GPUVAddr code_addr) override; void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override; void FlushAll() override;
void FlushRegion(CacheAddr addr, u64 size) override; void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override;
@ -247,6 +250,7 @@ private:
VKPipelineCache pipeline_cache; VKPipelineCache pipeline_cache;
VKBufferCache buffer_cache; VKBufferCache buffer_cache;
VKSamplerCache sampler_cache; VKSamplerCache sampler_cache;
VKQueryCache query_cache;
std::array<View, Maxwell::NumRenderTargets> color_attachments; std::array<View, Maxwell::NumRenderTargets> color_attachments;
View zeta_attachment; View zeta_attachment;

View file

@ -6,6 +6,7 @@
#include "common/microprofile.h" #include "common/microprofile.h"
#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
} }
void VKScheduler::AllocateNewContext() { void VKScheduler::AllocateNewContext() {
++ticks;
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
current_fence = next_fence; current_fence = next_fence;
next_fence = &resource_manager.CommitFence(); next_fence = &resource_manager.CommitFence();
@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() {
current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
device.GetDispatchLoader()); device.GetDispatchLoader());
// Enable counters once again. These are disabled when a command buffer is finished.
if (query_cache) {
query_cache->UpdateCounters();
}
} }
void VKScheduler::InvalidateState() { void VKScheduler::InvalidateState() {
@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() {
} }
void VKScheduler::EndPendingOperations() { void VKScheduler::EndPendingOperations() {
query_cache->DisableStreams();
EndRenderPass(); EndRenderPass();
} }

View file

@ -4,6 +4,7 @@
#pragma once #pragma once
#include <atomic>
#include <condition_variable> #include <condition_variable>
#include <memory> #include <memory>
#include <optional> #include <optional>
@ -18,6 +19,7 @@ namespace Vulkan {
class VKDevice; class VKDevice;
class VKFence; class VKFence;
class VKQueryCache;
class VKResourceManager; class VKResourceManager;
class VKFenceView { class VKFenceView {
@ -67,6 +69,11 @@ public:
/// Binds a pipeline to the current execution context. /// Binds a pipeline to the current execution context.
void BindGraphicsPipeline(vk::Pipeline pipeline); void BindGraphicsPipeline(vk::Pipeline pipeline);
/// Assigns the query cache. Only a non-owning pointer is stored; the scheduler uses it to
/// disable query streams when pending operations end and to update them again when a new
/// context is allocated.
void SetQueryCache(VKQueryCache& query_cache_) {
query_cache = &query_cache_;
}
/// Returns true when viewports have been set in the current command buffer. /// Returns true when viewports have been set in the current command buffer.
bool TouchViewports() { bool TouchViewports() {
return std::exchange(state.viewports, true); return std::exchange(state.viewports, true);
@ -112,6 +119,11 @@ public:
return current_fence; return current_fence;
} }
/// Returns the current command buffer tick. The counter is a std::atomic<u64> that
/// AllocateNewContext increments, so the value changes each time a new context is allocated.
u64 Ticks() const {
return ticks;
}
private: private:
class Command { class Command {
public: public:
@ -205,6 +217,8 @@ private:
const VKDevice& device; const VKDevice& device;
VKResourceManager& resource_manager; VKResourceManager& resource_manager;
VKQueryCache* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf; vk::CommandBuffer current_cmdbuf;
VKFence* current_fence = nullptr; VKFence* current_fence = nullptr;
VKFence* next_fence = nullptr; VKFence* next_fence = nullptr;
@ -227,6 +241,7 @@ private:
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
std::mutex mutex; std::mutex mutex;
std::condition_variable cv; std::condition_variable cv;
std::atomic<u64> ticks = 0;
bool quit = false; bool quit = false;
}; };