forked from suyu/suyu
Query Cache: Fix guest side sample counting
This commit is contained in:
parent
282ae8fa51
commit
2fea1b8407
5 changed files with 97 additions and 46 deletions
|
@ -586,12 +586,6 @@ void Maxwell3D::ProcessQueryCondition() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::ProcessCounterReset() {
|
void Maxwell3D::ProcessCounterReset() {
|
||||||
#if ANDROID
|
|
||||||
if (!Settings::IsGPULevelHigh()) {
|
|
||||||
// This is problematic on Android, disable on GPU Normal.
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
switch (regs.clear_report_value) {
|
switch (regs.clear_report_value) {
|
||||||
case Regs::ClearReport::ZPassPixelCount:
|
case Regs::ClearReport::ZPassPixelCount:
|
||||||
rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);
|
rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);
|
||||||
|
|
|
@ -9,16 +9,15 @@
|
||||||
namespace VideoCommon {
|
namespace VideoCommon {
|
||||||
|
|
||||||
enum class QueryFlagBits : u32 {
|
enum class QueryFlagBits : u32 {
|
||||||
HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp.
|
HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp.
|
||||||
IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host
|
IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host
|
||||||
IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host
|
IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host
|
||||||
IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest.
|
IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest.
|
||||||
IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query
|
IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query
|
||||||
IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query
|
IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query
|
||||||
IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified.
|
IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified.
|
||||||
IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
|
IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
|
||||||
IsFence = 1 << 8, ///< Indicates the query is a fence.
|
IsFence = 1 << 8, ///< Indicates the query is a fence.
|
||||||
IsQueuedForAsyncFlush = 1 << 9, ///< Indicates that the query can be flushed at any moment
|
|
||||||
};
|
};
|
||||||
DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
|
DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
|
||||||
|
|
||||||
|
|
|
@ -256,30 +256,32 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
|
||||||
u8* pointer = impl->cpu_memory.GetPointer(cpu_addr);
|
u8* pointer = impl->cpu_memory.GetPointer(cpu_addr);
|
||||||
u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8);
|
u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8);
|
||||||
bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
|
bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
|
||||||
std::function<void()> operation(
|
std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
|
||||||
[this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] {
|
pointer, pointer_timestamp] {
|
||||||
if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
|
if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
|
||||||
if (!is_synced) [[likely]] {
|
|
||||||
impl->pending_unregister.push_back(query_location);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
|
|
||||||
UNREACHABLE();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
|
|
||||||
u64 timestamp = impl->gpu.GetTicks();
|
|
||||||
std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp));
|
|
||||||
std::memcpy(pointer, &query_base->value, sizeof(query_base->value));
|
|
||||||
} else {
|
|
||||||
u32 value = static_cast<u32>(query_base->value);
|
|
||||||
std::memcpy(pointer, &value, sizeof(value));
|
|
||||||
}
|
|
||||||
if (!is_synced) [[likely]] {
|
if (!is_synced) [[likely]] {
|
||||||
impl->pending_unregister.push_back(query_location);
|
impl->pending_unregister.push_back(query_location);
|
||||||
}
|
}
|
||||||
});
|
return;
|
||||||
|
}
|
||||||
|
if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
|
||||||
|
UNREACHABLE();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
query_base->value += streamer->GetAmmendValue();
|
||||||
|
streamer->SetAccumulationValue(query_base->value);
|
||||||
|
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
|
||||||
|
u64 timestamp = impl->gpu.GetTicks();
|
||||||
|
std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp));
|
||||||
|
std::memcpy(pointer, &query_base->value, sizeof(query_base->value));
|
||||||
|
} else {
|
||||||
|
u32 value = static_cast<u32>(query_base->value);
|
||||||
|
std::memcpy(pointer, &value, sizeof(value));
|
||||||
|
}
|
||||||
|
if (!is_synced) [[likely]] {
|
||||||
|
impl->pending_unregister.push_back(query_location);
|
||||||
|
}
|
||||||
|
});
|
||||||
if (is_fence) {
|
if (is_fence) {
|
||||||
impl->rasterizer.SignalFence(std::move(operation));
|
impl->rasterizer.SignalFence(std::move(operation));
|
||||||
} else {
|
} else {
|
||||||
|
@ -354,9 +356,9 @@ void QueryCacheBase<Traits>::NotifySegment(bool resume) {
|
||||||
if (resume) {
|
if (resume) {
|
||||||
impl->runtime.ResumeHostConditionalRendering();
|
impl->runtime.ResumeHostConditionalRendering();
|
||||||
} else {
|
} else {
|
||||||
impl->runtime.PauseHostConditionalRendering();
|
|
||||||
CounterClose(VideoCommon::QueryType::ZPassPixelCount64);
|
CounterClose(VideoCommon::QueryType::ZPassPixelCount64);
|
||||||
CounterClose(VideoCommon::QueryType::StreamingByteCount);
|
CounterClose(VideoCommon::QueryType::StreamingByteCount);
|
||||||
|
impl->runtime.PauseHostConditionalRendering();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -78,6 +78,14 @@ public:
|
||||||
return dependence_mask;
|
return dependence_mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64 GetAmmendValue() const {
|
||||||
|
return ammend_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetAccumulationValue(u64 new_value) {
|
||||||
|
acumulation_value = new_value;
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void MakeDependent(StreamerInterface* depend_on) {
|
void MakeDependent(StreamerInterface* depend_on) {
|
||||||
dependence_mask |= 1ULL << depend_on->id;
|
dependence_mask |= 1ULL << depend_on->id;
|
||||||
|
@ -87,6 +95,8 @@ protected:
|
||||||
const size_t id;
|
const size_t id;
|
||||||
u64 dependence_mask;
|
u64 dependence_mask;
|
||||||
u64 dependent_mask;
|
u64 dependent_mask;
|
||||||
|
u64 ammend_value{};
|
||||||
|
u64 acumulation_value{};
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename QueryType>
|
template <typename QueryType>
|
||||||
|
|
|
@ -110,13 +110,16 @@ struct HostSyncValues {
|
||||||
|
|
||||||
class SamplesStreamer : public BaseStreamer {
|
class SamplesStreamer : public BaseStreamer {
|
||||||
public:
|
public:
|
||||||
explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_,
|
explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_,
|
||||||
|
VideoCore::RasterizerInterface* rasterizer_, const Device& device_,
|
||||||
Scheduler& scheduler_, const MemoryAllocator& memory_allocator_)
|
Scheduler& scheduler_, const MemoryAllocator& memory_allocator_)
|
||||||
: BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_},
|
: BaseStreamer(id_), runtime{runtime_}, rasterizer{rasterizer_}, device{device_},
|
||||||
memory_allocator{memory_allocator_} {
|
scheduler{scheduler_}, memory_allocator{memory_allocator_} {
|
||||||
BuildResolveBuffer();
|
BuildResolveBuffer();
|
||||||
current_bank = nullptr;
|
current_bank = nullptr;
|
||||||
current_query = nullptr;
|
current_query = nullptr;
|
||||||
|
ammend_value = 0;
|
||||||
|
acumulation_value = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
~SamplesStreamer() = default;
|
~SamplesStreamer() = default;
|
||||||
|
@ -151,6 +154,11 @@ public:
|
||||||
PauseCounter();
|
PauseCounter();
|
||||||
}
|
}
|
||||||
AbandonCurrentQuery();
|
AbandonCurrentQuery();
|
||||||
|
std::function<void()> func([this, counts = pending_flush_queries.size()] {
|
||||||
|
ammend_value = 0;
|
||||||
|
acumulation_value = 0;
|
||||||
|
});
|
||||||
|
rasterizer->SyncOperation(std::move(func));
|
||||||
}
|
}
|
||||||
|
|
||||||
void CloseCounter() override {
|
void CloseCounter() override {
|
||||||
|
@ -244,7 +252,7 @@ public:
|
||||||
}
|
}
|
||||||
if (query->size_slots > 1) {
|
if (query->size_slots > 1) {
|
||||||
// This is problematic.
|
// This is problematic.
|
||||||
UNIMPLEMENTED();
|
// UNIMPLEMENTED();
|
||||||
}
|
}
|
||||||
query->flags |= VideoCommon::QueryFlagBits::IsHostSynced;
|
query->flags |= VideoCommon::QueryFlagBits::IsHostSynced;
|
||||||
auto loc_data = offsets[query->start_bank_id];
|
auto loc_data = offsets[query->start_bank_id];
|
||||||
|
@ -255,16 +263,20 @@ public:
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ReplicateCurrentQueryIfNeeded();
|
||||||
|
std::function<void()> func([this] { ammend_value = acumulation_value; });
|
||||||
|
rasterizer->SyncOperation(std::move(func));
|
||||||
AbandonCurrentQuery();
|
AbandonCurrentQuery();
|
||||||
pending_sync.clear();
|
pending_sync.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
|
size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
|
||||||
[[maybe_unused]] std::optional<u32> subreport) override {
|
[[maybe_unused]] std::optional<u32> subreport) override {
|
||||||
|
PauseCounter();
|
||||||
auto index = BuildQuery();
|
auto index = BuildQuery();
|
||||||
auto* new_query = GetQuery(index);
|
auto* new_query = GetQuery(index);
|
||||||
new_query->guest_address = address;
|
new_query->guest_address = address;
|
||||||
new_query->value = 100;
|
new_query->value = 0;
|
||||||
new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan;
|
new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan;
|
||||||
if (has_timestamp) {
|
if (has_timestamp) {
|
||||||
new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp;
|
new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp;
|
||||||
|
@ -291,6 +303,7 @@ public:
|
||||||
|
|
||||||
void PushUnsyncedQueries() override {
|
void PushUnsyncedQueries() override {
|
||||||
PauseCounter();
|
PauseCounter();
|
||||||
|
current_bank->Close();
|
||||||
{
|
{
|
||||||
std::scoped_lock lk(flush_guard);
|
std::scoped_lock lk(flush_guard);
|
||||||
pending_flush_sets.emplace_back(std::move(pending_flush_queries));
|
pending_flush_sets.emplace_back(std::move(pending_flush_queries));
|
||||||
|
@ -429,6 +442,34 @@ private:
|
||||||
current_query_id = 0;
|
current_query_id = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ReplicateCurrentQueryIfNeeded() {
|
||||||
|
if (pending_sync.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!current_query) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto index = BuildQuery();
|
||||||
|
auto* new_query = GetQuery(index);
|
||||||
|
new_query->guest_address = 0;
|
||||||
|
new_query->value = 0;
|
||||||
|
new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan;
|
||||||
|
new_query->start_bank_id = current_query->start_bank_id;
|
||||||
|
new_query->size_banks = current_query->size_banks;
|
||||||
|
new_query->start_slot = current_query->start_slot;
|
||||||
|
new_query->size_slots = current_query->size_slots;
|
||||||
|
ApplyBankOp(new_query, [](SamplesQueryBank* bank, size_t start, size_t amount) {
|
||||||
|
bank->AddReference(amount);
|
||||||
|
});
|
||||||
|
pending_flush_queries.push_back(index);
|
||||||
|
std::function<void()> func([this, index] {
|
||||||
|
auto* query = GetQuery(index);
|
||||||
|
query->value += GetAmmendValue();
|
||||||
|
SetAccumulationValue(query->value);
|
||||||
|
Free(index);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
void BuildResolveBuffer() {
|
void BuildResolveBuffer() {
|
||||||
const VkBufferCreateInfo buffer_ci = {
|
const VkBufferCreateInfo buffer_ci = {
|
||||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||||
|
@ -448,6 +489,7 @@ private:
|
||||||
static constexpr size_t resolve_slots = 8;
|
static constexpr size_t resolve_slots = 8;
|
||||||
|
|
||||||
QueryCacheRuntime& runtime;
|
QueryCacheRuntime& runtime;
|
||||||
|
VideoCore::RasterizerInterface* rasterizer;
|
||||||
const Device& device;
|
const Device& device;
|
||||||
Scheduler& scheduler;
|
Scheduler& scheduler;
|
||||||
const MemoryAllocator& memory_allocator;
|
const MemoryAllocator& memory_allocator;
|
||||||
|
@ -470,6 +512,7 @@ private:
|
||||||
size_t current_query_id;
|
size_t current_query_id;
|
||||||
VideoCommon::HostQueryBase* current_query;
|
VideoCommon::HostQueryBase* current_query;
|
||||||
bool has_started{};
|
bool has_started{};
|
||||||
|
bool current_unset{};
|
||||||
std::mutex flush_guard;
|
std::mutex flush_guard;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -677,7 +720,6 @@ public:
|
||||||
size_t offset_base = staging_ref.offset;
|
size_t offset_base = staging_ref.offset;
|
||||||
for (auto q : pending_flush_queries) {
|
for (auto q : pending_flush_queries) {
|
||||||
auto* query = GetQuery(q);
|
auto* query = GetQuery(q);
|
||||||
query->flags |= VideoCommon::QueryFlagBits::IsQueuedForAsyncFlush;
|
|
||||||
auto& bank = bank_pool.GetBank(query->start_bank_id);
|
auto& bank = bank_pool.GetBank(query->start_bank_id);
|
||||||
bank.Sync(staging_ref, offset_base, query->start_slot, 1);
|
bank.Sync(staging_ref, offset_base, query->start_slot, 1);
|
||||||
offset_base += TFBQueryBank::QUERY_SIZE;
|
offset_base += TFBQueryBank::QUERY_SIZE;
|
||||||
|
@ -1047,8 +1089,8 @@ struct QueryCacheRuntimeImpl {
|
||||||
buffer_cache{buffer_cache_}, device{device_},
|
buffer_cache{buffer_cache_}, device{device_},
|
||||||
memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_},
|
memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_},
|
||||||
guest_streamer(0, runtime),
|
guest_streamer(0, runtime),
|
||||||
sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, device,
|
sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, rasterizer,
|
||||||
scheduler, memory_allocator),
|
device, scheduler, memory_allocator),
|
||||||
tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
|
tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
|
||||||
scheduler, memory_allocator, staging_pool),
|
scheduler, memory_allocator, staging_pool),
|
||||||
primitives_succeeded_streamer(
|
primitives_succeeded_streamer(
|
||||||
|
@ -1277,6 +1319,10 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!is_in_bc[0] && !is_in_bc[1]) {
|
||||||
|
// Both queries are in query cache, it's best to just flush.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
HostConditionalRenderingCompareBCImpl(object_1.address, equal_check);
|
HostConditionalRenderingCompareBCImpl(object_1.address, equal_check);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue