3
0
Fork 0
forked from suyu/suyu

Merge pull request #10942 from FernandoS27/android-is-a-pain-in-the-a--

Memory Tracking: Add mechanism to register small writes when gpu page is contested by GPU
This commit is contained in:
liamwhite 2023-07-02 11:29:01 -04:00 committed by GitHub
commit eaa62aee98
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 329 additions and 41 deletions

View file

@ -27,6 +27,7 @@
#include "core/file_sys/savedata_factory.h" #include "core/file_sys/savedata_factory.h"
#include "core/file_sys/vfs_concat.h" #include "core/file_sys/vfs_concat.h"
#include "core/file_sys/vfs_real.h" #include "core/file_sys/vfs_real.h"
#include "core/gpu_dirty_memory_manager.h"
#include "core/hid/hid_core.h" #include "core/hid/hid_core.h"
#include "core/hle/kernel/k_memory_manager.h" #include "core/hle/kernel/k_memory_manager.h"
#include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_process.h"
@ -130,7 +131,10 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
struct System::Impl { struct System::Impl {
explicit Impl(System& system) explicit Impl(System& system)
: kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{}, : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{},
cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system} {} cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system},
gpu_dirty_memory_write_manager{} {
memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
}
void Initialize(System& system) { void Initialize(System& system) {
device_memory = std::make_unique<Core::DeviceMemory>(); device_memory = std::make_unique<Core::DeviceMemory>();
@ -234,6 +238,8 @@ struct System::Impl {
// Setting changes may require a full system reinitialization (e.g., disabling multicore). // Setting changes may require a full system reinitialization (e.g., disabling multicore).
ReinitializeIfNecessary(system); ReinitializeIfNecessary(system);
memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
kernel.Initialize(); kernel.Initialize();
cpu_manager.Initialize(); cpu_manager.Initialize();
@ -540,6 +546,9 @@ struct System::Impl {
std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{}; std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{}; std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{};
std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES>
gpu_dirty_memory_write_manager{};
}; };
System::System() : impl{std::make_unique<Impl>(*this)} {} System::System() : impl{std::make_unique<Impl>(*this)} {}
@ -629,10 +638,31 @@ void System::PrepareReschedule(const u32 core_index) {
impl->kernel.PrepareReschedule(core_index); impl->kernel.PrepareReschedule(core_index);
} }
Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() {
const std::size_t core = impl->kernel.GetCurrentHostThreadID();
return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES
? core
: Core::Hardware::NUM_CPU_CORES - 1];
}
/// Provides a constant reference to the current gou dirty memory manager.
const Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() const {
const std::size_t core = impl->kernel.GetCurrentHostThreadID();
return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES
? core
: Core::Hardware::NUM_CPU_CORES - 1];
}
size_t System::GetCurrentHostThreadID() const { size_t System::GetCurrentHostThreadID() const {
return impl->kernel.GetCurrentHostThreadID(); return impl->kernel.GetCurrentHostThreadID();
} }
void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
for (auto& manager : impl->gpu_dirty_memory_write_manager) {
manager.Gather(callback);
}
}
PerfStatsResults System::GetAndResetPerfStats() { PerfStatsResults System::GetAndResetPerfStats() {
return impl->GetAndResetPerfStats(); return impl->GetAndResetPerfStats();
} }

View file

@ -108,9 +108,10 @@ class CpuManager;
class Debugger; class Debugger;
class DeviceMemory; class DeviceMemory;
class ExclusiveMonitor; class ExclusiveMonitor;
class SpeedLimiter; class GPUDirtyMemoryManager;
class PerfStats; class PerfStats;
class Reporter; class Reporter;
class SpeedLimiter;
class TelemetrySession; class TelemetrySession;
struct PerfStatsResults; struct PerfStatsResults;
@ -225,6 +226,14 @@ public:
/// Prepare the core emulation for a reschedule /// Prepare the core emulation for a reschedule
void PrepareReschedule(u32 core_index); void PrepareReschedule(u32 core_index);
/// Provides a reference to the gou dirty memory manager.
[[nodiscard]] Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager();
/// Provides a constant reference to the current gou dirty memory manager.
[[nodiscard]] const Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager() const;
void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
[[nodiscard]] size_t GetCurrentHostThreadID() const; [[nodiscard]] size_t GetCurrentHostThreadID() const;
/// Gets and resets core performance statistics /// Gets and resets core performance statistics

View file

@ -0,0 +1,122 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <atomic>
#include <bit>
#include <functional>
#include <mutex>
#include <utility>
#include <vector>
#include "core/memory.h"
namespace Core {
class GPUDirtyMemoryManager {
public:
GPUDirtyMemoryManager() : current{default_transform} {
back_buffer.reserve(256);
front_buffer.reserve(256);
}
~GPUDirtyMemoryManager() = default;
void Collect(VAddr address, size_t size) {
TransformAddress t = BuildTransform(address, size);
TransformAddress tmp, original;
do {
tmp = current.load(std::memory_order_acquire);
original = tmp;
if (tmp.address != t.address) {
if (IsValid(tmp.address)) {
std::scoped_lock lk(guard);
back_buffer.emplace_back(tmp);
current.exchange(t, std::memory_order_relaxed);
return;
}
tmp.address = t.address;
tmp.mask = 0;
}
if ((tmp.mask | t.mask) == tmp.mask) {
return;
}
tmp.mask |= t.mask;
} while (!current.compare_exchange_weak(original, tmp, std::memory_order_release,
std::memory_order_relaxed));
}
void Gather(std::function<void(VAddr, size_t)>& callback) {
{
std::scoped_lock lk(guard);
TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed);
front_buffer.swap(back_buffer);
if (IsValid(t.address)) {
front_buffer.emplace_back(t);
}
}
for (auto& transform : front_buffer) {
size_t offset = 0;
u64 mask = transform.mask;
while (mask != 0) {
const size_t empty_bits = std::countr_zero(mask);
offset += empty_bits << align_bits;
mask = mask >> empty_bits;
const size_t continuous_bits = std::countr_one(mask);
callback((static_cast<VAddr>(transform.address) << page_bits) + offset,
continuous_bits << align_bits);
mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
offset += continuous_bits << align_bits;
}
}
front_buffer.clear();
}
private:
struct alignas(8) TransformAddress {
u32 address;
u32 mask;
};
constexpr static size_t page_bits = Memory::YUZU_PAGEBITS - 1;
constexpr static size_t page_size = 1ULL << page_bits;
constexpr static size_t page_mask = page_size - 1;
constexpr static size_t align_bits = 6U;
constexpr static size_t align_size = 1U << align_bits;
constexpr static size_t align_mask = align_size - 1;
constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U};
bool IsValid(VAddr address) {
return address < (1ULL << 39);
}
template <typename T>
T CreateMask(size_t top_bit, size_t minor_bit) {
T mask = ~T(0);
mask <<= (sizeof(T) * 8 - top_bit);
mask >>= (sizeof(T) * 8 - top_bit);
mask >>= minor_bit;
mask <<= minor_bit;
return mask;
}
TransformAddress BuildTransform(VAddr address, size_t size) {
const size_t minor_address = address & page_mask;
const size_t minor_bit = minor_address >> align_bits;
const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
TransformAddress result{};
result.address = static_cast<u32>(address >> page_bits);
result.mask = CreateMask<u32>(top_bit, minor_bit);
return result;
}
std::atomic<TransformAddress> current{};
std::mutex guard;
std::vector<TransformAddress> back_buffer;
std::vector<TransformAddress> front_buffer;
};
} // namespace Core

View file

@ -3,6 +3,7 @@
#include <algorithm> #include <algorithm>
#include <cstring> #include <cstring>
#include <span>
#include "common/assert.h" #include "common/assert.h"
#include "common/atomic_ops.h" #include "common/atomic_ops.h"
@ -13,6 +14,7 @@
#include "common/swap.h" #include "common/swap.h"
#include "core/core.h" #include "core/core.h"
#include "core/device_memory.h" #include "core/device_memory.h"
#include "core/gpu_dirty_memory_manager.h"
#include "core/hardware_properties.h" #include "core/hardware_properties.h"
#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_process.h"
@ -678,7 +680,7 @@ struct Memory::Impl {
LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
GetInteger(vaddr), static_cast<u64>(data)); GetInteger(vaddr), static_cast<u64>(data));
}, },
[&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); }); [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
if (ptr) { if (ptr) {
std::memcpy(ptr, &data, sizeof(T)); std::memcpy(ptr, &data, sizeof(T));
} }
@ -692,7 +694,7 @@ struct Memory::Impl {
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data));
}, },
[&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); }); [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
if (ptr) { if (ptr) {
const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
@ -707,7 +709,7 @@ struct Memory::Impl {
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0])); GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0]));
}, },
[&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(u128)); }); [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); });
if (ptr) { if (ptr) {
const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
@ -717,7 +719,7 @@ struct Memory::Impl {
void HandleRasterizerDownload(VAddr address, size_t size) { void HandleRasterizerDownload(VAddr address, size_t size) {
const size_t core = system.GetCurrentHostThreadID(); const size_t core = system.GetCurrentHostThreadID();
auto& current_area = rasterizer_areas[core]; auto& current_area = rasterizer_read_areas[core];
const VAddr end_address = address + size; const VAddr end_address = address + size;
if (current_area.start_address <= address && end_address <= current_area.end_address) if (current_area.start_address <= address && end_address <= current_area.end_address)
[[likely]] { [[likely]] {
@ -726,9 +728,31 @@ struct Memory::Impl {
current_area = system.GPU().OnCPURead(address, size); current_area = system.GPU().OnCPURead(address, size);
} }
Common::PageTable* current_page_table = nullptr; void HandleRasterizerWrite(VAddr address, size_t size) {
std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{}; const size_t core = system.GetCurrentHostThreadID();
auto& current_area = rasterizer_write_areas[core];
VAddr subaddress = address >> YUZU_PAGEBITS;
bool do_collection = current_area.last_address == subaddress;
if (!do_collection) [[unlikely]] {
do_collection = system.GPU().OnCPUWrite(address, size);
if (!do_collection) {
return;
}
current_area.last_address = subaddress;
}
gpu_dirty_managers[core].Collect(address, size);
}
struct GPUDirtyState {
VAddr last_address;
};
Core::System& system; Core::System& system;
Common::PageTable* current_page_table = nullptr;
std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
rasterizer_read_areas{};
std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
}; };
Memory::Memory(Core::System& system_) : system{system_} { Memory::Memory(Core::System& system_) : system{system_} {
@ -876,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size)
impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size);
} }
void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) {
impl->gpu_dirty_managers = managers;
}
Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) {
return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size);
} }

View file

@ -5,6 +5,7 @@
#include <cstddef> #include <cstddef>
#include <memory> #include <memory>
#include <span>
#include <string> #include <string>
#include "common/typed_address.h" #include "common/typed_address.h"
#include "core/hle/result.h" #include "core/hle/result.h"
@ -15,7 +16,8 @@ struct PageTable;
namespace Core { namespace Core {
class System; class System;
} class GPUDirtyMemoryManager;
} // namespace Core
namespace Kernel { namespace Kernel {
class PhysicalMemory; class PhysicalMemory;
@ -458,6 +460,8 @@ public:
*/ */
void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
private: private:
Core::System& system; Core::System& system;

View file

@ -115,7 +115,34 @@ void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
template <class P> template <class P>
void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
memory_tracker.CachedCpuWrite(cpu_addr, size); const bool is_dirty = IsRegionRegistered(cpu_addr, size);
if (!is_dirty) {
return;
}
VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE);
VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE);
if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
WriteMemory(cpu_addr, size);
return;
}
tmp_buffer.resize_destructive(size);
cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size);
InlineMemoryImplementation(cpu_addr, size, tmp_buffer);
}
template <class P>
bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) {
const bool is_dirty = IsRegionRegistered(cpu_addr, size);
if (!is_dirty) {
return false;
}
if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) {
return true;
}
WriteMemory(cpu_addr, size);
return false;
} }
template <class P> template <class P>
@ -1553,6 +1580,14 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
return false; return false;
} }
InlineMemoryImplementation(dest_address, copy_size, inlined_buffer);
return true;
}
template <class P>
void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
std::span<const u8> inlined_buffer) {
const IntervalType subtract_interval{dest_address, dest_address + copy_size}; const IntervalType subtract_interval{dest_address, dest_address + copy_size};
ClearDownload(subtract_interval); ClearDownload(subtract_interval);
common_ranges.subtract(subtract_interval); common_ranges.subtract(subtract_interval);
@ -1574,8 +1609,6 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
} else { } else {
buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size)); buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
} }
return true;
} }
template <class P> template <class P>

View file

@ -245,6 +245,8 @@ public:
void CachedWriteMemory(VAddr cpu_addr, u64 size); void CachedWriteMemory(VAddr cpu_addr, u64 size);
bool OnCPUWrite(VAddr cpu_addr, u64 size);
void DownloadMemory(VAddr cpu_addr, u64 size); void DownloadMemory(VAddr cpu_addr, u64 size);
std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size);
@ -543,6 +545,9 @@ private:
void ClearDownload(IntervalType subtract_interval); void ClearDownload(IntervalType subtract_interval);
void InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
std::span<const u8> inlined_buffer);
VideoCore::RasterizerInterface& rasterizer; VideoCore::RasterizerInterface& rasterizer;
Core::Memory::Memory& cpu_memory; Core::Memory::Memory& cpu_memory;

View file

@ -69,7 +69,6 @@ public:
} }
void SignalFence(std::function<void()>&& func) { void SignalFence(std::function<void()>&& func) {
rasterizer.InvalidateGPUCache();
bool delay_fence = Settings::IsGPULevelHigh(); bool delay_fence = Settings::IsGPULevelHigh();
if constexpr (!can_async_check) { if constexpr (!can_async_check) {
TryReleasePendingFences<false>(); TryReleasePendingFences<false>();
@ -96,6 +95,7 @@ public:
guard.unlock(); guard.unlock();
cv.notify_all(); cv.notify_all();
} }
rasterizer.InvalidateGPUCache();
} }
void SignalSyncPoint(u32 value) { void SignalSyncPoint(u32 value) {

View file

@ -95,7 +95,9 @@ struct GPU::Impl {
/// Synchronizes CPU writes with Host GPU memory. /// Synchronizes CPU writes with Host GPU memory.
void InvalidateGPUCache() { void InvalidateGPUCache() {
rasterizer->InvalidateGPUCache(); std::function<void(VAddr, size_t)> callback_writes(
[this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
system.GatherGPUDirtyMemory(callback_writes);
} }
/// Signal the ending of command list. /// Signal the ending of command list.
@ -299,6 +301,10 @@ struct GPU::Impl {
gpu_thread.InvalidateRegion(addr, size); gpu_thread.InvalidateRegion(addr, size);
} }
bool OnCPUWrite(VAddr addr, u64 size) {
return rasterizer->OnCPUWrite(addr, size);
}
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(VAddr addr, u64 size) { void FlushAndInvalidateRegion(VAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size); gpu_thread.FlushAndInvalidateRegion(addr, size);
@ -561,6 +567,10 @@ void GPU::InvalidateRegion(VAddr addr, u64 size) {
impl->InvalidateRegion(addr, size); impl->InvalidateRegion(addr, size);
} }
bool GPU::OnCPUWrite(VAddr addr, u64 size) {
return impl->OnCPUWrite(addr, size);
}
void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
impl->FlushAndInvalidateRegion(addr, size); impl->FlushAndInvalidateRegion(addr, size);
} }

View file

@ -250,6 +250,10 @@ public:
/// Notify rasterizer that any caches of the specified region should be invalidated /// Notify rasterizer that any caches of the specified region should be invalidated
void InvalidateRegion(VAddr addr, u64 size); void InvalidateRegion(VAddr addr, u64 size);
/// Notify rasterizer that CPU is trying to write this area. It returns true if the area is
/// sensible, false otherwise
bool OnCPUWrite(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(VAddr addr, u64 size); void FlushAndInvalidateRegion(VAddr addr, u64 size);

View file

@ -47,7 +47,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
} else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
rasterizer->FlushRegion(flush->addr, flush->size); rasterizer->FlushRegion(flush->addr, flush->size);
} else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); rasterizer->OnCacheInvalidation(invalidate->addr, invalidate->size);
} else { } else {
ASSERT(false); ASSERT(false);
} }
@ -102,12 +102,12 @@ void ThreadManager::TickGPU() {
} }
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
rasterizer->OnCPUWrite(addr, size); rasterizer->OnCacheInvalidation(addr, size);
} }
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
rasterizer->OnCPUWrite(addr, size); rasterizer->OnCacheInvalidation(addr, size);
} }
u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {

View file

@ -109,7 +109,9 @@ public:
} }
/// Notify rasterizer that any caches of the specified region are desync with guest /// Notify rasterizer that any caches of the specified region are desync with guest
virtual void OnCPUWrite(VAddr addr, u64 size) = 0; virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0;
virtual bool OnCPUWrite(VAddr addr, u64 size) = 0;
/// Sync memory between guest and host. /// Sync memory between guest and host.
virtual void InvalidateGPUCache() = 0; virtual void InvalidateGPUCache() = 0;

View file

@ -47,7 +47,10 @@ bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheTyp
return false; return false;
} }
void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {}
void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {} bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {
return false;
}
void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {}
VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) {
VideoCore::RasterizerDownloadArea new_area{ VideoCore::RasterizerDownloadArea new_area{
.start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE),

View file

@ -53,7 +53,8 @@ public:
VideoCommon::CacheType which = VideoCommon::CacheType::All) override; VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void InvalidateRegion(VAddr addr, u64 size, void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override; VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void OnCPUWrite(VAddr addr, u64 size) override; void OnCacheInvalidation(VAddr addr, u64 size) override;
bool OnCPUWrite(VAddr addr, u64 size) override;
VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
void InvalidateGPUCache() override; void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override; void UnmapMemory(VAddr addr, u64 size) override;

View file

@ -485,12 +485,33 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
} }
} }
void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) {
return false;
}
{
std::scoped_lock lock{buffer_cache.mutex};
if (buffer_cache.OnCPUWrite(addr, size)) {
return true;
}
}
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
shader_cache.InvalidateRegion(addr, size);
return false;
}
void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) { if (addr == 0 || size == 0) {
return; return;
} }
shader_cache.OnCPUWrite(addr, size);
{ {
std::scoped_lock lock{texture_cache.mutex}; std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size); texture_cache.WriteMemory(addr, size);
@ -499,15 +520,11 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex}; std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.CachedWriteMemory(addr, size); buffer_cache.CachedWriteMemory(addr, size);
} }
shader_cache.InvalidateRegion(addr, size);
} }
void RasterizerOpenGL::InvalidateGPUCache() { void RasterizerOpenGL::InvalidateGPUCache() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement); gpu.InvalidateGPUCache();
shader_cache.SyncGuestHost();
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.FlushCachedWrites();
}
} }
void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
@ -519,7 +536,7 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex}; std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size); buffer_cache.WriteMemory(addr, size);
} }
shader_cache.OnCPUWrite(addr, size); shader_cache.OnCacheInvalidation(addr, size);
} }
void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {

View file

@ -98,7 +98,8 @@ public:
VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size, void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override; VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void OnCPUWrite(VAddr addr, u64 size) override; void OnCacheInvalidation(VAddr addr, u64 size) override;
bool OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override; void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override; void UnmapMemory(VAddr addr, u64 size) override;
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;

View file

@ -566,11 +566,32 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s
} }
} }
void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return false;
}
{
std::scoped_lock lock{buffer_cache.mutex};
if (buffer_cache.OnCPUWrite(addr, size)) {
return true;
}
}
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
pipeline_cache.InvalidateRegion(addr, size);
return false;
}
void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) {
if (addr == 0 || size == 0) { if (addr == 0 || size == 0) {
return; return;
} }
pipeline_cache.OnCPUWrite(addr, size);
{ {
std::scoped_lock lock{texture_cache.mutex}; std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size); texture_cache.WriteMemory(addr, size);
@ -579,14 +600,11 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex}; std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.CachedWriteMemory(addr, size); buffer_cache.CachedWriteMemory(addr, size);
} }
pipeline_cache.InvalidateRegion(addr, size);
} }
void RasterizerVulkan::InvalidateGPUCache() { void RasterizerVulkan::InvalidateGPUCache() {
pipeline_cache.SyncGuestHost(); gpu.InvalidateGPUCache();
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.FlushCachedWrites();
}
} }
void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
@ -598,7 +616,7 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex}; std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size); buffer_cache.WriteMemory(addr, size);
} }
pipeline_cache.OnCPUWrite(addr, size); pipeline_cache.OnCacheInvalidation(addr, size);
} }
void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {

View file

@ -96,7 +96,8 @@ public:
void InvalidateRegion(VAddr addr, u64 size, void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override; VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
void OnCPUWrite(VAddr addr, u64 size) override; void OnCacheInvalidation(VAddr addr, u64 size) override;
bool OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override; void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override; void UnmapMemory(VAddr addr, u64 size) override;
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;

View file

@ -24,7 +24,7 @@ void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
RemovePendingShaders(); RemovePendingShaders();
} }
void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { void ShaderCache::OnCacheInvalidation(VAddr addr, size_t size) {
std::scoped_lock lock{invalidation_mutex}; std::scoped_lock lock{invalidation_mutex};
InvalidatePagesInRegion(addr, size); InvalidatePagesInRegion(addr, size);
} }

View file

@ -62,7 +62,7 @@ public:
/// @brief Unmarks a memory region as cached and marks it for removal /// @brief Unmarks a memory region as cached and marks it for removal
/// @param addr Start address of the CPU write operation /// @param addr Start address of the CPU write operation
/// @param size Number of bytes of the CPU write operation /// @param size Number of bytes of the CPU write operation
void OnCPUWrite(VAddr addr, size_t size); void OnCacheInvalidation(VAddr addr, size_t size);
/// @brief Flushes delayed removal operations /// @brief Flushes delayed removal operations
void SyncGuestHost(); void SyncGuestHost();