forked from suyu/suyu
Merge pull request #2237 from bunnei/cache-host-addr
gpu: Use host address for caching instead of guest address.
This commit is contained in:
commit
47b622825c
26 changed files with 393 additions and 293 deletions
|
@ -10,6 +10,7 @@
|
|||
#include "core/core.h"
|
||||
#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
|
||||
#include "core/hle/service/nvdrv/devices/nvmap.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
@ -178,7 +179,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
|
|||
auto& gpu = system_instance.GPU();
|
||||
auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
|
||||
ASSERT(cpu_addr);
|
||||
gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
|
||||
gpu.FlushAndInvalidateRegion(ToCacheAddr(Memory::GetPointer(*cpu_addr)), itr->second.size);
|
||||
|
||||
params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
|
||||
|
||||
|
|
|
@ -67,8 +67,11 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
|
|||
LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
|
||||
(base + size) * PAGE_SIZE);
|
||||
|
||||
RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE,
|
||||
FlushMode::FlushAndInvalidate);
|
||||
// During boot, current_page_table might not be set yet, in which case we need not flush
|
||||
if (current_page_table) {
|
||||
RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE,
|
||||
FlushMode::FlushAndInvalidate);
|
||||
}
|
||||
|
||||
VAddr end = base + size;
|
||||
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
|
||||
|
@ -359,13 +362,13 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
|
|||
auto& gpu = system_instance.GPU();
|
||||
switch (mode) {
|
||||
case FlushMode::Flush:
|
||||
gpu.FlushRegion(overlap_start, overlap_size);
|
||||
gpu.FlushRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
|
||||
break;
|
||||
case FlushMode::Invalidate:
|
||||
gpu.InvalidateRegion(overlap_start, overlap_size);
|
||||
gpu.InvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
|
||||
break;
|
||||
case FlushMode::FlushAndInvalidate:
|
||||
gpu.FlushAndInvalidateRegion(overlap_start, overlap_size);
|
||||
gpu.FlushAndInvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "video_core/engines/kepler_memory.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
|
@ -48,7 +49,8 @@ void KeplerMemory::ProcessData(u32 data) {
|
|||
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
|
||||
// We do this before actually writing the new data because the destination address might contain
|
||||
// a dirty surface that will have to be written back to memory.
|
||||
Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
|
||||
system.Renderer().Rasterizer().InvalidateRegion(ToCacheAddr(Memory::GetPointer(*dest_address)),
|
||||
sizeof(u32));
|
||||
|
||||
Memory::Write32(*dest_address, data);
|
||||
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||
|
|
|
@ -396,7 +396,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
|
|||
const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
|
||||
ASSERT_MSG(address, "Invalid GPU address");
|
||||
|
||||
Memory::Write32(*address, value);
|
||||
u8* ptr{Memory::GetPointer(*address)};
|
||||
rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
|
||||
std::memcpy(ptr, &value, sizeof(u32));
|
||||
|
||||
dirty_flags.OnMemoryWrite();
|
||||
|
||||
// Increment the current buffer position.
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/maxwell_dma.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
@ -92,12 +93,14 @@ void MaxwellDMA::HandleCopy() {
|
|||
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
|
||||
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
|
||||
// copying.
|
||||
Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
|
||||
Core::System::GetInstance().Renderer().Rasterizer().FlushRegion(
|
||||
ToCacheAddr(Memory::GetPointer(*source_cpu)), src_size);
|
||||
|
||||
// We have to invalidate the destination region to evict any outdated surfaces from the
|
||||
// cache. We do this before actually writing the new data because the destination address
|
||||
// might contain a dirty surface that will have to be written back to memory.
|
||||
Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
|
||||
Core::System::GetInstance().Renderer().Rasterizer().InvalidateRegion(
|
||||
ToCacheAddr(Memory::GetPointer(*dest_cpu)), dst_size);
|
||||
};
|
||||
|
||||
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
|
||||
|
|
|
@ -11,6 +11,11 @@
|
|||
#include "video_core/dma_pusher.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
using CacheAddr = std::uintptr_t;
|
||||
inline CacheAddr ToCacheAddr(const void* host_ptr) {
|
||||
return reinterpret_cast<CacheAddr>(host_ptr);
|
||||
}
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
@ -209,13 +214,13 @@ public:
|
|||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||
virtual void FlushRegion(VAddr addr, u64 size) = 0;
|
||||
virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
|
||||
virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
||||
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
|
||||
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
|
||||
|
||||
private:
|
||||
void ProcessBindMethod(const MethodCall& method_call);
|
||||
|
|
|
@ -22,15 +22,15 @@ void GPUAsynch::SwapBuffers(
|
|||
gpu_thread.SwapBuffers(std::move(framebuffer));
|
||||
}
|
||||
|
||||
void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
|
||||
void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
|
||||
gpu_thread.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
|
||||
void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||
gpu_thread.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||
gpu_thread.FlushAndInvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
|
|
|
@ -26,9 +26,9 @@ public:
|
|||
void PushGPUEntries(Tegra::CommandList&& entries) override;
|
||||
void SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushRegion(CacheAddr addr, u64 size) override;
|
||||
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
|
||||
|
||||
private:
|
||||
GPUThread::ThreadManager gpu_thread;
|
||||
|
|
|
@ -22,15 +22,15 @@ void GPUSynch::SwapBuffers(
|
|||
renderer.SwapBuffers(std::move(framebuffer));
|
||||
}
|
||||
|
||||
void GPUSynch::FlushRegion(VAddr addr, u64 size) {
|
||||
void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
|
||||
renderer.Rasterizer().FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
|
||||
void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||
renderer.Rasterizer().InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||
renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
|
|
|
@ -21,9 +21,9 @@ public:
|
|||
void PushGPUEntries(Tegra::CommandList&& entries) override;
|
||||
void SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushRegion(CacheAddr addr, u64 size) override;
|
||||
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "core/frontend/scope_acquire_window_context.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/dma_pusher.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/gpu_thread.h"
|
||||
|
@ -13,38 +12,13 @@
|
|||
|
||||
namespace VideoCommon::GPUThread {
|
||||
|
||||
/// Executes a single GPU thread command
|
||||
static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
|
||||
Tegra::DmaPusher& dma_pusher) {
|
||||
if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
|
||||
dma_pusher.Push(std::move(submit_list->entries));
|
||||
dma_pusher.DispatchCalls();
|
||||
} else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
|
||||
renderer.SwapBuffers(data->framebuffer);
|
||||
} else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
|
||||
renderer.Rasterizer().FlushRegion(data->addr, data->size);
|
||||
} else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
|
||||
renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
|
||||
} else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
|
||||
renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs the GPU thread
|
||||
static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
|
||||
SynchState& state) {
|
||||
|
||||
MicroProfileOnThreadCreate("GpuThread");
|
||||
|
||||
auto WaitForWakeup = [&]() {
|
||||
std::unique_lock<std::mutex> lock{state.signal_mutex};
|
||||
state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
|
||||
};
|
||||
|
||||
// Wait for first GPU command before acquiring the window context
|
||||
WaitForWakeup();
|
||||
state.WaitForCommands();
|
||||
|
||||
// If emulation was stopped during disk shader loading, abort before trying to acquire context
|
||||
if (!state.is_running) {
|
||||
|
@ -53,100 +27,72 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
|
|||
|
||||
Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
|
||||
|
||||
CommandDataContainer next;
|
||||
while (state.is_running) {
|
||||
if (!state.is_running) {
|
||||
return;
|
||||
state.WaitForCommands();
|
||||
while (!state.queue.Empty()) {
|
||||
state.queue.Pop(next);
|
||||
if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
|
||||
dma_pusher.Push(std::move(submit_list->entries));
|
||||
dma_pusher.DispatchCalls();
|
||||
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
|
||||
state.DecrementFramesCounter();
|
||||
renderer.SwapBuffers(std::move(data->framebuffer));
|
||||
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
|
||||
renderer.Rasterizer().FlushRegion(data->addr, data->size);
|
||||
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
||||
renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
|
||||
} else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
|
||||
return;
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// Thread has been woken up, so make the previous write queue the next read queue
|
||||
std::lock_guard<std::mutex> lock{state.signal_mutex};
|
||||
std::swap(state.push_queue, state.pop_queue);
|
||||
}
|
||||
|
||||
// Execute all of the GPU commands
|
||||
while (!state.pop_queue->empty()) {
|
||||
ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
|
||||
state.pop_queue->pop();
|
||||
}
|
||||
|
||||
state.UpdateIdleState();
|
||||
|
||||
// Signal that the GPU thread has finished processing commands
|
||||
if (state.is_idle) {
|
||||
state.idle_condition.notify_one();
|
||||
}
|
||||
|
||||
// Wait for CPU thread to send more GPU commands
|
||||
WaitForWakeup();
|
||||
}
|
||||
}
|
||||
|
||||
ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
|
||||
: renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
|
||||
std::ref(dma_pusher), std::ref(state)},
|
||||
thread_id{thread.get_id()} {}
|
||||
std::ref(dma_pusher), std::ref(state)} {}
|
||||
|
||||
ThreadManager::~ThreadManager() {
|
||||
{
|
||||
// Notify GPU thread that a shutdown is pending
|
||||
std::lock_guard<std::mutex> lock{state.signal_mutex};
|
||||
state.is_running = false;
|
||||
}
|
||||
|
||||
state.signal_condition.notify_one();
|
||||
// Notify GPU thread that a shutdown is pending
|
||||
PushCommand(EndProcessingCommand());
|
||||
thread.join();
|
||||
}
|
||||
|
||||
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
|
||||
if (entries.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
PushCommand(SubmitListCommand(std::move(entries)), false, false);
|
||||
PushCommand(SubmitListCommand(std::move(entries)));
|
||||
}
|
||||
|
||||
void ThreadManager::SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
|
||||
PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
|
||||
state.IncrementFramesCounter();
|
||||
PushCommand(SwapBuffersCommand(std::move(framebuffer)));
|
||||
state.WaitForFrames();
|
||||
}
|
||||
|
||||
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
|
||||
// Block the CPU when using accurate emulation
|
||||
PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
|
||||
void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
|
||||
PushCommand(FlushRegionCommand(addr, size));
|
||||
}
|
||||
|
||||
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
|
||||
PushCommand(InvalidateRegionCommand(addr, size), true, true);
|
||||
void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||
if (state.queue.Empty()) {
|
||||
// It's quicker to invalidate a single region on the CPU if the queue is already empty
|
||||
renderer.Rasterizer().InvalidateRegion(addr, size);
|
||||
} else {
|
||||
PushCommand(InvalidateRegionCommand(addr, size));
|
||||
}
|
||||
}
|
||||
|
||||
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
|
||||
InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock{state.signal_mutex};
|
||||
|
||||
if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
|
||||
// Execute the command synchronously on the current thread
|
||||
ExecuteCommand(&command_data, renderer, dma_pusher);
|
||||
return;
|
||||
}
|
||||
|
||||
// Push the command to the GPU thread
|
||||
state.UpdateIdleState();
|
||||
state.push_queue->emplace(command_data);
|
||||
}
|
||||
|
||||
// Signal the GPU thread that commands are pending
|
||||
state.signal_condition.notify_one();
|
||||
|
||||
if (wait_for_idle) {
|
||||
// Wait for the GPU to be idle (all commands to be executed)
|
||||
std::unique_lock<std::mutex> lock{state.idle_mutex};
|
||||
state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
|
||||
}
|
||||
void ThreadManager::PushCommand(CommandData&& command_data) {
|
||||
state.queue.Push(CommandDataContainer(std::move(command_data)));
|
||||
state.SignalCommands();
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::GPUThread
|
||||
|
|
|
@ -13,6 +13,9 @@
|
|||
#include <thread>
|
||||
#include <variant>
|
||||
|
||||
#include "common/threadsafe_queue.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace Tegra {
|
||||
struct FramebufferConfig;
|
||||
class DmaPusher;
|
||||
|
@ -24,6 +27,9 @@ class RendererBase;
|
|||
|
||||
namespace VideoCommon::GPUThread {
|
||||
|
||||
/// Command to signal to the GPU thread that processing has ended
|
||||
struct EndProcessingCommand final {};
|
||||
|
||||
/// Command to signal to the GPU thread that a command list is ready for processing
|
||||
struct SubmitListCommand final {
|
||||
explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
|
||||
|
@ -36,59 +42,110 @@ struct SwapBuffersCommand final {
|
|||
explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
|
||||
: framebuffer{std::move(framebuffer)} {}
|
||||
|
||||
std::optional<const Tegra::FramebufferConfig> framebuffer;
|
||||
std::optional<Tegra::FramebufferConfig> framebuffer;
|
||||
};
|
||||
|
||||
/// Command to signal to the GPU thread to flush a region
|
||||
struct FlushRegionCommand final {
|
||||
explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
|
||||
explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
|
||||
|
||||
const VAddr addr;
|
||||
const u64 size;
|
||||
CacheAddr addr;
|
||||
u64 size;
|
||||
};
|
||||
|
||||
/// Command to signal to the GPU thread to invalidate a region
|
||||
struct InvalidateRegionCommand final {
|
||||
explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
|
||||
explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
|
||||
|
||||
const VAddr addr;
|
||||
const u64 size;
|
||||
CacheAddr addr;
|
||||
u64 size;
|
||||
};
|
||||
|
||||
/// Command to signal to the GPU thread to flush and invalidate a region
|
||||
struct FlushAndInvalidateRegionCommand final {
|
||||
explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
|
||||
explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
|
||||
: addr{addr}, size{size} {}
|
||||
|
||||
const VAddr addr;
|
||||
const u64 size;
|
||||
CacheAddr addr;
|
||||
u64 size;
|
||||
};
|
||||
|
||||
using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
|
||||
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
|
||||
using CommandData =
|
||||
std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
|
||||
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
|
||||
|
||||
struct CommandDataContainer {
|
||||
CommandDataContainer() = default;
|
||||
|
||||
CommandDataContainer(CommandData&& data) : data{std::move(data)} {}
|
||||
|
||||
CommandDataContainer& operator=(const CommandDataContainer& t) {
|
||||
data = std::move(t.data);
|
||||
return *this;
|
||||
}
|
||||
|
||||
CommandData data;
|
||||
};
|
||||
|
||||
/// Struct used to synchronize the GPU thread
|
||||
struct SynchState final {
|
||||
std::atomic<bool> is_running{true};
|
||||
std::atomic<bool> is_idle{true};
|
||||
std::condition_variable signal_condition;
|
||||
std::mutex signal_mutex;
|
||||
std::condition_variable idle_condition;
|
||||
std::mutex idle_mutex;
|
||||
std::atomic_bool is_running{true};
|
||||
std::atomic_int queued_frame_count{};
|
||||
std::mutex frames_mutex;
|
||||
std::mutex commands_mutex;
|
||||
std::condition_variable commands_condition;
|
||||
std::condition_variable frames_condition;
|
||||
|
||||
// We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
|
||||
// one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
|
||||
// empty. This allows for efficient thread-safe access, as it does not require any copies.
|
||||
|
||||
using CommandQueue = std::queue<CommandData>;
|
||||
std::array<CommandQueue, 2> command_queues;
|
||||
CommandQueue* push_queue{&command_queues[0]};
|
||||
CommandQueue* pop_queue{&command_queues[1]};
|
||||
|
||||
void UpdateIdleState() {
|
||||
std::lock_guard<std::mutex> lock{idle_mutex};
|
||||
is_idle = command_queues[0].empty() && command_queues[1].empty();
|
||||
void IncrementFramesCounter() {
|
||||
std::lock_guard<std::mutex> lock{frames_mutex};
|
||||
++queued_frame_count;
|
||||
}
|
||||
|
||||
void DecrementFramesCounter() {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock{frames_mutex};
|
||||
--queued_frame_count;
|
||||
|
||||
if (queued_frame_count) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
frames_condition.notify_one();
|
||||
}
|
||||
|
||||
void WaitForFrames() {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock{frames_mutex};
|
||||
if (!queued_frame_count) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for the GPU to be idle (all commands to be executed)
|
||||
{
|
||||
std::unique_lock<std::mutex> lock{frames_mutex};
|
||||
frames_condition.wait(lock, [this] { return !queued_frame_count; });
|
||||
}
|
||||
}
|
||||
|
||||
void SignalCommands() {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock{commands_mutex};
|
||||
if (queue.Empty()) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
commands_condition.notify_one();
|
||||
}
|
||||
|
||||
void WaitForCommands() {
|
||||
std::unique_lock<std::mutex> lock{commands_mutex};
|
||||
commands_condition.wait(lock, [this] { return !queue.Empty(); });
|
||||
}
|
||||
|
||||
using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
|
||||
CommandQueue queue;
|
||||
};
|
||||
|
||||
/// Class used to manage the GPU thread
|
||||
|
@ -105,22 +162,17 @@ public:
|
|||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||
void FlushRegion(VAddr addr, u64 size);
|
||||
void FlushRegion(CacheAddr addr, u64 size);
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||
void InvalidateRegion(VAddr addr, u64 size);
|
||||
void InvalidateRegion(CacheAddr addr, u64 size);
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size);
|
||||
void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
|
||||
|
||||
private:
|
||||
/// Pushes a command to be executed by the GPU thread
|
||||
void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
|
||||
|
||||
/// Returns true if this is called by the GPU thread
|
||||
bool IsGpuThread() const {
|
||||
return std::this_thread::get_id() == thread_id;
|
||||
}
|
||||
void PushCommand(CommandData&& command_data);
|
||||
|
||||
private:
|
||||
SynchState state;
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
|
||||
|
@ -12,14 +13,26 @@
|
|||
|
||||
#include "common/common_types.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
class RasterizerCacheObject {
|
||||
public:
|
||||
explicit RasterizerCacheObject(const u8* host_ptr)
|
||||
: host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
|
||||
|
||||
virtual ~RasterizerCacheObject();
|
||||
|
||||
CacheAddr GetCacheAddr() const {
|
||||
return cache_addr;
|
||||
}
|
||||
|
||||
const u8* GetHostPtr() const {
|
||||
return host_ptr;
|
||||
}
|
||||
|
||||
/// Gets the address of the shader in guest memory, required for cache management
|
||||
virtual VAddr GetAddr() const = 0;
|
||||
virtual VAddr GetCpuAddr() const = 0;
|
||||
|
||||
/// Gets the size of the shader in guest memory, required for cache management
|
||||
virtual std::size_t GetSizeInBytes() const = 0;
|
||||
|
@ -58,6 +71,8 @@ private:
|
|||
bool is_registered{}; ///< Whether the object is currently registered with the cache
|
||||
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
|
||||
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
|
||||
CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
|
||||
const u8* host_ptr{}; ///< Pointer to the memory backing this cached region
|
||||
};
|
||||
|
||||
template <class T>
|
||||
|
@ -68,7 +83,9 @@ public:
|
|||
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
|
||||
|
||||
/// Write any cached resources overlapping the specified region back to memory
|
||||
void FlushRegion(Tegra::GPUVAddr addr, size_t size) {
|
||||
void FlushRegion(CacheAddr addr, std::size_t size) {
|
||||
std::lock_guard<std::recursive_mutex> lock{mutex};
|
||||
|
||||
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
|
||||
for (auto& object : objects) {
|
||||
FlushObject(object);
|
||||
|
@ -76,7 +93,9 @@ public:
|
|||
}
|
||||
|
||||
/// Mark the specified region as being invalidated
|
||||
void InvalidateRegion(VAddr addr, u64 size) {
|
||||
void InvalidateRegion(CacheAddr addr, u64 size) {
|
||||
std::lock_guard<std::recursive_mutex> lock{mutex};
|
||||
|
||||
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
|
||||
for (auto& object : objects) {
|
||||
if (!object->IsRegistered()) {
|
||||
|
@ -89,48 +108,60 @@ public:
|
|||
|
||||
/// Invalidates everything in the cache
|
||||
void InvalidateAll() {
|
||||
std::lock_guard<std::recursive_mutex> lock{mutex};
|
||||
|
||||
while (interval_cache.begin() != interval_cache.end()) {
|
||||
Unregister(*interval_cache.begin()->second.begin());
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
/// Tries to get an object from the cache with the specified address
|
||||
T TryGet(VAddr addr) const {
|
||||
/// Tries to get an object from the cache with the specified cache address
|
||||
T TryGet(CacheAddr addr) const {
|
||||
const auto iter = map_cache.find(addr);
|
||||
if (iter != map_cache.end())
|
||||
return iter->second;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
T TryGet(const void* addr) const {
|
||||
const auto iter = map_cache.find(ToCacheAddr(addr));
|
||||
if (iter != map_cache.end())
|
||||
return iter->second;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Register an object into the cache
|
||||
void Register(const T& object) {
|
||||
std::lock_guard<std::recursive_mutex> lock{mutex};
|
||||
|
||||
object->SetIsRegistered(true);
|
||||
interval_cache.add({GetInterval(object), ObjectSet{object}});
|
||||
map_cache.insert({object->GetAddr(), object});
|
||||
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
|
||||
map_cache.insert({object->GetCacheAddr(), object});
|
||||
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
|
||||
}
|
||||
|
||||
/// Unregisters an object from the cache
|
||||
void Unregister(const T& object) {
|
||||
object->SetIsRegistered(false);
|
||||
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
|
||||
// Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
|
||||
if (Settings::values.use_accurate_gpu_emulation) {
|
||||
FlushObject(object);
|
||||
}
|
||||
std::lock_guard<std::recursive_mutex> lock{mutex};
|
||||
|
||||
object->SetIsRegistered(false);
|
||||
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
|
||||
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
|
||||
map_cache.erase(object->GetAddr());
|
||||
map_cache.erase(object->GetCacheAddr());
|
||||
}
|
||||
|
||||
/// Returns a ticks counter used for tracking when cached objects were last modified
|
||||
u64 GetModifiedTicks() {
|
||||
std::lock_guard<std::recursive_mutex> lock{mutex};
|
||||
|
||||
return ++modified_ticks;
|
||||
}
|
||||
|
||||
/// Flushes the specified object, updating appropriate cache state as needed
|
||||
void FlushObject(const T& object) {
|
||||
std::lock_guard<std::recursive_mutex> lock{mutex};
|
||||
|
||||
if (!object->IsDirty()) {
|
||||
return;
|
||||
}
|
||||
|
@ -140,7 +171,7 @@ protected:
|
|||
|
||||
private:
|
||||
/// Returns a list of cached objects from the specified memory region, ordered by access time
|
||||
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
|
||||
std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
|
||||
if (size == 0) {
|
||||
return {};
|
||||
}
|
||||
|
@ -164,17 +195,18 @@ private:
|
|||
}
|
||||
|
||||
using ObjectSet = std::set<T>;
|
||||
using ObjectCache = std::unordered_map<VAddr, T>;
|
||||
using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
|
||||
using ObjectCache = std::unordered_map<CacheAddr, T>;
|
||||
using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
|
||||
using ObjectInterval = typename IntervalCache::interval_type;
|
||||
|
||||
static auto GetInterval(const T& object) {
|
||||
return ObjectInterval::right_open(object->GetAddr(),
|
||||
object->GetAddr() + object->GetSizeInBytes());
|
||||
return ObjectInterval::right_open(object->GetCacheAddr(),
|
||||
object->GetCacheAddr() + object->GetSizeInBytes());
|
||||
}
|
||||
|
||||
ObjectCache map_cache;
|
||||
IntervalCache interval_cache; ///< Cache of objects
|
||||
u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
std::recursive_mutex mutex;
|
||||
};
|
||||
|
|
|
@ -35,14 +35,14 @@ public:
|
|||
virtual void FlushAll() = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||
virtual void FlushRegion(VAddr addr, u64 size) = 0;
|
||||
virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
|
||||
virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||
/// and invalidated
|
||||
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
|
||||
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
|
||||
|
||||
/// Attempt to use a faster method to perform a surface copy
|
||||
virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
|
@ -63,7 +63,7 @@ public:
|
|||
}
|
||||
|
||||
/// Increase/decrease the number of object in pages touching the specified region
|
||||
virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {}
|
||||
virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
|
||||
|
||||
/// Initialize disk cached resources for the game being emulated
|
||||
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
|
||||
|
|
|
@ -13,6 +13,11 @@
|
|||
|
||||
namespace OpenGL {
|
||||
|
||||
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
|
||||
std::size_t alignment, u8* host_ptr)
|
||||
: cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
|
||||
host_ptr} {}
|
||||
|
||||
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
|
||||
: RasterizerCache{rasterizer}, stream_buffer(size, true) {}
|
||||
|
||||
|
@ -26,11 +31,12 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
|
|||
// TODO: Figure out which size is the best for given games.
|
||||
cache &= size >= 2048;
|
||||
|
||||
const auto& host_ptr{Memory::GetPointer(*cpu_addr)};
|
||||
if (cache) {
|
||||
auto entry = TryGet(*cpu_addr);
|
||||
auto entry = TryGet(host_ptr);
|
||||
if (entry) {
|
||||
if (entry->size >= size && entry->alignment == alignment) {
|
||||
return entry->offset;
|
||||
if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
|
||||
return entry->GetOffset();
|
||||
}
|
||||
Unregister(entry);
|
||||
}
|
||||
|
@ -39,17 +45,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
|
|||
AlignBuffer(alignment);
|
||||
const GLintptr uploaded_offset = buffer_offset;
|
||||
|
||||
Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
|
||||
if (!host_ptr) {
|
||||
return uploaded_offset;
|
||||
}
|
||||
|
||||
std::memcpy(buffer_ptr, host_ptr, size);
|
||||
buffer_ptr += size;
|
||||
buffer_offset += size;
|
||||
|
||||
if (cache) {
|
||||
auto entry = std::make_shared<CachedBufferEntry>();
|
||||
entry->offset = uploaded_offset;
|
||||
entry->size = size;
|
||||
entry->alignment = alignment;
|
||||
entry->addr = *cpu_addr;
|
||||
auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
|
||||
alignment, host_ptr);
|
||||
Register(entry);
|
||||
}
|
||||
|
||||
|
|
|
@ -17,22 +17,39 @@ namespace OpenGL {
|
|||
|
||||
class RasterizerOpenGL;
|
||||
|
||||
struct CachedBufferEntry final : public RasterizerCacheObject {
|
||||
VAddr GetAddr() const override {
|
||||
return addr;
|
||||
class CachedBufferEntry final : public RasterizerCacheObject {
|
||||
public:
|
||||
explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
|
||||
std::size_t alignment, u8* host_ptr);
|
||||
|
||||
VAddr GetCpuAddr() const override {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
std::size_t GetSizeInBytes() const override {
|
||||
return size;
|
||||
}
|
||||
|
||||
std::size_t GetSize() const {
|
||||
return size;
|
||||
}
|
||||
|
||||
GLintptr GetOffset() const {
|
||||
return offset;
|
||||
}
|
||||
|
||||
std::size_t GetAlignment() const {
|
||||
return alignment;
|
||||
}
|
||||
|
||||
// We do not have to flush this cache as things in it are never modified by us.
|
||||
void Flush() override {}
|
||||
|
||||
VAddr addr;
|
||||
std::size_t size;
|
||||
GLintptr offset;
|
||||
std::size_t alignment;
|
||||
private:
|
||||
VAddr cpu_addr{};
|
||||
std::size_t size{};
|
||||
GLintptr offset{};
|
||||
std::size_t alignment{};
|
||||
};
|
||||
|
||||
class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
|
||||
|
|
|
@ -15,12 +15,13 @@
|
|||
|
||||
namespace OpenGL {
|
||||
|
||||
CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} {
|
||||
CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
|
||||
: cpu_addr{cpu_addr}, size{size}, RasterizerCacheObject{host_ptr} {
|
||||
buffer.Create();
|
||||
// Bind and unbind the buffer so it gets allocated by the driver
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
|
||||
LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
|
||||
LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
|
||||
}
|
||||
|
||||
void CachedGlobalRegion::Reload(u32 size_) {
|
||||
|
@ -35,7 +36,7 @@ void CachedGlobalRegion::Reload(u32 size_) {
|
|||
|
||||
// TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
|
||||
glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
|
||||
glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
|
||||
}
|
||||
|
||||
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
|
||||
|
@ -46,11 +47,11 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32
|
|||
return search->second;
|
||||
}
|
||||
|
||||
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
|
||||
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr) {
|
||||
GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
|
||||
if (!region) {
|
||||
// No reserved surface available, create a new one and reserve it
|
||||
region = std::make_shared<CachedGlobalRegion>(addr, size);
|
||||
region = std::make_shared<CachedGlobalRegion>(addr, size, host_ptr);
|
||||
ReserveGlobalRegion(region);
|
||||
}
|
||||
region->Reload(size);
|
||||
|
@ -58,7 +59,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 si
|
|||
}
|
||||
|
||||
void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
|
||||
reserve.insert_or_assign(region->GetAddr(), std::move(region));
|
||||
reserve.insert_or_assign(region->GetCpuAddr(), std::move(region));
|
||||
}
|
||||
|
||||
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
|
||||
|
@ -80,11 +81,12 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
|
|||
ASSERT(actual_addr);
|
||||
|
||||
// Look up global region in the cache based on address
|
||||
GlobalRegion region = TryGet(*actual_addr);
|
||||
const auto& host_ptr{Memory::GetPointer(*actual_addr)};
|
||||
GlobalRegion region{TryGet(host_ptr)};
|
||||
|
||||
if (!region) {
|
||||
// No global region found - create a new one
|
||||
region = GetUncachedGlobalRegion(*actual_addr, size);
|
||||
region = GetUncachedGlobalRegion(*actual_addr, size, host_ptr);
|
||||
Register(region);
|
||||
}
|
||||
|
||||
|
|
|
@ -27,14 +27,12 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
|
|||
|
||||
class CachedGlobalRegion final : public RasterizerCacheObject {
|
||||
public:
|
||||
explicit CachedGlobalRegion(VAddr addr, u32 size);
|
||||
explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
|
||||
|
||||
/// Gets the address of the shader in guest memory, required for cache management
|
||||
VAddr GetAddr() const override {
|
||||
return addr;
|
||||
VAddr GetCpuAddr() const override {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
/// Gets the size of the shader in guest memory, required for cache management
|
||||
std::size_t GetSizeInBytes() const override {
|
||||
return size;
|
||||
}
|
||||
|
@ -53,9 +51,8 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
VAddr addr{};
|
||||
VAddr cpu_addr{};
|
||||
u32 size{};
|
||||
|
||||
OGLBuffer buffer;
|
||||
};
|
||||
|
||||
|
@ -69,7 +66,7 @@ public:
|
|||
|
||||
private:
|
||||
GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
|
||||
GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
|
||||
GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr);
|
||||
void ReserveGlobalRegion(GlobalRegion region);
|
||||
|
||||
std::unordered_map<VAddr, GlobalRegion> reserve;
|
||||
|
|
|
@ -449,7 +449,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
|
|||
return boost::make_iterator_range(map.equal_range(interval));
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
|
||||
void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
|
||||
const u64 page_start{addr >> Memory::PAGE_BITS};
|
||||
const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
|
||||
|
||||
|
@ -747,12 +747,12 @@ void RasterizerOpenGL::DrawArrays() {
|
|||
|
||||
void RasterizerOpenGL::FlushAll() {}
|
||||
|
||||
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
|
||||
void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
res_cache.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
||||
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
res_cache.InvalidateRegion(addr, size);
|
||||
shader_cache.InvalidateRegion(addr, size);
|
||||
|
@ -760,7 +760,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
|||
buffer_cache.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||
FlushRegion(addr, size);
|
||||
InvalidateRegion(addr, size);
|
||||
}
|
||||
|
@ -782,7 +782,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
|||
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
|
||||
const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)};
|
||||
const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
|
||||
if (!surface) {
|
||||
return {};
|
||||
}
|
||||
|
|
|
@ -57,9 +57,9 @@ public:
|
|||
void DrawArrays() override;
|
||||
void Clear() override;
|
||||
void FlushAll() override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushRegion(CacheAddr addr, u64 size) override;
|
||||
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
|
||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||
const Common::Rectangle<u32>& src_rect,
|
||||
|
@ -67,7 +67,7 @@ public:
|
|||
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
|
||||
u32 pixel_stride) override;
|
||||
bool AccelerateDrawBatch(bool is_indexed) override;
|
||||
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override;
|
||||
void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
|
||||
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||
|
||||
|
|
|
@ -61,6 +61,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
|
|||
|
||||
addr = cpu_addr ? *cpu_addr : 0;
|
||||
gpu_addr = gpu_addr_;
|
||||
host_ptr = Memory::GetPointer(addr);
|
||||
size_in_bytes = SizeInBytesRaw();
|
||||
|
||||
if (IsPixelFormatASTC(pixel_format)) {
|
||||
|
@ -563,8 +564,8 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
|
|||
}
|
||||
|
||||
CachedSurface::CachedSurface(const SurfaceParams& params)
|
||||
: params(params), gl_target(SurfaceTargetToGL(params.target)),
|
||||
cached_size_in_bytes(params.size_in_bytes) {
|
||||
: params{params}, gl_target{SurfaceTargetToGL(params.target)},
|
||||
cached_size_in_bytes{params.size_in_bytes}, RasterizerCacheObject{params.host_ptr} {
|
||||
texture.Create(gl_target);
|
||||
|
||||
// TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
|
||||
|
@ -633,10 +634,9 @@ void CachedSurface::LoadGLBuffer() {
|
|||
const u32 bpp = params.GetFormatBpp() / 8;
|
||||
const u32 copy_size = params.width * bpp;
|
||||
if (params.pitch == copy_size) {
|
||||
std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr),
|
||||
params.size_in_bytes_gl);
|
||||
std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
|
||||
} else {
|
||||
const u8* start = Memory::GetPointer(params.addr);
|
||||
const u8* start{params.host_ptr};
|
||||
u8* write_to = gl_buffer[0].data();
|
||||
for (u32 h = params.height; h > 0; h--) {
|
||||
std::memcpy(write_to, start, copy_size);
|
||||
|
@ -680,8 +680,6 @@ void CachedSurface::FlushGLBuffer() {
|
|||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||
Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
|
||||
params.height, params.depth, true, true);
|
||||
const u8* const texture_src_data = Memory::GetPointer(params.addr);
|
||||
ASSERT(texture_src_data);
|
||||
if (params.is_tiled) {
|
||||
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
|
||||
params.block_width, static_cast<u32>(params.target));
|
||||
|
@ -691,9 +689,9 @@ void CachedSurface::FlushGLBuffer() {
|
|||
const u32 bpp = params.GetFormatBpp() / 8;
|
||||
const u32 copy_size = params.width * bpp;
|
||||
if (params.pitch == copy_size) {
|
||||
std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes());
|
||||
std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
|
||||
} else {
|
||||
u8* start = Memory::GetPointer(params.addr);
|
||||
u8* start{params.host_ptr};
|
||||
const u8* read_to = gl_buffer[0].data();
|
||||
for (u32 h = params.height; h > 0; h--) {
|
||||
std::memcpy(start, read_to, copy_size);
|
||||
|
@ -932,7 +930,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
|
|||
}
|
||||
|
||||
// Look up surface in the cache based on address
|
||||
Surface surface{TryGet(params.addr)};
|
||||
Surface surface{TryGet(params.host_ptr)};
|
||||
if (surface) {
|
||||
if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
|
||||
// Use the cached surface as-is unless it's not synced with memory
|
||||
|
@ -986,7 +984,7 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
|
|||
for (u32 layer = 0; layer < dst_params.depth; layer++) {
|
||||
for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
|
||||
const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap);
|
||||
const Surface& copy = TryGet(sub_address);
|
||||
const Surface& copy = TryGet(Memory::GetPointer(sub_address));
|
||||
if (!copy)
|
||||
continue;
|
||||
const auto& src_params{copy->GetSurfaceParams()};
|
||||
|
@ -1163,7 +1161,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
|
|||
const auto& dst_params{dst_surface->GetSurfaceParams()};
|
||||
|
||||
// Flush enough memory for both the source and destination surface
|
||||
FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize()));
|
||||
FlushRegion(ToCacheAddr(src_params.host_ptr),
|
||||
std::max(src_params.MemorySize(), dst_params.MemorySize()));
|
||||
|
||||
LoadSurface(dst_surface);
|
||||
}
|
||||
|
@ -1215,8 +1214,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
|
|||
return new_surface;
|
||||
}
|
||||
|
||||
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const {
|
||||
return TryGet(addr);
|
||||
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
|
||||
return TryGet(host_ptr);
|
||||
}
|
||||
|
||||
void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
|
||||
|
@ -1267,7 +1266,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
|
|||
src_params.height == dst_params.MipHeight(*level) &&
|
||||
src_params.block_height >= dst_params.MipBlockHeight(*level)) {
|
||||
const std::optional<u32> slot =
|
||||
TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
|
||||
TryFindBestLayer(render_surface->GetCpuAddr(), dst_params, *level);
|
||||
if (slot.has_value()) {
|
||||
glCopyImageSubData(render_surface->Texture().handle,
|
||||
SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
|
||||
|
@ -1283,8 +1282,8 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
|
|||
}
|
||||
|
||||
static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
|
||||
const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
|
||||
const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
|
||||
const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
|
||||
const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
|
||||
if (bound2 > bound1)
|
||||
return true;
|
||||
const auto& dst_params = blitted_surface->GetSurfaceParams();
|
||||
|
@ -1327,7 +1326,8 @@ void RasterizerCacheOpenGL::SignalPreDrawCall() {
|
|||
void RasterizerCacheOpenGL::SignalPostDrawCall() {
|
||||
for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
|
||||
if (current_color_buffers[i] != nullptr) {
|
||||
Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
|
||||
Surface intersect =
|
||||
CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr());
|
||||
if (intersect != nullptr) {
|
||||
PartialReinterpretSurface(current_color_buffers[i], intersect);
|
||||
texception = true;
|
||||
|
|
|
@ -297,6 +297,7 @@ struct SurfaceParams {
|
|||
bool srgb_conversion;
|
||||
// Parameters used for caching
|
||||
VAddr addr;
|
||||
u8* host_ptr;
|
||||
Tegra::GPUVAddr gpu_addr;
|
||||
std::size_t size_in_bytes;
|
||||
std::size_t size_in_bytes_gl;
|
||||
|
@ -345,9 +346,9 @@ class RasterizerOpenGL;
|
|||
|
||||
class CachedSurface final : public RasterizerCacheObject {
|
||||
public:
|
||||
CachedSurface(const SurfaceParams& params);
|
||||
explicit CachedSurface(const SurfaceParams& params);
|
||||
|
||||
VAddr GetAddr() const override {
|
||||
VAddr GetCpuAddr() const override {
|
||||
return params.addr;
|
||||
}
|
||||
|
||||
|
@ -449,7 +450,7 @@ public:
|
|||
Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
|
||||
|
||||
/// Tries to find a framebuffer using on the provided CPU address
|
||||
Surface TryFindFramebufferSurface(VAddr addr) const;
|
||||
Surface TryFindFramebufferSurface(const u8* host_ptr) const;
|
||||
|
||||
/// Copies the contents of one surface to another
|
||||
void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
|
||||
|
@ -506,12 +507,12 @@ private:
|
|||
std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
|
||||
Surface last_depth_buffer;
|
||||
|
||||
using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
|
||||
using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
|
||||
using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
|
||||
|
||||
static auto GetReinterpretInterval(const Surface& object) {
|
||||
return SurfaceInterval::right_open(object->GetAddr() + 1,
|
||||
object->GetAddr() + object->GetMemorySize() - 1);
|
||||
return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
|
||||
object->GetCacheAddr() + object->GetMemorySize() - 1);
|
||||
}
|
||||
|
||||
// Reinterpreted surfaces are very fragil as the game may keep rendering into them.
|
||||
|
@ -523,7 +524,7 @@ private:
|
|||
reinterpret_surface->MarkReinterpreted();
|
||||
}
|
||||
|
||||
Surface CollideOnReinterpretedSurface(VAddr addr) const {
|
||||
Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
|
||||
const SurfaceInterval interval{addr};
|
||||
for (auto& pair :
|
||||
boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
|
||||
|
|
|
@ -42,9 +42,9 @@ VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
|
|||
}
|
||||
|
||||
/// Gets the shader program code from memory for the specified address
|
||||
ProgramCode GetShaderCode(VAddr addr) {
|
||||
ProgramCode GetShaderCode(const u8* host_ptr) {
|
||||
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
|
||||
Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
|
||||
std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
|
||||
return program_code;
|
||||
}
|
||||
|
||||
|
@ -214,12 +214,13 @@ std::set<GLenum> GetSupportedFormats() {
|
|||
|
||||
} // namespace
|
||||
|
||||
CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
|
||||
ShaderDiskCacheOpenGL& disk_cache,
|
||||
CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier,
|
||||
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
|
||||
const PrecompiledPrograms& precompiled_programs,
|
||||
ProgramCode&& program_code, ProgramCode&& program_code_b)
|
||||
: addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
|
||||
disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
|
||||
ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
|
||||
: host_ptr{host_ptr}, guest_addr{guest_addr}, unique_identifier{unique_identifier},
|
||||
program_type{program_type}, disk_cache{disk_cache},
|
||||
precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} {
|
||||
|
||||
const std::size_t code_size = CalculateProgramSize(program_code);
|
||||
const std::size_t code_size_b =
|
||||
|
@ -243,12 +244,13 @@ CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderPro
|
|||
disk_cache.SaveRaw(raw);
|
||||
}
|
||||
|
||||
CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
|
||||
ShaderDiskCacheOpenGL& disk_cache,
|
||||
CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier,
|
||||
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
|
||||
const PrecompiledPrograms& precompiled_programs,
|
||||
GLShader::ProgramResult result)
|
||||
: addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
|
||||
disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
|
||||
GLShader::ProgramResult result, u8* host_ptr)
|
||||
: guest_addr{guest_addr}, unique_identifier{unique_identifier}, program_type{program_type},
|
||||
disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{
|
||||
host_ptr} {
|
||||
|
||||
code = std::move(result.first);
|
||||
entries = result.second;
|
||||
|
@ -271,7 +273,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
|
|||
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
|
||||
}
|
||||
|
||||
LabelGLObject(GL_PROGRAM, program->handle, addr);
|
||||
LabelGLObject(GL_PROGRAM, program->handle, guest_addr);
|
||||
}
|
||||
|
||||
handle = program->handle;
|
||||
|
@ -323,7 +325,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
|
|||
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
|
||||
}
|
||||
|
||||
LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name);
|
||||
LabelGLObject(GL_PROGRAM, target_program->handle, guest_addr, debug_name);
|
||||
|
||||
return target_program->handle;
|
||||
};
|
||||
|
@ -489,14 +491,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
|||
const VAddr program_addr{GetShaderAddress(program)};
|
||||
|
||||
// Look up shader in the cache based on address
|
||||
Shader shader{TryGet(program_addr)};
|
||||
const auto& host_ptr{Memory::GetPointer(program_addr)};
|
||||
Shader shader{TryGet(host_ptr)};
|
||||
|
||||
if (!shader) {
|
||||
// No shader found - create a new one
|
||||
ProgramCode program_code = GetShaderCode(program_addr);
|
||||
const auto& host_ptr{Memory::GetPointer(program_addr)};
|
||||
ProgramCode program_code{GetShaderCode(host_ptr)};
|
||||
ProgramCode program_code_b;
|
||||
if (program == Maxwell::ShaderProgram::VertexA) {
|
||||
program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB));
|
||||
program_code_b = GetShaderCode(
|
||||
Memory::GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
|
||||
}
|
||||
const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
|
||||
|
||||
|
@ -504,11 +509,11 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
|||
if (found != precompiled_shaders.end()) {
|
||||
shader =
|
||||
std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache,
|
||||
precompiled_programs, found->second);
|
||||
precompiled_programs, found->second, host_ptr);
|
||||
} else {
|
||||
shader = std::make_shared<CachedShader>(
|
||||
program_addr, unique_identifier, program, disk_cache, precompiled_programs,
|
||||
std::move(program_code), std::move(program_code_b));
|
||||
std::move(program_code), std::move(program_code_b), host_ptr);
|
||||
}
|
||||
Register(shader);
|
||||
}
|
||||
|
|
|
@ -39,18 +39,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
|
|||
|
||||
class CachedShader final : public RasterizerCacheObject {
|
||||
public:
|
||||
explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
|
||||
ShaderDiskCacheOpenGL& disk_cache,
|
||||
explicit CachedShader(VAddr guest_addr, u64 unique_identifier,
|
||||
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
|
||||
const PrecompiledPrograms& precompiled_programs,
|
||||
ProgramCode&& program_code, ProgramCode&& program_code_b);
|
||||
ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
|
||||
|
||||
explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
|
||||
ShaderDiskCacheOpenGL& disk_cache,
|
||||
explicit CachedShader(VAddr guest_addr, u64 unique_identifier,
|
||||
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
|
||||
const PrecompiledPrograms& precompiled_programs,
|
||||
GLShader::ProgramResult result);
|
||||
GLShader::ProgramResult result, u8* host_ptr);
|
||||
|
||||
VAddr GetAddr() const override {
|
||||
return addr;
|
||||
VAddr GetCpuAddr() const override {
|
||||
return guest_addr;
|
||||
}
|
||||
|
||||
std::size_t GetSizeInBytes() const override {
|
||||
|
@ -91,7 +91,8 @@ private:
|
|||
|
||||
ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
|
||||
|
||||
VAddr addr{};
|
||||
u8* host_ptr{};
|
||||
VAddr guest_addr{};
|
||||
u64 unique_identifier{};
|
||||
Maxwell::ShaderProgram program_type{};
|
||||
ShaderDiskCacheOpenGL& disk_cache;
|
||||
|
|
|
@ -17,6 +17,11 @@
|
|||
|
||||
namespace Vulkan {
|
||||
|
||||
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
|
||||
std::size_t alignment, u8* host_ptr)
|
||||
: cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
|
||||
host_ptr} {}
|
||||
|
||||
VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
|
||||
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
|
||||
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
|
||||
|
@ -37,16 +42,18 @@ VKBufferCache::~VKBufferCache() = default;
|
|||
u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
|
||||
bool cache) {
|
||||
const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
|
||||
ASSERT(cpu_addr);
|
||||
ASSERT_MSG(cpu_addr, "Invalid GPU address");
|
||||
|
||||
// Cache management is a big overhead, so only cache entries with a given size.
|
||||
// TODO: Figure out which size is the best for given games.
|
||||
cache &= size >= 2048;
|
||||
|
||||
const auto& host_ptr{Memory::GetPointer(*cpu_addr)};
|
||||
if (cache) {
|
||||
if (auto entry = TryGet(*cpu_addr); entry) {
|
||||
if (entry->size >= size && entry->alignment == alignment) {
|
||||
return entry->offset;
|
||||
auto entry = TryGet(host_ptr);
|
||||
if (entry) {
|
||||
if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
|
||||
return entry->GetOffset();
|
||||
}
|
||||
Unregister(entry);
|
||||
}
|
||||
|
@ -55,17 +62,17 @@ u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64
|
|||
AlignBuffer(alignment);
|
||||
const u64 uploaded_offset = buffer_offset;
|
||||
|
||||
Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
|
||||
if (!host_ptr) {
|
||||
return uploaded_offset;
|
||||
}
|
||||
|
||||
std::memcpy(buffer_ptr, host_ptr, size);
|
||||
buffer_ptr += size;
|
||||
buffer_offset += size;
|
||||
|
||||
if (cache) {
|
||||
auto entry = std::make_shared<CachedBufferEntry>();
|
||||
entry->offset = uploaded_offset;
|
||||
entry->size = size;
|
||||
entry->alignment = alignment;
|
||||
entry->addr = *cpu_addr;
|
||||
auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
|
||||
alignment, host_ptr);
|
||||
Register(entry);
|
||||
}
|
||||
|
||||
|
|
|
@ -24,22 +24,39 @@ class VKFence;
|
|||
class VKMemoryManager;
|
||||
class VKStreamBuffer;
|
||||
|
||||
struct CachedBufferEntry final : public RasterizerCacheObject {
|
||||
VAddr GetAddr() const override {
|
||||
return addr;
|
||||
class CachedBufferEntry final : public RasterizerCacheObject {
|
||||
public:
|
||||
explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
|
||||
u8* host_ptr);
|
||||
|
||||
VAddr GetCpuAddr() const override {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
std::size_t GetSizeInBytes() const override {
|
||||
return size;
|
||||
}
|
||||
|
||||
std::size_t GetSize() const {
|
||||
return size;
|
||||
}
|
||||
|
||||
u64 GetOffset() const {
|
||||
return offset;
|
||||
}
|
||||
|
||||
std::size_t GetAlignment() const {
|
||||
return alignment;
|
||||
}
|
||||
|
||||
// We do not have to flush this cache as things in it are never modified by us.
|
||||
void Flush() override {}
|
||||
|
||||
VAddr addr;
|
||||
std::size_t size;
|
||||
u64 offset;
|
||||
std::size_t alignment;
|
||||
private:
|
||||
VAddr cpu_addr{};
|
||||
std::size_t size{};
|
||||
u64 offset{};
|
||||
std::size_t alignment{};
|
||||
};
|
||||
|
||||
class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
|
||||
|
|
Loading…
Reference in a new issue