forked from suyu/suyu
video_core: Cache GPU internal writes.
This commit is contained in:
parent
b78328f19a
commit
6c7eb81f7d
10 changed files with 185 additions and 30 deletions
|
@ -85,6 +85,7 @@ add_library(video_core STATIC
|
||||||
gpu.h
|
gpu.h
|
||||||
gpu_thread.cpp
|
gpu_thread.cpp
|
||||||
gpu_thread.h
|
gpu_thread.h
|
||||||
|
invalidation_accumulator.h
|
||||||
memory_manager.cpp
|
memory_manager.cpp
|
||||||
memory_manager.h
|
memory_manager.h
|
||||||
precompiled_headers.h
|
precompiled_headers.h
|
||||||
|
|
|
@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) {
|
||||||
regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
|
regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
|
||||||
x_elements, regs.line_count, regs.dest.BlockHeight(),
|
x_elements, regs.line_count, regs.dest.BlockHeight(),
|
||||||
regs.dest.BlockDepth(), regs.line_length_in);
|
regs.dest.BlockDepth(), regs.line_length_in);
|
||||||
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
|
memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -485,11 +485,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::ProcessQueryGet() {
|
void Maxwell3D::ProcessQueryGet() {
|
||||||
// TODO(Subv): Support the other query units.
|
|
||||||
if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) {
|
|
||||||
LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented");
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (regs.report_semaphore.query.operation) {
|
switch (regs.report_semaphore.query.operation) {
|
||||||
case Regs::ReportSemaphore::Operation::Release:
|
case Regs::ReportSemaphore::Operation::Release:
|
||||||
if (regs.report_semaphore.query.short_query != 0) {
|
if (regs.report_semaphore.query.short_query != 0) {
|
||||||
|
@ -649,7 +644,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
|
||||||
|
|
||||||
const GPUVAddr address{buffer_address + regs.const_buffer.offset};
|
const GPUVAddr address{buffer_address + regs.const_buffer.offset};
|
||||||
const size_t copy_size = amount * sizeof(u32);
|
const size_t copy_size = amount * sizeof(u32);
|
||||||
memory_manager.WriteBlock(address, start_base, copy_size);
|
memory_manager.WriteBlockCached(address, start_base, copy_size);
|
||||||
|
|
||||||
// Increment the current buffer position.
|
// Increment the current buffer position.
|
||||||
regs.const_buffer.offset += static_cast<u32>(copy_size);
|
regs.const_buffer.offset += static_cast<u32>(copy_size);
|
||||||
|
|
|
@ -69,7 +69,7 @@ void MaxwellDMA::Launch() {
|
||||||
if (launch.multi_line_enable) {
|
if (launch.multi_line_enable) {
|
||||||
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||||
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||||
|
memory_manager.FlushCaching();
|
||||||
if (!is_src_pitch && !is_dst_pitch) {
|
if (!is_src_pitch && !is_dst_pitch) {
|
||||||
// If both the source and the destination are in block layout, assert.
|
// If both the source and the destination are in block layout, assert.
|
||||||
CopyBlockLinearToBlockLinear();
|
CopyBlockLinearToBlockLinear();
|
||||||
|
@ -104,6 +104,7 @@ void MaxwellDMA::Launch() {
|
||||||
reinterpret_cast<u8*>(tmp_buffer.data()),
|
reinterpret_cast<u8*>(tmp_buffer.data()),
|
||||||
regs.line_length_in * sizeof(u32));
|
regs.line_length_in * sizeof(u32));
|
||||||
} else {
|
} else {
|
||||||
|
memory_manager.FlushCaching();
|
||||||
const auto convert_linear_2_blocklinear_addr = [](u64 address) {
|
const auto convert_linear_2_blocklinear_addr = [](u64 address) {
|
||||||
return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
|
return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
|
||||||
((address & 0x180) >> 1) | ((address & 0x20) << 3);
|
((address & 0x180) >> 1) | ((address & 0x20) << 3);
|
||||||
|
@ -121,7 +122,7 @@ void MaxwellDMA::Launch() {
|
||||||
memory_manager.ReadBlockUnsafe(
|
memory_manager.ReadBlockUnsafe(
|
||||||
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
|
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
|
||||||
tmp_buffer.data(), tmp_buffer.size());
|
tmp_buffer.data(), tmp_buffer.size());
|
||||||
memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(),
|
memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
|
||||||
tmp_buffer.size());
|
tmp_buffer.size());
|
||||||
}
|
}
|
||||||
} else if (is_src_pitch && !is_dst_pitch) {
|
} else if (is_src_pitch && !is_dst_pitch) {
|
||||||
|
@ -132,7 +133,7 @@ void MaxwellDMA::Launch() {
|
||||||
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
|
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
|
||||||
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
|
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
|
||||||
tmp_buffer.size());
|
tmp_buffer.size());
|
||||||
memory_manager.WriteBlock(
|
memory_manager.WriteBlockCached(
|
||||||
convert_linear_2_blocklinear_addr(regs.offset_out + offset),
|
convert_linear_2_blocklinear_addr(regs.offset_out + offset),
|
||||||
tmp_buffer.data(), tmp_buffer.size());
|
tmp_buffer.data(), tmp_buffer.size());
|
||||||
}
|
}
|
||||||
|
@ -141,7 +142,7 @@ void MaxwellDMA::Launch() {
|
||||||
std::vector<u8> tmp_buffer(regs.line_length_in);
|
std::vector<u8> tmp_buffer(regs.line_length_in);
|
||||||
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||||
regs.line_length_in);
|
regs.line_length_in);
|
||||||
memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
|
memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
|
||||||
regs.line_length_in);
|
regs.line_length_in);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -204,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
|
||||||
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
||||||
regs.pitch_out);
|
regs.pitch_out);
|
||||||
|
|
||||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MaxwellDMA::CopyPitchToBlockLinear() {
|
void MaxwellDMA::CopyPitchToBlockLinear() {
|
||||||
|
@ -256,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
|
||||||
dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
||||||
regs.pitch_in);
|
regs.pitch_in);
|
||||||
|
|
||||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MaxwellDMA::FastCopyBlockLinearToPitch() {
|
void MaxwellDMA::FastCopyBlockLinearToPitch() {
|
||||||
|
@ -287,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
|
||||||
regs.src_params.block_size.height, regs.src_params.block_size.depth,
|
regs.src_params.block_size.height, regs.src_params.block_size.depth,
|
||||||
regs.pitch_out);
|
regs.pitch_out);
|
||||||
|
|
||||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MaxwellDMA::CopyBlockLinearToBlockLinear() {
|
void MaxwellDMA::CopyBlockLinearToBlockLinear() {
|
||||||
|
@ -347,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
|
||||||
dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
|
dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
|
||||||
dst.block_size.height, dst.block_size.depth, pitch);
|
dst.block_size.height, dst.block_size.depth, pitch);
|
||||||
|
|
||||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MaxwellDMA::ReleaseSemaphore() {
|
void MaxwellDMA::ReleaseSemaphore() {
|
||||||
|
|
78
src/video_core/invalidation_accumulator.h
Normal file
78
src/video_core/invalidation_accumulator.h
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace VideoCommon {
|
||||||
|
|
||||||
|
class InvalidationAccumulator {
|
||||||
|
public:
|
||||||
|
InvalidationAccumulator() = default;
|
||||||
|
~InvalidationAccumulator() = default;
|
||||||
|
|
||||||
|
void Add(GPUVAddr address, size_t size) {
|
||||||
|
const auto reset_values = [&]() {
|
||||||
|
if (has_collected) {
|
||||||
|
buffer.emplace_back(start_address, accumulated_size);
|
||||||
|
}
|
||||||
|
start_address = address;
|
||||||
|
accumulated_size = size;
|
||||||
|
last_collection = start_address + size;
|
||||||
|
};
|
||||||
|
if (address >= start_address && address + size <= last_collection) [[likely]] {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
size = (address + size + atomicy_side_mask) & atomicy_mask - address;
|
||||||
|
address = address & atomicy_mask;
|
||||||
|
if (!has_collected) [[unlikely]] {
|
||||||
|
reset_values();
|
||||||
|
has_collected = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (address != last_collection) [[unlikely]] {
|
||||||
|
reset_values();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
accumulated_size += size;
|
||||||
|
last_collection += size;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Clear() {
|
||||||
|
buffer.clear();
|
||||||
|
start_address = 0;
|
||||||
|
last_collection = 0;
|
||||||
|
has_collected = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AnyAccumulated() const {
|
||||||
|
return has_collected;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Func>
|
||||||
|
void Callback(Func&& func) {
|
||||||
|
if (!has_collected) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
buffer.emplace_back(start_address, accumulated_size);
|
||||||
|
for (auto& [address, size] : buffer) {
|
||||||
|
func(address, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static constexpr size_t atomicy_bits = 5;
|
||||||
|
static constexpr size_t atomicy_size = 1ULL << atomicy_bits;
|
||||||
|
static constexpr size_t atomicy_side_mask = atomicy_size - 1;
|
||||||
|
static constexpr size_t atomicy_mask = ~atomicy_side_mask;
|
||||||
|
GPUVAddr start_address{};
|
||||||
|
GPUVAddr last_collection{};
|
||||||
|
size_t accumulated_size{};
|
||||||
|
bool has_collected{};
|
||||||
|
std::vector<std::pair<VAddr, size_t>> buffer;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace VideoCommon
|
|
@ -11,6 +11,7 @@
|
||||||
#include "core/hle/kernel/k_page_table.h"
|
#include "core/hle/kernel/k_page_table.h"
|
||||||
#include "core/hle/kernel/k_process.h"
|
#include "core/hle/kernel/k_process.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
|
#include "video_core/invalidation_accumulator.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
#include "video_core/renderer_base.h"
|
#include "video_core/renderer_base.h"
|
||||||
|
@ -26,7 +27,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
|
||||||
entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
|
entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
|
||||||
page_bits != big_page_bits ? page_bits : 0},
|
page_bits != big_page_bits ? page_bits : 0},
|
||||||
kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
|
kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
|
||||||
1, std::memory_order_acq_rel)} {
|
1, std::memory_order_acq_rel)},
|
||||||
|
accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} {
|
||||||
address_space_size = 1ULL << address_space_bits;
|
address_space_size = 1ULL << address_space_bits;
|
||||||
page_size = 1ULL << page_bits;
|
page_size = 1ULL << page_bits;
|
||||||
page_mask = page_size - 1ULL;
|
page_mask = page_size - 1ULL;
|
||||||
|
@ -185,15 +187,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
|
GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash);
|
||||||
|
|
||||||
for (const auto& [map_addr, map_size] : submapped_ranges) {
|
for (const auto& [map_addr, map_size] : page_stash) {
|
||||||
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
|
rasterizer->UnmapMemory(map_addr, map_size);
|
||||||
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr);
|
|
||||||
ASSERT(cpu_addr);
|
|
||||||
|
|
||||||
rasterizer->UnmapMemory(*cpu_addr, map_size);
|
|
||||||
}
|
}
|
||||||
|
page_stash.clear();
|
||||||
|
|
||||||
BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
|
BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
|
||||||
PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
|
PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
|
||||||
|
@ -454,6 +453,12 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf
|
||||||
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
|
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Copies `size` bytes from `src_buffer` to GPU virtual address `gpu_dest_addr` through the same
/// WriteBlockImpl<false> / CacheType::None path that WriteBlockUnsafe uses, then registers the
/// written range with the invalidation accumulator so that FlushCaching() can process it later.
void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer,
                                     std::size_t size) {
    constexpr auto no_cache = VideoCommon::CacheType::None;
    WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, no_cache);

    auto& pending_writes = *accumulator;
    pending_writes.Add(gpu_dest_addr, size);
}
|
||||||
|
|
||||||
void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size,
|
void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size,
|
||||||
VideoCommon::CacheType which) const {
|
VideoCommon::CacheType which) const {
|
||||||
auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
|
auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
|
||||||
|
@ -663,7 +668,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
|
||||||
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
||||||
GPUVAddr gpu_addr, std::size_t size) const {
|
GPUVAddr gpu_addr, std::size_t size) const {
|
||||||
std::vector<std::pair<GPUVAddr, std::size_t>> result{};
|
std::vector<std::pair<GPUVAddr, std::size_t>> result{};
|
||||||
std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
|
GetSubmappedRangeImpl<true>(gpu_addr, size, result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool is_gpu_address>
|
||||||
|
void MemoryManager::GetSubmappedRangeImpl(
|
||||||
|
GPUVAddr gpu_addr, std::size_t size,
|
||||||
|
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
|
||||||
|
result) const {
|
||||||
|
std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
|
||||||
|
last_segment{};
|
||||||
std::optional<VAddr> old_page_addr{};
|
std::optional<VAddr> old_page_addr{};
|
||||||
const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
|
const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
|
||||||
[[maybe_unused]] std::size_t offset,
|
[[maybe_unused]] std::size_t offset,
|
||||||
|
@ -685,8 +700,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
||||||
}
|
}
|
||||||
old_page_addr = {cpu_addr_base + copy_amount};
|
old_page_addr = {cpu_addr_base + copy_amount};
|
||||||
if (!last_segment) {
|
if (!last_segment) {
|
||||||
|
if constexpr (is_gpu_address) {
|
||||||
const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
|
const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
|
||||||
last_segment = {new_base_addr, copy_amount};
|
last_segment = {new_base_addr, copy_amount};
|
||||||
|
} else {
|
||||||
|
last_segment = {cpu_addr_base, copy_amount};
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
last_segment->second += copy_amount;
|
last_segment->second += copy_amount;
|
||||||
}
|
}
|
||||||
|
@ -703,8 +722,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
||||||
}
|
}
|
||||||
old_page_addr = {cpu_addr_base + copy_amount};
|
old_page_addr = {cpu_addr_base + copy_amount};
|
||||||
if (!last_segment) {
|
if (!last_segment) {
|
||||||
|
if constexpr (is_gpu_address) {
|
||||||
const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
|
const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
|
||||||
last_segment = {new_base_addr, copy_amount};
|
last_segment = {new_base_addr, copy_amount};
|
||||||
|
} else {
|
||||||
|
last_segment = {cpu_addr_base, copy_amount};
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
last_segment->second += copy_amount;
|
last_segment->second += copy_amount;
|
||||||
}
|
}
|
||||||
|
@ -715,7 +738,18 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
||||||
};
|
};
|
||||||
MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
|
MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
|
||||||
split(0, 0, 0);
|
split(0, 0, 0);
|
||||||
return result;
|
}
|
||||||
|
|
||||||
|
/// Hands every range recorded by WriteBlockCached() to the rasterizer and resets the accumulator.
/// Each accumulated GPU range is first split into CPU-address sub-ranges (collected in
/// `page_stash`), which are then passed to the rasterizer in a single InnerInvalidation() call.
void MemoryManager::FlushCaching() {
    if (accumulator->AnyAccumulated()) {
        // GetSubmappedRangeImpl<false> appends (CPU address, size) pairs to page_stash.
        const auto collect_cpu_ranges = [this](GPUVAddr range_start, size_t range_size) {
            GetSubmappedRangeImpl<false>(range_start, range_size, page_stash);
        };
        accumulator->Callback(collect_cpu_ranges);

        rasterizer->InnerInvalidation(page_stash);

        // Both containers are reused for the next batch.
        page_stash.clear();
        accumulator->Clear();
    }
}
|
||||||
|
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
||||||
|
|
|
@ -19,6 +19,10 @@ namespace VideoCore {
|
||||||
class RasterizerInterface;
|
class RasterizerInterface;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace VideoCommon {
|
||||||
|
class InvalidationAccumulator;
|
||||||
|
}
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
class DeviceMemory;
|
class DeviceMemory;
|
||||||
namespace Memory {
|
namespace Memory {
|
||||||
|
@ -80,6 +84,7 @@ public:
|
||||||
*/
|
*/
|
||||||
void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
|
void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
|
||||||
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
||||||
|
void WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks if a gpu region can be simply read with a pointer.
|
* Checks if a gpu region can be simply read with a pointer.
|
||||||
|
@ -129,6 +134,8 @@ public:
|
||||||
size_t GetMemoryLayoutSize(GPUVAddr gpu_addr,
|
size_t GetMemoryLayoutSize(GPUVAddr gpu_addr,
|
||||||
size_t max_size = std::numeric_limits<size_t>::max()) const;
|
size_t max_size = std::numeric_limits<size_t>::max()) const;
|
||||||
|
|
||||||
|
void FlushCaching();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
|
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
|
||||||
inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
|
inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
|
||||||
|
@ -154,6 +161,12 @@ private:
|
||||||
inline bool IsBigPageContinous(size_t big_page_index) const;
|
inline bool IsBigPageContinous(size_t big_page_index) const;
|
||||||
inline void SetBigPageContinous(size_t big_page_index, bool value);
|
inline void SetBigPageContinous(size_t big_page_index, bool value);
|
||||||
|
|
||||||
|
template <bool is_gpu_address>
|
||||||
|
void GetSubmappedRangeImpl(
|
||||||
|
GPUVAddr gpu_addr, std::size_t size,
|
||||||
|
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
|
||||||
|
result) const;
|
||||||
|
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
Core::Memory::Memory& memory;
|
Core::Memory::Memory& memory;
|
||||||
Core::DeviceMemory& device_memory;
|
Core::DeviceMemory& device_memory;
|
||||||
|
@ -201,10 +214,12 @@ private:
|
||||||
Common::VirtualBuffer<u32> big_page_table_cpu;
|
Common::VirtualBuffer<u32> big_page_table_cpu;
|
||||||
|
|
||||||
std::vector<u64> big_page_continous;
|
std::vector<u64> big_page_continous;
|
||||||
|
std::vector<std::pair<VAddr, std::size_t>> page_stash{};
|
||||||
|
|
||||||
constexpr static size_t continous_bits = 64;
|
constexpr static size_t continous_bits = 64;
|
||||||
|
|
||||||
const size_t unique_identifier;
|
const size_t unique_identifier;
|
||||||
|
std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
|
||||||
|
|
||||||
static std::atomic<size_t> unique_identifier_generator;
|
static std::atomic<size_t> unique_identifier_generator;
|
||||||
};
|
};
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <span>
|
#include <span>
|
||||||
|
#include <utility>
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/polyfill_thread.h"
|
#include "common/polyfill_thread.h"
|
||||||
#include "video_core/cache_types.h"
|
#include "video_core/cache_types.h"
|
||||||
|
@ -95,6 +96,12 @@ public:
|
||||||
virtual void InvalidateRegion(VAddr addr, u64 size,
|
virtual void InvalidateRegion(VAddr addr, u64 size,
|
||||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
|
VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
|
||||||
|
|
||||||
|
/// Processes a batch of (CPU address, size) ranges. This default implementation simply forwards
/// each range to InvalidateRegion() with its default cache selection; backends may override it.
virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
    for (const auto& range : sequences) {
        InvalidateRegion(range.first, range.second);
    }
}
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region are desync with guest
|
/// Notify rasterizer that any caches of the specified region are desync with guest
|
||||||
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
|
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
|
|
|
@ -186,6 +186,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
|
||||||
|
|
||||||
SCOPE_EXIT({ gpu.TickWork(); });
|
SCOPE_EXIT({ gpu.TickWork(); });
|
||||||
FlushWork();
|
FlushWork();
|
||||||
|
gpu_memory->FlushCaching();
|
||||||
|
|
||||||
query_cache.UpdateCounters();
|
query_cache.UpdateCounters();
|
||||||
|
|
||||||
|
@ -393,6 +394,7 @@ void RasterizerVulkan::Clear(u32 layer_count) {
|
||||||
|
|
||||||
void RasterizerVulkan::DispatchCompute() {
|
void RasterizerVulkan::DispatchCompute() {
|
||||||
FlushWork();
|
FlushWork();
|
||||||
|
gpu_memory->FlushCaching();
|
||||||
|
|
||||||
ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
|
ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
|
||||||
if (!pipeline) {
|
if (!pipeline) {
|
||||||
|
@ -481,6 +483,27 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Applies a batch of (CPU address, size) ranges to the caches: the texture cache and the buffer
/// cache are each notified via WriteMemory() while holding their own mutex for the whole batch,
/// and afterwards the query and pipeline caches are invalidated for every range.
void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
    {
        // One lock acquisition covers all texture cache notifications.
        std::scoped_lock texture_guard{texture_cache.mutex};
        for (const auto& range : sequences) {
            texture_cache.WriteMemory(range.first, range.second);
        }
    }
    {
        // Likewise for the buffer cache.
        std::scoped_lock buffer_guard{buffer_cache.mutex};
        for (const auto& range : sequences) {
            buffer_cache.WriteMemory(range.first, range.second);
        }
    }
    // No mutex is taken here for the query and pipeline caches.
    for (const auto& range : sequences) {
        query_cache.InvalidateRegion(range.first, range.second);
        pipeline_cache.InvalidateRegion(range.first, range.second);
    }
}
|
||||||
|
|
||||||
void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
|
void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
|
||||||
if (addr == 0 || size == 0) {
|
if (addr == 0 || size == 0) {
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -79,6 +79,7 @@ public:
|
||||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||||
void InvalidateRegion(VAddr addr, u64 size,
|
void InvalidateRegion(VAddr addr, u64 size,
|
||||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||||
|
void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
|
||||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||||
void InvalidateGPUCache() override;
|
void InvalidateGPUCache() override;
|
||||||
void UnmapMemory(VAddr addr, u64 size) override;
|
void UnmapMemory(VAddr addr, u64 size) override;
|
||||||
|
|
Loading…
Reference in a new issue