forked from suyu/suyu
GPU-SMMU: Estimate game leak and preallocate device region.
This commit is contained in:
parent
96fd1348ae
commit
0adc09e0af
9 changed files with 329 additions and 27 deletions
|
@ -611,6 +611,8 @@ add_library(core STATIC
|
|||
hle/service/ns/pdm_qry.h
|
||||
hle/service/nvdrv/core/container.cpp
|
||||
hle/service/nvdrv/core/container.h
|
||||
hle/service/nvdrv/core/heap_mapper.cpp
|
||||
hle/service/nvdrv/core/heap_mapper.h
|
||||
hle/service/nvdrv/core/nvmap.cpp
|
||||
hle/service/nvdrv/core/nvmap.h
|
||||
hle/service/nvdrv/core/syncpoint_manager.cpp
|
||||
|
|
|
@ -20,10 +20,10 @@ namespace Core {
|
|||
|
||||
namespace {
|
||||
|
||||
class PhysicalAddressContainer {
|
||||
class MultiAddressContainer {
|
||||
public:
|
||||
PhysicalAddressContainer() = default;
|
||||
~PhysicalAddressContainer() = default;
|
||||
MultiAddressContainer() = default;
|
||||
~MultiAddressContainer() = default;
|
||||
|
||||
void GatherValues(u32 start_entry, Common::ScratchBuffer<u32>& buffer) {
|
||||
buffer.resize(8);
|
||||
|
@ -145,7 +145,7 @@ struct DeviceMemoryManagerAllocator {
|
|||
std::conditional_t<supports_pinning, Common::FlatAllocator<DAddr, 0, pin_bits>, EmptyAllocator>
|
||||
pin_allocator;
|
||||
Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator;
|
||||
PhysicalAddressContainer multi_dev_address;
|
||||
MultiAddressContainer multi_dev_address;
|
||||
|
||||
/// Returns true when vaddr -> vaddr+size is fully contained in the buffer
|
||||
template <bool pin_area>
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include "core/hle/kernel/k_process.h"
|
||||
#include "core/hle/service/nvdrv/core/container.h"
|
||||
#include "core/hle/service/nvdrv/core/heap_mapper.h"
|
||||
#include "core/hle/service/nvdrv/core/nvmap.h"
|
||||
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
|
||||
#include "core/memory.h"
|
||||
|
@ -36,6 +37,14 @@ Container::~Container() = default;
|
|||
|
||||
size_t Container::OpenSession(Kernel::KProcess* process) {
|
||||
std::scoped_lock lk(impl->session_guard);
|
||||
for (auto& session : impl->sessions) {
|
||||
if (!session.is_active) {
|
||||
continue;
|
||||
}
|
||||
if (session.process == process) {
|
||||
return session.id;
|
||||
}
|
||||
}
|
||||
size_t new_id{};
|
||||
auto* memory_interface = &process->GetMemory();
|
||||
auto& smmu = impl->host1x.MemoryManager();
|
||||
|
@ -48,16 +57,65 @@ size_t Container::OpenSession(Kernel::KProcess* process) {
|
|||
impl->sessions.emplace_back(new_id, process, smmu_id);
|
||||
new_id = impl->new_ids++;
|
||||
}
|
||||
LOG_CRITICAL(Debug, "Created Session {}", new_id);
|
||||
auto& session = impl->sessions[new_id];
|
||||
session.is_active = true;
|
||||
// Optimization
|
||||
if (process->IsApplication()) {
|
||||
auto& page_table = process->GetPageTable().GetBasePageTable();
|
||||
auto heap_start = page_table.GetHeapRegionStart();
|
||||
|
||||
Kernel::KProcessAddress cur_addr = heap_start;
|
||||
size_t region_size = 0;
|
||||
VAddr region_start = 0;
|
||||
while (true) {
|
||||
Kernel::KMemoryInfo mem_info{};
|
||||
Kernel::Svc::PageInfo page_info{};
|
||||
R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info),
|
||||
cur_addr));
|
||||
auto svc_mem_info = mem_info.GetSvcMemoryInfo();
|
||||
|
||||
// check if this memory block is heap
|
||||
if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) {
|
||||
if (svc_mem_info.size > region_size) {
|
||||
region_size = svc_mem_info.size;
|
||||
region_start = svc_mem_info.base_address;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if we're done.
|
||||
const uintptr_t next_address = svc_mem_info.base_address + svc_mem_info.size;
|
||||
if (next_address <= GetInteger(cur_addr)) {
|
||||
break;
|
||||
}
|
||||
|
||||
cur_addr = next_address;
|
||||
}
|
||||
session.has_preallocated_area = false;
|
||||
auto start_region = (region_size >> 15) >= 1024 ? smmu.Allocate(region_size) : 0;
|
||||
if (start_region != 0) {
|
||||
session.mapper = std::make_unique<HeapMapper>(region_start, start_region, region_size,
|
||||
smmu_id, impl->host1x);
|
||||
session.has_preallocated_area = true;
|
||||
LOG_CRITICAL(Debug, "Preallocation created!");
|
||||
}
|
||||
}
|
||||
return new_id;
|
||||
}
|
||||
|
||||
void Container::CloseSession(size_t id) {
|
||||
std::scoped_lock lk(impl->session_guard);
|
||||
auto& session = impl->sessions[id];
|
||||
auto& smmu = impl->host1x.MemoryManager();
|
||||
if (session.has_preallocated_area) {
|
||||
const DAddr region_start = session.mapper->GetRegionStart();
|
||||
const size_t region_size = session.mapper->GetRegionSize();
|
||||
session.mapper.reset();
|
||||
smmu.Free(region_start, region_size);
|
||||
session.has_preallocated_area = false;
|
||||
}
|
||||
session.is_active = false;
|
||||
smmu.UnregisterProcess(impl->sessions[id].smmu_id);
|
||||
impl->id_pool.emplace_front(id);
|
||||
LOG_CRITICAL(Debug, "Closed Session {}", id);
|
||||
}
|
||||
|
||||
Session* Container::GetSession(size_t id) {
|
||||
|
|
|
@ -20,6 +20,7 @@ class Host1x;
|
|||
|
||||
namespace Service::Nvidia::NvCore {
|
||||
|
||||
class HeapMapper;
|
||||
class NvMap;
|
||||
class SyncpointManager;
|
||||
|
||||
|
@ -29,6 +30,9 @@ struct Session {
|
|||
size_t id;
|
||||
Kernel::KProcess* process;
|
||||
size_t smmu_id;
|
||||
bool has_preallocated_area{};
|
||||
std::unique_ptr<HeapMapper> mapper{};
|
||||
bool is_active{};
|
||||
};
|
||||
|
||||
class Container {
|
||||
|
|
172
src/core/hle/service/nvdrv/core/heap_mapper.cpp
Normal file
172
src/core/hle/service/nvdrv/core/heap_mapper.cpp
Normal file
|
@ -0,0 +1,172 @@
|
|||
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#define BOOST_NO_MT
|
||||
#include <boost/pool/detail/mutex.hpp>
|
||||
#undef BOOST_NO_MT
|
||||
#include <boost/icl/interval.hpp>
|
||||
#include <boost/icl/interval_base_set.hpp>
|
||||
#include <boost/icl/interval_set.hpp>
|
||||
#include <boost/icl/split_interval_map.hpp>
|
||||
#include <boost/pool/pool.hpp>
|
||||
#include <boost/pool/pool_alloc.hpp>
|
||||
#include <boost/pool/poolfwd.hpp>
|
||||
|
||||
#include "core/hle/service/nvdrv/core/heap_mapper.h"
|
||||
#include "video_core/host1x/host1x.h"
|
||||
|
||||
namespace boost {
|
||||
template <typename T>
|
||||
class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
|
||||
}
|
||||
|
||||
namespace Service::Nvidia::NvCore {
|
||||
|
||||
using IntervalCompare = std::less<DAddr>;
|
||||
using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>;
|
||||
using IntervalAllocator = boost::fast_pool_allocator<DAddr>;
|
||||
using IntervalSet = boost::icl::interval_set<DAddr>;
|
||||
using IntervalType = typename IntervalSet::interval_type;
|
||||
|
||||
template <typename Type>
|
||||
struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
|
||||
// types
|
||||
typedef counter_add_functor<Type> type;
|
||||
typedef boost::icl::identity_based_inplace_combine<Type> base_type;
|
||||
|
||||
// public member functions
|
||||
void operator()(Type& current, const Type& added) const {
|
||||
current += added;
|
||||
if (current < base_type::identity_element()) {
|
||||
current = base_type::identity_element();
|
||||
}
|
||||
}
|
||||
|
||||
// public static functions
|
||||
static void version(Type&){};
|
||||
};
|
||||
|
||||
using OverlapCombine = counter_add_functor<int>;
|
||||
using OverlapSection = boost::icl::inter_section<int>;
|
||||
using OverlapCounter = boost::icl::split_interval_map<DAddr, int>;
|
||||
|
||||
/// Private state of HeapMapper: a scratch interval set, the per-interval mapping counters,
/// the device memory manager obtained from Host1x, and the mutex Map/Unmap hold while working.
struct HeapMapper::HeapMapperInternal {
    HeapMapperInternal(Tegra::Host1x::Host1x& host1x) : device_memory{host1x.MemoryManager()} {}
    ~HeapMapperInternal() = default;

    /// Visits every entry of `current_range` found between lower_bound and upper_bound of
    /// [cpu_addr, cpu_addr + size) and calls func(begin, end, count) on it, with begin/end
    /// clipped so they never leave that window.
    template <typename Func>
    void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size,
                                 Func&& func) {
        const DAddr window_begin = cpu_addr;
        const DAddr window_end = window_begin + size;
        const IntervalType window{window_begin, window_end};
        auto cursor = current_range.lower_bound(window);
        if (cursor == current_range.end()) {
            return;
        }
        const auto stop = current_range.upper_bound(window);
        while (cursor != stop) {
            const auto& span = cursor->first;
            DAddr clipped_end = span.upper() > window_end ? window_end : span.upper();
            DAddr clipped_begin = span.lower() < window_begin ? window_begin : span.lower();
            func(clipped_begin, clipped_end, cursor->second);
            ++cursor;
        }
    }

    /// Adds `subtract_value` to the counters over `search_interval`, then erases, one at a
    /// time, every entry in that window whose counter is zero or negative.
    void RemoveEachInOverlapCounter(OverlapCounter& current_range,
                                    const IntervalType search_interval, int subtract_value) {
        current_range.add(std::make_pair(search_interval, subtract_value));
        // After each erase the search starts over with freshly obtained iterators.
        for (;;) {
            auto cursor = current_range.lower_bound(search_interval);
            if (cursor == current_range.end()) {
                return;
            }
            const auto stop = current_range.upper_bound(search_interval);
            while (cursor != stop && cursor->second > 0) {
                ++cursor;
            }
            if (cursor == stop) {
                // Nothing left to drop in the window.
                return;
            }
            current_range.erase(cursor);
        }
    }

    IntervalSet base_set;                             // scratch set, cleared around each use
    OverlapCounter mapping_overlaps;                  // mapping count per address interval
    Tegra::MaxwellDeviceMemoryManager& device_memory; // from host1x.MemoryManager()
    std::mutex guard;                                 // held by HeapMapper::Map / Unmap
};
|
||||
|
||||
/// Records the virtual base, device base, size and SMMU id of the region and creates the
/// internal state bound to the memory manager of `host1x`.
HeapMapper::HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, size_t smmu_id,
                       Tegra::Host1x::Host1x& host1x)
    : m_vaddress{start_vaddress}, m_daddress{start_daddress}, m_size{size}, m_smmu_id{smmu_id},
      m_internal{std::make_unique<HeapMapperInternal>(host1x)} {}
|
||||
|
||||
/// Unmaps the whole device region [m_daddress, m_daddress + m_size) on destruction.
HeapMapper::~HeapMapper() {
    auto& smmu = m_internal->device_memory;
    smmu.Unmap(m_daddress, m_size);
}
|
||||
|
||||
/// Registers one more mapping of [start, start + size).
/// Parts of the range that have no entry in the overlap counter yet are mapped into device
/// memory at the same offset they have inside the region; the counter is then incremented
/// over the whole range. Returns the device address that corresponds to `start`.
DAddr HeapMapper::Map(VAddr start, size_t size) {
    std::scoped_lock lk(m_internal->guard);
    auto& unmapped = m_internal->base_set;
    unmapped.clear();
    const IntervalType requested{start, start + size};
    unmapped.insert(requested);
    // Carve out every piece that is already tracked; what remains still needs mapping.
    m_internal->ForEachInOverlapCounter(
        m_internal->mapping_overlaps, start, size,
        [&unmapped](VAddr tracked_begin, VAddr tracked_end, int) {
            unmapped.subtract(IntervalType{tracked_begin, tracked_end});
        });
    for (const auto& gap : unmapped) {
        const VAddr gap_begin = gap.lower();
        const VAddr gap_end = gap.upper();
        const size_t region_offset = gap_begin - m_vaddress;
        const size_t gap_bytes = gap_end - gap_begin;
        m_internal->device_memory.Map(m_daddress + region_offset, m_vaddress + region_offset,
                                      gap_bytes, m_smmu_id);
    }
    m_internal->mapping_overlaps += std::make_pair(requested, 1);
    unmapped.clear();
    return m_daddress + (start - m_vaddress);
}
|
||||
|
||||
/// Drops one mapping of [start, start + size).
/// Tracked pieces whose counter is 1 or lower are unmapped from device memory; afterwards the
/// counter is decremented over the whole range and exhausted entries are removed.
void HeapMapper::Unmap(VAddr start, size_t size) {
    std::scoped_lock lk(m_internal->guard);
    auto& releasable = m_internal->base_set;
    releasable.clear();
    // Collect the pieces for which this is the last remaining mapping.
    m_internal->ForEachInOverlapCounter(
        m_internal->mapping_overlaps, start, size,
        [&releasable](VAddr tracked_begin, VAddr tracked_end, int count) {
            if (count > 1) {
                return;
            }
            releasable.insert(IntervalType{tracked_begin, tracked_end});
        });
    for (const auto& piece : releasable) {
        const VAddr piece_begin = piece.lower();
        const VAddr piece_end = piece.upper();
        const size_t region_offset = piece_begin - m_vaddress;
        const size_t piece_bytes = piece_end - piece_begin;
        m_internal->device_memory.Unmap(m_daddress + region_offset, piece_bytes);
    }
    const IntervalType to_remove{start, start + size};
    m_internal->RemoveEachInOverlapCounter(m_internal->mapping_overlaps, to_remove, -1);
    releasable.clear();
}
|
||||
|
||||
} // namespace Service::Nvidia::NvCore
|
48
src/core/hle/service/nvdrv/core/heap_mapper.h
Normal file
48
src/core/hle/service/nvdrv/core/heap_mapper.h
Normal file
|
@ -0,0 +1,48 @@
|
|||
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Host1x {
|
||||
class Host1x;
|
||||
} // namespace Tegra::Host1x
|
||||
|
||||
namespace Service::Nvidia::NvCore {
|
||||
|
||||
class HeapMapper {
|
||||
public:
|
||||
HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, size_t smmu_id,
|
||||
Tegra::Host1x::Host1x& host1x);
|
||||
~HeapMapper();
|
||||
|
||||
bool IsInBounds(VAddr start, size_t size) const {
|
||||
VAddr end = start + size;
|
||||
return start >= m_vaddress && end <= (m_vaddress + m_size);
|
||||
}
|
||||
|
||||
DAddr Map(VAddr start, size_t size);
|
||||
|
||||
void Unmap(VAddr start, size_t size);
|
||||
|
||||
DAddr GetRegionStart() const {
|
||||
return m_daddress;
|
||||
}
|
||||
|
||||
size_t GetRegionSize() const {
|
||||
return m_size;
|
||||
}
|
||||
|
||||
private:
|
||||
struct HeapMapperInternal;
|
||||
VAddr m_vaddress;
|
||||
DAddr m_daddress;
|
||||
size_t m_size;
|
||||
size_t m_smmu_id;
|
||||
std::unique_ptr<HeapMapperInternal> m_internal;
|
||||
};
|
||||
|
||||
} // namespace Service::Nvidia::NvCore
|
|
@ -8,10 +8,12 @@
|
|||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/hle/service/nvdrv/core/container.h"
|
||||
#include "core/hle/service/nvdrv/core/heap_mapper.h"
|
||||
#include "core/hle/service/nvdrv/core/nvmap.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/host1x/host1x.h"
|
||||
|
||||
|
||||
using Core::Memory::YUZU_PAGESIZE;
|
||||
|
||||
namespace Service::Nvidia::NvCore {
|
||||
|
@ -90,10 +92,19 @@ void NvMap::UnmapHandle(Handle& handle_description) {
|
|||
}
|
||||
|
||||
// Free and unmap the handle from the SMMU
|
||||
const size_t map_size = handle_description.aligned_size;
|
||||
if (!handle_description.in_heap) {
|
||||
auto& smmu = host1x.MemoryManager();
|
||||
smmu.Unmap(handle_description.d_address, handle_description.aligned_size);
|
||||
smmu.Free(handle_description.d_address, static_cast<size_t>(handle_description.aligned_size));
|
||||
smmu.Unmap(handle_description.d_address, map_size);
|
||||
smmu.Free(handle_description.d_address, static_cast<size_t>(map_size));
|
||||
handle_description.d_address = 0;
|
||||
return;
|
||||
}
|
||||
const VAddr vaddress = handle_description.address;
|
||||
auto* session = core.GetSession(handle_description.session_id);
|
||||
session->mapper->Unmap(vaddress, map_size);
|
||||
handle_description.d_address = 0;
|
||||
handle_description.in_heap = false;
|
||||
}
|
||||
|
||||
bool NvMap::TryRemoveHandle(const Handle& handle_description) {
|
||||
|
@ -188,7 +199,14 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
|
|||
DAddr address{};
|
||||
auto& smmu = host1x.MemoryManager();
|
||||
auto* session = core.GetSession(session_id);
|
||||
while ((address = smmu.Allocate(handle_description->aligned_size)) == 0) {
|
||||
const VAddr vaddress = handle_description->address;
|
||||
const size_t map_size = handle_description->aligned_size;
|
||||
handle_description->session_id = session_id;
|
||||
if (session->has_preallocated_area && session->mapper->IsInBounds(vaddress, map_size)) {
|
||||
handle_description->d_address = session->mapper->Map(vaddress, map_size);
|
||||
handle_description->in_heap = true;
|
||||
} else {
|
||||
while ((address = smmu.Allocate(map_size)) == 0) {
|
||||
// Free handles until the allocation succeeds
|
||||
std::scoped_lock queueLock(unmap_queue_lock);
|
||||
if (auto freeHandleDesc{unmap_queue.front()}) {
|
||||
|
@ -203,9 +221,9 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
|
|||
}
|
||||
|
||||
handle_description->d_address = address;
|
||||
|
||||
smmu.Map(address, handle_description->address, handle_description->aligned_size,
|
||||
session->smmu_id);
|
||||
smmu.Map(address, vaddress, map_size, session->smmu_id);
|
||||
handle_description->in_heap = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (low_area_pin) {
|
||||
|
|
|
@ -70,6 +70,8 @@ public:
|
|||
|
||||
u8 kind{}; //!< Used for memory compression
|
||||
bool allocated{}; //!< If the handle has been allocated with `Alloc`
|
||||
bool in_heap{};
|
||||
size_t session_id{};
|
||||
|
||||
DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to,
|
||||
//!< this can also be in the nvdrv tmem
|
||||
|
|
|
@ -34,8 +34,6 @@
|
|||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/shader_notify.h"
|
||||
|
||||
#pragma optimize("", off)
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
struct GPU::Impl {
|
||||
|
|
Loading…
Reference in a new issue