// Patch summary and review notes. This text sits ahead of the first `diff --git` header so that no
// hunk content is altered.
//
// Purpose: add Service::Nvidia::NvCore::HeapMapper and use it to back nvmap pins that fall inside a
// per-session, preallocated device-address region.
//
// What the hunks below do:
//  - src/core/CMakeLists.txt: lists heap_mapper.cpp / heap_mapper.h in add_library(core STATIC ...).
//  - device_memory_manager.inc: renames PhysicalAddressContainer to MultiAddressContainer (class,
//    constructor, destructor and the type of the multi_dev_address member).
//  - container.cpp, Container::OpenSession: returns the id of an already active session that has the
//    same process. When process->IsApplication() is true it walks QueryInfo results starting at
//    GetHeapRegionStart(), remembers the largest block whose state is MemoryState::Normal and, when
//    (region_size >> 15) >= 1024 (i.e. region_size >= 2^25), calls smmu.Allocate(region_size); a
//    non-zero result creates the session's HeapMapper and sets has_preallocated_area.
//  - container.cpp, Container::CloseSession: when has_preallocated_area is set it resets the mapper
//    (whose destructor calls device_memory.Unmap(m_daddress, m_size)) and then frees the region with
//    smmu.Free; it also clears is_active. The two LOG_CRITICAL(Debug, ...) session messages go away.
//  - container.h: Session gains has_preallocated_area, mapper and is_active.
//  - heap_mapper.cpp / heap_mapper.h (new files): HeapMapper::Map maps only the sub-ranges of
//    [start, start + size) that have no entry in mapping_overlaps, then adds 1 to the counter over
//    the whole interval and returns m_daddress + (start - m_vaddress). HeapMapper::Unmap unmaps the
//    sub-ranges whose count is <= 1, then adds -1 over the interval and erases entries whose count
//    is <= 0. Both functions hold m_internal->guard for their whole body.
//  - nvmap.cpp: PinHandle stores session_id on the handle and, when the session has a preallocated
//    area and mapper->IsInBounds(address, aligned_size) holds, pins through mapper->Map and sets
//    in_heap; otherwise it keeps the smmu.Allocate / smmu.Map path and clears in_heap. UnmapHandle
//    uses in_heap to choose between smmu.Unmap + smmu.Free and mapper->Unmap.
//  - nvmap.h: Handle gains in_heap and session_id.
//  - gpu.cpp: removes `#pragma optimize("", off)`.
//
// NOTE(review): points to confirm before merging.
//  1. In this copy of the patch the line breaks are collapsed and text that sat inside angle
//     brackets looks stripped (bare `#include`, `std::make_unique(`, `std::unique_ptr mapper{}`,
//     `static_cast(`, bare `template`). It presumably cannot be applied or compiled as-is;
//     regenerate it from the original commit rather than repairing it by hand.
//  2. PinHandle eviction loop: the guard tests `handle_description->d_address` but the call unmaps
//     `*freeHandleDesc`; this looks like it should test `freeHandleDesc->d_address`. Also,
//     `unmap_queue.front()` is used with no emptiness check visible in this hunk - verify against
//     the full function.
//  3. OpenSession context lines: `impl->sessions.emplace_back(new_id, process, smmu_id)` runs before
//     `new_id = impl->new_ids++`, so that session appears to be created while new_id still holds
//     its initial value. The new early return hands back `session.id`, so a stale id would now be
//     visible to callers - TODO confirm against the full function and the Session constructor.
//  4. `LOG_CRITICAL(Debug, "Preallocation created!")` is added while the two other
//     LOG_CRITICAL(Debug, ...) session messages are removed; it looks like leftover debug output.
//  5. UnmapHandle's in-heap path dereferences `session->mapper` unchecked, while CloseSession resets
//     the mapper; confirm that no handle with in_heap set can outlive its session.
//  6. HeapMapper::IsInBounds computes `start + size` with no wrap-around guard.
//  7. Both new files are recorded with "No newline at end of file".
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index ca54eb6c6f..0f713ead14 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -611,6 +611,8 @@ add_library(core STATIC hle/service/ns/pdm_qry.h hle/service/nvdrv/core/container.cpp hle/service/nvdrv/core/container.h + hle/service/nvdrv/core/heap_mapper.cpp + hle/service/nvdrv/core/heap_mapper.h hle/service/nvdrv/core/nvmap.cpp hle/service/nvdrv/core/nvmap.h hle/service/nvdrv/core/syncpoint_manager.cpp diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc index b3a5f3d8b4..138eb5017c 100644 --- a/src/core/device_memory_manager.inc +++ b/src/core/device_memory_manager.inc @@ -20,10 +20,10 @@ namespace Core { namespace { -class PhysicalAddressContainer { +class MultiAddressContainer { public: - PhysicalAddressContainer() = default; - ~PhysicalAddressContainer() = default; + MultiAddressContainer() = default; + ~MultiAddressContainer() = default; void GatherValues(u32 start_entry, Common::ScratchBuffer& buffer) { buffer.resize(8); @@ -145,7 +145,7 @@ struct DeviceMemoryManagerAllocator { std::conditional_t, EmptyAllocator> pin_allocator; Common::FlatAllocator main_allocator; - PhysicalAddressContainer multi_dev_address; + MultiAddressContainer multi_dev_address; /// Returns true when vaddr -> vaddr+size is fully contained in the buffer template diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp index e12ce05c13..ba7eb9e241 100644 --- a/src/core/hle/service/nvdrv/core/container.cpp +++ b/src/core/hle/service/nvdrv/core/container.cpp @@ -8,6 +8,7 @@ #include "core/hle/kernel/k_process.h" #include "core/hle/service/nvdrv/core/container.h" +#include "core/hle/service/nvdrv/core/heap_mapper.h" #include "core/hle/service/nvdrv/core/nvmap.h" #include "core/hle/service/nvdrv/core/syncpoint_manager.h" #include "core/memory.h" @@ -36,6 +37,14 @@ Container::~Container() = default; size_t 
Container::OpenSession(Kernel::KProcess* process) { std::scoped_lock lk(impl->session_guard); + for (auto& session : impl->sessions) { + if (!session.is_active) { + continue; + } + if (session.process == process) { + return session.id; + } + } size_t new_id{}; auto* memory_interface = &process->GetMemory(); auto& smmu = impl->host1x.MemoryManager(); @@ -48,16 +57,65 @@ size_t Container::OpenSession(Kernel::KProcess* process) { impl->sessions.emplace_back(new_id, process, smmu_id); new_id = impl->new_ids++; } - LOG_CRITICAL(Debug, "Created Session {}", new_id); + auto& session = impl->sessions[new_id]; + session.is_active = true; + // Optimization + if (process->IsApplication()) { + auto& page_table = process->GetPageTable().GetBasePageTable(); + auto heap_start = page_table.GetHeapRegionStart(); + + Kernel::KProcessAddress cur_addr = heap_start; + size_t region_size = 0; + VAddr region_start = 0; + while (true) { + Kernel::KMemoryInfo mem_info{}; + Kernel::Svc::PageInfo page_info{}; + R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info), + cur_addr)); + auto svc_mem_info = mem_info.GetSvcMemoryInfo(); + + // check if this memory block is heap + if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) { + if (svc_mem_info.size > region_size) { + region_size = svc_mem_info.size; + region_start = svc_mem_info.base_address; + } + } + + // Check if we're done. + const uintptr_t next_address = svc_mem_info.base_address + svc_mem_info.size; + if (next_address <= GetInteger(cur_addr)) { + break; + } + + cur_addr = next_address; + } + session.has_preallocated_area = false; + auto start_region = (region_size >> 15) >= 1024 ? 
smmu.Allocate(region_size) : 0; + if (start_region != 0) { + session.mapper = std::make_unique(region_start, start_region, region_size, + smmu_id, impl->host1x); + session.has_preallocated_area = true; + LOG_CRITICAL(Debug, "Preallocation created!"); + } + } return new_id; } void Container::CloseSession(size_t id) { std::scoped_lock lk(impl->session_guard); + auto& session = impl->sessions[id]; auto& smmu = impl->host1x.MemoryManager(); + if (session.has_preallocated_area) { + const DAddr region_start = session.mapper->GetRegionStart(); + const size_t region_size = session.mapper->GetRegionSize(); + session.mapper.reset(); + smmu.Free(region_start, region_size); + session.has_preallocated_area = false; + } + session.is_active = false; smmu.UnregisterProcess(impl->sessions[id].smmu_id); impl->id_pool.emplace_front(id); - LOG_CRITICAL(Debug, "Closed Session {}", id); } Session* Container::GetSession(size_t id) { diff --git a/src/core/hle/service/nvdrv/core/container.h b/src/core/hle/service/nvdrv/core/container.h index a1fd20199c..86705cbc8e 100644 --- a/src/core/hle/service/nvdrv/core/container.h +++ b/src/core/hle/service/nvdrv/core/container.h @@ -20,6 +20,7 @@ class Host1x; namespace Service::Nvidia::NvCore { +class HeapMapper; class NvMap; class SyncpointManager; @@ -29,6 +30,9 @@ struct Session { size_t id; Kernel::KProcess* process; size_t smmu_id; + bool has_preallocated_area{}; + std::unique_ptr mapper{}; + bool is_active{}; }; class Container { diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.cpp b/src/core/hle/service/nvdrv/core/heap_mapper.cpp new file mode 100644 index 0000000000..59d993bc61 --- /dev/null +++ b/src/core/hle/service/nvdrv/core/heap_mapper.cpp @@ -0,0 +1,172 @@ +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include + +#include +#define BOOST_NO_MT +#include +#undef BOOST_NO_MT +#include +#include +#include +#include +#include +#include +#include + +#include 
"core/hle/service/nvdrv/core/heap_mapper.h" +#include "video_core/host1x/host1x.h" + +namespace boost { +template +class fast_pool_allocator; +} + +namespace Service::Nvidia::NvCore { + +using IntervalCompare = std::less; +using IntervalInstance = boost::icl::interval_type_default; +using IntervalAllocator = boost::fast_pool_allocator; +using IntervalSet = boost::icl::interval_set; +using IntervalType = typename IntervalSet::interval_type; + +template +struct counter_add_functor : public boost::icl::identity_based_inplace_combine { + // types + typedef counter_add_functor type; + typedef boost::icl::identity_based_inplace_combine base_type; + + // public member functions + void operator()(Type& current, const Type& added) const { + current += added; + if (current < base_type::identity_element()) { + current = base_type::identity_element(); + } + } + + // public static functions + static void version(Type&){}; +}; + +using OverlapCombine = counter_add_functor; +using OverlapSection = boost::icl::inter_section; +using OverlapCounter = boost::icl::split_interval_map; + +struct HeapMapper::HeapMapperInternal { + HeapMapperInternal(Tegra::Host1x::Host1x& host1x) : device_memory{host1x.MemoryManager()} {} + ~HeapMapperInternal() = default; + + template + void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size, + Func&& func) { + const DAddr start_address = cpu_addr; + const DAddr end_address = start_address + size; + const IntervalType search_interval{start_address, end_address}; + auto it = current_range.lower_bound(search_interval); + if (it == current_range.end()) { + return; + } + auto end_it = current_range.upper_bound(search_interval); + for (; it != end_it; it++) { + auto& inter = it->first; + DAddr inter_addr_end = inter.upper(); + DAddr inter_addr = inter.lower(); + if (inter_addr_end > end_address) { + inter_addr_end = end_address; + } + if (inter_addr < start_address) { + inter_addr = start_address; + } + func(inter_addr, 
inter_addr_end, it->second); + } + } + + void RemoveEachInOverlapCounter(OverlapCounter& current_range, + const IntervalType search_interval, int subtract_value) { + bool any_removals = false; + current_range.add(std::make_pair(search_interval, subtract_value)); + do { + any_removals = false; + auto it = current_range.lower_bound(search_interval); + if (it == current_range.end()) { + return; + } + auto end_it = current_range.upper_bound(search_interval); + for (; it != end_it; it++) { + if (it->second <= 0) { + any_removals = true; + current_range.erase(it); + break; + } + } + } while (any_removals); + } + + IntervalSet base_set; + OverlapCounter mapping_overlaps; + Tegra::MaxwellDeviceMemoryManager& device_memory; + std::mutex guard; +}; + +HeapMapper::HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, size_t smmu_id, + Tegra::Host1x::Host1x& host1x) + : m_vaddress{start_vaddress}, m_daddress{start_daddress}, m_size{size}, m_smmu_id{smmu_id} { + m_internal = std::make_unique(host1x); +} + +HeapMapper::~HeapMapper() { + m_internal->device_memory.Unmap(m_daddress, m_size); +} + +DAddr HeapMapper::Map(VAddr start, size_t size) { + std::scoped_lock lk(m_internal->guard); + m_internal->base_set.clear(); + const IntervalType interval{start, start + size}; + m_internal->base_set.insert(interval); + m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size, [this](VAddr start_addr, VAddr end_addr, int){ + const IntervalType other{start_addr, end_addr}; + m_internal->base_set.subtract(other); + }); + if (!m_internal->base_set.empty()) { + auto it = m_internal->base_set.begin(); + auto end_it = m_internal->base_set.end(); + for (; it != end_it; it++) { + const VAddr inter_addr_end = it->upper(); + const VAddr inter_addr = it->lower(); + const size_t offset = inter_addr - m_vaddress; + const size_t sub_size = inter_addr_end - inter_addr; + m_internal->device_memory.Map(m_daddress + offset, m_vaddress + offset, sub_size, m_smmu_id); + } + 
} + m_internal->mapping_overlaps += std::make_pair(interval, 1); + m_internal->base_set.clear(); + return m_daddress + (start - m_vaddress); +} + +void HeapMapper::Unmap(VAddr start, size_t size) { + std::scoped_lock lk(m_internal->guard); + m_internal->base_set.clear(); + m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size, [this](VAddr start_addr, VAddr end_addr, int value) { + if (value <= 1) { + const IntervalType other{start_addr, end_addr}; + m_internal->base_set.insert(other); + } + }); + if (!m_internal->base_set.empty()) { + auto it = m_internal->base_set.begin(); + auto end_it = m_internal->base_set.end(); + for (; it != end_it; it++) { + const VAddr inter_addr_end = it->upper(); + const VAddr inter_addr = it->lower(); + const size_t offset = inter_addr - m_vaddress; + const size_t sub_size = inter_addr_end - inter_addr; + m_internal->device_memory.Unmap(m_daddress + offset, sub_size); + } + } + const IntervalType to_remove{start, start + size}; + m_internal->RemoveEachInOverlapCounter(m_internal->mapping_overlaps, to_remove, -1); + m_internal->base_set.clear(); +} + +} // namespace Service::Nvidia::NvCore \ No newline at end of file diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.h b/src/core/hle/service/nvdrv/core/heap_mapper.h new file mode 100644 index 0000000000..8b23638b87 --- /dev/null +++ b/src/core/hle/service/nvdrv/core/heap_mapper.h @@ -0,0 +1,48 @@ +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include + +#include "common/common_types.h" + +namespace Tegra::Host1x { +class Host1x; +} // namespace Tegra::Host1x + +namespace Service::Nvidia::NvCore { + +class HeapMapper { +public: + HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, size_t smmu_id, + Tegra::Host1x::Host1x& host1x); + ~HeapMapper(); + + bool IsInBounds(VAddr start, size_t size) const { + VAddr end = start + size; + return start >= m_vaddress && end <= 
(m_vaddress + m_size); + } + + DAddr Map(VAddr start, size_t size); + + void Unmap(VAddr start, size_t size); + + DAddr GetRegionStart() const { + return m_daddress; + } + + size_t GetRegionSize() const { + return m_size; + } + +private: + struct HeapMapperInternal; + VAddr m_vaddress; + DAddr m_daddress; + size_t m_size; + size_t m_smmu_id; + std::unique_ptr m_internal; +}; + +} // namespace Service::Nvidia::NvCore \ No newline at end of file diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp index 0b2ddd9805..023c070d95 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.cpp +++ b/src/core/hle/service/nvdrv/core/nvmap.cpp @@ -8,10 +8,12 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/hle/service/nvdrv/core/container.h" +#include "core/hle/service/nvdrv/core/heap_mapper.h" #include "core/hle/service/nvdrv/core/nvmap.h" #include "core/memory.h" #include "video_core/host1x/host1x.h" + using Core::Memory::YUZU_PAGESIZE; namespace Service::Nvidia::NvCore { @@ -90,10 +92,19 @@ void NvMap::UnmapHandle(Handle& handle_description) { } // Free and unmap the handle from the SMMU - auto& smmu = host1x.MemoryManager(); - smmu.Unmap(handle_description.d_address, handle_description.aligned_size); - smmu.Free(handle_description.d_address, static_cast(handle_description.aligned_size)); + const size_t map_size = handle_description.aligned_size; + if (!handle_description.in_heap) { + auto& smmu = host1x.MemoryManager(); + smmu.Unmap(handle_description.d_address, map_size); + smmu.Free(handle_description.d_address, static_cast(map_size)); + handle_description.d_address = 0; + return; + } + const VAddr vaddress = handle_description.address; + auto* session = core.GetSession(handle_description.session_id); + session->mapper->Unmap(vaddress, map_size); handle_description.d_address = 0; + handle_description.in_heap = false; } bool NvMap::TryRemoveHandle(const Handle& handle_description) { @@ -188,24 +199,31 @@ 
DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are DAddr address{}; auto& smmu = host1x.MemoryManager(); auto* session = core.GetSession(session_id); - while ((address = smmu.Allocate(handle_description->aligned_size)) == 0) { - // Free handles until the allocation succeeds - std::scoped_lock queueLock(unmap_queue_lock); - if (auto freeHandleDesc{unmap_queue.front()}) { - // Handles in the unmap queue are guaranteed not to be pinned so don't bother - // checking if they are before unmapping - std::scoped_lock freeLock(freeHandleDesc->mutex); - if (handle_description->d_address) - UnmapHandle(*freeHandleDesc); - } else { - LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!"); + const VAddr vaddress = handle_description->address; + const size_t map_size = handle_description->aligned_size; + handle_description->session_id = session_id; + if (session->has_preallocated_area && session->mapper->IsInBounds(vaddress, map_size)) { + handle_description->d_address = session->mapper->Map(vaddress, map_size); + handle_description->in_heap = true; + } else { + while ((address = smmu.Allocate(map_size)) == 0) { + // Free handles until the allocation succeeds + std::scoped_lock queueLock(unmap_queue_lock); + if (auto freeHandleDesc{unmap_queue.front()}) { + // Handles in the unmap queue are guaranteed not to be pinned so don't bother + // checking if they are before unmapping + std::scoped_lock freeLock(freeHandleDesc->mutex); + if (handle_description->d_address) + UnmapHandle(*freeHandleDesc); + } else { + LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!"); + } } + + handle_description->d_address = address; + smmu.Map(address, vaddress, map_size, session->smmu_id); + handle_description->in_heap = false; } - - handle_description->d_address = address; - - smmu.Map(address, handle_description->address, handle_description->aligned_size, - session->smmu_id); } if (low_area_pin) { diff --git a/src/core/hle/service/nvdrv/core/nvmap.h 
b/src/core/hle/service/nvdrv/core/nvmap.h index 7dd6d26c3c..4af61289e8 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.h +++ b/src/core/hle/service/nvdrv/core/nvmap.h @@ -70,6 +70,8 @@ public: u8 kind{}; //!< Used for memory compression bool allocated{}; //!< If the handle has been allocated with `Alloc` + bool in_heap{}; + size_t session_id{}; DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to, //!< this can also be in the nvdrv tmem diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 6ad3b94f85..609704b333 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -34,8 +34,6 @@ #include "video_core/renderer_base.h" #include "video_core/shader_notify.h" -#pragma optimize("", off) - namespace Tegra { struct GPU::Impl {