From 9bede4eeed523f9707a989f1297279c006086e76 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Thu, 18 Jul 2019 18:15:53 -0400
Subject: [PATCH] VM_Manager: Align allocated memory to 256 bytes

This commit ensures that all backing memory allocated for the Guest CPU
is aligned to 256 bytes. This is due to how GPU memory works and the
heavy constraints it places on the alignment of physical memory.
---
 src/common/alignment.h                  | 79 +++++++++++++++++++++++++
 src/core/hle/kernel/code_set.h          |  3 +-
 src/core/hle/kernel/physical_memory.h   | 13 ++++
 src/core/hle/kernel/process.cpp         |  6 +-
 src/core/hle/kernel/shared_memory.cpp   |  4 +-
 src/core/hle/kernel/shared_memory.h     | 13 ++--
 src/core/hle/kernel/transfer_memory.cpp |  2 +-
 src/core/hle/kernel/transfer_memory.h   |  3 +-
 src/core/hle/kernel/vm_manager.cpp      | 15 ++---
 src/core/hle/kernel/vm_manager.h        |  9 +--
 src/core/hle/service/ns/pl_u.cpp        | 12 ++--
 src/core/loader/elf.cpp                 |  2 +-
 src/core/loader/kip.cpp                 |  2 +-
 src/core/loader/nro.cpp                 |  2 +-
 src/core/loader/nso.cpp                 |  2 +-
 15 files changed, 131 insertions(+), 36 deletions(-)
 create mode 100644 src/core/hle/kernel/physical_memory.h

diff --git a/src/common/alignment.h b/src/common/alignment.h
index 617b14d9b7..b3fbdfe209 100644
--- a/src/common/alignment.h
+++ b/src/common/alignment.h
@@ -3,7 +3,10 @@
 #pragma once
 
 #include <cstddef>
+#include <cstdlib>
 #include <type_traits>
+#include <new>
+#include <vector>
 
 namespace Common {
 
@@ -37,4 +40,80 @@ constexpr bool IsWordAligned(T value) {
     return (value & 0b11) == 0;
 }
 
+template <typename T, std::size_t Align>
+class AlignmentAllocator {
+public:
+    typedef T value_type;
+    typedef std::size_t size_type;
+    typedef std::ptrdiff_t difference_type;
+
+    typedef T* pointer;
+    typedef const T* const_pointer;
+
+    typedef T& reference;
+    typedef const T& const_reference;
+
+public:
+    inline AlignmentAllocator() throw() {}
+
+    template <typename T2>
+    inline AlignmentAllocator(const AlignmentAllocator<T2, Align>&) throw() {}
+
+    inline ~AlignmentAllocator() throw() {}
+
+    inline pointer adress(reference r) {
+        return &r;
+    }
+
+    inline const_pointer adress(const_reference r) const {
+        return &r;
+    }
+
+#if (defined _MSC_VER)
+    inline pointer allocate(size_type n) {
+        return (pointer)_aligned_malloc(n * sizeof(value_type), Align);
+    }
+
+    inline void deallocate(pointer p, size_type) {
+        _aligned_free(p);
+    }
+#else
+    inline pointer allocate(size_type n) {
+        return (pointer)std::aligned_alloc(Align, n * sizeof(value_type));
+    }
+
+    inline void deallocate(pointer p, size_type) {
+        std::free(p);
+    }
+#endif
+
+    inline void construct(pointer p, const value_type& wert) {
+        new (p) value_type(wert);
+    }
+
+    inline void destroy(pointer p) {
+        p->~value_type();
+    }
+
+    inline size_type max_size() const throw() {
+        return size_type(-1) / sizeof(value_type);
+    }
+
+    template <typename T2>
+    struct rebind {
+        typedef AlignmentAllocator<T2, Align> other;
+    };
+
+    bool operator!=(const AlignmentAllocator& other) const {
+        return !(*this == other);
+    }
+
+    // Returns true if and only if storage allocated from *this
+    // can be deallocated from other, and vice versa.
+    // Always returns true for stateless allocators.
+    bool operator==(const AlignmentAllocator& other) const {
+        return true;
+    }
+};
+
 } // namespace Common
diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h
index 879957dcb1..d8ad540309 100644
--- a/src/core/hle/kernel/code_set.h
+++ b/src/core/hle/kernel/code_set.h
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "common/common_types.h"
+#include "core/hle/kernel/physical_memory.h"
 
 namespace Kernel {
 
@@ -77,7 +78,7 @@ struct CodeSet final {
     }
 
     /// The overall data that backs this code set.
-    std::vector<u8> memory;
+    Kernel::PhysicalMemory memory;
 
     /// The segments that comprise this code set.
     std::array<Segment, 3> segments;
diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h
new file mode 100644
index 0000000000..dd49c75a27
--- /dev/null
+++ b/src/core/hle/kernel/physical_memory.h
@@ -0,0 +1,13 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/alignment.h"
+
+namespace Kernel {
+
+using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
+
+}
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 92169a97b0..e80a12ac35 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -247,7 +247,7 @@ VAddr Process::CreateTLSRegion() {
     ASSERT(region_address.Succeeded());
 
     const auto map_result = vm_manager.MapMemoryBlock(
-        *region_address, std::make_shared<std::vector<u8>>(Memory::PAGE_SIZE), 0,
+        *region_address, std::make_shared<PhysicalMemory>(Memory::PAGE_SIZE), 0,
         Memory::PAGE_SIZE, MemoryState::ThreadLocal);
     ASSERT(map_result.Succeeded());
 
@@ -277,7 +277,7 @@ void Process::FreeTLSRegion(VAddr tls_address) {
 }
 
 void Process::LoadModule(CodeSet module_, VAddr base_addr) {
-    const auto memory = std::make_shared<std::vector<u8>>(std::move(module_.memory));
+    const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory));
 
     const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
                                 MemoryState memory_state) {
@@ -327,7 +327,7 @@ void Process::AllocateMainThreadStack(u64 stack_size) {
     // Allocate and map the main thread stack
     const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
     vm_manager
-        .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
+        .MapMemoryBlock(mapping_address, std::make_shared<PhysicalMemory>(main_thread_stack_size),
                         0, main_thread_stack_size, MemoryState::Stack)
         .Unwrap();
 }
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index f15c5ee362..45a9e19422 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -28,7 +28,7 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
     shared_memory->other_permissions = other_permissions;
 
     if (address == 0) {
-        shared_memory->backing_block = std::make_shared<std::vector<u8>>(size);
+        shared_memory->backing_block = std::make_shared<PhysicalMemory>(size);
         shared_memory->backing_block_offset = 0;
 
         // Refresh the address mappings for the current process.
@@ -59,7 +59,7 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
 }
 
 SharedPtr<SharedMemory> SharedMemory::CreateForApplet(
-    KernelCore& kernel, std::shared_ptr<std::vector<u8>> heap_block, std::size_t offset, u64 size,
+    KernelCore& kernel, std::shared_ptr<PhysicalMemory> heap_block, std::size_t offset, u64 size,
     MemoryPermission permissions, MemoryPermission other_permissions, std::string name) {
     SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel));
 
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index c2b6155e18..01ca6dcd22 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -10,6 +10,7 @@
 
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
+#include "core/hle/kernel/physical_memory.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/result.h"
 
@@ -62,12 +63,10 @@ public:
      * block.
     * @param name Optional object name, used for debugging purposes.
     */
-    static SharedPtr<SharedMemory> CreateForApplet(KernelCore& kernel,
-                                                   std::shared_ptr<std::vector<u8>> heap_block,
-                                                   std::size_t offset, u64 size,
-                                                   MemoryPermission permissions,
-                                                   MemoryPermission other_permissions,
-                                                   std::string name = "Unknown Applet");
+    static SharedPtr<SharedMemory> CreateForApplet(
+        KernelCore& kernel, std::shared_ptr<PhysicalMemory> heap_block, std::size_t offset,
+        u64 size, MemoryPermission permissions, MemoryPermission other_permissions,
+        std::string name = "Unknown Applet");
 
     std::string GetTypeName() const override {
         return "SharedMemory";
@@ -135,7 +134,7 @@ private:
     ~SharedMemory() override;
 
     /// Backing memory for this shared memory block.
-    std::shared_ptr<std::vector<u8>> backing_block;
+    std::shared_ptr<PhysicalMemory> backing_block;
     /// Offset into the backing block for this shared memory.
     std::size_t backing_block_offset = 0;
     /// Size of the memory block. Page-aligned.
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp
index 26c4e5e674..1113c815ec 100644
--- a/src/core/hle/kernel/transfer_memory.cpp
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -47,7 +47,7 @@ ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission p
         return ERR_INVALID_STATE;
     }
 
-    backing_block = std::make_shared<std::vector<u8>>(size);
+    backing_block = std::make_shared<PhysicalMemory>(size);
 
     const auto map_state = owner_permissions == MemoryPermission::None
                                ? MemoryState::TransferMemoryIsolated
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h
index a140b1e2bb..6be9dc0946 100644
--- a/src/core/hle/kernel/transfer_memory.h
+++ b/src/core/hle/kernel/transfer_memory.h
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "core/hle/kernel/object.h"
+#include "core/hle/kernel/physical_memory.h"
 
 union ResultCode;
 
@@ -82,7 +83,7 @@ private:
     ~TransferMemory() override;
 
     /// Memory block backing this instance.
-    std::shared_ptr<std::vector<u8>> backing_block;
+    std::shared_ptr<PhysicalMemory> backing_block;
 
     /// The base address for the memory managed by this instance.
     VAddr base_address = 0;
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 4f45fb03b3..40cea1e7cc 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <iterator>
 #include <utility>
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/memory_hook.h"
@@ -103,7 +104,7 @@ bool VMManager::IsValidHandle(VMAHandle handle) const {
 }
 
 ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
-                                                          std::shared_ptr<std::vector<u8>> block,
+                                                          std::shared_ptr<PhysicalMemory> block,
                                                           std::size_t offset, u64 size,
                                                           MemoryState state, VMAPermission perm) {
     ASSERT(block != nullptr);
@@ -260,7 +261,7 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
 
     if (heap_memory == nullptr) {
         // Initialize heap
-        heap_memory = std::make_shared<std::vector<u8>>(size);
+        heap_memory = std::make_shared<PhysicalMemory>(size);
         heap_end = heap_region_base + size;
     } else {
         UnmapRange(heap_region_base, GetCurrentHeapSize());
@@ -341,7 +342,7 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
         const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
         if (vma.state == MemoryState::Unmapped) {
             const auto map_res =
-                MapMemoryBlock(cur_addr, std::make_shared<std::vector<u8>>(map_size, 0), 0,
+                MapMemoryBlock(cur_addr, std::make_shared<PhysicalMemory>(map_size, 0), 0,
                                map_size, MemoryState::Heap, VMAPermission::ReadWrite);
             result = map_res.Code();
             if (result.IsError()) {
@@ -442,7 +443,7 @@ ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) {
     if (result.IsError()) {
         for (const auto [map_address, map_size] : unmapped_regions) {
             const auto remap_res =
-                MapMemoryBlock(map_address, std::make_shared<std::vector<u8>>(map_size, 0), 0,
+                MapMemoryBlock(map_address, std::make_shared<PhysicalMemory>(map_size, 0), 0,
                                map_size, MemoryState::Heap, VMAPermission::None);
             ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error");
         }
@@ -593,7 +594,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
     ASSERT_MSG(vma_offset + size <= vma->second.size,
               "Shared memory exceeds bounds of mapped block");
 
-    const std::shared_ptr<std::vector<u8>>& backing_block = vma->second.backing_block;
+    const std::shared_ptr<PhysicalMemory>& backing_block = vma->second.backing_block;
     const std::size_t backing_block_offset = vma->second.offset + vma_offset;
 
     CASCADE_RESULT(auto new_vma,
@@ -606,7 +607,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
     return RESULT_SUCCESS;
 }
 
-void VMManager::RefreshMemoryBlockMappings(const std::vector<u8>* block) {
+void VMManager::RefreshMemoryBlockMappings(const PhysicalMemory* block) {
     // If this ever proves to have a noticeable performance impact, allow users of the function to
     // specify a specific range of addresses to limit the scan to.
     for (const auto& p : vma_map) {
@@ -764,7 +765,7 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre
                                         right.backing_block->begin() + right.offset + right.size);
     } else {
         // Slow case: make a new memory block for left and right.
-        auto new_memory = std::make_shared<std::vector<u8>>();
+        auto new_memory = std::make_shared<PhysicalMemory>();
         new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset,
                            left.backing_block->begin() + left.offset + left.size);
         new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset,
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 0aecb74991..b18cde6197 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -11,6 +11,7 @@
 #include "common/common_types.h"
 #include "common/memory_hook.h"
 #include "common/page_table.h"
+#include "core/hle/kernel/physical_memory.h"
 #include "core/hle/result.h"
 #include "core/memory.h"
 
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
     // Settings for type = AllocatedMemoryBlock
 
     /// Memory block backing this VMA.
-    std::shared_ptr<std::vector<u8>> backing_block = nullptr;
+    std::shared_ptr<PhysicalMemory> backing_block = nullptr;
     /// Offset into the backing_memory the mapping starts from.
     std::size_t offset = 0;
 
@@ -348,7 +349,7 @@ public:
      * @param size Size of the mapping.
      * @param state MemoryState tag to attach to the VMA.
      */
-    ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block,
+    ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<PhysicalMemory> block,
                                         std::size_t offset, u64 size, MemoryState state,
                                         VMAPermission perm = VMAPermission::ReadWrite);
 
@@ -547,7 +548,7 @@ public:
      * Scans all VMAs and updates the page table range of any that use the given vector as backing
      * memory. This should be called after any operation that causes reallocation of the vector.
      */
-    void RefreshMemoryBlockMappings(const std::vector<u8>* block);
+    void RefreshMemoryBlockMappings(const PhysicalMemory* block);
 
     /// Dumps the address space layout to the log, for debugging
     void LogLayout() const;
@@ -777,7 +778,7 @@ private:
     // the entire virtual address space extents that bound the allocations, including any holes.
     // This makes deallocation and reallocation of holes fast and keeps process memory contiguous
     // in the emulator address space, allowing Memory::GetPointer to be reasonably safe.
-    std::shared_ptr<std::vector<u8>> heap_memory;
+    std::shared_ptr<PhysicalMemory> heap_memory;
 
     // The end of the currently allocated heap. This is not an inclusive
     // end of the range. This is essentially 'base_address + current_size'.
diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp
index ad176f89df..2a522136d0 100644
--- a/src/core/hle/service/ns/pl_u.cpp
+++ b/src/core/hle/service/ns/pl_u.cpp
@@ -77,7 +77,7 @@ enum class LoadState : u32 {
     Done = 1,
 };
 
-static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output,
+static void DecryptSharedFont(const std::vector<u32>& input, Kernel::PhysicalMemory& output,
                               std::size_t& offset) {
     ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE,
                "Shared fonts exceeds 17mb!");
@@ -94,7 +94,7 @@ static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& ou
     offset += transformed_font.size() * sizeof(u32);
 }
 
-static void EncryptSharedFont(const std::vector<u8>& input, std::vector<u8>& output,
+static void EncryptSharedFont(const std::vector<u8>& input, Kernel::PhysicalMemory& output,
                               std::size_t& offset) {
     ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!");
     const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT;
@@ -121,7 +121,7 @@ struct PL_U::Impl {
         return shared_font_regions.at(index);
     }
 
-    void BuildSharedFontsRawRegions(const std::vector<u8>& input) {
+    void BuildSharedFontsRawRegions(const Kernel::PhysicalMemory& input) {
         // As we can derive the xor key we can just populate the offsets
         // based on the shared memory dump
        unsigned cur_offset = 0;
@@ -144,7 +144,7 @@ struct PL_U::Impl {
     Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem;
 
     /// Backing memory for the shared font data
-    std::shared_ptr<std::vector<u8>> shared_font;
+    std::shared_ptr<Kernel::PhysicalMemory> shared_font;
 
     // Automatically populated based on shared_fonts dump or system archives.
     std::vector<FontRegion> shared_font_regions;
@@ -166,7 +166,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} {
     // Rebuild shared fonts from data ncas
     if (nand->HasEntry(static_cast<u64>(FontArchives::Standard),
                        FileSys::ContentRecordType::Data)) {
-        impl->shared_font = std::make_shared<std::vector<u8>>(SHARED_FONT_MEM_SIZE);
+        impl->shared_font = std::make_shared<Kernel::PhysicalMemory>(SHARED_FONT_MEM_SIZE);
         for (auto font : SHARED_FONTS) {
             const auto nca = nand->GetEntry(static_cast<u64>(font.first),
                                             FileSys::ContentRecordType::Data);
@@ -207,7 +207,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} {
         }
 
     } else {
-        impl->shared_font = std::make_shared<std::vector<u8>>(
+        impl->shared_font = std::make_shared<Kernel::PhysicalMemory>(
             SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size
 
         const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir);
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 6d4b023758..f1795fdd62 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -295,7 +295,7 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
         }
     }
 
-    std::vector<u8> program_image(total_image_size);
+    Kernel::PhysicalMemory program_image(total_image_size);
     std::size_t current_image_position = 0;
 
     Kernel::CodeSet codeset;
diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp
index 70051c13ae..474b55cb13 100644
--- a/src/core/loader/kip.cpp
+++ b/src/core/loader/kip.cpp
@@ -69,7 +69,7 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) {
     const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
 
     Kernel::CodeSet codeset;
-    std::vector<u8> program_image;
+    Kernel::PhysicalMemory program_image;
 
     const auto load_segment = [&program_image](Kernel::CodeSet::Segment& segment,
                                                const std::vector<u8>& data, u32 offset) {
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 6a0ca389b0..e92e2e06ea 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -143,7 +143,7 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data,
     }
 
     // Build program image
-    std::vector<u8> program_image(PageAlignSize(nro_header.file_size));
+    Kernel::PhysicalMemory program_image(PageAlignSize(nro_header.file_size));
     std::memcpy(program_image.data(), data.data(), program_image.size());
     if (program_image.size() != PageAlignSize(nro_header.file_size)) {
         return {};
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 29311404a7..70c90109f3 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -89,7 +89,7 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
 
     // Build program image
     Kernel::CodeSet codeset;
-    std::vector<u8> program_image;
+    Kernel::PhysicalMemory program_image;
     for (std::size_t i = 0; i < nso_header.segments.size(); ++i) {
         std::vector<u8> data = file.ReadBytes(nso_header.segments_compressed_size[i],
                                               nso_header.segments[i].offset);
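
Illustration only, not part of the patch: the snippet below is a minimal, self-contained C++17 sketch of the idea the patch applies, namely backing std::vector with an over-aligning allocator and then aliasing that vector type as the unit of guest backing memory. It substitutes C++17 aligned operator new for the _aligned_malloc/std::aligned_alloc pair used by Common::AlignmentAllocator, and the names AlignedAllocator and PhysicalMemory here only mirror the ones introduced above; they are not the yuzu definitions.

// A trimmed sketch of the technique: give std::vector an allocator that
// over-aligns its storage, then use the resulting vector type wherever a
// 256-byte-aligned block of guest memory is required.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <new>
#include <vector>

template <typename T, std::size_t Align>
struct AlignedAllocator {
    using value_type = T;

    AlignedAllocator() noexcept = default;
    template <typename U>
    AlignedAllocator(const AlignedAllocator<U, Align>&) noexcept {}

    // Allow containers to rebind the allocator while keeping the alignment.
    template <typename U>
    struct rebind {
        using other = AlignedAllocator<U, Align>;
    };

    T* allocate(std::size_t n) {
        // C++17 aligned operator new guarantees the requested alignment.
        return static_cast<T*>(::operator new(n * sizeof(T), std::align_val_t{Align}));
    }
    void deallocate(T* p, std::size_t) noexcept {
        ::operator delete(p, std::align_val_t{Align});
    }

    // Stateless: any two instances can free each other's allocations.
    bool operator==(const AlignedAllocator&) const noexcept { return true; }
    bool operator!=(const AlignedAllocator&) const noexcept { return false; }
};

// Mirrors the alias added in src/core/hle/kernel/physical_memory.h.
using PhysicalMemory = std::vector<std::uint8_t, AlignedAllocator<std::uint8_t, 256>>;

int main() {
    PhysicalMemory block(0x1000); // e.g. one page of guest backing memory
    const auto addr = reinterpret_cast<std::uintptr_t>(block.data());
    std::cout << "256-byte aligned: " << std::boolalpha << (addr % 256 == 0) << '\n';
}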