From a7837a3791562899bf5e0e98aef851a2f4aaf376 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sat, 5 Jun 2021 06:23:25 -0300 Subject: [PATCH 01/11] common/host_memory: Add interface and Windows implementation --- src/common/CMakeLists.txt | 2 + src/common/host_memory.cpp | 320 +++++++++++++++++++++++++++++++++++++ src/common/host_memory.h | 62 +++++++ 3 files changed, 384 insertions(+) create mode 100644 src/common/host_memory.cpp create mode 100644 src/common/host_memory.h diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2d403d471e..97fbdcbf9f 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -131,6 +131,8 @@ add_library(common STATIC hash.h hex_util.cpp hex_util.h + host_memory.cpp + host_memory.h intrusive_red_black_tree.h logging/backend.cpp logging/backend.h diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp new file mode 100644 index 0000000000..4f5086e909 --- /dev/null +++ b/src/common/host_memory.cpp @@ -0,0 +1,320 @@ +#ifdef __linux__ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <fcntl.h> +#include <sys/mman.h> +#include <unistd.h> +#elif defined(_WIN32) // ^^^ Linux ^^^ vvv Windows vvv +#ifdef _WIN32_WINNT +#undef _WIN32_WINNT +#endif +#define _WIN32_WINNT 0x0A00 // Windows 10 + +#include <windows.h> + +#include <boost/icl/separate_interval_set.hpp> + +#include <iterator> +#include <unordered_map> + +#pragma comment(lib, "mincore.lib") + +#endif // ^^^ Windows ^^^ + +#include <mutex> + +#include "common/assert.h" +#include "common/host_memory.h" +#include "common/logging/log.h" + +namespace Common { + +constexpr size_t PageAlignment = 0x1000; + +#ifdef _WIN32 + +class HostMemory::Impl { +public: + explicit Impl(size_t backing_size_, size_t virtual_size_) + : backing_size{backing_size_}, virtual_size{virtual_size_}, process{GetCurrentProcess()} { + // Allocate backing file map + backing_handle = + CreateFileMapping2(INVALID_HANDLE_VALUE, nullptr, 
FILE_MAP_WRITE | FILE_MAP_READ, + PAGE_READWRITE, SEC_COMMIT, backing_size, nullptr, nullptr, 0); + if (!backing_handle) { + throw std::bad_alloc{}; + } + // Allocate a virtual memory for the backing file map as placeholder + backing_base = static_cast<u8*>(VirtualAlloc2(process, nullptr, backing_size, + MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, + PAGE_NOACCESS, nullptr, 0)); + if (!backing_base) { + Release(); + throw std::bad_alloc{}; + } + // Map backing placeholder + void* const ret = MapViewOfFile3(backing_handle, process, backing_base, 0, backing_size, + MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0); + if (ret != backing_base) { + Release(); + throw std::bad_alloc{}; + } + // Allocate virtual address placeholder + virtual_base = static_cast<u8*>(VirtualAlloc2(process, nullptr, virtual_size, + MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, + PAGE_NOACCESS, nullptr, 0)); + if (!virtual_base) { + Release(); + throw std::bad_alloc{}; + } + } + + ~Impl() { + Release(); + } + + void Map(size_t virtual_offset, size_t host_offset, size_t length) { + std::unique_lock lock{placeholder_mutex}; + if (!IsNiechePlaceholder(virtual_offset, length)) { + Split(virtual_offset, length); + } + ASSERT(placeholders.find({virtual_offset, virtual_offset + length}) == placeholders.end()); + TrackPlaceholder(virtual_offset, host_offset, length); + + MapView(virtual_offset, host_offset, length); + } + + void Unmap(size_t virtual_offset, size_t length) { + std::lock_guard lock{placeholder_mutex}; + + // Unmap until there are no more placeholders + while (UnmapOnePlaceholder(virtual_offset, length)) { + } + } + + void Protect(size_t virtual_offset, size_t length, bool read, bool write) { + DWORD new_flags{}; + if (read && write) { + new_flags = PAGE_READWRITE; + } else if (read && !write) { + new_flags = PAGE_READONLY; + } else if (!read && !write) { + new_flags = PAGE_NOACCESS; + } else { + UNIMPLEMENTED_MSG("Protection flag combination read={} write={}", read, write); + } + DWORD 
old_flags{}; + if (!VirtualProtect(virtual_base + virtual_offset, length, new_flags, &old_flags)) { + LOG_CRITICAL(HW_Memory, "Failed to change virtual memory protect rules"); + } + } + + const size_t backing_size; ///< Size of the backing memory in bytes + const size_t virtual_size; ///< Size of the virtual address placeholder in bytes + + u8* backing_base{}; + u8* virtual_base{}; + +private: + /// Release all resources in the object + void Release() { + if (!placeholders.empty()) { + for (const auto& placeholder : placeholders) { + if (!UnmapViewOfFile2(process, virtual_base + placeholder.lower(), + MEM_PRESERVE_PLACEHOLDER)) { + LOG_CRITICAL(HW_Memory, "Failed to unmap virtual memory placeholder"); + } + } + Coalesce(0, virtual_size); + } + if (virtual_base) { + if (!VirtualFree(virtual_base, 0, MEM_RELEASE)) { + LOG_CRITICAL(HW_Memory, "Failed to free virtual memory"); + } + } + if (backing_base) { + if (!UnmapViewOfFile2(process, backing_base, MEM_PRESERVE_PLACEHOLDER)) { + LOG_CRITICAL(HW_Memory, "Failed to unmap backing memory placeholder"); + } + if (!VirtualFreeEx(process, backing_base, 0, MEM_RELEASE)) { + LOG_CRITICAL(HW_Memory, "Failed to free backing memory"); + } + } + if (!CloseHandle(backing_handle)) { + LOG_CRITICAL(HW_Memory, "Failed to free backing memory file handle"); + } + } + + /// Unmap one placeholder in the given range (partial unmaps are supported) + /// Return true when a placeholder was unmapped, false when there was nothing left to unmap + bool UnmapOnePlaceholder(size_t virtual_offset, size_t length) { + const auto it = placeholders.find({virtual_offset, virtual_offset + length}); + const auto begin = placeholders.begin(); + const auto end = placeholders.end(); + if (it == end) { + return false; + } + const size_t placeholder_begin = it->lower(); + const size_t placeholder_end = it->upper(); + const size_t unmap_begin = std::max(virtual_offset, placeholder_begin); + const size_t unmap_end = std::min(virtual_offset + length, placeholder_end); + ASSERT(unmap_begin >= 
placeholder_begin && unmap_begin < placeholder_end); + ASSERT(unmap_end <= placeholder_end && unmap_end > placeholder_begin); + + const auto host_pointer_it = placeholder_host_pointers.find(placeholder_begin); + ASSERT(host_pointer_it != placeholder_host_pointers.end()); + const size_t host_offset = host_pointer_it->second; + + const bool split_left = unmap_begin > placeholder_begin; + const bool split_right = unmap_end < placeholder_end; + + if (!UnmapViewOfFile2(process, virtual_base + placeholder_begin, + MEM_PRESERVE_PLACEHOLDER)) { + LOG_CRITICAL(HW_Memory, "Failed to unmap placeholder"); + } + // If we have to remap memory regions due to partial unmaps, we are in a data race as + // Windows doesn't support remapping memory without unmapping first. Avoid adding any extra + // logic within the panic region described below. + + // Panic region, we are in a data race right now + if (split_left || split_right) { + Split(unmap_begin, unmap_end - unmap_begin); + } + if (split_left) { + MapView(placeholder_begin, host_offset, unmap_begin - placeholder_begin); + } + if (split_right) { + MapView(unmap_end, host_offset + unmap_end - placeholder_begin, + placeholder_end - unmap_end); + } + // End panic region + + size_t coalesce_begin = unmap_begin; + if (!split_left) { + // Try to coalesce pages to the left + coalesce_begin = it == begin ? 0 : std::prev(it)->upper(); + if (coalesce_begin != placeholder_begin) { + Coalesce(coalesce_begin, unmap_end - coalesce_begin); + } + } + if (!split_right) { + // Try to coalesce pages to the right + const auto next = std::next(it); + const size_t next_begin = next == end ? 
virtual_size : next->lower(); + if (placeholder_end != next_begin) { + // We can coalesce to the right + Coalesce(coalesce_begin, next_begin - coalesce_begin); + } + } + // Remove and reinsert placeholder trackers + UntrackPlaceholder(it); + if (split_left) { + TrackPlaceholder(placeholder_begin, host_offset, unmap_begin - placeholder_begin); + } + if (split_right) { + TrackPlaceholder(unmap_end, host_offset + unmap_end - placeholder_begin, + placeholder_end - unmap_end); + } + return true; + } + + void MapView(size_t virtual_offset, size_t host_offset, size_t length) { + if (!MapViewOfFile3(backing_handle, process, virtual_base + virtual_offset, host_offset, + length, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0)) { + LOG_CRITICAL(HW_Memory, "Failed to map placeholder"); + } + } + + void Split(size_t virtual_offset, size_t length) { + if (!VirtualFreeEx(process, reinterpret_cast<LPVOID>(virtual_base + virtual_offset), length, + MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER)) { + LOG_CRITICAL(HW_Memory, "Failed to split placeholder"); + } + } + + void Coalesce(size_t virtual_offset, size_t length) { + if (!VirtualFreeEx(process, reinterpret_cast<LPVOID>(virtual_base + virtual_offset), length, + MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS)) { + LOG_CRITICAL(HW_Memory, "Failed to coalesce placeholders"); + } + } + + void TrackPlaceholder(size_t virtual_offset, size_t host_offset, size_t length) { + placeholders.insert({virtual_offset, virtual_offset + length}); + placeholder_host_pointers.emplace(virtual_offset, host_offset); + } + + void UntrackPlaceholder(boost::icl::separate_interval_set<size_t>::iterator it) { + placeholders.erase(it); + placeholder_host_pointers.erase(it->lower()); + } + + /// Return true when a given memory region is a "nieche" and the placeholders don't have to be + /// splitted. 
+ bool IsNiechePlaceholder(size_t virtual_offset, size_t length) const { + const auto it = placeholders.upper_bound({virtual_offset, virtual_offset + length}); + if (it != placeholders.end() && it->lower() == virtual_offset + length) { + const bool is_root = it == placeholders.begin() && virtual_offset == 0; + return is_root || std::prev(it)->upper() == virtual_offset; + } + return false; + } + + HANDLE process{}; ///< Current process handle + HANDLE backing_handle{}; ///< File based backing memory + + std::mutex placeholder_mutex; ///< Mutex for placeholders + boost::icl::separate_interval_set<size_t> placeholders; ///< Mapped placeholders + std::unordered_map<size_t, size_t> placeholder_host_pointers; ///< Placeholder backing offset +}; + +#else + +#error Please implement the host memory for your platform + +#endif + +HostMemory::HostMemory(size_t backing_size, size_t virtual_size) + : impl{std::make_unique<HostMemory::Impl>(backing_size, virtual_size)}, + backing_base{impl->backing_base}, virtual_base{impl->virtual_base} {} + +HostMemory::~HostMemory() = default; + +HostMemory::HostMemory(HostMemory&&) noexcept = default; + +HostMemory& HostMemory::operator=(HostMemory&&) noexcept = default; + +void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length) { + ASSERT(virtual_offset % PageAlignment == 0); + ASSERT(host_offset % PageAlignment == 0); + ASSERT(length % PageAlignment == 0); + ASSERT(virtual_offset + length <= impl->virtual_size); + ASSERT(host_offset + length <= impl->backing_size); + if (length == 0) { + return; + } + impl->Map(virtual_offset, host_offset, length); +} + +void HostMemory::Unmap(size_t virtual_offset, size_t length) { + ASSERT(virtual_offset % PageAlignment == 0); + ASSERT(length % PageAlignment == 0); + ASSERT(virtual_offset + length <= impl->virtual_size); + if (length == 0) { + return; + } + impl->Unmap(virtual_offset, length); +} + +void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool 
write) { + ASSERT(virtual_offset % PageAlignment == 0); + ASSERT(length % PageAlignment == 0); + ASSERT(virtual_offset + length <= impl->virtual_size); + if (length == 0) { + return; + } + impl->Protect(virtual_offset, length, read, write); +} + +} // namespace Common diff --git a/src/common/host_memory.h b/src/common/host_memory.h new file mode 100644 index 0000000000..98005df7a2 --- /dev/null +++ b/src/common/host_memory.h @@ -0,0 +1,62 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include "common/common_types.h" + +namespace Common { + +/** + * A low level linear memory buffer, which supports multiple mappings + * Its purpose is to rebuild a given sparse memory layout, including mirrors. + */ +class HostMemory { +public: + explicit HostMemory(size_t backing_size, size_t virtual_size); + ~HostMemory(); + + /** + * Copy constructors. They shall return a copy of the buffer without the mappings. + * TODO: Implement them with COW if needed. + */ + HostMemory(const HostMemory& other) = delete; + HostMemory& operator=(const HostMemory& other) = delete; + + /** + * Move constructors. They will move the buffer and the mappings to the new object. 
+ */ + HostMemory(HostMemory&& other) noexcept; + HostMemory& operator=(HostMemory&& other) noexcept; + + void Map(size_t virtual_offset, size_t host_offset, size_t length); + + void Unmap(size_t virtual_offset, size_t length); + + void Protect(size_t virtual_offset, size_t length, bool read, bool write); + + [[nodiscard]] u8* BackingBasePointer() noexcept { + return backing_base; + } + [[nodiscard]] const u8* BackingBasePointer() const noexcept { + return backing_base; + } + + [[nodiscard]] u8* VirtualBasePointer() noexcept { + return virtual_base; + } + [[nodiscard]] const u8* VirtualBasePointer() const noexcept { + return virtual_base; + } + +private: + // Low level handler for the platform dependent memory routines + class Impl; + std::unique_ptr<Impl> impl; + u8* backing_base{}; + u8* virtual_base{}; +}; + +} // namespace Common From 5105318bbc6843de14f3f949515007d9bf76aa7b Mon Sep 17 00:00:00 2001 From: Markus Wick <markus@selfnet.de> Date: Sat, 5 Jun 2021 10:07:26 +0200 Subject: [PATCH 02/11] common/host_memory: Add Linux implementation --- src/common/host_memory.cpp | 130 ++++++++++++++++++++++++++++++++++--- 1 file changed, 120 insertions(+), 10 deletions(-) diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index 4f5086e909..eb50fbd9f8 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -1,11 +1,5 @@ -#ifdef __linux__ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif -#include <fcntl.h> -#include <sys/mman.h> -#include <unistd.h> -#elif defined(_WIN32) // ^^^ Linux ^^^ vvv Windows vvv +#ifdef _WIN32 + #ifdef _WIN32_WINNT #undef _WIN32_WINNT #endif @@ -20,13 +14,23 @@ #pragma comment(lib, "mincore.lib") -#endif // ^^^ Windows ^^^ +#elif defined(__linux__) // ^^^ Windows ^^^ vvv Linux vvv + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <fcntl.h> +#include <sys/mman.h> +#include <unistd.h> + +#endif // ^^^ Linux ^^^ #include <mutex> #include "common/assert.h" #include "common/host_memory.h" #include 
"common/logging/log.h" +#include "common/scope_exit.h" namespace Common { @@ -269,7 +273,113 @@ private: std::unordered_map<size_t, size_t> placeholder_host_pointers; ///< Placeholder backing offset }; -#else +#elif defined(__linux__) // ^^^ Windows ^^^ vvv Linux vvv + +class HostMemory::Impl { +public: + explicit Impl(size_t backing_size_, size_t virtual_size_) + : backing_size{backing_size_}, virtual_size{virtual_size_} { + bool good = false; + SCOPE_EXIT({ + if (!good) { + Release(); + } + }); + + // Backing memory initialization + fd = memfd_create("HostMemory", 0); + if (fd == -1) { + LOG_CRITICAL(HW_Memory, "memfd_create failed: {}", strerror(errno)); + throw std::bad_alloc{}; + } + + // Defined to extend the file with zeros + int ret = ftruncate(fd, backing_size); + if (ret != 0) { + LOG_CRITICAL(HW_Memory, "ftruncate failed with {}, are you out-of-memory?", + strerror(errno)); + throw std::bad_alloc{}; + } + + backing_base = static_cast<u8*>( + mmap(nullptr, backing_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)); + if (backing_base == MAP_FAILED) { + LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno)); + throw std::bad_alloc{}; + } + + // Virtual memory initialization + virtual_base = static_cast<u8*>( + mmap(nullptr, virtual_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + if (virtual_base == MAP_FAILED) { + LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno)); + throw std::bad_alloc{}; + } + + good = true; + } + + ~Impl() { + Release(); + } + + void Map(size_t virtual_offset, size_t host_offset, size_t length) { + + void* ret = mmap(virtual_base + virtual_offset, length, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, fd, host_offset); + ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno)); + } + + void Unmap(size_t virtual_offset, size_t length) { + // The method name is wrong. We're still talking about the virtual range. + // We don't want to unmap, we want to reserve this memory. 
+ + void* ret = mmap(virtual_base + virtual_offset, length, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno)); + } + + void Protect(size_t virtual_offset, size_t length, bool read, bool write) { + int flags = 0; + if (read) { + flags |= PROT_READ; + } + if (write) { + flags |= PROT_WRITE; + } + int ret = mprotect(virtual_base + virtual_offset, length, flags); + ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno)); + } + + const size_t backing_size; ///< Size of the backing memory in bytes + const size_t virtual_size; ///< Size of the virtual address placeholder in bytes + + u8* backing_base{reinterpret_cast<u8*>(MAP_FAILED)}; + u8* virtual_base{reinterpret_cast<u8*>(MAP_FAILED)}; + +private: + /// Release all resources in the object + void Release() { + if (virtual_base != MAP_FAILED) { + int ret = munmap(virtual_base, virtual_size); + ASSERT_MSG(ret == 0, "munmap failed: {}", strerror(errno)); + } + + if (backing_base != MAP_FAILED) { + int ret = munmap(backing_base, backing_size); + ASSERT_MSG(ret == 0, "munmap failed: {}", strerror(errno)); + } + + if (fd != -1) { + int ret = close(fd); + ASSERT_MSG(ret == 0, "close failed: {}", strerror(errno)); + } + } + + int fd{-1}; // memfd file descriptor, -1 is the error value of memfd_create +}; + +#else // ^^^ Linux ^^^ #error Please implement the host memory for your platform From 740edacc8dd03a8dccdd194ffed8e2b5ec490f73 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sat, 5 Jun 2021 04:16:32 -0300 Subject: [PATCH 03/11] tests: Add tests for host memory --- src/tests/CMakeLists.txt | 1 + src/tests/common/host_memory.cpp | 183 +++++++++++++++++++++++++++++++ 2 files changed, 184 insertions(+) create mode 100644 src/tests/common/host_memory.cpp diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index d875c4fee1..96bc30cac3 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ 
-2,6 +2,7 @@ add_executable(tests common/bit_field.cpp common/cityhash.cpp common/fibers.cpp + common/host_memory.cpp common/param_package.cpp common/ring_buffer.cpp core/core_timing.cpp diff --git a/src/tests/common/host_memory.cpp b/src/tests/common/host_memory.cpp new file mode 100644 index 0000000000..e241f8be51 --- /dev/null +++ b/src/tests/common/host_memory.cpp @@ -0,0 +1,183 @@ +// Copyright 2021 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <catch2/catch.hpp> + +#include "common/host_memory.h" + +using Common::HostMemory; + +static constexpr size_t VIRTUAL_SIZE = 1ULL << 39; +static constexpr size_t BACKING_SIZE = 4ULL * 1024 * 1024 * 1024; + +TEST_CASE("HostMemory: Initialize and deinitialize", "[common]") { + { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); } + { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); } +} + +TEST_CASE("HostMemory: Simple map", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x5000, 0x8000, 0x1000); + + volatile u8* const data = mem.VirtualBasePointer() + 0x5000; + data[0] = 50; + REQUIRE(data[0] == 50); +} + +TEST_CASE("HostMemory: Simple mirror map", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x5000, 0x3000, 0x2000); + mem.Map(0x8000, 0x4000, 0x1000); + + volatile u8* const mirror_a = mem.VirtualBasePointer() + 0x5000; + volatile u8* const mirror_b = mem.VirtualBasePointer() + 0x8000; + mirror_b[0] = 76; + REQUIRE(mirror_a[0x1000] == 76); +} + +TEST_CASE("HostMemory: Simple unmap", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x5000, 0x3000, 0x2000); + + volatile u8* const data = mem.VirtualBasePointer() + 0x5000; + data[75] = 50; + REQUIRE(data[75] == 50); + + mem.Unmap(0x5000, 0x2000); +} + +TEST_CASE("HostMemory: Simple unmap and remap", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x5000, 0x3000, 0x2000); + + volatile u8* const data = mem.VirtualBasePointer() 
+ 0x5000; + data[0] = 50; + REQUIRE(data[0] == 50); + + mem.Unmap(0x5000, 0x2000); + + mem.Map(0x5000, 0x3000, 0x2000); + REQUIRE(data[0] == 50); + + mem.Map(0x7000, 0x2000, 0x5000); + REQUIRE(data[0x3000] == 50); +} + +TEST_CASE("HostMemory: Nieche allocation", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x0000, 0, 0x20000); + mem.Unmap(0x0000, 0x4000); + mem.Map(0x1000, 0, 0x2000); + mem.Map(0x3000, 0, 0x1000); + mem.Map(0, 0, 0x1000); +} + +TEST_CASE("HostMemory: Full unmap", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x8000, 0, 0x4000); + mem.Unmap(0x8000, 0x4000); + mem.Map(0x6000, 0, 0x16000); +} + +TEST_CASE("HostMemory: Right out of bounds unmap", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x0000, 0, 0x4000); + mem.Unmap(0x2000, 0x4000); + mem.Map(0x2000, 0x80000, 0x4000); +} + +TEST_CASE("HostMemory: Left out of bounds unmap", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x8000, 0, 0x4000); + mem.Unmap(0x6000, 0x4000); + mem.Map(0x8000, 0, 0x2000); +} + +TEST_CASE("HostMemory: Multiple placeholder unmap", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x0000, 0, 0x4000); + mem.Map(0x4000, 0, 0x1b000); + mem.Unmap(0x3000, 0x1c000); + mem.Map(0x3000, 0, 0x20000); +} + +TEST_CASE("HostMemory: Unmap between placeholders", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x0000, 0, 0x4000); + mem.Map(0x4000, 0, 0x4000); + mem.Unmap(0x2000, 0x4000); + mem.Map(0x2000, 0, 0x4000); +} + +TEST_CASE("HostMemory: Unmap to origin", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x4000, 0, 0x4000); + mem.Map(0x8000, 0, 0x4000); + mem.Unmap(0x4000, 0x4000); + mem.Map(0, 0, 0x4000); + mem.Map(0x4000, 0, 0x4000); +} + +TEST_CASE("HostMemory: Unmap to right", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x4000, 0, 0x4000); + mem.Map(0x8000, 0, 0x4000); + mem.Unmap(0x8000, 0x4000); 
+ mem.Map(0x8000, 0, 0x4000); +} + +TEST_CASE("HostMemory: Partial right unmap check bindings", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x4000, 0x10000, 0x4000); + + volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; + ptr[0x1000] = 17; + + mem.Unmap(0x6000, 0x2000); + + REQUIRE(ptr[0x1000] == 17); +} + +TEST_CASE("HostMemory: Partial left unmap check bindings", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x4000, 0x10000, 0x4000); + + volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; + ptr[0x3000] = 19; + ptr[0x3fff] = 12; + + mem.Unmap(0x4000, 0x2000); + + REQUIRE(ptr[0x3000] == 19); + REQUIRE(ptr[0x3fff] == 12); +} + +TEST_CASE("HostMemory: Partial middle unmap check bindings", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x4000, 0x10000, 0x4000); + + volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; + ptr[0x0000] = 19; + ptr[0x3fff] = 12; + + mem.Unmap(0x1000, 0x2000); + + REQUIRE(ptr[0x0000] == 19); + REQUIRE(ptr[0x3fff] == 12); +} + +TEST_CASE("HostMemory: Partial sparse middle unmap and check bindings", "[common]") { + HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); + mem.Map(0x4000, 0x10000, 0x2000); + mem.Map(0x6000, 0x20000, 0x2000); + + volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; + ptr[0x0000] = 19; + ptr[0x3fff] = 12; + + mem.Unmap(0x5000, 0x2000); + + REQUIRE(ptr[0x0000] == 19); + REQUIRE(ptr[0x3fff] == 12); +} From 621f3f5f47bf9619148cc0ab7ed315e05abf79d7 Mon Sep 17 00:00:00 2001 From: Markus Wick <markus@selfnet.de> Date: Sun, 19 Jan 2020 01:49:30 +0100 Subject: [PATCH 04/11] core: Make use of fastmem --- externals/dynarmic | 2 +- src/common/page_table.h | 2 ++ src/core/arm/dynarmic/arm_dynarmic_32.cpp | 1 + src/core/arm/dynarmic/arm_dynarmic_64.cpp | 4 ++++ src/core/device_memory.cpp | 2 +- src/core/device_memory.h | 17 ++++++++++------- src/core/memory.cpp | 12 ++++++++++++ 7 files changed, 31 insertions(+), 9 deletions(-) diff 
--git a/externals/dynarmic b/externals/dynarmic index 828959caed..0c12614d1a 160000 --- a/externals/dynarmic +++ b/externals/dynarmic @@ -1 +1 @@ -Subproject commit 828959caedfac2d456a0c877fda4612e35fffc03 +Subproject commit 0c12614d1a7a72d778609920dde96a4c63074ece diff --git a/src/common/page_table.h b/src/common/page_table.h index e92b66b2b0..8267e8b4d8 100644 --- a/src/common/page_table.h +++ b/src/common/page_table.h @@ -111,6 +111,8 @@ struct PageTable { VirtualBuffer<u64> backing_addr; size_t current_address_space_width_in_bits; + + u8* fastmem_arena; }; } // namespace Common diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index cea7f0fb13..fb128f7358 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -128,6 +128,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* if (page_table) { config.page_table = reinterpret_cast<std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>*>( page_table->pointers.data()); + config.fastmem_pointer = page_table->fastmem_arena; } config.absolute_offset_page_table = true; config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS; diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 63193dcb14..b0ac8cf8ac 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -160,6 +160,10 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* config.absolute_offset_page_table = true; config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; config.only_detect_misalignment_via_page_table_on_page_boundary = true; + + config.fastmem_pointer = page_table->fastmem_arena; + config.fastmem_address_space_bits = address_space_bits; + config.silently_mirror_fastmem = false; } // Multi-process state diff --git a/src/core/device_memory.cpp b/src/core/device_memory.cpp index 
0c4b440ed7..f19c0515ff 100644 --- a/src/core/device_memory.cpp +++ b/src/core/device_memory.cpp @@ -6,7 +6,7 @@ namespace Core { -DeviceMemory::DeviceMemory() : buffer{DramMemoryMap::Size} {} +DeviceMemory::DeviceMemory() : buffer{DramMemoryMap::Size, 1ULL << 39} {} DeviceMemory::~DeviceMemory() = default; } // namespace Core diff --git a/src/core/device_memory.h b/src/core/device_memory.h index 5b1ae28f3d..c4d17705f1 100644 --- a/src/core/device_memory.h +++ b/src/core/device_memory.h @@ -5,7 +5,7 @@ #pragma once #include "common/common_types.h" -#include "common/virtual_buffer.h" +#include "common/host_memory.h" namespace Core { @@ -21,27 +21,30 @@ enum : u64 { }; }; // namespace DramMemoryMap -class DeviceMemory : NonCopyable { +class DeviceMemory { public: explicit DeviceMemory(); ~DeviceMemory(); + DeviceMemory& operator=(const DeviceMemory&) = delete; + DeviceMemory(const DeviceMemory&) = delete; + template <typename T> PAddr GetPhysicalAddr(const T* ptr) const { - return (reinterpret_cast<uintptr_t>(ptr) - reinterpret_cast<uintptr_t>(buffer.data())) + + return (reinterpret_cast<uintptr_t>(ptr) - + reinterpret_cast<uintptr_t>(buffer.BackingBasePointer())) + DramMemoryMap::Base; } u8* GetPointer(PAddr addr) { - return buffer.data() + (addr - DramMemoryMap::Base); + return buffer.BackingBasePointer() + (addr - DramMemoryMap::Base); } const u8* GetPointer(PAddr addr) const { - return buffer.data() + (addr - DramMemoryMap::Base); + return buffer.BackingBasePointer() + (addr - DramMemoryMap::Base); } -private: - Common::VirtualBuffer<u8> buffer; + Common::HostMemory buffer; }; } // namespace Core diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 9857278f66..79468e4dc7 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -12,6 +12,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "common/page_table.h" +#include "common/settings.h" #include "common/swap.h" #include "core/arm/arm_interface.h" #include "core/core.h" @@ 
-32,6 +33,7 @@ struct Memory::Impl { void SetCurrentPageTable(Kernel::KProcess& process, u32 core_id) { current_page_table = &process.PageTable().PageTableImpl(); + current_page_table->fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer(); const std::size_t address_space_width = process.PageTable().GetAddressSpaceWidth(); @@ -41,13 +43,19 @@ struct Memory::Impl { void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) { ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); + ASSERT_MSG(target >= DramMemoryMap::Base && target < DramMemoryMap::End, + "Out of bounds target: {:016X}", target); MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory); + + system.DeviceMemory().buffer.Map(base, target - DramMemoryMap::Base, size); } void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) { ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, 0, Common::PageType::Unmapped); + + system.DeviceMemory().buffer.Unmap(base, size); } bool IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const { @@ -466,6 +474,10 @@ struct Memory::Impl { if (vaddr == 0) { return; } + + const bool is_read_enable = Settings::IsGPULevelHigh() || !cached; + system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); + // Iterate over a contiguous CPU address space, which corresponds to the specified GPU // address space, marking the region as un/cached. 
The region is marked un/cached at a // granularity of CPU pages, hence why we iterate on a CPU page basis (note: GPU page size From c4609c92eea30558473f02082733c7e59c2d2013 Mon Sep 17 00:00:00 2001 From: Markus Wick <markus@selfnet.de> Date: Sat, 5 Jun 2021 11:47:08 +0200 Subject: [PATCH 05/11] common/host_memory: Optimize for huge tables. In theory, if we have 2 MB continuously mapped, this should save one layer of TLB. Let's make it at least more likely by aligning the memory. --- src/common/host_memory.cpp | 29 +++++++++++++++++++---------- src/common/host_memory.h | 6 +++++- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index eb50fbd9f8..8a328f916d 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -27,6 +27,7 @@ #include <mutex> +#include "common/alignment.h" #include "common/assert.h" #include "common/host_memory.h" #include "common/logging/log.h" @@ -35,6 +36,7 @@ namespace Common { constexpr size_t PageAlignment = 0x1000; +constexpr size_t HugePageSize = 0x200000; #ifdef _WIN32 @@ -385,9 +387,16 @@ private: #endif -HostMemory::HostMemory(size_t backing_size, size_t virtual_size) - : impl{std::make_unique<HostMemory::Impl>(backing_size, virtual_size)}, - backing_base{impl->backing_base}, virtual_base{impl->virtual_base} {} +HostMemory::HostMemory(size_t backing_size_, size_t virtual_size_) + : backing_size(backing_size_), + virtual_size(virtual_size_), impl{std::make_unique<HostMemory::Impl>( + AlignUp(backing_size, PageAlignment), + AlignUp(virtual_size, PageAlignment) + 3 * HugePageSize)}, + backing_base{impl->backing_base}, virtual_base{impl->virtual_base} { + virtual_base += 2 * HugePageSize - 1; + virtual_base -= reinterpret_cast<size_t>(virtual_base) & (HugePageSize - 1); + virtual_base_offset = virtual_base - impl->virtual_base; +} HostMemory::~HostMemory() = default; @@ -399,32 +408,32 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, 
size_t length) { ASSERT(virtual_offset % PageAlignment == 0); ASSERT(host_offset % PageAlignment == 0); ASSERT(length % PageAlignment == 0); - ASSERT(virtual_offset + length <= impl->virtual_size); - ASSERT(host_offset + length <= impl->backing_size); + ASSERT(virtual_offset + length <= virtual_size); + ASSERT(host_offset + length <= backing_size); if (length == 0) { return; } - impl->Map(virtual_offset, host_offset, length); + impl->Map(virtual_offset + virtual_base_offset, host_offset, length); } void HostMemory::Unmap(size_t virtual_offset, size_t length) { ASSERT(virtual_offset % PageAlignment == 0); ASSERT(length % PageAlignment == 0); - ASSERT(virtual_offset + length <= impl->virtual_size); + ASSERT(virtual_offset + length <= virtual_size); if (length == 0) { return; } - impl->Unmap(virtual_offset, length); + impl->Unmap(virtual_offset + virtual_base_offset, length); } void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool write) { ASSERT(virtual_offset % PageAlignment == 0); ASSERT(length % PageAlignment == 0); - ASSERT(virtual_offset + length <= impl->virtual_size); + ASSERT(virtual_offset + length <= virtual_size); if (length == 0) { return; } - impl->Protect(virtual_offset, length, read, write); + impl->Protect(virtual_offset + virtual_base_offset, length, read, write); } } // namespace Common diff --git a/src/common/host_memory.h b/src/common/host_memory.h index 98005df7a2..eaa7d18ab5 100644 --- a/src/common/host_memory.h +++ b/src/common/host_memory.h @@ -15,7 +15,7 @@ namespace Common { */ class HostMemory { public: - explicit HostMemory(size_t backing_size, size_t virtual_size); + explicit HostMemory(size_t backing_size_, size_t virtual_size_); ~HostMemory(); /** @@ -52,11 +52,15 @@ public: } private: + size_t backing_size{}; + size_t virtual_size{}; + // Low level handler for the platform dependent memory routines class Impl; std::unique_ptr<Impl> impl; u8* backing_base{}; u8* virtual_base{}; + size_t virtual_base_offset{}; }; 
} // namespace Common From 5ba28325b262d44fcd7721aa00074955bd794015 Mon Sep 17 00:00:00 2001 From: FernandoS27 <fsahmkow27@gmail.com> Date: Sun, 6 Jun 2021 09:57:24 +0200 Subject: [PATCH 06/11] General: Add settings for fastmem and disabling address space check. --- src/common/settings.cpp | 8 ++++++++ src/common/settings.h | 4 ++++ src/core/arm/dynarmic/arm_dynarmic_32.cpp | 5 ++++- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 8 +++++++- src/core/memory.cpp | 14 ++++++++++---- src/yuzu/configuration/config.cpp | 7 +++++++ src/yuzu/configuration/configure_cpu.cpp | 9 +++++++++ src/yuzu/configuration/configure_cpu.h | 1 + src/yuzu/configuration/configure_cpu.ui | 12 ++++++++++++ src/yuzu/configuration/configure_cpu_debug.cpp | 3 +++ src/yuzu/configuration/configure_cpu_debug.ui | 14 ++++++++++++++ src/yuzu_cmd/default_ini.h | 4 ++++ 12 files changed, 83 insertions(+), 6 deletions(-) diff --git a/src/common/settings.cpp b/src/common/settings.cpp index bcb4e4be1a..360e878d60 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -90,6 +90,13 @@ bool IsGPULevelHigh() { values.gpu_accuracy.GetValue() == GPUAccuracy::High; } +bool IsFastmemEnabled() { + if (values.cpu_accuracy.GetValue() == CPUAccuracy::DebugMode) { + return values.cpuopt_fastmem; + } + return true; +} + float Volume() { if (values.audio_muted) { return 0.0f; @@ -115,6 +122,7 @@ void RestoreGlobalState(bool is_powered_on) { values.cpuopt_unsafe_unfuse_fma.SetGlobal(true); values.cpuopt_unsafe_reduce_fp_error.SetGlobal(true); values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true); + values.cpuopt_unsafe_fastmem_check.SetGlobal(true); // Renderer values.renderer_backend.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index 48085b9a95..1af8c5ac2e 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -125,10 +125,12 @@ struct Values { bool cpuopt_const_prop; bool cpuopt_misc_ir; bool cpuopt_reduce_misalign_checks; + bool cpuopt_fastmem; Setting<bool> 
cpuopt_unsafe_unfuse_fma; Setting<bool> cpuopt_unsafe_reduce_fp_error; Setting<bool> cpuopt_unsafe_inaccurate_nan; + Setting<bool> cpuopt_unsafe_fastmem_check; // Renderer Setting<RendererBackend> renderer_backend; @@ -249,6 +251,8 @@ void SetConfiguringGlobal(bool is_global); bool IsGPULevelExtreme(); bool IsGPULevelHigh(); +bool IsFastmemEnabled(); + float Volume(); std::string GetTimeZoneString(); diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index fb128f7358..c8f6dc7653 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -144,7 +144,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* // Code cache size config.code_cache_size = 512 * 1024 * 1024; - config.far_code_offset = 256 * 1024 * 1024; + config.far_code_offset = 400 * 1024 * 1024; // Safe optimizations if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) { @@ -172,6 +172,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* if (!Settings::values.cpuopt_reduce_misalign_checks) { config.only_detect_misalignment_via_page_table_on_page_boundary = false; } + if (!Settings::values.cpuopt_fastmem) { + config.fastmem_pointer = nullptr; + } } // Unsafe optimizations diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index b0ac8cf8ac..ba524cd058 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -185,7 +185,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* // Code cache size config.code_cache_size = 512 * 1024 * 1024; - config.far_code_offset = 256 * 1024 * 1024; + config.far_code_offset = 400 * 1024 * 1024; // Safe optimizations if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) { @@ -213,6 +213,9 @@ std::shared_ptr<Dynarmic::A64::Jit> 
ARM_Dynarmic_64::MakeJit(Common::PageTable* if (!Settings::values.cpuopt_reduce_misalign_checks) { config.only_detect_misalignment_via_page_table_on_page_boundary = false; } + if (!Settings::values.cpuopt_fastmem) { + config.fastmem_pointer = nullptr; + } } // Unsafe optimizations @@ -227,6 +230,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* if (Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; } + if (Settings::values.cpuopt_unsafe_fastmem_check.GetValue()) { + config.fastmem_address_space_bits = 64; + } } return std::make_shared<Dynarmic::A64::Jit>(config); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 79468e4dc7..f285c6f639 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -47,7 +47,9 @@ struct Memory::Impl { "Out of bounds target: {:016X}", target); MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory); - system.DeviceMemory().buffer.Map(base, target - DramMemoryMap::Base, size); + if (Settings::IsFastmemEnabled()) { + system.DeviceMemory().buffer.Map(base, target - DramMemoryMap::Base, size); + } } void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) { @@ -55,7 +57,9 @@ struct Memory::Impl { ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, 0, Common::PageType::Unmapped); - system.DeviceMemory().buffer.Unmap(base, size); + if (Settings::IsFastmemEnabled()) { + system.DeviceMemory().buffer.Unmap(base, size); + } } bool IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const { @@ -475,8 +479,10 @@ struct Memory::Impl { return; } - const bool is_read_enable = Settings::IsGPULevelHigh() || !cached; - system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); + if (Settings::IsFastmemEnabled()) { + const bool is_read_enable = 
Settings::IsGPULevelHigh() || !cached; + system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); + } // Iterate over a contiguous CPU address space, which corresponds to the specified GPU // address space, marking the region as un/cached. The region is marked un/cached at a diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index e9d4bef607..a59b36e138 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -756,6 +756,8 @@ void Config::ReadCpuValues() { QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true); ReadSettingGlobal(Settings::values.cpuopt_unsafe_inaccurate_nan, QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true); + ReadSettingGlobal(Settings::values.cpuopt_unsafe_fastmem_check, + QStringLiteral("cpuopt_unsafe_fastmem_check"), true); if (global) { Settings::values.cpuopt_page_tables = @@ -774,6 +776,8 @@ void Config::ReadCpuValues() { ReadSetting(QStringLiteral("cpuopt_misc_ir"), true).toBool(); Settings::values.cpuopt_reduce_misalign_checks = ReadSetting(QStringLiteral("cpuopt_reduce_misalign_checks"), true).toBool(); + Settings::values.cpuopt_fastmem = + ReadSetting(QStringLiteral("cpuopt_fastmem"), true).toBool(); } qt_config->endGroup(); @@ -1332,6 +1336,8 @@ void Config::SaveCpuValues() { Settings::values.cpuopt_unsafe_reduce_fp_error, true); WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), Settings::values.cpuopt_unsafe_inaccurate_nan, true); + WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_fastmem_check"), + Settings::values.cpuopt_unsafe_fastmem_check, true); if (global) { WriteSetting(QStringLiteral("cpuopt_page_tables"), Settings::values.cpuopt_page_tables, @@ -1348,6 +1354,7 @@ void Config::SaveCpuValues() { WriteSetting(QStringLiteral("cpuopt_misc_ir"), Settings::values.cpuopt_misc_ir, true); WriteSetting(QStringLiteral("cpuopt_reduce_misalign_checks"), Settings::values.cpuopt_reduce_misalign_checks, true); + 
WriteSetting(QStringLiteral("cpuopt_fastmem"), Settings::values.cpuopt_fastmem, true); } qt_config->endGroup(); diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp index 525c42ff0d..22219cbada 100644 --- a/src/yuzu/configuration/configure_cpu.cpp +++ b/src/yuzu/configuration/configure_cpu.cpp @@ -35,12 +35,15 @@ void ConfigureCpu::SetConfiguration() { ui->cpuopt_unsafe_unfuse_fma->setEnabled(runtime_lock); ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock); ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock); + ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock); ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue()); ui->cpuopt_unsafe_reduce_fp_error->setChecked( Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue()); ui->cpuopt_unsafe_inaccurate_nan->setChecked( Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()); + ui->cpuopt_unsafe_fastmem_check->setChecked( + Settings::values.cpuopt_unsafe_fastmem_check.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->accuracy->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy.GetValue())); @@ -84,6 +87,9 @@ void ConfigureCpu::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_inaccurate_nan, ui->cpuopt_unsafe_inaccurate_nan, cpuopt_unsafe_inaccurate_nan); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_fastmem_check, + ui->cpuopt_unsafe_fastmem_check, + cpuopt_unsafe_fastmem_check); if (Settings::IsConfiguringGlobal()) { // Guard if during game and set to game-specific value @@ -134,4 +140,7 @@ void ConfigureCpu::SetupPerGameUI() { ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_inaccurate_nan, Settings::values.cpuopt_unsafe_inaccurate_nan, cpuopt_unsafe_inaccurate_nan); + ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_fastmem_check, + Settings::values.cpuopt_unsafe_fastmem_check, + 
cpuopt_unsafe_fastmem_check); } diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h index 8e2eeb7a68..57ff2772a6 100644 --- a/src/yuzu/configuration/configure_cpu.h +++ b/src/yuzu/configuration/configure_cpu.h @@ -41,4 +41,5 @@ private: ConfigurationShared::CheckState cpuopt_unsafe_unfuse_fma; ConfigurationShared::CheckState cpuopt_unsafe_reduce_fp_error; ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan; + ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check; }; diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui index 99b5736405..31ef9e3f54 100644 --- a/src/yuzu/configuration/configure_cpu.ui +++ b/src/yuzu/configuration/configure_cpu.ui @@ -123,6 +123,18 @@ </property> </widget> </item> + <item> + <widget class="QCheckBox" name="cpuopt_unsafe_fastmem_check"> + <property name="toolTip"> + <string> + <div>This option improves speed by eliminating a safety check before every memory read/write in guest. 
Disabling it may allow a game to read/write the emulator's memory.</div> + </string> + </property> + <property name="text"> + <string>Disable address space checks</string> + </property> + </widget> + </item> </layout> </widget> </item> diff --git a/src/yuzu/configuration/configure_cpu_debug.cpp b/src/yuzu/configuration/configure_cpu_debug.cpp index c925c023c4..e25c52baf4 100644 --- a/src/yuzu/configuration/configure_cpu_debug.cpp +++ b/src/yuzu/configuration/configure_cpu_debug.cpp @@ -39,6 +39,8 @@ void ConfigureCpuDebug::SetConfiguration() { ui->cpuopt_misc_ir->setChecked(Settings::values.cpuopt_misc_ir); ui->cpuopt_reduce_misalign_checks->setEnabled(runtime_lock); ui->cpuopt_reduce_misalign_checks->setChecked(Settings::values.cpuopt_reduce_misalign_checks); + ui->cpuopt_fastmem->setEnabled(runtime_lock); + ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem); } void ConfigureCpuDebug::ApplyConfiguration() { @@ -50,6 +52,7 @@ void ConfigureCpuDebug::ApplyConfiguration() { Settings::values.cpuopt_const_prop = ui->cpuopt_const_prop->isChecked(); Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked(); Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked(); + Settings::values.cpuopt_fastmem = ui->cpuopt_fastmem->isChecked(); } void ConfigureCpuDebug::changeEvent(QEvent* event) { diff --git a/src/yuzu/configuration/configure_cpu_debug.ui b/src/yuzu/configuration/configure_cpu_debug.ui index a90dc64fed..11ee19a128 100644 --- a/src/yuzu/configuration/configure_cpu_debug.ui +++ b/src/yuzu/configuration/configure_cpu_debug.ui @@ -139,6 +139,20 @@ </property> </widget> </item> + <item> + <widget class="QCheckBox" name="cpuopt_fastmem"> + <property name="text"> + <string>Enable Host MMU Emulation</string> + </property> + <property name="toolTip"> + <string> + <div style="white-space: nowrap">This optimization speeds up memory accesses by the guest program.</div> + <div style="white-space: nowrap">Enabling it 
causes guest memory reads/writes to be done directly into memory and make use of Host's MMU.</div> + <div style="white-space: nowrap">Disabling this forces all memory accesses to use Software MMU Emulation.</div> + </string> + </property> + </widget> + </item> </layout> </widget> </item> diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 8ce2967ac2..f48d935a1f 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -150,6 +150,10 @@ cpuopt_misc_ir = # 0: Disabled, 1 (default): Enabled cpuopt_reduce_misalign_checks = +# Enable Host MMU Emulation (faster guest memory access) +# 0: Disabled, 1 (default): Enabled +cpuopt_fastmem = + [Renderer] # Which backend API to use. # 0 (default): OpenGL, 1: Vulkan From ee67460ff0e12a1603431d86fe3919a24b3858fb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sun, 6 Jun 2021 20:53:26 -0300 Subject: [PATCH 07/11] host_memory: Support staged VirtualProtect calls --- src/common/host_memory.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index 8a328f916d..c6d65aab94 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -110,9 +110,18 @@ public: } else { UNIMPLEMENTED_MSG("Protection flag combination read={} write={}", read, write); } - DWORD old_flags{}; - if (!VirtualProtect(virtual_base + virtual_offset, length, new_flags, &old_flags)) { - LOG_CRITICAL(HW_Memory, "Failed to change virtual memory protect rules"); + const size_t virtual_end = virtual_offset + length; + + std::lock_guard lock{placeholder_mutex}; + auto [it, end] = placeholders.equal_range({virtual_offset, virtual_end}); + while (it != end) { + const size_t offset = std::max(it->lower(), virtual_offset); + const size_t protect_length = std::min(it->upper(), virtual_end) - offset; + DWORD old_flags{}; + if (!VirtualProtect(virtual_base + offset, protect_length, new_flags, &old_flags)) { 
+ LOG_CRITICAL(HW_Memory, "Failed to change virtual memory protect rules"); + } + ++it; } } From 7b0d8bd1fbfd7d8fe4d939cb3b8649a29f61655c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sun, 6 Jun 2021 20:58:57 -0300 Subject: [PATCH 08/11] rasterizer: Update pages in batches --- src/video_core/rasterizer_accelerated.cpp | 56 +++++++++++++++++------ 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index 6decd25462..4c95247022 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <atomic> + #include "common/assert.h" #include "common/common_types.h" #include "common/div_ceil.h" @@ -10,35 +12,59 @@ namespace VideoCore { -RasterizerAccelerated::RasterizerAccelerated(Core::Memory::Memory& cpu_memory_) - : cpu_memory{cpu_memory_} {} +using namespace Core::Memory; + +RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) : cpu_memory{cpu_memory_} {} RasterizerAccelerated::~RasterizerAccelerated() = default; void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { - const auto page_end = Common::DivCeil(addr + size, Core::Memory::PAGE_SIZE); - for (auto page = addr >> Core::Memory::PAGE_BITS; page != page_end; ++page) { - auto& count = cached_pages.at(page >> 2).Count(page); + u64 uncache_begin = 0; + u64 cache_begin = 0; + u64 uncache_bytes = 0; + u64 cache_bytes = 0; + + std::atomic_thread_fence(std::memory_order_acquire); + const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); + for (u64 page = addr >> PAGE_BITS; page != page_end; ++page) { + std::atomic_uint16_t& count = cached_pages.at(page >> 2).Count(page); if (delta > 0) { - ASSERT_MSG(count < UINT16_MAX, "Count may overflow!"); + ASSERT_MSG(count.load(std::memory_order::relaxed) < 
UINT16_MAX, "Count may overflow!"); } else if (delta < 0) { - ASSERT_MSG(count > 0, "Count may underflow!"); + ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); } else { - ASSERT_MSG(true, "Delta must be non-zero!"); + ASSERT_MSG(false, "Delta must be non-zero!"); } // Adds or subtracts 1, as count is a unsigned 8-bit value - count += static_cast<u16>(delta); + count.fetch_add(static_cast<u16>(delta), std::memory_order_release); // Assume delta is either -1 or 1 - if (count == 0) { - cpu_memory.RasterizerMarkRegionCached(page << Core::Memory::PAGE_BITS, - Core::Memory::PAGE_SIZE, false); - } else if (count == 1 && delta > 0) { - cpu_memory.RasterizerMarkRegionCached(page << Core::Memory::PAGE_BITS, - Core::Memory::PAGE_SIZE, true); + if (count.load(std::memory_order::relaxed) == 0) { + if (uncache_bytes == 0) { + uncache_begin = page; + } + uncache_bytes += PAGE_SIZE; + } else if (uncache_bytes > 0) { + cpu_memory.RasterizerMarkRegionCached(uncache_begin << PAGE_BITS, uncache_bytes, false); + uncache_bytes = 0; } + if (count.load(std::memory_order::relaxed) == 1 && delta > 0) { + if (cache_bytes == 0) { + cache_begin = page; + } + cache_bytes += PAGE_SIZE; + } else if (cache_bytes > 0) { + cpu_memory.RasterizerMarkRegionCached(cache_begin << PAGE_BITS, cache_bytes, true); + cache_bytes = 0; + } + } + if (uncache_bytes > 0) { + cpu_memory.RasterizerMarkRegionCached(uncache_begin << PAGE_BITS, uncache_bytes, false); + } + if (cache_bytes > 0) { + cpu_memory.RasterizerMarkRegionCached(cache_begin << PAGE_BITS, cache_bytes, true); } } From 588ab44470e65d4e2ec010aa48bb5f430d301f81 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow <fsahmkow27@gmail.com> Date: Mon, 7 Jun 2021 02:04:35 +0200 Subject: [PATCH 09/11] GPUTHread: Remove async reads from Normal Accuracy. 
--- src/video_core/gpu_thread.cpp | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index cd1fbb9bff..46f642b197 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -99,25 +99,13 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { PushCommand(FlushRegionCommand(addr, size)); return; } - - // Asynchronous GPU mode - switch (Settings::values.gpu_accuracy.GetValue()) { - case Settings::GPUAccuracy::Normal: - PushCommand(FlushRegionCommand(addr, size)); - break; - case Settings::GPUAccuracy::High: - // TODO(bunnei): Is this right? Preserving existing behavior for now - break; - case Settings::GPUAccuracy::Extreme: { - auto& gpu = system.GPU(); - u64 fence = gpu.RequestFlush(addr, size); - PushCommand(GPUTickCommand(), true); - ASSERT(fence <= gpu.CurrentFlushRequestFence()); - break; - } - default: - UNIMPLEMENTED_MSG("Unsupported gpu_accuracy {}", Settings::values.gpu_accuracy.GetValue()); + if (!Settings::IsGPULevelExtreme()) { + return; } + auto& gpu = system.GPU(); + u64 fence = gpu.RequestFlush(addr, size); + PushCommand(GPUTickCommand(), true); + ASSERT(fence <= gpu.CurrentFlushRequestFence()); } void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { From f332d4a9b548e9c7e18c245fd3b90ffc5a94b943 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Tue, 8 Jun 2021 02:14:12 -0300 Subject: [PATCH 10/11] common/host_memory: Load Windows 10 functions dynamically Work around old headers and libraries shipped on MinGW. 
--- src/common/host_memory.cpp | 117 ++++++++++++++++++++++++++++--------- 1 file changed, 88 insertions(+), 29 deletions(-) diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index c6d65aab94..9ae384f01c 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -1,18 +1,10 @@ #ifdef _WIN32 -#ifdef _WIN32_WINNT -#undef _WIN32_WINNT -#endif -#define _WIN32_WINNT 0x0A00 // Windows 10 - -#include <windows.h> - -#include <boost/icl/separate_interval_set.hpp> - #include <iterator> #include <unordered_map> - -#pragma comment(lib, "mincore.lib") +#include <boost/icl/separate_interval_set.hpp> +#include <windows.h> +#include "common/dynamic_library.h" #elif defined(__linux__) // ^^^ Windows ^^^ vvv Linux vvv @@ -40,38 +32,99 @@ constexpr size_t HugePageSize = 0x200000; #ifdef _WIN32 +// Manually imported for MinGW compatibility +#ifndef MEM_RESERVE_PLACEHOLDER +#define MEM_RESERVE_PLACEHOLDER 0x00040000 +#endif +#ifndef MEM_REPLACE_PLACEHOLDER +#define MEM_REPLACE_PLACEHOLDER 0x00004000 +#endif +#ifndef MEM_COALESCE_PLACEHOLDERS +#define MEM_COALESCE_PLACEHOLDERS 0x00000001 +#endif +#ifndef MEM_PRESERVE_PLACEHOLDER +#define MEM_PRESERVE_PLACEHOLDER 0x00000002 +#endif + +using PFN_CreateFileMapping2 = _Ret_maybenull_ HANDLE(WINAPI*)( + _In_ HANDLE File, _In_opt_ SECURITY_ATTRIBUTES* SecurityAttributes, _In_ ULONG DesiredAccess, + _In_ ULONG PageProtection, _In_ ULONG AllocationAttributes, _In_ ULONG64 MaximumSize, + _In_opt_ PCWSTR Name, + _Inout_updates_opt_(ParameterCount) MEM_EXTENDED_PARAMETER* ExtendedParameters, + _In_ ULONG ParameterCount); + +using PFN_VirtualAlloc2 = _Ret_maybenull_ PVOID(WINAPI*)( + _In_opt_ HANDLE Process, _In_opt_ PVOID BaseAddress, _In_ SIZE_T Size, + _In_ ULONG AllocationType, _In_ ULONG PageProtection, + _Inout_updates_opt_(ParameterCount) MEM_EXTENDED_PARAMETER* ExtendedParameters, + _In_ ULONG ParameterCount); + +using PFN_MapViewOfFile3 = _Ret_maybenull_ PVOID(WINAPI*)( + _In_ HANDLE FileMapping, _In_opt_ 
HANDLE Process, _In_opt_ PVOID BaseAddress, + _In_ ULONG64 Offset, _In_ SIZE_T ViewSize, _In_ ULONG AllocationType, _In_ ULONG PageProtection, + _Inout_updates_opt_(ParameterCount) MEM_EXTENDED_PARAMETER* ExtendedParameters, + _In_ ULONG ParameterCount); + +using PFN_UnmapViewOfFile2 = BOOL(WINAPI*)(_In_ HANDLE Process, _In_ PVOID BaseAddress, + _In_ ULONG UnmapFlags); + +template <typename T> +static void GetFuncAddress(Common::DynamicLibrary& dll, const char* name, T& pfn) { + if (!dll.GetSymbol(name, &pfn)) { + LOG_CRITICAL(HW_Memory, "Failed to load {}", name); + throw std::bad_alloc{}; + } +} + class HostMemory::Impl { public: explicit Impl(size_t backing_size_, size_t virtual_size_) - : backing_size{backing_size_}, virtual_size{virtual_size_}, process{GetCurrentProcess()} { + : backing_size{backing_size_}, virtual_size{virtual_size_}, process{GetCurrentProcess()}, + kernelbase_dll("Kernelbase") { + if (!kernelbase_dll.IsOpen()) { + LOG_CRITICAL(HW_Memory, "Failed to load Kernelbase.dll"); + throw std::bad_alloc{}; + } + GetFuncAddress(kernelbase_dll, "CreateFileMapping2", pfn_CreateFileMapping2); + GetFuncAddress(kernelbase_dll, "VirtualAlloc2", pfn_VirtualAlloc2); + GetFuncAddress(kernelbase_dll, "MapViewOfFile3", pfn_MapViewOfFile3); + GetFuncAddress(kernelbase_dll, "UnmapViewOfFile2", pfn_UnmapViewOfFile2); + // Allocate backing file map backing_handle = - CreateFileMapping2(INVALID_HANDLE_VALUE, nullptr, FILE_MAP_WRITE | FILE_MAP_READ, - PAGE_READWRITE, SEC_COMMIT, backing_size, nullptr, nullptr, 0); + pfn_CreateFileMapping2(INVALID_HANDLE_VALUE, nullptr, FILE_MAP_WRITE | FILE_MAP_READ, + PAGE_READWRITE, SEC_COMMIT, backing_size, nullptr, nullptr, 0); if (!backing_handle) { + LOG_CRITICAL(HW_Memory, "Failed to allocate {} MiB of backing memory", + backing_size >> 20); throw std::bad_alloc{}; } // Allocate a virtual memory for the backing file map as placeholder - backing_base = static_cast<u8*>(VirtualAlloc2(process, nullptr, backing_size, - MEM_RESERVE | 
MEM_RESERVE_PLACEHOLDER, - PAGE_NOACCESS, nullptr, 0)); + backing_base = static_cast<u8*>(pfn_VirtualAlloc2(process, nullptr, backing_size, + MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, + PAGE_NOACCESS, nullptr, 0)); if (!backing_base) { Release(); + LOG_CRITICAL(HW_Memory, "Failed to reserve {} MiB of virtual memory", + backing_size >> 20); throw std::bad_alloc{}; } // Map backing placeholder - void* const ret = MapViewOfFile3(backing_handle, process, backing_base, 0, backing_size, - MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0); + void* const ret = pfn_MapViewOfFile3(backing_handle, process, backing_base, 0, backing_size, + MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0); if (ret != backing_base) { Release(); + LOG_CRITICAL(HW_Memory, "Failed to map {} MiB of virtual memory", backing_size >> 20); throw std::bad_alloc{}; } // Allocate virtual address placeholder - virtual_base = static_cast<u8*>(VirtualAlloc2(process, nullptr, virtual_size, - MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, - PAGE_NOACCESS, nullptr, 0)); + virtual_base = static_cast<u8*>(pfn_VirtualAlloc2(process, nullptr, virtual_size, + MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, + PAGE_NOACCESS, nullptr, 0)); if (!virtual_base) { Release(); + LOG_CRITICAL(HW_Memory, "Failed to reserve {} GiB of virtual memory", + virtual_size >> 30); throw std::bad_alloc{}; } } @@ -136,8 +189,8 @@ private: void Release() { if (!placeholders.empty()) { for (const auto& placeholder : placeholders) { - if (!UnmapViewOfFile2(process, virtual_base + placeholder.lower(), - MEM_PRESERVE_PLACEHOLDER)) { + if (!pfn_UnmapViewOfFile2(process, virtual_base + placeholder.lower(), + MEM_PRESERVE_PLACEHOLDER)) { LOG_CRITICAL(HW_Memory, "Failed to unmap virtual memory placeholder"); } } @@ -149,7 +202,7 @@ private: } } if (backing_base) { - if (!UnmapViewOfFile2(process, backing_base, MEM_PRESERVE_PLACEHOLDER)) { + if (!pfn_UnmapViewOfFile2(process, backing_base, MEM_PRESERVE_PLACEHOLDER)) { LOG_CRITICAL(HW_Memory, "Failed to unmap 
backing memory placeholder"); } if (!VirtualFreeEx(process, backing_base, 0, MEM_RELEASE)) { @@ -184,8 +237,8 @@ private: const bool split_left = unmap_begin > placeholder_begin; const bool split_right = unmap_end < placeholder_end; - if (!UnmapViewOfFile2(process, virtual_base + placeholder_begin, - MEM_PRESERVE_PLACEHOLDER)) { + if (!pfn_UnmapViewOfFile2(process, virtual_base + placeholder_begin, + MEM_PRESERVE_PLACEHOLDER)) { LOG_CRITICAL(HW_Memory, "Failed to unmap placeholder"); } // If we have to remap memory regions due to partial unmaps, we are in a data race as @@ -235,8 +288,8 @@ private: } void MapView(size_t virtual_offset, size_t host_offset, size_t length) { - if (!MapViewOfFile3(backing_handle, process, virtual_base + virtual_offset, host_offset, - length, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0)) { + if (!pfn_MapViewOfFile3(backing_handle, process, virtual_base + virtual_offset, host_offset, + length, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0)) { LOG_CRITICAL(HW_Memory, "Failed to map placeholder"); } } @@ -279,6 +332,12 @@ private: HANDLE process{}; ///< Current process handle HANDLE backing_handle{}; ///< File based backing memory + DynamicLibrary kernelbase_dll; + PFN_CreateFileMapping2 pfn_CreateFileMapping2{}; + PFN_VirtualAlloc2 pfn_VirtualAlloc2{}; + PFN_MapViewOfFile3 pfn_MapViewOfFile3{}; + PFN_UnmapViewOfFile2 pfn_UnmapViewOfFile2{}; + std::mutex placeholder_mutex; ///< Mutex for placeholders boost::icl::separate_interval_set<size_t> placeholders; ///< Mapped placeholders std::unordered_map<size_t, size_t> placeholder_host_pointers; ///< Placeholder backing offset From 7f85abb28120fbb57bb813b828ee42f2a2031990 Mon Sep 17 00:00:00 2001 From: Markus Wick <markus@selfnet.de> Date: Fri, 11 Jun 2021 11:47:23 +0200 Subject: [PATCH 11/11] common/host_memory: Implement a fallback if fastmem fails. This falls back to the old approach of using a virtual buffer. 
Windows is untested, but this build should fix support for Windows < 10 v1803, albeit without any fastmem support. --- src/common/host_memory.cpp | 59 +++++++++++++++++++++++++++++--------- src/common/host_memory.h | 4 +++ 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index 9ae384f01c..8bd70abc79 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -449,21 +449,52 @@ private: int fd{-1}; // memfd file descriptor, -1 is the error value of memfd_create }; -#else // ^^^ Linux ^^^ +#else // ^^^ Linux ^^^ vvv Generic vvv -#error Please implement the host memory for your platform +class HostMemory::Impl { +public: + explicit Impl(size_t /*backing_size */, size_t /* virtual_size */) { + // This is just a placeholder. + // Please implement fastmem in a proper way on your platform. + throw std::bad_alloc{}; + } -#endif + void Map(size_t virtual_offset, size_t host_offset, size_t length) {} + + void Unmap(size_t virtual_offset, size_t length) {} + + void Protect(size_t virtual_offset, size_t length, bool read, bool write) {} + + u8* backing_base{nullptr}; + u8* virtual_base{nullptr}; +}; + +#endif // ^^^ Generic ^^^ HostMemory::HostMemory(size_t backing_size_, size_t virtual_size_) - : backing_size(backing_size_), - virtual_size(virtual_size_), impl{std::make_unique<HostMemory::Impl>( - AlignUp(backing_size, PageAlignment), - AlignUp(virtual_size, PageAlignment) + 3 * HugePageSize)}, - backing_base{impl->backing_base}, virtual_base{impl->virtual_base} { - virtual_base += 2 * HugePageSize - 1; - virtual_base -= reinterpret_cast<size_t>(virtual_base) & (HugePageSize - 1); - virtual_base_offset = virtual_base - impl->virtual_base; + : backing_size(backing_size_), virtual_size(virtual_size_) { + try { + // Try to allocate a fastmem arena. + // The implementation will fail with std::bad_alloc on errors. 
+ impl = std::make_unique<HostMemory::Impl>(AlignUp(backing_size, PageAlignment), + AlignUp(virtual_size, PageAlignment) + + 3 * HugePageSize); + backing_base = impl->backing_base; + virtual_base = impl->virtual_base; + + if (virtual_base) { + virtual_base += 2 * HugePageSize - 1; + virtual_base -= reinterpret_cast<size_t>(virtual_base) & (HugePageSize - 1); + virtual_base_offset = virtual_base - impl->virtual_base; + } + + } catch (const std::bad_alloc&) { + LOG_CRITICAL(HW_Memory, + "Fastmem unavailable, falling back to VirtualBuffer for memory allocation"); + fallback_buffer = std::make_unique<Common::VirtualBuffer<u8>>(backing_size); + backing_base = fallback_buffer->data(); + virtual_base = nullptr; + } } HostMemory::~HostMemory() = default; @@ -478,7 +509,7 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length) { ASSERT(length % PageAlignment == 0); ASSERT(virtual_offset + length <= virtual_size); ASSERT(host_offset + length <= backing_size); - if (length == 0) { + if (length == 0 || !virtual_base || !impl) { return; } impl->Map(virtual_offset + virtual_base_offset, host_offset, length); @@ -488,7 +519,7 @@ void HostMemory::Unmap(size_t virtual_offset, size_t length) { ASSERT(virtual_offset % PageAlignment == 0); ASSERT(length % PageAlignment == 0); ASSERT(virtual_offset + length <= virtual_size); - if (length == 0) { + if (length == 0 || !virtual_base || !impl) { return; } impl->Unmap(virtual_offset + virtual_base_offset, length); @@ -498,7 +529,7 @@ void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool w ASSERT(virtual_offset % PageAlignment == 0); ASSERT(length % PageAlignment == 0); ASSERT(virtual_offset + length <= virtual_size); - if (length == 0) { + if (length == 0 || !virtual_base || !impl) { return; } impl->Protect(virtual_offset + virtual_base_offset, length, read, write); diff --git a/src/common/host_memory.h b/src/common/host_memory.h index eaa7d18ab5..9b8326d0fb 100644 --- 
a/src/common/host_memory.h +++ b/src/common/host_memory.h @@ -6,6 +6,7 @@ #include <memory> #include "common/common_types.h" +#include "common/virtual_buffer.h" namespace Common { @@ -61,6 +62,9 @@ private: u8* backing_base{}; u8* virtual_base{}; size_t virtual_base_offset{}; + + // Fallback if fastmem is not supported on this platform + std::unique_ptr<Common::VirtualBuffer<u8>> fallback_buffer; }; } // namespace Common