From a7837a3791562899bf5e0e98aef851a2f4aaf376 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 5 Jun 2021 06:23:25 -0300
Subject: [PATCH 01/11] common/host_memory: Add interface and Windows
 implementation

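The implementation builds on the Windows 10 placeholder APIs: one
pagefile-backed section (CreateFileMapping2) provides the physical memory,
a large MEM_RESERVE_PLACEHOLDER region provides the guest address space,
and Map() replaces pieces of that reservation with views of the section.
A minimal sketch of the placeholder lifecycle (standalone illustration
with made-up names such as section/arena_size/map_len; error handling
omitted):

    // Reserve address space that can later be split and mapped over.
    void* base = VirtualAlloc2(GetCurrentProcess(), nullptr, arena_size,
                               MEM_RESERVE | MEM_RESERVE_PLACEHOLDER,
                               PAGE_NOACCESS, nullptr, 0);
    // Split off a placeholder of exactly the wanted length...
    VirtualFreeEx(GetCurrentProcess(), base, map_len,
                  MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER);
    // ...and replace it with a view into the backing section.
    MapViewOfFile3(section, GetCurrentProcess(), base, host_offset, map_len,
                   MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0);
    // Unmapping preserves the placeholder so the range can be reused;
    // adjacent placeholders are merged again with MEM_COALESCE_PLACEHOLDERS.
    UnmapViewOfFile2(GetCurrentProcess(), base, MEM_PRESERVE_PLACEHOLDER);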
---
 src/common/CMakeLists.txt  |   2 +
 src/common/host_memory.cpp | 320 +++++++++++++++++++++++++++++++++++++
 src/common/host_memory.h   |  62 +++++++
 3 files changed, 384 insertions(+)
 create mode 100644 src/common/host_memory.cpp
 create mode 100644 src/common/host_memory.h

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2d403d471e..97fbdcbf9f 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -131,6 +131,8 @@ add_library(common STATIC
     hash.h
     hex_util.cpp
     hex_util.h
+    host_memory.cpp
+    host_memory.h
     intrusive_red_black_tree.h
     logging/backend.cpp
     logging/backend.h
diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp
new file mode 100644
index 0000000000..4f5086e909
--- /dev/null
+++ b/src/common/host_memory.cpp
@@ -0,0 +1,320 @@
+#ifdef __linux__
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#elif defined(_WIN32) // ^^^ Linux ^^^ vvv Windows vvv
+#ifdef _WIN32_WINNT
+#undef _WIN32_WINNT
+#endif
+#define _WIN32_WINNT 0x0A00 // Windows 10
+
+#include <windows.h>
+
+#include <boost/icl/separate_interval_set.hpp>
+
+#include <iterator>
+#include <unordered_map>
+
+#pragma comment(lib, "mincore.lib")
+
+#endif // ^^^ Windows ^^^
+
+#include <mutex>
+
+#include "common/assert.h"
+#include "common/host_memory.h"
+#include "common/logging/log.h"
+
+namespace Common {
+
+constexpr size_t PageAlignment = 0x1000;
+
+#ifdef _WIN32
+
+class HostMemory::Impl {
+public:
+    explicit Impl(size_t backing_size_, size_t virtual_size_)
+        : backing_size{backing_size_}, virtual_size{virtual_size_}, process{GetCurrentProcess()} {
+        // Allocate backing file map
+        backing_handle =
+            CreateFileMapping2(INVALID_HANDLE_VALUE, nullptr, FILE_MAP_WRITE | FILE_MAP_READ,
+                               PAGE_READWRITE, SEC_COMMIT, backing_size, nullptr, nullptr, 0);
+        if (!backing_handle) {
+            throw std::bad_alloc{};
+        }
+        // Allocate virtual memory for the backing file map as a placeholder
+        backing_base = static_cast<u8*>(VirtualAlloc2(process, nullptr, backing_size,
+                                                      MEM_RESERVE | MEM_RESERVE_PLACEHOLDER,
+                                                      PAGE_NOACCESS, nullptr, 0));
+        if (!backing_base) {
+            Release();
+            throw std::bad_alloc{};
+        }
+        // Map backing placeholder
+        void* const ret = MapViewOfFile3(backing_handle, process, backing_base, 0, backing_size,
+                                         MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0);
+        if (ret != backing_base) {
+            Release();
+            throw std::bad_alloc{};
+        }
+        // Allocate virtual address placeholder
+        virtual_base = static_cast<u8*>(VirtualAlloc2(process, nullptr, virtual_size,
+                                                      MEM_RESERVE | MEM_RESERVE_PLACEHOLDER,
+                                                      PAGE_NOACCESS, nullptr, 0));
+        if (!virtual_base) {
+            Release();
+            throw std::bad_alloc{};
+        }
+    }
+
+    ~Impl() {
+        Release();
+    }
+
+    void Map(size_t virtual_offset, size_t host_offset, size_t length) {
+        std::unique_lock lock{placeholder_mutex};
+        if (!IsNiechePlaceholder(virtual_offset, length)) {
+            Split(virtual_offset, length);
+        }
+        ASSERT(placeholders.find({virtual_offset, virtual_offset + length}) == placeholders.end());
+        TrackPlaceholder(virtual_offset, host_offset, length);
+
+        MapView(virtual_offset, host_offset, length);
+    }
+
+    void Unmap(size_t virtual_offset, size_t length) {
+        std::lock_guard lock{placeholder_mutex};
+
+        // Unmap until there are no more placeholders
+        while (UnmapOnePlaceholder(virtual_offset, length)) {
+        }
+    }
+
+    void Protect(size_t virtual_offset, size_t length, bool read, bool write) {
+        DWORD new_flags{};
+        if (read && write) {
+            new_flags = PAGE_READWRITE;
+        } else if (read && !write) {
+            new_flags = PAGE_READONLY;
+        } else if (!read && !write) {
+            new_flags = PAGE_NOACCESS;
+        } else {
+            UNIMPLEMENTED_MSG("Protection flag combination read={} write={}", read, write);
+        }
+        DWORD old_flags{};
+        if (!VirtualProtect(virtual_base + virtual_offset, length, new_flags, &old_flags)) {
+            LOG_CRITICAL(HW_Memory, "Failed to change virtual memory protect rules");
+        }
+    }
+
+    const size_t backing_size; ///< Size of the backing memory in bytes
+    const size_t virtual_size; ///< Size of the virtual address placeholder in bytes
+
+    u8* backing_base{};
+    u8* virtual_base{};
+
+private:
+    /// Release all resources in the object
+    void Release() {
+        if (!placeholders.empty()) {
+            for (const auto& placeholder : placeholders) {
+                if (!UnmapViewOfFile2(process, virtual_base + placeholder.lower(),
+                                      MEM_PRESERVE_PLACEHOLDER)) {
+                    LOG_CRITICAL(HW_Memory, "Failed to unmap virtual memory placeholder");
+                }
+            }
+            Coalesce(0, virtual_size);
+        }
+        if (virtual_base) {
+            if (!VirtualFree(virtual_base, 0, MEM_RELEASE)) {
+                LOG_CRITICAL(HW_Memory, "Failed to free virtual memory");
+            }
+        }
+        if (backing_base) {
+            if (!UnmapViewOfFile2(process, backing_base, MEM_PRESERVE_PLACEHOLDER)) {
+                LOG_CRITICAL(HW_Memory, "Failed to unmap backing memory placeholder");
+            }
+            if (!VirtualFreeEx(process, backing_base, 0, MEM_RELEASE)) {
+                LOG_CRITICAL(HW_Memory, "Failed to free backing memory");
+            }
+        }
+        if (!CloseHandle(backing_handle)) {
+            LOG_CRITICAL(HW_Memory, "Failed to free backing memory file handle");
+        }
+    }
+
+    /// Unmap one placeholder in the given range (partial unmaps are supported)
+    /// Return true when a placeholder was unmapped, false when no placeholder is left in the range
+    bool UnmapOnePlaceholder(size_t virtual_offset, size_t length) {
+        const auto it = placeholders.find({virtual_offset, virtual_offset + length});
+        const auto begin = placeholders.begin();
+        const auto end = placeholders.end();
+        if (it == end) {
+            return false;
+        }
+        const size_t placeholder_begin = it->lower();
+        const size_t placeholder_end = it->upper();
+        const size_t unmap_begin = std::max(virtual_offset, placeholder_begin);
+        const size_t unmap_end = std::min(virtual_offset + length, placeholder_end);
+        ASSERT(unmap_begin >= placeholder_begin && unmap_begin < placeholder_end);
+        ASSERT(unmap_end <= placeholder_end && unmap_end > placeholder_begin);
+
+        const auto host_pointer_it = placeholder_host_pointers.find(placeholder_begin);
+        ASSERT(host_pointer_it != placeholder_host_pointers.end());
+        const size_t host_offset = host_pointer_it->second;
+
+        const bool split_left = unmap_begin > placeholder_begin;
+        const bool split_right = unmap_end < placeholder_end;
+
+        if (!UnmapViewOfFile2(process, virtual_base + placeholder_begin,
+                              MEM_PRESERVE_PLACEHOLDER)) {
+            LOG_CRITICAL(HW_Memory, "Failed to unmap placeholder");
+        }
+        // If we have to remap memory regions due to partial unmaps, we are in a data race as
+        // Windows doesn't support remapping memory without unmapping first. Avoid adding any extra
+        // logic within the panic region described below.
+
+        // Panic region, we are in a data race right now
+        if (split_left || split_right) {
+            Split(unmap_begin, unmap_end - unmap_begin);
+        }
+        if (split_left) {
+            MapView(placeholder_begin, host_offset, unmap_begin - placeholder_begin);
+        }
+        if (split_right) {
+            MapView(unmap_end, host_offset + unmap_end - placeholder_begin,
+                    placeholder_end - unmap_end);
+        }
+        // End panic region
+
+        size_t coalesce_begin = unmap_begin;
+        if (!split_left) {
+            // Try to coalesce pages to the left
+            coalesce_begin = it == begin ? 0 : std::prev(it)->upper();
+            if (coalesce_begin != placeholder_begin) {
+                Coalesce(coalesce_begin, unmap_end - coalesce_begin);
+            }
+        }
+        if (!split_right) {
+            // Try to coalesce pages to the right
+            const auto next = std::next(it);
+            const size_t next_begin = next == end ? virtual_size : next->lower();
+            if (placeholder_end != next_begin) {
+                // We can coalesce to the right
+                Coalesce(coalesce_begin, next_begin - coalesce_begin);
+            }
+        }
+        // Remove and reinsert placeholder trackers
+        UntrackPlaceholder(it);
+        if (split_left) {
+            TrackPlaceholder(placeholder_begin, host_offset, unmap_begin - placeholder_begin);
+        }
+        if (split_right) {
+            TrackPlaceholder(unmap_end, host_offset + unmap_end - placeholder_begin,
+                             placeholder_end - unmap_end);
+        }
+        return true;
+    }
+
+    void MapView(size_t virtual_offset, size_t host_offset, size_t length) {
+        if (!MapViewOfFile3(backing_handle, process, virtual_base + virtual_offset, host_offset,
+                            length, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0)) {
+            LOG_CRITICAL(HW_Memory, "Failed to map placeholder");
+        }
+    }
+
+    void Split(size_t virtual_offset, size_t length) {
+        if (!VirtualFreeEx(process, reinterpret_cast<LPVOID>(virtual_base + virtual_offset), length,
+                           MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER)) {
+            LOG_CRITICAL(HW_Memory, "Failed to split placeholder");
+        }
+    }
+
+    void Coalesce(size_t virtual_offset, size_t length) {
+        if (!VirtualFreeEx(process, reinterpret_cast<LPVOID>(virtual_base + virtual_offset), length,
+                           MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS)) {
+            LOG_CRITICAL(HW_Memory, "Failed to coalesce placeholders");
+        }
+    }
+
+    void TrackPlaceholder(size_t virtual_offset, size_t host_offset, size_t length) {
+        placeholders.insert({virtual_offset, virtual_offset + length});
+        placeholder_host_pointers.emplace(virtual_offset, host_offset);
+    }
+
+    void UntrackPlaceholder(boost::icl::separate_interval_set<size_t>::iterator it) {
+        placeholders.erase(it);
+        placeholder_host_pointers.erase(it->lower());
+    }
+
+    /// Return true when a given memory region is a "nieche": a region that fits exactly against
+    /// its neighboring placeholders, so the placeholders don't have to be split.
+    bool IsNiechePlaceholder(size_t virtual_offset, size_t length) const {
+        const auto it = placeholders.upper_bound({virtual_offset, virtual_offset + length});
+        if (it != placeholders.end() && it->lower() == virtual_offset + length) {
+            const bool is_root = it == placeholders.begin() && virtual_offset == 0;
+            return is_root || std::prev(it)->upper() == virtual_offset;
+        }
+        return false;
+    }
+
+    HANDLE process{};        ///< Current process handle
+    HANDLE backing_handle{}; ///< File based backing memory
+
+    std::mutex placeholder_mutex;                                 ///< Mutex for placeholders
+    boost::icl::separate_interval_set<size_t> placeholders;       ///< Mapped placeholders
+    std::unordered_map<size_t, size_t> placeholder_host_pointers; ///< Placeholder backing offset
+};
+
+#else
+
+#error Please implement the host memory for your platform
+
+#endif
+
+HostMemory::HostMemory(size_t backing_size, size_t virtual_size)
+    : impl{std::make_unique<HostMemory::Impl>(backing_size, virtual_size)},
+      backing_base{impl->backing_base}, virtual_base{impl->virtual_base} {}
+
+HostMemory::~HostMemory() = default;
+
+HostMemory::HostMemory(HostMemory&&) noexcept = default;
+
+HostMemory& HostMemory::operator=(HostMemory&&) noexcept = default;
+
+void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length) {
+    ASSERT(virtual_offset % PageAlignment == 0);
+    ASSERT(host_offset % PageAlignment == 0);
+    ASSERT(length % PageAlignment == 0);
+    ASSERT(virtual_offset + length <= impl->virtual_size);
+    ASSERT(host_offset + length <= impl->backing_size);
+    if (length == 0) {
+        return;
+    }
+    impl->Map(virtual_offset, host_offset, length);
+}
+
+void HostMemory::Unmap(size_t virtual_offset, size_t length) {
+    ASSERT(virtual_offset % PageAlignment == 0);
+    ASSERT(length % PageAlignment == 0);
+    ASSERT(virtual_offset + length <= impl->virtual_size);
+    if (length == 0) {
+        return;
+    }
+    impl->Unmap(virtual_offset, length);
+}
+
+void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool write) {
+    ASSERT(virtual_offset % PageAlignment == 0);
+    ASSERT(length % PageAlignment == 0);
+    ASSERT(virtual_offset + length <= impl->virtual_size);
+    if (length == 0) {
+        return;
+    }
+    impl->Protect(virtual_offset, length, read, write);
+}
+
+} // namespace Common
diff --git a/src/common/host_memory.h b/src/common/host_memory.h
new file mode 100644
index 0000000000..98005df7a2
--- /dev/null
+++ b/src/common/host_memory.h
@@ -0,0 +1,62 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include "common/common_types.h"
+
+namespace Common {
+
+/**
+ * A low-level linear memory buffer that supports multiple mappings.
+ * Its purpose is to rebuild a given sparse memory layout, including mirrors.
+ */
+class HostMemory {
+public:
+    explicit HostMemory(size_t backing_size, size_t virtual_size);
+    ~HostMemory();
+
+    /**
+     * Copy constructor and copy assignment. Deleted; if ever implemented, they shall return
+     * a copy of the buffer without the mappings.
+     * TODO: Implement them with COW if needed.
+     */
+    HostMemory(const HostMemory& other) = delete;
+    HostMemory& operator=(const HostMemory& other) = delete;
+
+    /**
+     * Move constructor and move assignment. They move the buffer and the mappings to the
+     * new object.
+     */
+    HostMemory(HostMemory&& other) noexcept;
+    HostMemory& operator=(HostMemory&& other) noexcept;
+
+    void Map(size_t virtual_offset, size_t host_offset, size_t length);
+
+    void Unmap(size_t virtual_offset, size_t length);
+
+    void Protect(size_t virtual_offset, size_t length, bool read, bool write);
+
+    [[nodiscard]] u8* BackingBasePointer() noexcept {
+        return backing_base;
+    }
+    [[nodiscard]] const u8* BackingBasePointer() const noexcept {
+        return backing_base;
+    }
+
+    [[nodiscard]] u8* VirtualBasePointer() noexcept {
+        return virtual_base;
+    }
+    [[nodiscard]] const u8* VirtualBasePointer() const noexcept {
+        return virtual_base;
+    }
+
+private:
+    // Low level handler for the platform dependent memory routines
+    class Impl;
+    std::unique_ptr<Impl> impl;
+    u8* backing_base{};
+    u8* virtual_base{};
+};
+
+} // namespace Common

From 5105318bbc6843de14f3f949515007d9bf76aa7b Mon Sep 17 00:00:00 2001
From: Markus Wick <markus@selfnet.de>
Date: Sat, 5 Jun 2021 10:07:26 +0200
Subject: [PATCH 02/11] common/host_memory: Add Linux implementation

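The Linux backend reaches the same layout with a single memfd: ftruncate()
sizes the backing memory, one MAP_SHARED view of the fd serves as the
backing pointer, and the virtual arena is a PROT_NONE anonymous reservation
that Map() overwrites in place with MAP_FIXED views of the same fd. Since
every view shares the one file, mirrors come for free. Minimal standalone
sketch (illustrative only; needs _GNU_SOURCE, error handling omitted):

    #include <sys/mman.h>
    #include <unistd.h>

    int fd = memfd_create("demo", 0);
    ftruncate(fd, 0x1000);
    // Two views of the same memfd page alias the same physical memory.
    auto* a = static_cast<unsigned char*>(
        mmap(nullptr, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0));
    auto* b = static_cast<unsigned char*>(
        mmap(nullptr, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0));
    a[0] = 123; // b[0] now reads 123 as well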
---
 src/common/host_memory.cpp | 130 ++++++++++++++++++++++++++++++++++---
 1 file changed, 120 insertions(+), 10 deletions(-)

diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp
index 4f5086e909..eb50fbd9f8 100644
--- a/src/common/host_memory.cpp
+++ b/src/common/host_memory.cpp
@@ -1,11 +1,5 @@
-#ifdef __linux__
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <unistd.h>
-#elif defined(_WIN32) // ^^^ Linux ^^^ vvv Windows vvv
+#ifdef _WIN32
+
 #ifdef _WIN32_WINNT
 #undef _WIN32_WINNT
 #endif
@@ -20,13 +14,23 @@
 
 #pragma comment(lib, "mincore.lib")
 
-#endif // ^^^ Windows ^^^
+#elif defined(__linux__) // ^^^ Windows ^^^ vvv Linux vvv
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#endif // ^^^ Linux ^^^
 
 #include <mutex>
 
 #include "common/assert.h"
 #include "common/host_memory.h"
 #include "common/logging/log.h"
+#include "common/scope_exit.h"
 
 namespace Common {
 
@@ -269,7 +273,113 @@ private:
     std::unordered_map<size_t, size_t> placeholder_host_pointers; ///< Placeholder backing offset
 };
 
-#else
+#elif defined(__linux__) // ^^^ Windows ^^^ vvv Linux vvv
+
+class HostMemory::Impl {
+public:
+    explicit Impl(size_t backing_size_, size_t virtual_size_)
+        : backing_size{backing_size_}, virtual_size{virtual_size_} {
+        bool good = false;
+        SCOPE_EXIT({
+            if (!good) {
+                Release();
+            }
+        });
+
+        // Backing memory initialization
+        fd = memfd_create("HostMemory", 0);
+        if (fd == -1) {
+            LOG_CRITICAL(HW_Memory, "memfd_create failed: {}", strerror(errno));
+            throw std::bad_alloc{};
+        }
+
+        // ftruncate is defined to extend the file with zeros
+        int ret = ftruncate(fd, backing_size);
+        if (ret != 0) {
+            LOG_CRITICAL(HW_Memory, "ftruncate failed with {}, are you out-of-memory?",
+                         strerror(errno));
+            throw std::bad_alloc{};
+        }
+
+        backing_base = static_cast<u8*>(
+            mmap(nullptr, backing_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0));
+        if (backing_base == MAP_FAILED) {
+            LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno));
+            throw std::bad_alloc{};
+        }
+
+        // Virtual memory initialization
+        virtual_base = static_cast<u8*>(
+            mmap(nullptr, virtual_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+        if (virtual_base == MAP_FAILED) {
+            LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno));
+            throw std::bad_alloc{};
+        }
+
+        good = true;
+    }
+
+    ~Impl() {
+        Release();
+    }
+
+    void Map(size_t virtual_offset, size_t host_offset, size_t length) {
+
+        void* ret = mmap(virtual_base + virtual_offset, length, PROT_READ | PROT_WRITE,
+                         MAP_SHARED | MAP_FIXED, fd, host_offset);
+        ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
+    }
+
+    void Unmap(size_t virtual_offset, size_t length) {
+        // "Unmap" is a misnomer here: the virtual range stays reserved. We don't
+        // want to unmap it, we want to replace it with inaccessible memory.
+
+        void* ret = mmap(virtual_base + virtual_offset, length, PROT_NONE,
+                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+        ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
+    }
+
+    void Protect(size_t virtual_offset, size_t length, bool read, bool write) {
+        int flags = 0;
+        if (read) {
+            flags |= PROT_READ;
+        }
+        if (write) {
+            flags |= PROT_WRITE;
+        }
+        int ret = mprotect(virtual_base + virtual_offset, length, flags);
+        ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno));
+    }
+
+    const size_t backing_size; ///< Size of the backing memory in bytes
+    const size_t virtual_size; ///< Size of the virtual address placeholder in bytes
+
+    u8* backing_base{reinterpret_cast<u8*>(MAP_FAILED)};
+    u8* virtual_base{reinterpret_cast<u8*>(MAP_FAILED)};
+
+private:
+    /// Release all resources in the object
+    void Release() {
+        if (virtual_base != MAP_FAILED) {
+            int ret = munmap(virtual_base, virtual_size);
+            ASSERT_MSG(ret == 0, "munmap failed: {}", strerror(errno));
+        }
+
+        if (backing_base != MAP_FAILED) {
+            int ret = munmap(backing_base, backing_size);
+            ASSERT_MSG(ret == 0, "munmap failed: {}", strerror(errno));
+        }
+
+        if (fd != -1) {
+            int ret = close(fd);
+            ASSERT_MSG(ret == 0, "close failed: {}", strerror(errno));
+        }
+    }
+
+    int fd{-1}; // memfd file descriptor, -1 is the error value of memfd_create
+};
+
+#else // ^^^ Linux ^^^
 
 #error Please implement the host memory for your platform
 

From 740edacc8dd03a8dccdd194ffed8e2b5ec490f73 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 5 Jun 2021 04:16:32 -0300
Subject: [PATCH 03/11] tests: Add tests for host memory

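The mirror test exercises overlapping backing ranges: Map(0x5000, 0x3000,
0x2000) and Map(0x8000, 0x4000, 0x1000) both cover backing offset 0x4000,
so virtual 0x5000 + 0x1000 and virtual 0x8000 + 0x0 resolve to the same
byte, and a write through one view must be visible through the other. The
remaining tests stress the placeholder bookkeeping: partial, out-of-bounds
and multi-placeholder unmaps followed by remaps over the same ranges.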
---
 src/tests/CMakeLists.txt         |   1 +
 src/tests/common/host_memory.cpp | 183 +++++++++++++++++++++++++++++++
 2 files changed, 184 insertions(+)
 create mode 100644 src/tests/common/host_memory.cpp

diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index d875c4fee1..96bc30cac3 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -2,6 +2,7 @@ add_executable(tests
     common/bit_field.cpp
     common/cityhash.cpp
     common/fibers.cpp
+    common/host_memory.cpp
     common/param_package.cpp
     common/ring_buffer.cpp
     core/core_timing.cpp
diff --git a/src/tests/common/host_memory.cpp b/src/tests/common/host_memory.cpp
new file mode 100644
index 0000000000..e241f8be51
--- /dev/null
+++ b/src/tests/common/host_memory.cpp
@@ -0,0 +1,183 @@
+// Copyright 2021 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <catch2/catch.hpp>
+
+#include "common/host_memory.h"
+
+using Common::HostMemory;
+
+static constexpr size_t VIRTUAL_SIZE = 1ULL << 39;
+static constexpr size_t BACKING_SIZE = 4ULL * 1024 * 1024 * 1024;
+
+TEST_CASE("HostMemory: Initialize and deinitialize", "[common]") {
+    { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); }
+    { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); }
+}
+
+TEST_CASE("HostMemory: Simple map", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x5000, 0x8000, 0x1000);
+
+    volatile u8* const data = mem.VirtualBasePointer() + 0x5000;
+    data[0] = 50;
+    REQUIRE(data[0] == 50);
+}
+
+TEST_CASE("HostMemory: Simple mirror map", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x5000, 0x3000, 0x2000);
+    mem.Map(0x8000, 0x4000, 0x1000);
+
+    volatile u8* const mirror_a = mem.VirtualBasePointer() + 0x5000;
+    volatile u8* const mirror_b = mem.VirtualBasePointer() + 0x8000;
+    mirror_b[0] = 76;
+    REQUIRE(mirror_a[0x1000] == 76);
+}
+
+TEST_CASE("HostMemory: Simple unmap", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x5000, 0x3000, 0x2000);
+
+    volatile u8* const data = mem.VirtualBasePointer() + 0x5000;
+    data[75] = 50;
+    REQUIRE(data[75] == 50);
+
+    mem.Unmap(0x5000, 0x2000);
+}
+
+TEST_CASE("HostMemory: Simple unmap and remap", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x5000, 0x3000, 0x2000);
+
+    volatile u8* const data = mem.VirtualBasePointer() + 0x5000;
+    data[0] = 50;
+    REQUIRE(data[0] == 50);
+
+    mem.Unmap(0x5000, 0x2000);
+
+    mem.Map(0x5000, 0x3000, 0x2000);
+    REQUIRE(data[0] == 50);
+
+    mem.Map(0x7000, 0x2000, 0x5000);
+    REQUIRE(data[0x3000] == 50);
+}
+
+TEST_CASE("HostMemory: Nieche allocation", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x0000, 0, 0x20000);
+    mem.Unmap(0x0000, 0x4000);
+    mem.Map(0x1000, 0, 0x2000);
+    mem.Map(0x3000, 0, 0x1000);
+    mem.Map(0, 0, 0x1000);
+}
+
+TEST_CASE("HostMemory: Full unmap", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x8000, 0, 0x4000);
+    mem.Unmap(0x8000, 0x4000);
+    mem.Map(0x6000, 0, 0x16000);
+}
+
+TEST_CASE("HostMemory: Right out of bounds unmap", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x0000, 0, 0x4000);
+    mem.Unmap(0x2000, 0x4000);
+    mem.Map(0x2000, 0x80000, 0x4000);
+}
+
+TEST_CASE("HostMemory: Left out of bounds unmap", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x8000, 0, 0x4000);
+    mem.Unmap(0x6000, 0x4000);
+    mem.Map(0x8000, 0, 0x2000);
+}
+
+TEST_CASE("HostMemory: Multiple placeholder unmap", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x0000, 0, 0x4000);
+    mem.Map(0x4000, 0, 0x1b000);
+    mem.Unmap(0x3000, 0x1c000);
+    mem.Map(0x3000, 0, 0x20000);
+}
+
+TEST_CASE("HostMemory: Unmap between placeholders", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x0000, 0, 0x4000);
+    mem.Map(0x4000, 0, 0x4000);
+    mem.Unmap(0x2000, 0x4000);
+    mem.Map(0x2000, 0, 0x4000);
+}
+
+TEST_CASE("HostMemory: Unmap to origin", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x4000, 0, 0x4000);
+    mem.Map(0x8000, 0, 0x4000);
+    mem.Unmap(0x4000, 0x4000);
+    mem.Map(0, 0, 0x4000);
+    mem.Map(0x4000, 0, 0x4000);
+}
+
+TEST_CASE("HostMemory: Unmap to right", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x4000, 0, 0x4000);
+    mem.Map(0x8000, 0, 0x4000);
+    mem.Unmap(0x8000, 0x4000);
+    mem.Map(0x8000, 0, 0x4000);
+}
+
+TEST_CASE("HostMemory: Partial right unmap check bindings", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x4000, 0x10000, 0x4000);
+
+    volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
+    ptr[0x1000] = 17;
+
+    mem.Unmap(0x6000, 0x2000);
+
+    REQUIRE(ptr[0x1000] == 17);
+}
+
+TEST_CASE("HostMemory: Partial left unmap check bindings", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x4000, 0x10000, 0x4000);
+
+    volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
+    ptr[0x3000] = 19;
+    ptr[0x3fff] = 12;
+
+    mem.Unmap(0x4000, 0x2000);
+
+    REQUIRE(ptr[0x3000] == 19);
+    REQUIRE(ptr[0x3fff] == 12);
+}
+
+TEST_CASE("HostMemory: Partial middle unmap check bindings", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x4000, 0x10000, 0x4000);
+
+    volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
+    ptr[0x0000] = 19;
+    ptr[0x3fff] = 12;
+
+    mem.Unmap(0x1000, 0x2000);
+
+    REQUIRE(ptr[0x0000] == 19);
+    REQUIRE(ptr[0x3fff] == 12);
+}
+
+TEST_CASE("HostMemory: Partial sparse middle unmap and check bindings", "[common]") {
+    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
+    mem.Map(0x4000, 0x10000, 0x2000);
+    mem.Map(0x6000, 0x20000, 0x2000);
+
+    volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
+    ptr[0x0000] = 19;
+    ptr[0x3fff] = 12;
+
+    mem.Unmap(0x5000, 0x2000);
+
+    REQUIRE(ptr[0x0000] == 19);
+    REQUIRE(ptr[0x3fff] == 12);
+}

From 621f3f5f47bf9619148cc0ab7ed315e05abf79d7 Mon Sep 17 00:00:00 2001
From: Markus Wick <markus@selfnet.de>
Date: Sun, 19 Jan 2020 01:49:30 +0100
Subject: [PATCH 04/11] core: Make use of fastmem

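The fastmem arena is the virtual side of DeviceMemory's HostMemory buffer,
sized 1ULL << 39 to cover the 39-bit guest address space. Once
page_table->fastmem_arena is handed to dynarmic, a guest access becomes a
plain host access instead of a page-table walk, and with
silently_mirror_fastmem = false an out-of-range access faults back into the
slow path. Conceptually (hypothetical demo, not dynarmic's emitted code):

    u8* const arena = page_table->fastmem_arena; // from this patch
    const VAddr guest_addr = 0x8004000;
    u8* const host_ptr = arena + guest_addr;     // one add, no table walk;
                                                 // unmapped pages fault and
                                                 // fall back to the slow path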
---
 externals/dynarmic                        |  2 +-
 src/common/page_table.h                   |  2 ++
 src/core/arm/dynarmic/arm_dynarmic_32.cpp |  1 +
 src/core/arm/dynarmic/arm_dynarmic_64.cpp |  4 ++++
 src/core/device_memory.cpp                |  2 +-
 src/core/device_memory.h                  | 17 ++++++++++-------
 src/core/memory.cpp                       | 12 ++++++++++++
 7 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/externals/dynarmic b/externals/dynarmic
index 828959caed..0c12614d1a 160000
--- a/externals/dynarmic
+++ b/externals/dynarmic
@@ -1 +1 @@
-Subproject commit 828959caedfac2d456a0c877fda4612e35fffc03
+Subproject commit 0c12614d1a7a72d778609920dde96a4c63074ece
diff --git a/src/common/page_table.h b/src/common/page_table.h
index e92b66b2b0..8267e8b4d8 100644
--- a/src/common/page_table.h
+++ b/src/common/page_table.h
@@ -111,6 +111,8 @@ struct PageTable {
     VirtualBuffer<u64> backing_addr;
 
     size_t current_address_space_width_in_bits;
+
+    u8* fastmem_arena;
 };
 
 } // namespace Common
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index cea7f0fb13..fb128f7358 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -128,6 +128,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
     if (page_table) {
         config.page_table = reinterpret_cast<std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>*>(
             page_table->pointers.data());
+        config.fastmem_pointer = page_table->fastmem_arena;
     }
     config.absolute_offset_page_table = true;
     config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 63193dcb14..b0ac8cf8ac 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -160,6 +160,10 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
         config.absolute_offset_page_table = true;
         config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
         config.only_detect_misalignment_via_page_table_on_page_boundary = true;
+
+        config.fastmem_pointer = page_table->fastmem_arena;
+        config.fastmem_address_space_bits = address_space_bits;
+        config.silently_mirror_fastmem = false;
     }
 
     // Multi-process state
diff --git a/src/core/device_memory.cpp b/src/core/device_memory.cpp
index 0c4b440ed7..f19c0515ff 100644
--- a/src/core/device_memory.cpp
+++ b/src/core/device_memory.cpp
@@ -6,7 +6,7 @@
 
 namespace Core {
 
-DeviceMemory::DeviceMemory() : buffer{DramMemoryMap::Size} {}
+DeviceMemory::DeviceMemory() : buffer{DramMemoryMap::Size, 1ULL << 39} {}
 DeviceMemory::~DeviceMemory() = default;
 
 } // namespace Core
diff --git a/src/core/device_memory.h b/src/core/device_memory.h
index 5b1ae28f3d..c4d17705f1 100644
--- a/src/core/device_memory.h
+++ b/src/core/device_memory.h
@@ -5,7 +5,7 @@
 #pragma once
 
 #include "common/common_types.h"
-#include "common/virtual_buffer.h"
+#include "common/host_memory.h"
 
 namespace Core {
 
@@ -21,27 +21,30 @@ enum : u64 {
 };
 }; // namespace DramMemoryMap
 
-class DeviceMemory : NonCopyable {
+class DeviceMemory {
 public:
     explicit DeviceMemory();
     ~DeviceMemory();
 
+    DeviceMemory& operator=(const DeviceMemory&) = delete;
+    DeviceMemory(const DeviceMemory&) = delete;
+
     template <typename T>
     PAddr GetPhysicalAddr(const T* ptr) const {
-        return (reinterpret_cast<uintptr_t>(ptr) - reinterpret_cast<uintptr_t>(buffer.data())) +
+        return (reinterpret_cast<uintptr_t>(ptr) -
+                reinterpret_cast<uintptr_t>(buffer.BackingBasePointer())) +
                DramMemoryMap::Base;
     }
 
     u8* GetPointer(PAddr addr) {
-        return buffer.data() + (addr - DramMemoryMap::Base);
+        return buffer.BackingBasePointer() + (addr - DramMemoryMap::Base);
     }
 
     const u8* GetPointer(PAddr addr) const {
-        return buffer.data() + (addr - DramMemoryMap::Base);
+        return buffer.BackingBasePointer() + (addr - DramMemoryMap::Base);
     }
 
-private:
-    Common::VirtualBuffer<u8> buffer;
+    Common::HostMemory buffer;
 };
 
 } // namespace Core
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 9857278f66..79468e4dc7 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -12,6 +12,7 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "common/page_table.h"
+#include "common/settings.h"
 #include "common/swap.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
@@ -32,6 +33,7 @@ struct Memory::Impl {
 
     void SetCurrentPageTable(Kernel::KProcess& process, u32 core_id) {
         current_page_table = &process.PageTable().PageTableImpl();
+        current_page_table->fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
 
         const std::size_t address_space_width = process.PageTable().GetAddressSpaceWidth();
 
@@ -41,13 +43,19 @@ struct Memory::Impl {
     void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) {
         ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
         ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
+        ASSERT_MSG(target >= DramMemoryMap::Base && target < DramMemoryMap::End,
+                   "Out of bounds target: {:016X}", target);
         MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory);
+
+        system.DeviceMemory().buffer.Map(base, target - DramMemoryMap::Base, size);
     }
 
     void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
         ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
         ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
         MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, 0, Common::PageType::Unmapped);
+
+        system.DeviceMemory().buffer.Unmap(base, size);
     }
 
     bool IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const {
@@ -466,6 +474,10 @@ struct Memory::Impl {
         if (vaddr == 0) {
             return;
         }
+
+        const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
+        system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
+
         // Iterate over a contiguous CPU address space, which corresponds to the specified GPU
         // address space, marking the region as un/cached. The region is marked un/cached at a
         // granularity of CPU pages, hence why we iterate on a CPU page basis (note: GPU page size

From c4609c92eea30558473f02082733c7e59c2d2013 Mon Sep 17 00:00:00 2001
From: Markus Wick <markus@selfnet.de>
Date: Sat, 5 Jun 2021 11:47:08 +0200
Subject: [PATCH 05/11] common/host_memory: Optimize for huge pages.

In theory, if we have 2 MB mapped contiguously, this should save one level of the TLB.
Let's make that at least more likely by aligning the memory.
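
The constructor over-allocates the arena by 3 huge pages and rounds the
exposed base up:

    virtual_base += 2 * HugePageSize - 1;
    virtual_base -= reinterpret_cast<size_t>(virtual_base) & (HugePageSize - 1);

With HugePageSize = 0x200000 this lands the exposed base between one and
two huge pages past the raw reservation, always 2 MB-aligned; for example a
raw base of 0x7f1234567000 becomes 0x7f1234800000. virtual_base_offset
records the shift so Map/Unmap/Protect can translate caller offsets.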
---
 src/common/host_memory.cpp | 29 +++++++++++++++++++----------
 src/common/host_memory.h   |  6 +++++-
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp
index eb50fbd9f8..8a328f916d 100644
--- a/src/common/host_memory.cpp
+++ b/src/common/host_memory.cpp
@@ -27,6 +27,7 @@
 
 #include <mutex>
 
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/host_memory.h"
 #include "common/logging/log.h"
@@ -35,6 +36,7 @@
 namespace Common {
 
 constexpr size_t PageAlignment = 0x1000;
+constexpr size_t HugePageSize = 0x200000;
 
 #ifdef _WIN32
 
@@ -385,9 +387,16 @@ private:
 
 #endif
 
-HostMemory::HostMemory(size_t backing_size, size_t virtual_size)
-    : impl{std::make_unique<HostMemory::Impl>(backing_size, virtual_size)},
-      backing_base{impl->backing_base}, virtual_base{impl->virtual_base} {}
+HostMemory::HostMemory(size_t backing_size_, size_t virtual_size_)
+    : backing_size(backing_size_),
+      virtual_size(virtual_size_), impl{std::make_unique<HostMemory::Impl>(
+                                       AlignUp(backing_size, PageAlignment),
+                                       AlignUp(virtual_size, PageAlignment) + 3 * HugePageSize)},
+      backing_base{impl->backing_base}, virtual_base{impl->virtual_base} {
+    virtual_base += 2 * HugePageSize - 1;
+    virtual_base -= reinterpret_cast<size_t>(virtual_base) & (HugePageSize - 1);
+    virtual_base_offset = virtual_base - impl->virtual_base;
+}
 
 HostMemory::~HostMemory() = default;
 
@@ -399,32 +408,32 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length) {
     ASSERT(virtual_offset % PageAlignment == 0);
     ASSERT(host_offset % PageAlignment == 0);
     ASSERT(length % PageAlignment == 0);
-    ASSERT(virtual_offset + length <= impl->virtual_size);
-    ASSERT(host_offset + length <= impl->backing_size);
+    ASSERT(virtual_offset + length <= virtual_size);
+    ASSERT(host_offset + length <= backing_size);
     if (length == 0) {
         return;
     }
-    impl->Map(virtual_offset, host_offset, length);
+    impl->Map(virtual_offset + virtual_base_offset, host_offset, length);
 }
 
 void HostMemory::Unmap(size_t virtual_offset, size_t length) {
     ASSERT(virtual_offset % PageAlignment == 0);
     ASSERT(length % PageAlignment == 0);
-    ASSERT(virtual_offset + length <= impl->virtual_size);
+    ASSERT(virtual_offset + length <= virtual_size);
     if (length == 0) {
         return;
     }
-    impl->Unmap(virtual_offset, length);
+    impl->Unmap(virtual_offset + virtual_base_offset, length);
 }
 
 void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool write) {
     ASSERT(virtual_offset % PageAlignment == 0);
     ASSERT(length % PageAlignment == 0);
-    ASSERT(virtual_offset + length <= impl->virtual_size);
+    ASSERT(virtual_offset + length <= virtual_size);
     if (length == 0) {
         return;
     }
-    impl->Protect(virtual_offset, length, read, write);
+    impl->Protect(virtual_offset + virtual_base_offset, length, read, write);
 }
 
 } // namespace Common
diff --git a/src/common/host_memory.h b/src/common/host_memory.h
index 98005df7a2..eaa7d18ab5 100644
--- a/src/common/host_memory.h
+++ b/src/common/host_memory.h
@@ -15,7 +15,7 @@ namespace Common {
  */
 class HostMemory {
 public:
-    explicit HostMemory(size_t backing_size, size_t virtual_size);
+    explicit HostMemory(size_t backing_size_, size_t virtual_size_);
     ~HostMemory();
 
     /**
@@ -52,11 +52,15 @@ public:
     }
 
 private:
+    size_t backing_size{};
+    size_t virtual_size{};
+
     // Low level handler for the platform dependent memory routines
     class Impl;
     std::unique_ptr<Impl> impl;
     u8* backing_base{};
     u8* virtual_base{};
+    size_t virtual_base_offset{};
 };
 
 } // namespace Common

From 5ba28325b262d44fcd7721aa00074955bd794015 Mon Sep 17 00:00:00 2001
From: FernandoS27 <fsahmkow27@gmail.com>
Date: Sun, 6 Jun 2021 09:57:24 +0200
Subject: [PATCH 06/11] General: Add settings for fastmem and disabling address
 space checks.

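Two settings are added: cpuopt_fastmem, a debug-tier CPU option that is
implied on outside Debug accuracy (see IsFastmemEnabled), and the unsafe
cpuopt_unsafe_fastmem_check, which drops the guest address space bounds
check by declaring the full 64-bit space to dynarmic:

    if (Settings::values.cpuopt_unsafe_fastmem_check.GetValue()) {
        config.fastmem_address_space_bits = 64;
    }

With 64 address space bits every guest address is treated as in range, so
the bounds/mirror fallback is never taken; a misbehaving game could then
touch emulator memory, hence the "unsafe" tier.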
---
 src/common/settings.cpp                        |  8 ++++++++
 src/common/settings.h                          |  4 ++++
 src/core/arm/dynarmic/arm_dynarmic_32.cpp      |  5 ++++-
 src/core/arm/dynarmic/arm_dynarmic_64.cpp      |  8 +++++++-
 src/core/memory.cpp                            | 14 ++++++++++----
 src/yuzu/configuration/config.cpp              |  7 +++++++
 src/yuzu/configuration/configure_cpu.cpp       |  9 +++++++++
 src/yuzu/configuration/configure_cpu.h         |  1 +
 src/yuzu/configuration/configure_cpu.ui        | 12 ++++++++++++
 src/yuzu/configuration/configure_cpu_debug.cpp |  3 +++
 src/yuzu/configuration/configure_cpu_debug.ui  | 14 ++++++++++++++
 src/yuzu_cmd/default_ini.h                     |  4 ++++
 12 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index bcb4e4be1a..360e878d60 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -90,6 +90,13 @@ bool IsGPULevelHigh() {
            values.gpu_accuracy.GetValue() == GPUAccuracy::High;
 }
 
+bool IsFastmemEnabled() {
+    if (values.cpu_accuracy.GetValue() == CPUAccuracy::DebugMode) {
+        return values.cpuopt_fastmem;
+    }
+    return true;
+}
+
 float Volume() {
     if (values.audio_muted) {
         return 0.0f;
@@ -115,6 +122,7 @@ void RestoreGlobalState(bool is_powered_on) {
     values.cpuopt_unsafe_unfuse_fma.SetGlobal(true);
     values.cpuopt_unsafe_reduce_fp_error.SetGlobal(true);
     values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true);
+    values.cpuopt_unsafe_fastmem_check.SetGlobal(true);
 
     // Renderer
     values.renderer_backend.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index 48085b9a95..1af8c5ac2e 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -125,10 +125,12 @@ struct Values {
     bool cpuopt_const_prop;
     bool cpuopt_misc_ir;
     bool cpuopt_reduce_misalign_checks;
+    bool cpuopt_fastmem;
 
     Setting<bool> cpuopt_unsafe_unfuse_fma;
     Setting<bool> cpuopt_unsafe_reduce_fp_error;
     Setting<bool> cpuopt_unsafe_inaccurate_nan;
+    Setting<bool> cpuopt_unsafe_fastmem_check;
 
     // Renderer
     Setting<RendererBackend> renderer_backend;
@@ -249,6 +251,8 @@ void SetConfiguringGlobal(bool is_global);
 bool IsGPULevelExtreme();
 bool IsGPULevelHigh();
 
+bool IsFastmemEnabled();
+
 float Volume();
 
 std::string GetTimeZoneString();
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index fb128f7358..c8f6dc7653 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -144,7 +144,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
 
     // Code cache size
     config.code_cache_size = 512 * 1024 * 1024;
-    config.far_code_offset = 256 * 1024 * 1024;
+    config.far_code_offset = 400 * 1024 * 1024;
 
     // Safe optimizations
     if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) {
@@ -172,6 +172,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
         if (!Settings::values.cpuopt_reduce_misalign_checks) {
             config.only_detect_misalignment_via_page_table_on_page_boundary = false;
         }
+        if (!Settings::values.cpuopt_fastmem) {
+            config.fastmem_pointer = nullptr;
+        }
     }
 
     // Unsafe optimizations
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index b0ac8cf8ac..ba524cd058 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -185,7 +185,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
 
     // Code cache size
     config.code_cache_size = 512 * 1024 * 1024;
-    config.far_code_offset = 256 * 1024 * 1024;
+    config.far_code_offset = 400 * 1024 * 1024;
 
     // Safe optimizations
     if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) {
@@ -213,6 +213,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
         if (!Settings::values.cpuopt_reduce_misalign_checks) {
             config.only_detect_misalignment_via_page_table_on_page_boundary = false;
         }
+        if (!Settings::values.cpuopt_fastmem) {
+            config.fastmem_pointer = nullptr;
+        }
     }
 
     // Unsafe optimizations
@@ -227,6 +230,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
         if (Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()) {
             config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
         }
+        if (Settings::values.cpuopt_unsafe_fastmem_check.GetValue()) {
+            config.fastmem_address_space_bits = 64;
+        }
     }
 
     return std::make_shared<Dynarmic::A64::Jit>(config);
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 79468e4dc7..f285c6f639 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -47,7 +47,9 @@ struct Memory::Impl {
                    "Out of bounds target: {:016X}", target);
         MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory);
 
-        system.DeviceMemory().buffer.Map(base, target - DramMemoryMap::Base, size);
+        if (Settings::IsFastmemEnabled()) {
+            system.DeviceMemory().buffer.Map(base, target - DramMemoryMap::Base, size);
+        }
     }
 
     void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
@@ -55,7 +57,9 @@ struct Memory::Impl {
         ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
         MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, 0, Common::PageType::Unmapped);
 
-        system.DeviceMemory().buffer.Unmap(base, size);
+        if (Settings::IsFastmemEnabled()) {
+            system.DeviceMemory().buffer.Unmap(base, size);
+        }
     }
 
     bool IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const {
@@ -475,8 +479,10 @@ struct Memory::Impl {
             return;
         }
 
-        const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
-        system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
+        if (Settings::IsFastmemEnabled()) {
+            const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
+            system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
+        }
 
         // Iterate over a contiguous CPU address space, which corresponds to the specified GPU
         // address space, marking the region as un/cached. The region is marked un/cached at a
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index e9d4bef607..a59b36e138 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -756,6 +756,8 @@ void Config::ReadCpuValues() {
                       QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true);
     ReadSettingGlobal(Settings::values.cpuopt_unsafe_inaccurate_nan,
                       QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true);
+    ReadSettingGlobal(Settings::values.cpuopt_unsafe_fastmem_check,
+                      QStringLiteral("cpuopt_unsafe_fastmem_check"), true);
 
     if (global) {
         Settings::values.cpuopt_page_tables =
@@ -774,6 +776,8 @@ void Config::ReadCpuValues() {
             ReadSetting(QStringLiteral("cpuopt_misc_ir"), true).toBool();
         Settings::values.cpuopt_reduce_misalign_checks =
             ReadSetting(QStringLiteral("cpuopt_reduce_misalign_checks"), true).toBool();
+        Settings::values.cpuopt_fastmem =
+            ReadSetting(QStringLiteral("cpuopt_fastmem"), true).toBool();
     }
 
     qt_config->endGroup();
@@ -1332,6 +1336,8 @@ void Config::SaveCpuValues() {
                        Settings::values.cpuopt_unsafe_reduce_fp_error, true);
     WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_inaccurate_nan"),
                        Settings::values.cpuopt_unsafe_inaccurate_nan, true);
+    WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_fastmem_check"),
+                       Settings::values.cpuopt_unsafe_fastmem_check, true);
 
     if (global) {
         WriteSetting(QStringLiteral("cpuopt_page_tables"), Settings::values.cpuopt_page_tables,
@@ -1348,6 +1354,7 @@ void Config::SaveCpuValues() {
         WriteSetting(QStringLiteral("cpuopt_misc_ir"), Settings::values.cpuopt_misc_ir, true);
         WriteSetting(QStringLiteral("cpuopt_reduce_misalign_checks"),
                      Settings::values.cpuopt_reduce_misalign_checks, true);
+        WriteSetting(QStringLiteral("cpuopt_fastmem"), Settings::values.cpuopt_fastmem, true);
     }
 
     qt_config->endGroup();
diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp
index 525c42ff0d..22219cbada 100644
--- a/src/yuzu/configuration/configure_cpu.cpp
+++ b/src/yuzu/configuration/configure_cpu.cpp
@@ -35,12 +35,15 @@ void ConfigureCpu::SetConfiguration() {
     ui->cpuopt_unsafe_unfuse_fma->setEnabled(runtime_lock);
     ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock);
     ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock);
+    ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock);
 
     ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue());
     ui->cpuopt_unsafe_reduce_fp_error->setChecked(
         Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue());
     ui->cpuopt_unsafe_inaccurate_nan->setChecked(
         Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue());
+    ui->cpuopt_unsafe_fastmem_check->setChecked(
+        Settings::values.cpuopt_unsafe_fastmem_check.GetValue());
 
     if (Settings::IsConfiguringGlobal()) {
         ui->accuracy->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy.GetValue()));
@@ -84,6 +87,9 @@ void ConfigureCpu::ApplyConfiguration() {
     ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_inaccurate_nan,
                                              ui->cpuopt_unsafe_inaccurate_nan,
                                              cpuopt_unsafe_inaccurate_nan);
+    ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_fastmem_check,
+                                             ui->cpuopt_unsafe_fastmem_check,
+                                             cpuopt_unsafe_fastmem_check);
 
     if (Settings::IsConfiguringGlobal()) {
         // Guard if during game and set to game-specific value
@@ -134,4 +140,7 @@ void ConfigureCpu::SetupPerGameUI() {
     ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_inaccurate_nan,
                                             Settings::values.cpuopt_unsafe_inaccurate_nan,
                                             cpuopt_unsafe_inaccurate_nan);
+    ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_fastmem_check,
+                                            Settings::values.cpuopt_unsafe_fastmem_check,
+                                            cpuopt_unsafe_fastmem_check);
 }
diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h
index 8e2eeb7a68..57ff2772a6 100644
--- a/src/yuzu/configuration/configure_cpu.h
+++ b/src/yuzu/configuration/configure_cpu.h
@@ -41,4 +41,5 @@ private:
     ConfigurationShared::CheckState cpuopt_unsafe_unfuse_fma;
     ConfigurationShared::CheckState cpuopt_unsafe_reduce_fp_error;
     ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan;
+    ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check;
 };
diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui
index 99b5736405..31ef9e3f54 100644
--- a/src/yuzu/configuration/configure_cpu.ui
+++ b/src/yuzu/configuration/configure_cpu.ui
@@ -123,6 +123,18 @@
           </property>
          </widget>
         </item>
+        <item>
+         <widget class="QCheckBox" name="cpuopt_unsafe_fastmem_check">
+          <property name="toolTip">
+           <string>
+            &lt;div&gt;This option improves speed by eliminating a safety check before every memory read/write in the guest. Without this check, a game may be able to read/write the emulator's memory.&lt;/div&gt;
+           </string>
+          </property>
+          <property name="text">
+           <string>Disable address space checks</string>
+          </property>
+         </widget>
+        </item>
        </layout>
       </widget>
      </item>
diff --git a/src/yuzu/configuration/configure_cpu_debug.cpp b/src/yuzu/configuration/configure_cpu_debug.cpp
index c925c023c4..e25c52baf4 100644
--- a/src/yuzu/configuration/configure_cpu_debug.cpp
+++ b/src/yuzu/configuration/configure_cpu_debug.cpp
@@ -39,6 +39,8 @@ void ConfigureCpuDebug::SetConfiguration() {
     ui->cpuopt_misc_ir->setChecked(Settings::values.cpuopt_misc_ir);
     ui->cpuopt_reduce_misalign_checks->setEnabled(runtime_lock);
     ui->cpuopt_reduce_misalign_checks->setChecked(Settings::values.cpuopt_reduce_misalign_checks);
+    ui->cpuopt_fastmem->setEnabled(runtime_lock);
+    ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem);
 }
 
 void ConfigureCpuDebug::ApplyConfiguration() {
@@ -50,6 +52,7 @@ void ConfigureCpuDebug::ApplyConfiguration() {
     Settings::values.cpuopt_const_prop = ui->cpuopt_const_prop->isChecked();
     Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked();
     Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked();
+    Settings::values.cpuopt_fastmem = ui->cpuopt_fastmem->isChecked();
 }
 
 void ConfigureCpuDebug::changeEvent(QEvent* event) {
diff --git a/src/yuzu/configuration/configure_cpu_debug.ui b/src/yuzu/configuration/configure_cpu_debug.ui
index a90dc64fed..11ee19a128 100644
--- a/src/yuzu/configuration/configure_cpu_debug.ui
+++ b/src/yuzu/configuration/configure_cpu_debug.ui
@@ -139,6 +139,20 @@
           </property>
          </widget>
         </item>
+        <item>
+         <widget class="QCheckBox" name="cpuopt_fastmem">
+          <property name="text">
+           <string>Enable Host MMU Emulation</string>
+          </property>
+          <property name="toolTip">
+           <string>
+            &lt;div style="white-space: nowrap"&gt;This optimization speeds up memory accesses by the guest program.&lt;/div&gt;
+            &lt;div style="white-space: nowrap"&gt;Enabling it causes guest memory reads/writes to be done directly into memory and make use of Host's MMU.&lt;/div&gt;
+            &lt;div style="white-space: nowrap"&gt;Disabling this forces all memory accesses to use Software MMU Emulation.&lt;/div&gt;
+           </string>
+          </property>
+         </widget>
+        </item>
        </layout>
       </widget>
      </item>
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 8ce2967ac2..f48d935a1f 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -150,6 +150,10 @@ cpuopt_misc_ir =
 # 0: Disabled, 1 (default): Enabled
 cpuopt_reduce_misalign_checks =
 
+# Enable Host MMU Emulation (faster guest memory access)
+# 0: Disabled, 1 (default): Enabled
+cpuopt_fastmem =
+
 [Renderer]
 # Which backend API to use.
 # 0 (default): OpenGL, 1: Vulkan

From ee67460ff0e12a1603431d86fe3919a24b3858fb Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 6 Jun 2021 20:53:26 -0300
Subject: [PATCH 07/11] host_memory: Support staged VirtualProtect calls

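VirtualProtect is now applied per tracked placeholder rather than over the
whole range, presumably because a single call cannot change the protection
of a span covering several separately mapped views. The request is clipped
against each interval returned by equal_range; sketch of the clipping
(assumed standalone, same boost::icl container as the implementation):

    boost::icl::separate_interval_set<size_t> set;
    set.insert({0x0000, 0x4000});
    set.insert({0x6000, 0x8000});
    // A protect over [0x2000, 0x7000) visits both intervals, clipped to
    // [0x2000, 0x4000) and [0x6000, 0x7000).
    auto [it, end] = set.equal_range({0x2000, 0x7000});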
---
 src/common/host_memory.cpp | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp
index 8a328f916d..c6d65aab94 100644
--- a/src/common/host_memory.cpp
+++ b/src/common/host_memory.cpp
@@ -110,9 +110,18 @@ public:
         } else {
             UNIMPLEMENTED_MSG("Protection flag combination read={} write={}", read, write);
         }
-        DWORD old_flags{};
-        if (!VirtualProtect(virtual_base + virtual_offset, length, new_flags, &old_flags)) {
-            LOG_CRITICAL(HW_Memory, "Failed to change virtual memory protect rules");
+        const size_t virtual_end = virtual_offset + length;
+
+        std::lock_guard lock{placeholder_mutex};
+        auto [it, end] = placeholders.equal_range({virtual_offset, virtual_end});
+        while (it != end) {
+            const size_t offset = std::max(it->lower(), virtual_offset);
+            const size_t protect_length = std::min(it->upper(), virtual_end) - offset;
+            DWORD old_flags{};
+            if (!VirtualProtect(virtual_base + offset, protect_length, new_flags, &old_flags)) {
+                LOG_CRITICAL(HW_Memory, "Failed to change virtual memory protect rules");
+            }
+            ++it;
         }
     }
 

From 7b0d8bd1fbfd7d8fe4d939cb3b8649a29f61655c Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 6 Jun 2021 20:58:57 -0300
Subject: [PATCH 08/11] rasterizer: Update pages in batches

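RasterizerMarkRegionCached is now called once per contiguous run of pages
that flip state instead of once per page, which matters now that each call
can end in an mprotect/VirtualProtect. It also fixes the delta == 0 assert
(ASSERT_MSG(true) could never fire) and makes the page counters atomic.
The pattern is plain run-length batching (hypothetical standalone demo;
first, last and ShouldToggle stand in for the loop bounds and counter
checks):

    std::vector<std::pair<u64, u64>> batches; // (first_page, page_count)
    u64 run_begin = 0;
    u64 run_len = 0;
    for (u64 page = first; page != last; ++page) {
        if (ShouldToggle(page)) {
            if (run_len == 0) {
                run_begin = page; // start a new run
            }
            ++run_len;
        } else if (run_len > 0) {
            batches.emplace_back(run_begin, run_len); // flush the run
            run_len = 0;
        }
    }
    if (run_len > 0) {
        batches.emplace_back(run_begin, run_len); // flush the open run
    }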
---
 src/video_core/rasterizer_accelerated.cpp | 56 +++++++++++++++++------
 1 file changed, 41 insertions(+), 15 deletions(-)

diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp
index 6decd25462..4c95247022 100644
--- a/src/video_core/rasterizer_accelerated.cpp
+++ b/src/video_core/rasterizer_accelerated.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <atomic>
+
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/div_ceil.h"
@@ -10,35 +12,59 @@
 
 namespace VideoCore {
 
-RasterizerAccelerated::RasterizerAccelerated(Core::Memory::Memory& cpu_memory_)
-    : cpu_memory{cpu_memory_} {}
+using namespace Core::Memory;
+
+RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) : cpu_memory{cpu_memory_} {}
 
 RasterizerAccelerated::~RasterizerAccelerated() = default;
 
 void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
-    const auto page_end = Common::DivCeil(addr + size, Core::Memory::PAGE_SIZE);
-    for (auto page = addr >> Core::Memory::PAGE_BITS; page != page_end; ++page) {
-        auto& count = cached_pages.at(page >> 2).Count(page);
+    u64 uncache_begin = 0;
+    u64 cache_begin = 0;
+    u64 uncache_bytes = 0;
+    u64 cache_bytes = 0;
+
+    std::atomic_thread_fence(std::memory_order_acquire);
+    const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
+    for (u64 page = addr >> PAGE_BITS; page != page_end; ++page) {
+        std::atomic_uint16_t& count = cached_pages.at(page >> 2).Count(page);
 
         if (delta > 0) {
-            ASSERT_MSG(count < UINT16_MAX, "Count may overflow!");
+            ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!");
         } else if (delta < 0) {
-            ASSERT_MSG(count > 0, "Count may underflow!");
+            ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
         } else {
-            ASSERT_MSG(true, "Delta must be non-zero!");
+            ASSERT_MSG(false, "Delta must be non-zero!");
         }
 
         // Adds or subtracts 1, as count is an unsigned 16-bit value
-        count += static_cast<u16>(delta);
+        count.fetch_add(static_cast<u16>(delta), std::memory_order_release);
 
         // Assume delta is either -1 or 1
-        if (count == 0) {
-            cpu_memory.RasterizerMarkRegionCached(page << Core::Memory::PAGE_BITS,
-                                                  Core::Memory::PAGE_SIZE, false);
-        } else if (count == 1 && delta > 0) {
-            cpu_memory.RasterizerMarkRegionCached(page << Core::Memory::PAGE_BITS,
-                                                  Core::Memory::PAGE_SIZE, true);
+        if (count.load(std::memory_order::relaxed) == 0) {
+            if (uncache_bytes == 0) {
+                uncache_begin = page;
+            }
+            uncache_bytes += PAGE_SIZE;
+        } else if (uncache_bytes > 0) {
+            cpu_memory.RasterizerMarkRegionCached(uncache_begin << PAGE_BITS, uncache_bytes, false);
+            uncache_bytes = 0;
         }
+        if (count.load(std::memory_order::relaxed) == 1 && delta > 0) {
+            if (cache_bytes == 0) {
+                cache_begin = page;
+            }
+            cache_bytes += PAGE_SIZE;
+        } else if (cache_bytes > 0) {
+            cpu_memory.RasterizerMarkRegionCached(cache_begin << PAGE_BITS, cache_bytes, true);
+            cache_bytes = 0;
+        }
+    }
+    if (uncache_bytes > 0) {
+        cpu_memory.RasterizerMarkRegionCached(uncache_begin << PAGE_BITS, uncache_bytes, false);
+    }
+    if (cache_bytes > 0) {
+        cpu_memory.RasterizerMarkRegionCached(cache_begin << PAGE_BITS, cache_bytes, true);
     }
 }
 

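The rewrite above replaces one RasterizerMarkRegionCached call per page with one call per contiguous run of pages whose cached state flips, which matters once each call can trigger host mapping work. A standalone sketch of the run-coalescing idea; the bool vector and the mark_region callback are illustrative stand-ins:

    #include <cstdint>
    #include <functional>
    #include <vector>

    constexpr std::uint64_t PAGE_BITS = 12;
    constexpr std::uint64_t PAGE_SIZE = 1ULL << PAGE_BITS;

    void MarkPagesBatched(const std::vector<bool>& should_mark, std::uint64_t first_page,
                          const std::function<void(std::uint64_t, std::uint64_t)>& mark_region) {
        std::uint64_t run_begin = 0;
        std::uint64_t run_bytes = 0;
        for (std::uint64_t i = 0; i < should_mark.size(); ++i) {
            if (should_mark[i]) {
                if (run_bytes == 0) {
                    run_begin = first_page + i; // Start a new run
                }
                run_bytes += PAGE_SIZE;
            } else if (run_bytes > 0) {
                mark_region(run_begin << PAGE_BITS, run_bytes); // Flush the finished run
                run_bytes = 0;
            }
        }
        if (run_bytes > 0) {
            mark_region(run_begin << PAGE_BITS, run_bytes); // Flush the trailing run
        }
    }
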
From 588ab44470e65d4e2ec010aa48bb5f430d301f81 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 7 Jun 2021 02:04:35 +0200
Subject: [PATCH 09/11] GPUThread: Remove async reads from Normal Accuracy.

---
 src/video_core/gpu_thread.cpp | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index cd1fbb9bff..46f642b197 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -99,25 +99,13 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
         PushCommand(FlushRegionCommand(addr, size));
         return;
     }
-
-    // Asynchronous GPU mode
-    switch (Settings::values.gpu_accuracy.GetValue()) {
-    case Settings::GPUAccuracy::Normal:
-        PushCommand(FlushRegionCommand(addr, size));
-        break;
-    case Settings::GPUAccuracy::High:
-        // TODO(bunnei): Is this right? Preserving existing behavior for now
-        break;
-    case Settings::GPUAccuracy::Extreme: {
-        auto& gpu = system.GPU();
-        u64 fence = gpu.RequestFlush(addr, size);
-        PushCommand(GPUTickCommand(), true);
-        ASSERT(fence <= gpu.CurrentFlushRequestFence());
-        break;
-    }
-    default:
-        UNIMPLEMENTED_MSG("Unsupported gpu_accuracy {}", Settings::values.gpu_accuracy.GetValue());
+    if (!Settings::IsGPULevelExtreme()) {
+        return;
     }
+    auto& gpu = system.GPU();
+    u64 fence = gpu.RequestFlush(addr, size);
+    PushCommand(GPUTickCommand(), true);
+    ASSERT(fence <= gpu.CurrentFlushRequestFence());
 }
 
 void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {

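After this patch, an asynchronous-mode flush only does work at Extreme accuracy: request a flush fence, tick the GPU thread synchronously, then check that the fence was reached. A minimal sketch of that fence handshake; the Gpu type here is a hypothetical stand-in, not yuzu's real interface:

    #include <cassert>
    #include <cstdint>

    struct Gpu {
        std::uint64_t next_fence = 0;
        std::uint64_t completed_fence = 0;
        std::uint64_t RequestFlush() { return ++next_fence; }  // Enqueue flush, return its fence
        void TickUntilIdle() { completed_fence = next_fence; } // Drain queued work synchronously
    };

    void FlushRegionSynchronously(Gpu& gpu, bool is_extreme_accuracy) {
        if (!is_extreme_accuracy) {
            return; // Normal/High accuracy: the flush is skipped entirely
        }
        const std::uint64_t fence = gpu.RequestFlush();
        gpu.TickUntilIdle(); // Blocks until the GPU thread has processed the flush
        assert(fence <= gpu.completed_fence);
    }
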
From f332d4a9b548e9c7e18c245fd3b90ffc5a94b943 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 8 Jun 2021 02:14:12 -0300
Subject: [PATCH 10/11] common/host_memory: Load Windows 10 functions
 dynamically

Work around old headers and libraries shipped with MinGW.
---
 src/common/host_memory.cpp | 117 ++++++++++++++++++++++++++++---------
 1 file changed, 88 insertions(+), 29 deletions(-)

diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp
index c6d65aab94..9ae384f01c 100644
--- a/src/common/host_memory.cpp
+++ b/src/common/host_memory.cpp
@@ -1,18 +1,10 @@
 #ifdef _WIN32
 
-#ifdef _WIN32_WINNT
-#undef _WIN32_WINNT
-#endif
-#define _WIN32_WINNT 0x0A00 // Windows 10
-
-#include <windows.h>
-
-#include <boost/icl/separate_interval_set.hpp>
-
 #include <iterator>
 #include <unordered_map>
-
-#pragma comment(lib, "mincore.lib")
+#include <boost/icl/separate_interval_set.hpp>
+#include <windows.h>
+#include "common/dynamic_library.h"
 
 #elif defined(__linux__) // ^^^ Windows ^^^ vvv Linux vvv
 
@@ -40,38 +32,99 @@ constexpr size_t HugePageSize = 0x200000;
 
 #ifdef _WIN32
 
+// Manually imported for MinGW compatibility
+#ifndef MEM_RESERVE_PLACEHOLDER
+#define MEM_RESERVE_PLACEHOLDER 0x00040000
+#endif
+#ifndef MEM_REPLACE_PLACEHOLDER
+#define MEM_REPLACE_PLACEHOLDER 0x00004000
+#endif
+#ifndef MEM_COALESCE_PLACEHOLDERS
+#define MEM_COALESCE_PLACEHOLDERS 0x00000001
+#endif
+#ifndef MEM_PRESERVE_PLACEHOLDER
+#define MEM_PRESERVE_PLACEHOLDER 0x00000002
+#endif
+
+using PFN_CreateFileMapping2 = _Ret_maybenull_ HANDLE(WINAPI*)(
+    _In_ HANDLE File, _In_opt_ SECURITY_ATTRIBUTES* SecurityAttributes, _In_ ULONG DesiredAccess,
+    _In_ ULONG PageProtection, _In_ ULONG AllocationAttributes, _In_ ULONG64 MaximumSize,
+    _In_opt_ PCWSTR Name,
+    _Inout_updates_opt_(ParameterCount) MEM_EXTENDED_PARAMETER* ExtendedParameters,
+    _In_ ULONG ParameterCount);
+
+using PFN_VirtualAlloc2 = _Ret_maybenull_ PVOID(WINAPI*)(
+    _In_opt_ HANDLE Process, _In_opt_ PVOID BaseAddress, _In_ SIZE_T Size,
+    _In_ ULONG AllocationType, _In_ ULONG PageProtection,
+    _Inout_updates_opt_(ParameterCount) MEM_EXTENDED_PARAMETER* ExtendedParameters,
+    _In_ ULONG ParameterCount);
+
+using PFN_MapViewOfFile3 = _Ret_maybenull_ PVOID(WINAPI*)(
+    _In_ HANDLE FileMapping, _In_opt_ HANDLE Process, _In_opt_ PVOID BaseAddress,
+    _In_ ULONG64 Offset, _In_ SIZE_T ViewSize, _In_ ULONG AllocationType, _In_ ULONG PageProtection,
+    _Inout_updates_opt_(ParameterCount) MEM_EXTENDED_PARAMETER* ExtendedParameters,
+    _In_ ULONG ParameterCount);
+
+using PFN_UnmapViewOfFile2 = BOOL(WINAPI*)(_In_ HANDLE Process, _In_ PVOID BaseAddress,
+                                           _In_ ULONG UnmapFlags);
+
+template <typename T>
+static void GetFuncAddress(Common::DynamicLibrary& dll, const char* name, T& pfn) {
+    if (!dll.GetSymbol(name, &pfn)) {
+        LOG_CRITICAL(HW_Memory, "Failed to load {}", name);
+        throw std::bad_alloc{};
+    }
+}
+
 class HostMemory::Impl {
 public:
     explicit Impl(size_t backing_size_, size_t virtual_size_)
-        : backing_size{backing_size_}, virtual_size{virtual_size_}, process{GetCurrentProcess()} {
+        : backing_size{backing_size_}, virtual_size{virtual_size_}, process{GetCurrentProcess()},
+          kernelbase_dll("Kernelbase") {
+        if (!kernelbase_dll.IsOpen()) {
+            LOG_CRITICAL(HW_Memory, "Failed to load Kernelbase.dll");
+            throw std::bad_alloc{};
+        }
+        GetFuncAddress(kernelbase_dll, "CreateFileMapping2", pfn_CreateFileMapping2);
+        GetFuncAddress(kernelbase_dll, "VirtualAlloc2", pfn_VirtualAlloc2);
+        GetFuncAddress(kernelbase_dll, "MapViewOfFile3", pfn_MapViewOfFile3);
+        GetFuncAddress(kernelbase_dll, "UnmapViewOfFile2", pfn_UnmapViewOfFile2);
+
         // Allocate backing file map
         backing_handle =
-            CreateFileMapping2(INVALID_HANDLE_VALUE, nullptr, FILE_MAP_WRITE | FILE_MAP_READ,
-                               PAGE_READWRITE, SEC_COMMIT, backing_size, nullptr, nullptr, 0);
+            pfn_CreateFileMapping2(INVALID_HANDLE_VALUE, nullptr, FILE_MAP_WRITE | FILE_MAP_READ,
+                                   PAGE_READWRITE, SEC_COMMIT, backing_size, nullptr, nullptr, 0);
         if (!backing_handle) {
+            LOG_CRITICAL(HW_Memory, "Failed to allocate {} MiB of backing memory",
+                         backing_size >> 20);
             throw std::bad_alloc{};
         }
         // Allocate a virtual memory for the backing file map as placeholder
-        backing_base = static_cast<u8*>(VirtualAlloc2(process, nullptr, backing_size,
-                                                      MEM_RESERVE | MEM_RESERVE_PLACEHOLDER,
-                                                      PAGE_NOACCESS, nullptr, 0));
+        backing_base = static_cast<u8*>(pfn_VirtualAlloc2(process, nullptr, backing_size,
+                                                          MEM_RESERVE | MEM_RESERVE_PLACEHOLDER,
+                                                          PAGE_NOACCESS, nullptr, 0));
         if (!backing_base) {
             Release();
+            LOG_CRITICAL(HW_Memory, "Failed to reserve {} MiB of virtual memory",
+                         backing_size >> 20);
             throw std::bad_alloc{};
         }
         // Map backing placeholder
-        void* const ret = MapViewOfFile3(backing_handle, process, backing_base, 0, backing_size,
-                                         MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0);
+        void* const ret = pfn_MapViewOfFile3(backing_handle, process, backing_base, 0, backing_size,
+                                             MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0);
         if (ret != backing_base) {
             Release();
+            LOG_CRITICAL(HW_Memory, "Failed to map {} MiB of virtual memory", backing_size >> 20);
             throw std::bad_alloc{};
         }
         // Allocate virtual address placeholder
-        virtual_base = static_cast<u8*>(VirtualAlloc2(process, nullptr, virtual_size,
-                                                      MEM_RESERVE | MEM_RESERVE_PLACEHOLDER,
-                                                      PAGE_NOACCESS, nullptr, 0));
+        virtual_base = static_cast<u8*>(pfn_VirtualAlloc2(process, nullptr, virtual_size,
+                                                          MEM_RESERVE | MEM_RESERVE_PLACEHOLDER,
+                                                          PAGE_NOACCESS, nullptr, 0));
         if (!virtual_base) {
             Release();
+            LOG_CRITICAL(HW_Memory, "Failed to reserve {} GiB of virtual memory",
+                         virtual_size >> 30);
             throw std::bad_alloc{};
         }
     }
@@ -136,8 +189,8 @@ private:
     void Release() {
         if (!placeholders.empty()) {
             for (const auto& placeholder : placeholders) {
-                if (!UnmapViewOfFile2(process, virtual_base + placeholder.lower(),
-                                      MEM_PRESERVE_PLACEHOLDER)) {
+                if (!pfn_UnmapViewOfFile2(process, virtual_base + placeholder.lower(),
+                                          MEM_PRESERVE_PLACEHOLDER)) {
                     LOG_CRITICAL(HW_Memory, "Failed to unmap virtual memory placeholder");
                 }
             }
@@ -149,7 +202,7 @@ private:
             }
         }
         if (backing_base) {
-            if (!UnmapViewOfFile2(process, backing_base, MEM_PRESERVE_PLACEHOLDER)) {
+            if (!pfn_UnmapViewOfFile2(process, backing_base, MEM_PRESERVE_PLACEHOLDER)) {
                 LOG_CRITICAL(HW_Memory, "Failed to unmap backing memory placeholder");
             }
             if (!VirtualFreeEx(process, backing_base, 0, MEM_RELEASE)) {
@@ -184,8 +237,8 @@ private:
         const bool split_left = unmap_begin > placeholder_begin;
         const bool split_right = unmap_end < placeholder_end;
 
-        if (!UnmapViewOfFile2(process, virtual_base + placeholder_begin,
-                              MEM_PRESERVE_PLACEHOLDER)) {
+        if (!pfn_UnmapViewOfFile2(process, virtual_base + placeholder_begin,
+                                  MEM_PRESERVE_PLACEHOLDER)) {
             LOG_CRITICAL(HW_Memory, "Failed to unmap placeholder");
         }
         // If we have to remap memory regions due to partial unmaps, we are in a data race as
@@ -235,8 +288,8 @@ private:
     }
 
     void MapView(size_t virtual_offset, size_t host_offset, size_t length) {
-        if (!MapViewOfFile3(backing_handle, process, virtual_base + virtual_offset, host_offset,
-                            length, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0)) {
+        if (!pfn_MapViewOfFile3(backing_handle, process, virtual_base + virtual_offset, host_offset,
+                                length, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0)) {
             LOG_CRITICAL(HW_Memory, "Failed to map placeholder");
         }
     }
@@ -279,6 +332,12 @@ private:
     HANDLE process{};        ///< Current process handle
     HANDLE backing_handle{}; ///< File based backing memory
 
+    DynamicLibrary kernelbase_dll;
+    PFN_CreateFileMapping2 pfn_CreateFileMapping2{};
+    PFN_VirtualAlloc2 pfn_VirtualAlloc2{};
+    PFN_MapViewOfFile3 pfn_MapViewOfFile3{};
+    PFN_UnmapViewOfFile2 pfn_UnmapViewOfFile2{};
+
     std::mutex placeholder_mutex;                                 ///< Mutex for placeholders
     boost::icl::separate_interval_set<size_t> placeholders;       ///< Mapped placeholders
     std::unordered_map<size_t, size_t> placeholder_host_pointers; ///< Placeholder backing offset

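Resolving the four entry points at runtime removes the import-table references that an old MinGW import library cannot satisfy; a missing symbol becomes a recoverable startup failure instead of a loader error. A minimal Win32-only sketch of the same pattern using raw LoadLibraryW/GetProcAddress, which is roughly what Common::DynamicLibrary wraps on Windows:

    #ifdef _WIN32
    #include <windows.h>

    // MEM_EXTENDED_PARAMETER* is collapsed to PVOID here so the sketch also
    // compiles against pre-Windows-10 headers.
    using PFN_VirtualAlloc2 = PVOID(WINAPI*)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, PVOID, ULONG);

    PFN_VirtualAlloc2 LoadVirtualAlloc2() {
        // VirtualAlloc2 is exported by KernelBase.dll on Windows 10 1803+.
        const HMODULE kernelbase = LoadLibraryW(L"Kernelbase.dll");
        if (!kernelbase) {
            return nullptr;
        }
        // A null result means "unavailable on this system"; the caller can
        // fall back instead of the process failing to start over a static import.
        return reinterpret_cast<PFN_VirtualAlloc2>(GetProcAddress(kernelbase, "VirtualAlloc2"));
    }
    #endif
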
From 7f85abb28120fbb57bb813b828ee42f2a2031990 Mon Sep 17 00:00:00 2001
From: Markus Wick <markus@selfnet.de>
Date: Fri, 11 Jun 2021 11:47:23 +0200
Subject: [PATCH 11/11] common/host_memory: Implement a fallback if fastmem
 fails.

This falls back to the old approach of using a virtual buffer.

Windows is untested, but this build should restore support for Windows < 10 v1803, albeit without fastmem support.
---
 src/common/host_memory.cpp | 59 +++++++++++++++++++++++++++++---------
 src/common/host_memory.h   |  4 +++
 2 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp
index 9ae384f01c..8bd70abc79 100644
--- a/src/common/host_memory.cpp
+++ b/src/common/host_memory.cpp
@@ -449,21 +449,52 @@ private:
     int fd{-1}; // memfd file descriptor, -1 is the error value of memfd_create
 };
 
-#else // ^^^ Linux ^^^
+#else // ^^^ Linux ^^^ vvv Generic vvv
 
-#error Please implement the host memory for your platform
+class HostMemory::Impl {
+public:
+    explicit Impl(size_t /* backing_size */, size_t /* virtual_size */) {
+        // This is just a placeholder.
+        // Please implement fastmem in a proper way on your platform.
+        throw std::bad_alloc{};
+    }
 
-#endif
+    void Map(size_t virtual_offset, size_t host_offset, size_t length) {}
+
+    void Unmap(size_t virtual_offset, size_t length) {}
+
+    void Protect(size_t virtual_offset, size_t length, bool read, bool write) {}
+
+    u8* backing_base{nullptr};
+    u8* virtual_base{nullptr};
+};
+
+#endif // ^^^ Generic ^^^
 
 HostMemory::HostMemory(size_t backing_size_, size_t virtual_size_)
-    : backing_size(backing_size_),
-      virtual_size(virtual_size_), impl{std::make_unique<HostMemory::Impl>(
-                                       AlignUp(backing_size, PageAlignment),
-                                       AlignUp(virtual_size, PageAlignment) + 3 * HugePageSize)},
-      backing_base{impl->backing_base}, virtual_base{impl->virtual_base} {
-    virtual_base += 2 * HugePageSize - 1;
-    virtual_base -= reinterpret_cast<size_t>(virtual_base) & (HugePageSize - 1);
-    virtual_base_offset = virtual_base - impl->virtual_base;
+    : backing_size(backing_size_), virtual_size(virtual_size_) {
+    try {
+        // Try to allocate a fastmem arena.
+        // The implementation will fail with std::bad_alloc on errors.
+        impl = std::make_unique<HostMemory::Impl>(AlignUp(backing_size, PageAlignment),
+                                                  AlignUp(virtual_size, PageAlignment) +
+                                                      3 * HugePageSize);
+        backing_base = impl->backing_base;
+        virtual_base = impl->virtual_base;
+
+        if (virtual_base) {
+            virtual_base += 2 * HugePageSize - 1;
+            virtual_base -= reinterpret_cast<size_t>(virtual_base) & (HugePageSize - 1);
+            virtual_base_offset = virtual_base - impl->virtual_base;
+        }
+
+    } catch (const std::bad_alloc&) {
+        LOG_CRITICAL(HW_Memory,
+                     "Fastmem unavailable, falling back to VirtualBuffer for memory allocation");
+        fallback_buffer = std::make_unique<Common::VirtualBuffer<u8>>(backing_size);
+        backing_base = fallback_buffer->data();
+        virtual_base = nullptr;
+    }
 }
 
 HostMemory::~HostMemory() = default;
@@ -478,7 +509,7 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length) {
     ASSERT(length % PageAlignment == 0);
     ASSERT(virtual_offset + length <= virtual_size);
     ASSERT(host_offset + length <= backing_size);
-    if (length == 0) {
+    if (length == 0 || !virtual_base || !impl) {
         return;
     }
     impl->Map(virtual_offset + virtual_base_offset, host_offset, length);
@@ -488,7 +519,7 @@ void HostMemory::Unmap(size_t virtual_offset, size_t length) {
     ASSERT(virtual_offset % PageAlignment == 0);
     ASSERT(length % PageAlignment == 0);
     ASSERT(virtual_offset + length <= virtual_size);
-    if (length == 0) {
+    if (length == 0 || !virtual_base || !impl) {
         return;
     }
     impl->Unmap(virtual_offset + virtual_base_offset, length);
@@ -498,7 +529,7 @@ void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool w
     ASSERT(virtual_offset % PageAlignment == 0);
     ASSERT(length % PageAlignment == 0);
     ASSERT(virtual_offset + length <= virtual_size);
-    if (length == 0) {
+    if (length == 0 || !virtual_base || !impl) {
         return;
     }
     impl->Protect(virtual_offset + virtual_base_offset, length, read, write);
diff --git a/src/common/host_memory.h b/src/common/host_memory.h
index eaa7d18ab5..9b8326d0fb 100644
--- a/src/common/host_memory.h
+++ b/src/common/host_memory.h
@@ -6,6 +6,7 @@
 
 #include <memory>
 #include "common/common_types.h"
+#include "common/virtual_buffer.h"
 
 namespace Common {
 
@@ -61,6 +62,9 @@ private:
     u8* backing_base{};
     u8* virtual_base{};
     size_t virtual_base_offset{};
+
+    // Fallback if fastmem is not supported on this platform
+    std::unique_ptr<Common::VirtualBuffer<u8>> fallback_buffer;
 };
 
 } // namespace Common
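
The construction contract after this patch: either the fastmem arena exists and virtual_base points into it, or virtual_base is null, Map/Unmap/Protect become no-ops, and accesses fall back to the software path over a plain buffer. A minimal sketch of that construct-or-fall-back shape; FastmemArena and the vector are illustrative stand-ins for HostMemory::Impl and Common::VirtualBuffer:

    #include <cstddef>
    #include <cstdint>
    #include <memory>
    #include <new>
    #include <vector>

    struct FastmemArena {
        explicit FastmemArena(std::size_t) {
            throw std::bad_alloc{}; // Stub: real platform code reserves the arena here
        }
        std::uint8_t* virtual_base = nullptr;
    };

    struct GuestMemory {
        explicit GuestMemory(std::size_t backing_size) {
            try {
                arena = std::make_unique<FastmemArena>(backing_size);
                virtual_base = arena->virtual_base;
            } catch (const std::bad_alloc&) {
                // No fastmem: leave virtual_base null so mapping calls become
                // no-ops and accesses use the software path instead.
                fallback.resize(backing_size);
                virtual_base = nullptr;
            }
        }
        std::unique_ptr<FastmemArena> arena;
        std::vector<std::uint8_t> fallback;
        std::uint8_t* virtual_base = nullptr;
    };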