Merge pull request #3278 from ReinUsesLisp/vk-memory-manager

renderer_vulkan: Buffer cache, stream buffer and memory manager changes
2020-01-06 17:03:04 -05:00 · 2020-01-06 17:03:04 -05:00 · ee9b4a7f9a
commit ee9b4a7f9a
parent 984563b773 5b01f80a12
6 changed files with 426 additions and 320 deletions
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@ -2,124 +2,145 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <algorithm>
 #include <cstring>
 #include <memory>
 #include <optional>
 #include <tuple>
 #include "common/alignment.h"
 #include "common/assert.h"
-#include "core/memory.h"
+#include "common/bit_util.h"
-#include "video_core/memory_manager.h"
+#include "core/core.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
 namespace Vulkan {
-CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
+namespace {
                                     std::size_t alignment, u8* host_ptr)
    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
      alignment{alignment} {}
-VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
+const auto BufferUsage =
-                             Memory::Memory& cpu_memory_,
+    vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
-                             VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
+    vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer;
-                             VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
+
-    : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager}, cpu_memory{
+const auto UploadPipelineStage =
-                                                                                   cpu_memory_} {
+    vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput |
-    const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
+    vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
-                       vk::BufferUsageFlagBits::eIndexBuffer |
+    vk::PipelineStageFlagBits::eComputeShader;
-                       vk::BufferUsageFlagBits::eUniformBuffer;
+
-    const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
+const auto UploadAccessBarriers =
-                        vk::AccessFlagBits::eUniformRead;
+    vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead |
-    stream_buffer =
+    vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead |
-        std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
+    vk::AccessFlagBits::eIndexRead;
-                                         vk::PipelineStageFlagBits::eAllCommands);
+
-    buffer_handle = stream_buffer->GetBuffer();
+auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
    return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage);
 }
 } // Anonymous namespace
 /// Creates the Vulkan buffer that backs a cache block of `size` bytes and commits memory for it.
 /// @param device Device used to obtain the logical device and the dispatch loader.
 /// @param memory_manager Memory manager that provides the commit stored in the block.
 /// @param cache_addr Forwarded unchanged to the VideoCommon::BufferBlock base.
 /// @param size Size in bytes, used for both the base block and the Vulkan buffer.
 CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
                                     CacheAddr cache_addr, std::size_t size)
    : VideoCommon::BufferBlock{cache_addr, size} {
    // Besides the shared BufferUsage flags, the block is usable as a transfer source and
    // destination; sharing mode is exclusive with no queue family indices.
    const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
                                         BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
                                             vk::BufferUsageFlagBits::eTransferDst,
                                         vk::SharingMode::eExclusive, 0, nullptr);
    const auto& dld{device.GetDispatchLoader()};
    const auto dev{device.GetLogical()};
    buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld);
    // The second argument is the host_visible flag of VKMemoryManager::Commit; false is passed.
    buffer.commit = memory_manager.Commit(*buffer.handle, false);
 }
 /// Defaulted destructor, defined out of line.
 CachedBufferBlock::~CachedBufferBlock() = default;
 /// Constructs the Vulkan buffer cache.
 /// The VideoCommon::BufferCache base receives the rasterizer, the system and a stream buffer
 /// built by CreateStreamBuffer(device, scheduler); the remaining arguments are stored as
 /// references in the members of the same name.
 VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
                             const VKDevice& device, VKMemoryManager& memory_manager,
                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
    : VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system,
                                                                   CreateStreamBuffer(device,
                                                                                      scheduler)},
      device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
                                                                                staging_pool} {}
 /// Defaulted destructor, defined out of line.
 VKBufferCache::~VKBufferCache() = default;
-u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
+Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
-    const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
+    return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
    ASSERT_MSG(cpu_addr, "Invalid GPU address");
    // Cache management is a big overhead, so only cache entries with a given size.
    // TODO: Figure out which size is the best for given games.
    cache &= size >= 2048;
    u8* const host_ptr{cpu_memory.GetPointer(*cpu_addr)};
    if (cache) {
        const auto entry = TryGet(host_ptr);
        if (entry) {
            if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
                return entry->GetOffset();
            }
            Unregister(entry);
        }
 }
-    AlignBuffer(alignment);
+const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
-    const u64 uploaded_offset = buffer_offset;
+    return buffer->GetHandle();
    if (host_ptr == nullptr) {
        return uploaded_offset;
 }
-    std::memcpy(buffer_ptr, host_ptr, size);
+const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) {
-    buffer_ptr += size;
+    size = std::max(size, std::size_t(4));
-    buffer_offset += size;
+    const auto& empty = staging_pool.GetUnusedBuffer(size, false);
-
+    scheduler.RequestOutsideRenderPassOperationContext();
-    if (cache) {
+    scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) {
-        auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
+        cmdbuf.fillBuffer(buffer, 0, size, 0, dld);
-                                                         alignment, host_ptr);
+    });
-        Register(entry);
+    return &*empty.handle;
 }
-    return uploaded_offset;
+void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                                    const u8* data) {
    const auto& staging = staging_pool.GetUnusedBuffer(size, true);
    std::memcpy(staging.commit->Map(size), data, size);
    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
                      size](auto cmdbuf, auto& dld) {
        cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld);
        cmdbuf.pipelineBarrier(
            vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
                                     VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer,
                                     offset, size)},
            {}, dld);
    });
 }
-u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
+void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
-    AlignBuffer(alignment);
+                                      u8* data) {
-    std::memcpy(buffer_ptr, raw_pointer, size);
+    const auto& staging = staging_pool.GetUnusedBuffer(size, true);
-    const u64 uploaded_offset = buffer_offset;
+    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
                      size](auto cmdbuf, auto& dld) {
        cmdbuf.pipelineBarrier(
            vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
                vk::PipelineStageFlagBits::eComputeShader,
            vk::PipelineStageFlagBits::eTransfer, {}, {},
            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite,
                                     vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED,
                                     VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)},
            {}, dld);
        cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld);
    });
    scheduler.Finish();
-    buffer_ptr += size;
+    std::memcpy(data, staging.commit->Map(size), size);
    buffer_offset += size;
    return uploaded_offset;
 }
-std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
+void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
-    AlignBuffer(alignment);
+                              std::size_t dst_offset, std::size_t size) {
-    u8* const uploaded_ptr = buffer_ptr;
+    scheduler.RequestOutsideRenderPassOperationContext();
-    const u64 uploaded_offset = buffer_offset;
+    scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset,
-
+                      dst_offset, size](auto cmdbuf, auto& dld) {
-    buffer_ptr += size;
+        cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld);
-    buffer_offset += size;
+        cmdbuf.pipelineBarrier(
-    return {uploaded_ptr, uploaded_offset};
+            vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
-}
+            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead,
-
+                                     vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED,
-void VKBufferCache::Reserve(std::size_t max_size) {
+                                     VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size),
-    bool invalidate;
+             vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
-    std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
+                                     VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer,
-    buffer_offset = buffer_offset_base;
+                                     dst_offset, size)},
-
+            {}, dld);
-    if (invalidate) {
+    });
        InvalidateAll();
    }
 }
 /// Hands the reserved region over to the stream buffer.
 /// The value passed to VKStreamBuffer::Send is the distance between the current buffer offset
 /// and the base offset.
 void VKBufferCache::Send() {
    stream_buffer->Send(buffer_offset - buffer_offset_base);
 }
 /// Advances the write cursor so that buffer_offset satisfies `alignment`.
 /// @param alignment Alignment handed to Common::AlignUp for the buffer offset.
 void VKBufferCache::AlignBuffer(std::size_t alignment) {
    // Align the offset, not the mapped pointer
    const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
    // Move the mapped pointer by the same padding that was added to the offset so that both
    // cursors keep referring to the same position.
    buffer_ptr += offset_aligned - buffer_offset;
    buffer_offset = offset_aligned;
 }
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@ -5,105 +5,74 @@
 #pragma once
 #include <memory>
-#include <tuple>
+#include <unordered_map>
 #include <vector>
 #include "common/common_types.h"
-#include "video_core/gpu.h"
+#include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_vulkan/declarations.h"
-#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
-namespace Memory {
+namespace Core {
-class Memory;
+class System;
 }
 namespace Tegra {
 class MemoryManager;
 }
 namespace Vulkan {
 class VKDevice;
 class VKFence;
 class VKMemoryManager;
-class VKStreamBuffer;
+class VKScheduler;
-class CachedBufferEntry final : public RasterizerCacheObject {
+class CachedBufferBlock final : public VideoCommon::BufferBlock {
 public:
-    explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
+    explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
-                               u8* host_ptr);
+                               CacheAddr cache_addr, std::size_t size);
    ~CachedBufferBlock();
-    VAddr GetCpuAddr() const override {
+    const vk::Buffer* GetHandle() const {
-        return cpu_addr;
+        return &*buffer.handle;
    }
    std::size_t GetSizeInBytes() const override {
        return size;
    }
    std::size_t GetSize() const {
        return size;
    }
    u64 GetOffset() const {
        return offset;
    }
    std::size_t GetAlignment() const {
        return alignment;
    }
 private:
-    VAddr cpu_addr{};
+    VKBuffer buffer;
    std::size_t size{};
    u64 offset{};
    std::size_t alignment{};
 };
-class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
+using Buffer = std::shared_ptr<CachedBufferBlock>;
 class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> {
 public:
-    explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, Memory::Memory& cpu_memory_,
+    explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
-                           VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
+                           const VKDevice& device, VKMemoryManager& memory_manager,
-                           VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
+                           VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
    ~VKBufferCache();
-    /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
+    const vk::Buffer* GetEmptyBuffer(std::size_t size) override;
    /// allocated.
    u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);
    /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
    u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
    /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
    std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
    /// Reserves a region of memory to be used in subsequent upload/reserve operations.
    void Reserve(std::size_t max_size);
    /// Ensures that the set data is sent to the device.
    void Send();
    /// Returns the buffer cache handle.
    vk::Buffer GetBuffer() const {
        return buffer_handle;
    }
 protected:
-    // We do not have to flush this cache as things in it are never modified by us.
+    void WriteBarrier() override {}
-    void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
+
    Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
    const vk::Buffer* ToHandle(const Buffer& buffer) override;
    void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                         const u8* data) override;
    void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                           u8* data) override;
    void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
                   std::size_t dst_offset, std::size_t size) override;
 private:
-    void AlignBuffer(std::size_t alignment);
+    const VKDevice& device;
-
+    VKMemoryManager& memory_manager;
-    Tegra::MemoryManager& tegra_memory_manager;
+    VKScheduler& scheduler;
-    Memory::Memory& cpu_memory;
+    VKStagingBufferPool& staging_pool;
    std::unique_ptr<VKStreamBuffer> stream_buffer;
    vk::Buffer buffer_handle;
    u8* buffer_ptr = nullptr;
    u64 buffer_offset = 0;
    u64 buffer_offset_base = 0;
 };
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@ -6,6 +6,7 @@
 #include <optional>
 #include <tuple>
 #include <vector>
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/common_types.h"
@ -16,34 +17,32 @@
 namespace Vulkan {
-// TODO(Rodrigo): Fine tune this number
+namespace {
-constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
+
 /// Chooses the size of a new device memory chunk for a request of `required_size` bytes.
 /// @param required_size Number of bytes the pending commit needs.
 /// @returns The first entry of the 16/32/64/128 MiB table that is not smaller than
 ///          `required_size`; when the request exceeds every entry, the result of
 ///          Common::AlignUp(required_size, 256 MiB).
 u64 GetAllocationChunkSize(u64 required_size) {
    // The table is sorted in ascending order, as std::lower_bound requires.
    static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20};
    auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size);
    return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20);
 }
 } // Anonymous namespace
 class VKMemoryAllocation final {
 public:
    explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
-                                vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
+                                vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type)
-        : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
+        : device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size},
-          shifted_type{ShiftType(type)}, is_mappable{properties &
+          shifted_type{ShiftType(type)} {}
                                                     vk::MemoryPropertyFlagBits::eHostVisible} {
        if (is_mappable) {
            const auto dev = device.GetLogical();
            const auto& dld = device.GetDispatchLoader();
            base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
        }
    }
    ~VKMemoryAllocation() {
        const auto dev = device.GetLogical();
        const auto& dld = device.GetDispatchLoader();
        if (is_mappable)
            dev.unmapMemory(memory, dld);
        dev.free(memory, nullptr, dld);
    }
    VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
-        auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
+        auto found = TryFindFreeSection(free_iterator, allocation_size,
-                                        static_cast<u64>(alignment));
+                                        static_cast<u64>(commit_size), static_cast<u64>(alignment));
        if (!found) {
            found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
                                       static_cast<u64>(alignment));
@ -52,8 +51,7 @@ public:
                return nullptr;
            }
        }
-        u8* address = is_mappable ? base_address + *found : nullptr;
+        auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found,
        auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
                                                           *found + commit_size);
        commits.push_back(commit.get());
@ -65,12 +63,10 @@ public:
    void Free(const VKMemoryCommitImpl* commit) {
        ASSERT(commit);
-        const auto it =
+
-            std::find_if(commits.begin(), commits.end(),
+        const auto it = std::find(std::begin(commits), std::end(commits), commit);
                         [&](const auto& stored_commit) { return stored_commit == commit; });
        if (it == commits.end()) {
-            LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
+            UNREACHABLE_MSG("Freeing unallocated commit!");
            UNREACHABLE();
            return;
        }
        commits.erase(it);
@ -88,11 +84,11 @@ private:
    }
    /// A memory allocator, it may return a free region between "start" and "end" with the solicited
-    /// requeriments.
+    /// requirements.
    std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
-        u64 iterator = start;
+        u64 iterator = Common::AlignUp(start, alignment);
-        while (iterator + size < end) {
+        while (iterator + size <= end) {
-            const u64 try_left = Common::AlignUp(iterator, alignment);
+            const u64 try_left = iterator;
            const u64 try_right = try_left + size;
            bool overlap = false;
@ -100,7 +96,7 @@ private:
                const auto [commit_left, commit_right] = commit->interval;
                if (try_left < commit_right && commit_left < try_right) {
                    // There's an overlap, continue the search where the overlapping commit ends.
-                    iterator = commit_right;
+                    iterator = Common::AlignUp(commit_right, alignment);
                    overlap = true;
                    break;
                }
@ -110,6 +106,7 @@ private:
                return try_left;
            }
        }
        // No free regions were found, return an empty optional.
        return std::nullopt;
    }
@ -117,12 +114,8 @@ private:
    const VKDevice& device;                   ///< Vulkan device.
    const vk::DeviceMemory memory;            ///< Vulkan memory allocation handler.
    const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
-    const u64 alloc_size;                     ///< Size of this allocation.
+    const u64 allocation_size;                ///< Size of this allocation.
    const u32 shifted_type;                   ///< Stored Vulkan type of this allocation, shifted.
    const bool is_mappable;                   ///< Whether the allocation is mappable.
    /// Base address of the mapped pointer.
    u8* base_address{};
    /// Hints where the next free region is likely going to be.
    u64 free_iterator{};
@ -132,13 +125,15 @@ private:
 };
 VKMemoryManager::VKMemoryManager(const VKDevice& device)
-    : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
+    : device{device}, properties{device.GetPhysical().getMemoryProperties(
-      is_memory_unified{GetMemoryUnified(props)} {}
+                          device.GetDispatchLoader())},
      is_memory_unified{GetMemoryUnified(properties)} {}
 VKMemoryManager::~VKMemoryManager() = default;
-VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
+VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements,
-    ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
+                                       bool host_visible) {
    const u64 chunk_size = GetAllocationChunkSize(requirements.size);
    // When a host visible commit is asked, search for host visible and coherent, otherwise search
    // for a fast device local type.
@ -147,32 +142,21 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
            ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
            : vk::MemoryPropertyFlagBits::eDeviceLocal;
-    const auto TryCommit = [&]() -> VKMemoryCommit {
+    if (auto commit = TryAllocCommit(requirements, wanted_properties)) {
        for (auto& alloc : allocs) {
            if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
                continue;
            if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
                return commit;
            }
        }
        return {};
    };
    if (auto commit = TryCommit(); commit) {
        return commit;
    }
    // Commit has failed, allocate more memory.
-    if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
+    if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) {
-        // TODO(Rodrigo): Try to use host memory.
+        // TODO(Rodrigo): Handle these situations in some way like flushing to guest memory.
-        LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
+        // Allocation has failed, panic.
-        UNREACHABLE();
+        UNREACHABLE_MSG("Ran out of VRAM!");
        return {};
    }
    // Commit again, this time it won't fail since there's a fresh allocation above. If it does,
    // there's a bug.
-    auto commit = TryCommit();
+    auto commit = TryAllocCommit(requirements, wanted_properties);
    ASSERT(commit);
    return commit;
 }
@ -180,8 +164,7 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
 VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
-    const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
+    auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible);
    auto commit = Commit(requeriments, host_visible);
    dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
    return commit;
 }
@ -189,25 +172,23 @@ VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
 VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
-    const auto requeriments = dev.getImageMemoryRequirements(image, dld);
+    auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible);
    auto commit = Commit(requeriments, host_visible);
    dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
    return commit;
 }
 bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
                                  u64 size) {
-    const u32 type = [&]() {
+    const u32 type = [&] {
-        for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
+        for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
-            const auto flags = props.memoryTypes[type_index].propertyFlags;
+            const auto flags = properties.memoryTypes[type_index].propertyFlags;
            if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
                // The type matches in type and in the wanted properties.
                return type_index;
            }
        }
-        LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
+        UNREACHABLE_MSG("Couldn't find a compatible memory type!");
-        UNREACHABLE();
+        return 0U;
        return 0u;
    }();
    const auto dev = device.GetLogical();
@ -216,19 +197,33 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
    // Try to allocate found type.
    const vk::MemoryAllocateInfo memory_ai(size, type);
    vk::DeviceMemory memory;
-    if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
+    if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
        res != vk::Result::eSuccess) {
        LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
        return false;
    }
-    allocs.push_back(
+    allocations.push_back(
        std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
    return true;
 }
-/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
+VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements,
-    for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
+                                               vk::MemoryPropertyFlags wanted_properties) {
-        if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
+    for (auto& allocation : allocations) {
        if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) {
            continue;
        }
        if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
            return commit;
        }
    }
    return {};
 }
 /*static*/ bool VKMemoryManager::GetMemoryUnified(
    const vk::PhysicalDeviceMemoryProperties& properties) {
    for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) {
        if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
            // Memory is considered unified when heaps are device local only.
            return false;
        }
@ -236,17 +231,28 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
    return true;
 }
-VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
+VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
-                                       u8* data, u64 begin, u64 end)
+                                       vk::DeviceMemory memory, u64 begin, u64 end)
-    : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
+    : device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {}
 /// Hands this commit back to the allocation it was created from by calling its Free().
 VKMemoryCommitImpl::~VKMemoryCommitImpl() {
    allocation->Free(this);
 }
-u8* VKMemoryCommitImpl::GetData() const {
+MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
-    ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
+    const auto dev = device.GetLogical();
-    return data;
+    const auto address = reinterpret_cast<u8*>(
        dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader()));
    return MemoryMap{this, address};
 }
 /// Unmaps the device memory object this commit belongs to.
 /// NOTE(review): unmapMemory receives the vk::DeviceMemory handle, not this commit's interval;
 /// confirm that no other commit of the same memory object is expected to stay mapped.
 void VKMemoryCommitImpl::Unmap() const {
    const auto dev = device.GetLogical();
    dev.unmapMemory(memory, device.GetDispatchLoader());
 }
 /// Maps the whole commit: delegates to the sized overload with the length of the commit's
 /// interval (end minus begin).
 MemoryMap VKMemoryCommitImpl::Map() const {
    return Map(interval.second - interval.first);
 }
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@ -12,6 +12,7 @@
 namespace Vulkan {
 class MemoryMap;
 class VKDevice;
 class VKMemoryAllocation;
 class VKMemoryCommitImpl;
@ -21,11 +22,12 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
 class VKMemoryManager final {
 public:
    explicit VKMemoryManager(const VKDevice& device);
    VKMemoryManager(const VKMemoryManager&) = delete;
    ~VKMemoryManager();
    /**
     * Commits a memory with the specified requirements.
-     * @param reqs Requeriments returned from a Vulkan call.
+     * @param requirements Requirements returned from a Vulkan call.
     * @param host_visible Signals the allocator that it *must* use host visible and coherent
     *                     memory. When passing false, it will try to allocate device local memory.
     * @returns A memory commit.
@ -47,25 +49,35 @@ private:
    /// Allocates a chunk of memory.
    bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
    /// Tries to allocate a memory commit.
    VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements,
                                  vk::MemoryPropertyFlags wanted_properties);
    /// Returns true if the device uses a unified memory model.
-    static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
+    static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties);
    const VKDevice& device;                              ///< Device handler.
-    const vk::PhysicalDeviceMemoryProperties props;          ///< Physical device properties.
+    const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties.
    const bool is_memory_unified;                        ///< True if memory model is unified.
-    std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
+    std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
 };
 class VKMemoryCommitImpl final {
    friend VKMemoryAllocation;
    friend MemoryMap;
 public:
-    explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
+    explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
-                                u64 begin, u64 end);
+                                vk::DeviceMemory memory, u64 begin, u64 end);
    ~VKMemoryCommitImpl();
-    /// Returns the writeable memory map. The commit has to be mappable.
+    /// Maps a memory region and returns a pointer to it.
-    u8* GetData() const;
+    /// It's illegal to have more than one memory map at the same time.
    MemoryMap Map(u64 size, u64 offset = 0) const;
    /// Maps the whole commit and returns a pointer to it.
    /// It's illegal to have more than one memory map at the same time.
    MemoryMap Map() const;
    /// Returns the Vulkan memory handler.
    vk::DeviceMemory GetMemory() const {
@ -78,10 +90,46 @@ public:
    }
 private:
    /// Unmaps memory.
    void Unmap() const;
    const VKDevice& device;           ///< Vulkan device.
    std::pair<u64, u64> interval{};   ///< Interval where the commit exists.
    vk::DeviceMemory memory;          ///< Vulkan device memory handler.
    VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
-    u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
+};
 /// Holds ownership of a memory map.
 class MemoryMap final {
 public:
    explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address)
        : commit{commit}, address{address} {}
    ~MemoryMap() {
        if (commit) {
            commit->Unmap();
        }
    }
    /// Prematurely releases the memory map.
    void Release() {
        commit->Unmap();
        commit = nullptr;
    }
    /// Returns the address of the memory map.
    u8* GetAddress() const {
        return address;
    }
    /// Returns the address of the memory map;
    operator u8*() const {
        return address;
    }
 private:
    const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
    u8* address{};                      ///< Address to the mapped memory.
 };
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@ -3,86 +3,144 @@
 // Refer to the license.txt file included.
 #include <algorithm>
 #include <memory>
 #include <optional>
 #include <tuple>
 #include <vector>
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
 namespace Vulkan {
 namespace {
 constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
 constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
-VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
-                               VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+
-                               vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
+std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter,
-    : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
+                                  vk::MemoryPropertyFlags wanted) {
-                                                                                   pipeline_stage} {
+    const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader());
-    CreateBuffers(memory_manager, usage);
+    for (u32 i = 0; i < properties.memoryTypeCount; i++) {
-    ReserveWatches(WATCHES_INITIAL_RESERVE);
+        if (!(filter & (1 << i))) {
            continue;
        }
        if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
            return i;
        }
    }
    return {};
 }
 } // Anonymous namespace
 /// Creates the Vulkan buffer for the given usage and pre-sizes both watch lists.
 VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
                                vk::BufferUsageFlags usage)
    : device{device}, scheduler{scheduler} {
    CreateBuffers(usage);
    // The previous and the current watch lists start out with the same number of entries.
    ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
    ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
 }
 // Defaulted here, in the source file, rather than inline in the class declaration.
 VKStreamBuffer::~VKStreamBuffer() = default;
-std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
+std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
-    ASSERT(size <= buffer_size);
+    ASSERT(size <= STREAM_BUFFER_SIZE);
    mapped_size = size;
-    if (offset + size > buffer_size) {
+    if (alignment > 0) {
-        // The buffer would overflow, save the amount of used buffers, signal an invalidation and
+        offset = Common::AlignUp(offset, alignment);
-        // reset the state.
+    }
-        invalidation_mark = used_watches;
+
-        used_watches = 0;
+    WaitPendingOperations(offset);
    bool invalidated = false;
    if (offset + size > STREAM_BUFFER_SIZE) {
        // The buffer would overflow, save the amount of used watches and reset the state.
        invalidation_mark = current_watch_cursor;
        current_watch_cursor = 0;
        offset = 0;
    }
-    return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
+        // Swap watches and reset waiting cursors.
-}
+        std::swap(previous_watches, current_watches);
        wait_cursor = 0;
        wait_bound = 0;
-void VKStreamBuffer::Send(u64 size) {
+        // Ensure that we don't wait for uncommitted fences.
    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
    if (invalidation_mark) {
        // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
        scheduler.Flush();
        std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
                      [&](auto& resource) { resource->Wait(); });
        invalidation_mark = std::nullopt;
    }
-    if (used_watches + 1 >= watches.size()) {
+        invalidated = true;
        // Ensure that there are enough watches.
        ReserveWatches(WATCHES_RESERVE_CHUNK);
    }
    // Add a watch for this allocation.
    watches[used_watches++]->Watch(scheduler.GetFence());
    offset += size;
 }
 void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
    const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
                                         nullptr);
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
-    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
+    const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld));
-    commit = memory_manager.Commit(*buffer, true);
+    return {pointer, offset, invalidated};
    mapped_pointer = commit->GetData();
 }
-void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
+void VKStreamBuffer::Unmap(u64 size) {
-    const std::size_t previous_size = watches.size();
+    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
-    watches.resize(previous_size + grow_size);
+
-    std::generate(watches.begin() + previous_size, watches.end(),
+    const auto dev = device.GetLogical();
-                  []() { return std::make_unique<VKFenceWatch>(); });
+    dev.unmapMemory(*memory, device.GetDispatchLoader());
    offset += size;
    if (current_watch_cursor + 1 >= current_watches.size()) {
        // Ensure that there are enough watches.
        ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
    }
    auto& watch = current_watches[current_watch_cursor++];
    watch.upper_bound = offset;
    watch.fence.Watch(scheduler.GetFence());
 }
 /// Creates the stream buffer of STREAM_BUFFER_SIZE bytes, allocates host visible and host
 /// coherent memory for it and binds both together.
 /// @param usage Usage flags the Vulkan buffer is created with.
 void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) {
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive,
                                         0, nullptr);
    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
    const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld);
    const u32 type_filter = requirements.memoryTypeBits;
    const vk::MemoryPropertyFlags host_flags =
        vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent;
    // Prefer device local host visible allocations (this should hit AMD's pinned memory).
    auto type =
        FindMemoryType(device, type_filter, host_flags | vk::MemoryPropertyFlagBits::eDeviceLocal);
    if (!type.has_value()) {
        // No such type: settle for any host visible and coherent one.
        type = FindMemoryType(device, type_filter, host_flags);
        ASSERT_MSG(type, "No host visible and coherent memory type found");
    }
    // Allocate exactly what the buffer requires and bind it at the start of the allocation.
    const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type);
    memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld);
    dev.bindBufferMemory(*buffer, *memory, 0, dld);
 }
 /// Grows a watch list by appending new watches at its end.
 /// @param watches   List to grow.
 /// @param grow_size Number of watches to append.
 void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
    const std::size_t grown_size = watches.size() + grow_size;
    watches.resize(grown_size);
 }
 /// Waits on fences of the previous iteration, in recording order, until the waited range
 /// reaches requested_upper_bound or every watch up to the invalidation mark has been waited.
 /// @param requested_upper_bound Offset up to which the buffer is about to be reused.
 void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
    if (!invalidation_mark) {
        // No invalidation mark has been recorded, so there are no previous watches to wait on.
        return;
    }
    // The comparison has to be '>': wait_bound starts at zero and is only advanced inside this
    // loop, so "requested_upper_bound < wait_bound" would never hold and no fence would ever be
    // waited on.
    // NOTE(review): this treats the argument as the end of the requested range - confirm that
    // callers pass the end offset and not the start.
    while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
        auto& watch = previous_watches[wait_cursor];
        wait_bound = watch.upper_bound;
        watch.fence.Wait();
        ++wait_cursor;
    }
 }
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@ -4,28 +4,24 @@
 #pragma once
 #include <memory>
 #include <optional>
 #include <tuple>
 #include <vector>
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 namespace Vulkan {
 class VKDevice;
 class VKFence;
 class VKFenceWatch;
 class VKResourceManager;
 class VKScheduler;
-class VKStreamBuffer {
+class VKStreamBuffer final {
 public:
-    explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+    explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
-                            VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+                            vk::BufferUsageFlags usage);
                            vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
    ~VKStreamBuffer();
    /**
@ -34,39 +30,47 @@ public:
     * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
     * offset and a boolean that's true when buffer has been invalidated.
     */
-    std::tuple<u8*, u64, bool> Reserve(u64 size);
+    std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
    /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
-    void Send(u64 size);
+    void Unmap(u64 size);
-    vk::Buffer GetBuffer() const {
+    vk::Buffer GetHandle() const {
        return *buffer;
    }
 private:
    struct Watch final {
        VKFenceWatch fence;
        u64 upper_bound{};
    };
    /// Creates Vulkan buffer handles, committing the required memory.
-    void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
+    void CreateBuffers(vk::BufferUsageFlags usage);
    /// Increases the amount of watches available.
-    void ReserveWatches(std::size_t grow_size);
+    void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
    void WaitPendingOperations(u64 requested_upper_bound);
    const VKDevice& device;                      ///< Vulkan device manager.
    VKScheduler& scheduler;                      ///< Command scheduler.
    const u64 buffer_size;                       ///< Total size of the stream buffer.
    const vk::AccessFlags access;                ///< Access usage of this stream buffer.
    const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
    UniqueBuffer buffer;       ///< Mapped buffer.
-    VKMemoryCommit commit; ///< Memory commit.
+    UniqueDeviceMemory memory; ///< Memory allocation.
    u8* mapped_pointer{};  ///< Pointer to the host visible commit
    u64 offset{};      ///< Buffer iterator.
    u64 mapped_size{}; ///< Size reserved for the current copy.
-    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
+    std::vector<Watch> current_watches;           ///< Watches recorded in the current iteration.
-    std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
+    std::size_t current_watch_cursor{};           ///< Count of watches, reset on invalidation.
-    std::optional<std::size_t>
+    std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
-        invalidation_mark{}; ///< Number of watches used in the current invalidation.
+
    std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
    std::size_t wait_cursor{};           ///< Last watch being waited for completion.
    u64 wait_bound{};                    ///< Highest offset being watched for completion.
 };
 } // namespace Vulkan