metal: add basic staging buffer pool and texture cache

This commit is contained in:
Samuliak 2024-04-06 08:55:11 +02:00
parent 35b751de1b
commit ea5dc91b9d
12 changed files with 602 additions and 12 deletions

View file

@ -377,7 +377,10 @@ if (APPLE)
renderer_metal/mtl_command_recorder.mm
renderer_metal/mtl_device.mm
renderer_metal/mtl_rasterizer.mm
renderer_metal/mtl_staging_buffer_pool.mm
renderer_metal/mtl_swap_chain.mm
renderer_metal/mtl_texture_cache.mm
renderer_metal/mtl_texture_cache_base.cpp
renderer_metal/renderer_metal.mm
)
endif()

View file

@ -18,6 +18,13 @@ public:
void BeginRenderPass(MTLRenderPassDescriptor* render_pass_descriptor);
/// Guards render-only commands: passes silently when an encoder exists and it is a render
/// encoder, otherwise throws std::runtime_error.
void CheckIfRenderPassIsActive() {
    const bool render_pass_active = encoder && encoder_type == EncoderType::Render;
    if (render_pass_active) {
        return;
    }

    throw std::runtime_error("Trying to perform render command, but render pass is not active");
}
void RequireComputeEncoder();
void RequireBlitEncoder();

View file

@ -49,10 +49,12 @@ void CommandRecorder::Present(CAMetalDrawable_t drawable) {
}
/// Ends the active encoder, commits the current command buffer and drops this recorder's
/// reference to it.
///
/// Does nothing when no command buffer exists, so calling Submit() before anything was recorded
/// (or twice in a row) neither ends a non-existent encoder nor commits a nil buffer.
void CommandRecorder::Submit() {
    if (!command_buffer) {
        return;
    }

    EndEncoding();
    [command_buffer commit];
    // Balance the reference held by the recorder, then forget the handle so the guard above
    // sees "no buffer" on the next call.
    [command_buffer release];
    command_buffer = nil;
}
void CommandRecorder::RequireCommandBuffer() {

View file

@ -15,6 +15,7 @@ class System;
namespace Metal {
class Device;
class CommandRecorder;
class SwapChain;
class RasterizerMetal;
@ -37,7 +38,8 @@ public:
class RasterizerMetal final : public VideoCore::RasterizerInterface,
protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
public:
explicit RasterizerMetal(Tegra::GPU& gpu_, const Device& device_, const SwapChain& swap_chain_);
explicit RasterizerMetal(Tegra::GPU& gpu_, const Device& device_,
CommandRecorder& command_recorder_, const SwapChain& swap_chain_);
~RasterizerMetal() override;
void Draw(bool is_indexed, u32 instance_count) override;
@ -91,9 +93,8 @@ private:
AccelerateDMA accelerate_dma;
const Device& device;
CommandRecorder& command_recorder;
const SwapChain& swap_chain;
MTLCommandBuffer_t command_buffer;
};
} // namespace Metal

View file

@ -8,8 +8,9 @@
#include "video_core/engines/draw_manager.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_metal/mtl_rasterizer.h"
#include "video_core/renderer_metal/mtl_command_recorder.h"
#include "video_core/renderer_metal/mtl_device.h"
#include "video_core/renderer_metal/mtl_rasterizer.h"
#include <iostream>
@ -24,11 +25,14 @@ bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) {
return true;
}
RasterizerMetal::RasterizerMetal(Tegra::GPU& gpu_, const Device& device_, const SwapChain& swap_chain_)
: gpu{gpu_}, device{device_}, swap_chain{swap_chain_} {}
RasterizerMetal::RasterizerMetal(Tegra::GPU& gpu_, const Device& device_,
CommandRecorder& command_recorder_, const SwapChain& swap_chain_)
: gpu{gpu_}, device{device_}, command_recorder{command_recorder_}, swap_chain{swap_chain_} {}
RasterizerMetal::~RasterizerMetal() = default;
void RasterizerMetal::Draw(bool is_indexed, u32 instance_count) {
// TODO: uncomment
//command_recorder.CheckIfRenderPassIsActive();
//const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
if (is_indexed) {
std::cout << "DrawIndexed" << std::endl;

View file

@ -0,0 +1,101 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <array>
#include <climits>
#include <span>
#include <vector>
#include "common/common_types.h"
#include "objc_bridge.h"
#include "video_core/renderer_metal/objc_bridge.h"
namespace Metal {
class Device;
class CommandRecorder;
/// Category of memory a staging buffer is requested for. StagingBufferPool keeps one cache per
/// enumerator (see StagingBufferPool::GetCache).
enum class MemoryUsage {
// NOTE(review): presumably GPU-only memory - confirm.
DeviceLocal,
// NOTE(review): presumably CPU-to-GPU transfers - confirm.
Upload,
// NOTE(review): presumably GPU-to-CPU readback - confirm.
Download,
};
/// Handle to a Metal buffer handed out by the staging pool: the buffer object, a byte offset and
/// a CPU-side span.
///
/// The constructor sends `retain` to the buffer and the destructor sends `release`.
/// NOTE(review): no copy or move operations are declared, so the compiler-generated copy
/// duplicates `buffer` without another `retain` while every copy still releases it on
/// destruction - confirm whether instances are ever copied.
struct StagingBufferRef {
StagingBufferRef(MTLBuffer_t buffer_, size_t offset_, std::span<u8> mapped_span_);
~StagingBufferRef();
// Buffer object this reference keeps retained.
MTLBuffer_t buffer;
// Offset stored alongside the buffer; StagingBuffer::Ref() passes 0.
size_t offset;
// CPU-side view supplied by the creator of the reference.
std::span<u8> mapped_span;
};
/// Pool-side entry for one Metal buffer together with its CPU-side span.
///
/// The constructor sends `retain` to the buffer and the destructor sends `release`.
/// NOTE(review): no copy or move operations are declared, yet instances are stored in a
/// std::vector (StagingBufferPool::StagingBuffers::entries). When the vector reallocates, the
/// compiler-generated copy duplicates `buffer` without a `retain` and the old element releases
/// it - confirm and add retaining copy/move operations.
struct StagingBuffer {
StagingBuffer(MTLBuffer_t buffer_, std::span<u8> mapped_span_);
~StagingBuffer();
// Buffer object this entry keeps retained.
MTLBuffer_t buffer;
// CPU-side view supplied at construction.
std::span<u8> mapped_span;
// Builds a StagingBufferRef for this buffer with offset 0 and the same span.
StagingBufferRef Ref() const noexcept;
};
/// Hands out staging buffers for transfers between the CPU and Metal resources.
///
/// Buffers are grouped into one cache per MemoryUsage; inside a cache they are bucketed by the
/// ceiling log2 of their size. A separate stream buffer is allocated up front.
/// NOTE(review): most of the bookkeeping members below are not yet used by the implementation
/// visible in this change (reuse, stream sub-allocation and release are still TODOs).
class StagingBufferPool {
public:
    /// Number of regions the stream buffer size is divided by (see REGION_SIZE in the .mm).
    static constexpr size_t NUM_SYNCS = 16;

    explicit StagingBufferPool(const Device& device, CommandRecorder& command_recorder_);
    ~StagingBufferPool();

    /// Returns a buffer of at least `size` bytes for the given usage.
    /// Non-deferred uploads that fit into one stream region take the stream-buffer path.
    StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false);

    /// Gives a deferred buffer back to the pool (currently a no-op in the implementation).
    void FreeDeferred(StagingBufferRef& ref);

    /// Returns the handle of the stream buffer created in the constructor.
    [[nodiscard]] MTLBuffer_t GetStreamBufferHandle() const noexcept {
        return stream_buffer;
    }

    /// Misspelled predecessor of GetStreamBufferHandle(); kept so existing callers still compile.
    [[nodiscard]] MTLBuffer_t GetSTreamBufferHandle() const noexcept {
        return GetStreamBufferHandle();
    }

    /// Advances the rotating delete level and asks every cache to release that level.
    void TickFrame();

private:
    /// All buffers of one size bucket.
    struct StagingBuffers {
        std::vector<StagingBuffer> entries;
        size_t delete_index = 0;
        size_t iterate_index = 0;
    };

    /// One bucket per possible ceil(log2(size)) of a size_t.
    static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
    using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;

    StagingBufferRef GetStreamBuffer(size_t size);

    StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage, bool deferred = false);

    StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage, bool deferred);

    /// Maps a usage to the cache that stores its buffers.
    StagingBuffersCache& GetCache(MemoryUsage usage);

    void ReleaseCache(MemoryUsage usage);

    void ReleaseLevel(StagingBuffersCache& cache, size_t log2);

    const Device& device;
    CommandRecorder& command_recorder;

    MTLBuffer_t stream_buffer{};

    size_t iterator = 0;
    size_t used_iterator = 0;
    size_t free_iterator = 0;
    std::array<u64, NUM_SYNCS> sync_ticks{};

    StagingBuffersCache device_local_cache;
    StagingBuffersCache upload_cache;
    StagingBuffersCache download_cache;

    // Size bucket that TickFrame() releases next; wraps around at NUM_LEVELS.
    size_t current_delete_level = 0;
    u64 buffer_index = 0;
    u64 unique_ids{};
};
} // namespace Metal

View file

@ -0,0 +1,116 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include <algorithm>
#include <utility>
#include <vector>
#include <fmt/format.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/literals.h"
#include "video_core/renderer_metal/mtl_command_recorder.h"
#include "video_core/renderer_metal/mtl_device.h"
#include "video_core/renderer_metal/mtl_staging_buffer_pool.h"
namespace Metal {
// Stores the handle, offset and span, and takes a reference on the buffer for the lifetime of
// this object.
StagingBufferRef::StagingBufferRef(MTLBuffer_t buffer_, size_t offset_, std::span<u8> mapped_span_)
    : buffer{buffer_}, offset{offset_}, mapped_span{mapped_span_} {
    [buffer retain];
}

// Drops the reference taken in the constructor.
StagingBufferRef::~StagingBufferRef() {
    [buffer release];
}
// Stores the handle and span, and takes a reference on the buffer for the lifetime of this entry.
StagingBuffer::StagingBuffer(MTLBuffer_t buffer_, std::span<u8> mapped_span_)
    : buffer{buffer_}, mapped_span{mapped_span_} {
    [buffer retain];
}

// Drops the reference taken in the constructor.
StagingBuffer::~StagingBuffer() {
    [buffer release];
}
// Produces a reference that covers this buffer from its first byte.
// The result is returned as a prvalue on purpose: StagingBufferRef has no retaining copy.
StagingBufferRef StagingBuffer::Ref() const noexcept {
    constexpr size_t whole_buffer_offset = 0;
    return StagingBufferRef{buffer, whole_buffer_offset, mapped_span};
}
// TODO: use the _MiB suffix
// Total size of the stream buffer allocated by the pool constructor: 128 MiB.
constexpr size_t STREAM_BUFFER_SIZE = size_t{128} << 20; // 128_MiB;
// Largest request routed to the stream-buffer path: the stream size divided by NUM_SYNCS.
constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
// Binds the pool to its device and command recorder and allocates the stream buffer
// (STREAM_BUFFER_SIZE bytes, private storage mode).
StagingBufferPool::StagingBufferPool(const Device& device_, CommandRecorder& command_recorder_)
    : device{device_}, command_recorder{command_recorder_} {
    const MTLResourceOptions stream_options = MTLResourceStorageModePrivate;
    stream_buffer = [device.GetDevice() newBufferWithLength:STREAM_BUFFER_SIZE
                                                   options:stream_options];
}
// Releases the stream buffer. It is created in the constructor with a `new...` method, so the
// pool owns a reference that must be balanced by hand (this file uses manual retain/release).
// The cached StagingBuffer entries release their own buffers in their destructors.
StagingBufferPool::~StagingBufferPool() {
    [stream_buffer release];
}
// Entry point for buffer requests. Immediate (non-deferred) uploads that fit into one stream
// region go through GetStreamBuffer(); everything else gets a dedicated staging buffer.
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
    const bool is_small_immediate_upload =
        usage == MemoryUsage::Upload && !deferred && size <= REGION_SIZE;
    if (!is_small_immediate_upload) {
        return GetStagingBuffer(size, usage, deferred);
    }
    return GetStreamBuffer(size);
}
// Intended to hand a deferred buffer back to the pool. Currently a stub: `ref` is not touched.
void StagingBufferPool::FreeDeferred(StagingBufferRef& ref) {
// TODO: implement this
}
// Per-frame maintenance: step the rotating delete level (wrapping at NUM_LEVELS) and ask each
// cache, in the order DeviceLocal, Upload, Download, to release that level.
void StagingBufferPool::TickFrame() {
    ++current_delete_level;
    current_delete_level %= NUM_LEVELS;

    for (const MemoryUsage usage :
         {MemoryUsage::DeviceLocal, MemoryUsage::Upload, MemoryUsage::Download}) {
        ReleaseCache(usage);
    }
}
// Stream-buffer path for small uploads.
// TODO: implement sub-allocation from `stream_buffer`.
// HACK: until then every request is served by a dedicated, non-deferred upload staging buffer.
StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
    constexpr bool deferred = false;
    return GetStagingBuffer(size, MemoryUsage::Upload, deferred);
}
// Returns a staging buffer for the request. At present this always creates a new buffer; no
// lookup of existing cache entries happens here.
// The result is forwarded as a prvalue; StagingBufferRef declares no retaining copy operation.
StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage,
bool deferred) {
return CreateStagingBuffer(size, usage, deferred);
}
// Allocates a new Metal buffer of `size` bytes, records it in the cache selected by `usage`
// (bucketed by ceil(log2(size))) and returns a reference to it.
//
// Upload and Download buffers use shared storage so the CPU can address them through
// `-contents`; a buffer with private storage has no CPU address, which previously left the
// mapped span pointing at null. DeviceLocal buffers stay private and get an empty span.
// `deferred` is currently unused.
StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage,
                                                        bool deferred) {
    const u32 log2 = Common::Log2Ceil64(size);

    const bool cpu_visible = usage != MemoryUsage::DeviceLocal;
    const MTLResourceOptions options =
        cpu_visible ? MTLResourceStorageModeShared : MTLResourceStorageModePrivate;
    MTLBuffer_t buffer = [device.GetDevice() newBufferWithLength:size options:options];

    // Only build a non-empty span when Metal actually exposes a CPU address (also covers a
    // failed allocation, where messaging nil yields a null pointer).
    std::span<u8> mapped_span;
    if (u8* const contents = static_cast<u8*>([buffer contents])) {
        mapped_span = std::span<u8>(contents, size);
    }

    // NOTE(review): `newBufferWithLength:` already returns an owned reference and StagingBuffer's
    // constructor retains once more. The extra reference is deliberately left alone here because
    // StagingBuffer has no retaining copy/move operations, so growth of `entries` drops
    // references on its own; fix both together.
    auto& entry = GetCache(usage)[log2].entries.emplace_back(buffer, mapped_span);
    return entry.Ref();
}
// Selects the cache that belongs to `usage`. An unknown value trips an assertion and falls back
// to the upload cache.
StagingBufferPool::StagingBuffersCache& StagingBufferPool::GetCache(MemoryUsage usage) {
    if (usage == MemoryUsage::DeviceLocal) {
        return device_local_cache;
    }
    if (usage == MemoryUsage::Upload) {
        return upload_cache;
    }
    if (usage == MemoryUsage::Download) {
        return download_cache;
    }

    ASSERT_MSG(false, "Invalid memory usage={}", usage);
    return upload_cache;
}
// Releases the current delete level of the cache that belongs to `usage`.
void StagingBufferPool::ReleaseCache(MemoryUsage usage) {
    StagingBuffersCache& cache = GetCache(usage);
    ReleaseLevel(cache, current_delete_level);
}
// Intended to free entries of size bucket `log2` in `cache`. Currently a stub, so nothing is
// ever removed from the caches.
void StagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, size_t log2) {
// TODO: implement this
}
} // namespace Metal

View file

@ -0,0 +1,219 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <span>
#include "video_core/texture_cache/texture_cache_base.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_metal/mtl_staging_buffer_pool.h"
#include "video_core/renderer_metal/objc_bridge.h"
#include "video_core/texture_cache/image_view_base.h"
namespace Settings {
struct ResolutionScalingInfo;
}
namespace Metal {
using Common::SlotVector;
using VideoCommon::ImageId;
using VideoCommon::NUM_RT;
using VideoCommon::Region2D;
using VideoCommon::RenderTargets;
using VideoCore::Surface::PixelFormat;
class Device;
class Image;
class ImageView;
class Framebuffer;
/// Backend runtime handed to VideoCommon::TextureCache (see TextureCacheParams::Runtime).
/// Holds the Metal device and the resolution scaling settings.
/// NOTE(review): only the constructor and TickFrame() are defined in mtl_texture_cache.mm in this
/// change; the remaining out-of-line members are declarations only - confirm where they live.
class TextureCacheRuntime {
public:
explicit TextureCacheRuntime(const Device& device_);
void Finish();
// Staging buffer accessors; the return type is the pool's StagingBufferRef.
StagingBufferRef UploadStagingBuffer(size_t size);
StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
void FreeDeferredStagingBuffer(StagingBufferRef& ref);
// Always reports true.
bool CanUploadMSAA() const noexcept {
return true;
}
void TickFrame();
u64 GetDeviceLocalMemory() const;
u64 GetDeviceMemoryUsage() const;
bool CanReportMemoryUsage() const;
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
bool ShouldReinterpret(Image& dst, Image& src);
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
void InsertUploadMemoryBarrier();
// Intentionally empty in this backend.
void TransitionImageLayout(Image& image) {}
void AccelerateImageUpload(Image&, const StagingBufferRef&,
std::span<const VideoCommon::SwizzleParameters>);
// Always reports true.
bool HasNativeBgr() const noexcept {
return true;
}
// Always reports false.
bool HasBrokenTextureViewFormats() const noexcept {
return false;
}
void BarrierFeedbackLoop();
// Device used by Image and Sampler to create Metal objects.
const Device& device;
// Bound to Settings::values.resolution_info by the constructor.
const Settings::ResolutionScalingInfo& resolution;
};
class Image : public VideoCommon::ImageBase {
public:
explicit Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info,
GPUVAddr gpu_addr, VAddr cpu_addr);
explicit Image(const VideoCommon::NullImageParams&);
~Image();
Image(const Image&) = delete;
Image& operator=(const Image&) = delete;
Image(Image&&) = default;
Image& operator=(Image&&) = default;
void UploadMemory(MTLBuffer_t buffer, size_t offset,
std::span<const VideoCommon::BufferImageCopy> copies);
void UploadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(MTLBuffer_t buffer, size_t offset,
std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(std::span<MTLBuffer_t> buffers, std::span<size_t> offsets,
std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);
bool IsRescaled() const;
bool ScaleUp(bool ignore = false);
bool ScaleDown(bool ignore = false);
MTLTexture_t GetHandle() const noexcept {
return texture;
}
private:
MTLTexture_t texture;
bool initialized = false;
};
class ImageView : public VideoCommon::ImageViewBase {
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&,
const SlotVector<Image>&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
~ImageView();
ImageView(const ImageView&) = delete;
ImageView& operator=(const ImageView&) = delete;
ImageView(ImageView&&) = default;
ImageView& operator=(ImageView&&) = default;
MTLTexture_t GetHandle() const noexcept {
return texture;
}
private:
MTLTexture_t texture;
};
/// Backend allocation type required by TextureCacheParams; adds nothing to the common base.
class ImageAlloc : public VideoCommon::ImageAllocBase {};
/// Wraps the MTLSamplerState created for one guest TSC entry.
/// NOTE(review): no destructor is declared, so the sampler state created in the constructor is
/// never released by this class - confirm whether that is intended.
class Sampler {
public:
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
// Returns the sampler state created in the constructor.
MTLSamplerState_t GetHandle() const noexcept {
return sampler_state;
}
private:
MTLSamplerState_t sampler_state;
};
/// Render target set of the texture cache, represented as an MTLRenderPassDescriptor.
/// The descriptor is built once by the constructor via CreateRenderPassDescriptor().
/// NOTE(review): the move operations are defaulted, so a move copies the `render_pass` pointer
/// and leaves the source pointing at the same descriptor.
class Framebuffer {
public:
explicit Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
~Framebuffer();
Framebuffer(const Framebuffer&) = delete;
Framebuffer& operator=(const Framebuffer&) = delete;
Framebuffer(Framebuffer&&) = default;
Framebuffer& operator=(Framebuffer&&) = default;
// Fills `render_pass` with one colour attachment per non-null entry of `color_buffers` plus an
// optional depth attachment.
void CreateRenderPassDescriptor(TextureCacheRuntime& runtime,
std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, bool is_rescaled, size_t width,
size_t height);
// Returns the descriptor built by CreateRenderPassDescriptor().
MTLRenderPassDescriptor* GetHandle() const noexcept {
return render_pass;
}
private:
MTLRenderPassDescriptor* render_pass{};
};
/// Compile-time configuration that plugs the Metal types into VideoCommon::TextureCache.
struct TextureCacheParams {
// Feature switches read by the generic texture cache.
static constexpr bool ENABLE_VALIDATION = true;
static constexpr bool FRAMEBUFFER_BLITS = false;
static constexpr bool HAS_EMULATED_COPIES = false;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
// Backend types the generic texture cache operates on.
using Runtime = Metal::TextureCacheRuntime;
using Image = Metal::Image;
using ImageAlloc = Metal::ImageAlloc;
using ImageView = Metal::ImageView;
using Sampler = Metal::Sampler;
using Framebuffer = Metal::Framebuffer;
// NOTE(review): StagingBufferRef has no retaining copy operation - confirm the generic cache
// never copies AsyncBuffer values.
using AsyncBuffer = Metal::StagingBufferRef;
using BufferType = MTLBuffer_t;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
} // namespace Metal

View file

@ -0,0 +1,123 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include <algorithm>
#include <array>
#include <boost/container/small_vector.hpp>
#include <span>
#include <vector>
#include "common/bit_cast.h"
#include "common/bit_util.h"
#include "common/settings.h"
#include "video_core/renderer_metal/mtl_device.h"
#include "video_core/renderer_metal/mtl_texture_cache.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/texture_cache/formatter.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/util.h"
namespace Metal {
using Tegra::Engines::Fermi2D;
using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TextureMipmapFilter;
using VideoCommon::BufferImageCopy;
using VideoCommon::ImageFlagBits;
using VideoCommon::ImageInfo;
using VideoCommon::ImageType;
using VideoCommon::SubresourceRange;
using VideoCore::Surface::BytesPerBlock;
using VideoCore::Surface::IsPixelFormatASTC;
using VideoCore::Surface::IsPixelFormatInteger;
using VideoCore::Surface::SurfaceType;
// Binds the runtime to the Metal device and to the global resolution scaling settings.
TextureCacheRuntime::TextureCacheRuntime(const Device &device_)
: device{device_}, resolution{Settings::values.resolution_info} {}
// Per-frame hook of the runtime; currently does nothing.
void TextureCacheRuntime::TickFrame() {}
// Creates the backing Metal texture for a guest image.
// Only width and height are taken from `info`; the pixel format is still hardcoded.
Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info, GPUVAddr gpu_addr_,
             VAddr cpu_addr_)
    : VideoCommon::ImageBase(info, gpu_addr_, cpu_addr_) {
    MTLTextureDescriptor* texture_descriptor = [[MTLTextureDescriptor alloc] init];
    // TODO: don't hardcode the format
    texture_descriptor.pixelFormat = MTLPixelFormatRGBA8Unorm;
    texture_descriptor.width = info.size.width;
    texture_descriptor.height = info.size.height;

    texture = [runtime.device.GetDevice() newTextureWithDescriptor:texture_descriptor];

    // The descriptor was obtained with alloc/init, so this function owns it. The texture does
    // not need it after creation; release it here instead of leaking one per image.
    [texture_descriptor release];
}
// Drops the reference to the texture created in the constructor.
Image::~Image() { [texture release]; }
// Creates a view of `image`. For now the view simply shares the image's texture and keeps its
// own reference to it.
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
                     ImageId image_id_, Image& image)
    : VideoCommon::ImageViewBase{info, image.info, image_id_, image.gpu_addr},
      texture{[image.GetHandle() retain]} {
    using Shader::TextureType;

    // TODO: create texture view
}
// Overload that additionally receives the image slot vector. It delegates to the four-argument
// constructor and does not use `slot_imgs` yet.
ImageView::ImageView(TextureCacheRuntime &runtime,
const VideoCommon::ImageViewInfo &info, ImageId image_id_,
Image &image, const SlotVector<Image>& slot_imgs)
: ImageView(runtime, info, image_id_, image) {
// TODO: save slot images
}
// Drops the reference this view holds on its texture.
ImageView::~ImageView() { [texture release]; }
// Creates the Metal sampler state for a guest TSC entry.
// `tsc` is not read yet, so the sampler uses the descriptor's default settings.
Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& tsc) {
    MTLSamplerDescriptor* sampler_descriptor = [[MTLSamplerDescriptor alloc] init];
    // TODO: configure the descriptor

    sampler_state = [runtime.device.GetDevice() newSamplerStateWithDescriptor:sampler_descriptor];

    // The descriptor was obtained with alloc/init, so this function owns it; release it once
    // the sampler state exists instead of leaking one per sampler.
    [sampler_descriptor release];
}
// Builds the render pass descriptor for the given attachments, taking the rescale flag and the
// dimensions from the render target key.
Framebuffer::Framebuffer(TextureCacheRuntime& runtime,
                         std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer,
                         const VideoCommon::RenderTargets& key) {
    const bool rescaled = key.is_rescaled;
    const size_t target_width = key.size.width;
    const size_t target_height = key.size.height;
    CreateRenderPassDescriptor(runtime, color_buffers, depth_buffer, rescaled, target_width,
                               target_height);
}
// Defaulted: no Objective-C message is sent to `render_pass` on destruction.
Framebuffer::~Framebuffer() = default;
// (Re)builds `render_pass` from the given attachments.
//
// Every non-null colour buffer becomes a colour attachment at its own index, cleared to a fixed
// debug colour and stored; a non-null depth buffer becomes the depth attachment, cleared to 1.0
// and stored. `runtime`, `is_rescaled`, `width` and `height` are not used yet.
//
// `+renderPassDescriptor` returns an autoreleased object. It is kept in a member and handed out
// through GetHandle() later, so it is retained here; storing it unretained leaves the member
// dangling once the enclosing autorelease pool drains.
// NOTE(review): the destructor is defaulted and the class's move operations copy the pointer,
// so the reference taken here is not yet released anywhere. Add a releasing destructor together
// with move operations that null the source.
void Framebuffer::CreateRenderPassDescriptor(TextureCacheRuntime& runtime,
                                             std::span<ImageView*, NUM_RT> color_buffers,
                                             ImageView* depth_buffer, bool is_rescaled,
                                             size_t width, size_t height) {
    MTLRenderPassDescriptor* const previous = render_pass;
    render_pass = [[MTLRenderPassDescriptor renderPassDescriptor] retain];
    // Balance the retain of an earlier call; a no-op on first use because `previous` is nil.
    [previous release];

    for (size_t index = 0; index < NUM_RT; ++index) {
        const ImageView* const color_buffer = color_buffers[index];
        if (!color_buffer) {
            continue;
        }
        // TODO: don't use index as attachment index
        MTLRenderPassColorAttachmentDescriptor* const attachment =
            render_pass.colorAttachments[index];
        attachment.clearColor = MTLClearColorMake(0.5, 1.0, 0.0, 1.0);
        attachment.loadAction = MTLLoadActionClear;
        attachment.storeAction = MTLStoreActionStore;
        attachment.texture = color_buffer->GetHandle();
    }

    if (depth_buffer) {
        render_pass.depthAttachment.clearDepth = 1.0;
        render_pass.depthAttachment.loadAction = MTLLoadActionClear;
        render_pass.depthAttachment.storeAction = MTLStoreActionStore;
        render_pass.depthAttachment.texture = depth_buffer->GetHandle();
    }
}
} // namespace Metal

View file

@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/renderer_metal/mtl_texture_cache.h"
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCommon {
// Explicit instantiation of the generic texture cache for the Metal backend parameters.
template class VideoCommon::TextureCache<Metal::TextureCacheParams>;
}

View file

@ -9,14 +9,18 @@ typedef id<MTLDevice> MTLDevice_t;
typedef id<MTLCommandQueue> MTLCommandQueue_t;
typedef id<MTLCommandBuffer> MTLCommandBuffer_t;
typedef id<MTLCommandEncoder> MTLCommandEncoder_t;
typedef id<MTLBuffer> MTLBuffer_t;
typedef id<MTLTexture> MTLTexture_t;
typedef id<MTLSamplerState> MTLSamplerState_t;
typedef id<CAMetalDrawable> CAMetalDrawable_t;
#else
typedef void* MTLDevice_t;
typedef void* MTLCommandQueue_t;
typedef void* MTLCommandBuffer_t;
typedef void* MTLCommandEncoder_t;
typedef void* MTLBuffer_t;
typedef void* MTLTexture_t;
typedef void* MTLSamplerState_t;
typedef void MTLRenderPassDescriptor;
typedef void CAMetalLayer;
typedef void* CAMetalDrawable_t;

View file

@ -16,7 +16,7 @@ RendererMetal::RendererMetal(Core::Frontend::EmuWindow& emu_window,
command_recorder(device),
swap_chain(device, command_recorder,
static_cast<const CAMetalLayer*>(render_window.GetWindowInfo().render_surface)),
rasterizer(gpu_, device, swap_chain) {}
rasterizer(gpu_, device, command_recorder, swap_chain) {}
RendererMetal::~RendererMetal() = default;
@ -25,9 +25,10 @@ void RendererMetal::Composite(std::span<const Tegra::FramebufferConfig> framebuf
return;
}
// HACK
// Ask the swap chain to get next drawable
swap_chain.AcquireNextDrawable();
// TODO: copy the framebuffer to the drawable texture instead of this dummy render pass
MTLRenderPassDescriptor* render_pass_descriptor = [MTLRenderPassDescriptor renderPassDescriptor];
render_pass_descriptor.colorAttachments[0].clearColor = MTLClearColorMake(1.0, 0.5, 0.0, 1.0);
render_pass_descriptor.colorAttachments[0].loadAction = MTLLoadActionClear;
@ -35,6 +36,7 @@ void RendererMetal::Composite(std::span<const Tegra::FramebufferConfig> framebuf
render_pass_descriptor.colorAttachments[0].texture = swap_chain.GetDrawableTexture();
command_recorder.BeginRenderPass(render_pass_descriptor);
swap_chain.Present();
command_recorder.Submit();