Merge pull request #9746 from ameerj/ogl-msaa-texcache
texture_cache: OpenGL: Implement MSAA uploads and copies
This commit is contained in:
commit
f70fcdb873
12 changed files with 136 additions and 14 deletions
|
@ -22,6 +22,8 @@ set(SHADER_FILES
|
||||||
convert_d24s8_to_abgr8.frag
|
convert_d24s8_to_abgr8.frag
|
||||||
convert_depth_to_float.frag
|
convert_depth_to_float.frag
|
||||||
convert_float_to_depth.frag
|
convert_float_to_depth.frag
|
||||||
|
convert_msaa_to_non_msaa.comp
|
||||||
|
convert_non_msaa_to_msaa.comp
|
||||||
convert_s8d24_to_abgr8.frag
|
convert_s8d24_to_abgr8.frag
|
||||||
full_screen_triangle.vert
|
full_screen_triangle.vert
|
||||||
fxaa.frag
|
fxaa.frag
|
||||||
|
|
30
src/video_core/host_shaders/convert_msaa_to_non_msaa.comp
Normal file
30
src/video_core/host_shaders/convert_msaa_to_non_msaa.comp
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#version 450 core
|
||||||
|
layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
|
||||||
|
|
||||||
|
layout (binding = 0, rgba8) uniform readonly restrict image2DMSArray msaa_in;
|
||||||
|
layout (binding = 1, rgba8) uniform writeonly restrict image2DArray output_img;
|
||||||
|
|
||||||
|
// Spreads the samples of one multisampled texel over texels of the
// single-sampled output image. Each invocation handles the MSAA texel at
// gl_GlobalInvocationID (x, y, layer).
void main() {
    const ivec3 msaa_texel = ivec3(gl_GlobalInvocationID);
    if (any(greaterThanEqual(msaa_texel, imageSize(msaa_in)))) {
        // Invocations of the 8x8 workgroup that fall outside the source image do nothing.
        return;
    }

    const ivec3 output_size = imageSize(output_img);

    // TODO: Specialization constants for num_samples?
    const int sample_count = imageSamples(msaa_in);
    for (int sample_index = 0; sample_index < sample_count; ++sample_index) {
        // Only the two low bits of the sample index pick the destination texel:
        // bit 0 is the x offset and bit 1 the y offset inside a 2x2 block.
        // NOTE(review): sample indices >= 4 therefore land on the same texels as 0..3.
        const ivec2 sample_offset = ivec2(sample_index & 1, (sample_index / 2) & 1);
        const ivec3 dest_coords = ivec3(2 * msaa_texel.xy + sample_offset, msaa_texel.z);

        // Store only when the expanded coordinate is inside the output image.
        if (all(lessThan(dest_coords, output_size))) {
            imageStore(output_img, dest_coords, imageLoad(msaa_in, msaa_texel, sample_index));
        }
    }
}
|
29
src/video_core/host_shaders/convert_non_msaa_to_msaa.comp
Normal file
29
src/video_core/host_shaders/convert_non_msaa_to_msaa.comp
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#version 450 core
|
||||||
|
layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
|
||||||
|
|
||||||
|
layout (binding = 0, rgba8) uniform readonly restrict image2DArray img_in;
|
||||||
|
layout (binding = 1, rgba8) uniform writeonly restrict image2DMSArray output_msaa;
|
||||||
|
|
||||||
|
// Fills the samples of one multisampled texel from texels of the
// single-sampled input image. Each invocation handles the MSAA texel at
// gl_GlobalInvocationID (x, y, layer).
void main() {
    const ivec3 msaa_texel = ivec3(gl_GlobalInvocationID);
    if (any(greaterThanEqual(msaa_texel, imageSize(output_msaa)))) {
        // Invocations of the 8x8 workgroup that fall outside the destination image do nothing.
        return;
    }

    const ivec3 input_size = imageSize(img_in);

    // TODO: Specialization constants for num_samples?
    const int sample_count = imageSamples(output_msaa);
    for (int sample_index = 0; sample_index < sample_count; ++sample_index) {
        // Only the two low bits of the sample index pick the source texel:
        // bit 0 is the x offset and bit 1 the y offset inside a 2x2 block.
        // NOTE(review): sample indices >= 4 therefore read the same texels as 0..3.
        const ivec2 sample_offset = ivec2(sample_index & 1, (sample_index / 2) & 1);
        const ivec3 source_coords = ivec3(2 * msaa_texel.xy + sample_offset, msaa_texel.z);

        // Samples whose source texel lies outside the input image are left unwritten.
        if (all(lessThan(source_coords, input_size))) {
            imageStore(output_msaa, msaa_texel, sample_index, imageLoad(img_in, source_coords));
        }
    }
}
|
|
@ -557,6 +557,14 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Copies between two images using the MSAA conversion pass of util_shaders.
/// The sample counts of both images are written to the debug log first.
void TextureCacheRuntime::CopyImageMSAA(Image& dst_image, Image& src_image,
                                        std::span<const VideoCommon::ImageCopy> copies) {
    LOG_DEBUG(Render_OpenGL, "Copying from {} samples to {} samples", src_image.info.num_samples,
              dst_image.info.num_samples);

    // TODO: Leverage the format conversion pass if possible/accurate.
    util_shaders.CopyMSAA(dst_image, src_image, copies);
}
|
||||||
|
|
||||||
void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
|
void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
|
||||||
std::span<const VideoCommon::ImageCopy> copies) {
|
std::span<const VideoCommon::ImageCopy> copies) {
|
||||||
LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format);
|
LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format);
|
||||||
|
|
|
@ -93,12 +93,19 @@ public:
|
||||||
return device.CanReportMemoryUsage();
|
return device.CanReportMemoryUsage();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) {
|
bool ShouldReinterpret([[maybe_unused]] Image& dst,
|
||||||
|
[[maybe_unused]] Image& src) const noexcept {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CanUploadMSAA() const noexcept {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||||
|
|
||||||
|
void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||||
|
|
||||||
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||||
|
|
||||||
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
|
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
|
||||||
|
|
|
@ -12,6 +12,8 @@
|
||||||
#include "video_core/host_shaders/astc_decoder_comp.h"
|
#include "video_core/host_shaders/astc_decoder_comp.h"
|
||||||
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
|
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
|
||||||
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
|
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
|
||||||
|
#include "video_core/host_shaders/convert_msaa_to_non_msaa_comp.h"
|
||||||
|
#include "video_core/host_shaders/convert_non_msaa_to_msaa_comp.h"
|
||||||
#include "video_core/host_shaders/opengl_convert_s8d24_comp.h"
|
#include "video_core/host_shaders/opengl_convert_s8d24_comp.h"
|
||||||
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
|
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
|
||||||
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
|
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
|
||||||
|
@ -51,7 +53,9 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
|
||||||
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
|
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
|
||||||
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
|
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
|
||||||
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)),
|
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)),
|
||||||
convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)) {
|
convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)),
|
||||||
|
convert_ms_to_nonms_program(MakeProgram(CONVERT_MSAA_TO_NON_MSAA_COMP)),
|
||||||
|
convert_nonms_to_ms_program(MakeProgram(CONVERT_NON_MSAA_TO_MSAA_COMP)) {
|
||||||
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
|
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
|
||||||
swizzle_table_buffer.Create();
|
swizzle_table_buffer.Create();
|
||||||
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
|
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
|
||||||
|
@ -269,6 +273,33 @@ void UtilShaders::ConvertS8D24(Image& dst_image, std::span<const ImageCopy> copi
|
||||||
program_manager.RestoreGuestCompute();
|
program_manager.RestoreGuestCompute();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Runs one of the two MSAA conversion compute programs for every requested copy.
///
/// @param dst_image Image bound write-only at image unit 1.
/// @param src_image Image bound read-only at image unit 0.
/// @param copies    Regions to convert; each must start at layer 0 and span exactly one layer
///                  on both the source and the destination.
void UtilShaders::CopyMSAA(Image& dst_image, Image& src_image,
                           std::span<const VideoCommon::ImageCopy> copies) {
    // Every combination other than "multisampled source, single-sampled destination" uses the
    // non-MSAA -> MSAA program.
    const bool is_ms_to_non_ms = src_image.info.num_samples > 1 && dst_image.info.num_samples == 1;
    const OGLProgram& conversion_program =
        is_ms_to_non_ms ? convert_ms_to_nonms_program : convert_nonms_to_ms_program;
    program_manager.BindComputeProgram(conversion_program.handle);

    // Divisor used to turn the copy extent into workgroup counts.
    constexpr u32 workgroup_extent = 8U;

    for (const ImageCopy& copy : copies) {
        ASSERT(copy.src_subresource.base_layer == 0);
        ASSERT(copy.src_subresource.num_layers == 1);
        ASSERT(copy.dst_subresource.base_layer == 0);
        ASSERT(copy.dst_subresource.num_layers == 1);

        // Both images are bound layered and accessed as RGBA8.
        const GLint src_level = copy.src_subresource.base_level;
        const GLint dst_level = copy.dst_subresource.base_level;
        glBindImageTexture(0, src_image.StorageHandle(), src_level, GL_TRUE, 0, GL_READ_ONLY,
                           GL_RGBA8);
        glBindImageTexture(1, dst_image.StorageHandle(), dst_level, GL_TRUE, 0, GL_WRITE_ONLY,
                           GL_RGBA8);

        // Width and height are rounded up to whole workgroups; depth is dispatched as-is.
        glDispatchCompute(Common::DivCeil(copy.extent.width, workgroup_extent),
                          Common::DivCeil(copy.extent.height, workgroup_extent),
                          copy.extent.depth);
    }
    program_manager.RestoreGuestCompute();
}
|
||||||
|
|
||||||
GLenum StoreFormat(u32 bytes_per_block) {
|
GLenum StoreFormat(u32 bytes_per_block) {
|
||||||
switch (bytes_per_block) {
|
switch (bytes_per_block) {
|
||||||
case 1:
|
case 1:
|
||||||
|
|
|
@ -40,6 +40,9 @@ public:
|
||||||
|
|
||||||
void ConvertS8D24(Image& dst_image, std::span<const VideoCommon::ImageCopy> copies);
|
void ConvertS8D24(Image& dst_image, std::span<const VideoCommon::ImageCopy> copies);
|
||||||
|
|
||||||
|
void CopyMSAA(Image& dst_image, Image& src_image,
|
||||||
|
std::span<const VideoCommon::ImageCopy> copies);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ProgramManager& program_manager;
|
ProgramManager& program_manager;
|
||||||
|
|
||||||
|
@ -51,6 +54,8 @@ private:
|
||||||
OGLProgram pitch_unswizzle_program;
|
OGLProgram pitch_unswizzle_program;
|
||||||
OGLProgram copy_bc4_program;
|
OGLProgram copy_bc4_program;
|
||||||
OGLProgram convert_s8d24_program;
|
OGLProgram convert_s8d24_program;
|
||||||
|
OGLProgram convert_ms_to_nonms_program;
|
||||||
|
OGLProgram convert_nonms_to_ms_program;
|
||||||
};
|
};
|
||||||
|
|
||||||
GLenum StoreFormat(u32 bytes_per_block);
|
GLenum StoreFormat(u32 bytes_per_block);
|
||||||
|
|
|
@ -1230,6 +1230,11 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src,
|
||||||
|
std::span<const VideoCommon::ImageCopy> copies) {
|
||||||
|
UNIMPLEMENTED_MSG("Copying images with different samples is not implemented in Vulkan.");
|
||||||
|
}
|
||||||
|
|
||||||
u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
|
u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
|
||||||
return device.GetDeviceLocalMemory();
|
return device.GetDeviceLocalMemory();
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,6 +70,8 @@ public:
|
||||||
|
|
||||||
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||||
|
|
||||||
|
void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||||
|
|
||||||
bool ShouldReinterpret(Image& dst, Image& src);
|
bool ShouldReinterpret(Image& dst, Image& src);
|
||||||
|
|
||||||
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||||
|
@ -80,6 +82,11 @@ public:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reports whether this runtime can upload data into multisampled images.
/// Always false for this backend.
bool CanUploadMSAA() const noexcept {
    // TODO: Implement buffer to MSAA uploads
    constexpr bool supports_msaa_upload = false;
    return supports_msaa_upload;
}
|
||||||
|
|
||||||
void AccelerateImageUpload(Image&, const StagingBufferRef&,
|
void AccelerateImageUpload(Image&, const StagingBufferRef&,
|
||||||
std::span<const VideoCommon::SwizzleParameters>);
|
std::span<const VideoCommon::SwizzleParameters>);
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,9 @@ std::string Name(const ImageBase& image) {
|
||||||
const u32 num_layers = image.info.resources.layers;
|
const u32 num_layers = image.info.resources.layers;
|
||||||
const u32 num_levels = image.info.resources.levels;
|
const u32 num_levels = image.info.resources.levels;
|
||||||
std::string resource;
|
std::string resource;
|
||||||
|
if (image.info.num_samples > 1) {
|
||||||
|
resource += fmt::format(":{}xMSAA", image.info.num_samples);
|
||||||
|
}
|
||||||
if (num_layers > 1) {
|
if (num_layers > 1) {
|
||||||
resource += fmt::format(":L{}", num_layers);
|
resource += fmt::format(":L{}", num_layers);
|
||||||
}
|
}
|
||||||
|
|
|
@ -773,7 +773,7 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
|
||||||
image.flags &= ~ImageFlagBits::CpuModified;
|
image.flags &= ~ImageFlagBits::CpuModified;
|
||||||
TrackImage(image, image_id);
|
TrackImage(image, image_id);
|
||||||
|
|
||||||
if (image.info.num_samples > 1) {
|
if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) {
|
||||||
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
|
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1167,14 +1167,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
|
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
|
||||||
new_image.flags |= ImageFlagBits::GpuModified;
|
new_image.flags |= ImageFlagBits::GpuModified;
|
||||||
}
|
}
|
||||||
if (overlap.info.num_samples != new_image.info.num_samples) {
|
|
||||||
LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
|
|
||||||
} else {
|
|
||||||
const auto& resolution = Settings::values.resolution_info;
|
const auto& resolution = Settings::values.resolution_info;
|
||||||
const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
|
const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
|
||||||
const u32 up_scale = can_rescale ? resolution.up_scale : 1;
|
const u32 up_scale = can_rescale ? resolution.up_scale : 1;
|
||||||
const u32 down_shift = can_rescale ? resolution.down_shift : 0;
|
const u32 down_shift = can_rescale ? resolution.down_shift : 0;
|
||||||
auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
|
auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
|
||||||
|
if (overlap.info.num_samples != new_image.info.num_samples) {
|
||||||
|
runtime.CopyImageMSAA(new_image, overlap, std::move(copies));
|
||||||
|
} else {
|
||||||
runtime.CopyImage(new_image, overlap, std::move(copies));
|
runtime.CopyImage(new_image, overlap, std::move(copies));
|
||||||
}
|
}
|
||||||
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
||||||
|
|
|
@ -573,10 +573,6 @@ u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept {
|
||||||
if (info.type == ImageType::Buffer) {
|
if (info.type == ImageType::Buffer) {
|
||||||
return info.size.width * BytesPerBlock(info.format);
|
return info.size.width * BytesPerBlock(info.format);
|
||||||
}
|
}
|
||||||
if (info.num_samples > 1) {
|
|
||||||
// Multisample images can't be uploaded or downloaded to the host
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (info.type == ImageType::Linear) {
|
if (info.type == ImageType::Linear) {
|
||||||
return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
|
return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
|
||||||
}
|
}
|
||||||
|
@ -703,7 +699,6 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
|
||||||
std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
|
std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
|
||||||
SubresourceBase base, u32 up_scale, u32 down_shift) {
|
SubresourceBase base, u32 up_scale, u32 down_shift) {
|
||||||
ASSERT(dst.resources.levels >= src.resources.levels);
|
ASSERT(dst.resources.levels >= src.resources.levels);
|
||||||
ASSERT(dst.num_samples == src.num_samples);
|
|
||||||
|
|
||||||
const bool is_dst_3d = dst.type == ImageType::e3D;
|
const bool is_dst_3d = dst.type == ImageType::e3D;
|
||||||
if (is_dst_3d) {
|
if (is_dst_3d) {
|
||||||
|
|
Loading…
Reference in a new issue