3
0
Fork 0
forked from suyu/suyu

Merge pull request #4034 from ReinUsesLisp/storage-texels

vk_rasterizer: Implement storage texels and atomic image operations
This commit is contained in:
Rodrigo Locatti 2020-06-07 18:43:24 -03:00 committed by GitHub
commit 2293e8a11a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 145 additions and 93 deletions

2
externals/sirit vendored

@ -1 +1 @@
Subproject commit a62c5bbc100a5e5a31ea0ccc4a78d8fa6a4167ce Subproject commit eefca56afd49379bdebc97ded8b480839f930881

View file

@ -53,8 +53,9 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
}; };
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size()); add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
VkDescriptorSetLayoutCreateInfo ci; VkDescriptorSetLayoutCreateInfo ci;

View file

@ -42,6 +42,7 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}}; {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}};
VkDescriptorPoolCreateInfo ci; VkDescriptorPoolCreateInfo ci;

View file

@ -45,6 +45,7 @@ constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
@ -104,8 +105,9 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
u32 binding = base_binding; u32 binding = base_binding;
AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers); AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
return binding; return binding;
} }
@ -377,16 +379,17 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3
return; return;
} }
if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) { if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER ||
// Nvidia has a bug where updating multiple uniform texels at once causes the driver to descriptor_type == STORAGE_TEXEL_BUFFER) {
// crash. // Nvidia has a bug where updating multiple texels at once causes the driver to crash.
// Note: Fixed in driver Windows 443.24, Linux 440.66.15
for (u32 i = 0; i < count; ++i) { for (u32 i = 0; i < count; ++i) {
VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
entry.dstBinding = binding + i; entry.dstBinding = binding + i;
entry.dstArrayElement = 0; entry.dstArrayElement = 0;
entry.descriptorCount = 1; entry.descriptorCount = 1;
entry.descriptorType = descriptor_type; entry.descriptorType = descriptor_type;
entry.offset = offset + i * entry_size; entry.offset = static_cast<std::size_t>(offset + i * entry_size);
entry.stride = entry_size; entry.stride = entry_size;
} }
} else if (count > 0) { } else if (count > 0) {
@ -407,8 +410,9 @@ void FillDescriptorUpdateTemplateEntries(
std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers);
AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers);
AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers); AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels);
AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers);
AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels);
AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images);
} }

View file

@ -468,8 +468,9 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
const auto& entries = pipeline.GetEntries(); const auto& entries = pipeline.GetEntries();
SetupComputeConstBuffers(entries); SetupComputeConstBuffers(entries);
SetupComputeGlobalBuffers(entries); SetupComputeGlobalBuffers(entries);
SetupComputeTexelBuffers(entries); SetupComputeUniformTexels(entries);
SetupComputeTextures(entries); SetupComputeTextures(entries);
SetupComputeStorageTexels(entries);
SetupComputeImages(entries); SetupComputeImages(entries);
buffer_cache.Unmap(); buffer_cache.Unmap();
@ -787,8 +788,9 @@ void RasterizerVulkan::SetupShaderDescriptors(
const auto& entries = shader->GetEntries(); const auto& entries = shader->GetEntries();
SetupGraphicsConstBuffers(entries, stage); SetupGraphicsConstBuffers(entries, stage);
SetupGraphicsGlobalBuffers(entries, stage); SetupGraphicsGlobalBuffers(entries, stage);
SetupGraphicsTexelBuffers(entries, stage); SetupGraphicsUniformTexels(entries, stage);
SetupGraphicsTextures(entries, stage); SetupGraphicsTextures(entries, stage);
SetupGraphicsStorageTexels(entries, stage);
SetupGraphicsImages(entries, stage); SetupGraphicsImages(entries, stage);
} }
texture_cache.GuardSamplers(false); texture_cache.GuardSamplers(false);
@ -983,12 +985,12 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
} }
} }
void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) { void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Textures); MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().Maxwell3D(); const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.texel_buffers) { for (const auto& entry : entries.uniform_texels) {
const auto image = GetTextureInfo(gpu, entry, stage).tic; const auto image = GetTextureInfo(gpu, entry, stage).tic;
SetupTexelBuffer(image, entry); SetupUniformTexels(image, entry);
} }
} }
@ -1003,6 +1005,15 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
} }
} }
/// Walks the storage texel entries of a graphics shader stage and sets each one up.
/// @param entries Shader entries whose `storage_texels` list is iterated.
/// @param stage   Shader stage index forwarded to GetTextureInfo.
void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) {
    MICROPROFILE_SCOPE(Vulkan_Textures);
    const auto& maxwell3d = system.GPU().Maxwell3D();
    const auto& storage_texels = entries.storage_texels;
    for (const auto& storage_texel : storage_texels) {
        // Look up the TIC for this entry on the 3D engine, then hand it to the common setup path.
        const auto tic = GetTextureInfo(maxwell3d, storage_texel, stage).tic;
        SetupStorageTexel(tic, storage_texel);
    }
}
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Images); MICROPROFILE_SCOPE(Vulkan_Images);
const auto& gpu = system.GPU().Maxwell3D(); const auto& gpu = system.GPU().Maxwell3D();
@ -1035,12 +1046,12 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
} }
} }
void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures); MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().KeplerCompute(); const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.texel_buffers) { for (const auto& entry : entries.uniform_texels) {
const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
SetupTexelBuffer(image, entry); SetupUniformTexels(image, entry);
} }
} }
@ -1055,6 +1066,15 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
} }
} }
/// Walks the storage texel entries of a compute shader and sets each one up.
/// @param entries Shader entries whose `storage_texels` list is iterated.
void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
    MICROPROFILE_SCOPE(Vulkan_Textures);
    const auto& kepler_compute = system.GPU().KeplerCompute();
    const auto& storage_texels = entries.storage_texels;
    for (const auto& storage_texel : storage_texels) {
        // Look up the TIC for this entry on the compute engine, using the compute stage index.
        const auto tic = GetTextureInfo(kepler_compute, storage_texel, ComputeShaderIndex).tic;
        SetupStorageTexel(tic, storage_texel);
    }
}
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Images); MICROPROFILE_SCOPE(Vulkan_Images);
const auto& gpu = system.GPU().KeplerCompute(); const auto& gpu = system.GPU().KeplerCompute();
@ -1104,8 +1124,8 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
update_descriptor_queue.AddBuffer(buffer, offset, size); update_descriptor_queue.AddBuffer(buffer, offset, size);
} }
void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic, void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
const TexelBufferEntry& entry) { const UniformTexelEntry& entry) {
const auto view = texture_cache.GetTextureSurface(tic, entry); const auto view = texture_cache.GetTextureSurface(tic, entry);
ASSERT(view->IsBufferView()); ASSERT(view->IsBufferView());
@ -1127,6 +1147,14 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
sampled_views.push_back(ImageView{std::move(view), image_layout}); sampled_views.push_back(ImageView{std::move(view), image_layout});
} }
/// Queues the buffer view of a storage texel into the descriptor update queue.
/// @param tic   Texture image control entry used to fetch the surface from the texture cache.
/// @param entry Shader storage texel entry passed alongside the TIC to the texture cache.
void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic,
                                         const StorageTexelEntry& entry) {
    const auto surface_view = texture_cache.GetImageSurface(tic, entry);
    // The surface fetched here is required to be a buffer view.
    ASSERT(surface_view->IsBufferView());

    update_descriptor_queue.AddTexelBuffer(surface_view->GetBufferView());
}
void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
auto view = texture_cache.GetImageSurface(tic, entry); auto view = texture_cache.GetImageSurface(tic, entry);

View file

@ -193,12 +193,15 @@ private:
/// Setup global buffers in the graphics pipeline. /// Setup global buffers in the graphics pipeline.
void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
/// Setup texel buffers in the graphics pipeline. /// Setup uniform texels in the graphics pipeline.
void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage); void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
/// Setup textures in the graphics pipeline. /// Setup textures in the graphics pipeline.
void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
/// Setup storage texels in the graphics pipeline.
void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
/// Setup images in the graphics pipeline. /// Setup images in the graphics pipeline.
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
@ -209,11 +212,14 @@ private:
void SetupComputeGlobalBuffers(const ShaderEntries& entries); void SetupComputeGlobalBuffers(const ShaderEntries& entries);
/// Setup texel buffers in the compute pipeline. /// Setup uniform texels in the compute pipeline.
void SetupComputeTexelBuffers(const ShaderEntries& entries); void SetupComputeUniformTexels(const ShaderEntries& entries);
/// Setup textures in the compute pipeline. /// Setup textures in the compute pipeline.
void SetupComputeTextures(const ShaderEntries& entries); void SetupComputeTextures(const ShaderEntries& entries);
/// Setup storage texels in the compute pipeline.
void SetupComputeStorageTexels(const ShaderEntries& entries);
/// Setup images in the compute pipeline. /// Setup images in the compute pipeline.
void SetupComputeImages(const ShaderEntries& entries); void SetupComputeImages(const ShaderEntries& entries);
@ -222,10 +228,12 @@ private:
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry); void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry);
void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry);
void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);

View file

@ -400,8 +400,9 @@ private:
u32 binding = specialization.base_binding; u32 binding = specialization.base_binding;
binding = DeclareConstantBuffers(binding); binding = DeclareConstantBuffers(binding);
binding = DeclareGlobalBuffers(binding); binding = DeclareGlobalBuffers(binding);
binding = DeclareTexelBuffers(binding); binding = DeclareUniformTexels(binding);
binding = DeclareSamplers(binding); binding = DeclareSamplers(binding);
binding = DeclareStorageTexels(binding);
binding = DeclareImages(binding); binding = DeclareImages(binding);
const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
@ -889,7 +890,7 @@ private:
return binding; return binding;
} }
u32 DeclareTexelBuffers(u32 binding) { u32 DeclareUniformTexels(u32 binding) {
for (const auto& sampler : ir.GetSamplers()) { for (const auto& sampler : ir.GetSamplers()) {
if (!sampler.is_buffer) { if (!sampler.is_buffer) {
continue; continue;
@ -910,7 +911,7 @@ private:
Decorate(id, spv::Decoration::Binding, binding++); Decorate(id, spv::Decoration::Binding, binding++);
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
texel_buffers.emplace(sampler.index, TexelBuffer{image_type, id}); uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id});
} }
return binding; return binding;
} }
@ -945,31 +946,48 @@ private:
return binding; return binding;
} }
u32 DeclareImages(u32 binding) { u32 DeclareStorageTexels(u32 binding) {
for (const auto& image : ir.GetImages()) { for (const auto& image : ir.GetImages()) {
const auto [dim, arrayed] = GetImageDim(image); if (image.type != Tegra::Shader::ImageType::TextureBuffer) {
constexpr int depth = 0; continue;
constexpr bool ms = false;
constexpr int sampled = 2; // This won't be accessed with a sampler
constexpr auto format = spv::ImageFormat::Unknown;
const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
Decorate(id, spv::Decoration::Binding, binding++);
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
if (image.is_read && !image.is_written) {
Decorate(id, spv::Decoration::NonWritable);
} else if (image.is_written && !image.is_read) {
Decorate(id, spv::Decoration::NonReadable);
} }
DeclareImage(image, binding);
images.emplace(image.index, StorageImage{image_type, id});
} }
return binding; return binding;
} }
/// Declares every IR image that is not a texture buffer.
/// @param binding First binding index to use; DeclareImage receives it by reference.
/// @return The binding value after all matching images have been declared.
u32 DeclareImages(u32 binding) {
    for (const auto& entry : ir.GetImages()) {
        // Texture buffer images are skipped here.
        if (entry.type != Tegra::Shader::ImageType::TextureBuffer) {
            DeclareImage(entry, binding);
        }
    }
    return binding;
}
void DeclareImage(const Image& image, u32& binding) {
const auto [dim, arrayed] = GetImageDim(image);
constexpr int depth = 0;
constexpr bool ms = false;
constexpr int sampled = 2; // This won't be accessed with a sampler
const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown;
const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
Decorate(id, spv::Decoration::Binding, binding++);
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
if (image.is_read && !image.is_written) {
Decorate(id, spv::Decoration::NonWritable);
} else if (image.is_written && !image.is_read) {
Decorate(id, spv::Decoration::NonReadable);
}
images.emplace(image.index, StorageImage{image_type, id});
}
bool IsRenderTargetEnabled(u32 rt) const { bool IsRenderTargetEnabled(u32 rt) const {
for (u32 component = 0; component < 4; ++component) { for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(rt, component)) { if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
@ -1256,7 +1274,7 @@ private:
} else { } else {
UNREACHABLE_MSG("Unmanaged offset node type"); UNREACHABLE_MSG("Unmanaged offset node type");
} }
pointer = OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), buffer_index, pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index,
buffer_element); buffer_element);
} }
return {OpLoad(t_float, pointer), Type::Float}; return {OpLoad(t_float, pointer), Type::Float};
@ -1611,7 +1629,7 @@ private:
const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
const Id carry = OpCompositeExtract(t_uint, result, 1); const Id carry = OpCompositeExtract(t_uint, result, 1);
return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool}; return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool};
} }
Expression LogicalAssign(Operation operation) { Expression LogicalAssign(Operation operation) {
@ -1674,7 +1692,7 @@ private:
const auto& meta = std::get<MetaTexture>(operation.GetMeta()); const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 index = meta.sampler.index; const u32 index = meta.sampler.index;
if (meta.sampler.is_buffer) { if (meta.sampler.is_buffer) {
const auto& entry = texel_buffers.at(index); const auto& entry = uniform_texels.at(index);
return OpLoad(entry.image_type, entry.image); return OpLoad(entry.image_type, entry.image);
} else { } else {
const auto& entry = sampled_images.at(index); const auto& entry = sampled_images.at(index);
@ -1951,39 +1969,20 @@ private:
return {}; return {};
} }
Expression AtomicImageAdd(Operation operation) { template <Id (Module::*func)(Id, Id, Id, Id, Id)>
UNIMPLEMENTED(); Expression AtomicImage(Operation operation) {
return {}; const auto& meta{std::get<MetaImage>(operation.GetMeta())};
} ASSERT(meta.values.size() == 1);
Expression AtomicImageMin(Operation operation) { const Id coordinate = GetCoordinates(operation, Type::Int);
UNIMPLEMENTED(); const Id image = images.at(meta.image.index).image;
return {}; const Id sample = v_uint_zero;
} const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample);
Expression AtomicImageMax(Operation operation) { const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
UNIMPLEMENTED(); const Id semantics = v_uint_zero;
return {}; const Id value = AsUint(Visit(meta.values[0]));
} return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
Expression AtomicImageAnd(Operation operation) {
UNIMPLEMENTED();
return {};
}
Expression AtomicImageOr(Operation operation) {
UNIMPLEMENTED();
return {};
}
Expression AtomicImageXor(Operation operation) {
UNIMPLEMENTED();
return {};
}
Expression AtomicImageExchange(Operation operation) {
UNIMPLEMENTED();
return {};
} }
template <Id (Module::*func)(Id, Id, Id, Id, Id)> template <Id (Module::*func)(Id, Id, Id, Id, Id)>
@ -1998,7 +1997,7 @@ private:
return {v_float_zero, Type::Float}; return {v_float_zero, Type::Float};
} }
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
const Id semantics = Constant(t_uint, 0); const Id semantics = v_uint_zero;
const Id value = AsUint(Visit(operation[1])); const Id value = AsUint(Visit(operation[1]));
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
@ -2622,11 +2621,11 @@ private:
&SPIRVDecompiler::ImageLoad, &SPIRVDecompiler::ImageLoad,
&SPIRVDecompiler::ImageStore, &SPIRVDecompiler::ImageStore,
&SPIRVDecompiler::AtomicImageAdd, &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>,
&SPIRVDecompiler::AtomicImageAnd, &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>,
&SPIRVDecompiler::AtomicImageOr, &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>,
&SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>,
&SPIRVDecompiler::AtomicImageExchange, &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
@ -2768,8 +2767,11 @@ private:
Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint);
const Id v_float_zero = Constant(t_float, 0.0f); const Id v_float_zero = Constant(t_float, 0.0f);
const Id v_float_one = Constant(t_float, 1.0f); const Id v_float_one = Constant(t_float, 1.0f);
const Id v_uint_zero = Constant(t_uint, 0);
// Nvidia uses these defaults for varyings (e.g. position and generic attributes) // Nvidia uses these defaults for varyings (e.g. position and generic attributes)
const Id v_varying_default = const Id v_varying_default =
@ -2794,15 +2796,16 @@ private:
std::unordered_map<u8, GenericVaryingDescription> output_attributes; std::unordered_map<u8, GenericVaryingDescription> output_attributes;
std::map<u32, Id> constant_buffers; std::map<u32, Id> constant_buffers;
std::map<GlobalMemoryBase, Id> global_buffers; std::map<GlobalMemoryBase, Id> global_buffers;
std::map<u32, TexelBuffer> texel_buffers; std::map<u32, TexelBuffer> uniform_texels;
std::map<u32, SampledImage> sampled_images; std::map<u32, SampledImage> sampled_images;
std::map<u32, TexelBuffer> storage_texels;
std::map<u32, StorageImage> images; std::map<u32, StorageImage> images;
std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
Id instance_index{}; Id instance_index{};
Id vertex_index{}; Id vertex_index{};
Id base_instance{}; Id base_instance{};
Id base_vertex{}; Id base_vertex{};
std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
Id frag_depth{}; Id frag_depth{};
Id frag_coord{}; Id frag_coord{};
Id front_facing{}; Id front_facing{};
@ -3058,13 +3061,17 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
} }
for (const auto& sampler : ir.GetSamplers()) { for (const auto& sampler : ir.GetSamplers()) {
if (sampler.is_buffer) { if (sampler.is_buffer) {
entries.texel_buffers.emplace_back(sampler); entries.uniform_texels.emplace_back(sampler);
} else { } else {
entries.samplers.emplace_back(sampler); entries.samplers.emplace_back(sampler);
} }
} }
for (const auto& image : ir.GetImages()) { for (const auto& image : ir.GetImages()) {
entries.images.emplace_back(image); if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
entries.storage_texels.emplace_back(image);
} else {
entries.images.emplace_back(image);
}
} }
for (const auto& attribute : ir.GetInputAttributes()) { for (const auto& attribute : ir.GetInputAttributes()) {
if (IsGenericAttribute(attribute)) { if (IsGenericAttribute(attribute)) {

View file

@ -21,8 +21,9 @@ class VKDevice;
namespace Vulkan { namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using TexelBufferEntry = VideoCommon::Shader::Sampler; using UniformTexelEntry = VideoCommon::Shader::Sampler;
using SamplerEntry = VideoCommon::Shader::Sampler; using SamplerEntry = VideoCommon::Shader::Sampler;
using StorageTexelEntry = VideoCommon::Shader::Image;
using ImageEntry = VideoCommon::Shader::Image; using ImageEntry = VideoCommon::Shader::Image;
constexpr u32 DESCRIPTOR_SET = 0; constexpr u32 DESCRIPTOR_SET = 0;
@ -66,13 +67,15 @@ private:
struct ShaderEntries { struct ShaderEntries {
u32 NumBindings() const { u32 NumBindings() const {
return static_cast<u32>(const_buffers.size() + global_buffers.size() + return static_cast<u32>(const_buffers.size() + global_buffers.size() +
texel_buffers.size() + samplers.size() + images.size()); uniform_texels.size() + samplers.size() + storage_texels.size() +
images.size());
} }
std::vector<ConstBufferEntry> const_buffers; std::vector<ConstBufferEntry> const_buffers;
std::vector<GlobalBufferEntry> global_buffers; std::vector<GlobalBufferEntry> global_buffers;
std::vector<TexelBufferEntry> texel_buffers; std::vector<UniformTexelEntry> uniform_texels;
std::vector<SamplerEntry> samplers; std::vector<SamplerEntry> samplers;
std::vector<StorageTexelEntry> storage_texels;
std::vector<ImageEntry> images; std::vector<ImageEntry> images;
std::set<u32> attributes; std::set<u32> attributes;
std::array<bool, Maxwell::NumClipDistances> clip_distances{}; std::array<bool, Maxwell::NumClipDistances> clip_distances{};

View file

@ -100,8 +100,8 @@ vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
ci.pNext = nullptr; ci.pNext = nullptr;
ci.flags = 0; ci.flags = 0;
ci.size = static_cast<VkDeviceSize>(host_memory_size); ci.size = static_cast<VkDeviceSize>(host_memory_size);
ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT; VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
ci.queueFamilyIndexCount = 0; ci.queueFamilyIndexCount = 0;
ci.pQueueFamilyIndices = nullptr; ci.pQueueFamilyIndices = nullptr;