diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index ebe1395046..f46e81bb7c 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -93,6 +93,7 @@ public: virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, u64 offset) const = 0; + virtual SamplerDescriptor AccessSampler(u32 handle) const = 0; virtual u32 GetBoundBuffer() const = 0; virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index f6237fc6a2..a82b06a382 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -92,8 +92,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con ASSERT(stage == ShaderType::Compute); const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; + return AccessSampler(memory_manager.Read(tex_info_address)); +} - const Texture::TextureHandle tex_handle{memory_manager.Read(tex_info_address)}; +SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { + const Texture::TextureHandle tex_handle{handle}; const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 18ceedfaf5..b7f668d887 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -219,6 +219,8 @@ public: SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, u64 offset) const override; + SamplerDescriptor AccessSampler(u32 handle) const override; + u32 GetBoundBuffer() const override { return regs.tex_cb_index; } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 13ef2e42db..d539ae7b18 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -743,8 +743,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b const auto& shader = state.shader_stages[static_cast(stage)]; const auto& tex_info_buffer = shader.const_buffers[const_buffer]; const GPUVAddr tex_info_address = tex_info_buffer.address + offset; + return AccessSampler(memory_manager.Read(tex_info_address)); +} - const Texture::TextureHandle tex_handle{memory_manager.Read(tex_info_address)}; +SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { + const Texture::TextureHandle tex_handle{handle}; const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 05dd6b39bf..c5f1b04999 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1403,6 +1403,8 @@ public: SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, u64 offset) const override; + SamplerDescriptor AccessSampler(u32 handle) const override; + u32 GetBoundBuffer() const override { return regs.tex_cb_index; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 55e79aaf65..aedcdcb783 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -65,10 +65,22 @@ constexpr std::size_t NumSupportedVertexAttributes = 16; template Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, ShaderType shader_type, std::size_t index = 0) { - if (entry.is_bindless) { - const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return engine.GetTextureInfo(tex_handle); + if constexpr (std::is_same_v) { + if (entry.is_separated) { + const u32 buffer_1 = entry.buffer; + const u32 buffer_2 = entry.secondary_buffer; + const u32 offset_1 = entry.offset; + const u32 offset_2 = entry.secondary_offset; + const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); + const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); + return engine.GetTextureInfo(handle_1 | handle_2); + } } + if (entry.is_bindless) { + const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); + return engine.GetTextureInfo(handle); + } + const auto& gpu_profile = engine.AccessGuestDriverProfile(); const u32 offset = entry.offset + static_cast(index * gpu_profile.GetTextureHandlerSize()); if constexpr (std::is_same_v) { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 9e95a122b2..653c3f2f96 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap; namespace { +using VideoCommon::Shader::SeparateSamplerKey; + using ShaderCacheVersionHash = std::array; struct ConstBufferKey { @@ -37,18 +39,26 @@ struct ConstBufferKey { u32 value = 0; }; -struct BoundSamplerKey { +struct BoundSamplerEntry { u32 offset = 0; Tegra::Engines::SamplerDescriptor sampler; }; -struct BindlessSamplerKey { +struct SeparateSamplerEntry { + u32 cbuf1 = 0; + u32 cbuf2 = 0; + u32 offset1 = 0; + u32 offset2 = 0; + Tegra::Engines::SamplerDescriptor sampler; +}; + +struct BindlessSamplerEntry { u32 cbuf = 0; u32 offset = 0; Tegra::Engines::SamplerDescriptor sampler; }; -constexpr u32 NativeVersion = 20; +constexpr u32 NativeVersion = 21; ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; @@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { u32 texture_handler_size_value; u32 num_keys; u32 num_bound_samplers; + u32 num_separate_samplers; u32 num_bindless_samplers; if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || file.ReadArray(&is_texture_handler_size_known, 1) != 1 || file.ReadArray(&texture_handler_size_value, 1) != 1 || file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || + file.ReadArray(&num_separate_samplers, 1) != 1 || file.ReadArray(&num_bindless_samplers, 1) != 1) { return false; } @@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { } std::vector flat_keys(num_keys); - std::vector flat_bound_samplers(num_bound_samplers); - std::vector flat_bindless_samplers(num_bindless_samplers); + std::vector flat_bound_samplers(num_bound_samplers); + std::vector flat_separate_samplers(num_separate_samplers); + std::vector flat_bindless_samplers(num_bindless_samplers); if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != flat_bound_samplers.size() || + file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) != + flat_separate_samplers.size() || file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != flat_bindless_samplers.size()) { return false; } - for (const auto& key : flat_keys) { - keys.insert({{key.cbuf, key.offset}, key.value}); + for (const auto& entry : flat_keys) { + keys.insert({{entry.cbuf, entry.offset}, entry.value}); } - for (const auto& key : flat_bound_samplers) { - bound_samplers.emplace(key.offset, key.sampler); + for (const auto& entry : flat_bound_samplers) { + bound_samplers.emplace(entry.offset, entry.sampler); } - for (const auto& key : flat_bindless_samplers) { - bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); + for (const auto& entry : flat_separate_samplers) { + SeparateSamplerKey key; + key.buffers = {entry.cbuf1, entry.cbuf2}; + key.offsets = {entry.offset1, entry.offset2}; + separate_samplers.emplace(key, entry.sampler); + } + for (const auto& entry : flat_bindless_samplers) { + bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); } return true; @@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || file.WriteObject(static_cast(keys.size())) != 1 || file.WriteObject(static_cast(bound_samplers.size())) != 1 || + file.WriteObject(static_cast(separate_samplers.size())) != 1 || file.WriteObject(static_cast(bindless_samplers.size())) != 1) { return false; } @@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); } - std::vector flat_bound_samplers; + std::vector flat_bound_samplers; flat_bound_samplers.reserve(bound_samplers.size()); for (const auto& [address, sampler] : bound_samplers) { - flat_bound_samplers.push_back(BoundSamplerKey{address, sampler}); + flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); } - std::vector flat_bindless_samplers; + std::vector flat_separate_samplers; + flat_separate_samplers.reserve(separate_samplers.size()); + for (const auto& [key, sampler] : separate_samplers) { + SeparateSamplerEntry entry; + std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; + std::tie(entry.offset1, entry.offset2) = key.offsets; + entry.sampler = sampler; + flat_separate_samplers.push_back(entry); + } + + std::vector flat_bindless_samplers; flat_bindless_samplers.reserve(bindless_samplers.size()); for (const auto& [address, sampler] : bindless_samplers) { flat_bindless_samplers.push_back( - BindlessSamplerKey{address.first, address.second, sampler}); + BindlessSamplerEntry{address.first, address.second, sampler}); } return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == flat_bound_samplers.size() && + file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) == + flat_separate_samplers.size() && file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == flat_bindless_samplers.size(); } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index d5be52e401..a79cef0e9c 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry { VideoCommon::Shader::ComputeInfo compute_info; VideoCommon::Shader::KeyMap keys; VideoCommon::Shader::BoundSamplerMap bound_samplers; + VideoCommon::Shader::SeparateSamplerMap separate_samplers; VideoCommon::Shader::BindlessSamplerMap bindless_samplers; }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a3d992ed3f..42a20530d4 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -117,6 +117,17 @@ template Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, std::size_t stage, std::size_t index = 0) { const auto stage_type = static_cast(stage); + if constexpr (std::is_same_v) { + if (entry.is_separated) { + const u32 buffer_1 = entry.buffer; + const u32 buffer_2 = entry.secondary_buffer; + const u32 offset_1 = entry.offset; + const u32 offset_2 = entry.secondary_offset; + const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1); + const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2); + return engine.GetTextureInfo(handle_1 | handle_2); + } + } if (entry.is_bindless) { const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); return engine.GetTextureInfo(tex_handle); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 8f0bb996ec..29ebf65bac 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { return pc; } -ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset, - std::optional buffer) { +ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( + SamplerInfo info, std::optional sampler) { if (info.IsComplete()) { return info; } - const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset) - : registry.ObtainBoundSampler(offset); if (!sampler) { LOG_WARNING(HW_GPU, "Unknown sampler info"); info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); @@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset, std::optional ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo sampler_info) { - const auto offset = static_cast(sampler.index.Value()); - const auto info = GetSamplerInfo(sampler_info, offset); + const u32 offset = static_cast(sampler.index.Value()); + const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); // If this sampler has already been used, return the existing mapping. const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), @@ -404,20 +402,19 @@ std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, const Node sampler_register = GetRegister(reg); const auto [base_node, tracked_sampler_info] = TrackBindlessSampler(sampler_register, global_code, static_cast(global_code.size())); - ASSERT(base_node != nullptr); - if (base_node == nullptr) { + if (!base_node) { + UNREACHABLE(); return std::nullopt; } - if (const auto bindless_sampler_info = - std::get_if(&*tracked_sampler_info)) { - const u32 buffer = bindless_sampler_info->GetIndex(); - const u32 offset = bindless_sampler_info->GetOffset(); - info = GetSamplerInfo(info, offset, buffer); + if (const auto sampler_info = std::get_if(&*tracked_sampler_info)) { + const u32 buffer = sampler_info->index; + const u32 offset = sampler_info->offset; + info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset)); // If this sampler has already been used, return the existing mapping. const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer = buffer, offset = offset](const Sampler& entry) { + [buffer, offset](const Sampler& entry) { return entry.buffer == buffer && entry.offset == offset; }); if (it != used_samplers.end()) { @@ -431,10 +428,32 @@ std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, *info.is_shadow, *info.is_buffer, false); } - if (const auto array_sampler_info = std::get_if(&*tracked_sampler_info)) { - const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; - index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); - info = GetSamplerInfo(info, base_offset); + if (const auto sampler_info = std::get_if(&*tracked_sampler_info)) { + const std::pair indices = sampler_info->indices; + const std::pair offsets = sampler_info->offsets; + info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); + + // Try to use an already created sampler if it exists + const auto it = std::find_if( + used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) { + return offsets == std::pair{entry.offset, entry.secondary_offset} && + indices == std::pair{entry.buffer, entry.secondary_buffer}; + }); + if (it != used_samplers.end()) { + ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && + it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); + return *it; + } + + // Otherwise create a new mapping for this sampler + const u32 next_index = static_cast(used_samplers.size()); + return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array, + *info.is_shadow, *info.is_buffer); + } + if (const auto sampler_info = std::get_if(&*tracked_sampler_info)) { + const u32 base_offset = sampler_info->base_offset / 4; + index_var = GetCustomVariable(sampler_info->bindless_var); + info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset)); // If this sampler has already been used, return the existing mapping. const auto it = std::find_if( diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index c5e5165ff8..8f230d57a6 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -275,10 +275,11 @@ using Node = std::shared_ptr; using Node4 = std::array; using NodeBlock = std::vector; -class BindlessSamplerNode; -class ArraySamplerNode; +struct ArraySamplerNode; +struct BindlessSamplerNode; +struct SeparateSamplerNode; -using TrackSamplerData = std::variant; +using TrackSamplerData = std::variant; using TrackSampler = std::shared_ptr; struct Sampler { @@ -288,63 +289,51 @@ struct Sampler { : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, is_buffer{is_buffer}, is_indexed{is_indexed} {} + /// Separate sampler constructor + constexpr explicit Sampler(u32 index, std::pair offsets, std::pair buffers, + Tegra::Shader::TextureType type, bool is_array, bool is_shadow, + bool is_buffer) + : index{index}, offset{offsets.first}, secondary_offset{offsets.second}, + buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array}, + is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {} + /// Bindless samplers constructor constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} - u32 index = 0; ///< Emulated index given for the this sampler. - u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. - u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers). - u32 size = 1; ///< Size of the sampler. + u32 index = 0; ///< Emulated index given for the this sampler. + u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. + u32 secondary_offset = 0; ///< Secondary offset in the const buffer. + u32 buffer = 0; ///< Buffer where the bindless sampler is read. + u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read. + u32 size = 1; ///< Size of the sampler. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. - bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. - bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. - bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. + bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. + bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. + bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. + bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. + bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. + bool is_separated = false; ///< Whether the image and sampler is separated or not. }; /// Represents a tracked bindless sampler into a direct const buffer -class ArraySamplerNode final { -public: - explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var) - : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {} - - constexpr u32 GetIndex() const { - return index; - } - - constexpr u32 GetBaseOffset() const { - return base_offset; - } - - constexpr u32 GetIndexVar() const { - return bindless_var; - } - -private: +struct ArraySamplerNode { u32 index; u32 base_offset; u32 bindless_var; }; +/// Represents a tracked separate sampler image pair that was folded statically +struct SeparateSamplerNode { + std::pair indices; + std::pair offsets; +}; + /// Represents a tracked bindless sampler into a direct const buffer -class BindlessSamplerNode final { -public: - explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} - - constexpr u32 GetIndex() const { - return index; - } - - constexpr u32 GetOffset() const { - return offset; - } - -private: +struct BindlessSamplerNode { u32 index; u32 offset; }; diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h index 11231bbea8..1e0886185d 100644 --- a/src/video_core/shader/node_helper.h +++ b/src/video_core/shader/node_helper.h @@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) { template TrackSampler MakeTrackSampler(Args&&... args) { static_assert(std::is_convertible_v); - return std::make_shared(T(std::forward(args)...)); + return std::make_shared(T{std::forward(args)...}); } template diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index af70b3f357..cdf274e544 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -93,6 +93,26 @@ std::optional Registry::ObtainBoundSampler(u32 offset) { return value; } +std::optional Registry::ObtainSeparateSampler( + std::pair buffers, std::pair offsets) { + SeparateSamplerKey key; + key.buffers = buffers; + key.offsets = offsets; + const auto iter = separate_samplers.find(key); + if (iter != separate_samplers.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + + const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first); + const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second); + const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2); + separate_samplers.emplace(key, value); + return value; +} + std::optional Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { const std::pair key = {buffer, offset}; diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index 0c80d35fda..2312067657 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h @@ -19,8 +19,39 @@ namespace VideoCommon::Shader { +struct SeparateSamplerKey { + std::pair buffers; + std::pair offsets; +}; + +} // namespace VideoCommon::Shader + +namespace std { + +template <> +struct hash { + std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept { + return std::hash{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^ + key.offsets.second); + } +}; + +template <> +struct equal_to { + bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs, + const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept { + return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets; + } +}; + +} // namespace std + +namespace VideoCommon::Shader { + using KeyMap = std::unordered_map, u32, Common::PairHash>; using BoundSamplerMap = std::unordered_map; +using SeparateSamplerMap = + std::unordered_map; using BindlessSamplerMap = std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; @@ -73,6 +104,9 @@ public: std::optional ObtainBoundSampler(u32 offset); + std::optional ObtainSeparateSampler( + std::pair buffers, std::pair offsets); + std::optional ObtainBindlessSampler(u32 buffer, u32 offset); /// Inserts a key. @@ -128,6 +162,7 @@ private: Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; KeyMap keys; BoundSamplerMap bound_samplers; + SeparateSamplerMap separate_samplers; BindlessSamplerMap bindless_samplers; u32 bound_buffer; GraphicsInfo graphics_info; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 8f522edf0a..3a98b2104f 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -330,8 +330,8 @@ private: OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); /// Queries the missing sampler info from the execution context. - SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset, - std::optional buffer = std::nullopt); + SamplerInfo GetSamplerInfo(SamplerInfo info, + std::optional sampler); /// Accesses a texture sampler. std::optional GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 435f4facbc..d5ed81442f 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -64,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) { if (const auto operation = std::get_if(&*node)) { operation->SetAmendIndex(amend_index); return true; - } else if (const auto conditional = std::get_if(&*node)) { + } + if (const auto conditional = std::get_if(&*node)) { conditional->SetAmendIndex(amend_index); return true; } @@ -110,10 +111,23 @@ std::pair ShaderIR::TrackBindlessSampler(Node tracked, const return TrackBindlessSampler(source, code, new_cursor); } if (const auto operation = std::get_if(&*tracked)) { - for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { - if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); - std::get<0>(found)) { - // Cbuf found in operand. + const OperationNode& op = *operation; + + const OperationCode opcode = operation->GetCode(); + if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) { + ASSERT(op.GetOperandsCount() == 2); + auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor); + auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor); + if (node_a && node_b) { + auto track = MakeTrackSampler(std::pair{index_a, index_b}, + std::pair{offset_a, offset_b}); + return {tracked, std::move(track)}; + } + } + std::size_t i = op.GetOperandsCount(); + while (i--) { + if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) { + // Constant buffer found in operand. return found; } }