shader/texture: Join separate image and sampler pairs offline
Games using D3D idioms can join images and samplers when a shader executes, instead of baking them into a combined sampler image. This is also possible on Vulkan. One approach to this solution would be to use separate samplers on Vulkan and leave this unimplemented on OpenGL, but we can't do this because there's no consistent way of determining which constant buffer holds a sampler and which one an image. We could in theory find the first bit and if it's in the TIC area, it's an image; but this falls apart when an image or sampler handle use an index of zero. The used approach is to track for a LOP.OR operation (this is done at an IR level, not at an ISA level), track again the constant buffers used as source and store this pair. Then, outside of shader execution, join the sample and image pair with a bitwise or operation. This approach won't work on games that truly use separate samplers in a meaningful way. For example, pooling textures in a 2D array and determining at runtime what sampler to use. This invalidates OpenGL's disk shader cache :) - Used mostly by D3D ports to Switch
This commit is contained in:
parent
e1438f8e91
commit
5b2b6d594c
16 changed files with 235 additions and 89 deletions
|
@ -93,6 +93,7 @@ public:
|
||||||
virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
|
virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
|
||||||
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||||
u64 offset) const = 0;
|
u64 offset) const = 0;
|
||||||
|
virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
|
||||||
virtual u32 GetBoundBuffer() const = 0;
|
virtual u32 GetBoundBuffer() const = 0;
|
||||||
|
|
||||||
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
|
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
|
||||||
|
|
|
@ -92,8 +92,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
|
||||||
ASSERT(stage == ShaderType::Compute);
|
ASSERT(stage == ShaderType::Compute);
|
||||||
const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
|
const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
|
||||||
const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
|
const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
|
||||||
|
return AccessSampler(memory_manager.Read<u32>(tex_info_address));
|
||||||
|
}
|
||||||
|
|
||||||
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
|
SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
|
||||||
|
const Texture::TextureHandle tex_handle{handle};
|
||||||
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
|
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
|
||||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
|
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
|
||||||
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
|
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
|
||||||
|
|
|
@ -219,6 +219,8 @@ public:
|
||||||
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||||
u64 offset) const override;
|
u64 offset) const override;
|
||||||
|
|
||||||
|
SamplerDescriptor AccessSampler(u32 handle) const override;
|
||||||
|
|
||||||
u32 GetBoundBuffer() const override {
|
u32 GetBoundBuffer() const override {
|
||||||
return regs.tex_cb_index;
|
return regs.tex_cb_index;
|
||||||
}
|
}
|
||||||
|
|
|
@ -743,8 +743,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
|
||||||
const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
|
const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
|
||||||
const auto& tex_info_buffer = shader.const_buffers[const_buffer];
|
const auto& tex_info_buffer = shader.const_buffers[const_buffer];
|
||||||
const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
|
const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
|
||||||
|
return AccessSampler(memory_manager.Read<u32>(tex_info_address));
|
||||||
|
}
|
||||||
|
|
||||||
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
|
SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
|
||||||
|
const Texture::TextureHandle tex_handle{handle};
|
||||||
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
|
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
|
||||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
|
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
|
||||||
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
|
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
|
||||||
|
|
|
@ -1403,6 +1403,8 @@ public:
|
||||||
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||||
u64 offset) const override;
|
u64 offset) const override;
|
||||||
|
|
||||||
|
SamplerDescriptor AccessSampler(u32 handle) const override;
|
||||||
|
|
||||||
u32 GetBoundBuffer() const override {
|
u32 GetBoundBuffer() const override {
|
||||||
return regs.tex_cb_index;
|
return regs.tex_cb_index;
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,10 +65,22 @@ constexpr std::size_t NumSupportedVertexAttributes = 16;
|
||||||
template <typename Engine, typename Entry>
|
template <typename Engine, typename Entry>
|
||||||
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
||||||
ShaderType shader_type, std::size_t index = 0) {
|
ShaderType shader_type, std::size_t index = 0) {
|
||||||
if (entry.is_bindless) {
|
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
|
||||||
const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
|
if (entry.is_separated) {
|
||||||
return engine.GetTextureInfo(tex_handle);
|
const u32 buffer_1 = entry.buffer;
|
||||||
|
const u32 buffer_2 = entry.secondary_buffer;
|
||||||
|
const u32 offset_1 = entry.offset;
|
||||||
|
const u32 offset_2 = entry.secondary_offset;
|
||||||
|
const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
|
||||||
|
const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
|
||||||
|
return engine.GetTextureInfo(handle_1 | handle_2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if (entry.is_bindless) {
|
||||||
|
const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
|
||||||
|
return engine.GetTextureInfo(handle);
|
||||||
|
}
|
||||||
|
|
||||||
const auto& gpu_profile = engine.AccessGuestDriverProfile();
|
const auto& gpu_profile = engine.AccessGuestDriverProfile();
|
||||||
const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
|
const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
|
||||||
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
|
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
|
||||||
|
|
|
@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
using VideoCommon::Shader::SeparateSamplerKey;
|
||||||
|
|
||||||
using ShaderCacheVersionHash = std::array<u8, 64>;
|
using ShaderCacheVersionHash = std::array<u8, 64>;
|
||||||
|
|
||||||
struct ConstBufferKey {
|
struct ConstBufferKey {
|
||||||
|
@ -37,18 +39,26 @@ struct ConstBufferKey {
|
||||||
u32 value = 0;
|
u32 value = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BoundSamplerKey {
|
struct BoundSamplerEntry {
|
||||||
u32 offset = 0;
|
u32 offset = 0;
|
||||||
Tegra::Engines::SamplerDescriptor sampler;
|
Tegra::Engines::SamplerDescriptor sampler;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BindlessSamplerKey {
|
struct SeparateSamplerEntry {
|
||||||
|
u32 cbuf1 = 0;
|
||||||
|
u32 cbuf2 = 0;
|
||||||
|
u32 offset1 = 0;
|
||||||
|
u32 offset2 = 0;
|
||||||
|
Tegra::Engines::SamplerDescriptor sampler;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct BindlessSamplerEntry {
|
||||||
u32 cbuf = 0;
|
u32 cbuf = 0;
|
||||||
u32 offset = 0;
|
u32 offset = 0;
|
||||||
Tegra::Engines::SamplerDescriptor sampler;
|
Tegra::Engines::SamplerDescriptor sampler;
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr u32 NativeVersion = 20;
|
constexpr u32 NativeVersion = 21;
|
||||||
|
|
||||||
ShaderCacheVersionHash GetShaderCacheVersionHash() {
|
ShaderCacheVersionHash GetShaderCacheVersionHash() {
|
||||||
ShaderCacheVersionHash hash{};
|
ShaderCacheVersionHash hash{};
|
||||||
|
@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
|
||||||
u32 texture_handler_size_value;
|
u32 texture_handler_size_value;
|
||||||
u32 num_keys;
|
u32 num_keys;
|
||||||
u32 num_bound_samplers;
|
u32 num_bound_samplers;
|
||||||
|
u32 num_separate_samplers;
|
||||||
u32 num_bindless_samplers;
|
u32 num_bindless_samplers;
|
||||||
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
|
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
|
||||||
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
|
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
|
||||||
file.ReadArray(&texture_handler_size_value, 1) != 1 ||
|
file.ReadArray(&texture_handler_size_value, 1) != 1 ||
|
||||||
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
|
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
|
||||||
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
|
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
|
||||||
|
file.ReadArray(&num_separate_samplers, 1) != 1 ||
|
||||||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
|
file.ReadArray(&num_bindless_samplers, 1) != 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<ConstBufferKey> flat_keys(num_keys);
|
std::vector<ConstBufferKey> flat_keys(num_keys);
|
||||||
std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
|
std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
|
||||||
std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
|
std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
|
||||||
|
std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
|
||||||
if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
|
if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
|
||||||
file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
|
file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
|
||||||
flat_bound_samplers.size() ||
|
flat_bound_samplers.size() ||
|
||||||
|
file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
|
||||||
|
flat_separate_samplers.size() ||
|
||||||
file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
|
file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
|
||||||
flat_bindless_samplers.size()) {
|
flat_bindless_samplers.size()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (const auto& key : flat_keys) {
|
for (const auto& entry : flat_keys) {
|
||||||
keys.insert({{key.cbuf, key.offset}, key.value});
|
keys.insert({{entry.cbuf, entry.offset}, entry.value});
|
||||||
}
|
}
|
||||||
for (const auto& key : flat_bound_samplers) {
|
for (const auto& entry : flat_bound_samplers) {
|
||||||
bound_samplers.emplace(key.offset, key.sampler);
|
bound_samplers.emplace(entry.offset, entry.sampler);
|
||||||
}
|
}
|
||||||
for (const auto& key : flat_bindless_samplers) {
|
for (const auto& entry : flat_separate_samplers) {
|
||||||
bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
|
SeparateSamplerKey key;
|
||||||
|
key.buffers = {entry.cbuf1, entry.cbuf2};
|
||||||
|
key.offsets = {entry.offset1, entry.offset2};
|
||||||
|
separate_samplers.emplace(key, entry.sampler);
|
||||||
|
}
|
||||||
|
for (const auto& entry : flat_bindless_samplers) {
|
||||||
|
bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
|
||||||
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
|
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
|
||||||
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
|
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
|
||||||
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
|
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
|
||||||
|
file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
|
||||||
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
|
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
|
||||||
flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
|
flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<BoundSamplerKey> flat_bound_samplers;
|
std::vector<BoundSamplerEntry> flat_bound_samplers;
|
||||||
flat_bound_samplers.reserve(bound_samplers.size());
|
flat_bound_samplers.reserve(bound_samplers.size());
|
||||||
for (const auto& [address, sampler] : bound_samplers) {
|
for (const auto& [address, sampler] : bound_samplers) {
|
||||||
flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
|
flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<BindlessSamplerKey> flat_bindless_samplers;
|
std::vector<SeparateSamplerEntry> flat_separate_samplers;
|
||||||
|
flat_separate_samplers.reserve(separate_samplers.size());
|
||||||
|
for (const auto& [key, sampler] : separate_samplers) {
|
||||||
|
SeparateSamplerEntry entry;
|
||||||
|
std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
|
||||||
|
std::tie(entry.offset1, entry.offset2) = key.offsets;
|
||||||
|
entry.sampler = sampler;
|
||||||
|
flat_separate_samplers.push_back(entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<BindlessSamplerEntry> flat_bindless_samplers;
|
||||||
flat_bindless_samplers.reserve(bindless_samplers.size());
|
flat_bindless_samplers.reserve(bindless_samplers.size());
|
||||||
for (const auto& [address, sampler] : bindless_samplers) {
|
for (const auto& [address, sampler] : bindless_samplers) {
|
||||||
flat_bindless_samplers.push_back(
|
flat_bindless_samplers.push_back(
|
||||||
BindlessSamplerKey{address.first, address.second, sampler});
|
BindlessSamplerEntry{address.first, address.second, sampler});
|
||||||
}
|
}
|
||||||
|
|
||||||
return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
|
return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
|
||||||
file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
|
file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
|
||||||
flat_bound_samplers.size() &&
|
flat_bound_samplers.size() &&
|
||||||
|
file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
|
||||||
|
flat_separate_samplers.size() &&
|
||||||
file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
|
file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
|
||||||
flat_bindless_samplers.size();
|
flat_bindless_samplers.size();
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry {
|
||||||
VideoCommon::Shader::ComputeInfo compute_info;
|
VideoCommon::Shader::ComputeInfo compute_info;
|
||||||
VideoCommon::Shader::KeyMap keys;
|
VideoCommon::Shader::KeyMap keys;
|
||||||
VideoCommon::Shader::BoundSamplerMap bound_samplers;
|
VideoCommon::Shader::BoundSamplerMap bound_samplers;
|
||||||
|
VideoCommon::Shader::SeparateSamplerMap separate_samplers;
|
||||||
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
|
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -117,6 +117,17 @@ template <typename Engine, typename Entry>
|
||||||
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
||||||
std::size_t stage, std::size_t index = 0) {
|
std::size_t stage, std::size_t index = 0) {
|
||||||
const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
|
const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
|
||||||
|
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
|
||||||
|
if (entry.is_separated) {
|
||||||
|
const u32 buffer_1 = entry.buffer;
|
||||||
|
const u32 buffer_2 = entry.secondary_buffer;
|
||||||
|
const u32 offset_1 = entry.offset;
|
||||||
|
const u32 offset_2 = entry.secondary_offset;
|
||||||
|
const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1);
|
||||||
|
const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2);
|
||||||
|
return engine.GetTextureInfo(handle_1 | handle_2);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (entry.is_bindless) {
|
if (entry.is_bindless) {
|
||||||
const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
|
const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
|
||||||
return engine.GetTextureInfo(tex_handle);
|
return engine.GetTextureInfo(tex_handle);
|
||||||
|
|
|
@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
return pc;
|
return pc;
|
||||||
}
|
}
|
||||||
|
|
||||||
ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
|
ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
|
||||||
std::optional<u32> buffer) {
|
SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
|
||||||
if (info.IsComplete()) {
|
if (info.IsComplete()) {
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
|
|
||||||
: registry.ObtainBoundSampler(offset);
|
|
||||||
if (!sampler) {
|
if (!sampler) {
|
||||||
LOG_WARNING(HW_GPU, "Unknown sampler info");
|
LOG_WARNING(HW_GPU, "Unknown sampler info");
|
||||||
info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
|
info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
|
||||||
|
@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
|
||||||
|
|
||||||
std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
|
std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
|
||||||
SamplerInfo sampler_info) {
|
SamplerInfo sampler_info) {
|
||||||
const auto offset = static_cast<u32>(sampler.index.Value());
|
const u32 offset = static_cast<u32>(sampler.index.Value());
|
||||||
const auto info = GetSamplerInfo(sampler_info, offset);
|
const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
|
||||||
|
|
||||||
// If this sampler has already been used, return the existing mapping.
|
// If this sampler has already been used, return the existing mapping.
|
||||||
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
|
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
|
||||||
|
@ -404,20 +402,19 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
|
||||||
const Node sampler_register = GetRegister(reg);
|
const Node sampler_register = GetRegister(reg);
|
||||||
const auto [base_node, tracked_sampler_info] =
|
const auto [base_node, tracked_sampler_info] =
|
||||||
TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
|
TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
|
||||||
ASSERT(base_node != nullptr);
|
if (!base_node) {
|
||||||
if (base_node == nullptr) {
|
UNREACHABLE();
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (const auto bindless_sampler_info =
|
if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
|
||||||
std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
|
const u32 buffer = sampler_info->index;
|
||||||
const u32 buffer = bindless_sampler_info->GetIndex();
|
const u32 offset = sampler_info->offset;
|
||||||
const u32 offset = bindless_sampler_info->GetOffset();
|
info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
|
||||||
info = GetSamplerInfo(info, offset, buffer);
|
|
||||||
|
|
||||||
// If this sampler has already been used, return the existing mapping.
|
// If this sampler has already been used, return the existing mapping.
|
||||||
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
|
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
|
||||||
[buffer = buffer, offset = offset](const Sampler& entry) {
|
[buffer, offset](const Sampler& entry) {
|
||||||
return entry.buffer == buffer && entry.offset == offset;
|
return entry.buffer == buffer && entry.offset == offset;
|
||||||
});
|
});
|
||||||
if (it != used_samplers.end()) {
|
if (it != used_samplers.end()) {
|
||||||
|
@ -431,10 +428,32 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
|
||||||
return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
|
return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
|
||||||
*info.is_shadow, *info.is_buffer, false);
|
*info.is_shadow, *info.is_buffer, false);
|
||||||
}
|
}
|
||||||
if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
|
if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
|
||||||
const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
|
const std::pair indices = sampler_info->indices;
|
||||||
index_var = GetCustomVariable(array_sampler_info->GetIndexVar());
|
const std::pair offsets = sampler_info->offsets;
|
||||||
info = GetSamplerInfo(info, base_offset);
|
info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
|
||||||
|
|
||||||
|
// Try to use an already created sampler if it exists
|
||||||
|
const auto it = std::find_if(
|
||||||
|
used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
|
||||||
|
return offsets == std::pair{entry.offset, entry.secondary_offset} &&
|
||||||
|
indices == std::pair{entry.buffer, entry.secondary_buffer};
|
||||||
|
});
|
||||||
|
if (it != used_samplers.end()) {
|
||||||
|
ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
|
||||||
|
it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
|
||||||
|
return *it;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise create a new mapping for this sampler
|
||||||
|
const u32 next_index = static_cast<u32>(used_samplers.size());
|
||||||
|
return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
|
||||||
|
*info.is_shadow, *info.is_buffer);
|
||||||
|
}
|
||||||
|
if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
|
||||||
|
const u32 base_offset = sampler_info->base_offset / 4;
|
||||||
|
index_var = GetCustomVariable(sampler_info->bindless_var);
|
||||||
|
info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
|
||||||
|
|
||||||
// If this sampler has already been used, return the existing mapping.
|
// If this sampler has already been used, return the existing mapping.
|
||||||
const auto it = std::find_if(
|
const auto it = std::find_if(
|
||||||
|
|
|
@ -275,10 +275,11 @@ using Node = std::shared_ptr<NodeData>;
|
||||||
using Node4 = std::array<Node, 4>;
|
using Node4 = std::array<Node, 4>;
|
||||||
using NodeBlock = std::vector<Node>;
|
using NodeBlock = std::vector<Node>;
|
||||||
|
|
||||||
class BindlessSamplerNode;
|
struct ArraySamplerNode;
|
||||||
class ArraySamplerNode;
|
struct BindlessSamplerNode;
|
||||||
|
struct SeparateSamplerNode;
|
||||||
|
|
||||||
using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>;
|
using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
|
||||||
using TrackSampler = std::shared_ptr<TrackSamplerData>;
|
using TrackSampler = std::shared_ptr<TrackSamplerData>;
|
||||||
|
|
||||||
struct Sampler {
|
struct Sampler {
|
||||||
|
@ -288,63 +289,51 @@ struct Sampler {
|
||||||
: index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
|
: index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
|
||||||
is_buffer{is_buffer}, is_indexed{is_indexed} {}
|
is_buffer{is_buffer}, is_indexed{is_indexed} {}
|
||||||
|
|
||||||
|
/// Separate sampler constructor
|
||||||
|
constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
|
||||||
|
Tegra::Shader::TextureType type, bool is_array, bool is_shadow,
|
||||||
|
bool is_buffer)
|
||||||
|
: index{index}, offset{offsets.first}, secondary_offset{offsets.second},
|
||||||
|
buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array},
|
||||||
|
is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {}
|
||||||
|
|
||||||
/// Bindless samplers constructor
|
/// Bindless samplers constructor
|
||||||
constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
|
constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
|
||||||
bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
|
bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
|
||||||
: index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
|
: index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
|
||||||
is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
|
is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
|
||||||
|
|
||||||
u32 index = 0; ///< Emulated index given for the this sampler.
|
u32 index = 0; ///< Emulated index given for the this sampler.
|
||||||
u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
|
u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
|
||||||
u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
|
u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
|
||||||
u32 size = 1; ///< Size of the sampler.
|
u32 buffer = 0; ///< Buffer where the bindless sampler is read.
|
||||||
|
u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
|
||||||
|
u32 size = 1; ///< Size of the sampler.
|
||||||
|
|
||||||
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
|
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
|
||||||
bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
|
bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
|
||||||
bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
|
bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
|
||||||
bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
|
bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
|
||||||
bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
|
bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
|
||||||
bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
|
bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
|
||||||
|
bool is_separated = false; ///< Whether the image and sampler is separated or not.
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Represents a tracked bindless sampler into a direct const buffer
|
/// Represents a tracked bindless sampler into a direct const buffer
|
||||||
class ArraySamplerNode final {
|
struct ArraySamplerNode {
|
||||||
public:
|
|
||||||
explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
|
|
||||||
: index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
|
|
||||||
|
|
||||||
constexpr u32 GetIndex() const {
|
|
||||||
return index;
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr u32 GetBaseOffset() const {
|
|
||||||
return base_offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr u32 GetIndexVar() const {
|
|
||||||
return bindless_var;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
u32 index;
|
u32 index;
|
||||||
u32 base_offset;
|
u32 base_offset;
|
||||||
u32 bindless_var;
|
u32 bindless_var;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Represents a tracked separate sampler image pair that was folded statically
|
||||||
|
struct SeparateSamplerNode {
|
||||||
|
std::pair<u32, u32> indices;
|
||||||
|
std::pair<u32, u32> offsets;
|
||||||
|
};
|
||||||
|
|
||||||
/// Represents a tracked bindless sampler into a direct const buffer
|
/// Represents a tracked bindless sampler into a direct const buffer
|
||||||
class BindlessSamplerNode final {
|
struct BindlessSamplerNode {
|
||||||
public:
|
|
||||||
explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
|
|
||||||
|
|
||||||
constexpr u32 GetIndex() const {
|
|
||||||
return index;
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr u32 GetOffset() const {
|
|
||||||
return offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
u32 index;
|
u32 index;
|
||||||
u32 offset;
|
u32 offset;
|
||||||
};
|
};
|
||||||
|
|
|
@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) {
|
||||||
template <typename T, typename... Args>
|
template <typename T, typename... Args>
|
||||||
TrackSampler MakeTrackSampler(Args&&... args) {
|
TrackSampler MakeTrackSampler(Args&&... args) {
|
||||||
static_assert(std::is_convertible_v<T, TrackSamplerData>);
|
static_assert(std::is_convertible_v<T, TrackSamplerData>);
|
||||||
return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...));
|
return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename... Args>
|
template <typename... Args>
|
||||||
|
|
|
@ -93,6 +93,26 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
|
||||||
|
std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
|
||||||
|
SeparateSamplerKey key;
|
||||||
|
key.buffers = buffers;
|
||||||
|
key.offsets = offsets;
|
||||||
|
const auto iter = separate_samplers.find(key);
|
||||||
|
if (iter != separate_samplers.end()) {
|
||||||
|
return iter->second;
|
||||||
|
}
|
||||||
|
if (!engine) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
|
||||||
|
const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
|
||||||
|
const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
|
||||||
|
separate_samplers.emplace(key, value);
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
|
std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
|
||||||
u32 offset) {
|
u32 offset) {
|
||||||
const std::pair key = {buffer, offset};
|
const std::pair key = {buffer, offset};
|
||||||
|
|
|
@ -19,8 +19,39 @@
|
||||||
|
|
||||||
namespace VideoCommon::Shader {
|
namespace VideoCommon::Shader {
|
||||||
|
|
||||||
|
struct SeparateSamplerKey {
|
||||||
|
std::pair<u32, u32> buffers;
|
||||||
|
std::pair<u32, u32> offsets;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace VideoCommon::Shader
|
||||||
|
|
||||||
|
namespace std {
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct hash<VideoCommon::Shader::SeparateSamplerKey> {
|
||||||
|
std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
|
||||||
|
return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
|
||||||
|
key.offsets.second);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
|
||||||
|
bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
|
||||||
|
const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
|
||||||
|
return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace std
|
||||||
|
|
||||||
|
namespace VideoCommon::Shader {
|
||||||
|
|
||||||
using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
|
using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
|
||||||
using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
|
using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
|
||||||
|
using SeparateSamplerMap =
|
||||||
|
std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
|
||||||
using BindlessSamplerMap =
|
using BindlessSamplerMap =
|
||||||
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
|
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
|
||||||
|
|
||||||
|
@ -73,6 +104,9 @@ public:
|
||||||
|
|
||||||
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
|
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
|
||||||
|
|
||||||
|
std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
|
||||||
|
std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
|
||||||
|
|
||||||
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
|
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
|
||||||
|
|
||||||
/// Inserts a key.
|
/// Inserts a key.
|
||||||
|
@ -128,6 +162,7 @@ private:
|
||||||
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
|
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
|
||||||
KeyMap keys;
|
KeyMap keys;
|
||||||
BoundSamplerMap bound_samplers;
|
BoundSamplerMap bound_samplers;
|
||||||
|
SeparateSamplerMap separate_samplers;
|
||||||
BindlessSamplerMap bindless_samplers;
|
BindlessSamplerMap bindless_samplers;
|
||||||
u32 bound_buffer;
|
u32 bound_buffer;
|
||||||
GraphicsInfo graphics_info;
|
GraphicsInfo graphics_info;
|
||||||
|
|
|
@ -330,8 +330,8 @@ private:
|
||||||
OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
|
OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
|
||||||
|
|
||||||
/// Queries the missing sampler info from the execution context.
|
/// Queries the missing sampler info from the execution context.
|
||||||
SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset,
|
SamplerInfo GetSamplerInfo(SamplerInfo info,
|
||||||
std::optional<u32> buffer = std::nullopt);
|
std::optional<Tegra::Engines::SamplerDescriptor> sampler);
|
||||||
|
|
||||||
/// Accesses a texture sampler.
|
/// Accesses a texture sampler.
|
||||||
std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
|
std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
|
||||||
|
|
|
@ -64,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
|
||||||
if (const auto operation = std::get_if<OperationNode>(&*node)) {
|
if (const auto operation = std::get_if<OperationNode>(&*node)) {
|
||||||
operation->SetAmendIndex(amend_index);
|
operation->SetAmendIndex(amend_index);
|
||||||
return true;
|
return true;
|
||||||
} else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
|
}
|
||||||
|
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
|
||||||
conditional->SetAmendIndex(amend_index);
|
conditional->SetAmendIndex(amend_index);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -110,10 +111,23 @@ std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const
|
||||||
return TrackBindlessSampler(source, code, new_cursor);
|
return TrackBindlessSampler(source, code, new_cursor);
|
||||||
}
|
}
|
||||||
if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
|
if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
|
||||||
for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
|
const OperationNode& op = *operation;
|
||||||
if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor);
|
|
||||||
std::get<0>(found)) {
|
const OperationCode opcode = operation->GetCode();
|
||||||
// Cbuf found in operand.
|
if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
|
||||||
|
ASSERT(op.GetOperandsCount() == 2);
|
||||||
|
auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
|
||||||
|
auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
|
||||||
|
if (node_a && node_b) {
|
||||||
|
auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
|
||||||
|
std::pair{offset_a, offset_b});
|
||||||
|
return {tracked, std::move(track)};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::size_t i = op.GetOperandsCount();
|
||||||
|
while (i--) {
|
||||||
|
if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) {
|
||||||
|
// Constant buffer found in operand.
|
||||||
return found;
|
return found;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue