forked from suyu/suyu
Merge pull request #3282 from FernandoS27/indexed-samplers
Partially implement Indexed samplers in general and specific code in GLSL
This commit is contained in:
commit
b5bbe7e752
24 changed files with 610 additions and 58 deletions
|
@ -29,6 +29,8 @@ add_library(video_core STATIC
|
||||||
gpu_synch.h
|
gpu_synch.h
|
||||||
gpu_thread.cpp
|
gpu_thread.cpp
|
||||||
gpu_thread.h
|
gpu_thread.h
|
||||||
|
guest_driver.cpp
|
||||||
|
guest_driver.h
|
||||||
macro_interpreter.cpp
|
macro_interpreter.cpp
|
||||||
macro_interpreter.h
|
macro_interpreter.h
|
||||||
memory_manager.cpp
|
memory_manager.cpp
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/engines/shader_bytecode.h"
|
#include "video_core/engines/shader_bytecode.h"
|
||||||
#include "video_core/engines/shader_type.h"
|
#include "video_core/engines/shader_type.h"
|
||||||
|
#include "video_core/guest_driver.h"
|
||||||
#include "video_core/textures/texture.h"
|
#include "video_core/textures/texture.h"
|
||||||
|
|
||||||
namespace Tegra::Engines {
|
namespace Tegra::Engines {
|
||||||
|
@ -106,6 +107,9 @@ public:
|
||||||
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||||
u64 offset) const = 0;
|
u64 offset) const = 0;
|
||||||
virtual u32 GetBoundBuffer() const = 0;
|
virtual u32 GetBoundBuffer() const = 0;
|
||||||
|
|
||||||
|
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
|
||||||
|
virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Tegra::Engines
|
} // namespace Tegra::Engines
|
||||||
|
|
|
@ -94,6 +94,14 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the guest driver profile by forwarding the request to the rasterizer.
VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
    return rasterizer.AccessGuestDriverProfile();
}
|
||||||
|
|
||||||
|
/// Read-only variant: returns the guest driver profile obtained from the rasterizer.
const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
    return rasterizer.AccessGuestDriverProfile();
}
|
||||||
|
|
||||||
void KeplerCompute::ProcessLaunch() {
|
void KeplerCompute::ProcessLaunch() {
|
||||||
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
|
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
|
||||||
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
|
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
|
||||||
|
|
|
@ -218,6 +218,10 @@ public:
|
||||||
return regs.tex_cb_index;
|
return regs.tex_cb_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
|
||||||
|
|
||||||
|
const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
|
|
|
@ -784,4 +784,12 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the guest driver profile by forwarding the request to the rasterizer.
VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
    return rasterizer.AccessGuestDriverProfile();
}
|
||||||
|
|
||||||
|
/// Read-only variant: returns the guest driver profile obtained from the rasterizer.
const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
    return rasterizer.AccessGuestDriverProfile();
}
|
||||||
|
|
||||||
} // namespace Tegra::Engines
|
} // namespace Tegra::Engines
|
||||||
|
|
|
@ -1306,6 +1306,10 @@ public:
|
||||||
return regs.tex_cb_index;
|
return regs.tex_cb_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
|
||||||
|
|
||||||
|
const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
|
||||||
|
|
||||||
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
|
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
|
||||||
/// we've seen used.
|
/// we've seen used.
|
||||||
using MacroMemory = std::array<u32, 0x40000>;
|
using MacroMemory = std::array<u32, 0x40000>;
|
||||||
|
|
36
src/video_core/guest_driver.cpp
Normal file
36
src/video_core/guest_driver.cpp
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
|
#include "video_core/guest_driver.h"
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
|
||||||
|
void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) {
|
||||||
|
if (texture_handler_size_deduced) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const std::size_t size = bound_offsets.size();
|
||||||
|
if (size < 2) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{});
|
||||||
|
u32 min_val = std::numeric_limits<u32>::max();
|
||||||
|
for (std::size_t i = 1; i < size; ++i) {
|
||||||
|
if (bound_offsets[i] == bound_offsets[i - 1]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
|
||||||
|
min_val = std::min(min_val, new_min);
|
||||||
|
}
|
||||||
|
if (min_val > 2) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
texture_handler_size_deduced = true;
|
||||||
|
texture_handler_size = min_texture_handler_size * min_val;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace VideoCore
|
41
src/video_core/guest_driver.h
Normal file
41
src/video_core/guest_driver.h
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect
|
||||||
|
* information necessary for impossible to avoid HLE methods like shader tracks as they are
|
||||||
|
* Entscheidungsproblems.
|
||||||
|
*/
|
||||||
|
class GuestDriverProfile {
|
||||||
|
public:
|
||||||
|
void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
|
||||||
|
|
||||||
|
u32 GetTextureHandlerSize() const {
|
||||||
|
return texture_handler_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool TextureHandlerSizeKnown() const {
|
||||||
|
return texture_handler_size_deduced;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Minimum size of texture handler any driver can use.
|
||||||
|
static constexpr u32 min_texture_handler_size = 4;
|
||||||
|
// This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily
|
||||||
|
// use 4 bytes instead. Thus, certain drivers may squish the size.
|
||||||
|
static constexpr u32 default_texture_handler_size = 8;
|
||||||
|
|
||||||
|
u32 texture_handler_size = default_texture_handler_size;
|
||||||
|
bool texture_handler_size_deduced = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace VideoCore
|
|
@ -9,6 +9,7 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/engines/fermi_2d.h"
|
#include "video_core/engines/fermi_2d.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/guest_driver.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
class MemoryManager;
|
class MemoryManager;
|
||||||
|
@ -78,5 +79,18 @@ public:
|
||||||
/// Initialize disk cached resources for the game being emulated
|
/// Initialize disk cached resources for the game being emulated
|
||||||
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
|
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
|
||||||
const DiskResourceLoadCallback& callback = {}) {}
|
const DiskResourceLoadCallback& callback = {}) {}
|
||||||
|
|
||||||
|
/// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
|
||||||
|
GuestDriverProfile& AccessGuestDriverProfile() {
|
||||||
|
return guest_driver_profile;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
|
||||||
|
const GuestDriverProfile& AccessGuestDriverProfile() const {
|
||||||
|
return guest_driver_profile;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
GuestDriverProfile guest_driver_profile{};
|
||||||
};
|
};
|
||||||
} // namespace VideoCore
|
} // namespace VideoCore
|
||||||
|
|
|
@ -55,16 +55,20 @@ namespace {
|
||||||
|
|
||||||
template <typename Engine, typename Entry>
|
template <typename Engine, typename Entry>
|
||||||
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
||||||
Tegra::Engines::ShaderType shader_type) {
|
Tegra::Engines::ShaderType shader_type,
|
||||||
|
std::size_t index = 0) {
|
||||||
if (entry.IsBindless()) {
|
if (entry.IsBindless()) {
|
||||||
const Tegra::Texture::TextureHandle tex_handle =
|
const Tegra::Texture::TextureHandle tex_handle =
|
||||||
engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
|
engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
|
||||||
return engine.GetTextureInfo(tex_handle);
|
return engine.GetTextureInfo(tex_handle);
|
||||||
}
|
}
|
||||||
|
const auto& gpu_profile = engine.AccessGuestDriverProfile();
|
||||||
|
const u32 offset =
|
||||||
|
entry.GetOffset() + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
|
||||||
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
|
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
|
||||||
return engine.GetStageTexture(shader_type, entry.GetOffset());
|
return engine.GetStageTexture(shader_type, offset);
|
||||||
} else {
|
} else {
|
||||||
return engine.GetTexture(entry.GetOffset());
|
return engine.GetTexture(offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -942,8 +946,15 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
|
||||||
u32 binding = device.GetBaseBindings(stage_index).sampler;
|
u32 binding = device.GetBaseBindings(stage_index).sampler;
|
||||||
for (const auto& entry : shader->GetShaderEntries().samplers) {
|
for (const auto& entry : shader->GetShaderEntries().samplers) {
|
||||||
const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
|
const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
|
||||||
|
if (!entry.IsIndexed()) {
|
||||||
const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
|
const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
|
||||||
SetupTexture(binding++, texture, entry);
|
SetupTexture(binding++, texture, entry);
|
||||||
|
} else {
|
||||||
|
for (std::size_t i = 0; i < entry.Size(); ++i) {
|
||||||
|
const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
|
||||||
|
SetupTexture(binding++, texture, entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -952,8 +963,17 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
|
||||||
const auto& compute = system.GPU().KeplerCompute();
|
const auto& compute = system.GPU().KeplerCompute();
|
||||||
u32 binding = 0;
|
u32 binding = 0;
|
||||||
for (const auto& entry : kernel->GetShaderEntries().samplers) {
|
for (const auto& entry : kernel->GetShaderEntries().samplers) {
|
||||||
const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
|
if (!entry.IsIndexed()) {
|
||||||
|
const auto texture =
|
||||||
|
GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
|
||||||
SetupTexture(binding++, texture, entry);
|
SetupTexture(binding++, texture, entry);
|
||||||
|
} else {
|
||||||
|
for (std::size_t i = 0; i < entry.Size(); ++i) {
|
||||||
|
const auto texture =
|
||||||
|
GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i);
|
||||||
|
SetupTexture(binding++, texture, entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -214,6 +214,7 @@ std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType s
|
||||||
}
|
}
|
||||||
|
|
||||||
void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
|
void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
|
||||||
|
locker.SetBoundBuffer(usage.bound_buffer);
|
||||||
for (const auto& key : usage.keys) {
|
for (const auto& key : usage.keys) {
|
||||||
const auto [buffer, offset] = key.first;
|
const auto [buffer, offset] = key.first;
|
||||||
locker.InsertKey(buffer, offset, key.second);
|
locker.InsertKey(buffer, offset, key.second);
|
||||||
|
@ -418,7 +419,8 @@ bool CachedShader::EnsureValidLockerVariant() {
|
||||||
|
|
||||||
ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
|
ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
|
||||||
const ConstBufferLocker& locker) const {
|
const ConstBufferLocker& locker) const {
|
||||||
return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(),
|
return ShaderDiskCacheUsage{unique_identifier, variant,
|
||||||
|
locker.GetBoundBuffer(), locker.GetKeys(),
|
||||||
locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
|
locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -391,6 +391,7 @@ public:
|
||||||
DeclareVertex();
|
DeclareVertex();
|
||||||
DeclareGeometry();
|
DeclareGeometry();
|
||||||
DeclareRegisters();
|
DeclareRegisters();
|
||||||
|
DeclareCustomVariables();
|
||||||
DeclarePredicates();
|
DeclarePredicates();
|
||||||
DeclareLocalMemory();
|
DeclareLocalMemory();
|
||||||
DeclareInternalFlags();
|
DeclareInternalFlags();
|
||||||
|
@ -503,6 +504,16 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DeclareCustomVariables() {
|
||||||
|
const u32 num_custom_variables = ir.GetNumCustomVariables();
|
||||||
|
for (u32 i = 0; i < num_custom_variables; ++i) {
|
||||||
|
code.AddLine("float {} = 0.0f;", GetCustomVariable(i));
|
||||||
|
}
|
||||||
|
if (num_custom_variables > 0) {
|
||||||
|
code.AddNewLine();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void DeclarePredicates() {
|
void DeclarePredicates() {
|
||||||
const auto& predicates = ir.GetPredicates();
|
const auto& predicates = ir.GetPredicates();
|
||||||
for (const auto pred : predicates) {
|
for (const auto pred : predicates) {
|
||||||
|
@ -655,7 +666,8 @@ private:
|
||||||
u32 binding = device.GetBaseBindings(stage).sampler;
|
u32 binding = device.GetBaseBindings(stage).sampler;
|
||||||
for (const auto& sampler : ir.GetSamplers()) {
|
for (const auto& sampler : ir.GetSamplers()) {
|
||||||
const std::string name = GetSampler(sampler);
|
const std::string name = GetSampler(sampler);
|
||||||
const std::string description = fmt::format("layout (binding = {}) uniform", binding++);
|
const std::string description = fmt::format("layout (binding = {}) uniform", binding);
|
||||||
|
binding += sampler.IsIndexed() ? sampler.Size() : 1;
|
||||||
|
|
||||||
std::string sampler_type = [&]() {
|
std::string sampler_type = [&]() {
|
||||||
if (sampler.IsBuffer()) {
|
if (sampler.IsBuffer()) {
|
||||||
|
@ -682,7 +694,11 @@ private:
|
||||||
sampler_type += "Shadow";
|
sampler_type += "Shadow";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!sampler.IsIndexed()) {
|
||||||
code.AddLine("{} {} {};", description, sampler_type, name);
|
code.AddLine("{} {} {};", description, sampler_type, name);
|
||||||
|
} else {
|
||||||
|
code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.Size());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (!ir.GetSamplers().empty()) {
|
if (!ir.GetSamplers().empty()) {
|
||||||
code.AddNewLine();
|
code.AddNewLine();
|
||||||
|
@ -775,6 +791,11 @@ private:
|
||||||
return {GetRegister(index), Type::Float};
|
return {GetRegister(index), Type::Float};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
|
||||||
|
const u32 index = cv->GetIndex();
|
||||||
|
return {GetCustomVariable(index), Type::Float};
|
||||||
|
}
|
||||||
|
|
||||||
if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
|
if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
|
||||||
const u32 value = immediate->GetValue();
|
const u32 value = immediate->GetValue();
|
||||||
if (value < 10) {
|
if (value < 10) {
|
||||||
|
@ -1098,7 +1119,11 @@ private:
|
||||||
} else if (!meta->ptp.empty()) {
|
} else if (!meta->ptp.empty()) {
|
||||||
expr += "Offsets";
|
expr += "Offsets";
|
||||||
}
|
}
|
||||||
|
if (!meta->sampler.IsIndexed()) {
|
||||||
expr += '(' + GetSampler(meta->sampler) + ", ";
|
expr += '(' + GetSampler(meta->sampler) + ", ";
|
||||||
|
} else {
|
||||||
|
expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], ";
|
||||||
|
}
|
||||||
expr += coord_constructors.at(count + (has_array ? 1 : 0) +
|
expr += coord_constructors.at(count + (has_array ? 1 : 0) +
|
||||||
(has_shadow && !separate_dc ? 1 : 0) - 1);
|
(has_shadow && !separate_dc ? 1 : 0) - 1);
|
||||||
expr += '(';
|
expr += '(';
|
||||||
|
@ -1310,6 +1335,8 @@ private:
|
||||||
const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
|
const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
|
||||||
target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
|
target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
|
||||||
Type::Uint};
|
Type::Uint};
|
||||||
|
} else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
|
||||||
|
target = {GetCustomVariable(cv->GetIndex()), Type::Float};
|
||||||
} else {
|
} else {
|
||||||
UNREACHABLE_MSG("Assign called without a proper target");
|
UNREACHABLE_MSG("Assign called without a proper target");
|
||||||
}
|
}
|
||||||
|
@ -2237,6 +2264,10 @@ private:
|
||||||
return GetDeclarationWithSuffix(index, "gpr");
|
return GetDeclarationWithSuffix(index, "gpr");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds the declaration name of the custom variable with the given index, using the
/// "custom_var" suffix.
std::string GetCustomVariable(u32 index) const {
    return GetDeclarationWithSuffix(index, "custom_var");
}
|
||||||
|
|
||||||
std::string GetPredicate(Tegra::Shader::Pred pred) const {
|
std::string GetPredicate(Tegra::Shader::Pred pred) const {
|
||||||
return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
|
return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,7 +53,7 @@ struct BindlessSamplerKey {
|
||||||
Tegra::Engines::SamplerDescriptor sampler{};
|
Tegra::Engines::SamplerDescriptor sampler{};
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr u32 NativeVersion = 11;
|
constexpr u32 NativeVersion = 12;
|
||||||
|
|
||||||
// Make sure sizes don't change by accident
|
// Make sure sizes don't change by accident
|
||||||
static_assert(sizeof(ProgramVariant) == 20);
|
static_assert(sizeof(ProgramVariant) == 20);
|
||||||
|
@ -186,7 +186,8 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
|
||||||
u32 num_bound_samplers{};
|
u32 num_bound_samplers{};
|
||||||
u32 num_bindless_samplers{};
|
u32 num_bindless_samplers{};
|
||||||
if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
|
if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
|
||||||
file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
|
file.ReadArray(&usage.variant, 1) != 1 ||
|
||||||
|
file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
|
||||||
file.ReadArray(&num_bound_samplers, 1) != 1 ||
|
file.ReadArray(&num_bound_samplers, 1) != 1 ||
|
||||||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
|
file.ReadArray(&num_bindless_samplers, 1) != 1) {
|
||||||
LOG_ERROR(Render_OpenGL, error_loading);
|
LOG_ERROR(Render_OpenGL, error_loading);
|
||||||
|
@ -281,7 +282,9 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
|
||||||
u32 num_bindless_samplers{};
|
u32 num_bindless_samplers{};
|
||||||
ShaderDiskCacheUsage usage;
|
ShaderDiskCacheUsage usage;
|
||||||
if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
|
if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
|
||||||
!LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) ||
|
!LoadObjectFromPrecompiled(usage.variant) ||
|
||||||
|
!LoadObjectFromPrecompiled(usage.bound_buffer) ||
|
||||||
|
!LoadObjectFromPrecompiled(num_keys) ||
|
||||||
!LoadObjectFromPrecompiled(num_bound_samplers) ||
|
!LoadObjectFromPrecompiled(num_bound_samplers) ||
|
||||||
!LoadObjectFromPrecompiled(num_bindless_samplers)) {
|
!LoadObjectFromPrecompiled(num_bindless_samplers)) {
|
||||||
return {};
|
return {};
|
||||||
|
@ -393,6 +396,7 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
|
||||||
|
|
||||||
if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
|
if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
|
||||||
file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
|
file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
|
||||||
|
file.WriteObject(usage.bound_buffer) != 1 ||
|
||||||
file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
|
file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
|
||||||
file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
|
file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
|
||||||
file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
|
file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
|
||||||
|
@ -447,7 +451,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
|
if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
|
||||||
!SaveObjectToPrecompiled(usage.variant) ||
|
!SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) ||
|
||||||
!SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
|
!SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
|
||||||
!SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
|
!SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
|
||||||
!SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
|
!SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
|
||||||
|
|
|
@ -79,6 +79,7 @@ static_assert(std::is_trivially_copyable_v<ProgramVariant>);
|
||||||
struct ShaderDiskCacheUsage {
|
struct ShaderDiskCacheUsage {
|
||||||
u64 unique_identifier{};
|
u64 unique_identifier{};
|
||||||
ProgramVariant variant;
|
ProgramVariant variant;
|
||||||
|
u32 bound_buffer{};
|
||||||
VideoCommon::Shader::KeyMap keys;
|
VideoCommon::Shader::KeyMap keys;
|
||||||
VideoCommon::Shader::BoundSamplerMap bound_samplers;
|
VideoCommon::Shader::BoundSamplerMap bound_samplers;
|
||||||
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
|
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
|
||||||
|
|
|
@ -353,6 +353,7 @@ private:
|
||||||
DeclareFragment();
|
DeclareFragment();
|
||||||
DeclareCompute();
|
DeclareCompute();
|
||||||
DeclareRegisters();
|
DeclareRegisters();
|
||||||
|
DeclareCustomVariables();
|
||||||
DeclarePredicates();
|
DeclarePredicates();
|
||||||
DeclareLocalMemory();
|
DeclareLocalMemory();
|
||||||
DeclareSharedMemory();
|
DeclareSharedMemory();
|
||||||
|
@ -586,6 +587,15 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DeclareCustomVariables() {
|
||||||
|
const u32 num_custom_variables = ir.GetNumCustomVariables();
|
||||||
|
for (u32 i = 0; i < num_custom_variables; ++i) {
|
||||||
|
const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
|
||||||
|
Name(id, fmt::format("custom_var_{}", i));
|
||||||
|
custom_variables.emplace(i, AddGlobalVariable(id));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void DeclarePredicates() {
|
void DeclarePredicates() {
|
||||||
for (const auto pred : ir.GetPredicates()) {
|
for (const auto pred : ir.GetPredicates()) {
|
||||||
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
|
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
|
||||||
|
@ -982,6 +992,11 @@ private:
|
||||||
return {OpLoad(t_float, registers.at(index)), Type::Float};
|
return {OpLoad(t_float, registers.at(index)), Type::Float};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
|
||||||
|
const u32 index = cv->GetIndex();
|
||||||
|
return {OpLoad(t_float, custom_variables.at(index)), Type::Float};
|
||||||
|
}
|
||||||
|
|
||||||
if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
|
if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
|
||||||
return {Constant(t_uint, immediate->GetValue()), Type::Uint};
|
return {Constant(t_uint, immediate->GetValue()), Type::Uint};
|
||||||
}
|
}
|
||||||
|
@ -1333,6 +1348,9 @@ private:
|
||||||
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
||||||
target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
|
target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
|
||||||
|
|
||||||
|
} else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
|
||||||
|
target = {custom_variables.at(cv->GetIndex()), Type::Float};
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
}
|
}
|
||||||
|
@ -2508,6 +2526,7 @@ private:
|
||||||
Id out_vertex{};
|
Id out_vertex{};
|
||||||
Id in_vertex{};
|
Id in_vertex{};
|
||||||
std::map<u32, Id> registers;
|
std::map<u32, Id> registers;
|
||||||
|
std::map<u32, Id> custom_variables;
|
||||||
std::map<Tegra::Shader::Pred, Id> predicates;
|
std::map<Tegra::Shader::Pred, Id> predicates;
|
||||||
std::map<u32, Id> flow_variables;
|
std::map<u32, Id> flow_variables;
|
||||||
Id local_memory{};
|
Id local_memory{};
|
||||||
|
|
|
@ -66,6 +66,18 @@ std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindle
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() {
|
||||||
|
if (bound_buffer_saved) {
|
||||||
|
return bound_buffer;
|
||||||
|
}
|
||||||
|
if (!engine) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
bound_buffer_saved = true;
|
||||||
|
bound_buffer = engine->GetBoundBuffer();
|
||||||
|
return bound_buffer;
|
||||||
|
}
|
||||||
|
|
||||||
void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
|
void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
|
||||||
keys.insert_or_assign({buffer, offset}, value);
|
keys.insert_or_assign({buffer, offset}, value);
|
||||||
}
|
}
|
||||||
|
@ -78,6 +90,11 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDes
|
||||||
bindless_samplers.insert_or_assign({buffer, offset}, sampler);
|
bindless_samplers.insert_or_assign({buffer, offset}, sampler);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Stores the given bound buffer and marks it as saved, so later queries use this value.
void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
    bound_buffer = buffer;
    bound_buffer_saved = true;
}
|
||||||
|
|
||||||
bool ConstBufferLocker::IsConsistent() const {
|
bool ConstBufferLocker::IsConsistent() const {
|
||||||
if (!engine) {
|
if (!engine) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#include "common/hash.h"
|
#include "common/hash.h"
|
||||||
#include "video_core/engines/const_buffer_engine_interface.h"
|
#include "video_core/engines/const_buffer_engine_interface.h"
|
||||||
#include "video_core/engines/shader_type.h"
|
#include "video_core/engines/shader_type.h"
|
||||||
|
#include "video_core/guest_driver.h"
|
||||||
|
|
||||||
namespace VideoCommon::Shader {
|
namespace VideoCommon::Shader {
|
||||||
|
|
||||||
|
@ -40,6 +41,8 @@ public:
|
||||||
|
|
||||||
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
|
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
|
||||||
|
|
||||||
|
std::optional<u32> ObtainBoundBuffer();
|
||||||
|
|
||||||
/// Inserts a key.
|
/// Inserts a key.
|
||||||
void InsertKey(u32 buffer, u32 offset, u32 value);
|
void InsertKey(u32 buffer, u32 offset, u32 value);
|
||||||
|
|
||||||
|
@ -49,6 +52,9 @@ public:
|
||||||
/// Inserts a bindless sampler key.
|
/// Inserts a bindless sampler key.
|
||||||
void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
|
void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
|
||||||
|
|
||||||
|
/// Set the bound buffer for this locker.
|
||||||
|
void SetBoundBuffer(u32 buffer);
|
||||||
|
|
||||||
/// Checks keys and samplers against engine's current const buffers. Returns true if they are
|
/// Checks keys and samplers against engine's current const buffers. Returns true if they are
|
||||||
/// the same value, false otherwise;
|
/// the same value, false otherwise;
|
||||||
bool IsConsistent() const;
|
bool IsConsistent() const;
|
||||||
|
@ -71,12 +77,27 @@ public:
|
||||||
return bindless_samplers;
|
return bindless_samplers;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Gets bound buffer used on this shader
|
||||||
|
u32 GetBoundBuffer() const {
|
||||||
|
return bound_buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Obtains access to the guest driver's profile.
|
||||||
|
VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
|
||||||
|
if (engine) {
|
||||||
|
return &engine->AccessGuestDriverProfile();
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const Tegra::Engines::ShaderType stage;
|
const Tegra::Engines::ShaderType stage;
|
||||||
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
|
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
|
||||||
KeyMap keys;
|
KeyMap keys;
|
||||||
BoundSamplerMap bound_samplers;
|
BoundSamplerMap bound_samplers;
|
||||||
BindlessSamplerMap bindless_samplers;
|
BindlessSamplerMap bindless_samplers;
|
||||||
|
bool bound_buffer_saved{};
|
||||||
|
u32 bound_buffer{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCommon::Shader
|
} // namespace VideoCommon::Shader
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <limits>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
@ -33,6 +34,52 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
|
||||||
return (absolute_offset % SchedPeriod) == 0;
|
return (absolute_offset % SchedPeriod) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Gathers the offsets of every non-bindless sampler in used_samplers and, when more than one
/// was found, hands them to the guest driver profile so it can deduce the texture handler size.
/// Logs a critical message and returns when gpu_driver is null. Returns without doing anything
/// when the size is already known or when at most one sampler is in use.
void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
                              const std::list<Sampler>& used_samplers) {
    if (gpu_driver == nullptr) {
        LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
        return;
    }
    if (gpu_driver->TextureHandlerSizeKnown()) {
        return;
    }
    if (used_samplers.size() <= 1) {
        return;
    }

    std::vector<u32> bound_offsets;
    for (const auto& sampler : used_samplers) {
        if (!sampler.IsBindless()) {
            bound_offsets.emplace_back(sampler.GetOffset());
        }
    }
    // A single bound sampler gives no gap to measure, so at least two are required.
    if (bound_offsets.size() > 1) {
        gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
    }
}
|
||||||
|
|
||||||
|
std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
|
||||||
|
VideoCore::GuestDriverProfile* gpu_driver,
|
||||||
|
const std::list<Sampler>& used_samplers) {
|
||||||
|
if (gpu_driver == nullptr) {
|
||||||
|
LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
const u32 base_offset = sampler_to_deduce.GetOffset();
|
||||||
|
u32 max_offset{std::numeric_limits<u32>::max()};
|
||||||
|
for (const auto& sampler : used_samplers) {
|
||||||
|
if (sampler.IsBindless()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (sampler.GetOffset() > base_offset) {
|
||||||
|
max_offset = std::min(sampler.GetOffset(), max_offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (max_offset == std::numeric_limits<u32>::max()) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize();
|
||||||
|
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
class ASTDecoder {
|
class ASTDecoder {
|
||||||
|
@ -315,4 +362,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
|
||||||
return pc + 1;
|
return pc + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ShaderIR::PostDecode() {
|
||||||
|
// Deduce texture handler size if needed
|
||||||
|
auto gpu_driver = locker.AccessGuestDriverProfile();
|
||||||
|
DeduceTextureHandlerSize(gpu_driver, used_samplers);
|
||||||
|
// Deduce Indexed Samplers
|
||||||
|
if (!uses_indexed_samplers) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (auto& sampler : used_samplers) {
|
||||||
|
if (!sampler.IsIndexed()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) {
|
||||||
|
sampler.SetSize(*size);
|
||||||
|
} else {
|
||||||
|
LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
|
||||||
|
sampler.SetSize(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace VideoCommon::Shader
|
} // namespace VideoCommon::Shader
|
||||||
|
|
|
@ -144,7 +144,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
Node4 values;
|
Node4 values;
|
||||||
for (u32 element = 0; element < values.size(); ++element) {
|
for (u32 element = 0; element < values.size(); ++element) {
|
||||||
auto coords_copy = coords;
|
auto coords_copy = coords;
|
||||||
MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element};
|
MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {},
|
||||||
|
{}, {}, component, element, {}};
|
||||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -167,9 +168,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
const auto derivate_reg = instr.gpr20.Value();
|
const auto derivate_reg = instr.gpr20.Value();
|
||||||
const auto texture_type = instr.txd.texture_type.Value();
|
const auto texture_type = instr.txd.texture_type.Value();
|
||||||
const auto coord_count = GetCoordCount(texture_type);
|
const auto coord_count = GetCoordCount(texture_type);
|
||||||
|
Node index_var{};
|
||||||
const Sampler* sampler =
|
const Sampler* sampler =
|
||||||
is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}})
|
is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}})
|
||||||
: GetSampler(instr.sampler, {{texture_type, is_array, false}});
|
: GetSampler(instr.sampler, {{texture_type, is_array, false}});
|
||||||
Node4 values;
|
Node4 values;
|
||||||
if (sampler == nullptr) {
|
if (sampler == nullptr) {
|
||||||
|
@ -200,7 +201,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
}
|
}
|
||||||
|
|
||||||
for (u32 element = 0; element < values.size(); ++element) {
|
for (u32 element = 0; element < values.size(); ++element) {
|
||||||
MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element};
|
MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates,
|
||||||
|
{}, {}, {}, element, index_var};
|
||||||
values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
|
values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -215,8 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
// TODO: The new commits on the texture refactor, change the way samplers work.
|
// TODO: The new commits on the texture refactor, change the way samplers work.
|
||||||
// Sadly, not all texture instructions specify the type of texture their sampler
|
// Sadly, not all texture instructions specify the type of texture their sampler
|
||||||
// uses. This must be fixed at a later instance.
|
// uses. This must be fixed at a later instance.
|
||||||
|
Node index_var{};
|
||||||
const Sampler* sampler =
|
const Sampler* sampler =
|
||||||
is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler);
|
is_bindless ? GetBindlessSampler(instr.gpr8, index_var) : GetSampler(instr.sampler);
|
||||||
|
|
||||||
if (sampler == nullptr) {
|
if (sampler == nullptr) {
|
||||||
u32 indexer = 0;
|
u32 indexer = 0;
|
||||||
|
@ -240,7 +243,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
if (!instr.txq.IsComponentEnabled(element)) {
|
if (!instr.txq.IsComponentEnabled(element)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
|
MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
|
||||||
const Node value =
|
const Node value =
|
||||||
Operation(OperationCode::TextureQueryDimensions, meta,
|
Operation(OperationCode::TextureQueryDimensions, meta,
|
||||||
GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
|
GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
|
||||||
|
@ -266,8 +269,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
|
|
||||||
auto texture_type = instr.tmml.texture_type.Value();
|
auto texture_type = instr.tmml.texture_type.Value();
|
||||||
const bool is_array = instr.tmml.array != 0;
|
const bool is_array = instr.tmml.array != 0;
|
||||||
|
Node index_var{};
|
||||||
const Sampler* sampler =
|
const Sampler* sampler =
|
||||||
is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler);
|
is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler);
|
||||||
|
|
||||||
if (sampler == nullptr) {
|
if (sampler == nullptr) {
|
||||||
u32 indexer = 0;
|
u32 indexer = 0;
|
||||||
|
@ -309,7 +313,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto params = coords;
|
auto params = coords;
|
||||||
MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
|
MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
|
||||||
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
|
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
|
||||||
SetTemporary(bb, indexer++, value);
|
SetTemporary(bb, indexer++, value);
|
||||||
}
|
}
|
||||||
|
@ -383,19 +387,23 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
|
||||||
// Otherwise create a new mapping for this sampler
|
// Otherwise create a new mapping for this sampler
|
||||||
const auto next_index = static_cast<u32>(used_samplers.size());
|
const auto next_index = static_cast<u32>(used_samplers.size());
|
||||||
return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
|
return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
|
||||||
info.is_buffer);
|
info.is_buffer, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
|
const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var,
|
||||||
std::optional<SamplerInfo> sampler_info) {
|
std::optional<SamplerInfo> sampler_info) {
|
||||||
const Node sampler_register = GetRegister(reg);
|
const Node sampler_register = GetRegister(reg);
|
||||||
const auto [base_sampler, buffer, offset] =
|
const auto [base_node, tracked_sampler_info] =
|
||||||
TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
|
TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
|
||||||
ASSERT(base_sampler != nullptr);
|
ASSERT(base_node != nullptr);
|
||||||
if (base_sampler == nullptr) {
|
if (base_node == nullptr) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (const auto bindless_sampler_info =
|
||||||
|
std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
|
||||||
|
const u32 buffer = bindless_sampler_info->GetIndex();
|
||||||
|
const u32 offset = bindless_sampler_info->GetOffset();
|
||||||
const auto info = GetSamplerInfo(sampler_info, offset, buffer);
|
const auto info = GetSamplerInfo(sampler_info, offset, buffer);
|
||||||
|
|
||||||
// If this sampler has already been used, return the existing mapping.
|
// If this sampler has already been used, return the existing mapping.
|
||||||
|
@ -405,15 +413,39 @@ const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
|
||||||
return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
|
return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
|
||||||
});
|
});
|
||||||
if (it != used_samplers.end()) {
|
if (it != used_samplers.end()) {
|
||||||
ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
|
ASSERT(it->IsBindless() && it->GetType() == info.type &&
|
||||||
it->IsShadow() == info.is_shadow);
|
it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow);
|
||||||
return &*it;
|
return &*it;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise create a new mapping for this sampler
|
// Otherwise create a new mapping for this sampler
|
||||||
const auto next_index = static_cast<u32>(used_samplers.size());
|
const auto next_index = static_cast<u32>(used_samplers.size());
|
||||||
return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
|
return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
|
||||||
info.is_shadow, info.is_buffer);
|
info.is_shadow, info.is_buffer, false);
|
||||||
|
} else if (const auto array_sampler_info =
|
||||||
|
std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
|
||||||
|
const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
|
||||||
|
index_var = GetCustomVariable(array_sampler_info->GetIndexVar());
|
||||||
|
const auto info = GetSamplerInfo(sampler_info, base_offset);
|
||||||
|
|
||||||
|
// If this sampler has already been used, return the existing mapping.
|
||||||
|
const auto it = std::find_if(
|
||||||
|
used_samplers.begin(), used_samplers.end(),
|
||||||
|
[base_offset](const Sampler& entry) { return entry.GetOffset() == base_offset; });
|
||||||
|
if (it != used_samplers.end()) {
|
||||||
|
ASSERT(!it->IsBindless() && it->GetType() == info.type &&
|
||||||
|
it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow &&
|
||||||
|
it->IsBuffer() == info.is_buffer && it->IsIndexed());
|
||||||
|
return &*it;
|
||||||
|
}
|
||||||
|
|
||||||
|
uses_indexed_samplers = true;
|
||||||
|
// Otherwise create a new mapping for this sampler
|
||||||
|
const auto next_index = static_cast<u32>(used_samplers.size());
|
||||||
|
return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array,
|
||||||
|
info.is_shadow, info.is_buffer, true);
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
|
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
|
||||||
|
@ -499,8 +531,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||||
"This method is not supported.");
|
"This method is not supported.");
|
||||||
|
|
||||||
const SamplerInfo info{texture_type, is_array, is_shadow, false};
|
const SamplerInfo info{texture_type, is_array, is_shadow, false};
|
||||||
const Sampler* sampler =
|
Node index_var{};
|
||||||
is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info);
|
const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info)
|
||||||
|
: GetSampler(instr.sampler, info);
|
||||||
Node4 values;
|
Node4 values;
|
||||||
if (sampler == nullptr) {
|
if (sampler == nullptr) {
|
||||||
for (u32 element = 0; element < values.size(); ++element) {
|
for (u32 element = 0; element < values.size(); ++element) {
|
||||||
|
@ -548,7 +581,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||||
|
|
||||||
for (u32 element = 0; element < values.size(); ++element) {
|
for (u32 element = 0; element < values.size(); ++element) {
|
||||||
auto copy_coords = coords;
|
auto copy_coords = coords;
|
||||||
MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element};
|
MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias,
|
||||||
|
lod, {}, element, index_var};
|
||||||
values[element] = Operation(read_method, meta, std::move(copy_coords));
|
values[element] = Operation(read_method, meta, std::move(copy_coords));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -663,7 +697,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
|
||||||
u64 parameter_register = instr.gpr20.Value();
|
u64 parameter_register = instr.gpr20.Value();
|
||||||
|
|
||||||
const SamplerInfo info{texture_type, is_array, depth_compare, false};
|
const SamplerInfo info{texture_type, is_array, depth_compare, false};
|
||||||
const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info)
|
Node index_var{};
|
||||||
|
const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, index_var, info)
|
||||||
: GetSampler(instr.sampler, info);
|
: GetSampler(instr.sampler, info);
|
||||||
Node4 values;
|
Node4 values;
|
||||||
if (sampler == nullptr) {
|
if (sampler == nullptr) {
|
||||||
|
@ -692,7 +727,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
|
||||||
for (u32 element = 0; element < values.size(); ++element) {
|
for (u32 element = 0; element < values.size(); ++element) {
|
||||||
auto coords_copy = coords;
|
auto coords_copy = coords;
|
||||||
MetaTexture meta{
|
MetaTexture meta{
|
||||||
*sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element};
|
*sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element,
|
||||||
|
index_var};
|
||||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -725,7 +761,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
|
||||||
Node4 values;
|
Node4 values;
|
||||||
for (u32 element = 0; element < values.size(); ++element) {
|
for (u32 element = 0; element < values.size(); ++element) {
|
||||||
auto coords_copy = coords;
|
auto coords_copy = coords;
|
||||||
MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element};
|
MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};
|
||||||
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -775,7 +811,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
|
||||||
Node4 values;
|
Node4 values;
|
||||||
for (u32 element = 0; element < values.size(); ++element) {
|
for (u32 element = 0; element < values.size(); ++element) {
|
||||||
auto coords_copy = coords;
|
auto coords_copy = coords;
|
||||||
MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element};
|
MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}};
|
||||||
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
||||||
}
|
}
|
||||||
return values;
|
return values;
|
||||||
|
|
|
@ -212,6 +212,7 @@ enum class MetaStackClass {
|
||||||
class OperationNode;
|
class OperationNode;
|
||||||
class ConditionalNode;
|
class ConditionalNode;
|
||||||
class GprNode;
|
class GprNode;
|
||||||
|
class CustomVarNode;
|
||||||
class ImmediateNode;
|
class ImmediateNode;
|
||||||
class InternalFlagNode;
|
class InternalFlagNode;
|
||||||
class PredicateNode;
|
class PredicateNode;
|
||||||
|
@ -223,26 +224,32 @@ class SmemNode;
|
||||||
class GmemNode;
|
class GmemNode;
|
||||||
class CommentNode;
|
class CommentNode;
|
||||||
|
|
||||||
using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode,
|
using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
|
||||||
InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
|
InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
|
||||||
LmemNode, SmemNode, GmemNode, CommentNode>;
|
LmemNode, SmemNode, GmemNode, CommentNode>;
|
||||||
using Node = std::shared_ptr<NodeData>;
|
using Node = std::shared_ptr<NodeData>;
|
||||||
using Node4 = std::array<Node, 4>;
|
using Node4 = std::array<Node, 4>;
|
||||||
using NodeBlock = std::vector<Node>;
|
using NodeBlock = std::vector<Node>;
|
||||||
|
|
||||||
|
class BindlessSamplerNode;
|
||||||
|
class ArraySamplerNode;
|
||||||
|
|
||||||
|
using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>;
|
||||||
|
using TrackSampler = std::shared_ptr<TrackSamplerData>;
|
||||||
|
|
||||||
class Sampler {
|
class Sampler {
|
||||||
public:
|
public:
|
||||||
/// This constructor is for bound samplers
|
/// This constructor is for bound samplers
|
||||||
constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
|
constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
|
||||||
bool is_array, bool is_shadow, bool is_buffer)
|
bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
|
||||||
: index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
|
: index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
|
||||||
is_buffer{is_buffer} {}
|
is_buffer{is_buffer}, is_indexed{is_indexed} {}
|
||||||
|
|
||||||
/// This constructor is for bindless samplers
|
/// This constructor is for bindless samplers
|
||||||
constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
|
constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
|
||||||
bool is_array, bool is_shadow, bool is_buffer)
|
bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
|
||||||
: index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
|
: index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
|
||||||
is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {}
|
is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
|
||||||
|
|
||||||
constexpr u32 GetIndex() const {
|
constexpr u32 GetIndex() const {
|
||||||
return index;
|
return index;
|
||||||
|
@ -276,16 +283,72 @@ public:
|
||||||
return is_bindless;
|
return is_bindless;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr bool IsIndexed() const {
|
||||||
|
return is_indexed;
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr u32 Size() const {
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr void SetSize(u32 new_size) {
|
||||||
|
size = new_size;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
u32 index{}; ///< Emulated index given for the this sampler.
|
u32 index{}; ///< Emulated index given for the this sampler.
|
||||||
u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
|
u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
|
||||||
u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
|
u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
|
||||||
|
u32 size{}; ///< Size of the sampler if indexed.
|
||||||
|
|
||||||
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
|
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
|
||||||
bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
|
bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
|
||||||
bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
|
bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
|
||||||
bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler.
|
bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler.
|
||||||
bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
|
bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
|
||||||
|
bool is_indexed{}; ///< Whether this sampler is an indexed array of textures.
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Represents a tracked bindless sampler into a direct const buffer
|
||||||
|
class ArraySamplerNode final {
|
||||||
|
public:
|
||||||
|
explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
|
||||||
|
: index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
|
||||||
|
|
||||||
|
constexpr u32 GetIndex() const {
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr u32 GetBaseOffset() const {
|
||||||
|
return base_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr u32 GetIndexVar() const {
|
||||||
|
return bindless_var;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
u32 index;
|
||||||
|
u32 base_offset;
|
||||||
|
u32 bindless_var;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Represents a tracked bindless sampler into a direct const buffer
|
||||||
|
class BindlessSamplerNode final {
|
||||||
|
public:
|
||||||
|
explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
|
||||||
|
|
||||||
|
constexpr u32 GetIndex() const {
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr u32 GetOffset() const {
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
u32 index;
|
||||||
|
u32 offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Image final {
|
class Image final {
|
||||||
|
@ -382,6 +445,7 @@ struct MetaTexture {
|
||||||
Node lod;
|
Node lod;
|
||||||
Node component{};
|
Node component{};
|
||||||
u32 element{};
|
u32 element{};
|
||||||
|
Node index{};
|
||||||
};
|
};
|
||||||
|
|
||||||
struct MetaImage {
|
struct MetaImage {
|
||||||
|
@ -488,6 +552,19 @@ private:
|
||||||
Tegra::Shader::Register index{};
|
Tegra::Shader::Register index{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// A custom variable
|
||||||
|
class CustomVarNode final {
|
||||||
|
public:
|
||||||
|
explicit constexpr CustomVarNode(u32 index) : index{index} {}
|
||||||
|
|
||||||
|
constexpr u32 GetIndex() const {
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
u32 index{};
|
||||||
|
};
|
||||||
|
|
||||||
/// A 32-bits value that represents an immediate value
|
/// A 32-bits value that represents an immediate value
|
||||||
class ImmediateNode final {
|
class ImmediateNode final {
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -45,6 +45,12 @@ Node MakeNode(Args&&... args) {
|
||||||
return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
|
return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T, typename... Args>
|
||||||
|
TrackSampler MakeTrackSampler(Args&&... args) {
|
||||||
|
static_assert(std::is_convertible_v<T, TrackSamplerData>);
|
||||||
|
return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...));
|
||||||
|
}
|
||||||
|
|
||||||
template <typename... Args>
|
template <typename... Args>
|
||||||
Node Operation(OperationCode code, Args&&... args) {
|
Node Operation(OperationCode code, Args&&... args) {
|
||||||
if constexpr (sizeof...(args) == 0) {
|
if constexpr (sizeof...(args) == 0) {
|
||||||
|
|
|
@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet
|
||||||
ConstBufferLocker& locker)
|
ConstBufferLocker& locker)
|
||||||
: program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
|
: program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
|
||||||
Decode();
|
Decode();
|
||||||
|
PostDecode();
|
||||||
}
|
}
|
||||||
|
|
||||||
ShaderIR::~ShaderIR() = default;
|
ShaderIR::~ShaderIR() = default;
|
||||||
|
@ -38,6 +39,10 @@ Node ShaderIR::GetRegister(Register reg) {
|
||||||
return MakeNode<GprNode>(reg);
|
return MakeNode<GprNode>(reg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Generates a node for the custom variable `id`: a new CustomVarNode carrying `id`.
Node ShaderIR::GetCustomVariable(u32 id) {
|
||||||
|
return MakeNode<CustomVarNode>(id);
|
||||||
|
}
|
||||||
|
|
||||||
Node ShaderIR::GetImmediate19(Instruction instr) {
|
Node ShaderIR::GetImmediate19(Instruction instr) {
|
||||||
return Immediate(instr.alu.GetImm20_19());
|
return Immediate(instr.alu.GetImm20_19());
|
||||||
}
|
}
|
||||||
|
@ -452,4 +457,8 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) {
|
||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reserves a new custom variable and returns its id. The id is the current value of
/// `num_custom_variables`, which is then incremented.
u32 ShaderIR::NewCustomVariable() {
    const u32 allocated_id = num_custom_variables;
    ++num_custom_variables;
    return allocated_id;
}
|
||||||
|
|
||||||
} // namespace VideoCommon::Shader
|
} // namespace VideoCommon::Shader
|
||||||
|
|
|
@ -180,6 +180,10 @@ public:
|
||||||
return amend_code[index];
|
return amend_code[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the number of custom variables handed out so far, i.e. the current value of the
/// counter that NewCustomVariable increments.
u32 GetNumCustomVariables() const {
|
||||||
|
return num_custom_variables;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class ASTDecoder;
|
friend class ASTDecoder;
|
||||||
|
|
||||||
|
@ -191,6 +195,7 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
void Decode();
|
void Decode();
|
||||||
|
void PostDecode();
|
||||||
|
|
||||||
NodeBlock DecodeRange(u32 begin, u32 end);
|
NodeBlock DecodeRange(u32 begin, u32 end);
|
||||||
void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
|
void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
|
||||||
|
@ -235,6 +240,8 @@ private:
|
||||||
|
|
||||||
/// Generates a node for a passed register.
|
/// Generates a node for a passed register.
|
||||||
Node GetRegister(Tegra::Shader::Register reg);
|
Node GetRegister(Tegra::Shader::Register reg);
|
||||||
|
/// Generates a node for a custom variable
|
||||||
|
Node GetCustomVariable(u32 id);
|
||||||
/// Generates a node representing a 19-bit immediate value
|
/// Generates a node representing a 19-bit immediate value
|
||||||
Node GetImmediate19(Tegra::Shader::Instruction instr);
|
Node GetImmediate19(Tegra::Shader::Instruction instr);
|
||||||
/// Generates a node representing a 32-bit immediate value
|
/// Generates a node representing a 32-bit immediate value
|
||||||
|
@ -321,7 +328,7 @@ private:
|
||||||
std::optional<SamplerInfo> sampler_info = std::nullopt);
|
std::optional<SamplerInfo> sampler_info = std::nullopt);
|
||||||
|
|
||||||
/// Accesses a texture sampler for a bindless texture.
|
/// Accesses a texture sampler for a bindless texture.
|
||||||
const Sampler* GetBindlessSampler(Tegra::Shader::Register reg,
|
const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var,
|
||||||
std::optional<SamplerInfo> sampler_info = std::nullopt);
|
std::optional<SamplerInfo> sampler_info = std::nullopt);
|
||||||
|
|
||||||
/// Accesses an image.
|
/// Accesses an image.
|
||||||
|
@ -387,6 +394,9 @@ private:
|
||||||
|
|
||||||
std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
|
std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
|
||||||
|
|
||||||
|
std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
|
||||||
|
s64 cursor);
|
||||||
|
|
||||||
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
|
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
|
||||||
|
|
||||||
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
|
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
|
||||||
|
@ -399,6 +409,8 @@ private:
|
||||||
/// Register new amending code and obtain the reference id.
|
/// Register new amending code and obtain the reference id.
|
||||||
std::size_t DeclareAmend(Node new_amend);
|
std::size_t DeclareAmend(Node new_amend);
|
||||||
|
|
||||||
|
u32 NewCustomVariable();
|
||||||
|
|
||||||
const ProgramCode& program_code;
|
const ProgramCode& program_code;
|
||||||
const u32 main_offset;
|
const u32 main_offset;
|
||||||
const CompilerSettings settings;
|
const CompilerSettings settings;
|
||||||
|
@ -414,6 +426,7 @@ private:
|
||||||
NodeBlock global_code;
|
NodeBlock global_code;
|
||||||
ASTManager program_manager{true, true};
|
ASTManager program_manager{true, true};
|
||||||
std::vector<Node> amend_code;
|
std::vector<Node> amend_code;
|
||||||
|
u32 num_custom_variables{};
|
||||||
|
|
||||||
std::set<u32> used_registers;
|
std::set<u32> used_registers;
|
||||||
std::set<Tegra::Shader::Pred> used_predicates;
|
std::set<Tegra::Shader::Pred> used_predicates;
|
||||||
|
@ -431,6 +444,7 @@ private:
|
||||||
bool uses_instance_id{};
|
bool uses_instance_id{};
|
||||||
bool uses_vertex_id{};
|
bool uses_vertex_id{};
|
||||||
bool uses_warps{};
|
bool uses_warps{};
|
||||||
|
bool uses_indexed_samplers{};
|
||||||
|
|
||||||
Tegra::Shader::Header header;
|
Tegra::Shader::Header header;
|
||||||
};
|
};
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/shader/node.h"
|
#include "video_core/shader/node.h"
|
||||||
|
#include "video_core/shader/node_helper.h"
|
||||||
#include "video_core/shader/shader_ir.h"
|
#include "video_core/shader/shader_ir.h"
|
||||||
|
|
||||||
namespace VideoCommon::Shader {
|
namespace VideoCommon::Shader {
|
||||||
|
@ -35,8 +36,113 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
|
||||||
}
|
}
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) {
|
||||||
|
if (operation.GetCode() != OperationCode::UAdd) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
Node gpr{};
|
||||||
|
Node offset{};
|
||||||
|
ASSERT(operation.GetOperandsCount() == 2);
|
||||||
|
for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) {
|
||||||
|
Node operand = operation[i];
|
||||||
|
if (std::holds_alternative<ImmediateNode>(*operand)) {
|
||||||
|
offset = operation[i];
|
||||||
|
} else if (std::holds_alternative<GprNode>(*operand)) {
|
||||||
|
gpr = operation[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (offset && gpr) {
|
||||||
|
return std::make_pair(gpr, offset);
|
||||||
|
}
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AmendNodeCv(std::size_t amend_index, Node node) {
|
||||||
|
if (const auto operation = std::get_if<OperationNode>(&*node)) {
|
||||||
|
operation->SetAmendIndex(amend_index);
|
||||||
|
return true;
|
||||||
|
} else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
|
||||||
|
conditional->SetAmendIndex(amend_index);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
/// Searches backwards from a node for the constant buffer entry that supplies a bindless
/// sampler handle.
///
/// Two constant buffer shapes are recognized:
///  - an immediate offset, reported as a BindlessSamplerNode track;
///  - a "register + immediate" offset into the bound buffer, reported as an ArraySamplerNode
///    track. For this case a custom variable is allocated and an amend that assigns
///    `register / texture handler size` to it is declared and attached to `code[cursor]`.
///
/// @param tracked Node whose origin is being searched. Must not be null.
/// @param code Block of nodes the search runs over.
/// @param cursor Position in `code` the search starts from.
/// @return The constant buffer node together with its tracking info, or a default-constructed
///         tuple (null node) when nothing could be tracked.
std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
                                                              s64 cursor) {
    if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
        // Constant buffer found, test if it's an immediate
        const auto offset = cbuf->GetOffset();
        if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
            auto track =
                MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
            return {tracked, track};
        }
        if (const auto operation = std::get_if<OperationNode>(&*offset)) {
            // Indexed reads are only handled on the bound buffer.
            const auto bound_buffer = locker.ObtainBoundBuffer();
            if (!bound_buffer || *bound_buffer != cbuf->GetIndex()) {
                return {};
            }
            auto pair = DecoupleIndirectRead(*operation);
            if (!pair) {
                return {};
            }
            auto [gpr, base_offset] = *pair;
            const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
            if (offset_inm == nullptr) {
                return {};
            }
            auto gpu_driver = locker.AccessGuestDriverProfile();
            if (gpu_driver == nullptr) {
                return {};
            }
            // The amend is attached to code[cursor]; reject a cursor outside the block before
            // any state (custom variable, amend) is allocated.
            if (cursor < 0 || static_cast<std::size_t>(cursor) >= code.size()) {
                return {};
            }
            const u32 bindless_cv = NewCustomVariable();
            // Not const, so the std::move below really moves instead of copying.
            Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr,
                                Immediate(gpu_driver->GetTextureHandlerSize()));

            const Node cv_node = GetCustomVariable(bindless_cv);
            Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
            const std::size_t amend_index = DeclareAmend(amend_op);
            // NOTE(review): the result is ignored; if code[cursor] is neither an operation nor
            // a conditional node the amend is declared but never attached - confirm intended.
            AmendNodeCv(amend_index, code[cursor]);
            // TODO Implement Bindless Index custom variable
            auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
                                                            offset_inm->GetValue(), bindless_cv);
            return {tracked, track};
        }
        return {};
    }
    if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
        if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
            return {};
        }
        // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
        // register that it uses as operand
        const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
        if (!source) {
            return {};
        }
        return TrackBindlessSampler(source, code, new_cursor);
    }
    if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
        // Operands are searched from last to first; the first hit wins.
        for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
            if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor);
                std::get<0>(found)) {
                // Cbuf found in operand.
                return found;
            }
        }
        return {};
    }
    if (std::holds_alternative<ConditionalNode>(*tracked)) {
        // Previously this re-ran the search with the very same conditional node, which selects
        // this branch again on every call and never terminates. Report "not tracked" instead.
        return {};
    }
    return {};
}
|
||||||
|
|
||||||
std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
|
std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
|
||||||
s64 cursor) const {
|
s64 cursor) const {
|
||||||
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
|
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
|
||||||
|
|
Loading…
Reference in a new issue