Merge pull request #2306 from ReinUsesLisp/aoffi
shader_ir: Implement AOFFI for TEX and TLD4
This commit is contained in:
commit
8aaf418bd6
4 changed files with 207 additions and 73 deletions
|
@ -21,6 +21,8 @@
|
||||||
|
|
||||||
namespace OpenGL::GLShader {
|
namespace OpenGL::GLShader {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
using Tegra::Shader::Attribute;
|
using Tegra::Shader::Attribute;
|
||||||
using Tegra::Shader::AttributeUse;
|
using Tegra::Shader::AttributeUse;
|
||||||
using Tegra::Shader::Header;
|
using Tegra::Shader::Header;
|
||||||
|
@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||||
using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
|
using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
|
||||||
using Operation = const OperationNode&;
|
using Operation = const OperationNode&;
|
||||||
|
|
||||||
|
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
|
||||||
|
|
||||||
|
// Empty marker type stored in a TextureIR list. When GenerateTexture reaches this entry it
// appends the AOFFI offset argument built from the operation's MetaTexture::aoffi.
struct TextureAoffi {};
|
||||||
|
// A typed extra operand for a texture call: the value type tag and the IR node to emit.
// GenerateTextureArgument emits nothing when the node is null.
using TextureArgument = std::pair<Type, Node>;
|
||||||
|
// One entry of the ordered list of extra arguments handed to GenerateTexture; the order of the
// entries decides where the offset lands relative to the other operands.
using TextureIR = std::variant<TextureAoffi, TextureArgument>;
|
||||||
|
|
||||||
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
|
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
|
||||||
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
|
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
|
||||||
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
|
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
|
||||||
constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
|
constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
|
||||||
static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
|
static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
|
||||||
|
|
||||||
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
|
|
||||||
|
|
||||||
class ShaderWriter {
|
class ShaderWriter {
|
||||||
public:
|
public:
|
||||||
void AddExpression(std::string_view text) {
|
void AddExpression(std::string_view text) {
|
||||||
|
@ -91,7 +97,7 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Generates code to use for a swizzle operation.
|
/// Generates code to use for a swizzle operation.
|
||||||
static std::string GetSwizzle(u32 elem) {
|
std::string GetSwizzle(u32 elem) {
|
||||||
ASSERT(elem <= 3);
|
ASSERT(elem <= 3);
|
||||||
std::string swizzle = ".";
|
std::string swizzle = ".";
|
||||||
swizzle += "xyzw"[elem];
|
swizzle += "xyzw"[elem];
|
||||||
|
@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Translate topology
|
/// Translate topology
|
||||||
static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
|
std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
|
||||||
switch (topology) {
|
switch (topology) {
|
||||||
case Tegra::Shader::OutputTopology::PointList:
|
case Tegra::Shader::OutputTopology::PointList:
|
||||||
return "points";
|
return "points";
|
||||||
|
@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if an object has to be treated as precise
|
/// Returns true if an object has to be treated as precise
|
||||||
static bool IsPrecise(Operation operand) {
|
bool IsPrecise(Operation operand) {
|
||||||
const auto& meta = operand.GetMeta();
|
const auto& meta = operand.GetMeta();
|
||||||
|
|
||||||
if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
|
if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
|
||||||
|
@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool IsPrecise(Node node) {
|
bool IsPrecise(Node node) {
|
||||||
if (const auto operation = std::get_if<OperationNode>(node)) {
|
if (const auto operation = std::get_if<OperationNode>(node)) {
|
||||||
return IsPrecise(*operation);
|
return IsPrecise(*operation);
|
||||||
}
|
}
|
||||||
|
@ -723,8 +729,8 @@ private:
|
||||||
result_type));
|
result_type));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GenerateTexture(Operation operation, const std::string& func,
|
std::string GenerateTexture(Operation operation, const std::string& function_suffix,
|
||||||
const std::vector<std::pair<Type, Node>>& extras) {
|
const std::vector<TextureIR>& extras) {
|
||||||
constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
|
constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
|
||||||
|
|
||||||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||||
|
@ -734,11 +740,11 @@ private:
|
||||||
const bool has_array = meta->sampler.IsArray();
|
const bool has_array = meta->sampler.IsArray();
|
||||||
const bool has_shadow = meta->sampler.IsShadow();
|
const bool has_shadow = meta->sampler.IsShadow();
|
||||||
|
|
||||||
std::string expr = func;
|
std::string expr = "texture" + function_suffix;
|
||||||
expr += '(';
|
if (!meta->aoffi.empty()) {
|
||||||
expr += GetSampler(meta->sampler);
|
expr += "Offset";
|
||||||
expr += ", ";
|
}
|
||||||
|
expr += '(' + GetSampler(meta->sampler) + ", ";
|
||||||
expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
|
expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
|
||||||
expr += '(';
|
expr += '(';
|
||||||
for (std::size_t i = 0; i < count; ++i) {
|
for (std::size_t i = 0; i < count; ++i) {
|
||||||
|
@ -756,13 +762,26 @@ private:
|
||||||
}
|
}
|
||||||
expr += ')';
|
expr += ')';
|
||||||
|
|
||||||
for (const auto& extra_pair : extras) {
|
for (const auto& variant : extras) {
|
||||||
const auto [type, operand] = extra_pair;
|
if (const auto argument = std::get_if<TextureArgument>(&variant)) {
|
||||||
if (operand == nullptr) {
|
expr += GenerateTextureArgument(*argument);
|
||||||
continue;
|
} else if (std::get_if<TextureAoffi>(&variant)) {
|
||||||
|
expr += GenerateTextureAoffi(meta->aoffi);
|
||||||
|
} else {
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
expr += ", ";
|
|
||||||
|
|
||||||
|
return expr + ')';
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string GenerateTextureArgument(TextureArgument argument) {
|
||||||
|
const auto [type, operand] = argument;
|
||||||
|
if (operand == nullptr) {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string expr = ", ";
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case Type::Int:
|
case Type::Int:
|
||||||
if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
|
if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
|
||||||
|
@ -783,9 +802,34 @@ private:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return expr;
|
||||||
}
|
}
|
||||||
|
|
||||||
return expr + ')';
|
std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
|
||||||
|
if (aoffi.empty()) {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"};
|
||||||
|
std::string expr = ", ";
|
||||||
|
expr += coord_constructors.at(aoffi.size() - 1);
|
||||||
|
expr += '(';
|
||||||
|
|
||||||
|
for (std::size_t index = 0; index < aoffi.size(); ++index) {
|
||||||
|
const auto operand{aoffi.at(index)};
|
||||||
|
if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
|
||||||
|
// Inline the string as an immediate integer in GLSL (AOFFI arguments are required
|
||||||
|
// to be constant by the standard).
|
||||||
|
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
|
||||||
|
} else {
|
||||||
|
expr += "ftoi(" + Visit(operand) + ')';
|
||||||
|
}
|
||||||
|
if (index + 1 < aoffi.size()) {
|
||||||
|
expr += ", ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
expr += ')';
|
||||||
|
|
||||||
|
return expr;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string Assign(Operation operation) {
|
std::string Assign(Operation operation) {
|
||||||
|
@ -1164,7 +1208,8 @@ private:
|
||||||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||||
ASSERT(meta);
|
ASSERT(meta);
|
||||||
|
|
||||||
std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
|
std::string expr = GenerateTexture(
|
||||||
|
operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
|
||||||
if (meta->sampler.IsShadow()) {
|
if (meta->sampler.IsShadow()) {
|
||||||
expr = "vec4(" + expr + ')';
|
expr = "vec4(" + expr + ')';
|
||||||
}
|
}
|
||||||
|
@ -1175,7 +1220,8 @@ private:
|
||||||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||||
ASSERT(meta);
|
ASSERT(meta);
|
||||||
|
|
||||||
std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
|
std::string expr = GenerateTexture(
|
||||||
|
operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
|
||||||
if (meta->sampler.IsShadow()) {
|
if (meta->sampler.IsShadow()) {
|
||||||
expr = "vec4(" + expr + ')';
|
expr = "vec4(" + expr + ')';
|
||||||
}
|
}
|
||||||
|
@ -1187,7 +1233,8 @@ private:
|
||||||
ASSERT(meta);
|
ASSERT(meta);
|
||||||
|
|
||||||
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
|
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
|
||||||
return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
|
return GenerateTexture(operation, "Gather",
|
||||||
|
{TextureArgument{type, meta->component}, TextureAoffi{}}) +
|
||||||
GetSwizzle(meta->element);
|
GetSwizzle(meta->element);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1217,8 +1264,8 @@ private:
|
||||||
ASSERT(meta);
|
ASSERT(meta);
|
||||||
|
|
||||||
if (meta->element < 2) {
|
if (meta->element < 2) {
|
||||||
return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
|
return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" +
|
||||||
" * vec2(256))" + GetSwizzle(meta->element) + "))";
|
GetSwizzle(meta->element) + "))";
|
||||||
}
|
}
|
||||||
return "0";
|
return "0";
|
||||||
}
|
}
|
||||||
|
@ -1571,6 +1618,8 @@ private:
|
||||||
ShaderWriter code;
|
ShaderWriter code;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
std::string GetCommonDeclarations() {
|
std::string GetCommonDeclarations() {
|
||||||
const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
|
const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
|
||||||
const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
|
const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
|
||||||
|
|
|
@ -7,7 +7,9 @@
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
|
#include "common/bit_field.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "common/logging/log.h"
|
||||||
#include "video_core/engines/shader_bytecode.h"
|
#include "video_core/engines/shader_bytecode.h"
|
||||||
#include "video_core/shader/shader_ir.h"
|
#include "video_core/shader/shader_ir.h"
|
||||||
|
|
||||||
|
@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
|
|
||||||
switch (opcode->get().GetId()) {
|
switch (opcode->get().GetId()) {
|
||||||
case OpCode::Id::TEX: {
|
case OpCode::Id::TEX: {
|
||||||
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
|
|
||||||
"AOFFI is not implemented");
|
|
||||||
|
|
||||||
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
|
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||||
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
|
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
|
||||||
}
|
}
|
||||||
|
|
||||||
const TextureType texture_type{instr.tex.texture_type};
|
const TextureType texture_type{instr.tex.texture_type};
|
||||||
const bool is_array = instr.tex.array != 0;
|
const bool is_array = instr.tex.array != 0;
|
||||||
|
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
|
||||||
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
|
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
|
||||||
const auto process_mode = instr.tex.GetTextureProcessMode();
|
const auto process_mode = instr.tex.GetTextureProcessMode();
|
||||||
WriteTexInstructionFloat(
|
WriteTexInstructionFloat(
|
||||||
bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
|
bb, instr,
|
||||||
|
GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OpCode::Id::TEXS: {
|
case OpCode::Id::TEXS: {
|
||||||
|
@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
}
|
}
|
||||||
case OpCode::Id::TLD4: {
|
case OpCode::Id::TLD4: {
|
||||||
ASSERT(instr.tld4.array == 0);
|
ASSERT(instr.tld4.array == 0);
|
||||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
|
|
||||||
"AOFFI is not implemented");
|
|
||||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
|
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
|
||||||
"NDV is not implemented");
|
"NDV is not implemented");
|
||||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
|
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
|
||||||
|
@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
const auto texture_type = instr.tld4.texture_type.Value();
|
const auto texture_type = instr.tld4.texture_type.Value();
|
||||||
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
|
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
|
||||||
const bool is_array = instr.tld4.array != 0;
|
const bool is_array = instr.tld4.array != 0;
|
||||||
WriteTexInstructionFloat(bb, instr,
|
const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
|
||||||
GetTld4Code(instr, texture_type, depth_compare, is_array));
|
WriteTexInstructionFloat(
|
||||||
|
bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OpCode::Id::TLD4S: {
|
case OpCode::Id::TLD4S: {
|
||||||
|
@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
Node4 values;
|
Node4 values;
|
||||||
for (u32 element = 0; element < values.size(); ++element) {
|
for (u32 element = 0; element < values.size(); ++element) {
|
||||||
auto coords_copy = coords;
|
auto coords_copy = coords;
|
||||||
MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
|
MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
|
||||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
if (!instr.txq.IsComponentEnabled(element)) {
|
if (!instr.txq.IsComponentEnabled(element)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
|
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
|
||||||
const Node value =
|
const Node value =
|
||||||
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
|
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
|
||||||
SetTemporal(bb, indexer++, value);
|
SetTemporal(bb, indexer++, value);
|
||||||
|
@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
|
|
||||||
for (u32 element = 0; element < 2; ++element) {
|
for (u32 element = 0; element < 2; ++element) {
|
||||||
auto params = coords;
|
auto params = coords;
|
||||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
|
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
|
||||||
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
|
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
|
||||||
SetTemporal(bb, element, value);
|
SetTemporal(bb, element, value);
|
||||||
}
|
}
|
||||||
|
@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
|
||||||
|
|
||||||
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||||
TextureProcessMode process_mode, std::vector<Node> coords,
|
TextureProcessMode process_mode, std::vector<Node> coords,
|
||||||
Node array, Node depth_compare, u32 bias_offset) {
|
Node array, Node depth_compare, u32 bias_offset,
|
||||||
|
std::vector<Node> aoffi) {
|
||||||
const bool is_array = array;
|
const bool is_array = array;
|
||||||
const bool is_shadow = depth_compare;
|
const bool is_shadow = depth_compare;
|
||||||
|
|
||||||
|
@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||||
Node4 values;
|
Node4 values;
|
||||||
for (u32 element = 0; element < values.size(); ++element) {
|
for (u32 element = 0; element < values.size(); ++element) {
|
||||||
auto copy_coords = coords;
|
auto copy_coords = coords;
|
||||||
MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
|
MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
|
||||||
values[element] = Operation(read_method, meta, std::move(copy_coords));
|
values[element] = Operation(read_method, meta, std::move(copy_coords));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||||
}
|
}
|
||||||
|
|
||||||
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
|
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
|
||||||
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
|
TextureProcessMode process_mode, bool depth_compare, bool is_array,
|
||||||
const bool lod_bias_enabled =
|
bool is_aoffi) {
|
||||||
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
|
const bool lod_bias_enabled{
|
||||||
|
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
|
||||||
|
|
||||||
|
u64 parameter_register = instr.gpr20.Value();
|
||||||
|
if (lod_bias_enabled) {
|
||||||
|
++parameter_register;
|
||||||
|
}
|
||||||
|
|
||||||
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
|
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
|
||||||
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
|
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
|
||||||
|
@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
|
||||||
|
|
||||||
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
||||||
|
|
||||||
|
std::vector<Node> aoffi;
|
||||||
|
if (is_aoffi) {
|
||||||
|
aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
|
||||||
|
}
|
||||||
|
|
||||||
Node dc{};
|
Node dc{};
|
||||||
if (depth_compare) {
|
if (depth_compare) {
|
||||||
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
|
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
|
||||||
// or bias are used
|
// or bias are used
|
||||||
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
|
dc = GetRegister(parameter_register++);
|
||||||
dc = GetRegister(depth_register);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
|
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
|
||||||
}
|
}
|
||||||
|
|
||||||
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
|
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
|
||||||
|
@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
|
||||||
dc = GetRegister(depth_register);
|
dc = GetRegister(depth_register);
|
||||||
}
|
}
|
||||||
|
|
||||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
|
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
|
||||||
}
|
}
|
||||||
|
|
||||||
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
|
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
|
||||||
bool is_array) {
|
bool is_array, bool is_aoffi) {
|
||||||
const std::size_t coord_count = GetCoordCount(texture_type);
|
const std::size_t coord_count = GetCoordCount(texture_type);
|
||||||
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
|
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
|
||||||
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
|
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
|
||||||
|
@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
|
||||||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||||
|
|
||||||
std::vector<Node> coords;
|
std::vector<Node> coords;
|
||||||
for (size_t i = 0; i < coord_count; ++i)
|
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||||
coords.push_back(GetRegister(coord_register + i));
|
coords.push_back(GetRegister(coord_register + i));
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 parameter_register = instr.gpr20.Value();
|
||||||
|
std::vector<Node> aoffi;
|
||||||
|
if (is_aoffi) {
|
||||||
|
aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
Node dc{};
|
||||||
|
if (depth_compare) {
|
||||||
|
dc = GetRegister(parameter_register++);
|
||||||
|
}
|
||||||
|
|
||||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
|
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
|
||||||
|
|
||||||
Node4 values;
|
Node4 values;
|
||||||
for (u32 element = 0; element < values.size(); ++element) {
|
for (u32 element = 0; element < values.size(); ++element) {
|
||||||
auto coords_copy = coords;
|
auto coords_copy = coords;
|
||||||
MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
|
MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
|
||||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
|
||||||
Node4 values;
|
Node4 values;
|
||||||
for (u32 element = 0; element < values.size(); ++element) {
|
for (u32 element = 0; element < values.size(); ++element) {
|
||||||
auto coords_copy = coords;
|
auto coords_copy = coords;
|
||||||
MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
|
MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
|
||||||
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
||||||
}
|
}
|
||||||
return values;
|
return values;
|
||||||
|
@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
|
||||||
return {coord_count, total_coord_count};
|
return {coord_count, total_coord_count};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Decodes the packed AOFFI register into one signed offset node per coordinate.
/// @param aoffi_reg   Register node holding the packed offsets.
/// @param coord_count Number of offset components to decode (at most three fields exist).
/// @param is_tld4     Selects the TLD4 layout (6-bit fields at bits 0/8/16) instead of the
///                    default layout (4-bit fields at bits 0/4/8).
/// @return Immediate nodes when the register value can be tracked to a constant, otherwise
///         operation nodes that extract and sign-correct each field at shader run time.
std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
                                                bool is_tld4) {
    // Per-layout constants: bit position of each field, field width, the first raw value that
    // encodes a negative offset, and the amount subtracted to turn it into that negative offset.
    const std::array<u32, 3> field_shifts =
        is_tld4 ? std::array<u32, 3>{0, 8, 16} : std::array<u32, 3>{0, 4, 8};
    const u32 field_bits = is_tld4 ? 6 : 4;
    const s32 first_negative = is_tld4 ? 32 : 8;
    const s32 sign_bias = is_tld4 ? 64 : 16;
    const u32 field_mask = (1U << field_bits) - 1;

    std::vector<Node> offsets;
    offsets.reserve(coord_count);

    const auto folded =
        TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()));

    if (folded) {
        // The register was assigned a constant: decode every field on the host.
        for (std::size_t i = 0; i < coord_count; ++i) {
            auto component = static_cast<s32>((*folded >> field_shifts.at(i)) & field_mask);
            if (component >= first_negative) {
                component -= sign_bias;
            }
            offsets.push_back(Immediate(component));
        }
        return offsets;
    }

    // Variable access, not supported on AMD.
    LOG_WARNING(HW_GPU,
                "AOFFI constant folding failed, some hardware might have graphical issues");
    for (std::size_t i = 0; i < coord_count; ++i) {
        const Node raw = BitfieldExtract(aoffi_reg, field_shifts.at(i), field_bits);
        const Node is_negative =
            Operation(OperationCode::LogicalIGreaterEqual, raw, Immediate(first_negative));
        const Node wrapped = Operation(OperationCode::IAdd, raw, Immediate(-sign_bias));
        offsets.push_back(Operation(OperationCode::Select, is_negative, wrapped, raw));
    }
    return offsets;
}
|
||||||
|
|
||||||
} // namespace VideoCommon::Shader
|
} // namespace VideoCommon::Shader
|
|
@ -7,6 +7,7 @@
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#include <optional>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
@ -290,6 +291,7 @@ struct MetaTexture {
|
||||||
const Sampler& sampler;
|
const Sampler& sampler;
|
||||||
Node array{};
|
Node array{};
|
||||||
Node depth_compare{};
|
Node depth_compare{};
|
||||||
|
std::vector<Node> aoffi;
|
||||||
Node bias{};
|
Node bias{};
|
||||||
Node lod{};
|
Node lod{};
|
||||||
Node component{};
|
Node component{};
|
||||||
|
@ -741,14 +743,14 @@ private:
|
||||||
|
|
||||||
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||||
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
|
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
|
||||||
bool is_array);
|
bool is_array, bool is_aoffi);
|
||||||
|
|
||||||
Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||||
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
|
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
|
||||||
bool is_array);
|
bool is_array);
|
||||||
|
|
||||||
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||||
bool depth_compare, bool is_array);
|
bool depth_compare, bool is_array, bool is_aoffi);
|
||||||
|
|
||||||
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||||
bool is_array);
|
bool is_array);
|
||||||
|
@ -757,9 +759,11 @@ private:
|
||||||
Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
|
Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
|
||||||
bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
|
bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
|
||||||
|
|
||||||
|
std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
|
||||||
|
|
||||||
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||||
Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
|
Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
|
||||||
Node array, Node depth_compare, u32 bias_offset);
|
Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
|
||||||
|
|
||||||
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
|
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
|
||||||
u64 byte_height);
|
u64 byte_height);
|
||||||
|
@ -773,6 +777,8 @@ private:
|
||||||
|
|
||||||
Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
|
Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
|
||||||
|
|
||||||
|
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
|
||||||
|
|
||||||
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
|
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
|
||||||
|
|
||||||
template <typename... T>
|
template <typename... T>
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <variant>
|
#include <variant>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
#include "video_core/shader/shader_ir.h"
|
#include "video_core/shader/shader_ir.h"
|
||||||
|
|
||||||
namespace VideoCommon::Shader {
|
namespace VideoCommon::Shader {
|
||||||
|
@ -14,7 +15,7 @@ namespace {
|
||||||
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
|
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
|
||||||
OperationCode operation_code) {
|
OperationCode operation_code) {
|
||||||
for (; cursor >= 0; --cursor) {
|
for (; cursor >= 0; --cursor) {
|
||||||
const Node node = code[cursor];
|
const Node node = code.at(cursor);
|
||||||
if (const auto operation = std::get_if<OperationNode>(node)) {
|
if (const auto operation = std::get_if<OperationNode>(node)) {
|
||||||
if (operation->GetCode() == operation_code)
|
if (operation->GetCode() == operation_code)
|
||||||
return {node, cursor};
|
return {node, cursor};
|
||||||
|
@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tries to resolve the value last assigned to a register as a compile-time constant.
/// @param tracked Node expected to be a general purpose register.
/// @param code    Block of already decoded nodes to search backwards through.
/// @param cursor  Index to start from; the search itself begins at cursor - 1.
/// @return The immediate value when the register was assigned an immediate, std::nullopt when
///         the node is null, is not a register, or its value could not be tracked to one.
std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
    // std::get_if tolerates a null node and a non-register node; both mean "cannot track"
    // rather than a crash or std::bad_variant_access.
    const auto gpr = std::get_if<GprNode>(tracked);
    if (!gpr) {
        return {};
    }

    // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
    // that it uses as operand
    const Node found = TrackRegister(gpr, code, cursor - 1).first;
    if (!found) {
        return {};
    }
    if (const auto immediate = std::get_if<ImmediateNode>(found)) {
        return immediate->GetValue();
    }
    return {};
}
|
||||||
|
|
||||||
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
|
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
|
||||||
s64 cursor) {
|
s64 cursor) {
|
||||||
for (; cursor >= 0; --cursor) {
|
for (; cursor >= 0; --cursor) {
|
||||||
|
|
Loading…
Reference in a new issue