From 6e386a334b0e61305c7473f2a4676292d0e289bc Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 5 Jun 2018 22:45:22 -0400 Subject: [PATCH 1/3] gl_shader_decompiler: Refactor uniform handling to allow different decodings. --- src/video_core/engines/shader_bytecode.h | 16 +++++--- .../renderer_opengl/gl_shader_decompiler.cpp | 39 +++++++++---------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 4eb5073256..7a74771ce7 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -109,11 +109,6 @@ union Sampler { u64 value{}; }; -union Uniform { - BitField<20, 14, u64> offset; - BitField<34, 5, u64> index; -}; - } // namespace Shader } // namespace Tegra @@ -354,12 +349,21 @@ union Instruction { } } bra; + union { + BitField<20, 14, u64> offset; + BitField<34, 5, u64> index; + } cbuf34; + + union { + BitField<20, 16, s64> offset; + BitField<36, 5, u64> index; + } cbuf36; + BitField<61, 1, u64> is_b_imm; BitField<60, 1, u64> is_b_gpr; BitField<59, 1, u64> is_c_gpr; Attribute attribute; - Uniform uniform; Sampler sampler; u64 value; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 7a59ecccf3..94c4858eab 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -20,7 +20,6 @@ using Tegra::Shader::OpCode; using Tegra::Shader::Register; using Tegra::Shader::Sampler; using Tegra::Shader::SubOp; -using Tegra::Shader::Uniform; constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; @@ -365,11 +364,9 @@ public: } /// Generates code representing a uniform (C buffer) register, interpreted as the input type. - std::string GetUniform(const Uniform& uniform, GLSLRegister::Type type) { - declr_const_buffers[uniform.index].MarkAsUsed(static_cast(uniform.index), - static_cast(uniform.offset), stage); - std::string value = - 'c' + std::to_string(uniform.index) + '[' + std::to_string(uniform.offset) + ']'; + std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) { + declr_const_buffers[index].MarkAsUsed(index, offset, stage); + std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset) + ']'; if (type == GLSLRegister::Type::Float) { return value; @@ -380,12 +377,6 @@ public: } } - /// Generates code representing a uniform (C buffer) register, interpreted as the type of the - /// destination register. - std::string GetUniform(const Uniform& uniform, const Register& dest_reg) { - return GetUniform(uniform, regs[dest_reg].GetActiveType()); - } - /// Add declarations for registers void GenerateDeclarations() { for (const auto& reg : regs) { @@ -747,7 +738,8 @@ private: if (instr.is_b_gpr) { op_b += regs.GetRegisterAsFloat(instr.gpr20); } else { - op_b += regs.GetUniform(instr.uniform, instr.gpr0); + op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::Float); } } @@ -904,7 +896,8 @@ private: if (instr.is_b_gpr) { op_b += regs.GetRegisterAsInteger(instr.gpr20); } else { - op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer); + op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::Integer); } } @@ -936,7 +929,8 @@ private: if (instr.is_b_gpr) { op_b += regs.GetRegisterAsInteger(instr.gpr20); } else { - op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer); + op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::Integer); } } @@ -953,7 +947,8 @@ private: switch (opcode->GetId()) { case OpCode::Id::FFMA_CR: { - op_b += regs.GetUniform(instr.uniform, instr.gpr0); + op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::Float); op_c += regs.GetRegisterAsFloat(instr.gpr39); break; } @@ -964,7 +959,8 @@ private: } case OpCode::Id::FFMA_RC: { op_b += regs.GetRegisterAsFloat(instr.gpr39); - op_c += regs.GetUniform(instr.uniform, instr.gpr0); + op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::Float); break; } case OpCode::Id::FFMA_IMM: { @@ -1175,7 +1171,8 @@ private: if (instr.is_b_gpr) { op_b += regs.GetRegisterAsFloat(instr.gpr20); } else { - op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Float); + op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::Float); } } @@ -1216,7 +1213,8 @@ private: if (instr.is_b_gpr) { op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed); } else { - op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer); + op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::Integer); } using Tegra::Shader::Pred; @@ -1262,7 +1260,8 @@ private: if (instr.is_b_gpr) { op_b += regs.GetRegisterAsFloat(instr.gpr20); } else { - op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Float); + op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::Float); } } From 4112aa68a673db4a8b73bc53270fdfa7f68532bf Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 5 Jun 2018 23:34:37 -0400 Subject: [PATCH 2/3] gl_shader_gen: Add uniform handling for indirect const buffer access. --- .../renderer_opengl/gl_rasterizer.cpp | 11 ++++++++++- .../renderer_opengl/gl_shader_decompiler.cpp | 15 +++++++++++++++ src/video_core/renderer_opengl/gl_shader_gen.h | 18 +++++++++++++++--- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0a33868b79..30be38dd4b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -654,7 +654,16 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr buffer_draw_state.bindpoint = current_bindpoint + bindpoint; boost::optional addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); - std::vector data(used_buffer.GetSize() * sizeof(float)); + + std::vector data; + if (used_buffer.IsIndirect()) { + // Buffer is accessed indirectly, so upload the entire thing + data.resize(buffer.size * sizeof(float)); + } else { + // Buffer is accessed directly, upload just what we use + data.resize(used_buffer.GetSize() * sizeof(float)); + } + Memory::ReadBlock(*addr, data.data(), data.size()); glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 94c4858eab..44c8bf4d49 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -377,6 +377,21 @@ public: } } + std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg, + GLSLRegister::Type type) { + declr_const_buffers[index].MarkAsUsedIndirect(index, stage); + std::string value = 'c' + std::to_string(index) + "[(floatBitsToInt(" + + GetRegister(index_reg, 0) + ") + " + std::to_string(offset) + ") / 4]"; + + if (type == GLSLRegister::Type::Float) { + return value; + } else if (type == GLSLRegister::Type::Integer) { + return "floatBitsToInt(" + value + ')'; + } else { + UNREACHABLE(); + } + } + /// Add declarations for registers void GenerateDeclarations() { for (const auto& reg : regs) { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 458032b5c4..ad795610c5 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -22,17 +22,28 @@ class ConstBufferEntry { using Maxwell = Tegra::Engines::Maxwell3D::Regs; public: - void MarkAsUsed(unsigned index, unsigned offset, Maxwell::ShaderStage stage) { + void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) { is_used = true; - this->index = index; + this->index = static_cast(index); + this->stage = stage; + max_offset = std::max(max_offset, static_cast(offset)); + } + + void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) { + is_used = true; + is_indirect = true; + this->index = static_cast(index); this->stage = stage; - max_offset = std::max(max_offset, offset); } bool IsUsed() const { return is_used; } + bool IsIndirect() const { + return is_indirect; + } + unsigned GetIndex() const { return index; } @@ -51,6 +62,7 @@ private: }; bool is_used{}; + bool is_indirect{}; unsigned index{}; unsigned max_offset{}; Maxwell::ShaderStage stage; From 4669f15f8be26ddf3c1cc02d8aac78656c41d361 Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 5 Jun 2018 23:46:23 -0400 Subject: [PATCH 3/3] gl_shader_decompiler: Implement LD_C instruction. --- src/video_core/engines/shader_bytecode.h | 16 +++++++++++ .../renderer_opengl/gl_shader_decompiler.cpp | 27 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7a74771ce7..af18c2d817 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -175,6 +175,15 @@ enum class FloatRoundingOp : u64 { Trunc = 3, }; +enum class UniformType : u64 { + UnsignedByte = 0, + SignedByte = 1, + UnsignedShort = 2, + SignedShort = 3, + Single = 4, + Double = 5, +}; + union Instruction { Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -252,6 +261,11 @@ union Instruction { BitField<49, 1, u64> negate_c; } ffma; + union { + BitField<48, 3, UniformType> type; + BitField<44, 2, u64> unknown; + } ld_c; + union { BitField<0, 3, u64> pred0; BitField<3, 3, u64> pred3; @@ -378,6 +392,7 @@ public: KIL, BRA, LD_A, + LD_C, ST_A, TEX, TEXQ, // Texture Query @@ -552,6 +567,7 @@ private: INST("111000110011----", Id::KIL, Type::Flow, "KIL"), INST("111000100100----", Id::BRA, Type::Flow, "BRA"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), + INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), INST("1100000000111---", Id::TEX, Type::Memory, "TEX"), INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 44c8bf4d49..a703b9151b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1090,6 +1090,33 @@ private: attribute); break; } + case OpCode::Id::LD_C: { + ASSERT_MSG(instr.ld_c.unknown == 0, "Unimplemented"); + + std::string op_a = + regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, instr.gpr8, + GLSLRegister::Type::Float); + std::string op_b = + regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, instr.gpr8, + GLSLRegister::Type::Float); + + switch (instr.ld_c.type.Value()) { + case Tegra::Shader::UniformType::Single: + regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); + break; + + case Tegra::Shader::UniformType::Double: + regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); + regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1); + break; + + default: + NGLOG_CRITICAL(HW_GPU, "Unhandled type: {}", + static_cast(instr.ld_c.type.Value())); + UNREACHABLE(); + } + break; + } case OpCode::Id::ST_A: { ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); regs.SetOutputAttributeToRegister(attribute, instr.attribute.fmt20.element,