From 0315fe8c3dc8cb267284b061a20b85c09cd5d98b Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 5 Apr 2018 21:44:22 -0400 Subject: [PATCH 01/27] bit_field: Make all methods constexpr. --- src/common/bit_field.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/common/bit_field.h b/src/common/bit_field.h index 0cc0a1be05..5638bdbba2 100644 --- a/src/common/bit_field.h +++ b/src/common/bit_field.h @@ -115,7 +115,7 @@ private: // assignment would copy the full storage value, rather than just the bits // relevant to this particular bit field. // We don't delete it because we want BitField to be trivially copyable. - BitField& operator=(const BitField&) = default; + constexpr BitField& operator=(const BitField&) = default; // StorageType is T for non-enum types and the underlying type of T if // T is an enumeration. Note that T is wrapped within an enable_if in the @@ -166,20 +166,20 @@ public: // so that we can use this within unions constexpr BitField() = default; - FORCE_INLINE operator T() const { + constexpr FORCE_INLINE operator T() const { return Value(); } - FORCE_INLINE void Assign(const T& value) { + constexpr FORCE_INLINE void Assign(const T& value) { storage = (storage & ~mask) | FormatValue(value); } - FORCE_INLINE T Value() const { + constexpr T Value() const { return ExtractValue(storage); } // TODO: we may want to change this to explicit operator bool() if it's bug-free in VS2015 - FORCE_INLINE bool ToBool() const { + constexpr FORCE_INLINE bool ToBool() const { return Value() != 0; } From 4e7e0f81125f30a108b90c5ee6cae9b248164d9e Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 4 Apr 2018 21:43:40 -0400 Subject: [PATCH 02/27] shader_bytecode: Add initial module for shader decoding. 
--- src/video_core/CMakeLists.txt | 1 + src/video_core/engines/shader_bytecode.h | 297 +++++++++++++++++++++++ 2 files changed, 298 insertions(+) create mode 100644 src/video_core/engines/shader_bytecode.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index a710c4bc53..4defb57863 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -9,6 +9,7 @@ add_library(video_core STATIC engines/maxwell_3d.h engines/maxwell_compute.cpp engines/maxwell_compute.h + engines/shader_bytecode.h gpu.cpp gpu.h macro_interpreter.cpp diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h new file mode 100644 index 0000000000..6660742ccf --- /dev/null +++ b/src/video_core/engines/shader_bytecode.h @@ -0,0 +1,297 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include "common/bit_field.h" + +namespace Tegra { +namespace Shader { + +struct Register { + constexpr Register() = default; + + constexpr Register(u64 value) : value(value) {} + + constexpr u64 GetIndex() const { + return value; + } + + constexpr operator u64() const { + return value; + } + + template + constexpr u64 operator-(const T& oth) const { + return value - oth; + } + + template + constexpr u64 operator&(const T& oth) const { + return value & oth; + } + + constexpr u64 operator&(const Register& oth) const { + return value & oth.value; + } + + constexpr u64 operator~() const { + return ~value; + } + +private: + u64 value; +}; + +union Attribute { + constexpr Attribute() = default; + + constexpr Attribute(u64 value) : value(value) {} + + enum class Index : u64 { + Position = 7, + Attribute_0 = 8, + }; + + constexpr Index GetIndex() const { + return index; + } + +public: + BitField<24, 6, Index> index; + BitField<22, 2, u64> element; + BitField<39, 8, u64> reg; + BitField<47, 3, u64> size; + u64 value; +}; 
+ +union Uniform { + BitField<20, 14, u64> offset; + BitField<34, 5, u64> index; +}; + +union OpCode { + enum class Id : u64 { + TEXS = 0x6C, + IPA = 0xE0, + FFMA_IMM = 0x65, + FFMA_CR = 0x93, + FFMA_RC = 0xA3, + FFMA_RR = 0xB3, + + FADD_C = 0x98B, + FMUL_C = 0x98D, + MUFU = 0xA10, + FADD_R = 0xB8B, + FMUL_R = 0xB8D, + LD_A = 0x1DFB, + ST_A = 0x1DFE, + + FSETP_R = 0x5BB, + FSETP_C = 0x4BB, + EXIT = 0xE30, + KIL = 0xE33, + + FMUL_IMM = 0x70D, + FMUL_IMM_x = 0x72D, + FADD_IMM = 0x70B, + FADD_IMM_x = 0x72B, + }; + + enum class Type { + Trivial, + Arithmetic, + Flow, + Memory, + Unknown, + }; + + struct Info { + Type type; + std::string name; + }; + + constexpr OpCode() = default; + + constexpr OpCode(Id value) : value(static_cast(value)) {} + + constexpr OpCode(u64 value) : value{value} {} + + constexpr Id EffectiveOpCode() const { + switch (op1) { + case Id::TEXS: + return op1; + } + + switch (op2) { + case Id::IPA: + return op2; + } + + switch (op3) { + case Id::FFMA_IMM: + case Id::FFMA_CR: + case Id::FFMA_RC: + case Id::FFMA_RR: + return op3; + } + + switch (op4) { + case Id::EXIT: + case Id::FSETP_R: + case Id::FSETP_C: + case Id::KIL: + return op4; + } + + switch (op5) { + case Id::MUFU: + case Id::LD_A: + case Id::ST_A: + case Id::FADD_R: + case Id::FADD_C: + case Id::FMUL_R: + case Id::FMUL_C: + return op5; + + case Id::FMUL_IMM: + case Id::FMUL_IMM_x: + return Id::FMUL_IMM; + + case Id::FADD_IMM: + case Id::FADD_IMM_x: + return Id::FADD_IMM; + } + + return static_cast(value); + } + + static const Info& GetInfo(const OpCode& opcode) { + static const std::map info_table{BuildInfoTable()}; + const auto& search{info_table.find(opcode.EffectiveOpCode())}; + if (search != info_table.end()) { + return search->second; + } + + static const Info unknown{Type::Unknown, "UNK"}; + return unknown; + } + + constexpr operator Id() const { + return static_cast(value); + } + + constexpr OpCode operator<<(size_t bits) const { + return value << bits; + } + + constexpr OpCode 
operator>>(size_t bits) const { + return value >> bits; + } + + template + constexpr u64 operator-(const T& oth) const { + return value - oth; + } + + constexpr u64 operator&(const OpCode& oth) const { + return value & oth.value; + } + + constexpr u64 operator~() const { + return ~value; + } + + static std::map BuildInfoTable() { + std::map info_table; + info_table[Id::TEXS] = {Type::Memory, "texs"}; + info_table[Id::LD_A] = {Type::Memory, "ld_a"}; + info_table[Id::ST_A] = {Type::Memory, "st_a"}; + info_table[Id::IPA] = {Type::Arithmetic, "ipa"}; + info_table[Id::MUFU] = {Type::Arithmetic, "mufu"}; + info_table[Id::FFMA_IMM] = {Type::Arithmetic, "ffma_imm"}; + info_table[Id::FFMA_CR] = {Type::Arithmetic, "ffma_cr"}; + info_table[Id::FFMA_RC] = {Type::Arithmetic, "ffma_rc"}; + info_table[Id::FFMA_RR] = {Type::Arithmetic, "ffma_rr"}; + info_table[Id::FADD_R] = {Type::Arithmetic, "fadd_r"}; + info_table[Id::FADD_C] = {Type::Arithmetic, "fadd_c"}; + info_table[Id::FADD_IMM] = {Type::Arithmetic, "fadd_imm"}; + info_table[Id::FMUL_R] = {Type::Arithmetic, "fmul_r"}; + info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"}; + info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"}; + info_table[Id::EXIT] = {Type::Trivial, "exit"}; + return info_table; + } + + BitField<57, 7, Id> op1; + BitField<56, 8, Id> op2; + BitField<55, 9, Id> op3; + BitField<52, 12, Id> op4; + BitField<51, 13, Id> op5; + u64 value; +}; +static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size"); + +} // namespace Shader +} // namespace Tegra + +namespace std { + +template <> +struct make_unsigned { + using type = Tegra::Shader::Attribute; +}; + +template <> +struct make_unsigned { + using type = Tegra::Shader::Register; +}; + +template <> +struct make_unsigned { + using type = Tegra::Shader::OpCode; +}; + +} // namespace std + +namespace Tegra { +namespace Shader { + +enum class Pred : u64 { + UnusedIndex = 0x7, + NeverExecute = 0xf, +}; + +#pragma pack(1) +union Instruction { + Instruction& 
operator=(const Instruction& instr) { + hex = instr.hex; + return *this; + } + + OpCode opcode; + BitField<0, 8, Register> gpr1; + BitField<8, 8, Register> gpr2; + BitField<16, 4, Pred> pred; + BitField<39, 8, Register> gpr3; + BitField<45, 1, u64> nb; + BitField<46, 1, u64> aa; + BitField<48, 1, u64> na; + BitField<49, 1, u64> ab; + BitField<50, 1, u64> ad; + Attribute attribute; + Uniform uniform; + + u64 hex; +}; +static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); +static_assert(std::is_standard_layout::value, + "Structure does not have standard layout"); + +#pragma pack() + +} // namespace Shader +} // namespace Tegra From ed7e597b4494f770f4907560af0aa778d7762226 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 4 Apr 2018 21:44:35 -0400 Subject: [PATCH 03/27] gl_shader_decompiler: Add skeleton code from Citra for shader analysis. --- .../renderer_opengl/gl_shader_decompiler.cpp | 167 ++++++++++++++---- .../renderer_opengl/gl_shader_decompiler.h | 19 +- 2 files changed, 142 insertions(+), 44 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 564ea8f9e5..3fc420649f 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2,57 +2,158 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include +#include #include -#include #include "common/assert.h" #include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" -namespace Maxwell3D { +namespace Tegra { namespace Shader { namespace Decompiler { constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; -class Impl { +class DecompileFail : public std::runtime_error { public: - Impl(const std::array& program_code, - const std::array& swizzle_data, u32 main_offset, - const std::function& inputreg_getter, - const std::function& outputreg_getter, bool sanitize_mul, - const std::string& emit_cb, const std::string& setemit_cb) - : program_code(program_code), swizzle_data(swizzle_data), main_offset(main_offset), - inputreg_getter(inputreg_getter), outputreg_getter(outputreg_getter), - sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {} + using std::runtime_error::runtime_error; +}; - std::string Decompile() { - UNREACHABLE(); - return {}; +/// Describes the behaviour of the code path between a given entry point and a return point. +enum class ExitMethod { + Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop. + AlwaysReturn, ///< All code paths reach the return point. + Conditional, ///< Code path reaches the return point or an END instruction conditionally. + AlwaysEnd, ///< All code paths reach an END instruction. +}; + +/// A subroutine is a range of code referenced by a CALL, IF or LOOP instruction. +struct Subroutine { + /// Generates a name suitable for GLSL source code. + std::string GetName() const { + return "sub_" + std::to_string(begin) + "_" + std::to_string(end); + } + + u32 begin; ///< Entry point of the subroutine. + u32 end; ///< Return point of the subroutine. + ExitMethod exit_method; ///< Exit method of the subroutine. + std::set labels; ///< Addresses referenced by JMP instructions. 
+ + bool operator<(const Subroutine& rhs) const { + return std::tie(begin, end) < std::tie(rhs.begin, rhs.end); + } +}; + +/// Analyzes shader code and produces a set of subroutines. +class ControlFlowAnalyzer { +public: + ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset) + : program_code(program_code) { + + // Recursively finds all subroutines. + const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END); + if (program_main.exit_method != ExitMethod::AlwaysEnd) + throw DecompileFail("Program does not always end"); + } + + std::set GetSubroutines() { + return std::move(subroutines); } private: - const std::array& program_code; - const std::array& swizzle_data; - u32 main_offset; - const std::function& inputreg_getter; - const std::function& outputreg_getter; - bool sanitize_mul; - const std::string& emit_cb; - const std::string& setemit_cb; + const ProgramCode& program_code; + std::set subroutines; + std::map, ExitMethod> exit_method_map; + + /// Adds and analyzes a new subroutine if it is not added yet. + const Subroutine& AddSubroutine(u32 begin, u32 end) { + auto iter = subroutines.find(Subroutine{begin, end}); + if (iter != subroutines.end()) + return *iter; + + Subroutine subroutine{begin, end}; + subroutine.exit_method = Scan(begin, end, subroutine.labels); + if (subroutine.exit_method == ExitMethod::Undetermined) + throw DecompileFail("Recursive function detected"); + return *subroutines.insert(std::move(subroutine)).first; + } + + /// Scans a range of code for labels and determines the exit method. 
+ ExitMethod Scan(u32 begin, u32 end, std::set& labels) { + auto [iter, inserted] = + exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); + ExitMethod& exit_method = iter->second; + if (!inserted) + return exit_method; + + for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) { + const Instruction instr = {program_code[offset]}; + switch (instr.opcode.Value().EffectiveOpCode()) { + case OpCode::Id::EXIT: { + return exit_method = ExitMethod::AlwaysEnd; + } + } + } + return exit_method = ExitMethod::AlwaysReturn; + } }; -std::string DecompileProgram(const std::array& program_code, - const std::array& swizzle_data, - u32 main_offset, - const std::function& inputreg_getter, - const std::function& outputreg_getter, - bool sanitize_mul, const std::string& emit_cb, - const std::string& setemit_cb) { - Impl impl(program_code, swizzle_data, main_offset, inputreg_getter, outputreg_getter, - sanitize_mul, emit_cb, setemit_cb); - return impl.Decompile(); +class ShaderWriter { +public: + void AddLine(const std::string& text) { + DEBUG_ASSERT(scope >= 0); + if (!text.empty()) { + shader_source += std::string(static_cast(scope) * 4, ' '); + } + shader_source += text + '\n'; + } + + std::string GetResult() { + return std::move(shader_source); + } + + int scope = 0; + +private: + std::string shader_source; +}; + +class GLSLGenerator { +public: + GLSLGenerator(const std::set& subroutines, const ProgramCode& program_code, + u32 main_offset) + : subroutines(subroutines), program_code(program_code), main_offset(main_offset) { + + Generate(); + } + + std::string GetShaderCode() { + return shader.GetResult(); + } + +private: + const std::set& subroutines; + const ProgramCode& program_code; + const u32 main_offset; + + ShaderWriter shader; + + void Generate() {} +}; + +boost::optional DecompileProgram(const ProgramCode& program_code, u32 main_offset) { + try { + auto subroutines = ControlFlowAnalyzer(program_code, 
main_offset).GetSubroutines(); + GLSLGenerator generator(subroutines, program_code, main_offset); + return generator.GetShaderCode(); + } catch (const DecompileFail& exception) { + LOG_ERROR(HW_GPU, "Shader decompilation failed: %s", exception.what()); + } + return boost::none; } } // namespace Decompiler } // namespace Shader -} // namespace Maxwell3D +} // namespace Tegra diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 02ebfcbe8a..628f02c931 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -5,23 +5,20 @@ #include #include #include +#include #include "common/common_types.h" -namespace Maxwell3D { +namespace Tegra { namespace Shader { namespace Decompiler { -constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x100000}; -constexpr size_t MAX_SWIZZLE_DATA_LENGTH{0x100000}; +constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x100}; +constexpr size_t MAX_SWIZZLE_DATA_LENGTH{0x100}; -std::string DecompileProgram(const std::array& program_code, - const std::array& swizzle_data, - u32 main_offset, - const std::function& inputreg_getter, - const std::function& outputreg_getter, - bool sanitize_mul, const std::string& emit_cb = "", - const std::string& setemit_cb = ""); +using ProgramCode = std::array; + +boost::optional DecompileProgram(const ProgramCode& program_code, u32 main_offset); } // namespace Decompiler } // namespace Shader -} // namespace Maxwell3D +} // namespace Tegra From dbfd106ba023e408bdb733f39af30134712b97bf Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 6 Apr 2018 23:53:19 -0400 Subject: [PATCH 04/27] gl_resource_manager: Grab latest upstream. 
--- .../renderer_opengl/gl_resource_manager.h | 116 +++++++++++++----- 1 file changed, 86 insertions(+), 30 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 7da5e74d18..557f73a513 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -13,14 +13,16 @@ class OGLTexture : private NonCopyable { public: OGLTexture() = default; - OGLTexture(OGLTexture&& o) { - std::swap(handle, o.handle); - } + + OGLTexture(OGLTexture&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLTexture() { Release(); } + OGLTexture& operator=(OGLTexture&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -46,14 +48,16 @@ public: class OGLSampler : private NonCopyable { public: OGLSampler() = default; - OGLSampler(OGLSampler&& o) { - std::swap(handle, o.handle); - } + + OGLSampler(OGLSampler&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLSampler() { Release(); } + OGLSampler& operator=(OGLSampler&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -79,25 +83,71 @@ public: class OGLShader : private NonCopyable { public: OGLShader() = default; - OGLShader(OGLShader&& o) { - std::swap(handle, o.handle); - } + + OGLShader(OGLShader&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLShader() { Release(); } + OGLShader& operator=(OGLShader&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } - /// Creates a new internal OpenGL resource and stores the handle - void Create(const char* vert_shader, const char* geo_shader, const char* frag_shader, - const std::vector& feedback_vars = {}, - bool separable_program = false) { + void Create(const char* source, GLenum type) { if (handle != 0) return; - handle = GLShader::LoadProgram(vert_shader, geo_shader, frag_shader, 
feedback_vars, - separable_program); + if (source == nullptr) + return; + handle = GLShader::LoadShader(source, type); + } + + void Release() { + if (handle == 0) + return; + glDeleteShader(handle); + handle = 0; + } + + GLuint handle = 0; +}; + +class OGLProgram : private NonCopyable { +public: + OGLProgram() = default; + + OGLProgram(OGLProgram&& o) : handle(std::exchange(o.handle, 0)) {} + + ~OGLProgram() { + Release(); + } + + OGLProgram& operator=(OGLProgram&& o) { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + + template + void Create(bool separable_program = false, T... shaders) { + if (handle != 0) + return; + handle = GLShader::LoadProgram(separable_program, shaders...); + } + + /// Creates a new internal OpenGL resource and stores the handle + void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, + bool separable_program = false) { + OGLShader vert, geo, frag; + if (vert_shader) + vert.Create(vert_shader, GL_VERTEX_SHADER); + if (geo_shader) + geo.Create(geo_shader, GL_GEOMETRY_SHADER); + if (frag_shader) + frag.Create(frag_shader, GL_FRAGMENT_SHADER); + Create(separable_program, vert.handle, geo.handle, frag.handle); } /// Deletes the internal OpenGL resource @@ -148,14 +198,16 @@ public: class OGLBuffer : private NonCopyable { public: OGLBuffer() = default; - OGLBuffer(OGLBuffer&& o) { - std::swap(handle, o.handle); - } + + OGLBuffer(OGLBuffer&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLBuffer() { Release(); } + OGLBuffer& operator=(OGLBuffer&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -214,14 +266,16 @@ public: class OGLVertexArray : private NonCopyable { public: OGLVertexArray() = default; - OGLVertexArray(OGLVertexArray&& o) { - std::swap(handle, o.handle); - } + + OGLVertexArray(OGLVertexArray&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLVertexArray() { Release(); } + OGLVertexArray& 
operator=(OGLVertexArray&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -247,14 +301,16 @@ public: class OGLFramebuffer : private NonCopyable { public: OGLFramebuffer() = default; - OGLFramebuffer(OGLFramebuffer&& o) { - std::swap(handle, o.handle); - } + + OGLFramebuffer(OGLFramebuffer&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLFramebuffer() { Release(); } + OGLFramebuffer& operator=(OGLFramebuffer&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } From 4f2b2d0bc5e56a5f1e05a2d1cae52d8890fa3ce9 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 6 Apr 2018 23:54:44 -0400 Subject: [PATCH 05/27] gl_shader_util: Grab latest upstream. --- .../renderer_opengl/gl_shader_util.cpp | 169 +++--------------- .../renderer_opengl/gl_shader_util.h | 56 +++++- 2 files changed, 75 insertions(+), 150 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index a6c6204d52..8568fface0 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -10,156 +10,41 @@ namespace GLShader { -GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, - const char* fragment_shader, const std::vector& feedback_vars, - bool separable_program) { - // Create the shaders - GLuint vertex_shader_id = vertex_shader ? glCreateShader(GL_VERTEX_SHADER) : 0; - GLuint geometry_shader_id = geometry_shader ? glCreateShader(GL_GEOMETRY_SHADER) : 0; - GLuint fragment_shader_id = fragment_shader ? 
glCreateShader(GL_FRAGMENT_SHADER) : 0; +GLuint LoadShader(const char* source, GLenum type) { + const char* debug_type; + switch (type) { + case GL_VERTEX_SHADER: + debug_type = "vertex"; + break; + case GL_GEOMETRY_SHADER: + debug_type = "geometry"; + break; + case GL_FRAGMENT_SHADER: + debug_type = "fragment"; + break; + default: + UNREACHABLE(); + } + GLuint shader_id = glCreateShader(type); + glShaderSource(shader_id, 1, &source, nullptr); + NGLOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); + glCompileShader(shader_id); GLint result = GL_FALSE; - int info_log_length; - - if (vertex_shader) { - // Compile Vertex Shader - LOG_DEBUG(Render_OpenGL, "Compiling vertex shader..."); - - glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr); - glCompileShader(vertex_shader_id); - - // Check Vertex Shader - glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::vector vertex_shader_error(info_log_length); - glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]); - } else { - LOG_CRITICAL(Render_OpenGL, "Error compiling vertex shader:\n%s", - &vertex_shader_error[0]); - } - } - } - - if (geometry_shader) { - // Compile Geometry Shader - LOG_DEBUG(Render_OpenGL, "Compiling geometry shader..."); - - glShaderSource(geometry_shader_id, 1, &geometry_shader, nullptr); - glCompileShader(geometry_shader_id); - - // Check Geometry Shader - glGetShaderiv(geometry_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(geometry_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::vector geometry_shader_error(info_log_length); - glGetShaderInfoLog(geometry_shader_id, info_log_length, nullptr, - &geometry_shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", 
&geometry_shader_error[0]); - } else { - LOG_CRITICAL(Render_OpenGL, "Error compiling geometry shader:\n%s", - &geometry_shader_error[0]); - } - } - } - - if (fragment_shader) { - // Compile Fragment Shader - LOG_DEBUG(Render_OpenGL, "Compiling fragment shader..."); - - glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr); - glCompileShader(fragment_shader_id); - - // Check Fragment Shader - glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::vector fragment_shader_error(info_log_length); - glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr, - &fragment_shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); - } else { - LOG_CRITICAL(Render_OpenGL, "Error compiling fragment shader:\n%s", - &fragment_shader_error[0]); - } - } - } - - // Link the program - LOG_DEBUG(Render_OpenGL, "Linking program..."); - - GLuint program_id = glCreateProgram(); - if (vertex_shader) { - glAttachShader(program_id, vertex_shader_id); - } - if (geometry_shader) { - glAttachShader(program_id, geometry_shader_id); - } - if (fragment_shader) { - glAttachShader(program_id, fragment_shader_id); - } - - if (!feedback_vars.empty()) { - auto varyings = feedback_vars; - glTransformFeedbackVaryings(program_id, static_cast(feedback_vars.size()), - &varyings[0], GL_INTERLEAVED_ATTRIBS); - } - - if (separable_program) { - glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); - } - - glLinkProgram(program_id); - - // Check the program - glGetProgramiv(program_id, GL_LINK_STATUS, &result); - glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); + GLint info_log_length; + glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); + glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); if (info_log_length > 1) { - std::vector program_error(info_log_length); - 
glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); + std::string shader_error(info_log_length, ' '); + glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]); + NGLOG_DEBUG(Render_OpenGL, "{}", shader_error); } else { - LOG_CRITICAL(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]); + NGLOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); } } - - // If the program linking failed at least one of the shaders was probably bad - if (result == GL_FALSE) { - if (vertex_shader) { - LOG_CRITICAL(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); - } - if (geometry_shader) { - LOG_CRITICAL(Render_OpenGL, "Geometry shader:\n%s", geometry_shader); - } - if (fragment_shader) { - LOG_CRITICAL(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); - } - } - ASSERT_MSG(result == GL_TRUE, "Shader not linked"); - - if (vertex_shader) { - glDetachShader(program_id, vertex_shader_id); - glDeleteShader(vertex_shader_id); - } - if (geometry_shader) { - glDetachShader(program_id, geometry_shader_id); - glDeleteShader(geometry_shader_id); - } - if (fragment_shader) { - glDetachShader(program_id, fragment_shader_id); - glDeleteShader(fragment_shader_id); - } - - return program_id; + return shader_id; } } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index fc7b5e080f..5a0008703d 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -10,14 +10,54 @@ namespace GLShader { /** - * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) - * @param vertex_shader String of the GLSL vertex shader program - * @param geometry_shader String of the GLSL geometry shader program - * @param fragment_shader String of the GLSL fragment shader 
program - * @returns Handle of the newly created OpenGL shader object + * Utility function to create and compile an OpenGL GLSL shader + * @param source String of the GLSL shader program + * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) */ -GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, - const char* fragment_shader, const std::vector& feedback_vars = {}, - bool separable_program = false); +GLuint LoadShader(const char* source, GLenum type); + +/** + * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) + * @param separable_program whether to create a separable program + * @param shaders ID of shaders to attach to the program + * @returns Handle of the newly created OpenGL program object + */ +template +GLuint LoadProgram(bool separable_program, T... shaders) { + // Link the program + NGLOG_DEBUG(Render_OpenGL, "Linking program..."); + + GLuint program_id = glCreateProgram(); + + ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...); + + if (separable_program) { + glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); + } + + glLinkProgram(program_id); + + // Check the program + GLint result = GL_FALSE; + GLint info_log_length; + glGetProgramiv(program_id, GL_LINK_STATUS, &result); + glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); + + if (info_log_length > 1) { + std::string program_error(info_log_length, ' '); + glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); + if (result == GL_TRUE) { + NGLOG_DEBUG(Render_OpenGL, "{}", program_error); + } else { + NGLOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error); + } + } + + ASSERT_MSG(result == GL_TRUE, "Shader not linked"); + + ((shaders == 0 ? 
(void)0 : glDetachShader(program_id, shaders)), ...); + + return program_id; +} } // namespace GLShader From da1114ca59ab2fbd4a1020db79f98b75cf4a6d5a Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 6 Apr 2018 23:56:42 -0400 Subject: [PATCH 06/27] renderer_opengl: Use OGLProgram instead of OGLShader. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- src/video_core/renderer_opengl/gl_rasterizer.h | 2 +- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 2 +- src/video_core/renderer_opengl/gl_rasterizer_cache.h | 2 +- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 +- src/video_core/renderer_opengl/renderer_opengl.h | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f217a265ba..fc69fbe5a7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -573,7 +573,7 @@ void main() { current_shader = &test_shader; if (has_ARB_separate_shader_objects) { - test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true); + test_shader.shader.CreateFromSource(vertex_shader, nullptr, fragment_shader, true); glActiveShaderProgram(pipeline.handle, test_shader.shader.handle); } else { UNREACHABLE(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d868bf421d..989c62d0d7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -45,7 +45,7 @@ public: /// OpenGL shader generated for a given Maxwell register state struct MaxwellShader { /// OpenGL shader resource - OGLShader shader; + OGLProgram shader; }; struct VertexShader { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 5cbafa2e7d..213b20a21a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -818,7 +818,7 @@ void main() { color = texelFetch(tbo, tbo_offset).rabg; } )"; - d24s8_abgr_shader.Create(vs_source, nullptr, fs_source); + d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source); OpenGLState state = OpenGLState::GetCurState(); GLuint old_program = state.draw.shader_program; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 06524fc59e..e7ce506cf5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -334,7 +334,7 @@ private: OGLVertexArray attributeless_vao; OGLBuffer d24s8_abgr_buffer; GLsizeiptr d24s8_abgr_buffer_size; - OGLShader d24s8_abgr_shader; + OGLProgram d24s8_abgr_shader; GLint d24s8_abgr_tbo_size_u_id; GLint d24s8_abgr_viewport_u_id; }; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 78b50b227e..2fabf5cabe 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -210,7 +210,7 @@ void RendererOpenGL::InitOpenGLObjects() { 0.0f); // Link shaders and get variable locations - shader.Create(vertex_shader, nullptr, fragment_shader); + shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); state.draw.shader_program = shader.handle; state.Apply(); uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index c52f40037a..2cc6d9a001 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -72,7 +72,7 @@ private: // OpenGL object IDs OGLVertexArray vertex_array; OGLBuffer vertex_buffer; - OGLShader shader; + OGLProgram shader; /// Display information for Switch screen 
ScreenInfo screen_info; From 45fd7c4a37d4b890f98f8a0a9373e57c0830cc8d Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 7 Apr 2018 00:48:22 -0400 Subject: [PATCH 07/27] common: Port cityhash code from Citra. --- src/common/CMakeLists.txt | 3 +- src/common/cityhash.cpp | 340 ++++++++++++++++++++++++++++++++++++++ src/common/cityhash.h | 110 ++++++++++++ src/common/hash.cpp | 141 ---------------- src/common/hash.h | 55 +++++- 5 files changed, 502 insertions(+), 147 deletions(-) create mode 100644 src/common/cityhash.cpp create mode 100644 src/common/cityhash.h delete mode 100644 src/common/hash.cpp diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2ba1da1957..d6eb9055b4 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -32,6 +32,8 @@ add_library(common STATIC break_points.cpp break_points.h chunk_file.h + cityhash.cpp + cityhash.h code_block.h color.h common_funcs.h @@ -39,7 +41,6 @@ add_library(common STATIC common_types.h file_util.cpp file_util.h - hash.cpp hash.h linear_disk_cache.h logging/backend.cpp diff --git a/src/common/cityhash.cpp b/src/common/cityhash.cpp new file mode 100644 index 0000000000..de31ffbd81 --- /dev/null +++ b/src/common/cityhash.cpp @@ -0,0 +1,340 @@ +// Copyright (c) 2011 Google, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// CityHash, by Geoff Pike and Jyrki Alakuijala +// +// This file provides CityHash64() and related functions. +// +// It's probably possible to create even faster hash functions by +// writing a program that systematically explores some of the space of +// possible hash functions, by using SIMD instructions, or by +// compromising on hash quality. + +#include +#include // for memcpy and memset +#include "cityhash.h" +#include "common/swap.h" + +// #include "config.h" +#ifdef __GNUC__ +#define HAVE_BUILTIN_EXPECT 1 +#endif +#ifdef COMMON_BIG_ENDIAN +#define WORDS_BIGENDIAN 1 +#endif + +using namespace std; + +typedef uint8_t uint8; +typedef uint32_t uint32; +typedef uint64_t uint64; + +namespace Common { + +static uint64 UNALIGNED_LOAD64(const char* p) { + uint64 result; + memcpy(&result, p, sizeof(result)); + return result; +} + +static uint32 UNALIGNED_LOAD32(const char* p) { + uint32 result; + memcpy(&result, p, sizeof(result)); + return result; +} + +#ifdef WORDS_BIGENDIAN +#define uint32_in_expected_order(x) (swap32(x)) +#define uint64_in_expected_order(x) (swap64(x)) +#else +#define uint32_in_expected_order(x) (x) +#define uint64_in_expected_order(x) (x) +#endif + +#if !defined(LIKELY) +#if HAVE_BUILTIN_EXPECT +#define LIKELY(x) (__builtin_expect(!!(x), 1)) +#else +#define LIKELY(x) (x) +#endif +#endif + +static uint64 Fetch64(const char* p) { + return uint64_in_expected_order(UNALIGNED_LOAD64(p)); +} + +static uint32 Fetch32(const char* p) { + return 
uint32_in_expected_order(UNALIGNED_LOAD32(p)); +} + +// Some primes between 2^63 and 2^64 for various uses. +static const uint64 k0 = 0xc3a5c85c97cb3127ULL; +static const uint64 k1 = 0xb492b66fbe98f273ULL; +static const uint64 k2 = 0x9ae16a3b2f90404fULL; + +// Bitwise right rotate. Normally this will compile to a single +// instruction, especially if the shift is a manifest constant. +static uint64 Rotate(uint64 val, int shift) { + // Avoid shifting by 64: doing so yields an undefined result. + return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); +} + +static uint64 ShiftMix(uint64 val) { + return val ^ (val >> 47); +} + +static uint64 HashLen16(uint64 u, uint64 v) { + return Hash128to64(uint128(u, v)); +} + +static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) { + // Murmur-inspired hashing. + uint64 a = (u ^ v) * mul; + a ^= (a >> 47); + uint64 b = (v ^ a) * mul; + b ^= (b >> 47); + b *= mul; + return b; +} + +static uint64 HashLen0to16(const char* s, size_t len) { + if (len >= 8) { + uint64 mul = k2 + len * 2; + uint64 a = Fetch64(s) + k2; + uint64 b = Fetch64(s + len - 8); + uint64 c = Rotate(b, 37) * mul + a; + uint64 d = (Rotate(a, 25) + b) * mul; + return HashLen16(c, d, mul); + } + if (len >= 4) { + uint64 mul = k2 + len * 2; + uint64 a = Fetch32(s); + return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); + } + if (len > 0) { + uint8 a = s[0]; + uint8 b = s[len >> 1]; + uint8 c = s[len - 1]; + uint32 y = static_cast(a) + (static_cast(b) << 8); + uint32 z = static_cast(len) + (static_cast(c) << 2); + return ShiftMix(y * k2 ^ z * k0) * k2; + } + return k2; +} + +// This probably works well for 16-byte strings as well, but it may be overkill +// in that case. 
+static uint64 HashLen17to32(const char* s, size_t len) { + uint64 mul = k2 + len * 2; + uint64 a = Fetch64(s) * k1; + uint64 b = Fetch64(s + 8); + uint64 c = Fetch64(s + len - 8) * mul; + uint64 d = Fetch64(s + len - 16) * k2; + return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, a + Rotate(b + k2, 18) + c, mul); +} + +// Return a 16-byte hash for 48 bytes. Quick and dirty. +// Callers do best to use "random-looking" values for a and b. +static pair WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, + uint64 b) { + a += w; + b = Rotate(b + a + z, 21); + uint64 c = a; + a += x; + a += y; + b += Rotate(a, 44); + return make_pair(a + z, b + c); +} + +// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. +static pair WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) { + return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16), Fetch64(s + 24), a, + b); +} + +// Return an 8-byte hash for 33 to 64 bytes. +static uint64 HashLen33to64(const char* s, size_t len) { + uint64 mul = k2 + len * 2; + uint64 a = Fetch64(s) * k2; + uint64 b = Fetch64(s + 8); + uint64 c = Fetch64(s + len - 24); + uint64 d = Fetch64(s + len - 32); + uint64 e = Fetch64(s + 16) * k2; + uint64 f = Fetch64(s + 24) * 9; + uint64 g = Fetch64(s + len - 8); + uint64 h = Fetch64(s + len - 16) * mul; + uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9; + uint64 v = ((a + g) ^ d) + f + 1; + uint64 w = swap64((u + v) * mul) + h; + uint64 x = Rotate(e + f, 42) + c; + uint64 y = (swap64((v + w) * mul) + g) * mul; + uint64 z = e + f + c; + a = swap64((x + z) * mul + y) + b; + b = ShiftMix((z + a) * mul + d + h) * mul; + return b + x; +} + +uint64 CityHash64(const char* s, size_t len) { + if (len <= 32) { + if (len <= 16) { + return HashLen0to16(s, len); + } else { + return HashLen17to32(s, len); + } + } else if (len <= 64) { + return HashLen33to64(s, len); + } + + // For strings over 64 bytes we hash the end first, and then as we + // loop 
we keep 56 bytes of state: v, w, x, y, and z. + uint64 x = Fetch64(s + len - 40); + uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56); + uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24)); + pair v = WeakHashLen32WithSeeds(s + len - 64, len, z); + pair w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); + x = x * k1 + Fetch64(s); + + // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. + len = (len - 1) & ~static_cast(63); + do { + x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + len -= 64; + } while (len != 0); + return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, + HashLen16(v.second, w.second) + x); +} + +uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) { + return CityHash64WithSeeds(s, len, k2, seed); +} + +uint64 CityHash64WithSeeds(const char* s, size_t len, uint64 seed0, uint64 seed1) { + return HashLen16(CityHash64(s, len) - seed0, seed1); +} + +// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings +// of any length representable in signed long. Based on City and Murmur. +static uint128 CityMurmur(const char* s, size_t len, uint128 seed) { + uint64 a = Uint128Low64(seed); + uint64 b = Uint128High64(seed); + uint64 c = 0; + uint64 d = 0; + signed long l = static_cast(len) - 16; + if (l <= 0) { // len <= 16 + a = ShiftMix(a * k1) * k1; + c = b * k1 + HashLen0to16(s, len); + d = ShiftMix(a + (len >= 8 ? 
Fetch64(s) : c)); + } else { // len > 16 + c = HashLen16(Fetch64(s + len - 8) + k1, a); + d = HashLen16(b + len, c + Fetch64(s + len - 16)); + a += d; + do { + a ^= ShiftMix(Fetch64(s) * k1) * k1; + a *= k1; + b ^= a; + c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; + c *= k1; + d ^= c; + s += 16; + l -= 16; + } while (l > 0); + } + a = HashLen16(a, c); + b = HashLen16(d, b); + return uint128(a ^ b, HashLen16(b, a)); +} + +uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) { + if (len < 128) { + return CityMurmur(s, len, seed); + } + + // We expect len >= 128 to be the common case. Keep 56 bytes of state: + // v, w, x, y, and z. + pair v, w; + uint64 x = Uint128Low64(seed); + uint64 y = Uint128High64(seed); + uint64 z = len * k1; + v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s); + v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8); + w.first = Rotate(y + z, 35) * k1 + x; + w.second = Rotate(x + Fetch64(s + 88), 53) * k1; + + // This is the same inner loop as CityHash64(), manually unrolled. 
+ do { + x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + len -= 128; + } while (LIKELY(len >= 128)); + x += Rotate(v.first + z, 49) * k0; + y = y * k0 + Rotate(w.second, 37); + z = z * k0 + Rotate(w.first, 27); + w.first *= 9; + v.first *= k0; + // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. + for (size_t tail_done = 0; tail_done < len;) { + tail_done += 32; + y = Rotate(x + y, 42) * k0 + v.second; + w.first += Fetch64(s + len - tail_done + 16); + x = x * k0 + w.first; + z += w.second + Fetch64(s + len - tail_done); + w.second += v.first; + v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); + v.first *= k0; + } + // At this point our 56 bytes of state should contain more than + // enough information for a strong 128-bit hash. We use two + // different 56-byte-to-8-byte hashes to get a 16-byte final result. + x = HashLen16(x, v.first); + y = HashLen16(y + z, w.first); + return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second)); +} + +uint128 CityHash128(const char* s, size_t len) { + return len >= 16 + ? 
CityHash128WithSeed(s + 16, len - 16, uint128(Fetch64(s), Fetch64(s + 8) + k0)) + : CityHash128WithSeed(s, len, uint128(k0, k1)); +} + +} // namespace Common diff --git a/src/common/cityhash.h b/src/common/cityhash.h new file mode 100644 index 0000000000..bcebdb1507 --- /dev/null +++ b/src/common/cityhash.h @@ -0,0 +1,110 @@ +// Copyright (c) 2011 Google, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// CityHash, by Geoff Pike and Jyrki Alakuijala +// +// http://code.google.com/p/cityhash/ +// +// This file provides a few functions for hashing strings. All of them are +// high-quality functions in the sense that they pass standard tests such +// as Austin Appleby's SMHasher. They are also fast. +// +// For 64-bit x86 code, on short strings, we don't know of anything faster than +// CityHash64 that is of comparable quality. We believe our nearest competitor +// is Murmur3. 
For 64-bit x86 code, CityHash64 is an excellent choice for hash +// tables and most other hashing (excluding cryptography). +// +// For 64-bit x86 code, on long strings, the picture is more complicated. +// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc., +// CityHashCrc128 appears to be faster than all competitors of comparable +// quality. CityHash128 is also good but not quite as fast. We believe our +// nearest competitor is Bob Jenkins' Spooky. We don't have great data for +// other 64-bit CPUs, but for long strings we know that Spooky is slightly +// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example. +// Note that CityHashCrc128 is declared in citycrc.h. +// +// For 32-bit x86 code, we don't know of anything faster than CityHash32 that +// is of comparable quality. We believe our nearest competitor is Murmur3A. +// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.) +// +// Functions in the CityHash family are not suitable for cryptography. +// +// Please see CityHash's README file for more details on our performance +// measurements and so on. +// +// WARNING: This code has been only lightly tested on big-endian platforms! +// It is known to work well on little-endian platforms that have a small penalty +// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs. +// It should work on all 32-bit and 64-bit platforms that allow unaligned reads; +// bug reports are welcome. +// +// By the way, for some hash functions, given strings a and b, the hash +// of a+b is easily derived from the hashes of a and b. This property +// doesn't hold for any hash functions in this file. + +#pragma once + +#include +#include +#include // for size_t. + +namespace Common { + +typedef std::pair uint128; + +inline uint64_t Uint128Low64(const uint128& x) { + return x.first; +} +inline uint64_t Uint128High64(const uint128& x) { + return x.second; +} + +// Hash function for a byte array. 
+uint64_t CityHash64(const char* buf, size_t len); + +// Hash function for a byte array. For convenience, a 64-bit seed is also +// hashed into the result. +uint64_t CityHash64WithSeed(const char* buf, size_t len, uint64_t seed); + +// Hash function for a byte array. For convenience, two seeds are also +// hashed into the result. +uint64_t CityHash64WithSeeds(const char* buf, size_t len, uint64_t seed0, uint64_t seed1); + +// Hash function for a byte array. +uint128 CityHash128(const char* s, size_t len); + +// Hash function for a byte array. For convenience, a 128-bit seed is also +// hashed into the result. +uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed); + +// Hash 128 input bits down to 64 bits of output. +// This is intended to be a reasonably good hash function. +inline uint64_t Hash128to64(const uint128& x) { + // Murmur-inspired hashing. + const uint64_t kMul = 0x9ddfea08eb382d69ULL; + uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; + a ^= (a >> 47); + uint64_t b = (Uint128High64(x) ^ a) * kMul; + b ^= (b >> 47); + b *= kMul; + return b; +} + +} // namespace Common diff --git a/src/common/hash.cpp b/src/common/hash.cpp deleted file mode 100644 index a02e9e5b9d..0000000000 --- a/src/common/hash.cpp +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#if defined(_MSC_VER) -#include -#endif -#include "common/common_funcs.h" -#include "common/common_types.h" -#include "common/hash.h" - -namespace Common { - -// MurmurHash3 was written by Austin Appleby, and is placed in the public -// domain. The author hereby disclaims copyright to this source code. 
- -// Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do -// the conversion here -static FORCE_INLINE u64 getblock64(const u64* p, size_t i) { - return p[i]; -} - -// Finalization mix - force all bits of a hash block to avalanche -static FORCE_INLINE u64 fmix64(u64 k) { - k ^= k >> 33; - k *= 0xff51afd7ed558ccdllu; - k ^= k >> 33; - k *= 0xc4ceb9fe1a85ec53llu; - k ^= k >> 33; - - return k; -} - -// This is the 128-bit variant of the MurmurHash3 hash function that is targeted for 64-bit -// platforms (MurmurHash3_x64_128). It was taken from: -// https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp -void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out) { - const u8* data = (const u8*)key; - const size_t nblocks = len / 16; - - u64 h1 = seed; - u64 h2 = seed; - - const u64 c1 = 0x87c37b91114253d5llu; - const u64 c2 = 0x4cf5ad432745937fllu; - - // Body - - const u64* blocks = (const u64*)(data); - - for (size_t i = 0; i < nblocks; i++) { - u64 k1 = getblock64(blocks, i * 2 + 0); - u64 k2 = getblock64(blocks, i * 2 + 1); - - k1 *= c1; - k1 = _rotl64(k1, 31); - k1 *= c2; - h1 ^= k1; - - h1 = _rotl64(h1, 27); - h1 += h2; - h1 = h1 * 5 + 0x52dce729; - - k2 *= c2; - k2 = _rotl64(k2, 33); - k2 *= c1; - h2 ^= k2; - - h2 = _rotl64(h2, 31); - h2 += h1; - h2 = h2 * 5 + 0x38495ab5; - } - - // Tail - - const u8* tail = (const u8*)(data + nblocks * 16); - - u64 k1 = 0; - u64 k2 = 0; - - switch (len & 15) { - case 15: - k2 ^= ((u64)tail[14]) << 48; - case 14: - k2 ^= ((u64)tail[13]) << 40; - case 13: - k2 ^= ((u64)tail[12]) << 32; - case 12: - k2 ^= ((u64)tail[11]) << 24; - case 11: - k2 ^= ((u64)tail[10]) << 16; - case 10: - k2 ^= ((u64)tail[9]) << 8; - case 9: - k2 ^= ((u64)tail[8]) << 0; - k2 *= c2; - k2 = _rotl64(k2, 33); - k2 *= c1; - h2 ^= k2; - - case 8: - k1 ^= ((u64)tail[7]) << 56; - case 7: - k1 ^= ((u64)tail[6]) << 48; - case 6: - k1 ^= ((u64)tail[5]) << 40; - case 5: - k1 ^= ((u64)tail[4]) << 
32; - case 4: - k1 ^= ((u64)tail[3]) << 24; - case 3: - k1 ^= ((u64)tail[2]) << 16; - case 2: - k1 ^= ((u64)tail[1]) << 8; - case 1: - k1 ^= ((u64)tail[0]) << 0; - k1 *= c1; - k1 = _rotl64(k1, 31); - k1 *= c2; - h1 ^= k1; - }; - - // Finalization - - h1 ^= len; - h2 ^= len; - - h1 += h2; - h2 += h1; - - h1 = fmix64(h1); - h2 = fmix64(h2); - - h1 += h2; - h2 += h1; - - ((u64*)out)[0] = h1; - ((u64*)out)[1] = h2; -} - -} // namespace Common diff --git a/src/common/hash.h b/src/common/hash.h index ee2560dadb..73c326980e 100644 --- a/src/common/hash.h +++ b/src/common/hash.h @@ -5,12 +5,12 @@ #pragma once #include +#include +#include "common/cityhash.h" #include "common/common_types.h" namespace Common { -void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out); - /** * Computes a 64-bit hash over the specified block of data * @param data Block of data to compute hash over @@ -18,9 +18,54 @@ void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out); * @returns 64-bit hash value that was computed over the data block */ static inline u64 ComputeHash64(const void* data, size_t len) { - u64 res[2]; - MurmurHash3_128(data, len, 0, res); - return res[0]; + return CityHash64(static_cast(data), len); } +/** + * Computes a 64-bit hash of a struct. In addition to being trivially copyable, it is also critical + * that either the struct includes no padding, or that any padding is initialized to a known value + * by memsetting the struct to 0 before filling it in. + */ +template +static inline u64 ComputeStructHash64(const T& data) { + static_assert(std::is_trivially_copyable(), + "Type passed to ComputeStructHash64 must be trivially copyable"); + return ComputeHash64(&data, sizeof(data)); +} + +/// A helper template that ensures the padding in a struct is initialized by memsetting to 0. 
+template +struct HashableStruct { + // In addition to being trivially copyable, T must also have a trivial default constructor, + // because any member initialization would be overridden by memset + static_assert(std::is_trivial(), "Type passed to HashableStruct must be trivial"); + /* + * We use a union because "implicitly-defined copy/move constructor for a union X copies the + * object representation of X." and "implicitly-defined copy assignment operator for a union X + * copies the object representation (3.9) of X." = Bytewise copy instead of memberwise copy. + * This is important because the padding bytes are included in the hash and comparison between + * objects. + */ + union { + T state; + }; + + HashableStruct() { + // Memset structure to zero padding bits, so that they will be deterministic when hashing + std::memset(&state, 0, sizeof(T)); + } + + bool operator==(const HashableStruct& o) const { + return std::memcmp(&state, &o.state, sizeof(T)) == 0; + }; + + bool operator!=(const HashableStruct& o) const { + return !(*this == o); + }; + + size_t Hash() const { + return Common::ComputeStructHash64(state); + } +}; + } // namespace Common From 2fcbb35ad22cd9d683c9839db9179f93f061f4e2 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 7 Apr 2018 01:09:33 -0400 Subject: [PATCH 08/27] gl_shader_util: Add missing includes. 
--- src/video_core/renderer_opengl/gl_shader_util.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 5a0008703d..a1fa9e814a 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -6,6 +6,8 @@ #include #include +#include "common/assert.h" +#include "common/logging/log.h" namespace GLShader { From 10953495c1b7de412312b64370d99a2294dfe6a2 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 7 Apr 2018 04:51:42 -0400 Subject: [PATCH 09/27] gl_shader_gen: Add hashable setup/config structs. --- .../renderer_opengl/gl_shader_gen.cpp | 4 +- .../renderer_opengl/gl_shader_gen.h | 75 ++++++++++++------- 2 files changed, 50 insertions(+), 29 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 8f3c988009..524c2cfb5d 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -7,12 +7,12 @@ namespace GLShader { -std::string GenerateVertexShader(const MaxwellVSConfig& config) { +std::string GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config) { UNREACHABLE(); return {}; } -std::string GenerateFragmentShader(const MaxwellFSConfig& config) { +std::string GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config) { UNREACHABLE(); return {}; } diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 5101e7d300..925e66ee43 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -4,46 +4,67 @@ #pragma once -#include +#include #include #include +#include "common/common_types.h" #include "common/hash.h" namespace GLShader { -enum Attributes { - ATTRIBUTE_POSITION, - ATTRIBUTE_COLOR, - ATTRIBUTE_TEXCOORD0, - 
ATTRIBUTE_TEXCOORD1, - ATTRIBUTE_TEXCOORD2, - ATTRIBUTE_TEXCOORD0_W, - ATTRIBUTE_NORMQUAT, - ATTRIBUTE_VIEW, +constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; + +using ProgramCode = std::array; + +struct ShaderSetup { + ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {} + + ProgramCode program_code; + bool program_code_hash_dirty = true; + + u64 GetProgramCodeHash() { + if (program_code_hash_dirty) { + program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); + program_code_hash_dirty = false; + } + return program_code_hash; + } + +private: + u64 program_code_hash{}; }; struct MaxwellShaderConfigCommon { - explicit MaxwellShaderConfigCommon(){}; + void Init(ShaderSetup& setup) { + program_hash = setup.GetProgramCodeHash(); + } + + u64 program_hash; }; -struct MaxwellVSConfig : MaxwellShaderConfigCommon { - explicit MaxwellVSConfig() : MaxwellShaderConfigCommon() {} - - bool operator==(const MaxwellVSConfig& o) const { - return std::memcmp(this, &o, sizeof(MaxwellVSConfig)) == 0; - }; +struct MaxwellVSConfig : Common::HashableStruct { + explicit MaxwellVSConfig(ShaderSetup& setup) { + state.Init(setup); + } }; -struct MaxwellFSConfig : MaxwellShaderConfigCommon { - explicit MaxwellFSConfig() : MaxwellShaderConfigCommon() {} - - bool operator==(const MaxwellFSConfig& o) const { - return std::memcmp(this, &o, sizeof(MaxwellFSConfig)) == 0; - }; +struct MaxwellFSConfig : Common::HashableStruct { + explicit MaxwellFSConfig(ShaderSetup& setup) { + state.Init(setup); + } }; -std::string GenerateVertexShader(const MaxwellVSConfig& config); -std::string GenerateFragmentShader(const MaxwellFSConfig& config); +/** + * Generates the GLSL vertex shader program source code for the given VS program + * @returns String of the shader source code + */ +std::string GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config); + +/** + * Generates the GLSL fragment shader program source code for the given FS 
program + * @returns String of the shader source code + */ +std::string GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config); } // namespace GLShader @@ -52,14 +73,14 @@ namespace std { template <> struct hash { size_t operator()(const GLShader::MaxwellVSConfig& k) const { - return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellVSConfig)); + return k.Hash(); } }; template <> struct hash { size_t operator()(const GLShader::MaxwellFSConfig& k) const { - return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellFSConfig)); + return k.Hash(); } }; From 8aa21a03b30d5d1b2c29e10fd05d2f893e6bd014 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 7 Apr 2018 04:54:52 -0400 Subject: [PATCH 10/27] maxwell_to_gl: Add a few types, etc. --- src/video_core/renderer_opengl/maxwell_to_gl.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 48ee80125e..7909dcfc3b 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -10,6 +10,14 @@ #include "common/logging/log.h" #include "video_core/engines/maxwell_3d.h" +using GLvec2 = std::array; +using GLvec3 = std::array; +using GLvec4 = std::array; + +using GLuvec2 = std::array; +using GLuvec3 = std::array; +using GLuvec4 = std::array; + namespace MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -39,6 +47,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { switch (topology) { + case Maxwell::PrimitiveTopology::Triangles: + return GL_TRIANGLES; case Maxwell::PrimitiveTopology::TriangleStrip: return GL_TRIANGLE_STRIP; } From 459826a705f4a410acff41dd92532134300cf961 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 7 Apr 2018 05:12:38 -0400 Subject: [PATCH 11/27] renderer_opengl: Add gl_shader_manager class. 
--- src/video_core/CMakeLists.txt | 2 + .../renderer_opengl/gl_shader_manager.cpp | 46 +++++ .../renderer_opengl/gl_shader_manager.h | 161 ++++++++++++++++++ 3 files changed, 209 insertions(+) create mode 100644 src/video_core/renderer_opengl/gl_shader_manager.cpp create mode 100644 src/video_core/renderer_opengl/gl_shader_manager.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 4defb57863..2818103576 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -28,6 +28,8 @@ add_library(video_core STATIC renderer_opengl/gl_shader_decompiler.h renderer_opengl/gl_shader_gen.cpp renderer_opengl/gl_shader_gen.h + renderer_opengl/gl_shader_manager.cpp + renderer_opengl/gl_shader_manager.h renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_shader_util.h renderer_opengl/gl_state.cpp diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp new file mode 100644 index 0000000000..0da78bc652 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -0,0 +1,46 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/core.h" +#include "core/hle/kernel/process.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace GLShader { + +namespace Impl { +void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, + size_t expected_size) { + GLuint ub_index = glGetUniformBlockIndex(shader, name); + if (ub_index != GL_INVALID_INDEX) { + GLint ub_size = 0; + glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); + ASSERT_MSG(ub_size == expected_size, + "Uniform block size did not match! 
Got %d, expected %zu", + static_cast(ub_size), expected_size); + glUniformBlockBinding(shader, ub_index, static_cast(binding)); + } +} + +void SetShaderUniformBlockBindings(GLuint shader) { + SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS, sizeof(VSUniformData)); +} + +void SetShaderSamplerBindings(GLuint shader) { + OpenGLState cur_state = OpenGLState::GetCurState(); + GLuint old_program = std::exchange(cur_state.draw.shader_program, shader); + cur_state.Apply(); + + // Set the texture samplers to correspond to different texture units + + cur_state.draw.shader_program = old_program; + cur_state.Apply(); +} + +} // namespace Impl + +void MaxwellUniformData::SetFromRegs() { +} + +} // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h new file mode 100644 index 0000000000..10e8b8b3af --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -0,0 +1,161 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" + +namespace GLShader { + +namespace Impl { +void SetShaderUniformBlockBindings(GLuint shader); +void SetShaderSamplerBindings(GLuint shader); +} // namespace Impl + +enum class UniformBindings : GLuint { Common, VS, GS, FS }; + +/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned +// NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at +// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. +// Not following that rule will cause problems on some AMD drivers. 
+struct MaxwellUniformData { + void SetFromRegs(); + + using ConstBuffer = std::array; + using Regs = Tegra::Engines::Maxwell3D::Regs; + + alignas(16) std::array const_buffers; +}; +static_assert(sizeof(MaxwellUniformData) < 16384, + "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); + +struct VSUniformData { + MaxwellUniformData uniforms; +}; +static_assert(sizeof(VSUniformData) < 16384, + "VSUniformData structure must be less than 16kb as per the OpenGL spec"); + +struct FSUniformData { + MaxwellUniformData uniforms; +}; +static_assert(sizeof(FSUniformData) < 16384, + "VSUniformData structure must be less than 16kb as per the OpenGL spec"); + +class OGLShaderStage { +public: + OGLShaderStage() = default; + + void Create(const char* source, GLenum type) { + OGLShader shader; + shader.Create(source, type); + program.Create(true, shader.handle); + Impl::SetShaderUniformBlockBindings(program.handle); + Impl::SetShaderSamplerBindings(program.handle); + } + GLuint GetHandle() const { + return program.handle; + } + +private: + OGLProgram program; +}; + +// TODO(wwylele): beautify this doc +// This is a shader cache designed for translating PICA shader to GLSL shader. +// The double cache is needed because diffent KeyConfigType, which includes a hash of the code +// region (including its leftover unused code) can generate the same GLSL code. 
+template +class ShaderCache { +public: + ShaderCache() = default; + + GLuint Get(const KeyConfigType& key, const ShaderSetup& setup) { + auto map_it = shader_map.find(key); + if (map_it == shader_map.end()) { + std::string program = CodeGenerator(setup, key); + + auto [iter, new_shader] = shader_cache.emplace(program, OGLShaderStage{}); + OGLShaderStage& cached_shader = iter->second; + if (new_shader) { + cached_shader.Create(program.c_str(), ShaderType); + } + shader_map[key] = &cached_shader; + return cached_shader.GetHandle(); + } else { + return map_it->second->GetHandle(); + } + } + +private: + std::unordered_map shader_map; + std::unordered_map shader_cache; +}; + +using VertexShaders = ShaderCache; + +using FragmentShaders = ShaderCache; + +class ProgramManager { +public: + ProgramManager() { + pipeline.Create(); + } + + void UseProgrammableVertexShader(const MaxwellVSConfig& config, const ShaderSetup setup) { + current.vs = vertex_shaders.Get(config, setup); + } + + void UseTrivialGeometryShader() { + current.gs = 0; + } + + void UseProgrammableFragmentShader(const MaxwellFSConfig& config, const ShaderSetup setup) { + current.fs = fragment_shaders.Get(config, setup); + } + + void ApplyTo(OpenGLState& state) { + // Workaround for AMD bug + glUseProgramStages(pipeline.handle, + GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, + 0); + + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current.vs); + glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current.gs); + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current.fs); + state.draw.shader_program = 0; + state.draw.program_pipeline = pipeline.handle; + } + +private: + struct ShaderTuple { + GLuint vs = 0, gs = 0, fs = 0; + bool operator==(const ShaderTuple& rhs) const { + return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs); + } + struct Hash { + std::size_t operator()(const ShaderTuple& tuple) const { + std::size_t hash = 0; + 
boost::hash_combine(hash, tuple.vs); + boost::hash_combine(hash, tuple.gs); + boost::hash_combine(hash, tuple.fs); + return hash; + } + }; + }; + ShaderTuple current; + VertexShaders vertex_shaders; + FragmentShaders fragment_shaders; + + std::unordered_map program_cache; + OGLPipeline pipeline; +}; + +} // namespace GLShader From 5617831d5fb42a692d09f2fd3c21cc1eac3ae903 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 7 Apr 2018 05:22:08 -0400 Subject: [PATCH 12/27] gl_rasterizer: Use shader program manager, remove test shader. --- .../renderer_opengl/gl_rasterizer.cpp | 170 +++--------------- .../renderer_opengl/gl_rasterizer.h | 57 +----- 2 files changed, 31 insertions(+), 196 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index fc69fbe5a7..2d58df45bf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -34,33 +34,7 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); -enum class UniformBindings : GLuint { Common, VS, FS }; - -static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, - size_t expected_size) { - GLuint ub_index = glGetUniformBlockIndex(shader, name); - if (ub_index != GL_INVALID_INDEX) { - GLint ub_size = 0; - glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); - ASSERT_MSG(ub_size == expected_size, - "Uniform block size did not match! 
Got %d, expected %zu", - static_cast(ub_size), expected_size); - glUniformBlockBinding(shader, ub_index, static_cast(binding)); - } -} - -static void SetShaderUniformBlockBindings(GLuint shader) { - SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common, - sizeof(RasterizerOpenGL::UniformData)); - SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS, - sizeof(RasterizerOpenGL::VSUniformData)); - SetShaderUniformBlockBinding(shader, "fs_config", UniformBindings::FS, - sizeof(RasterizerOpenGL::FSUniformData)); -} - RasterizerOpenGL::RasterizerOpenGL() { - shader_dirty = true; - has_ARB_buffer_storage = false; has_ARB_direct_state_access = false; has_ARB_separate_shader_objects = false; @@ -88,6 +62,8 @@ RasterizerOpenGL::RasterizerOpenGL() { } } + ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported"); + // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 state.clip_distance[0] = true; @@ -102,37 +78,28 @@ RasterizerOpenGL::RasterizerOpenGL() { state.draw.uniform_buffer = uniform_buffer.handle; state.Apply(); - glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), nullptr, GL_STATIC_DRAW); - glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle); - - uniform_block_data.dirty = true; - // Create render framebuffer framebuffer.Create(); - if (has_ARB_separate_shader_objects) { - hw_vao.Create(); - hw_vao_enabled_attributes.fill(false); + hw_vao.Create(); + hw_vao_enabled_attributes.fill(false); - stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); - stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); - state.draw.vertex_buffer = stream_buffer->GetHandle(); + stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); + stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); + state.draw.vertex_buffer = stream_buffer->GetHandle(); - pipeline.Create(); - state.draw.program_pipeline = 
pipeline.handle; - state.draw.shader_program = 0; - state.draw.vertex_array = hw_vao.handle; - state.Apply(); + shader_program_manager = std::make_unique(); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); + state.draw.shader_program = 0; + state.draw.vertex_array = hw_vao.handle; + state.Apply(); - vs_uniform_buffer.Create(); - glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle); - glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY); - glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle); - } else { - UNREACHABLE(); - } + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); + + vs_uniform_buffer.Create(); + glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle); + glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::VSUniformData), nullptr, GL_STREAM_COPY); + glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle); accelerate_draw = AccelDraw::Disabled; @@ -200,26 +167,20 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { buffer_offset += data_size; } -void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) { +void RasterizerOpenGL::SetupVertexShader(GLShader::VSUniformData* ub_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_VS); - LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_shader->shader.handle); + UNREACHABLE(); } -void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) { +void RasterizerOpenGL::SetupFragmentShader(GLShader::FSUniformData* ub_ptr, + GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_FS); UNREACHABLE(); } bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { - if (!has_ARB_separate_shader_objects) { - UNREACHABLE(); - return false; - } - accelerate_draw = is_indexed ? 
AccelDraw::Indexed : AccelDraw::Arrays; DrawArrays(); - return true; } @@ -280,18 +241,6 @@ void RasterizerOpenGL::DrawArrays() { // Sync and bind the texture surfaces BindTextures(); - // Sync and bind the shader - if (shader_dirty) { - SetShader(); - shader_dirty = false; - } - - // Sync the uniform data - if (uniform_block_data.dirty) { - glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(UniformData), &uniform_block_data.data); - uniform_block_data.dirty = false; - } - // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable // scissor test to prevent drawing outside of the framebuffer region state.scissor.enabled = true; @@ -311,7 +260,7 @@ void RasterizerOpenGL::DrawArrays() { if (is_indexed) { UNREACHABLE(); } - buffer_size += sizeof(VSUniformData); + buffer_size += sizeof(GLShader::VSUniformData); size_t ptr_pos = 0; u8* buffer_ptr; @@ -327,10 +276,10 @@ void RasterizerOpenGL::DrawArrays() { UNREACHABLE(); } - SetupVertexShader(reinterpret_cast(&buffer_ptr[ptr_pos]), + SetupVertexShader(reinterpret_cast(&buffer_ptr[ptr_pos]), buffer_offset + static_cast(ptr_pos)); const GLintptr vs_ubo_offset = buffer_offset + static_cast(ptr_pos); - ptr_pos += sizeof(VSUniformData); + ptr_pos += sizeof(GLShader::VSUniformData); stream_buffer->Unmap(); @@ -343,9 +292,10 @@ void RasterizerOpenGL::DrawArrays() { } }; - copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(VSUniformData)); + copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(GLShader::VSUniformData)); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_shader->shader.handle); + shader_program_manager->ApplyTo(state); + state.Apply(); if (is_indexed) { UNREACHABLE(); @@ -531,72 +481,6 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr } } -void RasterizerOpenGL::SetShader() { - // TODO(bunnei): The below sets up a static test shader for passing untransformed vertices to - // OpenGL for rendering. 
This should be removed/replaced when we start emulating Maxwell - // shaders. - - static constexpr char vertex_shader[] = R"( -#version 150 core - -in vec2 vert_position; -in vec2 vert_tex_coord; -out vec2 frag_tex_coord; - -void main() { - // Multiply input position by the rotscale part of the matrix and then manually translate by - // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector - // to `vec3(vert_position.xy, 1.0)` - gl_Position = vec4(mat2(mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)) * vert_position + mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)[2], 0.0, 1.0); - frag_tex_coord = vert_tex_coord; -} -)"; - - static constexpr char fragment_shader[] = R"( -#version 150 core - -in vec2 frag_tex_coord; -out vec4 color; - -uniform sampler2D tex[32]; - -void main() { - color = texture(tex[0], frag_tex_coord); -} -)"; - - if (current_shader) { - return; - } - - LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); - - current_shader = &test_shader; - if (has_ARB_separate_shader_objects) { - test_shader.shader.CreateFromSource(vertex_shader, nullptr, fragment_shader, true); - glActiveShaderProgram(pipeline.handle, test_shader.shader.handle); - } else { - UNREACHABLE(); - } - - state.draw.shader_program = test_shader.shader.handle; - state.Apply(); - - for (u32 texture = 0; texture < texture_samplers.size(); ++texture) { - // Set the texture samplers to correspond to different texture units - std::string uniform_name = "tex[" + std::to_string(texture) + "]"; - GLint uniform_tex = glGetUniformLocation(test_shader.shader.handle, uniform_name.c_str()); - if (uniform_tex != -1) { - glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id); - } - } - - if (has_ARB_separate_shader_objects) { - state.draw.shader_program = 0; - state.Apply(); - } -} - void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface, bool 
has_stencil) { state.draw.draw_framebuffer = framebuffer.handle; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 989c62d0d7..b508f5accb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -19,6 +19,7 @@ #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" @@ -56,34 +57,6 @@ public: OGLShader shader; }; - /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned - // NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at - // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. - // Not following that rule will cause problems on some AMD drivers. 
- struct UniformData {}; - - // static_assert( - // sizeof(UniformData) == 0x460, - // "The size of the UniformData structure has changed, update the structure in the shader"); - static_assert(sizeof(UniformData) < 16384, - "UniformData structure must be less than 16kb as per the OpenGL spec"); - - struct VSUniformData {}; - // static_assert( - // sizeof(VSUniformData) == 1856, - // "The size of the VSUniformData structure has changed, update the structure in the - // shader"); - static_assert(sizeof(VSUniformData) < 16384, - "VSUniformData structure must be less than 16kb as per the OpenGL spec"); - - struct FSUniformData {}; - // static_assert( - // sizeof(FSUniformData) == 1856, - // "The size of the FSUniformData structure has changed, update the structure in the - // shader"); - static_assert(sizeof(FSUniformData) < 16384, - "FSUniformData structure must be less than 16kb as per the OpenGL spec"); - private: class SamplerInfo { public: @@ -122,9 +95,6 @@ private: /// Syncs the clip coefficients to match the guest state void SyncClipCoef(); - /// Sets the OpenGL shader in accordance with the current guest state - void SetShader(); - /// Syncs the cull mode to match the guest state void SyncCullMode(); @@ -152,18 +122,7 @@ private: RasterizerCacheOpenGL res_cache; - /// Shader used for test renderering - to be removed once we have emulated shaders - MaxwellShader test_shader{}; - - const MaxwellShader* current_shader{}; - bool shader_dirty{}; - - struct { - UniformData data; - bool dirty; - } uniform_block_data = {}; - - OGLPipeline pipeline; + std::unique_ptr shader_program_manager; OGLVertexArray sw_vao; OGLVertexArray hw_vao; std::array hw_vao_enabled_attributes; @@ -183,18 +142,10 @@ private: void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); OGLBuffer vs_uniform_buffer; - std::unordered_map vs_shader_map; - std::unordered_map vs_shader_cache; - OGLShader vs_default_shader; - void SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset); + 
void SetupVertexShader(GLShader::VSUniformData* ub_ptr, GLintptr buffer_offset); - OGLBuffer fs_uniform_buffer; - std::unordered_map fs_shader_map; - std::unordered_map fs_shader_cache; - OGLShader fs_default_shader; - - void SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset); + void SetupFragmentShader(GLShader::FSUniformData* ub_ptr, GLintptr buffer_offset); enum class AccelDraw { Disabled, Arrays, Indexed }; AccelDraw accelerate_draw; From 33bb53571bf7ca394fa8b6d5107e8dfb5758f1d2 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 7 Apr 2018 23:14:41 -0400 Subject: [PATCH 13/27] maxwell_3d: Fix shader_config decodings. --- src/video_core/engines/maxwell_3d.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 98b39b2ffe..528bb31d33 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -427,14 +427,11 @@ public: BitField<0, 1, u32> enable; BitField<4, 4, ShaderProgram> program; }; - u32 start_id; - INSERT_PADDING_WORDS(1); - u32 gpr_alloc; - ShaderStage type; - INSERT_PADDING_WORDS(9); + u32 offset; + INSERT_PADDING_WORDS(14); } shader_config[MaxShaderProgram]; - INSERT_PADDING_WORDS(0x8C); + INSERT_PADDING_WORDS(0x80); struct { u32 cb_size; From 35aca0bf1f22379c82f26edade0d18eb47565cf4 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 7 Apr 2018 23:24:50 -0400 Subject: [PATCH 14/27] maxwell_3d: Make memory_manager public. 
--- src/video_core/engines/maxwell_3d.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 528bb31d33..9c6236c394 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -504,6 +504,7 @@ public: }; State state{}; + MemoryManager& memory_manager; /// Reads a register value located at the input method address u32 GetRegisterValue(u32 method) const; @@ -518,8 +519,6 @@ public: std::vector GetStageTextures(Regs::ShaderStage stage) const; private: - MemoryManager& memory_manager; - std::unordered_map> uploaded_macros; /// Macro method that is currently being executed / being fed parameters. From 51f37f5061eec2d9a0a872aebd6b50e21bee19a6 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 7 Apr 2018 23:45:09 -0400 Subject: [PATCH 15/27] gl_shader_manager: Cleanup and consolidate uniform handling. --- .../renderer_opengl/gl_shader_manager.cpp | 19 +++++++++--- .../renderer_opengl/gl_shader_manager.h | 31 ++++++------------- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 0da78bc652..a5835f2b16 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -10,8 +10,8 @@ namespace GLShader { namespace Impl { -void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, - size_t expected_size) { +void SetShaderUniformBlockBinding(GLuint shader, const char* name, + Maxwell3D::Regs::ShaderStage binding, size_t expected_size) { GLuint ub_index = glGetUniformBlockIndex(shader, name); if (ub_index != GL_INVALID_INDEX) { GLint ub_size = 0; @@ -24,7 +24,12 @@ void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindin } void SetShaderUniformBlockBindings(GLuint shader) { - SetShaderUniformBlockBinding(shader, 
"vs_config", UniformBindings::VS, sizeof(VSUniformData)); + SetShaderUniformBlockBinding(shader, "vs_config", Maxwell3D::Regs::ShaderStage::Vertex, + sizeof(MaxwellUniformData)); + SetShaderUniformBlockBinding(shader, "gs_config", Maxwell3D::Regs::ShaderStage::Geometry, + sizeof(MaxwellUniformData)); + SetShaderUniformBlockBinding(shader, "fs_config", Maxwell3D::Regs::ShaderStage::Fragment, + sizeof(MaxwellUniformData)); } void SetShaderSamplerBindings(GLuint shader) { @@ -40,7 +45,13 @@ void SetShaderSamplerBindings(GLuint shader) { } // namespace Impl -void MaxwellUniformData::SetFromRegs() { +void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { + const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; + for (unsigned index = 0; index < shader_stage.const_buffers.size(); ++index) { + const auto& const_buffer = shader_stage.const_buffers[index]; + const VAddr vaddr = memory_manager->PhysicalToVirtualAddress(const_buffer.address); + Memory::ReadBlock(vaddr, const_buffers[index].data(), sizeof(ConstBuffer)); + } } } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 10e8b8b3af..b5a7b2a18b 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -14,40 +14,27 @@ namespace GLShader { +using Tegra::Engines::Maxwell3D; + namespace Impl { void SetShaderUniformBlockBindings(GLuint shader); void SetShaderSamplerBindings(GLuint shader); } // namespace Impl -enum class UniformBindings : GLuint { Common, VS, GS, FS }; - /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned // NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. // Not following that rule will cause problems on some AMD drivers. 
struct MaxwellUniformData { - void SetFromRegs(); + void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); using ConstBuffer = std::array; - using Regs = Tegra::Engines::Maxwell3D::Regs; - - alignas(16) std::array const_buffers; + alignas(16) std::array const_buffers; }; +static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is incorrect"); static_assert(sizeof(MaxwellUniformData) < 16384, "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); -struct VSUniformData { - MaxwellUniformData uniforms; -}; -static_assert(sizeof(VSUniformData) < 16384, - "VSUniformData structure must be less than 16kb as per the OpenGL spec"); - -struct FSUniformData { - MaxwellUniformData uniforms; -}; -static_assert(sizeof(FSUniformData) < 16384, - "VSUniformData structure must be less than 16kb as per the OpenGL spec"); - class OGLShaderStage { public: OGLShaderStage() = default; @@ -113,14 +100,14 @@ public: current.vs = vertex_shaders.Get(config, setup); } - void UseTrivialGeometryShader() { - current.gs = 0; - } - void UseProgrammableFragmentShader(const MaxwellFSConfig& config, const ShaderSetup setup) { current.fs = fragment_shaders.Get(config, setup); } + void UseTrivialGeometryShader() { + current.gs = 0; + } + void ApplyTo(OpenGLState& state) { // Workaround for AMD bug glUseProgramStages(pipeline.handle, From 85d77a3d24f17040791fe66cc1278713cfb487ae Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 7 Apr 2018 23:48:38 -0400 Subject: [PATCH 16/27] gl_shader_decompiler: Basic impl. for very simple vertex shaders. 
- Tested with Puyo Puyo Tetris and Cave Story+ --- .../renderer_opengl/gl_shader_decompiler.cpp | 315 +++++++++++++++++- .../renderer_opengl/gl_shader_decompiler.h | 12 +- 2 files changed, 311 insertions(+), 16 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 3fc420649f..60857c6236 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -10,10 +10,15 @@ #include "video_core/engines/shader_bytecode.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" -namespace Tegra { -namespace Shader { +namespace GLShader { namespace Decompiler { +using Tegra::Shader::Attribute; +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; +using Tegra::Shader::Register; +using Tegra::Shader::Uniform; + constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; class DecompileFail : public std::runtime_error { @@ -90,7 +95,7 @@ private: for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) { const Instruction instr = {program_code[offset]}; - switch (instr.opcode.Value().EffectiveOpCode()) { + switch (instr.opcode.EffectiveOpCode()) { case OpCode::Id::EXIT: { return exit_method = ExitMethod::AlwaysEnd; } @@ -130,7 +135,294 @@ public: } std::string GetShaderCode() { - return shader.GetResult(); + return declarations.GetResult() + shader.GetResult(); + } + +private: + /// Gets the Subroutine object corresponding to the specified address. + const Subroutine& GetSubroutine(u32 begin, u32 end) const { + auto iter = subroutines.find(Subroutine{begin, end}); + ASSERT(iter != subroutines.end()); + return *iter; + } + + /// Generates code representing an input attribute register. 
+ std::string GetInputAttribute(Attribute::Index attribute) { + declr_input_attribute.insert(attribute); + + const u32 index{static_cast(attribute) - + static_cast(Attribute::Index::Attribute_0)}; + if (attribute >= Attribute::Index::Attribute_0) { + return "input_attribute_" + std::to_string(index); + } + + LOG_ERROR(HW_GPU, "Unhandled input attribute: 0x%02x", index); + UNREACHABLE(); + } + + /// Generates code representing an output attribute register. + std::string GetOutputAttribute(Attribute::Index attribute) { + switch (attribute) { + case Attribute::Index::Position: + return "gl_Position"; + default: + const u32 index{static_cast(attribute) - + static_cast(Attribute::Index::Attribute_0)}; + if (attribute >= Attribute::Index::Attribute_0) { + declr_output_attribute.insert(attribute); + return "output_attribute_" + std::to_string(index); + } + + LOG_ERROR(HW_GPU, "Unhandled output attribute: 0x%02x", index); + UNREACHABLE(); + } + } + + /// Generates code representing a temporary (GPR) register. + std::string GetRegister(const Register& reg) { + return *declr_register.insert("register_" + std::to_string(reg)).first; + } + + /// Generates code representing a uniform (C buffer) register. + std::string GetUniform(const Uniform& reg) const { + std::string index = std::to_string(reg.index); + return "uniform_" + index + "[" + std::to_string(reg.offset >> 2) + "][" + + std::to_string(reg.offset & 3) + "]"; + } + + /** + * Adds code that calls a subroutine. + * @param subroutine the subroutine to call. + */ + void CallSubroutine(const Subroutine& subroutine) { + if (subroutine.exit_method == ExitMethod::AlwaysEnd) { + shader.AddLine(subroutine.GetName() + "();"); + shader.AddLine("return true;"); + } else if (subroutine.exit_method == ExitMethod::Conditional) { + shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }"); + } else { + shader.AddLine(subroutine.GetName() + "();"); + } + } + + /** + * Writes code that does an assignment operation. 
+ * @param reg the destination register code. + * @param value the code representing the value to assign. + */ + void SetDest(u64 elem, const std::string& reg, const std::string& value, + u64 dest_num_components, u64 value_num_components) { + std::string swizzle = "."; + swizzle += "xyzw"[elem]; + + std::string dest = reg + (dest_num_components != 1 ? swizzle : ""); + std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : ""); + + shader.AddLine(dest + " = " + src + ";"); + } + + /** + * Compiles a single instruction from Tegra to GLSL. + * @param offset the offset of the Tegra shader instruction. + * @return the offset of the next instruction to execute. Usually it is the current offset + * + 1. If the current instruction always terminates the program, returns PROGRAM_END. + */ + u32 CompileInstr(u32 offset) { + const Instruction instr = {program_code[offset]}; + + shader.AddLine("// " + std::to_string(offset) + ": " + OpCode::GetInfo(instr.opcode).name); + + switch (OpCode::GetInfo(instr.opcode).type) { + case OpCode::Type::Arithmetic: { + ASSERT(!instr.nb); + ASSERT(!instr.aa); + ASSERT(!instr.na); + ASSERT(!instr.ab); + ASSERT(!instr.ad); + + std::string gpr1 = GetRegister(instr.gpr1); + std::string gpr2 = GetRegister(instr.gpr2); + std::string uniform = GetUniform(instr.uniform); + + switch (instr.opcode.EffectiveOpCode()) { + case OpCode::Id::FMUL_C: { + SetDest(0, gpr1, gpr2 + " * " + uniform, 1, 1); + break; + } + case OpCode::Id::FADD_C: { + SetDest(0, gpr1, gpr2 + " + " + uniform, 1, 1); + break; + } + case OpCode::Id::FFMA_CR: { + SetDest(0, gpr1, gpr2 + " * " + uniform + " + " + GetRegister(instr.gpr3), 1, 1); + break; + } + default: { + LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", + (int)instr.opcode.EffectiveOpCode(), OpCode::GetInfo(instr.opcode).name, + instr.hex); + throw DecompileFail("Unhandled instruction"); + break; + } + } + break; + } + case OpCode::Type::Memory: { + 
ASSERT(instr.attribute.size == 0); + + std::string gpr1 = GetRegister(instr.gpr1); + const Attribute::Index attribute = instr.attribute.GetIndex(); + + switch (instr.opcode.EffectiveOpCode()) { + case OpCode::Id::LD_A: { + SetDest(instr.attribute.element, gpr1, GetInputAttribute(attribute), 1, 4); + break; + } + case OpCode::Id::ST_A: { + SetDest(instr.attribute.element, GetOutputAttribute(attribute), gpr1, 4, 1); + break; + } + default: { + LOG_ERROR(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x", + (int)instr.opcode.EffectiveOpCode(), OpCode::GetInfo(instr.opcode).name, + instr.hex); + throw DecompileFail("Unhandled instruction"); + break; + } + } + break; + } + + default: { + switch (instr.opcode.EffectiveOpCode()) { + case OpCode::Id::EXIT: { + shader.AddLine("return true;"); + offset = PROGRAM_END - 1; + break; + } + + default: { + LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", + (int)instr.opcode.EffectiveOpCode(), + OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); + // throw DecompileFail("Unhandled instruction"); + break; + } + } + + break; + } + } + + return offset + 1; + } + + /** + * Compiles a range of instructions from Tegra to GLSL. + * @param begin the offset of the starting instruction. + * @param end the offset where the compilation should stop (exclusive). + * @return the offset of the next instruction to compile. PROGRAM_END if the program + * terminates. + */ + u32 CompileRange(u32 begin, u32 end) { + u32 program_counter; + for (program_counter = begin; program_counter < (begin > end ? 
PROGRAM_END : end);) { + program_counter = CompileInstr(program_counter); + } + return program_counter; + } + + void Generate() { + // Add declarations for all subroutines + for (const auto& subroutine : subroutines) { + shader.AddLine("bool " + subroutine.GetName() + "();"); + } + shader.AddLine(""); + + // Add the main entry point + shader.AddLine("bool exec_shader() {"); + ++shader.scope; + CallSubroutine(GetSubroutine(main_offset, PROGRAM_END)); + --shader.scope; + shader.AddLine("}\n"); + + // Add definitions for all subroutines + for (const auto& subroutine : subroutines) { + std::set labels = subroutine.labels; + + shader.AddLine("bool " + subroutine.GetName() + "() {"); + ++shader.scope; + + if (labels.empty()) { + if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) { + shader.AddLine("return false;"); + } + } else { + labels.insert(subroutine.begin); + shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); + shader.AddLine("while (true) {"); + ++shader.scope; + + shader.AddLine("switch (jmp_to) {"); + + for (auto label : labels) { + shader.AddLine("case " + std::to_string(label) + "u: {"); + ++shader.scope; + + auto next_it = labels.lower_bound(label + 1); + u32 next_label = next_it == labels.end() ? 
subroutine.end : *next_it; + + u32 compile_end = CompileRange(label, next_label); + if (compile_end > next_label && compile_end != PROGRAM_END) { + // This happens only when there is a label inside a IF/LOOP block + shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }"); + labels.emplace(compile_end); + } + + --shader.scope; + shader.AddLine("}"); + } + + shader.AddLine("default: return false;"); + shader.AddLine("}"); + + --shader.scope; + shader.AddLine("}"); + + shader.AddLine("return false;"); + } + + --shader.scope; + shader.AddLine("}\n"); + + DEBUG_ASSERT(shader.scope == 0); + } + + GenerateDeclarations(); + } + + /// Add declarations for registers + void GenerateDeclarations() { + for (const auto& reg : declr_register) { + declarations.AddLine("float " + reg + " = 0.0;"); + } + declarations.AddLine(""); + + for (const auto& index : declr_input_attribute) { + // TODO(bunnei): Use proper number of elements for these + declarations.AddLine( + "layout(location = " + std::to_string(static_cast(index) - 8) + ") in vec4 " + + GetInputAttribute(index) + ";"); + } + declarations.AddLine(""); + + for (const auto& index : declr_output_attribute) { + // TODO(bunnei): Use proper number of elements for these + declarations.AddLine( + "layout(location = " + std::to_string(static_cast(index) - 8) + ") out vec4 " + + GetOutputAttribute(index) + ";"); + } + declarations.AddLine(""); } private: @@ -139,9 +431,17 @@ private: const u32 main_offset; ShaderWriter shader; + ShaderWriter declarations; - void Generate() {} -}; + // Declarations + std::set declr_register; + std::set declr_input_attribute; + std::set declr_output_attribute; +}; // namespace Decompiler + +std::string GetCommonDeclarations() { + return "bool exec_shader();"; +} boost::optional DecompileProgram(const ProgramCode& program_code, u32 main_offset) { try { @@ -155,5 +455,4 @@ boost::optional DecompileProgram(const ProgramCode& program_code, u } } // namespace Decompiler -} // namespace 
Shader -} // namespace Tegra +} // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 628f02c931..061dd61024 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -7,18 +7,14 @@ #include #include #include "common/common_types.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" -namespace Tegra { -namespace Shader { +namespace GLShader { namespace Decompiler { -constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x100}; -constexpr size_t MAX_SWIZZLE_DATA_LENGTH{0x100}; - -using ProgramCode = std::array; +std::string GetCommonDeclarations(); boost::optional DecompileProgram(const ProgramCode& program_code, u32 main_offset); } // namespace Decompiler -} // namespace Shader -} // namespace Tegra +} // namespace GLShader From beddc8afd208a71b1ec0f012103e3ac3e058c140 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 8 Apr 2018 00:00:11 -0400 Subject: [PATCH 17/27] gl_rasterizer: Generate shaders and upload uniforms. 
--- .../renderer_opengl/gl_rasterizer.cpp | 102 +++++++++++++----- .../renderer_opengl/gl_rasterizer.h | 7 +- 2 files changed, 77 insertions(+), 32 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d58df45bf..f75d4c658d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -96,10 +96,14 @@ RasterizerOpenGL::RasterizerOpenGL() { glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); - vs_uniform_buffer.Create(); - glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle); - glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::VSUniformData), nullptr, GL_STREAM_COPY); - glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle); + for (unsigned index = 0; index < uniform_buffers.size(); ++index) { + auto& buffer = uniform_buffers[index]; + buffer.Create(); + glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle); + glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr, + GL_STREAM_COPY); + glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle); + } accelerate_draw = AccelDraw::Disabled; @@ -167,15 +171,69 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { buffer_offset += data_size; } -void RasterizerOpenGL::SetupVertexShader(GLShader::VSUniformData* ub_ptr, GLintptr buffer_offset) { - MICROPROFILE_SCOPE(OpenGL_VS); - UNREACHABLE(); -} +void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) { + // Helper function for uploading uniform data + const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { + if (has_ARB_direct_state_access) { + glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); + } else { + glBindBuffer(GL_COPY_WRITE_BUFFER, handle); + glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); + } + }; -void 
RasterizerOpenGL::SetupFragmentShader(GLShader::FSUniformData* ub_ptr, - GLintptr buffer_offset) { - MICROPROFILE_SCOPE(OpenGL_FS); - UNREACHABLE(); + auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); + ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); + + for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { + ptr_pos += sizeof(GLShader::MaxwellUniformData); + + auto& shader_config = gpu.regs.shader_config[index]; + const Maxwell::ShaderProgram program{static_cast(index)}; + + // VertexB program is always enabled, despite bit setting + const bool is_enabled{shader_config.enable || program == Maxwell::ShaderProgram::VertexB}; + + // Skip stages that are not enabled + if (!is_enabled) { + continue; + } + + // Upload uniform data as one UBO per stage + const auto& stage = index - 1; // Stage indices are 0 - 5 + const GLintptr ubo_offset = buffer_offset + static_cast(ptr_pos); + copy_buffer(uniform_buffers[stage].handle, ubo_offset, + sizeof(GLShader::MaxwellUniformData)); + GLShader::MaxwellUniformData* ub_ptr = + reinterpret_cast(&buffer_ptr[ptr_pos]); + ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); + + // Fetch program code from memory + GLShader::ProgramCode program_code; + const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; + const VAddr cpu_address{gpu.memory_manager.PhysicalToVirtualAddress(gpu_address)}; + Memory::ReadBlock(cpu_address, program_code.data(), program_code.size() * sizeof(u64)); + GLShader::ShaderSetup setup{std::move(program_code)}; + + switch (program) { + case Maxwell::ShaderProgram::VertexB: { + GLShader::MaxwellVSConfig vs_config{setup}; + shader_program_manager->UseProgrammableVertexShader(vs_config, setup); + break; + } + case Maxwell::ShaderProgram::Fragment: { + GLShader::MaxwellFSConfig fs_config{setup}; + shader_program_manager->UseProgrammableFragmentShader(fs_config, setup); + break; + } + default: + LOG_CRITICAL(HW_GPU, "Unimplemented shader 
index=%d, enable=%d, offset=0x%08X", index, + shader_config.enable.Value(), shader_config.offset); + UNREACHABLE(); + } + } + + shader_program_manager->UseTrivialGeometryShader(); } bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { @@ -260,7 +318,9 @@ void RasterizerOpenGL::DrawArrays() { if (is_indexed) { UNREACHABLE(); } - buffer_size += sizeof(GLShader::VSUniformData); + + // Uniform space for the 5 shader stages + buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; size_t ptr_pos = 0; u8* buffer_ptr; @@ -276,24 +336,10 @@ void RasterizerOpenGL::DrawArrays() { UNREACHABLE(); } - SetupVertexShader(reinterpret_cast(&buffer_ptr[ptr_pos]), - buffer_offset + static_cast(ptr_pos)); - const GLintptr vs_ubo_offset = buffer_offset + static_cast(ptr_pos); - ptr_pos += sizeof(GLShader::VSUniformData); + SetupShaders(buffer_ptr, buffer_offset, ptr_pos); stream_buffer->Unmap(); - const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { - if (has_ARB_direct_state_access) { - glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); - } else { - glBindBuffer(GL_COPY_WRITE_BUFFER, handle); - glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); - } - }; - - copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(GLShader::VSUniformData)); - shader_program_manager->ApplyTo(state); state.Apply(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b508f5accb..32b897eb29 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -15,6 +15,7 @@ #include "common/common_types.h" #include "common/hash.h" #include "common/vector_math.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -141,11 +142,9 
@@ private: void AnalyzeVertexArray(bool is_indexed); void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); - OGLBuffer vs_uniform_buffer; + std::array uniform_buffers; - void SetupVertexShader(GLShader::VSUniformData* ub_ptr, GLintptr buffer_offset); - - void SetupFragmentShader(GLShader::FSUniformData* ub_ptr, GLintptr buffer_offset); + void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos); enum class AccelDraw { Disabled, Arrays, Indexed }; AccelDraw accelerate_draw; From 0ca8fce9d0a073ee07818691cbc6534e9af5fc7a Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 8 Apr 2018 00:36:19 -0400 Subject: [PATCH 18/27] gl_shader_manager: Implement SetShaderSamplerBindings. --- src/video_core/renderer_opengl/gl_shader_manager.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index a5835f2b16..67f2be0563 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -38,6 +38,14 @@ void SetShaderSamplerBindings(GLuint shader) { cur_state.Apply(); // Set the texture samplers to correspond to different texture units + for (u32 texture = 0; texture < 32; ++texture) { + // Set the texture samplers to correspond to different texture units + std::string uniform_name = "tex[" + std::to_string(texture) + "]"; + GLint uniform_tex = glGetUniformLocation(shader, uniform_name.c_str()); + if (uniform_tex != -1) { + glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id); + } + } cur_state.draw.shader_program = old_program; cur_state.Apply(); From a992aac5eb92479f8c23e746c298e4ddc3765594 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 9 Apr 2018 22:02:59 -0400 Subject: [PATCH 19/27] renderer_opengl: Fix Morton copy byteswap, etc. 
--- src/video_core/renderer_opengl/renderer_opengl.cpp | 10 +++++----- src/video_core/utils.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 2fabf5cabe..5e78723a2f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -57,7 +57,7 @@ uniform sampler2D color_texture; void main() { // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to // support more framebuffer pixel formats. - color = texture(color_texture, frag_tex_coord).abgr; + color = texture(color_texture, frag_tex_coord); } )"; @@ -311,10 +311,10 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, } std::array vertices = {{ - ScreenRectVertex(x, y, texcoords.top, right), - ScreenRectVertex(x + w, y, texcoords.bottom, right), - ScreenRectVertex(x, y + h, texcoords.top, left), - ScreenRectVertex(x + w, y + h, texcoords.bottom, left), + ScreenRectVertex(x, y, texcoords.top, left), + ScreenRectVertex(x + w, y, texcoords.bottom, left), + ScreenRectVertex(x, y + h, texcoords.top, right), + ScreenRectVertex(x + w, y + h, texcoords.bottom, right), }}; state.texture_units[0].texture_2d = screen_info.display_texture; diff --git a/src/video_core/utils.h b/src/video_core/utils.h index be0f7e22bb..e0a14d48f1 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h @@ -151,7 +151,7 @@ static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixe const u32 coarse_y = y & ~127; u32 morton_offset = GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; - u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; + u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel; data_ptrs[morton_to_gl] = morton_data + morton_offset; data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; From 
50023bdae790a607b3a6e6279465c2520279f70b Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 9 Apr 2018 22:07:30 -0400 Subject: [PATCH 20/27] gl_shader_decompiler: Add shader stage hint. --- .../renderer_opengl/gl_shader_decompiler.cpp | 11 +++++++---- src/video_core/renderer_opengl/gl_shader_decompiler.h | 6 +++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 60857c6236..704b24307d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -128,8 +128,9 @@ private: class GLSLGenerator { public: GLSLGenerator(const std::set& subroutines, const ProgramCode& program_code, - u32 main_offset) - : subroutines(subroutines), program_code(program_code), main_offset(main_offset) { + u32 main_offset, Maxwell3D::Regs::ShaderStage stage) + : subroutines(subroutines), program_code(program_code), main_offset(main_offset), + stage(stage) { Generate(); } @@ -429,6 +430,7 @@ private: const std::set& subroutines; const ProgramCode& program_code; const u32 main_offset; + Maxwell3D::Regs::ShaderStage stage; ShaderWriter shader; ShaderWriter declarations; @@ -443,10 +445,11 @@ std::string GetCommonDeclarations() { return "bool exec_shader();"; } -boost::optional DecompileProgram(const ProgramCode& program_code, u32 main_offset) { +boost::optional DecompileProgram(const ProgramCode& program_code, u32 main_offset, + Maxwell3D::Regs::ShaderStage stage) { try { auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines(); - GLSLGenerator generator(subroutines, program_code, main_offset); + GLSLGenerator generator(subroutines, program_code, main_offset, stage); return generator.GetShaderCode(); } catch (const DecompileFail& exception) { LOG_ERROR(HW_GPU, "Shader decompilation failed: %s", exception.what()); diff --git 
a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 061dd61024..2f4047d87b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -7,14 +7,18 @@ #include #include #include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_shader_gen.h" namespace GLShader { namespace Decompiler { +using Tegra::Engines::Maxwell3D; + std::string GetCommonDeclarations(); -boost::optional DecompileProgram(const ProgramCode& program_code, u32 main_offset); +boost::optional DecompileProgram(const ProgramCode& program_code, u32 main_offset, + Maxwell3D::Regs::ShaderStage stage); } // namespace Decompiler } // namespace GLShader From 5a47832221fb03a50af2583ec311114871ea1dd1 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 9 Apr 2018 22:09:23 -0400 Subject: [PATCH 21/27] shader_bytecode: Add SubOp decoding. --- src/video_core/engines/shader_bytecode.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 6660742ccf..477d01f94e 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -265,6 +265,15 @@ enum class Pred : u64 { NeverExecute = 0xf, }; +enum class SubOp : u64 { + Cos = 0x0, + Sin = 0x1, + Ex2 = 0x2, + Lg2 = 0x3, + Rcp = 0x4, + Rsq = 0x5, +}; + #pragma pack(1) union Instruction { Instruction& operator=(const Instruction& instr) { @@ -276,6 +285,7 @@ union Instruction { BitField<0, 8, Register> gpr1; BitField<8, 8, Register> gpr2; BitField<16, 4, Pred> pred; + BitField<20, 7, SubOp> sub_op; BitField<39, 8, Register> gpr3; BitField<45, 1, u64> nb; BitField<46, 1, u64> aa; From 7639667562ae155d27b5f5fcec9a54b0c17c1682 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 9 Apr 2018 22:10:17 -0400 Subject: [PATCH 22/27] shader_bytecode: Add FSETP and 
KIL to GetInfo. --- src/video_core/engines/shader_bytecode.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 477d01f94e..a4d02e5729 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -222,7 +222,10 @@ union OpCode { info_table[Id::FMUL_R] = {Type::Arithmetic, "fmul_r"}; info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"}; info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"}; + info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"}; + info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"}; info_table[Id::EXIT] = {Type::Trivial, "exit"}; + info_table[Id::KIL] = {Type::Flow, "kil"}; return info_table; } From 86135864da9bbbd5906e96862994bd24f3cd4da5 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 9 Apr 2018 23:39:44 -0400 Subject: [PATCH 23/27] gl_shader_decompiler: Implement negate, abs, etc. and lots of cleanup. --- src/video_core/engines/shader_bytecode.h | 59 +++++++++----- .../renderer_opengl/gl_shader_decompiler.cpp | 81 ++++++++++++++----- 2 files changed, 98 insertions(+), 42 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index a4d02e5729..dbcd237552 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -56,15 +56,18 @@ union Attribute { Attribute_0 = 8, }; - constexpr Index GetIndex() const { - return index; - } + union { + BitField<22, 2, u64> element; + BitField<24, 6, Index> index; + BitField<47, 3, u64> size; + } fmt20; + + union { + BitField<30, 2, u64> element; + BitField<32, 6, Index> index; + } fmt28; -public: - BitField<24, 6, Index> index; - BitField<22, 2, u64> element; BitField<39, 8, u64> reg; - BitField<47, 3, u64> size; u64 value; }; @@ -104,6 +107,7 @@ union OpCode { enum class Type { Trivial, Arithmetic, + Ffma, Flow, Memory, Unknown, @@ -210,12 +214,11 @@ union OpCode { 
info_table[Id::TEXS] = {Type::Memory, "texs"}; info_table[Id::LD_A] = {Type::Memory, "ld_a"}; info_table[Id::ST_A] = {Type::Memory, "st_a"}; - info_table[Id::IPA] = {Type::Arithmetic, "ipa"}; info_table[Id::MUFU] = {Type::Arithmetic, "mufu"}; - info_table[Id::FFMA_IMM] = {Type::Arithmetic, "ffma_imm"}; - info_table[Id::FFMA_CR] = {Type::Arithmetic, "ffma_cr"}; - info_table[Id::FFMA_RC] = {Type::Arithmetic, "ffma_rc"}; - info_table[Id::FFMA_RR] = {Type::Arithmetic, "ffma_rr"}; + info_table[Id::FFMA_IMM] = {Type::Ffma, "ffma_imm"}; + info_table[Id::FFMA_CR] = {Type::Ffma, "ffma_cr"}; + info_table[Id::FFMA_RC] = {Type::Ffma, "ffma_rc"}; + info_table[Id::FFMA_RR] = {Type::Ffma, "ffma_rr"}; info_table[Id::FADD_R] = {Type::Arithmetic, "fadd_r"}; info_table[Id::FADD_C] = {Type::Arithmetic, "fadd_c"}; info_table[Id::FADD_IMM] = {Type::Arithmetic, "fadd_imm"}; @@ -225,6 +228,7 @@ union OpCode { info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"}; info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"}; info_table[Id::EXIT] = {Type::Trivial, "exit"}; + info_table[Id::IPA] = {Type::Trivial, "ipa"}; info_table[Id::KIL] = {Type::Flow, "kil"}; return info_table; } @@ -285,16 +289,31 @@ union Instruction { } OpCode opcode; - BitField<0, 8, Register> gpr1; - BitField<8, 8, Register> gpr2; + BitField<0, 8, Register> gpr0; + BitField<8, 8, Register> gpr8; BitField<16, 4, Pred> pred; + BitField<20, 8, Register> gpr20; BitField<20, 7, SubOp> sub_op; - BitField<39, 8, Register> gpr3; - BitField<45, 1, u64> nb; - BitField<46, 1, u64> aa; - BitField<48, 1, u64> na; - BitField<49, 1, u64> ab; - BitField<50, 1, u64> ad; + BitField<28, 8, Register> gpr28; + BitField<36, 13, u64> imm36; + BitField<39, 8, Register> gpr39; + + union { + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + BitField<50, 1, u64> abs_d; + } alu; + + union { + BitField<48, 1, u64> negate_b; + BitField<49, 1, u64> negate_c; + } ffma; + + 
BitField<60, 1, u64> is_b_gpr; + BitField<59, 1, u64> is_c_gpr; + Attribute attribute; Uniform uniform; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 704b24307d..792b4b12e7 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -17,6 +17,7 @@ using Tegra::Shader::Attribute; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +using Tegra::Shader::SubOp; using Tegra::Shader::Uniform; constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; @@ -235,27 +236,34 @@ private: switch (OpCode::GetInfo(instr.opcode).type) { case OpCode::Type::Arithmetic: { - ASSERT(!instr.nb); - ASSERT(!instr.aa); - ASSERT(!instr.na); - ASSERT(!instr.ab); - ASSERT(!instr.ad); + ASSERT(!instr.alu.abs_d, "unimplemented"); - std::string gpr1 = GetRegister(instr.gpr1); - std::string gpr2 = GetRegister(instr.gpr2); - std::string uniform = GetUniform(instr.uniform); + std::string dest = GetRegister(instr.gpr0); + std::string op_a = instr.alu.negate_a ? "-" : ""; + op_a += GetRegister(instr.gpr8); + if (instr.alu.abs_a) { + op_a = "abs(" + op_a + ")"; + } + + std::string op_b = instr.alu.negate_b ? 
"-" : ""; + if (instr.is_b_gpr) { + op_b += GetRegister(instr.gpr20); + } else { + op_b += GetUniform(instr.uniform); + } + if (instr.alu.abs_b) { + op_b = "abs(" + op_b + ")"; + } switch (instr.opcode.EffectiveOpCode()) { - case OpCode::Id::FMUL_C: { - SetDest(0, gpr1, gpr2 + " * " + uniform, 1, 1); + case OpCode::Id::FMUL_C: + case OpCode::Id::FMUL_R: { + SetDest(0, dest, op_a + " * " + op_b, 1, 1); break; } - case OpCode::Id::FADD_C: { - SetDest(0, gpr1, gpr2 + " + " + uniform, 1, 1); - break; - } - case OpCode::Id::FFMA_CR: { - SetDest(0, gpr1, gpr2 + " * " + uniform + " + " + GetRegister(instr.gpr3), 1, 1); + case OpCode::Id::FADD_C: + case OpCode::Id::FADD_R: { + SetDest(0, dest, op_a + " + " + op_b, 1, 1); break; } default: { @@ -268,19 +276,48 @@ private: } break; } - case OpCode::Type::Memory: { - ASSERT(instr.attribute.size == 0); + case OpCode::Type::Ffma: { + ASSERT_MSG(!instr.ffma.negate_b, "untested"); + ASSERT_MSG(!instr.ffma.negate_c, "untested"); - std::string gpr1 = GetRegister(instr.gpr1); - const Attribute::Index attribute = instr.attribute.GetIndex(); + std::string dest = GetRegister(instr.gpr0); + std::string op_a = GetRegister(instr.gpr8); + + std::string op_b = instr.ffma.negate_b ? "-" : ""; + op_b += GetUniform(instr.uniform); + + std::string op_c = instr.ffma.negate_c ? 
"-" : ""; + op_c += GetRegister(instr.gpr39); + + switch (instr.opcode.EffectiveOpCode()) { + case OpCode::Id::FFMA_CR: { + SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1); + break; + } + + default: { + LOG_ERROR(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x", + (int)instr.opcode.EffectiveOpCode(), OpCode::GetInfo(instr.opcode).name, + instr.hex); + throw DecompileFail("Unhandled instruction"); + break; + } + } + break; + } + case OpCode::Type::Memory: { + std::string gpr0 = GetRegister(instr.gpr0); + const Attribute::Index attribute = instr.attribute.fmt20.index; switch (instr.opcode.EffectiveOpCode()) { case OpCode::Id::LD_A: { - SetDest(instr.attribute.element, gpr1, GetInputAttribute(attribute), 1, 4); + ASSERT(instr.attribute.fmt20.size == 0); + SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4); break; } case OpCode::Id::ST_A: { - SetDest(instr.attribute.element, GetOutputAttribute(attribute), gpr1, 4, 1); + ASSERT(instr.attribute.fmt20.size == 0); + SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1); break; } default: { From 0d408b965b971c2b8df9a6e4c1cd1d7a8e9dc5d1 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 14 Apr 2018 14:09:32 -0400 Subject: [PATCH 24/27] shaders: Fix GCC and clang build issues. 
--- src/video_core/engines/shader_bytecode.h | 6 +++--- src/video_core/renderer_opengl/gl_resource_manager.h | 2 +- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index dbcd237552..e285d097d5 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -12,7 +12,7 @@ namespace Tegra { namespace Shader { struct Register { - constexpr Register() = default; + Register() = default; constexpr Register(u64 value) : value(value) {} @@ -47,7 +47,7 @@ private: }; union Attribute { - constexpr Attribute() = default; + Attribute() = default; constexpr Attribute(u64 value) : value(value) {} @@ -118,7 +118,7 @@ union OpCode { std::string name; }; - constexpr OpCode() = default; + OpCode() = default; constexpr OpCode(Id value) : value(static_cast(value)) {} diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 557f73a513..2f0e7ac1a8 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -131,7 +131,7 @@ public: } template - void Create(bool separable_program = false, T... shaders) { + void Create(bool separable_program, T... 
shaders) { if (handle != 0) return; handle = GLShader::LoadProgram(separable_program, shaders...); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 792b4b12e7..96d50dd9eb 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -236,7 +236,7 @@ private: switch (OpCode::GetInfo(instr.opcode).type) { case OpCode::Type::Arithmetic: { - ASSERT(!instr.alu.abs_d, "unimplemented"); + ASSERT(!instr.alu.abs_d); std::string dest = GetRegister(instr.gpr0); std::string op_a = instr.alu.negate_a ? "-" : ""; From eabeedf6af1e5277074ca460fa48390195b21329 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 14 Apr 2018 14:42:47 -0400 Subject: [PATCH 25/27] gl_shader_decompiler: Cleanup log statements. --- .../renderer_opengl/gl_shader_decompiler.cpp | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 96d50dd9eb..6251a4be29 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -158,7 +158,7 @@ private: return "input_attribute_" + std::to_string(index); } - LOG_ERROR(HW_GPU, "Unhandled input attribute: 0x%02x", index); + LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index); UNREACHABLE(); } @@ -175,7 +175,7 @@ private: return "output_attribute_" + std::to_string(index); } - LOG_ERROR(HW_GPU, "Unhandled output attribute: 0x%02x", index); + LOG_CRITICAL(HW_GPU, "Unhandled output attribute: 0x%02x", index); UNREACHABLE(); } } @@ -267,9 +267,9 @@ private: break; } default: { - LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.EffectiveOpCode(), OpCode::GetInfo(instr.opcode).name, - instr.hex); + LOG_CRITICAL(HW_GPU, "Unhandled arithmetic 
instruction: 0x%02x (%s): 0x%08x", + static_cast(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); throw DecompileFail("Unhandled instruction"); break; } @@ -296,9 +296,9 @@ private: } default: { - LOG_ERROR(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.EffectiveOpCode(), OpCode::GetInfo(instr.opcode).name, - instr.hex); + LOG_CRITICAL(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x", + static_cast(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); throw DecompileFail("Unhandled instruction"); break; } @@ -321,9 +321,9 @@ private: break; } default: { - LOG_ERROR(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.EffectiveOpCode(), OpCode::GetInfo(instr.opcode).name, - instr.hex); + LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x", + static_cast(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); throw DecompileFail("Unhandled instruction"); break; } @@ -340,10 +340,10 @@ private: } default: { - LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.EffectiveOpCode(), - OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); - // throw DecompileFail("Unhandled instruction"); + LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", + static_cast(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); + throw DecompileFail("Unhandled instruction"); break; } } From e6224fec275a725bfbb261003c9db44a3da475df Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 14 Apr 2018 15:57:58 -0400 Subject: [PATCH 26/27] shaders: Address PR review feedback. 
--- src/video_core/engines/shader_bytecode.h | 2 +- .../renderer_opengl/gl_shader_decompiler.cpp | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index e285d097d5..98af381dfa 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -247,6 +247,7 @@ static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size"); namespace std { +// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330. template <> struct make_unsigned { using type = Tegra::Shader::Attribute; @@ -281,7 +282,6 @@ enum class SubOp : u64 { Rsq = 0x5, }; -#pragma pack(1) union Instruction { Instruction& operator=(const Instruction& instr) { hex = instr.hex; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6251a4be29..1290fa4cd5 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -448,17 +448,19 @@ private: for (const auto& index : declr_input_attribute) { // TODO(bunnei): Use proper number of elements for these - declarations.AddLine( - "layout(location = " + std::to_string(static_cast(index) - 8) + ") in vec4 " + - GetInputAttribute(index) + ";"); + declarations.AddLine("layout(location = " + + std::to_string(static_cast(index) - + static_cast(Attribute::Index::Attribute_0)) + + ") in vec4 " + GetInputAttribute(index) + ";"); } declarations.AddLine(""); for (const auto& index : declr_output_attribute) { // TODO(bunnei): Use proper number of elements for these - declarations.AddLine( - "layout(location = " + std::to_string(static_cast(index) - 8) + ") out vec4 " + - GetOutputAttribute(index) + ";"); + declarations.AddLine("layout(location = " + + std::to_string(static_cast(index) - + static_cast(Attribute::Index::Attribute_0)) + + ") out vec4 " + 
GetOutputAttribute(index) + ";"); } declarations.AddLine(""); } From 1b41b875dcd24c662b947731f48f4d1c7131fa0b Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 14 Apr 2018 18:50:06 -0400 Subject: [PATCH 27/27] shaders: Add NumTextureSamplers const, remove unused #pragma. --- src/video_core/engines/shader_bytecode.h | 2 -- src/video_core/renderer_opengl/gl_rasterizer.h | 2 +- src/video_core/renderer_opengl/gl_shader_manager.cpp | 2 +- src/video_core/renderer_opengl/gl_shader_manager.h | 3 +++ 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 98af381dfa..eff0c35a15 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -323,7 +323,5 @@ static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); static_assert(std::is_standard_layout::value, "Structure does not have standard layout"); -#pragma pack() - } // namespace Shader } // namespace Tegra diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 32b897eb29..71c21c69b2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -128,7 +128,7 @@ private: OGLVertexArray hw_vao; std::array hw_vao_enabled_attributes; - std::array texture_samplers; + std::array texture_samplers; static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; std::unique_ptr vertex_buffer; OGLBuffer uniform_buffer; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 67f2be0563..7fceedce86 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -38,7 +38,7 @@ void SetShaderSamplerBindings(GLuint shader) { cur_state.Apply(); // Set the texture samplers to correspond to different texture units - for (u32 texture = 0; texture < 
32; ++texture) { + for (u32 texture = 0; texture < NumTextureSamplers; ++texture) { // Set the texture samplers to correspond to different texture units std::string uniform_name = "tex[" + std::to_string(texture) + "]"; GLint uniform_tex = glGetUniformLocation(shader, uniform_name.c_str()); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index b5a7b2a18b..5c8560cf54 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -14,6 +14,9 @@ namespace GLShader { +/// Number of OpenGL texture samplers that can be used in the fragment shader +static constexpr size_t NumTextureSamplers = 32; + using Tegra::Engines::Maxwell3D; namespace Impl {