From 34357b110c3f04f6b98ca586fd776b0df569b6d8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 26 Jun 2019 12:19:43 -0400 Subject: [PATCH] shader_ir: Correct parsing of scheduling instructions and correct sizing --- src/video_core/shader/control_flow.cpp | 41 ++++++++++++++++++-------- src/video_core/shader/decode.cpp | 2 +- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index a26de67958..1775dfd810 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -20,10 +20,10 @@ using Tegra::Shader::OpCode; constexpr s32 unassigned_branch = -2; -/*** +/** * 'ControlStack' represents a static stack of control jumps such as SSY and PBK * stacks in Maxwell. - ***/ + **/ struct ControlStack { static constexpr std::size_t stack_fixed_size = 20; std::array stack{}; @@ -105,9 +105,11 @@ struct BlockInfo { }; struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size) - : program_code{program_code}, program_size{program_size} {} + explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, + const u32 start) + : program_code{program_code}, program_size{program_size}, start{start} {} + u32 start{}; std::vector block_info{}; std::list inspect_queries{}; std::list queries{}; @@ -120,7 +122,7 @@ struct CFGRebuildState { const std::size_t program_size; }; -enum class BlockCollision : u32 { None = 0, Found = 1, Inside = 2 }; +enum class BlockCollision : u32 { None, Found, Inside }; std::pair::iterator> TryGetBlock(CFGRebuildState& state, u32 address) { @@ -155,15 +157,26 @@ Pred GetPredicate(u32 index, bool negated) { return static_cast(index + (negated ? 8 : 0)); } +/** + * Returns whether the instruction at the specified offset is a 'sched' instruction. + * Sched instructions always appear before a sequence of 3 instructions. + */ +constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { + constexpr u32 SchedPeriod = 4; + u32 absolute_offset = offset - main_offset; + + return (absolute_offset % SchedPeriod) == 0; +} + enum class ParseResult : u32 { - ControlCaught = 0, - BlockEnd = 1, - AbnormalFlow = 2, + ControlCaught, + BlockEnd, + AbnormalFlow, }; ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info) { u32 offset = static_cast(address); - const u32 end_address = static_cast(state.program_size / 8U); + const u32 end_address = static_cast(state.program_size / sizeof(Instruction)); const auto insert_label = ([](CFGRebuildState& state, u32 address) { auto pair = state.labels.emplace(address); @@ -183,6 +196,10 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.ignore = true; break; } + if (IsSchedInstruction(offset, state.start)) { + offset++; + continue; + } const Instruction instr = {state.program_code[offset]}; const auto opcode = OpCode::Decode(instr); if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { @@ -447,11 +464,11 @@ bool TryQuery(CFGRebuildState& state) { std::optional ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address) { - CFGRebuildState state{program_code, program_size}; + CFGRebuildState state{program_code, program_size, start_address}; // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); - state.inspect_queries.push_back(start_address); + state.inspect_queries.push_back(state.start); while (!state.inspect_queries.empty()) { if (!TryInspectAddress(state)) { return {}; @@ -459,7 +476,7 @@ std::optional ScanFlow(const ProgramCode& program_code, u } // Decompile Stacks Query start_query{}; - start_query.address = start_address; + start_query.address = state.start; state.queries.push_back(start_query); bool decompiled = true; while (!state.queries.empty()) { diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index b0bd6630fc..07a154d770 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -39,7 +39,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); disable_flow_stack = false; - const auto info = ScanFlow(program_code, program_size, main_offset); + const auto info = ScanFlow(program_code, MAX_PROGRAM_LENGTH * sizeof(u64), main_offset); if (info) { const auto& shader_info = *info; coverage_begin = shader_info.start;