From f37e39deb9abe88b4874ebc2889ed52e02ed9c13 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 14 Aug 2014 14:30:38 +0200 Subject: [PATCH] Pica: Add debug utilities for dumping shaders. --- src/video_core/debug_utils/debug_utils.cpp | 205 +++++++++++++++++++++ src/video_core/debug_utils/debug_utils.h | 3 + src/video_core/pica.h | 2 +- src/video_core/vertex_shader.cpp | 18 ++ 4 files changed, 227 insertions(+), 1 deletion(-) diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index ac895ec3a9..f41249eacc 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 // Refer to the license.txt file included. +#include #include #include @@ -55,6 +56,210 @@ void GeometryDumper::Dump() { } } +#pragma pack(1) +struct DVLBHeader { + enum : u32 { + MAGIC_WORD = 0x424C5644, // "DVLB" + }; + + u32 magic_word; + u32 num_programs; +// u32 dvle_offset_table[]; +}; +static_assert(sizeof(DVLBHeader) == 0x8, "Incorrect structure size"); + +struct DVLPHeader { + enum : u32 { + MAGIC_WORD = 0x504C5644, // "DVLP" + }; + + u32 magic_word; + u32 version; + u32 binary_offset; // relative to DVLP start + u32 binary_size_words; + u32 swizzle_patterns_offset; + u32 swizzle_patterns_num_entries; + u32 unk2; +}; +static_assert(sizeof(DVLPHeader) == 0x1C, "Incorrect structure size"); + +struct DVLEHeader { + enum : u32 { + MAGIC_WORD = 0x454c5644, // "DVLE" + }; + + enum class ShaderType : u8 { + VERTEX = 0, + GEOMETRY = 1, + }; + + u32 magic_word; + u16 pad1; + ShaderType type; + u8 pad2; + u32 main_offset_words; // offset within binary blob + u32 endmain_offset_words; + u32 pad3; + u32 pad4; + u32 constant_table_offset; + u32 constant_table_size; // number of entries + u32 label_table_offset; + u32 label_table_size; + u32 output_register_table_offset; + u32 output_register_table_size; + u32 uniform_table_offset; + u32 uniform_table_size; + u32 symbol_table_offset; + u32 symbol_table_size; + +}; +static_assert(sizeof(DVLEHeader) == 0x40, "Incorrect structure size"); +#pragma pack() + +void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, + u32 main_offset, const Regs::VSOutputAttributes* output_attributes) +{ + // NOTE: Permanently enabling this just trashes hard disks for no reason. + // Hence, this is currently disabled. + return; + + struct StuffToWrite { + u8* pointer; + u32 size; + }; + std::vector writing_queue; + u32 write_offset = 0; + + auto QueueForWriting = [&writing_queue,&write_offset](u8* pointer, u32 size) { + writing_queue.push_back({pointer, size}); + u32 old_write_offset = write_offset; + write_offset += size; + return old_write_offset; + }; + + // First off, try to translate Pica state (one enum for output attribute type and component) + // into shbin format (separate type and component mask). + union OutputRegisterInfo { + enum Type : u64 { + POSITION = 0, + COLOR = 2, + TEXCOORD0 = 3, + TEXCOORD1 = 5, + TEXCOORD2 = 6, + }; + + BitField< 0, 64, u64> hex; + + BitField< 0, 16, Type> type; + BitField<16, 16, u64> id; + BitField<32, 4, u64> component_mask; + }; + + // This is put into a try-catch block to make sure we notice unknown configurations. + std::vector output_info_table; + for (int i = 0; i < 7; ++i) { + using OutputAttributes = Pica::Regs::VSOutputAttributes; + + // TODO: It's still unclear how the attribute components map to the register! + // Once we know that, this code probably will not make much sense anymore. + std::map > map = { + { OutputAttributes::POSITION_X, { OutputRegisterInfo::POSITION, 1} }, + { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} }, + { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} }, + { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} }, + { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} }, + { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} }, + { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} }, + { OutputAttributes::COLOR_A, { OutputRegisterInfo::COLOR, 8} }, + { OutputAttributes::TEXCOORD0_U, { OutputRegisterInfo::TEXCOORD0, 1} }, + { OutputAttributes::TEXCOORD0_V, { OutputRegisterInfo::TEXCOORD0, 2} }, + { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} }, + { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} }, + { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} }, + { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} } + }; + + for (const auto& semantic : std::vector{ + output_attributes[i].map_x, + output_attributes[i].map_y, + output_attributes[i].map_z, + output_attributes[i].map_w }) { + if (semantic == OutputAttributes::INVALID) + continue; + + try { + OutputRegisterInfo::Type type = map.at(semantic).first; + u32 component_mask = map.at(semantic).second; + + auto it = std::find_if(output_info_table.begin(), output_info_table.end(), + [&i, &type](const OutputRegisterInfo& info) { + return info.id == i && info.type == type; + } + ); + + if (it == output_info_table.end()) { + output_info_table.push_back({}); + output_info_table.back().type = type; + output_info_table.back().component_mask = component_mask; + output_info_table.back().id = i; + } else { + it->component_mask = it->component_mask | component_mask; + } + } catch (const std::out_of_range& oor) { + _dbg_assert_msg_(GPU, 0, "Unknown output attribute mapping"); + ERROR_LOG(GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x", + (int)output_attributes[i].map_x.Value(), + (int)output_attributes[i].map_y.Value(), + (int)output_attributes[i].map_z.Value(), + (int)output_attributes[i].map_w.Value()); + } + } + } + + + struct { + DVLBHeader header; + u32 dvle_offset; + } dvlb{ {DVLBHeader::MAGIC_WORD, 1 } }; // 1 DVLE + + DVLPHeader dvlp{ DVLPHeader::MAGIC_WORD }; + DVLEHeader dvle{ DVLEHeader::MAGIC_WORD }; + + QueueForWriting((u8*)&dvlb, sizeof(dvlb)); + u32 dvlp_offset = QueueForWriting((u8*)&dvlp, sizeof(dvlp)); + dvlb.dvle_offset = QueueForWriting((u8*)&dvle, sizeof(dvle)); + + // TODO: Reduce the amount of binary code written to relevant portions + dvlp.binary_offset = write_offset - dvlp_offset; + dvlp.binary_size_words = binary_size; + QueueForWriting((u8*)binary_data, binary_size * sizeof(u32)); + + dvlp.swizzle_patterns_offset = write_offset - dvlp_offset; + dvlp.swizzle_patterns_num_entries = swizzle_size; + u32 dummy = 0; + for (int i = 0; i < swizzle_size; ++i) { + QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i])); + QueueForWriting((u8*)&dummy, sizeof(dummy)); + } + + dvle.main_offset_words = main_offset; + dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; + dvle.output_register_table_size = output_info_table.size(); + QueueForWriting((u8*)output_info_table.data(), output_info_table.size() * sizeof(OutputRegisterInfo)); + + // TODO: Create a label table for "main" + + + // Write data to file + static int dump_index = 0; + std::string filename = std::string("shader_dump") + std::to_string(++dump_index) + std::string(".shbin"); + std::ofstream file(filename, std::ios_base::out | std::ios_base::binary); + + for (auto& chunk : writing_queue) { + file.write((char*)chunk.pointer, chunk.size); + } +} + } // namespace } // namespace diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 9b4dce539f..bd7a0a89b6 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -35,6 +35,9 @@ private: std::vector faces; }; +void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, + u32 main_offset, const Regs::VSOutputAttributes* output_attributes); + } // namespace } // namespace diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 640830144c..fe886c16fa 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -57,7 +57,7 @@ struct Regs { INSERT_PADDING_WORDS(0x1); - union { + union VSOutputAttributes { // Maps components of output vertex attributes to semantics enum Semantic : u32 { diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 93830a96af..8df14b51f9 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -4,6 +4,7 @@ #include "pica.h" #include "vertex_shader.h" +#include "debug_utils/debug_utils.h" #include #include @@ -50,6 +51,11 @@ struct VertexShaderState { }; u32 call_stack[8]; // TODO: What is the maximal call stack depth? u32* call_stack_pointer; + + struct { + u32 max_offset; // maximum program counter ever reached + u32 max_opdesc_id; // maximum swizzle pattern index ever used + } debug; }; static void ProcessShaderCode(VertexShaderState& state) { @@ -57,6 +63,7 @@ static void ProcessShaderCode(VertexShaderState& state) { bool increment_pc = true; bool exit_loop = false; const Instruction& instr = *(const Instruction*)state.program_counter; + state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x @@ -88,6 +95,7 @@ static void ProcessShaderCode(VertexShaderState& state) { switch (instr.opcode) { case Instruction::OpCode::ADD: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -100,6 +108,7 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::MUL: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -113,6 +122,7 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::DP3: case Instruction::OpCode::DP4: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); float24 dot = float24::FromFloat32(0.f); int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; for (int i = 0; i < num_components; ++i) @@ -130,6 +140,7 @@ static void ProcessShaderCode(VertexShaderState& state) { // Reciprocal case Instruction::OpCode::RCP: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -145,6 +156,7 @@ static void ProcessShaderCode(VertexShaderState& state) { // Reciprocal Square Root case Instruction::OpCode::RSQ: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -159,6 +171,7 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::MOV: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -212,6 +225,8 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) const u32* main = &shader_memory[registers.vs_main_offset]; state.program_counter = (u32*)main; + state.debug.max_offset = 0; + state.debug.max_opdesc_id = 0; // Setup input register table const auto& attribute_register_map = registers.vs_input_register_map; @@ -255,6 +270,9 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) state.call_stack_pointer = &state.call_stack[0]; ProcessShaderCode(state); + DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data, + state.debug.max_opdesc_id, registers.vs_main_offset, + registers.vs_output_attributes); DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),