From 772cfc1db6374b793a4a717466d3efc4effee12b Mon Sep 17 00:00:00 2001 From: Adam Duke Date: Tue, 25 Jan 2022 17:04:16 -0500 Subject: [PATCH] allow dump_syms to operate on contents in memory dump_syms assumes it is operating on a file and can access a compliant file system. This change allows dump_syms to operate on the contents of a file that has already been read into memory. This is useful in a server context where the file does not exist on the local file system. Change-Id: Id63f115c2df287083d548dadd5ac487f97bde057 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/3327644 Reviewed-by: Mark Mentovai --- src/client/mac/handler/minidump_generator.cc | 2 +- src/common/mac/dump_syms.cc | 48 +++--- src/common/mac/dump_syms.h | 36 +++-- src/common/mac/file_id.cc | 44 ++---- src/common/mac/file_id.h | 23 ++- src/common/mac/macho_id.cc | 148 +------------------ src/common/mac/macho_id.h | 26 +--- 7 files changed, 97 insertions(+), 230 deletions(-) diff --git a/src/client/mac/handler/minidump_generator.cc b/src/client/mac/handler/minidump_generator.cc index e0351c4b..d6b0561d 100644 --- a/src/client/mac/handler/minidump_generator.cc +++ b/src/client/mac/handler/minidump_generator.cc @@ -1449,7 +1449,7 @@ bool MinidumpGenerator::WriteCVRecord(MDRawModule* module, int cpu_type, unsigned char identifier[16]; bool result = false; if (in_memory) { - MacFileUtilities::MachoID macho(module_path, + MacFileUtilities::MachoID macho( reinterpret_cast(module->base_of_image), static_cast(module->size_of_image)); result = macho.UUIDCommand(cpu_type, CPU_SUBTYPE_MULTIPLE, identifier); diff --git a/src/common/mac/dump_syms.cc b/src/common/mac/dump_syms.cc index 6df32bc1..bd69218d 100644 --- a/src/common/mac/dump_syms.cc +++ b/src/common/mac/dump_syms.cc @@ -128,10 +128,11 @@ bool DumpSymbols::Read(const string& filename) { return false; } - input_pathname_ = filename; + from_disk_ = true; // Does this filename refer to a dSYM bundle? - string contents_path = input_pathname_ + "/Contents/Resources/DWARF"; + string contents_path = filename + "/Contents/Resources/DWARF"; + string object_filename; if (S_ISDIR(st.st_mode) && access(contents_path.c_str(), F_OK) == 0) { // If there's one file under Contents/Resources/DWARF then use that, @@ -139,30 +140,31 @@ bool DumpSymbols::Read(const string& filename) { const vector entries = list_directory(contents_path); if (entries.size() == 0) { fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n", - input_pathname_.c_str()); + filename.c_str()); return false; } if (entries.size() > 1) { fprintf(stderr, "Too many DWARF files in bundle: %s\n", - input_pathname_.c_str()); + filename.c_str()); return false; } - object_filename_ = entries[0]; + object_filename = entries[0]; } else { - object_filename_ = input_pathname_; + object_filename = filename; } // Read the file's contents into memory. bool read_ok = true; string error; - if (stat(object_filename_.c_str(), &st) != -1) { - FILE* f = fopen(object_filename_.c_str(), "rb"); + scoped_array contents; + off_t total = 0; + if (stat(object_filename.c_str(), &st) != -1) { + FILE* f = fopen(object_filename.c_str(), "rb"); if (f) { - contents_.reset(new uint8_t[st.st_size]); - off_t total = 0; + contents.reset(new uint8_t[st.st_size]); while (total < st.st_size && !feof(f)) { - size_t read = fread(&contents_[0] + total, 1, st.st_size - total, f); + size_t read = fread(&contents[0] + total, 1, st.st_size - total, f); if (read == 0) { if (ferror(f)) { read_ok = false; @@ -180,16 +182,22 @@ bool DumpSymbols::Read(const string& filename) { if (!read_ok) { fprintf(stderr, "Error reading object file: %s: %s\n", - object_filename_.c_str(), - error.c_str()); + object_filename.c_str(), error.c_str()); return false; } + return ReadData(contents.release(), total, object_filename); +} + +bool DumpSymbols::ReadData(uint8_t* contents, size_t size, + const std::string& filename) { + contents_.reset(contents); + size_ = size; + object_filename_ = filename; // Get the list of object files present in the file. FatReader::Reporter fat_reporter(object_filename_); FatReader fat_reader(&fat_reporter); - if (!fat_reader.Read(&contents_[0], - st.st_size)) { + if (!fat_reader.Read(contents_.get(), size)) { return false; } @@ -283,7 +291,13 @@ SuperFatArch* DumpSymbols::FindBestMatchForArchitecture( } string DumpSymbols::Identifier() { - FileID file_id(object_filename_.c_str()); + scoped_ptr file_id; + + if (from_disk_) { + file_id.reset(new FileID(object_filename_.c_str())); + } else { + file_id.reset(new FileID(contents_.get(), size_)); + } unsigned char identifier_bytes[16]; scoped_ptr module; if (!selected_object_file_) { @@ -292,7 +306,7 @@ string DumpSymbols::Identifier() { } cpu_type_t cpu_type = selected_object_file_->cputype; cpu_subtype_t cpu_subtype = selected_object_file_->cpusubtype; - if (!file_id.MachoIdentifier(cpu_type, cpu_subtype, identifier_bytes)) { + if (!file_id->MachoIdentifier(cpu_type, cpu_subtype, identifier_bytes)) { fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n", object_filename_.c_str()); return ""; diff --git a/src/common/mac/dump_syms.h b/src/common/mac/dump_syms.h index 4365e294..34ba14bc 100644 --- a/src/common/mac/dump_syms.h +++ b/src/common/mac/dump_syms.h @@ -57,21 +57,30 @@ class DumpSymbols { DumpSymbols(SymbolData symbol_data, bool handle_inter_cu_refs) : symbol_data_(symbol_data), handle_inter_cu_refs_(handle_inter_cu_refs), - input_pathname_(), object_filename_(), contents_(), + size_(0), + from_disk_(false), object_files_(), selected_object_file_(), - selected_object_name_() { } + selected_object_name_() {} ~DumpSymbols() { } // Prepare to read debugging information from |filename|. |filename| may be - // the name of a universal binary, a Mach-O file, or a dSYM bundle - // containing either of the above. On success, return true; if there is a - // problem reading |filename|, report it and return false. + // the name of a fat file, a Mach-O file, or a dSYM bundle containing either + // of the above. On success, return true; if there is a problem reading + // |filename|, report it and return false. bool Read(const std::string& filename); + // Prepare to read debugging information from |contents|. |contents| is + // expected to be the data obtained from reading a fat file, or a Mach-O file. + // |filename| is used to determine the object filename in the generated + // output; there will not be an attempt to open this file as the data + // is already expected to be in memory. On success, return true; if there is a + // problem reading |contents|, report it and return false. + bool ReadData(uint8_t* contents, size_t size, const std::string& filename); + // If this dumper's file includes an object file for |cpu_type| and // |cpu_subtype|, then select that object file for dumping, and return // true. Otherwise, return false, and leave this dumper's selected @@ -162,19 +171,22 @@ class DumpSymbols { // Whether to handle references between compilation units. const bool handle_inter_cu_refs_; - // The name of the file or bundle whose symbols this will dump. - // This is the path given to Read, for use in error messages. - std::string input_pathname_; - // The name of the file this DumpSymbols will actually read debugging - // information from. Normally, this is the same as input_pathname_, but if - // filename refers to a dSYM bundle, then this is the resource file - // within that bundle. + // information from. If the filename passed to Read refers to a dSYM bundle, + // then this is the resource file within that bundle. std::string object_filename_; // The complete contents of object_filename_, mapped into memory. scoped_array contents_; + // The size of contents_. + size_t size_; + + // Indicates which entry point to DumpSymbols was used, i.e. Read vs ReadData. + // This is used to indicate that downstream code paths can/should also read + // from disk or not. + bool from_disk_; + // A vector of SuperFatArch structures describing the object files // object_filename_ contains. If object_filename_ refers to a fat binary, // this may have more than one element; if it refers to a Mach-O file, this diff --git a/src/common/mac/file_id.cc b/src/common/mac/file_id.cc index 4661d5d6..504e8202 100644 --- a/src/common/mac/file_id.cc +++ b/src/common/mac/file_id.cc @@ -33,53 +33,41 @@ // // Author: Dan Waylonis +#include "common/mac/file_id.h" + #include #include #include -#include -#include "common/mac/file_id.h" #include "common/mac/macho_id.h" +#include "common/scoped_ptr.h" using MacFileUtilities::MachoID; namespace google_breakpad { -FileID::FileID(const char *path) { +// Constructs a FileID given a path to a file +FileID::FileID(const char* path) : memory_(nullptr), size_(0) { snprintf(path_, sizeof(path_), "%s", path); } -bool FileID::FileIdentifier(unsigned char identifier[16]) { - int fd = open(path_, O_RDONLY); - if (fd == -1) - return false; - - MD5Context md5; - MD5Init(&md5); - - // Read 4k x 2 bytes at a time. This is faster than just 4k bytes, but - // doesn't seem to be an unreasonable size for the stack. - unsigned char buffer[4096 * 2]; - size_t buffer_size = sizeof(buffer); - while ((buffer_size = read(fd, buffer, buffer_size) > 0)) { - MD5Update(&md5, buffer, static_cast(buffer_size)); - } - - close(fd); - MD5Final(identifier, &md5); - - return true; -} +// Constructs a FileID given the contents of a file and its size +FileID::FileID(void* memory, size_t size) + : path_(), memory_(memory), size_(size) {} bool FileID::MachoIdentifier(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype, unsigned char identifier[16]) { - MachoID macho(path_); - - if (macho.UUIDCommand(cpu_type, cpu_subtype, identifier)) + scoped_ptr macho; + if (memory_) { + macho.reset(new MachoID(memory_, size_)); + } else { + macho.reset(new MachoID(path_)); + } + if (macho->UUIDCommand(cpu_type, cpu_subtype, identifier)) return true; - return macho.MD5(cpu_type, cpu_subtype, identifier); + return macho->MD5(cpu_type, cpu_subtype, identifier); } // static diff --git a/src/common/mac/file_id.h b/src/common/mac/file_id.h index 5d60e84c..430d41d5 100644 --- a/src/common/mac/file_id.h +++ b/src/common/mac/file_id.h @@ -36,19 +36,18 @@ #include #include +#include namespace google_breakpad { class FileID { public: - FileID(const char *path); - ~FileID() {} + // Constructs a FileID given a path to a file + FileID(const char* path); - // Load the identifier for the file path specified in the constructor into - // |identifier|. Return false if the identifier could not be created for the - // file. - // The current implementation will return the MD5 hash of the file's bytes. - bool FileIdentifier(unsigned char identifier[16]); + // Constructs a FileID given the contents of a file and its size. + FileID(void* memory, size_t size); + ~FileID() {} // Treat the file as a mach-o file that will contain one or more archicture. // Accepted values for |cpu_type| and |cpu_subtype| (e.g., CPU_TYPE_X86 or @@ -74,6 +73,16 @@ class FileID { private: // Storage for the path specified char path_[PATH_MAX]; + + // Storage for contents of a file if this instance is used to operate on in + // memory file data rather than directly from a filesystem. If memory_ is + // null, the file represented by path_ will be opened/read. If memory_ is + // non-null, it is assumed to contain valid data, and no file operations will + // occur. + void* memory_; + + // Size of memory_ + size_t size_; }; } // namespace google_breakpad diff --git a/src/common/mac/macho_id.cc b/src/common/mac/macho_id.cc index 3cf1d4b5..9b7ca24f 100644 --- a/src/common/mac/macho_id.cc +++ b/src/common/mac/macho_id.cc @@ -37,11 +37,7 @@ #include #include #include -#include #include -#include -#include -#include #include "common/mac/macho_id.h" #include "common/mac/macho_walker.h" @@ -54,73 +50,18 @@ using google_breakpad::MD5Update; using google_breakpad::MD5Final; MachoID::MachoID(const char* path) - : memory_(0), - memory_size_(0), - crc_(0), - md5_context_(), - update_function_(NULL) { + : memory_(0), memory_size_(0), md5_context_(), update_function_(NULL) { snprintf(path_, sizeof(path_), "%s", path); } -MachoID::MachoID(const char* path, void* memory, size_t size) - : memory_(memory), - memory_size_(size), - crc_(0), - md5_context_(), - update_function_(NULL) { - snprintf(path_, sizeof(path_), "%s", path); -} +MachoID::MachoID(void* memory, size_t size) + : path_(), + memory_(memory), + memory_size_(size), + md5_context_(), + update_function_(NULL) {} -MachoID::~MachoID() { -} - -// The CRC info is from http://en.wikipedia.org/wiki/Adler-32 -// With optimizations from http://www.zlib.net/ - -// The largest prime smaller than 65536 -#define MOD_ADLER 65521 -// MAX_BLOCK is the largest n such that 255n(n+1)/2 + (n+1)(MAX_BLOCK-1) <= 2^32-1 -#define MAX_BLOCK 5552 - -void MachoID::UpdateCRC(unsigned char* bytes, size_t size) { -// Unrolled loops for summing -#define DO1(buf,i) {sum1 += (buf)[i]; sum2 += sum1;} -#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); -#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); -#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); -#define DO16(buf) DO8(buf,0); DO8(buf,8); - // Split up the crc - uint32_t sum1 = crc_ & 0xFFFF; - uint32_t sum2 = (crc_ >> 16) & 0xFFFF; - - // Do large blocks - while (size >= MAX_BLOCK) { - size -= MAX_BLOCK; - int block_count = MAX_BLOCK / 16; - do { - DO16(bytes); - bytes += 16; - } while (--block_count); - sum1 %= MOD_ADLER; - sum2 %= MOD_ADLER; - } - - // Do remaining bytes - if (size) { - while (size >= 16) { - size -= 16; - DO16(bytes); - bytes += 16; - } - while (size--) { - sum1 += *bytes++; - sum2 += sum1; - } - sum1 %= MOD_ADLER; - sum2 %= MOD_ADLER; - crc_ = (sum2 << 16) | sum1; - } -} +MachoID::~MachoID() {} void MachoID::UpdateMD5(unsigned char* bytes, size_t size) { MD5Update(&md5_context_, bytes, static_cast(size)); @@ -169,59 +110,6 @@ bool MachoID::UUIDCommand(cpu_type_t cpu_type, return false; } -bool MachoID::IDCommand(cpu_type_t cpu_type, - cpu_subtype_t cpu_subtype, - unsigned char identifier[16]) { - struct dylib_command dylib_cmd; - dylib_cmd.cmd = 0; - if (!WalkHeader(cpu_type, cpu_subtype, IDWalkerCB, &dylib_cmd)) - return false; - - // If we found the command, we'll have initialized the dylib_command - // structure - if (dylib_cmd.cmd == LC_ID_DYLIB) { - // Take the hashed filename, version, and compatability version bytes - // to form the first 12 bytes, pad the rest with zeros - - // create a crude hash of the filename to generate the first 4 bytes - identifier[0] = 0; - identifier[1] = 0; - identifier[2] = 0; - identifier[3] = 0; - - for (int j = 0, i = (int)strlen(path_)-1; i>=0 && path_[i]!='/'; ++j, --i) { - identifier[j%4] += path_[i]; - } - - identifier[4] = (dylib_cmd.dylib.current_version >> 24) & 0xFF; - identifier[5] = (dylib_cmd.dylib.current_version >> 16) & 0xFF; - identifier[6] = (dylib_cmd.dylib.current_version >> 8) & 0xFF; - identifier[7] = dylib_cmd.dylib.current_version & 0xFF; - identifier[8] = (dylib_cmd.dylib.compatibility_version >> 24) & 0xFF; - identifier[9] = (dylib_cmd.dylib.compatibility_version >> 16) & 0xFF; - identifier[10] = (dylib_cmd.dylib.compatibility_version >> 8) & 0xFF; - identifier[11] = dylib_cmd.dylib.compatibility_version & 0xFF; - identifier[12] = (cpu_type >> 24) & 0xFF; - identifier[13] = (cpu_type >> 16) & 0xFF; - identifier[14] = (cpu_type >> 8) & 0xFF; - identifier[15] = cpu_type & 0xFF; - - return true; - } - - return false; -} - -uint32_t MachoID::Adler32(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype) { - update_function_ = &MachoID::UpdateCRC; - crc_ = 0; - - if (!WalkHeader(cpu_type, cpu_subtype, WalkerCB, this)) - return 0; - - return crc_; -} - bool MachoID::MD5(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype, unsigned char identifier[16]) { update_function_ = &MachoID::UpdateMD5; @@ -346,24 +234,4 @@ bool MachoID::UUIDWalkerCB(MachoWalker* walker, load_command* cmd, off_t offset, // Continue processing return true; } - -// static -bool MachoID::IDWalkerCB(MachoWalker* walker, load_command* cmd, off_t offset, - bool swap, void* context) { - if (cmd->cmd == LC_ID_DYLIB) { - struct dylib_command* dylib_cmd = (struct dylib_command*)context; - - if (!walker->ReadBytes(dylib_cmd, sizeof(struct dylib_command), offset)) - return false; - - if (swap) - breakpad_swap_dylib_command(dylib_cmd); - - return false; - } - - // Continue processing - return true; -} - } // namespace MacFileUtilities diff --git a/src/common/mac/macho_id.h b/src/common/mac/macho_id.h index e8874c37..0262697d 100644 --- a/src/common/mac/macho_id.h +++ b/src/common/mac/macho_id.h @@ -46,7 +46,7 @@ namespace MacFileUtilities { class MachoID { public: MachoID(const char* path); - MachoID(const char* path, void* memory, size_t size); + MachoID(void* memory, size_t size); ~MachoID(); // For the given |cpu_type| and |cpu_subtype|, return a UUID from the LC_UUID @@ -56,19 +56,6 @@ class MachoID { cpu_subtype_t cpu_subtype, unsigned char identifier[16]); - // For the given |cpu_type| and |cpu_subtype|, return a UUID from the - // LC_ID_DYLIB command. - // Return false if there isn't a LC_ID_DYLIB command. - bool IDCommand(cpu_type_t cpu_type, - cpu_subtype_t cpu_subtype, - unsigned char identifier[16]); - - // For the given |cpu_type| and |cpu_subtype|, return the Adler32 CRC for the - // mach-o data segment(s). - // Return 0 on error (e.g., if the file is not a mach-o file) - uint32_t Adler32(cpu_type_t cpu_type, - cpu_subtype_t cpu_subtype); - // For the given |cpu_type|, and |cpu_subtype| return the MD5 for the mach-o // data segment(s). // Return true on success, false otherwise @@ -80,10 +67,6 @@ class MachoID { // Signature of class member function to be called with data read from file typedef void (MachoID::*UpdateFunction)(unsigned char* bytes, size_t size); - // Update the CRC value by examining |size| |bytes| and applying the algorithm - // to each byte. - void UpdateCRC(unsigned char* bytes, size_t size); - // Update the MD5 value by examining |size| |bytes| and applying the algorithm // to each byte. void UpdateMD5(unsigned char* bytes, size_t size); @@ -103,10 +86,6 @@ class MachoID { static bool UUIDWalkerCB(MachoWalker* walker, load_command* cmd, off_t offset, bool swap, void* context); - // The callback from the MachoWalker for LC_ID_DYLIB - static bool IDWalkerCB(MachoWalker* walker, load_command* cmd, off_t offset, - bool swap, void* context); - // File path char path_[PATH_MAX]; @@ -116,9 +95,6 @@ class MachoID { // Size of the memory region size_t memory_size_; - // The current crc value - uint32_t crc_; - // The MD5 context google_breakpad::MD5Context md5_context_;