allow dump_syms to operate on contents in memory

dump_syms assumes it is operating on a file and can access a compliant
file system. This change allows dump_syms to operate on the contents of
a file that has already been read into memory. This is useful in a
server context where the file does not exist on the local file system.

Change-Id: Id63f115c2df287083d548dadd5ac487f97bde057
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/3327644
Reviewed-by: Mark Mentovai <mark@chromium.org>
This commit is contained in:
Adam Duke 2022-01-25 17:04:16 -05:00 committed by Mark Mentovai
parent 92a20b6504
commit 772cfc1db6
7 changed files with 97 additions and 230 deletions

View file

@ -1449,7 +1449,7 @@ bool MinidumpGenerator::WriteCVRecord(MDRawModule* module, int cpu_type,
unsigned char identifier[16]; unsigned char identifier[16];
bool result = false; bool result = false;
if (in_memory) { if (in_memory) {
MacFileUtilities::MachoID macho(module_path, MacFileUtilities::MachoID macho(
reinterpret_cast<void*>(module->base_of_image), reinterpret_cast<void*>(module->base_of_image),
static_cast<size_t>(module->size_of_image)); static_cast<size_t>(module->size_of_image));
result = macho.UUIDCommand(cpu_type, CPU_SUBTYPE_MULTIPLE, identifier); result = macho.UUIDCommand(cpu_type, CPU_SUBTYPE_MULTIPLE, identifier);

View file

@ -128,10 +128,11 @@ bool DumpSymbols::Read(const string& filename) {
return false; return false;
} }
input_pathname_ = filename; from_disk_ = true;
// Does this filename refer to a dSYM bundle? // Does this filename refer to a dSYM bundle?
string contents_path = input_pathname_ + "/Contents/Resources/DWARF"; string contents_path = filename + "/Contents/Resources/DWARF";
string object_filename;
if (S_ISDIR(st.st_mode) && if (S_ISDIR(st.st_mode) &&
access(contents_path.c_str(), F_OK) == 0) { access(contents_path.c_str(), F_OK) == 0) {
// If there's one file under Contents/Resources/DWARF then use that, // If there's one file under Contents/Resources/DWARF then use that,
@ -139,30 +140,31 @@ bool DumpSymbols::Read(const string& filename) {
const vector<string> entries = list_directory(contents_path); const vector<string> entries = list_directory(contents_path);
if (entries.size() == 0) { if (entries.size() == 0) {
fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n", fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n",
input_pathname_.c_str()); filename.c_str());
return false; return false;
} }
if (entries.size() > 1) { if (entries.size() > 1) {
fprintf(stderr, "Too many DWARF files in bundle: %s\n", fprintf(stderr, "Too many DWARF files in bundle: %s\n",
input_pathname_.c_str()); filename.c_str());
return false; return false;
} }
object_filename_ = entries[0]; object_filename = entries[0];
} else { } else {
object_filename_ = input_pathname_; object_filename = filename;
} }
// Read the file's contents into memory. // Read the file's contents into memory.
bool read_ok = true; bool read_ok = true;
string error; string error;
if (stat(object_filename_.c_str(), &st) != -1) { scoped_array<uint8_t> contents;
FILE* f = fopen(object_filename_.c_str(), "rb"); off_t total = 0;
if (stat(object_filename.c_str(), &st) != -1) {
FILE* f = fopen(object_filename.c_str(), "rb");
if (f) { if (f) {
contents_.reset(new uint8_t[st.st_size]); contents.reset(new uint8_t[st.st_size]);
off_t total = 0;
while (total < st.st_size && !feof(f)) { while (total < st.st_size && !feof(f)) {
size_t read = fread(&contents_[0] + total, 1, st.st_size - total, f); size_t read = fread(&contents[0] + total, 1, st.st_size - total, f);
if (read == 0) { if (read == 0) {
if (ferror(f)) { if (ferror(f)) {
read_ok = false; read_ok = false;
@ -180,16 +182,22 @@ bool DumpSymbols::Read(const string& filename) {
if (!read_ok) { if (!read_ok) {
fprintf(stderr, "Error reading object file: %s: %s\n", fprintf(stderr, "Error reading object file: %s: %s\n",
object_filename_.c_str(), object_filename.c_str(), error.c_str());
error.c_str());
return false; return false;
} }
return ReadData(contents.release(), total, object_filename);
}
bool DumpSymbols::ReadData(uint8_t* contents, size_t size,
const std::string& filename) {
contents_.reset(contents);
size_ = size;
object_filename_ = filename;
// Get the list of object files present in the file. // Get the list of object files present in the file.
FatReader::Reporter fat_reporter(object_filename_); FatReader::Reporter fat_reporter(object_filename_);
FatReader fat_reader(&fat_reporter); FatReader fat_reader(&fat_reporter);
if (!fat_reader.Read(&contents_[0], if (!fat_reader.Read(contents_.get(), size)) {
st.st_size)) {
return false; return false;
} }
@ -283,7 +291,13 @@ SuperFatArch* DumpSymbols::FindBestMatchForArchitecture(
} }
string DumpSymbols::Identifier() { string DumpSymbols::Identifier() {
FileID file_id(object_filename_.c_str()); scoped_ptr<FileID> file_id;
if (from_disk_) {
file_id.reset(new FileID(object_filename_.c_str()));
} else {
file_id.reset(new FileID(contents_.get(), size_));
}
unsigned char identifier_bytes[16]; unsigned char identifier_bytes[16];
scoped_ptr<Module> module; scoped_ptr<Module> module;
if (!selected_object_file_) { if (!selected_object_file_) {
@ -292,7 +306,7 @@ string DumpSymbols::Identifier() {
} }
cpu_type_t cpu_type = selected_object_file_->cputype; cpu_type_t cpu_type = selected_object_file_->cputype;
cpu_subtype_t cpu_subtype = selected_object_file_->cpusubtype; cpu_subtype_t cpu_subtype = selected_object_file_->cpusubtype;
if (!file_id.MachoIdentifier(cpu_type, cpu_subtype, identifier_bytes)) { if (!file_id->MachoIdentifier(cpu_type, cpu_subtype, identifier_bytes)) {
fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n", fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n",
object_filename_.c_str()); object_filename_.c_str());
return ""; return "";

View file

@ -57,21 +57,30 @@ class DumpSymbols {
DumpSymbols(SymbolData symbol_data, bool handle_inter_cu_refs) DumpSymbols(SymbolData symbol_data, bool handle_inter_cu_refs)
: symbol_data_(symbol_data), : symbol_data_(symbol_data),
handle_inter_cu_refs_(handle_inter_cu_refs), handle_inter_cu_refs_(handle_inter_cu_refs),
input_pathname_(),
object_filename_(), object_filename_(),
contents_(), contents_(),
size_(0),
from_disk_(false),
object_files_(), object_files_(),
selected_object_file_(), selected_object_file_(),
selected_object_name_() { } selected_object_name_() {}
~DumpSymbols() { ~DumpSymbols() {
} }
// Prepare to read debugging information from |filename|. |filename| may be // Prepare to read debugging information from |filename|. |filename| may be
// the name of a universal binary, a Mach-O file, or a dSYM bundle // the name of a fat file, a Mach-O file, or a dSYM bundle containing either
// containing either of the above. On success, return true; if there is a // of the above. On success, return true; if there is a problem reading
// problem reading |filename|, report it and return false. // |filename|, report it and return false.
bool Read(const std::string& filename); bool Read(const std::string& filename);
// Prepare to read debugging information from |contents|. |contents| is
// expected to be the data obtained from reading a fat file, or a Mach-O file.
// |filename| is used to determine the object filename in the generated
// output; there will not be an attempt to open this file as the data
// is already expected to be in memory. On success, return true; if there is a
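// problem reading |contents|, report it and return false.
// Note: this object adopts |contents| (it is handed to the scoped_array that
// holds the file data), so the caller must not free it afterwards. The buffer
// is presumably expected to have been allocated with new[].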
bool ReadData(uint8_t* contents, size_t size, const std::string& filename);
// If this dumper's file includes an object file for |cpu_type| and // If this dumper's file includes an object file for |cpu_type| and
// |cpu_subtype|, then select that object file for dumping, and return // |cpu_subtype|, then select that object file for dumping, and return
// true. Otherwise, return false, and leave this dumper's selected // true. Otherwise, return false, and leave this dumper's selected
@ -162,19 +171,22 @@ class DumpSymbols {
// Whether to handle references between compilation units. // Whether to handle references between compilation units.
const bool handle_inter_cu_refs_; const bool handle_inter_cu_refs_;
// The name of the file or bundle whose symbols this will dump.
// This is the path given to Read, for use in error messages.
std::string input_pathname_;
// The name of the file this DumpSymbols will actually read debugging // The name of the file this DumpSymbols will actually read debugging
// information from. Normally, this is the same as input_pathname_, but if // information from. If the filename passed to Read refers to a dSYM bundle,
// filename refers to a dSYM bundle, then this is the resource file // then this is the resource file within that bundle.
// within that bundle.
std::string object_filename_; std::string object_filename_;
// The complete contents of object_filename_, mapped into memory. // The complete contents of object_filename_, mapped into memory.
scoped_array<uint8_t> contents_; scoped_array<uint8_t> contents_;
// The size of contents_.
size_t size_;
// Records which entry point was used: set to true by Read (the contents were
// loaded from disk) and left false when ReadData is called directly.
// Downstream code paths use this to decide whether they may read from disk.
bool from_disk_;
// A vector of SuperFatArch structures describing the object files // A vector of SuperFatArch structures describing the object files
// object_filename_ contains. If object_filename_ refers to a fat binary, // object_filename_ contains. If object_filename_ refers to a fat binary,
// this may have more than one element; if it refers to a Mach-O file, this // this may have more than one element; if it refers to a Mach-O file, this

View file

@ -33,53 +33,41 @@
// //
// Author: Dan Waylonis // Author: Dan Waylonis
#include "common/mac/file_id.h"
#include <fcntl.h> #include <fcntl.h>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <unistd.h>
#include "common/mac/file_id.h"
#include "common/mac/macho_id.h" #include "common/mac/macho_id.h"
#include "common/scoped_ptr.h"
using MacFileUtilities::MachoID; using MacFileUtilities::MachoID;
namespace google_breakpad { namespace google_breakpad {
FileID::FileID(const char *path) { // Constructs a FileID given a path to a file
FileID::FileID(const char* path) : memory_(nullptr), size_(0) {
snprintf(path_, sizeof(path_), "%s", path); snprintf(path_, sizeof(path_), "%s", path);
} }
bool FileID::FileIdentifier(unsigned char identifier[16]) { // Constructs a FileID given the contents of a file and its size
int fd = open(path_, O_RDONLY); FileID::FileID(void* memory, size_t size)
if (fd == -1) : path_(), memory_(memory), size_(size) {}
return false;
MD5Context md5;
MD5Init(&md5);
// Read 4k x 2 bytes at a time. This is faster than just 4k bytes, but
// doesn't seem to be an unreasonable size for the stack.
unsigned char buffer[4096 * 2];
size_t buffer_size = sizeof(buffer);
while ((buffer_size = read(fd, buffer, buffer_size) > 0)) {
MD5Update(&md5, buffer, static_cast<unsigned>(buffer_size));
}
close(fd);
MD5Final(identifier, &md5);
return true;
}
bool FileID::MachoIdentifier(cpu_type_t cpu_type, bool FileID::MachoIdentifier(cpu_type_t cpu_type,
cpu_subtype_t cpu_subtype, cpu_subtype_t cpu_subtype,
unsigned char identifier[16]) { unsigned char identifier[16]) {
MachoID macho(path_); scoped_ptr<MachoID> macho;
if (memory_) {
if (macho.UUIDCommand(cpu_type, cpu_subtype, identifier)) macho.reset(new MachoID(memory_, size_));
} else {
macho.reset(new MachoID(path_));
}
if (macho->UUIDCommand(cpu_type, cpu_subtype, identifier))
return true; return true;
return macho.MD5(cpu_type, cpu_subtype, identifier); return macho->MD5(cpu_type, cpu_subtype, identifier);
} }
// static // static

View file

@ -36,19 +36,18 @@
#include <limits.h> #include <limits.h>
#include <mach/machine.h> #include <mach/machine.h>
#include <stddef.h>
namespace google_breakpad { namespace google_breakpad {
class FileID { class FileID {
public: public:
FileID(const char *path); // Constructs a FileID given a path to a file
~FileID() {} FileID(const char* path);
// Load the identifier for the file path specified in the constructor into // Constructs a FileID given the contents of a file and its size.
// |identifier|. Return false if the identifier could not be created for the FileID(void* memory, size_t size);
// file. ~FileID() {}
// The current implementation will return the MD5 hash of the file's bytes.
bool FileIdentifier(unsigned char identifier[16]);
// Treat the file as a mach-o file that will contain one or more architectures. // Treat the file as a mach-o file that will contain one or more architectures.
// Accepted values for |cpu_type| and |cpu_subtype| (e.g., CPU_TYPE_X86 or // Accepted values for |cpu_type| and |cpu_subtype| (e.g., CPU_TYPE_X86 or
@ -74,6 +73,16 @@ class FileID {
private: private:
// Storage for the path specified // Storage for the path specified
char path_[PATH_MAX]; char path_[PATH_MAX];
// Storage for contents of a file if this instance is used to operate on in
// memory file data rather than directly from a filesystem. If memory_ is
// null, the file represented by path_ will be opened/read. If memory_ is
// non-null, it is assumed to contain valid data, and no file operations will
// occur.
void* memory_;
// Size of memory_
size_t size_;
}; };
} // namespace google_breakpad } // namespace google_breakpad

View file

@ -37,11 +37,7 @@
#include <fcntl.h> #include <fcntl.h>
#include <mach-o/loader.h> #include <mach-o/loader.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <string.h> #include <string.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include "common/mac/macho_id.h" #include "common/mac/macho_id.h"
#include "common/mac/macho_walker.h" #include "common/mac/macho_walker.h"
@ -54,73 +50,18 @@ using google_breakpad::MD5Update;
using google_breakpad::MD5Final; using google_breakpad::MD5Final;
MachoID::MachoID(const char* path) MachoID::MachoID(const char* path)
: memory_(0), : memory_(0), memory_size_(0), md5_context_(), update_function_(NULL) {
memory_size_(0),
crc_(0),
md5_context_(),
update_function_(NULL) {
snprintf(path_, sizeof(path_), "%s", path); snprintf(path_, sizeof(path_), "%s", path);
} }
MachoID::MachoID(const char* path, void* memory, size_t size) MachoID::MachoID(void* memory, size_t size)
: memory_(memory), : path_(),
memory_size_(size), memory_(memory),
crc_(0), memory_size_(size),
md5_context_(), md5_context_(),
update_function_(NULL) { update_function_(NULL) {}
snprintf(path_, sizeof(path_), "%s", path);
}
MachoID::~MachoID() { MachoID::~MachoID() {}
}
// The CRC info is from http://en.wikipedia.org/wiki/Adler-32
// With optimizations from http://www.zlib.net/
// The largest prime smaller than 65536
#define MOD_ADLER 65521
// MAX_BLOCK is the largest n such that 255n(n+1)/2 + (n+1)(MAX_BLOCK-1) <= 2^32-1
#define MAX_BLOCK 5552
void MachoID::UpdateCRC(unsigned char* bytes, size_t size) {
// Unrolled loops for summing
#define DO1(buf,i) {sum1 += (buf)[i]; sum2 += sum1;}
#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
#define DO16(buf) DO8(buf,0); DO8(buf,8);
// Split up the crc
uint32_t sum1 = crc_ & 0xFFFF;
uint32_t sum2 = (crc_ >> 16) & 0xFFFF;
// Do large blocks
while (size >= MAX_BLOCK) {
size -= MAX_BLOCK;
int block_count = MAX_BLOCK / 16;
do {
DO16(bytes);
bytes += 16;
} while (--block_count);
sum1 %= MOD_ADLER;
sum2 %= MOD_ADLER;
}
// Do remaining bytes
if (size) {
while (size >= 16) {
size -= 16;
DO16(bytes);
bytes += 16;
}
while (size--) {
sum1 += *bytes++;
sum2 += sum1;
}
sum1 %= MOD_ADLER;
sum2 %= MOD_ADLER;
crc_ = (sum2 << 16) | sum1;
}
}
void MachoID::UpdateMD5(unsigned char* bytes, size_t size) { void MachoID::UpdateMD5(unsigned char* bytes, size_t size) {
MD5Update(&md5_context_, bytes, static_cast<unsigned>(size)); MD5Update(&md5_context_, bytes, static_cast<unsigned>(size));
@ -169,59 +110,6 @@ bool MachoID::UUIDCommand(cpu_type_t cpu_type,
return false; return false;
} }
bool MachoID::IDCommand(cpu_type_t cpu_type,
cpu_subtype_t cpu_subtype,
unsigned char identifier[16]) {
struct dylib_command dylib_cmd;
dylib_cmd.cmd = 0;
if (!WalkHeader(cpu_type, cpu_subtype, IDWalkerCB, &dylib_cmd))
return false;
// If we found the command, we'll have initialized the dylib_command
// structure
if (dylib_cmd.cmd == LC_ID_DYLIB) {
// Take the hashed filename, version, and compatibility version bytes
// to form the first 12 bytes, pad the rest with zeros
// create a crude hash of the filename to generate the first 4 bytes
identifier[0] = 0;
identifier[1] = 0;
identifier[2] = 0;
identifier[3] = 0;
for (int j = 0, i = (int)strlen(path_)-1; i>=0 && path_[i]!='/'; ++j, --i) {
identifier[j%4] += path_[i];
}
identifier[4] = (dylib_cmd.dylib.current_version >> 24) & 0xFF;
identifier[5] = (dylib_cmd.dylib.current_version >> 16) & 0xFF;
identifier[6] = (dylib_cmd.dylib.current_version >> 8) & 0xFF;
identifier[7] = dylib_cmd.dylib.current_version & 0xFF;
identifier[8] = (dylib_cmd.dylib.compatibility_version >> 24) & 0xFF;
identifier[9] = (dylib_cmd.dylib.compatibility_version >> 16) & 0xFF;
identifier[10] = (dylib_cmd.dylib.compatibility_version >> 8) & 0xFF;
identifier[11] = dylib_cmd.dylib.compatibility_version & 0xFF;
identifier[12] = (cpu_type >> 24) & 0xFF;
identifier[13] = (cpu_type >> 16) & 0xFF;
identifier[14] = (cpu_type >> 8) & 0xFF;
identifier[15] = cpu_type & 0xFF;
return true;
}
return false;
}
uint32_t MachoID::Adler32(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype) {
update_function_ = &MachoID::UpdateCRC;
crc_ = 0;
if (!WalkHeader(cpu_type, cpu_subtype, WalkerCB, this))
return 0;
return crc_;
}
bool MachoID::MD5(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype, unsigned char identifier[16]) { bool MachoID::MD5(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype, unsigned char identifier[16]) {
update_function_ = &MachoID::UpdateMD5; update_function_ = &MachoID::UpdateMD5;
@ -346,24 +234,4 @@ bool MachoID::UUIDWalkerCB(MachoWalker* walker, load_command* cmd, off_t offset,
// Continue processing // Continue processing
return true; return true;
} }
// static
bool MachoID::IDWalkerCB(MachoWalker* walker, load_command* cmd, off_t offset,
bool swap, void* context) {
if (cmd->cmd == LC_ID_DYLIB) {
struct dylib_command* dylib_cmd = (struct dylib_command*)context;
if (!walker->ReadBytes(dylib_cmd, sizeof(struct dylib_command), offset))
return false;
if (swap)
breakpad_swap_dylib_command(dylib_cmd);
return false;
}
// Continue processing
return true;
}
} // namespace MacFileUtilities } // namespace MacFileUtilities

View file

@ -46,7 +46,7 @@ namespace MacFileUtilities {
class MachoID { class MachoID {
public: public:
MachoID(const char* path); MachoID(const char* path);
MachoID(const char* path, void* memory, size_t size); MachoID(void* memory, size_t size);
~MachoID(); ~MachoID();
// For the given |cpu_type| and |cpu_subtype|, return a UUID from the LC_UUID // For the given |cpu_type| and |cpu_subtype|, return a UUID from the LC_UUID
@ -56,19 +56,6 @@ class MachoID {
cpu_subtype_t cpu_subtype, cpu_subtype_t cpu_subtype,
unsigned char identifier[16]); unsigned char identifier[16]);
// For the given |cpu_type| and |cpu_subtype|, return a UUID from the
// LC_ID_DYLIB command.
// Return false if there isn't a LC_ID_DYLIB command.
bool IDCommand(cpu_type_t cpu_type,
cpu_subtype_t cpu_subtype,
unsigned char identifier[16]);
// For the given |cpu_type| and |cpu_subtype|, return the Adler32 CRC for the
// mach-o data segment(s).
// Return 0 on error (e.g., if the file is not a mach-o file)
uint32_t Adler32(cpu_type_t cpu_type,
cpu_subtype_t cpu_subtype);
// For the given |cpu_type|, and |cpu_subtype| return the MD5 for the mach-o // For the given |cpu_type|, and |cpu_subtype| return the MD5 for the mach-o
// data segment(s). // data segment(s).
// Return true on success, false otherwise // Return true on success, false otherwise
@ -80,10 +67,6 @@ class MachoID {
// Signature of class member function to be called with data read from file // Signature of class member function to be called with data read from file
typedef void (MachoID::*UpdateFunction)(unsigned char* bytes, size_t size); typedef void (MachoID::*UpdateFunction)(unsigned char* bytes, size_t size);
// Update the CRC value by examining |size| |bytes| and applying the algorithm
// to each byte.
void UpdateCRC(unsigned char* bytes, size_t size);
// Update the MD5 value by examining |size| |bytes| and applying the algorithm // Update the MD5 value by examining |size| |bytes| and applying the algorithm
// to each byte. // to each byte.
void UpdateMD5(unsigned char* bytes, size_t size); void UpdateMD5(unsigned char* bytes, size_t size);
@ -103,10 +86,6 @@ class MachoID {
static bool UUIDWalkerCB(MachoWalker* walker, load_command* cmd, off_t offset, static bool UUIDWalkerCB(MachoWalker* walker, load_command* cmd, off_t offset,
bool swap, void* context); bool swap, void* context);
// The callback from the MachoWalker for LC_ID_DYLIB
static bool IDWalkerCB(MachoWalker* walker, load_command* cmd, off_t offset,
bool swap, void* context);
// File path // File path
char path_[PATH_MAX]; char path_[PATH_MAX];
@ -116,9 +95,6 @@ class MachoID {
// Size of the memory region // Size of the memory region
size_t memory_size_; size_t memory_size_;
// The current crc value
uint32_t crc_;
// The MD5 context // The MD5 context
google_breakpad::MD5Context md5_context_; google_breakpad::MD5Context md5_context_;