Linux dumper: Move the data structures representing the breakpad data into their own class.

src/common/linux/module.h defines a new class, google_breakpad::Module,
that can represent the contents of a breakpad symbol file.  Module::Write
writes a well-formed symbol file to the given stream.

src/common/linux/dump_symbols.cc can now lose its symbol-file-writing
code, and change DumpStabsHandler to populate a Module object, rather
than the old SymbolInfo/SourceFileInfo/... collection of types.

The code to compute function and line sizes, even in the absence of
reliable size data in STABS, is moved into a new Finalize method of
DumpStabsHandler, which is responsible for completing the Module's
contents.

a=jimblandy
r=nealsid


git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@380 4c0a9323-5329-0410-9bdc-e9ce6186880e
This commit is contained in:
jimblandy@gmail.com 2009-08-07 19:28:45 +00:00
parent f7cc9ef6f5
commit eab03fdb72
4 changed files with 577 additions and 446 deletions

View file

@ -41,128 +41,34 @@
#include <unistd.h>
#include <algorithm>
#include <string>
#include <cstring>
#include <functional>
#include <list>
#include <vector>
#include <map>
#include <string.h>
#include <string>
#include <vector>
#include "common/linux/stabs_reader.h"
#include "common/linux/dump_symbols.h"
#include "common/linux/file_id.h"
#include "common/linux/guid_creator.h"
#include "common/linux/module.h"
#include "common/linux/stabs_reader.h"
#include "processor/scoped_ptr.h"
// This namespace contains helper functions.
namespace {
struct SourceFileInfo;
// Information about one source line.
struct LineInfo {
// Offset of the line from the start of its function.
// Loaded from the stab symbol.
ElfW(Off) rva_to_func;
// Offset of the line from the base of the loaded binary.
// ComputeSizeAndRVA fills this in as rva_to_func plus the enclosing
// function's rva_to_base.
ElfW(Off) rva_to_base;
// Size of the line.
// It is the difference between the starting address of this line and the
// starting address of the next N_SLINE, N_FUN or N_SO.
uint32_t size;
// Line number.
uint32_t line_num;
// The source file this line belongs to.
SourceFileInfo *file;
};
// The lines belonging to one function.
typedef std::list<struct LineInfo> LineInfoList;
// Information about one function.
struct FuncInfo {
// Name of the function, as given by the STABS reader; it is demangled
// only when the function is written out.
std::string name;
// Offset of the function from the base of the loaded binary.
// ComputeSizeAndRVA fills this in as addr minus the loading address.
ElfW(Off) rva_to_base;
// Virtual address of the function.
// Loaded from the stab symbol.
ElfW(Addr) addr;
// Size of the function.
// It is the difference between the starting address of this function and
// the starting address of the next N_FUN or N_SO.
uint32_t size;
// Total size of stack parameters.
uint32_t stack_param_size;
// Line information for this function.
LineInfoList line_info;
};
// The functions belonging to one source file.
typedef std::list<struct FuncInfo> FuncInfoList;
// Information about one source file.
struct SourceFileInfo {
// Name of the source file. FindSourceFileInfo stores the pointer it is
// given without copying the string, so the string must outlive this
// structure.
const char *name;
// Starting address of the source file. Zero until the file has been
// seen as the main file of a compilation unit.
ElfW(Addr) addr;
// Id of the source file; -1 until an id is assigned when the FILE lines
// are written.
int source_id;
// Information about the functions in this file.
FuncInfoList func_info;
};
// A simple std::list of pointers to SourceFileInfo structures, that
// owns the structures pointed to: destroying the list destroys them,
// as well.
class SourceFileInfoList : public std::list<SourceFileInfo *> {
public:
~SourceFileInfoList() {
for (iterator it = this->begin(); it != this->end(); it++)
delete *it;
}
};
typedef std::map<const char *, SourceFileInfo *> NameToFileMap;
// Information about a whole symbol table.
// This is the root of all the symbol data gathered for a module.
struct SymbolInfo {
// The main files used in this module. This does not include header
// files; it includes only files that were provided as the primary
// source file for the compilation unit. In STABS, these are files
// named in 'N_SO' entries. The list owns the structures it points to.
SourceFileInfoList main_files;
// Map from file names to source file structures. Note that this
// map's keys are compared as pointers, not strings, so if the same
// name appears at two different addresses in stabstr, the map will
// treat that as two different names. If the linker didn't unify
// names in .stabstr (which it does), this would result in duplicate
// FILE lines, which is benign.
NameToFileMap name_to_file;
// An array of some addresses at which a file boundary occurs.
//
// The STABS information describing a compilation unit gives the
// unit's start address, but not its ending address or size. Those
// must be inferred by finding the start address of the next file.
// For the last compilation unit, or when one compilation unit ends
// before the next one starts, STABS includes an N_SO entry whose
// filename is the empty string; such an entry's address serves
// simply to mark the end of the preceding compilation unit. Rather
// than create a SourceFileInfo for such entries, we record their
// addresses here. These are not necessarily sorted.
std::vector<ElfW(Addr)> file_boundaries;
};
using google_breakpad::Module;
using std::vector;
// Stab section name.
static const char *kStabName = ".stab";
// Demangle using abi call.
// Older GCC may not support it.
static std::string Demangle(const char *mangled) {
static std::string Demangle(const std::string &mangled) {
int status = 0;
char *demangled = abi::__cxa_demangle(mangled, NULL, NULL, &status);
char *demangled = abi::__cxa_demangle(mangled.c_str(), NULL, NULL, &status);
if (status == 0 && demangled != NULL) {
std::string str(demangled);
free(demangled);
@ -222,141 +128,15 @@ static const ElfW(Shdr) *FindSectionByName(const char *name,
return NULL;
}
// Look up the SourceFileInfo for the file called NAME in SYMBOLS'
// name_to_file map, creating and registering a fresh one if the map
// has no entry for it yet.
//
// A freshly created file has a source id of -1 and an address of zero.
// If the file is a main file, the caller is responsible for setting
// its address and for adding it to the list of main files.
//
// NAME is not copied: the new structure keeps the pointer itself, so
// NAME must remain valid for as long as the symbol table does.
static SourceFileInfo *FindSourceFileInfo(SymbolInfo *symbols,
                                          const char *name) {
  // operator[] gives us the existing slot, or a new NULL one.
  SourceFileInfo *&slot = symbols->name_to_file[name];
  if (slot != NULL)
    return slot;

  SourceFileInfo *created = new SourceFileInfo;
  created->name = name;
  created->source_id = -1;
  created->addr = 0;
  slot = created;
  return created;
}
// Compute size and RVA information for every function and line in
// SYMBOLS, based on the data loaded from the stab section.
//
// LOADING_ADDR is the address at which the module is loaded; each
// function's rva_to_base is its address minus LOADING_ADDR, and every
// function address is asserted to be at least LOADING_ADDR.
//
// STABS gives us start addresses only, so a function's size is taken
// to be the distance to the next known boundary (file start, file end,
// or function start) above it. Line sizes are the distance to the next
// line in the same function, or to that same boundary for the last
// line.
//
// Always returns true.
static bool ComputeSizeAndRVA(ElfW(Addr) loading_addr,
struct SymbolInfo *symbols) {
SourceFileInfoList::iterator file_it;
FuncInfoList::iterator func_it;
LineInfoList::iterator line_it;
// A table of all the addresses at which files and functions start
// or end. We build this from the file boundary list and our lists
// of files and functions, sort it, and then use it to find the ends
// of functions and source lines for which we have no size
// information.
std::vector<ElfW(Addr)> boundaries = symbols->file_boundaries;
for (file_it = symbols->main_files.begin();
file_it != symbols->main_files.end(); file_it++) {
boundaries.push_back((*file_it)->addr);
for (func_it = (*file_it)->func_info.begin();
func_it != (*file_it)->func_info.end(); func_it++)
boundaries.push_back(func_it->addr);
}
std::sort(boundaries.begin(), boundaries.end());
// Number of functions seen so far that had no boundary above them.
int no_next_addr_count = 0;
for (file_it = symbols->main_files.begin();
file_it != symbols->main_files.end(); file_it++) {
for (func_it = (*file_it)->func_info.begin();
func_it != (*file_it)->func_info.end(); func_it++) {
struct FuncInfo &func_info = *func_it;
assert(func_info.addr >= loading_addr);
func_info.rva_to_base = func_info.addr - loading_addr;
func_info.size = 0;
// Find the first boundary strictly above this function's address;
// zero stands for "there is none".
std::vector<ElfW(Addr)>::iterator boundary
= std::upper_bound(boundaries.begin(), boundaries.end(),
func_info.addr);
ElfW(Addr) next_addr = (boundary == boundaries.end()) ? 0 : *boundary;
// I've noticed functions with an address bigger than any other function
// or source file in the module; this is probably the last function in
// the module. Due to limitations of Linux stab symbols, it is impossible
// to get the exact size of this kind of function, so we give it a
// very big default value. This should be safe since this is the last
// function. But it is an ugly hack.....
// The following code can reproduce the case:
// template<class T>
// void Foo(T value) {
// }
//
// int main(void) {
// Foo(10);
// Foo(std::string("hello"));
// return 0;
// }
// TODO(liuli): Find a better solution.
static const int kDefaultSize = 0x10000000;
if (next_addr != 0) {
func_info.size = next_addr - func_info.addr;
} else {
// NOTE(review): the message below says "more than one", but because
// the counter is tested with "> 1" before being incremented, this
// branch is first taken for the third such function, not the second.
// Confirm which was intended.
if (no_next_addr_count > 1) {
fprintf(stderr, "Got more than one funtion without the \
following symbol. Igore this function.\n");
fprintf(stderr, "The dumped symbol may not correct.\n");
assert(!"This should not happen!\n");
func_info.size = 0;
// Skip the line size computation for this function as well.
continue;
}
no_next_addr_count++;
func_info.size = kDefaultSize;
}
// Compute line size.
for (line_it = func_info.line_info.begin();
line_it != func_info.line_info.end(); line_it++) {
struct LineInfo &line_info = *line_it;
LineInfoList::iterator next_line_it = line_it;
next_line_it++;
line_info.size = 0;
if (next_line_it != func_info.line_info.end()) {
// Not the last line: it extends up to the start of the next line.
line_info.size =
next_line_it->rva_to_func - line_info.rva_to_func;
} else {
// The last line in the function.
// If we can find a function or source file symbol immediately
// following the line, we can get the size of the line by computing
// the difference between the next address and the starting address
// of this line.
// Otherwise, we need to set a big enough default value. This occurs
// mostly because this function is the last one in the module.
if (next_addr != 0) {
ElfW(Off) next_addr_offset = next_addr - func_info.addr;
line_info.size = next_addr_offset - line_info.rva_to_func;
} else {
line_info.size = kDefaultSize;
}
}
line_info.rva_to_base = line_info.rva_to_func + func_info.rva_to_base;
} // for each line.
} // for each function.
} // for each source file.
return true;
}
// Our handler class for STABS data.
class DumpStabsHandler: public google_breakpad::StabsHandler {
public:
DumpStabsHandler(struct SymbolInfo *symbols, ElfW(Addr) loading_addr):
symbols_(symbols),
loading_addr_(loading_addr),
current_comp_unit_(NULL),
current_source_file_(NULL) {
current_function_.addr = 0;
}
DumpStabsHandler(Module *module) :
module_(module),
comp_unit_base_address_(0),
current_function_(NULL),
current_source_file_(NULL),
current_source_file_name_(NULL) { }
bool StartCompilationUnit(const char *name, uint64_t address,
const char *build_directory);
@ -365,112 +145,195 @@ class DumpStabsHandler: public google_breakpad::StabsHandler {
bool EndFunction(uint64_t address);
bool Line(uint64_t address, const char *name, int number);
// Do any final processing necessary to make module_ contain all the
// data provided by the STABS reader.
//
// Because STABS does not provide reliable size information for
// functions and lines, we need to make a pass over the data after
// processing all the STABS to compute those sizes. We take care of
// that here.
void Finalize();
private:
// The symbol info we're contributing to.
struct SymbolInfo *symbols_;
// The address at which this module gets loaded.
ElfW(Addr) loading_addr_;
// An arbitrary, but very large, size to use for functions whose
// size we can't compute properly.
static const uint64_t kFallbackSize = 0x10000000;
// The main file we're currently contributing functions/lines to.
struct SourceFileInfo *current_comp_unit_;
// The module we're contributing debugging info to.
Module *module_;
// The functions we've generated so far. We don't add these to
// module_ as we parse them. Instead, we wait until we've computed
// their ending address, and their lines' ending addresses.
//
// We could just stick them in module_ from the outset, but if
// module_ already contains data gathered from other debugging
// formats, that would complicate the size computation.
vector<Module::Function *> functions_;
// Boundary addresses. STABS doesn't necessarily supply sizes for
// functions and lines, so we need to compute them ourselves by
// finding the next object.
vector<Module::Address> boundaries_;
// The base address of the current compilation unit. We use this to
// recognize functions we should omit from the symbol file. (If you
// know the details of why we omit these, please patch this
// comment.)
Module::Address comp_unit_base_address_;
// The function we're currently contributing lines to.
// FIXME: This gets copied, along with all its lines. Should be a pointer.
struct FuncInfo current_function_;
Module::Function *current_function_;
// The SourceFileInfo structure for the last file we got a line
// number in. Instead of hashing on the name ('s address) on every
// line, we just check whether the name is the same as this file's
// (which it usually is).
SourceFileInfo *current_source_file_;
// The last Module::File we got a line number in.
Module::File *current_source_file_;
// The pointer in the .stabstr section of the name that
// current_source_file_ is built from. This allows us to quickly
// recognize when the current line is in the same file as the
// previous one (which it usually is).
const char *current_source_file_name_;
};
bool DumpStabsHandler::StartCompilationUnit(const char *name, uint64_t address,
const char *build_directory) {
assert(! current_comp_unit_);
current_comp_unit_ = FindSourceFileInfo(symbols_, name);
current_source_file_ = current_comp_unit_;
// Add it to the list; use ADDR to tell whether we've already done so.
if (! current_comp_unit_->addr)
symbols_->main_files.push_back(current_comp_unit_);
current_comp_unit_->addr = address;
assert(! comp_unit_base_address_);
current_source_file_name_ = name;
current_source_file_ = module_->FindFile(name);
comp_unit_base_address_ = address;
boundaries_.push_back(static_cast<Module::Address>(address));
return true;
}
bool DumpStabsHandler::EndCompilationUnit(uint64_t address) {
assert(current_comp_unit_);
// We compute everything's size later.
symbols_->file_boundaries.push_back(address);
current_comp_unit_ = NULL;
assert(comp_unit_base_address_);
comp_unit_base_address_ = 0;
current_source_file_ = NULL;
current_source_file_name_ = NULL;
if (address)
boundaries_.push_back(static_cast<Module::Address>(address));
return true;
}
bool DumpStabsHandler::StartFunction(const std::string &name,
uint64_t address) {
assert(! current_function_.addr);
current_function_.name = name;
current_function_.rva_to_base = 0;
current_function_.addr = address;
current_function_.size = 0;
current_function_.stack_param_size = 0;
current_function_.line_info.clear();
assert(! current_function_);
Module::Function *f = new Module::Function;
f->name_ = Demangle(name);
f->address_ = address;
f->size_ = 0; // We compute this in DumpStabsHandler::Finalize().
f->parameter_size_ = 0; // We don't provide this information.
current_function_ = f;
boundaries_.push_back(static_cast<Module::Address>(address));
return true;
}
bool DumpStabsHandler::EndFunction(uint64_t address) {
assert(current_function_.addr);
if (current_function_.addr >= current_comp_unit_->addr)
// This is a big copy, then free. Should use a pointer.
current_comp_unit_->func_info.push_back(current_function_);
current_function_.addr = 0;
current_function_.line_info.clear();
assert(current_function_);
// Functions in this compilation unit should have address bigger
// than the compilation unit's starting address. There may be a lot
// of duplicated entries for functions in the STABS data; only one
// entry can meet this requirement.
//
// (I don't really understand the above comment; just bringing it
// along from the previous code, and leaving the behavior unchanged.
// If you know the whole story, please patch this comment. --jimb)
if (current_function_->address_ >= comp_unit_base_address_)
functions_.push_back(current_function_);
else
delete current_function_;
current_function_ = NULL;
if (address)
boundaries_.push_back(static_cast<Module::Address>(address));
return true;
}
bool DumpStabsHandler::Line(uint64_t address, const char *name, int number) {
assert(current_function_.addr);
assert(current_function_);
assert(current_source_file_);
if (name != current_source_file_->name)
current_source_file_ = FindSourceFileInfo(symbols_, name);
struct LineInfo line;
// FIXME: might as well set rva_to_base directly.
line.rva_to_func = address - current_function_.addr;
line.file = current_source_file_;
line.line_num = number;
line.size = 0;
line.rva_to_base = 0;
current_function_.line_info.push_back(line);
if (name != current_source_file_name_) {
current_source_file_ = module_->FindFile(name);
current_source_file_name_ = name;
}
Module::Line line;
line.address_ = address;
line.size_ = 0; // We compute this in DumpStabsHandler::Finalize().
line.file_ = current_source_file_;
line.number_ = number;
current_function_->lines_.push_back(line);
return true;
}
// Complete module_'s contents once the STABS reader has delivered all
// its data: compute the size of every function and line we collected,
// then hand the functions over to module_.
//
// A function's size is the distance from its address to the next
// entry in boundaries_ above it. A line's size is the distance to the
// next line of the same function, or to the function's end for the
// last line.
void DumpStabsHandler::Finalize() {
  // Sort our boundary list, so we can search it quickly.
  //
  // The algorithms are spelled std::sort / std::stable_sort rather than
  // relying on argument-dependent lookup, which only finds them when the
  // vector iterator type happens to live in (or be templated on
  // something in) namespace std.
  std::sort(boundaries_.begin(), boundaries_.end());
  // Sort all functions by address, just for neatness.
  std::sort(functions_.begin(), functions_.end(),
            Module::Function::CompareByAddress);

  for (vector<Module::Function *>::iterator func_it = functions_.begin();
       func_it != functions_.end();
       ++func_it) {
    Module::Function *f = *func_it;

    // Compute the function f's size: find the first boundary strictly
    // above its start address.
    vector<Module::Address>::iterator boundary
        = std::upper_bound(boundaries_.begin(), boundaries_.end(),
                           f->address_);
    if (boundary != boundaries_.end()) {
      f->size_ = *boundary - f->address_;
    } else {
      // If this is the last function in the module, and the STABS
      // reader was unable to give us its ending address, then assign
      // it a bogus, very large value. This will happen at most once
      // per module: since we've added all functions' addresses to the
      // boundary table, only one can be the last.
      f->size_ = kFallbackSize;
    }

    // Compute sizes for each of the function f's lines --- if it has any.
    if (! f->lines_.empty()) {
      // stable_sort keeps lines that share an address in the order they
      // were recorded.
      std::stable_sort(f->lines_.begin(), f->lines_.end(),
                       Module::Line::CompareByAddress);
      vector<Module::Line>::iterator last_line = f->lines_.end() - 1;
      for (vector<Module::Line>::iterator line_it = f->lines_.begin();
           line_it != last_line; ++line_it)
        line_it[0].size_ = line_it[1].address_ - line_it[0].address_;
      // Compute the size of the last line from f's end address.
      last_line->size_ = (f->address_ + f->size_) - last_line->address_;
    }
  }

  // Now that everything has a size, add our functions to the module, and
  // dispose of our private list. Only the pointers are dropped here; the
  // Function objects themselves have been passed to module_.
  module_->AddFunctions(functions_.begin(), functions_.end());
  functions_.clear();
}
static bool LoadSymbols(const ElfW(Shdr) *stab_section,
const ElfW(Shdr) *stabstr_section,
ElfW(Addr) loading_addr,
struct SymbolInfo *symbols) {
Module *module) {
if (stab_section == NULL || stabstr_section == NULL)
return false;
// A callback object to handle data from the STABS reader.
DumpStabsHandler handler(module);
// Find the addresses of the STABS data, and create a STABS reader object.
uint8_t *stabs = reinterpret_cast<uint8_t *>(stab_section->sh_offset);
uint8_t *stabstr = reinterpret_cast<uint8_t *>(stabstr_section->sh_offset);
DumpStabsHandler handler(symbols, loading_addr);
google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
stabstr, stabstr_section->sh_size,
&handler);
// Read the STABS data, and do post-processing.
if (! reader.Process())
return false;
// Second pass, compute the size of functions and lines.
return ComputeSizeAndRVA(loading_addr, symbols);
handler.Finalize();
return true;
}
static bool LoadSymbols(ElfW(Ehdr) *elf_header, struct SymbolInfo *symbols) {
static bool LoadSymbols(ElfW(Ehdr) *elf_header, Module *module) {
// Translate all offsets in section headers into address.
FixAddress(elf_header);
ElfW(Addr) loading_addr = GetLoadingAddress(
reinterpret_cast<ElfW(Phdr) *>(elf_header->e_phoff),
elf_header->e_phnum);
module->SetLoadAddress(loading_addr);
const ElfW(Shdr) *sections =
reinterpret_cast<ElfW(Shdr) *>(elf_header->e_shoff);
@ -484,153 +347,7 @@ static bool LoadSymbols(ElfW(Ehdr) *elf_header, struct SymbolInfo *symbols) {
const ElfW(Shdr) *stabstr_section = stab_section->sh_link + sections;
// Load symbols.
return LoadSymbols(stab_section, stabstr_section, loading_addr, symbols);
}
// Write the 'MODULE' line for the object file OBJ_FILE, whose ELF
// machine type is ARCH, to FILE. The line carries the architecture
// name, the file's identifier with its dashes removed and a '0'
// appended, and OBJ_FILE's name with any leading directories stripped.
//
// Return true on success. Return false if ARCH is neither EM_386 nor
// EM_X86_64, if the file identifier cannot be computed, or if writing
// fails.
static bool WriteModuleInfo(FILE *file,
                            ElfW(Half) arch,
                            const std::string &obj_file) {
  const char *arch_name = NULL;
  switch (arch) {
    case EM_386:
      arch_name = "x86";
      break;
    case EM_X86_64:
      arch_name = "x86_64";
      break;
    default:
      return false;
  }

  unsigned char identifier[16];
  google_breakpad::FileID file_id(obj_file.c_str());
  if (!file_id.ElfFileIdentifier(identifier))
    return false;

  char dashed[40];
  file_id.ConvertIdentifierToString(identifier, dashed, sizeof(dashed));

  // Copy the identifier, dropping the dashes. The buffer starts out
  // zero-filled, so the result is terminated as we go.
  char id_no_dash[40] = { 0 };
  int out = 0;
  for (const char *in = dashed; *in != '\0'; ++in) {
    if (*in == '-')
      continue;
    id_no_dash[out++] = *in;
  }
  // Add an extra "0" by the end.
  id_no_dash[out++] = '0';

  // Keep only what follows the last slash, if there is one.
  const size_t last_slash = obj_file.find_last_of("/");
  const std::string filename = (last_slash == std::string::npos)
      ? obj_file
      : obj_file.substr(last_slash + 1);

  const int written = fprintf(file, "MODULE Linux %s %s %s\n", arch_name,
                              id_no_dash, filename.c_str());
  return written >= 0;
}
// Append to *INCLUDED_FILES every file in SYMBOLS that is cited by
// some line of some function but has no source id yet, in the order
// in which the line info first refers to it. Each such file is given
// the next source id, counting up from NEXT_SOURCE_ID.
//
// Note that the name_to_file map may contain #included files that are
// unreferenced; these are the result of LoadFuncSymbols omitting
// functions from the list whose addresses fall outside the address
// range of the file that contains them.
static void CollectIncludedFiles(const struct SymbolInfo &symbols,
                                 std::vector<SourceFileInfo *> *included_files,
                                 int next_source_id) {
  typedef SourceFileInfoList::const_iterator FileIter;
  typedef FuncInfoList::const_iterator FuncIter;
  typedef LineInfoList::const_iterator LineIter;

  for (FileIter file = symbols.main_files.begin();
       file != symbols.main_files.end(); ++file) {
    const FuncInfoList &funcs = (*file)->func_info;
    for (FuncIter func = funcs.begin(); func != funcs.end(); ++func) {
      const LineInfoList &lines = func->line_info;
      for (LineIter line = lines.begin(); line != lines.end(); ++line) {
        SourceFileInfo *cited = line->file;
        // A source id other than -1 doubles as a "seen" mark, so that
        // each file lands in the list only once.
        if (cited->source_id != -1)
          continue;
        cited->source_id = next_source_id++;
        included_files->push_back(cited);
      }
    }
  }
}
// Write 'FILE' lines for all source files in SYMBOLS to FILE. We
// assign source id numbers to files here.
static bool WriteSourceFileInfo(FILE *file, struct SymbolInfo &symbols) {
int next_source_id = 0;
// Assign source id numbers to main files, and write them out to the file.
for (SourceFileInfoList::iterator file_it = symbols.main_files.begin();
file_it != symbols.main_files.end(); file_it++) {
SourceFileInfo *file_info = *file_it;
assert(file_info->addr);
// We only output 'FILE' lines for main files if their names
// contain '.'. The extensionless C++ header files are #included,
// not main files, so it wouldn't affect them. If you know the
// story, please patch this comment.
if (strchr(file_info->name, '.')) {
file_info->source_id = next_source_id++;
if (0 > fprintf(file, "FILE %d %s\n",
file_info->source_id, file_info->name))
return false;
}
}
// Compute the list of included files, and write them out.
// Can't use SourceFileInfoList here, because that owns the files it
// points to.
std::vector<SourceFileInfo *> included_files;
std::vector<SourceFileInfo *>::const_iterator file_it;
CollectIncludedFiles(symbols, &included_files, next_source_id);
for (file_it = included_files.begin(); file_it != included_files.end();
file_it++) {
if (0 > fprintf(file, "FILE %d %s\n",
(*file_it)->source_id, (*file_it)->name))
return false;
}
return true;
}
static bool WriteOneFunction(FILE *file,
const struct FuncInfo &func_info){
std::string func_name = Demangle(func_info.name.c_str());
if (func_info.size <= 0)
return true;
if (0 <= fprintf(file, "FUNC %lx %lx %d %s\n",
(unsigned long) func_info.rva_to_base,
(unsigned long) func_info.size,
func_info.stack_param_size,
func_name.c_str())) {
for (LineInfoList::const_iterator it = func_info.line_info.begin();
it != func_info.line_info.end(); it++) {
const struct LineInfo &line_info = *it;
if (0 > fprintf(file, "%lx %lx %d %d\n",
(unsigned long) line_info.rva_to_base,
(unsigned long) line_info.size,
line_info.line_num,
line_info.file->source_id))
return false;
}
return true;
}
return false;
}
static bool WriteFunctionInfo(FILE *file, const struct SymbolInfo &symbols) {
for (SourceFileInfoList::const_iterator it = symbols.main_files.begin();
it != symbols.main_files.end(); it++) {
const struct SourceFileInfo &file_info = **it;
for (FuncInfoList::const_iterator fiIt = file_info.func_info.begin();
fiIt != file_info.func_info.end(); fiIt++) {
const struct FuncInfo &func_info = *fiIt;
if (!WriteOneFunction(file, func_info))
return false;
}
}
return true;
}
static bool DumpStabSymbols(FILE *file, struct SymbolInfo &symbols) {
return WriteSourceFileInfo(file, symbols) &&
WriteFunctionInfo(file, symbols);
return LoadSymbols(stab_section, stabstr_section, module);
}
//
@ -685,6 +402,48 @@ class MmapWrapper {
size_t size_;
};
// Map the machine type recorded in ELF_HEADER to the architecture name
// used in breakpad symbol files: "x86" for EM_386 and "x86_64" for
// EM_X86_64. Any other machine type yields NULL.
const char *ElfArchitecture(const ElfW(Ehdr) *elf_header) {
  switch (elf_header->e_machine) {
    case EM_386:
      return "x86";
    case EM_X86_64:
      return "x86_64";
    default:
      return NULL;
  }
}
// Render the ELF file identifier IDENTIFIER as text: the string
// produced by FileID::ConvertIdentifierToString with every dash
// removed, followed by a single '0'.
std::string FormatIdentifier(unsigned char identifier[16]) {
  char dashed[40];
  google_breakpad::FileID::ConvertIdentifierToString(identifier,
                                                     dashed,
                                                     sizeof(dashed));

  std::string result;
  for (const char *p = dashed; *p != '\0'; ++p) {
    if (*p == '-')
      continue;
    result += *p;
  }

  // Add an extra "0" by the end. PDB files on Windows have an 'age'
  // number appended to the end of the file identifier; this isn't
  // really used or necessary on other platforms, but let's preserve
  // the pattern.
  result += '0';
  return result;
}
// Return the non-directory portion of FILENAME: the portion after the
// last slash, or the whole filename if there are no slashes.
std::string BaseFileName(const std::string &filename) {
  // basename may modify the buffer it is given, so it gets a private,
  // NUL-terminated copy of the name. Holding that copy in a vector
  // rather than a strdup'd block means there is no allocation result
  // left unchecked, and the buffer is released even if building the
  // returned string throws.
  const char *begin = filename.c_str();
  std::vector<char> scratch(begin, begin + filename.size() + 1);
  return std::string(basename(&scratch[0]));
}
} // namespace
namespace google_breakpad {
@ -706,16 +465,27 @@ bool DumpSymbols::WriteSymbolFile(const std::string &obj_file,
ElfW(Ehdr) *elf_header = reinterpret_cast<ElfW(Ehdr) *>(obj_base);
if (!IsValidElf(elf_header))
return false;
struct SymbolInfo symbols;
if (!LoadSymbols(elf_header, &symbols))
unsigned char identifier[16];
google_breakpad::FileID file_id(obj_file.c_str());
if (! file_id.ElfFileIdentifier(identifier))
return false;
// Write to symbol file.
if (WriteModuleInfo(sym_file, elf_header->e_machine, obj_file) &&
DumpStabSymbols(sym_file, symbols))
const char *architecture = ElfArchitecture(elf_header);
if (! architecture)
return false;
std::string name = BaseFileName(obj_file);
std::string os = "Linux";
std::string id = FormatIdentifier(identifier);
Module module(name, os, architecture, id);
if (!LoadSymbols(elf_header, &module))
return false;
if (!module.Write(sym_file))
return false;
return true;
return false;
}
} // namespace google_breakpad

167
src/common/linux/module.cc Normal file
View file

@ -0,0 +1,167 @@
// Copyright (c) 2009, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cerrno>
#include <cstring>
#include "common/linux/module.h"
namespace google_breakpad {
// Build an empty module description from its identifying strings: the
// module's NAME, the operating system OS, the ARCHITECTURE, and the
// identifier ID. The load address starts out as zero.
Module::Module(const string &name,
               const string &os,
               const string &architecture,
               const string &id)
    : name_(name),
      os_(os),
      architecture_(architecture),
      id_(id),
      load_address_(0) {
}
// Destroy the module together with everything it owns: every File in
// the name map and every Function in the function list.
Module::~Module() {
  FileByNameMap::iterator file = files_.begin();
  while (file != files_.end()) {
    delete file->second;
    ++file;
  }

  vector<Function *>::iterator func = functions_.begin();
  while (func != functions_.end()) {
    delete *func;
    ++func;
  }
}
// Record ADDRESS as the module's load address. Write subtracts it
// from every function and line address it prints.
void Module::SetLoadAddress(Address address) {
  this->load_address_ = address;
}
// Append FUNCTION to the module's list of functions. The destructor
// deletes everything on that list, so the module owns FUNCTION from
// here on.
void Module::AddFunction(Function *function) {
  this->functions_.push_back(function);
}
// Append every function in the range [BEGIN, END) to the module's
// list of functions, preserving their order. The destructor deletes
// everything on that list, so the module owns them from here on.
void Module::AddFunctions(vector<Function *>::iterator begin,
                          vector<Function *>::iterator end) {
  vector<Function *>::iterator tail = functions_.end();
  functions_.insert(tail, begin, end);
}
// Return the File named NAME, creating it (with a source id of -1) and
// adding it to the module if no such file exists yet.
Module::File *Module::FindFile(const string &name) {
  // Each map key must point at the name string stored inside the File
  // that the entry maps to. That rules out doing the lookup with
  // anything that inserts on a miss (operator[] or insert, say): the
  // new entry's key would point at our argument instead, and since a
  // map entry's key is const we could not repoint it afterwards.
  // lower_bound looks without inserting, and the iterator it returns
  // is exactly the hint insert wants: the position where NAME belongs,
  // whether or not it is already there.
  FileByNameMap::iterator spot = files_.lower_bound(&name);

  // One more string comparison than strictly necessary, but it's cheap.
  const bool present = spot != files_.end() && *spot->first == name;
  if (present)
    return spot->second;

  File *created = new File;
  created->name_ = name;
  created->source_id_ = -1;
  spot = files_.insert(spot,
                       FileByNameMap::value_type(&created->name_, created));
  return spot->second;
}
// Convenience overload: look up (or create) the File whose name is the
// C string NAME, by way of the string-taking FindFile.
Module::File *Module::FindFile(const char *name) {
  const string as_string(name);
  return FindFile(as_string);
}
void Module::AssignSourceIds() {
// First, give every source file an id of -1.
for (FileByNameMap::iterator file_it = files_.begin();
file_it != files_.end(); file_it++)
file_it->second->source_id_ = -1;
// Next, mark all files actually cited by our functions' line number
// info, by setting each one's source id to zero.
for (vector<Function *>::const_iterator func_it = functions_.begin();
func_it != functions_.end(); func_it++) {
Function *func = *func_it;
for (vector<Line>::iterator line_it = func->lines_.begin();
line_it != func->lines_.end(); line_it++)
line_it->file_->source_id_ = 0;
}
// Finally, assign source ids to those files that have been marked.
// We could have just assigned source id numbers while traversing
// the line numbers, but doing it this way numbers the files in
// lexicographical order by name, which is neat.
int next_source_id = 0;
for (FileByNameMap::iterator file_it = files_.begin();
file_it != files_.end(); file_it++)
if (! file_it->second->source_id_)
file_it->second->source_id_ = next_source_id++;
}
// Print a message to stderr describing the current errno value as the
// cause of a failure to write the symbol file.  Always returns false,
// so callers can write 'return ReportError();'.
bool Module::ReportError() {
  // Capture the description first, then emit the message.
  const char *reason = strerror(errno);
  fprintf(stderr, "error writing symbol file: %s\n", reason);
  return false;
}
// Write this module to STREAM in the Breakpad symbol file format:
// one MODULE header line, a FILE line for every source file that
// received a non-negative source id from AssignSourceIds, and then,
// for each function, a FUNC line followed by that function's line
// records.
//
// Addresses are written as offsets from load_address_.  All address
// and size fields, including the FUNC parameter size, are written in
// hexadecimal.  They are formatted as 'unsigned long long' so that
// 64-bit Address values are not truncated on platforms where
// 'unsigned long' is only 32 bits wide.
//
// Returns true on success.  If any fprintf call fails, reports the
// error via ReportError and returns false immediately; output written
// up to that point is left in STREAM.
bool Module::Write(FILE *stream) {
  // Header.
  if (fprintf(stream, "MODULE %s %s %s %s\n",
              os_.c_str(), architecture_.c_str(), id_.c_str(),
              name_.c_str()) < 0)
    return ReportError();

  // Source files.  AssignSourceIds marks unused files with -1; those
  // are omitted from the output.
  AssignSourceIds();
  for (FileByNameMap::iterator file_it = files_.begin();
       file_it != files_.end(); ++file_it) {
    File *file = file_it->second;
    if (file->source_id_ < 0)
      continue;
    if (fprintf(stream, "FILE %d %s\n",
                file->source_id_, file->name_.c_str()) < 0)
      return ReportError();
  }

  // Functions, each followed by its line records.
  for (vector<Function *>::const_iterator func_it = functions_.begin();
       func_it != functions_.end(); ++func_it) {
    Function *func = *func_it;
    if (fprintf(stream, "FUNC %llx %llx %llx %s\n",
                static_cast<unsigned long long>(func->address_
                                                - load_address_),
                static_cast<unsigned long long>(func->size_),
                static_cast<unsigned long long>(func->parameter_size_),
                func->name_.c_str()) < 0)
      return ReportError();

    for (vector<Line>::iterator line_it = func->lines_.begin();
         line_it != func->lines_.end(); ++line_it) {
      // The line number and source id are decimal; address and size
      // are hexadecimal.
      if (fprintf(stream, "%llx %llx %d %d\n",
                  static_cast<unsigned long long>(line_it->address_
                                                  - load_address_),
                  static_cast<unsigned long long>(line_it->size_),
                  line_it->number_,
                  line_it->file_->source_id_) < 0)
        return ReportError();
    }
  }

  return true;
}
} // namespace google_breakpad

191
src/common/linux/module.h Normal file
View file

@ -0,0 +1,191 @@
// Copyright (c) 2009, Google Inc. -*- mode: c++ -*-
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// module.h: defines google_breakpad::Module, for writing breakpad symbol files
#ifndef COMMON_LINUX_MODULE_H__
#define COMMON_LINUX_MODULE_H__
#include <map>
#include <string>
#include <vector>
#include <cstdio>
#include "google_breakpad/common/breakpad_types.h"
namespace google_breakpad {

using std::string;
using std::vector;
using std::map;

// A Module represents the contents of a module, and supports methods
// for adding information produced by parsing STABS or DWARF data
// --- possibly both from the same file --- and then writing out the
// unified contents as a Breakpad-format symbol file.
class Module {
 public:
  // The type of addresses and sizes in a symbol table.
  typedef u_int64_t Address;
  struct File;
  struct Function;
  struct Line;

  // Addresses appearing in Function and Line structures are absolute,
  // not relative to the module's load address.  That is, if the
  // module were loaded at its nominal load address, the addresses
  // would be correct.

  // A source file.
  struct File {
    // The name of the source file.
    string name_;

    // The file's source id.  The Write member function clears this
    // field and assigns source ids afresh, so any value placed here
    // before calling Write will be lost.
    int source_id_;
  };

  // A function.
  struct Function {
    // For sorting by address.  (Not style-guide compliant, but it's
    // stupid not to put this in the struct.)
    // Returns true if X starts at a strictly lower address than Y.
    static bool CompareByAddress(const Function *x, const Function *y) {
      return x->address_ < y->address_;
    }

    // The function's name.
    string name_;

    // The start address and length of the function's code.
    Address address_, size_;

    // The function's parameter size.
    Address parameter_size_;

    // Source lines belonging to this function, sorted by increasing
    // address.
    vector<Line> lines_;
  };

  // A source line.
  struct Line {
    // For sorting by address.  (Not style-guide compliant, but it's
    // stupid not to put this in the struct.)
    // Returns true if X starts at a strictly lower address than Y.
    static bool CompareByAddress(const Module::Line &x,
                                 const Module::Line &y) {
      return x.address_ < y.address_;
    }

    Address address_, size_;    // The address and size of the line's code.
    File *file_;                // The source file.
    int number_;                // The source line number.
  };

  // Create a new module with the given name, operating system,
  // architecture, and ID string.
  Module(const string &name, const string &os, const string &architecture,
         const string &id);
  ~Module();

  // Set the module's load address to LOAD_ADDRESS; addresses given
  // for functions and lines will be written to the Breakpad symbol
  // file as offsets from this address.  Construction initializes this
  // module's load address to zero: addresses written to the symbol
  // file will be the same as they appear in the Function and Line
  // structures.
  void SetLoadAddress(Address load_address);

  // Add FUNCTION to the module.
  // Destroying this module frees all Function objects that have been
  // added with this function.
  void AddFunction(Function *function);

  // Add all the functions in [BEGIN,END) to the module.
  // Destroying this module frees all Function objects that have been
  // added with this function.
  void AddFunctions(vector<Function *>::iterator begin,
                    vector<Function *>::iterator end);

  // If this module has a file named NAME, return a pointer to it.  If
  // it has none, then create one and return a pointer to the new
  // file.
  // Destroying this module frees all File objects that have been
  // created using this function.
  File *FindFile(const string &name);
  File *FindFile(const char *name);

  // Write this module to STREAM in the breakpad symbol format.
  // Return true if all goes well, or false if an error occurs.  This
  // method writes out a header based on the values given to the
  // constructor, writes the source files created via FindFile, and
  // then the functions added via AddFunction and AddFunctions, along
  // with their lines.
  bool Write(FILE *stream);

 private:
  // NOTE(review): this class owns raw pointers (see files_ and
  // functions_ below) and declares a destructor, but copying is not
  // disabled; copying a Module would presumably lead to a double
  // free.  Confirm no caller copies, and consider declaring the copy
  // constructor and assignment operator private.

  // Assign source id numbers to those of this module's files that
  // functions' line number data actually refers to.  Set the source
  // id numbers for all other files to -1.  We do this before writing
  // out the symbol file, omitting any unused files.
  void AssignSourceIds();

  // Report an error that has occurred writing the symbol file, using
  // errno to find the appropriate cause.  Return false.
  static bool ReportError();

  // Module header entries.
  string name_, os_, architecture_, id_;

  // The module's nominal load address.  Addresses for functions and
  // lines are absolute, assuming the module is loaded at this
  // address.
  Address load_address_;

  // Relation for maps whose keys are strings shared with some other
  // structure: orders the pointers by the strings they point to.
  struct CompareStringPtrs {
    // Must be const: associative containers may invoke their
    // comparison object through a const reference.
    bool operator()(const string *x, const string *y) const {
      return *x < *y;
    }
  };

  // A map from filenames to File structures.  The map's keys are
  // pointers to the Files' names.
  typedef map<const string *, File *, CompareStringPtrs> FileByNameMap;

  // The module owns all the files and functions that have been added
  // to it; destroying the module frees the Files and Functions these
  // point to.
  FileByNameMap files_;             // This module's source files.
  vector<Function *> functions_;    // This module's functions.
};

}  // namespace google_breakpad
#endif // COMMON_LINUX_MODULE_H__

View file

@ -17,7 +17,7 @@ BIN=dump_syms
all:$(BIN)
DUMP_OBJ=dump_symbols.o guid_creator.o dump_syms.o file_id.o md5.o \
stabs_reader.o
stabs_reader.o module.o
dump_syms:$(DUMP_OBJ)
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $^
@ -28,6 +28,9 @@ dump_symbols.o:../../../common/linux/dump_symbols.cc
stabs_reader.o:../../../common/linux/stabs_reader.cc
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $^
module.o:../../../common/linux/module.cc
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $^
guid_creator.o:../../../common/linux/guid_creator.cc
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $^