[dump_syms/Mac] New -x option to prefer extern names when there's a mismatch

When built with -gmlt, .dSYMs are (by design) missing the
`DW_AT_linkage_name` which Breakpad uses to fill out the
(name-mangled) function names.

Thankfully, the .dSYM contains both the old-school LC_SYMTAB command
containing the STABS-format symbols (which include the fully-qualified
C++ symbol names we want, but no actual compilation unit data), as
well as the LC_SEGMENT_64 containing the __DWARF segment with the
minimal -gmlt debug information (which excludes the name-mangled C++
symbols).

Unfortunately, since the .dSYM's STABS does not define compilation
units, the usual path in `StabsReader` ignores all the fully-qualified
C++ symbol names for the functions:

bd9d94c708/src/common/stabs_reader.cc (100)

Fortunately, when built for macOS platforms (`HAVE_MACH_O_NLIST_H`),
`StabsReader` supports storing all the STABS-format symbols as
`Extern`s, regardless of whether or not they're in a compilation unit:

bd9d94c708/src/common/stabs_reader.cc (119)

Currently, when there's both a `Function` and an `Extern` with the same address, `Module` discards the `Extern`:

bd9d94c708/src/common/module.cc (161)

This CL adds a new `-x` option to the Mac `dump_syms` which prefers
the Extern function name if there's a mismatch.

Bug: https://bugs.chromium.org/p/google-breakpad/issues/detail?id=883
Change-Id: I0d32adc64fbf567600b0a5ca63c71c422b7f0f8c
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/4453650
Reviewed-by: Joshua Peraza <jperaza@chromium.org>
This commit is contained in:
Ben Hamilton 2023-04-21 12:02:42 -06:00 committed by Joshua Peraza
parent 16cee17997
commit f548d75c9f
8 changed files with 123 additions and 18 deletions

View file

@ -330,7 +330,10 @@ class DwarfCUToModule::GenericDIEHandler: public DIEHandler {
// Use this from EndAttributes member functions, not ProcessAttribute* // Use this from EndAttributes member functions, not ProcessAttribute*
// functions; only the former can be sure that all the DIE's attributes // functions; only the former can be sure that all the DIE's attributes
// have been seen. // have been seen.
StringView ComputeQualifiedName(); //
// On return, if has_qualified_name is non-NULL, *has_qualified_name is set to
// true if the DIE includes a fully-qualified name, false otherwise.
StringView ComputeQualifiedName(bool* has_qualified_name);
CUContext* cu_context_; CUContext* cu_context_;
DIEContext* parent_context_; DIEContext* parent_context_;
@ -466,7 +469,8 @@ void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString(
} }
} }
StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() { StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName(
bool* has_qualified_name) {
// Use the demangled name, if one is available. Demangled names are // Use the demangled name, if one is available. Demangled names are
// preferable to those inferred from the DWARF structure because they // preferable to those inferred from the DWARF structure because they
// include argument types. // include argument types.
@ -482,6 +486,15 @@ StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
StringView* unqualified_name = nullptr; StringView* unqualified_name = nullptr;
StringView* enclosing_name = nullptr; StringView* enclosing_name = nullptr;
if (!qualified_name) { if (!qualified_name) {
if (has_qualified_name) {
// dSYMs built with -gmlt do not include the DW_AT_linkage_name
// with the unmangled symbol, but rather include it in the
// LC_SYMTAB STABS, which end up in the externs of the module.
//
// Remember this so the Module can copy over the extern name later.
*has_qualified_name = false;
}
// Find the unqualified name. If the DIE has its own DW_AT_name // Find the unqualified name. If the DIE has its own DW_AT_name
// attribute, then use that; otherwise, check the specification. // attribute, then use that; otherwise, check the specification.
if (!name_attribute_.empty()) { if (!name_attribute_.empty()) {
@ -500,6 +513,10 @@ StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
} else if (parent_context_) { } else if (parent_context_) {
enclosing_name = &parent_context_->name; enclosing_name = &parent_context_->name;
} }
} else {
if (has_qualified_name) {
*has_qualified_name = true;
}
} }
// Prepare the return value before upcoming mutations possibly invalidate the // Prepare the return value before upcoming mutations possibly invalidate the
@ -722,7 +739,8 @@ class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
ranges_form_(DW_FORM_sec_offset), ranges_form_(DW_FORM_sec_offset),
ranges_data_(0), ranges_data_(0),
inline_(false), inline_(false),
handle_inline_(handle_inline) {} handle_inline_(handle_inline),
has_qualified_name_(false) {}
void ProcessAttributeUnsigned(enum DwarfAttribute attr, void ProcessAttributeUnsigned(enum DwarfAttribute attr,
enum DwarfForm form, enum DwarfForm form,
@ -745,6 +763,7 @@ class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
bool inline_; bool inline_;
vector<unique_ptr<Module::Inline>> child_inlines_; vector<unique_ptr<Module::Inline>> child_inlines_;
bool handle_inline_; bool handle_inline_;
bool has_qualified_name_;
DIEContext child_context_; // A context for our children. DIEContext child_context_; // A context for our children.
}; };
@ -808,7 +827,7 @@ DIEHandler* DwarfCUToModule::FuncHandler::FindChildHandler(
bool DwarfCUToModule::FuncHandler::EndAttributes() { bool DwarfCUToModule::FuncHandler::EndAttributes() {
// Compute our name, and record a specification, if appropriate. // Compute our name, and record a specification, if appropriate.
name_ = ComputeQualifiedName(); name_ = ComputeQualifiedName(&has_qualified_name_);
if (name_.empty() && abstract_origin_) { if (name_.empty() && abstract_origin_) {
name_ = abstract_origin_->name; name_ = abstract_origin_->name;
} }
@ -881,6 +900,9 @@ void DwarfCUToModule::FuncHandler::Finish() {
scoped_ptr<Module::Function> func(new Module::Function(name, low_pc_)); scoped_ptr<Module::Function> func(new Module::Function(name, low_pc_));
func->ranges = ranges; func->ranges = ranges;
func->parameter_size = 0; func->parameter_size = 0;
// If the name was unqualified, prefer the Extern name if there's a mismatch
// (the Extern name will be fully-qualified in that case).
func->prefer_extern_name = !has_qualified_name_;
if (func->address) { if (func->address) {
// If the function address is zero this is a sign that this function // If the function address is zero this is a sign that this function
// description is just empty debug data and should just be discarded. // description is just empty debug data and should just be discarded.
@ -915,7 +937,7 @@ void DwarfCUToModule::FuncHandler::Finish() {
} }
bool DwarfCUToModule::NamedScopeHandler::EndAttributes() { bool DwarfCUToModule::NamedScopeHandler::EndAttributes() {
child_context_.name = ComputeQualifiedName(); child_context_.name = ComputeQualifiedName(NULL);
if (child_context_.name.empty() && no_specification) { if (child_context_.name.empty() && no_specification) {
cu_context_->reporter->UnknownSpecification(offset_, specification_offset_); cu_context_->reporter->UnknownSpecification(offset_, specification_offset_);
} }

View file

@ -267,6 +267,10 @@ class CUFixtureBase {
void TestFunction(int i, const string& name, void TestFunction(int i, const string& name,
Module::Address address, Module::Address size); Module::Address address, Module::Address size);
// Test that the I'th function (ordered by address) in the module
// this.module_ has the given prefer_extern_name.
void TestFunctionPreferExternName(int i, bool prefer_extern_name);
// Test that the number of source lines owned by the I'th function // Test that the number of source lines owned by the I'th function
// in the module this.module_ is equal to EXPECTED. // in the module this.module_ is equal to EXPECTED.
void TestLineCount(int i, size_t expected); void TestLineCount(int i, size_t expected);
@ -615,6 +619,15 @@ void CUFixtureBase::TestFunction(int i, const string& name,
EXPECT_EQ(0U, function->parameter_size); EXPECT_EQ(0U, function->parameter_size);
} }
void CUFixtureBase::TestFunctionPreferExternName(int i,
bool prefer_extern_name) {
FillFunctions();
ASSERT_LT((size_t)i, functions_.size());
Module::Function* function = functions_[i];
EXPECT_EQ(prefer_extern_name, function->prefer_extern_name);
}
void CUFixtureBase::TestLineCount(int i, size_t expected) { void CUFixtureBase::TestLineCount(int i, size_t expected) {
FillFunctions(); FillFunctions();
ASSERT_LT((size_t) i, functions_.size()); ASSERT_LT((size_t) i, functions_.size());

View file

@ -420,7 +420,7 @@ bool DumpSymbols::CreateEmptyModule(scoped_ptr<Module>& module) {
// Create a module to hold the debugging information. // Create a module to hold the debugging information.
module.reset(new Module(module_name, "mac", selected_arch_name, identifier, module.reset(new Module(module_name, "mac", selected_arch_name, identifier,
"", enable_multiple_)); "", enable_multiple_, prefer_extern_name_));
return true; return true;
} }

View file

@ -57,7 +57,8 @@ class DumpSymbols {
DumpSymbols(SymbolData symbol_data, DumpSymbols(SymbolData symbol_data,
bool handle_inter_cu_refs, bool handle_inter_cu_refs,
bool enable_multiple = false, bool enable_multiple = false,
const std::string& module_name = "") const std::string& module_name = "",
bool prefer_extern_name = false)
: symbol_data_(symbol_data), : symbol_data_(symbol_data),
handle_inter_cu_refs_(handle_inter_cu_refs), handle_inter_cu_refs_(handle_inter_cu_refs),
object_filename_(), object_filename_(),
@ -68,7 +69,8 @@ class DumpSymbols {
selected_object_file_(), selected_object_file_(),
selected_object_name_(), selected_object_name_(),
enable_multiple_(enable_multiple), enable_multiple_(enable_multiple),
module_name_(module_name) {} module_name_(module_name),
prefer_extern_name_(prefer_extern_name) {}
~DumpSymbols() = default; ~DumpSymbols() = default;
// Prepare to read debugging information from |filename|. |filename| may be // Prepare to read debugging information from |filename|. |filename| may be
@ -205,6 +207,15 @@ class DumpSymbols {
// If non-empty, used as the module name. Otherwise, the basename of // If non-empty, used as the module name. Otherwise, the basename of
// |object_filename_| is used as the module name. // |object_filename_| is used as the module name.
const std::string module_name_; const std::string module_name_;
// If a Function and an Extern share the same address but have a different
// name, prefer the name of the Extern.
//
// Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables),
// as the Function's fully-qualified name will only be present in the STABS
// (which are placed in the Extern), not in the DWARF symbols (which are
// placed in the Function).
bool prefer_extern_name_;
}; };
} // namespace google_breakpad } // namespace google_breakpad

View file

@ -105,14 +105,16 @@ Module::Module(const string& name,
const string& architecture, const string& architecture,
const string& id, const string& id,
const string& code_id /* = "" */, const string& code_id /* = "" */,
bool enable_multiple_field /* = false*/) bool enable_multiple_field /* = false*/,
bool prefer_extern_name /* = false*/)
: name_(name), : name_(name),
os_(os), os_(os),
architecture_(architecture), architecture_(architecture),
id_(id), id_(id),
code_id_(code_id), code_id_(code_id),
load_address_(0), load_address_(0),
enable_multiple_field_(enable_multiple_field) {} enable_multiple_field_(enable_multiple_field),
prefer_extern_name_(prefer_extern_name) {}
Module::~Module() { Module::~Module() {
for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it) for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it)
@ -152,11 +154,14 @@ bool Module::AddFunction(Function* function) {
it_ext = externs_.find(&arm_thumb_ext); it_ext = externs_.find(&arm_thumb_ext);
} }
if (it_ext != externs_.end()) { if (it_ext != externs_.end()) {
if (enable_multiple_field_) {
Extern* found_ext = it_ext->get(); Extern* found_ext = it_ext->get();
bool name_mismatch = found_ext->name != function->name;
if (enable_multiple_field_) {
// If the PUBLIC is for the same symbol as the FUNC, don't mark multiple. // If the PUBLIC is for the same symbol as the FUNC, don't mark multiple.
function->is_multiple |= function->is_multiple |= name_mismatch || found_ext->is_multiple;
found_ext->name != function->name || found_ext->is_multiple; }
if (name_mismatch && prefer_extern_name_) {
function->name = AddStringToPool(it_ext->get()->name);
} }
externs_.erase(it_ext); externs_.erase(it_ext);
} }

View file

@ -131,6 +131,10 @@ class Module {
// If this symbol has been folded with other symbols in the linked binary. // If this symbol has been folded with other symbols in the linked binary.
bool is_multiple = false; bool is_multiple = false;
// If the function's name should be filled out from a matching Extern,
// should they not match.
bool prefer_extern_name = false;
}; };
struct InlineOrigin { struct InlineOrigin {
@ -317,7 +321,8 @@ class Module {
const string& architecture, const string& architecture,
const string& id, const string& id,
const string& code_id = "", const string& code_id = "",
bool enable_multiple_field = false); bool enable_multiple_field = false,
bool prefer_extern_name = false);
~Module(); ~Module();
// Set the module's load address to LOAD_ADDRESS; addresses given // Set the module's load address to LOAD_ADDRESS; addresses given
@ -502,6 +507,15 @@ class Module {
// at // at
// https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md#records-3 // https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md#records-3
bool enable_multiple_field_; bool enable_multiple_field_;
// If a Function and an Extern share the same address but have a different
// name, prefer the name of the Extern.
//
// Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables),
// as the Function's fully-qualified name will only be present in the STABS
// (which are placed in the Extern), not in the DWARF symbols (which are
// placed in the Function).
bool prefer_extern_name_;
}; };
} // namespace google_breakpad } // namespace google_breakpad

View file

@ -640,6 +640,37 @@ TEST(Module, ConstructFunctionsAndExternsWithSameAddress) {
contents.c_str()); contents.c_str());
} }
// If there exists an extern and a function at the same address, only write
// out the FUNC entry.
TEST(Module, ConstructFunctionsAndExternsWithSameAddressPreferExternName) {
stringstream s;
Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID, "", false, true);
// Two externs.
auto extern1 = std::make_unique<Module::Extern>(0xabc0);
extern1->name = "extern1";
auto extern2 = std::make_unique<Module::Extern>(0xfff0);
extern2->name = "extern2";
m.AddExtern(std::move(extern1));
m.AddExtern(std::move(extern2));
Module::Function* function = new Module::Function("function2", 0xfff0);
Module::Range range(0xfff0, 0x10);
function->ranges.push_back(range);
function->parameter_size = 0;
m.AddFunction(function);
m.Write(s, ALL_SYMBOL_DATA);
string contents = s.str();
EXPECT_STREQ("MODULE " MODULE_OS " " MODULE_ARCH " " MODULE_ID " " MODULE_NAME
"\n"
"FUNC fff0 10 0 extern2\n"
"PUBLIC abc0 0 extern1\n",
contents.c_str());
}
// If there exists an extern and a function at the same address, only write // If there exists an extern and a function at the same address, only write
// out the FUNC entry, and mark it with `m` if the multiple field is enabled. // out the FUNC entry, and mark it with `m` if the multiple field is enabled.
TEST(Module, ConstructFunctionsAndExternsWithSameAddressMultiple) { TEST(Module, ConstructFunctionsAndExternsWithSameAddressMultiple) {

View file

@ -64,7 +64,8 @@ struct Options {
handle_inter_cu_refs(true), handle_inter_cu_refs(true),
handle_inlines(false), handle_inlines(false),
enable_multiple(false), enable_multiple(false),
module_name() {} module_name(),
prefer_extern_name(false) {}
string srcPath; string srcPath;
string dsymPath; string dsymPath;
@ -75,6 +76,7 @@ struct Options {
bool handle_inlines; bool handle_inlines;
bool enable_multiple; bool enable_multiple;
string module_name; string module_name;
bool prefer_extern_name;
}; };
static bool StackFrameEntryComparator(const Module::StackFrameEntry* a, static bool StackFrameEntryComparator(const Module::StackFrameEntry* a,
@ -151,7 +153,8 @@ static bool Start(const Options& options) {
(options.handle_inlines ? INLINES : NO_DATA) | (options.handle_inlines ? INLINES : NO_DATA) |
(options.cfi ? CFI : NO_DATA) | SYMBOLS_AND_FILES; (options.cfi ? CFI : NO_DATA) | SYMBOLS_AND_FILES;
DumpSymbols dump_symbols(symbol_data, options.handle_inter_cu_refs, DumpSymbols dump_symbols(symbol_data, options.handle_inter_cu_refs,
options.enable_multiple, options.module_name); options.enable_multiple, options.module_name,
options.prefer_extern_name);
// For x86_64 binaries, the CFI data is in the __TEXT,__eh_frame of the // For x86_64 binaries, the CFI data is in the __TEXT,__eh_frame of the
// Mach-O file, which is not copied into the dSYM. Whereas in i386, the CFI // Mach-O file, which is not copied into the dSYM. Whereas in i386, the CFI
@ -244,7 +247,7 @@ static void Usage(int argc, const char *argv[]) {
fprintf(stderr, "Output a Breakpad symbol file from a Mach-o file.\n"); fprintf(stderr, "Output a Breakpad symbol file from a Mach-o file.\n");
fprintf(stderr, fprintf(stderr,
"Usage: %s [-a ARCHITECTURE] [-c] [-g dSYM path] " "Usage: %s [-a ARCHITECTURE] [-c] [-g dSYM path] "
"[-n MODULE] <Mach-o file>\n", "[-n MODULE] [-x] <Mach-o file>\n",
argv[0]); argv[0]);
fprintf(stderr, "\t-i: Output module header information only.\n"); fprintf(stderr, "\t-i: Output module header information only.\n");
fprintf(stderr, "\t-a: Architecture type [default: native, or whatever is\n"); fprintf(stderr, "\t-a: Architecture type [default: native, or whatever is\n");
@ -260,6 +263,9 @@ static void Usage(int argc, const char *argv[]) {
fprintf(stderr, fprintf(stderr,
"\t-n: Use MODULE as the name of the module rather than \n" "\t-n: Use MODULE as the name of the module rather than \n"
"the basename of the Mach-O file/dSYM.\n"); "the basename of the Mach-O file/dSYM.\n");
fprintf(stderr,
"\t-x: Prefer the PUBLIC (extern) name over the FUNC if\n"
"they do not match.\n");
fprintf(stderr, "\t-h: Usage\n"); fprintf(stderr, "\t-h: Usage\n");
fprintf(stderr, "\t-?: Usage\n"); fprintf(stderr, "\t-?: Usage\n");
} }
@ -269,7 +275,7 @@ static void SetupOptions(int argc, const char *argv[], Options *options) {
extern int optind; extern int optind;
signed char ch; signed char ch;
while ((ch = getopt(argc, (char* const*)argv, "ia:g:crdm?hn:")) != -1) { while ((ch = getopt(argc, (char* const*)argv, "ia:g:crdm?hn:x")) != -1) {
switch (ch) { switch (ch) {
case 'i': case 'i':
options->header_only = true; options->header_only = true;
@ -302,6 +308,9 @@ static void SetupOptions(int argc, const char *argv[], Options *options) {
case 'n': case 'n':
options->module_name = optarg; options->module_name = optarg;
break; break;
case 'x':
options->prefer_extern_name = true;
break;
case '?': case '?':
case 'h': case 'h':
Usage(argc, argv); Usage(argc, argv);