From f548d75c9fa8bc062a580dbe5edb2fae2106d5c9 Mon Sep 17 00:00:00 2001 From: Ben Hamilton Date: Fri, 21 Apr 2023 12:02:42 -0600 Subject: [PATCH] [dump_syms/Mac] New -x option to prefer extern names when there's a mismatch When built with -gmlt, .dSYMs are (by design) missing the `DW_AT_linkage_name` which Breakpad uses to fill out the (name-mangled) function names. Thankfully, the .dSYM contains both the old-school LC_SYMTAB command containing the STABS-format symbols (which include the fully-qualified C++ symbol names we want, but no actual compilation unit data), as well as the LC_SEGMENT_64 containing the __DWARF segment with the minimal -gmlt debug information (which excludes the name-mangled C++ symbols). Unfortunately, since the .dSYM's STABS does not define compilation units, the usual path in `StabsReader` ignores all the fully-qualified C++ symbol names for the functions: https://chromium.googlesource.com/breakpad/breakpad/+/bd9d94c70843620adeebcd73c243001237c6d426/src/common/stabs_reader.cc#100 Fortunately, when built for macOS platforms (`HAVE_MACH_O_NLIST_H`), `StabsReader` supports storing all the STABS-format symbols as `Extern`s, regardless of whether or not they're in a compilation unit: https://chromium.googlesource.com/breakpad/breakpad/+/bd9d94c70843620adeebcd73c243001237c6d426/src/common/stabs_reader.cc#119 Currently, when there's both a `Function` and an `Extern` with the same address, `Module` discards the `Extern`: https://chromium.googlesource.com/breakpad/breakpad/+/bd9d94c70843620adeebcd73c243001237c6d426/src/common/module.cc#161 This CL adds a new `-x` option to the Mac `dump_syms` which prefers the Extern function name if there's a mismatch. Bug: https://bugs.chromium.org/p/google-breakpad/issues/detail?id=883 Change-Id: I0d32adc64fbf567600b0a5ca63c71c422b7f0f8c Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/4453650 Reviewed-by: Joshua Peraza --- src/common/dwarf_cu_to_module.cc | 32 +++++++++++++++++++---- src/common/dwarf_cu_to_module_unittest.cc | 13 +++++++++ src/common/mac/dump_syms.cc | 2 +- src/common/mac/dump_syms.h | 15 +++++++++-- src/common/module.cc | 15 +++++++---- src/common/module.h | 16 +++++++++++- src/common/module_unittest.cc | 31 ++++++++++++++++++++++ src/tools/mac/dump_syms/dump_syms_tool.cc | 17 +++++++++--- 8 files changed, 123 insertions(+), 18 deletions(-) diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc index 43b468c1..708ed143 100644 --- a/src/common/dwarf_cu_to_module.cc +++ b/src/common/dwarf_cu_to_module.cc @@ -330,7 +330,10 @@ class DwarfCUToModule::GenericDIEHandler: public DIEHandler { // Use this from EndAttributes member functions, not ProcessAttribute* // functions; only the former can be sure that all the DIE's attributes // have been seen. - StringView ComputeQualifiedName(); + // + // On return, if has_qualified_name is non-NULL, *has_qualified_name is set to + // true if the DIE includes a fully-qualified name, false otherwise. + StringView ComputeQualifiedName(bool* has_qualified_name); CUContext* cu_context_; DIEContext* parent_context_; @@ -466,7 +469,8 @@ void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString( } } -StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() { +StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName( + bool* has_qualified_name) { // Use the demangled name, if one is available. Demangled names are // preferable to those inferred from the DWARF structure because they // include argument types. @@ -482,6 +486,15 @@ StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() { StringView* unqualified_name = nullptr; StringView* enclosing_name = nullptr; if (!qualified_name) { + if (has_qualified_name) { + // dSYMs built with -gmlt do not include the DW_AT_linkage_name + // with the unmangled symbol, but rather include it in the + // LC_SYMTAB STABS, which end up in the externs of the module. + // + // Remember this so the Module can copy over the extern name later. + *has_qualified_name = false; + } + // Find the unqualified name. If the DIE has its own DW_AT_name // attribute, then use that; otherwise, check the specification. if (!name_attribute_.empty()) { @@ -500,6 +513,10 @@ StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() { } else if (parent_context_) { enclosing_name = &parent_context_->name; } + } else { + if (has_qualified_name) { + *has_qualified_name = true; + } } // Prepare the return value before upcoming mutations possibly invalidate the @@ -722,7 +739,8 @@ class DwarfCUToModule::FuncHandler: public GenericDIEHandler { ranges_form_(DW_FORM_sec_offset), ranges_data_(0), inline_(false), - handle_inline_(handle_inline) {} + handle_inline_(handle_inline), + has_qualified_name_(false) {} void ProcessAttributeUnsigned(enum DwarfAttribute attr, enum DwarfForm form, @@ -745,6 +763,7 @@ class DwarfCUToModule::FuncHandler: public GenericDIEHandler { bool inline_; vector> child_inlines_; bool handle_inline_; + bool has_qualified_name_; DIEContext child_context_; // A context for our children. }; @@ -808,7 +827,7 @@ DIEHandler* DwarfCUToModule::FuncHandler::FindChildHandler( bool DwarfCUToModule::FuncHandler::EndAttributes() { // Compute our name, and record a specification, if appropriate. - name_ = ComputeQualifiedName(); + name_ = ComputeQualifiedName(&has_qualified_name_); if (name_.empty() && abstract_origin_) { name_ = abstract_origin_->name; } @@ -881,6 +900,9 @@ void DwarfCUToModule::FuncHandler::Finish() { scoped_ptr func(new Module::Function(name, low_pc_)); func->ranges = ranges; func->parameter_size = 0; + // If the name was unqualified, prefer the Extern name if there's a mismatch + // (the Extern name will be fully-qualified in that case). + func->prefer_extern_name = !has_qualified_name_; if (func->address) { // If the function address is zero this is a sign that this function // description is just empty debug data and should just be discarded. @@ -915,7 +937,7 @@ void DwarfCUToModule::FuncHandler::Finish() { } bool DwarfCUToModule::NamedScopeHandler::EndAttributes() { - child_context_.name = ComputeQualifiedName(); + child_context_.name = ComputeQualifiedName(NULL); if (child_context_.name.empty() && no_specification) { cu_context_->reporter->UnknownSpecification(offset_, specification_offset_); } diff --git a/src/common/dwarf_cu_to_module_unittest.cc b/src/common/dwarf_cu_to_module_unittest.cc index 7ab2f15c..134b2c24 100644 --- a/src/common/dwarf_cu_to_module_unittest.cc +++ b/src/common/dwarf_cu_to_module_unittest.cc @@ -267,6 +267,10 @@ class CUFixtureBase { void TestFunction(int i, const string& name, Module::Address address, Module::Address size); + // Test that the I'th function (ordered by address) in the module + // this.module_ has the given prefer_extern_name. + void TestFunctionPreferExternName(int i, bool prefer_extern_name); + // Test that the number of source lines owned by the I'th function // in the module this.module_ is equal to EXPECTED. void TestLineCount(int i, size_t expected); @@ -615,6 +619,15 @@ void CUFixtureBase::TestFunction(int i, const string& name, EXPECT_EQ(0U, function->parameter_size); } +void CUFixtureBase::TestFunctionPreferExternName(int i, + bool prefer_extern_name) { + FillFunctions(); + ASSERT_LT((size_t)i, functions_.size()); + + Module::Function* function = functions_[i]; + EXPECT_EQ(prefer_extern_name, function->prefer_extern_name); +} + void CUFixtureBase::TestLineCount(int i, size_t expected) { FillFunctions(); ASSERT_LT((size_t) i, functions_.size()); diff --git a/src/common/mac/dump_syms.cc b/src/common/mac/dump_syms.cc index 04ccae25..dd91196a 100644 --- a/src/common/mac/dump_syms.cc +++ b/src/common/mac/dump_syms.cc @@ -420,7 +420,7 @@ bool DumpSymbols::CreateEmptyModule(scoped_ptr& module) { // Create a module to hold the debugging information. module.reset(new Module(module_name, "mac", selected_arch_name, identifier, - "", enable_multiple_)); + "", enable_multiple_, prefer_extern_name_)); return true; } diff --git a/src/common/mac/dump_syms.h b/src/common/mac/dump_syms.h index d5aa7185..5bcb0b58 100644 --- a/src/common/mac/dump_syms.h +++ b/src/common/mac/dump_syms.h @@ -57,7 +57,8 @@ class DumpSymbols { DumpSymbols(SymbolData symbol_data, bool handle_inter_cu_refs, bool enable_multiple = false, - const std::string& module_name = "") + const std::string& module_name = "", + bool prefer_extern_name = false) : symbol_data_(symbol_data), handle_inter_cu_refs_(handle_inter_cu_refs), object_filename_(), @@ -68,7 +69,8 @@ class DumpSymbols { selected_object_file_(), selected_object_name_(), enable_multiple_(enable_multiple), - module_name_(module_name) {} + module_name_(module_name), + prefer_extern_name_(prefer_extern_name) {} ~DumpSymbols() = default; // Prepare to read debugging information from |filename|. |filename| may be @@ -205,6 +207,15 @@ class DumpSymbols { // If non-empty, used as the module name. Otherwise, the basename of // |object_filename_| is used as the module name. const std::string module_name_; + + // If a Function and an Extern share the same address but have a different + // name, prefer the name of the Extern. + // + // Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables), + // as the Function's fully-qualified name will only be present in the STABS + // (which are placed in the Extern), not in the DWARF symbols (which are + // placed in the Function). + bool prefer_extern_name_; }; } // namespace google_breakpad diff --git a/src/common/module.cc b/src/common/module.cc index a5c1b6ad..e61c3b7a 100644 --- a/src/common/module.cc +++ b/src/common/module.cc @@ -105,14 +105,16 @@ Module::Module(const string& name, const string& architecture, const string& id, const string& code_id /* = "" */, - bool enable_multiple_field /* = false*/) + bool enable_multiple_field /* = false*/, + bool prefer_extern_name /* = false*/) : name_(name), os_(os), architecture_(architecture), id_(id), code_id_(code_id), load_address_(0), - enable_multiple_field_(enable_multiple_field) {} + enable_multiple_field_(enable_multiple_field), + prefer_extern_name_(prefer_extern_name) {} Module::~Module() { for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it) @@ -152,11 +154,14 @@ bool Module::AddFunction(Function* function) { it_ext = externs_.find(&arm_thumb_ext); } if (it_ext != externs_.end()) { + Extern* found_ext = it_ext->get(); + bool name_mismatch = found_ext->name != function->name; if (enable_multiple_field_) { - Extern* found_ext = it_ext->get(); // If the PUBLIC is for the same symbol as the FUNC, don't mark multiple. - function->is_multiple |= - found_ext->name != function->name || found_ext->is_multiple; + function->is_multiple |= name_mismatch || found_ext->is_multiple; + } + if (name_mismatch && prefer_extern_name_) { + function->name = AddStringToPool(it_ext->get()->name); } externs_.erase(it_ext); } diff --git a/src/common/module.h b/src/common/module.h index c1fd9f59..d736701f 100644 --- a/src/common/module.h +++ b/src/common/module.h @@ -131,6 +131,10 @@ class Module { // If this symbol has been folded with other symbols in the linked binary. bool is_multiple = false; + + // If the function's name should be filled out from a matching Extern, + // should they not match. + bool prefer_extern_name = false; }; struct InlineOrigin { @@ -317,7 +321,8 @@ class Module { const string& architecture, const string& id, const string& code_id = "", - bool enable_multiple_field = false); + bool enable_multiple_field = false, + bool prefer_extern_name = false); ~Module(); // Set the module's load address to LOAD_ADDRESS; addresses given @@ -502,6 +507,15 @@ class Module { // at // https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md#records-3 bool enable_multiple_field_; + + // If a Function and an Extern share the same address but have a different + // name, prefer the name of the Extern. + // + // Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables), + // as the Function's fully-qualified name will only be present in the STABS + // (which are placed in the Extern), not in the DWARF symbols (which are + // placed in the Function). + bool prefer_extern_name_; }; } // namespace google_breakpad diff --git a/src/common/module_unittest.cc b/src/common/module_unittest.cc index 213b3154..6a6762e5 100644 --- a/src/common/module_unittest.cc +++ b/src/common/module_unittest.cc @@ -640,6 +640,37 @@ TEST(Module, ConstructFunctionsAndExternsWithSameAddress) { contents.c_str()); } +// If there exists an extern and a function at the same address, only write +// out the FUNC entry. +TEST(Module, ConstructFunctionsAndExternsWithSameAddressPreferExternName) { + stringstream s; + Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID, "", false, true); + + // Two externs. + auto extern1 = std::make_unique(0xabc0); + extern1->name = "extern1"; + auto extern2 = std::make_unique(0xfff0); + extern2->name = "extern2"; + + m.AddExtern(std::move(extern1)); + m.AddExtern(std::move(extern2)); + + Module::Function* function = new Module::Function("function2", 0xfff0); + Module::Range range(0xfff0, 0x10); + function->ranges.push_back(range); + function->parameter_size = 0; + m.AddFunction(function); + + m.Write(s, ALL_SYMBOL_DATA); + string contents = s.str(); + + EXPECT_STREQ("MODULE " MODULE_OS " " MODULE_ARCH " " MODULE_ID " " MODULE_NAME + "\n" + "FUNC fff0 10 0 extern2\n" + "PUBLIC abc0 0 extern1\n", + contents.c_str()); +} + // If there exists an extern and a function at the same address, only write // out the FUNC entry, and mark it with `m` if the multiple field is enabled. TEST(Module, ConstructFunctionsAndExternsWithSameAddressMultiple) { diff --git a/src/tools/mac/dump_syms/dump_syms_tool.cc b/src/tools/mac/dump_syms/dump_syms_tool.cc index ab36164f..9fb8d13f 100644 --- a/src/tools/mac/dump_syms/dump_syms_tool.cc +++ b/src/tools/mac/dump_syms/dump_syms_tool.cc @@ -64,7 +64,8 @@ struct Options { handle_inter_cu_refs(true), handle_inlines(false), enable_multiple(false), - module_name() {} + module_name(), + prefer_extern_name(false) {} string srcPath; string dsymPath; @@ -75,6 +76,7 @@ struct Options { bool handle_inlines; bool enable_multiple; string module_name; + bool prefer_extern_name; }; static bool StackFrameEntryComparator(const Module::StackFrameEntry* a, @@ -151,7 +153,8 @@ static bool Start(const Options& options) { (options.handle_inlines ? INLINES : NO_DATA) | (options.cfi ? CFI : NO_DATA) | SYMBOLS_AND_FILES; DumpSymbols dump_symbols(symbol_data, options.handle_inter_cu_refs, - options.enable_multiple, options.module_name); + options.enable_multiple, options.module_name, + options.prefer_extern_name); // For x86_64 binaries, the CFI data is in the __TEXT,__eh_frame of the // Mach-O file, which is not copied into the dSYM. Whereas in i386, the CFI @@ -244,7 +247,7 @@ static void Usage(int argc, const char *argv[]) { fprintf(stderr, "Output a Breakpad symbol file from a Mach-o file.\n"); fprintf(stderr, "Usage: %s [-a ARCHITECTURE] [-c] [-g dSYM path] " - "[-n MODULE] \n", + "[-n MODULE] [-x] \n", argv[0]); fprintf(stderr, "\t-i: Output module header information only.\n"); fprintf(stderr, "\t-a: Architecture type [default: native, or whatever is\n"); @@ -260,6 +263,9 @@ static void Usage(int argc, const char *argv[]) { fprintf(stderr, "\t-n: Use MODULE as the name of the module rather than \n" "the basename of the Mach-O file/dSYM.\n"); + fprintf(stderr, + "\t-x: Prefer the PUBLIC (extern) name over the FUNC if\n" + "they do not match.\n"); fprintf(stderr, "\t-h: Usage\n"); fprintf(stderr, "\t-?: Usage\n"); } @@ -269,7 +275,7 @@ static void SetupOptions(int argc, const char *argv[], Options *options) { extern int optind; signed char ch; - while ((ch = getopt(argc, (char* const*)argv, "ia:g:crdm?hn:")) != -1) { + while ((ch = getopt(argc, (char* const*)argv, "ia:g:crdm?hn:x")) != -1) { switch (ch) { case 'i': options->header_only = true; @@ -302,6 +308,9 @@ static void SetupOptions(int argc, const char *argv[], Options *options) { case 'n': options->module_name = optarg; break; + case 'x': + options->prefer_extern_name = true; + break; case '?': case 'h': Usage(argc, argv);