From ff5892c5da86c50af1951328215a5a3a203a9bb1 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Thu, 30 Sep 2021 12:49:44 -0700 Subject: [PATCH] Add a string pool to store functions names - Added StringView which is used as a reference to a string, but doesn't own the string. - Removed the old string pool in DwarfCUToModule::FilePrivate, since it's doing string copy. - Added a string pool in Module to store functions/inline origins' names (mangled and demangled). - The peak memory usage drops from 20.6 GB to 12.5 GB when disabling inline records and drops from 36 GB to 20.3 GB when enabling inline records. Bug: chromium:1246974, chromium:1250351 Change-Id: Ie7e9740ea10c1930a0fc58c6becaae2d718b83b8 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/3189410 Reviewed-by: Joshua Peraza --- src/common/dwarf_cu_to_module.cc | 138 ++++++++++++------------------- src/common/module.cc | 3 +- src/common/module.h | 22 +++-- src/common/stabs_to_module.cc | 3 +- src/common/string_view.h | 113 +++++++++++++++++++++++++ 5 files changed, 188 insertions(+), 91 deletions(-) create mode 100644 src/common/string_view.h diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc index 3ec486aa..04d19479 100644 --- a/src/common/dwarf_cu_to_module.cc +++ b/src/common/dwarf_cu_to_module.cc @@ -48,8 +48,8 @@ #include #include +#include "common/string_view.h" #include "common/dwarf_line_to_module.h" -#include "common/unordered.h" #include "google_breakpad/common/breakpad_types.h" namespace google_breakpad { @@ -80,22 +80,21 @@ using std::unique_ptr; // we may need if we find a DW_AT_specification link pointing to it. struct DwarfCUToModule::Specification { // The qualified name that can be found by demangling DW_AT_MIPS_linkage_name. - string qualified_name; + StringView qualified_name; // The name of the enclosing scope, or the empty string if there is none. - string enclosing_name; + StringView enclosing_name; // The name for the specification DIE itself, without any enclosing // name components. - string unqualified_name; + StringView unqualified_name; }; // An abstract origin -- base definition of an inline function. struct AbstractOrigin { - AbstractOrigin() : name() {} - explicit AbstractOrigin(const string& name) : name(name) {} + explicit AbstractOrigin(StringView name) : name(name) {} - string name; + StringView name; }; typedef map AbstractOriginByOffset; @@ -103,25 +102,6 @@ typedef map AbstractOriginByOffset; // Data global to the DWARF-bearing file that is private to the // DWARF-to-Module process. struct DwarfCUToModule::FilePrivate { - // A set of strings used in this CU. Before storing a string in one of - // our data structures, insert it into this set, and then use the string - // from the set. - // - // In some STL implementations, strings are reference-counted internally, - // meaning that simply using strings from this set, even if passed by - // value, assigned, or held directly in structures and containers - // (map, for example), causes those strings to share a - // single instance of each distinct piece of text. GNU's libstdc++ uses - // reference counts, and I believe MSVC did as well, at some point. - // However, C++ '11 implementations are moving away from reference - // counting. - // - // In other implementations, string assignments copy the string's text, - // so this set will actually hold yet another copy of the string (although - // everything will still work). To improve memory consumption portably, - // we will probably need to use pointers to strings held in this set. - unordered_set common_strings; - // A map from offsets of DIEs within the .debug_info section to // Specifications describing those DIEs. Specification references can // cross compilation unit boundaries. @@ -293,7 +273,7 @@ struct DwarfCUToModule::DIEContext { // in a C++ compilation unit, the DIEContext's name for the // DW_TAG_subprogram DIE would be "Foo::Bar". The DIEContext's // name for the DW_TAG_namespace DIE would be "". - string name; + StringView name; }; // An abstract base class for all the dumper's DIE handlers. @@ -339,20 +319,12 @@ class DwarfCUToModule::GenericDIEHandler: public DIEHandler { // Use this from EndAttributes member functions, not ProcessAttribute* // functions; only the former can be sure that all the DIE's attributes // have been seen. - string ComputeQualifiedName(); + StringView ComputeQualifiedName(); CUContext* cu_context_; DIEContext* parent_context_; uint64_t offset_; - // Place the name in the global set of strings. Even though this looks - // like a copy, all the major string implementations use reference - // counting internally, so the effect is to have all the data structures - // share copies of strings whenever possible. - // FIXME: Should this return something like a string_ref to avoid the - // assumption about how strings are implemented? - string AddStringToPool(const string& str); - // If this DIE has a DW_AT_declaration attribute, this is its value. // It is false on DIEs with no DW_AT_declaration attribute. bool declaration_; @@ -377,16 +349,16 @@ class DwarfCUToModule::GenericDIEHandler: public DIEHandler { // The value of the DW_AT_name attribute, or the empty string if the // DIE has no such attribute. - string name_attribute_; + StringView name_attribute_; // The demangled value of the DW_AT_MIPS_linkage_name attribute, or the empty // string if the DIE has no such attribute or its content could not be // demangled. - string demangled_name_; + StringView demangled_name_; // The non-demangled value of the DW_AT_MIPS_linkage_name attribute, // it its content count not be demangled. - string raw_name_; + StringView raw_name_; }; void DwarfCUToModule::GenericDIEHandler::ProcessAttributeUnsigned( @@ -445,19 +417,14 @@ void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference( } } -string DwarfCUToModule::GenericDIEHandler::AddStringToPool(const string& str) { - pair::iterator, bool> result = - cu_context_->file_context->file_private_->common_strings.insert(str); - return *result.first; -} - void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString( enum DwarfAttribute attr, enum DwarfForm form, const string& data) { switch (attr) { case DW_AT_name: - name_attribute_ = AddStringToPool(data); + name_attribute_ = + cu_context_->file_context->module_->AddStringToPool(data); break; case DW_AT_MIPS_linkage_name: case DW_AT_linkage_name: { @@ -466,15 +433,16 @@ void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString( cu_context_->language->DemangleName(data, &demangled); switch (result) { case Language::kDemangleSuccess: - demangled_name_ = AddStringToPool(demangled); + demangled_name_ = + cu_context_->file_context->module_->AddStringToPool(demangled); break; case Language::kDemangleFailure: cu_context_->reporter->DemangleError(data); // fallthrough case Language::kDontDemangle: - demangled_name_.clear(); - raw_name_ = AddStringToPool(data); + demangled_name_ = StringView(); + raw_name_ = cu_context_->file_context->module_->AddStringToPool(data); break; } break; @@ -483,11 +451,11 @@ void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString( } } -string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() { +StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() { // Use the demangled name, if one is available. Demangled names are // preferable to those inferred from the DWARF structure because they // include argument types. - const string* qualified_name = NULL; + StringView* qualified_name = nullptr; if (!demangled_name_.empty()) { // Found it is this DIE. qualified_name = &demangled_name_; @@ -496,37 +464,39 @@ string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() { qualified_name = &specification_->qualified_name; } - const string* unqualified_name = NULL; - const string* enclosing_name; + StringView* unqualified_name = nullptr; + StringView* enclosing_name = nullptr; if (!qualified_name) { // Find the unqualified name. If the DIE has its own DW_AT_name // attribute, then use that; otherwise, check the specification. - if (!name_attribute_.empty()) + if (!name_attribute_.empty()) { unqualified_name = &name_attribute_; - else if (specification_) + } else if (specification_) { unqualified_name = &specification_->unqualified_name; - else if (!raw_name_.empty()) + } else if (!raw_name_.empty()) { unqualified_name = &raw_name_; + } // Find the name of the enclosing context. If this DIE has a // specification, it's the specification's enclosing context that // counts; otherwise, use this DIE's context. - if (specification_) + if (specification_) { enclosing_name = &specification_->enclosing_name; - else + } else { enclosing_name = &parent_context_->name; + } } // Prepare the return value before upcoming mutations possibly invalidate the // existing pointers. string return_value; if (qualified_name) { - return_value = *qualified_name; + return_value = qualified_name->str(); } else if (unqualified_name && enclosing_name) { // Combine the enclosing name and unqualified name to produce our // own fully-qualified name. - return_value = cu_context_->language->MakeQualifiedName(*enclosing_name, - *unqualified_name); + return_value = cu_context_->language->MakeQualifiedName( + enclosing_name->str(), unqualified_name->str()); } // If this DIE was marked as a declaration, record its names in the @@ -543,7 +513,7 @@ string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() { cu_context_->file_context->file_private_->specifications[offset_] = spec; } - return return_value; + return cu_context_->file_context->module_->AddStringToPool(return_value); } static bool IsEmptyRange(const vector& ranges) { @@ -585,7 +555,7 @@ class DwarfCUToModule::InlineHandler : public GenericDIEHandler { private: // The fully-qualified name, as derived from name_attribute_, // specification_, parent_context_. Computed in EndAttributes. - string name_; + StringView name_; uint64_t low_pc_; // DW_AT_low_pc uint64_t high_pc_; // DW_AT_high_pc DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address. @@ -644,7 +614,8 @@ bool DwarfCUToModule::InlineHandler::EndAttributes() { // We haven't seen the abstract origin yet, which might appears later and we // will fix the name after calling // InlineOriginMap::GetOrCreateInlineOrigin with right name. - name_ = ""; + name_ = + cu_context_->file_context->module_->AddStringToPool(""); } return true; } @@ -698,13 +669,19 @@ void DwarfCUToModule::InlineHandler::Finish() { // A handler class for DW_TAG_subprogram DIEs. class DwarfCUToModule::FuncHandler: public GenericDIEHandler { public: - FuncHandler(CUContext* cu_context, DIEContext* parent_context, - uint64_t offset, bool handle_inline) + FuncHandler(CUContext* cu_context, + DIEContext* parent_context, + uint64_t offset, + bool handle_inline) : GenericDIEHandler(cu_context, parent_context, offset), - low_pc_(0), high_pc_(0), high_pc_form_(DW_FORM_addr), - ranges_form_(DW_FORM_sec_offset), ranges_data_(0), - decl_file_data_(UINT64_MAX), inline_(false), - handle_inline_(handle_inline) { } + low_pc_(0), + high_pc_(0), + high_pc_form_(DW_FORM_addr), + ranges_form_(DW_FORM_sec_offset), + ranges_data_(0), + decl_file_data_(UINT64_MAX), + inline_(false), + handle_inline_(handle_inline) {} void ProcessAttributeUnsigned(enum DwarfAttribute attr, enum DwarfForm form, @@ -719,7 +696,7 @@ class DwarfCUToModule::FuncHandler: public GenericDIEHandler { private: // The fully-qualified name, as derived from name_attribute_, // specification_, parent_context_. Computed in EndAttributes. - string name_; + StringView name_; uint64_t low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address. DwarfForm ranges_form_; // DW_FORM_sec_offset or DW_FORM_rnglistx @@ -841,6 +818,8 @@ void DwarfCUToModule::FuncHandler::Finish() { } } + StringView name_omitted = + cu_context_->file_context->module_->AddStringToPool(""); bool empty_range = IsEmptyRange(ranges); // Did we collect the information we need? Not all DWARF function // entries are non-empty (for example, inlined functions that were never @@ -848,16 +827,9 @@ void DwarfCUToModule::FuncHandler::Finish() { // bytes. if (!empty_range) { low_pc_ = ranges.front().address; - // Malformed DWARF may omit the name, but all Module::Functions must // have names. - string name; - if (!name_.empty()) { - name = name_; - } else { - name = ""; - } - + StringView name = name_.empty() ? name_omitted : name_; // Create a Module::Function based on the data we've gathered, and // add it to the functions_ list. scoped_ptr func(new Module::Function(name, low_pc_)); @@ -880,20 +852,20 @@ void DwarfCUToModule::FuncHandler::Finish() { } } else if (inline_) { AbstractOrigin origin(name_); - cu_context_->file_context->file_private_->origins[offset_] = origin; + cu_context_->file_context->file_private_->origins.insert({offset_, origin}); } // Only keep track of DW_TAG_subprogram which have the attributes we are // interested. if (handle_inline_ && (!empty_range || inline_ || decl_file_data_ != UINT64_MAX)) { + StringView name = name_.empty() ? name_omitted : name_; uint64_t offset = specification_offset_ != 0 ? specification_offset_ : offset_; cu_context_->file_context->module_->inline_origin_map.SetReference(offset_, offset); cu_context_->file_context->module_->inline_origin_map - .GetOrCreateInlineOrigin(offset_, - name_.empty() ? "" : name_); + .GetOrCreateInlineOrigin(offset_, name); if (decl_file_data_ != UINT64_MAX) cu_context_->inline_origins[decl_file_data_].push_back(offset_); } @@ -993,7 +965,7 @@ void DwarfCUToModule::WarningReporter::UncoveredFunction( UncoveredHeading(); fprintf(stderr, " function%s: %s\n", IsEmptyRange(function.ranges) ? " (zero-length)" : "", - function.name.c_str()); + function.name.str().c_str()); } void DwarfCUToModule::WarningReporter::UncoveredLine(const Module::Line& line) { diff --git a/src/common/module.cc b/src/common/module.cc index 8b65bd07..3945e2dd 100644 --- a/src/common/module.cc +++ b/src/common/module.cc @@ -32,6 +32,7 @@ // module.cc: Implement google_breakpad::Module. See module.h. #include "common/module.h" +#include "common/string_view.h" #include #include @@ -51,7 +52,7 @@ using std::unique_ptr; Module::InlineOrigin* Module::InlineOriginMap::GetOrCreateInlineOrigin( uint64_t offset, - const string& name) { + StringView name) { uint64_t specification_offset = references_[offset]; // Find the root offset. auto iter = references_.find(specification_offset); diff --git a/src/common/module.h b/src/common/module.h index 7ee6b37e..c5e0abfc 100644 --- a/src/common/module.h +++ b/src/common/module.h @@ -46,7 +46,9 @@ #include #include +#include "common/string_view.h" #include "common/symbol_data.h" +#include "common/unordered.h" #include "common/using_std_string.h" #include "google_breakpad/common/breakpad_types.h" @@ -101,7 +103,7 @@ class Module { // A function. struct Function { - Function(const string& name_input, const Address& address_input) : + Function(StringView name_input, const Address& address_input) : name(name_input), address(address_input), parameter_size(0) {} // For sorting by address. (Not style-guide compliant, but it's @@ -111,7 +113,7 @@ class Module { } // The function's name. - string name; + StringView name; // The start address and the address ranges covered by the function. const Address address; @@ -129,15 +131,14 @@ class Module { }; struct InlineOrigin { - explicit InlineOrigin(const string& name) - : id(-1), name(name), file(nullptr) {} + explicit InlineOrigin(StringView name): id(-1), name(name), file(nullptr) {} // A unique id for each InlineOrigin object. INLINE records use the id to // refer to its INLINE_ORIGIN record. int id; // The inlined function's name. - string name; + StringView name; File* file; @@ -175,7 +176,7 @@ class Module { class InlineOriginMap { public: // Add INLINE ORIGIN to the module. Return a pointer to origin . - InlineOrigin* GetOrCreateInlineOrigin(uint64_t offset, const string& name); + InlineOrigin* GetOrCreateInlineOrigin(uint64_t offset, StringView name); // offset is the offset of a DW_TAG_subprogram. specification_offset is the // value of its DW_AT_specification or equals to offset if @@ -382,6 +383,13 @@ class Module { // established by SetLoadAddress. bool Write(std::ostream& stream, SymbolData symbol_data); + // Place the name in the global set of strings. Return a StringView points to + // a string inside the pool. + StringView AddStringToPool(const string& str) { + auto result = common_strings_.insert(str); + return *(result.first); + } + string name() const { return name_; } string os() const { return os_; } string architecture() const { return architecture_; } @@ -443,6 +451,8 @@ class Module { // The module owns all the externs that have been added to it; // destroying the module frees the Externs these point to. ExternSet externs_; + + unordered_set common_strings_; }; } // namespace google_breakpad diff --git a/src/common/stabs_to_module.cc b/src/common/stabs_to_module.cc index dd61d4d4..cbddd33d 100644 --- a/src/common/stabs_to_module.cc +++ b/src/common/stabs_to_module.cc @@ -90,7 +90,8 @@ bool StabsToModule::EndCompilationUnit(uint64_t address) { bool StabsToModule::StartFunction(const string& name, uint64_t address) { assert(!current_function_); - Module::Function *f = new Module::Function(Demangle(name), address); + Module::Function* f = + new Module::Function(module_->AddStringToPool(Demangle(name)), address); Module::Range r(address, 0); // We compute this in StabsToModule::Finalize(). f->ranges.push_back(r); f->parameter_size = 0; // We don't provide this information. diff --git a/src/common/string_view.h b/src/common/string_view.h new file mode 100644 index 00000000..3c2fc744 --- /dev/null +++ b/src/common/string_view.h @@ -0,0 +1,113 @@ +// Copyright (c) 2021 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef COMMON_STRING_VIEW_H__ +#define COMMON_STRING_VIEW_H__ + +#include +#include +#include +#include "common/using_std_string.h" + +namespace google_breakpad { + +// A StringView is a string reference to a string object, but not own the +// string object. +class StringView { + private: + // The start of the string, in an external buffer. It doesn't have to be + // null-terminated. + const char* data_ = ""; + + size_t length_ = 0; + + public: + // Construct an empty StringView. + StringView() = default; + + // Disallow construct StringView from nullptr. + StringView(nullptr_t) = delete; + + // Construct a StringView from a cstring. + StringView(const char* str) : data_(str) { + assert(str); + length_ = strlen(str); + } + + // Construct a StringView from a cstring with fixed length. + StringView(const char* str, size_t length) : data_(str), length_(length) { + assert(str); + } + + // Construct a StringView from an std::string. + StringView(const string& str) : data_(str.data()), length_(str.length()) {} + + string str() const { return string(data_, length_); } + + const char* data() const { return data_; } + + bool empty() const { return length_ == 0; } + + size_t size() const { return length_; } + + int compare(StringView rhs) const { + size_t min_len = std::min(size(), rhs.size()); + int res = memcmp(data_, rhs.data(), min_len); + if (res != 0) + return res; + if (size() == rhs.size()) + return 0; + return size() < rhs.size() ? -1 : 1; + } +}; + +inline bool operator==(StringView lhs, StringView rhs) { + return lhs.compare(rhs) == 0; +} + +inline bool operator!=(StringView lhs, StringView rhs) { + return lhs.compare(rhs) != 0; +} + +inline bool operator<(StringView lhs, StringView rhs) { + return lhs.compare(rhs) < 0; +} + +inline bool operator>(StringView lhs, StringView rhs) { + return lhs.compare(rhs) > 0; +} + +inline std::ostream& operator<<(std::ostream& os, StringView s) { + os << s.str(); + return os; +} + +} // namespace google_breakpad + +#endif // COMMON_STRING_VIEW_H__