Add a string pool to store functions names

- Added StringView which is used as a reference to a string, but
doesn't own the string.
- Removed the old string pool in DwarfCUToModule::FilePrivate, since
it's doing string copy.
- Added a string pool in Module to store functions/inline origins'
names (mangled and demangled).
- The peak memory usage drops from 20.6 GB to 12.5 GB when disabling
inline records and drops from 36 GB to 20.3 GB when enabling inline records.

Bug: chromium:1246974, chromium:1250351
Change-Id: Ie7e9740ea10c1930a0fc58c6becaae2d718b83b8
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/3189410
Reviewed-by: Joshua Peraza <jperaza@chromium.org>
This commit is contained in:
Zequan Wu 2021-09-30 12:49:44 -07:00 committed by Joshua Peraza
parent d4bf038be7
commit ff5892c5da
5 changed files with 188 additions and 91 deletions

View file

@ -48,8 +48,8 @@
#include <numeric>
#include <utility>
#include "common/string_view.h"
#include "common/dwarf_line_to_module.h"
#include "common/unordered.h"
#include "google_breakpad/common/breakpad_types.h"
namespace google_breakpad {
@ -80,22 +80,21 @@ using std::unique_ptr;
// we may need if we find a DW_AT_specification link pointing to it.
struct DwarfCUToModule::Specification {
// The qualified name that can be found by demangling DW_AT_MIPS_linkage_name.
string qualified_name;
StringView qualified_name;
// The name of the enclosing scope, or the empty string if there is none.
string enclosing_name;
StringView enclosing_name;
// The name for the specification DIE itself, without any enclosing
// name components.
string unqualified_name;
StringView unqualified_name;
};
// An abstract origin -- base definition of an inline function.
struct AbstractOrigin {
AbstractOrigin() : name() {}
explicit AbstractOrigin(const string& name) : name(name) {}
explicit AbstractOrigin(StringView name) : name(name) {}
string name;
StringView name;
};
typedef map<uint64_t, AbstractOrigin> AbstractOriginByOffset;
@ -103,25 +102,6 @@ typedef map<uint64_t, AbstractOrigin> AbstractOriginByOffset;
// Data global to the DWARF-bearing file that is private to the
// DWARF-to-Module process.
struct DwarfCUToModule::FilePrivate {
// A set of strings used in this CU. Before storing a string in one of
// our data structures, insert it into this set, and then use the string
// from the set.
//
// In some STL implementations, strings are reference-counted internally,
// meaning that simply using strings from this set, even if passed by
// value, assigned, or held directly in structures and containers
// (map<string, ...>, for example), causes those strings to share a
// single instance of each distinct piece of text. GNU's libstdc++ uses
// reference counts, and I believe MSVC did as well, at some point.
// However, C++ '11 implementations are moving away from reference
// counting.
//
// In other implementations, string assignments copy the string's text,
// so this set will actually hold yet another copy of the string (although
// everything will still work). To improve memory consumption portably,
// we will probably need to use pointers to strings held in this set.
unordered_set<string> common_strings;
// A map from offsets of DIEs within the .debug_info section to
// Specifications describing those DIEs. Specification references can
// cross compilation unit boundaries.
@ -293,7 +273,7 @@ struct DwarfCUToModule::DIEContext {
// in a C++ compilation unit, the DIEContext's name for the
// DW_TAG_subprogram DIE would be "Foo::Bar". The DIEContext's
// name for the DW_TAG_namespace DIE would be "".
string name;
StringView name;
};
// An abstract base class for all the dumper's DIE handlers.
@ -339,20 +319,12 @@ class DwarfCUToModule::GenericDIEHandler: public DIEHandler {
// Use this from EndAttributes member functions, not ProcessAttribute*
// functions; only the former can be sure that all the DIE's attributes
// have been seen.
string ComputeQualifiedName();
StringView ComputeQualifiedName();
CUContext* cu_context_;
DIEContext* parent_context_;
uint64_t offset_;
// Place the name in the global set of strings. Even though this looks
// like a copy, all the major string implementations use reference
// counting internally, so the effect is to have all the data structures
// share copies of strings whenever possible.
// FIXME: Should this return something like a string_ref to avoid the
// assumption about how strings are implemented?
string AddStringToPool(const string& str);
// If this DIE has a DW_AT_declaration attribute, this is its value.
// It is false on DIEs with no DW_AT_declaration attribute.
bool declaration_;
@ -377,16 +349,16 @@ class DwarfCUToModule::GenericDIEHandler: public DIEHandler {
// The value of the DW_AT_name attribute, or the empty string if the
// DIE has no such attribute.
string name_attribute_;
StringView name_attribute_;
// The demangled value of the DW_AT_MIPS_linkage_name attribute, or the empty
// string if the DIE has no such attribute or its content could not be
// demangled.
string demangled_name_;
StringView demangled_name_;
// The non-demangled value of the DW_AT_MIPS_linkage_name attribute,
// it its content count not be demangled.
string raw_name_;
StringView raw_name_;
};
void DwarfCUToModule::GenericDIEHandler::ProcessAttributeUnsigned(
@ -445,19 +417,14 @@ void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference(
}
}
string DwarfCUToModule::GenericDIEHandler::AddStringToPool(const string& str) {
pair<unordered_set<string>::iterator, bool> result =
cu_context_->file_context->file_private_->common_strings.insert(str);
return *result.first;
}
void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString(
enum DwarfAttribute attr,
enum DwarfForm form,
const string& data) {
switch (attr) {
case DW_AT_name:
name_attribute_ = AddStringToPool(data);
name_attribute_ =
cu_context_->file_context->module_->AddStringToPool(data);
break;
case DW_AT_MIPS_linkage_name:
case DW_AT_linkage_name: {
@ -466,15 +433,16 @@ void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString(
cu_context_->language->DemangleName(data, &demangled);
switch (result) {
case Language::kDemangleSuccess:
demangled_name_ = AddStringToPool(demangled);
demangled_name_ =
cu_context_->file_context->module_->AddStringToPool(demangled);
break;
case Language::kDemangleFailure:
cu_context_->reporter->DemangleError(data);
// fallthrough
case Language::kDontDemangle:
demangled_name_.clear();
raw_name_ = AddStringToPool(data);
demangled_name_ = StringView();
raw_name_ = cu_context_->file_context->module_->AddStringToPool(data);
break;
}
break;
@ -483,11 +451,11 @@ void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString(
}
}
string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
// Use the demangled name, if one is available. Demangled names are
// preferable to those inferred from the DWARF structure because they
// include argument types.
const string* qualified_name = NULL;
StringView* qualified_name = nullptr;
if (!demangled_name_.empty()) {
// Found it is this DIE.
qualified_name = &demangled_name_;
@ -496,37 +464,39 @@ string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
qualified_name = &specification_->qualified_name;
}
const string* unqualified_name = NULL;
const string* enclosing_name;
StringView* unqualified_name = nullptr;
StringView* enclosing_name = nullptr;
if (!qualified_name) {
// Find the unqualified name. If the DIE has its own DW_AT_name
// attribute, then use that; otherwise, check the specification.
if (!name_attribute_.empty())
if (!name_attribute_.empty()) {
unqualified_name = &name_attribute_;
else if (specification_)
} else if (specification_) {
unqualified_name = &specification_->unqualified_name;
else if (!raw_name_.empty())
} else if (!raw_name_.empty()) {
unqualified_name = &raw_name_;
}
// Find the name of the enclosing context. If this DIE has a
// specification, it's the specification's enclosing context that
// counts; otherwise, use this DIE's context.
if (specification_)
if (specification_) {
enclosing_name = &specification_->enclosing_name;
else
} else {
enclosing_name = &parent_context_->name;
}
}
// Prepare the return value before upcoming mutations possibly invalidate the
// existing pointers.
string return_value;
if (qualified_name) {
return_value = *qualified_name;
return_value = qualified_name->str();
} else if (unqualified_name && enclosing_name) {
// Combine the enclosing name and unqualified name to produce our
// own fully-qualified name.
return_value = cu_context_->language->MakeQualifiedName(*enclosing_name,
*unqualified_name);
return_value = cu_context_->language->MakeQualifiedName(
enclosing_name->str(), unqualified_name->str());
}
// If this DIE was marked as a declaration, record its names in the
@ -543,7 +513,7 @@ string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
cu_context_->file_context->file_private_->specifications[offset_] = spec;
}
return return_value;
return cu_context_->file_context->module_->AddStringToPool(return_value);
}
static bool IsEmptyRange(const vector<Module::Range>& ranges) {
@ -585,7 +555,7 @@ class DwarfCUToModule::InlineHandler : public GenericDIEHandler {
private:
// The fully-qualified name, as derived from name_attribute_,
// specification_, parent_context_. Computed in EndAttributes.
string name_;
StringView name_;
uint64_t low_pc_; // DW_AT_low_pc
uint64_t high_pc_; // DW_AT_high_pc
DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address.
@ -644,7 +614,8 @@ bool DwarfCUToModule::InlineHandler::EndAttributes() {
// We haven't seen the abstract origin yet, which might appears later and we
// will fix the name after calling
// InlineOriginMap::GetOrCreateInlineOrigin with right name.
name_ = "<name omitted>";
name_ =
cu_context_->file_context->module_->AddStringToPool("<name omitted>");
}
return true;
}
@ -698,12 +669,18 @@ void DwarfCUToModule::InlineHandler::Finish() {
// A handler class for DW_TAG_subprogram DIEs.
class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
public:
FuncHandler(CUContext* cu_context, DIEContext* parent_context,
uint64_t offset, bool handle_inline)
FuncHandler(CUContext* cu_context,
DIEContext* parent_context,
uint64_t offset,
bool handle_inline)
: GenericDIEHandler(cu_context, parent_context, offset),
low_pc_(0), high_pc_(0), high_pc_form_(DW_FORM_addr),
ranges_form_(DW_FORM_sec_offset), ranges_data_(0),
decl_file_data_(UINT64_MAX), inline_(false),
low_pc_(0),
high_pc_(0),
high_pc_form_(DW_FORM_addr),
ranges_form_(DW_FORM_sec_offset),
ranges_data_(0),
decl_file_data_(UINT64_MAX),
inline_(false),
handle_inline_(handle_inline) {}
void ProcessAttributeUnsigned(enum DwarfAttribute attr,
@ -719,7 +696,7 @@ class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
private:
// The fully-qualified name, as derived from name_attribute_,
// specification_, parent_context_. Computed in EndAttributes.
string name_;
StringView name_;
uint64_t low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc
DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address.
DwarfForm ranges_form_; // DW_FORM_sec_offset or DW_FORM_rnglistx
@ -841,6 +818,8 @@ void DwarfCUToModule::FuncHandler::Finish() {
}
}
StringView name_omitted =
cu_context_->file_context->module_->AddStringToPool("<name omitted>");
bool empty_range = IsEmptyRange(ranges);
// Did we collect the information we need? Not all DWARF function
// entries are non-empty (for example, inlined functions that were never
@ -848,16 +827,9 @@ void DwarfCUToModule::FuncHandler::Finish() {
// bytes.
if (!empty_range) {
low_pc_ = ranges.front().address;
// Malformed DWARF may omit the name, but all Module::Functions must
// have names.
string name;
if (!name_.empty()) {
name = name_;
} else {
name = "<name omitted>";
}
StringView name = name_.empty() ? name_omitted : name_;
// Create a Module::Function based on the data we've gathered, and
// add it to the functions_ list.
scoped_ptr<Module::Function> func(new Module::Function(name, low_pc_));
@ -880,20 +852,20 @@ void DwarfCUToModule::FuncHandler::Finish() {
}
} else if (inline_) {
AbstractOrigin origin(name_);
cu_context_->file_context->file_private_->origins[offset_] = origin;
cu_context_->file_context->file_private_->origins.insert({offset_, origin});
}
// Only keep track of DW_TAG_subprogram which have the attributes we are
// interested.
if (handle_inline_ &&
(!empty_range || inline_ || decl_file_data_ != UINT64_MAX)) {
StringView name = name_.empty() ? name_omitted : name_;
uint64_t offset =
specification_offset_ != 0 ? specification_offset_ : offset_;
cu_context_->file_context->module_->inline_origin_map.SetReference(offset_,
offset);
cu_context_->file_context->module_->inline_origin_map
.GetOrCreateInlineOrigin(offset_,
name_.empty() ? "<name omitted>" : name_);
.GetOrCreateInlineOrigin(offset_, name);
if (decl_file_data_ != UINT64_MAX)
cu_context_->inline_origins[decl_file_data_].push_back(offset_);
}
@ -993,7 +965,7 @@ void DwarfCUToModule::WarningReporter::UncoveredFunction(
UncoveredHeading();
fprintf(stderr, " function%s: %s\n",
IsEmptyRange(function.ranges) ? " (zero-length)" : "",
function.name.c_str());
function.name.str().c_str());
}
void DwarfCUToModule::WarningReporter::UncoveredLine(const Module::Line& line) {

View file

@ -32,6 +32,7 @@
// module.cc: Implement google_breakpad::Module. See module.h.
#include "common/module.h"
#include "common/string_view.h"
#include <assert.h>
#include <errno.h>
@ -51,7 +52,7 @@ using std::unique_ptr;
Module::InlineOrigin* Module::InlineOriginMap::GetOrCreateInlineOrigin(
uint64_t offset,
const string& name) {
StringView name) {
uint64_t specification_offset = references_[offset];
// Find the root offset.
auto iter = references_.find(specification_offset);

View file

@ -46,7 +46,9 @@
#include <string>
#include <vector>
#include "common/string_view.h"
#include "common/symbol_data.h"
#include "common/unordered.h"
#include "common/using_std_string.h"
#include "google_breakpad/common/breakpad_types.h"
@ -101,7 +103,7 @@ class Module {
// A function.
struct Function {
Function(const string& name_input, const Address& address_input) :
Function(StringView name_input, const Address& address_input) :
name(name_input), address(address_input), parameter_size(0) {}
// For sorting by address. (Not style-guide compliant, but it's
@ -111,7 +113,7 @@ class Module {
}
// The function's name.
string name;
StringView name;
// The start address and the address ranges covered by the function.
const Address address;
@ -129,15 +131,14 @@ class Module {
};
struct InlineOrigin {
explicit InlineOrigin(const string& name)
: id(-1), name(name), file(nullptr) {}
explicit InlineOrigin(StringView name): id(-1), name(name), file(nullptr) {}
// A unique id for each InlineOrigin object. INLINE records use the id to
// refer to its INLINE_ORIGIN record.
int id;
// The inlined function's name.
string name;
StringView name;
File* file;
@ -175,7 +176,7 @@ class Module {
class InlineOriginMap {
public:
// Add INLINE ORIGIN to the module. Return a pointer to origin .
InlineOrigin* GetOrCreateInlineOrigin(uint64_t offset, const string& name);
InlineOrigin* GetOrCreateInlineOrigin(uint64_t offset, StringView name);
// offset is the offset of a DW_TAG_subprogram. specification_offset is the
// value of its DW_AT_specification or equals to offset if
@ -382,6 +383,13 @@ class Module {
// established by SetLoadAddress.
bool Write(std::ostream& stream, SymbolData symbol_data);
// Place the name in the global set of strings. Return a StringView points to
// a string inside the pool.
StringView AddStringToPool(const string& str) {
auto result = common_strings_.insert(str);
return *(result.first);
}
string name() const { return name_; }
string os() const { return os_; }
string architecture() const { return architecture_; }
@ -443,6 +451,8 @@ class Module {
// The module owns all the externs that have been added to it;
// destroying the module frees the Externs these point to.
ExternSet externs_;
unordered_set<string> common_strings_;
};
} // namespace google_breakpad

View file

@ -90,7 +90,8 @@ bool StabsToModule::EndCompilationUnit(uint64_t address) {
bool StabsToModule::StartFunction(const string& name,
uint64_t address) {
assert(!current_function_);
Module::Function *f = new Module::Function(Demangle(name), address);
Module::Function* f =
new Module::Function(module_->AddStringToPool(Demangle(name)), address);
Module::Range r(address, 0); // We compute this in StabsToModule::Finalize().
f->ranges.push_back(r);
f->parameter_size = 0; // We don't provide this information.

113
src/common/string_view.h Normal file
View file

@ -0,0 +1,113 @@
// Copyright (c) 2021 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef COMMON_STRING_VIEW_H__
#define COMMON_STRING_VIEW_H__
#include <cassert>
#include <cstring>
#include <ostream>
#include "common/using_std_string.h"
namespace google_breakpad {
// A StringView is a string reference to a string object, but not own the
// string object.
class StringView {
private:
// The start of the string, in an external buffer. It doesn't have to be
// null-terminated.
const char* data_ = "";
size_t length_ = 0;
public:
// Construct an empty StringView.
StringView() = default;
// Disallow construct StringView from nullptr.
StringView(nullptr_t) = delete;
// Construct a StringView from a cstring.
StringView(const char* str) : data_(str) {
assert(str);
length_ = strlen(str);
}
// Construct a StringView from a cstring with fixed length.
StringView(const char* str, size_t length) : data_(str), length_(length) {
assert(str);
}
// Construct a StringView from an std::string.
StringView(const string& str) : data_(str.data()), length_(str.length()) {}
string str() const { return string(data_, length_); }
const char* data() const { return data_; }
bool empty() const { return length_ == 0; }
size_t size() const { return length_; }
int compare(StringView rhs) const {
size_t min_len = std::min(size(), rhs.size());
int res = memcmp(data_, rhs.data(), min_len);
if (res != 0)
return res;
if (size() == rhs.size())
return 0;
return size() < rhs.size() ? -1 : 1;
}
};
inline bool operator==(StringView lhs, StringView rhs) {
return lhs.compare(rhs) == 0;
}
inline bool operator!=(StringView lhs, StringView rhs) {
return lhs.compare(rhs) != 0;
}
inline bool operator<(StringView lhs, StringView rhs) {
return lhs.compare(rhs) < 0;
}
inline bool operator>(StringView lhs, StringView rhs) {
return lhs.compare(rhs) > 0;
}
inline std::ostream& operator<<(std::ostream& os, StringView s) {
os << s.str();
return os;
}
} // namespace google_breakpad
#endif // COMMON_STRING_VIEW_H__