Add INLINE and INLINE_ORIGIN records on Windows dump_syms

This adds INLINE and INLINE_ORIGIN records to the Windows dump_syms. It also
adds more LINE records that represent the innermost call site line info
inside a function.

Bug: chromium:1190878
Change-Id: I15c2044709f8ca831b03a453910d036f749452c6
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/3133606
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Joshua Peraza <jperaza@chromium.org>
Reviewed-by: Ivan Penkov <ivanpe@chromium.org>
This commit is contained in:
Zequan Wu 2022-01-04 16:03:22 -08:00 committed by Joshua Peraza
parent 634a7b3fad
commit 10afee3916
3 changed files with 482 additions and 60 deletions

View file

@ -40,6 +40,7 @@
#include <algorithm>
#include <limits>
#include <map>
#include <memory>
#include <set>
#include <utility>
@ -58,6 +59,8 @@ namespace google_breakpad {
namespace {
using std::set;
using std::unique_ptr;
using std::vector;
// The symbol (among possibly many) selected to represent an rva.
@ -208,9 +211,160 @@ void StripLlvmSuffixAndUndecorate(BSTR* name) {
} // namespace
PDBSourceLineWriter::PDBSourceLineWriter() : output_(NULL) {
PDBSourceLineWriter::Inline::Inline(int inline_nest_level)
: inline_nest_level_(inline_nest_level) {}
void PDBSourceLineWriter::Inline::SetOriginId(int origin_id) {
origin_id_ = origin_id;
}
void PDBSourceLineWriter::Inline::ExtendRanges(const Line& line) {
if (ranges_.empty()) {
ranges_[line.rva] = line.length;
return;
}
auto iter = ranges_.lower_bound(line.rva);
// There is no overlap if this function is called with inlinee lines from
// the same callsite.
if (iter == ranges_.begin()) {
return;
}
if (line.rva + line.length == iter->first) {
// If they are connected, merge their ranges into one.
DWORD length = line.length + iter->second;
ranges_.erase(iter);
ranges_[line.rva] = length;
} else {
--iter;
if (iter->first + iter->second == line.rva) {
ranges_[iter->first] = iter->second + line.length;
} else {
ranges_[line.rva] = line.length;
}
}
}
// Stores the source line number at which the inlined function is called.
void PDBSourceLineWriter::Inline::SetCallSiteLine(DWORD call_site_line) {
  this->call_site_line_ = call_site_line;
}
// Stores the id of the source file that contains the call site.
void PDBSourceLineWriter::Inline::SetCallSiteFileId(DWORD call_site_file_id) {
  this->call_site_file_id_ = call_site_file_id;
}
// Takes ownership of the inlines nested directly inside this one, replacing
// any children that were set before.
void PDBSourceLineWriter::Inline::SetChildInlines(
    vector<unique_ptr<Inline>> child_inlines) {
  // The previous children end up in the by-value parameter and are destroyed
  // when it goes out of scope.
  child_inlines_.swap(child_inlines);
}
void PDBSourceLineWriter::Inline::Print(FILE* output) const {
// Ignore INLINE record that doesn't have any range.
if (ranges_.empty())
return;
fprintf(output, "INLINE %d %lu %lu %d", inline_nest_level_, call_site_line_,
call_site_file_id_, origin_id_);
for (const auto& r : ranges_) {
fprintf(output, " %lx %lx", r.first, r.second);
}
fprintf(output, "\n");
for (const unique_ptr<Inline>& in : child_inlines_) {
in->Print(output);
}
}
// Returns the line in line_map_ that covers `rva`, or nullptr if there is
// none. A line whose start address equals `rva` is returned directly;
// otherwise the closest line starting below `rva` is returned if its range
// reaches `rva`.
const PDBSourceLineWriter::Line* PDBSourceLineWriter::Lines::GetLine(
    DWORD rva) const {
  const auto exact = line_map_.find(rva);
  if (exact != line_map_.end())
    return &exact->second;

  // No line starts at rva; look at the nearest line that starts before it.
  auto candidate = line_map_.lower_bound(rva);
  if (candidate == line_map_.begin())
    return nullptr;
  --candidate;

  const Line& covering = candidate->second;
  // No top level line covers this rva (e.g. empty lines were found for the
  // function), so the call site line number for an inlined function at this
  // address is unknown.
  if (rva >= covering.rva + covering.length)
    return nullptr;
  return &covering;
}
// Returns the source line number of the line covering `rva`, or 0 when no
// line covers it.
DWORD PDBSourceLineWriter::Lines::GetLineNum(DWORD rva) const {
  if (const Line* found = GetLine(rva))
    return found->line_num;
  return 0;
}
// Returns the source file id of the line covering `rva`, or 0 when no line
// covers it.
DWORD PDBSourceLineWriter::Lines::GetFileId(DWORD rva) const {
  if (const Line* found = GetLine(rva))
    return found->file_id;
  return 0;
}
// Inserts `line` into line_map_, keeping all stored lines non-overlapping.
// Existing lines that overlap `line` are removed when fully covered, or
// truncated/split so that only the parts outside `line` remain.
void PDBSourceLineWriter::Lines::AddLine(const Line& line) {
  if (line_map_.empty()) {
    line_map_[line.rva] = line;
    return;
  }

  // Given an existing line in line_map_, remove it from line_map_ if it
  // overlaps with the line and add a new line for the non-overlap range.
  // Returns true if there is an overlap. `old_line` is taken by value because
  // the map entry it was copied from may be erased below.
  auto intercept = [&](Line old_line) {
    DWORD end = old_line.rva + old_line.length;
    // No overlap.
    if (old_line.rva >= line.rva + line.length || line.rva >= end)
      return false;
    // old_line is within the line.
    if (old_line.rva >= line.rva && end <= line.rva + line.length) {
      line_map_.erase(old_line.rva);
      return true;
    }
    // Then there is a partial overlap.
    if (old_line.rva < line.rva) {
      // Keep the head of old_line that precedes the new line.
      old_line.length -= end - line.rva;
      if (end > line.rva + line.length) {
        // old_line also extends past the new line: keep its tail as a
        // separate entry.
        Line new_line = old_line;
        new_line.rva = line.rva + line.length;
        new_line.length = end - new_line.rva;
        line_map_[new_line.rva] = new_line;
      }
    } else {
      // old_line starts inside the new line and ends after it: re-key the
      // remaining tail at the end of the new line.
      line_map_.erase(old_line.rva);
      old_line.length -= line.rva + line.length - old_line.rva;
      old_line.rva = line.rva + line.length;
    }
    line_map_[old_line.rva] = old_line;
    return true;
  };

  bool is_intercept = false;
  // Use a loop in case there are multiple lines within the given line.
  do {
    // A previous pass may have erased the last remaining entry. Stop here:
    // stepping an iterator back from end() of an empty map is undefined.
    if (line_map_.empty())
      break;
    auto iter = line_map_.lower_bound(line.rva);
    if (iter == line_map_.end()) {
      // Every stored line starts before the new one; only the last of them
      // can reach into it.
      --iter;
      intercept(iter->second);
      break;
    }
    is_intercept = false;
    if (iter != line_map_.begin()) {
      // Check if the given line overlaps the line with the next smaller rva.
      // intercept() never erases entries at or after `iter` in this case, so
      // `iter` stays valid.
      auto prev = iter;
      --prev;
      is_intercept = intercept(prev->second);
    }
    // Check if the given line overlaps a line with greater or equal rva in the
    // map. Using operator |= here since it's possible that multiple lines
    // with greater rva in the map overlap with the given line.
    is_intercept |= intercept(iter->second);
  } while (is_intercept);

  line_map_[line.rva] = line;
}
// `handle_inline` selects whether INLINE/INLINE_ORIGIN records are produced.
// No output stream is attached at construction time.
PDBSourceLineWriter::PDBSourceLineWriter(bool handle_inline)
    : output_(nullptr), handle_inline_(handle_inline) {}
// Calls Close() when the writer is destroyed.
PDBSourceLineWriter::~PDBSourceLineWriter() {
  this->Close();
}
@ -280,50 +434,63 @@ bool PDBSourceLineWriter::Open(const wstring& file, FileFormat format) {
return true;
}
bool PDBSourceLineWriter::PrintLines(IDiaEnumLineNumbers* lines) {
// The line number format is:
// <rva> <line number> <source file id>
// Fills `line` from a DIA line number entry: rva, code length, file id and
// source line number, queried in that order. Logs to stderr and returns
// false as soon as one of the DIA queries fails; returns true otherwise.
bool PDBSourceLineWriter::GetLine(IDiaLineNumber* dia_line, Line* line) const {
  if (FAILED(dia_line->get_relativeVirtualAddress(&line->rva))) {
    fprintf(stderr, "failed to get line rva\n");
    return false;
  }

  if (FAILED(dia_line->get_length(&line->length))) {
    fprintf(stderr, "failed to get line code length\n");
    return false;
  }

  DWORD raw_file_id;
  if (FAILED(dia_line->get_sourceFileId(&raw_file_id))) {
    fprintf(stderr, "failed to get line source file id\n");
    return false;
  }
  // Files with duplicate names are coalesced so that they share one id.
  line->file_id = GetRealFileID(raw_file_id);

  if (FAILED(dia_line->get_lineNumber(&line->line_num))) {
    fprintf(stderr, "failed to get line number\n");
    return false;
  }

  return true;
}
bool PDBSourceLineWriter::GetLines(IDiaEnumLineNumbers* lines,
Lines* line_list) const {
CComPtr<IDiaLineNumber> line;
ULONG count;
while (SUCCEEDED(lines->Next(1, &line, &count)) && count == 1) {
DWORD rva;
if (FAILED(line->get_relativeVirtualAddress(&rva))) {
fprintf(stderr, "failed to get line rva\n");
Line l;
if (!GetLine(line, &l))
return false;
}
DWORD length;
if (FAILED(line->get_length(&length))) {
fprintf(stderr, "failed to get line code length\n");
return false;
}
DWORD dia_source_id;
if (FAILED(line->get_sourceFileId(&dia_source_id))) {
fprintf(stderr, "failed to get line source file id\n");
return false;
}
// duplicate file names are coalesced to share one ID
DWORD source_id = GetRealFileID(dia_source_id);
DWORD line_num;
if (FAILED(line->get_lineNumber(&line_num))) {
fprintf(stderr, "failed to get line number\n");
return false;
}
AddressRangeVector ranges;
MapAddressRange(image_map_, AddressRange(rva, length), &ranges);
for (size_t i = 0; i < ranges.size(); ++i) {
fprintf(output_, "%lx %lx %lu %lu\n", ranges[i].rva, ranges[i].length,
line_num, source_id);
}
// Silently ignore zero-length lines.
if (l.length != 0)
line_list->AddLine(l);
line.Release();
}
return true;
}
void PDBSourceLineWriter::PrintLines(const Lines& lines) const {
// The line number format is:
// <rva> <line number> <source file id>
for (const auto& kv : lines.GetLineMap()) {
const Line& l = kv.second;
AddressRangeVector ranges;
MapAddressRange(image_map_, AddressRange(l.rva, l.length), &ranges);
for (auto& range : ranges) {
fprintf(output_, "%lx %lx %lu %lu\n", range.rva, range.length, l.line_num,
l.file_id);
}
}
}
bool PDBSourceLineWriter::PrintFunction(IDiaSymbol* function,
IDiaSymbol* block,
bool has_multiple_symbols) {
@ -372,9 +539,20 @@ bool PDBSourceLineWriter::PrintFunction(IDiaSymbol* function,
return false;
}
if (!PrintLines(lines)) {
// Get top level lines first, which later may be split into multiple smaller
// lines if any inline exists in their ranges if we want to handle inline.
Lines line_list;
if (!GetLines(lines, &line_list)) {
return false;
}
if (handle_inline_) {
vector<unique_ptr<Inline>> inlines;
if (!GetInlines(block, &line_list, 0, &inlines)) {
return false;
}
PrintInlines(inlines);
}
PrintLines(line_list);
return true;
}
@ -555,6 +733,97 @@ bool PDBSourceLineWriter::PrintFunctions() {
return true;
}
void PDBSourceLineWriter::PrintInlineOrigins() const {
struct OriginCompare {
bool operator()(const InlineOrigin lhs, const InlineOrigin rhs) const {
return lhs.id < rhs.id;
}
};
set<InlineOrigin, OriginCompare> origins;
// Sort by origin id.
for (auto const& origin : inline_origins_)
origins.insert(origin.second);
for (auto o : origins) {
fprintf(output_, "INLINE_ORIGIN %d %ls\n", o.id, o.name.c_str());
}
}
// Collects the inline call sites found directly under `block` into `inlines`
// and recurses into each call site for nested inlines.
//
// `block`: the symbol whose SymTagInlineSite children are enumerated.
// `line_list`: lines of the enclosing function; every inlinee line is added
//     to it, which splits or replaces the outer lines it overlaps.
// `inline_nest_level`: nest level assigned to the inlines created here;
//     children get inline_nest_level + 1.
// `inlines`: receives one Inline per call site, in enumeration order.
//
// Returns false if a DIA query for the call sites or their lines fails.
bool PDBSourceLineWriter::GetInlines(IDiaSymbol* block,
                                     Lines* line_list,
                                     int inline_nest_level,
                                     vector<unique_ptr<Inline>>* inlines) {
  CComPtr<IDiaEnumSymbols> inline_callsites;
  if (FAILED(block->findChildrenEx(SymTagInlineSite, nullptr, nsNone,
                                   &inline_callsites))) {
    return false;
  }
  ULONG count;
  CComPtr<IDiaSymbol> callsite;
  while (SUCCEEDED(inline_callsites->Next(1, &callsite, &count)) &&
         count == 1) {
    unique_ptr<Inline> new_inline(new Inline(inline_nest_level));
    CComPtr<IDiaEnumLineNumbers> lines;
    // All inlinee lines have the same file id.
    DWORD file_id = 0;
    DWORD call_site_line = 0;
    if (FAILED(session_->findInlineeLines(callsite, &lines))) {
      return false;
    }
    CComPtr<IDiaLineNumber> dia_line;
    while (SUCCEEDED(lines->Next(1, &dia_line, &count)) && count == 1) {
      Line line;
      if (!GetLine(dia_line, &line)) {
        return false;
      }
      // Silently ignore zero-length lines.
      if (line.length != 0) {
        // Use the first line num and file id at rva as this inline's call site
        // line number, because after adding lines it may be changed to inner
        // line number and inner file id.
        if (call_site_line == 0)
          call_site_line = line_list->GetLineNum(line.rva);
        if (file_id == 0)
          file_id = line_list->GetFileId(line.rva);
        line_list->AddLine(line);
        new_inline->ExtendRanges(line);
      }
      dia_line.Release();
    }

    // Copy the call site name into a wstring and release the BSTR right away.
    // The BSTR starts out null so that a failed or S_FALSE get_name() cannot
    // leave it pointing at garbage; SysFreeString() accepts null.
    BSTR raw_name = nullptr;
    wstring name;
    if (SUCCEEDED(callsite->get_name(&raw_name)) && raw_name != nullptr) {
      name = raw_name;
    }
    SysFreeString(raw_name);
    if (name.empty()) {
      name = L"<name omitted>";
    }

    // Reuse the origin already registered under this name, or register a new
    // one whose id is the number of origins seen so far.
    auto iter = inline_origins_.find(name);
    if (iter == inline_origins_.end()) {
      InlineOrigin origin;
      origin.id = static_cast<int>(inline_origins_.size());
      origin.name = name;
      iter = inline_origins_.emplace(name, origin).first;
    }
    new_inline->SetOriginId(iter->second.id);
    new_inline->SetCallSiteLine(call_site_line);
    new_inline->SetCallSiteFileId(file_id);

    // Go to next level.
    vector<unique_ptr<Inline>> child_inlines;
    if (!GetInlines(callsite, line_list, inline_nest_level + 1,
                    &child_inlines)) {
      return false;
    }
    new_inline->SetChildInlines(std::move(child_inlines));
    inlines->push_back(std::move(new_inline));
    callsite.Release();
  }
  return true;
}
void PDBSourceLineWriter::PrintInlines(
const vector<unique_ptr<Inline>>& inlines) const {
for (const unique_ptr<Inline>& in : inlines) {
in->Print(output_);
}
}
#undef max
bool PDBSourceLineWriter::PrintFrameDataUsingPDB() {
@ -1105,10 +1374,8 @@ bool PDBSourceLineWriter::WriteSymbols(FILE* symbol_file) {
bool ret = PrintPDBInfo();
// This is not a critical piece of the symbol file.
PrintPEInfo();
ret = ret &&
PrintSourceFiles() &&
PrintFunctions() &&
PrintFrameData();
ret = ret && PrintSourceFiles() && PrintFunctions() && PrintFrameData();
PrintInlineOrigins();
output_ = NULL;
return ret;

View file

@ -35,8 +35,10 @@
#include <atlcomcli.h>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
#include "common/windows/module_info.h"
#include "common/windows/omap.h"
@ -47,6 +49,8 @@ struct IDiaSymbol;
namespace google_breakpad {
using std::map;
using std::vector;
using std::wstring;
using std::unordered_map;
@ -58,7 +62,7 @@ class PDBSourceLineWriter {
ANY_FILE // try PDB_FILE and then EXE_FILE
};
explicit PDBSourceLineWriter();
explicit PDBSourceLineWriter(bool handle_inline);
~PDBSourceLineWriter();
// Opens the given file. For executable files, the corresponding pdb
@ -99,9 +103,110 @@ class PDBSourceLineWriter {
bool UsesGUID(bool *uses_guid);
private:
// Outputs the line/address pairs for each line in the enumerator.
// InlineOrigin represents an INLINE_ORIGIN record in a symbol file, i.e. a
// function that has been inlined somewhere. It is written out as
// "INLINE_ORIGIN <id> <name>".
struct InlineOrigin {
// The unique id for an InlineOrigin. INLINE records refer to their origin
// through this id.
int id;
// The name of the inlined function.
wstring name;
};
// Line represents a LINE record in a symbol file: a range of code addresses
// attributed to one source line. The members carry no default values; they
// are filled in by whoever creates the Line.
struct Line {
// The relative virtual address at which the line's code starts.
DWORD rva;
// The number of bytes of code this line covers.
DWORD length;
// The source line number.
DWORD line_num;
// The id of the source file in which the source line is located.
DWORD file_id;
};
// Inline represents an INLINE record in a symbol file: one inlined call,
// described by its nest level, its call site (line and file id), the id of
// its InlineOrigin and the address ranges it covers. It owns the Inlines
// nested directly inside it.
class Inline {
public:
// Creates an Inline at the given nest level; all other fields start at 0 or
// empty and are filled in through the setters below.
explicit Inline(int inline_nest_level);
// Sets the id of the InlineOrigin this Inline refers to.
void SetOriginId(int origin_id);
// Adds an inlinee line's range to ranges. If the line is adjacent to an
// existing range, that range is extended. Otherwise, the line is added as a
// new range.
void ExtendRanges(const Line& line);
// Sets the source line number of the call site.
void SetCallSiteLine(DWORD call_site_line);
// Sets the file id of the call site.
void SetCallSiteFileId(DWORD call_site_file_id);
// Takes ownership of the Inlines nested directly inside this one.
void SetChildInlines(std::vector<std::unique_ptr<Inline>> child_inlines);
// Writes this Inline as an INLINE record to `output`, followed by the
// records of its children. Nothing is written if it has no ranges.
void Print(FILE* output) const;
private:
// The nest level of this inline record.
int inline_nest_level_;
// The source line number at which this inlined function is called.
DWORD call_site_line_ = 0;
// The id of the file in which this inlined function is called.
DWORD call_site_file_id_ = 0;
// The id used for referring to an InlineOrigin.
int origin_id_ = 0;
// A map from rva to length. These are the address ranges covered by this
// Inline.
map<DWORD, DWORD> ranges_;
// The list of direct Inlines inlined inside this Inline.
vector<std::unique_ptr<Inline>> child_inlines_;
};
// Lines represents a map of lines inside a function with rva as the key.
// The AddLine function adds a line into the map and ensures that there is no
// overlap between any two lines in the map.
class Lines {
public:
// Returns the underlying map from rva to Line, ordered by rva.
const map<DWORD, Line>& GetLineMap() const { return line_map_; }
// Finds the line from line_map_ that contains the given rva and returns its
// line_num. If not found, returns 0.
DWORD GetLineNum(DWORD rva) const;
// Finds the line from line_map_ that contains the given rva and returns its
// file_id. If not found, returns 0.
DWORD GetFileId(DWORD rva) const;
// Adds the `line` into line_map_. If the `line` overlaps with existing
// lines, the existing lines are truncated and the given line is added. This
// ensures that all lines in line_map_ do not overlap with each other. For
// example, suppose there is a line A in the map and we call AddLine with
// Line B.
// Line A: rva: 100, length: 20, line_num: 10, file_id: 1
// Line B: rva: 105, length: 10, line_num: 4, file_id: 2
// After calling AddLine with Line B, we will have the following lines:
// Line 1: rva: 100, length: 5, line_num: 10, file_id: 1
// Line 2: rva: 105, length: 10, line_num: 4, file_id: 2
// Line 3: rva: 115, length: 5, line_num: 10, file_id: 1
void AddLine(const Line& line);
private:
// Finds the line from line_map_ that contains the given rva. If not found,
// returns nullptr.
const Line* GetLine(DWORD rva) const;
// The key is rva. The AddLine function ensures that any two lines in the map
// do not overlap.
map<DWORD, Line> line_map_;
};
// Construct Line from IDiaLineNumber. The output Line is stored at line.
// Return true on success.
bool GetLine(IDiaLineNumber* dia_line, Line* line) const;
// Construct Lines from IDiaEnumLineNumbers. The list of Lines are stored at
// line_list.
// Returns true on success.
bool PrintLines(IDiaEnumLineNumbers *lines);
bool GetLines(IDiaEnumLineNumbers* lines, Lines* line_list) const;
// Outputs the line/address pairs for each line in the enumerator.
void PrintLines(const Lines& lines) const;
// Outputs a function address and name, followed by its source line list.
// block can be the same object as function, or it can be a reference to a
@ -118,6 +223,25 @@ class PDBSourceLineWriter {
// Returns true on success.
bool PrintSourceFiles();
// Output all inline origins.
void PrintInlineOrigins() const;
// Retrieve inlines inside the given block. It also adds inlinee lines to
// `line_list` since inner lines are more precise source location. If the
// block has children with SymTagInlineSite Tag, it will recursively (DFS) call
// itself with each child as first argument. Returns true on success.
// `block`: the IDiaSymbol that may have inline sites.
// `line_list`: the list of lines inside current function.
// `inline_nest_level`: the nest level of block's Inlines.
// `inlines`: the vector to store the list of inlines for the block.
bool GetInlines(IDiaSymbol* block,
Lines* line_list,
int inline_nest_level,
vector<std::unique_ptr<Inline>>* inlines);
// Outputs all inlines.
void PrintInlines(const vector<std::unique_ptr<Inline>>& inlines) const;
// Outputs all of the frame information necessary to construct stack
// backtraces in the absence of frame pointers. For x86 data stored in
// .pdb files. Returns true on success.
@ -172,8 +296,8 @@ class PDBSourceLineWriter {
// reference it. There may be multiple files with identical filenames
// but different unique IDs. The cache attempts to coalesce these into
// one ID per unique filename.
DWORD GetRealFileID(DWORD id) {
unordered_map<DWORD, DWORD>::iterator iter = file_ids_.find(id);
DWORD GetRealFileID(DWORD id) const {
unordered_map<DWORD, DWORD>::const_iterator iter = file_ids_.find(id);
if (iter == file_ids_.end())
return id;
return iter->second;
@ -213,9 +337,15 @@ class PDBSourceLineWriter {
// This maps unique filenames to file IDs.
unordered_map<wstring, DWORD> unique_files_;
// The INLINE_ORIGINS records. The key is the function name.
std::map<wstring, InlineOrigin> inline_origins_;
// This is used for calculating post-transform symbol addresses and lengths.
ImageMap image_map_;
// If we should output INLINE/INLINE_ORIGIN records
bool handle_inline_;
// Disallow copy ctor and operator=
PDBSourceLineWriter(const PDBSourceLineWriter&);
void operator=(const PDBSourceLineWriter&);

View file

@ -38,30 +38,55 @@
#include "common/windows/pdb_source_line_writer.h"
#include "common/windows/pe_source_line_writer.h"
using std::wstring;
using google_breakpad::PDBSourceLineWriter;
using google_breakpad::PESourceLineWriter;
using std::unique_ptr;
using std::wstring;
// Prints the command line help for dump_syms to stderr, using `self` as the
// program name, and returns 1 so callers can use the result as an exit code.
int usage(const wchar_t* self) {
  static const char kOptionsHeader[] = "Options:\n";
  static const char kPeHelp[] =
      "--pe:\tRead debugging information from PE file and do "
      "not attempt to locate matching PDB file.\n"
      "\tThis is only supported for PE32+ (64 bit) PE files.\n";
  static const char kInlineHelp[] =
      "--i:\tOutput INLINE/INLINE_ORIGIN record\n"
      "\tThis cannot be used with [--pe].\n";

  fprintf(stderr, "Usage: %ws [--pe] [--i] <file.[pdb|exe|dll]>\n", self);
  fputs(kOptionsHeader, stderr);
  fputs(kPeHelp, stderr);
  fputs(kInlineHelp, stderr);
  return 1;
}
int wmain(int argc, wchar_t** argv) {
bool success;
if (argc == 2) {
PDBSourceLineWriter pdb_writer;
if (!pdb_writer.Open(wstring(argv[1]), PDBSourceLineWriter::ANY_FILE)) {
bool success = false;
bool pe = false;
bool handle_inline = false;
int arg_index = 1;
while (arg_index < argc && wcslen(argv[arg_index]) > 0 &&
wcsncmp(L"--", argv[arg_index], 2) == 0) {
if (wcscmp(L"--pe", argv[arg_index]) == 0) {
pe = true;
} else if (wcscmp(L"--i", argv[arg_index]) == 0) {
handle_inline = true;
}
++arg_index;
}
if ((pe && handle_inline) || arg_index == argc) {
usage(argv[0]);
return 1;
}
wchar_t* file_path = argv[arg_index];
if (pe) {
PESourceLineWriter pe_writer(file_path);
success = pe_writer.WriteSymbols(stdout);
} else {
PDBSourceLineWriter pdb_writer(handle_inline);
if (!pdb_writer.Open(wstring(file_path), PDBSourceLineWriter::ANY_FILE)) {
fprintf(stderr, "Open failed.\n");
return 1;
}
success = pdb_writer.WriteSymbols(stdout);
} else if (argc == 3 && wcscmp(argv[1], L"--pe") == 0) {
PESourceLineWriter pe_writer(argv[2]);
success = pe_writer.WriteSymbols(stdout);
} else {
fprintf(stderr, "Usage: %ws [--pe] <file.[pdb|exe|dll]>\n", argv[0]);
fprintf(stderr, "Options:\n");
fprintf(stderr, "--pe:\tRead debugging information from PE file and do "
"not attempt to locate matching PDB file.\n"
"\tThis is only supported for PE32+ (64 bit) PE files.\n");
return 1;
}
if (!success) {