Breakpad Linux symbol dumper: Handle programs linked with --gc-sections.

As explained in the code:

Given the right options, the GNU toolchain will omit unreferenced
functions from the final executable. Unfortunately, when it does so,
it does not remove the associated portions of the line number program;
instead, it lets the symbol references in the DW_LNE_set_address
instructions pointing to the now-deleted code resolve to zero. Given
this input, the DWARF line parser will call AddLine with a series of
lines starting at address zero.

Rather than collecting series of lines describing code that is not
there, we should drop them. Since the linker doesn't explicitly
distinguish references to dropped sections from genuine references to
zero, we must use a heuristic. We have chosen:

 - If a line starts at address zero, omit it. (On the platforms
   breakpad targets, it is extremely unlikely that there will be code
   at address zero.)

 - If a line starts immediately after an omitted line, omit it too.

a=jimblandy, r=nealsid


git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@538 4c0a9323-5329-0410-9bdc-e9ce6186880e
This commit is contained in:
jimblandy 2010-02-24 22:36:20 +00:00
parent 5cf2e760b6
commit 841ad48a37
3 changed files with 142 additions and 0 deletions

View file

@ -102,6 +102,14 @@ void DwarfLineToModule::AddLine(uint64 address, uint64 length,
if (address + length < address)
length = -address;
// Should we omit this line? (See the comments for omitted_line_end_.)
if (address == 0 || address == omitted_line_end_) {
omitted_line_end_ = address + length;
return;
} else {
omitted_line_end_ = 0;
}
// Find the source file being referred to.
Module::File *file = files_[file_num];
if (!file) {

View file

@ -52,6 +52,62 @@ namespace google_breakpad {
// uses it to produce a vector of google_breakpad::Module::Line
// objects, referring to google_breakpad::Module::File objects added
// to a particular google_breakpad::Module.
//
// GNU toolchain omitted sections support:
// ======================================
//
// Given the right options, the GNU toolchain will omit unreferenced
// functions from the final executable. Unfortunately, when it does so, it
// does not remove the associated portions of the DWARF line number
// program; instead, it lets the symbol references in the
// DW_LNE_set_address instructions that pointed to the now-deleted code
// resolve to zero. Given this input, the DWARF line parser will call
// AddLine with a series of lines starting at address zero. For example,
// here is the output from 'readelf -wl' for a program with four functions,
// the first three of which have been omitted:
//
// Line Number Statements:
// Extended opcode 2: set Address to 0x0
// Advance Line by 14 to 15
// Copy
// Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 16
// Special opcode 119: advance Address by 8 to 0xb and Line by 2 to 18
// Advance PC by 2 to 0xd
// Extended opcode 1: End of Sequence
//
// Extended opcode 2: set Address to 0x0
// Advance Line by 14 to 15
// Copy
// Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 16
// Special opcode 119: advance Address by 8 to 0xb and Line by 2 to 18
// Advance PC by 2 to 0xd
// Extended opcode 1: End of Sequence
//
// Extended opcode 2: set Address to 0x0
// Advance Line by 19 to 20
// Copy
// Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 21
// Special opcode 76: advance Address by 5 to 0x8 and Line by 1 to 22
// Advance PC by 2 to 0xa
// Extended opcode 1: End of Sequence
//
// Extended opcode 2: set Address to 0x80483a4
// Advance Line by 23 to 24
// Copy
// Special opcode 202: advance Address by 14 to 0x80483b2 and Line by 1 to 25
// Special opcode 76: advance Address by 5 to 0x80483b7 and Line by 1 to 26
// Advance PC by 6 to 0x80483bd
// Extended opcode 1: End of Sequence
//
// Instead of collecting runs of lines describing code that is not there,
// we try to recognize and drop them. Since the linker doesn't explicitly
// distinguish references to dropped sections from genuine references to
// code at address zero, we must use a heuristic. We have chosen:
//
// - If a line starts at address zero, omit it. (On the platforms
// breakpad targets, it is extremely unlikely that there will be code
// at address zero.)
//
// - If a line starts immediately after an omitted line, omit it too.
class DwarfLineToModule: public dwarf2reader::LineInfoHandler {
public:
// As the DWARF line info parser passes us line records, add source
@ -65,6 +121,7 @@ class DwarfLineToModule: public dwarf2reader::LineInfoHandler {
: module_(module),
lines_(lines),
highest_file_number_(-1),
omitted_line_end_(0),
warned_bad_file_number_(false),
warned_bad_directory_number_(false) { }
@ -106,6 +163,11 @@ class DwarfLineToModule: public dwarf2reader::LineInfoHandler {
// The highest file number we've seen so far, or -1 if we've seen
// none. Used for dynamically defined file numbers.
int32 highest_file_number_;
// This is the ending address of the last line we omitted, or zero if we
// didn't omit the previous line. It is zero before we have received any
// AddLine calls.
uint64 omitted_line_end_;
// True if we've warned about:
bool warned_bad_file_number_; // bad file numbers

View file

@ -265,3 +265,75 @@ TEST(Errors, BigLine) {
ASSERT_EQ(1U, lines.size());
EXPECT_EQ(1U, lines[0].size);
}
// The 'Omitted' tests verify that we correctly omit line information
// for code in sections that the linker has dropped. See "GNU
// toolchain omitted sections support" at the top of the
// DwarfLineToModule class.
TEST(Omitted, DroppedThenGood) {
  // A line starting at address zero is dropped; a later line that does not
  // begin where the dropped one ended must still be collected.
  Module module("name", "os", "architecture", "id");
  vector<Module::Line> collected;
  DwarfLineToModule handler(&module, &collected);

  handler.DefineFile("filename1", 1, 0, 0, 0);
  handler.AddLine(0, 10, 1, 83816211, 0);   // starts at zero: omitted
  handler.AddLine(20, 10, 1, 13059195, 0);  // 20 != 0 + 10: recorded

  ASSERT_EQ(1U, collected.size());
  EXPECT_EQ(13059195, collected[0].number);
}
TEST(Omitted, GoodThenDropped) {
  // A line at a nonzero address is collected; a following line that starts
  // at address zero is dropped and must not disturb the earlier one.
  Module module("name", "os", "architecture", "id");
  vector<Module::Line> collected;
  DwarfLineToModule handler(&module, &collected);

  handler.DefineFile("filename1", 1, 0, 0, 0);
  handler.AddLine(0x9dd6a372, 10, 1, 41454594, 0);  // nonzero start: recorded
  handler.AddLine(0, 10, 1, 44793413, 0);           // starts at zero: omitted

  ASSERT_EQ(1U, collected.size());
  EXPECT_EQ(41454594, collected[0].number);
}
TEST(Omitted, Mix1) {
  // Two good lines, then a run of three contiguous lines beginning at
  // address zero (each starting where the previous one ended), then two
  // more good lines. Only the four good lines should be collected, in the
  // order they were added.
  Module module("name", "os", "architecture", "id");
  vector<Module::Line> collected;
  DwarfLineToModule handler(&module, &collected);

  handler.DefineFile("filename1", 1, 0, 0, 0);

  // Leading good lines: recorded.
  handler.AddLine(0x679ed72f, 10, 1, 58932642, 0);
  handler.AddLine(0xdfb5a72d, 10, 1, 39847385, 0);

  // Dropped run: starts at zero, each line abuts the one before it.
  handler.AddLine(0, 0x78, 1, 23053829, 0);
  handler.AddLine(0x78, 0x6a, 1, 65317783, 0);
  handler.AddLine(0x78 + 0x6a, 0x2a, 1, 77601423, 0);

  // Trailing good lines: neither starts where the dropped run ended, so
  // both are recorded.
  handler.AddLine(0x9fe0cea5, 10, 1, 91806582, 0);
  handler.AddLine(0x7e41a109, 10, 1, 56169221, 0);

  ASSERT_EQ(4U, collected.size());
  EXPECT_EQ(58932642, collected[0].number);
  EXPECT_EQ(39847385, collected[1].number);
  EXPECT_EQ(91806582, collected[2].number);
  EXPECT_EQ(56169221, collected[3].number);
}
TEST(Omitted, Mix2) {
  // A dropped run of three contiguous lines from address zero, two good
  // lines, then a second dropped run from address zero. Only the two good
  // lines in the middle should be collected.
  Module module("name", "os", "architecture", "id");
  vector<Module::Line> collected;
  DwarfLineToModule handler(&module, &collected);

  handler.DefineFile("filename1", 1, 0, 0, 0);

  // First dropped run: starts at zero, each line abuts the one before it.
  handler.AddLine(0, 0xf2, 1, 58802211, 0);
  handler.AddLine(0xf2, 0xb9, 1, 78958222, 0);
  handler.AddLine(0xf2 + 0xb9, 0xf7, 1, 64861892, 0);

  // Good lines: recorded.
  handler.AddLine(0x4e4d271e, 9, 1, 67355743, 0);
  handler.AddLine(0xdfb5a72d, 30, 1, 23365776, 0);

  // Second dropped run: starts again at zero.
  handler.AddLine(0, 0x64, 1, 76196762, 0);
  handler.AddLine(0x64, 0x33, 1, 71066611, 0);
  handler.AddLine(0x64 + 0x33, 0xe3, 1, 61749337, 0);

  ASSERT_EQ(2U, collected.size());
  EXPECT_EQ(67355743, collected[0].number);
  EXPECT_EQ(23365776, collected[1].number);
}