Dwarf5 fixes [2 of 5]: Handle .debug_str_offsets properly.

Second of 5 small patches to fix various breakpad issues found
while testing dump_syms on DWARF v5 in ChromeOS.

This patch adds code to properly find & parse the
DW_AT_str_offsets_base attribute, and use it to handle strings
of the forms DW_FORM_strx, DW_FORM_strx1..DW_FORM_strx4. This is the
largest of the DWARF5 fixes.  It also includes a unittest to test
using a string offset.

Change-Id: I5d1def862d9d91cae4b2853578441e04ea85449d
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/2634547
Reviewed-by: Sterling Augustine <saugustine@google.com>
This commit is contained in:
Caroline Tice 2021-01-16 15:44:26 -08:00 committed by Sterling Augustine
parent f4115fad24
commit ac9712d9b4
5 changed files with 284 additions and 3 deletions

View file

@ -76,7 +76,7 @@ CompilationUnit::CompilationUnit(const string& path,
addr_buffer_(NULL), addr_buffer_length_(0),
is_split_dwarf_(false), dwo_id_(0), dwo_name_(),
skeleton_dwo_id_(0), ranges_base_(0), addr_base_(0),
have_checked_for_dwp_(false), dwp_path_(),
str_offsets_base_(0), have_checked_for_dwp_(false), dwp_path_(),
dwp_byte_reader_(), dwp_reader_() {}
// Initialize a compilation unit from a .dwo or .dwp file.
@ -454,7 +454,7 @@ void CompilationUnit::ProcessFormStringIndex(
uint64_t dieoffset, enum DwarfAttribute attr, enum DwarfForm form,
uint64_t str_index) {
const uint8_t* offset_ptr =
str_offsets_buffer_ + str_index * reader_->OffsetSize();
str_offsets_buffer_ + str_offsets_base_ + str_index * reader_->OffsetSize();
const uint64_t offset = reader_->ReadOffset(offset_ptr);
if (offset >= string_buffer_length_) {
return;
@ -464,6 +464,161 @@ void CompilationUnit::ProcessFormStringIndex(
ProcessAttributeString(dieoffset, attr, form, str);
}
// Pre-pass helper for a DWARF v5 DW_TAG_compile_unit DIE. It must run over
// the DIE's attributes before any string attribute is decoded, so that
// DW_AT_str_offsets_base is already known when string forms are resolved.
//
// Decodes just enough of the attribute value at START, encoded as FORM, to
// step over it. The only value that is reported is a DW_FORM_sec_offset
// belonging to DW_AT_str_offsets_base, which is forwarded to
// ProcessAttributeUnsigned(); every other attribute is skipped silently.
//
// IMPLICIT_CONST is not interpreted here; it is only carried through the
// DW_FORM_indirect recursion.
//
// Returns a pointer to the first byte past the attribute value, or NULL
// (after printing a diagnostic to stderr) when FORM is not handled.
const uint8_t* CompilationUnit::ProcessStrOffsetBaseAttribute(
    uint64_t dieoffset, const uint8_t* start, enum DwarfAttribute attr,
    enum DwarfForm form, uint64_t implicit_const) {
  size_t len;
  switch (form) {
    // DW_FORM_indirect is never used because it is such a space
    // waster. The real form is stored as a ULEB128 ahead of the value.
    case DW_FORM_indirect:
      form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
                                                                     &len));
      start += len;
      return ProcessStrOffsetBaseAttribute(dieoffset, start, attr, form,
                                           implicit_const);

    // Forms that occupy no bytes in the DIE.
    case DW_FORM_flag_present:
    case DW_FORM_implicit_const:
      return start;

    // Fixed-width forms, grouped by the number of bytes they occupy.
    case DW_FORM_data1:
    case DW_FORM_flag:
    case DW_FORM_ref1:
    case DW_FORM_strx1:
    case DW_FORM_addrx1:
      return start + 1;
    case DW_FORM_data2:
    case DW_FORM_ref2:
    case DW_FORM_strx2:
    case DW_FORM_addrx2:
      return start + 2;
    case DW_FORM_strx3:
    case DW_FORM_addrx3:
      return start + 3;
    case DW_FORM_data4:
    case DW_FORM_ref4:
    case DW_FORM_ref_sup4:
    case DW_FORM_strx4:
    case DW_FORM_addrx4:
      return start + 4;
    case DW_FORM_data8:
    case DW_FORM_ref8:
    case DW_FORM_ref_sig8:
    case DW_FORM_ref_sup8:
      return start + 8;
    case DW_FORM_data16:
      // This form is designed for an md5 checksum inside line tables.
      return start + 16;

    // Inline NUL-terminated string: skip the characters and the terminator.
    case DW_FORM_string: {
      const char* str = reinterpret_cast<const char*>(start);
      return start + strlen(str) + 1;
    }

    // Unsigned LEB128 values; only the encoded length is needed.
    case DW_FORM_udata:
    case DW_FORM_ref_udata:
    case DW_FORM_loclistx:
    case DW_FORM_strx:
    case DW_FORM_GNU_str_index:
    case DW_FORM_addrx:
    case DW_FORM_GNU_addr_index:
    case DW_FORM_rnglistx:
      reader_->ReadUnsignedLEB128(start, &len);
      return start + len;
    case DW_FORM_sdata:
      reader_->ReadSignedLEB128(start, &len);
      return start + len;

    case DW_FORM_addr:
      return start + reader_->AddressSize();

    // This is the important one here!
    case DW_FORM_sec_offset:
      if (attr == dwarf2reader::DW_AT_str_offsets_base) {
        ProcessAttributeUnsigned(dieoffset, attr, form,
                                 reader_->ReadOffset(start));
      }
      return start + reader_->OffsetSize();

    // Section offsets whose width follows the DWARF format (32/64-bit).
    // DW_FORM_strp_sup is such an offset too, so it must not be assumed
    // to be four bytes wide.
    case DW_FORM_strp:
    case DW_FORM_line_strp:
    case DW_FORM_strp_sup:
      return start + reader_->OffsetSize();

    case DW_FORM_ref_addr:
      // DWARF2 and 3/4 differ on whether ref_addr is address size or
      // offset size.
      assert(header_.version >= 2);
      if (header_.version == 2) {
        return start + reader_->AddressSize();
      } else if (header_.version >= 3) {
        return start + reader_->OffsetSize();
      }
      break;

    // Blocks: a length prefix followed by that many bytes of data.
    case DW_FORM_block1: {
      const uint64_t datalen = reader_->ReadOneByte(start);
      return start + 1 + datalen;
    }
    case DW_FORM_block2: {
      const uint64_t datalen = reader_->ReadTwoBytes(start);
      return start + 2 + datalen;
    }
    case DW_FORM_block4: {
      const uint64_t datalen = reader_->ReadFourBytes(start);
      return start + 4 + datalen;
    }
    case DW_FORM_block:
    case DW_FORM_exprloc: {
      const uint64_t datalen = reader_->ReadUnsignedLEB128(start, &len);
      return start + datalen + len;
    }
  }
  fprintf(stderr, "Unhandled form type\n");
  return NULL;
}
// If one really wanted, you could merge SkipAttribute and
// ProcessAttribute
// This is all boring data manipulation and calling of the handler.
@ -699,6 +854,22 @@ const uint8_t* CompilationUnit::ProcessAttribute(
const uint8_t* CompilationUnit::ProcessDIE(uint64_t dieoffset,
const uint8_t* start,
const Abbrev& abbrev) {
// With DWARF v5, the compile_unit die may contain a
// DW_AT_str_offsets_base. If it does, that attribute must be found
// and processed before trying to process the other attributes; otherwise
// the string values will all come out incorrect.
if (abbrev.tag == DW_TAG_compile_unit && header_.version == 5) {
uint64_t dieoffset_copy = dieoffset;
const uint8_t* start_copy = start;
for (AttributeList::const_iterator i = abbrev.attributes.begin();
i != abbrev.attributes.end();
i++) {
start_copy = ProcessStrOffsetBaseAttribute(dieoffset_copy, start_copy,
i->attr_, i->form_,
i->value_);
}
}
for (AttributeList::const_iterator i = abbrev.attributes.begin();
i != abbrev.attributes.end();
i++) {

View file

@ -541,6 +541,14 @@ class CompilationUnit {
enum DwarfForm form,
uint64_t implicit_const);
// Special version of ProcessAttribute, for finding str_offsets_base in
// DW_TAG_compile_unit, for DWARF v5. It steps over the attribute value
// at START, encoded as FORM, and reports only a DW_FORM_sec_offset value
// of DW_AT_str_offsets_base (via ProcessAttributeUnsigned). Returns a
// pointer just past the attribute value, or NULL if FORM is not handled.
const uint8_t* ProcessStrOffsetBaseAttribute(uint64_t dieoffset,
const uint8_t* start,
enum DwarfAttribute attr,
enum DwarfForm form,
uint64_t implicit_const);
// Called when we have an attribute with unsigned data to give to
// our handler. The attribute is for the DIE at OFFSET from the
// beginning of compilation unit, has a name of ATTR, a form of
@ -557,6 +565,9 @@ class CompilationUnit {
else if (attr == DW_AT_GNU_addr_base || attr == DW_AT_addr_base) {
addr_base_ = data;
}
else if (attr == DW_AT_str_offsets_base) {
str_offsets_base_ = data;
}
else if (attr == DW_AT_GNU_ranges_base || attr == DW_AT_rnglists_base) {
ranges_base_ = data;
}
@ -726,6 +737,9 @@ class CompilationUnit {
// The value of the DW_AT_GNU_addr_base attribute, if any.
uint64_t addr_base_;
// The value of DW_AT_str_offsets_base attribute, if any.
uint64_t str_offsets_base_;
// True if we have already looked for a .dwp file.
bool have_checked_for_dwp_;

View file

@ -319,6 +319,90 @@ TEST_P(DwarfForms, addr) {
ParseCompilationUnit(GetParam());
}
// Builds a DWARF v5 compile unit whose DW_AT_name is a DW_FORM_strx1 index
// and expects the handler to be given the string "bird" for it, together
// with the DW_AT_low_pc value.
//
// NOTE(review): debug_strings and str_offsets are assembled below but are
// not passed to ParseCompilationUnit() in this test body -- confirm that
// the fixture makes them visible to the reader.
// NOTE(review): the widths of str_offsets_base and of the
// .debug_str_offsets entries are chosen from address_size here; confirm
// this is intended for parameter sets where address_size and format_size
// differ.
TEST_P(DwarfForms, strx1) {
  // Only DWARF v5 parameter sets are exercised by this test.
  if (GetParam().version != 5) {
    return;
  }
  const bool four_byte = (GetParam().address_size == 4);

  // A single childless compile-unit abbreviation carrying the three
  // attributes under test.
  Label abbrev_table = abbrevs.Here();
  abbrevs.Abbrev(1, dwarf2reader::DW_TAG_compile_unit,
                 dwarf2reader::DW_children_no)
      .Attribute(dwarf2reader::DW_AT_name, dwarf2reader::DW_FORM_strx1)
      .Attribute(dwarf2reader::DW_AT_low_pc, dwarf2reader::DW_FORM_addr)
      .Attribute(dwarf2reader::DW_AT_str_offsets_base,
                 dwarf2reader::DW_FORM_sec_offset)
      .EndAbbrev()
      .EndTable();

  // The DIE itself: abbrev code, string index, low_pc, str_offsets_base.
  info.set_format_size(GetParam().format_size);
  info.set_endianness(GetParam().endianness);
  info.Header(GetParam().version, abbrev_table, GetParam().address_size,
              dwarf2reader::DW_UT_compile)
      .ULEB128(1)                     // abbrev index
      .D8(2);                         // string index

  const uint64_t low_pc = four_byte ? 0xc8e9ffcc : 0xe942517fc2768564ULL;
  const uint64_t offsets_base = four_byte ? 8 : 16;
  if (four_byte) {
    info.D32(low_pc);                 // low pc
    info.D32(offsets_base);           // str_offsets_base
  } else {
    info.D64(low_pc);                 // low_pc
    info.D64(offsets_base);           // str_offsets_base
  }
  info.Finish();

  // String data: no header, just a series of null-terminated strings.
  Section debug_strings;
  debug_strings.AppendCString("apple");     // offset = 0
  debug_strings.AppendCString("bird");      // offset = 6
  debug_strings.AppendCString("canary");    // offset = 11
  debug_strings.AppendCString("dinosaur");  // offset = 18

  // .debug_str_offsets: header first, then one offset per string above.
  Section str_offsets;
  str_offsets.set_endianness(GetParam().endianness);
  if (four_byte) {
    str_offsets.D32(24);              // section length (4 bytes)
  } else {
    str_offsets.D32(0xffffffff);
    str_offsets.D64(48);              // section length (12 bytes)
  }
  str_offsets.D16(GetParam().version);  // version (2 bytes)
  str_offsets.D16(0);                   // padding (2 bytes)

  static const uint64_t kStringOffsets[] = {0, 6, 11, 18};
  const size_t kStringCount =
      sizeof(kStringOffsets) / sizeof(kStringOffsets[0]);
  for (size_t i = 0; i < kStringCount; ++i) {
    if (four_byte) {
      str_offsets.D32(kStringOffsets[i]);
    } else {
      str_offsets.D64(kStringOffsets[i]);
    }
  }

  // String index 2 is expected to be reported as "bird".
  ExpectBeginCompilationUnit(GetParam(), dwarf2reader::DW_TAG_compile_unit);
  EXPECT_CALL(handler, ProcessAttributeString(_, dwarf2reader::DW_AT_name,
                                              dwarf2reader::DW_FORM_strx1,
                                              "bird"))
      .WillOnce(Return());
  EXPECT_CALL(handler, ProcessAttributeUnsigned(_, dwarf2reader::DW_AT_low_pc,
                                                dwarf2reader::DW_FORM_addr,
                                                low_pc))
      .InSequence(s)
      .WillOnce(Return());
  ExpectEndCompilationUnit();
  ParseCompilationUnit(GetParam());
}
TEST_P(DwarfForms, block2_empty) {
StartSingleAttributeDIE(GetParam(), (DwarfTag) 0x16e4d2f7,
(DwarfAttribute) 0xe52c4463,

View file

@ -181,7 +181,8 @@ struct DwarfCUToModule::CUContext {
high_pc(0),
ranges_form(dwarf2reader::DW_FORM_sec_offset),
ranges_data(0),
ranges_base(0) { }
ranges_base(0),
str_offsets_base(0) { }
~CUContext() {
for (vector<Module::Function*>::iterator it = functions.begin();
@ -224,6 +225,9 @@ struct DwarfCUToModule::CUContext {
// form DW_FORM_addrxX is relative to this offset.
uint64_t addr_base;
// Offset into this CU's contribution to .debug_str_offsets.
uint64_t str_offsets_base;
// Collect all the data from the CU that a RangeListReader needs to read a
// range.
bool AssembleRangeListInfo(
@ -909,6 +913,9 @@ void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr,
case dwarf2reader::DW_AT_GNU_addr_base:
cu_context_->addr_base = data;
break;
case dwarf2reader::DW_AT_str_offsets_base:
cu_context_->str_offsets_base = data;
break;
default:
break;

View file

@ -613,4 +613,9 @@ TEST(Write, OutOfRangeAddresses) {
"STACK CFI INIT 2000 100 \n",
s.str().c_str());
// Cleanup - Prevent Memory Leak errors.
delete (extern1);
delete (function);
delete (entry3);
delete (entry1);
}