diff --git a/docs/symbol_files.md b/docs/symbol_files.md index 90782016..237b6567 100644 --- a/docs/symbol_files.md +++ b/docs/symbol_files.md @@ -100,12 +100,17 @@ the file; it may contain spaces. A `FUNC` record describes a source-language function. It has the form: -> `FUNC` _address_ _size_ _parameter\_size_ _name_ +> `FUNC` _[m]_ _address_ _size_ _parameter\_size_ _name_ -For example: `FUNC c184 30 0 nsQueryInterfaceWithError::operator()(nsID const&, +For example: `FUNC m c184 30 0 nsQueryInterfaceWithError::operator()(nsID const&, void**) const ` +The _m_ field is optional. If present it indicates that multiple symbols +reference this function's instructions. (In which case, only one symbol name is +mentioned within the breakpad file.) Multiple symbols referencing the same +instructions may occur due to identical code folding by the linker. + The _address_ and _size_ fields are hexadecimal numbers indicating the start address and length in bytes of the machine code instructions the function occupies. (Breakpad symbol files cannot accurately describe functions whose code @@ -158,9 +163,9 @@ A `PUBLIC` record describes a publicly visible linker symbol, such as that used to identify an assembly language entry point or region of memory. It has the form: -> PUBLIC _address_ _parameter\_size_ _name_ +> PUBLIC _[m]_ _address_ _parameter\_size_ _name_ -For example: `PUBLIC 2160 0 Public2_1 +For example: `PUBLIC m 2160 0 Public2_1 ` The Breakpad processor essentially treats a `PUBLIC` record as defining a @@ -168,6 +173,11 @@ function with no line number data and an indeterminate size: the code extends to the next address mentioned. If a given address is covered by both a `PUBLIC` record and a `FUNC` record, the processor uses the `FUNC` data. +The _m_ field is optional. If present it indicates that multiple symbols +reference this function's instructions. (In which case, only one symbol name is +mentioned within the breakpad file.) Multiple symbols referencing the same +instructions may occur due to identical code folding by the linker. + The _address_ field is a hexadecimal number indicating the symbol's address, relative to the module's load address. diff --git a/src/common/windows/pdb_source_line_writer.cc b/src/common/windows/pdb_source_line_writer.cc index dace8860..19c63852 100644 --- a/src/common/windows/pdb_source_line_writer.cc +++ b/src/common/windows/pdb_source_line_writer.cc @@ -109,7 +109,53 @@ namespace { using std::vector; -typedef std::multimap> SymbolMultimap; +// The symbol (among possibly many) selected to represent an rva. +struct SelectedSymbol { + SelectedSymbol(const CComPtr& symbol, bool is_public) + : symbol(symbol), is_public(is_public), is_multiple(false) {} + + // The symbol to use for an rva. + CComPtr symbol; + // Whether this is a public or function symbol. + bool is_public; + // Whether the rva has multiple associated symbols. An rva will correspond to + // multiple symbols in the case of linker identical symbol folding. + bool is_multiple; +}; + +// Maps rva to the symbol to use for that address. +typedef std::map SymbolMap; + +// Record this in the map as the selected symbol for the rva if it satisfies the +// necessary conditions. +void MaybeRecordSymbol(DWORD rva, + const CComPtr symbol, + bool is_public, + SymbolMap* map) { + SymbolMap::iterator loc = map->find(rva); + if (loc == map->end()) { + map->insert(std::make_pair(rva, SelectedSymbol(symbol, is_public))); + return; + } + + // Prefer function symbols to public symbols. + if (is_public && !loc->second.is_public) { + return; + } + + loc->second.is_multiple = true; + + // Take the 'least' symbol by lexicographical order of the decorated name. We + // use the decorated rather than undecorated name because computing the latter + // is expensive. + BSTR current_name, new_name; + loc->second.symbol->get_name(¤t_name); + symbol->get_name(&new_name); + if (wcscmp(new_name, current_name) < 0) { + loc->second.symbol = symbol; + loc->second.is_public = is_public; + } +} // A helper class to scope a PLOADED_IMAGE. class AutoImage { @@ -171,19 +217,10 @@ bool CreateDiaDataSourceInstance(CComPtr &data_source) { return false; } -// Computing undecorated names for all symbols is expensive, so we compare -// decorated names. -bool CompareSymbols(const SymbolMultimap::value_type& a, - const SymbolMultimap::value_type& b) { - BSTR a_name, b_name; - a.second->get_name(&a_name); - b.second->get_name(&b_name); - return wcscmp(a_name, b_name) < 0; -} - } // namespace -PDBSourceLineWriter::PDBSourceLineWriter() : output_(NULL) { +PDBSourceLineWriter::PDBSourceLineWriter(bool enable_multiple_field) + : enable_multiple_field_(enable_multiple_field), output_(NULL) { } PDBSourceLineWriter::~PDBSourceLineWriter() { @@ -299,7 +336,8 @@ bool PDBSourceLineWriter::PrintLines(IDiaEnumLineNumbers *lines) { } bool PDBSourceLineWriter::PrintFunction(IDiaSymbol *function, - IDiaSymbol *block) { + IDiaSymbol *block, + bool has_multiple_symbols) { // The function format is: // FUNC
DWORD rva; @@ -335,9 +373,10 @@ bool PDBSourceLineWriter::PrintFunction(IDiaSymbol *function, MapAddressRange(image_map_, AddressRange(rva, static_cast(length)), &ranges); for (size_t i = 0; i < ranges.size(); ++i) { - fprintf(output_, "FUNC %lx %lx %x %ws\n", - ranges[i].rva, ranges[i].length, stack_param_size, - name.m_str); + const char* optional_multiple_field = + enable_multiple_field_ && has_multiple_symbols ? "m " : ""; + fprintf(output_, "FUNC %s%lx %lx %x %ws\n", optional_multiple_field, + ranges[i].rva, ranges[i].length, stack_param_size, name.m_str); } CComPtr lines; @@ -415,7 +454,7 @@ bool PDBSourceLineWriter::PrintFunctions() { CComPtr symbols = NULL; // Find all function symbols first. - SymbolMultimap rva_symbols; + SymbolMap rva_symbol; hr = global->findChildren(SymTagFunction, NULL, nsNone, &symbols); if (SUCCEEDED(hr)) { @@ -423,10 +462,8 @@ bool PDBSourceLineWriter::PrintFunctions() { while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) { if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) { - // Place the symbols into a multimap indexed by rva, so we can choose - // the apropriate symbol name to use when multiple symbols share an - // address. - rva_symbols.insert(std::make_pair(rva, symbol)); + // Potentially record this as the canonical symbol for this rva. + MaybeRecordSymbol(rva, symbol, false, &rva_symbol); } else { fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n"); return false; @@ -438,9 +475,8 @@ bool PDBSourceLineWriter::PrintFunctions() { symbols.Release(); } - // Find all public symbols. Store public symbols that are not also private - // symbols for later. - std::set public_only_rvas; + // Find all public symbols and record public symbols that are not also private + // symbols. hr = global->findChildren(SymTagPublicSymbol, NULL, nsNone, &symbols); if (SUCCEEDED(hr)) { @@ -448,11 +484,8 @@ bool PDBSourceLineWriter::PrintFunctions() { while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) { if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) { - if (rva_symbols.find(rva) == rva_symbols.end()) { - // Keep symbols in rva order. - rva_symbols.insert(std::make_pair(rva, symbol)); - public_only_rvas.insert(rva); - } + // Potentially record this as the canonical symbol for this rva. + MaybeRecordSymbol(rva, symbol, true, &rva_symbol); } else { fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n"); return false; @@ -464,29 +497,18 @@ bool PDBSourceLineWriter::PrintFunctions() { symbols.Release(); } - // For each rva, dump one symbol at the address. - SymbolMultimap::iterator it = rva_symbols.begin(); - while (it != rva_symbols.end()) { - std::pair symbol_range = - rva_symbols.equal_range(it->first); - // Find the minimum symbol by name to make the output more consistent - // between runs on different releases of the same module, in the case of - // multiple symbols sharing an address. - SymbolMultimap::iterator least_symbol_iter = - std::min_element(symbol_range.first, symbol_range.second, CompareSymbols); - CComPtr symbol = least_symbol_iter->second; + // For each rva, dump the selected symbol at the address. + SymbolMap::iterator it; + for (it = rva_symbol.begin(); it != rva_symbol.end(); ++it) { + CComPtr symbol = it->second.symbol; // Only print public symbols if there is no function symbol for the address. - if (public_only_rvas.count(it->first) == 0) { - if (!PrintFunction(symbol, symbol)) + if (!it->second.is_public) { + if (!PrintFunction(symbol, symbol, it->second.is_multiple)) return false; - symbol.Release(); } else { - if (!PrintCodePublicSymbol(symbol)) + if (!PrintCodePublicSymbol(symbol, it->second.is_multiple)) return false; - symbol.Release(); } - - it = symbol_range.second; } // When building with PGO, the compiler can split functions into @@ -528,7 +550,7 @@ bool PDBSourceLineWriter::PrintFunctions() { SUCCEEDED(parent->get_relativeVirtualAddress(&func_rva)) && SUCCEEDED(parent->get_length(&func_length))) { if (block_rva < func_rva || block_rva > (func_rva + func_length)) { - if (!PrintFunction(parent, block)) { + if (!PrintFunction(parent, block, false)) { return false; } } @@ -845,7 +867,8 @@ bool PDBSourceLineWriter::PrintFrameData() { return false; } -bool PDBSourceLineWriter::PrintCodePublicSymbol(IDiaSymbol *symbol) { +bool PDBSourceLineWriter::PrintCodePublicSymbol(IDiaSymbol *symbol, + bool has_multiple_symbols) { BOOL is_code; if (FAILED(symbol->get_code(&is_code))) { return false; @@ -868,8 +891,10 @@ bool PDBSourceLineWriter::PrintCodePublicSymbol(IDiaSymbol *symbol) { AddressRangeVector ranges; MapAddressRange(image_map_, AddressRange(rva, 1), &ranges); for (size_t i = 0; i < ranges.size(); ++i) { - fprintf(output_, "PUBLIC %lx %x %ws\n", ranges[i].rva, - stack_param_size > 0 ? stack_param_size : 0, + const char* optional_multiple_field = + enable_multiple_field_ && has_multiple_symbols ? "m " : ""; + fprintf(output_, "PUBLIC %s%lx %x %ws\n", optional_multiple_field, + ranges[i].rva, stack_param_size > 0 ? stack_param_size : 0, name.m_str); } diff --git a/src/common/windows/pdb_source_line_writer.h b/src/common/windows/pdb_source_line_writer.h index e9e89bb2..5a8bcbe7 100644 --- a/src/common/windows/pdb_source_line_writer.h +++ b/src/common/windows/pdb_source_line_writer.h @@ -92,7 +92,9 @@ class PDBSourceLineWriter { ANY_FILE // try PDB_FILE and then EXE_FILE }; - explicit PDBSourceLineWriter(); + // NB: |enable_multiple_field| is temporary while transitioning to enabling + // writing the multiple field permanently. + explicit PDBSourceLineWriter(bool enable_multiple_field = false); ~PDBSourceLineWriter(); // Opens the given file. For executable files, the corresponding pdb @@ -138,11 +140,12 @@ class PDBSourceLineWriter { bool PrintLines(IDiaEnumLineNumbers *lines); // Outputs a function address and name, followed by its source line list. - // block can be the same object as function, or it can be a reference - // to a code block that is lexically part of this function, but - // resides at a separate address. - // Returns true on success. - bool PrintFunction(IDiaSymbol *function, IDiaSymbol *block); + // block can be the same object as function, or it can be a reference to a + // code block that is lexically part of this function, but resides at a + // separate address. If has_multiple_symbols is true, this function's + // instructions correspond to multiple symbols. Returns true on success. + bool PrintFunction(IDiaSymbol *function, IDiaSymbol *block, + bool has_multiple_symbols); // Outputs all functions as described above. Returns true on success. bool PrintFunctions(); @@ -167,8 +170,10 @@ class PDBSourceLineWriter { // Outputs a single public symbol address and name, if the symbol corresponds // to a code address. Returns true on success. If symbol is does not - // correspond to code, returns true without outputting anything. - bool PrintCodePublicSymbol(IDiaSymbol *symbol); + // correspond to code, returns true without outputting anything. If + // has_multiple_symbols is true, the symbol corresponds to a code address and + // the instructions correspond to multiple symbols. + bool PrintCodePublicSymbol(IDiaSymbol *symbol, bool has_multiple_symbols); // Outputs a line identifying the PDB file that is being dumped, along with // its uuid and age. @@ -227,6 +232,10 @@ class PDBSourceLineWriter { // a failure, returns 0, which is also a valid number of bytes. static int GetFunctionStackParamSize(IDiaSymbol *function); + // True if the optional 'm' field on FUNC and PUBLIC for multiple symbols at + // the same address should be output. + bool enable_multiple_field_; + // The filename of the PE file corresponding to the currently-open // pdb file. wstring code_file_; diff --git a/src/google_breakpad/processor/basic_source_line_resolver.h b/src/google_breakpad/processor/basic_source_line_resolver.h index 6bb6d863..91fb7841 100644 --- a/src/google_breakpad/processor/basic_source_line_resolver.h +++ b/src/google_breakpad/processor/basic_source_line_resolver.h @@ -95,12 +95,14 @@ class SymbolParseHelper { char **filename); // out // Parses a |function_line| declaration. Returns true on success. - // Format: FUNC
. + // Format: FUNC []
. // Notice, that this method modifies the input |function_line| which is why it - // can't be const. On success,
, , , and - // are stored in |*address|, |*size|, |*stack_param_size|, and |*name|. - // No allocation is done, |*name| simply points inside |function_line|. + // can't be const. On success, the presence of ,
, , + // , and are stored in |*is_multiple|, |*address|, + // |*size|, |*stack_param_size|, and |*name|. No allocation is done, |*name| + // simply points inside |function_line|. static bool ParseFunction(char *function_line, // in + bool *is_multiple, // out uint64_t *address, // out uint64_t *size, // out long *stack_param_size, // out @@ -119,12 +121,14 @@ class SymbolParseHelper { long *source_file); // out // Parses a |public_line| declaration. Returns true on success. - // Format: PUBLIC
+ // Format: PUBLIC []
// Notice, that this method modifies the input |function_line| which is why - // it can't be const. On success,
, , - // are stored in |*address|, |*stack_param_size|, and |*name|. - // No allocation is done, |*name| simply points inside |public_line|. + // it can't be const. On success, the presence of ,
, + // , are stored in |*is_multiple|, |*address|, + // |*stack_param_size|, and |*name|. No allocation is done, |*name| simply + // points inside |public_line|. static bool ParsePublicSymbol(char *public_line, // in + bool *is_multiple, // out uint64_t *address, // out long *stack_param_size, // out char **name); // out diff --git a/src/processor/basic_source_line_resolver.cc b/src/processor/basic_source_line_resolver.cc index aa66e159..c4aa949c 100644 --- a/src/processor/basic_source_line_resolver.cc +++ b/src/processor/basic_source_line_resolver.cc @@ -62,6 +62,42 @@ namespace google_breakpad { #define strtoull _strtoui64 #endif +namespace { + +// Utility function to tokenize given the presence of an optional initial +// field. In this case, optional_field is the expected string for the optional +// field, and max_tokens is the maximum number of tokens including the optional +// field. Refer to the documentation for Tokenize for descriptions of the other +// arguments. +bool TokenizeWithOptionalField(char *line, + const char *optional_field, + const char *separators, + int max_tokens, + vector *tokens) { + // First tokenize assuming the optional field is not present. If we then see + // the optional field, additionally tokenize the last token into two tokens. + if (!Tokenize(line, separators, max_tokens - 1, tokens)) { + return false; + } + + if (strcmp(tokens->front(), optional_field) == 0) { + // The optional field is present. Split the last token in two to recover the + // field prior to the last. + vector last_tokens; + if (!Tokenize(tokens->back(), separators, 2, &last_tokens)) { + return false; + } + // Replace the previous last token with the two new tokens. + tokens->pop_back(); + tokens->push_back(last_tokens[0]); + tokens->push_back(last_tokens[1]); + } + + return true; +} + +} // namespace + static const char *kWhitespace = " \r\n"; static const int kMaxErrorsPrinted = 5; static const int kMaxErrorsBeforeBailing = 100; @@ -323,13 +359,14 @@ bool BasicSourceLineResolver::Module::ParseFile(char *file_line) { BasicSourceLineResolver::Function* BasicSourceLineResolver::Module::ParseFunction(char *function_line) { + bool is_multiple; uint64_t address; uint64_t size; long stack_param_size; char *name; - if (SymbolParseHelper::ParseFunction(function_line, &address, &size, - &stack_param_size, &name)) { - return new Function(name, address, size, stack_param_size); + if (SymbolParseHelper::ParseFunction(function_line, &is_multiple, &address, + &size, &stack_param_size, &name)) { + return new Function(name, address, size, stack_param_size, is_multiple); } return NULL; } @@ -349,11 +386,12 @@ BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine( } bool BasicSourceLineResolver::Module::ParsePublicSymbol(char *public_line) { + bool is_multiple; uint64_t address; long stack_param_size; char *name; - if (SymbolParseHelper::ParsePublicSymbol(public_line, &address, + if (SymbolParseHelper::ParsePublicSymbol(public_line, &is_multiple, &address, &stack_param_size, &name)) { // A few public symbols show up with an address of 0. This has been seen // in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow, @@ -366,7 +404,8 @@ bool BasicSourceLineResolver::Module::ParsePublicSymbol(char *public_line) { } linked_ptr symbol(new PublicSymbol(name, address, - stack_param_size)); + stack_param_size, + is_multiple)); return public_symbols_.Store(address, symbol); } return false; @@ -491,36 +530,39 @@ bool SymbolParseHelper::ParseFile(char *file_line, long *index, } // static -bool SymbolParseHelper::ParseFunction(char *function_line, uint64_t *address, - uint64_t *size, long *stack_param_size, - char **name) { - // FUNC
+bool SymbolParseHelper::ParseFunction(char *function_line, bool *is_multiple, + uint64_t *address, uint64_t *size, + long *stack_param_size, char **name) { + // FUNC []
assert(strncmp(function_line, "FUNC ", 5) == 0); function_line += 5; // skip prefix vector tokens; - if (!Tokenize(function_line, kWhitespace, 4, &tokens)) { + if (!TokenizeWithOptionalField(function_line, "m", kWhitespace, 5, &tokens)) { return false; } + *is_multiple = strcmp(tokens[0], "m") == 0; + int next_token = *is_multiple ? 1 : 0; + char *after_number; - *address = strtoull(tokens[0], &after_number, 16); + *address = strtoull(tokens[next_token++], &after_number, 16); if (!IsValidAfterNumber(after_number) || *address == std::numeric_limits::max()) { return false; } - *size = strtoull(tokens[1], &after_number, 16); + *size = strtoull(tokens[next_token++], &after_number, 16); if (!IsValidAfterNumber(after_number) || *size == std::numeric_limits::max()) { return false; } - *stack_param_size = strtol(tokens[2], &after_number, 16); + *stack_param_size = strtol(tokens[next_token++], &after_number, 16); if (!IsValidAfterNumber(after_number) || *stack_param_size == std::numeric_limits::max() || *stack_param_size < 0) { return false; } - *name = tokens[3]; + *name = tokens[next_token++]; return true; } @@ -571,32 +613,35 @@ bool SymbolParseHelper::ParseLine(char *line_line, uint64_t *address, } // static -bool SymbolParseHelper::ParsePublicSymbol(char *public_line, +bool SymbolParseHelper::ParsePublicSymbol(char *public_line, bool *is_multiple, uint64_t *address, long *stack_param_size, char **name) { - // PUBLIC
+ // PUBLIC []
assert(strncmp(public_line, "PUBLIC ", 7) == 0); public_line += 7; // skip prefix vector tokens; - if (!Tokenize(public_line, kWhitespace, 3, &tokens)) { + if (!TokenizeWithOptionalField(public_line, "m", kWhitespace, 4, &tokens)) { return false; } + *is_multiple = strcmp(tokens[0], "m") == 0; + int next_token = *is_multiple ? 1 : 0; + char *after_number; - *address = strtoull(tokens[0], &after_number, 16); + *address = strtoull(tokens[next_token++], &after_number, 16); if (!IsValidAfterNumber(after_number) || *address == std::numeric_limits::max()) { return false; } - *stack_param_size = strtol(tokens[1], &after_number, 16); + *stack_param_size = strtol(tokens[next_token++], &after_number, 16); if (!IsValidAfterNumber(after_number) || *stack_param_size == std::numeric_limits::max() || *stack_param_size < 0) { return false; } - *name = tokens[2]; + *name = tokens[next_token++]; return true; } diff --git a/src/processor/basic_source_line_resolver_types.h b/src/processor/basic_source_line_resolver_types.h index a022bc0d..89eb57e8 100644 --- a/src/processor/basic_source_line_resolver_types.h +++ b/src/processor/basic_source_line_resolver_types.h @@ -60,11 +60,13 @@ BasicSourceLineResolver::Function : public SourceLineResolverBase::Function { Function(const string &function_name, MemAddr function_address, MemAddr code_size, - int set_parameter_size) : Base(function_name, - function_address, - code_size, - set_parameter_size), - lines() { } + int set_parameter_size, + bool is_mutiple) : Base(function_name, + function_address, + code_size, + set_parameter_size, + is_mutiple), + lines() { } RangeMap< MemAddr, linked_ptr > lines; private: typedef SourceLineResolverBase::Function Base; diff --git a/src/processor/basic_source_line_resolver_unittest.cc b/src/processor/basic_source_line_resolver_unittest.cc index 9fab8ca6..90c34172 100644 --- a/src/processor/basic_source_line_resolver_unittest.cc +++ b/src/processor/basic_source_line_resolver_unittest.cc @@ -455,16 +455,19 @@ TEST(SymbolParseHelper, ParseFileInvalid) { } // Test parsing of valid FUNC lines. The format is: -// FUNC
+// FUNC []
TEST(SymbolParseHelper, ParseFunctionValid) { + bool multiple; uint64_t address; uint64_t size; long stack_param_size; char *name; char kTestLine[] = "FUNC 1 2 3 function name"; - ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine, &address, &size, - &stack_param_size, &name)); + ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine, &multiple, &address, + &size, &stack_param_size, + &name)); + EXPECT_FALSE(multiple); EXPECT_EQ(1ULL, address); EXPECT_EQ(2ULL, size); EXPECT_EQ(3, stack_param_size); @@ -472,25 +475,41 @@ TEST(SymbolParseHelper, ParseFunctionValid) { // Test hex address, size, and param size. char kTestLine1[] = "FUNC a1 a2 a3 function name"; - ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine1, &address, &size, - &stack_param_size, &name)); + ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine1, &multiple, &address, + &size, &stack_param_size, + &name)); + EXPECT_FALSE(multiple); EXPECT_EQ(0xa1ULL, address); EXPECT_EQ(0xa2ULL, size); EXPECT_EQ(0xa3, stack_param_size); EXPECT_EQ("function name", string(name)); char kTestLine2[] = "FUNC 0 0 0 function name"; - ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine2, &address, &size, - &stack_param_size, &name)); + ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine2, &multiple, &address, + &size, &stack_param_size, + &name)); + EXPECT_FALSE(multiple); EXPECT_EQ(0ULL, address); EXPECT_EQ(0ULL, size); EXPECT_EQ(0, stack_param_size); EXPECT_EQ("function name", string(name)); + + // Test optional multiple field. + char kTestLine3[] = "FUNC m a1 a2 a3 function name"; + ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine3, &multiple, &address, + &size, &stack_param_size, + &name)); + EXPECT_TRUE(multiple); + EXPECT_EQ(0xa1ULL, address); + EXPECT_EQ(0xa2ULL, size); + EXPECT_EQ(0xa3, stack_param_size); + EXPECT_EQ("function name", string(name)); } // Test parsing of invalid FUNC lines. The format is: -// FUNC
+// FUNC []
TEST(SymbolParseHelper, ParseFunctionInvalid) { + bool multiple; uint64_t address; uint64_t size; long stack_param_size; @@ -498,36 +517,49 @@ TEST(SymbolParseHelper, ParseFunctionInvalid) { // Test missing function name. char kTestLine[] = "FUNC 1 2 3 "; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine, &multiple, &address, + &size, &stack_param_size, + &name)); // Test bad address. char kTestLine1[] = "FUNC 1z 2 3 function name"; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine1, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine1, &multiple, &address, + &size, &stack_param_size, + &name)); // Test large address. char kTestLine2[] = "FUNC 123123123123123123123123123 2 3 function name"; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine2, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine2, &multiple, &address, + &size, &stack_param_size, + &name)); // Test bad size. char kTestLine3[] = "FUNC 1 z2 3 function name"; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine3, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine3, &multiple, &address, + &size, &stack_param_size, + &name)); // Test large size. char kTestLine4[] = "FUNC 1 231231231231231231231231232 3 function name"; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine4, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine4, &multiple, &address, + &size, &stack_param_size, + &name)); // Test bad param size. char kTestLine5[] = "FUNC 1 2 3z function name"; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine5, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine5, &multiple, &address, + &size, &stack_param_size, + &name)); // Test large param size. char kTestLine6[] = "FUNC 1 2 312312312312312312312312323 function name"; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine6, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine6, &multiple, &address, + &size, &stack_param_size, + &name)); // Negative param size. char kTestLine7[] = "FUNC 1 2 -5 function name"; - ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine7, &address, &size, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine7, &multiple, &address, + &size, &stack_param_size, + &name)); + // Test invalid optional field. + char kTestLine8[] = "FUNC x 1 2 5 function name"; + ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine8, &multiple, &address, + &size, &stack_param_size, + &name)); } // Test parsing of valid lines. The format is: @@ -612,67 +644,96 @@ TEST(SymbolParseHelper, ParseLineInvalid) { } // Test parsing of valid PUBLIC lines. The format is: -// PUBLIC
+// PUBLIC []
TEST(SymbolParseHelper, ParsePublicSymbolValid) { + bool multiple; uint64_t address; long stack_param_size; char *name; char kTestLine[] = "PUBLIC 1 2 3"; - ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine, &address, - &stack_param_size, &name)); + ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine, &multiple, + &address, &stack_param_size, + &name)); + EXPECT_FALSE(multiple); EXPECT_EQ(1ULL, address); EXPECT_EQ(2, stack_param_size); EXPECT_EQ("3", string(name)); // Test hex size and address. char kTestLine1[] = "PUBLIC a1 a2 function name"; - ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine1, &address, - &stack_param_size, &name)); + ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine1, &multiple, + &address, &stack_param_size, + &name)); + EXPECT_FALSE(multiple); EXPECT_EQ(0xa1ULL, address); EXPECT_EQ(0xa2, stack_param_size); EXPECT_EQ("function name", string(name)); // Test 0 is a valid address. char kTestLine2[] = "PUBLIC 0 a2 function name"; - ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine2, &address, - &stack_param_size, &name)); + ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine2, &multiple, + &address, &stack_param_size, + &name)); + EXPECT_FALSE(multiple); EXPECT_EQ(0ULL, address); EXPECT_EQ(0xa2, stack_param_size); EXPECT_EQ("function name", string(name)); + + // Test optional multiple field. + char kTestLine3[] = "PUBLIC m a1 a2 function name"; + ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine3, &multiple, + &address, &stack_param_size, + &name)); + EXPECT_TRUE(multiple); + EXPECT_EQ(0xa1ULL, address); + EXPECT_EQ(0xa2, stack_param_size); + EXPECT_EQ("function name", string(name)); } // Test parsing of invalid PUBLIC lines. The format is: -// PUBLIC
+// PUBLIC []
TEST(SymbolParseHelper, ParsePublicSymbolInvalid) { + bool multiple; uint64_t address; long stack_param_size; char *name; // Test missing source function name. char kTestLine[] = "PUBLIC 1 2 "; - ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine, &address, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine, &multiple, + &address, &stack_param_size, + &name)); // Test bad address. char kTestLine1[] = "PUBLIC 1z 2 3"; - ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine1, &address, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine1, &multiple, + &address, &stack_param_size, + &name)); // Test large address. char kTestLine2[] = "PUBLIC 123123123123123123123123 2 3"; - ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine2, &address, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine2, &multiple, + &address, &stack_param_size, + &name)); // Test bad param stack size. char kTestLine3[] = "PUBLIC 1 z2 3"; - ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine3, &address, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine3, &multiple, + &address, &stack_param_size, + &name)); // Test large param stack size. char kTestLine4[] = "PUBLIC 1 123123123123123123123123123 3"; - ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine4, &address, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine4, &multiple, + &address, &stack_param_size, + &name)); // Test negative param stack size. char kTestLine5[] = "PUBLIC 1 -5 3"; - ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine5, &address, - &stack_param_size, &name)); + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine5, &multiple, + &address, &stack_param_size, + &name)); + // Test invalid optional field. + char kTestLine6[] = "PUBLIC x 1 5 3"; + ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine6, &multiple, + &address, &stack_param_size, + &name)); } } // namespace diff --git a/src/processor/source_line_resolver_base_types.h b/src/processor/source_line_resolver_base_types.h index 4a9dfb3c..ca744e00 100644 --- a/src/processor/source_line_resolver_base_types.h +++ b/src/processor/source_line_resolver_base_types.h @@ -85,9 +85,10 @@ struct SourceLineResolverBase::Function { Function(const string &function_name, MemAddr function_address, MemAddr code_size, - int set_parameter_size) + int set_parameter_size, + bool is_multiple) : name(function_name), address(function_address), size(code_size), - parameter_size(set_parameter_size) { } + parameter_size(set_parameter_size), is_multiple(is_multiple) { } string name; MemAddr address; @@ -95,16 +96,21 @@ struct SourceLineResolverBase::Function { // The size of parameters passed to this function on the stack. int32_t parameter_size; + + // If the function's instructions correspond to multiple symbols. + bool is_multiple; }; struct SourceLineResolverBase::PublicSymbol { PublicSymbol() { } PublicSymbol(const string& set_name, MemAddr set_address, - int set_parameter_size) + int set_parameter_size, + bool is_multiple) : name(set_name), address(set_address), - parameter_size(set_parameter_size) {} + parameter_size(set_parameter_size), + is_multiple(is_multiple) {} string name; MemAddr address; @@ -113,6 +119,9 @@ struct SourceLineResolverBase::PublicSymbol { // is set to the size of the parameters passed to the funciton on the // stack, if known. int32_t parameter_size; + + // If the function's instructions correspond to multiple symbols. + bool is_multiple; }; class SourceLineResolverBase::Module { diff --git a/src/tools/windows/binaries/dump_syms.exe b/src/tools/windows/binaries/dump_syms.exe index ca4676f5..f27f3ece 100644 Binary files a/src/tools/windows/binaries/dump_syms.exe and b/src/tools/windows/binaries/dump_syms.exe differ diff --git a/src/tools/windows/binaries/symupload.exe b/src/tools/windows/binaries/symupload.exe index ba319b26..cdfee745 100644 Binary files a/src/tools/windows/binaries/symupload.exe and b/src/tools/windows/binaries/symupload.exe differ diff --git a/src/tools/windows/dump_syms/dump_syms.cc b/src/tools/windows/dump_syms/dump_syms.cc index 3e8827b6..dcbe39ae 100644 --- a/src/tools/windows/dump_syms/dump_syms.cc +++ b/src/tools/windows/dump_syms/dump_syms.cc @@ -31,6 +31,7 @@ // a text-based format that we can use from the minidump processor. #include +#include #include @@ -41,12 +42,22 @@ using google_breakpad::PDBSourceLineWriter; int wmain(int argc, wchar_t **argv) { if (argc < 2) { - fprintf(stderr, "Usage: %ws \n", argv[0]); + fprintf(stderr, + "Usage: %ws [--enable_multiple_field] \n", + argv[0]); return 1; } - PDBSourceLineWriter writer; - if (!writer.Open(wstring(argv[1]), PDBSourceLineWriter::ANY_FILE)) { + // This is a temporary option to enable writing the optional 'm' field on FUNC + // and PUBLIC, denoting multiple symbols for the address. This option will be + // removed, with the behavior enabled by default, after all symbol file + // readers have had a chance to update. + bool enable_multiple_field = + (argc >= 3 && wcscmp(L"--enable_multiple_field", argv[1]) == 0); + + PDBSourceLineWriter writer(enable_multiple_field); + if (!writer.Open(wstring(argv[enable_multiple_field ? 2 : 1]), + PDBSourceLineWriter::ANY_FILE)) { fprintf(stderr, "Open failed\n"); return 1; } diff --git a/src/tools/windows/dump_syms/testdata/dump_syms_regtest64.sym b/src/tools/windows/dump_syms/testdata/dump_syms_regtest64.sym index a43d3aec..39815245 100644 --- a/src/tools/windows/dump_syms/testdata/dump_syms_regtest64.sym +++ b/src/tools/windows/dump_syms/testdata/dump_syms_regtest64.sym @@ -1262,7 +1262,7 @@ FUNC 3320 18 0 NLG_Notify 332a 5 107 69 332f 7 108 69 3336 a 109 69 -FUNC 3340 1 0 _NLG_Dispatch +FUNC 3340 1 0 _NLG_Dispatch2 3340 0 113 69 3340 10 114 69 FUNC 3350 1 0 _NLG_Return2