Lift some code out of parse_identifiers
Make parse_identifiers less complex. Pylint was complaining that it had too many local variables, and it had a point. * Lift the constants identifier_regex and exclusion_lines to class constants (renamed to uppercase because they're constants). * Lift the per-file loop into a new function parse_identifiers_in_file. No intended behavior change. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
This commit is contained in:
parent
c8794202e6
commit
152de23518
1 changed files with 100 additions and 88 deletions
|
@ -457,6 +457,105 @@ class CodeParser():
|
|||
|
||||
return enum_consts
|
||||
|
||||
IDENTIFIER_REGEX = re.compile(
|
||||
# Match " something(a" or " *something(a". Functions.
|
||||
# Assumptions:
|
||||
# - function definition from return type to one of its arguments is
|
||||
# all on one line
|
||||
# - function definition line only contains alphanumeric, asterisk,
|
||||
# underscore, and open bracket
|
||||
r".* \**(\w+) *\( *\w|"
|
||||
# Match "(*something)(".
|
||||
r".*\( *\* *(\w+) *\) *\(|"
|
||||
# Match names of named data structures.
|
||||
r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$|"
|
||||
# Match names of typedef instances, after closing bracket.
|
||||
r"}? *(\w+)[;[].*"
|
||||
)
|
||||
# The regex below is indented for clarity.
|
||||
EXCLUSION_LINES = re.compile(
|
||||
r"^("
|
||||
r"extern +\"C\"|" # pylint: disable=bad-continuation
|
||||
r"(typedef +)?(struct|union|enum)( *{)?$|"
|
||||
r"} *;?$|"
|
||||
r"$|"
|
||||
r"//|"
|
||||
r"#"
|
||||
r")"
|
||||
)
|
||||
|
||||
def parse_identifiers_in_file(self, header_file, identifiers):
|
||||
"""
|
||||
Parse all lines of a header where a function/enum/struct/union/typedef
|
||||
identifier is declared, based on some regex and heuristics. Highly
|
||||
dependent on formatting style.
|
||||
|
||||
Append found matches to the list ``identifiers``.
|
||||
"""
|
||||
|
||||
with open(header_file, "r", encoding="utf-8") as header:
|
||||
in_block_comment = False
|
||||
# The previous line variable is used for concatenating lines
|
||||
# when identifiers are formatted and spread across multiple
|
||||
# lines.
|
||||
previous_line = ""
|
||||
|
||||
for line_no, line in enumerate(header):
|
||||
# Terminate current comment?
|
||||
if in_block_comment:
|
||||
line = re.sub(r".*?\*/", r"", line, 1)
|
||||
in_block_comment = False
|
||||
# Remove full comments and string literals
|
||||
line = re.sub(r'/\*.*?\*/|(")(?:[^\\\"]|\\.)*"',
|
||||
lambda s: '""' if s.group(1) else ' ',
|
||||
line)
|
||||
# Start an unfinished comment?
|
||||
m = re.match(r"/\*", line)
|
||||
if m:
|
||||
in_block_comment = True
|
||||
line = line[:m.end(0)]
|
||||
|
||||
if self.EXCLUSION_LINES.search(line):
|
||||
previous_line = ""
|
||||
continue
|
||||
|
||||
# If the line contains only space-separated alphanumeric
|
||||
# characters (or underscore, asterisk, or, open bracket),
|
||||
# and nothing else, high chance it's a declaration that
|
||||
# continues on the next line
|
||||
if re.search(r"^([\w\*\(]+\s+)+$", line):
|
||||
previous_line += line
|
||||
continue
|
||||
|
||||
# If previous line seemed to start an unfinished declaration
|
||||
# (as above), concat and treat them as one.
|
||||
if previous_line:
|
||||
line = previous_line.strip() + " " + line.strip() + "\n"
|
||||
previous_line = ""
|
||||
|
||||
# Skip parsing if line has a space in front = heuristic to
|
||||
# skip function argument lines (highly subject to formatting
|
||||
# changes)
|
||||
if line[0] == " ":
|
||||
continue
|
||||
|
||||
identifier = self.IDENTIFIER_REGEX.search(line)
|
||||
|
||||
if not identifier:
|
||||
continue
|
||||
|
||||
# Find the group that matched, and append it
|
||||
for group in identifier.groups():
|
||||
if not group:
|
||||
continue
|
||||
|
||||
identifiers.append(Match(
|
||||
header_file,
|
||||
line,
|
||||
line_no,
|
||||
identifier.span(),
|
||||
group))
|
||||
|
||||
def parse_identifiers(self, include, exclude=None):
|
||||
"""
|
||||
Parse all lines of a header where a function/enum/struct/union/typedef
|
||||
|
@ -469,100 +568,13 @@ class CodeParser():
|
|||
|
||||
Returns a List of Match objects with identifiers.
|
||||
"""
|
||||
identifier_regex = re.compile(
|
||||
# Match " something(a" or " *something(a". Functions.
|
||||
# Assumptions:
|
||||
# - function definition from return type to one of its arguments is
|
||||
# all on one line
|
||||
# - function definition line only contains alphanumeric, asterisk,
|
||||
# underscore, and open bracket
|
||||
r".* \**(\w+) *\( *\w|"
|
||||
# Match "(*something)(".
|
||||
r".*\( *\* *(\w+) *\) *\(|"
|
||||
# Match names of named data structures.
|
||||
r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$|"
|
||||
# Match names of typedef instances, after closing bracket.
|
||||
r"}? *(\w+)[;[].*"
|
||||
)
|
||||
# The regex below is indented for clarity.
|
||||
exclusion_lines = re.compile(
|
||||
r"^("
|
||||
r"extern +\"C\"|" # pylint: disable=bad-continuation
|
||||
r"(typedef +)?(struct|union|enum)( *{)?$|"
|
||||
r"} *;?$|"
|
||||
r"$|"
|
||||
r"//|"
|
||||
r"#"
|
||||
r")"
|
||||
)
|
||||
|
||||
files = self.get_files(include, exclude)
|
||||
self.log.debug("Looking for identifiers in {} files".format(len(files)))
|
||||
|
||||
identifiers = []
|
||||
for header_file in files:
|
||||
with open(header_file, "r", encoding="utf-8") as header:
|
||||
in_block_comment = False
|
||||
# The previous line variable is used for concatenating lines
|
||||
# when identifiers are formatted and spread across multiple
|
||||
# lines.
|
||||
previous_line = ""
|
||||
|
||||
for line_no, line in enumerate(header):
|
||||
# Terminate current comment?
|
||||
if in_block_comment:
|
||||
line = re.sub(r".*?\*/", r"", line, 1)
|
||||
in_block_comment = False
|
||||
# Remove full comments and string literals
|
||||
line = re.sub(r'/\*.*?\*/|(")(?:[^\\\"]|\\.)*"',
|
||||
lambda s: '""' if s.group(1) else ' ',
|
||||
line)
|
||||
# Start an unfinished comment?
|
||||
m = re.match(r"/\*", line)
|
||||
if m:
|
||||
in_block_comment = True
|
||||
line = line[:m.end(0)]
|
||||
|
||||
if exclusion_lines.search(line):
|
||||
previous_line = ""
|
||||
continue
|
||||
|
||||
# If the line contains only space-separated alphanumeric
|
||||
# characters (or underscore, asterisk, or, open bracket),
|
||||
# and nothing else, high chance it's a declaration that
|
||||
# continues on the next line
|
||||
if re.search(r"^([\w\*\(]+\s+)+$", line):
|
||||
previous_line += line
|
||||
continue
|
||||
|
||||
# If previous line seemed to start an unfinished declaration
|
||||
# (as above), concat and treat them as one.
|
||||
if previous_line:
|
||||
line = previous_line.strip() + " " + line.strip() + "\n"
|
||||
previous_line = ""
|
||||
|
||||
# Skip parsing if line has a space in front = heuristic to
|
||||
# skip function argument lines (highly subject to formatting
|
||||
# changes)
|
||||
if line[0] == " ":
|
||||
continue
|
||||
|
||||
identifier = identifier_regex.search(line)
|
||||
|
||||
if not identifier:
|
||||
continue
|
||||
|
||||
# Find the group that matched, and append it
|
||||
for group in identifier.groups():
|
||||
if not group:
|
||||
continue
|
||||
|
||||
identifiers.append(Match(
|
||||
header_file,
|
||||
line,
|
||||
line_no,
|
||||
identifier.span(),
|
||||
group))
|
||||
self.parse_identifiers_in_file(header_file, identifiers)
|
||||
|
||||
return identifiers
|
||||
|
||||
|
|
Loading…
Reference in a new issue