Separate code parsing and name checking in two classes

Signed-off-by: Yuto Takano <yuto.takano@arm.com>
This commit is contained in:
Yuto Takano 2021-08-09 15:35:19 +01:00
parent 50953433a4
commit 55c6c87d95
2 changed files with 99 additions and 97 deletions

View file

@ -20,16 +20,26 @@ This script confirms that the naming of all symbols and identifiers in Mbed TLS
are consistent with the house style and are also self-consistent. It only runs
on Linux and macOS since it depends on nm.
The script performs the following checks:
It contains two major Python classes, CodeParser and NameChecker. They both have
a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
but the individual functions can also be used for specific needs.
CodeParser makes heavy use of regular expressions to parse the code, and is
dependent on the current code formatting. Many Python C parser libraries require
preprocessed C code, which means no macro parsing. Compiler tools are also not
very helpful when we want the exact location in the original source (which
becomes impossible when e.g. comments are stripped).
NameChecker performs the following checks:
- All exported and available symbols in the library object files, are explicitly
declared in the header files. This uses the nm command.
- All macros, constants, and identifiers (function names, struct names, etc)
follow the required pattern.
follow the required regex pattern.
- Typo checking: All words that begin with MBED exist as macros or constants.
Returns 0 on success, 1 on test failure, and 2 if there is a script error or a
subprocess error. Must be run from Mbed TLS root.
The script returns 0 on success, 1 on test failure, and 2 if there is a script
error error. Must be run from Mbed TLS root.
"""
import argparse
@ -168,16 +178,15 @@ class Typo(Problem): # pylint: disable=too-few-public-methods
.format(self.match.filename, self.match.pos[0], self.match.name)
) + "\n" + str(self.match)
class NameCheck():
class CodeParser():
"""
Representation of the core name checking operation performed by this script.
Shares a common logger, and a shared return code.
Class for retrieving files and parsing the code. This can be used
independently of the checks that NameChecker performs, for example for
list_internal_identifiers.py.
"""
def __init__(self, verbose=False):
self.log = None
def __init__(self, log):
self.log = log
self.check_repo_path()
self.return_code = 0
self.setup_logger(verbose)
# Memo for storing "glob expression": set(filepaths)
self.files = {}
@ -185,9 +194,6 @@ class NameCheck():
# Globally excluded filenames
self.excluded_files = ["**/bn_mul", "**/compat-2.x.h"]
# Will contain the parse result after a comprehensive parse
self.parse_result = {}
@staticmethod
def check_repo_path():
"""
@ -197,71 +203,12 @@ class NameCheck():
if not all(os.path.isdir(d) for d in ["include", "library", "tests"]):
raise Exception("This script must be run from Mbed TLS root")
def set_return_code(self, return_code):
if return_code > self.return_code:
self.log.debug("Setting new return code to {}".format(return_code))
self.return_code = return_code
def setup_logger(self, verbose=False):
def comprehensive_parse(self):
"""
Set up a logger and set the change the default logging level from
WARNING to INFO. Loggers are better than print statements since their
verbosity can be controlled.
"""
self.log = logging.getLogger()
if verbose:
self.log.setLevel(logging.DEBUG)
else:
self.log.setLevel(logging.INFO)
self.log.addHandler(logging.StreamHandler())
Comprehensive ("default") function to call each parsing function and
retrieve various elements of the code, together with the source location.
def get_files(self, include_wildcards, exclude_wildcards):
"""
Get all files that match any of the UNIX-style wildcards. While the
check_names script is designed only for use on UNIX/macOS (due to nm),
this function alone would work fine on Windows even with forward slashes
in the wildcard.
Args:
* include_wildcards: a List of shell-style wildcards to match filepaths.
* exclude_wildacrds: a List of shell-style wildcards to exclude.
Returns a List of relative filepaths.
"""
accumulator = set()
# exclude_wildcards may be None. Also, consider the global exclusions.
exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
# Perform set union on the glob results. Memoise individual sets.
for include_wildcard in include_wildcards:
if include_wildcard not in self.files:
self.files[include_wildcard] = set(glob.glob(
include_wildcard,
recursive=True
))
accumulator = accumulator.union(self.files[include_wildcard])
# Perform set difference to exclude. Also use the same memo since their
# behaviour is pretty much identical and it can benefit from the cache.
for exclude_wildcard in exclude_wildcards:
if exclude_wildcard not in self.files:
self.files[exclude_wildcard] = set(glob.glob(
exclude_wildcard,
recursive=True
))
accumulator = accumulator.difference(self.files[exclude_wildcard])
return list(accumulator)
def parse_names_in_source(self):
"""
Comprehensive function to call each parsing function and retrieve
various elements of the code, together with their source location.
Puts the parsed values in the internal variable self.parse_result, so
they can be used from perform_checks().
Returns a dict of parsed item key to the corresponding List of Matches.
"""
self.log.info("Parsing source code...")
self.log.debug(
@ -315,8 +262,7 @@ class NameCheck():
self.log.debug(" {} Enum Constants".format(len(enum_consts)))
self.log.debug(" {} Identifiers".format(len(identifiers)))
self.log.debug(" {} Exported Symbols".format(len(symbols)))
self.log.info("Analysing...")
self.parse_result = {
return {
"macros": actual_macros,
"enum_consts": enum_consts,
"identifiers": identifiers,
@ -324,6 +270,47 @@ class NameCheck():
"mbed_words": mbed_words
}
def get_files(self, include_wildcards, exclude_wildcards):
"""
Get all files that match any of the UNIX-style wildcards. While the
check_names script is designed only for use on UNIX/macOS (due to nm),
this function alone would work fine on Windows even with forward slashes
in the wildcard.
Args:
* include_wildcards: a List of shell-style wildcards to match filepaths.
* exclude_wildcards: a List of shell-style wildcards to exclude.
Returns a List of relative filepaths.
"""
accumulator = set()
# exclude_wildcards may be None. Also, consider the global exclusions.
exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
# Perform set union on the glob results. Memoise individual sets.
for include_wildcard in include_wildcards:
if include_wildcard not in self.files:
self.files[include_wildcard] = set(glob.glob(
include_wildcard,
recursive=True
))
accumulator = accumulator.union(self.files[include_wildcard])
# Perform set difference to exclude. Also use the same memo since their
# behaviour is pretty much identical and it can benefit from the cache.
for exclude_wildcard in exclude_wildcards:
if exclude_wildcard not in self.files:
self.files[exclude_wildcard] = set(glob.glob(
exclude_wildcard,
recursive=True
))
accumulator = accumulator.difference(self.files[exclude_wildcard])
return list(accumulator)
def parse_macros(self, include, exclude=None):
"""
Parse all macros defined by #define preprocessor directives.
@ -456,11 +443,11 @@ class NameCheck():
# Match " something(a" or " *something(a". Functions.
# Assumptions:
# - function definition from return type to one of its arguments is
# all on one line (enforced by the previous_line concat below)
# all on one line
# - function definition line only contains alphanumeric, asterisk,
# underscore, and open bracket
r".* \**(\w+) *\( *\w|"
# Match "(*something)(". Flexible with spaces.
# Match "(*something)(".
r".*\( *\* *(\w+) *\) *\(|"
# Match names of named data structures.
r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$|"
@ -485,7 +472,7 @@ class NameCheck():
for header_file in files:
with open(header_file, "r", encoding="utf-8") as header:
in_block_comment = False
# The previous line varibale is used for concatenating lines
# The previous line variable is used for concatenating lines
# when identifiers are formatted and spread across multiple.
previous_line = ""
@ -596,7 +583,6 @@ class NameCheck():
)
except subprocess.CalledProcessError as error:
self.log.debug(error.output)
self.set_return_code(2)
raise error
finally:
# Put back the original config regardless of there being errors.
@ -614,7 +600,7 @@ class NameCheck():
Does not return the position data since it is of no use.
Args:
* object_files: a List of compiled object files to search through.
* object_files: a List of compiled object filepaths to search through.
Returns a List of unique symbols defined and used in any of the object
files.
@ -646,18 +632,24 @@ class NameCheck():
return symbols
class NameChecker():
"""
Representation of the core name checking operation performed by this script.
"""
def __init__(self, parse_result, log):
self.parse_result = parse_result
self.log = log
def perform_checks(self, quiet=False):
"""
Perform each check in order, output its PASS/FAIL status. Maintain an
overall test status, and output that at the end.
Assumes parse_names_in_source() was called before this.
A comprehensive checker that performs each check in order, and outputs
a final verdict.
Args:
* quiet: whether to hide detailed problem explanation.
"""
self.log.info("=============")
problems = 0
problems += self.check_symbols_declared_in_header(quiet)
pattern_checks = [
@ -677,8 +669,10 @@ class NameCheck():
self.log.info("Remove --quiet to see explanations.")
else:
self.log.info("Use --quiet for minimal output.")
return 1
else:
self.log.info("PASS")
return 0
def check_symbols_declared_in_header(self, quiet):
"""
@ -782,7 +776,6 @@ class NameCheck():
* problems: a List of encountered Problems
"""
if problems:
self.set_return_code(1)
self.log.info("{}: FAIL\n".format(name))
for problem in problems:
problem.quiet = quiet
@ -792,8 +785,8 @@ class NameCheck():
def main():
"""
Perform argument parsing, and create an instance of NameCheck to begin the
core operation.
Perform argument parsing, and create an instance of CodeParser and
NameChecker to begin the core operation.
"""
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
@ -816,14 +809,22 @@ def main():
args = parser.parse_args()
# Configure the global logger, which is then passed to the classes below
log = logging.getLogger()
log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
log.addHandler(logging.StreamHandler())
try:
name_check = NameCheck(verbose=args.verbose)
name_check.parse_names_in_source()
name_check.perform_checks(quiet=args.quiet)
sys.exit(name_check.return_code)
code_parser = CodeParser(log)
parse_result = code_parser.comprehensive_parse()
except Exception: # pylint: disable=broad-except
traceback.print_exc()
sys.exit(2)
name_checker = NameChecker(parse_result, log)
return_code = name_checker.perform_checks(quiet=args.quiet)
sys.exit(return_code)
if __name__ == "__main__":
main()

View file

@ -29,9 +29,10 @@ Must be run from Mbed TLS root.
"""
import argparse
import logging
import traceback
import sys
from check_names import NameCheck
from check_names import CodeParser
def main():
parser = argparse.ArgumentParser(
@ -44,7 +45,7 @@ def main():
parser.parse_args()
try:
name_check = NameCheck()
name_check = CodeParser(logging.getLogger())
result = name_check.parse_identifiers([
"include/mbedtls/*_internal.h",
"library/*.h"