From d5802926d983a09230cafa9b7b1f601c44a814af Mon Sep 17 00:00:00 2001 From: Darryl Green Date: Tue, 8 May 2018 15:30:59 +0100 Subject: [PATCH 01/68] Rewrite check-names.sh in python Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 276 +++++++++++++++++++++++++++++++++++ 1 file changed, 276 insertions(+) create mode 100755 tests/scripts/check-names.py diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py new file mode 100755 index 000000000..6af6f8d54 --- /dev/null +++ b/tests/scripts/check-names.py @@ -0,0 +1,276 @@ +#!/usr/bin/env python3 +""" +This file is part of Mbed TLS (https://tls.mbed.org) + +Copyright (c) 2018, Arm Limited, All Rights Reserved + +Purpose + +This script confirms that the naming of all symbols and identifiers in mbed +TLS are consistent with the house style and are also self-consistent. +""" +import os +import sys +import traceback +import re +import shutil +import subprocess +import logging + + +class NameCheck(object): + def __init__(self): + self.log = None + self.setup_logger() + self.check_repo_path() + self.return_code = 0 + self.excluded_files = ["compat-1.3.h"] + self.header_files = self.get_files(os.path.join("include", "mbedtls")) + self.library_files = self.get_files("library") + self.macros = [] + self.MBED_names = [] + self.enum_consts = [] + self.identifiers = [] + self.actual_macros = [] + self.symbols = [] + self.macro_pattern = r"#define (?P\w+)" + self.MBED_pattern = r"\bMBED.+?_[A-Z0-9_]*" + self.symbol_pattern = r"^\S+( [0-9A-Fa-f]+)* . _*(?P\w+)" + self.identifier_check_pattern = r"^mbedtls_[0-9a-z_]*[0-9a-z]$" + self.decls_pattern = ( + r"^(extern \"C\"|(typedef )?(struct|enum)( {)?$|};?$|$)" + ) + self.macro_const_check_pattern = ( + r"^MBEDTLS_[0-9A-Z_]*[0-9A-Z]$|^YOTTA_[0-9A-Z_]*[0-9A-Z]$" + ) + self.typo_check_pattern = r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$" + self.non_macros = ( + "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_" + ) + + def set_return_code(self, return_code): + if return_code > self.return_code: + self.return_code = return_code + + def setup_logger(self): + self.log = logging.getLogger() + self.log.setLevel(logging.INFO) + self.log.addHandler(logging.StreamHandler()) + + def check_repo_path(self): + current_dir = os.path.realpath('.') + root_dir = os.path.dirname(os.path.dirname( + os.path.dirname(os.path.realpath(__file__)))) + if current_dir != root_dir: + raise Exception("Must be run from Mbed TLS root") + + def get_files(self, directory): + filenames = [] + for root, dirs, files in sorted(os.walk(directory)): + for filename in sorted(files): + if (filename not in self.excluded_files and + filename.endswith((".c", ".h"))): + filenames.append(os.path.join(root, filename)) + return filenames + + def get_macros(self): + for header_file in self.header_files: + with open(header_file, "r") as header: + for line in iter(header.readline, ""): + macro = re.search(self.macro_pattern, line) + if (macro and not + macro.group("macro").startswith(self.non_macros)): + self.macros.append((macro.group("macro"), header_file)) + self.macros = list(set(self.macros)) + + def get_MBED_names(self): + for file_group in [self.header_files, self.library_files]: + for filename in file_group: + with open(filename, "r") as f: + for line in iter(f.readline, ""): + mbed_names = re.findall(self.MBED_pattern, line) + if mbed_names: + for name in mbed_names: + self.MBED_names.append((name, filename)) + self.MBED_names = list(set(self.MBED_names)) + + def get_enum_consts(self): + for header_file in self.header_files: + state = 0 + with open(header_file, "r") as header: + for line in iter(header.readline, ""): + if state is 0 and re.match(r"^(typedef )?enum {", line): + state = 1 + elif state is 0 and re.match(r"^(typedef )?enum", line): + state = 2 + elif state is 2 and re.match(r"^{", line): + state = 1 + elif state is 1 and re.match(r"^}", line): + state = 0 + elif state is 1: + enum_const = re.match(r"^\s*(?P\w+)", line) + if enum_const: + self.enum_consts.append( + (enum_const.group("enum_const"), header_file) + ) + + def line_contains_declaration(self, line): + return (re.match(r"^[^ /#{]", line) + and not re.match(self.decls_pattern, line)) + + def get_identifier_from_declaration(self, declaration): + identifier = re.search( + r"([a-zA-Z_][a-zA-Z0-9_]*)\(|" + r"\(\*(.+)\)\(|" + r"(\w+)\W*$", + declaration + ) + if identifier: + for group in identifier.groups(): + if group: + return group + self.log.error(declaration) + raise Exception("No identifier found") + + def get_identifiers(self): + for header_file in self.header_files: + with open(header_file, "r") as header: + for line in iter(header.readline, ""): + if self.line_contains_declaration(line): + self.identifiers.append( + (self.get_identifier_from_declaration(line), + header_file) + ) + + def get_symbols(self): + try: + shutil.copy("include/mbedtls/config.h", + "include/mbedtls/config.h.bak") + subprocess.run( + ["perl", "scripts/config.pl", "full"], + encoding=sys.stdout.encoding, + check=True + ) + my_environment = os.environ.copy() + my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables" + subprocess.run( + ["make", "clean", "lib"], + env=my_environment, + encoding=sys.stdout.encoding, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + check=True + ) + shutil.move("include/mbedtls/config.h.bak", + "include/mbedtls/config.h") + nm_output = "" + for lib in ["library/libmbedcrypto.a", + "library/libmbedtls.a", + "library/libmbedx509.a"]: + nm_output += subprocess.run( + ["nm", "-og", lib], + encoding=sys.stdout.encoding, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + check=True + ).stdout + for line in nm_output.splitlines(): + if not re.match(r"^\S+: +U |^$|^\S+:$", line): + symbol = re.match(self.symbol_pattern, line) + if symbol: + self.symbols.append(symbol.group('symbol')) + else: + self.log.error(line) + self.symbols.sort() + subprocess.run( + ["make", "clean"], + encoding=sys.stdout.encoding, + check=True + ) + except subprocess.CalledProcessError as error: + self.log.error(error) + self.set_return_code(2) + + def check_symbols_declared_in_header(self): + identifiers = [x[0] for x in self.identifiers] + bad_names = [] + for symbol in self.symbols: + if symbol not in identifiers: + bad_names.append(symbol) + if bad_names: + self.set_return_code(1) + self.log.info("Names of identifiers: FAIL") + for name in bad_names: + self.log.info(name) + else: + self.log.info("Names of identifiers: PASS") + + def check_group(self, group_to_check, check_pattern, name): + bad_names = [] + for item in group_to_check: + if not re.match(check_pattern, item[0]): + bad_names.append("{} - {}".format(item[0], item[1])) + if bad_names: + self.set_return_code(1) + self.log.info("Names of {}: FAIL".format(name)) + for name in bad_names: + self.log.info(name) + else: + self.log.info("Names of {}: PASS".format(name)) + + def check_for_typos(self): + bad_names = [] + all_caps_names = list(set( + [x[0] for x in self.actual_macros + self.enum_consts] + )) + for name in self.MBED_names: + if name[0] not in all_caps_names: + if not re.search(self.typo_check_pattern, name[0]): + bad_names.append("{} - {}".format(name[0], name[1])) + if bad_names: + self.set_return_code(1) + self.log.info("Likely typos: FAIL") + for name in bad_names: + self.log.info(name) + else: + self.log.info("Likely typos: PASS") + + def get_names_from_source_code(self): + self.log.info("Analysing source code...") + self.get_macros() + self.get_enum_consts() + self.get_identifiers() + self.get_symbols() + self.get_MBED_names() + self.actual_macros = list(set(self.macros) - set(self.identifiers)) + self.log.info("{} macros".format(len(self.macros))) + self.log.info("{} enum-consts".format(len(self.enum_consts))) + self.log.info("{} identifiers".format(len(self.identifiers))) + self.log.info("{} exported-symbols".format(len(self.symbols))) + + def check_names(self): + self.check_symbols_declared_in_header() + for group, check_pattern, name in [ + (self.actual_macros, self.macro_const_check_pattern, + "actual-macros"), + (self.enum_consts, self.macro_const_check_pattern, + "enum-consts"), + (self.identifiers, self.identifier_check_pattern, + "identifiers")]: + self.check_group(group, check_pattern, name) + self.check_for_typos() + + +def run_main(): + try: + name_check = NameCheck() + name_check.get_names_from_source_code() + name_check.check_names() + sys.exit(name_check.return_code) + except Exception: + traceback.print_exc() + sys.exit(2) + + +if __name__ == "__main__": + run_main() From 4e9b51bc1830dd898f86b2f3a7fbd2318e3ddc7e Mon Sep 17 00:00:00 2001 From: Darryl Green Date: Wed, 16 May 2018 22:32:41 +0100 Subject: [PATCH 02/68] Update scripts to use check-names.py Signed-off-by: Yuto Takano --- .travis.yml | 2 +- tests/git-scripts/pre-push.sh | 2 +- tests/scripts/all.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index fa01e5a24..06495e4eb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ script: - tests/scripts/recursion.pl library/*.c - tests/scripts/check-generated-files.sh - tests/scripts/check-doxy-blocks.pl -- tests/scripts/check-names.sh +- tests/scripts/check-names.py - tests/scripts/doxygen.sh - cmake -D CMAKE_BUILD_TYPE:String="Check" . - make diff --git a/tests/git-scripts/pre-push.sh b/tests/git-scripts/pre-push.sh index ee54a6cff..2058a57f9 100755 --- a/tests/git-scripts/pre-push.sh +++ b/tests/git-scripts/pre-push.sh @@ -43,5 +43,5 @@ run_test() } run_test ./tests/scripts/check-doxy-blocks.pl -run_test ./tests/scripts/check-names.sh +run_test ./tests/scripts/check-names.py run_test ./tests/scripts/check-generated-files.sh diff --git a/tests/scripts/all.sh b/tests/scripts/all.sh index e6c7549e6..851287f10 100755 --- a/tests/scripts/all.sh +++ b/tests/scripts/all.sh @@ -415,7 +415,7 @@ tests/scripts/check-doxy-blocks.pl msg "test/build: declared and exported names" # < 3s cleanup -tests/scripts/check-names.sh +tests/scripts/check-names.py msg "test: doxygen warnings" # ~ 3s cleanup From a783d9c5ef2eb5c83a241c2d37f3423a1201529f Mon Sep 17 00:00:00 2001 From: Darryl Green Date: Thu, 17 May 2018 09:21:06 +0100 Subject: [PATCH 03/68] Remove check-names.sh and sub-scripts it used Signed-off-by: Yuto Takano --- tests/scripts/check-names.sh | 93 ------------------------------- tests/scripts/list-enum-consts.pl | 35 ------------ tests/scripts/list-identifiers.sh | 34 ----------- tests/scripts/list-macros.sh | 16 ------ tests/scripts/list-symbols.sh | 26 --------- 5 files changed, 204 deletions(-) delete mode 100755 tests/scripts/check-names.sh delete mode 100755 tests/scripts/list-enum-consts.pl delete mode 100755 tests/scripts/list-identifiers.sh delete mode 100755 tests/scripts/list-macros.sh delete mode 100755 tests/scripts/list-symbols.sh diff --git a/tests/scripts/check-names.sh b/tests/scripts/check-names.sh deleted file mode 100755 index 4c66440e2..000000000 --- a/tests/scripts/check-names.sh +++ /dev/null @@ -1,93 +0,0 @@ -#!/bin/sh -# -# This file is part of mbed TLS (https://tls.mbed.org) -# -# Copyright (c) 2015-2016, ARM Limited, All Rights Reserved -# -# Purpose -# -# This script confirms that the naming of all symbols and identifiers in mbed -# TLS are consistent with the house style and are also self-consistent. -# -set -eu - -if grep --version|head -n1|grep GNU >/dev/null; then :; else - echo "This script requires GNU grep.">&2 - exit 1 -fi - -printf "Analysing source code...\n" - -tests/scripts/list-macros.sh -tests/scripts/list-enum-consts.pl -tests/scripts/list-identifiers.sh -tests/scripts/list-symbols.sh - -FAIL=0 - -printf "\nExported symbols declared in header: " -UNDECLARED=$( diff exported-symbols identifiers | sed -n -e 's/^< //p' ) -if [ "x$UNDECLARED" = "x" ]; then - echo "PASS" -else - echo "FAIL" - echo "$UNDECLARED" - FAIL=1 -fi - -diff macros identifiers | sed -n -e 's/< //p' > actual-macros - -for THING in actual-macros enum-consts; do - printf "Names of $THING: " - test -r $THING - BAD=$( grep -v '^MBEDTLS_[0-9A-Z_]*[0-9A-Z]$\|^YOTTA_[0-9A-Z_]*[0-9A-Z]$' $THING || true ) - if [ "x$BAD" = "x" ]; then - echo "PASS" - else - echo "FAIL" - echo "$BAD" - FAIL=1 - fi -done - -for THING in identifiers; do - printf "Names of $THING: " - test -r $THING - BAD=$( grep -v '^mbedtls_[0-9a-z_]*[0-9a-z]$' $THING || true ) - if [ "x$BAD" = "x" ]; then - echo "PASS" - else - echo "FAIL" - echo "$BAD" - FAIL=1 - fi -done - -printf "Likely typos: " -sort -u actual-macros enum-consts > _caps -HEADERS=$( ls include/mbedtls/*.h | egrep -v 'compat-1\.3\.h' ) -NL=' -' -sed -n 's/MBED..._[A-Z0-9_]*/\'"$NL"'&\'"$NL"/gp \ - $HEADERS library/*.c \ - | grep MBEDTLS | sort -u > _MBEDTLS_XXX -TYPOS=$( diff _caps _MBEDTLS_XXX | sed -n 's/^> //p' \ - | egrep -v 'XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$' || true ) -rm _MBEDTLS_XXX _caps -if [ "x$TYPOS" = "x" ]; then - echo "PASS" -else - echo "FAIL" - echo "$TYPOS" - FAIL=1 -fi - -printf "\nOverall: " -if [ "$FAIL" -eq 0 ]; then - rm macros actual-macros enum-consts identifiers exported-symbols - echo "PASSED" - exit 0 -else - echo "FAILED" - exit 1 -fi diff --git a/tests/scripts/list-enum-consts.pl b/tests/scripts/list-enum-consts.pl deleted file mode 100755 index 633e3fdf9..000000000 --- a/tests/scripts/list-enum-consts.pl +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/perl - -use warnings; -use strict; - -use utf8; -use open qw(:std utf8); - --d 'include/mbedtls' or die "$0: must be run from root\n"; - -@ARGV = grep { ! /compat-1\.3\.h/ } ; - -my @consts; -my $state = 'out'; -while (<>) -{ - if( $state eq 'out' and /^(typedef )?enum \{/ ) { - $state = 'in'; - } elsif( $state eq 'out' and /^(typedef )?enum/ ) { - $state = 'start'; - } elsif( $state eq 'start' and /{/ ) { - $state = 'in'; - } elsif( $state eq 'in' and /}/ ) { - $state = 'out'; - } elsif( $state eq 'in' ) { - s/=.*//; s!/\*.*!!; s/,.*//; s/\s+//g; chomp; - push @consts, $_ if $_; - } -} - -open my $fh, '>', 'enum-consts' or die; -print $fh "$_\n" for sort @consts; -close $fh or die; - -printf "%8d enum-consts\n", scalar @consts; diff --git a/tests/scripts/list-identifiers.sh b/tests/scripts/list-identifiers.sh deleted file mode 100755 index 130d9d63f..000000000 --- a/tests/scripts/list-identifiers.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh - -set -eu - -if [ -d include/mbedtls ]; then :; else - echo "$0: must be run from root" >&2 - exit 1 -fi - -HEADERS=$( ls include/mbedtls/*.h | egrep -v 'compat-1\.3\.h|bn_mul' ) - -rm -f identifiers - -grep '^[^ /#{]' $HEADERS | \ - sed -e 's/^[^:]*://' | \ - egrep -v '^(extern "C"|(typedef )?(struct|enum)( {)?$|};?$)' \ - > _decls - -if true; then -sed -n -e 's/.* \**\([a-zA-Z_][a-zA-Z0-9_]*\)(.*/\1/p' \ - -e 's/.*(\*\(.*\))(.*/\1/p' _decls -grep -v '(' _decls | sed -e 's/\([a-zA-Z0-9_]*\)[;[].*/\1/' -e 's/.* \**//' -fi > _identifiers - -if [ $( wc -l < _identifiers ) -eq $( wc -l < _decls ) ]; then - rm _decls - egrep -v '^(u?int(16|32|64)_t)$' _identifiers | sort > identifiers - rm _identifiers -else - echo "$0: oops, lost some identifiers" 2>&1 - exit 1 -fi - -wc -l identifiers diff --git a/tests/scripts/list-macros.sh b/tests/scripts/list-macros.sh deleted file mode 100755 index 3c84adba6..000000000 --- a/tests/scripts/list-macros.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/sh - -set -eu - -if [ -d include/mbedtls ]; then :; else - echo "$0: must be run from root" >&2 - exit 1 -fi - -HEADERS=$( ls include/mbedtls/*.h | egrep -v 'compat-1\.3\.h' ) - -sed -n -e 's/.*#define \([a-zA-Z0-9_]*\).*/\1/p' $HEADERS \ - | egrep -v '^(asm|inline|EMIT|_CRT_SECURE_NO_DEPRECATE)$|^MULADDC_' \ - | sort -u > macros - -wc -l macros diff --git a/tests/scripts/list-symbols.sh b/tests/scripts/list-symbols.sh deleted file mode 100755 index c25871942..000000000 --- a/tests/scripts/list-symbols.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh - -set -eu - -if [ -d include/mbedtls ]; then :; else - echo "$0: must be run from root" >&2 - exit 1 -fi - -if grep -i cmake Makefile >/dev/null; then - echo "$0: not compatible with cmake" >&2 - exit 1 -fi - -cp include/mbedtls/config.h include/mbedtls/config.h.bak -scripts/config.pl full -CFLAGS=-fno-asynchronous-unwind-tables make clean lib >/dev/null 2>&1 -mv include/mbedtls/config.h.bak include/mbedtls/config.h -if uname | grep -F Darwin >/dev/null; then - nm -gUj library/libmbed*.a 2>/dev/null | sed -n -e 's/^_//p' -elif uname | grep -F Linux >/dev/null; then - nm -og library/libmbed*.a | grep -v '^[^ ]*: *U \|^$\|^[^ ]*:$' | sed 's/^[^ ]* . //' -fi | sort > exported-symbols -make clean - -wc -l exported-symbols From 6c79b5dce728e3754c6e6886a7c153705cfc6ce6 Mon Sep 17 00:00:00 2001 From: Darryl Green Date: Thu, 17 May 2018 14:14:50 +0100 Subject: [PATCH 04/68] Keep compatibility with python versions prior to 3.5 Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 6af6f8d54..8a8e2dbbe 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python2 """ This file is part of Mbed TLS (https://tls.mbed.org) @@ -146,20 +146,17 @@ class NameCheck(object): try: shutil.copy("include/mbedtls/config.h", "include/mbedtls/config.h.bak") - subprocess.run( + subprocess.check_output( ["perl", "scripts/config.pl", "full"], - encoding=sys.stdout.encoding, - check=True + universal_newlines=True, ) my_environment = os.environ.copy() my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables" - subprocess.run( + subprocess.check_output( ["make", "clean", "lib"], env=my_environment, - encoding=sys.stdout.encoding, - stdout=subprocess.PIPE, + universal_newlines=True, stderr=subprocess.STDOUT, - check=True ) shutil.move("include/mbedtls/config.h.bak", "include/mbedtls/config.h") @@ -167,13 +164,11 @@ class NameCheck(object): for lib in ["library/libmbedcrypto.a", "library/libmbedtls.a", "library/libmbedx509.a"]: - nm_output += subprocess.run( + nm_output += subprocess.check_output( ["nm", "-og", lib], - encoding=sys.stdout.encoding, - stdout=subprocess.PIPE, + universal_newlines=True, stderr=subprocess.STDOUT, - check=True - ).stdout + ) for line in nm_output.splitlines(): if not re.match(r"^\S+: +U |^$|^\S+:$", line): symbol = re.match(self.symbol_pattern, line) @@ -182,10 +177,9 @@ class NameCheck(object): else: self.log.error(line) self.symbols.sort() - subprocess.run( + subprocess.check_output( ["make", "clean"], - encoding=sys.stdout.encoding, - check=True + universal_newlines=True, ) except subprocess.CalledProcessError as error: self.log.error(error) From 3963967ebc4bf0b61b1d5fea26680cbc56fe29b1 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 19:47:48 +0100 Subject: [PATCH 05/68] Restructure check-names.py with more verbose error messages Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 596 +++++++++++++++++++++++++---------- 1 file changed, 429 insertions(+), 167 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 8a8e2dbbe..431bcbb5c 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -1,14 +1,27 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 +# +# Copyright The Mbed TLS Contributors +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ -This file is part of Mbed TLS (https://tls.mbed.org) - -Copyright (c) 2018, Arm Limited, All Rights Reserved - -Purpose - -This script confirms that the naming of all symbols and identifiers in mbed -TLS are consistent with the house style and are also self-consistent. +This script confirms that the naming of all symbols and identifiers in Mbed TLS +are consistent with the house style and are also self-consistent. """ + +import argparse +import textwrap import os import sys import traceback @@ -17,47 +30,89 @@ import shutil import subprocess import logging +# Naming patterns to check against +MACRO_PATTERN = r"^MBEDTLS_[0-9A-Z_]*[0-9A-Z]$|^YOTTA_[0-9A-Z_]*[0-9A-Z]$" +IDENTIFIER_PATTERN = r"^mbedtls_[0-9a-z_]*[0-9a-z]$" + +class Match(object): + def __init__(self, filename, line, pos, name): + self.filename = filename + self.line = line + self.pos = pos + self.name = name + + def __str__(self): + return self.name + +class Problem(object): + def __init__(self): + self.textwrapper = textwrap.TextWrapper() + self.textwrapper.initial_indent = " * " + self.textwrapper.subsequent_indent = " " + +class SymbolNotInHeader(Problem): + def __init__(self, symbol_name): + self.symbol_name = symbol_name + Problem.__init__(self) + + def __str__(self): + return self.textwrapper.fill( + "'{0}' was found as an available symbol in the output of nm, " + "however it was not declared in any header files." + .format(self.symbol_name)) + +class PatternMismatch(Problem): + def __init__(self, pattern, match): + self.pattern = pattern + self.match = match + Problem.__init__(self) + + def __str__(self): + return self.textwrapper.fill( + "{0}: '{1}' does not match the required pattern '{2}'." + .format(self.match.filename, self.match.name, self.pattern)) + +class Typo(Problem): + def __init__(self, match): + self.match = match + Problem.__init__(self) + + def __str__(self): + return self.textwrapper.fill( + "{0}: '{1}' looks like a typo. It was not found in any macros or " + "any enums. If this is not a typo, put //no-check-names after it." + .format(self.match.filename, self.match.name)) class NameCheck(object): def __init__(self): self.log = None - self.setup_logger() self.check_repo_path() self.return_code = 0 self.excluded_files = ["compat-1.3.h"] - self.header_files = self.get_files(os.path.join("include", "mbedtls")) - self.library_files = self.get_files("library") - self.macros = [] - self.MBED_names = [] - self.enum_consts = [] - self.identifiers = [] - self.actual_macros = [] - self.symbols = [] - self.macro_pattern = r"#define (?P\w+)" - self.MBED_pattern = r"\bMBED.+?_[A-Z0-9_]*" - self.symbol_pattern = r"^\S+( [0-9A-Fa-f]+)* . _*(?P\w+)" - self.identifier_check_pattern = r"^mbedtls_[0-9a-z_]*[0-9a-z]$" - self.decls_pattern = ( - r"^(extern \"C\"|(typedef )?(struct|enum)( {)?$|};?$|$)" - ) - self.macro_const_check_pattern = ( - r"^MBEDTLS_[0-9A-Z_]*[0-9A-Z]$|^YOTTA_[0-9A-Z_]*[0-9A-Z]$" - ) self.typo_check_pattern = r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$" - self.non_macros = ( - "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_" - ) def set_return_code(self, return_code): if return_code > self.return_code: self.return_code = return_code - def setup_logger(self): + def setup_logger(self, verbose=False): + """ + Set up a logger and set the change the default logging level from + WARNING to INFO. Loggers are better than print statements since their + verbosity can be controlled. + """ self.log = logging.getLogger() - self.log.setLevel(logging.INFO) + if verbose: + self.log.setLevel(logging.DEBUG) + else: + self.log.setLevel(logging.INFO) self.log.addHandler(logging.StreamHandler()) def check_repo_path(self): + """ + Check that the current working directory is the project root, and throw + an exception if not. + """ current_dir = os.path.realpath('.') root_dir = os.path.dirname(os.path.dirname( os.path.dirname(os.path.realpath(__file__)))) @@ -73,32 +128,81 @@ class NameCheck(object): filenames.append(os.path.join(root, filename)) return filenames - def get_macros(self): - for header_file in self.header_files: + def parse_macros(self, header_files): + """ + Parse all macros defined by #define preprocessor directives. + + Args: + header_files: A list of filepaths to look through. + + Returns: + A list of Match objects for the macros. + """ + MACRO_REGEX = r"#define (?P\w+)" + NON_MACROS = ( + "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_" + ) + + macros = [] + + for header_file in header_files: with open(header_file, "r") as header: - for line in iter(header.readline, ""): - macro = re.search(self.macro_pattern, line) - if (macro and not - macro.group("macro").startswith(self.non_macros)): - self.macros.append((macro.group("macro"), header_file)) - self.macros = list(set(self.macros)) + for line in header: + macro = re.search(MACRO_REGEX, line) + if (macro and + not macro.group("macro").startswith(NON_MACROS)): + macros.append(Match( + header_file, + line, + (macro.start(), macro.end()), + macro.group("macro"))) - def get_MBED_names(self): - for file_group in [self.header_files, self.library_files]: - for filename in file_group: - with open(filename, "r") as f: - for line in iter(f.readline, ""): - mbed_names = re.findall(self.MBED_pattern, line) - if mbed_names: - for name in mbed_names: - self.MBED_names.append((name, filename)) - self.MBED_names = list(set(self.MBED_names)) + return macros - def get_enum_consts(self): - for header_file in self.header_files: + def parse_MBED_names(self, header_files, library_files): + """ + Parse all words in the file that begin with MBED. Includes macros. + + Args: + header_files: A list of filepaths to look through. + library_files: A list of filepaths to look through. + + Returns: + A list of Match objects for words beginning with MBED. + """ + MBED_names = [] + + for filename in header_files + library_files: + with open(filename, "r") as fp: + for line in fp: + for name in re.finditer(r"\bMBED.+?_[A-Z0-9_]*", line): + MBED_names.append(Match( + filename, + line, + (name.start(), name.end()), + name.group(0) + )) + + return MBED_names + + def parse_enum_consts(self, header_files): + """ + Parse all enum value constants that are declared. + + Args: + header_files: A list of filepaths to look through. + + Returns: + A list of (enum constants, containing filename). + """ + + enum_consts = [] + + for header_file in header_files: + # Emulate a finite state machine to parse enum declarations. state = 0 with open(header_file, "r") as header: - for line in iter(header.readline, ""): + for line in header: if state is 0 and re.match(r"^(typedef )?enum {", line): state = 1 elif state is 0 and re.match(r"^(typedef )?enum", line): @@ -110,156 +214,314 @@ class NameCheck(object): elif state is 1: enum_const = re.match(r"^\s*(?P\w+)", line) if enum_const: - self.enum_consts.append( - (enum_const.group("enum_const"), header_file) - ) + enum_consts.append(Match( + header_file, + line, + (enum_const.start(), enum_const.end()), + enum_const.group("enum_const"))) + + return enum_consts - def line_contains_declaration(self, line): - return (re.match(r"^[^ /#{]", line) - and not re.match(self.decls_pattern, line)) + def parse_identifiers(self, header_files): + """ + Parse all lines of a header where a function identifier is declared, + based on some huersitics. Assumes every line that is not a comment or a + preprocessor directive contains some identifier. - def get_identifier_from_declaration(self, declaration): - identifier = re.search( - r"([a-zA-Z_][a-zA-Z0-9_]*)\(|" - r"\(\*(.+)\)\(|" - r"(\w+)\W*$", - declaration + Args: + header_files: A list of filepaths to look through. + + Returns: + A list of (identifier, containing filename) + """ + EXCLUDED_DECLARATIONS = ( + r"^(extern \"C\"|(typedef )?(struct|enum)( {)?$|};?$|$)" ) - if identifier: - for group in identifier.groups(): - if group: - return group - self.log.error(declaration) - raise Exception("No identifier found") - def get_identifiers(self): - for header_file in self.header_files: + identifiers = [] + + for header_file in header_files: with open(header_file, "r") as header: - for line in iter(header.readline, ""): - if self.line_contains_declaration(line): - self.identifiers.append( - (self.get_identifier_from_declaration(line), - header_file) - ) + in_block_comment = False - def get_symbols(self): + for line in header: + # Skip parsing this line if it begins or ends a block + # comment, and set the state machine's state. + if re.search(r"/\*", line): + in_block_comment = True + continue + elif re.search(r"\*/", line) and in_block_comment: + in_block_comment = False + continue + + # Skip parsing this line if it's a line comment, or if it + # begins with a preprocessor directive + if in_block_comment or re.match(r"(//|#)", line): + continue + + if re.match(EXCLUDED_DECLARATIONS, line): + continue + + identifier = re.search( + # Matches: "mbedtls_aes_init(" + r"([a-zA-Z_][a-zA-Z0-9_]*)\(|" + # Matches: "(*f_rng)(" + r"\(\*(.+)\)\(|" + # TODO: unknown purpose + r"(\w+)\W*$", + line + ) + + if identifier: + for group in identifier.groups(): + if group: + identifiers.append(Match( + header_file, + line, + (identifier.start(), identifier.end()), + identifier.group(0))) + + return identifiers + + def parse_symbols(self): + """ + Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509 + object files using nm to retrieve the list of referenced symbols. + + Returns: + A list of unique symbols defined and used in the libraries. + """ + + symbols = [] + + # Back up the config and atomically compile with the full configratuion. + shutil.copy("include/mbedtls/mbedtls_config.h", + "include/mbedtls/mbedtls_config.h.bak") try: - shutil.copy("include/mbedtls/config.h", - "include/mbedtls/config.h.bak") - subprocess.check_output( + subprocess.run( ["perl", "scripts/config.pl", "full"], - universal_newlines=True, + encoding=sys.stdout.encoding, + check=True ) my_environment = os.environ.copy() my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables" - subprocess.check_output( + subprocess.run( ["make", "clean", "lib"], env=my_environment, - universal_newlines=True, + encoding=sys.stdout.encoding, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + check=True ) - shutil.move("include/mbedtls/config.h.bak", - "include/mbedtls/config.h") - nm_output = "" - for lib in ["library/libmbedcrypto.a", - "library/libmbedtls.a", - "library/libmbedx509.a"]: - nm_output += subprocess.check_output( - ["nm", "-og", lib], - universal_newlines=True, - stderr=subprocess.STDOUT, - ) - for line in nm_output.splitlines(): - if not re.match(r"^\S+: +U |^$|^\S+:$", line): - symbol = re.match(self.symbol_pattern, line) - if symbol: - self.symbols.append(symbol.group('symbol')) - else: - self.log.error(line) - self.symbols.sort() - subprocess.check_output( + + # Perform object file analysis using nm + symbols = self.parse_symbols_from_nm( + ["library/libmbedcrypto.a", + "library/libmbedtls.a", + "library/libmbedx509.a"]) + + symbols.sort() + + subprocess.run( ["make", "clean"], - universal_newlines=True, + encoding=sys.stdout.encoding, + check=True ) except subprocess.CalledProcessError as error: self.log.error(error) self.set_return_code(2) + finally: + shutil.move("include/mbedtls/mbedtls_config.h.bak", + "include/mbedtls/mbedtls_config.h") + + return symbols + + def parse_symbols_from_nm(self, object_files): + """ + Run nm to retrieve the list of referenced symbols in each object file. + Does not return the position data since it is of no use. + + Returns: + A list of unique symbols defined and used in any of the object files. + """ + UNDEFINED_SYMBOL = r"^\S+: +U |^$|^\S+:$" + VALID_SYMBOL = r"^\S+( [0-9A-Fa-f]+)* . _*(?P\w+)" + + symbols = [] + + nm_output = "" + for lib in object_files: + nm_output += subprocess.run( + ["nm", "-og", lib], + encoding=sys.stdout.encoding, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + check=True + ).stdout + for line in nm_output.splitlines(): + if not re.match(UNDEFINED_SYMBOL, line): + symbol = re.match(VALID_SYMBOL, line) + if symbol: + symbols.append(symbol.group('symbol')) + else: + self.log.error(line) + + return symbols + + def parse_names_in_source(self): + """ + Calls each parsing function to retrieve various elements of the code, + together with their source location. Puts the parsed values in the + internal variable self.parse_result. + """ + self.log.info("Parsing source code...") + + m_headers = self.get_files(os.path.join("include", "mbedtls")) + libraries = self.get_files("library") + + all_macros = self.parse_macros(m_headers) + enum_consts = self.parse_enum_consts(m_headers) + identifiers = self.parse_identifiers(m_headers) + symbols = self.parse_symbols() + mbed_names = self.parse_MBED_names(m_headers, libraries) + + # Remove identifier macros like mbedtls_printf or mbedtls_calloc + macros = list(set(all_macros) - set(identifiers)) + + self.log.info("Found:") + self.log.info(" {} Macros".format(len(all_macros))) + self.log.info(" {} Enum Constants".format(len(enum_consts))) + self.log.info(" {} Identifiers".format(len(identifiers))) + self.log.info(" {} Exported Symbols".format(len(symbols))) + self.log.info("Analysing...") + + self.parse_result = { + "macros": macros, + "enum_consts": enum_consts, + "identifiers": identifiers, + "symbols": symbols, + "mbed_names": mbed_names + } + + def perform_checks(self): + """ + Perform each check in order, output its PASS/FAIL status. Maintain an + overall test status, and output that at the end. + """ + problems = 0 + + problems += self.check_symbols_declared_in_header() + + pattern_checks = [ + ("macros", MACRO_PATTERN), + ("enum_consts", MACRO_PATTERN), + ("identifiers", IDENTIFIER_PATTERN)] + for group, check_pattern in pattern_checks: + problems += self.check_match_pattern(group, check_pattern) + + problems += self.check_for_typos() + + self.log.info("=============") + if problems > 0: + self.log.info("FAIL: {0} problem(s) to fix".format(str(problems))) + else: + self.log.info("PASS") def check_symbols_declared_in_header(self): - identifiers = [x[0] for x in self.identifiers] - bad_names = [] - for symbol in self.symbols: - if symbol not in identifiers: - bad_names.append(symbol) - if bad_names: - self.set_return_code(1) - self.log.info("Names of identifiers: FAIL") - for name in bad_names: - self.log.info(name) - else: - self.log.info("Names of identifiers: PASS") + """ + Perform a check that all detected symbols in the library object files + are properly declared in headers. + + Outputs to the logger the PASS/FAIL status, followed by the location of + problems. - def check_group(self, group_to_check, check_pattern, name): - bad_names = [] - for item in group_to_check: - if not re.match(check_pattern, item[0]): - bad_names.append("{} - {}".format(item[0], item[1])) - if bad_names: + Returns the number of problems that needs fixing. + """ + problems = [] + for symbol in self.parse_result["symbols"]: + found_symbol_declared = False + for identifier_match in self.parse_result["identifiers"]: + if symbol == identifier_match.name: + found_symbol_declared = True + break + + if not found_symbol_declared: + problems.append(SymbolNotInHeader(symbol)) + + if problems: self.set_return_code(1) - self.log.info("Names of {}: FAIL".format(name)) - for name in bad_names: - self.log.info(name) + self.log.info("All symbols in header: FAIL") + for problem in problems: + self.log.info(str(problem) + "\n") else: - self.log.info("Names of {}: PASS".format(name)) + self.log.info("All symbols in header: PASS") + + return len(problems) + + def check_match_pattern(self, group_to_check, check_pattern): + problems = [] + for item_match in self.parse_result[group_to_check]: + if not re.match(check_pattern, item_match.name): + problems.append(PatternMismatch(check_pattern, item_match)) + + if problems: + self.set_return_code(1) + self.log.info("Naming patterns of {}: FAIL".format(group_to_check)) + for problem in problems: + self.log.info(str(problem) + "\n") + else: + self.log.info("Naming patterns of {}: PASS".format(group_to_check)) + + return len(problems) def check_for_typos(self): - bad_names = [] - all_caps_names = list(set( - [x[0] for x in self.actual_macros + self.enum_consts] + problems = [] + all_caps_names = list(set([ + match.name for match + in self.parse_result["macros"] + self.parse_result["enum_consts"]] )) - for name in self.MBED_names: - if name[0] not in all_caps_names: - if not re.search(self.typo_check_pattern, name[0]): - bad_names.append("{} - {}".format(name[0], name[1])) - if bad_names: + + TYPO_EXCLUSION = r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$" + + for name_match in self.parse_result["mbed_names"]: + if name_match.name not in all_caps_names: + if not re.search(TYPO_EXCLUSION, name_match.name): + problems.append(Typo(name_match)) + + if problems: self.set_return_code(1) self.log.info("Likely typos: FAIL") - for name in bad_names: - self.log.info(name) + for problem in problems: + self.log.info(str(problem) + "\n") else: self.log.info("Likely typos: PASS") + + return len(problems) - def get_names_from_source_code(self): - self.log.info("Analysing source code...") - self.get_macros() - self.get_enum_consts() - self.get_identifiers() - self.get_symbols() - self.get_MBED_names() - self.actual_macros = list(set(self.macros) - set(self.identifiers)) - self.log.info("{} macros".format(len(self.macros))) - self.log.info("{} enum-consts".format(len(self.enum_consts))) - self.log.info("{} identifiers".format(len(self.identifiers))) - self.log.info("{} exported-symbols".format(len(self.symbols))) +def main(): + """ + Main function, parses command-line arguments. + """ - def check_names(self): - self.check_symbols_declared_in_header() - for group, check_pattern, name in [ - (self.actual_macros, self.macro_const_check_pattern, - "actual-macros"), - (self.enum_consts, self.macro_const_check_pattern, - "enum-consts"), - (self.identifiers, self.identifier_check_pattern, - "identifiers")]: - self.check_group(group, check_pattern, name) - self.check_for_typos() + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description=( + "This script confirms that the naming of all symbols and identifiers " + "in Mbed TLS are consistent with the house style and are also " + "self-consistent.\n\n" + "Expected to be run from the MbedTLS root directory.")) + parser.add_argument("-v", "--verbose", + action="store_true", + help="enable script debug outputs") + + args = parser.parse_args() -def run_main(): try: name_check = NameCheck() - name_check.get_names_from_source_code() - name_check.check_names() + name_check.setup_logger(verbose=args.verbose) + name_check.parse_names_in_source() + name_check.perform_checks() sys.exit(name_check.return_code) except Exception: traceback.print_exc() @@ -267,4 +529,4 @@ def run_main(): if __name__ == "__main__": - run_main() + main() From c1838937f1278bffa807313d46d43037cdad3899 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 19:52:09 +0100 Subject: [PATCH 06/68] Also check PSA: Python port of 2d9d6db60f5fd0a4993d90e47f39462647624ad6 Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 431bcbb5c..e14d140c4 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -32,7 +32,7 @@ import logging # Naming patterns to check against MACRO_PATTERN = r"^MBEDTLS_[0-9A-Z_]*[0-9A-Z]$|^YOTTA_[0-9A-Z_]*[0-9A-Z]$" -IDENTIFIER_PATTERN = r"^mbedtls_[0-9a-z_]*[0-9a-z]$" +IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$" class Match(object): def __init__(self, filename, line, pos, name): @@ -377,11 +377,12 @@ class NameCheck(object): self.log.info("Parsing source code...") m_headers = self.get_files(os.path.join("include", "mbedtls")) + p_headers = self.get_files(os.path.join("include", "psa")) libraries = self.get_files("library") - all_macros = self.parse_macros(m_headers) + all_macros = self.parse_macros(m_headers + ["configs/config-default.h"]) enum_consts = self.parse_enum_consts(m_headers) - identifiers = self.parse_identifiers(m_headers) + identifiers = self.parse_identifiers(m_headers + p_headers) symbols = self.parse_symbols() mbed_names = self.parse_MBED_names(m_headers, libraries) From ed91cf003a07dfaec7dcb66bd4e0eb412c848671 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 19:52:43 +0100 Subject: [PATCH 07/68] Remove Yotta: Python port of 3ad2efdc82a3d15f373b9d12e6764efec3577b55 Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index e14d140c4..9467ec47c 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -31,7 +31,7 @@ import subprocess import logging # Naming patterns to check against -MACRO_PATTERN = r"^MBEDTLS_[0-9A-Z_]*[0-9A-Z]$|^YOTTA_[0-9A-Z_]*[0-9A-Z]$" +MACRO_PATTERN = r"^MBEDTLS_[0-9A-Z_]*[0-9A-Z]$" IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$" class Match(object): From bb7dca495fca9f2750529b40b99019417a49b849 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 19:57:58 +0100 Subject: [PATCH 08/68] Work with PSA constants: Python port of 03091d1114450dd19a10215094682f14761540d9 Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 9467ec47c..03b6a5803 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -31,7 +31,7 @@ import subprocess import logging # Naming patterns to check against -MACRO_PATTERN = r"^MBEDTLS_[0-9A-Z_]*[0-9A-Z]$" +MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$" IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$" class Match(object): @@ -159,20 +159,19 @@ class NameCheck(object): return macros - def parse_MBED_names(self, header_files, library_files): + def parse_MBED_names(self, files): """ Parse all words in the file that begin with MBED. Includes macros. Args: - header_files: A list of filepaths to look through. - library_files: A list of filepaths to look through. + files: A list of filepaths to look through. Returns: A list of Match objects for words beginning with MBED. """ MBED_names = [] - for filename in header_files + library_files: + for filename in files: with open(filename, "r") as fp: for line in fp: for name in re.finditer(r"\bMBED.+?_[A-Z0-9_]*", line): @@ -380,11 +379,12 @@ class NameCheck(object): p_headers = self.get_files(os.path.join("include", "psa")) libraries = self.get_files("library") - all_macros = self.parse_macros(m_headers + ["configs/config-default.h"]) + all_macros = self.parse_macros( + m_headers + p_headers) enum_consts = self.parse_enum_consts(m_headers) identifiers = self.parse_identifiers(m_headers + p_headers) symbols = self.parse_symbols() - mbed_names = self.parse_MBED_names(m_headers, libraries) + mbed_names = self.parse_MBED_names(m_headers + p_headers + libraries) # Remove identifier macros like mbedtls_printf or mbedtls_calloc macros = list(set(all_macros) - set(identifiers)) From fa950ae3448effe05031428423dbcca91d356758 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 20:03:44 +0100 Subject: [PATCH 09/68] Look in 3rdparty: Python port of 8a0f5bb3c11196a5bc0df6393a47e56c40adb7ac Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 03b6a5803..52854858d 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -377,14 +377,19 @@ class NameCheck(object): m_headers = self.get_files(os.path.join("include", "mbedtls")) p_headers = self.get_files(os.path.join("include", "psa")) - libraries = self.get_files("library") + t_headers = ["3rdparty/everest/include/everest/everest.h", + "3rdparty/everest/include/everest/x25519.h"] + libraries = self.get_files("library") + [ + "3rdparty/everest/library/everest.c", + "3rdparty/everest/library/x25519.c"] all_macros = self.parse_macros( - m_headers + p_headers) - enum_consts = self.parse_enum_consts(m_headers) - identifiers = self.parse_identifiers(m_headers + p_headers) + m_headers + p_headers + t_headers) + enum_consts = self.parse_enum_consts(m_headers + t_headers) + identifiers = self.parse_identifiers(m_headers + p_headers + t_headers) symbols = self.parse_symbols() - mbed_names = self.parse_MBED_names(m_headers + p_headers + libraries) + mbed_names = self.parse_MBED_names( + m_headers + p_headers + t_headers + libraries) # Remove identifier macros like mbedtls_printf or mbedtls_calloc macros = list(set(all_macros) - set(identifiers)) From c763cc368fc27a3124a0af828153fd93bc5821a9 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 20:06:34 +0100 Subject: [PATCH 10/68] Check for double underscores: Python port of 712f7a804e391737b0e9d2593abe291f4ccb0303 Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 52854858d..35fda6114 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -469,6 +469,8 @@ class NameCheck(object): for item_match in self.parse_result[group_to_check]: if not re.match(check_pattern, item_match.name): problems.append(PatternMismatch(check_pattern, item_match)) + if re.match(r".*__.*", item_match.name): + problems.append(PatternMismatch("double underscore", item_match)) if problems: self.set_return_code(1) From 157444c24d84a0a7d05aeb926f1919b87c9ec342 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 20:10:45 +0100 Subject: [PATCH 11/68] Add library header files: Python port of 65a6fa3e2669cb02af5399d0f60b5bed3e62a9be Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 35fda6114..f494f7666 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -119,12 +119,12 @@ class NameCheck(object): if current_dir != root_dir: raise Exception("Must be run from Mbed TLS root") - def get_files(self, directory): + def get_files(self, extension, directory): filenames = [] for root, dirs, files in sorted(os.walk(directory)): for filename in sorted(files): if (filename not in self.excluded_files and - filename.endswith((".c", ".h"))): + filename.endswith("." + extension)): filenames.append(os.path.join(root, filename)) return filenames @@ -375,21 +375,22 @@ class NameCheck(object): """ self.log.info("Parsing source code...") - m_headers = self.get_files(os.path.join("include", "mbedtls")) - p_headers = self.get_files(os.path.join("include", "psa")) + m_headers = self.get_files("h", os.path.join("include", "mbedtls")) + p_headers = self.get_files("h", os.path.join("include", "psa")) t_headers = ["3rdparty/everest/include/everest/everest.h", "3rdparty/everest/include/everest/x25519.h"] - libraries = self.get_files("library") + [ + l_headers = self.get_files("h", "library") + libraries = self.get_files("c", "library") + [ "3rdparty/everest/library/everest.c", "3rdparty/everest/library/x25519.c"] all_macros = self.parse_macros( - m_headers + p_headers + t_headers) + m_headers + p_headers + t_headers + l_headers) enum_consts = self.parse_enum_consts(m_headers + t_headers) identifiers = self.parse_identifiers(m_headers + p_headers + t_headers) symbols = self.parse_symbols() mbed_names = self.parse_MBED_names( - m_headers + p_headers + t_headers + libraries) + m_headers + p_headers + t_headers + l_headers + libraries) # Remove identifier macros like mbedtls_printf or mbedtls_calloc macros = list(set(all_macros) - set(identifiers)) From e503d61b998205caa04c2a6b1e1604f969e00254 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 20:14:05 +0100 Subject: [PATCH 12/68] Remove 1.3 to 2.0 helpers: Python port of 7d48b2821808e964ab594462e419fbed0e015729 Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index f494f7666..46cb00e22 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -88,8 +88,7 @@ class NameCheck(object): self.log = None self.check_repo_path() self.return_code = 0 - self.excluded_files = ["compat-1.3.h"] - self.typo_check_pattern = r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$" + self.excluded_files = ["bn_mul"] def set_return_code(self, return_code): if return_code > self.return_code: From c62b4084a2ae3977f21354da3b7d4dda781cc3bf Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 20:17:07 +0100 Subject: [PATCH 13/68] Per-line opt-out of typo check: Python port of b6837761815e1a8f6f475be4575824fc386a08dd Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 46cb00e22..f480a830e 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -173,6 +173,10 @@ class NameCheck(object): for filename in files: with open(filename, "r") as fp: for line in fp: + # Ignore any names that are deliberately opted-out + if re.search(r"// *no-check-names", line): + continue + for name in re.finditer(r"\bMBED.+?_[A-Z0-9_]*", line): MBED_names.append(Match( filename, From 062289c6578c75f04744cd69d96a883d44ae55f4 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 20:19:57 +0100 Subject: [PATCH 14/68] Invoke config.py instead of pl: Python port of 5d46f6a89b25603f0a77466c618213200c328510 Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index f480a830e..2bb1b0201 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -301,7 +301,7 @@ class NameCheck(object): "include/mbedtls/mbedtls_config.h.bak") try: subprocess.run( - ["perl", "scripts/config.pl", "full"], + ["perl", "scripts/config.py", "full"], encoding=sys.stdout.encoding, check=True ) From e77f699ed554b010b80f5f2acf5994772e6f77be Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 20:22:59 +0100 Subject: [PATCH 15/68] Exclude FStar and Hacl: Python port of 9b33e7d7d7426e3d7f27cd7d206765ae33e3e61f Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 2bb1b0201..2d1eb8359 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -348,6 +348,7 @@ class NameCheck(object): """ UNDEFINED_SYMBOL = r"^\S+: +U |^$|^\S+:$" VALID_SYMBOL = r"^\S+( [0-9A-Fa-f]+)* . _*(?P\w+)" + EXCLUSIONS = ("FStar", "Hacl") symbols = [] @@ -363,8 +364,8 @@ class NameCheck(object): for line in nm_output.splitlines(): if not re.match(UNDEFINED_SYMBOL, line): symbol = re.match(VALID_SYMBOL, line) - if symbol: - symbols.append(symbol.group('symbol')) + if symbol and not symbol.group("symbol").startswith(EXCLUSIONS): + symbols.append(symbol.group("symbol")) else: self.log.error(line) From 56e3a5caa6c7d16ddca230b09cf8e13d97712b52 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 20:29:42 +0100 Subject: [PATCH 16/68] Add test driver symbols: Python port of 7f13fa2454282b21930045a3f4f9a2835d80425e Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 2d1eb8359..c3da69e44 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -383,13 +383,14 @@ class NameCheck(object): p_headers = self.get_files("h", os.path.join("include", "psa")) t_headers = ["3rdparty/everest/include/everest/everest.h", "3rdparty/everest/include/everest/x25519.h"] + d_headers = self.get_files("h", os.path.join("tests", "include", "test", "drivers")) l_headers = self.get_files("h", "library") libraries = self.get_files("c", "library") + [ "3rdparty/everest/library/everest.c", "3rdparty/everest/library/x25519.c"] all_macros = self.parse_macros( - m_headers + p_headers + t_headers + l_headers) + m_headers + p_headers + t_headers + l_headers + d_headers) enum_consts = self.parse_enum_consts(m_headers + t_headers) identifiers = self.parse_identifiers(m_headers + p_headers + t_headers) symbols = self.parse_symbols() From 17220988dcb2ca4d75bec0b8cb0963fba7366a44 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 20:30:18 +0100 Subject: [PATCH 17/68] Parse identifiers from library headers: Python port of d9eee3b417c2e8f63dd10d835ab9a9472242c2ed Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index c3da69e44..5b8159681 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -392,7 +392,7 @@ class NameCheck(object): all_macros = self.parse_macros( m_headers + p_headers + t_headers + l_headers + d_headers) enum_consts = self.parse_enum_consts(m_headers + t_headers) - identifiers = self.parse_identifiers(m_headers + p_headers + t_headers) + identifiers = self.parse_identifiers(m_headers + p_headers + t_headers + l_headers) symbols = self.parse_symbols() mbed_names = self.parse_MBED_names( m_headers + p_headers + t_headers + l_headers + libraries) From 0fd48f793907f0b053334303074e42cdcc80eb5f Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 20:32:55 +0100 Subject: [PATCH 18/68] Python port of 7cc4c68eb63a24f9cbf814254cd537df819958e5 Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 5b8159681..96838f2f3 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -213,7 +213,7 @@ class NameCheck(object): state = 1 elif state is 1 and re.match(r"^}", line): state = 0 - elif state is 1: + elif state is 1 and not re.match(r"^#", line): enum_const = re.match(r"^\s*(?P\w+)", line) if enum_const: enum_consts.append(Match( From fe02684049e6036ffa1e3cd98570381a8710c0fc Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 20:34:24 +0100 Subject: [PATCH 19/68] Python port of f6643ccd90694ae99d05541990b78738a8444ab0 Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 96838f2f3..531ff3508 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -391,7 +391,7 @@ class NameCheck(object): all_macros = self.parse_macros( m_headers + p_headers + t_headers + l_headers + d_headers) - enum_consts = self.parse_enum_consts(m_headers + t_headers) + enum_consts = self.parse_enum_consts(m_headers + l_headers + t_headers) identifiers = self.parse_identifiers(m_headers + p_headers + t_headers + l_headers) symbols = self.parse_symbols() mbed_names = self.parse_MBED_names( From 6f38ab3bcac69624ad78ab152d0fd0dbe52662fb Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Thu, 5 Aug 2021 21:07:14 +0100 Subject: [PATCH 20/68] Fix legacy troublesome regex Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 531ff3508..0d14429f2 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -237,7 +237,7 @@ class NameCheck(object): A list of (identifier, containing filename) """ EXCLUDED_DECLARATIONS = ( - r"^(extern \"C\"|(typedef )?(struct|enum)( {)?$|};?$|$)" + r"^(extern \"C\"|(typedef )?(struct|union|enum)( {)?$|};?$|$)" ) identifiers = [] @@ -258,19 +258,15 @@ class NameCheck(object): # Skip parsing this line if it's a line comment, or if it # begins with a preprocessor directive - if in_block_comment or re.match(r"(//|#)", line): + if in_block_comment or re.match(r"^(//|#)", line): continue if re.match(EXCLUDED_DECLARATIONS, line): continue - + identifier = re.search( # Matches: "mbedtls_aes_init(" - r"([a-zA-Z_][a-zA-Z0-9_]*)\(|" - # Matches: "(*f_rng)(" - r"\(\*(.+)\)\(|" - # TODO: unknown purpose - r"(\w+)\W*$", + r"([a-zA-Z_][a-zA-Z0-9_]*)\(", line ) @@ -281,7 +277,7 @@ class NameCheck(object): header_file, line, (identifier.start(), identifier.end()), - identifier.group(0))) + identifier.group(1))) return identifiers From 81528c058a9e23fe78cbebac903beb63f9eebc55 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Fri, 6 Aug 2021 16:22:06 +0100 Subject: [PATCH 21/68] Add documentation, fix identifier parsing - Add documentation to all classes and functions that were not self-explanatory. - Fix the parsing of identifiers, so it now behaves identically to the original shell script. Detects the same amount of identifiers. - Fix macro parsing so MBEDTLS_PSA_ACCEL didn't error out - Reformat output to be comprehensible Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 467 +++++++++++++++++++++++------------ 1 file changed, 312 insertions(+), 155 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 0d14429f2..828702b17 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -17,7 +17,14 @@ """ This script confirms that the naming of all symbols and identifiers in Mbed TLS -are consistent with the house style and are also self-consistent. +are consistent with the house style and are also self-consistent. It performs +the following checks: + +- All exported and available symbols in the library object files, are explicitly + declared in the header files. +- All macros, constants, and identifiers (function names, struct names, etc) + follow the required pattern. +- Typo checking: All words that begin with MBED exist as macros or constants. """ import argparse @@ -30,27 +37,50 @@ import shutil import subprocess import logging -# Naming patterns to check against +# Naming patterns to check against. These are defined outside the NameCheck +# class for ease of modification. MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$" +CONSTANTS_PATTERN = MACRO_PATTERN IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$" class Match(object): + """ + A class representing a match, together with its found position. + + Fields: + * filename: the file that the match was in. + * line: the full line containing the match. + * pos: a tuple of (start, end) positions on the line where the match is. + * name: the match itself. + """ def __init__(self, filename, line, pos, name): self.filename = filename self.line = line self.pos = pos self.name = name - - def __str__(self): - return self.name class Problem(object): + """ + A parent class representing a form of static analysis error. + + Fields: + * textwrapper: a TextWrapper instance to format problems nicely. + """ def __init__(self): self.textwrapper = textwrap.TextWrapper() - self.textwrapper.initial_indent = " * " - self.textwrapper.subsequent_indent = " " + self.textwrapper.width = 80 + self.textwrapper.initial_indent = " * " + self.textwrapper.subsequent_indent = " " class SymbolNotInHeader(Problem): + """ + A problem that occurs when an exported/available symbol in the object file + is not explicitly declared in header files. Created with + NameCheck.check_symbols_declared_in_header() + + Fields: + * symbol_name: the name of the symbol. + """ def __init__(self, symbol_name): self.symbol_name = symbol_name Problem.__init__(self) @@ -62,21 +92,36 @@ class SymbolNotInHeader(Problem): .format(self.symbol_name)) class PatternMismatch(Problem): + """ + A problem that occurs when something doesn't match the expected pattern. + Created with NameCheck.check_match_pattern() + + Fields: + * pattern: the expected regex pattern + * match: the Match object in question + """ def __init__(self, pattern, match): self.pattern = pattern self.match = match Problem.__init__(self) - + def __str__(self): return self.textwrapper.fill( "{0}: '{1}' does not match the required pattern '{2}'." .format(self.match.filename, self.match.name, self.pattern)) class Typo(Problem): + """ + A problem that occurs when a word using MBED doesn't appear to be defined as + constants nor enum values. Created with NameCheck.check_for_typos() + + Fields: + * match: the Match object of the MBED name in question. + """ def __init__(self, match): self.match = match Problem.__init__(self) - + def __str__(self): return self.textwrapper.fill( "{0}: '{1}' looks like a typo. It was not found in any macros or " @@ -84,11 +129,15 @@ class Typo(Problem): .format(self.match.filename, self.match.name)) class NameCheck(object): + """ + Representation of the core name checking operation performed by this script. + Shares a common logger, common excluded filenames, and a shared return_code. + """ def __init__(self): self.log = None self.check_repo_path() self.return_code = 0 - self.excluded_files = ["bn_mul"] + self.excluded_files = ["bn_mul", "compat-2.x.h"] def set_return_code(self, return_code): if return_code > self.return_code: @@ -97,7 +146,7 @@ class NameCheck(object): def setup_logger(self, verbose=False): """ Set up a logger and set the change the default logging level from - WARNING to INFO. Loggers are better than print statements since their + WARNING to INFO. Loggers are better than print statements since their verbosity can be controlled. """ self.log = logging.getLogger() @@ -119,6 +168,16 @@ class NameCheck(object): raise Exception("Must be run from Mbed TLS root") def get_files(self, extension, directory): + """ + Get all files that end with .extension in the specified directory + recursively. + + Args: + * extension: the file extension to search for, without the dot + * directory: the directory to recursively search for + + Returns a List of relative filepaths. + """ filenames = [] for root, dirs, files in sorted(os.walk(directory)): for filename in sorted(files): @@ -127,15 +186,65 @@ class NameCheck(object): filenames.append(os.path.join(root, filename)) return filenames + def parse_names_in_source(self): + """ + Calls each parsing function to retrieve various elements of the code, + together with their source location. Puts the parsed values in the + internal variable self.parse_result. + """ + self.log.info("Parsing source code...") + + m_headers = self.get_files("h", os.path.join("include", "mbedtls")) + p_headers = self.get_files("h", os.path.join("include", "psa")) + t_headers = ["3rdparty/everest/include/everest/everest.h", + "3rdparty/everest/include/everest/x25519.h"] + d_headers = self.get_files("h", os.path.join("tests", "include", "test", "drivers")) + l_headers = self.get_files("h", "library") + libraries = self.get_files("c", "library") + [ + "3rdparty/everest/library/everest.c", + "3rdparty/everest/library/x25519.c"] + + all_macros = self.parse_macros( + m_headers + p_headers + t_headers + l_headers + d_headers) + enum_consts = self.parse_enum_consts( + m_headers + l_headers + t_headers) + identifiers = self.parse_identifiers( + m_headers + p_headers + t_headers + l_headers) + mbed_names = self.parse_MBED_names( + m_headers + p_headers + t_headers + l_headers + libraries) + symbols = self.parse_symbols() + + # Remove identifier macros like mbedtls_printf or mbedtls_calloc + identifiers_justname = [x.name for x in identifiers] + actual_macros = [] + for macro in all_macros: + if macro.name not in identifiers_justname: + actual_macros.append(macro) + + self.log.debug("Found:") + self.log.debug(" {} Macros".format(len(all_macros))) + self.log.debug(" {} Non-identifier Macros".format(len(actual_macros))) + self.log.debug(" {} Enum Constants".format(len(enum_consts))) + self.log.debug(" {} Identifiers".format(len(identifiers))) + self.log.debug(" {} Exported Symbols".format(len(symbols))) + self.log.info("Analysing...") + + self.parse_result = { + "macros": actual_macros, + "enum_consts": enum_consts, + "identifiers": identifiers, + "symbols": symbols, + "mbed_names": mbed_names + } + def parse_macros(self, header_files): """ Parse all macros defined by #define preprocessor directives. Args: - header_files: A list of filepaths to look through. - - Returns: - A list of Match objects for the macros. + * header_files: A List of filepaths to look through. + + Returns a List of Match objects for the found macros. """ MACRO_REGEX = r"#define (?P\w+)" NON_MACROS = ( @@ -147,36 +256,36 @@ class NameCheck(object): for header_file in header_files: with open(header_file, "r") as header: for line in header: - macro = re.search(MACRO_REGEX, line) - if (macro and - not macro.group("macro").startswith(NON_MACROS)): - macros.append(Match( - header_file, - line, - (macro.start(), macro.end()), - macro.group("macro"))) + for macro in re.finditer(MACRO_REGEX, line): + if not macro.group("macro").startswith(NON_MACROS): + macros.append(Match( + header_file, + line, + (macro.start(), macro.end()), + macro.group("macro"))) return macros def parse_MBED_names(self, files): """ Parse all words in the file that begin with MBED. Includes macros. + There have been typos of TLS, hence the broader check than MBEDTLS. Args: - files: A list of filepaths to look through. - - Returns: - A list of Match objects for words beginning with MBED. + * files: a List of filepaths to look through. + + Returns a List of Match objects for words beginning with MBED. """ MBED_names = [] - + for filename in files: with open(filename, "r") as fp: for line in fp: - # Ignore any names that are deliberately opted-out - if re.search(r"// *no-check-names", line): + # Ignore any names that are deliberately opted-out or in + # legacy error directives + if re.search(r"// *no-check-names|#error", line): continue - + for name in re.finditer(r"\bMBED.+?_[A-Z0-9_]*", line): MBED_names.append(Match( filename, @@ -192,16 +301,18 @@ class NameCheck(object): Parse all enum value constants that are declared. Args: - header_files: A list of filepaths to look through. + * header_files: A List of filepaths to look through. - Returns: - A list of (enum constants, containing filename). + Returns a List of Match objects for the findings. """ enum_consts = [] for header_file in header_files: # Emulate a finite state machine to parse enum declarations. + # 0 = not in enum + # 1 = inside enum + # 2 = almost inside enum state = 0 with open(header_file, "r") as header: for line in header: @@ -221,23 +332,28 @@ class NameCheck(object): line, (enum_const.start(), enum_const.end()), enum_const.group("enum_const"))) - + return enum_consts def parse_identifiers(self, header_files): """ Parse all lines of a header where a function identifier is declared, - based on some huersitics. Assumes every line that is not a comment or a - preprocessor directive contains some identifier. + based on some huersitics. Highly dependent on formatting style. Args: - header_files: A list of filepaths to look through. - - Returns: - A list of (identifier, containing filename) + * header_files: A List of filepaths to look through. + + Returns a List of Match objects with identifiers. """ - EXCLUDED_DECLARATIONS = ( - r"^(extern \"C\"|(typedef )?(struct|union|enum)( {)?$|};?$|$)" + EXCLUDED_LINES = ( + r"^(" + r"extern \"C\"|" + r"(typedef )?(struct|union|enum)( {)?$|" + r"};?$|" + r"$|" + r"//|" + r"#" + r")" ) identifiers = [] @@ -245,39 +361,69 @@ class NameCheck(object): for header_file in header_files: with open(header_file, "r") as header: in_block_comment = False + previous_line = None for line in header: - # Skip parsing this line if it begins or ends a block - # comment, and set the state machine's state. + # Skip parsing this line if a block comment ends on it, + # but don't skip if it has just started -- there is a chance + # it ends on the same line. if re.search(r"/\*", line): - in_block_comment = True - continue - elif re.search(r"\*/", line) and in_block_comment: - in_block_comment = False - continue - - # Skip parsing this line if it's a line comment, or if it - # begins with a preprocessor directive - if in_block_comment or re.match(r"^(//|#)", line): + in_block_comment = not in_block_comment + if re.search(r"\*/", line): + in_block_comment = not in_block_comment continue - if re.match(EXCLUDED_DECLARATIONS, line): + if in_block_comment: + previous_line = None + continue + + if re.match(EXCLUDED_LINES, line): + previous_line = None + continue + + # Match "^something something$", with optional inline/static + # This *might* be a function with its argument brackets on + # the next line, or a struct declaration, so keep note of it + if re.match( + r"(inline |static |typedef )*\w+ \w+$", + line): + previous_line = line + continue + + # If previous line seemed to start an unfinished declaration + # (as above), and this line begins with a bracket, concat + # them and treat them as one line. + if previous_line and re.match(" *[\({]", line): + line = previous_line.strip() + line.strip() + previous_line = None + + # Skip parsing if line has a space in front = hueristic to + # skip function argument lines (highly subject to formatting + # changes) + if line[0] == " ": continue identifier = re.search( - # Matches: "mbedtls_aes_init(" - r"([a-zA-Z_][a-zA-Z0-9_]*)\(", + # Match something( + r".* \**(\w+)\(|" + # Match (*something)( + r".*\( *\* *(\w+) *\) *\(|" + # Match names of named data structures + r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$|" + # Match names of typedef instances, after closing bracket + r"}? *(\w+)[;[].*", line ) if identifier: + # Find the group that matched, and append it for group in identifier.groups(): if group: identifiers.append(Match( header_file, line, (identifier.start(), identifier.end()), - identifier.group(1))) + group)) return identifiers @@ -285,19 +431,23 @@ class NameCheck(object): """ Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509 object files using nm to retrieve the list of referenced symbols. - - Returns: - A list of unique symbols defined and used in the libraries. - """ + Exceptions thrown here are rethrown because they would be critical + errors that void several tests, and thus needs to halt the program. This + is explicitly done for clarity. + Returns a List of unique symbols defined and used in the libraries. + """ + self.log.info("Compiling...") symbols = [] # Back up the config and atomically compile with the full configratuion. shutil.copy("include/mbedtls/mbedtls_config.h", - "include/mbedtls/mbedtls_config.h.bak") + "include/mbedtls/mbedtls_config.h.bak") try: + # Use check=True in all subprocess calls so that failures are raised + # as exceptions and logged. subprocess.run( - ["perl", "scripts/config.py", "full"], + ["python3", "scripts/config.py", "full"], encoding=sys.stdout.encoding, check=True ) @@ -326,8 +476,8 @@ class NameCheck(object): check=True ) except subprocess.CalledProcessError as error: - self.log.error(error) self.set_return_code(2) + raise error finally: shutil.move("include/mbedtls/mbedtls_config.h.bak", "include/mbedtls/mbedtls_config.h") @@ -339,8 +489,11 @@ class NameCheck(object): Run nm to retrieve the list of referenced symbols in each object file. Does not return the position data since it is of no use. - Returns: - A list of unique symbols defined and used in any of the object files. + Args: + * object_files: a List of compiled object files to search through. + + Returns a List of unique symbols defined and used in any of the object + files. """ UNDEFINED_SYMBOL = r"^\S+: +U |^$|^\S+:$" VALID_SYMBOL = r"^\S+( [0-9A-Fa-f]+)* . _*(?P\w+)" @@ -348,6 +501,7 @@ class NameCheck(object): symbols = [] + # Gather all outputs of nm nm_output = "" for lib in object_files: nm_output += subprocess.run( @@ -357,6 +511,7 @@ class NameCheck(object): stderr=subprocess.STDOUT, check=True ).stdout + for line in nm_output.splitlines(): if not re.match(UNDEFINED_SYMBOL, line): symbol = re.match(VALID_SYMBOL, line) @@ -364,86 +519,49 @@ class NameCheck(object): symbols.append(symbol.group("symbol")) else: self.log.error(line) - + return symbols - def parse_names_in_source(self): - """ - Calls each parsing function to retrieve various elements of the code, - together with their source location. Puts the parsed values in the - internal variable self.parse_result. - """ - self.log.info("Parsing source code...") - - m_headers = self.get_files("h", os.path.join("include", "mbedtls")) - p_headers = self.get_files("h", os.path.join("include", "psa")) - t_headers = ["3rdparty/everest/include/everest/everest.h", - "3rdparty/everest/include/everest/x25519.h"] - d_headers = self.get_files("h", os.path.join("tests", "include", "test", "drivers")) - l_headers = self.get_files("h", "library") - libraries = self.get_files("c", "library") + [ - "3rdparty/everest/library/everest.c", - "3rdparty/everest/library/x25519.c"] - - all_macros = self.parse_macros( - m_headers + p_headers + t_headers + l_headers + d_headers) - enum_consts = self.parse_enum_consts(m_headers + l_headers + t_headers) - identifiers = self.parse_identifiers(m_headers + p_headers + t_headers + l_headers) - symbols = self.parse_symbols() - mbed_names = self.parse_MBED_names( - m_headers + p_headers + t_headers + l_headers + libraries) - - # Remove identifier macros like mbedtls_printf or mbedtls_calloc - macros = list(set(all_macros) - set(identifiers)) - - self.log.info("Found:") - self.log.info(" {} Macros".format(len(all_macros))) - self.log.info(" {} Enum Constants".format(len(enum_consts))) - self.log.info(" {} Identifiers".format(len(identifiers))) - self.log.info(" {} Exported Symbols".format(len(symbols))) - self.log.info("Analysing...") - - self.parse_result = { - "macros": macros, - "enum_consts": enum_consts, - "identifiers": identifiers, - "symbols": symbols, - "mbed_names": mbed_names - } - - def perform_checks(self): + def perform_checks(self, show_problems: True): """ Perform each check in order, output its PASS/FAIL status. Maintain an overall test status, and output that at the end. + + Args: + * show_problems: whether to show the problematic examples. """ + self.log.info("=============") problems = 0 - problems += self.check_symbols_declared_in_header() + problems += self.check_symbols_declared_in_header(show_problems) pattern_checks = [ ("macros", MACRO_PATTERN), - ("enum_consts", MACRO_PATTERN), + ("enum_consts", CONSTANTS_PATTERN), ("identifiers", IDENTIFIER_PATTERN)] for group, check_pattern in pattern_checks: - problems += self.check_match_pattern(group, check_pattern) + problems += self.check_match_pattern( + show_problems, group, check_pattern) - problems += self.check_for_typos() + problems += self.check_for_typos(show_problems) self.log.info("=============") if problems > 0: self.log.info("FAIL: {0} problem(s) to fix".format(str(problems))) + if not show_problems: + self.log.info("Remove --quiet to show the problems.") else: self.log.info("PASS") - def check_symbols_declared_in_header(self): + def check_symbols_declared_in_header(self, show_problems): """ Perform a check that all detected symbols in the library object files are properly declared in headers. - - Outputs to the logger the PASS/FAIL status, followed by the location of - problems. - Returns the number of problems that needs fixing. + Args: + * show_problems: whether to show the problematic examples. + + Returns the number of problems that need fixing. """ problems = [] for symbol in self.parse_result["symbols"]: @@ -452,39 +570,48 @@ class NameCheck(object): if symbol == identifier_match.name: found_symbol_declared = True break - + if not found_symbol_declared: problems.append(SymbolNotInHeader(symbol)) - if problems: - self.set_return_code(1) - self.log.info("All symbols in header: FAIL") - for problem in problems: - self.log.info(str(problem) + "\n") - else: - self.log.info("All symbols in header: PASS") - + self.output_check_result("All symbols in header", problems, show_problems) return len(problems) - def check_match_pattern(self, group_to_check, check_pattern): + + def check_match_pattern(self, show_problems, group_to_check, check_pattern): + """ + Perform a check that all items of a group conform to a regex pattern. + + Args: + * show_problems: whether to show the problematic examples. + * group_to_check: string key to index into self.parse_result. + * check_pattern: the regex to check against. + + Returns the number of problems that need fixing. + """ problems = [] for item_match in self.parse_result[group_to_check]: if not re.match(check_pattern, item_match.name): problems.append(PatternMismatch(check_pattern, item_match)) if re.match(r".*__.*", item_match.name): problems.append(PatternMismatch("double underscore", item_match)) - - if problems: - self.set_return_code(1) - self.log.info("Naming patterns of {}: FAIL".format(group_to_check)) - for problem in problems: - self.log.info(str(problem) + "\n") - else: - self.log.info("Naming patterns of {}: PASS".format(group_to_check)) - + + self.output_check_result( + "Naming patterns of {}".format(group_to_check), + problems, + show_problems) return len(problems) - def check_for_typos(self): + def check_for_typos(self, show_problems): + """ + Perform a check that all words in the soure code beginning with MBED are + either defined as macros, or as enum constants. + + Args: + * show_problems: whether to show the problematic examples. + + Returns the number of problems that need fixing. + """ problems = [] all_caps_names = list(set([ match.name for match @@ -494,23 +621,45 @@ class NameCheck(object): TYPO_EXCLUSION = r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$" for name_match in self.parse_result["mbed_names"]: - if name_match.name not in all_caps_names: - if not re.search(TYPO_EXCLUSION, name_match.name): + found = name_match.name in all_caps_names + + # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the + # PSA driver, they will not exist as macros. However, they + # should still be checked for typos using the equivalent + # BUILTINs that exist. + if "MBEDTLS_PSA_ACCEL_" in name_match.name: + found = name_match.name.replace( + "MBEDTLS_PSA_ACCEL_", + "MBEDTLS_PSA_BUILTIN_") in all_caps_names + + if not found and not re.search(TYPO_EXCLUSION, name_match.name): problems.append(Typo(name_match)) + self.output_check_result("Likely typos", problems, show_problems) + return len(problems) + + def output_check_result(self, name, problems, show_problems): + """ + Write out the PASS/FAIL status of a performed check depending on whether + there were problems. + + Args: + * show_problems: whether to show the problematic examples. + """ if problems: self.set_return_code(1) - self.log.info("Likely typos: FAIL") - for problem in problems: - self.log.info(str(problem) + "\n") + self.log.info("{}: FAIL".format(name)) + if show_problems: + self.log.info("") + for problem in problems: + self.log.warn(str(problem) + "\n") else: - self.log.info("Likely typos: PASS") - - return len(problems) + self.log.info("{}: PASS".format(name)) def main(): """ - Main function, parses command-line arguments. + Perform argument parsing, and create an instance of NameCheck to begin the + core operation. """ parser = argparse.ArgumentParser( @@ -523,20 +672,28 @@ def main(): parser.add_argument("-v", "--verbose", action="store_true", - help="enable script debug outputs") - + help="show parse results") + + parser.add_argument("-q", "--quiet", + action="store_true", + help="hide unnecessary text and problematic examples") + args = parser.parse_args() try: name_check = NameCheck() name_check.setup_logger(verbose=args.verbose) name_check.parse_names_in_source() - name_check.perform_checks() + name_check.perform_checks(show_problems=not args.quiet) + sys.exit(name_check.return_code) + except subprocess.CalledProcessError as error: + traceback.print_exc() + print("!! Compilation faced a critical error, " + "check-names can't continue further.") sys.exit(name_check.return_code) except Exception: traceback.print_exc() sys.exit(2) - if __name__ == "__main__": main() From 201f9e85cad648db01895af6e79084a064bdab8a Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Fri, 6 Aug 2021 16:36:54 +0100 Subject: [PATCH 22/68] Make use of -v to log some debug information Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 828702b17..e3863cfc8 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -141,6 +141,7 @@ class NameCheck(object): def set_return_code(self, return_code): if return_code > self.return_code: + self.log.debug("Setting new return code to {}".format(return_code)) self.return_code = return_code def setup_logger(self, verbose=False): @@ -252,7 +253,7 @@ class NameCheck(object): ) macros = [] - + self.log.debug("Looking for macros in {} files".format(len(header_files))) for header_file in header_files: with open(header_file, "r") as header: for line in header: @@ -277,7 +278,7 @@ class NameCheck(object): Returns a List of Match objects for words beginning with MBED. """ MBED_names = [] - + self.log.debug("Looking for MBED names in {} files".format(len(files))) for filename in files: with open(filename, "r") as fp: for line in fp: @@ -307,7 +308,7 @@ class NameCheck(object): """ enum_consts = [] - + self.log.debug("Looking for enum consts in {} files".format(len(header_files))) for header_file in header_files: # Emulate a finite state machine to parse enum declarations. # 0 = not in enum @@ -357,7 +358,7 @@ class NameCheck(object): ) identifiers = [] - + self.log.debug("Looking for identifiers in {} files".format(len(header_files))) for header_file in header_files: with open(header_file, "r") as header: in_block_comment = False @@ -593,6 +594,7 @@ class NameCheck(object): for item_match in self.parse_result[group_to_check]: if not re.match(check_pattern, item_match.name): problems.append(PatternMismatch(check_pattern, item_match)) + # Double underscore is a reserved identifier, never to be used if re.match(r".*__.*", item_match.name): problems.append(PatternMismatch("double underscore", item_match)) @@ -633,7 +635,7 @@ class NameCheck(object): "MBEDTLS_PSA_BUILTIN_") in all_caps_names if not found and not re.search(TYPO_EXCLUSION, name_match.name): - problems.append(Typo(name_match)) + problems.append(Typo(name_match)) self.output_check_result("Likely typos", problems, show_problems) return len(problems) From 5939a2a4de08e4c1be482532d329411af823294e Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Fri, 6 Aug 2021 16:40:30 +0100 Subject: [PATCH 23/68] Check environment by verifying other dirs' existence Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index e3863cfc8..b9e028bdd 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -162,11 +162,10 @@ class NameCheck(object): Check that the current working directory is the project root, and throw an exception if not. """ - current_dir = os.path.realpath('.') - root_dir = os.path.dirname(os.path.dirname( - os.path.dirname(os.path.realpath(__file__)))) - if current_dir != root_dir: - raise Exception("Must be run from Mbed TLS root") + if (not os.path.isdir("include") or + not os.path.isdir("tests") or + not os.path.isdir("library")): + raise Exception("This script must be run from Mbed TLS root") def get_files(self, extension, directory): """ From d24e037dd1742af979d046cff4be0ea583f3791c Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Fri, 6 Aug 2021 16:42:33 +0100 Subject: [PATCH 24/68] Warn user if files are excluded from search Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index b9e028bdd..406810b8c 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -193,6 +193,10 @@ class NameCheck(object): internal variable self.parse_result. """ self.log.info("Parsing source code...") + self.log.debug( + "The following files are excluded from the search: {}" + .format(str(self.excluded_files)) + ) m_headers = self.get_files("h", os.path.join("include", "mbedtls")) p_headers = self.get_files("h", os.path.join("include", "psa")) From 5c1acf2735c103c70251df2d389a152138ae0e8f Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Fri, 6 Aug 2021 16:44:08 +0100 Subject: [PATCH 25/68] Match macros with spaces between # and define Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 406810b8c..7286cb751 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -250,7 +250,7 @@ class NameCheck(object): Returns a List of Match objects for the found macros. """ - MACRO_REGEX = r"#define (?P\w+)" + MACRO_REGEX = r"# *define +(?P\w+)" NON_MACROS = ( "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_" ) From 13ecd996fc87e9da03fd0b4013e00f2be8e00394 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Fri, 6 Aug 2021 16:56:52 +0100 Subject: [PATCH 26/68] Improve regex to adapt to flexible spaces Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 7286cb751..100001bee 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -320,16 +320,19 @@ class NameCheck(object): state = 0 with open(header_file, "r") as header: for line in header: - if state is 0 and re.match(r"^(typedef )?enum {", line): + # Match typedefs and brackets only when they are at the + # beginning of the line -- if they are indented, they might + # be sub-structures within structs, etc. + if state is 0 and re.match(r"^(typedef +)?enum +{", line): state = 1 - elif state is 0 and re.match(r"^(typedef )?enum", line): + elif state is 0 and re.match(r"^(typedef +)?enum", line): state = 2 elif state is 2 and re.match(r"^{", line): state = 1 elif state is 1 and re.match(r"^}", line): state = 0 - elif state is 1 and not re.match(r"^#", line): - enum_const = re.match(r"^\s*(?P\w+)", line) + elif state is 1 and not re.match(r" *#", line): + enum_const = re.match(r" *(?P\w+)", line) if enum_const: enum_consts.append(Match( header_file, @@ -351,9 +354,9 @@ class NameCheck(object): """ EXCLUDED_LINES = ( r"^(" - r"extern \"C\"|" - r"(typedef )?(struct|union|enum)( {)?$|" - r"};?$|" + r"extern +\"C\"|" + r"(typedef +)?(struct|union|enum)( *{)?$|" + r"} *;?$|" r"$|" r"//|" r"#" @@ -389,7 +392,7 @@ class NameCheck(object): # This *might* be a function with its argument brackets on # the next line, or a struct declaration, so keep note of it if re.match( - r"(inline |static |typedef )*\w+ \w+$", + r"(inline +|static +|typedef +)*\w+ +\w+$", line): previous_line = line continue @@ -408,7 +411,7 @@ class NameCheck(object): continue identifier = re.search( - # Match something( + # Match " something(" or " *something(". function calls. r".* \**(\w+)\(|" # Match (*something)( r".*\( *\* *(\w+) *\) *\(|" From 159255ab7bacc519b4364b8ff9f23ed0d9e88744 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Fri, 6 Aug 2021 17:00:28 +0100 Subject: [PATCH 27/68] Document dependency on nm Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 100001bee..659eda903 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -17,11 +17,13 @@ """ This script confirms that the naming of all symbols and identifiers in Mbed TLS -are consistent with the house style and are also self-consistent. It performs -the following checks: +are consistent with the house style and are also self-consistent. It only runs +on Linux and macOS since it depends on nm. + +The script performs the following checks: - All exported and available symbols in the library object files, are explicitly - declared in the header files. + declared in the header files. This uses the nm command. - All macros, constants, and identifiers (function names, struct names, etc) follow the required pattern. - Typo checking: All words that begin with MBED exist as macros or constants. From a4e751257a27af4fd8277d37c4c85e852b3429c0 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Fri, 6 Aug 2021 17:23:28 +0100 Subject: [PATCH 28/68] Output line number, line, and position of error Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 43 ++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 659eda903..5213bf4da 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -52,15 +52,23 @@ class Match(object): Fields: * filename: the file that the match was in. * line: the full line containing the match. + * line_no: the line number of the file. * pos: a tuple of (start, end) positions on the line where the match is. * name: the match itself. """ - def __init__(self, filename, line, pos, name): + def __init__(self, filename, line, line_no, pos, name): self.filename = filename self.line = line + self.line_no = line_no self.pos = pos self.name = name + def __str__(self): + return ( + " |\n" + + " | {}".format(self.line) + + " | " + self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^" + ) class Problem(object): """ A parent class representing a form of static analysis error. @@ -71,7 +79,7 @@ class Problem(object): def __init__(self): self.textwrapper = textwrap.TextWrapper() self.textwrapper.width = 80 - self.textwrapper.initial_indent = " * " + self.textwrapper.initial_indent = " > " self.textwrapper.subsequent_indent = " " class SymbolNotInHeader(Problem): @@ -109,8 +117,12 @@ class PatternMismatch(Problem): def __str__(self): return self.textwrapper.fill( - "{0}: '{1}' does not match the required pattern '{2}'." - .format(self.match.filename, self.match.name, self.pattern)) + "{0}:{1}: '{2}' does not match the required pattern '{3}'." + .format( + self.match.filename, + self.match.line_no, + self.match.name, + self.pattern)) + "\n" + str(self.match) class Typo(Problem): """ @@ -125,10 +137,15 @@ class Typo(Problem): Problem.__init__(self) def __str__(self): + match_len = self.match.pos[1] - self.match.pos[0] return self.textwrapper.fill( - "{0}: '{1}' looks like a typo. It was not found in any macros or " - "any enums. If this is not a typo, put //no-check-names after it." - .format(self.match.filename, self.match.name)) + "{0}:{1}: '{2}' looks like a typo. It was not found in any " + "macros or any enums. If this is not a typo, put " + "//no-check-names after it." + .format( + self.match.filename, + self.match.line_no, + self.match.name)) + "\n" + str(self.match) class NameCheck(object): """ @@ -261,12 +278,15 @@ class NameCheck(object): self.log.debug("Looking for macros in {} files".format(len(header_files))) for header_file in header_files: with open(header_file, "r") as header: + line_no = 0 for line in header: + line_no += 1 for macro in re.finditer(MACRO_REGEX, line): if not macro.group("macro").startswith(NON_MACROS): macros.append(Match( header_file, line, + line_no, (macro.start(), macro.end()), macro.group("macro"))) @@ -286,7 +306,9 @@ class NameCheck(object): self.log.debug("Looking for MBED names in {} files".format(len(files))) for filename in files: with open(filename, "r") as fp: + line_no = 0 for line in fp: + line_no += 1 # Ignore any names that are deliberately opted-out or in # legacy error directives if re.search(r"// *no-check-names|#error", line): @@ -296,6 +318,7 @@ class NameCheck(object): MBED_names.append(Match( filename, line, + line_no, (name.start(), name.end()), name.group(0) )) @@ -321,7 +344,9 @@ class NameCheck(object): # 2 = almost inside enum state = 0 with open(header_file, "r") as header: + line_no = 0 for line in header: + line_no += 1 # Match typedefs and brackets only when they are at the # beginning of the line -- if they are indented, they might # be sub-structures within structs, etc. @@ -339,6 +364,7 @@ class NameCheck(object): enum_consts.append(Match( header_file, line, + line_no, (enum_const.start(), enum_const.end()), enum_const.group("enum_const"))) @@ -369,10 +395,12 @@ class NameCheck(object): self.log.debug("Looking for identifiers in {} files".format(len(header_files))) for header_file in header_files: with open(header_file, "r") as header: + line_no = 0 in_block_comment = False previous_line = None for line in header: + line_no += 1 # Skip parsing this line if a block comment ends on it, # but don't skip if it has just started -- there is a chance # it ends on the same line. @@ -431,6 +459,7 @@ class NameCheck(object): identifiers.append(Match( header_file, line, + line_no, (identifier.start(), identifier.end()), group)) From 8f457cf2227151c7b1239167d586226e396fe337 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Fri, 6 Aug 2021 17:54:58 +0100 Subject: [PATCH 29/68] Use enumerate() to get line numbers Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 5213bf4da..07e29458c 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -278,9 +278,7 @@ class NameCheck(object): self.log.debug("Looking for macros in {} files".format(len(header_files))) for header_file in header_files: with open(header_file, "r") as header: - line_no = 0 - for line in header: - line_no += 1 + for line_no, line in enumerate(header): for macro in re.finditer(MACRO_REGEX, line): if not macro.group("macro").startswith(NON_MACROS): macros.append(Match( @@ -306,9 +304,7 @@ class NameCheck(object): self.log.debug("Looking for MBED names in {} files".format(len(files))) for filename in files: with open(filename, "r") as fp: - line_no = 0 - for line in fp: - line_no += 1 + for line_no, line in enumerate(fp): # Ignore any names that are deliberately opted-out or in # legacy error directives if re.search(r"// *no-check-names|#error", line): @@ -344,9 +340,7 @@ class NameCheck(object): # 2 = almost inside enum state = 0 with open(header_file, "r") as header: - line_no = 0 - for line in header: - line_no += 1 + for line_no, line in enumerate(header): # Match typedefs and brackets only when they are at the # beginning of the line -- if they are indented, they might # be sub-structures within structs, etc. @@ -395,12 +389,10 @@ class NameCheck(object): self.log.debug("Looking for identifiers in {} files".format(len(header_files))) for header_file in header_files: with open(header_file, "r") as header: - line_no = 0 in_block_comment = False previous_line = None - for line in header: - line_no += 1 + for line_no, line in enumerate(header): # Skip parsing this line if a block comment ends on it, # but don't skip if it has just started -- there is a chance # it ends on the same line. From cfc9e4a275072d9e2e7dabc9a09bbdd005e1cdfa Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Fri, 6 Aug 2021 20:02:32 +0100 Subject: [PATCH 30/68] Change identifier regex to better support multiline declarations Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 07e29458c..8ee50702f 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -410,20 +410,21 @@ class NameCheck(object): previous_line = None continue - # Match "^something something$", with optional inline/static - # This *might* be a function with its argument brackets on - # the next line, or a struct declaration, so keep note of it - if re.match( - r"(inline +|static +|typedef +)*\w+ +\w+$", - line): - previous_line = line + # If the line contains only space-separated alphanumeric + # characters (or underscore, asterisk, or, open bracket), + # and nothing else, high chance it's a declaration that + # continues on the next line + if re.match(r"^([\w\*\(]+\s+)+$", line): + if previous_line: + previous_line += " " + line + else: + previous_line = line continue # If previous line seemed to start an unfinished declaration - # (as above), and this line begins with a bracket, concat - # them and treat them as one line. - if previous_line and re.match(" *[\({]", line): - line = previous_line.strip() + line.strip() + # (as above), concat and treat them as one. + if previous_line: + line = previous_line.strip() + " " + line.strip() previous_line = None # Skip parsing if line has a space in front = hueristic to @@ -433,9 +434,15 @@ class NameCheck(object): continue identifier = re.search( - # Match " something(" or " *something(". function calls. - r".* \**(\w+)\(|" - # Match (*something)( + # Match " something(a" or " *something(a". Functions. + # Assumptions: + # - function definition from return type to one of its + # arguments is all on one line (enforced by the above + # previous_line concat) + # - function definition line only contains alphanumeric, + # asterisk, underscore, and open bracket + r".* \**(\w+) *\( *\w|" + # Match "(*something)(". Flexible with spaces. r".*\( *\* *(\w+) *\) *\(|" # Match names of named data structures r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$|" From 25eeb7b935418fdbd4c6b63cd724e9eaff468703 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Fri, 6 Aug 2021 21:27:59 +0100 Subject: [PATCH 31/68] Output subprocess stdout if it errored Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 8ee50702f..3310ae2a0 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -513,6 +513,7 @@ class NameCheck(object): check=True ) except subprocess.CalledProcessError as error: + self.log.debug(error.output) self.set_return_code(2) raise error finally: From 9e0e0e9980788278eb8d41bfccf16a69bf21a252 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Fri, 6 Aug 2021 22:01:37 +0100 Subject: [PATCH 32/68] Fix Pylint-detected function argument syntax error Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 3310ae2a0..349e66bdf 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -560,7 +560,7 @@ class NameCheck(object): return symbols - def perform_checks(self, show_problems: True): + def perform_checks(self, show_problems=True): """ Perform each check in order, output its PASS/FAIL status. Maintain an overall test status, and output that at the end. From d93fa37aa67bb6dfa153292eb7ce47e178ebb0d6 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Fri, 6 Aug 2021 23:05:55 +0100 Subject: [PATCH 33/68] Address all pylint issues to follow style Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 234 +++++++++++++++++------------------ 1 file changed, 113 insertions(+), 121 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 349e66bdf..786356990 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -45,21 +45,20 @@ MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$" CONSTANTS_PATTERN = MACRO_PATTERN IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$" -class Match(object): +class Match(): # pylint: disable=too-few-public-methods """ A class representing a match, together with its found position. Fields: * filename: the file that the match was in. * line: the full line containing the match. - * line_no: the line number of the file. - * pos: a tuple of (start, end) positions on the line where the match is. + * pos: a tuple of (line_no, start, end) positions on the file line where the + match is. * name: the match itself. """ - def __init__(self, filename, line, line_no, pos, name): + def __init__(self, filename, line, pos, name): self.filename = filename self.line = line - self.line_no = line_no self.pos = pos self.name = name @@ -67,9 +66,10 @@ class Match(object): return ( " |\n" + " | {}".format(self.line) + - " | " + self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^" + " | " + self.pos[1] * " " + (self.pos[2] - self.pos[1]) * "^" ) -class Problem(object): + +class Problem(): # pylint: disable=too-few-public-methods """ A parent class representing a form of static analysis error. @@ -82,7 +82,7 @@ class Problem(object): self.textwrapper.initial_indent = " > " self.textwrapper.subsequent_indent = " " -class SymbolNotInHeader(Problem): +class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods """ A problem that occurs when an exported/available symbol in the object file is not explicitly declared in header files. Created with @@ -101,7 +101,7 @@ class SymbolNotInHeader(Problem): "however it was not declared in any header files." .format(self.symbol_name)) -class PatternMismatch(Problem): +class PatternMismatch(Problem): # pylint: disable=too-few-public-methods """ A problem that occurs when something doesn't match the expected pattern. Created with NameCheck.check_match_pattern() @@ -120,11 +120,11 @@ class PatternMismatch(Problem): "{0}:{1}: '{2}' does not match the required pattern '{3}'." .format( self.match.filename, - self.match.line_no, + self.match.pos[0], self.match.name, self.pattern)) + "\n" + str(self.match) -class Typo(Problem): +class Typo(Problem): # pylint: disable=too-few-public-methods """ A problem that occurs when a word using MBED doesn't appear to be defined as constants nor enum values. Created with NameCheck.check_for_typos() @@ -137,26 +137,25 @@ class Typo(Problem): Problem.__init__(self) def __str__(self): - match_len = self.match.pos[1] - self.match.pos[0] return self.textwrapper.fill( "{0}:{1}: '{2}' looks like a typo. It was not found in any " "macros or any enums. If this is not a typo, put " "//no-check-names after it." .format( self.match.filename, - self.match.line_no, + self.match.pos[0], self.match.name)) + "\n" + str(self.match) -class NameCheck(object): +class NameCheck(): """ Representation of the core name checking operation performed by this script. Shares a common logger, common excluded filenames, and a shared return_code. """ def __init__(self): self.log = None - self.check_repo_path() self.return_code = 0 self.excluded_files = ["bn_mul", "compat-2.x.h"] + self.parse_result = {} def set_return_code(self, return_code): if return_code > self.return_code: @@ -176,16 +175,6 @@ class NameCheck(object): self.log.setLevel(logging.INFO) self.log.addHandler(logging.StreamHandler()) - def check_repo_path(self): - """ - Check that the current working directory is the project root, and throw - an exception if not. - """ - if (not os.path.isdir("include") or - not os.path.isdir("tests") or - not os.path.isdir("library")): - raise Exception("This script must be run from Mbed TLS root") - def get_files(self, extension, directory): """ Get all files that end with .extension in the specified directory @@ -198,7 +187,7 @@ class NameCheck(object): Returns a List of relative filepaths. """ filenames = [] - for root, dirs, files in sorted(os.walk(directory)): + for root, _, files in sorted(os.walk(directory)): for filename in sorted(files): if (filename not in self.excluded_files and filename.endswith("." + extension)): @@ -233,7 +222,7 @@ class NameCheck(object): m_headers + l_headers + t_headers) identifiers = self.parse_identifiers( m_headers + p_headers + t_headers + l_headers) - mbed_names = self.parse_MBED_names( + mbed_words = self.parse_mbed_words( m_headers + p_headers + t_headers + l_headers + libraries) symbols = self.parse_symbols() @@ -257,7 +246,7 @@ class NameCheck(object): "enum_consts": enum_consts, "identifiers": identifiers, "symbols": symbols, - "mbed_names": mbed_names + "mbed_words": mbed_words } def parse_macros(self, header_files): @@ -269,28 +258,29 @@ class NameCheck(object): Returns a List of Match objects for the found macros. """ - MACRO_REGEX = r"# *define +(?P\w+)" - NON_MACROS = ( + macro_regex = re.compile(r"# *define +(?P\w+)") + exclusions = ( "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_" ) - macros = [] self.log.debug("Looking for macros in {} files".format(len(header_files))) + + macros = [] + for header_file in header_files: with open(header_file, "r") as header: for line_no, line in enumerate(header): - for macro in re.finditer(MACRO_REGEX, line): - if not macro.group("macro").startswith(NON_MACROS): + for macro in macro_regex.finditer(line): + if not macro.group("macro").startswith(exclusions): macros.append(Match( header_file, line, - line_no, - (macro.start(), macro.end()), + (line_no, macro.start(), macro.end()), macro.group("macro"))) return macros - def parse_MBED_names(self, files): + def parse_mbed_words(self, files): """ Parse all words in the file that begin with MBED. Includes macros. There have been typos of TLS, hence the broader check than MBEDTLS. @@ -300,26 +290,28 @@ class NameCheck(object): Returns a List of Match objects for words beginning with MBED. """ - MBED_names = [] + mbed_regex = re.compile(r"\bMBED.+?_[A-Z0-9_]*") + exclusions = re.compile(r"// *no-check-names|#error") + self.log.debug("Looking for MBED names in {} files".format(len(files))) + + mbed_words = [] + for filename in files: with open(filename, "r") as fp: for line_no, line in enumerate(fp): - # Ignore any names that are deliberately opted-out or in - # legacy error directives - if re.search(r"// *no-check-names|#error", line): + if exclusions.search(line): continue - for name in re.finditer(r"\bMBED.+?_[A-Z0-9_]*", line): - MBED_names.append(Match( + for name in mbed_regex.finditer(line): + mbed_words.append(Match( filename, line, - line_no, - (name.start(), name.end()), + (line_no, name.start(), name.end()), name.group(0) )) - return MBED_names + return mbed_words def parse_enum_consts(self, header_files): """ @@ -330,9 +322,10 @@ class NameCheck(object): Returns a List of Match objects for the findings. """ + self.log.debug("Looking for enum consts in {} files".format(len(header_files))) enum_consts = [] - self.log.debug("Looking for enum consts in {} files".format(len(header_files))) + for header_file in header_files: # Emulate a finite state machine to parse enum declarations. # 0 = not in enum @@ -344,22 +337,21 @@ class NameCheck(object): # Match typedefs and brackets only when they are at the # beginning of the line -- if they are indented, they might # be sub-structures within structs, etc. - if state is 0 and re.match(r"^(typedef +)?enum +{", line): + if state == 0 and re.match(r"^(typedef +)?enum +{", line): state = 1 - elif state is 0 and re.match(r"^(typedef +)?enum", line): + elif state == 0 and re.match(r"^(typedef +)?enum", line): state = 2 - elif state is 2 and re.match(r"^{", line): + elif state == 2 and re.match(r"^{", line): state = 1 - elif state is 1 and re.match(r"^}", line): + elif state == 1 and re.match(r"^}", line): state = 0 - elif state is 1 and not re.match(r" *#", line): + elif state == 1 and not re.match(r" *#", line): enum_const = re.match(r" *(?P\w+)", line) if enum_const: enum_consts.append(Match( header_file, line, - line_no, - (enum_const.start(), enum_const.end()), + (line_no, enum_const.start(), enum_const.end()), enum_const.group("enum_const"))) return enum_consts @@ -374,23 +366,37 @@ class NameCheck(object): Returns a List of Match objects with identifiers. """ - EXCLUDED_LINES = ( - r"^(" - r"extern +\"C\"|" - r"(typedef +)?(struct|union|enum)( *{)?$|" - r"} *;?$|" - r"$|" - r"//|" - r"#" - r")" - ) + identifier_regex = re.compile( + # Match " something(a" or " *something(a". Functions. + # Assumptions: + # - function definition from return type to one of its arguments is + # all on one line (enforced by the previous_line concat below) + # - function definition line only contains alphanumeric, asterisk, + # underscore, and open bracket + r".* \**(\w+) *\( *\w|" + # Match "(*something)(". Flexible with spaces. + r".*\( *\* *(\w+) *\) *\(|" + # Match names of named data structures. + r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$|" + # Match names of typedef instances, after closing bracket. + r"}? *(\w+)[;[].*") + exclusion_lines = re.compile(r"^(" + r"extern +\"C\"|" + r"(typedef +)?(struct|union|enum)( *{)?$|" + r"} *;?$|" + r"$|" + r"//|" + r"#" + r")") + + self.log.debug("Looking for identifiers in {} files".format(len(header_files))) identifiers = [] - self.log.debug("Looking for identifiers in {} files".format(len(header_files))) + for header_file in header_files: with open(header_file, "r") as header: in_block_comment = False - previous_line = None + previous_line = "" for line_no, line in enumerate(header): # Skip parsing this line if a block comment ends on it, @@ -403,11 +409,11 @@ class NameCheck(object): continue if in_block_comment: - previous_line = None + previous_line = "" continue - if re.match(EXCLUDED_LINES, line): - previous_line = None + if exclusion_lines.match(line): + previous_line = "" continue # If the line contains only space-separated alphanumeric @@ -415,17 +421,14 @@ class NameCheck(object): # and nothing else, high chance it's a declaration that # continues on the next line if re.match(r"^([\w\*\(]+\s+)+$", line): - if previous_line: - previous_line += " " + line - else: - previous_line = line + previous_line += line continue # If previous line seemed to start an unfinished declaration # (as above), concat and treat them as one. if previous_line: line = previous_line.strip() + " " + line.strip() - previous_line = None + previous_line = "" # Skip parsing if line has a space in front = hueristic to # skip function argument lines (highly subject to formatting @@ -433,23 +436,7 @@ class NameCheck(object): if line[0] == " ": continue - identifier = re.search( - # Match " something(a" or " *something(a". Functions. - # Assumptions: - # - function definition from return type to one of its - # arguments is all on one line (enforced by the above - # previous_line concat) - # - function definition line only contains alphanumeric, - # asterisk, underscore, and open bracket - r".* \**(\w+) *\( *\w|" - # Match "(*something)(". Flexible with spaces. - r".*\( *\* *(\w+) *\) *\(|" - # Match names of named data structures - r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$|" - # Match names of typedef instances, after closing bracket - r"}? *(\w+)[;[].*", - line - ) + identifier = identifier_regex.search(line) if identifier: # Find the group that matched, and append it @@ -458,8 +445,7 @@ class NameCheck(object): identifiers.append(Match( header_file, line, - line_no, - (identifier.start(), identifier.end()), + (line_no, identifier.start(), identifier.end()), group)) return identifiers @@ -502,10 +488,8 @@ class NameCheck(object): # Perform object file analysis using nm symbols = self.parse_symbols_from_nm( ["library/libmbedcrypto.a", - "library/libmbedtls.a", - "library/libmbedx509.a"]) - - symbols.sort() + "library/libmbedtls.a", + "library/libmbedx509.a"]) subprocess.run( ["make", "clean"], @@ -533,9 +517,9 @@ class NameCheck(object): Returns a List of unique symbols defined and used in any of the object files. """ - UNDEFINED_SYMBOL = r"^\S+: +U |^$|^\S+:$" - VALID_SYMBOL = r"^\S+( [0-9A-Fa-f]+)* . _*(?P\w+)" - EXCLUSIONS = ("FStar", "Hacl") + nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$") + nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P\w+)") + nm_exclusions = ("FStar", "Hacl") symbols = [] @@ -551,9 +535,10 @@ class NameCheck(object): ).stdout for line in nm_output.splitlines(): - if not re.match(UNDEFINED_SYMBOL, line): - symbol = re.match(VALID_SYMBOL, line) - if symbol and not symbol.group("symbol").startswith(EXCLUSIONS): + if not nm_undefined_regex.match(line): + symbol = nm_valid_regex.match(line) + if (symbol and not symbol.group("symbol").startswith( + nm_exclusions)): symbols.append(symbol.group("symbol")) else: self.log.error(line) @@ -573,10 +558,9 @@ class NameCheck(object): problems += self.check_symbols_declared_in_header(show_problems) - pattern_checks = [ - ("macros", MACRO_PATTERN), - ("enum_consts", CONSTANTS_PATTERN), - ("identifiers", IDENTIFIER_PATTERN)] + pattern_checks = [("macros", MACRO_PATTERN), + ("enum_consts", CONSTANTS_PATTERN), + ("identifiers", IDENTIFIER_PATTERN)] for group, check_pattern in pattern_checks: problems += self.check_match_pattern( show_problems, group, check_pattern) @@ -602,6 +586,7 @@ class NameCheck(object): Returns the number of problems that need fixing. """ problems = [] + for symbol in self.parse_result["symbols"]: found_symbol_declared = False for identifier_match in self.parse_result["identifiers"]: @@ -628,6 +613,7 @@ class NameCheck(object): Returns the number of problems that need fixing. """ problems = [] + for item_match in self.parse_result[group_to_check]: if not re.match(check_pattern, item_match.name): problems.append(PatternMismatch(check_pattern, item_match)) @@ -652,14 +638,15 @@ class NameCheck(object): Returns the number of problems that need fixing. """ problems = [] - all_caps_names = list(set([ - match.name for match - in self.parse_result["macros"] + self.parse_result["enum_consts"]] - )) - TYPO_EXCLUSION = r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$" + # Set comprehension, equivalent to a list comprehension inside set() + all_caps_names = { + match.name + for match + in self.parse_result["macros"] + self.parse_result["enum_consts"]} + typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$") - for name_match in self.parse_result["mbed_names"]: + for name_match in self.parse_result["mbed_words"]: found = name_match.name in all_caps_names # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the @@ -671,7 +658,7 @@ class NameCheck(object): "MBEDTLS_PSA_ACCEL_", "MBEDTLS_PSA_BUILTIN_") in all_caps_names - if not found and not re.search(TYPO_EXCLUSION, name_match.name): + if not found and not typo_exclusion.search(name_match.name): problems.append(Typo(name_match)) self.output_check_result("Likely typos", problems, show_problems) @@ -691,16 +678,25 @@ class NameCheck(object): if show_problems: self.log.info("") for problem in problems: - self.log.warn(str(problem) + "\n") + self.log.warning("{}\n".format(str(problem))) else: self.log.info("{}: PASS".format(name)) +def check_repo_path(): + """ + Check that the current working directory is the project root, and throw + an exception if not. + """ + if (not os.path.isdir("include") or + not os.path.isdir("tests") or + not os.path.isdir("library")): + raise Exception("This script must be run from Mbed TLS root") + def main(): """ Perform argument parsing, and create an instance of NameCheck to begin the core operation. """ - parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, description=( @@ -720,17 +716,13 @@ def main(): args = parser.parse_args() try: + check_repo_path() name_check = NameCheck() name_check.setup_logger(verbose=args.verbose) name_check.parse_names_in_source() name_check.perform_checks(show_problems=not args.quiet) sys.exit(name_check.return_code) - except subprocess.CalledProcessError as error: - traceback.print_exc() - print("!! Compilation faced a critical error, " - "check-names can't continue further.") - sys.exit(name_check.return_code) - except Exception: + except Exception: # pylint: disable=broad-except traceback.print_exc() sys.exit(2) From bcc3d99cc10561fd40f8c1eba585a9cf4f8bd123 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Fri, 6 Aug 2021 23:14:58 +0100 Subject: [PATCH 34/68] Fix compatibiliy with Python 3.5 on the CI Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 786356990..b1835c680 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -471,7 +471,7 @@ class NameCheck(): # as exceptions and logged. subprocess.run( ["python3", "scripts/config.py", "full"], - encoding=sys.stdout.encoding, + universal_newlines=True, check=True ) my_environment = os.environ.copy() @@ -479,7 +479,7 @@ class NameCheck(): subprocess.run( ["make", "clean", "lib"], env=my_environment, - encoding=sys.stdout.encoding, + universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True @@ -493,7 +493,7 @@ class NameCheck(): subprocess.run( ["make", "clean"], - encoding=sys.stdout.encoding, + universal_newlines=True, check=True ) except subprocess.CalledProcessError as error: @@ -528,7 +528,7 @@ class NameCheck(): for lib in object_files: nm_output += subprocess.run( ["nm", "-og", lib], - encoding=sys.stdout.encoding, + universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True From 381fda8550212a6250a7dbb6b1a56300674cd9e6 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Fri, 6 Aug 2021 23:37:20 +0100 Subject: [PATCH 35/68] Print line number next to problem in check-names Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index b1835c680..5878bfa49 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -63,10 +63,15 @@ class Match(): # pylint: disable=too-few-public-methods self.name = name def __str__(self): + ln_str = str(self.pos[0]) + gutter_len = max(4, len(ln_str)) + gutter = (gutter_len - len(ln_str)) * " " + ln_str + underline = self.pos[1] * " " + (self.pos[2] - self.pos[1]) * "^" + return ( - " |\n" + - " | {}".format(self.line) + - " | " + self.pos[1] * " " + (self.pos[2] - self.pos[1]) * "^" + " {0} |\n".format(gutter_len * " ") + + " {0} | {1}".format(gutter, self.line) + + " {0} | {1}".format(gutter_len * " ", underline) ) class Problem(): # pylint: disable=too-few-public-methods From a083d15eddbe0c55b91ce5289dd672b6490452e0 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Sat, 7 Aug 2021 00:25:59 +0100 Subject: [PATCH 36/68] Specify file open encoding as utf-8 in check-names Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 5878bfa49..f47d7e6c5 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -273,7 +273,7 @@ class NameCheck(): macros = [] for header_file in header_files: - with open(header_file, "r") as header: + with open(header_file, "r", encoding="utf-8") as header: for line_no, line in enumerate(header): for macro in macro_regex.finditer(line): if not macro.group("macro").startswith(exclusions): @@ -303,7 +303,7 @@ class NameCheck(): mbed_words = [] for filename in files: - with open(filename, "r") as fp: + with open(filename, "r", encoding="utf-8") as fp: for line_no, line in enumerate(fp): if exclusions.search(line): continue @@ -337,7 +337,7 @@ class NameCheck(): # 1 = inside enum # 2 = almost inside enum state = 0 - with open(header_file, "r") as header: + with open(header_file, "r", encoding="utf-8") as header: for line_no, line in enumerate(header): # Match typedefs and brackets only when they are at the # beginning of the line -- if they are indented, they might @@ -399,7 +399,7 @@ class NameCheck(): identifiers = [] for header_file in header_files: - with open(header_file, "r") as header: + with open(header_file, "r", encoding="utf-8") as header: in_block_comment = False previous_line = "" From 12a7ecda5afa649d43a388dd280c116f5505d071 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Sat, 7 Aug 2021 00:40:29 +0100 Subject: [PATCH 37/68] Fix further pylint issues picked up by Travis CI Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index f47d7e6c5..81fd5ffb8 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -386,13 +386,13 @@ class NameCheck(): # Match names of typedef instances, after closing bracket. r"}? *(\w+)[;[].*") exclusion_lines = re.compile(r"^(" - r"extern +\"C\"|" - r"(typedef +)?(struct|union|enum)( *{)?$|" - r"} *;?$|" - r"$|" - r"//|" - r"#" - r")") + r"extern +\"C\"|" + r"(typedef +)?(struct|union|enum)( *{)?$|" + r"} *;?$|" + r"$|" + r"//|" + r"#" + r")") self.log.debug("Looking for identifiers in {} files".format(len(header_files))) @@ -524,7 +524,7 @@ class NameCheck(): """ nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$") nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P\w+)") - nm_exclusions = ("FStar", "Hacl") + exclusions = ("FStar", "Hacl") symbols = [] @@ -542,8 +542,7 @@ class NameCheck(): for line in nm_output.splitlines(): if not nm_undefined_regex.match(line): symbol = nm_valid_regex.match(line) - if (symbol and not symbol.group("symbol").startswith( - nm_exclusions)): + if (symbol and not symbol.group("symbol").startswith(exclusions)): symbols.append(symbol.group("symbol")) else: self.log.error(line) From b47b504418e146dc0fae6be266bbdfd71a2d9827 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Sat, 7 Aug 2021 00:42:54 +0100 Subject: [PATCH 38/68] Improve comments in parse_mbed_words() Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 81fd5ffb8..228ab4c64 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -287,14 +287,15 @@ class NameCheck(): def parse_mbed_words(self, files): """ - Parse all words in the file that begin with MBED. Includes macros. - There have been typos of TLS, hence the broader check than MBEDTLS. + Parse all words in the file that begin with MBED, in and out of macros, + comments, anything. Args: * files: a List of filepaths to look through. Returns a List of Match objects for words beginning with MBED. """ + # Typos of TLS are common, hence the broader check below than MBEDTLS. mbed_regex = re.compile(r"\bMBED.+?_[A-Z0-9_]*") exclusions = re.compile(r"// *no-check-names|#error") From 55614b51f11d992266e552be9b0b41185d777a10 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Sat, 7 Aug 2021 01:00:18 +0100 Subject: [PATCH 39/68] Use --quiet to hide explanations and show only minimal necessary info Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 88 +++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 36 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 228ab4c64..509b4353a 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -71,7 +71,7 @@ class Match(): # pylint: disable=too-few-public-methods return ( " {0} |\n".format(gutter_len * " ") + " {0} | {1}".format(gutter, self.line) + - " {0} | {1}".format(gutter_len * " ", underline) + " {0} | {1}\n".format(gutter_len * " ", underline) ) class Problem(): # pylint: disable=too-few-public-methods @@ -96,11 +96,15 @@ class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods Fields: * symbol_name: the name of the symbol. """ - def __init__(self, symbol_name): + def __init__(self, symbol_name, quiet=False): self.symbol_name = symbol_name + self.quiet = quiet Problem.__init__(self) def __str__(self): + if self.quiet: + return "{0}".format(self.symbol_name) + return self.textwrapper.fill( "'{0}' was found as an available symbol in the output of nm, " "however it was not declared in any header files." @@ -115,12 +119,20 @@ class PatternMismatch(Problem): # pylint: disable=too-few-public-methods * pattern: the expected regex pattern * match: the Match object in question """ - def __init__(self, pattern, match): + def __init__(self, pattern, match, quiet=False): self.pattern = pattern self.match = match + self.quiet = quiet Problem.__init__(self) def __str__(self): + if self.quiet: + return ("{0}:{1}:{3}" + .format( + self.match.filename, + self.match.pos[0], + self.match.name)) + return self.textwrapper.fill( "{0}:{1}: '{2}' does not match the required pattern '{3}'." .format( @@ -137,11 +149,19 @@ class Typo(Problem): # pylint: disable=too-few-public-methods Fields: * match: the Match object of the MBED name in question. """ - def __init__(self, match): + def __init__(self, match, quiet=False): self.match = match + self.quiet = quiet Problem.__init__(self) def __str__(self): + if self.quiet: + return ("{0}:{1}:{2}" + .format( + self.match.filename, + self.match.pos[0], + self.match.name)) + return self.textwrapper.fill( "{0}:{1}: '{2}' looks like a typo. It was not found in any " "macros or any enums. If this is not a typo, put " @@ -550,43 +570,42 @@ class NameCheck(): return symbols - def perform_checks(self, show_problems=True): + def perform_checks(self, quiet=False): """ Perform each check in order, output its PASS/FAIL status. Maintain an overall test status, and output that at the end. Args: - * show_problems: whether to show the problematic examples. + * quiet: whether to hide detailed problem explanation. """ self.log.info("=============") problems = 0 - problems += self.check_symbols_declared_in_header(show_problems) + problems += self.check_symbols_declared_in_header(quiet) pattern_checks = [("macros", MACRO_PATTERN), ("enum_consts", CONSTANTS_PATTERN), ("identifiers", IDENTIFIER_PATTERN)] for group, check_pattern in pattern_checks: - problems += self.check_match_pattern( - show_problems, group, check_pattern) + problems += self.check_match_pattern(quiet, group, check_pattern) - problems += self.check_for_typos(show_problems) + problems += self.check_for_typos(quiet) self.log.info("=============") if problems > 0: self.log.info("FAIL: {0} problem(s) to fix".format(str(problems))) - if not show_problems: - self.log.info("Remove --quiet to show the problems.") + if quiet: + self.log.info("Remove --quiet to see explanations.") else: self.log.info("PASS") - def check_symbols_declared_in_header(self, show_problems): + def check_symbols_declared_in_header(self, quiet): """ Perform a check that all detected symbols in the library object files are properly declared in headers. Args: - * show_problems: whether to show the problematic examples. + * quiet: whether to hide detailed problem explanation. Returns the number of problems that need fixing. """ @@ -600,18 +619,18 @@ class NameCheck(): break if not found_symbol_declared: - problems.append(SymbolNotInHeader(symbol)) + problems.append(SymbolNotInHeader(symbol, quiet=quiet)) - self.output_check_result("All symbols in header", problems, show_problems) + self.output_check_result("All symbols in header", problems) return len(problems) - def check_match_pattern(self, show_problems, group_to_check, check_pattern): + def check_match_pattern(self, quiet, group_to_check, check_pattern): """ Perform a check that all items of a group conform to a regex pattern. Args: - * show_problems: whether to show the problematic examples. + * quiet: whether to hide detailed problem explanation. * group_to_check: string key to index into self.parse_result. * check_pattern: the regex to check against. @@ -624,21 +643,23 @@ class NameCheck(): problems.append(PatternMismatch(check_pattern, item_match)) # Double underscore is a reserved identifier, never to be used if re.match(r".*__.*", item_match.name): - problems.append(PatternMismatch("double underscore", item_match)) + problems.append(PatternMismatch( + "double underscore", + item_match, + quiet=quiet)) self.output_check_result( "Naming patterns of {}".format(group_to_check), - problems, - show_problems) + problems) return len(problems) - def check_for_typos(self, show_problems): + def check_for_typos(self, quiet): """ Perform a check that all words in the soure code beginning with MBED are either defined as macros, or as enum constants. Args: - * show_problems: whether to show the problematic examples. + * quiet: whether to hide detailed problem explanation. Returns the number of problems that need fixing. """ @@ -664,26 +685,21 @@ class NameCheck(): "MBEDTLS_PSA_BUILTIN_") in all_caps_names if not found and not typo_exclusion.search(name_match.name): - problems.append(Typo(name_match)) + problems.append(Typo(name_match, quiet=quiet)) - self.output_check_result("Likely typos", problems, show_problems) + self.output_check_result("Likely typos", problems) return len(problems) - def output_check_result(self, name, problems, show_problems): + def output_check_result(self, name, problems): """ Write out the PASS/FAIL status of a performed check depending on whether there were problems. - - Args: - * show_problems: whether to show the problematic examples. """ if problems: self.set_return_code(1) - self.log.info("{}: FAIL".format(name)) - if show_problems: - self.log.info("") - for problem in problems: - self.log.warning("{}\n".format(str(problem))) + self.log.info("{}: FAIL\n".format(name)) + for problem in problems: + self.log.warning(str(problem)) else: self.log.info("{}: PASS".format(name)) @@ -716,7 +732,7 @@ def main(): parser.add_argument("-q", "--quiet", action="store_true", - help="hide unnecessary text and problematic examples") + help="hide unnecessary text, explanations, and highlighs") args = parser.parse_args() @@ -725,7 +741,7 @@ def main(): name_check = NameCheck() name_check.setup_logger(verbose=args.verbose) name_check.parse_names_in_source() - name_check.perform_checks(show_problems=not args.quiet) + name_check.perform_checks(quiet=args.quiet) sys.exit(name_check.return_code) except Exception: # pylint: disable=broad-except traceback.print_exc() From fc54dfb0d662301de595fc5e8a9c83a8d766bd35 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Sat, 7 Aug 2021 17:18:28 +0100 Subject: [PATCH 40/68] Move check_repo_path into NameCheck as staticmethod Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index 509b4353a..b8a12885f 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -27,6 +27,9 @@ The script performs the following checks: - All macros, constants, and identifiers (function names, struct names, etc) follow the required pattern. - Typo checking: All words that begin with MBED exist as macros or constants. + +Returns 0 on success, 1 on test failure, and 2 if there is a script error or a +subprocess error. Must be run from Mbed TLS root. """ import argparse @@ -178,6 +181,7 @@ class NameCheck(): """ def __init__(self): self.log = None + self.check_repo_path() self.return_code = 0 self.excluded_files = ["bn_mul", "compat-2.x.h"] self.parse_result = {} @@ -187,6 +191,15 @@ class NameCheck(): self.log.debug("Setting new return code to {}".format(return_code)) self.return_code = return_code + @staticmethod + def check_repo_path(): + """ + Check that the current working directory is the project root, and throw + an exception if not. + """ + if not all(os.path.isdir(d) for d in ["include", "library", "tests"]): + raise Exception("This script must be run from Mbed TLS root") + def setup_logger(self, verbose=False): """ Set up a logger and set the change the default logging level from @@ -596,6 +609,8 @@ class NameCheck(): self.log.info("FAIL: {0} problem(s) to fix".format(str(problems))) if quiet: self.log.info("Remove --quiet to see explanations.") + else: + self.log.info("Use --quiet for minimal output.") else: self.log.info("PASS") @@ -703,16 +718,6 @@ class NameCheck(): else: self.log.info("{}: PASS".format(name)) -def check_repo_path(): - """ - Check that the current working directory is the project root, and throw - an exception if not. - """ - if (not os.path.isdir("include") or - not os.path.isdir("tests") or - not os.path.isdir("library")): - raise Exception("This script must be run from Mbed TLS root") - def main(): """ Perform argument parsing, and create an instance of NameCheck to begin the @@ -737,7 +742,6 @@ def main(): args = parser.parse_args() try: - check_repo_path() name_check = NameCheck() name_check.setup_logger(verbose=args.verbose) name_check.parse_names_in_source() From 6fececf192e653a37fe100ae931fcad874c0ee7b Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Sat, 7 Aug 2021 17:28:23 +0100 Subject: [PATCH 41/68] Comment why is used Signed-off-by: Yuto Takano --- tests/scripts/check-names.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/scripts/check-names.py b/tests/scripts/check-names.py index b8a12885f..5fe85b7bd 100755 --- a/tests/scripts/check-names.py +++ b/tests/scripts/check-names.py @@ -540,6 +540,8 @@ class NameCheck(): self.set_return_code(2) raise error finally: + # Put back the original config regardless of there being errors. + # Works also for keyboard interrupts. shutil.move("include/mbedtls/mbedtls_config.h.bak", "include/mbedtls/mbedtls_config.h") From 51efcb143da3e25a9bcda904c3830c69b5a69c51 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Mon, 9 Aug 2021 11:54:12 +0100 Subject: [PATCH 42/68] Rename check-names.py to check_names.py This is necessary to import check_names from other scripts, which will inevitably happen in the next few commits to implement the equivalent of `list-identifiers.sh --internal`. Signed-off-by: Yuto Takano --- tests/scripts/all.sh | 2 +- tests/scripts/{check-names.py => check_names.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/scripts/{check-names.py => check_names.py} (100%) diff --git a/tests/scripts/all.sh b/tests/scripts/all.sh index efcb5543e..cbb337ffa 100755 --- a/tests/scripts/all.sh +++ b/tests/scripts/all.sh @@ -733,7 +733,7 @@ component_check_changelog () { component_check_names () { msg "Check: declared and exported names (builds the library)" # < 3s - record_status tests/scripts/check-names.py -v + record_status tests/scripts/check_names.py -v } component_check_test_cases () { diff --git a/tests/scripts/check-names.py b/tests/scripts/check_names.py similarity index 100% rename from tests/scripts/check-names.py rename to tests/scripts/check_names.py From 977e07f5c8248cc27c36035685ba82faee0ac01a Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Mon, 9 Aug 2021 11:56:15 +0100 Subject: [PATCH 43/68] Use glob in get_files(), call setup_logger on init glob is more flexible and simplifies the function arguments drastically. It is also much more intuitive to extend in the future when the filepaths need to be extended or changed. setup_logger had to be called as the first thing after instantiation, so this commit simplify makes it automatic. Several clarification comments are added too. Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 66 +++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index 5fe85b7bd..c129def4e 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -33,6 +33,7 @@ subprocess error. Must be run from Mbed TLS root. """ import argparse +import glob import textwrap import os import sys @@ -177,13 +178,19 @@ class Typo(Problem): # pylint: disable=too-few-public-methods class NameCheck(): """ Representation of the core name checking operation performed by this script. - Shares a common logger, common excluded filenames, and a shared return_code. + Shares a common logger, and a shared return code. """ - def __init__(self): + def __init__(self, verbose=False): self.log = None self.check_repo_path() self.return_code = 0 + + self.setup_logger(verbose) + + # Globally excluded filenames self.excluded_files = ["bn_mul", "compat-2.x.h"] + + # Will contain the parse result after a comprehensive parse self.parse_result = {} def set_return_code(self, return_code): @@ -213,30 +220,30 @@ class NameCheck(): self.log.setLevel(logging.INFO) self.log.addHandler(logging.StreamHandler()) - def get_files(self, extension, directory): + def get_files(self, wildcard): """ - Get all files that end with .extension in the specified directory - recursively. + Get all files that match a UNIX-style wildcard recursively. While the + script is designed only for use on UNIX/macOS (due to nm), this function + would work fine on Windows even with forward slashes in the wildcard. Args: - * extension: the file extension to search for, without the dot - * directory: the directory to recursively search for + * wildcard: shell-style wildcards to match filepaths against. Returns a List of relative filepaths. """ - filenames = [] - for root, _, files in sorted(os.walk(directory)): - for filename in sorted(files): - if (filename not in self.excluded_files and - filename.endswith("." + extension)): - filenames.append(os.path.join(root, filename)) - return filenames + accumulator = [] + + for filepath in glob.iglob(wildcard, recursive=True): + if os.path.basename(filepath) not in self.excluded_files: + accumulator.append(filepath) + return accumulator def parse_names_in_source(self): """ - Calls each parsing function to retrieve various elements of the code, - together with their source location. Puts the parsed values in the - internal variable self.parse_result. + Comprehensive function to call each parsing function and retrieve + various elements of the code, together with their source location. + Puts the parsed values in the internal variable self.parse_result, so + they can be used from perform_checks(). """ self.log.info("Parsing source code...") self.log.debug( @@ -244,13 +251,13 @@ class NameCheck(): .format(str(self.excluded_files)) ) - m_headers = self.get_files("h", os.path.join("include", "mbedtls")) - p_headers = self.get_files("h", os.path.join("include", "psa")) + m_headers = self.get_files("include/mbedtls/*.h") + p_headers = self.get_files("include/psa/*.h") t_headers = ["3rdparty/everest/include/everest/everest.h", "3rdparty/everest/include/everest/x25519.h"] - d_headers = self.get_files("h", os.path.join("tests", "include", "test", "drivers")) - l_headers = self.get_files("h", "library") - libraries = self.get_files("c", "library") + [ + d_headers = self.get_files("tests/include/test/drivers/*.h") + l_headers = self.get_files("library/*.h") + libraries = self.get_files("library/*.c") + [ "3rdparty/everest/library/everest.c", "3rdparty/everest/library/x25519.c"] @@ -589,6 +596,7 @@ class NameCheck(): """ Perform each check in order, output its PASS/FAIL status. Maintain an overall test status, and output that at the end. + Assumes parse_names_in_source() was called before this. Args: * quiet: whether to hide detailed problem explanation. @@ -620,6 +628,7 @@ class NameCheck(): """ Perform a check that all detected symbols in the library object files are properly declared in headers. + Assumes parse_names_in_source() was called before this. Args: * quiet: whether to hide detailed problem explanation. @@ -645,6 +654,7 @@ class NameCheck(): def check_match_pattern(self, quiet, group_to_check, check_pattern): """ Perform a check that all items of a group conform to a regex pattern. + Assumes parse_names_in_source() was called before this. Args: * quiet: whether to hide detailed problem explanation. @@ -674,6 +684,7 @@ class NameCheck(): """ Perform a check that all words in the soure code beginning with MBED are either defined as macros, or as enum constants. + Assumes parse_names_in_source() was called before this. Args: * quiet: whether to hide detailed problem explanation. @@ -725,7 +736,7 @@ def main(): Perform argument parsing, and create an instance of NameCheck to begin the core operation. """ - parser = argparse.ArgumentParser( + argparser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, description=( "This script confirms that the naming of all symbols and identifiers " @@ -733,19 +744,18 @@ def main(): "self-consistent.\n\n" "Expected to be run from the MbedTLS root directory.")) - parser.add_argument("-v", "--verbose", + argparser.add_argument("-v", "--verbose", action="store_true", help="show parse results") - parser.add_argument("-q", "--quiet", + argparser.add_argument("-q", "--quiet", action="store_true", help="hide unnecessary text, explanations, and highlighs") - args = parser.parse_args() + args = argparser.parse_args() try: - name_check = NameCheck() - name_check.setup_logger(verbose=args.verbose) + name_check = NameCheck(verbose=args.verbose) name_check.parse_names_in_source() name_check.perform_checks(quiet=args.quiet) sys.exit(name_check.return_code) From 68d241211b4e18334f0e959204888a07c790cade Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Mon, 9 Aug 2021 12:10:31 +0100 Subject: [PATCH 44/68] Create list_internal_identifiers.py This is the equivalent of `list-identifiers.sh --internal`, which is useful for generating an exclusion file for ABI/API checking. Signed-off-by: Yuto Takano --- tests/scripts/list_internal_identifiers.py | 64 ++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100755 tests/scripts/list_internal_identifiers.py diff --git a/tests/scripts/list_internal_identifiers.py b/tests/scripts/list_internal_identifiers.py new file mode 100755 index 000000000..d58cb3f05 --- /dev/null +++ b/tests/scripts/list_internal_identifiers.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +# +# Copyright The Mbed TLS Contributors +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This script generates a file called _identifiers that contains all Mbed TLS +identifiers found on internal headers. This is the equivalent of what was +previously `list-identifiers.sh --internal`, and is useful for generating an +exclusion file list for ABI/API checking, since we do not promise compatibility +for them. + +It uses the NameCeck class from check_names.py to perform the parsing. + +Returns 0 on success, 1 if there is a script error. +Must be run from Mbed TLS root. +""" + +import argparse +import traceback +import sys +from check_names import NameCheck + +def main(): + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description=( + "This script writes a list of parsed identifiers in internal " + "headers to \"_identifiers\". This is useful for generating a list " + "of names to exclude from ABI checking. ")) + + parser.parse_args() + + try: + name_check = NameCheck() + internal_headers = ( + name_check.get_files("include/mbedtls/*_internal.h") + + name_check.get_files("library/*.h") + ) + + result = name_check.parse_identifiers(internal_headers) + + identifiers = ["{}\n".format(match.name) for match in result] + with open("_identifiers", "w", encoding="utf-8") as f: + f.writelines(identifiers) + + except Exception: # pylint: disable=broad-except + traceback.print_exc() + sys.exit(1) + +if __name__ == "__main__": + main() From d70d446d6925f24872c751038431db66df47b47b Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Mon, 9 Aug 2021 12:45:51 +0100 Subject: [PATCH 45/68] Improve code style consistency in check_names.py Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 236 ++++++++++++++++++++--------------- 1 file changed, 133 insertions(+), 103 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index c129def4e..32eac3c74 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -81,11 +81,9 @@ class Match(): # pylint: disable=too-few-public-methods class Problem(): # pylint: disable=too-few-public-methods """ A parent class representing a form of static analysis error. - - Fields: - * textwrapper: a TextWrapper instance to format problems nicely. """ def __init__(self): + self.quiet = False self.textwrapper = textwrap.TextWrapper() self.textwrapper.width = 80 self.textwrapper.initial_indent = " > " @@ -100,9 +98,8 @@ class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods Fields: * symbol_name: the name of the symbol. """ - def __init__(self, symbol_name, quiet=False): + def __init__(self, symbol_name): self.symbol_name = symbol_name - self.quiet = quiet Problem.__init__(self) def __str__(self): @@ -123,19 +120,17 @@ class PatternMismatch(Problem): # pylint: disable=too-few-public-methods * pattern: the expected regex pattern * match: the Match object in question """ - def __init__(self, pattern, match, quiet=False): + def __init__(self, pattern, match): self.pattern = pattern self.match = match - self.quiet = quiet Problem.__init__(self) def __str__(self): if self.quiet: - return ("{0}:{1}:{3}" - .format( - self.match.filename, - self.match.pos[0], - self.match.name)) + return ( + "{0}:{1}:{3}" + .format(self.match.filename, self.match.pos[0], self.match.name) + ) return self.textwrapper.fill( "{0}:{1}: '{2}' does not match the required pattern '{3}'." @@ -143,7 +138,9 @@ class PatternMismatch(Problem): # pylint: disable=too-few-public-methods self.match.filename, self.match.pos[0], self.match.name, - self.pattern)) + "\n" + str(self.match) + self.pattern + ) + ) + "\n" + str(self.match) class Typo(Problem): # pylint: disable=too-few-public-methods """ @@ -153,27 +150,23 @@ class Typo(Problem): # pylint: disable=too-few-public-methods Fields: * match: the Match object of the MBED name in question. """ - def __init__(self, match, quiet=False): + def __init__(self, match): self.match = match - self.quiet = quiet Problem.__init__(self) def __str__(self): if self.quiet: - return ("{0}:{1}:{2}" - .format( - self.match.filename, - self.match.pos[0], - self.match.name)) + return ( + "{0}:{1}:{2}" + .format(self.match.filename, self.match.pos[0], self.match.name) + ) return self.textwrapper.fill( "{0}:{1}: '{2}' looks like a typo. It was not found in any " "macros or any enums. If this is not a typo, put " "//no-check-names after it." - .format( - self.match.filename, - self.match.pos[0], - self.match.name)) + "\n" + str(self.match) + .format(self.match.filename, self.match.pos[0], self.match.name) + ) + "\n" + str(self.match) class NameCheck(): """ @@ -184,7 +177,6 @@ class NameCheck(): self.log = None self.check_repo_path() self.return_code = 0 - self.setup_logger(verbose) # Globally excluded filenames @@ -193,11 +185,6 @@ class NameCheck(): # Will contain the parse result after a comprehensive parse self.parse_result = {} - def set_return_code(self, return_code): - if return_code > self.return_code: - self.log.debug("Setting new return code to {}".format(return_code)) - self.return_code = return_code - @staticmethod def check_repo_path(): """ @@ -207,6 +194,11 @@ class NameCheck(): if not all(os.path.isdir(d) for d in ["include", "library", "tests"]): raise Exception("This script must be run from Mbed TLS root") + def set_return_code(self, return_code): + if return_code > self.return_code: + self.log.debug("Setting new return code to {}".format(return_code)) + self.return_code = return_code + def setup_logger(self, verbose=False): """ Set up a logger and set the change the default logging level from @@ -247,28 +239,35 @@ class NameCheck(): """ self.log.info("Parsing source code...") self.log.debug( - "The following files are excluded from the search: {}" + "The following filenames are excluded from the search: {}" .format(str(self.excluded_files)) ) m_headers = self.get_files("include/mbedtls/*.h") p_headers = self.get_files("include/psa/*.h") - t_headers = ["3rdparty/everest/include/everest/everest.h", - "3rdparty/everest/include/everest/x25519.h"] + t_headers = [ + "3rdparty/everest/include/everest/everest.h", + "3rdparty/everest/include/everest/x25519.h" + ] d_headers = self.get_files("tests/include/test/drivers/*.h") l_headers = self.get_files("library/*.h") libraries = self.get_files("library/*.c") + [ "3rdparty/everest/library/everest.c", - "3rdparty/everest/library/x25519.c"] + "3rdparty/everest/library/x25519.c" + ] all_macros = self.parse_macros( - m_headers + p_headers + t_headers + l_headers + d_headers) + m_headers + p_headers + t_headers + l_headers + d_headers + ) enum_consts = self.parse_enum_consts( - m_headers + l_headers + t_headers) + m_headers + l_headers + t_headers + ) identifiers = self.parse_identifiers( - m_headers + p_headers + t_headers + l_headers) + m_headers + p_headers + t_headers + l_headers + ) mbed_words = self.parse_mbed_words( - m_headers + p_headers + t_headers + l_headers + libraries) + m_headers + p_headers + t_headers + l_headers + libraries + ) symbols = self.parse_symbols() # Remove identifier macros like mbedtls_printf or mbedtls_calloc @@ -279,7 +278,7 @@ class NameCheck(): actual_macros.append(macro) self.log.debug("Found:") - self.log.debug(" {} Macros".format(len(all_macros))) + self.log.debug(" {} Total Macros".format(len(all_macros))) self.log.debug(" {} Non-identifier Macros".format(len(actual_macros))) self.log.debug(" {} Enum Constants".format(len(enum_consts))) self.log.debug(" {} Identifiers".format(len(identifiers))) @@ -294,12 +293,12 @@ class NameCheck(): "mbed_words": mbed_words } - def parse_macros(self, header_files): + def parse_macros(self, files): """ Parse all macros defined by #define preprocessor directives. Args: - * header_files: A List of filepaths to look through. + * files: A List of filepaths to look through. Returns a List of Match objects for the found macros. """ @@ -308,20 +307,22 @@ class NameCheck(): "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_" ) - self.log.debug("Looking for macros in {} files".format(len(header_files))) + self.log.debug("Looking for macros in {} files".format(len(files))) macros = [] - for header_file in header_files: + for header_file in files: with open(header_file, "r", encoding="utf-8") as header: for line_no, line in enumerate(header): for macro in macro_regex.finditer(line): - if not macro.group("macro").startswith(exclusions): - macros.append(Match( - header_file, - line, - (line_no, macro.start(), macro.end()), - macro.group("macro"))) + if macro.group("macro").startswith(exclusions): + continue + + macros.append(Match( + header_file, + line, + (line_no, macro.start(), macro.end()), + macro.group("macro"))) return macros @@ -359,20 +360,23 @@ class NameCheck(): return mbed_words - def parse_enum_consts(self, header_files): + def parse_enum_consts(self, files): """ Parse all enum value constants that are declared. Args: - * header_files: A List of filepaths to look through. + * files: A List of filepaths to look through. Returns a List of Match objects for the findings. """ - self.log.debug("Looking for enum consts in {} files".format(len(header_files))) + self.log.debug( + "Looking for enum consts in {} files" + .format(len(files)) + ) enum_consts = [] - for header_file in header_files: + for header_file in files: # Emulate a finite state machine to parse enum declarations. # 0 = not in enum # 1 = inside enum @@ -393,22 +397,26 @@ class NameCheck(): state = 0 elif state == 1 and not re.match(r" *#", line): enum_const = re.match(r" *(?P\w+)", line) - if enum_const: - enum_consts.append(Match( - header_file, - line, - (line_no, enum_const.start(), enum_const.end()), - enum_const.group("enum_const"))) + if not enum_const: + continue + + enum_consts.append(Match( + header_file, + line, + (line_no, enum_const.start(), enum_const.end()), + enum_const.group("enum_const"))) return enum_consts - def parse_identifiers(self, header_files): + def parse_identifiers(self, files): """ Parse all lines of a header where a function identifier is declared, based on some huersitics. Highly dependent on formatting style. + Note: .match() checks at the beginning of the string (implicit ^), while + .search() checks throughout. Args: - * header_files: A List of filepaths to look through. + * files: A List of filepaths to look through. Returns a List of Match objects with identifiers. """ @@ -425,23 +433,31 @@ class NameCheck(): # Match names of named data structures. r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$|" # Match names of typedef instances, after closing bracket. - r"}? *(\w+)[;[].*") - exclusion_lines = re.compile(r"^(" - r"extern +\"C\"|" - r"(typedef +)?(struct|union|enum)( *{)?$|" - r"} *;?$|" - r"$|" - r"//|" - r"#" - r")") + r"}? *(\w+)[;[].*" + ) + exclusion_lines = re.compile( + r"^(" + r"extern +\"C\"|" + r"(typedef +)?(struct|union|enum)( *{)?$|" + r"} *;?$|" + r"$|" + r"//|" + r"#" + r")" + ) - self.log.debug("Looking for identifiers in {} files".format(len(header_files))) + self.log.debug( + "Looking for identifiers in {} files" + .format(len(files)) + ) identifiers = [] - for header_file in header_files: + for header_file in files: with open(header_file, "r", encoding="utf-8") as header: in_block_comment = False + # The previous line varibale is used for concatenating lines + # when identifiers are formatted and spread across multiple. previous_line = "" for line_no, line in enumerate(header): @@ -484,15 +500,19 @@ class NameCheck(): identifier = identifier_regex.search(line) - if identifier: - # Find the group that matched, and append it - for group in identifier.groups(): - if group: - identifiers.append(Match( - header_file, - line, - (line_no, identifier.start(), identifier.end()), - group)) + if not identifier: + continue + + # Find the group that matched, and append it + for group in identifier.groups(): + if not group: + continue + + identifiers.append(Match( + header_file, + line, + (line_no, identifier.start(), identifier.end()), + group)) return identifiers @@ -510,8 +530,10 @@ class NameCheck(): symbols = [] # Back up the config and atomically compile with the full configratuion. - shutil.copy("include/mbedtls/mbedtls_config.h", - "include/mbedtls/mbedtls_config.h.bak") + shutil.copy( + "include/mbedtls/mbedtls_config.h", + "include/mbedtls/mbedtls_config.h.bak" + ) try: # Use check=True in all subprocess calls so that failures are raised # as exceptions and logged. @@ -532,10 +554,11 @@ class NameCheck(): ) # Perform object file analysis using nm - symbols = self.parse_symbols_from_nm( - ["library/libmbedcrypto.a", - "library/libmbedtls.a", - "library/libmbedx509.a"]) + symbols = self.parse_symbols_from_nm([ + "library/libmbedcrypto.a", + "library/libmbedtls.a", + "library/libmbedx509.a" + ]) subprocess.run( ["make", "clean"], @@ -549,8 +572,10 @@ class NameCheck(): finally: # Put back the original config regardless of there being errors. # Works also for keyboard interrupts. - shutil.move("include/mbedtls/mbedtls_config.h.bak", - "include/mbedtls/mbedtls_config.h") + shutil.move( + "include/mbedtls/mbedtls_config.h.bak", + "include/mbedtls/mbedtls_config.h" + ) return symbols @@ -606,9 +631,11 @@ class NameCheck(): problems += self.check_symbols_declared_in_header(quiet) - pattern_checks = [("macros", MACRO_PATTERN), - ("enum_consts", CONSTANTS_PATTERN), - ("identifiers", IDENTIFIER_PATTERN)] + pattern_checks = [ + ("macros", MACRO_PATTERN), + ("enum_consts", CONSTANTS_PATTERN), + ("identifiers", IDENTIFIER_PATTERN) + ] for group, check_pattern in pattern_checks: problems += self.check_match_pattern(quiet, group, check_pattern) @@ -645,12 +672,11 @@ class NameCheck(): break if not found_symbol_declared: - problems.append(SymbolNotInHeader(symbol, quiet=quiet)) + problems.append(SymbolNotInHeader(symbol)) - self.output_check_result("All symbols in header", problems) + self.output_check_result(quiet, "All symbols in header", problems) return len(problems) - def check_match_pattern(self, quiet, group_to_check, check_pattern): """ Perform a check that all items of a group conform to a regex pattern. @@ -670,12 +696,10 @@ class NameCheck(): problems.append(PatternMismatch(check_pattern, item_match)) # Double underscore is a reserved identifier, never to be used if re.match(r".*__.*", item_match.name): - problems.append(PatternMismatch( - "double underscore", - item_match, - quiet=quiet)) + problems.append(PatternMismatch("double underscore", item_match)) self.output_check_result( + quiet, "Naming patterns of {}".format(group_to_check), problems) return len(problems) @@ -693,7 +717,7 @@ class NameCheck(): """ problems = [] - # Set comprehension, equivalent to a list comprehension inside set() + # Set comprehension, equivalent to a list comprehension wrapped by set() all_caps_names = { match.name for match @@ -713,20 +737,26 @@ class NameCheck(): "MBEDTLS_PSA_BUILTIN_") in all_caps_names if not found and not typo_exclusion.search(name_match.name): - problems.append(Typo(name_match, quiet=quiet)) + problems.append(Typo(name_match)) - self.output_check_result("Likely typos", problems) + self.output_check_result(quiet, "Likely typos", problems) return len(problems) - def output_check_result(self, name, problems): + def output_check_result(self, quiet, name, problems): """ Write out the PASS/FAIL status of a performed check depending on whether there were problems. + + Args: + * quiet: whether to hide detailed problem explanation. + * name: the name of the test + * problems: a List of encountered Problems """ if problems: self.set_return_code(1) self.log.info("{}: FAIL\n".format(name)) for problem in problems: + problem.quiet = quiet self.log.warning(str(problem)) else: self.log.info("{}: PASS".format(name)) From f005c3369ac17be7860e4256ff2d497354d12338 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Mon, 9 Aug 2021 13:56:36 +0100 Subject: [PATCH 46/68] Change variable name argparser to parser Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index 32eac3c74..ce03b8a66 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -766,23 +766,26 @@ def main(): Perform argument parsing, and create an instance of NameCheck to begin the core operation. """ - argparser = argparse.ArgumentParser( + parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, description=( "This script confirms that the naming of all symbols and identifiers " "in Mbed TLS are consistent with the house style and are also " "self-consistent.\n\n" - "Expected to be run from the MbedTLS root directory.")) + "Expected to be run from the MbedTLS root directory.") + ) + parser.add_argument( + "-v", "--verbose", + action="store_true", + help="show parse results" + ) + parser.add_argument( + "-q", "--quiet", + action="store_true", + help="hide unnecessary text, explanations, and highlighs" + ) - argparser.add_argument("-v", "--verbose", - action="store_true", - help="show parse results") - - argparser.add_argument("-q", "--quiet", - action="store_true", - help="hide unnecessary text, explanations, and highlighs") - - args = argparser.parse_args() + args = parser.parse_args() try: name_check = NameCheck(verbose=args.verbose) From 8e9a219310b498465cbf172e239e81317850118f Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Mon, 9 Aug 2021 14:48:53 +0100 Subject: [PATCH 47/68] Improve ease of specifying which files to look in (check_names) - Instead of os.path.join, use glob patterns (supports Windows too) - Instead of creating the lists beforehand (which adds messiness), pass glob expessions to functions and let them memoise it. - Add support for excluding based on glob patterns, which isn't used now but could come in handy. Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 140 ++++++++++++--------- tests/scripts/list_internal_identifiers.py | 10 +- 2 files changed, 84 insertions(+), 66 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index ce03b8a66..37a8be325 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -179,8 +179,11 @@ class NameCheck(): self.return_code = 0 self.setup_logger(verbose) + # Memo for storing "glob expression": set(filepaths) + self.files = {} + # Globally excluded filenames - self.excluded_files = ["bn_mul", "compat-2.x.h"] + self.excluded_files = ["**/bn_mul", "**/compat-2.x.h"] # Will contain the parse result after a comprehensive parse self.parse_result = {} @@ -212,23 +215,46 @@ class NameCheck(): self.log.setLevel(logging.INFO) self.log.addHandler(logging.StreamHandler()) - def get_files(self, wildcard): + def get_files(self, include_wildcards, exclude_wildcards): """ - Get all files that match a UNIX-style wildcard recursively. While the - script is designed only for use on UNIX/macOS (due to nm), this function - would work fine on Windows even with forward slashes in the wildcard. + Get all files that match any of the UNIX-style wildcards. While the + check_names script is designed only for use on UNIX/macOS (due to nm), + this function alone would work fine on Windows even with forward slashes + in the wildcard. Args: - * wildcard: shell-style wildcards to match filepaths against. + * include_wildcards: a List of shell-style wildcards to match filepaths. + * exclude_wildacrds: a List of shell-style wildcards to exclude. Returns a List of relative filepaths. """ - accumulator = [] + accumulator = set() - for filepath in glob.iglob(wildcard, recursive=True): - if os.path.basename(filepath) not in self.excluded_files: - accumulator.append(filepath) - return accumulator + # exclude_wildcards may be None. Also, consider the global exclusions. + exclude_wildcards = (exclude_wildcards or []) + self.excluded_files + + # Perform set union on the glob results. Memoise individual sets. + for include_wildcard in include_wildcards: + if include_wildcard not in self.files: + self.files[include_wildcard] = set(glob.glob( + include_wildcard, + recursive=True + )) + + accumulator = accumulator.union(self.files[include_wildcard]) + + # Perform set difference to exclude. Also use the same memo since their + # behaviour is pretty much identical and it can benefit from the cache. + for exclude_wildcard in exclude_wildcards: + if exclude_wildcard not in self.files: + self.files[exclude_wildcard] = set(glob.glob( + exclude_wildcard, + recursive=True + )) + + accumulator = accumulator.difference(self.files[exclude_wildcard]) + + return list(accumulator) def parse_names_in_source(self): """ @@ -243,31 +269,37 @@ class NameCheck(): .format(str(self.excluded_files)) ) - m_headers = self.get_files("include/mbedtls/*.h") - p_headers = self.get_files("include/psa/*.h") - t_headers = [ + all_macros = self.parse_macros([ + "include/mbedtls/*.h", + "include/psa/*.h", + "library/*.h", + "tests/include/test/drivers/*.h", "3rdparty/everest/include/everest/everest.h", "3rdparty/everest/include/everest/x25519.h" - ] - d_headers = self.get_files("tests/include/test/drivers/*.h") - l_headers = self.get_files("library/*.h") - libraries = self.get_files("library/*.c") + [ + ]) + enum_consts = self.parse_enum_consts([ + "include/mbedtls/*.h", + "library/*.h", + "3rdparty/everest/include/everest/everest.h", + "3rdparty/everest/include/everest/x25519.h" + ]) + identifiers = self.parse_identifiers([ + "include/mbedtls/*.h", + "include/psa/*.h", + "library/*.h", + "3rdparty/everest/include/everest/everest.h", + "3rdparty/everest/include/everest/x25519.h" + ]) + mbed_words = self.parse_mbed_words([ + "include/mbedtls/*.h", + "include/psa/*.h", + "library/*.h", + "3rdparty/everest/include/everest/everest.h", + "3rdparty/everest/include/everest/x25519.h", + "library/*.c", "3rdparty/everest/library/everest.c", "3rdparty/everest/library/x25519.c" - ] - - all_macros = self.parse_macros( - m_headers + p_headers + t_headers + l_headers + d_headers - ) - enum_consts = self.parse_enum_consts( - m_headers + l_headers + t_headers - ) - identifiers = self.parse_identifiers( - m_headers + p_headers + t_headers + l_headers - ) - mbed_words = self.parse_mbed_words( - m_headers + p_headers + t_headers + l_headers + libraries - ) + ]) symbols = self.parse_symbols() # Remove identifier macros like mbedtls_printf or mbedtls_calloc @@ -284,7 +316,6 @@ class NameCheck(): self.log.debug(" {} Identifiers".format(len(identifiers))) self.log.debug(" {} Exported Symbols".format(len(symbols))) self.log.info("Analysing...") - self.parse_result = { "macros": actual_macros, "enum_consts": enum_consts, @@ -293,12 +324,13 @@ class NameCheck(): "mbed_words": mbed_words } - def parse_macros(self, files): + def parse_macros(self, include, exclude=None): """ Parse all macros defined by #define preprocessor directives. Args: - * files: A List of filepaths to look through. + * include: A List of glob expressions to look for files through. + * exclude: A List of glob expressions for excluding files. Returns a List of Match objects for the found macros. """ @@ -307,11 +339,9 @@ class NameCheck(): "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_" ) - self.log.debug("Looking for macros in {} files".format(len(files))) - macros = [] - for header_file in files: + for header_file in self.get_files(include, exclude): with open(header_file, "r", encoding="utf-8") as header: for line_no, line in enumerate(header): for macro in macro_regex.finditer(line): @@ -326,13 +356,14 @@ class NameCheck(): return macros - def parse_mbed_words(self, files): + def parse_mbed_words(self, include, exclude=None): """ Parse all words in the file that begin with MBED, in and out of macros, comments, anything. Args: - * files: a List of filepaths to look through. + * include: A List of glob expressions to look for files through. + * exclude: A List of glob expressions for excluding files. Returns a List of Match objects for words beginning with MBED. """ @@ -340,11 +371,9 @@ class NameCheck(): mbed_regex = re.compile(r"\bMBED.+?_[A-Z0-9_]*") exclusions = re.compile(r"// *no-check-names|#error") - self.log.debug("Looking for MBED names in {} files".format(len(files))) - mbed_words = [] - for filename in files: + for filename in self.get_files(include, exclude): with open(filename, "r", encoding="utf-8") as fp: for line_no, line in enumerate(fp): if exclusions.search(line): @@ -360,23 +389,19 @@ class NameCheck(): return mbed_words - def parse_enum_consts(self, files): + def parse_enum_consts(self, include, exclude=None): """ Parse all enum value constants that are declared. Args: - * files: A List of filepaths to look through. + * include: A List of glob expressions to look for files through. + * exclude: A List of glob expressions for excluding files. Returns a List of Match objects for the findings. """ - self.log.debug( - "Looking for enum consts in {} files" - .format(len(files)) - ) - enum_consts = [] - for header_file in files: + for header_file in self.get_files(include, exclude): # Emulate a finite state machine to parse enum declarations. # 0 = not in enum # 1 = inside enum @@ -408,7 +433,7 @@ class NameCheck(): return enum_consts - def parse_identifiers(self, files): + def parse_identifiers(self, include, exclude=None): """ Parse all lines of a header where a function identifier is declared, based on some huersitics. Highly dependent on formatting style. @@ -416,7 +441,8 @@ class NameCheck(): .search() checks throughout. Args: - * files: A List of filepaths to look through. + * include: A List of glob expressions to look for files through. + * exclude: A List of glob expressions for excluding files. Returns a List of Match objects with identifiers. """ @@ -445,15 +471,9 @@ class NameCheck(): r"#" r")" ) - - self.log.debug( - "Looking for identifiers in {} files" - .format(len(files)) - ) - identifiers = [] - for header_file in files: + for header_file in self.get_files(include, exclude): with open(header_file, "r", encoding="utf-8") as header: in_block_comment = False # The previous line varibale is used for concatenating lines diff --git a/tests/scripts/list_internal_identifiers.py b/tests/scripts/list_internal_identifiers.py index d58cb3f05..75b1646aa 100755 --- a/tests/scripts/list_internal_identifiers.py +++ b/tests/scripts/list_internal_identifiers.py @@ -45,12 +45,10 @@ def main(): try: name_check = NameCheck() - internal_headers = ( - name_check.get_files("include/mbedtls/*_internal.h") + - name_check.get_files("library/*.h") - ) - - result = name_check.parse_identifiers(internal_headers) + result = name_check.parse_identifiers([ + "include/mbedtls/*_internal.h", + "library/*.h" + ]) identifiers = ["{}\n".format(match.name) for match in result] with open("_identifiers", "w", encoding="utf-8") as f: From 50953433a440dc0c2d377d0bc0289716a92d2edd Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Mon, 9 Aug 2021 14:54:36 +0100 Subject: [PATCH 48/68] Bring back logging of number of files Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index 37a8be325..9a7f3918e 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -265,7 +265,7 @@ class NameCheck(): """ self.log.info("Parsing source code...") self.log.debug( - "The following filenames are excluded from the search: {}" + "The following files are excluded from the search: {}" .format(str(self.excluded_files)) ) @@ -339,9 +339,11 @@ class NameCheck(): "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_" ) - macros = [] + files = self.get_files(include, exclude) + self.log.debug("Looking for macros in {} files".format(len(files))) - for header_file in self.get_files(include, exclude): + macros = [] + for header_file in files: with open(header_file, "r", encoding="utf-8") as header: for line_no, line in enumerate(header): for macro in macro_regex.finditer(line): @@ -371,9 +373,11 @@ class NameCheck(): mbed_regex = re.compile(r"\bMBED.+?_[A-Z0-9_]*") exclusions = re.compile(r"// *no-check-names|#error") - mbed_words = [] + files = self.get_files(include, exclude) + self.log.debug("Looking for MBED words in {} files".format(len(files))) - for filename in self.get_files(include, exclude): + mbed_words = [] + for filename in files: with open(filename, "r", encoding="utf-8") as fp: for line_no, line in enumerate(fp): if exclusions.search(line): @@ -399,9 +403,11 @@ class NameCheck(): Returns a List of Match objects for the findings. """ - enum_consts = [] + files = self.get_files(include, exclude) + self.log.debug("Looking for enum consts in {} files".format(len(files))) - for header_file in self.get_files(include, exclude): + enum_consts = [] + for header_file in files: # Emulate a finite state machine to parse enum declarations. # 0 = not in enum # 1 = inside enum @@ -471,9 +477,12 @@ class NameCheck(): r"#" r")" ) - identifiers = [] - for header_file in self.get_files(include, exclude): + files = self.get_files(include, exclude) + self.log.debug("Looking for identifiers in {} files".format(len(files))) + + identifiers = [] + for header_file in files: with open(header_file, "r", encoding="utf-8") as header: in_block_comment = False # The previous line varibale is used for concatenating lines From 55c6c87d951823b31e353a6bad5d7cc2e7957557 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Mon, 9 Aug 2021 15:35:19 +0100 Subject: [PATCH 49/68] Separate code parsing and name checking in two classes Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 191 +++++++++++---------- tests/scripts/list_internal_identifiers.py | 5 +- 2 files changed, 99 insertions(+), 97 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index 9a7f3918e..957701433 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -20,16 +20,26 @@ This script confirms that the naming of all symbols and identifiers in Mbed TLS are consistent with the house style and are also self-consistent. It only runs on Linux and macOS since it depends on nm. -The script performs the following checks: +It contains two major Python classes, CodeParser and NameChecker. They both have +a comprehensive "run-all" function (comprehensive_parse() and perform_checks()) +but the individual functions can also be used for specific needs. + +CodeParser makes heavy use of regular expressions to parse the code, and is +dependent on the current code formatting. Many Python C parser libraries require +preprocessed C code, which means no macro parsing. Compiler tools are also not +very helpful when we want the exact location in the original source (which +becomes impossible when e.g. comments are stripped). + +NameChecker performs the following checks: - All exported and available symbols in the library object files, are explicitly declared in the header files. This uses the nm command. - All macros, constants, and identifiers (function names, struct names, etc) - follow the required pattern. + follow the required regex pattern. - Typo checking: All words that begin with MBED exist as macros or constants. -Returns 0 on success, 1 on test failure, and 2 if there is a script error or a -subprocess error. Must be run from Mbed TLS root. +The script returns 0 on success, 1 on test failure, and 2 if there is a script +error error. Must be run from Mbed TLS root. """ import argparse @@ -168,16 +178,15 @@ class Typo(Problem): # pylint: disable=too-few-public-methods .format(self.match.filename, self.match.pos[0], self.match.name) ) + "\n" + str(self.match) -class NameCheck(): +class CodeParser(): """ - Representation of the core name checking operation performed by this script. - Shares a common logger, and a shared return code. + Class for retrieving files and parsing the code. This can be used + independently of the checks that NameChecker performs, for example for + list_internal_identifiers.py. """ - def __init__(self, verbose=False): - self.log = None + def __init__(self, log): + self.log = log self.check_repo_path() - self.return_code = 0 - self.setup_logger(verbose) # Memo for storing "glob expression": set(filepaths) self.files = {} @@ -185,9 +194,6 @@ class NameCheck(): # Globally excluded filenames self.excluded_files = ["**/bn_mul", "**/compat-2.x.h"] - # Will contain the parse result after a comprehensive parse - self.parse_result = {} - @staticmethod def check_repo_path(): """ @@ -197,71 +203,12 @@ class NameCheck(): if not all(os.path.isdir(d) for d in ["include", "library", "tests"]): raise Exception("This script must be run from Mbed TLS root") - def set_return_code(self, return_code): - if return_code > self.return_code: - self.log.debug("Setting new return code to {}".format(return_code)) - self.return_code = return_code - - def setup_logger(self, verbose=False): + def comprehensive_parse(self): """ - Set up a logger and set the change the default logging level from - WARNING to INFO. Loggers are better than print statements since their - verbosity can be controlled. - """ - self.log = logging.getLogger() - if verbose: - self.log.setLevel(logging.DEBUG) - else: - self.log.setLevel(logging.INFO) - self.log.addHandler(logging.StreamHandler()) + Comprehensive ("default") function to call each parsing function and + retrieve various elements of the code, together with the source location. - def get_files(self, include_wildcards, exclude_wildcards): - """ - Get all files that match any of the UNIX-style wildcards. While the - check_names script is designed only for use on UNIX/macOS (due to nm), - this function alone would work fine on Windows even with forward slashes - in the wildcard. - - Args: - * include_wildcards: a List of shell-style wildcards to match filepaths. - * exclude_wildacrds: a List of shell-style wildcards to exclude. - - Returns a List of relative filepaths. - """ - accumulator = set() - - # exclude_wildcards may be None. Also, consider the global exclusions. - exclude_wildcards = (exclude_wildcards or []) + self.excluded_files - - # Perform set union on the glob results. Memoise individual sets. - for include_wildcard in include_wildcards: - if include_wildcard not in self.files: - self.files[include_wildcard] = set(glob.glob( - include_wildcard, - recursive=True - )) - - accumulator = accumulator.union(self.files[include_wildcard]) - - # Perform set difference to exclude. Also use the same memo since their - # behaviour is pretty much identical and it can benefit from the cache. - for exclude_wildcard in exclude_wildcards: - if exclude_wildcard not in self.files: - self.files[exclude_wildcard] = set(glob.glob( - exclude_wildcard, - recursive=True - )) - - accumulator = accumulator.difference(self.files[exclude_wildcard]) - - return list(accumulator) - - def parse_names_in_source(self): - """ - Comprehensive function to call each parsing function and retrieve - various elements of the code, together with their source location. - Puts the parsed values in the internal variable self.parse_result, so - they can be used from perform_checks(). + Returns a dict of parsed item key to the corresponding List of Matches. """ self.log.info("Parsing source code...") self.log.debug( @@ -315,8 +262,7 @@ class NameCheck(): self.log.debug(" {} Enum Constants".format(len(enum_consts))) self.log.debug(" {} Identifiers".format(len(identifiers))) self.log.debug(" {} Exported Symbols".format(len(symbols))) - self.log.info("Analysing...") - self.parse_result = { + return { "macros": actual_macros, "enum_consts": enum_consts, "identifiers": identifiers, @@ -324,6 +270,47 @@ class NameCheck(): "mbed_words": mbed_words } + def get_files(self, include_wildcards, exclude_wildcards): + """ + Get all files that match any of the UNIX-style wildcards. While the + check_names script is designed only for use on UNIX/macOS (due to nm), + this function alone would work fine on Windows even with forward slashes + in the wildcard. + + Args: + * include_wildcards: a List of shell-style wildcards to match filepaths. + * exclude_wildcards: a List of shell-style wildcards to exclude. + + Returns a List of relative filepaths. + """ + accumulator = set() + + # exclude_wildcards may be None. Also, consider the global exclusions. + exclude_wildcards = (exclude_wildcards or []) + self.excluded_files + + # Perform set union on the glob results. Memoise individual sets. + for include_wildcard in include_wildcards: + if include_wildcard not in self.files: + self.files[include_wildcard] = set(glob.glob( + include_wildcard, + recursive=True + )) + + accumulator = accumulator.union(self.files[include_wildcard]) + + # Perform set difference to exclude. Also use the same memo since their + # behaviour is pretty much identical and it can benefit from the cache. + for exclude_wildcard in exclude_wildcards: + if exclude_wildcard not in self.files: + self.files[exclude_wildcard] = set(glob.glob( + exclude_wildcard, + recursive=True + )) + + accumulator = accumulator.difference(self.files[exclude_wildcard]) + + return list(accumulator) + def parse_macros(self, include, exclude=None): """ Parse all macros defined by #define preprocessor directives. @@ -456,11 +443,11 @@ class NameCheck(): # Match " something(a" or " *something(a". Functions. # Assumptions: # - function definition from return type to one of its arguments is - # all on one line (enforced by the previous_line concat below) + # all on one line # - function definition line only contains alphanumeric, asterisk, # underscore, and open bracket r".* \**(\w+) *\( *\w|" - # Match "(*something)(". Flexible with spaces. + # Match "(*something)(". r".*\( *\* *(\w+) *\) *\(|" # Match names of named data structures. r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$|" @@ -485,7 +472,7 @@ class NameCheck(): for header_file in files: with open(header_file, "r", encoding="utf-8") as header: in_block_comment = False - # The previous line varibale is used for concatenating lines + # The previous line variable is used for concatenating lines # when identifiers are formatted and spread across multiple. previous_line = "" @@ -596,7 +583,6 @@ class NameCheck(): ) except subprocess.CalledProcessError as error: self.log.debug(error.output) - self.set_return_code(2) raise error finally: # Put back the original config regardless of there being errors. @@ -614,7 +600,7 @@ class NameCheck(): Does not return the position data since it is of no use. Args: - * object_files: a List of compiled object files to search through. + * object_files: a List of compiled object filepaths to search through. Returns a List of unique symbols defined and used in any of the object files. @@ -646,18 +632,24 @@ class NameCheck(): return symbols +class NameChecker(): + """ + Representation of the core name checking operation performed by this script. + """ + def __init__(self, parse_result, log): + self.parse_result = parse_result + self.log = log + def perform_checks(self, quiet=False): """ - Perform each check in order, output its PASS/FAIL status. Maintain an - overall test status, and output that at the end. - Assumes parse_names_in_source() was called before this. + A comprehensive checker that performs each check in order, and outputs + a final verdict. Args: * quiet: whether to hide detailed problem explanation. """ self.log.info("=============") problems = 0 - problems += self.check_symbols_declared_in_header(quiet) pattern_checks = [ @@ -677,8 +669,10 @@ class NameCheck(): self.log.info("Remove --quiet to see explanations.") else: self.log.info("Use --quiet for minimal output.") + return 1 else: self.log.info("PASS") + return 0 def check_symbols_declared_in_header(self, quiet): """ @@ -782,7 +776,6 @@ class NameCheck(): * problems: a List of encountered Problems """ if problems: - self.set_return_code(1) self.log.info("{}: FAIL\n".format(name)) for problem in problems: problem.quiet = quiet @@ -792,8 +785,8 @@ class NameCheck(): def main(): """ - Perform argument parsing, and create an instance of NameCheck to begin the - core operation. + Perform argument parsing, and create an instance of CodeParser and + NameChecker to begin the core operation. """ parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, @@ -816,14 +809,22 @@ def main(): args = parser.parse_args() + # Configure the global logger, which is then passed to the classes below + log = logging.getLogger() + log.setLevel(logging.DEBUG if args.verbose else logging.INFO) + log.addHandler(logging.StreamHandler()) + try: - name_check = NameCheck(verbose=args.verbose) - name_check.parse_names_in_source() - name_check.perform_checks(quiet=args.quiet) - sys.exit(name_check.return_code) + code_parser = CodeParser(log) + parse_result = code_parser.comprehensive_parse() except Exception: # pylint: disable=broad-except traceback.print_exc() sys.exit(2) + name_checker = NameChecker(parse_result, log) + return_code = name_checker.perform_checks(quiet=args.quiet) + + sys.exit(return_code) + if __name__ == "__main__": main() diff --git a/tests/scripts/list_internal_identifiers.py b/tests/scripts/list_internal_identifiers.py index 75b1646aa..64a4c3531 100755 --- a/tests/scripts/list_internal_identifiers.py +++ b/tests/scripts/list_internal_identifiers.py @@ -29,9 +29,10 @@ Must be run from Mbed TLS root. """ import argparse +import logging import traceback import sys -from check_names import NameCheck +from check_names import CodeParser def main(): parser = argparse.ArgumentParser( @@ -44,7 +45,7 @@ def main(): parser.parse_args() try: - name_check = NameCheck() + name_check = CodeParser(logging.getLogger()) result = name_check.parse_identifiers([ "include/mbedtls/*_internal.h", "library/*.h" From 7bfac1d7fe9bc5c13a55e87a2352ea44b88cba84 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Mon, 9 Aug 2021 15:49:25 +0100 Subject: [PATCH 50/68] Fix incorrect reference to NameCheck in script docstring Signed-off-by: Yuto Takano --- tests/scripts/list_internal_identifiers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/scripts/list_internal_identifiers.py b/tests/scripts/list_internal_identifiers.py index 64a4c3531..822486ae4 100755 --- a/tests/scripts/list_internal_identifiers.py +++ b/tests/scripts/list_internal_identifiers.py @@ -22,9 +22,9 @@ previously `list-identifiers.sh --internal`, and is useful for generating an exclusion file list for ABI/API checking, since we do not promise compatibility for them. -It uses the NameCeck class from check_names.py to perform the parsing. +It uses the CodeParser class from check_names.py to perform the parsing. -Returns 0 on success, 1 if there is a script error. +The script returns 0 on success, 1 if there is a script error. Must be run from Mbed TLS root. """ @@ -40,7 +40,7 @@ def main(): description=( "This script writes a list of parsed identifiers in internal " "headers to \"_identifiers\". This is useful for generating a list " - "of names to exclude from ABI checking. ")) + "of names to exclude from API/ABI compatibility checking. ")) parser.parse_args() From 7828ca2ea435464a5bbd2cc6f948684ced2d82c5 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Tue, 10 Aug 2021 11:26:15 +0100 Subject: [PATCH 51/68] Fix typos pointed out by check_names Signed-off-by: Yuto Takano --- include/mbedtls/config_psa.h | 2 +- include/mbedtls/mbedtls_config.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mbedtls/config_psa.h b/include/mbedtls/config_psa.h index 9080cd19b..3b01b78d2 100644 --- a/include/mbedtls/config_psa.h +++ b/include/mbedtls/config_psa.h @@ -586,7 +586,7 @@ extern "C" { #define MBEDTLS_PSA_BUILTIN_ALG_RSA_PKCS1V15_SIGN 1 #define PSA_WANT_ALG_RSA_PKCS1V15_SIGN 1 #define PSA_WANT_ALG_RSA_PKCS1V15_SIGN_RAW 1 -#endif /* MBEDTLSS_PKCS1_V15 */ +#endif /* MBEDTLS_PKCS1_V15 */ #if defined(MBEDTLS_PKCS1_V21) #define MBEDTLS_PSA_BUILTIN_ALG_RSA_OAEP 1 #define PSA_WANT_ALG_RSA_OAEP 1 diff --git a/include/mbedtls/mbedtls_config.h b/include/mbedtls/mbedtls_config.h index a60db7e93..ecf10bbf5 100644 --- a/include/mbedtls/mbedtls_config.h +++ b/include/mbedtls/mbedtls_config.h @@ -3179,7 +3179,7 @@ * Maximum number of heap-allocated bytes for the purpose of * DTLS handshake message reassembly and future message buffering. * - * This should be at least 9/8 * MBEDTLSSL_IN_CONTENT_LEN + * This should be at least 9/8 * MBEDTLS_SSL_IN_CONTENT_LEN * to account for a reassembled handshake message of maximum size, * together with its reassembly bitmap. * From 206b022ad0e192ce759d7ca3ebf596294ccba4b5 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Tue, 10 Aug 2021 11:30:43 +0100 Subject: [PATCH 52/68] Fix off-by-one error in string formatting in Python Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index 957701433..591389b96 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -138,7 +138,7 @@ class PatternMismatch(Problem): # pylint: disable=too-few-public-methods def __str__(self): if self.quiet: return ( - "{0}:{1}:{3}" + "{0}:{1}:{2}" .format(self.match.filename, self.match.pos[0], self.match.name) ) From ac72fac465b720c55b472e38cfa7664890bfbc32 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Tue, 10 Aug 2021 15:09:16 +0100 Subject: [PATCH 53/68] Put back list-identifiers.sh as a thin wrapper around the python script Signed-off-by: Yuto Takano --- tests/scripts/list-identifiers.sh | 66 ++++++++++++++++++++++ tests/scripts/list_internal_identifiers.py | 7 ++- 2 files changed, 70 insertions(+), 3 deletions(-) create mode 100755 tests/scripts/list-identifiers.sh diff --git a/tests/scripts/list-identifiers.sh b/tests/scripts/list-identifiers.sh new file mode 100755 index 000000000..49ecc93ef --- /dev/null +++ b/tests/scripts/list-identifiers.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# +# Create a file named identifiers containing identifiers from internal header +# files, based on the --internal flag. +# Outputs the line count of the file to stdout. +# A very thin wrapper around list_internal_identifiers.py for backwards +# compatibility. +# Must be run from Mbed TLS root. +# +# Usage: list-identifiers.sh [ -i | --internal ] +# +# Copyright The Mbed TLS Contributors +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eu + +if [ -d include/mbedtls ]; then :; else + echo "$0: Must be run from Mbed TLS root" >&2 + exit 1 +fi + +INTERNAL="" + +until [ -z "${1-}" ] +do + case "$1" in + -i|--internal) + INTERNAL="1" + ;; + *) + # print error + echo "Unknown argument: '$1'" + exit 1 + ;; + esac + shift +done + +if [ $INTERNAL ] +then + tests/scripts/list_internal_identifiers.py + wc -l identifiers +else + cat < Date: Tue, 10 Aug 2021 15:45:28 +0100 Subject: [PATCH 54/68] Add newline at end of list-identifiers.sh Signed-off-by: Yuto Takano --- tests/scripts/list-identifiers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/list-identifiers.sh b/tests/scripts/list-identifiers.sh index 49ecc93ef..781c609fc 100755 --- a/tests/scripts/list-identifiers.sh +++ b/tests/scripts/list-identifiers.sh @@ -63,4 +63,4 @@ is a thin wrapper around list_internal_identifiers.py. check-names.sh, which used to depend on this script, has been replaced with check_names.py and is now self-complete. EOF -fi \ No newline at end of file +fi From fb86ac70f5ca4c19d39bf219c9dcd1499db2c281 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Mon, 16 Aug 2021 10:32:40 +0100 Subject: [PATCH 55/68] Comment Match.__str__ and use format() to simplify calculation Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index 591389b96..5dc38fc52 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -77,15 +77,16 @@ class Match(): # pylint: disable=too-few-public-methods self.name = name def __str__(self): - ln_str = str(self.pos[0]) - gutter_len = max(4, len(ln_str)) - gutter = (gutter_len - len(ln_str)) * " " + ln_str + """ + Return a formatted code listing representation of the erroneous line. + """ + gutter = format(self.pos[0], "4d") underline = self.pos[1] * " " + (self.pos[2] - self.pos[1]) * "^" return ( - " {0} |\n".format(gutter_len * " ") + + " {0} |\n".format(" " * len(gutter)) + " {0} | {1}".format(gutter, self.line) + - " {0} | {1}\n".format(gutter_len * " ", underline) + " {0} | {1}\n".format(" " * len(gutter), underline) ) class Problem(): # pylint: disable=too-few-public-methods From 8246eb8fb61dd577625f2a35f0aa200154757f15 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Mon, 16 Aug 2021 10:37:24 +0100 Subject: [PATCH 56/68] Fix English typos in comments of check_names and list-identifiers Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 9 +++++---- tests/scripts/list-identifiers.sh | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index 5dc38fc52..113854c28 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -39,7 +39,7 @@ NameChecker performs the following checks: - Typo checking: All words that begin with MBED exist as macros or constants. The script returns 0 on success, 1 on test failure, and 2 if there is a script -error error. Must be run from Mbed TLS root. +error. It must be run from Mbed TLS root. """ import argparse @@ -429,8 +429,9 @@ class CodeParser(): def parse_identifiers(self, include, exclude=None): """ - Parse all lines of a header where a function identifier is declared, - based on some huersitics. Highly dependent on formatting style. + Parse all lines of a header where a function/enum/struct/union/typedef + identifier is declared, based on some heuristics. Highly dependent on + formatting style. Note: .match() checks at the beginning of the string (implicit ^), while .search() checks throughout. @@ -509,7 +510,7 @@ class CodeParser(): line = previous_line.strip() + " " + line.strip() previous_line = "" - # Skip parsing if line has a space in front = hueristic to + # Skip parsing if line has a space in front = heuristic to # skip function argument lines (highly subject to formatting # changes) if line[0] == " ": diff --git a/tests/scripts/list-identifiers.sh b/tests/scripts/list-identifiers.sh index 781c609fc..9b930802f 100755 --- a/tests/scripts/list-identifiers.sh +++ b/tests/scripts/list-identifiers.sh @@ -56,9 +56,9 @@ else cat < Date: Mon, 16 Aug 2021 10:39:24 +0100 Subject: [PATCH 57/68] Remove unnecessary try/catch in list_internal_identifiers The try/catch was used to catch Exceptions and exit with code 1, a legacy from check_names.py which uses the pattern to exit with code 2. But code 1 is the default for the Python runtime anyway, so it is redundant and can be removed. Signed-off-by: Yuto Takano --- tests/scripts/list_internal_identifiers.py | 23 +++++++++------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/tests/scripts/list_internal_identifiers.py b/tests/scripts/list_internal_identifiers.py index f18491bad..d1b55138f 100755 --- a/tests/scripts/list_internal_identifiers.py +++ b/tests/scripts/list_internal_identifiers.py @@ -44,21 +44,16 @@ def main(): parser.parse_args() - try: - name_check = CodeParser(logging.getLogger()) - result = name_check.parse_identifiers([ - "include/mbedtls/*_internal.h", - "library/*.h" - ]) - result.sort(key=lambda x: x.name) + name_check = CodeParser(logging.getLogger()) + result = name_check.parse_identifiers([ + "include/mbedtls/*_internal.h", + "library/*.h" + ]) + result.sort(key=lambda x: x.name) - identifiers = ["{}\n".format(match.name) for match in result] - with open("identifiers", "w", encoding="utf-8") as f: - f.writelines(identifiers) - - except Exception: # pylint: disable=broad-except - traceback.print_exc() - sys.exit(1) + identifiers = ["{}\n".format(match.name) for match in result] + with open("identifiers", "w", encoding="utf-8") as f: + f.writelines(identifiers) if __name__ == "__main__": main() From 9d9c6dc46e00889c93f565da6d6fe08490414444 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Mon, 16 Aug 2021 10:43:45 +0100 Subject: [PATCH 58/68] Align the item counts in check_names for ease of reading Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index 113854c28..b12f40644 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -258,11 +258,12 @@ class CodeParser(): actual_macros.append(macro) self.log.debug("Found:") - self.log.debug(" {} Total Macros".format(len(all_macros))) - self.log.debug(" {} Non-identifier Macros".format(len(actual_macros))) - self.log.debug(" {} Enum Constants".format(len(enum_consts))) - self.log.debug(" {} Identifiers".format(len(identifiers))) - self.log.debug(" {} Exported Symbols".format(len(symbols))) + # Aligns the counts on the assumption that none exceeds 4 digits + self.log.debug(" {:4} Total Macros".format(len(all_macros))) + self.log.debug(" {:4} Non-identifier Macros".format(len(actual_macros))) + self.log.debug(" {:4} Enum Constants".format(len(enum_consts))) + self.log.debug(" {:4} Identifiers".format(len(identifiers))) + self.log.debug(" {:4} Exported Symbols".format(len(symbols))) return { "macros": actual_macros, "enum_consts": enum_consts, From 90bc026913b50e7ebe200ac9a2935367b774e485 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Mon, 16 Aug 2021 11:34:10 +0100 Subject: [PATCH 59/68] Exclusively use re.search() to avoid confusion with .match() Also fix newline being removed when lines were concatenated Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 42 +++++++++++++++++------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index b12f40644..c0fc20fcc 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -407,16 +407,16 @@ class CodeParser(): # Match typedefs and brackets only when they are at the # beginning of the line -- if they are indented, they might # be sub-structures within structs, etc. - if state == 0 and re.match(r"^(typedef +)?enum +{", line): + if state == 0 and re.search(r"^(typedef +)?enum +{", line): state = 1 - elif state == 0 and re.match(r"^(typedef +)?enum", line): + elif state == 0 and re.search(r"^(typedef +)?enum", line): state = 2 - elif state == 2 and re.match(r"^{", line): + elif state == 2 and re.search(r"^{", line): state = 1 - elif state == 1 and re.match(r"^}", line): + elif state == 1 and re.search(r"^}", line): state = 0 - elif state == 1 and not re.match(r" *#", line): - enum_const = re.match(r" *(?P\w+)", line) + elif state == 1 and not re.search(r"^ *#", line): + enum_const = re.search(r"^ *(?P\w+)", line) if not enum_const: continue @@ -433,8 +433,6 @@ class CodeParser(): Parse all lines of a header where a function/enum/struct/union/typedef identifier is declared, based on some heuristics. Highly dependent on formatting style. - Note: .match() checks at the beginning of the string (implicit ^), while - .search() checks throughout. Args: * include: A List of glob expressions to look for files through. @@ -459,12 +457,12 @@ class CodeParser(): ) exclusion_lines = re.compile( r"^(" - r"extern +\"C\"|" - r"(typedef +)?(struct|union|enum)( *{)?$|" - r"} *;?$|" - r"$|" - r"//|" - r"#" + r"extern +\"C\"|" + r"(typedef +)?(struct|union|enum)( *{)?$|" + r"} *;?$|" + r"$|" + r"//|" + r"#" r")" ) @@ -493,7 +491,7 @@ class CodeParser(): previous_line = "" continue - if exclusion_lines.match(line): + if exclusion_lines.search(line): previous_line = "" continue @@ -501,14 +499,14 @@ class CodeParser(): # characters (or underscore, asterisk, or, open bracket), # and nothing else, high chance it's a declaration that # continues on the next line - if re.match(r"^([\w\*\(]+\s+)+$", line): + if re.search(r"^([\w\*\(]+\s+)+$", line): previous_line += line continue # If previous line seemed to start an unfinished declaration # (as above), concat and treat them as one. if previous_line: - line = previous_line.strip() + " " + line.strip() + line = previous_line.strip() + " " + line.strip() + "\n" previous_line = "" # Skip parsing if line has a space in front = heuristic to @@ -626,8 +624,8 @@ class CodeParser(): ).stdout for line in nm_output.splitlines(): - if not nm_undefined_regex.match(line): - symbol = nm_valid_regex.match(line) + if not nm_undefined_regex.search(line): + symbol = nm_valid_regex.search(line) if (symbol and not symbol.group("symbol").startswith(exclusions)): symbols.append(symbol.group("symbol")) else: @@ -718,10 +716,10 @@ class NameChecker(): problems = [] for item_match in self.parse_result[group_to_check]: - if not re.match(check_pattern, item_match.name): + if not re.search(check_pattern, item_match.name): problems.append(PatternMismatch(check_pattern, item_match)) - # Double underscore is a reserved identifier, never to be used - if re.match(r".*__.*", item_match.name): + # Double underscore should not be used for names + if re.search(r".*__.*", item_match.name): problems.append(PatternMismatch("double underscore", item_match)) self.output_check_result( From 6adb2879602f109830855467bc530a2c014e9b10 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Mon, 16 Aug 2021 11:38:34 +0100 Subject: [PATCH 60/68] Move duplicated behaviour in get_files to own function Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index c0fc20fcc..10ed5bba3 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -290,26 +290,20 @@ class CodeParser(): # exclude_wildcards may be None. Also, consider the global exclusions. exclude_wildcards = (exclude_wildcards or []) + self.excluded_files - # Perform set union on the glob results. Memoise individual sets. + # Internal function to hit the memoisation cache or add to it the result + # of a glob operation. Used both for inclusion and exclusion since the + # only difference between them is whether they perform set union or + # difference on the return value of this function. + def hit_cache(wildcard): + if wildcard not in self.files: + self.files[wildcard] = set(glob.glob(wildcard, recursive=True)) + return self.files[wildcard] + for include_wildcard in include_wildcards: - if include_wildcard not in self.files: - self.files[include_wildcard] = set(glob.glob( - include_wildcard, - recursive=True - )) + accumulator = accumulator.union(hit_cache(include_wildcard)) - accumulator = accumulator.union(self.files[include_wildcard]) - - # Perform set difference to exclude. Also use the same memo since their - # behaviour is pretty much identical and it can benefit from the cache. for exclude_wildcard in exclude_wildcards: - if exclude_wildcard not in self.files: - self.files[exclude_wildcard] = set(glob.glob( - exclude_wildcard, - recursive=True - )) - - accumulator = accumulator.difference(self.files[exclude_wildcard]) + accumulator = accumulator.difference(hit_cache(exclude_wildcard)) return list(accumulator) From 5473be29142350fdb8b6f9b2c4c16319b8e5a169 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Tue, 17 Aug 2021 10:14:01 +0100 Subject: [PATCH 61/68] Use a class variable for `quiet` instead of passing it around Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index 10ed5bba3..16d5aba8b 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -93,8 +93,9 @@ class Problem(): # pylint: disable=too-few-public-methods """ A parent class representing a form of static analysis error. """ + # Class variable to control the quietness of all problems + quiet = False def __init__(self): - self.quiet = False self.textwrapper = textwrap.TextWrapper() self.textwrapper.width = 80 self.textwrapper.initial_indent = " > " @@ -644,8 +645,9 @@ class NameChecker(): * quiet: whether to hide detailed problem explanation. """ self.log.info("=============") + Problem.quiet = quiet problems = 0 - problems += self.check_symbols_declared_in_header(quiet) + problems += self.check_symbols_declared_in_header() pattern_checks = [ ("macros", MACRO_PATTERN), @@ -653,9 +655,9 @@ class NameChecker(): ("identifiers", IDENTIFIER_PATTERN) ] for group, check_pattern in pattern_checks: - problems += self.check_match_pattern(quiet, group, check_pattern) + problems += self.check_match_pattern(group, check_pattern) - problems += self.check_for_typos(quiet) + problems += self.check_for_typos() self.log.info("=============") if problems > 0: @@ -669,15 +671,12 @@ class NameChecker(): self.log.info("PASS") return 0 - def check_symbols_declared_in_header(self, quiet): + def check_symbols_declared_in_header(self): """ Perform a check that all detected symbols in the library object files are properly declared in headers. Assumes parse_names_in_source() was called before this. - Args: - * quiet: whether to hide detailed problem explanation. - Returns the number of problems that need fixing. """ problems = [] @@ -692,16 +691,15 @@ class NameChecker(): if not found_symbol_declared: problems.append(SymbolNotInHeader(symbol)) - self.output_check_result(quiet, "All symbols in header", problems) + self.output_check_result("All symbols in header", problems) return len(problems) - def check_match_pattern(self, quiet, group_to_check, check_pattern): + def check_match_pattern(self, group_to_check, check_pattern): """ Perform a check that all items of a group conform to a regex pattern. Assumes parse_names_in_source() was called before this. Args: - * quiet: whether to hide detailed problem explanation. * group_to_check: string key to index into self.parse_result. * check_pattern: the regex to check against. @@ -717,20 +715,16 @@ class NameChecker(): problems.append(PatternMismatch("double underscore", item_match)) self.output_check_result( - quiet, "Naming patterns of {}".format(group_to_check), problems) return len(problems) - def check_for_typos(self, quiet): + def check_for_typos(self): """ Perform a check that all words in the soure code beginning with MBED are either defined as macros, or as enum constants. Assumes parse_names_in_source() was called before this. - Args: - * quiet: whether to hide detailed problem explanation. - Returns the number of problems that need fixing. """ problems = [] @@ -757,23 +751,21 @@ class NameChecker(): if not found and not typo_exclusion.search(name_match.name): problems.append(Typo(name_match)) - self.output_check_result(quiet, "Likely typos", problems) + self.output_check_result("Likely typos", problems) return len(problems) - def output_check_result(self, quiet, name, problems): + def output_check_result(self, name, problems): """ Write out the PASS/FAIL status of a performed check depending on whether there were problems. Args: - * quiet: whether to hide detailed problem explanation. * name: the name of the test * problems: a List of encountered Problems """ if problems: self.log.info("{}: FAIL\n".format(name)) for problem in problems: - problem.quiet = quiet self.log.warning(str(problem)) else: self.log.info("{}: PASS".format(name)) From b1417b4554bd7b52d6b413035df1a886c3ef6152 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Tue, 17 Aug 2021 10:30:20 +0100 Subject: [PATCH 62/68] Use Enums for the enum-parsing state machine Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 46 ++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index 16d5aba8b..ecb00454a 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -49,6 +49,7 @@ import os import sys import traceback import re +import enum import shutil import subprocess import logging @@ -390,27 +391,33 @@ class CodeParser(): files = self.get_files(include, exclude) self.log.debug("Looking for enum consts in {} files".format(len(files))) + # Emulate a finite state machine to parse enum declarations. + # OUTSIDE_KEYWORD = outside the enum keyword + # IN_BRACES = inside enum opening braces + # IN_BETWEEN = between enum keyword and opening braces + states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"]) enum_consts = [] for header_file in files: - # Emulate a finite state machine to parse enum declarations. - # 0 = not in enum - # 1 = inside enum - # 2 = almost inside enum - state = 0 + state = states.OUTSIDE_KEYWORD with open(header_file, "r", encoding="utf-8") as header: for line_no, line in enumerate(header): # Match typedefs and brackets only when they are at the # beginning of the line -- if they are indented, they might # be sub-structures within structs, etc. - if state == 0 and re.search(r"^(typedef +)?enum +{", line): - state = 1 - elif state == 0 and re.search(r"^(typedef +)?enum", line): - state = 2 - elif state == 2 and re.search(r"^{", line): - state = 1 - elif state == 1 and re.search(r"^}", line): - state = 0 - elif state == 1 and not re.search(r"^ *#", line): + if (state == states.OUTSIDE_KEYWORD and + re.search(r"^(typedef +)?enum +{", line)): + state = states.IN_BRACES + elif (state == states.OUTSIDE_KEYWORD and + re.search(r"^(typedef +)?enum", line)): + state = states.IN_BETWEEN + elif (state == states.IN_BETWEEN and + re.search(r"^{", line)): + state = states.IN_BRACES + elif (state == states.IN_BRACES and + re.search(r"^}", line)): + state = states.OUTSIDE_KEYWORD + elif (state == states.IN_BRACES and + not re.search(r"^ *#", line)): enum_const = re.search(r"^ *(?P\w+)", line) if not enum_const: continue @@ -418,7 +425,9 @@ class CodeParser(): enum_consts.append(Match( header_file, line, - (line_no, enum_const.start(), enum_const.end()), + (line_no, + enum_const.start("enum_const"), + enum_const.end("enum_const")), enum_const.group("enum_const"))) return enum_consts @@ -426,8 +435,8 @@ class CodeParser(): def parse_identifiers(self, include, exclude=None): """ Parse all lines of a header where a function/enum/struct/union/typedef - identifier is declared, based on some heuristics. Highly dependent on - formatting style. + identifier is declared, based on some regex and heuristics. Highly + dependent on formatting style. Args: * include: A List of glob expressions to look for files through. @@ -469,7 +478,8 @@ class CodeParser(): with open(header_file, "r", encoding="utf-8") as header: in_block_comment = False # The previous line variable is used for concatenating lines - # when identifiers are formatted and spread across multiple. + # when identifiers are formatted and spread across multiple + # lines. previous_line = "" for line_no, line in enumerate(header): From 704b0f77e1eeed77d7653edb20e5df0ef8ae7ba0 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Tue, 17 Aug 2021 10:41:23 +0100 Subject: [PATCH 63/68] Use .span() for positions, and separate line_no argument in Match This reverts a previous change where line_no was removed and put into a triple tuple. It was discovered that re.Match.span() conveniently returns (start, end), so separating line_no again makes the code cleaner. The legibility of the code heavily outweighs the issues pointed out by Pylint (hence disabled). Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index ecb00454a..604dfd416 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -67,13 +67,15 @@ class Match(): # pylint: disable=too-few-public-methods Fields: * filename: the file that the match was in. * line: the full line containing the match. - * pos: a tuple of (line_no, start, end) positions on the file line where the - match is. + * line_no: the line number. + * pos: a tuple of (start, end) positions on the line where the match is. * name: the match itself. """ - def __init__(self, filename, line, pos, name): + def __init__(self, filename, line, line_no, pos, name): + # pylint: disable=too-many-arguments self.filename = filename self.line = line + self.line_no = line_no self.pos = pos self.name = name @@ -81,8 +83,8 @@ class Match(): # pylint: disable=too-few-public-methods """ Return a formatted code listing representation of the erroneous line. """ - gutter = format(self.pos[0], "4d") - underline = self.pos[1] * " " + (self.pos[2] - self.pos[1]) * "^" + gutter = format(self.line_no, "4d") + underline = self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^" return ( " {0} |\n".format(" " * len(gutter)) + @@ -338,7 +340,8 @@ class CodeParser(): macros.append(Match( header_file, line, - (line_no, macro.start(), macro.end()), + line_no, + macro.span("macro"), macro.group("macro"))) return macros @@ -372,9 +375,9 @@ class CodeParser(): mbed_words.append(Match( filename, line, - (line_no, name.start(), name.end()), - name.group(0) - )) + line_no, + name.span(0), + name.group(0))) return mbed_words @@ -425,9 +428,8 @@ class CodeParser(): enum_consts.append(Match( header_file, line, - (line_no, - enum_const.start("enum_const"), - enum_const.end("enum_const")), + line_no, + enum_const.span("enum_const"), enum_const.group("enum_const"))) return enum_consts @@ -533,7 +535,8 @@ class CodeParser(): identifiers.append(Match( header_file, line, - (line_no, identifier.start(), identifier.end()), + line_no, + identifier.span(), group)) return identifiers @@ -722,7 +725,8 @@ class NameChecker(): problems.append(PatternMismatch(check_pattern, item_match)) # Double underscore should not be used for names if re.search(r".*__.*", item_match.name): - problems.append(PatternMismatch("double underscore", item_match)) + problems.append( + PatternMismatch("no double underscore allowed", item_match)) self.output_check_result( "Naming patterns of {}".format(group_to_check), From 4b7d23dfa6cf6806c8bcd500000b5ea749f22ae5 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Tue, 17 Aug 2021 10:48:22 +0100 Subject: [PATCH 64/68] Separate make clean and make lib in check_names Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index 604dfd416..3f65b44d0 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -569,8 +569,15 @@ class CodeParser(): ) my_environment = os.environ.copy() my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables" + # Run make clean separately to lib to prevent unwanted behavior when + # make is invoked with parallelism. subprocess.run( - ["make", "clean", "lib"], + ["make", "clean"], + universal_newlines=True, + check=True + ) + subprocess.run( + ["make", "lib"], env=my_environment, universal_newlines=True, stdout=subprocess.PIPE, From 3590691bad2d8f1a4cef47edc00114a8ba4faab6 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Tue, 17 Aug 2021 11:05:43 +0100 Subject: [PATCH 65/68] Fix issues raised by Pylint 2.4.4 on CI Locally they were unreported by Pylint 2.9.2. Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index 3f65b44d0..a5cbd70ca 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -408,7 +408,7 @@ class CodeParser(): # beginning of the line -- if they are indented, they might # be sub-structures within structs, etc. if (state == states.OUTSIDE_KEYWORD and - re.search(r"^(typedef +)?enum +{", line)): + re.search(r"^(typedef +)?enum +{", line)): state = states.IN_BRACES elif (state == states.OUTSIDE_KEYWORD and re.search(r"^(typedef +)?enum", line)): @@ -461,9 +461,10 @@ class CodeParser(): # Match names of typedef instances, after closing bracket. r"}? *(\w+)[;[].*" ) + # The regex below is indented for clarity. exclusion_lines = re.compile( r"^(" - r"extern +\"C\"|" + r"extern +\"C\"|" # pylint: disable=bad-continuation r"(typedef +)?(struct|union|enum)( *{)?$|" r"} *;?$|" r"$|" From 71432096042951b894fb300a7cc7b59c7a69c8f9 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Tue, 17 Aug 2021 12:44:16 +0100 Subject: [PATCH 66/68] Remove unused imports in list_internal_identifiers.py Signed-off-by: Yuto Takano --- tests/scripts/list_internal_identifiers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/scripts/list_internal_identifiers.py b/tests/scripts/list_internal_identifiers.py index d1b55138f..779a16ffb 100755 --- a/tests/scripts/list_internal_identifiers.py +++ b/tests/scripts/list_internal_identifiers.py @@ -30,8 +30,6 @@ Must be run from Mbed TLS root. import argparse import logging -import traceback -import sys from check_names import CodeParser def main(): From 5f831719994c94606933a70bc275194efc87b0f4 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Wed, 18 Aug 2021 18:03:24 +0100 Subject: [PATCH 67/68] Fix listing line number wrongly using start char pos Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index a5cbd70ca..0eba96740 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -144,14 +144,14 @@ class PatternMismatch(Problem): # pylint: disable=too-few-public-methods if self.quiet: return ( "{0}:{1}:{2}" - .format(self.match.filename, self.match.pos[0], self.match.name) + .format(self.match.filename, self.match.line_no, self.match.name) ) return self.textwrapper.fill( "{0}:{1}: '{2}' does not match the required pattern '{3}'." .format( self.match.filename, - self.match.pos[0], + self.match.line_no, self.match.name, self.pattern ) @@ -173,14 +173,14 @@ class Typo(Problem): # pylint: disable=too-few-public-methods if self.quiet: return ( "{0}:{1}:{2}" - .format(self.match.filename, self.match.pos[0], self.match.name) + .format(self.match.filename, self.match.line_no, self.match.name) ) return self.textwrapper.fill( "{0}:{1}: '{2}' looks like a typo. It was not found in any " "macros or any enums. If this is not a typo, put " "//no-check-names after it." - .format(self.match.filename, self.match.pos[0], self.match.name) + .format(self.match.filename, self.match.line_no, self.match.name) ) + "\n" + str(self.match) class CodeParser(): From fc1e9ffcb21fa4e6b23f912522f821c64e12da28 Mon Sep 17 00:00:00 2001 From: Yuto Takano Date: Mon, 23 Aug 2021 13:54:56 +0100 Subject: [PATCH 68/68] Use Abstract Base Classes to ensure Problem is not instantiated - Problem() is a parent abstract class that should only be used for subclassing. - With the help of ABC, implement abstract methods that force subclasses to implement quiet and verbose outputs. - The repeated logic of "if self.quiet" is consolidated in Problem. Signed-off-by: Yuto Takano --- tests/scripts/check_names.py | 60 ++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 17 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index 0eba96740..a9aa118ea 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -42,6 +42,7 @@ The script returns 0 on success, 1 on test failure, and 2 if there is a script error. It must be run from Mbed TLS root. """ +import abc import argparse import glob import textwrap @@ -92,9 +93,11 @@ class Match(): # pylint: disable=too-few-public-methods " {0} | {1}\n".format(" " * len(gutter), underline) ) -class Problem(): # pylint: disable=too-few-public-methods +class Problem(abc.ABC): # pylint: disable=too-few-public-methods """ - A parent class representing a form of static analysis error. + An abstract parent class representing a form of static analysis error. + It extends an Abstract Base Class, which means it is not instantiable, and + it also mandates certain abstract methods to be implemented in subclasses. """ # Class variable to control the quietness of all problems quiet = False @@ -104,6 +107,28 @@ class Problem(): # pylint: disable=too-few-public-methods self.textwrapper.initial_indent = " > " self.textwrapper.subsequent_indent = " " + def __str__(self): + """ + Unified string representation method for all Problems. + """ + if self.__class__.quiet: + return self.quiet_output() + return self.verbose_output() + + @abc.abstractmethod + def quiet_output(self): + """ + The output when --quiet is enabled. + """ + pass + + @abc.abstractmethod + def verbose_output(self): + """ + The default output with explanation and code snippet if appropriate. + """ + pass + class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods """ A problem that occurs when an exported/available symbol in the object file @@ -117,10 +142,10 @@ class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods self.symbol_name = symbol_name Problem.__init__(self) - def __str__(self): - if self.quiet: - return "{0}".format(self.symbol_name) + def quiet_output(self): + return "{0}".format(self.symbol_name) + def verbose_output(self): return self.textwrapper.fill( "'{0}' was found as an available symbol in the output of nm, " "however it was not declared in any header files." @@ -140,13 +165,14 @@ class PatternMismatch(Problem): # pylint: disable=too-few-public-methods self.match = match Problem.__init__(self) - def __str__(self): - if self.quiet: - return ( - "{0}:{1}:{2}" - .format(self.match.filename, self.match.line_no, self.match.name) - ) + def quiet_output(self): + return ( + "{0}:{1}:{2}" + .format(self.match.filename, self.match.line_no, self.match.name) + ) + + def verbose_output(self): return self.textwrapper.fill( "{0}:{1}: '{2}' does not match the required pattern '{3}'." .format( @@ -169,13 +195,13 @@ class Typo(Problem): # pylint: disable=too-few-public-methods self.match = match Problem.__init__(self) - def __str__(self): - if self.quiet: - return ( - "{0}:{1}:{2}" - .format(self.match.filename, self.match.line_no, self.match.name) - ) + def quiet_output(self): + return ( + "{0}:{1}:{2}" + .format(self.match.filename, self.match.line_no, self.match.name) + ) + def verbose_output(self): return self.textwrapper.fill( "{0}:{1}: '{2}' looks like a typo. It was not found in any " "macros or any enums. If this is not a typo, put "