diff --git a/tests/scripts/check_files.py b/tests/scripts/check_files.py index 31edf8b61..554ed4797 100755 --- a/tests/scripts/check_files.py +++ b/tests/scripts/check_files.py @@ -12,6 +12,7 @@ Note: requires python 3, must be run from Mbed TLS root. import argparse import codecs +import inspect import logging import os import re @@ -345,6 +346,84 @@ class MergeArtifactIssueTracker(LineIssueTracker): return False +THIS_FILE_BASE_NAME = \ + os.path.basename(inspect.getframeinfo(inspect.currentframe()).filename) +LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = \ + inspect.getframeinfo(inspect.currentframe()).lineno +class LicenseIssueTracker(LineIssueTracker): + """Check copyright statements and license indications. + + This class only checks that statements are correct if present. It does + not enforce the presence of statements in each file. + """ + + heading = "License issue:" + + LICENSE_EXEMPTION_RE_LIST = [ + # Third-party code, other than whitelisted third-party modules, + # may be under a different license. + r'3rdparty/(?!(p256-m)/.*)', + # Documentation explaining the license may have accidental + # false positives. + r'(ChangeLog|LICENSE|[-0-9A-Z_a-z]+\.md)\Z', + # Files imported from TF-M, and not used except in test builds, + # may be under a different license. + r'configs/crypto_config_profile_medium\.h\Z', + r'configs/tfm_mbedcrypto_config_profile_medium\.h\Z', + # Third-party file. + r'dco\.txt\Z', + ] + path_exemptions = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST + + LICENSE_EXEMPTION_RE_LIST)) + + COPYRIGHT_HOLDER = rb'The Mbed TLS Contributors' + # Catch "Copyright foo", "Copyright (C) foo", "Copyright © foo", etc. + COPYRIGHT_RE = re.compile(rb'.*\bcopyright\s+((?:\w|\s|[()]|[^ -~])*\w)', re.I) + + SPDX_HEADER_KEY = b'SPDX-License-Identifier' + LICENSE_IDENTIFIER = b'Apache-2.0 OR GPL-2.0-or-later' + SPDX_RE = re.compile(br'.*?(' + + re.escape(SPDX_HEADER_KEY) + + br')(:\s*(.*?)\W*\Z|.*)', re.I) + + def __init__(self): + super().__init__() + # Record what problem was caused. We can't easily report it due to + # the structure of the script. To be fixed after + # https://github.com/Mbed-TLS/mbedtls/pull/2506 + self.problem = None + + def issue_with_line(self, line, filepath, line_number): + # Use endswith() rather than the more correct os.path.basename() + # because experimentally, it makes a significant difference to + # the running time. + if filepath.endswith(THIS_FILE_BASE_NAME) and \ + line_number > LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER: + # Avoid false positives from the code in this class. + # Also skip the rest of this file, which is highly unlikely to + # contain any problematic statements since we put those near the + # top of files. + return False + + m = self.COPYRIGHT_RE.match(line) + if m and m.group(1) != self.COPYRIGHT_HOLDER: + self.problem = 'Invalid copyright line' + return True + + m = self.SPDX_RE.match(line) + if m: + if m.group(1) != self.SPDX_HEADER_KEY: + self.problem = 'Misspelled ' + self.SPDX_HEADER_KEY.decode() + return True + if not m.group(3): + self.problem = 'Improperly formatted SPDX license identifier' + return True + if m.group(3) != self.LICENSE_IDENTIFIER: + self.problem = 'Wrong SPDX license identifier' + return True + return False + + class IntegrityChecker: """Sanity-check files under the current directory.""" @@ -365,6 +444,7 @@ class IntegrityChecker: TrailingWhitespaceIssueTracker(), TabIssueTracker(), MergeArtifactIssueTracker(), + LicenseIssueTracker(), ] def setup_logger(self, log_file, level=logging.INFO):