Check copyright statements and SPDX license identifier

Enforce a specific copyright statement and a specific SPDX license
identifier where they are present.

Binary files, third-party modules and a few other exceptions are not
checked.

There is currently no check that copyright statements and license
identifiers are present.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
This commit is contained in:
Gilles Peskine 2023-11-03 14:13:55 +01:00
parent 990030bce0
commit f2fb9f667c

View file

@ -12,6 +12,7 @@ Note: requires python 3, must be run from Mbed TLS root.
import argparse
import codecs
import inspect
import logging
import os
import re
@ -345,6 +346,84 @@ class MergeArtifactIssueTracker(LineIssueTracker):
return False
THIS_FILE_BASE_NAME = \
os.path.basename(inspect.getframeinfo(inspect.currentframe()).filename)
LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = \
inspect.getframeinfo(inspect.currentframe()).lineno
class LicenseIssueTracker(LineIssueTracker):
"""Check copyright statements and license indications.
This class only checks that statements are correct if present. It does
not enforce the presence of statements in each file.
"""
heading = "License issue:"
LICENSE_EXEMPTION_RE_LIST = [
# Third-party code, other than whitelisted third-party modules,
# may be under a different license.
r'3rdparty/(?!(p256-m)/.*)',
# Documentation explaining the license may have accidental
# false positives.
r'(ChangeLog|LICENSE|[-0-9A-Z_a-z]+\.md)\Z',
# Files imported from TF-M, and not used except in test builds,
# may be under a different license.
r'configs/crypto_config_profile_medium\.h\Z',
r'configs/tfm_mbedcrypto_config_profile_medium\.h\Z',
# Third-party file.
r'dco\.txt\Z',
]
path_exemptions = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST +
LICENSE_EXEMPTION_RE_LIST))
COPYRIGHT_HOLDER = rb'The Mbed TLS Contributors'
# Catch "Copyright foo", "Copyright (C) foo", "Copyright © foo", etc.
COPYRIGHT_RE = re.compile(rb'.*\bcopyright\s+((?:\w|\s|[()]|[^ -~])*\w)', re.I)
SPDX_HEADER_KEY = b'SPDX-License-Identifier'
LICENSE_IDENTIFIER = b'Apache-2.0 OR GPL-2.0-or-later'
SPDX_RE = re.compile(br'.*?(' +
re.escape(SPDX_HEADER_KEY) +
br')(:\s*(.*?)\W*\Z|.*)', re.I)
def __init__(self):
super().__init__()
# Record what problem was caused. We can't easily report it due to
# the structure of the script. To be fixed after
# https://github.com/Mbed-TLS/mbedtls/pull/2506
self.problem = None
def issue_with_line(self, line, filepath, line_number):
# Use endswith() rather than the more correct os.path.basename()
# because experimentally, it makes a significant difference to
# the running time.
if filepath.endswith(THIS_FILE_BASE_NAME) and \
line_number > LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER:
# Avoid false positives from the code in this class.
# Also skip the rest of this file, which is highly unlikely to
# contain any problematic statements since we put those near the
# top of files.
return False
m = self.COPYRIGHT_RE.match(line)
if m and m.group(1) != self.COPYRIGHT_HOLDER:
self.problem = 'Invalid copyright line'
return True
m = self.SPDX_RE.match(line)
if m:
if m.group(1) != self.SPDX_HEADER_KEY:
self.problem = 'Misspelled ' + self.SPDX_HEADER_KEY.decode()
return True
if not m.group(3):
self.problem = 'Improperly formatted SPDX license identifier'
return True
if m.group(3) != self.LICENSE_IDENTIFIER:
self.problem = 'Wrong SPDX license identifier'
return True
return False
class IntegrityChecker:
"""Sanity-check files under the current directory."""
@ -365,6 +444,7 @@ class IntegrityChecker:
TrailingWhitespaceIssueTracker(),
TabIssueTracker(),
MergeArtifactIssueTracker(),
LicenseIssueTracker(),
]
def setup_logger(self, log_file, level=logging.INFO):