Merge pull request #3458 from gilles-peskine-arm/analyze_outcomes-count_test_cases-1

Test outcome analysis: check that all available test cases have been executed
Gilles Peskine, 2020-07-03 15:12:44 +02:00, committed by GitHub
commit 2426506fa0
7 changed files with 334 additions and 148 deletions


@@ -22,7 +22,7 @@ Each test case has a description which succinctly describes for a human audience
 * Make the description descriptive. “foo: x=2, y=4” is more descriptive than “foo #2”. “foo: 0<x<y, both even” is even better if these inequalities and parities are why this particular test data was chosen.
 * Avoid changing the description of an existing test case without a good reason. This breaks the tracking of failures across CI runs, since this tracking is based on the descriptions.

-`tests/scripts/check-test-cases.py` enforces some rules and warns if some guidelines are violated.
+`tests/scripts/check_test_cases.py` enforces some rules and warns if some guidelines are violated.

 ## TLS tests

@@ -32,7 +32,7 @@ Each test case has a description which succinctly describes for a human audience
 Each test case in `ssl-opt.sh` has a description which succinctly describes for a human audience what the test does. The test description is the first parameter to `run_tests`.

-The same rules and guidelines apply as for [unit test descriptions](#unit-test-descriptions). In addition, the description must be written on the same line as `run_test`, in double quotes, for the sake of `check-test-cases.py`.
+The same rules and guidelines apply as for [unit test descriptions](#unit-test-descriptions). In addition, the description must be written on the same line as `run_test`, in double quotes, for the sake of `check_test_cases.py`.

 ## Running tests
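For illustration of the rules mentioned above, here is a minimal stand-alone sketch (hypothetical, not part of this PR) of the kind of checks that check_test_cases.py applies to a single description, as shown in the new script further down in this diff: printable ASCII only, no tab or semicolon, and a warning above 66 characters.

#!/usr/bin/env python3
"""Hypothetical sketch: validate one test case description."""
import re

def description_problems(description):
    """Return a list of guideline violations for one description (bytes)."""
    problems = []
    if re.search(rb'[\t;]', description):
        problems.append('forbidden character (tab or semicolon)')
    if re.search(rb'[^ -~]', description):
        problems.append('non-ASCII character')
    if len(description) > 66:
        problems.append('description longer than 66 characters')
    return problems

if __name__ == '__main__':
    print(description_problems(b'foo: 0<x<y, both even'))  # expected: []
    print(description_problems(b'foo;2'))  # expected: a forbidden-character entry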


@@ -680,7 +680,7 @@ component_check_doxy_blocks () {
 component_check_files () {
     msg "Check: file sanity checks (permissions, encodings)" # < 1s
-    record_status tests/scripts/check-files.py
+    record_status tests/scripts/check_files.py
 }

 component_check_changelog () {
@@ -707,7 +707,7 @@ component_check_test_cases () {
     else
         opt=''
     fi
-    record_status tests/scripts/check-test-cases.py $opt
+    record_status tests/scripts/check_test_cases.py $opt
     unset opt
 }

tests/scripts/analyze_outcomes.py (new executable file, 131 lines)

@@ -0,0 +1,131 @@
#!/usr/bin/env python3
"""Analyze the test outcomes from a full CI run.
This script can also run on outcomes from a partial run, but the results are
less likely to be useful.
"""
import argparse
import re
import sys
import traceback
import check_test_cases
class Results:
"""Process analysis results."""
def __init__(self):
self.error_count = 0
self.warning_count = 0
@staticmethod
def log(fmt, *args, **kwargs):
sys.stderr.write((fmt + '\n').format(*args, **kwargs))
def error(self, fmt, *args, **kwargs):
self.log('Error: ' + fmt, *args, **kwargs)
self.error_count += 1
def warning(self, fmt, *args, **kwargs):
self.log('Warning: ' + fmt, *args, **kwargs)
self.warning_count += 1
class TestCaseOutcomes:
"""The outcomes of one test case across many configurations."""
# pylint: disable=too-few-public-methods
def __init__(self):
# Collect a list of witnesses of the test case succeeding or failing.
# Currently we don't do anything with witnesses except count them.
# The format of a witness is determined by the read_outcome_file
# function; it's the platform and configuration joined by ';'.
self.successes = []
self.failures = []
def hits(self):
"""Return the number of times a test case has been run.
This includes passes and failures, but not skips.
"""
return len(self.successes) + len(self.failures)
class TestDescriptions(check_test_cases.TestDescriptionExplorer):
"""Collect the available test cases."""
def __init__(self):
super().__init__()
self.descriptions = set()
def process_test_case(self, _per_file_state,
file_name, _line_number, description):
"""Record an available test case."""
base_name = re.sub(r'\.[^.]*$', '', re.sub(r'.*/', '', file_name))
key = ';'.join([base_name, description.decode('utf-8')])
self.descriptions.add(key)
def collect_available_test_cases():
"""Collect the available test cases."""
explorer = TestDescriptions()
explorer.walk_all()
return sorted(explorer.descriptions)
def analyze_coverage(results, outcomes):
"""Check that all available test cases are executed at least once."""
available = collect_available_test_cases()
for key in available:
hits = outcomes[key].hits() if key in outcomes else 0
if hits == 0:
# Make this a warning, not an error, as long as we haven't
# fixed this branch to have full coverage of test cases.
results.warning('Test case not executed: {}', key)
def analyze_outcomes(outcomes):
"""Run all analyses on the given outcome collection."""
results = Results()
analyze_coverage(results, outcomes)
return results
def read_outcome_file(outcome_file):
"""Parse an outcome file and return an outcome collection.
An outcome collection is a dictionary mapping keys to TestCaseOutcomes objects.
The keys are the test suite name and the test case description, separated
by a semicolon.
"""
outcomes = {}
with open(outcome_file, 'r', encoding='utf-8') as input_file:
for line in input_file:
(platform, config, suite, case, result, _cause) = line.split(';')
key = ';'.join([suite, case])
setup = ';'.join([platform, config])
if key not in outcomes:
outcomes[key] = TestCaseOutcomes()
if result == 'PASS':
outcomes[key].successes.append(setup)
elif result == 'FAIL':
outcomes[key].failures.append(setup)
return outcomes
def analyze_outcome_file(outcome_file):
"""Analyze the given outcome file."""
outcomes = read_outcome_file(outcome_file)
return analyze_outcomes(outcomes)
def main():
try:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('outcomes', metavar='OUTCOMES.CSV',
help='Outcome file to analyze')
options = parser.parse_args()
results = analyze_outcome_file(options.outcomes)
if results.error_count > 0:
sys.exit(1)
except Exception: # pylint: disable=broad-except
# Print the backtrace and exit explicitly with our chosen status.
traceback.print_exc()
sys.exit(120)
if __name__ == '__main__':
main()
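As a usage illustration of the outcome file format described in read_outcome_file above (one semicolon-separated record per test run: platform;config;suite;case;result;cause), here is a self-contained, hypothetical sketch of how records are grouped and how hits are counted; the sample data and helper function are invented for this example.

#!/usr/bin/env python3
"""Hypothetical sketch of the outcome grouping done by analyze_outcomes.py."""

# Invented sample records in the platform;config;suite;case;result;cause format.
SAMPLE_OUTCOMES = """\
Linux-x86_64;full;test_suite_aes.ecb;AES-128-ECB Encrypt NIST KAT #1;PASS;
Linux-x86_64;full;ssl-opt;Default;FAIL;
Linux-x86_64;baremetal;test_suite_aes.ecb;AES-128-ECB Encrypt NIST KAT #1;SKIP;unmet dependency
"""

def group_by_test_case(text):
    """Map 'suite;case' keys to lists of (setup, result), like read_outcome_file."""
    outcomes = {}
    for line in text.splitlines():
        platform, config, suite, case, result, _cause = line.split(';')
        key = ';'.join([suite, case])
        setup = ';'.join([platform, config])
        outcomes.setdefault(key, []).append((setup, result))
    return outcomes

if __name__ == '__main__':
    for key, runs in sorted(group_by_test_case(SAMPLE_OUTCOMES).items()):
        # A "hit" is a pass or a failure; skips do not count towards coverage.
        hits = sum(result in ('PASS', 'FAIL') for _setup, result in runs)
        print('{}: {} hit(s)'.format(key, hits))

With a real outcome file, the script added by this PR is invoked as tests/scripts/analyze_outcomes.py OUTCOMES.CSV, per its argparse definition above.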


@@ -4,8 +4,10 @@
 #
 # Purpose
 # -------
-# This runs a rough equivalent of the travis.yml in a Docker container.
-# The tests are run for both clang and gcc.
+# This runs sanity checks and library tests in a Docker container. The tests
+# are run for both clang and gcc. The testing includes a full test run
+# in the default configuration, partial test runs in the reference
+# configurations, and some dependency tests.
 #
 # Notes for users
 # ---------------
@@ -30,12 +32,7 @@
 source tests/scripts/docker_env.sh

-run_in_docker tests/scripts/recursion.pl library/*.c
-run_in_docker tests/scripts/check-generated-files.sh
-run_in_docker tests/scripts/check-doxy-blocks.pl
-run_in_docker tests/scripts/check-names.sh
-run_in_docker tests/scripts/check-files.py
-run_in_docker tests/scripts/doxygen.sh
+run_in_docker tests/scripts/all.sh 'check_*'

 for compiler in clang gcc; do
     run_in_docker -e CC=${compiler} cmake -D CMAKE_BUILD_TYPE:String="Check" .

tests/scripts/check-test-cases.py (deleted file, 136 lines removed)

@@ -1,136 +0,0 @@
#!/usr/bin/env python3
"""Sanity checks for test data.
"""
# Copyright (C) 2019, Arm Limited, All Rights Reserved
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file is part of Mbed TLS (https://tls.mbed.org)
import argparse
import glob
import os
import re
import sys
class Results:
"""Store file and line information about errors or warnings in test suites."""
def __init__(self, options):
self.errors = 0
self.warnings = 0
self.ignore_warnings = options.quiet
def error(self, file_name, line_number, fmt, *args):
sys.stderr.write(('{}:{}:ERROR:' + fmt + '\n').
format(file_name, line_number, *args))
self.errors += 1
def warning(self, file_name, line_number, fmt, *args):
if not self.ignore_warnings:
sys.stderr.write(('{}:{}:Warning:' + fmt + '\n')
.format(file_name, line_number, *args))
self.warnings += 1
def collect_test_directories():
"""Get the relative path for the TLS and Crypto test directories."""
if os.path.isdir('tests'):
tests_dir = 'tests'
elif os.path.isdir('suites'):
tests_dir = '.'
elif os.path.isdir('../suites'):
tests_dir = '..'
directories = [tests_dir]
return directories
def check_description(results, seen, file_name, line_number, description):
"""Check test case descriptions for errors."""
if description in seen:
results.error(file_name, line_number,
'Duplicate description (also line {})',
seen[description])
return
if re.search(br'[\t;]', description):
results.error(file_name, line_number,
'Forbidden character \'{}\' in description',
re.search(br'[\t;]', description).group(0).decode('ascii'))
if re.search(br'[^ -~]', description):
results.error(file_name, line_number,
'Non-ASCII character in description')
if len(description) > 66:
results.warning(file_name, line_number,
'Test description too long ({} > 66)',
len(description))
seen[description] = line_number
def check_test_suite(results, data_file_name):
"""Check the test cases in the given unit test data file."""
in_paragraph = False
descriptions = {}
with open(data_file_name, 'rb') as data_file:
for line_number, line in enumerate(data_file, 1):
line = line.rstrip(b'\r\n')
if not line:
in_paragraph = False
continue
if line.startswith(b'#'):
continue
if not in_paragraph:
# This is a test case description line.
check_description(results, descriptions,
data_file_name, line_number, line)
in_paragraph = True
def check_ssl_opt_sh(results, file_name):
"""Check the test cases in ssl-opt.sh or a file with a similar format."""
descriptions = {}
with open(file_name, 'rb') as file_contents:
for line_number, line in enumerate(file_contents, 1):
# Assume that all run_test calls have the same simple form
# with the test description entirely on the same line as the
# function name.
m = re.match(br'\s*run_test\s+"((?:[^\\"]|\\.)*)"', line)
if not m:
continue
description = m.group(1)
check_description(results, descriptions,
file_name, line_number, description)
def main():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('--quiet', '-q',
action='store_true',
help='Hide warnings')
parser.add_argument('--verbose', '-v',
action='store_false', dest='quiet',
help='Show warnings (default: on; undoes --quiet)')
options = parser.parse_args()
test_directories = collect_test_directories()
results = Results(options)
for directory in test_directories:
for data_file_name in glob.glob(os.path.join(directory, 'suites',
'*.data')):
check_test_suite(results, data_file_name)
ssl_opt_sh = os.path.join(directory, 'ssl-opt.sh')
if os.path.exists(ssl_opt_sh):
check_ssl_opt_sh(results, ssl_opt_sh)
if (results.warnings or results.errors) and not options.quiet:
sys.stderr.write('{}: {} errors, {} warnings\n'
.format(sys.argv[0], results.errors, results.warnings))
sys.exit(1 if results.errors else 0)
if __name__ == '__main__':
main()

tests/scripts/check_test_cases.py (new executable file, 194 lines)

@@ -0,0 +1,194 @@
#!/usr/bin/env python3
"""Sanity checks for test data.
This program contains a class for traversing test cases that can be used
independently of the checks.
"""
# Copyright (C) 2019, Arm Limited, All Rights Reserved
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file is part of Mbed TLS (https://tls.mbed.org)
import argparse
import glob
import os
import re
import sys
class Results:
"""Store file and line information about errors or warnings in test suites."""
def __init__(self, options):
self.errors = 0
self.warnings = 0
self.ignore_warnings = options.quiet
def error(self, file_name, line_number, fmt, *args):
sys.stderr.write(('{}:{}:ERROR:' + fmt + '\n').
format(file_name, line_number, *args))
self.errors += 1
def warning(self, file_name, line_number, fmt, *args):
if not self.ignore_warnings:
sys.stderr.write(('{}:{}:Warning:' + fmt + '\n')
.format(file_name, line_number, *args))
self.warnings += 1
class TestDescriptionExplorer:
"""An iterator over test cases with descriptions.
The test cases that have descriptions are:
* Individual unit tests (entries in a .data file) in test suites.
* Individual test cases in ssl-opt.sh.
This is an abstract class. To use it, derive a class that implements
the process_test_case method, and call walk_all().
"""
def process_test_case(self, per_file_state,
file_name, line_number, description):
"""Process a test case.
per_file_state: an object created by new_per_file_state() at the beginning
of each file.
file_name: a relative path to the file containing the test case.
line_number: the line number in the given file.
description: the test case description as a byte string.
"""
raise NotImplementedError
def new_per_file_state(self):
"""Return a new per-file state object.
The default per-file state object is None. Child classes that require per-file
state may override this method.
"""
#pylint: disable=no-self-use
return None
def walk_test_suite(self, data_file_name):
"""Iterate over the test cases in the given unit test data file."""
in_paragraph = False
descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none
with open(data_file_name, 'rb') as data_file:
for line_number, line in enumerate(data_file, 1):
line = line.rstrip(b'\r\n')
if not line:
in_paragraph = False
continue
if line.startswith(b'#'):
continue
if not in_paragraph:
# This is a test case description line.
self.process_test_case(descriptions,
data_file_name, line_number, line)
in_paragraph = True
def walk_ssl_opt_sh(self, file_name):
"""Iterate over the test cases in ssl-opt.sh or a file with a similar format."""
descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none
with open(file_name, 'rb') as file_contents:
for line_number, line in enumerate(file_contents, 1):
# Assume that all run_test calls have the same simple form
# with the test description entirely on the same line as the
# function name.
m = re.match(br'\s*run_test\s+"((?:[^\\"]|\\.)*)"', line)
if not m:
continue
description = m.group(1)
self.process_test_case(descriptions,
file_name, line_number, description)
@staticmethod
def collect_test_directories():
"""Get the relative path for the TLS and Crypto test directories."""
if os.path.isdir('tests'):
tests_dir = 'tests'
elif os.path.isdir('suites'):
tests_dir = '.'
elif os.path.isdir('../suites'):
tests_dir = '..'
directories = [tests_dir]
return directories
def walk_all(self):
"""Iterate over all named test cases."""
test_directories = self.collect_test_directories()
for directory in test_directories:
for data_file_name in glob.glob(os.path.join(directory, 'suites',
'*.data')):
self.walk_test_suite(data_file_name)
ssl_opt_sh = os.path.join(directory, 'ssl-opt.sh')
if os.path.exists(ssl_opt_sh):
self.walk_ssl_opt_sh(ssl_opt_sh)
class DescriptionChecker(TestDescriptionExplorer):
"""Check all test case descriptions.
* Check that each description is valid (length, allowed character set, etc.).
* Check that there is no duplicated description inside of one test suite.
"""
def __init__(self, results):
self.results = results
def new_per_file_state(self):
"""Dictionary mapping descriptions to their line number."""
return {}
def process_test_case(self, per_file_state,
file_name, line_number, description):
"""Check test case descriptions for errors."""
results = self.results
seen = per_file_state
if description in seen:
results.error(file_name, line_number,
'Duplicate description (also line {})',
seen[description])
return
if re.search(br'[\t;]', description):
results.error(file_name, line_number,
'Forbidden character \'{}\' in description',
re.search(br'[\t;]', description).group(0).decode('ascii'))
if re.search(br'[^ -~]', description):
results.error(file_name, line_number,
'Non-ASCII character in description')
if len(description) > 66:
results.warning(file_name, line_number,
'Test description too long ({} > 66)',
len(description))
seen[description] = line_number
def main():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('--quiet', '-q',
action='store_true',
help='Hide warnings')
parser.add_argument('--verbose', '-v',
action='store_false', dest='quiet',
help='Show warnings (default: on; undoes --quiet)')
options = parser.parse_args()
results = Results(options)
checker = DescriptionChecker(results)
checker.walk_all()
if (results.warnings or results.errors) and not options.quiet:
sys.stderr.write('{}: {} errors, {} warnings\n'
.format(sys.argv[0], results.errors, results.warnings))
sys.exit(1 if results.errors else 0)
if __name__ == '__main__':
main()
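Since TestDescriptionExplorer is designed to be subclassed (implement process_test_case and call walk_all, as its docstring states and as TestDescriptions in analyze_outcomes.py does), a minimal hypothetical subclass that counts described test cases per file could look like the sketch below. It assumes it is run from a directory where check_test_cases.py is importable and where the test tree is discoverable by collect_test_directories; the class and variable names are invented for this example.

#!/usr/bin/env python3
"""Hypothetical example subclass of check_test_cases.TestDescriptionExplorer."""
import collections

import check_test_cases

class CaseCounter(check_test_cases.TestDescriptionExplorer):
    """Count test case descriptions per .data file and in ssl-opt.sh."""
    def __init__(self):
        super().__init__()
        self.counts = collections.Counter()

    def process_test_case(self, _per_file_state,
                          file_name, _line_number, _description):
        """Record one more test case for the file being walked."""
        self.counts[file_name] += 1

if __name__ == '__main__':
    counter = CaseCounter()
    counter.walk_all()  # walks suites/*.data files and ssl-opt.sh
    for file_name, count in sorted(counter.counts.items()):
        print('{}: {} test cases'.format(file_name, count))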