mbedtls/tests/scripts/analyze_outcomes.py

#!/usr/bin/env python3

"""Analyze the test outcomes from a full CI run.

This script can also run on outcomes from a partial run, but the results are
less likely to be useful.
"""

import argparse
import sys
import traceback

import check_test_cases

class Results:
    """Accumulate and report the findings of an analysis run."""

    def __init__(self):
        # Running totals of the messages reported through error() and
        # warning() respectively.
        self.error_count = 0
        self.warning_count = 0

    @staticmethod
    def log(fmt, *args, **kwargs):
        """Write one line to standard error.

        The newline is appended to fmt before str.format() is applied with
        the remaining arguments.
        """
        message = (fmt + '\n').format(*args, **kwargs)
        sys.stderr.write(message)

    def error(self, fmt, *args, **kwargs):
        """Log a message prefixed with 'Error: ' and count it as an error."""
        prefixed = 'Error: ' + fmt
        self.log(prefixed, *args, **kwargs)
        self.error_count += 1

    def warning(self, fmt, *args, **kwargs):
        """Log a message prefixed with 'Warning: ' and count it as a warning."""
        prefixed = 'Warning: ' + fmt
        self.log(prefixed, *args, **kwargs)
        self.warning_count += 1

class TestCaseOutcomes:
    """Record how a single test case fared in each configuration that ran it."""
    # pylint: disable=too-few-public-methods

    def __init__(self):
        # Each list receives one witness per run that ended with the
        # corresponding result. Witnesses are produced by the
        # read_outcome_file function, which joins the platform and the
        # configuration with ';'. For now they are only ever counted.
        self.successes = []
        self.failures = []

    def hits(self):
        """Return how many times the test case was actually executed.

        Both passing and failing runs count; skipped runs do not.
        """
        witness_lists = (self.successes, self.failures)
        return sum(len(witnesses) for witnesses in witness_lists)

def analyze_coverage(results, outcomes):
    """Report every available test case that was never executed.

    A test case counts as executed when outcomes holds an entry for it with
    at least one hit.
    """
    for test_case in check_test_cases.collect_available_test_cases():
        if test_case in outcomes and outcomes[test_case].hits() != 0:
            continue
        # Deliberately a warning rather than an error until this branch
        # reaches full coverage of the test cases.
        results.warning('Test case not executed: {}', test_case)

def analyze_driver_vs_reference(outcomes, components, ignored_tests):
    """Check that every test case passing in the reference component also
    passes in the corresponding driver component.

    outcomes: outcome collection mapping test case keys to objects that
        provide hits() and a successes list of witness strings.
    components: sequence whose first item is the driver component name and
        whose second item is the reference component name. A component is
        matched by substring search in each witness.
    ignored_tests: collection of test suite names to skip.

    Print one line for each test case that passed in the reference component
    but has no pass recorded for the driver component.
    Return True if there is no such test case, False otherwise.
    """
    driver_component = components[0]
    reference_component = components[1]
    available = check_test_cases.collect_available_test_cases()
    result = True

    for key in available:
        # Skip ignored test suites. The suite name is the part of the key
        # before the first ';'; only its main part (before the first '.')
        # is compared against the ignore list.
        test_suite = key.split(';')[0].split('.')[0]
        if test_suite in ignored_tests:
            continue
        # Nothing to compare if no component executed the test case.
        if key not in outcomes or outcomes[key].hits() == 0:
            continue
        # Look for a pass in each of the two components.
        successes = outcomes[key].successes
        driver_test_passed = any(driver_component in entry
                                 for entry in successes)
        reference_test_passed = any(reference_component in entry
                                    for entry in successes)
        if reference_test_passed and not driver_test_passed:
            print('{}: driver: skipped/failed; reference: passed'.format(key))
            result = False
    return result

def analyze_outcomes(outcomes):
    """Apply every analysis to the outcome collection and return the findings.

    The return value is the Results object that the analyses reported to.
    """
    report = Results()
    analyze_coverage(report, outcomes)
    return report

def read_outcome_file(outcome_file):
    """Load an outcome file into an outcome collection.

    The returned dictionary maps keys to TestCaseOutcomes objects. Each key
    is made of the test suite name and the test case description, separated
    by a semicolon.

    Every line of the file must consist of exactly six ';'-separated fields:
    platform, configuration, suite, case, result and cause. Only 'PASS' and
    'FAIL' results are recorded as witnesses; any other result still creates
    the entry for its test case.
    """
    collection = {}
    with open(outcome_file, 'r', encoding='utf-8') as lines:
        for record in lines:
            # Unpacking raises ValueError on a line with a different number
            # of fields.
            platform, config, suite, case, result, _cause = record.split(';')
            key = suite + ';' + case
            witness = platform + ';' + config
            if key not in collection:
                collection[key] = TestCaseOutcomes()
            entry = collection[key]
            if result == 'PASS':
                entry.successes.append(witness)
            elif result == 'FAIL':
                entry.failures.append(witness)
    return collection

def do_analyze_coverage(outcome_file):
    """Run the coverage analysis on an outcome file.

    Return True if the analysis reported no errors, False otherwise.
    """
    results = analyze_outcomes(read_outcome_file(outcome_file))
    return results.error_count == 0

def do_analyze_driver_vs_reference(outcome_file, components, ignored_tests):
    """Run the driver versus reference analysis on an outcome file.

    components must hold exactly two names: first a driver component, whose
    name contains "accel", then a reference component, whose name contains
    "reference". Otherwise an error message is printed and False is returned
    without reading the outcome file.

    Return the result of analyze_driver_vs_reference.
    """
    well_formed = (len(components) == 2
                   and "accel" in components[0]
                   and "reference" in components[1])
    if not well_formed:
        print('Error: Wrong component list. Exactly 2 components are required (driver,reference). ')
        return False
    return analyze_driver_vs_reference(read_outcome_file(outcome_file),
                                       components, ignored_tests)

def main():
    """Parse the command line, run the requested analysis and exit.

    On success, print a success message and return normally. Exit with
    status 1 if the analysis fails or the task is unknown, and with
    status 120 if an unexpected exception occurs. Exits requested by
    argparse itself (SystemExit) are not intercepted.
    """
    try:
        parser = argparse.ArgumentParser(description=__doc__)
        parser.add_argument('--outcomes', metavar='OUTCOMES.CSV',
                            help='Outcome file to analyze')
        parser.add_argument('--task',
                            help='Analyze to be done: analyze_coverage or analyze_driver_vs_reference')
        parser.add_argument('--components',
                            help='List of test components to compare. Must be exactly 2 in valid order: driver,reference. '
                            'Apply only for analyze_driver_vs_reference task.')
        parser.add_argument('--ignore',
                            help='List of test suits to ignore. Apply only for analyze_driver_vs_reference task.')
        options = parser.parse_args()

        result = False

        if options.task == 'analyze_coverage':
            result = do_analyze_coverage(options.outcomes)
        elif options.task == 'analyze_driver_vs_reference':
            # argparse leaves an omitted option set to None. Without
            # --components, pass an empty list so that the component
            # validation reports the problem instead of crashing here.
            if options.components is None:
                components_list = []
            else:
                components_list = options.components.split(',')
            # Without --ignore, no test suite is ignored.
            if options.ignore is None:
                ignored_tests_list = []
            else:
                ignored_tests_list = ['test_suite_' + x
                                      for x in options.ignore.split(',')]
            result = do_analyze_driver_vs_reference(options.outcomes,
                                                    components_list,
                                                    ignored_tests_list)
        else:
            print('Error: Unknown task: {}'.format(options.task))

        if not result:
            sys.exit(1)
        print("SUCCESS :-)")
    except Exception: # pylint: disable=broad-except
        # Print the backtrace and exit explicitly with our chosen status.
        traceback.print_exc()
        sys.exit(120)

# Run the analysis only when this file is executed as a script.
if __name__ == '__main__':
    main()
New script for test outcome analysis This is a new script designed to analyze test outcomes collected during a whole CI run. This commit introduces the script, the code to read the outcome file, and a very simple framework to report errors. It does not perform any actual analysis yet. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-25 18:36:28 +02:00			`#!/usr/bin/env python3`

			`"""Analyze the test outcomes from a full CI run.`

			`This script can also run on outcomes from a partial run, but the results are`
			`less likely to be useful.`
			`"""`

			`import argparse`
			`import sys`
			`import traceback`

Check test case coverage Check that every available test case in the test suites and ssl-opt.sh has been executed at least once. For the time being, only report a warning, because our coverage is incomplete. Once we've updated all.sh to have full coverage, this warning should become an error. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-25 18:37:43 +02:00			`import check_test_cases`

New script for test outcome analysis This is a new script designed to analyze test outcomes collected during a whole CI run. This commit introduces the script, the code to read the outcome file, and a very simple framework to report errors. It does not perform any actual analysis yet. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-25 18:36:28 +02:00			`class Results:`
			`"""Process analysis results."""`

			`def __init__(self):`
			`self.error_count = 0`
			`self.warning_count = 0`

			`@staticmethod`
			`def log(fmt, *args, **kwargs):`
			`sys.stderr.write((fmt + '\n').format(*args, **kwargs))`

			`def error(self, fmt, *args, **kwargs):`
			`self.log('Error: ' + fmt, *args, **kwargs)`
			`self.error_count += 1`

			`def warning(self, fmt, *args, **kwargs):`
			`self.log('Warning: ' + fmt, *args, **kwargs)`
			`self.warning_count += 1`

			`class TestCaseOutcomes:`
			`"""The outcomes of one test case across many configurations."""`
			`# pylint: disable=too-few-public-methods`

			`def __init__(self):`
Document the fields of TestCasesOutcomes Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-26 13:02:30 +02:00			`# Collect a list of witnesses of the test case succeeding or failing.`
			`# Currently we don't do anything with witnesses except count them.`
			`# The format of a witness is determined by the read_outcome_file`
			`# function; it's the platform and configuration joined by ';'.`
New script for test outcome analysis This is a new script designed to analyze test outcomes collected during a whole CI run. This commit introduces the script, the code to read the outcome file, and a very simple framework to report errors. It does not perform any actual analysis yet. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-25 18:36:28 +02:00			`self.successes = []`
			`self.failures = []`

			`def hits(self):`
			`"""Return the number of times a test case has been run.`

			`This includes passes and failures, but not skips.`
			`"""`
			`return len(self.successes) + len(self.failures)`

Check test case coverage Check that every available test case in the test suites and ssl-opt.sh has been executed at least once. For the time being, only report a warning, because our coverage is incomplete. Once we've updated all.sh to have full coverage, this warning should become an error. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-25 18:37:43 +02:00			`def analyze_coverage(results, outcomes):`
			`"""Check that all available test cases are executed at least once."""`
Move collect_available_test_cases to check_test_cases.py No behavior change. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2022-01-07 15:58:38 +01:00			`available = check_test_cases.collect_available_test_cases()`
Check test case coverage Check that every available test case in the test suites and ssl-opt.sh has been executed at least once. For the time being, only report a warning, because our coverage is incomplete. Once we've updated all.sh to have full coverage, this warning should become an error. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-25 18:37:43 +02:00			`for key in available:`
			`hits = outcomes[key].hits() if key in outcomes else 0`
			`if hits == 0:`
			`# Make this a warning, not an error, as long as we haven't`
			`# fixed this branch to have full coverage of test cases.`
			`results.warning('Test case not executed: {}', key)`

analyze_outcomes.py: Add test coverage regresion analyze for driver only builds Signed-off-by: Przemek Stekiel <przemyslaw.stekiel@mobica.com> 2022-10-21 13:42:08 +02:00			`def analyze_driver_vs_reference(outcomes, components, ignored_tests):`
			`"""Check that all tests executed in the reference component are also`
			`executed in the corresponding driver component.`
			`Skip test suits provided in ignored_tests list.`
			`"""`
			`driver_component = components[0]`
			`reference_component = components[1]`
			`available = check_test_cases.collect_available_test_cases()`
			`result = True`

			`for key in available:`
			`# Skip ignored test suites`
			`test_suit = key.split(';')[0] # retrieve test suit name`
			`test_suit = test_suit.split('.')[0] # retrieve main part of test suit name`
			`if(test_suit in ignored_tests):`
			`continue`
			`# Continue if test was not executed by any component`
			`hits = outcomes[key].hits() if key in outcomes else 0`
			`if(hits == 0):`
			`continue`
			`# Search for tests that run in reference component and not in driver component`
			`driver_test_passed = False`
			`reference_test_passed = False`
			`for entry in outcomes[key].successes:`
			`if(driver_component in entry):`
			`driver_test_passed = True`
			`if(reference_component in entry):`
			`reference_test_passed = True`
			`#if(driver_test_passed == True and reference_test_passed == False):`
			`# print('{}: driver: passed; reference: skipped'.format(key))`
			`if(driver_test_passed == False and reference_test_passed == True):`
			`print('{}: driver: skipped/failed; reference: passed'.format(key))`
			`result = False`
			`return result`

New script for test outcome analysis This is a new script designed to analyze test outcomes collected during a whole CI run. This commit introduces the script, the code to read the outcome file, and a very simple framework to report errors. It does not perform any actual analysis yet. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-25 18:36:28 +02:00			`def analyze_outcomes(outcomes):`
			`"""Run all analyses on the given outcome collection."""`
			`results = Results()`
Check test case coverage Check that every available test case in the test suites and ssl-opt.sh has been executed at least once. For the time being, only report a warning, because our coverage is incomplete. Once we've updated all.sh to have full coverage, this warning should become an error. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-25 18:37:43 +02:00			`analyze_coverage(results, outcomes)`
New script for test outcome analysis This is a new script designed to analyze test outcomes collected during a whole CI run. This commit introduces the script, the code to read the outcome file, and a very simple framework to report errors. It does not perform any actual analysis yet. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-25 18:36:28 +02:00			`return results`

			`def read_outcome_file(outcome_file):`
			`"""Parse an outcome file and return an outcome collection.`

			`An outcome collection is a dictionary mapping keys to TestCaseOutcomes objects.`
			`The keys are the test suite name and the test case description, separated`
			`by a semicolon.`
			`"""`
			`outcomes = {}`
			`with open(outcome_file, 'r', encoding='utf-8') as input_file:`
			`for line in input_file:`
			`(platform, config, suite, case, result, _cause) = line.split(';')`
			`key = ';'.join([suite, case])`
			`setup = ';'.join([platform, config])`
			`if key not in outcomes:`
			`outcomes[key] = TestCaseOutcomes()`
			`if result == 'PASS':`
			`outcomes[key].successes.append(setup)`
			`elif result == 'FAIL':`
			`outcomes[key].failures.append(setup)`
			`return outcomes`

analyze_outcomes.py: Add test coverage regresion analyze for driver only builds Signed-off-by: Przemek Stekiel <przemyslaw.stekiel@mobica.com> 2022-10-21 13:42:08 +02:00			`def do_analyze_coverage(outcome_file):`
			`"""Perform coverage analyze."""`
New script for test outcome analysis This is a new script designed to analyze test outcomes collected during a whole CI run. This commit introduces the script, the code to read the outcome file, and a very simple framework to report errors. It does not perform any actual analysis yet. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-25 18:36:28 +02:00			`outcomes = read_outcome_file(outcome_file)`
analyze_outcomes.py: Add test coverage regresion analyze for driver only builds Signed-off-by: Przemek Stekiel <przemyslaw.stekiel@mobica.com> 2022-10-21 13:42:08 +02:00			`results = analyze_outcomes(outcomes)`
			`return (True if results.error_count == 0 else False)`

			`def do_analyze_driver_vs_reference(outcome_file, components, ignored_tests):`
			`"""Perform driver vs reference analyze."""`
			`# We need exactly 2 components to analyze (first driver and second reference)`
			`if(len(components) != 2 or "accel" not in components[0] or "reference" not in components[1]):`
			`print('Error: Wrong component list. Exactly 2 components are required (driver,reference). ')`
			`return False`
			`outcomes = read_outcome_file(outcome_file)`
			`return analyze_driver_vs_reference(outcomes, components, ignored_tests)`
New script for test outcome analysis This is a new script designed to analyze test outcomes collected during a whole CI run. This commit introduces the script, the code to read the outcome file, and a very simple framework to report errors. It does not perform any actual analysis yet. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-25 18:36:28 +02:00
			`def main():`
			`try:`
			`parser = argparse.ArgumentParser(description=__doc__)`
analyze_outcomes.py: Add test coverage regresion analyze for driver only builds Signed-off-by: Przemek Stekiel <przemyslaw.stekiel@mobica.com> 2022-10-21 13:42:08 +02:00			`parser.add_argument('--outcomes', metavar='OUTCOMES.CSV',`
New script for test outcome analysis This is a new script designed to analyze test outcomes collected during a whole CI run. This commit introduces the script, the code to read the outcome file, and a very simple framework to report errors. It does not perform any actual analysis yet. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-25 18:36:28 +02:00			`help='Outcome file to analyze')`
analyze_outcomes.py: Add test coverage regresion analyze for driver only builds Signed-off-by: Przemek Stekiel <przemyslaw.stekiel@mobica.com> 2022-10-21 13:42:08 +02:00			`parser.add_argument('--task',`
			`help='Analyze to be done: analyze_coverage or analyze_driver_vs_reference')`
			`parser.add_argument('--components',`
			`help='List of test components to compare. Must be exactly 2 in valid order: driver,reference. '`
			`'Apply only for analyze_driver_vs_reference task.')`
			`parser.add_argument('--ignore',`
			`help='List of test suits to ignore. Apply only for analyze_driver_vs_reference task.')`
New script for test outcome analysis This is a new script designed to analyze test outcomes collected during a whole CI run. This commit introduces the script, the code to read the outcome file, and a very simple framework to report errors. It does not perform any actual analysis yet. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-25 18:36:28 +02:00			`options = parser.parse_args()`
analyze_outcomes.py: Add test coverage regresion analyze for driver only builds Signed-off-by: Przemek Stekiel <przemyslaw.stekiel@mobica.com> 2022-10-21 13:42:08 +02:00
			`result = False`

			`if(options.task == 'analyze_coverage'):`
			`result = do_analyze_coverage(options.outcomes)`
			`elif(options.task == 'analyze_driver_vs_reference'):`
			`components_list = options.components.split(',')`
			`ignored_tests_list = options.ignore.split(',')`
			`ignored_tests_list = ['test_suite_' + x for x in ignored_tests_list]`
			`result = do_analyze_driver_vs_reference(options.outcomes, components_list, ignored_tests_list)`
			`else:`
			`print('Error: Unknown task: {}'.format(options.task))`

			`if(result == False):`
New script for test outcome analysis This is a new script designed to analyze test outcomes collected during a whole CI run. This commit introduces the script, the code to read the outcome file, and a very simple framework to report errors. It does not perform any actual analysis yet. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-25 18:36:28 +02:00			`sys.exit(1)`
analyze_outcomes.py: Add test coverage regresion analyze for driver only builds Signed-off-by: Przemek Stekiel <przemyslaw.stekiel@mobica.com> 2022-10-21 13:42:08 +02:00			`print("SUCCESS :-)")`
New script for test outcome analysis This is a new script designed to analyze test outcomes collected during a whole CI run. This commit introduces the script, the code to read the outcome file, and a very simple framework to report errors. It does not perform any actual analysis yet. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com> 2020-06-25 18:36:28 +02:00			`except Exception: # pylint: disable=broad-except`
			`# Print the backtrace and exit explicitly with our chosen status.`
			`traceback.print_exc()`
			`sys.exit(120)`

			`if __name__ == '__main__':`
			`main()`