42f384c186
Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
482 lines
19 KiB
Python
Executable file
482 lines
19 KiB
Python
Executable file
#!/usr/bin/env python3

"""Assemble Mbed TLS change log entries into the change log file.

Add changelog entries to the first level-2 section.
Create a new level-2 section for unreleased changes if needed.
Remove the input files unless --keep-entries is specified.

In each level-3 section, entries are sorted in chronological order
(oldest first). From oldest to newest:
* Merged entry files are sorted according to their merge date (date of
  the merge commit that brought the commit that created the file into
  the target branch).
* Committed but unmerged entry files are sorted according to the date
  of the commit that adds them.
* Uncommitted entry files are sorted according to their modification time.

You must run this program from within a git working directory.
"""
|
|
|
|
# Copyright (C) 2019, Arm Limited, All Rights Reserved
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file is part of Mbed TLS (https://tls.mbed.org)
|
|
|
|
import argparse
|
|
from collections import OrderedDict
|
|
import datetime
|
|
import functools
|
|
import glob
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
|
|
class InputFormatError(Exception):
    """Error raised when an input file does not follow the expected format.

    The exception text has the form ``filename:line_number: detail`` where
    ``detail`` is ``message`` expanded with ``str.format`` using the extra
    positional and keyword arguments.
    """

    def __init__(self, filename, line_number, message, *args, **kwargs):
        detail = message.format(*args, **kwargs)
        text = '{}:{}: {}'.format(filename, line_number, detail)
        super().__init__(text)
|
|
|
|
class LostContent(Exception):
    """Error raised when a line of an input file is missing from the output.

    ``filename`` identifies the input the line came from and ``line`` is the
    content that could not be found in the generated output.
    """

    def __init__(self, filename, line):
        super().__init__('Lost content from {}: "{}"'.format(filename, line))
|
|
|
|
# Titles of the level-3 sections that are always accepted in entry files.
# ChangeLog.__init__ creates one (initially empty) section per title, in
# this order, and ChangeLog.write emits the non-empty ones in that order.
# Titles are bytes because the changelog is processed in binary mode.
STANDARD_SECTIONS = (
    b'Interface changes',
    b'Default behavior changes',
    b'Requirement changes',
    b'New deprecations',
    b'Removals',
    b'New features',
    b'Security',
    b'Bug fixes',
    b'Performance improvements',
    b'Other changes',
)
|
|
|
|
class ChangeLog:
    """An Mbed TLS changelog.

    A changelog is a file in Markdown format. Each level 2 section title
    starts a version, and versions are sorted in reverse chronological
    order. Lines with a level 2 section title must start with '##'.

    Within a version, there are multiple sections, each devoted to a kind
    of change: bug fix, feature request, etc. Section titles should match
    entries in STANDARD_SECTIONS exactly.

    Within each section, each separate change should be on a line starting
    with a '*' bullet. There may be blank lines surrounding titles, but
    there should not be any blank line inside a section.

    All content is handled as bytes: lines are read from and written to
    files opened in binary mode.
    """

    # Matches the (possibly empty) run of '#' at the start of a line.
    _title_re = re.compile(br'#*')

    def title_level(self, line):
        """Determine whether the line is a title.

        Return (level, content) where level is the Markdown section level
        (1 for '#', 2 for '##', etc.) and content is the section title
        without leading or trailing whitespace. For a non-title line,
        the level is 0 and content is the whole line, stripped.
        """
        # The pattern can match the empty string, so match() never fails.
        level = self._title_re.match(line).end()
        return level, line[level:].strip()

    # Only accept dotted version numbers (e.g. "3.1", not "3").
    # Refuse ".x" in a version number where x is a letter: this indicates
    # a version that is not yet released. Something like "3.1a" is accepted.
    _version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')

    def section_is_released_version(self, title):
        """Whether this section is for a released version.

        True if the given level-2 section title indicates that this section
        contains released changes, otherwise False.
        """
        # Assume that a released version has a numerical version number
        # that follows a particular pattern. These criteria may be revised
        # as needed in future versions of this script.
        version_number = self._version_number_re.search(title)
        if not version_number:
            return False
        return not self._incomplete_version_number_re.search(
            version_number.group(0))

    def unreleased_version_title(self):
        """The title to use if creating a new section for an unreleased version."""
        # pylint: disable=no-self-use; this method may be overridden
        return b'Unreleased changes'

    def __init__(self, input_stream):
        """Create a changelog object.

        Populate the changelog object from the content of the file
        input_stream. This is typically a file opened for reading in
        binary mode, but can be any iterable yielding the lines to read
        as bytes.
        """
        # Content before the level-2 section where the new entries are to be
        # added.
        self.header = []
        # Content of the level-3 sections of where the new entries are to
        # be added.
        self.section_content = OrderedDict()
        for section in STANDARD_SECTIONS:
            self.section_content[section] = []
        # Content of level-2 sections for already-released versions.
        self.trailer = []
        self.read_main_file(input_stream)

    def read_main_file(self, input_stream):
        """Populate the changelog object from the content of the file.

        This method is only intended to be called as part of the constructor
        of the class and may not act sensibly on an object that is already
        partially populated.
        """
        # Parse the first level-2 section, containing changelog entries
        # for unreleased changes.
        # If we'll be expanding this section, everything before the first
        # level-3 section title ("###...") following the first level-2
        # section title ("##...") is passed through as the header
        # and everything after the second level-2 section title is passed
        # through as the trailer. Inside the first level-2 section,
        # split out the level-3 sections.
        # If we'll be creating a new version, the header is everything
        # before the point where we want to add the level-2 section
        # for this version, and the trailer is what follows.
        level_2_seen = 0
        current_section = None
        for line in input_stream:
            level, content = self.title_level(line)
            if level == 2:
                level_2_seen += 1
                if level_2_seen == 1:
                    if self.section_is_released_version(content):
                        # The first version in the file is already released:
                        # insert a fresh unreleased section before it and
                        # treat this title as the start of the trailer.
                        self.header.append(b'## ' +
                                           self.unreleased_version_title() +
                                           b'\n\n')
                        level_2_seen = 2
            elif level == 3 and level_2_seen == 1:
                current_section = content
                self.section_content.setdefault(content, [])
            if level_2_seen == 1 and current_section is not None:
                # Inside a level-3 section of the unreleased version: keep
                # the entries, drop the title (write() regenerates it) and
                # drop blank lines.
                if level != 3 and line.strip():
                    self.section_content[current_section].append(line)
            elif level_2_seen <= 1:
                self.header.append(line)
            else:
                self.trailer.append(line)
        if level_2_seen == 0:
            # The file has no level-2 section at all, so there is no version
            # under which to file the entries. Create the unreleased section
            # at the end of the existing content, separated by a blank line.
            if self.header and self.header[-1].strip():
                self.header.append(b'\n')
            self.header.append(b'## ' +
                               self.unreleased_version_title() +
                               b'\n\n')

    def add_file(self, input_stream):
        """Add changelog entries from a file.

        Read lines from input_stream, which is typically a file opened
        for reading in binary mode; it must have a ``name`` attribute,
        which is used in error messages. These lines must contain a series
        of level 3 Markdown sections with recognized titles. The
        corresponding content is injected into the respective sections in
        the changelog.
        The section titles must be either one of the hard-coded values
        in STANDARD_SECTIONS in assemble_changelog.py or already present
        in ChangeLog.md. Section titles must match byte-for-byte except that
        leading or trailing whitespace is ignored.

        Raise InputFormatError if the file contains an unrecognized section
        title, content before the first section title, or a title whose
        level is not 3.
        """
        filename = input_stream.name
        current_section = None
        for line_number, line in enumerate(input_stream, 1):
            if not line.strip():
                continue
            level, content = self.title_level(line)
            if level == 3:
                current_section = content
                if current_section not in self.section_content:
                    # repr() of bytes is "b'...'"; drop the leading "b".
                    # (str() on bytes gives the same text but triggers
                    # BytesWarning when Python runs with -b.)
                    raise InputFormatError(filename, line_number,
                                           'Section {} is not recognized',
                                           repr(current_section)[1:])
            elif level == 0:
                if current_section is None:
                    raise InputFormatError(filename, line_number,
                                           'Missing section title at the beginning of the file')
                self.section_content[current_section].append(line)
            else:
                raise InputFormatError(filename, line_number,
                                       'Only level 3 headers (###) are permitted')

    def write(self, filename):
        """Write the changelog to the specified file.

        The output consists of the header, then each non-empty level-3
        section (title, blank line, entries, blank line), then the trailer.
        """
        with open(filename, 'wb') as out:
            for line in self.header:
                out.write(line)
            for section, lines in self.section_content.items():
                if not lines:
                    continue
                out.write(b'### ' + section + b'\n\n')
                for line in lines:
                    out.write(line)
                out.write(b'\n')
            for line in self.trailer:
                out.write(line)
|
|
|
|
|
|
@functools.total_ordering
class EntryFileSortKey:
    """This class defines an ordering on changelog entry files: older < newer.

    * Merged entry files are sorted according to their merge date (date of
      the merge commit that brought the commit that created the file into
      the target branch).
    * Committed but unmerged entry files are sorted according to the date
      of the commit that adds them.
    * Uncommitted entry files are sorted according to their modification time.

    This class assumes that the file is in a git working directory with
    the target branch checked out.
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the git commit id at which the given file was created.

        Return None if the file was never checked into git.
        """
        hashes = subprocess.check_output(['git', 'log', '--format=%H',
                                          '--follow',
                                          '--', filename])
        # Without re.MULTILINE, '$' only matches at the very end (or just
        # before a final newline), so this picks the last line of output.
        m = re.search(b'(.+)$', hashes)
        if not m:
            # The git output is empty. This means that the file was
            # never checked in.
            return None
        # The last commit in the log is the oldest one, which is when the
        # file was created.
        return m.group(0)

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.

        Return a list of commit ids (bytes), empty if git lists no commit.
        """
        text = subprocess.check_output(['git', 'rev-list',
                                        '--merges', *options,
                                        b'..'.join([some_hash, target])])
        # splitlines() yields [] for empty output. Stripping and splitting
        # on b'\n' would yield [b''], a bogus empty commit id that
        # merge_hash() could then return instead of None.
        return text.splitlines()

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the git commit id at which the given commit was merged.

        Return None if the given commit was never merged.
        """
        target = b'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry list is the ones that are both descendants of
        # some_hash and ancestors of the target.
        ancestry = frozenset(cls.list_merges(some_hash, target,
                                             '--ancestry-path'))
        # The first_parents list only contains merges that are directly
        # on the target branch. We want it in reverse order (oldest first).
        first_parents = cls.list_merges(some_hash, target,
                                        '--first-parent', '--reverse')
        # Look for the oldest merge commit that's both on the direct path
        # and directly on the target branch. That's the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        for commit in first_parents:
            if commit in ancestry:
                return commit
        return None

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit.

        The result is a timezone-aware datetime in UTC.
        """
        text = subprocess.check_output(['git', 'show', '-s',
                                        '--format=%ct',
                                        commit_id])
        # %ct is the committer date as seconds since the epoch. int()
        # accepts bytes and ignores the trailing newline.
        return datetime.datetime.fromtimestamp(int(text),
                                               datetime.timezone.utc)

    @staticmethod
    def file_timestamp(filename):
        """Return the modification timestamp of the given file.

        The result is a timezone-aware datetime in UTC, the same
        representation that commit_timestamp() returns.
        """
        mtime = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(mtime, datetime.timezone.utc)

    def __init__(self, filename):
        """Determine position of the file in the changelog entry order.

        This constructor returns an object that can be used with comparison
        operators, with `sort` and `sorted`, etc. Older entries are sorted
        before newer entries.
        """
        self.filename = filename
        creation_hash = self.creation_hash(filename)
        if not creation_hash:
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merge_hash = self.merge_hash(creation_hash)
        if not merge_hash:
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(creation_hash)
            return
        self.category = self.MERGED
        self.datetime = self.commit_timestamp(merge_hash)

    def sort_key(self):
        """Return a concrete sort key for this entry file sort key object.

        ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
        """
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing with AttributeError on other.sort_key().
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() < other.sort_key()
|
|
|
|
|
|
def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    generated_output_file and main_input_file are file names; merged_files
    is an iterable of file names. All files are read in binary mode.
    Raise LostContent for the first input line that is missing from the
    generated output.
    """
    # Use context managers so that every file is closed promptly, including
    # when LostContent is raised part-way through a file.
    with open(generated_output_file, 'rb') as generated:
        generated_output = set(generated)
    with open(main_input_file, 'rb') as main_input:
        for line in main_input:
            if line not in generated_output:
                raise LostContent('original file', line)
    for merged_file in merged_files:
        with open(merged_file, 'rb') as merged_input:
            for line in merged_input:
                if line not in generated_output:
                    raise LostContent(merged_file, line)
|
|
|
|
def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.

    changelog is the object whose write() method produces the output;
    output_file and input_file are file names and merged_files is a list
    of file names. If the sanity checks fail, the exception raised by
    check_output() propagates and output_file is not replaced.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace() overwrites an existing destination on every
        # platform, whereas os.rename() fails on Windows when the
        # destination exists (the usual case: overwriting the input).
        os.replace(output_temp, output_file)
|
|
|
|
def remove_merged_entries(files_to_remove):
    """Delete each of the entry files named in files_to_remove."""
    for entry_path in files_to_remove:
        os.unlink(entry_path)
|
|
|
|
def list_files_to_merge(options):
    """Return the entry files to merge, ordered from oldest to newest.

    The candidates are the ``*.md`` files directly inside ``options.dir``.
    "Oldest" is defined by `EntryFileSortKey`.
    """
    pattern = os.path.join(options.dir, '*.md')
    return sorted(glob.glob(pattern), key=EntryFileSortKey)
|
|
|
|
def merge_entries(options):
    """Merge changelog entries into the changelog file.

    Read the changelog file from options.input.
    Read entries to merge from the directory options.dir.
    Write the new changelog to options.output.
    Remove the merged entries if options.keep_entries is false.

    If there is no entry file to merge, print a notice on standard error
    and leave everything untouched.
    """
    with open(options.input, 'rb') as main_file:
        changelog = ChangeLog(main_file)

    pending = list_files_to_merge(options)
    if not pending:
        sys.stderr.write('There are no pending changelog entries.\n')
        return

    for entry_path in pending:
        with open(entry_path, 'rb') as entry_file:
            changelog.add_file(entry_file)

    finish_output(changelog, options.output, options.input, pending)
    if options.keep_entries:
        return
    remove_merged_entries(pending)
|
|
|
|
def show_file_timestamps(options):
    """Print each file to merge with its sort category and timestamp.

    This is only intended for debugging purposes.
    """
    for entry_path in list_files_to_merge(options):
        key = EntryFileSortKey(entry_path)
        print(key.category, key.datetime, entry_path)
|
|
|
|
def set_defaults(options):
    """Fill in the options that were not given on the command line.

    * ``output`` defaults to ``input`` (overwrite the changelog in place).
    * ``keep_entries`` defaults to true if an output file was given
      explicitly, and to false otherwise.
    """
    output_given = getattr(options, 'output', None) is not None
    if not output_given:
        options.output = options.input
    if getattr(options, 'keep_entries', None) is None:
        options.keep_entries = output_given
|
|
|
|
def main():
    """Command line entry point.

    Parse the command line, fill in the defaults, then either list the
    files that would be processed (--list-files-only) or merge the entries.
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)
    add_option = arg_parser.add_argument
    add_option('--dir', '-d', metavar='DIR',
               default='ChangeLog.d',
               help='Directory to read entries from'
                    ' (default: ChangeLog.d)')
    add_option('--input', '-i', metavar='FILE',
               default='ChangeLog.md',
               help='Existing changelog file to read from and augment'
                    ' (default: ChangeLog.md)')
    # --keep-entries and --no-keep-entries share one destination; None
    # means that neither was given, which set_defaults() resolves.
    add_option('--keep-entries',
               action='store_true', dest='keep_entries', default=None,
               help='Keep the files containing entries'
                    ' (default: remove them if --output/-o is not specified)')
    add_option('--no-keep-entries',
               action='store_false', dest='keep_entries',
               help='Remove the files containing entries after they are merged'
                    ' (default: remove them if --output/-o is not specified)')
    add_option('--output', '-o', metavar='FILE',
               help='Output changelog file'
                    ' (default: overwrite the input)')
    add_option('--list-files-only',
               action='store_true',
               help='Only list the files that would be processed (with some debugging information)')
    options = arg_parser.parse_args()
    set_defaults(options)
    if options.list_files_only:
        show_file_timestamps(options)
    else:
        merge_entries(options)
|
|
|
|
# Run the command line entry point only when executed as a script.
if __name__ == '__main__':
    main()
|