diff options
| author | Siddhesh Poyarekar <siddhesh@sourceware.org> | 2020-01-17 09:11:49 +0530 |
|---|---|---|
| committer | Siddhesh Poyarekar <siddhesh@sourceware.org> | 2020-01-23 20:00:13 +0530 |
| commit | de077de10f0ac140fdced24781370967f9e5610f (patch) | |
| tree | 1153ab38c0294e7a2fa28ba44343f93bbdea26d5 /scripts | |
| parent | ba44e5b50873ceea25884af280612e100e151746 (diff) | |
| download | glibc-de077de10f0ac140fdced24781370967f9e5610f.tar.xz glibc-de077de10f0ac140fdced24781370967f9e5610f.zip | |
gitlog-to-changelog: Drop scripts in favour of gnulib version
The ChangeLog automation scripts have been incorporated in gnulib as
vcs-to-changelog for a while now, since other projects expressed the
desire to use and extend these scripts. In the interest of avoiding
duplication of code, drop the glibc version of gitlog-to-changelog and
use the gnulib one directly.
The only file that remains is vcstocl_quirks.py, which specifies
properties and quirks of the glibc project source code. This patch
also drops the shebang at the start of vcstocl_quirks.py since the
file is not intended to be directly executable.
Diffstat (limited to 'scripts')
| -rwxr-xr-x | scripts/gitlog_to_changelog.py | 138 | ||||
| -rw-r--r-- | scripts/vcs_to_changelog/frontend_c.py | 827 | ||||
| -rw-r--r-- | scripts/vcs_to_changelog/misc_util.py | 51 | ||||
| -rw-r--r-- | scripts/vcs_to_changelog/vcs_git.py | 164 | ||||
| -rw-r--r-- | scripts/vcstocl_quirks.py (renamed from scripts/vcs_to_changelog/vcstocl_quirks.py) | 1 |
5 files changed, 0 insertions, 1181 deletions
diff --git a/scripts/gitlog_to_changelog.py b/scripts/gitlog_to_changelog.py deleted file mode 100755 index b7920aaf99..0000000000 --- a/scripts/gitlog_to_changelog.py +++ /dev/null @@ -1,138 +0,0 @@ -#!/usr/bin/python3 -# Main VCSToChangeLog script. -# Copyright (C) 2019-2020 Free Software Foundation, Inc. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. - -''' Generate a ChangeLog style output based on a VCS log. - -This script takes two revisions as input and generates a ChangeLog style output -for all revisions between the two revisions. - -This script is intended to be executed from the project parent directory. - -The vcs_to_changelog directory has a file vcstocl_quirks.py that defines a -function called get_project_quirks that returns a object of class type -ProjectQuirks or a subclass of the same. The definition of the ProjectQuirks -class is below and it specifies the properties that the project must set to -ensure correct parsing of its contents. - -Among other things, ProjectQurks specifies the VCS to read from; the default is -assumed to be git. The script then studies the VCS log and for each change, -list out the nature of changes in the constituent files. - -Each file type may have parser frontends that can read files and construct -objects that may be compared to determine the minimal changes that occured in -each revision. 
For files that do not have parsers, we may only know the nature -of changes at the top level depending on the information that the VCS stores. - -The parser frontend must have a compare() method that takes the old and new -files as arrays of strings and prints the output in ChangeLog format. - -Currently implemented VCS: - - git - -Currently implemented frontends: - - C -''' -import sys -import os -import re -import argparse -from vcs_to_changelog.misc_util import * -from vcs_to_changelog import frontend_c -from vcs_to_changelog.vcs_git import * - -debug = DebugUtil(False) - -class ProjectQuirks: - # This is a list of regex substitutions for C/C++ macros that are known to - # break parsing of the C programs. Each member of this list is a dict with - # the key 'orig' having the regex and 'sub' having the substitution of the - # regex. - MACRO_QUIRKS = [] - - # This is a list of macro definitions that are extensively used and are - # known to break parsing due to some characteristic, mainly the lack of a - # semicolon at the end. - C_MACROS = [] - - # The repo type, defaults to git. - repo = 'git' - - # List of files to ignore either because they are not needed (such as the - # ChangeLog) or because they are non-parseable. For example, glibc has a - # header file that is only assembly code, which breaks the C parser. - IGNORE_LIST = ['ChangeLog'] - - -# Load quirks file. We assume that the script is run from the top level source -# directory. -sys.path.append('/'.join([os.getcwd(), 'scripts', 'vcs_to_changelog'])) -try: - from vcstocl_quirks import * - project_quirks = get_project_quirks(debug) -except: - project_quirks = ProjectQuirks() - -def analyze_diff(filename, oldfile, newfile, frontends): - ''' Parse the output of the old and new files and print the difference. - - For input files OLDFILE and NEWFILE with name FILENAME, generate reduced - trees for them and compare them. We limit our comparison to only C source - files. 
- ''' - name, ext = os.path.splitext(filename) - - if not ext in frontends.keys(): - return None - else: - frontend = frontends[ext] - frontend.compare(oldfile, newfile) - - -def main(repo, frontends, refs): - ''' ChangeLog Generator Entry Point. - ''' - commits = repo.list_commits(args.refs) - for commit in commits: - repo.list_changes(commit, frontends) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - - parser.add_argument('refs', metavar='ref', type=str, nargs=2, - help='Refs to print ChangeLog entries between') - - parser.add_argument('-d', '--debug', required=False, action='store_true', - help='Run the file parser debugger.') - - args = parser.parse_args() - - debug.debug = args.debug - - if len(args.refs) < 2: - debug.eprint('Two refs needed to get a ChangeLog.') - sys.exit(os.EX_USAGE) - - REPO = {'git': GitRepo(project_quirks.IGNORE_LIST, debug)} - - fe_c = frontend_c.Frontend(project_quirks, debug) - FRONTENDS = {'.c': fe_c, - '.h': fe_c} - - main(REPO[project_quirks.repo], FRONTENDS, args.refs) diff --git a/scripts/vcs_to_changelog/frontend_c.py b/scripts/vcs_to_changelog/frontend_c.py deleted file mode 100644 index 8e37c5fa47..0000000000 --- a/scripts/vcs_to_changelog/frontend_c.py +++ /dev/null @@ -1,827 +0,0 @@ -#!/usr/bin/python3 -# The C Parser. -# Copyright (C) 2019-2020 Free Software Foundation, Inc. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. 
If not, see <https://www.gnu.org/licenses/>. - -from enum import Enum -import re -from vcs_to_changelog.misc_util import * - -class block_flags(Enum): - ''' Flags for the code block. - ''' - else_block = 1 - macro_defined = 2 - macro_redefined = 3 - - -class block_type(Enum): - ''' Type of code block. - ''' - file = 1 - macro_cond = 2 - macro_def = 3 - macro_undef = 4 - macro_include = 5 - macro_info = 6 - decl = 7 - func = 8 - composite = 9 - macrocall = 10 - fndecl = 11 - assign = 12 - struct = 13 - union = 14 - enum = 15 - -# A dictionary describing what each action (add, modify, delete) show up as in -# the ChangeLog output. -actions = {0:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'}, - block_type.file:{'new': 'New file', 'mod': 'Modified file', - 'del': 'Remove file'}, - block_type.macro_cond:{'new': 'New', 'mod': 'Modified', - 'del': 'Remove'}, - block_type.macro_def:{'new': 'New', 'mod': 'Modified', - 'del': 'Remove'}, - block_type.macro_include:{'new': 'Include file', 'mod': 'Modified', - 'del': 'Remove include'}, - block_type.macro_info:{'new': 'New preprocessor message', - 'mod': 'Modified', 'del': 'Remove'}, - block_type.decl:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'}, - block_type.func:{'new': 'New function', 'mod': 'Modified function', - 'del': 'Remove function'}, - block_type.composite:{'new': 'New', 'mod': 'Modified', - 'del': 'Remove'}, - block_type.struct:{'new': 'New struct', 'mod': 'Modified struct', - 'del': 'Remove struct'}, - block_type.union:{'new': 'New union', 'mod': 'Modified union', - 'del': 'Remove union'}, - block_type.enum:{'new': 'New enum', 'mod': 'Modified enum', - 'del': 'Remove enum'}, - block_type.macrocall:{'new': 'New', 'mod': 'Modified', - 'del': 'Remove'}, - block_type.fndecl:{'new': 'New function', 'mod': 'Modified', - 'del': 'Remove'}, - block_type.assign:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'}} - -def new_block(name, type, contents, parent, flags = 0): - ''' Create a new code block with the parent 
as PARENT. - - The code block is a basic structure around which the tree representation of - the source code is built. It has the following attributes: - - - name: A name to refer it by in the ChangeLog - - type: Any one of the following types in BLOCK_TYPE. - - contents: The contents of the block. For a block of types file or - macro_cond, this would be a list of blocks that it nests. For other types - it is a list with a single string specifying its contents. - - parent: This is the parent of the current block, useful in setting up - #elif or #else blocks in the tree. - - flags: A special field to indicate some properties of the block. See - BLOCK_FLAGS for values. - ''' - block = {} - block['matched'] = False - block['name'] = name - block['type'] = type - block['contents'] = contents - block['parent'] = parent - if parent: - parent['contents'].append(block) - - block['flags'] = flags - block['actions'] = actions[type] - - return block - - -class ExprParser: - ''' Parent class of all of the C expression parsers. - - It is necessary that the children override the parse_line() method. - ''' - ATTRIBUTE = r'(((__attribute__\s*\(\([^;]+\)\))|(asm\s*\([?)]+\)))\s*)*' - - def __init__(self, project_quirks, debug): - self.project_quirks = project_quirks - self.debug = debug - - def fast_forward_scope(self, cur, op, loc): - ''' Consume lines in a code block. - - Consume all lines of a block of code such as a composite type declaration or - a function declaration. - - - CUR is the string to consume this expression from - - OP is the string array for the file - - LOC is the first unread location in CUR - - - Returns: The next location to be read in the array as well as the updated - value of CUR, which will now have the body of the function or composite - type. 
- ''' - nesting = cur.count('{') - cur.count('}') - while nesting > 0 and loc < len(op): - cur = cur + ' ' + op[loc] - - nesting = nesting + op[loc].count('{') - nesting = nesting - op[loc].count('}') - loc = loc + 1 - - return (cur, loc) - - def parse_line(self, cur, op, loc, code, macros): - ''' The parse method should always be overridden by the child. - ''' - raise - - -class FuncParser(ExprParser): - REGEX = re.compile(ExprParser.ATTRIBUTE + r'\s*(\w+)\s*\([^(][^{]+\)\s*{') - - def parse_line(self, cur, op, loc, code, macros): - ''' Parse a function. - - Match a function definition. - - - CUR is the string to consume this expression from - - OP is the string array for the file - - LOC is the first unread location in CUR - - CODE is the block to which we add this - - - Returns: The next location to be read in the array. - ''' - found = re.search(self.REGEX, cur) - if not found: - return cur, loc - - name = found.group(5) - self.debug.print('FOUND FUNC: %s' % name) - - # Consume everything up to the ending brace of the function. - (cur, loc) = self.fast_forward_scope(cur, op, loc) - - new_block(name, block_type.func, [cur], code) - - return '', loc - - -class CompositeParser(ExprParser): - # Composite types such as structs and unions. - REGEX = re.compile(r'(struct|union|enum)\s*(\w*)\s*{') - - def parse_line(self, cur, op, loc, code, macros): - ''' Parse a composite type. - - Match declaration of a composite type such as a sruct or a union.. - - - CUR is the string to consume this expression from - - OP is the string array for the file - - LOC is the first unread location in CUR - - CODE is the block to which we add this - - - Returns: The next location to be read in the array. - ''' - found = re.search(self.REGEX, cur) - if not found: - return cur, loc - - # Lap up all of the struct definition. 
- (cur, loc) = self.fast_forward_scope(cur, op, loc) - - name = found.group(2) - - if not name: - if 'typedef' in cur: - name = re.sub(r'.*}\s*(\w+);$', r'\1', cur) - else: - name= '<anoymous>' - - ctype = found.group(1) - - if ctype == 'struct': - blocktype = block_type.struct - if ctype == 'enum': - blocktype = block_type.enum - if ctype == 'union': - blocktype = block_type.union - - new_block(name, block_type.composite, [cur], code) - - return '', loc - - -class AssignParser(ExprParser): - # Static assignments. - REGEX = re.compile(r'(\w+)\s*(\[[^\]]*\])*\s*([^\s]*attribute[\s\w()]+)?\s*=') - - def parse_line(self, cur, op, loc, code, macros): - ''' Parse an assignment statement. - - This includes array assignments. - - - CUR is the string to consume this expression from - - OP is the string array for the file - - LOC is the first unread location in CUR - - CODE is the block to which we add this - - - Returns: The next location to be read in the array. - ''' - found = re.search(self.REGEX, cur) - if not found: - return cur, loc - - name = found.group(1) - self.debug.print('FOUND ASSIGN: %s' % name) - # Lap up everything up to semicolon. - while ';' not in cur and loc < len(op): - cur = op[loc] - loc = loc + 1 - - new_block(name, block_type.assign, [cur], code) - - return '', loc - - -class DeclParser(ExprParser): - # Function pointer typedefs. - TYPEDEF_FN_RE = re.compile(r'\(\*(\w+)\)\s*\([^)]+\);') - - # Simple decls. - DECL_RE = re.compile(r'(\w+)(\[\w*\])*\s*' + ExprParser.ATTRIBUTE + ';') - - # __typeof decls. - TYPEOF_RE = re.compile(r'__typeof\s*\([\w\s]+\)\s*(\w+)\s*' + \ - ExprParser.ATTRIBUTE + ';') - - # Function Declarations. - FNDECL_RE = re.compile(r'\s*(\w+)\s*\([^\(][^;]*\)\s*' + - ExprParser.ATTRIBUTE + ';') - - def __init__(self, regex, blocktype, project_quirks, debug): - # The regex for the current instance. 
- self.REGEX = regex - self.blocktype = blocktype - super().__init__(project_quirks, debug) - - def parse_line(self, cur, op, loc, code, macros): - ''' Parse a top level declaration. - - All types of declarations except function declarations. - - - CUR is the string to consume this expression from - - OP is the string array for the file - - LOC is the first unread location in CUR - - CODE is the block to which we add this function - - - Returns: The next location to be read in the array. - ''' - found = re.search(self.REGEX, cur) - if not found: - return cur, loc - - # The name is the first group for all of the above regexes. This is a - # coincidence, so care must be taken if regexes are added or changed to - # ensure that this is true. - name = found.group(1) - - self.debug.print('FOUND DECL: %s' % name) - new_block(name, self.blocktype, [cur], code) - - return '', loc - - -class MacroParser(ExprParser): - # The macrocall_re peeks into the next line to ensure that it doesn't - # eat up a FUNC by accident. The func_re regex is also quite crude and - # only intends to ensure that the function name gets picked up - # correctly. - MACROCALL_RE = re.compile(r'(\w+)\s*(\(.*\))*$') - - def parse_line(self, cur, op, loc, code, macros): - ''' Parse a macro call. - - Match a symbol hack macro calls that get added without semicolons. - - - CUR is the string to consume this expression from - - OP is the string array for the file - - LOC is the first unread location in CUR - - CODE is the block to which we add this - - MACROS is the regex match object. - - - Returns: The next location to be read in the array. - ''' - - # First we have the macros for symbol hacks and all macros we identified so - # far. 
- if cur.count('(') != cur.count(')'): - return cur, loc - if loc < len(op) and '{' in op[loc]: - return cur, loc - - found = re.search(self.MACROCALL_RE, cur) - if found: - sym = found.group(1) - name = found.group(2) - if sym in macros or self.project_quirks and \ - sym in self.project_quirks.C_MACROS: - self.debug.print('FOUND MACROCALL: %s (%s)' % (sym, name)) - new_block(sym, block_type.macrocall, [cur], code) - return '', loc - - # Next, there could be macros that get called right inside their #ifdef, but - # without the semi-colon. - if cur.strip() == code['name'].strip(): - self.debug.print('FOUND MACROCALL (without brackets): %s' % (cur)) - new_block(cur, block_type.macrocall, [cur], code) - return '',loc - - return cur, loc - - -class Frontend: - ''' The C Frontend implementation. - ''' - KNOWN_MACROS = [] - - def __init__(self, project_quirks, debug): - self.op = [] - self.debug = debug - self.project_quirks = project_quirks - - self.c_expr_parsers = [ - CompositeParser(project_quirks, debug), - AssignParser(project_quirks, debug), - DeclParser(DeclParser.TYPEOF_RE, block_type.decl, - project_quirks, debug), - DeclParser(DeclParser.TYPEDEF_FN_RE, block_type.decl, - project_quirks, debug), - DeclParser(DeclParser.FNDECL_RE, block_type.fndecl, - project_quirks, debug), - FuncParser(project_quirks, debug), - DeclParser(DeclParser.DECL_RE, block_type.decl, project_quirks, - debug), - MacroParser(project_quirks, debug)] - - - def remove_extern_c(self): - ''' Process extern "C"/"C++" block nesting. - - The extern "C" nesting does not add much value so it's safe to almost always - drop it. Also drop extern "C++" - ''' - new_op = [] - nesting = 0 - extern_nesting = 0 - for l in self.op: - if '{' in l: - nesting = nesting + 1 - if re.match(r'extern\s*"C"\s*{', l): - extern_nesting = nesting - continue - if '}' in l: - nesting = nesting - 1 - if nesting < extern_nesting: - extern_nesting = 0 - continue - new_op.append(l) - - # Now drop all extern C++ blocks. 
- self.op = new_op - new_op = [] - nesting = 0 - extern_nesting = 0 - in_cpp = False - for l in self.op: - if re.match(r'extern\s*"C\+\+"\s*{', l): - nesting = nesting + 1 - in_cpp = True - - if in_cpp: - if '{' in l: - nesting = nesting + 1 - if '}' in l: - nesting = nesting - 1 - if nesting == 0: - new_op.append(l) - - self.op = new_op - - - def remove_comments(self, op): - ''' Remove comments. - - Return OP by removing all comments from it. - ''' - self.debug.print('REMOVE COMMENTS') - - sep='\n' - opstr = sep.join(op) - opstr = re.sub(r'/\*.*?\*/', r'', opstr, flags=re.MULTILINE | re.DOTALL) - opstr = re.sub(r'\\\n', r' ', opstr, flags=re.MULTILINE | re.DOTALL) - new_op = list(filter(None, opstr.split(sep))) - - return new_op - - - def normalize_condition(self, name): - ''' Make some minor transformations on macro conditions to make them more - readable. - ''' - # Negation with a redundant bracket. - name = re.sub(r'!\s*\(\s*(\w+)\s*\)', r'! \1', name) - # Pull in negation of equality. - name = re.sub(r'!\s*\(\s*(\w+)\s*==\s*(\w+)\)', r'\1 != \2', name) - # Pull in negation of inequality. - name = re.sub(r'!\s*\(\s*(\w+)\s*!=\s*(\w+)\)', r'\1 == \2', name) - # Fix simple double negation. - name = re.sub(r'!\s*\(\s*!\s*(\w+)\s*\)', r'\1', name) - # Similar, but nesting a complex expression. Because of the greedy match, - # this matches only the outermost brackets. - name = re.sub(r'!\s*\(\s*!\s*\((.*)\)\s*\)$', r'\1', name) - return name - - - def parse_preprocessor(self, loc, code, start = ''): - ''' Parse a preprocessor directive. - - In case a preprocessor condition (i.e. if/elif/else), create a new code - block to nest code into and in other cases, identify and add entities suchas - include files, defines, etc. - - - OP is the string array for the file - - LOC is the first unread location in CUR - - CODE is the block to which we add this function - - START is the string that should continue to be expanded in case we step - into a new macro scope. 
- - - Returns: The next location to be read in the array. - ''' - cur = self.op[loc] - loc = loc + 1 - endblock = False - - self.debug.print('PARSE_MACRO: %s' % cur) - - # Remove the # and strip spaces again. - cur = cur[1:].strip() - - # Include file. - if cur.find('include') == 0: - m = re.search(r'include\s*["<]?([^">]+)[">]?', cur) - new_block(m.group(1), block_type.macro_include, [cur], code) - - # Macro definition. |
