aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorSiddhesh Poyarekar <siddhesh@sourceware.org>2020-01-17 09:11:49 +0530
committerSiddhesh Poyarekar <siddhesh@sourceware.org>2020-01-23 20:00:13 +0530
commitde077de10f0ac140fdced24781370967f9e5610f (patch)
tree1153ab38c0294e7a2fa28ba44343f93bbdea26d5 /scripts
parentba44e5b50873ceea25884af280612e100e151746 (diff)
downloadglibc-de077de10f0ac140fdced24781370967f9e5610f.tar.xz
glibc-de077de10f0ac140fdced24781370967f9e5610f.zip
gitlog-to-changelog: Drop scripts in favour of gnulib version
The ChangeLog automation scripts were incorporated in gnulib as vcs-to-changelog for a while now since other projects expressed the desire to use and extend this script. In the interest of avoiding duplication of code, drop the glibc version of gitlog-to-changelog and use the gnulib one directly. The only file that remains is vcstocl_quirks.py, which specifies properties and quirks of the glibc project source code. This patch also drops the shebang at the start of vcstocl_quirks.py since the file is not intended to be directly executable.
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/gitlog_to_changelog.py138
-rw-r--r--scripts/vcs_to_changelog/frontend_c.py827
-rw-r--r--scripts/vcs_to_changelog/misc_util.py51
-rw-r--r--scripts/vcs_to_changelog/vcs_git.py164
-rw-r--r--scripts/vcstocl_quirks.py (renamed from scripts/vcs_to_changelog/vcstocl_quirks.py)1
5 files changed, 0 insertions, 1181 deletions
diff --git a/scripts/gitlog_to_changelog.py b/scripts/gitlog_to_changelog.py
deleted file mode 100755
index b7920aaf99..0000000000
--- a/scripts/gitlog_to_changelog.py
+++ /dev/null
@@ -1,138 +0,0 @@
-#!/usr/bin/python3
-# Main VCSToChangeLog script.
-# Copyright (C) 2019-2020 Free Software Foundation, Inc.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-
-''' Generate a ChangeLog style output based on a VCS log.
-
-This script takes two revisions as input and generates a ChangeLog style output
-for all revisions between the two revisions.
-
-This script is intended to be executed from the project parent directory.
-
-The vcs_to_changelog directory has a file vcstocl_quirks.py that defines a
-function called get_project_quirks that returns an object of class type
-ProjectQuirks or a subclass of the same. The definition of the ProjectQuirks
-class is below and it specifies the properties that the project must set to
-ensure correct parsing of its contents.
-
-Among other things, ProjectQuirks specifies the VCS to read from; the default is
-assumed to be git. The script then studies the VCS log and for each change,
-lists out the nature of changes in the constituent files.
-
-Each file type may have parser frontends that can read files and construct
-objects that may be compared to determine the minimal changes that occurred in
-each revision. For files that do not have parsers, we may only know the nature
-of changes at the top level depending on the information that the VCS stores.
-
-The parser frontend must have a compare() method that takes the old and new
-files as arrays of strings and prints the output in ChangeLog format.
-
-Currently implemented VCS:
-
- git
-
-Currently implemented frontends:
-
- C
-'''
-import sys
-import os
-import re
-import argparse
-from vcs_to_changelog.misc_util import *
-from vcs_to_changelog import frontend_c
-from vcs_to_changelog.vcs_git import *
-
-debug = DebugUtil(False)
-
-class ProjectQuirks:
- # This is a list of regex substitutions for C/C++ macros that are known to
- # break parsing of the C programs. Each member of this list is a dict with
- # the key 'orig' having the regex and 'sub' having the substitution of the
- # regex.
- MACRO_QUIRKS = []
-
- # This is a list of macro definitions that are extensively used and are
- # known to break parsing due to some characteristic, mainly the lack of a
- # semicolon at the end.
- C_MACROS = []
-
- # The repo type, defaults to git.
- repo = 'git'
-
- # List of files to ignore either because they are not needed (such as the
- # ChangeLog) or because they are non-parseable. For example, glibc has a
- # header file that is only assembly code, which breaks the C parser.
- IGNORE_LIST = ['ChangeLog']
-
-
-# Load quirks file. We assume that the script is run from the top level source
-# directory.
-sys.path.append('/'.join([os.getcwd(), 'scripts', 'vcs_to_changelog']))
-try:
- from vcstocl_quirks import *
- project_quirks = get_project_quirks(debug)
-except:
- project_quirks = ProjectQuirks()
-
-def analyze_diff(filename, oldfile, newfile, frontends):
- ''' Parse the output of the old and new files and print the difference.
-
- For input files OLDFILE and NEWFILE with name FILENAME, generate reduced
- trees for them and compare them. We limit our comparison to only C source
- files.
- '''
- name, ext = os.path.splitext(filename)
-
- if not ext in frontends.keys():
- return None
- else:
- frontend = frontends[ext]
- frontend.compare(oldfile, newfile)
-
-
-def main(repo, frontends, refs):
- ''' ChangeLog Generator Entry Point.
- '''
- commits = repo.list_commits(args.refs)
- for commit in commits:
- repo.list_changes(commit, frontends)
-
-
-if __name__ == '__main__':
- parser = argparse.ArgumentParser()
-
- parser.add_argument('refs', metavar='ref', type=str, nargs=2,
- help='Refs to print ChangeLog entries between')
-
- parser.add_argument('-d', '--debug', required=False, action='store_true',
- help='Run the file parser debugger.')
-
- args = parser.parse_args()
-
- debug.debug = args.debug
-
- if len(args.refs) < 2:
- debug.eprint('Two refs needed to get a ChangeLog.')
- sys.exit(os.EX_USAGE)
-
- REPO = {'git': GitRepo(project_quirks.IGNORE_LIST, debug)}
-
- fe_c = frontend_c.Frontend(project_quirks, debug)
- FRONTENDS = {'.c': fe_c,
- '.h': fe_c}
-
- main(REPO[project_quirks.repo], FRONTENDS, args.refs)
diff --git a/scripts/vcs_to_changelog/frontend_c.py b/scripts/vcs_to_changelog/frontend_c.py
deleted file mode 100644
index 8e37c5fa47..0000000000
--- a/scripts/vcs_to_changelog/frontend_c.py
+++ /dev/null
@@ -1,827 +0,0 @@
-#!/usr/bin/python3
-# The C Parser.
-# Copyright (C) 2019-2020 Free Software Foundation, Inc.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-
-from enum import Enum
-import re
-from vcs_to_changelog.misc_util import *
-
-class block_flags(Enum):
- ''' Flags for the code block.
- '''
- else_block = 1
- macro_defined = 2
- macro_redefined = 3
-
-
-class block_type(Enum):
- ''' Type of code block.
- '''
- file = 1
- macro_cond = 2
- macro_def = 3
- macro_undef = 4
- macro_include = 5
- macro_info = 6
- decl = 7
- func = 8
- composite = 9
- macrocall = 10
- fndecl = 11
- assign = 12
- struct = 13
- union = 14
- enum = 15
-
-# A dictionary describing what each action (add, modify, delete) shows up as in
-# the ChangeLog output.
-actions = {0:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'},
- block_type.file:{'new': 'New file', 'mod': 'Modified file',
- 'del': 'Remove file'},
- block_type.macro_cond:{'new': 'New', 'mod': 'Modified',
- 'del': 'Remove'},
- block_type.macro_def:{'new': 'New', 'mod': 'Modified',
- 'del': 'Remove'},
- block_type.macro_include:{'new': 'Include file', 'mod': 'Modified',
- 'del': 'Remove include'},
- block_type.macro_info:{'new': 'New preprocessor message',
- 'mod': 'Modified', 'del': 'Remove'},
- block_type.decl:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'},
- block_type.func:{'new': 'New function', 'mod': 'Modified function',
- 'del': 'Remove function'},
- block_type.composite:{'new': 'New', 'mod': 'Modified',
- 'del': 'Remove'},
- block_type.struct:{'new': 'New struct', 'mod': 'Modified struct',
- 'del': 'Remove struct'},
- block_type.union:{'new': 'New union', 'mod': 'Modified union',
- 'del': 'Remove union'},
- block_type.enum:{'new': 'New enum', 'mod': 'Modified enum',
- 'del': 'Remove enum'},
- block_type.macrocall:{'new': 'New', 'mod': 'Modified',
- 'del': 'Remove'},
- block_type.fndecl:{'new': 'New function', 'mod': 'Modified',
- 'del': 'Remove'},
- block_type.assign:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'}}
-
-def new_block(name, type, contents, parent, flags = 0):
- ''' Create a new code block with the parent as PARENT.
-
- The code block is a basic structure around which the tree representation of
- the source code is built. It has the following attributes:
-
- - name: A name to refer it by in the ChangeLog
- - type: Any one of the following types in BLOCK_TYPE.
- - contents: The contents of the block. For a block of types file or
- macro_cond, this would be a list of blocks that it nests. For other types
- it is a list with a single string specifying its contents.
- - parent: This is the parent of the current block, useful in setting up
- #elif or #else blocks in the tree.
- - flags: A special field to indicate some properties of the block. See
- BLOCK_FLAGS for values.
- '''
- block = {}
- block['matched'] = False
- block['name'] = name
- block['type'] = type
- block['contents'] = contents
- block['parent'] = parent
- if parent:
- parent['contents'].append(block)
-
- block['flags'] = flags
- block['actions'] = actions[type]
-
- return block
-
-
-class ExprParser:
- ''' Parent class of all of the C expression parsers.
-
- It is necessary that the children override the parse_line() method.
- '''
- ATTRIBUTE = r'(((__attribute__\s*\(\([^;]+\)\))|(asm\s*\([?)]+\)))\s*)*'
-
- def __init__(self, project_quirks, debug):
- self.project_quirks = project_quirks
- self.debug = debug
-
- def fast_forward_scope(self, cur, op, loc):
- ''' Consume lines in a code block.
-
- Consume all lines of a block of code such as a composite type declaration or
- a function declaration.
-
- - CUR is the string to consume this expression from
- - OP is the string array for the file
- - LOC is the first unread location in CUR
-
- - Returns: The next location to be read in the array as well as the updated
- value of CUR, which will now have the body of the function or composite
- type.
- '''
- nesting = cur.count('{') - cur.count('}')
- while nesting > 0 and loc < len(op):
- cur = cur + ' ' + op[loc]
-
- nesting = nesting + op[loc].count('{')
- nesting = nesting - op[loc].count('}')
- loc = loc + 1
-
- return (cur, loc)
-
- def parse_line(self, cur, op, loc, code, macros):
- ''' The parse method should always be overridden by the child.
- '''
- raise
-
-
-class FuncParser(ExprParser):
- REGEX = re.compile(ExprParser.ATTRIBUTE + r'\s*(\w+)\s*\([^(][^{]+\)\s*{')
-
- def parse_line(self, cur, op, loc, code, macros):
- ''' Parse a function.
-
- Match a function definition.
-
- - CUR is the string to consume this expression from
- - OP is the string array for the file
- - LOC is the first unread location in CUR
- - CODE is the block to which we add this
-
- - Returns: The next location to be read in the array.
- '''
- found = re.search(self.REGEX, cur)
- if not found:
- return cur, loc
-
- name = found.group(5)
- self.debug.print('FOUND FUNC: %s' % name)
-
- # Consume everything up to the ending brace of the function.
- (cur, loc) = self.fast_forward_scope(cur, op, loc)
-
- new_block(name, block_type.func, [cur], code)
-
- return '', loc
-
-
-class CompositeParser(ExprParser):
- # Composite types such as structs and unions.
- REGEX = re.compile(r'(struct|union|enum)\s*(\w*)\s*{')
-
- def parse_line(self, cur, op, loc, code, macros):
- ''' Parse a composite type.
-
- Match declaration of a composite type such as a struct or a union.
-
- - CUR is the string to consume this expression from
- - OP is the string array for the file
- - LOC is the first unread location in CUR
- - CODE is the block to which we add this
-
- - Returns: The next location to be read in the array.
- '''
- found = re.search(self.REGEX, cur)
- if not found:
- return cur, loc
-
- # Lap up all of the struct definition.
- (cur, loc) = self.fast_forward_scope(cur, op, loc)
-
- name = found.group(2)
-
- if not name:
- if 'typedef' in cur:
- name = re.sub(r'.*}\s*(\w+);$', r'\1', cur)
- else:
- name= '<anoymous>'
-
- ctype = found.group(1)
-
- if ctype == 'struct':
- blocktype = block_type.struct
- if ctype == 'enum':
- blocktype = block_type.enum
- if ctype == 'union':
- blocktype = block_type.union
-
- new_block(name, block_type.composite, [cur], code)
-
- return '', loc
-
-
-class AssignParser(ExprParser):
- # Static assignments.
- REGEX = re.compile(r'(\w+)\s*(\[[^\]]*\])*\s*([^\s]*attribute[\s\w()]+)?\s*=')
-
- def parse_line(self, cur, op, loc, code, macros):
- ''' Parse an assignment statement.
-
- This includes array assignments.
-
- - CUR is the string to consume this expression from
- - OP is the string array for the file
- - LOC is the first unread location in CUR
- - CODE is the block to which we add this
-
- - Returns: The next location to be read in the array.
- '''
- found = re.search(self.REGEX, cur)
- if not found:
- return cur, loc
-
- name = found.group(1)
- self.debug.print('FOUND ASSIGN: %s' % name)
- # Lap up everything up to semicolon.
- while ';' not in cur and loc < len(op):
- cur = op[loc]
- loc = loc + 1
-
- new_block(name, block_type.assign, [cur], code)
-
- return '', loc
-
-
-class DeclParser(ExprParser):
- # Function pointer typedefs.
- TYPEDEF_FN_RE = re.compile(r'\(\*(\w+)\)\s*\([^)]+\);')
-
- # Simple decls.
- DECL_RE = re.compile(r'(\w+)(\[\w*\])*\s*' + ExprParser.ATTRIBUTE + ';')
-
- # __typeof decls.
- TYPEOF_RE = re.compile(r'__typeof\s*\([\w\s]+\)\s*(\w+)\s*' + \
- ExprParser.ATTRIBUTE + ';')
-
- # Function Declarations.
- FNDECL_RE = re.compile(r'\s*(\w+)\s*\([^\(][^;]*\)\s*' +
- ExprParser.ATTRIBUTE + ';')
-
- def __init__(self, regex, blocktype, project_quirks, debug):
- # The regex for the current instance.
- self.REGEX = regex
- self.blocktype = blocktype
- super().__init__(project_quirks, debug)
-
- def parse_line(self, cur, op, loc, code, macros):
- ''' Parse a top level declaration.
-
- All types of declarations except function declarations.
-
- - CUR is the string to consume this expression from
- - OP is the string array for the file
- - LOC is the first unread location in CUR
- - CODE is the block to which we add this function
-
- - Returns: The next location to be read in the array.
- '''
- found = re.search(self.REGEX, cur)
- if not found:
- return cur, loc
-
- # The name is the first group for all of the above regexes. This is a
- # coincidence, so care must be taken if regexes are added or changed to
- # ensure that this is true.
- name = found.group(1)
-
- self.debug.print('FOUND DECL: %s' % name)
- new_block(name, self.blocktype, [cur], code)
-
- return '', loc
-
-
-class MacroParser(ExprParser):
- # The macrocall_re peeks into the next line to ensure that it doesn't
- # eat up a FUNC by accident. The func_re regex is also quite crude and
- # only intends to ensure that the function name gets picked up
- # correctly.
- MACROCALL_RE = re.compile(r'(\w+)\s*(\(.*\))*$')
-
- def parse_line(self, cur, op, loc, code, macros):
- ''' Parse a macro call.
-
- Match symbol hack macro calls that get added without semicolons.
-
- - CUR is the string to consume this expression from
- - OP is the string array for the file
- - LOC is the first unread location in CUR
- - CODE is the block to which we add this
- - MACROS is the regex match object.
-
- - Returns: The next location to be read in the array.
- '''
-
- # First we have the macros for symbol hacks and all macros we identified so
- # far.
- if cur.count('(') != cur.count(')'):
- return cur, loc
- if loc < len(op) and '{' in op[loc]:
- return cur, loc
-
- found = re.search(self.MACROCALL_RE, cur)
- if found:
- sym = found.group(1)
- name = found.group(2)
- if sym in macros or self.project_quirks and \
- sym in self.project_quirks.C_MACROS:
- self.debug.print('FOUND MACROCALL: %s (%s)' % (sym, name))
- new_block(sym, block_type.macrocall, [cur], code)
- return '', loc
-
- # Next, there could be macros that get called right inside their #ifdef, but
- # without the semi-colon.
- if cur.strip() == code['name'].strip():
- self.debug.print('FOUND MACROCALL (without brackets): %s' % (cur))
- new_block(cur, block_type.macrocall, [cur], code)
- return '',loc
-
- return cur, loc
-
-
-class Frontend:
- ''' The C Frontend implementation.
- '''
- KNOWN_MACROS = []
-
- def __init__(self, project_quirks, debug):
- self.op = []
- self.debug = debug
- self.project_quirks = project_quirks
-
- self.c_expr_parsers = [
- CompositeParser(project_quirks, debug),
- AssignParser(project_quirks, debug),
- DeclParser(DeclParser.TYPEOF_RE, block_type.decl,
- project_quirks, debug),
- DeclParser(DeclParser.TYPEDEF_FN_RE, block_type.decl,
- project_quirks, debug),
- DeclParser(DeclParser.FNDECL_RE, block_type.fndecl,
- project_quirks, debug),
- FuncParser(project_quirks, debug),
- DeclParser(DeclParser.DECL_RE, block_type.decl, project_quirks,
- debug),
- MacroParser(project_quirks, debug)]
-
-
- def remove_extern_c(self):
- ''' Process extern "C"/"C++" block nesting.
-
- The extern "C" nesting does not add much value so it's safe to almost always
- drop it. Also drop extern "C++"
- '''
- new_op = []
- nesting = 0
- extern_nesting = 0
- for l in self.op:
- if '{' in l:
- nesting = nesting + 1
- if re.match(r'extern\s*"C"\s*{', l):
- extern_nesting = nesting
- continue
- if '}' in l:
- nesting = nesting - 1
- if nesting < extern_nesting:
- extern_nesting = 0
- continue
- new_op.append(l)
-
- # Now drop all extern C++ blocks.
- self.op = new_op
- new_op = []
- nesting = 0
- extern_nesting = 0
- in_cpp = False
- for l in self.op:
- if re.match(r'extern\s*"C\+\+"\s*{', l):
- nesting = nesting + 1
- in_cpp = True
-
- if in_cpp:
- if '{' in l:
- nesting = nesting + 1
- if '}' in l:
- nesting = nesting - 1
- if nesting == 0:
- new_op.append(l)
-
- self.op = new_op
-
-
- def remove_comments(self, op):
- ''' Remove comments.
-
- Return OP by removing all comments from it.
- '''
- self.debug.print('REMOVE COMMENTS')
-
- sep='\n'
- opstr = sep.join(op)
- opstr = re.sub(r'/\*.*?\*/', r'', opstr, flags=re.MULTILINE | re.DOTALL)
- opstr = re.sub(r'\\\n', r' ', opstr, flags=re.MULTILINE | re.DOTALL)
- new_op = list(filter(None, opstr.split(sep)))
-
- return new_op
-
-
- def normalize_condition(self, name):
- ''' Make some minor transformations on macro conditions to make them more
- readable.
- '''
- # Negation with a redundant bracket.
- name = re.sub(r'!\s*\(\s*(\w+)\s*\)', r'! \1', name)
- # Pull in negation of equality.
- name = re.sub(r'!\s*\(\s*(\w+)\s*==\s*(\w+)\)', r'\1 != \2', name)
- # Pull in negation of inequality.
- name = re.sub(r'!\s*\(\s*(\w+)\s*!=\s*(\w+)\)', r'\1 == \2', name)
- # Fix simple double negation.
- name = re.sub(r'!\s*\(\s*!\s*(\w+)\s*\)', r'\1', name)
- # Similar, but nesting a complex expression. Because of the greedy match,
- # this matches only the outermost brackets.
- name = re.sub(r'!\s*\(\s*!\s*\((.*)\)\s*\)$', r'\1', name)
- return name
-
-
- def parse_preprocessor(self, loc, code, start = ''):
- ''' Parse a preprocessor directive.
-
- In case of a preprocessor condition (i.e. if/elif/else), create a new code
- block to nest code into and in other cases, identify and add entities such as
- include files, defines, etc.
-
- - OP is the string array for the file
- - LOC is the first unread location in CUR
- - CODE is the block to which we add this function
- - START is the string that should continue to be expanded in case we step
- into a new macro scope.
-
- - Returns: The next location to be read in the array.
- '''
- cur = self.op[loc]
- loc = loc + 1
- endblock = False
-
- self.debug.print('PARSE_MACRO: %s' % cur)
-
- # Remove the # and strip spaces again.
- cur = cur[1:].strip()
-
- # Include file.
- if cur.find('include') == 0:
- m = re.search(r'include\s*["<]?([^">]+)[">]?', cur)
- new_block(m.group(1), block_type.macro_include, [cur], code)
-
- # Macro definition.