This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch siddhesh/changelog-begone created. glibc-2.28.9000-414-g9c0b599
- From: siddhesh at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 18 Dec 2018 11:29:14 -0000
- Subject: GNU C Library master sources branch siddhesh/changelog-begone created. glibc-2.28.9000-414-g9c0b599
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, siddhesh/changelog-begone has been created
at 9c0b5995ed645cf6eb5fd2d3e83e7704fe58a067 (commit)
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=9c0b5995ed645cf6eb5fd2d3e83e7704fe58a067
commit 9c0b5995ed645cf6eb5fd2d3e83e7704fe58a067
Author: Siddhesh Poyarekar <siddhesh@linaro.org>
Date: Tue Dec 18 16:57:37 2018 +0530
Add more type-specific detail to the output
diff --git a/scripts/gen-changed-entities.py b/scripts/gen-changed-entities.py
index 4c847fb..8d5d21c 100755
--- a/scripts/gen-changed-entities.py
+++ b/scripts/gen-changed-entities.py
@@ -133,6 +133,30 @@ class block_type(Enum):
assign = 12
+actions = {0:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'},
+ block_type.file:{'new': 'New file', 'mod': 'Modified file',
+ 'del': 'Remove file'},
+ block_type.macro_cond:{'new': 'New', 'mod': 'Modified',
+ 'del': 'Remove'},
+ block_type.macro_def:{'new': 'New macro', 'mod': 'Modified macro',
+ 'del': 'Remove macro'},
+ block_type.macro_undef:{'new': 'Undefine', 'mod': 'Modified',
+ 'del': 'Remove'},
+ block_type.macro_include:{'new': 'Include file', 'mod': 'Modified',
+ 'del': 'Remove include'},
+ block_type.macro_info:{'new': 'New preprocessor message',
+ 'mod': 'Modified', 'del': 'Remove'},
+ block_type.decl:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'},
+ block_type.func:{'new': 'New function', 'mod': 'Modified function',
+ 'del': 'Remove function'},
+ block_type.composite:{'new': 'New', 'mod': 'Modified',
+ 'del': 'Remove'},
+ block_type.macrocall:{'new': 'New', 'mod': 'Modified',
+ 'del': 'Remove'},
+ block_type.fndecl:{'new': 'New function', 'mod': 'Modified',
+ 'del': 'Remove'},
+ block_type.assign:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'}}
+
#------------------------------------------------------------------------------
# C Parser.
#------------------------------------------------------------------------------
@@ -585,16 +609,16 @@ def compare_trees(left, right, prologue = ''):
found = True
break
if not found:
- print_changed_tree(cl, 'Removed', prologue)
+ print_changed_tree(cl, actions[cl['type']]['del'], prologue)
# ... and vice versa. This time we only need to look at unmatched
# contents.
for cr in right['contents']:
if not cr['matched']:
- print_changed_tree(cr, 'New', prologue)
+ print_changed_tree(cr, actions[cr['type']]['new'], prologue)
else:
if left['contents'] != right['contents']:
- print_changed_tree(left, 'Modified', prologue)
+ print_changed_tree(left, actions[left['type']]['mod'], prologue)
parsers = {'.c':{'parse_output':parse_c_output,'print_tree':print_c_tree},
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=505798171711afa0ba8b677fbacbb7f092abab6a
commit 505798171711afa0ba8b677fbacbb7f092abab6a
Author: Siddhesh Poyarekar <siddhesh@linaro.org>
Date: Tue Dec 18 08:25:26 2018 +0530
Identify void functions
diff --git a/scripts/gen-changed-entities.py b/scripts/gen-changed-entities.py
index b144e0b..4c847fb 100755
--- a/scripts/gen-changed-entities.py
+++ b/scripts/gen-changed-entities.py
@@ -148,13 +148,13 @@ FUNC_RE = re.compile(ATTRIBUTE + r'*\s*(\w+)\s*\([^(][^{]+\)\s*{')
# The macrocall_re peeks into the next line to ensure that it doesn't eat up
# a FUNC by accident. The func_re regex is also quite crude and only
# intends to ensure that the function name gets picked up correctly.
-MACROCALL_RE = re.compile(r'(\w+)\s*\(\w+(\s*,\s*[\w\.]+)*\)$')
+MACROCALL_RE = re.compile(r'(\w+)\s*\((?!void).(\s*,\s*[\w\.]+)*\)$')
# Composite types such as structs and unions.
COMPOSITE_RE = re.compile(r'(struct|union|enum)\s*(\w*)\s*{')
# Static assignments.
-ASSIGN_RE = re.compile(r'(\w+)\s*(\[[^\]]*\])?\s*([^\s]*attribute[\s\w()]+)?\s*=')
+ASSIGN_RE = re.compile(r'(\w+)\s*(\[[^\]]*\])*\s*([^\s]*attribute[\s\w()]+)?\s*=')
# Function Declarations.
FNDECL_RE = re.compile(r'(\w+)\s*\([^;]+\)\s*' + ATTRIBUTE + '*;')
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=3178402779d2d906b6e854f129224583ab413082
commit 3178402779d2d906b6e854f129224583ab413082
Author: Siddhesh Poyarekar <siddhesh@linaro.org>
Date: Tue Dec 18 06:39:29 2018 +0530
Make parsers pluggable
diff --git a/scripts/gen-changed-entities.py b/scripts/gen-changed-entities.py
index b4c7785..b144e0b 100755
--- a/scripts/gen-changed-entities.py
+++ b/scripts/gen-changed-entities.py
@@ -470,7 +470,7 @@ def parse(op, loc, code, start = ''):
return loc
-def parse_output(op):
+def parse_c_output(op):
''' File parser.
Parse the input array of lines OP and generate a tree structure to
@@ -485,7 +485,7 @@ def parse_output(op):
return tree
-def print_tree(tree, indent):
+def print_c_tree(tree, indent):
''' Print the entire tree.
'''
if not debug:
@@ -494,7 +494,7 @@ def print_tree(tree, indent):
if tree['type'] == block_type.macro_cond or tree['type'] == block_type.file:
print('%sScope: %s' % (' ' * indent, tree['name']))
for c in tree['contents']:
- print_tree(c, indent + 4)
+ print_c_tree(c, indent + 4)
print('%sEndScope: %s' % (' ' * indent, tree['name']))
else:
if tree['type'] == block_type.func:
@@ -597,6 +597,21 @@ def compare_trees(left, right, prologue = ''):
print_changed_tree(left, 'Modified', prologue)
+parsers = {'.c':{'parse_output':parse_c_output,'print_tree':print_c_tree},
+ '.h':{'parse_output':parse_c_output,'print_tree':print_c_tree}}
+
+
+def get_parser(filename):
+ ''' Get an appropriate parser for FILENAME.
+ '''
+ name, ext = os.path.splitext(filename)
+
+ if not ext in parsers.keys():
+ return None
+
+ return parsers[ext]
+
+
def analyze_diff(oldfile, newfile, filename):
''' Parse the output of the old and new files and print the difference.
@@ -604,17 +619,13 @@ def analyze_diff(oldfile, newfile, filename):
trees for them and compare them. We limit our comparison to only C source
files.
'''
- split = filename.split('.')
- ext = ''
- if split:
- ext = split[-1]
+ parser = get_parser(filename)
- if ext != 'c' and ext != 'h':
+ if not parser:
return
- debug_print('\t<List diff between oldfile and newfile>')
- # op = exec_git_cmd(['diff', '-U20000', oldfile, newfile])
- # (left, right) = parse_output(op)
+ parse_output = parser['parse_output']
+ print_tree = parser['print_tree']
left = parse_output(exec_git_cmd(['show', oldfile]))
right = parse_output(exec_git_cmd(['show', newfile]))
@@ -738,11 +749,17 @@ def main(revs):
def parser_file_test(f):
''' Parser debugger Entry Point
'''
+ parser = get_parser(f)
+
+ if not parser:
+ debug_print('%s: No parser for this file type, cannot debug' % f)
+ return
+
with open(f) as srcfile:
op = srcfile.readlines()
op = [x[:-1] for x in op]
- tree = parse_output(op)
- print_tree(tree, 0)
+ tree = parser['parse_output'](op)
+ parser['print_tree'](tree, 0)
# Program Entry point. If -d is specified, the second argument is assumed to be
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=44b5f663da07a933aac0d9117e41eb16fe1a8157
commit 44b5f663da07a933aac0d9117e41eb16fe1a8157
Author: Siddhesh Poyarekar <siddhesh@linaro.org>
Date: Wed Nov 21 22:47:16 2018 +0530
Add missing operation types for git-diff-format
diff --git a/scripts/gen-changed-entities.py b/scripts/gen-changed-entities.py
index 01a6561..b4c7785 100755
--- a/scripts/gen-changed-entities.py
+++ b/scripts/gen-changed-entities.py
@@ -678,29 +678,41 @@ def list_changes(commit):
# where OPERATION can be one of the following:
# A: File added
# D: File removed
- # M: File modified
+ # M[0-9]{3}: File modified
# R[0-9]{3}: File renamed, with the 3 digit number following it indicating
# what percentage of the file is intact.
+ # C[0-9]{3}: File copied. Same semantics as R.
+ # T: The permission bits of the file changed
+ # U: Unmerged. We should not encounter this, so we ignore it.
+ # X, or anything else: Most likely a bug. Report it.
#
- # FILE2 is set only when OPERATION is R, to indicate the new file name.
+ # FILE2 is set only when OPERATION is R or C, to indicate the new file name.
#
# Also note that merge commits have a different format here, with three
# entries each for the modes and refs, but we don't bother with it for now.
+ #
+ # For more details: https://git-scm.com/docs/diff-format
for f in op:
data = f.split()
if data[4] == 'A':
print('\t* %s: New file.' % data[5])
elif data[4] == 'D':
print('\t* %s: Delete file.' % data[5])
- elif data[4] == 'M':
+ elif data[4] == 'T':
+ print('\t* %s: Changed file permission bits from %s to %s' % \
+ (data[5], data[0], data[1]))
+ elif data[4][0] == 'M':
print('\t* %s: Modified.' % data[5])
analyze_diff(data[2], data[3], data[5])
- elif data[4][0] == 'R':
+ elif data[4][0] == 'R' or data[4][0] == 'C':
change = int(data[4][1:])
print('\t* %s: Move to...' % data[5])
print('\t* %s: ... here.' % data[6])
if change < 100:
analyze_diff(data[2], data[3], data[6])
+ # We should never encounter this, so ignore for now.
+ elif data[4] == 'U':
+ pass
else:
eprint('%s: Unknown line format %s' % (commit, data[4]))
sys.exit(42)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=3988cc28b856f558786c2edfe58094479a799da0
commit 3988cc28b856f558786c2edfe58094479a799da0
Author: Siddhesh Poyarekar <siddhesh@linaro.org>
Date: Tue Nov 20 01:05:08 2018 +0530
Another big update on a plane
diff --git a/scripts/gen-changed-entities.py b/scripts/gen-changed-entities.py
index 8b4d889..01a6561 100755
--- a/scripts/gen-changed-entities.py
+++ b/scripts/gen-changed-entities.py
@@ -47,14 +47,78 @@ import subprocess
import sys
import os
import re
-
-#------------------------------------------------------------------------------
-# C Parser.
-#------------------------------------------------------------------------------
from enum import Enum
+
+# General Utility functions.
+def eprint(*args, **kwargs):
+ ''' Print to stderr.
+ '''
+ print(*args, file=sys.stderr, **kwargs)
+
+
debug = False
+def debug_print(*args, **kwargs):
+ ''' Convenience function to print diagnostic information in the program.
+ '''
+ if debug:
+ eprint(*args, **kwargs)
+
+
+def usage(name):
+ ''' Print program usage.
+ '''
+ eprint("usage: %s <from-ref> <to-ref>" % name)
+ sys.exit(os.EX_USAGE)
+
+
+def decode(string):
+ ''' Attempt to decode a string.
+
+ Decode a string read from the source file. The multiple attempts are needed
+ due to the presence of the page break characters and some tests in locales.
+ '''
+ codecs = ['utf8', 'latin1', 'cp1252']
+
+ for i in codecs:
+ try:
+ return string.decode(i)
+ except UnicodeDecodeError:
+ pass
+
+ eprint('Failed to decode: %s' % string)
+
+
+def new_block(name, type, contents, parent):
+ ''' Create a new code block with the parent as PARENT.
+
+ The code block is a basic structure around which the tree representation of
+ the source code is built. It has the following attributes:
+
+ - type: Any one of the following types in BLOCK_TYPE.
+ - name: A name to refer it by in the ChangeLog
+ - contents: The contents of the block. For a block of types file or
+ macro_cond, this would be a list of blocks that it nests. For other types
+ it is a list with a single string specifying its contents.
+ - parent: This is the parent of the current block, useful in setting up
+ #elif or #else blocks in the tree.
+ - matched: A flag to specify if the block in a tree has found a match in the
+ other tree to which it is being compared.
+ '''
+ block = {}
+ block['matched'] = False
+ block['name'] = name
+ block['type'] = type
+ block['contents'] = contents
+ block['parent'] = parent
+ if parent:
+ parent['contents'].append(block)
+
+ return block
+
class block_type(Enum):
+ ''' Type of code block.
+ '''
file = 1
macro_cond = 2
macro_def = 3
@@ -69,6 +133,9 @@ class block_type(Enum):
assign = 12
+#------------------------------------------------------------------------------
+# C Parser.
+#------------------------------------------------------------------------------
# Regular expressions.
# The __attribute__ are written in a bunch of different ways in glibc.
@@ -89,7 +156,7 @@ COMPOSITE_RE = re.compile(r'(struct|union|enum)\s*(\w*)\s*{')
# Static assignments.
ASSIGN_RE = re.compile(r'(\w+)\s*(\[[^\]]*\])?\s*([^\s]*attribute[\s\w()]+)?\s*=')
-# Function Declarations. FIXME BROKEN
+# Function Declarations.
FNDECL_RE = re.compile(r'(\w+)\s*\([^;]+\)\s*' + ATTRIBUTE + '*;')
# Function pointer typedefs.
@@ -99,94 +166,46 @@ TYPEDEF_FN_RE = re.compile(r'\(\*(\w+)\)\s*\([^)]+\);')
DECL_RE = re.compile(r'(\w+)(\[\w+\])?\s*' + ATTRIBUTE + '?;')
-def collapse_macros(op):
- # Consolidate macro defs across multiple lines.
- new_op = []
- cur_line = ''
- join_line = False
- for l in op:
- if join_line:
- cur_line = cur_line[:-1] + ' ' + l
- else:
- cur_line = l
-
- if cur_line[0] == '#' and cur_line[-1] == '\\':
- join_line = True
- continue
- else:
- join_line = False
-
- new_op.append(cur_line)
-
- return new_op
-
-
def remove_comments(op):
- new_op = []
+ ''' Remove comments.
- # The simpler one-line comments.
- for l in op:
- # FIXME: This assumes that there's always only one comment per line.
- rem = re.sub(r'/\*.*\*/', r'', l)
- if rem:
- new_op.append(rem.strip())
-
- op = new_op
- new_op = []
+ Return OP by removing all comments from it.
+ '''
+ debug_print('REMOVE COMMENTS')
- in_comment = False
- for l in op:
- if in_comment:
- loc = l.find('*/')
- if loc == -1:
- continue
- else:
- in_comment = False
- rem = l[loc + 2:]
- if rem:
- new_op.append(rem)
- else:
- loc = l.find('/*')
- if loc == -1:
- new_op.append(l)
- else:
- in_comment = True
- rem = l[:loc]
- if rem:
- new_op.append(rem)
+ sep='\n'
+ opstr = sep.join(op)
+ opstr = re.sub(r'/\*.*?\*/', r'', opstr, flags=re.MULTILINE | re.DOTALL)
+ opstr = re.sub(r'\\\n', r' ', opstr, flags=re.MULTILINE | re.DOTALL)
+ new_op = list(filter(None, opstr.split(sep)))
return new_op
-def new_block(name, type, contents, parent):
- block = {}
- block['matched'] = False
- block['name'] = name
- block['type'] = type
- block['contents'] = contents
- block['parent'] = parent
- if parent:
- parent['contents'].append(block)
+# Parse macros.
+def parse_preprocessor(op, loc, code, start = '', else_start = ''):
+ ''' Parse a preprocessor directive.
- return block
+ In case a preprocessor condition (i.e. if/elif/else), create a new code
+ block to nest code into and in other cases, identify and add entities such as
+ include files, defines, etc.
+ - NAME is the name of the directive
+ - CUR is the string to consume this expression from
+ - OP is the string array for the file
+ - LOC is the first unread location in CUR
+ - CODE is the block to which we add this function
-# Parse macros.
-def parse_macro(op, loc, code, start = '', else_start = ''):
+ - Returns: The next location to be read in the array.
+ '''
cur = op[loc]
loc = loc + 1
endblock = False
debug_print('PARSE_MACRO: %s' % cur)
- # Collapse the macro into a single line.
- while cur[-1] == '\\':
- cur = cur[:-1] + ' ' + op[loc]
- loc = loc + 1
-
-
# Remove the # and strip spaces again.
- cur = cur[1:].strip()
+ cur = cur[1:]
# Include file.
if cur.find('include') == 0:
@@ -242,13 +261,26 @@ def parse_macro(op, loc, code, start = '', else_start = ''):
# Given the start of a scope CUR, lap up all code up to the end of scope
# indicated by the closing brace.
-def fast_forward_scope(cur, op, loc, open='{', close='}'):
- nesting = cur.count(open) - cur.count(close)
+def fast_forward_scope(cur, op, loc):
+ ''' Consume lines in a code block.
+
+ Consume all lines of a block of code such as a composite type declaration or
+ a function declaration.
+
+ - CUR is the string to consume this expression from
+ - OP is the string array for the file
+ - LOC is the first unread location in CUR
+
+ - Returns: The next location to be read in the array as well as the updated
+ value of CUR, which will now have the body of the function or composite
+ type.
+ '''
+ nesting = cur.count('{') - cur.count('}')
while nesting > 0 and loc < len(op):
cur = cur + ' ' + op[loc]
- nesting = nesting + op[loc].count(open)
- nesting = nesting - op[loc].count(close)
+ nesting = nesting + op[loc].count('{')
+ nesting = nesting - op[loc].count('}')
loc = loc + 1
return (cur, loc)
@@ -256,6 +288,18 @@ def fast_forward_scope(cur, op, loc, open='{', close='}'):
# Different types of declarations.
def parse_decl(name, cur, op, loc, code, blocktype):
+ ''' Parse a top level declaration.
+
+ All types of declarations except function declarations.
+
+ - NAME is the name of the declared entity
+ - CUR is the string to consume this expression from
+ - OP is the string array for the file
+ - LOC is the first unread location in CUR
+ - CODE is the block to which we add this function
+
+ - Returns: The next location to be read in the array.
+ '''
debug_print('FOUND DECL: %s' % name)
new_block(name, blocktype, [cur], code)
@@ -263,142 +307,187 @@ def parse_decl(name, cur, op, loc, code, blocktype):
# Assignments.
-def parse_assign(name, cur, op, loc, code):
+def parse_assign(name, cur, op, loc, code, blocktype):
+ ''' Parse an assignment statement.
+
+ This includes array assignments.
+
+ - NAME is the name of the assigned entity
+ - CUR is the string to consume this expression from
+ - OP is the string array for the file
+ - LOC is the first unread location in CUR
+ - CODE is the block to which we add this
+
+ - Returns: The next location to be read in the array.
+ '''
debug_print('FOUND ASSIGN: %s' % name)
# Lap up everything up to semicolon.
while ';' not in cur and loc < len(op):
cur = op[loc]
loc = loc + 1
- new_block(name, block_type.assign, [cur], code)
+ new_block(name, blocktype, [cur], code)
return loc
-# Structs or unions.
-def parse_composite(name, cur, op, loc, code):
+def parse_composite(name, cur, op, loc, code, blocktype):
+ ''' Parse a composite type.
+
+ Match declaration of a composite type such as a struct or a union.
+
+ - NAME is the name of the composite type
+ - CUR is the string to consume this expression from
+ - OP is the string array for the file
+ - LOC is the first unread location in CUR
+ - CODE is the block to which we add this
+
+ - Returns: The next location to be read in the array.
+ '''
if not name:
name = '<anonymous>'
# Lap up all of the struct definition.
(cur, loc) = fast_forward_scope(cur, op, loc)
- new_block(name, block_type.composite, [cur], code)
+ new_block(name, blocktype, [cur], code)
return loc
-# Parse a function. NAME is the function name.
-def parse_func(name, cur, op, loc, code):
+def parse_func(name, cur, op, loc, code, blocktype):
+ ''' Parse a function.
+
+ Match a function definition.
+
+ - NAME is the name of the function
+ - CUR is the string to consume this expression from
+ - OP is the string array for the file
+ - LOC is the first unread location in CUR
+ - CODE is the block to which we add this
+
+ - Returns: The next location to be read in the array.
+ '''
debug_print('FOUND FUNC: %s' % name)
# Consume everything up to the ending brace of the function.
(cur, loc) = fast_forward_scope(cur, op, loc)
- new_block(name, block_type.func, [cur], code)
+ new_block(name, blocktype, [cur], code)
return loc
-# Parse a function. NAME is the function name.
-def parse_macrocall(name, cur, op, loc, code):
+def parse_macrocall(name, cur, op, loc, code, blocktype):
+ ''' Parse a macro call.
+
+ Match symbol hack macro calls that get added without semicolons.
+
+ - NAME is the name of the macro call
+ - LOC is the first unread location in CUR
+ - CUR is the string to consume this expression from
+ - OP is the string array for the file
+ - CODE is the block to which we add this
+
+ - Returns: The next location to be read in the array.
+ '''
debug_print('FOUND MACROCALL: %s' % name)
- new_block(name, block_type.macrocall, [cur], code)
+ new_block(name, blocktype, [cur], code)
return loc
-def parse_c_expr(cur, op, loc, code, start):
+c_expr_parsers = [
+ {'regex' : COMPOSITE_RE, 'func' : parse_composite, 'name' : 2,
+ 'type' : block_type.composite},
+ {'regex' : ASSIGN_RE, 'func' : parse_assign, 'name' : 1,
+ 'type' : block_type.assign},
+ {'regex' : TYPEDEF_FN_RE, 'func' : parse_decl, 'name' : 1,
+ 'type' : block_type.decl},
+ {'regex' : FNDECL_RE, 'func' : parse_decl, 'name' : 1,
+ 'type' : block_type.fndecl},
+ {'regex' : FUNC_RE, 'func' : parse_func, 'name' : 5,
+ 'type' : block_type.func},
+ {'regex' : MACROCALL_RE, 'func' : parse_macrocall, 'name' : 1,
+ 'type' : block_type.macrocall},
+ {'regex' : DECL_RE, 'func' : parse_decl, 'name' : 1,
+ 'type' : block_type.decl}]
+
+
+def parse_c_expr(cur, op, loc, code):
+ ''' Parse a C expression.
+
+ CUR is the string to be parsed, which continues to grow until a match is
+ found. OP is the string array and LOC is the first unread location in the
+ string array. CODE is the block in which any identified expressions should
+ be added.
+ '''
debug_print('PARSING: %s' % cur)
- # Composite type declarations.
- found = re.search(COMPOSITE_RE, cur)
- if found:
- return found, parse_composite(found.group(2), cur, op, loc, code)
-
- # Assignments. This should cover struct and array assignments too.
- found = re.search(ASSIGN_RE, cur)
- if found:
- return found, parse_assign(found.group(1), cur, op, loc, code)
-
- # Typedefs.
- found = re.search(TYPEDEF_FN_RE, cur)
- if found:
- return found, parse_decl(found.group(1), cur, op, loc, code,
- block_type.decl)
-
- # Function declarations are pretty straightforward compared to function
- # definitions, which have to account for any __attribute__ annotations
- # for its arguments. With declarations, we just match the last closing
- # bracket and the semicolon following it.
- found = re.search(FNDECL_RE, cur)
- if found:
- return found, parse_decl(found.group(1), cur, op, loc, code,
- block_type.fndecl)
-
- # Functions or macro calls that don't end with a semicolon.
- found = re.search(FUNC_RE, cur)
- if found:
- return found, parse_func(found.group(5), cur, op, loc, code)
-
- # Functions or macro calls that don't end with a semicolon. We need to peek
- # ahead to make sure that we don't mis-identify a function. This happens
- # only with functions that take no arguments.
- found = re.search(MACROCALL_RE, cur)
- if found and (loc >= len(op) or '{' not in op[loc]):
- return found, parse_macrocall(found.group(1), cur, op, loc, code)
-
- # Finally, all declarations.
- found = re.search(DECL_RE, cur)
- if found:
- return found, parse_decl(found.group(1), cur, op, loc, code,
- block_type.decl)
-
- return found, loc
-
-
-# Parse the file line by line. The function assumes a mostly GNU coding
-# standard compliant input so it might barf with anything that is eligible for
-# the Obfuscated C code contest.
-#
-# The basic idea of the parser is to identify macro conditional scopes and
-# definitions, includes, etc. and then parse the remaining C code in the context
-# of those macro scopes. The parser does not try to understand the semantics of
-# the code or even validate its syntax. It only records high level symbols in
-# the source and makes a tree structure to indicate the declaration/definition
-# of those symbols and their scope in the macro definitions.
-#
-# LOC is the first unparsed line.
+ # TODO: There's probably a quicker way to do this.
+ for p in c_expr_parsers:
+ found = re.search(p['regex'], cur)
+ if found:
+ return '', p['func'](found.group(p['name']), cur, op, loc, code,
+ p['type'])
+
+ return cur, loc
+
+
def parse(op, loc, code, start = ''):
+ '''
+ Parse the file line by line. The function assumes a mostly GNU coding
+ standard compliant input so it might barf with anything that is eligible for
+ the Obfuscated C code contest.
+
+ The basic idea of the parser is to identify macro conditional scopes and
+ definitions, includes, etc. and then parse the remaining C code in the
+ context of those macro scopes. The parser does not try to understand the
+ semantics of the code or even validate its syntax. It only records high
+ level symbols in the source and makes a tree structure to indicate the
+ declaration/definition of those symbols and their scope in the macro
+ definitions.
+
+ LOC is the first unparsed line.
+ '''
cur = start
endblock = False
while loc < len(op):
- nextline = op[loc].strip()
-
- if not nextline:
- loc = loc + 1
- continue
+ nextline = op[loc]
# Macros.
if nextline[0] == '#':
- (loc, endblock) = parse_macro(op, loc, code, cur, start)
+ (loc, endblock) = parse_preprocessor(op, loc, code, cur, start)
if endblock and not cur:
return loc
# Rest of C Code.
else:
cur = cur + ' ' + nextline
- found, loc = parse_c_expr(cur, op, loc + 1, code, cur)
- if found:
- cur = ''
- if endblock:
- return loc
+ cur, loc = parse_c_expr(cur, op, loc + 1, code)
return loc
+def parse_output(op):
+ ''' File parser.
+
+ Parse the input array of lines OP and generate a tree structure to
+ represent the file. This tree structure is then used for comparison between
+ the old and new file.
+ '''
+ tree = new_block('', block_type.file, [], None)
+ op = remove_comments(op)
+ op = [re.sub(r'#\s+', '#', x) for x in op]
+ op = parse(op, 0, tree)
+
+ return tree
+
+
def print_tree(tree, indent):
+ ''' Print the entire tree.
+ '''
if not debug:
return
@@ -432,40 +521,19 @@ def print_tree(tree, indent):
#------------------------------------------------------------------------------
-def debug_print(*args, **kwargs):
- if debug:
- print(*args, file=sys.stderr, **kwargs)
-
-def eprint(*args, **kwargs):
- print(*args, file=sys.stderr, **kwargs)
-
-def usage(name):
- eprint("usage: %s <from-ref> <to-ref>" % name)
- sys.exit(os.EX_USAGE)
-
-def decode(string):
- codecs = ['utf8', 'latin1', 'cp1252']
-
- for i in codecs:
- try:
- return string.decode(i)
- except UnicodeDecodeError:
- pass
-
- eprint('Failed to decode: %s' % string)
-
-def cleaned(ip):
- # Returns the output from a command after cleaning it up, i.e. removing
- # trailing spaces, newlines and dropping blank lines.
- op = list(filter(None, [decode(x[:-1]).strip() for x in ip]))
- return op
-
def exec_git_cmd(args):
+ ''' Execute a git command and return its result as a list of strings
+ '''
args.insert(0, 'git')
debug_print(args)
proc = subprocess.Popen(args, stdout=subprocess.PIPE)
- return cleaned(list(proc.stdout))
+ # Clean up the output by removing trailing spaces, newlines and dropping
+ # blank lines.
+ op = [decode(x[:-1]).strip() for x in proc.stdout]
+ op = [re.sub(r'\s+', ' ', x) for x in op]
+ op = [x for x in op if x]
+ return op
def print_changed_tree(tree, action, prologue = ''):
@@ -561,20 +629,6 @@ def analyze_diff(oldfile, newfile, filename):
print_tree(right, 0)
-def parse_output(op):
- ''' File parser.
-
- Parse the input array of lines OP and generate a tree structure to
- represent the file. This tree structure is then used for comparison between
- the old and new file.
- '''
- tree = new_block('', block_type.file, [], None)
- op = remove_comments(op)
- op = parse(op, 0, tree)
-
- return tree
-
-
def list_changes(commit):
''' List changes in a single commit.
@@ -603,8 +657,8 @@ def list_changes(commit):
break
# Find raw commit information for all non-ChangeLog files.
- op = [x[1:] for x in op
- if len(x) > 0 and x[0] == ':' and x.find('ChangeLog') == -1]
+ op = [x[1:] for x in op if len(x) > 0 and re.match(r'^:[0-9]+', x) \
+ and 'ChangeLog' not in x]
# It was only the ChangeLog, ignore.
if len(op) == 0:
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=d44598ca385c3993c9952d04a923ee1a45649752
commit d44598ca385c3993c9952d04a923ee1a45649752
Author: Siddhesh Poyarekar <siddhesh@linaro.org>
Date: Mon Nov 12 10:53:45 2018 +0530
Speed up things slightly by initializing the regex compilations just once.
diff --git a/scripts/gen-changed-entities.py b/scripts/gen-changed-entities.py
index eb0e29d..8b4d889 100755
--- a/scripts/gen-changed-entities.py
+++ b/scripts/gen-changed-entities.py
@@ -69,6 +69,36 @@ class block_type(Enum):
assign = 12
+# Regular expressions.
+
+# The __attribute__ are written in a bunch of different ways in glibc.
+ATTRIBUTE = \
+ r'((_*(attribute|ATTRIBUTE)_*(\s*\(\([^)]+\)\)|\w+))|weak_function)';
+
+# Function regex
+FUNC_RE = re.compile(ATTRIBUTE + r'*\s*(\w+)\s*\([^(][^{]+\)\s*{')
+
+# The macrocall_re peeks into the next line to ensure that it doesn't eat up
+# a FUNC by accident. The func_re regex is also quite crude and only
+# intends to ensure that the function name gets picked up correctly.
+MACROCALL_RE = re.compile(r'(\w+)\s*\(\w+(\s*,\s*[\w\.]+)*\)$')
+
+# Composite types such as structs and unions.
+COMPOSITE_RE = re.compile(r'(struct|union|enum)\s*(\w*)\s*{')
+
+# Static assignments.
+ASSIGN_RE = re.compile(r'(\w+)\s*(\[[^\]]*\])?\s*([^\s]*attribute[\s\w()]+)?\s*=')
+
+# Function Declarations. FIXME BROKEN
+FNDECL_RE = re.compile(r'(\w+)\s*\([^;]+\)\s*' + ATTRIBUTE + '*;')
+
+# Function pointer typedefs.
+TYPEDEF_FN_RE = re.compile(r'\(\*(\w+)\)\s*\([^)]+\);')
+
+# Simple decls.
+DECL_RE = re.compile(r'(\w+)(\[\w+\])?\s*' + ATTRIBUTE + '?;')
+
+
def collapse_macros(op):
# Consolidate macro defs across multiple lines.
new_op = []
@@ -282,39 +312,18 @@ def parse_macrocall(name, cur, op, loc, code):
def parse_c_expr(cur, op, loc, code, start):
debug_print('PARSING: %s' % cur)
- ATTRIBUTE = \
- r'((_*(attribute|ATTRIBUTE)_*(\s*\(\([^)]+\)\)|\w+))|weak_function)';
-
- # Regular expressions.
- #
- # The macrocall_re peeks into the next line to ensure that it doesn't eat up
- # a FUNC by accident. The func_re regex is also quite crude and only
- # intends to ensure that the function name gets picked up correctly.
- func_re = re.compile(ATTRIBUTE + r'*\s*(\w+)\s*\([^(][^{]+\)\s*{')
- macrocall_re = re.compile(r'(\w+)\s*\(\w+(\s*,\s*[\w\.]+)*\)$')
- # Composite types such as structs and unions.
- composite_re = re.compile(r'(struct|union|enum)\s*(\w*)\s*{')
- # Static assignments.
- assign_re = re.compile(r'(\w+)\s*(\[[^\]]*\])?\s*([^\s]*attribute[\s\w()]+)?\s*=')
- # Function Declarations. FIXME BROKEN
- fndecl_re = re.compile(r'(\w+)\s*\([^;]+\)\s*' + ATTRIBUTE + '*;')
- # Function pointer typedefs.
- typedef_fn_re = re.compile(r'\(\*(\w+)\)\s*\([^)]+\);')
- # Simple decls.
- decl_re = re.compile(r'(\w+)(\[\w+\])?\s*' + ATTRIBUTE + '?;')
-
# Composite type declarations.
- found = re.search(composite_re, cur)
+ found = re.search(COMPOSITE_RE, cur)
if found:
return found, parse_composite(found.group(2), cur, op, loc, code)
# Assignments. This should cover struct and array assignments too.
- found = re.search(assign_re, cur)
+ found = re.search(ASSIGN_RE, cur)
if found:
return found, parse_assign(found.group(1), cur, op, loc, code)
# Typedefs.
- found = re.search(typedef_fn_re, cur)
+ found = re.search(TYPEDEF_FN_RE, cur)
if found:
return found, parse_decl(found.group(1), cur, op, loc, code,
block_type.decl)
@@ -323,25 +332,25 @@ def parse_c_expr(cur, op, loc, code, start):
# definitions, which have to account for any __attribute__ annotations
# for its arguments. With declarations, we just match the last closing
# bracket and the semicolon following it.
- found = re.search(fndecl_re, cur)
+ found = re.search(FNDECL_RE, cur)
if found:
return found, parse_decl(found.group(1), cur, op, loc, code,
block_type.fndecl)
# Functions or macro calls that don't end with a semicolon.
- found = re.search(func_re, cur)
+ found = re.search(FUNC_RE, cur)
if found:
return found, parse_func(found.group(5), cur, op, loc, code)
# Functions or macro calls that don't end with a semicolon. We need to peek
# ahead to make sure that we don't mis-identify a function. This happens
# only with functions that take no arguments.
- found = re.search(macrocall_re, cur)
+ found = re.search(MACROCALL_RE, cur)
if found and (loc >= len(op) or '{' not in op[loc]):
return found, parse_macrocall(found.group(1), cur, op, loc, code)
# Finally, all declarations.
- found = re.search(decl_re, cur)
+ found = re.search(DECL_RE, cur)
if found:
return found, parse_decl(found.group(1), cur, op, loc, code,
block_type.decl)
@@ -459,12 +468,13 @@ def exec_git_cmd(args):
return cleaned(list(proc.stdout))
-def list_commits(revs):
- ref = revs[0] + '..' + revs[1]
- return exec_git_cmd(['log', '--pretty=%H', ref])
-
-
def print_changed_tree(tree, action, prologue = ''):
+ ''' Print the nature of the differences found in the tree compared to the
+ other tree. TREE is the tree that changed, action is what the change was
+ (Added, Removed, Modified) and prologue specifies the macro scope the change
+ is in. The function calls itself recursively for all macro condition tree
+ nodes.
+ '''
if tree['type'] != block_type.macro_cond:
print('\t%s(%s): %s.' % (prologue, tree['name'], action))
@@ -535,6 +545,8 @@ def analyze_diff(oldfile, newfile, filename):
return
debug_print('\t<List diff between oldfile and newfile>')
+ # op = exec_git_cmd(['diff', '-U20000', oldfile, newfile])
+ # (left, right) = parse_output(op)
left = parse_output(exec_git_cmd(['show', oldfile]))
right = parse_output(exec_git_cmd(['show', newfile]))
@@ -642,6 +654,13 @@ def list_changes(commit):
print('')
+def list_commits(revs):
+ ''' List commit IDs between the two revs in the REVS list.
+ '''
+ ref = revs[0] + '..' + revs[1]
+ return exec_git_cmd(['log', '--pretty=%H', ref])
+
+
def main(revs):
''' ChangeLog Generator Entry Point
'''
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=aac3aadfae9be081b0317ab42af6b565dac56e6c
commit aac3aadfae9be081b0317ab42af6b565dac56e6c
Author: Siddhesh Poyarekar <siddhesh@linaro.org>
Date: Mon Nov 12 10:24:50 2018 +0530
The ChangeLog output!
diff --git a/scripts/gen-changed-entities.py b/scripts/gen-changed-entities.py
index 6e2a8d3..eb0e29d 100755
--- a/scripts/gen-changed-entities.py
+++ b/scripts/gen-changed-entities.py
@@ -1,4 +1,48 @@
#!/usr/bin/python3
+# Copyright (C) 2018 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+''' Generate a ChangeLog style output based on the git log.
+
+This script takes two revisions as input and generates a ChangeLog style output
+for all revisions between the two revisions. This output is intended to be an
+approximation and not the exact ChangeLog.
+
+At a high level, the script enumerates all C source files (*.c and *.h) and
+builds a tree of top level objects within macro conditionals. The top level
+objects the script currently attempts to identify are:
+
+ - Include statements
+ - Macro definitions and undefs
+ - Declarations and definitions of variables and functions
+ - Composite types
+
+The script attempts to identify quirks typically used in glibc sources such as
+the symbol hack macro calls that don't use a semicolon and tries to adjust for
+them.
+
+Known Limitations:
+
+ - Does not identify changes in or to comments. Comments are simply stripped
+ out.
+ - Weird nesting of macro conditionals may break things. Attempts have been
+ made to try and maintain state across macro conditional scopes, but
+ there's still scope to fool the script.
+ - Does not identify changes within functions.
+'''
import subprocess
import sys
import os
@@ -84,6 +128,19 @@ def remove_comments(op):
return new_op
+def new_block(name, type, contents, parent):
+ block = {}
+ block['matched'] = False
+ block['name'] = name
+ block['type'] = type
+ block['contents'] = contents
+ block['parent'] = parent
+ if parent:
+ parent['contents'].append(block)
+
+ return block
+
+
# Parse macros.
def parse_macro(op, loc, code, start = '', else_start = ''):
cur = op[loc]
@@ -104,80 +161,46 @@ def parse_macro(op, loc, code, start = '', else_start = ''):
# Include file.
if cur.find('include') == 0:
m = re.search(r'include\s*["<]?([^">]+)[">]?', cur)
- include = {}
- include['name'] = m.group(1)
- include['type'] = block_type.macro_include
- include['contents'] = [cur]
- include['parent'] = code
- code['contents'].append(include)
+ new_block(m.group(1), block_type.macro_include, [cur], code)
# Macro definition.
if cur.find('define') == 0:
m = re.search(r'define\s+([a-zA-Z0-9_]+)', cur)
- macrodef = {}
- macrodef['name'] = m.group(1)
- macrodef['type'] = block_type.macro_def
- macrodef['contents'] = [cur]
- macrodef['parent'] = code
- code['contents'].append(macrodef)
+ new_block(m.group(1), block_type.macro_def, [cur], code)
+ # Macro undef.
if cur.find('undef') == 0:
m = re.search(r'undef\s+([a-zA-Z0-9_]+)', cur)
- macrodef = {}
- macrodef['name'] = m.group(1)
- macrodef['type'] = block_type.macro_undef
- macrodef['contents'] = [cur]
- macrodef['parent'] = code
- code['contents'].append(macrodef)
+ new_block(m.group(1), block_type.macro_undef, [cur], code)
- # Macro definition.
+ # #error and #warning macros.
if cur.find('error') == 0 or cur.find('warning') == 0:
m = re.search(r'(error|warning)\s+"?(.*)"?', cur)
if m:
name = m.group(2)
else:
name = '<blank>'
- macrodef = {}
- macrodef['name'] = name
- macrodef['type'] = block_type.macro_info
- macrodef['contents'] = [cur]
- macrodef['parent'] = code
- code['contents'].append(macrodef)
+ new_block(name, block_type.macro_info, [cur], code)
# Start of an #if or #ifdef block.
elif cur.find('if') == 0:
rem = re.sub(r'ifndef', r'!', cur).strip()
rem = re.sub(r'(ifdef|defined|if)', r'', rem).strip()
- ifdef = {}
- ifdef['name'] = rem
- ifdef['type'] = block_type.macro_cond
- ifdef['contents'] = []
- ifdef['parent'] = code
- code['contents'].append(ifdef)
+ ifdef = new_block(rem, block_type.macro_cond, [], code)
loc = parse(op, loc, ifdef, start)
# End the previous #if/#elif and begin a new block.
elif cur.find('elif') == 0 and code['parent']:
rem = re.sub(r'(elif|defined)', r'', cur).strip()
- ifdef = {}
- ifdef['name'] = rem
- ifdef['type'] = block_type.macro_cond
- ifdef['contents'] = []
- ifdef['parent'] = code['parent']
- # Here's the key thing: The #else block should go into the current
- # block's parent.
- code['parent']['contents'].append(ifdef)
+ # The #else and #elif blocks should go into the current block's parent.
+ ifdef = new_block(rem, block_type.macro_cond, [], code['parent'])
loc = parse(op, loc, ifdef, else_start)
endblock = True
# End the previous #if/#elif and begin a new block.
elif cur.find('else') == 0 and code['parent']:
- ifdef = {}
- ifdef['name'] = '!(' + code['name'] + ')'
- ifdef['type'] = block_type.macro_cond
- ifdef['contents'] = []
- ifdef['parent'] = code['parent']
- code['parent']['contents'].append(ifdef)
+ name = '!(' + code['name'] + ')'
+ ifdef = new_block(name, block_type.macro_cond, [], code['parent'])
loc = parse(op, loc, ifdef, else_start)
endblock = True
@@ -204,12 +227,7 @@ def fast_forward_scope(cur, op, loc, open='{', close='}'):
# Different types of declarations.
def parse_decl(name, cur, op, loc, code, blocktype):
debug_print('FOUND DECL: %s' % name)
- block = {}
- block['name'] = name
- block['type'] = blocktype
- block['contents'] = [cur]
- block['parent'] = code
- code['contents'].append(block)
+ new_block(name, blocktype, [cur], code)
return loc
@@ -222,12 +240,7 @@ def parse_assign(name, cur, op, loc, code):
cur = op[loc]
loc = loc + 1
- block = {}
- block['name'] = name
- block['type'] = block_type.assign
- block['contents'] = [cur]
- block['parent'] = code
- code['contents'].append(block)
+ new_block(name, block_type.assign, [cur], code)
return loc
@@ -240,12 +253,7 @@ def parse_composite(name, cur, op, loc, code):
# Lap up all of the struct definition.
(cur, loc) = fast_forward_scope(cur, op, loc)
- block = {}
- block['name'] = name
- block['type'] = block_type.composite
- block['contents'] = [cur]
- block['parent'] = code
- code['contents'].append(block)
+ new_block(name, block_type.composite, [cur], code)
return loc
@@ -257,12 +265,7 @@ def parse_func(name, cur, op, loc, code):
# Consume everything up to the ending brace of the function.
(cur, loc) = fast_forward_scope(cur, op, loc)
- block = {}
- block['name'] = name
- block['type'] = block_type.func
- block['contents'] = [cur]
- block['parent'] = code
- code['contents'].append(block)
+ new_block(name, block_type.func, [cur], code)
return loc
@@ -271,12 +274,7 @@ def parse_func(name, cur, op, loc, code):
def parse_macrocall(name, cur, op, loc, code):
debug_print('FOUND MACROCALL: %s' % name)
- block = {}
- block['name'] = name
- block['type'] = block_type.macrocall
- block['contents'] = [cur]
- block['parent'] = code
- code['contents'].append(block)
+ new_block(name, block_type.macrocall, [cur], code)
return loc
@@ -292,7 +290,7 @@ def parse_c_expr(cur, op, loc, code, start):
# The macrocall_re peeks into the next line to ensure that it doesn't eat up
# a FUNC by accident. The func_re regex is also quite crude and only
# intends to ensure that the function name gets picked up correctly.
- func_re = re.compile(ATTRIBUTE + r'*\s*(\w+)\s*\([^{]+\)\s*{')
+ func_re = re.compile(ATTRIBUTE + r'*\s*(\w+)\s*\([^(][^{]+\)\s*{')
macrocall_re = re.compile(r'(\w+)\s*\(\w+(\s*,\s*[\w\.]+)*\)$')
# Composite types such as structs and unions.
composite_re = re.compile(r'(struct|union|enum)\s*(\w*)\s*{')
@@ -392,6 +390,9 @@ def parse(op, loc, code, start = ''):
def print_tree(tree, indent):
+ if not debug:
+ return
+
if tree['type'] == block_type.macro_cond or tree['type'] == block_type.file:
print('%sScope: %s' % (' ' * indent, tree['name']))
for c in tree['contents']:
@@ -452,7 +453,7 @@ def cleaned(ip):
def exec_git_cmd(args):
args.insert(0, 'git')
- print(args)
+ debug_print(args)
proc = subprocess.Popen(args, stdout=subprocess.PIPE)
return cleaned(list(proc.stdout))
@@ -462,8 +463,69 @@ def list_commits(revs):
ref = revs[0] + '..' + revs[1]
return exec_git_cmd(['log', '--pretty=%H', ref])
+
+def print_changed_tree(tree, action, prologue = ''):
+
+ if tree['type'] != block_type.macro_cond:
+ print('\t%s(%s): %s.' % (prologue, tree['name'], action))
+ return
+
+ prologue = '%s[%s]' % (prologue, tree['name'])
+ for t in tree['contents']:
+ if t['type'] == block_type.macro_cond:
+ print_changed_tree(t, action, prologue)
+ else:
+ print('\t%s(%s): %s.' % (prologue, t['name'], action))
+
+
+def compare_trees(left, right, prologue = ''):
+ ''' Compare two trees and print the difference.
+
+ This routine is the entry point to compare two trees and print out their
+ differences. LEFT and RIGHT will always have the same name and type,
+ starting with block_type.file and '' at the top level.
+ '''
+
+ if left['type'] == block_type.macro_cond or left['type'] == block_type.file:
+
+ if left['type'] == block_type.macro_cond:
+ prologue = '%s[%s]' % (prologue, left['name'])
+
+ # TODO 1: There must be some list comprehension magic I can do here.
+ # TODO 2: This won't detect when the macro condition has been changed.
+ # It will think of one condition as added and another as removed. We'll
+ # have to live with that for now.
+
+ # Make sure that everything in the left tree exists in the right tree.
+ for cl in left['contents']:
+ found = False
+ for cr in right['contents']:
+ if not cl['matched'] and not cr['matched'] and \
+ cl['name'] == cr['name'] and cl['type'] == cr['type']:
+ cl['matched'] = cr['matched'] = True
+ compare_trees(cl, cr, prologue)
+ found = True
+ break
+ if not found:
+ print_changed_tree(cl, 'Removed', prologue)
+
+ # ... and vice versa. This time we only need to look at unmatched
+ # contents.
+ for cr in right['contents']:
+ if not cr['matched']:
+ print_changed_tree(cr, 'New', prologue)
+ else:
+ if left['contents'] != right['contents']:
+ print_changed_tree(left, 'Modified', prologue)
+
+
def analyze_diff(oldfile, newfile, filename):
- # Ignore non-C files.
+ ''' Parse the output of the old and new files and print the difference.
+
+ For input files OLDFILE and NEWFILE with name FILENAME, generate reduced
+ trees for them and compare them. We limit our comparison to only C source
+ files.
+ '''
split = filename.split('.')
ext = ''
if split:
@@ -472,26 +534,29 @@ def analyze_diff(oldfile, newfile, filename):
if ext != 'c' and ext != 'h':
return
- print('\t<List diff between oldfile and newfile>')
+ debug_print('\t<List diff between oldfile and newfile>')
left = parse_output(exec_git_cmd(['show', oldfile]))
right = parse_output(exec_git_cmd(['show', newfile]))
- print('LEFT TREE')
- print('-' * 80)
+ compare_trees(left, right)
+
+ debug_print('LEFT TREE')
+ debug_print('-' * 80)
print_tree(left, 0)
- print('RIGHT TREE')
- print('-' * 80)
+ debug_print('RIGHT TREE')
+ debug_print('-' * 80)
print_tree(right, 0)
def parse_output(op):
- tree = {}
- tree['name'] = ''
- tree['type'] = block_type.file
- tree['contents'] = []
- tree['parent'] = None
- #op = preprocess(op, right)
+ ''' File parser.
+
+ Parse the input array of lines OP and generate a tree structure to
+ represent the file. This tree structure is then used for comparison between
+ the old and new file.
+ '''
+ tree = new_block('', block_type.file, [], None)
op = remove_comments(op)
op = parse(op, 0, tree)
@@ -499,6 +564,13 @@ def parse_output(op):
def list_changes(commit):
+ ''' List changes in a single commit.
+
+ For the input commit id COMMIT, identify the files that have changed and the
+ nature of their changes. Print commit information in the ChangeLog format,
+ calling into helper functions as necessary.
+ '''
+
op = exec_git_cmd(['show', '--date=short', '--raw', commit])
author = ''
date = ''
@@ -571,12 +643,16 @@ def list_changes(commit):
def main(revs):
+ ''' ChangeLog Generator Entry Point
+ '''
commits = list_commits(revs)
for commit in commits:
list_changes(commit)
def parser_file_test(f):
+ ''' Parser debugger Entry Point
+ '''
with open(f) as srcfile:
op = srcfile.readlines()
op = [x[:-1] for x in op]
@@ -584,11 +660,13 @@ def parser_file_test(f):
print_tree(tree, 0)
+# Program Entry point. If -d is specified, the second argument is assumed to be
+# a file and only the parser is run in verbose mode.
if __name__ == '__main__':
if len(sys.argv) != 3:
usage(sys.argv[0])
- if sys.argv[1] == '-t':
+ if sys.argv[1] == '-d':
debug = True
parser_file_test(sys.argv[2])
else:
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=26016efceb3aaeef1883f6990d5accc3cc4313a5
commit 26016efceb3aaeef1883f6990d5accc3cc4313a5
Author: Siddhesh Poyarekar <siddhesh@linaro.org>
Date: Mon Nov 12 05:42:14 2018 +0530
Make assignment regex stronger and tighten file extension check
diff --git a/scripts/gen-changed-entities.py b/scripts/gen-changed-entities.py
index fa0d027..6e2a8d3 100755
--- a/scripts/gen-changed-entities.py
+++ b/scripts/gen-changed-entities.py
@@ -297,7 +297,7 @@ def parse_c_expr(cur, op, loc, code, start):
# Composite types such as structs and unions.
composite_re = re.compile(r'(struct|union|enum)\s*(\w*)\s*{')
# Static assignments.
- assign_re = re.compile(r'(\w+)\s*(\[\])?\s*([^\s]*attribute[\s\w()]+)?\s*=')
+ assign_re = re.compile(r'(\w+)\s*(\[[^\]]*\])?\s*([^\s]*attribute[\s\w()]+)?\s*=')
# Function Declarations. FIXME BROKEN
fndecl_re = re.compile(r'(\w+)\s*\([^;]+\)\s*' + ATTRIBUTE + '*;')
# Function pointer typedefs.
@@ -464,7 +464,12 @@ def list_commits(revs):
def analyze_diff(oldfile, newfile, filename):
# Ignore non-C files.
- if filename.find('.c') < 0 and filename.find('.h') < 0:
+ split = filename.split('.')
+ ext = ''
+ if split:
+ ext = split[-1]
+
+ if ext != 'c' and ext != 'h':
return
print('\t<List diff between oldfile and newfile>')
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=4af045e137d83aa43388952fa8492947d49d306a
commit 4af045e137d83aa43388952fa8492947d49d306a
Author: Siddhesh Poyarekar <siddhesh@linaro.org>
Date: Mon Nov 12 05:29:53 2018 +0530
Remove quadratic behaviour of the ARGLIST regex
diff --git a/scripts/gen-changed-entities.py b/scripts/gen-changed-entities.py
index 21497e4..fa0d027 100755
--- a/scripts/gen-changed-entities.py
+++ b/scripts/gen-changed-entities.py
@@ -286,14 +286,13 @@ def parse_c_expr(cur, op, loc, code, start):
ATTRIBUTE = \
r'((_*(attribute|ATTRIBUTE)_*(\s*\(\([^)]+\)\)|\w+))|weak_function)';
- #ARGLIST = r'[\w\s\*]+' + ATTRIBUTE + '?,?\s*'
- ARGLIST = r'(\w+[\s\*]+\w+' + ATTRIBUTE + '?,?\s*)|void'
# Regular expressions.
#
- # Function or a macro call that doesn't need a semicolon: foo (args, ...)
- # We later distinguish between the two by peeking into the next line.
- func_re = re.compile(ATTRIBUTE + r'*\s*(\w+)\s*\((' + ARGLIST + ')*\)\s*{')
+ # The macrocall_re peeks into the next line to ensure that it doesn't eat up
+ # a FUNC by accident. The func_re regex is also quite crude and only
+ # intends to ensure that the function name gets picked up correctly.
+ func_re = re.compile(ATTRIBUTE + r'*\s*(\w+)\s*\([^{]+\)\s*{')
macrocall_re = re.compile(r'(\w+)\s*\(\w+(\s*,\s*[\w\.]+)*\)$')
# Composite types such as structs and unions.
composite_re = re.compile(r'(struct|union|enum)\s*(\w*)\s*{')
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=5e2de41f7fafe640f07685394d2a93c95baddce2
commit 5e2de41f7fafe640f07685394d2a93c95baddce2
Author: Siddhesh Poyarekar <siddhesh@linaro.org>
Date: Mon Nov 12 05:14:04 2018 +0530
Functional script
The FUNC parser regex shows quadratic behaviour, needs fixing.
diff --git a/scripts/gen-changed-entities.py b/scripts/gen-changed-entities.py
index caebba4..21497e4 100755
--- a/scripts/gen-changed-entities.py
+++ b/scripts/gen-changed-entities.py
@@ -1,34 +1,569 @@
-#!/usr/bin/python
+#!/usr/bin/python3
import subprocess
import sys
+import os
+import re
+
+#------------------------------------------------------------------------------
+# C Parser.
+#------------------------------------------------------------------------------
+from enum import Enum
+debug = False
+
+class block_type(Enum):
+ file = 1
+ macro_cond = 2
+ macro_def = 3
+ macro_undef = 4
+ macro_include = 5
+ macro_info = 6
+ decl = 7
+ func = 8
+ composite = 9
+ macrocall = 10
+ fndecl = 11
+ assign = 12
+
+
+def collapse_macros(op):
+ # Consolidate macro defs across multiple lines.
+ new_op = []
+ cur_line = ''
+ join_line = False
+ for l in op:
+ if join_line:
+ cur_line = cur_line[:-1] + ' ' + l
+ else:
+ cur_line = l
+
+ if cur_line[0] == '#' and cur_line[-1] == '\\':
+ join_line = True
+ continue
+ else:
+ join_line = False
+
+ new_op.append(cur_line)
+
+ return new_op
+
+
+def remove_comments(op):
+ new_op = []
+
+ # The simpler one-line comments.
+ for l in op:
+ # FIXME: This assumes that there's always only one comment per line.
+ rem = re.sub(r'/\*.*\*/', r'', l)
+ if rem:
+ new_op.append(rem.strip())
+
+ op = new_op
+ new_op = []
+
+ in_comment = False
+ for l in op:
+ if in_comment:
+ loc = l.find('*/')
+ if loc == -1:
+ continue
+ else:
+ in_comment = False
+ rem = l[loc + 2:]
+ if rem:
+ new_op.append(rem)
+ else:
+ loc = l.find('/*')
+ if loc == -1:
+ new_op.append(l)
+ else:
+ in_comment = True
+ rem = l[:loc]
+ if rem:
+ new_op.append(rem)
+
+ return new_op
+
+
+# Parse macros.
+def parse_macro(op, loc, code, start = '', else_start = ''):
+ cur = op[loc]
+ loc = loc + 1
+ endblock = False
+
+ debug_print('PARSE_MACRO: %s' % cur)
+
+ # Collapse the macro into a single line.
+ while cur[-1] == '\\':
+ cur = cur[:-1] + ' ' + op[loc]
+ loc = loc + 1
+
+
+ # Remove the # and strip spaces again.
+ cur = cur[1:].strip()
+
+ # Include file.
+ if cur.find('include') == 0:
+ m = re.search(r'include\s*["<]?([^">]+)[">]?', cur)
+ include = {}
+ include['name'] = m.group(1)
+ include['type'] = block_type.macro_include
+ include['contents'] = [cur]
+ include['parent'] = code
+ code['contents'].append(include)
+
+ # Macro definition.
+ if cur.find('define') == 0:
+ m = re.search(r'define\s+([a-zA-Z0-9_]+)', cur)
+ macrodef = {}
+ macrodef['name'] = m.group(1)
+ macrodef['type'] = block_type.macro_def
+ macrodef['contents'] = [cur]
+ macrodef['parent'] = code
+ code['contents'].append(macrodef)
+
+ if cur.find('undef') == 0:
+ m = re.search(r'undef\s+([a-zA-Z0-9_]+)', cur)
+ macrodef = {}
+ macrodef['name'] = m.group(1)
+ macrodef['type'] = block_type.macro_undef
+ macrodef['contents'] = [cur]
+ macrodef['parent'] = code
+ code['contents'].append(macrodef)
+
+ # Macro definition.
+ if cur.find('error') == 0 or cur.find('warning') == 0:
+ m = re.search(r'(error|warning)\s+"?(.*)"?', cur)
+ if m:
+ name = m.group(2)
+ else:
+ name = '<blank>'
+ macrodef = {}
+ macrodef['name'] = name
+ macrodef['type'] = block_type.macro_info
+ macrodef['contents'] = [cur]
+ macrodef['parent'] = code
+ code['contents'].append(macrodef)
+
+ # Start of an #if or #ifdef block.
+ elif cur.find('if') == 0:
+ rem = re.sub(r'ifndef', r'!', cur).strip()
+ rem = re.sub(r'(ifdef|defined|if)', r'', rem).strip()
+ ifdef = {}
+ ifdef['name'] = rem
+ ifdef['type'] = block_type.macro_cond
+ ifdef['contents'] = []
+ ifdef['parent'] = code
+ code['contents'].append(ifdef)
+ loc = parse(op, loc, ifdef, start)
+
+ # End the previous #if/#elif and begin a new block.
+ elif cur.find('elif') == 0 and code['parent']:
+ rem = re.sub(r'(elif|defined)', r'', cur).strip()
+ ifdef = {}
+ ifdef['name'] = rem
+ ifdef['type'] = block_type.macro_cond
+ ifdef['contents'] = []
+ ifdef['parent'] = code['parent']
+ # Here's the key thing: The #else block should go into the current
+ # block's parent.
+ code['parent']['contents'].append(ifdef)
+ loc = parse(op, loc, ifdef, else_start)
+ endblock = True
+
+ # End the previous #if/#elif and begin a new block.
+ elif cur.find('else') == 0 and code['parent']:
+ ifdef = {}
+ ifdef['name'] = '!(' + code['name'] + ')'
+ ifdef['type'] = block_type.macro_cond
+ ifdef['contents'] = []
+ ifdef['parent'] = code['parent']
+ code['parent']['contents'].append(ifdef)
+ loc = parse(op, loc, ifdef, else_start)
+ endblock = True
+
+ elif cur.find('endif') == 0 and code['parent']:
+ endblock = True
+
+ return (loc, endblock)
+
+
+# Given the start of a scope CUR, lap up all code up to the end of scope
+# indicated by the closing brace.
+def fast_forward_scope(cur, op, loc, open='{', close='}'):
+ nesting = cur.count(open) - cur.count(close)
+ while nesting > 0 and loc < len(op):
+ cur = cur + ' ' + op[loc]
+
+ nesting = nesting + op[loc].count(open)
+ nesting = nesting - op[loc].count(close)
+ loc = loc + 1
+
+ return (cur, loc)
+
+
+# Different types of declarations.
+def parse_decl(name, cur, op, loc, code, blocktype):
+ debug_print('FOUND DECL: %s' % name)
+ block = {}
+ block['name'] = name
+ block['type'] = blocktype
+ block['contents'] = [cur]
+ block['parent'] = code
+ code['contents'].append(block)
+
+ return loc
+
+
+# Assignments.
+def parse_assign(name, cur, op, loc, code):
+ debug_print('FOUND ASSIGN: %s' % name)
+ # Lap up everything up to semicolon.
+ while ';' not in cur and loc < len(op):
+ cur = op[loc]
+ loc = loc + 1
+
+ block = {}
+ block['name'] = name
+ block['type'] = block_type.assign
+ block['contents'] = [cur]
+ block['parent'] = code
+ code['contents'].append(block)
+
+ return loc
+
+
+# Structs or unions.
+def parse_composite(name, cur, op, loc, code):
+ if not name:
+ name = '<anonymous>'
+
+ # Lap up all of the struct definition.
+ (cur, loc) = fast_forward_scope(cur, op, loc)
+
+ block = {}
+ block['name'] = name
+ block['type'] = block_type.composite
+ block['contents'] = [cur]
+ block['parent'] = code
+ code['contents'].append(block)
+
+ return loc
+
+
+# Parse a function. NAME is the function name.
+def parse_func(name, cur, op, loc, code):
+ debug_print('FOUND FUNC: %s' % name)
+
+ # Consume everything up to the ending brace of the function.
+ (cur, loc) = fast_forward_scope(cur, op, loc)
+
+ block = {}
+ block['name'] = name
+ block['type'] = block_type.func
+ block['contents'] = [cur]
+ block['parent'] = code
+ code['contents'].append(block)
+
+ return loc
+
+
+# Parse a function. NAME is the function name.
+def parse_macrocall(name, cur, op, loc, code):
+ debug_print('FOUND MACROCALL: %s' % name)
+
+ block = {}
+ block['name'] = name
+ block['type'] = block_type.macrocall
+ block['contents'] = [cur]
+ block['parent'] = code
+ code['contents'].append(block)
+
+ return loc
+
+
+def parse_c_expr(cur, op, loc, code, start):
+ debug_print('PARSING: %s' % cur)
+
+ ATTRIBUTE = \
+ r'((_*(attribute|ATTRIBUTE)_*(\s*\(\([^)]+\)\)|\w+))|weak_function)';
+ #ARGLIST = r'[\w\s\*]+' + ATTRIBUTE + '?,?\s*'
+ ARGLIST = r'(\w+[\s\*]+\w+' + ATTRIBUTE + '?,?\s*)|void'
+
+ # Regular expressions.
+ #
+ # Function or a macro call that doesn't need a semicolon: foo (args, ...)
+ # We later distinguish between the two by peeking into the next line.
+ func_re = re.compile(ATTRIBUTE + r'*\s*(\w+)\s*\((' + ARGLIST + ')*\)\s*{')
+ macrocall_re = re.compile(r'(\w+)\s*\(\w+(\s*,\s*[\w\.]+)*\)$')
+ # Composite types such as structs and unions.
+ composite_re = re.compile(r'(struct|union|enum)\s*(\w*)\s*{')
+ # Static assignments.
+ assign_re = re.compile(r'(\w+)\s*(\[\])?\s*([^\s]*attribute[\s\w()]+)?\s*=')
+ # Function Declarations. FIXME BROKEN
+ fndecl_re = re.compile(r'(\w+)\s*\([^;]+\)\s*' + ATTRIBUTE + '*;')
+ # Function pointer typedefs.
+ typedef_fn_re = re.compile(r'\(\*(\w+)\)\s*\([^)]+\);')
+ # Simple decls.
+ decl_re = re.compile(r'(\w+)(\[\w+\])?\s*' + ATTRIBUTE + '?;')
+
+ # Composite type declarations.
+ found = re.search(composite_re, cur)
+ if found:
+ return found, parse_composite(found.group(2), cur, op, loc, code)
+
+ # Assignments. This should cover struct and array assignments too.
+ found = re.search(assign_re, cur)
+ if found:
+ return found, parse_assign(found.group(1), cur, op, loc, code)
+
+ # Typedefs.
+ found = re.search(typedef_fn_re, cur)
+ if found:
+ return found, parse_decl(found.group(1), cur, op, loc, code,
+ block_type.decl)
+
+ # Function declarations are pretty straightforward compared to function
+ # definitions, which have to account for any __attribute__ annotations
+ # for its arguments. With declarations, we just match the last closing
+ # bracket and the semicolon following it.
+ found = re.search(fndecl_re, cur)
+ if found:
+ return found, parse_decl(found.group(1), cur, op, loc, code,
+ block_type.fndecl)
+
+ # Functions or macro calls that don't end with a semicolon.
+ found = re.search(func_re, cur)
+ if found:
+ return found, parse_func(found.group(5), cur, op, loc, code)
+
+ # Functions or macro calls that don't end with a semicolon. We need to peek
+ # ahead to make sure that we don't mis-identify a function. This happens
+ # only with functions that take no arguments.
+ found = re.search(macrocall_re, cur)
+ if found and (loc >= len(op) or '{' not in op[loc]):
+ return found, parse_macrocall(found.group(1), cur, op, loc, code)
+
+ # Finally, all declarations.
+ found = re.search(decl_re, cur)
+ if found:
+ return found, parse_decl(found.group(1), cur, op, loc, code,
+ block_type.decl)
+
+ return found, loc
+
+
+# Parse the file line by line. The function assumes a mostly GNU coding
+# standard compliant input so it might barf with anything that is eligible for
+# the Obfuscated C code contest.
+#
+# The basic idea of the parser is to identify macro conditional scopes and
+# definitions, includes, etc. and then parse the remaining C code in the context
+# of those macro scopes. The parser does not try to understand the semantics of
+# the code or even validate its syntax. It only records high level symbols in
+# the source and makes a tree structure to indicate the declaration/definition
+# of those symbols and their scope in the macro definitions.
+#
+# LOC is the first unparsed line.
+def parse(op, loc, code, start = ''):
+ cur = start
+ endblock = False
+
+ while loc < len(op):
+ nextline = op[loc].strip()
+
+ if not nextline:
+ loc = loc + 1
+ continue
+
+ # Macros.
+ if nextline[0] == '#':
+ (loc, endblock) = parse_macro(op, loc, code, cur, start)
+ if endblock and not cur:
+ return loc
+ # Rest of C Code.
+ else:
+ cur = cur + ' ' + nextline
+ found, loc = parse_c_expr(cur, op, loc + 1, code, cur)
+ if found:
+ cur = ''
+ if endblock:
+ return loc
+
+ return loc
+
+
+def print_tree(tree, indent):
+ if tree['type'] == block_type.macro_cond or tree['type'] == block_type.file:
+ print('%sScope: %s' % (' ' * indent, tree['name']))
+ for c in tree['contents']:
+ print_tree(c, indent + 4)
+ print('%sEndScope: %s' % (' ' * indent, tree['name']))
+ else:
+ if tree['type'] == block_type.func:
+ print('%sFUNC: %s' % (' ' * indent, tree['name']))
+ elif tree['type'] == block_type.composite:
+ print('%sCOMPOSITE: %s' % (' ' * indent, tree['name']))
+ elif tree['type'] == block_type.assign:
+ print('%sASSIGN: %s' % (' ' * indent, tree['name']))
+ elif tree['type'] == block_type.fndecl:
+ print('%sFNDECL: %s' % (' ' * indent, tree['name']))
+ elif tree['type'] == block_type.decl:
+ print('%sDECL: %s' % (' ' * indent, tree['name']))
+ elif tree['type'] == block_type.macrocall:
+ print('%sMACROCALL: %s' % (' ' * indent, tree['name']))
+ elif tree['type'] == block_type.macro_def:
+ print('%sDEFINE: %s' % (' ' * indent, tree['name']))
+ elif tree['type'] == block_type.macro_include:
+ print('%sINCLUDE: %s' % (' ' * indent, tree['name']))
+ elif tree['type'] == block_type.macro_undef:
+ print('%sUNDEF: %s' % (' ' * indent, tree['name']))
+ else:
+ print('%sMACRO LEAF: %s' % (' ' * indent, tree['name']))
+
+#------------------------------------------------------------------------------
+
+
+def debug_print(*args, **kwargs):
+ if debug:
+ print(*args, file=sys.stderr, **kwargs)
+
+def eprint(*args, **kwargs):
+ print(*args, file=sys.stderr, **kwargs)
+
+def usage(name):
+ eprint("usage: %s <from-ref> <to-ref>" % name)
+ sys.exit(os.EX_USAGE)
+
+def decode(string):
+ codecs = ['utf8', 'latin1', 'cp1252']
+
+ for i in codecs:
+ try:
+ return string.decode(i)
+ except UnicodeDecodeError:
+ pass
+
+ eprint('Failed to decode: %s' % string)
+
+def cleaned(ip):
+ # Returns the output from a command after cleaning it up, i.e. removing
+ # trailing spaces, newlines and dropping blank lines.
+ op = list(filter(None, [decode(x[:-1]).strip() for x in ip]))
+ return op
def exec_git_cmd(args):
args.insert(0, 'git')
+ print(args)
proc = subprocess.Popen(args, stdout=subprocess.PIPE)
- # Trim the trailing newline and return the list.
- return [x[:-1] for x in list(proc.stdout)]
+ return cleaned(list(proc.stdout))
def list_commits(revs):
ref = revs[0] + '..' + revs[1]
return exec_git_cmd(['log', '--pretty=%H', ref])
+def analyze_diff(oldfile, newfile, filename):
+ # Ignore non-C files.
+ if filename.find('.c') < 0 and filename.find('.h') < 0:
+ return
+
+ print('\t<List diff between oldfile and newfile>')
+
+ left = parse_output(exec_git_cmd(['show', oldfile]))
+ right = parse_output(exec_git_cmd(['show', newfile]))
+
+ print('LEFT TREE')
+ print('-' * 80)
+ print_tree(left, 0)
+ print('RIGHT TREE')
+ print('-' * 80)
+ print_tree(right, 0)
+
+
+def parse_output(op):
+ tree = {}
+ tree['name'] = ''
+ tree['type'] = block_type.file
+ tree['contents'] = []
+ tree['parent'] = None
+ #op = preprocess(op, right)
+ op = remove_comments(op)
+ op = parse(op, 0, tree)
+
+ return tree
+
def list_changes(commit):
    """Print a ChangeLog-style summary of the files changed by COMMIT.

    Emits a date/author header followed by one entry per changed file
    (added, deleted, modified or renamed), calling analyze_diff() on
    modified C sources.  ChangeLog files are ignored, and merge commits
    get only a one-line marker.
    """
    op = exec_git_cmd(['show', '--date=short', '--raw', commit])
    # Initialize the fields read below so a commit without an Author:
    # line cannot leave them unbound.  (The original tracked a separate
    # 'author' variable that was never assigned, so its early-exit test
    # below could never succeed.)
    authorname = ''
    authoremail = ''
    date = ''
    merge = False

    for l in op:
        if l.find('Author:') == 0:
            # Line format: "Author: Name <email>".
            tmp = l[7:].split('<')
            authorname = tmp[0].strip()
            authoremail = tmp[1][:-1].strip()
        elif l.find('Date:') == 0:
            date = l[5:].strip()
        elif l.find('Merge:') == 0:
            merge = True

        # We got Author and Date, so don't bother with the remaining
        # output.  (git prints Merge: before Author:, so stopping here
        # cannot miss the merge marker.)
        if authorname != '' and date != '':
            break

    # Find raw commit information for all non-ChangeLog files.
    op = [x[1:] for x in op
          if len(x) > 0 and x[0] == ':' and x.find('ChangeLog') == -1]

    # It was only the ChangeLog, ignore.
    if len(op) == 0:
        return

    print('%s %s <%s>\n' % (date, authorname, authoremail))

    if merge:
        print('\t MERGE COMMIT: %s\n' % commit)
        return

    print('\tCOMMIT: %s' % commit)

    # Each of these lines has a space separated format like so:
    # :<OLD MODE> <NEW MODE> <OLD REF> <NEW REF> <OPERATION> <FILE1> <FILE2>
    #
    # where OPERATION can be one of the following:
    # A: File added
    # D: File removed
    # M: File modified
    # R[0-9]{3}: File renamed, with the 3 digit number following it indicating
    # what percentage of the file is intact.
    #
    # FILE2 is set only when OPERATION is R, to indicate the new file name.
    #
    # Also note that merge commits have a different format here, with three
    # entries each for the modes and refs, but we don't bother with it for now.
    for f in op:
        data = f.split()
        if data[4] == 'A':
            print('\t* %s: New file.' % data[5])
        elif data[4] == 'D':
            print('\t* %s: Delete file.' % data[5])
        elif data[4] == 'M':
            print('\t* %s: Modified.' % data[5])
            analyze_diff(data[2], data[3], data[5])
        elif data[4][0] == 'R':
            # A fully-intact rename (R100) needs no content analysis.
            change = int(data[4][1:])
            print('\t* %s: Move to...' % data[5])
            print('\t* %s: ... here.' % data[6])
            if change < 100:
                analyze_diff(data[2], data[3], data[6])
        else:
            eprint('%s: Unknown line format %s' % (commit, data[4]))
            sys.exit(42)

    print('')
def main(revs):
@@ -37,8 +572,20 @@ def main(revs):
list_changes(commit)
def parser_file_test(f):
    """Parse the source file F and dump its block tree (the -t mode)."""
    with open(f) as srcfile:
        # Chop the trailing newline from every line before parsing.
        lines = [l[:-1] for l in srcfile]
    print_tree(parse_output(lines), 0)
+
+
if __name__ == '__main__':
    # Exactly two arguments are required: either "-t FILE" to exercise
    # the parser on a single source file, or "<from-ref> <to-ref>".
    if len(sys.argv) != 3:
        usage(sys.argv[0])
    mode = sys.argv[1]
    if mode == '-t':
        debug = True
        parser_file_test(sys.argv[2])
    else:
        main(sys.argv[1:])
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=cc659aac502852a1ce2a17bdd3e7e98c1f2b36d7
commit cc659aac502852a1ce2a17bdd3e7e98c1f2b36d7
Author: Siddhesh Poyarekar <siddhesh@linaro.org>
Date: Fri Oct 5 15:28:35 2018 +0530
add script file
diff --git a/scripts/gen-changed-entities.py b/scripts/gen-changed-entities.py
new file mode 100755
index 0000000..caebba4
--- /dev/null
+++ b/scripts/gen-changed-entities.py
@@ -0,0 +1,44 @@
+#!/usr/bin/python
+import subprocess
+import sys
+
def exec_git_cmd(args):
    """Run the git subcommand ARGS and return its output lines.

    Returns raw output lines with the trailing newline chopped off each.
    A fresh command list is built so the caller's ARGS is not mutated
    (the original did ``args.insert(0, 'git')`` in place), and Popen is
    used as a context manager so the child process is waited on and its
    pipe closed instead of being leaked.
    """
    cmd = ['git'] + args
    with subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc:
        # Trim the trailing newline and return the list.
        return [x[:-1] for x in proc.stdout]
+
+
def list_commits(revs):
    """Return the hashes of every commit in the range REVS[0]..REVS[1]."""
    rev_range = '%s..%s' % (revs[0], revs[1])
    return exec_git_cmd(['log', '--pretty=%H', rev_range])
+
+
def list_changes(commit):
    """Print the operation and file name for every non-ChangeLog file
    touched by COMMIT, flagging renames."""
    raw = exec_git_cmd(['show', '--raw', commit])

    # Find raw commit information for all non-ChangeLog files.
    entries = [line[1:] for line in raw
               if len(line) > 0 and line[0] == ':'
               and line.find('ChangeLog') == -1]

    if not entries:
        return

    print("COMMIT: %s" % commit)
    for entry in entries:
        data = entry.split()
        print("\tFile: %s: %s" % (data[4], data[5]))
        # A seventh field is present only for renames (the new name).
        if len(data) > 6:
            print('RENAMED: %s' % data[6])
+
+
def main(revs):
    """List the changes of every commit in the revision range REVS."""
    for commit in list_commits(revs):
        list_changes(commit)
+
+
if __name__ == '__main__':
    # This initial version of the script called a usage() helper that is
    # never defined in the file, so a wrong argument count crashed with
    # NameError instead of printing the synopsis.  Print it inline.
    if len(sys.argv) != 3:
        sys.stderr.write("usage: %s <from-ref> <to-ref>\n" % sys.argv[0])
        sys.exit(1)

    main(sys.argv[1:])
-----------------------------------------------------------------------
hooks/post-receive
--
GNU C Library master sources