This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH v2] Implement benchmark script in python
- From: Siddhesh Poyarekar <siddhesh at redhat dot com>
- To: Mike Frysinger <vapier at gentoo dot org>
- Cc: libc-alpha at sourceware dot org, carlos at redhat dot com
- Date: Fri, 27 Dec 2013 20:34:45 +0530
- Subject: [PATCH v2] Implement benchmark script in python
- Authentication-results: sourceware.org; auth=none
- References: <20131206085334 dot GF14845 at spoyarek dot pnq dot redhat dot com> <201312190906 dot 12693 dot vapier at gentoo dot org>
Hi,
Following Mike's detailed review of my initial iteration, I have
now come up with a full patch that does the following:
- Remove the perl benchmark script
- Add the new python benchmark script with changes Mike suggested and
some more
- Adjust the Makefile to use the python script
- Add a minimal pylintrc to limit line length to 79 characters and
add 'f' as a good name for a variable since it is a useful temporary
name to have for a file stream, much like i, j for iterators. I
have also verified that the code passes the pylint test with flying
colours ;)
- Mention the additional python dependency in benchtests/README.
I have tested this script with python 2.7 as well as python3 and it
works well on both.
Siddhesh
* scripts/bench.pl: Remove file.
* scripts/bench.py: New benchmark script.
* benchtests/Makefile ($(objpfx)bench-%.c): Use it.
* benchtests/README: Mention python dependency.
* scripts/pylintrc: New file.
diff --git a/benchtests/Makefile b/benchtests/Makefile
index 117228b..3d41274 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -127,5 +127,5 @@ $(objpfx)bench-%.c: %-inputs $(bench-deps)
{ if [ -n "$($*-INCLUDE)" ]; then \
cat $($*-INCLUDE); \
fi; \
- $(..)scripts/bench.pl $(patsubst %-inputs,%,$<); } > $@-tmp
+ $(..)scripts/bench.py $(patsubst %-inputs,%,$<); } > $@-tmp
mv -f $@-tmp $@
diff --git a/benchtests/README b/benchtests/README
index a5fd8da..2a940fa 100644
--- a/benchtests/README
+++ b/benchtests/README
@@ -8,7 +8,9 @@ basic performance properties of the function.
Running the benchmark:
=====================
-The benchmark can be executed by invoking make as follows:
+The benchmark needs python 2.7 or later in addition to the
+dependencies required to build the GNU C Library. One may run the
+benchmark by invoking make as follows:
$ make bench
diff --git a/scripts/bench.pl b/scripts/bench.pl
deleted file mode 100755
index 10f0ba4..0000000
--- a/scripts/bench.pl
+++ /dev/null
@@ -1,205 +0,0 @@
-#! /usr/bin/perl -w
-# Copyright (C) 2013 Free Software Foundation, Inc.
-# This file is part of the GNU C Library.
-
-# The GNU C Library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-
-# The GNU C Library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-
-# You should have received a copy of the GNU Lesser General Public
-# License along with the GNU C Library; if not, see
-# <http://www.gnu.org/licenses/>.
-
-
-use strict;
-use warnings;
-# Generate a benchmark source file for a given input.
-
-if (@ARGV < 1) {
- die "Usage: bench.pl <function>"
-}
-
-my $func = $ARGV[0];
-my @args;
-my $ret = "void";
-my $getret = "";
-
-# We create a hash of inputs for each variant of the test.
-my $variant = "";
-my @curvals;
-my %vals;
-my @include_headers;
-my @include_sources;
-my $incl;
-
-open INPUTS, "<$func-inputs" or die $!;
-
-LINE:while (<INPUTS>) {
- chomp;
-
- # Directives.
- if (/^## ([\w-]+): (.*)/) {
- # Function argument types.
- if ($1 eq "args") {
- @args = split(":", $2);
- }
-
- # Function return type.
- elsif ($1 eq "ret") {
- $ret = $2;
- }
-
- elsif ($1 eq "includes") {
- @include_headers = split (",", $2);
- }
-
- elsif ($1 eq "include-sources") {
- @include_sources = split (",", $2);
- }
-
- # New variant. This is the only directive allowed in the body of the
- # inputs to separate inputs into variants. All others should be at the
- # top or else all hell will break loose.
- elsif ($1 eq "name") {
-
- # Save values in the previous variant.
- my @copy = @curvals;
- $vals{$variant} = \@copy;
-
- # Prepare for the next.
- $variant=$2;
- undef @curvals;
- next LINE;
- }
-
- else {
- die "Unknown directive: ".$1;
- }
- }
-
- # Skip over comments and blank lines.
- if (/^#/ || /^$/) {
- next LINE;
- }
- push (@curvals, $_);
-}
-
-
-my $bench_func = "#define CALL_BENCH_FUNC(v, i) $func (";
-
-# Output variables. These include the return value as well as any pointers
-# that may get passed into the function, denoted by the <> around the type.
-my $outvars = "";
-
-if ($ret ne "void") {
- $outvars = "static $ret volatile ret;\n";
-}
-
-# Print the definitions and macros.
-foreach $incl (@include_headers) {
- print "#include <" . $incl . ">\n";
-}
-
-# Print the source files.
-foreach $incl (@include_sources) {
- print "#include \"" . $incl . "\"\n";
-}
-
-if (@args > 0) {
- # Save values in the last variant.
- $vals{$variant} = \@curvals;
- my $struct =
- "struct _variants
- {
- const char *name;
- int count;
- struct args *in;
- };\n";
-
- my $arg_struct = "struct args {";
-
- my $num = 0;
- my $arg;
- foreach $arg (@args) {
- if ($num > 0) {
- $bench_func = "$bench_func,";
- }
-
- $_ = $arg;
- if (/<(.*)\*>/) {
- # Output variables. These have to be pointers, so dereference once by
- # dropping one *.
- $outvars = $outvars . "static $1 out$num;\n";
- $bench_func = "$bench_func &out$num";
- }
- else {
- $arg_struct = "$arg_struct $arg volatile arg$num;";
- $bench_func = "$bench_func variants[v].in[i].arg$num";
- }
-
- $num = $num + 1;
- }
-
- $arg_struct = $arg_struct . "};\n";
- $bench_func = $bench_func . ");\n";
-
- print $bench_func;
- print $arg_struct;
- print $struct;
-
- my $c = 0;
- my $key;
-
- # Print the input arrays.
- foreach $key (keys %vals) {
- my @arr = @{$vals{$key}};
-
- print "struct args in" . $c . "[" . @arr . "] = {\n";
- foreach (@arr) {
- print "{$_},\n";
- }
- print "};\n\n";
- $c += 1;
- }
-
- # The variants. Each variant then points to the appropriate input array we
- # defined above.
- print "struct _variants variants[" . (keys %vals) . "] = {\n";
- $c = 0;
- foreach $key (keys %vals) {
- print "{\"$func($key)\", " . @{$vals{$key}} . ", in$c},\n";
- $c += 1;
- }
- print "};\n\n";
- # Finally, print the last set of macros.
- print "#define NUM_VARIANTS $c\n";
- print "#define NUM_SAMPLES(i) (variants[i].count)\n";
- print "#define VARIANT(i) (variants[i].name)\n";
-}
-else {
- print $bench_func . ");\n";
- print "#define NUM_VARIANTS (1)\n";
- print "#define NUM_SAMPLES(v) (1)\n";
- print "#define VARIANT(v) FUNCNAME \"()\"\n"
-}
-
-# Print the output variable definitions.
-print "$outvars\n";
-
-# In some cases not storing a return value seems to result in the function call
-# being optimized out.
-if ($ret ne "void") {
- $getret = "ret = ";
-}
-
-# And we're done.
-print "#define BENCH_FUNC(i, j) ({$getret CALL_BENCH_FUNC (i, j);})\n";
-
-print "#define FUNCNAME \"$func\"\n";
-print "#include \"bench-skeleton.c\"\n";
diff --git a/scripts/bench.py b/scripts/bench.py
new file mode 100755
index 0000000..c393a25
--- /dev/null
+++ b/scripts/bench.py
@@ -0,0 +1,305 @@
+#!/usr/bin/python
+# Copyright (C) 2013 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+"""Benchmark program generator script
+
+This script takes a function name as input and generates a program using
+an input file located in the benchtests directory. The name of the
+input file should be of the form foo-inputs where 'foo' is the name of
+the function.
+"""
+
+from __future__ import print_function
+import sys
+import itertools
+
+# Macro definitions for functions that take no arguments. For functions
+# that take arguments, the STRUCT_TEMPLATE, ARGS_TEMPLATE and
+# VARIANTS_TEMPLATE are used instead.
+DEFINES_TEMPLATE = '''
+#define CALL_BENCH_FUNC(v, i) %(func)s ()
+#define NUM_VARIANTS (1)
+#define NUM_SAMPLES(v) (1)
+#define VARIANT(v) FUNCNAME "()"
+'''
+
+# Structures to store arguments for the function call. A function may
+# have its inputs partitioned to represent distinct performance
+# characteristics or distinct flavors of the function. Each such
+# variant is represented by the _VARIANT structure. The ARGS structure
+# represents a single set of arguments.
+STRUCT_TEMPLATE = '''
+#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s)
+
+struct args
+{
+%(args)s
+};
+
+struct _variants
+{
+ const char *name;
+ int count;
+ struct args *in;
+};
+'''
+
+# The actual input arguments.
+ARGS_TEMPLATE = '''
+struct args in%(argnum)d[%(num_args)d] = {
+%(args)s
+};
+'''
+
+# The actual variants, along with macros defined to access the variants.
+VARIANTS_TEMPLATE = '''
+struct _variants variants[%(num_variants)d] = {
+%(variants)s
+};
+
+#define NUM_VARIANTS %(num_variants)d
+#define NUM_SAMPLES(i) (variants[i].count)
+#define VARIANT(i) (variants[i].name)
+'''
+
+# Epilogue for the generated source file.
+EPILOGUE = '''
+#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);})
+#define FUNCNAME "%(func)s"
+#include "bench-skeleton.c"
+'''
+
+
+def gen_source(func, directives, all_vals):
+ """Generate source for the function
+
+ Generate the C source for the function from the values and
+ directives.
+
+ Args:
+ func: The function name
+ directives: A dictionary of directives applicable to this function
+ all_vals: A dictionary of input values
+
+ """
+ # The includes go in first.
+ for header in directives['includes']:
+ print('#include <%s>' % header)
+
+ for header in directives['include-sources']:
+ print('#include \"%s\"' % header)
+
+ # Print macros. This branches out to a separate routine if
+ # the function takes arguments.
+ if not directives['args']:
+ print(DEFINES_TEMPLATE % {'func': func})
+ outargs = []
+ else:
+ outargs = _print_arg_data(func, directives, all_vals)
+
+ # Print the output variable definitions if necessary.
+ for out in outargs:
+ print(out)
+
+ # If we have a return value from the function, make sure it is
+ # assigned to prevent the compiler from optimizing out the
+ # call.
+ if directives['ret']:
+ print('static %s volatile ret;' % directives['ret'])
+ getret = 'ret = '
+ else:
+ getret = ''
+
+ print(EPILOGUE % {'getret': getret, 'func': func})
+
+
+def _print_arg_data(func, directives, all_vals):
+ """Print argument data
+
+ This is a helper function for gen_source that prints structure and
+ values for arguments and their variants and returns output arguments
+ if any are found.
+
+ Args:
+ func: Function name
+ directives: A dictionary of directives applicable to this function
+ all_vals: A dictionary of input values
+
+ Returns:
+ Returns a list of definitions for function arguments that act as
+ output parameters.
+
+ """
+ # First, all of the definitions. We process writing of
+ # CALL_BENCH_FUNC, struct args and also the output arguments
+ # together in a single traversal of the arguments list.
+ func_args = []
+ arg_struct = []
+ outargs = []
+
+ for arg, i in zip(directives['args'], itertools.count()):
+ if arg[0] == '<' and arg[-1] == '>':
+ pos = arg.rfind('*')
+ if pos == -1:
+ die('Output argument must be a pointer type')
+
+ outargs.append('static %s out%d;' % (arg[1:pos], i))
+ func_args.append(' &out%d' % i)
+ else:
+ arg_struct.append(' %s volatile arg%d;' % (arg, i))
+ func_args.append('variants[v].in[i].arg%d' % i)
+
+ print(STRUCT_TEMPLATE % {'args' : '\n'.join(arg_struct), 'func': func,
+ 'func_args': ', '.join(func_args)})
+
+ # Now print the values.
+ variants = []
+ for k, i in zip(all_vals.keys(), itertools.count()):
+ vals = all_vals[k]
+ out = [' {%s},' % v for v in vals]
+
+ # Members for the variants structure list that we will
+ # print later.
+ variants.append(' {\"%s(%s)\", %d, in%d},' % (func, k, len(vals), i))
+ print(ARGS_TEMPLATE % {'argnum': i, 'num_args': len(vals),
+ 'args': '\n'.join(out)})
+
+ # Print the variants and the last set of macros.
+ print(VARIANTS_TEMPLATE % {'num_variants': len(all_vals),
+ 'variants': '\n'.join(variants)})
+ return outargs
+
+
+def _process_directive(d_name, d_val):
+ """Process a directive.
+
+ Evaluate the directive name and value passed and return the
+ processed value. This is a helper function for parse_file.
+
+ Args:
+ d_name: Name of the directive
+ d_val: The string value to process
+
+ Returns:
+ The processed value, which may be the string as it is or an object
+ that describes the directive.
+
+ """
+ # Process the directive values if necessary. name and ret don't
+ # need any processing.
+ if d_name.startswith('include'):
+ d_val = d_val.split(',')
+ elif d_name == 'args':
+ d_val = d_val.split(':')
+
+ # Return the values.
+ return d_val
+
+
+def parse_file(func):
+ """Parse an input file
+
+ Given a function name, open and parse an input file for the function
+ and get the necessary parameters for the generated code and the list
+ of inputs.
+
+ Args:
+ func: The function name
+
+ Returns:
+ A tuple of two elements, one a dictionary of directives and the
+ other a dictionary of all input values.
+
+ """
+ all_vals = {}
+ # Valid directives.
+ directives = {'name': '',
+ 'args': [],
+ 'includes': [],
+ 'include-sources': [],
+ 'ret': ''}
+
+ try:
+ with open('%s-inputs' % func) as f:
+ for line in f:
+ line = line.rstrip()
+
+ # Look for a directive and parse it if found.
+ if line.startswith('##'):
+ try:
+ d_name, d_val = line[2:].split(':', 1)
+ d_name = d_name.strip()
+ d_val = d_val.strip()
+ directives[d_name] = _process_directive(d_name, d_val)
+ except (IndexError, KeyError):
+ die('Invalid directive: %s' % line[2:])
+
+ # Skip blank lines and comments.
+ line = line.split('#', 1)[0].rstrip()
+ if not line:
+ continue
+
+ # Otherwise, we're an input. Add to the appropriate
+ # input set.
+ cur_name = directives['name']
+ all_vals.setdefault(cur_name, [])
+ all_vals[cur_name].append(line)
+ except IOError as ex:
+ die("Failed to open input file (%s): %s" % (ex.filename, ex.strerror))
+
+ return directives, all_vals
+
+
+def die(msg):
+ """Exit with an error
+
+ Prints an error message to the standard error stream and exits with
+ a non-zero status.
+
+ Args:
+ msg: The error message to print to standard error.
+
+ """
+ sys.stderr.write('%s\n' % msg)
+ sys.exit(1)
+
+
+def main(args):
+ """Main function
+
+ Use the first command line argument as function name and parse its
+ input file to generate C source that calls the function repeatedly
+ for the input.
+
+ Args:
+ args: The command line arguments with the program name dropped.
+
+ Returns:
+ 1 on error and 0 on success.
+
+ """
+ if len(args) != 1:
+ print('Usage: %s <function>' % sys.argv[0])
+ return 1
+
+ directives, all_vals = parse_file(args[0])
+ gen_source(args[0], directives, all_vals)
+ return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main(sys.argv[1:]))
diff --git a/scripts/pylintrc b/scripts/pylintrc
new file mode 100644
index 0000000..9098293
--- /dev/null
+++ b/scripts/pylintrc
@@ -0,0 +1,9 @@
+[BASIC]
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=f,i,j,k,ex,Run,_
+
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=79