[PATCH] New module to import and process bench.out
Siddhesh Poyarekar
siddhesh@redhat.com
Thu Jun 12 11:35:00 GMT 2014
Hi,
This is the beginning of a module to import and process benchmark
outputs. Currently this is tied to the bench.out format, but in
the future this needs to be generalized.
The module currently supports importing a bench.out file and validating
it against a schema file. I have also added a function that
compresses detailed timings by grouping them into their means based on
how close they are to each other.
The idea here is to have a set of routines that benchmark consumers
may find useful to build their own analysis tools. I have altered
validate_bench to use this module too.
Siddhesh
* benchtests/scripts/import_bench.py: New file.
* benchtests/scripts/validate_benchout.py: Import import_bench
instead of jsonschema.
(validate_bench): Remove function.
(main): Use import_bench.
diff --git a/benchtests/scripts/import_bench.py b/benchtests/scripts/import_bench.py
new file mode 100755
index 0000000..ffcb775
--- /dev/null
+++ b/benchtests/scripts/import_bench.py
@@ -0,0 +1,141 @@
+#!/usr/bin/python
+# Copyright (C) 2014 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+"""Functions to import benchmark data and process it"""
+
+import json
+try:
+ import jsonschema as validator
+except ImportError:
+ print('Could not find jsonschema module.')
+ raise
+
+
+def mean(lst):
+ """Compute and return mean of numbers in a list
+
+ The pypy average function has horrible performance, so implement our
+ own mean function.
+
+ Args:
+ lst: The list of numbers to average.
+ Return:
+ The mean of members in the list.
+ """
+ return sum(lst) / len(lst)
+
+
+def split_list(bench, func, var):
+ """ Split the list into a smaller set of more distinct points
+
+ Group together points such that the difference between the smallest
+ point and the mean is less than 1/3rd of the mean. This means that
+ the mean is at most 1.5x the smallest member of that group.
+
+ mean - xmin < mean / 3
+ i.e. 2 * mean / 3 < xmin
+ i.e. mean < 3 * xmin / 2
+
+ For an evenly distributed group, the largest member will be less than
+ twice the smallest member of the group.
+ Derivation:
+
+ An evenly distributed series would be xmin, xmin + d, xmin + 2d...
+
+ mean = (2 * n * xmin + n * (n - 1) * d) / 2 * n
+ and max element is xmin + (n - 1) * d
+
+ Now, mean < 3 * xmin / 2
+
+ 3 * xmin > 2 * mean
+ 3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n
+ 3 * n * xmin > 2 * n * xmin + n * (n - 1) * d
+ n * xmin > n * (n - 1) * d
+ xmin > (n - 1) * d
+ 2 * xmin > xmin + (n-1) * d
+ 2 * xmin > xmax
+
+ Hence, proved.
+
+ Similarly, it is trivial to prove that for a similar aggregation by using
+ the maximum element, the maximum element in the group must be at most 4/3
+ times the mean.
+
+ Args:
+ bench: The benchmark object
+ func: The function name
+ var: The function variant name
+ """
+ means = []
+ lst = bench['functions'][func][var]['timings']
+ last = len(lst) - 1
+ while lst:
+ for i in range(last + 1):
+ avg = mean(lst[i:])
+ if avg > 0.75 * lst[last]:
+ means.insert(0, avg)
+ lst = lst[:i]
+ last = i - 1
+ break
+ bench['functions'][func][var]['timings'] = means
+
+
+def do_for_all_timings(bench, callback):
+ """Call a function for all timing objects for each function and its
+ variants.
+
+ Args:
+ bench: The benchmark object
+ callback: The callback function
+ """
+ for func in bench['functions'].keys():
+ for k in bench['functions'][func].keys():
+ if 'timings' not in bench['functions'][func][k].keys():
+ continue
+
+ callback(bench, func, k)
+
+
+def compress_timings(points):
+ """Club points with close enough values into a single mean value
+
+ See split_list for details on how the clubbing is done.
+
+ Args:
+ points: The set of points.
+ """
+ do_for_all_timings(points, split_list)
+
+
+def parse_bench(filename, schema_filename):
+ """Parse the input file
+
+ Parse and validate the json file containing the benchmark outputs. Return
+ the resulting object.
+ Args:
+ filename: Name of the benchmark output file.
+ Return:
+ The bench dictionary.
+ """
+ with open(schema_filename, 'r') as schemafile:
+ schema = json.load(schemafile)
+ with open(filename, 'r') as benchfile:
+ bench = json.load(benchfile)
+ validator.validate(bench, schema)
+ do_for_all_timings(bench, lambda b, f, v:
+ b['functions'][f][v]['timings'].sort())
+ return bench
diff --git a/benchtests/scripts/validate_benchout.py b/benchtests/scripts/validate_benchout.py
index 61a8cbd..9d3a5cb 100755
--- a/benchtests/scripts/validate_benchout.py
+++ b/benchtests/scripts/validate_benchout.py
@@ -27,37 +27,26 @@ import sys
import os
try:
- import jsonschema
+ import import_bench as bench
except ImportError:
- print('Could not find jsonschema module. Output not validated.')
+ print('Import Error: Output will not be validated.')
# Return success because we don't want the bench target to fail just
# because the jsonschema module was not found.
sys.exit(os.EX_OK)
-def validate_bench(benchfile, schemafile):
- """Validate benchmark file
-
- Validate a benchmark output file against a JSON schema.
+def print_and_exit(message, exitcode):
+ """Prints message to stderr and returns the exit code.
Args:
- benchfile: The file name of the bench.out file.
- schemafile: The file name of the JSON schema file to validate
- bench.out against.
+ message: The message to print
+ exitcode: The exit code to return
- Exceptions:
- jsonschema.ValidationError: When bench.out is not valid
- jsonschema.SchemaError: When the JSON schema is not valid
- IOError: If any of the files are not found.
+ Returns:
+ The passed exit code
"""
- with open(benchfile, 'r') as bfile:
- with open(schemafile, 'r') as sfile:
- bench = json.load(bfile)
- schema = json.load(sfile)
- jsonschema.validate(bench, schema)
-
- # If we reach here, we're all good.
- print("Benchmark output in %s is valid." % benchfile)
+ print(message, file=sys.stderr)
+ return exitcode
def main(args):
@@ -73,11 +62,23 @@ def main(args):
Exceptions thrown by validate_bench
"""
if len(args) != 2:
- print("Usage: %s <bench.out file> <bench.out schema>" % sys.argv[0],
- file=sys.stderr)
- return os.EX_USAGE
+ return print_and_exit("Usage: %s <bench.out file> <bench.out schema>"
+ % sys.argv[0], os.EX_USAGE)
+
+ try:
+ bench.parse_bench(args[0], args[1])
+ except IOError as e:
+ return print_and_exit("IOError(%d): %s" % (e.errno, e.strerror),
+ os.EX_OSFILE)
+
+ except bench.validator.ValidationError as e:
+ return print_and_exit("Invalid benchmark output: %s" % e.message,
+ os.EX_DATAERR)
+
+ except bench.validator.SchemaError as e:
+ return print_and_exit("Invalid schema: %s" % e.message, os.EX_DATAERR)
- validate_bench(args[0], args[1])
+ print("Benchmark output in %s is valid." % args[0])
return os.EX_OK
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 473 bytes
Desc: not available
URL: <http://sourceware.org/pipermail/libc-alpha/attachments/20140612/42b71007/attachment.sig>
More information about the Libc-alpha
mailing list