This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
Hi, This is the beginning of a module to import and process benchmark outputs. Currently this is tied to the bench.out format, but in the future this needs to be generalized. The module currently supports importing a bench.out file and validating it against a schema file. I have also added a function that compresses detailed timings by grouping them into their means based on how close they are to each other. The idea here is to have a set of routines that benchmark consumers may find useful to build their own analysis tools. I have also altered validate_benchout.py to use this module. Siddhesh * benchtests/scripts/import_bench.py: New file. * benchtests/scripts/validate_benchout.py: Import import_bench instead of jsonschema. (validate_bench): Remove function. (main): Use import_bench. diff --git a/benchtests/scripts/import_bench.py b/benchtests/scripts/import_bench.py new file mode 100755 index 0000000..ffcb775 --- /dev/null +++ b/benchtests/scripts/import_bench.py @@ -0,0 +1,141 @@ +#!/usr/bin/python +# Copyright (C) 2014 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. 
+"""Functions to import benchmark data and process it""" + +import json +try: + import jsonschema as validator +except ImportError: + print('Could not find jsonschema module.') + raise + + +def mean(lst): + """Compute and return mean of numbers in a list + + The pypy average function has horrible performance, so implement our + own mean function. + + Args: + lst: The list of numbers to average. + Return: + The mean of members in the list. + """ + return sum(lst) / len(lst) + + +def split_list(bench, func, var): + """ Split the list into a smaller set of more distinct points + + Group together points such that the difference between the smallest + point and the mean is less than 1/3rd of the mean. This means that + the mean is at most 1.5x the smallest member of that group. + + mean - xmin < mean / 3 + i.e. 2 * mean / 3 < xmin + i.e. mean < 3 * xmin / 2 + + For an evenly distributed group, the largest member will be less than + twice the smallest member of the group. + Derivation: + + An evenly distributed series would be xmin, xmin + d, xmin + 2d... + + mean = (2 * n * xmin + n * (n - 1) * d) / (2 * n) + and max element is xmin + (n - 1) * d + + Now, mean < 3 * xmin / 2 + + 3 * xmin > 2 * mean + 3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n + 3 * n * xmin > 2 * n * xmin + n * (n - 1) * d + n * xmin > n * (n - 1) * d + xmin > (n - 1) * d + 2 * xmin > xmin + (n-1) * d + 2 * xmin > xmax + + Hence, proved. + + Similarly, it is trivial to prove that for a similar aggregation by using + the maximum element, the maximum element in the group must be at most 4/3 + times the mean. 
+ + Args: + bench: The benchmark object + func: The function name + var: The function variant name + """ + means = [] + lst = bench['functions'][func][var]['timings'] + last = len(lst) - 1 + while lst: + for i in range(last + 1): + avg = mean(lst[i:]) + if avg > 0.75 * lst[last]: + means.insert(0, avg) + lst = lst[:i] + last = i - 1 + break + bench['functions'][func][var]['timings'] = means + + +def do_for_all_timings(bench, callback): + """Call a function for all timing objects for each function and its + variants. + + Args: + bench: The benchmark object + callback: The callback function + """ + for func in bench['functions'].keys(): + for k in bench['functions'][func].keys(): + if 'timings' not in bench['functions'][func][k].keys(): + continue + + callback(bench, func, k) + + +def compress_timings(points): + """Club points with close enough values into a single mean value + + See split_list for details on how the clubbing is done. + + Args: + points: The set of points. + """ + do_for_all_timings(points, split_list) + + +def parse_bench(filename, schema_filename): + """Parse the input file + + Parse and validate the json file containing the benchmark outputs. Return + the resulting object. + Args: + filename: Name of the benchmark output file. + Return: + The bench dictionary. + """ + with open(schema_filename, 'r') as schemafile: + schema = json.load(schemafile) + with open(filename, 'r') as benchfile: + bench = json.load(benchfile) + validator.validate(bench, schema) + do_for_all_timings(bench, lambda b, f, v: + b['functions'][f][v]['timings'].sort()) + return bench diff --git a/benchtests/scripts/validate_benchout.py b/benchtests/scripts/validate_benchout.py index 61a8cbd..9d3a5cb 100755 --- a/benchtests/scripts/validate_benchout.py +++ b/benchtests/scripts/validate_benchout.py @@ -27,37 +27,26 @@ import sys import os try: - import jsonschema + import import_bench as bench except ImportError: - print('Could not find jsonschema module. 
Output not validated.') + print('Import Error: Output will not be validated.') # Return success because we don't want the bench target to fail just # because the jsonschema module was not found. sys.exit(os.EX_OK) -def validate_bench(benchfile, schemafile): - """Validate benchmark file - - Validate a benchmark output file against a JSON schema. +def print_and_exit(message, exitcode): + """Prints message to stderr and returns the exit code. Args: - benchfile: The file name of the bench.out file. - schemafile: The file name of the JSON schema file to validate - bench.out against. + message: The message to print + exitcode: The exit code to return - Exceptions: - jsonschema.ValidationError: When bench.out is not valid - jsonschema.SchemaError: When the JSON schema is not valid - IOError: If any of the files are not found. + Returns: + The passed exit code """ - with open(benchfile, 'r') as bfile: - with open(schemafile, 'r') as sfile: - bench = json.load(bfile) - schema = json.load(sfile) - jsonschema.validate(bench, schema) - - # If we reach here, we're all good. - print("Benchmark output in %s is valid." % benchfile) + print(message, file=sys.stderr) + return exitcode def main(args): @@ -73,11 +62,23 @@ def main(args): Exceptions thrown by validate_bench """ if len(args) != 2: - print("Usage: %s <bench.out file> <bench.out schema>" % sys.argv[0], - file=sys.stderr) - return os.EX_USAGE + return print_and_exit("Usage: %s <bench.out file> <bench.out schema>" + % sys.argv[0], os.EX_USAGE) + + try: + bench.parse_bench(args[0], args[1]) + except IOError as e: + return print_and_exit("IOError(%d): %s" % (e.errno, e.strerror), + os.EX_OSFILE) + + except bench.validator.ValidationError as e: + return print_and_exit("Invalid benchmark output: %s" % e.message, + os.EX_DATAERR) + + except bench.validator.SchemaError as e: + return print_and_exit("Invalid schema: %s" % e.message, os.EX_DATAERR) - validate_bench(args[0], args[1]) + print("Benchmark output in %s is valid." 
% args[0]) return os.EX_OK
Attachment:
pgp_loSZpVT2S.pgp
Description: PGP signature
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |