From: Serhei Makarov Date: Tue, 9 Aug 2022 16:49:50 +0000 (-0400) Subject: WIP: benchmarking grid construction X-Git-Tag: cauldron2022-demo~30^2~3 X-Git-Url: https://sourceware.org/git/?a=commitdiff_plain;h=fee007ec7675bb1d06a30544ee8d82720df2a2e3;p=bunsen.git WIP: benchmarking grid construction With judicious use of the slice_testcases feature I can build release-4.6..now in about 1:45 on my laptop. Previously I used a beefy server with lots of memory, so this could be interpreted as an improvement. --- diff --git a/bin/R-show-testcases b/bin/R-show-testcases index 1c031e4..da4a1f8 100755 --- a/bin/R-show-testcases +++ b/bin/R-show-testcases @@ -5,6 +5,8 @@ # I'm using capital 'R-' to indicate a bulky batch-mode script with these limitations. # Based on a script by Martin Cermak. +# TODOXXX check terminology expname -> expfile + import argparse import sqlite3 import logging @@ -44,7 +46,7 @@ def pick_default_branch(source_repo): return default_cand return 'master/main/trunk' # XXX let it produce an error -# Things that should be a library (2): far-too-grody SQL queries. +# Things that should be a library (2): far-too-grody SQL queries and nano-ORM. class Testrun: def __init__(self, db, row): @@ -70,12 +72,51 @@ class Testrun: rep += " " + str(self.attr) return rep +def get_summary(testrun, summary_fields=None, exclude_version=True): + """Create a dict summarizing the configuration for a testrun. + + Used to match testruns with similar configurations across different source commits.""" + if summary_fields is None: + summary_fields = {'architecture', 'target_board', 'distro', 'selinux'} + excluded = {'authored_day', 'authored_month', 'authored_year', 'host', 'kernel', 'snapshot', 'source.gitname'} # XXX TODO: Generalize across other projects besides SystemTap. 
+ d = {} + for field in summary_fields: + if exclude_version and field in excluded: + continue + if field not in testrun.attr: + d[field] = 'null' + continue + d[field] = testrun.attr[field] + return d + +def get_tc_key(tc, strip_result=False): + """Create a string key 'name+subtest+result+baseline_result' for consistent use in dicts.""" + key = '' + key += tc.expfile + key += '+' + tc.subtest + if not strip_result: + key += '+' + tc.result + if tc.baseline_result is not None: + key += '+' + tc.baseline_result + return key + +def get_summary_key(summary): + """Convert a testrun summary dict to a string key for consistent use in dicts.""" + # XXX Use sorted() as a hack to avoid worrying about stable dict iteration order. + return str(sorted(summary.items())) + +#dejagnu_fail_outcomes = {'FAIL','KFAIL','XFAIL','UNTESTED','UNSUPPORTED','ERROR'} +dejagnu_fail_outcomes = {'FAIL','KFAIL','XFAIL','ERROR'} +# <- Most likely PASS->UNTESTED is not interesting, FAIL->UNTESTED is. +dejagnu_untested_outcomes = {'UNTESTED','UNSUPPORTED'} + class Result: def __init__(self, testrun, row): self._testrun = testrun self.expfile = row['expfile'] self.subtest = row['subtest'] - self.result = row['result'] + self.result = row['result'] # TODOXXX was outcome + self.baseline_result = None # XXX hypothetical, for diffing def get_all_testruns(db): # XXX Probably needs options to query slightly less than everything. @@ -144,7 +185,7 @@ def get_testruns_expnames(db, testrun_ids, expfile_like=None): return results # XXX covers [expname_start, expname_end) -def get_testrun_results(testrun, ts_id, ts_sumfile, expname_start=None, expname_end=None): +def get_testrun_results(testrun, ts_id, ts_sumfile, expname_start=None, expname_end=None, expfile_like=None): s_selectclause = "select tc.expfile, tc.subtest, tc.result" s_fromclause = "from dejagnu_testcase_v tc" s_whereclause = "where tc.testsuite = ?" 
@@ -155,6 +196,9 @@ def get_testrun_results(testrun, ts_id, ts_sumfile, expname_start=None, expname_ if expname_end is not None: s_whereclause += " and tc.expfile < ?" s_args += [expname_end] + if expfile_like is not None: + s_whereclause += " and tc.expfile like ?" + s_args += [expfile_like] sql = "%s %s %s" % (s_selectclause, s_fromclause, s_whereclause) testrun._db.row_factory = sqlite3.Row results = [Result(testrun, row) for row in testrun._db.execute(sql, s_args)] @@ -166,7 +210,7 @@ def get_full_testrun(testrun): return get_sliced_testrun(testrun, None, None) # Populate testrun with individual testcase rows within [expname_start, expname_end): -def get_sliced_testrun(testrun, expname_start, expname_end): +def get_sliced_testrun(testrun, expname_start, expname_end, expfile_like=None): s_selectclause = "select distinct ts.id, ts.sumfile" s_fromclause = "from dejagnu_testsuite ts" s_whereclause = "where ts.tr = ?" @@ -176,7 +220,7 @@ def get_sliced_testrun(testrun, expname_start, expname_end): ts_rows = testrun._db.execute(sql, s_args) testrun.testcases = [] for ts_row in ts_rows: - testrun.testcases += get_testrun_results(testrun, ts_row['id'], ts_row['sumfile'], expname_start=expname_start, expname_end=expname_end) + testrun.testcases += get_testrun_results(testrun, ts_row['id'], ts_row['sumfile'], expname_start=expname_start, expname_end=expname_end, expfile_like=expfile_like) #print("GOT",len(testrun.testcases),"rows for testrun",testrun.to_str()) return testrun @@ -435,9 +479,10 @@ class Timecube: self.unchanged_max_fails = {} # testcase_name -> max # of fails seen self.unchanged_n_configs = {} # testcase_name -> # of configurations seen - self._uninitialized = True # TODOXXX was self._empty + self._uninitialized = True - def _clear_results(self): + # XXX save memory in-between processing slices + def clear_results(self): self._uninitialized = False # Data structures that are emptied repeatedly if we're processing slices of expnames to save memory. 
@@ -446,16 +491,33 @@ class Timecube: self.configurations = {} # summary_key -> configuration_summary dict, computed by get_summary() # grid_key :: string ID of a grid cell, "testcase_name+summary_key+hexsha" - # tc_key :: string ID of a testcase, "expname+outcome+subtest" - self.outcomes_grid = {} # grid_key -> outcome {PASS,FAIL} only + # tc_key :: string ID of a testcase, "expname+result+subtest" + self.results_grid = {} # grid_key -> outcome {PASS,FAIL} only self.subtests_grid = defaultdict(lambda:[]) # testcase_name+summary_key+hexsha -> list of TestcaseRef - # TODOXXX: self._subtests_grid1 = defaultdict(lambda:set()) # testcase_name+summary_key+hexsha -> set of tc_key, computed by get_tc_key() + self._subtests_grid1 = defaultdict(lambda:set()) # testcase_name+summary_key+hexsha -> set of tc_key, computed by get_tc_key() # TODO: additional tables for differential scan of 'adjacent' results (skipping empty grid cells) #self.prev_tested = {} # grid_key -> grid_key for previous test results for this configuration #self.next_tested = {} # grid_key -> grid_key for next test results for this configuration #self.versions_grid = {} # grid_key -> version_id, commit_or_None (for finding distance between grid keys) + + def row_key(self, testcase, summary): + if type(testcase) is Result: + testcase = testcase.expfile + if type(summary) is dict: + summary = get_summary_key(summary) + return f'{testcase}+{summary}' + + def grid_key(self, testcase, summary, version): + if type(testcase) is Result: + testcase = testcase.expfile + if type(summary) is dict: + summary = get_summary_key(summary) + if type(version) is Version: + version = version.version_id + return f'{testcase}+{summary}+{version}' + def collect_expnames(self): testrun_ids = set() for v in self._versions.iter_versions(): @@ -465,51 +527,81 @@ class Timecube: self.expnames.sort() return self.expnames - # TODOXXX FIX BELOW - - def _scan_testrun(self, v, testrun, slice_start=None, slice_end=None): - summary = 
get_summary(testrun, summary_fields) # TODOXXX summary_fields - sk = get_summary_key(summary) # TODOXXX - - if type(slice_start) is int: - slice_start = self.expnames[slice_start] - if type(slice_end) is int and slice_end < len(self.expnames): - slice_end = self.expnames[slice_end] - - # TODO populate self.configurations + def _merge_outcome(self, gk, outcome): + global dejagnu_fail_outcomes + global dejagnu_untested_outcomes + if outcome in dejagnu_untested_outcomes: + return + if outcome in dejagnu_fail_outcomes: + self.results_grid[gk] = 'FAIL' + if gk not in self.results_grid: + self.results_grid[gk] = 'PASS' + + def _scan_testrun(self, v, testrun, expname_slice=None): + summary = get_summary(testrun) + sk = get_summary_key(summary) + + # expname_slice should already be expfile strings + slice_start = None if expname_slice is None else expname_slice[0] + slice_end = None if expname_slice is None else expname_slice[1] + + # populate self.configurations if sk not in self.configurations: self.configurations[sk] = summary - testrun = get_sliced_testrun(testrun, slice_start, slice_end) + testrun = get_sliced_testrun(testrun, slice_start, slice_end, + expfile_like=self._args.expfile_like) + tc_names = set() # XXX expnames for this testrun only for testcase in testrun.testcases: - # TODO filter testcase.name on args.expfile_like + tc_names.add(testcase.expfile) - # TODO populate self.expnames (if not slicing), self.testcase_configurations - tc_names.add(testcase.name) - if slice_start is not None and testcase.name not in self.expnames: + # populate self.expnames (if not slicing), self.testcase_configurations + if expname_slice is None: self.expnames.add(testcase.expfile) - self.testcase_configurations[testcase.expfile].add(sk) # TODOXXX use defaultdict - - # TODO populate self.outcomes_grid, self.subtests_grid1, self.subtests_grid - # TODO populate self.prev_tested, self.next_tested, self.versions_grid + self.testcase_configurations[testcase.expfile].add(sk) + + # 
populate self.results_grid, self._subtests_grid1, self.subtests_grid + gk = self.grid_key(testcase, sk, v) + tk = get_tc_key(testcase) # XXX should exclude baseline_result + self._merge_outcome(gk, testcase.result) # populates self.results_grid + self._subtests_grid1[gk].add(tk) + tc_ref = TestcaseRef() + tc_ref.testcase = testcase + tc_ref.testrun = testrun + tc_ref.tc_key = tk + tc_ref.summary_key = sk + self.subtests_grid[gk].append(tc_ref) + + # TODO: populate self.prev_tested, self.next_tested, self.versions_grid pass - # TODOXXX expname_slice is a 2-tuple - def iter_scan_versions(self, slice_start=None, slice_end=None): + def iter_scan_versions(self, expname_slice=None): if self._uninitialized: self.clear_results() - if not self.expnames and slice_start is not None: + if not self.expnames and expname_slice is not None: self.find_expnames() elif not self.expnames: self.expnames = set() # XXX populated by _scan_testrun() + # convert expname_slice indices to expfile strings + slice_start = None if expname_slice is None else expname_slice[0] + slice_end = None if expname_slice is None else expname_slice[1] + slice_ix1, slice_ix2 = None, None + if type(slice_start) is int: + slice_ix1 = slice_start + slice_start = self.expnames[slice_start] + if type(slice_end) is int and slice_end < len(self.expnames): + slice_ix2 = slice_end + slice_end = self.expnames[slice_end] + if slice_start is not None or slice_end is not None: + expname_slice = (slice_start,slice_end) + for v in self._versions.iter_versions(): if not v.testruns: self.untested_commits.add(v.version_id) for testrun in v.testruns: - #print("GOT",len(v.testruns),"testruns -> ",testrun.to_str()) - self._scan_testrun(v, testrun, slice_start=slice_start, slice_end=slice_end) + self._scan_testrun(v, testrun, expname_slice=expname_slice) testrun.testcases = [] # XXX free up memory for the next testrun XXX yield v, v.testruns @@ -517,9 +609,45 @@ class Timecube: self.expnames = list(self.expnames) 
self.expnames.sort() - # TODO: populate self.untested_expnames, self.unchanged_{expnames,max_fails,n_configs} - for testcase_name in self.expnames: # TODO in slice - pass + # populate self.untested_expnames, self.unchanged_{expnames,max_fails,n_configs} + testcase_state = {} # grid_key minus version -> # of fails expected for unchanged result + # XXX when results don't change, calculation of n_configs is simple + # however, a calculation on all expnames for ranking is more complex + expnames = self.expnames + if expname_slice is not None: + # TODO: need additional annoying code to support expname_slice strings :/ + assert(slice_ix1 is None or type(slice_ix1) is int) + assert(slice_ix2 is None or type(slice_ix2) is int) + if slice_start is not None: + expnames = expnames[slice_ix1:] + if slice_end is not None: + expnames = expnames[:slice_ix2] + for testcase_name in expnames: + is_unchanged, is_untested = True, True + failed_configs = set() + for sk in self.testcase_configurations[testcase_name]: + rowk = self.row_key(testcase_name, sk) # grid_key minus version + for v in self._versions.iter_versions(): + gk = self.grid_key(testcase_name, sk, v) + if gk not in self.results_grid: + continue # untested + is_untested = False + n_fails = 0 + if self.results_grid[gk] == 'FAIL' and gk in self._subtests_grid1: + n_fails = len(self._subtests_grid1[gk]) # TODOXXX check that only fails are added + if testcase_name not in self.unchanged_max_fails or \ + n_fails > self.unchanged_max_fails[testcase_name]: + self.unchanged_max_fails[testcase_name] = n_fails + failed_configs.add(sk) + if rowk not in testcase_state: + testcase_state[rowk] = n_fails + elif testcase_state[rowk] != n_fails: + is_unchanged = False + if is_unchanged: + self.unchanged_expnames.add(testcase_name) + self.unchanged_n_configs[testcase_name] = len(failed_configs) + if is_untested: + self.untested_expnames.add(testcase_name) def iter_versions(self, reverse=False): for v in 
self._versions.iter_versions(reverse): @@ -623,6 +751,11 @@ def main(): versions.get_index(v), len(versions), v.version_id, v.source_commit.hexsha[:7], v.source_commit.summary, len(v.testruns)) + else: + n_runs = 0 + for v in versions.iter_versions(): + n_runs += len(v.testruns) + logging.info("found %d versions, %d testruns in specified range", len(versions), n_runs) # (2a) Collect all expnames in the specified version range: global cube @@ -634,7 +767,6 @@ def main(): for expname in cube.expnames: n += 1 logging.info("expname%d/%d %s", n, len(cube.expnames), expname) - exit(1) slice_inc = args.slice_expnames nslices = math.ceil(len(cube.expnames)/slice_inc) if slice_inc else 1 @@ -642,7 +774,7 @@ def main(): # (2b) For each slice of expnames, collect test results in timecube: slice = (slice_inc*i, slice_inc*(i+1)) if slice_inc else None last_report, n = 0, 0 - for v, testruns in cube.iter_scan_versions(expname_slice=slice): # covers [slice[0], slice[1]) / TODOXXX was slice_start=,slice_end= + for v, testruns in cube.iter_scan_versions(expname_slice=slice): # covers [slice[0], slice[1]) n += 1 if last_report + 5 < time.time() or n == len(versions): last_report = time.time()