From: Serhei Makarov <me@serhei.io>
Date: Tue, 9 Aug 2022 16:49:50 +0000 (-0400)
Subject: WIP: benchmarking grid construction
X-Git-Tag: cauldron2022-demo~30^2~3
X-Git-Url: https://sourceware.org/git/?a=commitdiff_plain;h=fee007ec7675bb1d06a30544ee8d82720df2a2e3;p=bunsen.git

WIP: benchmarking grid construction

With judicious use of the slice_testcases feature I can build
release-4.6..now in about 1:45 on my laptop. Previously I used
a beefy server with lots of memory, so this could be interpreted
as an improvement.
---

diff --git a/bin/R-show-testcases b/bin/R-show-testcases
index 1c031e4..da4a1f8 100755
--- a/bin/R-show-testcases
+++ b/bin/R-show-testcases
@@ -5,6 +5,8 @@
 # I'm using capital 'R-' to indicate a bulky batch-mode script with these limitations.
 # Based on a script by Martin Cermak.
 
+# TODOXXX check terminology expname -> expfile
+
 import argparse
 import sqlite3
 import logging
@@ -44,7 +46,7 @@ def pick_default_branch(source_repo):
             return default_cand
     return 'master/main/trunk' # XXX let it produce an error
 
-# Things that should be a library (2): far-too-grody SQL queries.
+# Things that should be a library (2): far-too-grody SQL queries and nano-ORM.
 
 class Testrun:
     def __init__(self, db, row):
@@ -70,12 +72,51 @@ class Testrun:
             rep += " " + str(self.attr)
         return rep
 
+def get_summary(testrun, summary_fields=None, exclude_version=True):
+    """Create a dict summarizing the configuration for a testrun.
+
+    Used to match testruns with similar configurations across different source commits."""
+    if summary_fields is None:
+        summary_fields = {'architecture', 'target_board', 'distro', 'selinux'}
+    excluded = {'authored_day', 'authored_month', 'authored_year', 'host', 'kernel', 'snapshot', 'source.gitname'} # XXX TODO: Generalize across other projects besides SystemTap.
+    d = {}
+    for field in summary_fields:
+        if exclude_version and field in excluded:
+            continue
+        if field not in testrun.attr:
+            d[field] = 'null'
+            continue
+        d[field] = testrun.attr[field]
+    return d
+
+def get_tc_key(tc, strip_result=False):
+    """Create a string key 'name+subtest+result+baseline_result' for consistent use in dicts."""
+    key = ''
+    key += tc.expfile
+    key += '+' + tc.subtest
+    if not strip_result:
+        key += '+' + tc.result
+        if tc.baseline_result is not None:
+            key += '+' + tc.baseline_result
+    return key
+
+def get_summary_key(summary):
+    """Convert a testrun summary dict to a string key for consistent use in dicts."""
+    # XXX Use sorted() as a hack to avoid worrying about stable dict iteration order.
+    return str(sorted(summary.items()))
+
+#dejagnu_fail_outcomes = {'FAIL','KFAIL','XFAIL','UNTESTED','UNSUPPORTED','ERROR'}
+dejagnu_fail_outcomes = {'FAIL','KFAIL','XFAIL','ERROR'}
+# <- Most likely PASS->UNTESTED is not interesting, FAIL->UNTESTED is.
+dejagnu_untested_outcomes = {'UNTESTED','UNSUPPORTED'}
+
 class Result:
     def __init__(self, testrun, row):
         self._testrun = testrun
         self.expfile = row['expfile']
         self.subtest = row['subtest']
-        self.result = row['result']
+        self.result = row['result'] # TODOXXX was outcome
+        self.baseline_result = None # XXX hypothetical, for diffing
 
 def get_all_testruns(db):
     # XXX Probably needs options to query slightly less than everything.
@@ -144,7 +185,7 @@ def get_testruns_expnames(db, testrun_ids, expfile_like=None):
     return results
 
 # XXX covers [expname_start, expname_end)
-def get_testrun_results(testrun, ts_id, ts_sumfile, expname_start=None, expname_end=None):
+def get_testrun_results(testrun, ts_id, ts_sumfile, expname_start=None, expname_end=None, expfile_like=None):
     s_selectclause = "select tc.expfile, tc.subtest, tc.result"
     s_fromclause = "from dejagnu_testcase_v tc"
     s_whereclause = "where tc.testsuite = ?"
@@ -155,6 +196,9 @@ def get_testrun_results(testrun, ts_id, ts_sumfile, expname_start=None, expname_
     if expname_end is not None:
         s_whereclause += " and tc.expfile < ?"
         s_args += [expname_end]
+    if expfile_like is not None:
+        s_whereclause += " and tc.expfile like ?"
+        s_args += [expfile_like]
     sql = "%s %s %s" % (s_selectclause, s_fromclause, s_whereclause)
     testrun._db.row_factory = sqlite3.Row
     results = [Result(testrun, row) for row in testrun._db.execute(sql, s_args)]
@@ -166,7 +210,7 @@ def get_full_testrun(testrun):
     return get_sliced_testrun(testrun, None, None)
 
 # Populate testrun with individual testcase rows within [expname_start, expname_end):
-def get_sliced_testrun(testrun, expname_start, expname_end):
+def get_sliced_testrun(testrun, expname_start, expname_end, expfile_like=None):
     s_selectclause = "select distinct ts.id, ts.sumfile"
     s_fromclause = "from dejagnu_testsuite ts"
     s_whereclause = "where ts.tr = ?"
@@ -176,7 +220,7 @@ def get_sliced_testrun(testrun, expname_start, expname_end):
     ts_rows = testrun._db.execute(sql, s_args)
     testrun.testcases = []
     for ts_row in ts_rows:
-        testrun.testcases += get_testrun_results(testrun, ts_row['id'], ts_row['sumfile'], expname_start=expname_start, expname_end=expname_end)
+        testrun.testcases += get_testrun_results(testrun, ts_row['id'], ts_row['sumfile'], expname_start=expname_start, expname_end=expname_end, expfile_like=expfile_like)
         #print("GOT",len(testrun.testcases),"rows for testrun",testrun.to_str())
     return testrun
 
@@ -435,9 +479,10 @@ class Timecube:
         self.unchanged_max_fails = {} # testcase_name -> max # of fails seen
         self.unchanged_n_configs = {} # testcase_name -> # of configurations seen
 
-        self._uninitialized = True # TODOXXX was self._empty
+        self._uninitialized = True
 
-    def _clear_results(self):
+    # XXX save memory in-between processing slices
+    def clear_results(self):
         self._uninitialized = False
 
         # Data structures that are emptied repeatedly if we're processing slices of expnames to save memory.
@@ -446,16 +491,33 @@ class Timecube:
         self.configurations = {} # summary_key -> configuration_summary dict, computed by get_summary()
 
         # grid_key :: string ID of a grid cell, "testcase_name+summary_key+hexsha"
-        # tc_key :: string ID of a testcase, "expname+outcome+subtest"
-        self.outcomes_grid = {} # grid_key -> outcome {PASS,FAIL} only
+        # tc_key :: string ID of a testcase, "expfile+subtest+result[+baseline_result]"
+        self.results_grid = {} # grid_key -> outcome {PASS,FAIL} only
         self.subtests_grid = defaultdict(lambda:[]) # testcase_name+summary_key+hexsha -> list of TestcaseRef
-        # TODOXXX: self._subtests_grid1 = defaultdict(lambda:set()) # testcase_name+summary_key+hexsha -> set of tc_key, computed by get_tc_key()
+        self._subtests_grid1 = defaultdict(lambda:set()) # testcase_name+summary_key+hexsha -> set of tc_key, computed by get_tc_key()
 
         # TODO: additional tables for differential scan of 'adjacent' results (skipping empty grid cells)
         #self.prev_tested = {} # grid_key -> grid_key for previous test results for this configuration
         #self.next_tested = {} # grid_key -> grid_key for next test results for this configuration
         #self.versions_grid = {} # grid_key -> version_id, commit_or_None (for finding distance between grid keys)
 
+
+    def row_key(self, testcase, summary):
+        if type(testcase) is Result:
+            testcase = testcase.expfile
+        if type(summary) is dict:
+            summary = get_summary_key(summary)
+        return f'{testcase}+{summary}'
+
+    def grid_key(self, testcase, summary, version):
+        if type(testcase) is Result:
+            testcase = testcase.expfile
+        if type(summary) is dict:
+            summary = get_summary_key(summary)
+        if type(version) is Version:
+            version = version.version_id
+        return f'{testcase}+{summary}+{version}'
+
     def collect_expnames(self):
         testrun_ids = set()
         for v in self._versions.iter_versions():
@@ -465,51 +527,81 @@ class Timecube:
         self.expnames.sort()
         return self.expnames
 
-    # TODOXXX FIX BELOW
-
-    def _scan_testrun(self, v, testrun, slice_start=None, slice_end=None):
-        summary = get_summary(testrun, summary_fields) # TODOXXX summary_fields
-        sk = get_summary_key(summary) # TODOXXX
-
-        if type(slice_start) is int:
-            slice_start = self.expnames[slice_start]
-        if type(slice_end) is int and slice_end < len(self.expnames):
-            slice_end = self.expnames[slice_end]
-
-        # TODO populate self.configurations
+    def _merge_outcome(self, gk, outcome):
+        global dejagnu_fail_outcomes
+        global dejagnu_untested_outcomes
+        if outcome in dejagnu_untested_outcomes:
+            return
+        if outcome in dejagnu_fail_outcomes:
+            self.results_grid[gk] = 'FAIL'
+        if gk not in self.results_grid:
+            self.results_grid[gk] = 'PASS'
+
+    def _scan_testrun(self, v, testrun, expname_slice=None):
+        summary = get_summary(testrun)
+        sk = get_summary_key(summary)
+
+        # expname_slice should already be expfile strings
+        slice_start = None if expname_slice is None else expname_slice[0]
+        slice_end = None if expname_slice is None else expname_slice[1]
+
+        # populate self.configurations
         if sk not in self.configurations:
             self.configurations[sk] = summary
 
-        testrun = get_sliced_testrun(testrun, slice_start, slice_end)
+        testrun = get_sliced_testrun(testrun, slice_start, slice_end,
+                                     expfile_like=self._args.expfile_like)
+        tc_names = set() # XXX expnames for this testrun only
         for testcase in testrun.testcases:
-            # TODO filter testcase.name on args.expfile_like
+            tc_names.add(testcase.expfile)
 
-            # TODO populate self.expnames (if not slicing), self.testcase_configurations
-            tc_names.add(testcase.name)
-            if slice_start is not None and testcase.name not in self.expnames:
+            # populate self.expnames (if not slicing), self.testcase_configurations
+            if expname_slice is None:
                 self.expnames.add(testcase.expfile)
-            self.testcase_configurations[testcase.expfile].add(sk) # TODOXXX use defaultdict
-
-            # TODO populate self.outcomes_grid, self.subtests_grid1, self.subtests_grid
-            # TODO populate self.prev_tested, self.next_tested, self.versions_grid
+            self.testcase_configurations[testcase.expfile].add(sk)
+
+            # populate self.results_grid, self._subtests_grid1, self.subtests_grid
+            gk = self.grid_key(testcase, sk, v)
+            tk = get_tc_key(testcase) # XXX should exclude baseline_result
+            self._merge_outcome(gk, testcase.result) # populates self.results_grid
+            self._subtests_grid1[gk].add(tk)
+            tc_ref = TestcaseRef()
+            tc_ref.testcase = testcase
+            tc_ref.testrun = testrun
+            tc_ref.tc_key = tk
+            tc_ref.summary_key = sk
+            self.subtests_grid[gk].append(tc_ref)
+
+        # TODO: populate self.prev_tested, self.next_tested, self.versions_grid
         pass
 
-    # TODOXXX expname_slice is a 2-tuple
-    def iter_scan_versions(self, slice_start=None, slice_end=None):
+    def iter_scan_versions(self, expname_slice=None):
         if self._uninitialized:
             self.clear_results()
 
-        if not self.expnames and slice_start is not None:
+        if not self.expnames and expname_slice is not None:
             self.find_expnames()
         elif not self.expnames:
             self.expnames = set() # XXX populated by _scan_testrun()
 
+        # convert expname_slice indices to expfile strings
+        slice_start = None if expname_slice is None else expname_slice[0]
+        slice_end = None if expname_slice is None else expname_slice[1]
+        slice_ix1, slice_ix2 = None, None
+        if type(slice_start) is int:
+            slice_ix1 = slice_start
+            slice_start = self.expnames[slice_start]
+        if type(slice_end) is int and slice_end < len(self.expnames):
+            slice_ix2 = slice_end
+            slice_end = self.expnames[slice_end]
+        if slice_start is not None or slice_end is not None:
+            expname_slice = (slice_start,slice_end)
+
         for v in self._versions.iter_versions():
             if not v.testruns:
                 self.untested_commits.add(v.version_id)
             for testrun in v.testruns:
-                #print("GOT",len(v.testruns),"testruns -> ",testrun.to_str())
-                self._scan_testrun(v, testrun, slice_start=slice_start, slice_end=slice_end)
+                self._scan_testrun(v, testrun, expname_slice=expname_slice)
                 testrun.testcases = [] # XXX free up memory for the next testrun XXX
             yield v, v.testruns
 
@@ -517,9 +609,45 @@ class Timecube:
             self.expnames = list(self.expnames)
             self.expnames.sort()
 
-        # TODO: populate self.untested_expnames, self.unchanged_{expnames,max_fails,n_configs}
-        for testcase_name in self.expnames: # TODO in slice
-            pass
+        # populate self.untested_expnames, self.unchanged_{expnames,max_fails,n_configs}
+        testcase_state = {} # grid_key minus version -> # of fails expected for unchanged result
+        # XXX when results don't change, calculation of n_configs is simple
+        # however, a calculation on all expnames for ranking is more complex
+        expnames = self.expnames
+        if expname_slice is not None:
+            # TODO: need additional annoying code to support expname_slice strings :/
+            assert(slice_ix1 is None or type(slice_ix1) is int)
+            assert(slice_ix2 is None or type(slice_ix2) is int)
+            if slice_start is not None:
+                expnames = expnames[slice_ix1:]
+            if slice_end is not None:
+                expnames = expnames[:slice_ix2]
+        for testcase_name in expnames:
+            is_unchanged, is_untested = True, True
+            failed_configs = set()
+            for sk in self.testcase_configurations[testcase_name]:
+                rowk = self.row_key(testcase_name, sk) # grid_key minus version
+                for v in self._versions.iter_versions():
+                    gk = self.grid_key(testcase_name, sk, v)
+                    if gk not in self.results_grid:
+                        continue # untested
+                    is_untested = False
+                    n_fails = 0
+                    if self.results_grid[gk] == 'FAIL' and gk in self._subtests_grid1:
+                        n_fails = len(self._subtests_grid1[gk]) # TODOXXX check that only fails are added
+                        if testcase_name not in self.unchanged_max_fails or \
+                           n_fails > self.unchanged_max_fails[testcase_name]:
+                            self.unchanged_max_fails[testcase_name] = n_fails
+                        failed_configs.add(sk)
+                    if rowk not in testcase_state:
+                        testcase_state[rowk] = n_fails
+                    elif testcase_state[rowk] != n_fails:
+                        is_unchanged = False
+            if is_unchanged:
+                self.unchanged_expnames.add(testcase_name)
+                self.unchanged_n_configs[testcase_name] = len(failed_configs)
+            if is_untested:
+                self.untested_expnames.add(testcase_name)
 
     def iter_versions(self, reverse=False):
         for v in self._versions.iter_versions(reverse):
@@ -623,6 +751,11 @@ def main():
                          versions.get_index(v), len(versions), v.version_id,
                          v.source_commit.hexsha[:7], v.source_commit.summary,
                          len(v.testruns))
+    else:
+        n_runs = 0
+        for v in versions.iter_versions():
+            n_runs += len(v.testruns)
+        logging.info("found %d versions, %d testruns in specified range", len(versions), n_runs)
 
     # (2a) Collect all expnames in the specified version range:
     global cube
@@ -634,7 +767,6 @@ def main():
         for expname in cube.expnames:
             n += 1
             logging.info("expname%d/%d %s", n, len(cube.expnames), expname)
-    exit(1)
 
     slice_inc = args.slice_expnames
     nslices = math.ceil(len(cube.expnames)/slice_inc) if slice_inc else 1
@@ -642,7 +774,7 @@ def main():
         # (2b) For each slice of expnames, collect test results in timecube:
         slice = (slice_inc*i, slice_inc*(i+1)) if slice_inc else None
         last_report, n = 0, 0
-        for v, testruns in cube.iter_scan_versions(expname_slice=slice): # covers [slice[0], slice[1]) / TODOXXX was slice_start=,slice_end=
+        for v, testruns in cube.iter_scan_versions(expname_slice=slice): # covers [slice[0], slice[1])
             n += 1
             if last_report + 5 < time.time() or n == len(versions):
                 last_report = time.time()