WIP: benchmarking grid construction

author Serhei Makarov <me@serhei.io>

Tue, 9 Aug 2022 16:49:50 +0000 (12:49 -0400)

committer Serhei Makarov <me@serhei.io>

Tue, 9 Aug 2022 16:49:53 +0000 (12:49 -0400)
author Serhei Makarov <me@serhei.io>
Tue, 9 Aug 2022 16:49:50 +0000 (12:49 -0400)
committer Serhei Makarov <me@serhei.io>
Tue, 9 Aug 2022 16:49:53 +0000 (12:49 -0400)
diff --git a/bin/R-show-testcases b/bin/R-show-testcases

index 1c031e4543b0cb3ef9fd751e41097abb2fe10c8d..da4a1f8e2fad900c098bad1e596872feeb42ba7a 100755 (executable)
--- a/bin/R-show-testcases
+++ b/bin/R-show-testcases
@@ -5,6 +5,8 @@
  # I'm using capital 'R-' to indicate a bulky batch-mode script with these limitations.
  # Based on a script by Martin Cermak.
  
+# TODOXXX check terminology expname -> expfile
+
  import argparse
  import sqlite3
  import logging
@@ -44,7 +46,7 @@ def pick_default_branch(source_repo):
              return default_cand
      return 'master/main/trunk' # XXX let it produce an error
  
-# Things that should be a library (2): far-too-grody SQL queries.
+# Things that should be a library (2): far-too-grody SQL queries and nano-ORM.
  
  class Testrun:
      def __init__(self, db, row):
@@ -70,12 +72,51 @@ class Testrun:
              rep += " " + str(self.attr)
          return rep
  
+def get_summary(testrun, summary_fields=None, exclude_version=True):
+    """Summarize the configuration of a testrun as a plain dict.
+
+    The summary is used to match testruns with similar configurations
+    across different source commits.
+
+    testrun -- object whose 'attr' mapping holds the testrun attributes.
+    summary_fields -- iterable of attribute names to report; defaults to
+        architecture, target_board, distro and selinux.
+    exclude_version -- when true, any requested field that appears in the
+        exclusion set below is left out of the summary.
+
+    Returns a dict field -> value; a requested field that is missing from
+    testrun.attr is reported as the string 'null'."""
+    # XXX TODO: Generalize across other projects besides SystemTap.
+    skipped_fields = {'authored_day', 'authored_month', 'authored_year',
+                      'host', 'kernel', 'snapshot', 'source.gitname'}
+    wanted = summary_fields
+    if wanted is None:
+        wanted = {'architecture', 'target_board', 'distro', 'selinux'}
+    return {name: (testrun.attr[name] if name in testrun.attr else 'null')
+            for name in wanted
+            if not (exclude_version and name in skipped_fields)}
+
+def get_tc_key(tc, strip_result=False):
+    """Build the string key 'expfile+subtest+result+baseline_result' for a testcase.
+
+    The key is meant for consistent use in dicts and sets.
+
+    tc -- testcase with expfile, subtest, result and baseline_result attributes.
+    strip_result -- when true, the key stops after 'expfile+subtest'.
+
+    The baseline_result component is only appended when it is not None."""
+    components = [tc.expfile, tc.subtest]
+    if not strip_result:
+        components.append(tc.result)
+        if tc.baseline_result is not None:
+            components.append(tc.baseline_result)
+    return '+'.join(components)
+
+def get_summary_key(summary):
+    """Return a string key for a testrun summary dict, for consistent use in dicts.
+
+    Two summaries with the same items yield the same key regardless of the
+    order in which the items were inserted."""
+    # XXX Sorting the items is a hack to avoid worrying about stable dict iteration order.
+    ordered_items = sorted(summary.items())
+    return str(ordered_items)
+
+# DejaGnu outcomes that Timecube._merge_outcome() records as 'FAIL' in the results grid.
+# Earlier, broader variant that also counted untested/unsupported tests as failures:
+#dejagnu_fail_outcomes = {'FAIL','KFAIL','XFAIL','UNTESTED','UNSUPPORTED','ERROR'}
+dejagnu_fail_outcomes = {'FAIL','KFAIL','XFAIL','ERROR'}
+# <- Most likely PASS->UNTESTED is not interesting, FAIL->UNTESTED is.
+# Outcomes that Timecube._merge_outcome() ignores, leaving the grid cell untouched.
+dejagnu_untested_outcomes = {'UNTESTED','UNSUPPORTED'}
+
  class Result:
+    # One testcase row of a testrun: expfile, subtest and result, plus an
+    # optional baseline_result that stays None unless set later.
      def __init__(self, testrun, row):
          self._testrun = testrun
          self.expfile = row['expfile']
          self.subtest = row['subtest']
-        self.result = row['result']
+        self.result = row['result'] # TODOXXX was outcome
+        self.baseline_result = None # XXX hypothetical, for diffing
  
  def get_all_testruns(db):
      # XXX Probably needs options to query slightly less than everything.
@@ -144,7 +185,7 @@ def get_testruns_expnames(db, testrun_ids, expfile_like=None):
      return results
  
  # XXX covers [expname_start, expname_end)
-def get_testrun_results(testrun, ts_id, ts_sumfile, expname_start=None, expname_end=None):
+def get_testrun_results(testrun, ts_id, ts_sumfile, expname_start=None, expname_end=None, expfile_like=None):
      s_selectclause = "select tc.expfile, tc.subtest, tc.result"
      s_fromclause = "from dejagnu_testcase_v tc"
      s_whereclause = "where tc.testsuite = ?"
@@ -155,6 +196,9 @@ def get_testrun_results(testrun, ts_id, ts_sumfile, expname_start=None, expname_
      if expname_end is not None:
          s_whereclause += " and tc.expfile < ?"
          s_args += [expname_end]
+    if expfile_like is not None:
+        s_whereclause += " and tc.expfile like ?"
+        s_args += [expfile_like]
      sql = "%s %s %s" % (s_selectclause, s_fromclause, s_whereclause)
      testrun._db.row_factory = sqlite3.Row
      results = [Result(testrun, row) for row in testrun._db.execute(sql, s_args)]
@@ -166,7 +210,7 @@ def get_full_testrun(testrun):
      return get_sliced_testrun(testrun, None, None)
  
  # Populate testrun with individual testcase rows within [expname_start, expname_end):
-def get_sliced_testrun(testrun, expname_start, expname_end):
+def get_sliced_testrun(testrun, expname_start, expname_end, expfile_like=None):
      s_selectclause = "select distinct ts.id, ts.sumfile"
      s_fromclause = "from dejagnu_testsuite ts"
      s_whereclause = "where ts.tr = ?"
@@ -176,7 +220,7 @@ def get_sliced_testrun(testrun, expname_start, expname_end):
      ts_rows = testrun._db.execute(sql, s_args)
      testrun.testcases = []
      for ts_row in ts_rows:
-        testrun.testcases += get_testrun_results(testrun, ts_row['id'], ts_row['sumfile'], expname_start=expname_start, expname_end=expname_end)
+        testrun.testcases += get_testrun_results(testrun, ts_row['id'], ts_row['sumfile'], expname_start=expname_start, expname_end=expname_end, expfile_like=expfile_like)
          #print("GOT",len(testrun.testcases),"rows for testrun",testrun.to_str())
      return testrun
  
@@ -435,9 +479,10 @@ class Timecube:
          self.unchanged_max_fails = {} # testcase_name -> max # of fails seen
          self.unchanged_n_configs = {} # testcase_name -> # of configurations seen
  
-        self._uninitialized = True # TODOXXX was self._empty
+        self._uninitialized = True
  
-    def _clear_results(self):
+    # XXX save memory in-between processing slices
+    def clear_results(self):
          self._uninitialized = False
  
          # Data structures that are emptied repeatedly if we're processing slices of expnames to save memory.
@@ -446,16 +491,33 @@ class Timecube:
          self.configurations = {} # summary_key -> configuration_summary dict, computed by get_summary()
  
          # grid_key :: string ID of a grid cell, "testcase_name+summary_key+hexsha"
-        # tc_key :: string ID of a testcase, "expname+outcome+subtest"
-        self.outcomes_grid = {} # grid_key -> outcome {PASS,FAIL} only
+        # tc_key :: string ID of a testcase, "expfile+subtest+result[+baseline_result]", computed by get_tc_key()
+        self.results_grid = {} # grid_key -> outcome {PASS,FAIL} only
          self.subtests_grid = defaultdict(lambda:[]) # testcase_name+summary_key+hexsha -> list of TestcaseRef
-        # TODOXXX: self._subtests_grid1 = defaultdict(lambda:set()) # testcase_name+summary_key+hexsha -> set of tc_key, computed by get_tc_key()
+        self._subtests_grid1 = defaultdict(lambda:set()) # testcase_name+summary_key+hexsha -> set of tc_key, computed by get_tc_key()
  
          # TODO: additional tables for differential scan of 'adjacent' results (skipping empty grid cells)
          #self.prev_tested = {} # grid_key -> grid_key for previous test results for this configuration
          #self.next_tested = {} # grid_key -> grid_key for next test results for this configuration
          #self.versions_grid = {} # grid_key -> version_id, commit_or_None (for finding distance between grid keys)
  
+
+    def row_key(self, testcase, summary):
+        """Return the string ID 'testcase+summary_key' of a grid row.
+
+        A row key is a grid key without the version component.
+
+        testcase -- expfile name, or a Result whose expfile is used.
+        summary -- summary key string, or a summary dict that is converted
+            with get_summary_key().
+        """
+        # isinstance() rather than an exact type() match: a subclass of
+        # Result or dict must be normalized too, otherwise its repr would
+        # silently end up inside the key.
+        if isinstance(testcase, Result):
+            testcase = testcase.expfile
+        if isinstance(summary, dict):
+            summary = get_summary_key(summary)
+        return f'{testcase}+{summary}'
+
+    def grid_key(self, testcase, summary, version):
+        """Return the string ID 'testcase+summary_key+version' of a grid cell.
+
+        testcase -- expfile name, or a Result whose expfile is used.
+        summary -- summary key string, or a summary dict that is converted
+            with get_summary_key().
+        version -- version id, or a Version whose version_id is used.
+        """
+        # isinstance() rather than an exact type() match: a subclass of
+        # Result, dict or Version must be normalized too, otherwise its repr
+        # would silently end up inside the key.
+        if isinstance(testcase, Result):
+            testcase = testcase.expfile
+        if isinstance(summary, dict):
+            summary = get_summary_key(summary)
+        if isinstance(version, Version):
+            version = version.version_id
+        return f'{testcase}+{summary}+{version}'
+
      def collect_expnames(self):
          testrun_ids = set()
          for v in self._versions.iter_versions():
@@ -465,51 +527,81 @@ class Timecube:
          self.expnames.sort()
          return self.expnames
  
-    # TODOXXX FIX BELOW
-
-    def _scan_testrun(self, v, testrun, slice_start=None, slice_end=None):
-        summary = get_summary(testrun, summary_fields) # TODOXXX summary_fields
-        sk = get_summary_key(summary) # TODOXXX
-
-        if type(slice_start) is int:
-            slice_start = self.expnames[slice_start]
-        if type(slice_end) is int and slice_end < len(self.expnames):
-            slice_end = self.expnames[slice_end]
-
-        # TODO populate self.configurations
+    def _merge_outcome(self, gk, outcome):
+        """Fold one testcase outcome into the PASS/FAIL cell self.results_grid[gk].
+
+        An outcome in dejagnu_untested_outcomes leaves the cell untouched.
+        An outcome in dejagnu_fail_outcomes marks the cell 'FAIL'.
+        Any other outcome marks the cell 'PASS' only when the cell is still
+        empty, so a 'FAIL' recorded earlier is never overwritten.
+        """
+        if outcome in dejagnu_untested_outcomes:
+            return
+        if outcome in dejagnu_fail_outcomes:
+            self.results_grid[gk] = 'FAIL'
+        else:
+            self.results_grid.setdefault(gk, 'PASS')
+
+    def _scan_testrun(self, v, testrun, expname_slice=None):
+        """Load the testcase rows of one testrun and merge them into the grid tables.
+
+        v -- version the testrun belongs to; used to build the grid key.
+        testrun -- testrun to scan; its testcases are (re)loaded through
+            get_sliced_testrun(), filtered by self._args.expfile_like.
+        expname_slice -- None to scan every expfile, or a (start, end) pair
+            of expfile strings covering [start, end).
+
+        Updates self.configurations, self.testcase_configurations,
+        self.results_grid, self._subtests_grid1 and self.subtests_grid, and
+        also self.expnames when no slice is given."""
+        summary = get_summary(testrun)
+        sk = get_summary_key(summary)
+
+        # expname_slice should already be expfile strings
+        slice_start = None if expname_slice is None else expname_slice[0]
+        slice_end = None if expname_slice is None else expname_slice[1]
+
+        # populate self.configurations
          if sk not in self.configurations:
              self.configurations[sk] = summary
  
-        testrun = get_sliced_testrun(testrun, slice_start, slice_end)
+        testrun = get_sliced_testrun(testrun, slice_start, slice_end,
+                                     expfile_like=self._args.expfile_like)
+        tc_names = set() # XXX expnames for this testrun only
          for testcase in testrun.testcases:
-            # TODO filter testcase.name on args.expfile_like
+            tc_names.add(testcase.expfile)
  
-            # TODO populate self.expnames (if not slicing), self.testcase_configurations
-            tc_names.add(testcase.name)
-            if slice_start is not None and testcase.name not in self.expnames:
+            # populate self.expnames (if not slicing), self.testcase_configurations
+            if expname_slice is None:
                  self.expnames.add(testcase.expfile)
-            self.testcase_configurations[testcase.expfile].add(sk) # TODOXXX use defaultdict
-
-            # TODO populate self.outcomes_grid, self.subtests_grid1, self.subtests_grid
-            # TODO populate self.prev_tested, self.next_tested, self.versions_grid
+            self.testcase_configurations[testcase.expfile].add(sk)
+
+            # populate self.results_grid, self._subtests_grid1, self.subtests_grid
+            gk = self.grid_key(testcase, sk, v)
+            tk = get_tc_key(testcase) # XXX should exclude baseline_result
+            self._merge_outcome(gk, testcase.result) # populates self.results_grid
+            self._subtests_grid1[gk].add(tk)
+            tc_ref = TestcaseRef()
+            tc_ref.testcase = testcase
+            tc_ref.testrun = testrun
+            tc_ref.tc_key = tk
+            tc_ref.summary_key = sk
+            self.subtests_grid[gk].append(tc_ref)
+
+        # TODO: populate self.prev_tested, self.next_tested, self.versions_grid
          pass
  
-    # TODOXXX expname_slice is a 2-tuple
-    def iter_scan_versions(self, slice_start=None, slice_end=None):
+    def iter_scan_versions(self, expname_slice=None):
          if self._uninitialized:
              self.clear_results()
  
-        if not self.expnames and slice_start is not None:
+        if not self.expnames and expname_slice is not None:
              self.find_expnames()
          elif not self.expnames:
              self.expnames = set() # XXX populated by _scan_testrun()
  
+        # convert expname_slice indices to expfile strings
+        slice_start = None if expname_slice is None else expname_slice[0]
+        slice_end = None if expname_slice is None else expname_slice[1]
+        slice_ix1, slice_ix2 = None, None
+        if type(slice_start) is int:
+            slice_ix1 = slice_start
+            slice_start = self.expnames[slice_start]
+        if type(slice_end) is int and slice_end < len(self.expnames):
+            slice_ix2 = slice_end
+            slice_end = self.expnames[slice_end]
+        if slice_start is not None or slice_end is not None:
+            expname_slice = (slice_start,slice_end)
+
          for v in self._versions.iter_versions():
              if not v.testruns:
                  self.untested_commits.add(v.version_id)
              for testrun in v.testruns:
-                #print("GOT",len(v.testruns),"testruns -> ",testrun.to_str())
-                self._scan_testrun(v, testrun, slice_start=slice_start, slice_end=slice_end)
+                self._scan_testrun(v, testrun, expname_slice=expname_slice)
                  testrun.testcases = [] # XXX free up memory for the next testrun XXX
              yield v, v.testruns
  
@@ -517,9 +609,45 @@ class Timecube:
              self.expnames = list(self.expnames)
              self.expnames.sort()
  
-        # TODO: populate self.untested_expnames, self.unchanged_{expnames,max_fails,n_configs}
-        for testcase_name in self.expnames: # TODO in slice
-            pass
+        # populate self.untested_expnames, self.unchanged_{expnames,max_fails,n_configs}
+        testcase_state = {} # grid_key minus version -> # of fails expected for unchanged result
+        # XXX when results don't change, calculation of n_configs is simple
+        # however, a calculation on all expnames for ranking is more complex
+        expnames = self.expnames
+        if expname_slice is not None:
+            # TODO: need additional annoying code to support expname_slice strings :/
+            assert(slice_ix1 is None or type(slice_ix1) is int)
+            assert(slice_ix2 is None or type(slice_ix2) is int)
+            if slice_start is not None:
+                expnames = expnames[slice_ix1:]
+            if slice_end is not None:
+                expnames = expnames[:slice_ix2]
+        for testcase_name in expnames:
+            is_unchanged, is_untested = True, True
+            failed_configs = set()
+            for sk in self.testcase_configurations[testcase_name]:
+                rowk = self.row_key(testcase_name, sk) # grid_key minus version
+                for v in self._versions.iter_versions():
+                    gk = self.grid_key(testcase_name, sk, v)
+                    if gk not in self.results_grid:
+                        continue # untested
+                    is_untested = False
+                    n_fails = 0
+                    if self.results_grid[gk] == 'FAIL' and gk in self._subtests_grid1:
+                        n_fails = len(self._subtests_grid1[gk]) # TODOXXX check that only fails are added
+                        if testcase_name not in self.unchanged_max_fails or \
+                           n_fails > self.unchanged_max_fails[testcase_name]:
+                            self.unchanged_max_fails[testcase_name] = n_fails
+                        failed_configs.add(sk)
+                    if rowk not in testcase_state:
+                        testcase_state[rowk] = n_fails
+                    elif testcase_state[rowk] != n_fails:
+                        is_unchanged = False
+            if is_unchanged:
+                self.unchanged_expnames.add(testcase_name)
+                self.unchanged_n_configs[testcase_name] = len(failed_configs)
+            if is_untested:
+                self.untested_expnames.add(testcase_name)
  
      def iter_versions(self, reverse=False):
          for v in self._versions.iter_versions(reverse):
@@ -623,6 +751,11 @@ def main():
                           versions.get_index(v), len(versions), v.version_id,
                           v.source_commit.hexsha[:7], v.source_commit.summary,
                           len(v.testruns))
+    else:
+        n_runs = 0
+        for v in versions.iter_versions():
+            n_runs += len(v.testruns)
+        logging.info("found %d versions, %d testruns in specified range", len(versions), n_runs)
  
      # (2a) Collect all expnames in the specified version range:
      global cube
@@ -634,7 +767,6 @@ def main():
          for expname in cube.expnames:
              n += 1
              logging.info("expname%d/%d %s", n, len(cube.expnames), expname)
-    exit(1)
  
      slice_inc = args.slice_expnames
      nslices = math.ceil(len(cube.expnames)/slice_inc) if slice_inc else 1
@@ -642,7 +774,7 @@ def main():
          # (2b) For each slice of expnames, collect test results in timecube:
          slice = (slice_inc*i, slice_inc*(i+1)) if slice_inc else None
          last_report, n = 0, 0
-        for v, testruns in cube.iter_scan_versions(expname_slice=slice): # covers [slice[0], slice[1]) / TODOXXX was slice_start=,slice_end=
+        for v, testruns in cube.iter_scan_versions(expname_slice=slice): # covers [slice[0], slice[1])
              n += 1
              if last_report + 5 < time.time() or n == len(versions):
                  last_report = time.time()
author	Serhei Makarov <me@serhei.io>
	Tue, 9 Aug 2022 16:49:50 +0000 (12:49 -0400)
committer	Serhei Makarov <me@serhei.io>
	Tue, 9 Aug 2022 16:49:53 +0000 (12:49 -0400)