[PATCH] PR26775 interruptible/observable grooming

Frank Ch. Eigler fche@redhat.com
Fri Oct 30 22:36:57 GMT 2020


Hi -

Pushed the following patch to clear up mysterious testsuite
inconsistencies from the previous patch.


Author: Frank Ch. Eigler <fche@redhat.com>
Date:   Fri Oct 30 18:26:04 2020 -0400

    PR26775: restore thread_work_groom metric to cycle count
    
    ... and add new metrics about progress of traversal and groom
    processes.  Correct one control flow abnormality that could
    prematurely end a scanner thread and might have accounted for
    the inconsistent test results from the previous patch.
    
    Signed-off-by: Frank Ch. Eigler <fche@redhat.com>

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 5af4dc6ca08f..ec791c68d460 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,14 @@
+2020-10-30  Frank Ch. Eigler  <fche@redhat.com>
+
+	PR26775 cont'd.
+	(thread_main_scanner): Ensure control doesn't leave
+	infinite loop until program exit, even if SIGUSR2.
+	(scan_source_paths): Have traverser clean scanq on
+	SIGUSR2.  Emit additional traversed_total metrics.
+	(groom): Emit additional groomed_total metrics.
+	(thread_main_groom): Restore previous thread_work_total
+	metric.
+
 2020-10-29  Frank Ch. Eigler  <fche@redhat.com>
 
 	PR26775
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 8448a501f356..1020a6405978 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -2711,7 +2711,8 @@ thread_main_scanner (void* arg)
       add_metric("thread_busy", "role", "scan", -1);
       bool gotone = scanq.wait_front(p);
       add_metric("thread_busy", "role", "scan", 1);
-      if (! gotone) continue; // or break
+
+      if (! gotone) continue; // go back to waiting
 
       try
         {
@@ -2750,13 +2751,9 @@ thread_main_scanner (void* arg)
           e.report(cerr);
         }
 
-      inc_metric("thread_work_total", "role","scan");
-      
-      if (sigusr2 != forced_groom_count) // stop early if groom triggered 
-        {
-          scanq.clear();
-          break;
-        }
+      // finished a scanning step -- not a "loop", because we just
+      // consume the traversal loop's work, whenever
+      inc_metric("thread_work_total","role","scan");
     }
 
   add_metric("thread_busy", "role", "scan", -1);
@@ -2801,7 +2798,10 @@ scan_source_paths()
     if (interrupted) break;
 
     if (sigusr2 != forced_groom_count) // stop early if groom triggered 
-      break;
+      {
+        scanq.clear(); // clear previously issued work for scanner threads
+        break;
+      }
     
     fts_scanned ++;
 
@@ -2825,6 +2825,10 @@ scan_source_paths()
         if (verbose > 3)
           obatched(clog) << "fts skipped by regex " << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
         fts_regex ++;
+        if (!ri)
+          inc_metric("traversed_total","type","regex-skipped-I");
+        if (rx)
+          inc_metric("traversed_total","type","regex-skipped-X");
         continue;
       }
 
@@ -2832,6 +2836,7 @@ scan_source_paths()
       {
       case FTS_F:
         scanq.push_back (make_pair(rps, *f->fts_statp));
+        inc_metric("traversed_total","type","file");
         break;
 
       case FTS_ERR:
@@ -2841,11 +2846,16 @@ scan_source_paths()
           auto x = libc_exception(f->fts_errno, string("fts traversal ") + string(f->fts_path));
           x.report(cerr);
         }
+        inc_metric("traversed_total","type","error");
         break;
 
-      default:
-        ;
-        /* ignore */
+      case FTS_D: // ignore
+        inc_metric("traversed_total","type","directory");
+        break;
+        
+      default: // ignore
+        inc_metric("traversed_total","type","other");
+        break;
       }
   }
   gettimeofday (&tv_end, NULL);
@@ -2890,6 +2900,7 @@ thread_main_fts_source_paths (void* arg)
             set_metric("thread_busy", "role","traverse", 1);
             scan_source_paths();
             last_rescan = time(NULL); // NB: now was before scanning
+            // finished a traversal loop
             inc_metric("thread_work_total", "role","traverse");
             set_metric("thread_busy", "role","traverse", 0);
           }
@@ -2970,9 +2981,11 @@ void groom()
           files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
           files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
           files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+          inc_metric("groomed_total", "decision", "stale");
         }
-      
-      inc_metric("thread_work_total", "role", "groom");
+      else
+        inc_metric("groomed_total", "decision", "fresh");
+
       if (sigusr1 != forced_rescan_count) // stop early if scan triggered
         break;
     }
@@ -3039,6 +3052,8 @@ thread_main_groom (void* /*arg*/)
             set_metric("thread_busy", "role", "groom", 1);
             groom ();
             last_groom = time(NULL); // NB: now was before grooming
+            // finished a grooming loop
+            inc_metric("thread_work_total", "role", "groom");
             set_metric("thread_busy", "role", "groom", 0);
           }
         catch (const sqlite_exception& e)
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 012e305cf418..13abf89ac5dd 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2020-10-30  Frank Ch. Eigler  <fche@redhat.com>
+
+	PR26775
+	* run-debuginfod-find.sh: Modify test for restored
+	thread_work_total semantics for grooming.
+
 2020-10-29  Frank Ch. Eigler  <fche@redhat.com>
 
 	PR26775
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 3af04c00bcd6..0d1fcf1fb829 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -346,8 +346,7 @@ rm -r R/debuginfod-rpms/rhel6/*
 kill -USR2 $PID1  # groom cycle
 # Expect 3 rpms to be deleted by the groom
 # 1 groom cycle already took place at/soon-after startup, so -USR2 makes 2
-# ... times the # of files checked in each cycle
-wait_ready $PORT1 'thread_work_total{role="groom"}' 51
+wait_ready $PORT1 'thread_work_total{role="groom"}' 2
 wait_ready $PORT1 'groom{statistic="file d/e"}' 3
 
 rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests
@@ -364,7 +363,7 @@ testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $BUILDID2
 
 # run a groom cycle to force server to drop its fdcache
 kill -USR2 $PID1  # groom cycle
-wait_ready $PORT1 'thread_work_total{role="groom"}' 98 # 3 complete cycles
+wait_ready $PORT1 'thread_work_total{role="groom"}' 3
 # move it around a couple of times to make it likely to hit a nonexistent entry during iteration
 mv R/debuginfod-rpms/rhel7 R/debuginfod-rpms/rhel7renamed
 kill -USR1 $PID1  # scan cycle



- FChE



More information about the Elfutils-devel mailing list