[PATCH] PR26775 interruptible/observable grooming
Frank Ch. Eigler
fche@redhat.com
Fri Oct 30 22:36:57 GMT 2020
Hi -
Pushed the following patch to clear up mysterious testsuite
inconsistencies from the previous patch.
Author: Frank Ch. Eigler <fche@redhat.com>
Date: Fri Oct 30 18:26:04 2020 -0400
PR26775: restore thread_work_groom metric to cycle count
... and add new metrics about progress of traversal and groom
processes. Correct one control flow abnormality that could
prematurely end a scanner thread and might have accounted for
the inconsistent test results from the previous patch.
Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 5af4dc6ca08f..ec791c68d460 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,14 @@
+2020-10-30 Frank Ch. Eigler <fche@redhat.com>
+
+ PR26775 cont'd.
+ (thread_main_scanner): Ensure control doesn't leave
+ infinite loop until program exit, even if SIGUSR2.
+ (scan_source_paths): Have traverser clean scanq on
+ SIGUSR2. Emit additional traversed_total metrics.
+ (groom): Emit additional groomed_total metrics.
+ (thread_main_groom): Restore previous thread_work_total
+ metric.
+
2020-10-29 Frank Ch. Eigler <fche@redhat.com>
PR26775
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 8448a501f356..1020a6405978 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -2711,7 +2711,8 @@ thread_main_scanner (void* arg)
add_metric("thread_busy", "role", "scan", -1);
bool gotone = scanq.wait_front(p);
add_metric("thread_busy", "role", "scan", 1);
- if (! gotone) continue; // or break
+
+ if (! gotone) continue; // go back to waiting
try
{
@@ -2750,13 +2751,9 @@ thread_main_scanner (void* arg)
e.report(cerr);
}
- inc_metric("thread_work_total", "role","scan");
-
- if (sigusr2 != forced_groom_count) // stop early if groom triggered
- {
- scanq.clear();
- break;
- }
+ // finished a scanning step -- not a "loop", because we just
+ // consume the traversal loop's work, whenever
+ inc_metric("thread_work_total","role","scan");
}
add_metric("thread_busy", "role", "scan", -1);
@@ -2801,7 +2798,10 @@ scan_source_paths()
if (interrupted) break;
if (sigusr2 != forced_groom_count) // stop early if groom triggered
- break;
+ {
+ scanq.clear(); // clear previously issued work for scanner threads
+ break;
+ }
fts_scanned ++;
@@ -2825,6 +2825,10 @@ scan_source_paths()
if (verbose > 3)
obatched(clog) << "fts skipped by regex " << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
fts_regex ++;
+ if (!ri)
+ inc_metric("traversed_total","type","regex-skipped-I");
+ if (rx)
+ inc_metric("traversed_total","type","regex-skipped-X");
continue;
}
@@ -2832,6 +2836,7 @@ scan_source_paths()
{
case FTS_F:
scanq.push_back (make_pair(rps, *f->fts_statp));
+ inc_metric("traversed_total","type","file");
break;
case FTS_ERR:
@@ -2841,11 +2846,16 @@ scan_source_paths()
auto x = libc_exception(f->fts_errno, string("fts traversal ") + string(f->fts_path));
x.report(cerr);
}
+ inc_metric("traversed_total","type","error");
break;
- default:
- ;
- /* ignore */
+ case FTS_D: // ignore
+ inc_metric("traversed_total","type","directory");
+ break;
+
+ default: // ignore
+ inc_metric("traversed_total","type","other");
+ break;
}
}
gettimeofday (&tv_end, NULL);
@@ -2890,6 +2900,7 @@ thread_main_fts_source_paths (void* arg)
set_metric("thread_busy", "role","traverse", 1);
scan_source_paths();
last_rescan = time(NULL); // NB: now was before scanning
+ // finished a traversal loop
inc_metric("thread_work_total", "role","traverse");
set_metric("thread_busy", "role","traverse", 0);
}
@@ -2970,9 +2981,11 @@ void groom()
files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ inc_metric("groomed_total", "decision", "stale");
}
-
- inc_metric("thread_work_total", "role", "groom");
+ else
+ inc_metric("groomed_total", "decision", "fresh");
+
if (sigusr1 != forced_rescan_count) // stop early if scan triggered
break;
}
@@ -3039,6 +3052,8 @@ thread_main_groom (void* /*arg*/)
set_metric("thread_busy", "role", "groom", 1);
groom ();
last_groom = time(NULL); // NB: now was before grooming
+ // finished a grooming loop
+ inc_metric("thread_work_total", "role", "groom");
set_metric("thread_busy", "role", "groom", 0);
}
catch (const sqlite_exception& e)
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 012e305cf418..13abf89ac5dd 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2020-10-30 Frank Ch. Eigler <fche@redhat.com>
+
+ PR26775
+ * run-debuginfod-find.sh: Modify test for restored
+ thread_work_total semantics for grooming.
+
2020-10-29 Frank Ch. Eigler <fche@redhat.com>
PR26775
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 3af04c00bcd6..0d1fcf1fb829 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -346,8 +346,7 @@ rm -r R/debuginfod-rpms/rhel6/*
kill -USR2 $PID1 # groom cycle
# Expect 3 rpms to be deleted by the groom
# 1 groom cycle already took place at/soon-after startup, so -USR2 makes 2
-# ... times the # of files checked in each cycle
-wait_ready $PORT1 'thread_work_total{role="groom"}' 51
+wait_ready $PORT1 'thread_work_total{role="groom"}' 2
wait_ready $PORT1 'groom{statistic="file d/e"}' 3
rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests
@@ -364,7 +363,7 @@ testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $BUILDID2
# run a groom cycle to force server to drop its fdcache
kill -USR2 $PID1 # groom cycle
-wait_ready $PORT1 'thread_work_total{role="groom"}' 98 # 3 complete cycles
+wait_ready $PORT1 'thread_work_total{role="groom"}' 3
# move it around a couple of times to make it likely to hit a nonexistent entry during iteration
mv R/debuginfod-rpms/rhel7 R/debuginfod-rpms/rhel7renamed
kill -USR1 $PID1 # scan cycle
- FChE
More information about the Elfutils-devel
mailing list