PATCH: PR27092 debuginfod improve low-memory operations

Frank Ch. Eigler fche@redhat.com
Fri Feb 5 01:45:32 GMT 2021


Author: Frank Ch. Eigler <fche@redhat.com>
Date:   Thu Feb 4 20:31:56 2021 -0500

    PR27092: debuginfod low-memory handling
    
    A couple of closely related pieces of work allow more early warning
    about low storage/memory conditions:
    
    - New prometheus metrics to track filesystem freespace, and more
      details about some errors.
    - Frequent checking of $TMPDIR freespace, to trigger fdcache
      emergency flushes.
    - Switch to floating point prometheus metrics, to communicate
      fractions - and short time intervals - accurately.
    - Fix startup-time pthread-creation error handling.
    
    Testing is smoke-test-level only as it is hard to create
    free-space-limited $TMPDIRs. Locally tested against tiny through
    medium tmpfs filesystems, with or without sqlite db also there.  Shows
    a pleasant stream of diagnostics and metrics during shortage but
    generally does not fail outright.  However, catching an actual
    libstdc++- or kernel-level OOM is beyond our ken.
    
    Signed-off-by: Frank Ch. Eigler <fche@redhat.com>

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 2872d667fc37..8de885223de3 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,17 @@
+2021-02-04  Frank Ch. Eigler <fche@redhat.com>
+
+	PR27092 low-memory handling
+	* debuginfod.cxx (fdcache_mintmp): New parameter, with cmd-line option.
+	(parse_opt): Parse it.
+	(main): Default it.
+	(statfs_free_enough_p): New function.
+	(libarchive_fdcache::*): Call it to trigger emergency fdcache flush.
+	(thread_main_scanner): Call it to report filesystem fullness metrics.
+	(groom): Ditto.
+	(set/add_metric): Take double rather than int64_t values.
+	(archive_exception): Propagate suberror to metric label.
+	(main): Detect pthread creation fatal errors properly.
+
 2021-02-02  Frank Ch. Eigler <fche@redhat.com>
 
 	PR27323
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index c9c0dc9bb819..3b5245296b9d 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -365,6 +365,8 @@ static const struct argp_option options[] =
    { "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
 #define ARGP_KEY_FDCACHE_PREFETCH 0x1003
    { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
+#define ARGP_KEY_FDCACHE_MINTMP 0x1004
+   { "fdcache-mintmp", ARGP_KEY_FDCACHE_MINTMP, "NUM", 0, "Minimum free space% on tmpdir.", 0 },
    { NULL, 0, NULL, 0, NULL, 0 }
   };
 
@@ -408,19 +410,20 @@ static bool traverse_logical;
 static long fdcache_fds;
 static long fdcache_mbs;
 static long fdcache_prefetch;
+static long fdcache_mintmp;
 static string tmpdir;
 
-static void set_metric(const string& key, int64_t value);
+static void set_metric(const string& key, double value);
 // static void inc_metric(const string& key);
 static void set_metric(const string& metric,
                        const string& lname, const string& lvalue,
-                       int64_t value);
+                       double value);
 static void inc_metric(const string& metric,
                        const string& lname, const string& lvalue);
 static void add_metric(const string& metric,
                        const string& lname, const string& lvalue,
-                       int64_t value);
-// static void add_metric(const string& metric, int64_t value);
+                       double value);
+// static void add_metric(const string& metric, double value);
 
 class tmp_inc_metric { // a RAII style wrapper for exception-safe scoped increment & decrement
   string m, n, v;
@@ -452,7 +455,7 @@ class tmp_ms_metric { // a RAII style wrapper for exception-safe scoped timing
     double deltas = (ts_end.tv_sec - ts_start.tv_sec)
       + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
 
-    add_metric (m + "_milliseconds_sum", n, v, (deltas*1000));
+    add_metric (m + "_milliseconds_sum", n, v, (deltas*1000.0));
     inc_metric (m + "_milliseconds_count", n, v);
   }
 };
@@ -539,6 +542,9 @@ parse_opt (int key, char *arg,
     case ARGP_KEY_FDCACHE_PREFETCH:
       fdcache_prefetch = atol (arg);
       break;
+    case ARGP_KEY_FDCACHE_MINTMP:
+      fdcache_mintmp = atol (arg);
+      break;
     case ARGP_KEY_ARG:
       source_paths.insert(string(arg));
       break;
@@ -603,7 +609,7 @@ struct archive_exception: public reportable_exception
   }
   archive_exception(struct archive* a, const string& msg):
     reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
-    inc_metric("error_count","libarchive",msg);
+    inc_metric("error_count","libarchive",msg + ": " + string(archive_error_string(a) ?: "?"));
   }
 };
 
@@ -1092,6 +1098,23 @@ canon_pathname (const string& input)
 }
 
 
+// Estimate available free space for a given filesystem via statfs(2)
+// and publish it as the "filesys_free_ratio" metric, labelled purpose=LABEL.
+// NB: despite the name, returns true when the free fraction is known to
+// be SMALLER than MINFREE percent; returns false if there is enough
+// room or if the fraction is unknown (statfs failure, zero total blocks).
+bool statfs_free_enough_p(const string& path, const string& label, long minfree = 0)
+{
+  struct statfs sfs;
+  int rc = statfs(path.c_str(), &sfs);
+  if (rc != 0 || sfs.f_blocks == 0) // unknown; also avoids a 0/0 -> NaN metric
+    return false;
+  double s = (double) sfs.f_bavail / (double) sfs.f_blocks;
+  set_metric("filesys_free_ratio","purpose",label, s);
+  return ((s * 100.0) < minfree);
+}
+
+
 
 // A map-like class that owns a cache of file descriptors (indexed by
 // file / content names).
@@ -1179,7 +1202,13 @@ class libarchive_fdcache
     set_metrics();
 
     // NB: we age the cache at lookup time too
-    if (front_p)
+    if (statfs_free_enough_p(tmpdir, "tmpdir", fdcache_mintmp))
+      {
+        inc_metric("fdcache_op_count","op","emerg-flush");
+        obatched(clog) << "fdcache emergency flush for filling tmpdir" << endl;
+        this->limit(0, 0); // emergency flush
+      }
+    else if (front_p)
       this->limit(max_fds, max_mbs); // age cache if required
   }
 
@@ -1202,7 +1231,13 @@ class libarchive_fdcache
         }
     }
 
-    if (fd >= 0)
+    if (statfs_free_enough_p(tmpdir, "tmpdir", fdcache_mintmp)) // true => tmpdir free space below threshold
+      {
+        inc_metric("fdcache_op_count","op","emerg-flush");
+        obatched(clog) << "fdcache emergency flush for filling tmpdir" << endl; // terminate the log line
+        this->limit(0, 0); // emergency flush
+      }
+    else if (fd >= 0)
       this->limit(max_fds, max_mbs); // age cache if required
 
     return fd;
@@ -1240,6 +1275,7 @@ class libarchive_fdcache
       }
   }
 
+
   void limit(long maxfds, long maxmbs, bool metrics_p = true)
   {
     if (verbose > 3 && (this->max_fds != maxfds || this->max_mbs != maxmbs))
@@ -1277,6 +1313,7 @@ class libarchive_fdcache
     if (metrics_p) set_metrics();
   }
 
+
   ~libarchive_fdcache()
   {
     // unlink any fdcache entries in $TMPDIR
@@ -1729,7 +1766,7 @@ handle_buildid (MHD_Connection* conn,
 
 ////////////////////////////////////////////////////////////////////////
 
-static map<string,int64_t> metrics; // arbitrary data for /metrics query
+static map<string,double> metrics; // arbitrary data for /metrics query
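-// NB: store int64_t since all our metrics are integers; prometheus accepts double
+// NB: store double since some metrics are fractional (ratios, short time intervals); prometheus accepts double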
 static mutex metrics_lock;
 // NB: these objects get released during the process exit via global dtors
@@ -1758,7 +1795,7 @@ metric_label(const string& name, const string& value)
 // add prometheus-format metric name + label tuple (if any) + value
 
 static void
-set_metric(const string& metric, int64_t value)
+set_metric(const string& metric, double value)
 {
   unique_lock<mutex> lock(metrics_lock);
   metrics[metric] = value;
@@ -1774,7 +1811,7 @@ inc_metric(const string& metric)
 static void
 set_metric(const string& metric,
            const string& lname, const string& lvalue,
-           int64_t value)
+           double value)
 {
   string key = (metric + "{" + metric_label(lname, lvalue) + "}");
   unique_lock<mutex> lock(metrics_lock);
@@ -1792,7 +1829,7 @@ inc_metric(const string& metric,
 static void
 add_metric(const string& metric,
            const string& lname, const string& lvalue,
-           int64_t value)
+           double value)
 {
   string key = (metric + "{" + metric_label(lname, lvalue) + "}");
   unique_lock<mutex> lock(metrics_lock);
@@ -1801,7 +1838,7 @@ add_metric(const string& metric,
 #if 0
 static void
 add_metric(const string& metric,
-           int64_t value)
+           double value)
 {
   unique_lock<mutex> lock(metrics_lock);
   metrics[metric] += value;
@@ -2825,11 +2862,17 @@ thread_main_scanner (void* arg)
           e.report(cerr);
         }
 
+      if (fts_cached || fts_executable || fts_debuginfo || fts_sourcefiles || fts_sref || fts_sdef)
+        {} // NB: not just if a successful scan - we might have encountered -ENOSPC & failed
+      (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size
+      (void) statfs_free_enough_p(tmpdir, "tmpdir"); // this too, in case of fdcache/tmpfile usage
+
       // finished a scanning step -- not a "loop", because we just
       // consume the traversal loop's work, whenever
       inc_metric("thread_work_total","role","scan");
     }
 
+
   add_metric("thread_busy", "role", "scan", -1);
   return 0;
 }
@@ -3101,6 +3144,8 @@ void groom()
 
   database_stats_report();
 
+  (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size
+
   sqlite3_db_release_memory(db); // shrink the process if possible
   sqlite3_db_release_memory(dbq); // ... for both connections
 
@@ -3252,6 +3297,7 @@ main (int argc, char *argv[])
     fdcache_mbs = 1024; // 1 gigabyte
   else
     fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
+  fdcache_mintmp = 25; // emergency flush at 25% remaining (75% full)
   fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
   fdcache_fds = (concurrency + fdcache_prefetch) * 2;
 
@@ -3408,6 +3454,7 @@ main (int argc, char *argv[])
   obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
   obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
   obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
+  obatched(clog) << "fdcache tmpdir min% " << fdcache_mintmp << endl;
   obatched(clog) << "groom time " << groom_s << endl;
   if (scan_archives.size()>0)
     {
@@ -3425,22 +3472,22 @@ main (int argc, char *argv[])
 
   pthread_t pt;
   rc = pthread_create (& pt, NULL, thread_main_groom, NULL);
-  if (rc < 0)
-    error (0, 0, "warning: cannot spawn thread (%d) to groom database\n", rc);
+  if (rc)
+    error (EXIT_FAILURE, rc, "cannot spawn thread to groom database\n");
   else
     all_threads.push_back(pt);
 
   if (scan_files || scan_archives.size() > 0)
     {
-      pthread_create (& pt, NULL, thread_main_fts_source_paths, NULL);
-      if (rc < 0)
-        error (0, 0, "warning: cannot spawn thread (%d) to traverse source paths\n", rc);
+      rc = pthread_create (& pt, NULL, thread_main_fts_source_paths, NULL);
+      if (rc)
+        error (EXIT_FAILURE, rc, "cannot spawn thread to traverse source paths\n");
       all_threads.push_back(pt);
       for (unsigned i=0; i<concurrency; i++)
         {
-          pthread_create (& pt, NULL, thread_main_scanner, NULL);
-          if (rc < 0)
-            error (0, 0, "warning: cannot spawn thread (%d) to scan source files / archives\n", rc);
+          rc = pthread_create (& pt, NULL, thread_main_scanner, NULL);
+          if (rc)
+            error (EXIT_FAILURE, rc, "cannot spawn thread to scan source files / archives\n");
           all_threads.push_back(pt);
         }
     }
diff --git a/doc/ChangeLog b/doc/ChangeLog
index c316047cc8ba..5cd4fe1593d2 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+2021-02-04  Frank Ch. Eigler <fche@redhat.com>
+
+	* debuginfod.8: Mention new --fdcache-mintmp option.
+
 2020-12-11  Dmitry V. Levin  <ldv@altlinux.org>
 
 	* debuginfod.8: Fix spelling typos.
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index a836718f1013..c33a4b6bc085 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -212,6 +212,17 @@ $TMPDIR or \fB/tmp\fP filesystem.  This is because that is where the
 most recently used extracted files are kept.  Grooming cleans this
 cache.
 
+.TP
+.B "\-\-fdcache\-mintmp=NUM"
+Configure a disk space threshold for emergency flushing of the cache.
+The filesystem holding the cache is checked periodically.  If the
+available space falls below the given percentage, the cache is
+flushed, and the fdcache will stay disabled until the next groom
+cycle.  This mechanism, along with a few associated /metrics on the
+webapi, is intended to give an operator notice about storage scarcity - which
+can translate to RAM scarcity if the disk happens to be on a RAM
+virtual disk.  The default threshold is 25%.
+
 .TP
 .B "\-v"
 Increase verbosity of logging to the standard error file descriptor.
diff --git a/tests/ChangeLog b/tests/ChangeLog
index c6e9f6184e36..907b635198ac 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,7 @@
+2021-02-04  Frank Ch. Eigler <fche@redhat.com>
+
+	* run-debuginfod-find.sh: Smoke test --fdcache-mintmp option handling.
+
 2021-01-31  Sergei Trofimovich  <slyfox@gentoo.org>
 
 	* Makefile.am (TESTS_ENVIRONMENT): export CC variable
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 7fd3420ab20a..6340f60eccab 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -100,7 +100,7 @@ wait_ready()
 # would see an error (running the testsuite under root is NOT encouraged).
 ln -s R/nothing.rpm R/nothing.rpm
 
-env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog4 2>&1 &
+env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 --fdcache-mintmp 0 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog4 2>&1 &
 PID1=$!
 tempfiles vlog4
 # Server must become ready



More information about the Elfutils-devel mailing list