PATCH: PR27092 debuginfod improve low-memory operations
Frank Ch. Eigler
fche@redhat.com
Fri Feb 5 01:45:32 GMT 2021
Author: Frank Ch. Eigler <fche@redhat.com>
Date: Thu Feb 4 20:31:56 2021 -0500
PR27092: debuginfod low-memory handling
A couple of closely related pieces of work allow more early warning
about low storage/memory conditions:
- New prometheus metrics to track filesystem freespace, and more
details about some errors.
- Frequent checking of $TMPDIR freespace, to trigger fdcache
emergency flushes.
- Switch to floating point prometheus metrics, to communicate
fractions - and short time intervals - accurately.
- Fix startup-time pthread-creation error handling.
Testing is smoke-test-level only as it is hard to create
free-space-limited $TMPDIRs. Locally tested against tiny through
medium tmpfs filesystems, with or without sqlite db also there. Shows
a pleasant stream of diagnostics and metrics during shortage but
generally does not fail outright. However, catching an actual
libstdc++- or kernel-level OOM is beyond our ken.
Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 2872d667fc37..8de885223de3 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,17 @@
+2021-02-04 Frank Ch. Eigler <fche@redhat.com>
+
+ PR27092 low-memory handling
+ * debuginfod.cxx (fdcache_mintmp): New parameter, with cmd-line option.
+ (parse_opt): Parse it.
+ (main): Default it.
+ (statfs_free_enough_p): New function.
+ (libarchive_fdcache::*): Call it to trigger emergency fdcache flush.
+ (thread_main_scanner): Call it to report filesystem fullness metrics.
+ (groom): Ditto.
+ (set/add_metric): Take double rather than int64_t values.
+ (archive_exception): Propagate suberror to metric label.
+ (main): Detect pthread creation fatal errors properly.
+
2021-02-02 Frank Ch. Eigler <fche@redhat.com>
PR27323
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index c9c0dc9bb819..3b5245296b9d 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -365,6 +365,8 @@ static const struct argp_option options[] =
{ "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
#define ARGP_KEY_FDCACHE_PREFETCH 0x1003
{ "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
+#define ARGP_KEY_FDCACHE_MINTMP 0x1004
+ { "fdcache-mintmp", ARGP_KEY_FDCACHE_MINTMP, "NUM", 0, "Minimum free space% on tmpdir.", 0 },
{ NULL, 0, NULL, 0, NULL, 0 }
};
@@ -408,19 +410,20 @@ static bool traverse_logical;
static long fdcache_fds;
static long fdcache_mbs;
static long fdcache_prefetch;
+static long fdcache_mintmp;
static string tmpdir;
-static void set_metric(const string& key, int64_t value);
+static void set_metric(const string& key, double value);
// static void inc_metric(const string& key);
static void set_metric(const string& metric,
const string& lname, const string& lvalue,
- int64_t value);
+ double value);
static void inc_metric(const string& metric,
const string& lname, const string& lvalue);
static void add_metric(const string& metric,
const string& lname, const string& lvalue,
- int64_t value);
-// static void add_metric(const string& metric, int64_t value);
+ double value);
+// static void add_metric(const string& metric, double value);
class tmp_inc_metric { // a RAII style wrapper for exception-safe scoped increment & decrement
string m, n, v;
@@ -452,7 +455,7 @@ class tmp_ms_metric { // a RAII style wrapper for exception-safe scoped timing
double deltas = (ts_end.tv_sec - ts_start.tv_sec)
+ (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
- add_metric (m + "_milliseconds_sum", n, v, (deltas*1000));
+ add_metric (m + "_milliseconds_sum", n, v, (deltas*1000.0));
inc_metric (m + "_milliseconds_count", n, v);
}
};
@@ -539,6 +542,9 @@ parse_opt (int key, char *arg,
case ARGP_KEY_FDCACHE_PREFETCH:
fdcache_prefetch = atol (arg);
break;
+ case ARGP_KEY_FDCACHE_MINTMP:
+ fdcache_mintmp = atol (arg);
+ break;
case ARGP_KEY_ARG:
source_paths.insert(string(arg));
break;
@@ -603,7 +609,7 @@ struct archive_exception: public reportable_exception
}
archive_exception(struct archive* a, const string& msg):
reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
- inc_metric("error_count","libarchive",msg);
+ inc_metric("error_count","libarchive",msg + ": " + string(archive_error_string(a) ?: "?"));
}
};
@@ -1092,6 +1098,23 @@ canon_pathname (const string& input)
}
+// Estimate available free space for the filesystem holding PATH via statfs(2), and publish it as the "filesys_free_ratio" metric labelled purpose=LABEL.
+// NB: despite the _enough_p name, return true if the free fraction is known to be SMALLER than the given minimum percentage, i.e. space is running short.
+// Return false otherwise, including when statfs fails; with the default minfree of 0 a non-negative ratio never triggers, so such calls just refresh the metric.
+bool statfs_free_enough_p(const string& path, const string& label, long minfree = 0)
+{
+ struct statfs sfs;
+ int rc = statfs(path.c_str(), &sfs);
+ if (rc == 0)
+ {
+ double s = (double) sfs.f_bavail / (double) sfs.f_blocks; // free fraction; NOTE(review): not finite if f_blocks is 0 - confirm whether that can occur
+ set_metric("filesys_free_ratio","purpose",label, s);
+ return ((s * 100.0) < minfree); // scale the ratio to a percentage before comparing
+ }
+ return false; // statfs failed: free space unknown, so do not report a shortage
+}
+
+
// A map-like class that owns a cache of file descriptors (indexed by
// file / content names).
@@ -1179,7 +1202,13 @@ class libarchive_fdcache
set_metrics();
// NB: we age the cache at lookup time too
- if (front_p)
+ if (statfs_free_enough_p(tmpdir, "tmpdir", fdcache_mintmp))
+ {
+ inc_metric("fdcache_op_count","op","emerg-flush");
+ obatched(clog) << "fdcache emergency flush for filling tmpdir" << endl;
+ this->limit(0, 0); // emergency flush
+ }
+ else if (front_p)
this->limit(max_fds, max_mbs); // age cache if required
}
@@ -1202,7 +1231,13 @@ class libarchive_fdcache
}
}
- if (fd >= 0)
+ if (statfs_free_enough_p(tmpdir, "tmpdir", fdcache_mintmp))
+ {
+ inc_metric("fdcache_op_count","op","emerg-flush");
+ obatched(clog) << "fdcache emergency flush for filling tmpdir";
+ this->limit(0, 0); // emergency flush
+ }
+ else if (fd >= 0)
this->limit(max_fds, max_mbs); // age cache if required
return fd;
@@ -1240,6 +1275,7 @@ class libarchive_fdcache
}
}
+
void limit(long maxfds, long maxmbs, bool metrics_p = true)
{
if (verbose > 3 && (this->max_fds != maxfds || this->max_mbs != maxmbs))
@@ -1277,6 +1313,7 @@ class libarchive_fdcache
if (metrics_p) set_metrics();
}
+
~libarchive_fdcache()
{
// unlink any fdcache entries in $TMPDIR
@@ -1729,7 +1766,7 @@ handle_buildid (MHD_Connection* conn,
////////////////////////////////////////////////////////////////////////
-static map<string,int64_t> metrics; // arbitrary data for /metrics query
+static map<string,double> metrics; // arbitrary data for /metrics query
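-// NB: store int64_t since all our metrics are integers; prometheus accepts double
+// NB: store double since some metrics are fractions or short time intervals; prometheus accepts double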
static mutex metrics_lock;
// NB: these objects get released during the process exit via global dtors
@@ -1758,7 +1795,7 @@ metric_label(const string& name, const string& value)
// add prometheus-format metric name + label tuple (if any) + value
static void
-set_metric(const string& metric, int64_t value)
+set_metric(const string& metric, double value)
{
unique_lock<mutex> lock(metrics_lock);
metrics[metric] = value;
@@ -1774,7 +1811,7 @@ inc_metric(const string& metric)
static void
set_metric(const string& metric,
const string& lname, const string& lvalue,
- int64_t value)
+ double value)
{
string key = (metric + "{" + metric_label(lname, lvalue) + "}");
unique_lock<mutex> lock(metrics_lock);
@@ -1792,7 +1829,7 @@ inc_metric(const string& metric,
static void
add_metric(const string& metric,
const string& lname, const string& lvalue,
- int64_t value)
+ double value)
{
string key = (metric + "{" + metric_label(lname, lvalue) + "}");
unique_lock<mutex> lock(metrics_lock);
@@ -1801,7 +1838,7 @@ add_metric(const string& metric,
#if 0
static void
add_metric(const string& metric,
- int64_t value)
+ double value)
{
unique_lock<mutex> lock(metrics_lock);
metrics[metric] += value;
@@ -2825,11 +2862,17 @@ thread_main_scanner (void* arg)
e.report(cerr);
}
+ if (fts_cached || fts_executable || fts_debuginfo || fts_sourcefiles || fts_sref || fts_sdef)
+ {} // NB: not just if a successful scan - we might have encountered -ENOSPC & failed
+ (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size
+ (void) statfs_free_enough_p(tmpdir, "tmpdir"); // this too, in case of fdcache/tmpfile usage
+
// finished a scanning step -- not a "loop", because we just
// consume the traversal loop's work, whenever
inc_metric("thread_work_total","role","scan");
}
+
add_metric("thread_busy", "role", "scan", -1);
return 0;
}
@@ -3101,6 +3144,8 @@ void groom()
database_stats_report();
+ (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size
+
sqlite3_db_release_memory(db); // shrink the process if possible
sqlite3_db_release_memory(dbq); // ... for both connections
@@ -3252,6 +3297,7 @@ main (int argc, char *argv[])
fdcache_mbs = 1024; // 1 gigabyte
else
fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
+ fdcache_mintmp = 25; // emergency flush at 25% remaining (75% full)
fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
fdcache_fds = (concurrency + fdcache_prefetch) * 2;
@@ -3408,6 +3454,7 @@ main (int argc, char *argv[])
obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
+ obatched(clog) << "fdcache tmpdir min% " << fdcache_mintmp << endl;
obatched(clog) << "groom time " << groom_s << endl;
if (scan_archives.size()>0)
{
@@ -3425,22 +3472,22 @@ main (int argc, char *argv[])
pthread_t pt;
rc = pthread_create (& pt, NULL, thread_main_groom, NULL);
- if (rc < 0)
- error (0, 0, "warning: cannot spawn thread (%d) to groom database\n", rc);
+ if (rc)
+ error (EXIT_FAILURE, rc, "cannot spawn thread to groom database\n");
else
all_threads.push_back(pt);
if (scan_files || scan_archives.size() > 0)
{
- pthread_create (& pt, NULL, thread_main_fts_source_paths, NULL);
- if (rc < 0)
- error (0, 0, "warning: cannot spawn thread (%d) to traverse source paths\n", rc);
+ rc = pthread_create (& pt, NULL, thread_main_fts_source_paths, NULL);
+ if (rc)
+ error (EXIT_FAILURE, rc, "cannot spawn thread to traverse source paths\n");
all_threads.push_back(pt);
for (unsigned i=0; i<concurrency; i++)
{
- pthread_create (& pt, NULL, thread_main_scanner, NULL);
- if (rc < 0)
- error (0, 0, "warning: cannot spawn thread (%d) to scan source files / archives\n", rc);
+ rc = pthread_create (& pt, NULL, thread_main_scanner, NULL);
+ if (rc)
+ error (EXIT_FAILURE, rc, "cannot spawn thread to scan source files / archives\n");
all_threads.push_back(pt);
}
}
diff --git a/doc/ChangeLog b/doc/ChangeLog
index c316047cc8ba..5cd4fe1593d2 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+2021-02-04 Frank Ch. Eigler <fche@redhat.com>
+
+ * debuginfod.8: Mention new --fdcache-mintmp option.
+
2020-12-11 Dmitry V. Levin <ldv@altlinux.org>
* debuginfod.8: Fix spelling typos.
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index a836718f1013..c33a4b6bc085 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -212,6 +212,17 @@ $TMPDIR or \fB/tmp\fP filesystem. This is because that is where the
most recently used extracted files are kept. Grooming cleans this
cache.
+.TP
+.B "\-\-fdcache\-mintmp=NUM"
+Configure a disk space threshold for emergency flushing of the cache.
+The filesystem holding the cache is checked periodically. If the
+available space falls below the given percentage, the cache is
+flushed, and the fdcache will stay disabled until the next groom
+cycle. This mechanism, along with a few associated /metrics on the webapi,
+is intended to give an operator notice about storage scarcity - which
+can translate to RAM scarcity if the disk happens to be on a RAM
+virtual disk. The default threshold is 25%.
+
.TP
.B "\-v"
Increase verbosity of logging to the standard error file descriptor.
diff --git a/tests/ChangeLog b/tests/ChangeLog
index c6e9f6184e36..907b635198ac 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,7 @@
+2021-02-04 Frank Ch. Eigler <fche@redhat.com>
+
+ * run-debuginfod-find.sh: Smoke test --fdcache-mintmp option handling.
+
2021-01-31 Sergei Trofimovich <slyfox@gentoo.org>
* Makefile.am (TESTS_ENVIRONMENT): export CC variable
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 7fd3420ab20a..6340f60eccab 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -100,7 +100,7 @@ wait_ready()
# would see an error (running the testsuite under root is NOT encouraged).
ln -s R/nothing.rpm R/nothing.rpm
-env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog4 2>&1 &
+env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 --fdcache-mintmp 0 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog4 2>&1 &
PID1=$!
tempfiles vlog4
# Server must become ready
More information about the Elfutils-devel
mailing list