From 9917927d01210580a0205f0ec69fe15943b8e8c6 Mon Sep 17 00:00:00 2001
From: Noah
Date: Thu, 10 Jun 2021 10:29:45 -0400
Subject: [PATCH] debuginfod: PR25978 - Created the prefetch fdcache

The debuginfod fdcache-prefetch logic has been observed to show some
degeneracies in operation.  Since fdcache evictions are done frequently,
and freshly prefetched archive elements are put at the back of lru[],
each eviction round can summarily nuke things that were just prefetched
... and are just going to be prefetched again.  It would be better to
have two lru lists, or to be able to insert newly prefetched entries
somewhere in the middle of the list rather than at the very end.

https://sourceware.org/bugzilla/show_bug.cgi?id=25978

Signed-off-by: Noah Sanci
---
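To make the intent concrete, here is a minimal, self-contained sketch of
the two-list idea.  It is illustrative only: the names, types and limits
are invented for the example and this is not the debuginfod code, which
additionally tracks extracted file names, per-entry sizes in megabytes
and prometheus-style metrics.

#include <algorithm>
#include <deque>
#include <iostream>
#include <string>

struct entry { std::string key; };

struct two_list_cache
{
  std::deque<entry> lru;       // entries that were actually requested
  std::deque<entry> prefetch;  // speculatively extracted entries
  size_t max_lru = 4;
  size_t max_prefetch = 4;

  // Demand-loaded entries go to the front of lru; prefetched entries go
  // to their own deque, so an lru eviction round cannot wipe them out.
  void intern (const entry& e, bool requested)
  {
    auto drop = [&](std::deque<entry>& d)
      {
        d.erase (std::remove_if (d.begin (), d.end (),
                                 [&](const entry& x) { return x.key == e.key; }),
                 d.end ());
      };
    drop (lru); drop (prefetch);                 // nuke any preexisting copy
    (requested ? lru : prefetch).push_front (e);
    trim ();
  }

  // A hit in either deque ends up at the front of lru: a prefetched entry
  // that is finally requested gets promoted out of the prefetch deque.
  bool lookup (const std::string& key)
  {
    for (auto* d : { &lru, &prefetch })
      for (auto i = d->begin (); i != d->end (); ++i)
        if (i->key == key)
          {
            entry e = *i;
            d->erase (i);
            lru.push_front (e);
            trim ();
            return true;
          }
    return false;
  }

  // Each deque is aged against its own limit, so prefetch churn and
  // demand churn no longer evict each other's entries.
  void trim ()
  {
    while (lru.size () > max_lru) lru.pop_back ();
    while (prefetch.size () > max_prefetch) prefetch.pop_back ();
  }
};

int main ()
{
  two_list_cache c;
  c.intern ({"archive1/file.debug"}, false);         // prefetched only
  for (int i = 0; i < 10; i++)
    c.intern ({"hot" + std::to_string (i)}, true);   // heavy demand traffic
  // With a single lru list the prefetched entry would already have been
  // evicted by the demand churn; with two lists it survives and is promoted:
  std::cout << c.lookup ("archive1/file.debug") << std::endl; // prints 1
}

The patch below applies the same structure to debuginfod's
libarchive_fdcache: prefetched archive elements go into a separate deque
with its own --fdcache-prefetch-fds / --fdcache-prefetch-mbs limits and
its own eviction metrics, and a prefetched entry is promoted into the lru
deque the first time it is actually requested.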
 debuginfod/ChangeLog         |  15 ++++
 debuginfod/debuginfod.cxx    | 146 +++++++++++++++++++++++++++++------
 doc/debuginfod.8             |  10 +++
 tests/ChangeLog              |   7 ++
 tests/run-debuginfod-find.sh |  20 ++++-
 5 files changed, 173 insertions(+), 25 deletions(-)

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 286c910a..02ad34b4 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,18 @@
+2021-06-28  Noah Sanci
+
+	PR25978
+	* debuginfod.cxx (options): Added --fdcache-prefetch-fds/mbs options.
+	(parse_opt): Handle the new options; reject negative values.  Ensure
+	fdcache_mintmp is not given a negative percentage or one above 100%.
+	(globals): Added fdcache_prefetch_mbs/fdcache_prefetch_fds.
+	(set_metrics): Differentiate between lru and prefetch metrics.
+	(intern): Added prefetch functionality for nuking preexisting copies
+	and incrementing prefetch metrics.
+	(lookup): Search the prefetch cache and increment associated metrics.
+	Upon finding an element in the prefetch cache, move it to the lru cache.
+	(limit): Arguments updated.  Update size of prefetch cache.
+	(main): Log prefetch and cache fds/mbs.
+
 2021-06-03  Frank Ch. Eigler
 
 	PR27863
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 543044c6..c348aa61 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -368,7 +368,13 @@
    { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
 #define ARGP_KEY_FDCACHE_MINTMP 0x1004
    { "fdcache-mintmp", ARGP_KEY_FDCACHE_MINTMP, "NUM", 0, "Minimum free space% on tmpdir.", 0 },
-   { NULL, 0, NULL, 0, NULL, 0 }
+#define ARGP_KEY_FDCACHE_PREFETCH_MBS 0x1005
+   { "fdcache-prefetch-mbs", ARGP_KEY_FDCACHE_PREFETCH_MBS, "MB", 0,"Megabytes allocated to the \
+      prefetch cache.", 0},
+#define ARGP_KEY_FDCACHE_PREFETCH_FDS 0x1006
+   { "fdcache-prefetch-fds", ARGP_KEY_FDCACHE_PREFETCH_FDS, "NUM", 0,"Number of files allocated to the \
+      prefetch cache.", 0},
+   { NULL, 0, NULL, 0, NULL, 0 },
   };
 
 /* Short description of program.  */
@@ -412,6 +418,8 @@ static long fdcache_fds;
 static long fdcache_mbs;
 static long fdcache_prefetch;
 static long fdcache_mintmp;
+static long fdcache_prefetch_mbs;
+static long fdcache_prefetch_fds;
 static string tmpdir;
 
 static void set_metric(const string& key, double value);
@@ -538,10 +546,22 @@ parse_opt (int key, char *arg,
       break;
     case ARGP_KEY_FDCACHE_MINTMP:
       fdcache_mintmp = atol (arg);
+      if( fdcache_mintmp > 100 || fdcache_mintmp < 0 )
+        argp_failure(state, 1, EINVAL, "fdcache mintmp percent");
       break;
     case ARGP_KEY_ARG:
       source_paths.insert(string(arg));
       break;
+    case ARGP_KEY_FDCACHE_PREFETCH_FDS:
+      fdcache_prefetch_fds = atol(arg);
+      if ( fdcache_prefetch_fds < 0)
+        argp_failure(state, 1, EINVAL, "fdcache prefetch fds");
+      break;
+    case ARGP_KEY_FDCACHE_PREFETCH_MBS:
+      fdcache_prefetch_mbs = atol(arg);
+      if ( fdcache_prefetch_mbs < 0)
+        argp_failure(state, 1, EINVAL, "fdcache prefetch mbs");
+      break;
       // case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK);
     default: return ARGP_ERR_UNKNOWN;
     }
@@ -1199,23 +1219,32 @@ private:
   };
   deque<fdcache_entry> lru; // @head: most recently used
   long max_fds;
+  deque<fdcache_entry> prefetch; // prefetched
   long max_mbs;
+  long max_prefetch_mbs;
+  long max_prefetch_fds;
 
 public:
   void set_metrics()
   {
-    double total_mb = 0.0;
+    double fdcache_mb = 0.0;
+    double prefetch_mb = 0.0;
     for (auto i = lru.begin(); i < lru.end(); i++)
-      total_mb += i->fd_size_mb;
-    set_metric("fdcache_bytes", (int64_t)(total_mb*1024.0*1024.0));
+      fdcache_mb += i->fd_size_mb;
+    for (auto j = prefetch.begin(); j < prefetch.end(); j++)
+      prefetch_mb += j->fd_size_mb;
+    set_metric("fdcache_bytes", fdcache_mb*1024.0*1024.0);
     set_metric("fdcache_count", lru.size());
+    set_metric("fdcache_prefetch_bytes", prefetch_mb*1024.0*1024.0);
+    set_metric("fdcache_prefetch_count", prefetch.size());
   }
 
   void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
   {
     {
       unique_lock<mutex> lock(fdcache_lock);
-      for (auto i = lru.begin(); i < lru.end(); i++) // nuke preexisting copy
+      // nuke preexisting copy
+      for (auto i = lru.begin(); i < lru.end(); i++)
         {
           if (i->archive == a && i->entry == b)
             {
@@ -1225,17 +1254,28 @@ public:
               break; // must not continue iterating
             }
         }
+      // nuke preexisting copy in prefetch
+      for (auto i = prefetch.begin(); i < prefetch.end(); i++)
+        {
+          if (i->archive == a && i->entry == b)
+            {
+              unlink (i->fd.c_str());
+              prefetch.erase(i);
+              inc_metric("fdcache_op_count","op","prefetch_dequeue");
+              break; // must not continue iterating
+            }
+        }
       double mb = (sz+65535)/1048576.0; // round up to 64K block
       fdcache_entry n = { a, b, fd, mb };
       if (front_p)
         {
-          inc_metric("fdcache_op_count","op","enqueue_front");
+          inc_metric("fdcache_op_count","op","enqueue");
           lru.push_front(n);
         }
       else
         {
-          inc_metric("fdcache_op_count","op","enqueue_back");
-          lru.push_back(n);
+          inc_metric("fdcache_op_count","op","prefetch_enqueue");
+          prefetch.push_front(n);
         }
       if (verbose > 3)
         obatched(clog) << "fdcache interned a=" << a << " b=" << b
@@ -1248,10 +1288,10 @@
       {
         inc_metric("fdcache_op_count","op","emerg-flush");
         obatched(clog) << "fdcache emergency flush for filling tmpdir" << endl;
-        this->limit(0, 0); // emergency flush
+        this->limit(0, 0, 0, 0); // emergency flush
       }
     else if (front_p)
-      this->limit(max_fds, max_mbs); // age cache if required
+      this->limit(max_fds, max_mbs, max_prefetch_fds, max_prefetch_mbs); // age cache if required
   }
 
   int lookup(const string& a, const string& b)
@@ -1267,7 +1307,21 @@
              lru.erase(i); // invalidates i, so no more iteration!
              lru.push_front(n);
              inc_metric("fdcache_op_count","op","requeue_front");
-             fd = open(n.fd.c_str(), O_RDONLY); // NB: no problem if dup() fails; looks like cache miss
+             fd = open(n.fd.c_str(), O_RDONLY);
              break;
            }
        }
+      // Iterate through prefetch while fd == -1 to ensure that no duplication between lru and
+      // prefetch occurs.
+      for ( auto i = prefetch.begin(); fd == -1 && i < prefetch.end(); ++i)
+        {
+          if (i->archive == a && i->entry == b)
+            { // found it; take the entry from the prefetch deque to the lru deque, since it has now been accessed.
+              fdcache_entry n = *i;
+              prefetch.erase(i);
+              lru.push_front(n);
+              inc_metric("fdcache_op_count","op","prefetch_access");
+              fd = open(n.fd.c_str(), O_RDONLY);
+              break;
+            }
+        }
@@ -1277,10 +1331,10 @@
       {
         inc_metric("fdcache_op_count","op","emerg-flush");
         obatched(clog) << "fdcache emergency flush for filling tmpdir";
-        this->limit(0, 0); // emergency flush
+        this->limit(0, 0, 0, 0); // emergency flush
       }
     else if (fd >= 0)
-      this->limit(max_fds, max_mbs); // age cache if required
+      this->limit(max_fds, max_mbs, max_prefetch_fds, max_prefetch_mbs); // age cache if required
 
     return fd;
   }
@@ -1296,6 +1350,14 @@
             return true;
           }
       }
+    for (auto i = prefetch.begin(); i < prefetch.end(); i++)
+      {
+        if (i->archive == a && i->entry == b)
+          {
+            inc_metric("fdcache_op_count","op","prefetch_probe_hit");
+            return true;
+          }
+      }
     inc_metric("fdcache_op_count","op","probe_miss");
     return false;
   }
@@ -1306,7 +1368,7 @@
     for (auto i = lru.begin(); i < lru.end(); i++)
       {
         if (i->archive == a && i->entry == b)
-          { // found it; move it to head of lru
+          { // found it; erase it from lru
             fdcache_entry n = *i;
             lru.erase(i); // invalidates i, so no more iteration!
             inc_metric("fdcache_op_count","op","clear");
@@ -1315,10 +1377,21 @@
             return;
           }
       }
+    for (auto i = prefetch.begin(); i < prefetch.end(); i++)
+      {
+        if (i->archive == a && i->entry == b)
+          { // found it; erase it from prefetch
+            fdcache_entry n = *i;
+            prefetch.erase(i); // invalidates i, so no more iteration!
+            inc_metric("fdcache_op_count","op","prefetch_clear");
+            unlink (n.fd.c_str());
+            set_metrics();
+            return;
+          }
+      }
   }
 
-
-  void limit(long maxfds, long maxmbs, bool metrics_p = true)
+  void limit(long maxfds, long maxmbs, long maxprefetchfds, long maxprefetchmbs , bool metrics_p = true)
   {
     if (verbose > 3 && (this->max_fds != maxfds || this->max_mbs != maxmbs))
       obatched(clog) << "fdcache limited to maxfds=" << maxfds << " maxmbs=" << maxmbs << endl;
@@ -1326,7 +1399,8 @@ public:
     unique_lock<mutex> lock(fdcache_lock);
     this->max_fds = maxfds;
     this->max_mbs = maxmbs;
-
+    this->max_prefetch_fds = maxprefetchfds;
+    this->max_prefetch_mbs = maxprefetchmbs;
     long total_fd = 0;
     double total_mb = 0.0;
     for (auto i = lru.begin(); i < lru.end(); i++)
@@ -1334,7 +1408,7 @@
         // accumulate totals from most recently used one going backward
         total_fd ++;
         total_mb += i->fd_size_mb;
-        if (total_fd > max_fds || total_mb > max_mbs)
+        if (total_fd > this->max_fds || total_mb > this->max_mbs)
           {
             // found the cut here point!
 
@@ -1352,6 +1426,29 @@
            break;
          }
      }
+    total_fd = 0;
+    total_mb = 0.0;
+    for(auto i = prefetch.begin(); i < prefetch.end(); i++){
+      // accumulate totals from most recently used one going backward
+      total_fd ++;
+      total_mb += i->fd_size_mb;
+      if (total_fd > this->max_prefetch_fds || total_mb > this->max_prefetch_mbs)
+        {
+          // found the cut here point!
+          for (auto j = i; j < prefetch.end(); j++) // close all the fds from here on in
+            {
+              if (verbose > 3)
+                obatched(clog) << "fdcache evicted from prefetch a=" << j->archive << " b=" << j->entry
+                               << " fd=" << j->fd << " mb=" << j->fd_size_mb << endl;
+              if (metrics_p)
+                inc_metric("fdcache_op_count","op","prefetch_evict");
+              unlink (j->fd.c_str());
+            }
+
+          prefetch.erase(i, prefetch.end()); // erase the nodes generally
+          break;
+        }
+    }
     if (metrics_p) set_metrics();
   }
 
@@ -1360,7 +1457,7 @@
   {
     // unlink any fdcache entries in $TMPDIR
     // don't update metrics; those globals may be already destroyed
-    limit(0, 0, false);
+    limit(0, 0, 0, 0, false);
   }
 };
 static libarchive_fdcache fdcache;
@@ -1547,7 +1644,7 @@ handle_buildid_r_match (bool internal_req_p,
           // responsible for unlinking it later.
           fdcache.intern(b_source0, fn,
                          tmppath, archive_entry_size(e),
-                         false); // prefetched ones go to back of lru
+                         false); // prefetched ones go to the prefetch cache
           prefetch_count --;
           close (fd); // we're not saving this fd to make a mhd-response from!
           continue;
@@ -3293,8 +3390,8 @@ void groom()
   sqlite3_db_release_memory(dbq); // ... for both connections
   debuginfod_pool_groom(); // and release any debuginfod_client objects we've been holding onto
 
-  fdcache.limit(0,0); // release the fdcache contents
-  fdcache.limit(fdcache_fds,fdcache_mbs); // restore status quo parameters
+  fdcache.limit(0,0,0,0); // release the fdcache contents
+  fdcache.limit(fdcache_fds, fdcache_mbs, fdcache_prefetch_fds, fdcache_prefetch_mbs); // restore status quo parameters
 
   clock_gettime (CLOCK_MONOTONIC, &ts_end);
   double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
@@ -3456,7 +3553,7 @@ main (int argc, char *argv[])
   if (scan_archives.size()==0 && !scan_files && source_paths.size()>0)
     obatched(clog) << "warning: without -F -R -U -Z, ignoring PATHs" << endl;
 
-  fdcache.limit(fdcache_fds, fdcache_mbs);
+  fdcache.limit(fdcache_fds, fdcache_mbs, fdcache_prefetch_fds, fdcache_prefetch_mbs);
 
   (void) signal (SIGPIPE, SIG_IGN); // microhttpd can generate it incidentally, ignore
   (void) signal (SIGINT, signal_handler); // ^C
@@ -3600,6 +3697,9 @@ main (int argc, char *argv[])
   obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
   obatched(clog) << "fdcache tmpdir min% " << fdcache_mintmp << endl;
   obatched(clog) << "groom time " << groom_s << endl;
+  obatched(clog) << "prefetch fds " << fdcache_prefetch_fds << endl;
+  obatched(clog) << "prefetch mbs " << fdcache_prefetch_mbs << endl;
+
   if (scan_archives.size()>0)
     {
       obatched ob(clog);
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index 1ba42cf6..2325e5f7 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -212,6 +212,16 @@
 $TMPDIR or \fB/tmp\fP filesystem.  This is because that is where
 the most recently used extracted files are kept.  Grooming cleans
 this cache.
+.TP
+.B "\-\-fdcache\-prefetch\-fds=NUM"  "\-\-fdcache\-prefetch\-mbs=MB"
+Configure how many file descriptors (fds) and megabytes (mbs) are
+allocated to the prefetch fdcache.  If unspecified, the values of
+\fB\-\-fdcache\-prefetch\-fds\fP and \fB\-\-fdcache\-prefetch\-mbs\fP
+depend on the concurrency of the system and on the available disk
+space in $TMPDIR.  Allocating more to the prefetch cache will improve
+performance in environments where different parts of several large
+archives are being accessed.
+
 .TP
 .B "\-\-fdcache\-mintmp=NUM"
 Configure a disk space threshold for emergency flushing of the cache.
diff --git a/tests/ChangeLog b/tests/ChangeLog
index d8fa97fa..94201959 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,10 @@
+2021-06-28  Noah Sanci
+
+	PR25978
+	* run-debuginfod-find.sh: Test that the fdcache-prefetch-fds/mbs
+	options are accepted and logged.  Check that inc_metric works for
+	the lru and prefetch cache metrics.
+
 2021-06-16  Frank Ch. Eigler
 
 	* run-debuginfod-find.sh: Fix intermittent groom/stale failure,
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 456dc2f8..93985587 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -119,12 +119,15 @@ wait_ready()
   fi
 }
 
+FDCACHE_FDS=50
+FDCACHE_MBS=190
+PREFETCH_FDS=10
+PREFETCH_MBS=120
 # create a bogus .rpm file to evoke a metric-visible error
 # Use a cyclic symlink instead of chmod 000 to make sure even root
 # would see an error (running the testsuite under root is NOT encouraged).
 ln -s R/nothing.rpm R/nothing.rpm
-
-env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 --fdcache-mintmp 0 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog$PORT1 2>&1 &
+env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-mbs=$FDCACHE_MBS --fdcache-fds=$FDCACHE_FDS --fdcache-prefetch-mbs=$PREFETCH_MBS --fdcache-prefetch-fds=$PREFETCH_FDS --fdcache-mintmp 0 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog$PORT1 2>&1 &
 PID1=$!
 tempfiles vlog$PORT1
 errfiles vlog$PORT1
@@ -470,6 +473,19 @@ archive_test f0aa15b8aba4f3c28cac3c2a73801fefa644a9f2 /usr/src/debug/hello-1.0/h
 
 egrep '(libc.error.*rhel7)|(bc1febfd03ca)|(f0aa15b8aba)' vlog$PORT1
 
+########################################################################
+## PR25978
+# Ensure that the fdcache options are working.
+grep "prefetch fds" vlog$PORT1
+grep "prefetch mbs" vlog$PORT1
+grep "fdcache fds" vlog$PORT1
+grep "fdcache mbs" vlog$PORT1
+# Search the vlog to find what the metric counts should be and check that
+# the correct metrics were incremented.
+wait_ready $PORT1 'fdcache_op_count{op="enqueue"}' $( grep -c 'interned.*front=1' vlog$PORT1 )
+wait_ready $PORT1 'fdcache_op_count{op="evict"}' $( grep -c 'evicted a=.*' vlog$PORT1 )
+wait_ready $PORT1 'fdcache_op_count{op="prefetch_enqueue"}' $( grep -c 'interned.*front=0' vlog$PORT1 )
+wait_ready $PORT1 'fdcache_op_count{op="prefetch_evict"}' $( grep -c 'evicted from prefetch a=.*front=0' vlog$PORT1 || true )
 ########################################################################
 # Federation mode
-- 
2.31.1