patch to commit soon: PR25375 debuginfod prefetching

Frank Ch. Eigler fche@redhat.com
Mon Feb 24 22:29:00 GMT 2020


Hi -

This patch has been baking on my public servers awhile and can make a
huge difference in performance.  It's not something immediately
obvious how or whether to test, as it's a pure performance improvement.
Planning to push shortly.

commit ae8b89716116a6df124f8700f77460b5e97c12c4 (origin/fche/pr25375)
Author: Frank Ch. Eigler <fche@redhat.com>
Date:   Sat Jan 25 18:43:07 2020 -0500

    PR25375: fdcache prefetching to reduce repeated archive decompression

diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 0acd70e4a916..7f432d7d9c41 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -355,6 +355,8 @@ static const struct argp_option options[] =
    { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number of archive files to keep in fdcache.", 0 },
 #define ARGP_KEY_FDCACHE_MBS 0x1002
    { "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
+#define ARGP_KEY_FDCACHE_PREFETCH 0x1003
+   { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
    { NULL, 0, NULL, 0, NULL, 0 }
   };
 
@@ -394,6 +396,7 @@ static regex_t file_exclude_regex;
 static bool traverse_logical;
 static long fdcache_fds;
 static long fdcache_mbs;
+static long fdcache_prefetch;
 static string tmpdir;
 
 static void set_metric(const string& key, int64_t value);
@@ -476,6 +479,9 @@ parse_opt (int key, char *arg,
     case ARGP_KEY_FDCACHE_MBS:
       fdcache_mbs = atol (arg);
       break;
+    case ARGP_KEY_FDCACHE_PREFETCH:
+      fdcache_prefetch = atol (arg);
+      break;
     case ARGP_KEY_ARG:
       source_paths.insert(string(arg));
       break;
@@ -975,14 +981,14 @@ class libarchive_fdcache
     string archive;
     string entry;
     string fd;
-    long fd_size_mb; // rounded up megabytes
+    double fd_size_mb; // slightly rounded up megabytes
   };
   deque<fdcache_entry> lru; // @head: most recently used
   long max_fds;
   long max_mbs;
 
 public:
-  void intern(const string& a, const string& b, string fd, off_t sz)
+  void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
   {
     {
       unique_lock<mutex> lock(fdcache_lock);
@@ -995,11 +1001,15 @@ class libarchive_fdcache
               break; // must not continue iterating
             }
         }
-      long mb = ((sz+1023)/1024+1023)/1024;
+      double mb = (sz+65535)/1048576.0; // round up to 64K block
       fdcache_entry n = { a, b, fd, mb };
-      lru.push_front(n);
+      if (front_p)
+        lru.push_front(n);
+      else
+        lru.push_back(n);
     if (verbose > 3)
-      obatched(clog) << "fdcache interned a=" << a << " b=" << b << " fd=" << fd << " mb=" << mb << endl;
+      obatched(clog) << "fdcache interned a=" << a << " b=" << b
+                     << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
     }
 
     this->limit(max_fds, max_mbs); // age cache if required
@@ -1022,6 +1032,17 @@ class libarchive_fdcache
     return -1;
   }
 
+  int probe(const string& a, const string& b) // just a cache residency check - don't modify LRU state, don't open
+  {
+    unique_lock<mutex> lock(fdcache_lock);
+    for (auto i = lru.begin(); i < lru.end(); i++)
+      {
+        if (i->archive == a && i->entry == b)
+          return true;
+      }
+    return false;
+  }
+
   void clear(const string& a, const string& b)
   {
     unique_lock<mutex> lock(fdcache_lock);
@@ -1047,7 +1068,7 @@ class libarchive_fdcache
     this->max_mbs = maxmbs;
 
     long total_fd = 0;
-    long total_mb = 0;
+    double total_mb = 0.0;
     for (auto i = lru.begin(); i < lru.end(); i++)
       {
         // accumulate totals from most recently used one going backward
@@ -1117,6 +1138,7 @@ handle_buildid_r_match (int64_t b_mtime,
       return 0;
     }
 
+  // check for a match in the fdcache first
   int fd = fdcache.lookup(b_source0, b_source1);
   while (fd >= 0) // got one!; NB: this is really an if() with a possible branch out to the end
     {
@@ -1152,6 +1174,7 @@ handle_buildid_r_match (int64_t b_mtime,
       // NB: see, we never go around the 'loop' more than once
     }
 
+  // no match ... grumble, must process the archive
   string archive_decoder = "/dev/null";
   string archive_extension = "";
   for (auto&& arch : scan_archives)
@@ -1196,8 +1219,19 @@ handle_buildid_r_match (int64_t b_mtime,
   if (rc != ARCHIVE_OK)
     throw archive_exception(a, "cannot open archive from pipe");
 
-  while(1) // parse cpio archive entries
+  // archive traversal is in three stages:
+  // 1) skip entries whose names do not match the requested one
+  // 2) extract the matching entry name (set r = result)
+  // 3) extract some number of prefetched entries (just into fdcache)
+  // 4) abort any further processing
+  struct MHD_Response* r = 0;                 // will set in stage 2
+  unsigned prefetch_count = fdcache_prefetch; // will decrement in stage 3
+
+  while(r == 0 || prefetch_count > 0) // stage 1, 2, or 3
     {
+      if (interrupted)
+        break;
+
       struct archive_entry *e;
       rc = archive_read_next_header (a, &e);
       if (rc != ARCHIVE_OK)
@@ -1229,18 +1263,32 @@ handle_buildid_r_match (int64_t b_mtime,
           throw archive_exception(a, "cannot extract file");
         }
 
+      if (r != 0) // stage 3
+        {
+          // NB: now we know we have a complete reusable file; make fdcache
+          // responsible for unlinking it later.
+          fdcache.intern(b_source0, not_source1,
+                         tmppath, archive_entry_size(e),
+                         false); // prefetched ones go to back of lru
+          prefetch_count --;
+          close (fd); // we're not saving this fd to make a mhd-response from!
+          continue;
+        }
+
       // NB: now we know we have a complete reusable file; make fdcache
       // responsible for unlinking it later.
-      fdcache.intern(b_source0, b_source1, tmppath, archive_entry_size(e));
+      fdcache.intern(b_source0, b_source1,
+                     tmppath, archive_entry_size(e),
+                     true); // requested ones go to the front of lru
 
       inc_metric ("http_responses_total","result",archive_extension + " archive");
-      struct MHD_Response* r = MHD_create_response_from_fd (archive_entry_size(e), fd);
+      r = MHD_create_response_from_fd (archive_entry_size(e), fd);
       if (r == 0)
         {
           if (verbose)
             obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
           close(fd);
-          break; // assume no chance of better luck around another iteration
+          break; // assume no chance of better luck around another iteration; no other copies of same file
         }
       else
         {
@@ -1251,12 +1299,12 @@ handle_buildid_r_match (int64_t b_mtime,
           /* libmicrohttpd will close it. */
           if (result_fd)
             *result_fd = fd;
-          return r;
+          continue;
         }
     }
 
   // XXX: rpm/file not found: delete this R entry?
-  return 0;
+  return r;
 }
 
 
@@ -2809,7 +2857,8 @@ main (int argc, char *argv[])
     fdcache_mbs = 1024; // 1 gigabyte
   else
     fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
-  fdcache_fds = concurrency * 2;
+  fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
+  fdcache_fds = (concurrency + fdcache_prefetch) * 2;
 
   /* Parse and process arguments.  */
   int remaining;
@@ -2943,6 +2992,7 @@ main (int argc, char *argv[])
   obatched(clog) << "rescan time " << rescan_s << endl;
   obatched(clog) << "fdcache fds " << fdcache_fds << endl;
   obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
+  obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
   obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
   obatched(clog) << "groom time " << groom_s << endl;
   if (scan_archives.size()>0)
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index ca844aedcfdc..ad4b2d993c8a 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -193,14 +193,17 @@ loops in the symbolic directory tree might lead to \fIinfinite
 traversal\fP.
 
 .TP
-.B "\-\-fdcache-fds=NUM"  "\-\-fdcache-mbs=MB"
+.B "\-\-fdcache\-fds=NUM"  "\-\-fdcache\-mbs=MB"  "\-\-fdcache-prefetch=NUM2"
 Configure limits on a cache that keeps recently extracted files from
-archives.  Up to NUM files and up to a total of MB megabytes will be
-kept extracted, in order to avoid having to decompress their archives
-again.  The default NUM and MB values depend on the concurrency of the
-system, and on the available disk space on the $TMPDIR or \fB/tmp\fP
-filesystem.  This is because that is where the most recently used
-extracted files are kept.  Grooming cleans this cache.
+archives.  Up to NUM requested files and up to a total of MB megabytes
+will be kept extracted, in order to avoid having to decompress their
+archives over and over again.  In addition, up to NUM2 other files
+from an archive may be prefetched into the cache before they are even
+requested.  The default NUM, NUM2, and MB values depend on the
+concurrency of the system, and on the available disk space on the
+$TMPDIR or \fB/tmp\fP filesystem.  This is because that is where the
+most recently used extracted files are kept.  Grooming cleans this
+cache.
 
 .TP
 .B "\-v"



More information about the Elfutils-devel mailing list