patch to commit soon: PR25375 debuginfod prefetching
Frank Ch. Eigler
fche@redhat.com
Mon Feb 24 22:29:00 GMT 2020
Hi -
This patch has been baking on my public servers awhile and can make a
huge difference in performance. It is not immediately obvious how, or
whether, to test it, as it's a pure performance improvement.
Planning to push shortly.
commit ae8b89716116a6df124f8700f77460b5e97c12c4 (origin/fche/pr25375)
Author: Frank Ch. Eigler <fche@redhat.com>
Date: Sat Jan 25 18:43:07 2020 -0500
PR25375: fdcache prefetching to reduce repeated archive decompression
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 0acd70e4a916..7f432d7d9c41 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -355,6 +355,8 @@ static const struct argp_option options[] =
{ "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number of archive files to keep in fdcache.", 0 },
#define ARGP_KEY_FDCACHE_MBS 0x1002
{ "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
+#define ARGP_KEY_FDCACHE_PREFETCH 0x1003
+ { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
{ NULL, 0, NULL, 0, NULL, 0 }
};
@@ -394,6 +396,7 @@ static regex_t file_exclude_regex;
static bool traverse_logical;
static long fdcache_fds;
static long fdcache_mbs;
+static long fdcache_prefetch;
static string tmpdir;
static void set_metric(const string& key, int64_t value);
@@ -476,6 +479,9 @@ parse_opt (int key, char *arg,
case ARGP_KEY_FDCACHE_MBS:
fdcache_mbs = atol (arg);
break;
+ case ARGP_KEY_FDCACHE_PREFETCH:
+ fdcache_prefetch = atol (arg);
+ break;
case ARGP_KEY_ARG:
source_paths.insert(string(arg));
break;
@@ -975,14 +981,14 @@ class libarchive_fdcache
string archive;
string entry;
string fd;
- long fd_size_mb; // rounded up megabytes
+ double fd_size_mb; // slightly rounded up megabytes
};
deque<fdcache_entry> lru; // @head: most recently used
long max_fds;
long max_mbs;
public:
- void intern(const string& a, const string& b, string fd, off_t sz)
+ void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
{
{
unique_lock<mutex> lock(fdcache_lock);
@@ -995,11 +1001,15 @@ class libarchive_fdcache
break; // must not continue iterating
}
}
- long mb = ((sz+1023)/1024+1023)/1024;
+ double mb = (sz+65535)/1048576.0; // round up to 64K block
fdcache_entry n = { a, b, fd, mb };
- lru.push_front(n);
+ if (front_p)
+ lru.push_front(n);
+ else
+ lru.push_back(n);
if (verbose > 3)
- obatched(clog) << "fdcache interned a=" << a << " b=" << b << " fd=" << fd << " mb=" << mb << endl;
+ obatched(clog) << "fdcache interned a=" << a << " b=" << b
+ << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
}
this->limit(max_fds, max_mbs); // age cache if required
@@ -1022,6 +1032,17 @@ class libarchive_fdcache
return -1;
}
+ int probe(const string& a, const string& b) // just a cache residency check - don't modify LRU state, don't open
+ {
+ unique_lock<mutex> lock(fdcache_lock);
+ for (auto i = lru.begin(); i < lru.end(); i++)
+ {
+ if (i->archive == a && i->entry == b)
+ return true;
+ }
+ return false;
+ }
+
void clear(const string& a, const string& b)
{
unique_lock<mutex> lock(fdcache_lock);
@@ -1047,7 +1068,7 @@ class libarchive_fdcache
this->max_mbs = maxmbs;
long total_fd = 0;
- long total_mb = 0;
+ double total_mb = 0.0;
for (auto i = lru.begin(); i < lru.end(); i++)
{
// accumulate totals from most recently used one going backward
@@ -1117,6 +1138,7 @@ handle_buildid_r_match (int64_t b_mtime,
return 0;
}
+ // check for a match in the fdcache first
int fd = fdcache.lookup(b_source0, b_source1);
while (fd >= 0) // got one!; NB: this is really an if() with a possible branch out to the end
{
@@ -1152,6 +1174,7 @@ handle_buildid_r_match (int64_t b_mtime,
// NB: see, we never go around the 'loop' more than once
}
+ // no match ... grumble, must process the archive
string archive_decoder = "/dev/null";
string archive_extension = "";
for (auto&& arch : scan_archives)
@@ -1196,8 +1219,19 @@ handle_buildid_r_match (int64_t b_mtime,
if (rc != ARCHIVE_OK)
throw archive_exception(a, "cannot open archive from pipe");
- while(1) // parse cpio archive entries
+ // archive traversal is in four stages:
+ // 1) skip entries whose names do not match the requested one
+ // 2) extract the matching entry name (set r = result)
+ // 3) extract some number of prefetched entries (just into fdcache)
+ // 4) abort any further processing
+ struct MHD_Response* r = 0; // will set in stage 2
+ unsigned prefetch_count = fdcache_prefetch; // will decrement in stage 3
+
+ while(r == 0 || prefetch_count > 0) // stage 1, 2, or 3
{
+ if (interrupted)
+ break;
+
struct archive_entry *e;
rc = archive_read_next_header (a, &e);
if (rc != ARCHIVE_OK)
@@ -1229,18 +1263,32 @@ handle_buildid_r_match (int64_t b_mtime,
throw archive_exception(a, "cannot extract file");
}
+ if (r != 0) // stage 3
+ {
+ // NB: now we know we have a complete reusable file; make fdcache
+ // responsible for unlinking it later.
+ fdcache.intern(b_source0, not_source1,
+ tmppath, archive_entry_size(e),
+ false); // prefetched ones go to back of lru
+ prefetch_count --;
+ close (fd); // we're not saving this fd to make a mhd-response from!
+ continue;
+ }
+
// NB: now we know we have a complete reusable file; make fdcache
// responsible for unlinking it later.
- fdcache.intern(b_source0, b_source1, tmppath, archive_entry_size(e));
+ fdcache.intern(b_source0, b_source1,
+ tmppath, archive_entry_size(e),
+ true); // requested ones go to the front of lru
inc_metric ("http_responses_total","result",archive_extension + " archive");
- struct MHD_Response* r = MHD_create_response_from_fd (archive_entry_size(e), fd);
+ r = MHD_create_response_from_fd (archive_entry_size(e), fd);
if (r == 0)
{
if (verbose)
obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
close(fd);
- break; // assume no chance of better luck around another iteration
+ break; // assume no chance of better luck around another iteration; no other copies of same file
}
else
{
@@ -1251,12 +1299,12 @@ handle_buildid_r_match (int64_t b_mtime,
/* libmicrohttpd will close it. */
if (result_fd)
*result_fd = fd;
- return r;
+ continue;
}
}
// XXX: rpm/file not found: delete this R entry?
- return 0;
+ return r;
}
@@ -2809,7 +2857,8 @@ main (int argc, char *argv[])
fdcache_mbs = 1024; // 1 gigabyte
else
fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
- fdcache_fds = concurrency * 2;
+ fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
+ fdcache_fds = (concurrency + fdcache_prefetch) * 2;
/* Parse and process arguments. */
int remaining;
@@ -2943,6 +2992,7 @@ main (int argc, char *argv[])
obatched(clog) << "rescan time " << rescan_s << endl;
obatched(clog) << "fdcache fds " << fdcache_fds << endl;
obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
+ obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
obatched(clog) << "groom time " << groom_s << endl;
if (scan_archives.size()>0)
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index ca844aedcfdc..ad4b2d993c8a 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -193,14 +193,17 @@ loops in the symbolic directory tree might lead to \fIinfinite
traversal\fP.
.TP
-.B "\-\-fdcache-fds=NUM" "\-\-fdcache-mbs=MB"
+.B "\-\-fdcache\-fds=NUM" "\-\-fdcache\-mbs=MB" "\-\-fdcache\-prefetch=NUM2"
Configure limits on a cache that keeps recently extracted files from
-archives. Up to NUM files and up to a total of MB megabytes will be
-kept extracted, in order to avoid having to decompress their archives
-again. The default NUM and MB values depend on the concurrency of the
-system, and on the available disk space on the $TMPDIR or \fB/tmp\fP
-filesystem. This is because that is where the most recently used
-extracted files are kept. Grooming cleans this cache.
+archives. Up to NUM requested files and up to a total of MB megabytes
+will be kept extracted, in order to avoid having to decompress their
+archives over and over again. In addition, up to NUM2 other files
+from an archive may be prefetched into the cache before they are even
+requested. The default NUM, NUM2, and MB values depend on the
+concurrency of the system, and on the available disk space on the
+$TMPDIR or \fB/tmp\fP filesystem. This is because that is where the
+most recently used extracted files are kept. Grooming cleans this
+cache.
.TP
.B "\-v"
More information about the Elfutils-devel
mailing list