patch rfc: debuginfod -Z (generalized archive) support
Frank Ch. Eigler
fche@redhat.com
Wed Feb 5 20:09:00 GMT 2020
Hi -
A little extension lets us process arch-linux archives. Still waiting
for some small test .pkg's from the arch folks for the elfutils
testsuite. However, hand-testing on several larger files works!
commit b51ae89befeb81c8b51b15b7168c6e616255b486 (fche/pacman-Z)
Author: Frank Ch. Eigler <fche@redhat.com>
Date: Wed Feb 5 15:04:18 2020 -0500
debuginfod: generalized archive support
Add a '-Z EXT=CMD' option to debuginfod, which lets it scan any given
extension and run CMD on it to unwrap distro archives. For example,
for arch-linux pacman files, -Z '.tar.zst=zstdcat' lets debuginfod
grok debug and source content in split-debuginfo files.
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 8c97fdcf7085..d812e6d71ff0 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,9 @@
+2020-02-05 Frank Ch. Eigler <fche@redhat.com>
+
+ * debuginfod.cxx (argp options): Add -Z option.
+ (canonicalized_archive_entry_pathname): New function for
+ distro-agnostic file name matching/storage.
+
2020-01-22 Frank Ch. Eigler <fche@redhat.com>
* debuginfod.cxx (dwarf_extract_source_paths): Don't print
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 623dbc593c70..0de6bbaea0ee 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -333,9 +333,10 @@ ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
static const struct argp_option options[] =
{
{ NULL, 0, NULL, 0, "Scanners:", 1 },
- { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning threads.", 0 },
- { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning threads.", 0 },
- { "scan-deb-dir", 'U', NULL, 0, "Enable DEB scanning threads.", 0 },
+ { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning.", 0 },
+ { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning.", 0 },
+ { "scan-deb-dir", 'U', NULL, 0, "Enable DEB scanning.", 0 },
+ { "scan-archive", 'Z', "EXT=CMD", 0, "Enable arbitrary archive scanning.", 0 },
// "source-oci-imageregistry" ...
{ NULL, 0, NULL, 0, "Options:", 2 },
@@ -428,6 +429,15 @@ parse_opt (int key, char *arg,
scan_archives[".deb"]="dpkg-deb --fsys-tarfile";
scan_archives[".ddeb"]="dpkg-deb --fsys-tarfile";
break;
+ case 'Z':
+ {
+ char* extension = strchr(arg, '=');
+ if (extension)
+ scan_archives[string(arg, (extension-arg))]=string(extension+1);
+ else
+ argp_failure(state, 1, EINVAL, "bad EXT=CMD format");
+ }
+ break;
case 'L':
traverse_logical = true;
break;
@@ -1068,6 +1078,25 @@ class libarchive_fdcache
static libarchive_fdcache fdcache;
+// For security/portability reasons, many distro-package archives have
+// a "./" in front of path names; others have nothing, others have
+// "/". Canonicalize them all to a single leading "/", with the
+// assumption that this matches the dwarf-derived file names too.
+string canonicalized_archive_entry_pathname(struct archive_entry *e)
+{
+ string fn = archive_entry_pathname(e);
+ if (fn.size() == 0)
+ return fn;
+ if (fn[0] == '/')
+ return fn;
+ if (fn[0] == '.')
+ return fn.substr(1);
+ else
+ return string("/")+fn;
+}
+
+
+
static struct MHD_Response*
handle_buildid_r_match (int64_t b_mtime,
const string& b_source0,
@@ -1162,8 +1191,8 @@ handle_buildid_r_match (int64_t b_mtime,
if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
continue;
- string fn = archive_entry_pathname (e);
- if (fn != string(".")+b_source1)
+ string fn = canonicalized_archive_entry_pathname (e);
+ if (fn != b_source1)
continue;
// extract this file to a temporary file
@@ -2055,9 +2084,7 @@ archive_classify (const string& rps, string& archive_extension,
if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
continue;
- string fn = archive_entry_pathname (e);
- if (fn.size() > 1 && fn[0] == '.')
- fn = fn.substr(1); // trim off the leading '.'
+ string fn = canonicalized_archive_entry_pathname (e);
if (verbose > 3)
obatched(clog) << "libarchive checking " << fn << endl;
@@ -2764,7 +2791,7 @@ main (int argc, char *argv[])
"unexpected argument: %s", argv[remaining]);
if (scan_archives.size()==0 && !scan_files && source_paths.size()>0)
- obatched(clog) << "warning: without -F -R -U, ignoring PATHs" << endl;
+ obatched(clog) << "warning: without -F -R -U -Z, ignoring PATHs" << endl;
fdcache.limit(fdcache_fds, fdcache_mbs);
@@ -2894,7 +2921,7 @@ main (int argc, char *argv[])
obatched ob(clog);
auto& o = ob << "scanning archive types ";
for (auto&& arch : scan_archives)
- o << arch.first << " ";
+ o << arch.first << "(" << arch.second << ") ";
o << endl;
}
const char* du = getenv(DEBUGINFOD_URLS_ENV_VAR);
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 651ea33d4106..36094d002f75 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+2020-02-05 Frank Ch. Eigler <fche@redhat.com>
+
+ * debuginfod.8: Document new -Z flag and tweak other bits.
+
2020-01-10 Mark Wielaard <mark@klomp.org>
* debuginfod_find_debuginfo.3 (DEBUGINFOD_PROGRESS): Mention progress
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index 166c7c4590ed..d6561edf7159 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -61,20 +61,22 @@ or
^C
.ESAMPLE
-If the \fB\-R\fP and/or \fB-U\fP option is given, each file is scanned
-as an archive file that may contain ELF/DWARF/source files. If \-R is
-given, the will scan RPMs; and/or if \-U is given, they will scan DEB
-/ DDEB files. (The terms RPM and DEB and DDEB are used synonymously
-as "archives" in diagnostic messages.) Because of complications such
-as DWZ-compressed debuginfo, may require \fItwo\fP traversal passes to
-identify all source code. Source files for RPMs are only served from
-other RPMs, so the caution for \-F does not apply. Note that due to
-Debian/Ubuntu packaging policies & mechanisms, debuginfod cannot
-resolve source files for DEB/DDEB at all.
-
-If no PATH is listed, or neither \fB\-F\fP nor \fB\-R\fP nor \fB\-U\fP
-option is given, then \fBdebuginfod\fP will simply serve content that
-it accumulated into its index in all previous runs.
+If any of the \fB\-R\fP, \fB-U\fP, or \fB-Z\fP options is given, each
+file is scanned as an archive file that may contain ELF/DWARF/source
+files. Archive files are recognized by extension. If \-R is given,
+".rpm" files are scanned; if \-U is given, ".deb" and ".ddeb" files
+are scanned; if \-Z is given, the listed extensions are scanned.
+Because of complications such as DWZ-compressed debuginfo, it may take
+\fItwo\fP traversal passes to identify all source code. Source files
+for RPMs are only served from other RPMs, so the caution for \-F does
+not apply. Note that due to Debian/Ubuntu packaging policies &
+mechanisms, debuginfod cannot resolve source files for DEB/DDEB at
+all.
+
+If no PATH is listed, or none of the scanning options is given, then
+\fBdebuginfod\fP will simply serve content that it accumulated into
+its index in all previous runs, and federate to any upstream
+debuginfod servers.
.SH OPTIONS
@@ -91,6 +93,16 @@ Activate RPM patterns in archive scanning. The default is off.
.B "\-U"
Activate DEB/DDEB patterns in archive scanning. The default is off.
+.TP
+.B "\-Z EXT=CMD"
+Activate an additional pattern in archive scanning. Files with name
+extension EXT will be processed with CMD. CMD is invoked with the
+file name added to its argument list, and it should produce the
+archive on its standard output. debuginfod uses libarchive to consume
+the result, so it can accept a wide range of archive formats and
+compression. (Include the dot in EXT.) The default is no additional
+patterns. This option may be repeated.
+
.TP
.B "\-d FILE" "\-\-database=FILE"
Set the path of the sqlite database used to store the index. This
@@ -123,7 +135,8 @@ against the full path of each file, based on its \fBrealpath(3)\fP
canonicalization. By default, all files are included and none are
excluded. A file that matches both include and exclude REGEX is
excluded. (The \fIcontents\fP of archive files are not subject to
-inclusion or exclusion filtering: they are all processed.)
+inclusion or exclusion filtering: they are all processed.) Only the
+last of each type of regular expression given is used.
.TP
.B "\-t SECONDS" "\-\-rescan\-time=SECONDS"
More information about the Elfutils-devel
mailing list