[patch] debuginfod metadata extension
Frank Ch. Eigler
fche@elastic.org
Fri May 24 01:48:51 GMT 2024
Hi -
The following patch brings in the other long-queued piece of work by
Ryan and myself. Because of the long divergence of the branch, it
took some manual matching up of master branch patches, so took some
time. The refactoring in debuginfod-client.c is the most complex;
the server side is comparatively simple.
Please send feedback!
commit 97f10ba356b0184ebf83c242515563f8d4a21b87 (HEAD -> master)
gpg: Signature made Thu 23 May 2024 07:14:54 PM EDT
gpg: using RSA key 4DD136490411C0A42B28844F258B6EFA0F209D24
gpg: Good signature from "Frank Ch. Eigler <fche@elastic.org>" [ultimate]
Author: Frank Ch. Eigler <fche@redhat.com>
Date: Mon Oct 31 17:40:01 2022 -0400
PR29472: debuginfod: add metadata query webapi, C api, client
This patch extends the debuginfod API with a "metadata query"
operation. It allows clients to request an enumeration of file names
known to debuginfod servers, returning a JSON response including the
matching buildids. This lets clients later download debuginfo for a
range of versions of the same named binaries, in case they need to do
prospective work (like systemtap-based live-patching). It also lets
server operators implement prefetch triggering operations for popular
but slow debuginfo slivers like kernel vdso.debug files on fedora.
Implementation requires a modern enough json-c library, namely 0.11,
which dates from 2014. Without that, debuginfod client/server bits
will refuse to build.
% debuginfod-find metadata file /bin/ls
% debuginfod-find metadata glob "/usr/local/bin/c*"
Refactored several functions in debuginfod-client.c, because the
metadata search logic is different for multiple servers (merge all
responses instead of first responder wins).
Documentation and testing are included.
Signed-off-by: Ryan Goldberg <rgoldber@redhat.com>
Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
diff --git a/NEWS b/NEWS
index 6f931bb518cc..300db133526f 100644
--- a/NEWS
+++ b/NEWS
@@ -3,6 +3,8 @@ Version 0.192 (one after 0.191)
debuginfod: Add per-file signature verification for integrity
checking, using RPM IMA scheme from Fedora/RHEL.
+debuginfod: New API for metadata queries: file name -> buildid.
+
Version 0.191 "Bug fixes in C major"
libdw: dwarf_addrdie now supports binaries lacking a .debug_aranges
diff --git a/config/elfutils.spec.in b/config/elfutils.spec.in
index 460729972420..eff045755730 100644
--- a/config/elfutils.spec.in
+++ b/config/elfutils.spec.in
@@ -31,6 +31,8 @@ BuildRequires: pkgconfig(libmicrohttpd) >= 0.9.33
BuildRequires: pkgconfig(libcurl) >= 7.29.0
BuildRequires: pkgconfig(sqlite3) >= 3.7.17
BuildRequires: pkgconfig(libarchive) >= 3.1.2
+# For debuginfod metadata query
+BuildRequires: pkgconfig(json-c) >= 0.11
# For tests need to bunzip2 test files.
BuildRequires: bzip2
@@ -42,6 +44,8 @@ BuildRequires: bsdtar
BuildRequires: curl
# For run-debuginfod-response-headers.sh test case
BuildRequires: socat
+# For run-debuginfod-find-metadata.sh
+BuildRequires: jq
# For debuginfod rpm IMA verification
BuildRequires: rpm-devel
diff --git a/configure.ac b/configure.ac
index 5adf766720e4..836d61ea6c0d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -863,9 +863,6 @@ AS_IF([test "x$enable_libdebuginfod" != "xno"], [
enable_libdebuginfod=yes # presume success
PKG_PROG_PKG_CONFIG
PKG_CHECK_MODULES([libcurl],[libcurl >= 7.29.0],[],[enable_libdebuginfod=no])
- if test "x$enable_libdebuginfod" = "xno"; then
- AC_MSG_ERROR([dependencies not found, use --disable-libdebuginfod to disable or --enable-libdebuginfod=dummy to build a (bootstrap) dummy library.])
- fi
else
AC_MSG_NOTICE([building (bootstrap) dummy libdebuginfo library])
fi
@@ -899,10 +896,8 @@ AS_IF([test "x$enable_debuginfod" != "xno"], [
PKG_CHECK_MODULES([libmicrohttpd],[libmicrohttpd >= 0.9.33],[],[enable_debuginfod=no])
PKG_CHECK_MODULES([oldlibmicrohttpd],[libmicrohttpd < 0.9.51],[old_libmicrohttpd=yes],[old_libmicrohttpd=no])
PKG_CHECK_MODULES([sqlite3],[sqlite3 >= 3.7.17],[],[enable_debuginfod=no])
- PKG_CHECK_MODULES([libarchive],[libarchive >= 3.1.2],[],[enable_debuginfod=no], AC_DEFINE([HAVE_LIBARCHIVE], [0], [Define to 0 if libarchive is not available]))
- if test "x$enable_debuginfod" = "xno"; then
- AC_MSG_ERROR([dependencies not found, use --disable-debuginfod to disable.])
- fi
+ PKG_CHECK_MODULES([libarchive],[libarchive >= 3.1.2],[],[enable_debuginfod=no])
+ PKG_CHECK_MODULES([jsonc],[json-c >= 0.11],[],[enable_debuginfod=no])
])
AS_IF([test "x$enable_debuginfod" != "xno"],AC_DEFINE([ENABLE_DEBUGINFOD],[1],[Build debuginfod]))
diff --git a/debuginfod/Makefile.am b/debuginfod/Makefile.am
index 5e4f9669d7c1..b74e3673a97e 100644
--- a/debuginfod/Makefile.am
+++ b/debuginfod/Makefile.am
@@ -33,7 +33,7 @@ include $(top_srcdir)/config/eu.am
AM_CPPFLAGS += -I$(srcdir) -I$(srcdir)/../libelf -I$(srcdir)/../libebl \
-I$(srcdir)/../libdw -I$(srcdir)/../libdwelf \
$(libmicrohttpd_CFLAGS) $(libcurl_CFLAGS) $(sqlite3_CFLAGS) \
- $(libarchive_CFLAGS)
+ $(libarchive_CFLAGS) $(jsonc_CFLAGS)
# Disable eu- prefixing for artifacts (binaries & man pages) in this
# directory, since they do not conflict with binutils tools.
@@ -70,10 +70,10 @@ bin_PROGRAMS += debuginfod-find
endif
debuginfod_SOURCES = debuginfod.cxx
-debuginfod_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) $(libmicrohttpd_LIBS) $(sqlite3_LIBS) $(libarchive_LIBS) $(rpm_LIBS) -lpthread -ldl
+debuginfod_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) $(libmicrohttpd_LIBS) $(sqlite3_LIBS) $(libarchive_LIBS) $(rpm_LIBS) $(jsonc_LIBS) $(libcurl_LIBS) -lpthread -ldl
debuginfod_find_SOURCES = debuginfod-find.c
-debuginfod_find_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS)
+debuginfod_find_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) $(jsonc_LIBS)
if LIBDEBUGINFOD
noinst_LIBRARIES = libdebuginfod.a
@@ -97,7 +97,7 @@ libdebuginfod_so_LIBS = libdebuginfod_pic.a
if DUMMY_LIBDEBUGINFOD
libdebuginfod_so_LDLIBS =
else
-libdebuginfod_so_LDLIBS = -lpthread $(libcurl_LIBS) $(fts_LIBS) $(libelf) $(crypto_LIBS)
+libdebuginfod_so_LDLIBS = -lpthread $(libcurl_LIBS) $(fts_LIBS) $(libelf) $(crypto_LIBS) $(jsonc_LIBS)
endif
$(LIBDEBUGINFOD_SONAME): $(srcdir)/libdebuginfod.map $(libdebuginfod_so_LIBS)
$(AM_V_CCLD)$(LINK) $(dso_LDFLAGS) -o $@ \
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c
index f01d1f0e55fa..c75abadf7dce 100644
--- a/debuginfod/debuginfod-client.c
+++ b/debuginfod/debuginfod-client.c
@@ -71,6 +71,8 @@ int debuginfod_find_source (debuginfod_client *c, const unsigned char *b,
int debuginfod_find_section (debuginfod_client *c, const unsigned char *b,
int s, const char *scn, char **p)
{ return -ENOSYS; }
+int debuginfod_find_metadata (debuginfod_client *c,
+ const char *k, char *v, char **p) { return -ENOSYS; }
void debuginfod_set_progressfn(debuginfod_client *c,
debuginfod_progressfn_t fn) { }
void debuginfod_set_verbose_fd(debuginfod_client *c, int fd) { }
@@ -104,6 +106,7 @@ void debuginfod_end (debuginfod_client *c) { }
#include <sys/utsname.h>
#include <curl/curl.h>
#include <fnmatch.h>
+#include <json-c/json.h>
/* If fts.h is included before config.h, its indirect inclusions may not
give us the right LFS aliases of these functions, so map them manually. */
@@ -211,6 +214,11 @@ static const char *cache_miss_filename = "cache_miss_s";
static const char *cache_max_unused_age_filename = "max_unused_age_s";
static const long cache_default_max_unused_age_s = 604800; /* 1 week */
+/* The metadata_retention_default_s file within the debuginfod cache
+ specifies how long metadata query results should be cached. */
+static const long metadata_retention_default_s = 3600; /* 1 hour */
+static const char *metadata_retention_filename = "metadata_retention_s";
+
/* Location of the cache of files downloaded from debuginfods.
The default parent directory is $HOME, or '/' if $HOME doesn't exist. */
static const char *cache_default_name = ".debuginfod_client_cache";
@@ -249,9 +257,14 @@ struct handle_data
to the cache. Used to ensure that a file is not downloaded from
multiple servers unnecessarily. */
CURL **target_handle;
+
/* Response http headers for this client handle, sent from the server */
char *response_data;
size_t response_data_size;
+
+ /* Response metadata values for this client handle, sent from the server */
+ char *metadata;
+ size_t metadata_size;
};
@@ -556,7 +569,8 @@ debuginfod_clean_cache(debuginfod_client *c,
return -errno;
regex_t re;
- const char * pattern = ".*/[a-f0-9]+(/debuginfo|/executable|/source.*|)$"; /* include dirs */
+ const char * pattern = ".*/(metadata.*|[a-f0-9]+(/debuginfo|/executable|/source.*|))$"; /* include dirs */
+ /* NB: also matches .../section/ subdirs, so extracted section files also get cleaned. */
if (regcomp (&re, pattern, REG_EXTENDED | REG_NOSUB) != 0)
return -ENOMEM;
@@ -794,18 +808,9 @@ header_callback (char * buffer, size_t size, size_t numitems, void * userdata)
}
/* Temporary buffer for realloc */
char *temp = NULL;
- if (data->response_data == NULL)
- {
- temp = malloc(numitems);
- if (temp == NULL)
- return 0;
- }
- else
- {
- temp = realloc(data->response_data, data->response_data_size + numitems);
- if (temp == NULL)
- return 0;
- }
+ temp = realloc(data->response_data, data->response_data_size + numitems);
+ if (temp == NULL)
+ return 0;
memcpy(temp + data->response_data_size, buffer, numitems-1);
data->response_data = temp;
@@ -815,6 +820,386 @@ header_callback (char * buffer, size_t size, size_t numitems, void * userdata)
return numitems;
}
+
+static size_t
+metadata_callback (char * buffer, size_t size, size_t numitems, void * userdata)
+{
+ if (size != 1)
+ return 0;
+ /* Temporary buffer for realloc */
+ char *temp = NULL;
+ struct handle_data *data = (struct handle_data *) userdata;
+ temp = realloc(data->metadata, data->metadata_size + numitems + 1);
+ if (temp == NULL)
+ return 0;
+
+ memcpy(temp + data->metadata_size, buffer, numitems);
+ data->metadata = temp;
+ data->metadata_size += numitems;
+ data->metadata[data->metadata_size] = '\0';
+ return numitems;
+}
+
+
+/* This function takes a copy of DEBUGINFOD_URLS, server_urls, and
+ * separates it into an array of urls to query, each with a
+ * corresponding IMA policy. The url_subdir is either 'buildid' or
+ * 'metadata', corresponding to the query type. Returns 0 on success
+ * and -Posix error on failure.
+ */
+int
+init_server_urls(char* url_subdir, const char* type,
+ char *server_urls, char ***server_url_list, ima_policy_t **url_ima_policies,
+ int *num_urls, int vfd)
+{
+ /* Initialize the memory to zero */
+ char *strtok_saveptr;
+ ima_policy_t verification_mode = ignore; // The default mode
+ char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
+ /* Count number of URLs. */
+ int n = 0;
+ assert (0 == strcmp(url_subdir, "buildid") || 0 == strcmp(url_subdir, "metadata"));
+
+ while (server_url != NULL)
+ {
+ int r;
+ char *tmp_url;
+ if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/')
+ r = asprintf(&tmp_url, "%s%s", server_url, url_subdir);
+ else
+ r = asprintf(&tmp_url, "%s/%s", server_url, url_subdir);
+
+ if (r == -1)
+ return -ENOMEM;
+
+ // When we encounter a (well-formed) token of the form ima:foo, we update the policy
+ // under which results from that server will be ima verified
+ if (startswith(server_url, "ima:"))
+ {
+#ifdef ENABLE_IMA_VERIFICATION
+ ima_policy_t m = ima_policy_str2enum(server_url + strlen("ima:"));
+ if(m != undefined)
+ verification_mode = m;
+ else if (vfd >= 0)
+ dprintf(vfd, "IMA mode not recognized, skipping %s\n", server_url);
+#else
+ if (vfd >= 0)
+ dprintf(vfd, "IMA signature verification is not enabled, treating %s as ima:ignore\n", server_url);
+#endif
+ goto continue_next_url;
+ }
+
+ if (verification_mode==enforcing &&
+ 0==strcmp(url_subdir, "buildid") &&
+ 0==strcmp(type,"section")) // section queries are unsecurable
+ {
+ if (vfd >= 0)
+ dprintf(vfd, "skipping server %s section query in IMA enforcing mode\n", server_url);
+ goto continue_next_url;
+ }
+
+ /* PR 27983: If the url is duplicate, skip it */
+ int url_index;
+ for (url_index = 0; url_index < n; ++url_index)
+ {
+ if(strcmp(tmp_url, (*server_url_list)[url_index]) == 0)
+ {
+ url_index = -1;
+ break;
+ }
+ }
+ if (url_index == -1)
+ {
+ if (vfd >= 0)
+ dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url);
+ free(tmp_url);
+ }
+ else
+ {
+ /* Have unique URL, save it, along with its IMA verification tag. */
+ n ++;
+ if (NULL == (*server_url_list = reallocarray(*server_url_list, n, sizeof(char*)))
+ || NULL == (*url_ima_policies = reallocarray(*url_ima_policies, n, sizeof(ima_policy_t))))
+ {
+ free (tmp_url);
+ return -ENOMEM;
+ }
+ (*server_url_list)[n-1] = tmp_url;
+ if(NULL != url_ima_policies) (*url_ima_policies)[n-1] = verification_mode;
+ }
+
+ continue_next_url:
+ server_url = strtok_r(NULL, url_delim, &strtok_saveptr);
+ }
+ *num_urls = n;
+ return 0;
+}
+
+/* Some boilerplate for checking curl_easy_setopt. */
+#define curl_easy_setopt_ck(H,O,P) do { \
+ CURLcode curl_res = curl_easy_setopt (H,O,P); \
+ if (curl_res != CURLE_OK) \
+ { \
+ if (vfd >= 0) \
+ dprintf (vfd, \
+ "Bad curl_easy_setopt: %s\n", \
+ curl_easy_strerror(curl_res)); \
+ return -EINVAL; \
+ } \
+ } while (0)
+
+
+/*
+ * This function initializes a CURL handle. It takes optional callbacks for the write
+ * function and the header function, which if defined will use userdata of type struct handle_data*.
+ * Specifically the data[i] within an array of struct handle_data's.
+ * Returns 0 on success and -Posix error on failure.
+ */
+int
+init_handle(debuginfod_client *client,
+ size_t (*w_callback)(char *buffer, size_t size, size_t nitems, void *userdata),
+ size_t (*h_callback)(char *buffer, size_t size, size_t nitems, void *userdata),
+ struct handle_data *data, int i, long timeout,
+ int vfd)
+{
+ data->handle = curl_easy_init();
+ if (data->handle == NULL)
+ return -ENETUNREACH;
+
+ if (vfd >= 0)
+ dprintf (vfd, "url %d %s\n", i, data->url);
+
+ /* Only allow http:// + https:// + file:// so we aren't being
+ redirected to some unsupported protocol.
+ libcurl will fail if we request a single protocol that is not
+ available. https missing is the most likely issue */
+#if CURL_AT_LEAST_VERSION(7, 85, 0)
+ curl_easy_setopt_ck(data->handle, CURLOPT_PROTOCOLS_STR,
+ curl_has_https ? "https,http,file" : "http,file");
+#else
+ curl_easy_setopt_ck(data->handle, CURLOPT_PROTOCOLS,
+ ((curl_has_https ? CURLPROTO_HTTPS : 0) | CURLPROTO_HTTP | CURLPROTO_FILE));
+#endif
+ curl_easy_setopt_ck(data->handle, CURLOPT_URL, data->url);
+ if (vfd >= 0)
+ curl_easy_setopt_ck(data->handle, CURLOPT_ERRORBUFFER,
+ data->errbuf);
+ if (w_callback)
+ {
+ curl_easy_setopt_ck(data->handle,
+ CURLOPT_WRITEFUNCTION, w_callback);
+ curl_easy_setopt_ck(data->handle, CURLOPT_WRITEDATA, data);
+ }
+ if (timeout > 0)
+ {
+ /* Make sure there is at least some progress,
+ try to get at least 100K per timeout seconds. */
+ curl_easy_setopt_ck (data->handle, CURLOPT_LOW_SPEED_TIME,
+ timeout);
+ curl_easy_setopt_ck (data->handle, CURLOPT_LOW_SPEED_LIMIT,
+ 100 * 1024L);
+ }
+ curl_easy_setopt_ck(data->handle, CURLOPT_FILETIME, (long) 1);
+ curl_easy_setopt_ck(data->handle, CURLOPT_FOLLOWLOCATION, (long) 1);
+ curl_easy_setopt_ck(data->handle, CURLOPT_FAILONERROR, (long) 1);
+ curl_easy_setopt_ck(data->handle, CURLOPT_NOSIGNAL, (long) 1);
+ if (h_callback)
+ {
+ curl_easy_setopt_ck(data->handle,
+ CURLOPT_HEADERFUNCTION, h_callback);
+ curl_easy_setopt_ck(data->handle, CURLOPT_HEADERDATA, data);
+ }
+ #if LIBCURL_VERSION_NUM >= 0x072a00 /* 7.42.0 */
+ curl_easy_setopt_ck(data->handle, CURLOPT_PATH_AS_IS, (long) 1);
+ #else
+ /* On old curl; no big deal, canonicalization here is almost the
+ same, except perhaps for ? # type decorations at the tail. */
+ #endif
+ curl_easy_setopt_ck(data->handle, CURLOPT_AUTOREFERER, (long) 1);
+ curl_easy_setopt_ck(data->handle, CURLOPT_ACCEPT_ENCODING, "");
+ curl_easy_setopt_ck(data->handle, CURLOPT_HTTPHEADER, client->headers);
+
+ return 0;
+}
+
+
+/*
+ * This function busy-waits on one or more curl queries to complete. This can
+ * be controled via only_one, which, if true, will find the first winner and exit
+ * once found. If positive maxtime and maxsize dictate the maximum allowed wait times
+ * and download sizes respectively. Returns 0 on success and -Posix error on failure.
+ */
+int
+perform_queries(CURLM *curlm, CURL **target_handle, struct handle_data *data, debuginfod_client *c,
+ int num_urls, long maxtime, long maxsize, bool only_one, int vfd, int *committed_to)
+{
+ int still_running = -1;
+ long loops = 0;
+ *committed_to = -1;
+ bool verbose_reported = false;
+ struct timespec start_time, cur_time;
+ if (c->winning_headers != NULL)
+ {
+ free (c->winning_headers);
+ c->winning_headers = NULL;
+ }
+ if (maxtime > 0 && clock_gettime(CLOCK_MONOTONIC_RAW, &start_time) == -1)
+ return -errno;
+ long delta = 0;
+ do
+ {
+ /* Check to see how long querying is taking. */
+ if (maxtime > 0)
+ {
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time) == -1)
+ return -errno;
+ delta = cur_time.tv_sec - start_time.tv_sec;
+ if ( delta > maxtime)
+ {
+ dprintf(vfd, "Timeout with max time=%lds and transfer time=%lds\n", maxtime, delta );
+ return -ETIME;
+ }
+ }
+ /* Wait 1 second, the minimum DEBUGINFOD_TIMEOUT. */
+ curl_multi_wait(curlm, NULL, 0, 1000, NULL);
+ CURLMcode curlm_res = curl_multi_perform(curlm, &still_running);
+
+ if (only_one)
+ {
+ /* If the target file has been found, abort the other queries. */
+ if (target_handle && *target_handle != NULL)
+ {
+ for (int i = 0; i < num_urls; i++)
+ if (data[i].handle != *target_handle)
+ curl_multi_remove_handle(curlm, data[i].handle);
+ else
+ {
+ *committed_to = i;
+ if (c->winning_headers == NULL)
+ {
+ c->winning_headers = data[*committed_to].response_data;
+ if (vfd >= 0 && c->winning_headers != NULL)
+ dprintf(vfd, "\n%s", c->winning_headers);
+ data[*committed_to].response_data = NULL;
+ data[*committed_to].response_data_size = 0;
+ }
+ }
+ }
+
+ if (vfd >= 0 && !verbose_reported && *committed_to >= 0)
+ {
+ bool pnl = (c->default_progressfn_printed_p && vfd == STDERR_FILENO);
+ dprintf (vfd, "%scommitted to url %d\n", pnl ? "\n" : "",
+ *committed_to);
+ if (pnl)
+ c->default_progressfn_printed_p = 0;
+ verbose_reported = true;
+ }
+ }
+
+ if (curlm_res != CURLM_OK)
+ {
+ switch (curlm_res)
+ {
+ case CURLM_CALL_MULTI_PERFORM: continue;
+ case CURLM_OUT_OF_MEMORY: return -ENOMEM;
+ default: return -ENETUNREACH;
+ }
+ }
+
+ long dl_size = -1;
+ if (only_one && target_handle)
+ { // Only bother with progress functions if we're retrieving exactly 1 file
+ if (*target_handle && (c->progressfn || maxsize > 0))
+ {
+ /* Get size of file being downloaded. NB: If going through
+ deflate-compressing proxies, this number is likely to be
+ unavailable, so -1 may show. */
+ CURLcode curl_res;
+#if CURL_AT_LEAST_VERSION(7, 55, 0)
+ curl_off_t cl;
+ curl_res = curl_easy_getinfo(*target_handle,
+ CURLINFO_CONTENT_LENGTH_DOWNLOAD_T,
+ &cl);
+ if (curl_res == CURLE_OK && cl >= 0)
+ dl_size = (cl > LONG_MAX ? LONG_MAX : (long)cl);
+#else
+ double cl;
+ curl_res = curl_easy_getinfo(*target_handle,
+ CURLINFO_CONTENT_LENGTH_DOWNLOAD,
+ &cl);
+ if (curl_res == CURLE_OK && cl >= 0)
+ dl_size = (cl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)cl);
+#endif
+ /* If Content-Length is -1, try to get the size from
+ X-Debuginfod-Size */
+ if (dl_size == -1 && c->winning_headers != NULL)
+ {
+ long xdl;
+ char *hdr = strcasestr(c->winning_headers, "x-debuginfod-size");
+ size_t off = strlen("x-debuginfod-size:");
+
+ if (hdr != NULL && sscanf(hdr + off, "%ld", &xdl) == 1)
+ dl_size = xdl;
+ }
+ }
+
+ if (c->progressfn) /* inform/check progress callback */
+ {
+ loops ++;
+ long pa = loops; /* default param for progress callback */
+ if (*target_handle) /* we've committed to a server; report its download progress */
+ {
+ /* PR30809: Check actual size of cached file. This same
+ fd is shared by all the multi-curl handles (but only
+ one will end up writing to it). Another way could be
+ to tabulate totals in debuginfod_write_callback(). */
+ struct stat cached;
+ int statrc = fstat(data[*committed_to].fd, &cached);
+ if (statrc == 0)
+ pa = (long) cached.st_size;
+ else
+ {
+ /* Otherwise, query libcurl for its tabulated total.
+ However, that counts http body length, not
+ decoded/decompressed content length, so does not
+ measure quite the same thing as dl. */
+ CURLcode curl_res;
+#if CURL_AT_LEAST_VERSION(7, 55, 0)
+ curl_off_t dl;
+ curl_res = curl_easy_getinfo(*target_handle,
+ CURLINFO_SIZE_DOWNLOAD_T,
+ &dl);
+ if (curl_res == 0 && dl >= 0)
+ pa = (dl > LONG_MAX ? LONG_MAX : (long)dl);
+#else
+ double dl;
+ curl_res = curl_easy_getinfo(*target_handle,
+ CURLINFO_SIZE_DOWNLOAD,
+ &dl);
+ if (curl_res == 0)
+ pa = (dl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)dl);
+#endif
+ }
+
+ if ((*c->progressfn) (c, pa, dl_size == -1 ? 0 : dl_size))
+ break;
+ }
+ }
+ }
+ /* Check to see if we are downloading something which exceeds maxsize, if set.*/
+ if (target_handle && *target_handle && dl_size > maxsize && maxsize > 0)
+ {
+ if (vfd >=0)
+ dprintf(vfd, "Content-Length too large.\n");
+ return -EFBIG;
+ }
+ } while (still_running);
+
+ return 0;
+}
+
+
/* Copy SRC to DEST, s,/,#,g */
static void
@@ -1258,56 +1643,134 @@ debuginfod_validate_imasig (debuginfod_client *c, int fd)
-/* Query each of the server URLs found in $DEBUGINFOD_URLS for the file
- with the specified build-id and type (debuginfo, executable, source or
- section). If type is source, then type_arg should be a filename. If
- type is section, then type_arg should be the name of an ELF/DWARF
- section. Otherwise type_arg may be NULL. Return a file descriptor
- for the target if successful, otherwise return an error code.
-*/
-static int
-debuginfod_query_server (debuginfod_client *c,
- const unsigned char *build_id,
- int build_id_len,
- const char *type,
- const char *type_arg,
- char **path)
-{
- char *server_urls;
- char *urls_envvar;
- const char *section = NULL;
- const char *filename = NULL;
- char *cache_path = NULL;
- char *maxage_path = NULL;
- char *interval_path = NULL;
- char *cache_miss_path = NULL;
- char *target_cache_dir = NULL;
- char *target_cache_path = NULL;
- char *target_cache_tmppath = NULL;
- char suffix[PATH_MAX + 1]; /* +1 for zero terminator. */
- char build_id_bytes[MAX_BUILD_ID_BYTES * 2 + 1];
- int vfd = c->verbose_fd;
- int rc;
- c->progressfn_cancel = false;
+/* Helper function to create client cache directory.
+ $XDG_CACHE_HOME takes priority over $HOME/.cache.
+ $DEBUGINFOD_CACHE_PATH takes priority over $HOME/.cache and $XDG_CACHE_HOME.
- if (strcmp (type, "source") == 0)
- filename = type_arg;
- else if (strcmp (type, "section") == 0)
+ Return resulting path name or NULL on error. Caller must free resulting string.
+ */
+static char *
+make_cache_path(void)
+{
+ char* cache_path = NULL;
+ int rc = 0;
+ /* Determine location of the cache. The path specified by the debuginfod
+ cache environment variable takes priority. */
+ char *cache_var = getenv(DEBUGINFOD_CACHE_PATH_ENV_VAR);
+ if (cache_var != NULL && strlen (cache_var) > 0)
+ xalloc_str (cache_path, "%s", cache_var);
+ else
{
- section = type_arg;
- if (section == NULL)
- return -EINVAL;
- }
+ /* If a cache already exists in $HOME ('/' if $HOME isn't set), then use
+ that. Otherwise use the XDG cache directory naming format. */
+ xalloc_str (cache_path, "%s/%s", getenv ("HOME") ?: "/", cache_default_name);
- if (vfd >= 0)
- {
- dprintf (vfd, "debuginfod_find_%s ", type);
- if (build_id_len == 0) /* expect clean hexadecimal */
- dprintf (vfd, "%s", (const char *) build_id);
- else
- for (int i = 0; i < build_id_len; i++)
- dprintf (vfd, "%02x", build_id[i]);
+ struct stat st;
+ if (stat (cache_path, &st) < 0)
+ {
+ char cachedir[PATH_MAX];
+ char *xdg = getenv ("XDG_CACHE_HOME");
+
+ if (xdg != NULL && strlen (xdg) > 0)
+ snprintf (cachedir, PATH_MAX, "%s", xdg);
+ else
+ snprintf (cachedir, PATH_MAX, "%s/.cache", getenv ("HOME") ?: "/");
+
+ /* Create XDG cache directory if it doesn't exist. */
+ if (stat (cachedir, &st) == 0)
+ {
+ if (! S_ISDIR (st.st_mode))
+ {
+ rc = -EEXIST;
+ goto out1;
+ }
+ }
+ else
+ {
+ rc = mkdir (cachedir, 0700);
+
+ /* Also check for EEXIST and S_ISDIR in case another client just
+ happened to create the cache. */
+ if (rc < 0
+ && (errno != EEXIST
+ || stat (cachedir, &st) != 0
+ || ! S_ISDIR (st.st_mode)))
+ {
+ rc = -errno;
+ goto out1;
+ }
+ }
+
+ free (cache_path);
+ xalloc_str (cache_path, "%s/%s", cachedir, cache_xdg_name);
+ }
+ }
+
+ goto out;
+
+ out1:
+ (void) rc;
+ free (cache_path);
+ cache_path = NULL;
+
+ out:
+ if (cache_path != NULL)
+ (void) mkdir (cache_path, 0700); // failures with this mkdir would be caught later too
+ return cache_path;
+}
+
+
+/* Query each of the server URLs found in $DEBUGINFOD_URLS for the file
+ with the specified build-id and type (debuginfo, executable, source or
+ section). If type is source, then type_arg should be a filename. If
+ type is section, then type_arg should be the name of an ELF/DWARF
+ section. Otherwise type_arg may be NULL. Return a file descriptor
+ for the target if successful, otherwise return an error code.
+*/
+static int
+debuginfod_query_server_by_buildid (debuginfod_client *c,
+ const unsigned char *build_id,
+ int build_id_len,
+ const char *type,
+ const char *type_arg,
+ char **path)
+{
+ char *server_urls;
+ char *urls_envvar;
+ const char *section = NULL;
+ const char *filename = NULL;
+ char *cache_path = NULL;
+ char *maxage_path = NULL;
+ char *interval_path = NULL;
+ char *cache_miss_path = NULL;
+ char *target_cache_dir = NULL;
+ char *target_cache_path = NULL;
+ char *target_cache_tmppath = NULL;
+ char suffix[PATH_MAX + 1]; /* +1 for zero terminator. */
+ char build_id_bytes[MAX_BUILD_ID_BYTES * 2 + 1];
+ int vfd = c->verbose_fd;
+ int rc, r;
+
+ c->progressfn_cancel = false;
+
+ if (strcmp (type, "source") == 0)
+ filename = type_arg;
+ else if (strcmp (type, "section") == 0)
+ {
+ section = type_arg;
+ if (section == NULL)
+ return -EINVAL;
+ }
+
+ if (vfd >= 0)
+ {
+ dprintf (vfd, "debuginfod_find_%s ", type);
+ if (build_id_len == 0) /* expect clean hexadecimal */
+ dprintf (vfd, "%s", (const char *) build_id);
+ else
+ for (int i = 0; i < build_id_len; i++)
+ dprintf (vfd, "%02x", build_id[i]);
if (filename != NULL)
dprintf (vfd, " %s\n", filename);
dprintf (vfd, "\n");
@@ -1412,70 +1875,22 @@ debuginfod_query_server (debuginfod_client *c,
dprintf (vfd, "suffix %s\n", suffix);
/* set paths needed to perform the query
-
- example format
+ example format:
cache_path: $HOME/.cache
target_cache_dir: $HOME/.cache/0123abcd
target_cache_path: $HOME/.cache/0123abcd/debuginfo
target_cache_path: $HOME/.cache/0123abcd/source#PATH#TO#SOURCE ?
-
- $XDG_CACHE_HOME takes priority over $HOME/.cache.
- $DEBUGINFOD_CACHE_PATH takes priority over $HOME/.cache and $XDG_CACHE_HOME.
*/
- /* Determine location of the cache. The path specified by the debuginfod
- cache environment variable takes priority. */
- char *cache_var = getenv(DEBUGINFOD_CACHE_PATH_ENV_VAR);
- if (cache_var != NULL && strlen (cache_var) > 0)
- xalloc_str (cache_path, "%s", cache_var);
- else
+ cache_path = make_cache_path();
+ if (!cache_path)
{
- /* If a cache already exists in $HOME ('/' if $HOME isn't set), then use
- that. Otherwise use the XDG cache directory naming format. */
- xalloc_str (cache_path, "%s/%s", getenv ("HOME") ?: "/", cache_default_name);
-
- struct stat st;
- if (stat (cache_path, &st) < 0)
- {
- char cachedir[PATH_MAX];
- char *xdg = getenv ("XDG_CACHE_HOME");
-
- if (xdg != NULL && strlen (xdg) > 0)
- snprintf (cachedir, PATH_MAX, "%s", xdg);
- else
- snprintf (cachedir, PATH_MAX, "%s/.cache", getenv ("HOME") ?: "/");
-
- /* Create XDG cache directory if it doesn't exist. */
- if (stat (cachedir, &st) == 0)
- {
- if (! S_ISDIR (st.st_mode))
- {
- rc = -EEXIST;
- goto out;
- }
- }
- else
- {
- rc = mkdir (cachedir, 0700);
-
- /* Also check for EEXIST and S_ISDIR in case another client just
- happened to create the cache. */
- if (rc < 0
- && (errno != EEXIST
- || stat (cachedir, &st) != 0
- || ! S_ISDIR (st.st_mode)))
- {
- rc = -errno;
- goto out;
- }
- }
-
- free (cache_path);
- xalloc_str (cache_path, "%s/%s", cachedir, cache_xdg_name);
- }
+ rc = -ENOMEM;
+ goto out;
}
-
xalloc_str (target_cache_dir, "%s/%s", cache_path, build_id_bytes);
+ (void) mkdir (target_cache_dir, 0700); // failures with this mkdir would be caught later too
+
if (section != NULL)
xalloc_str (target_cache_path, "%s/%s-%s", target_cache_dir, type, suffix);
else
@@ -1594,102 +2009,32 @@ debuginfod_query_server (debuginfod_client *c,
/* thereafter, goto out0 on error*/
/* Because of a race with cache cleanup / rmdir, try to mkdir/mkstemp up to twice. */
- for(int i=0; i<2; i++) {
- /* (re)create target directory in cache */
- (void) mkdir(target_cache_dir, 0700); /* files will be 0400 later */
-
- /* NB: write to a temporary file first, to avoid race condition of
- multiple clients checking the cache, while a partially-written or empty
- file is in there, being written from libcurl. */
- fd = mkstemp (target_cache_tmppath);
- if (fd >= 0) break;
- }
+ for(int i=0; i<2; i++)
+ {
+ /* (re)create target directory in cache */
+ (void) mkdir(target_cache_dir, 0700); /* files will be 0400 later */
+
+ /* NB: write to a temporary file first, to avoid race condition of
+ multiple clients checking the cache, while a partially-written or empty
+ file is in there, being written from libcurl. */
+ fd = mkstemp (target_cache_tmppath);
+ if (fd >= 0) break;
+ }
if (fd < 0) /* Still failed after two iterations. */
{
rc = -errno;
goto out0;
}
- /* Initialize the memory to zero */
- char *strtok_saveptr;
char **server_url_list = NULL;
ima_policy_t* url_ima_policies = NULL;
- char* server_url;
- /* Count number of URLs. */
- int num_urls = 0;
-
- ima_policy_t verification_mode = ignore; // The default mode
- for(server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
- server_url != NULL; server_url = strtok_r(NULL, url_delim, &strtok_saveptr))
+ char *server_url;
+ int num_urls;
+ r = init_server_urls("buildid", type, server_urls, &server_url_list, &url_ima_policies, &num_urls, vfd);
+ if (0 != r)
{
- // When we encounted a (well-formed) token off the form ima:foo, we update the policy
- // under which results from that server will be ima verified
- if(startswith(server_url, "ima:"))
- {
-#ifdef ENABLE_IMA_VERIFICATION
- ima_policy_t m = ima_policy_str2enum(server_url + strlen("ima:"));
- if(m != undefined)
- verification_mode = m;
- else if (vfd >= 0)
- dprintf(vfd, "IMA mode not recognized, skipping %s\n", server_url);
-#else
- if (vfd >= 0)
- dprintf(vfd, "IMA signature verification is not enabled, skipping %s\n", server_url);
-#endif
- continue; // Not a url, just a mode change so keep going
- }
-
- if (verification_mode==enforcing && 0==strcmp(type,"section"))
- {
- if (vfd >= 0)
- dprintf(vfd, "skipping server %s section query in IMA enforcing mode\n", server_url);
- continue;
- }
-
- /* PR 27983: If the url is already set to be used use, skip it */
- char *slashbuildid;
- if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/')
- slashbuildid = "buildid";
- else
- slashbuildid = "/buildid";
-
- char *tmp_url;
- if (asprintf(&tmp_url, "%s%s", server_url, slashbuildid) == -1)
- {
- rc = -ENOMEM;
- goto out1;
- }
- int url_index;
- for (url_index = 0; url_index < num_urls; ++url_index)
- {
- if(strcmp(tmp_url, server_url_list[url_index]) == 0)
- {
- url_index = -1;
- break;
- }
- }
- if (url_index == -1)
- {
- if (vfd >= 0)
- dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url);
- free(tmp_url);
- }
- else
- {
- num_urls++;
- if (NULL == (server_url_list = reallocarray(server_url_list, num_urls, sizeof(char*)))
-#ifdef ENABLE_IMA_VERIFICATION
- || NULL == (url_ima_policies = reallocarray(url_ima_policies, num_urls, sizeof(ima_policy_t)))
-#endif
- )
- {
- free (tmp_url);
- rc = -ENOMEM;
- goto out1;
- }
- server_url_list[num_urls-1] = tmp_url;
- if(NULL != url_ima_policies) url_ima_policies[num_urls-1] = verification_mode;
- }
+ rc = r;
+ goto out1;
}
/* No URLs survived parsing / filtering? Abort abort abort. */
@@ -1773,262 +2118,43 @@ debuginfod_query_server (debuginfod_client *c,
data[i].fd = fd;
data[i].target_handle = &target_handle;
- data[i].handle = curl_easy_init();
- if (data[i].handle == NULL)
- {
- if (filename) curl_free (escaped_string);
- rc = -ENETUNREACH;
- goto out2;
- }
data[i].client = c;
- if (filename) /* must start with / */
- {
- /* PR28034 escape characters in completed url to %hh format. */
- snprintf(data[i].url, PATH_MAX, "%s/%s/%s/%s", server_url,
- build_id_bytes, type, escaped_string);
- }
- else if (section)
- snprintf(data[i].url, PATH_MAX, "%s/%s/%s/%s", server_url,
- build_id_bytes, type, section);
- else
- snprintf(data[i].url, PATH_MAX, "%s/%s/%s", server_url, build_id_bytes, type);
- if (vfd >= 0)
- dprintf (vfd, "url %d %s\n", i, data[i].url);
-
- /* Some boilerplate for checking curl_easy_setopt. */
-#define curl_easy_setopt_ck(H,O,P) do { \
- CURLcode curl_res = curl_easy_setopt (H,O,P); \
- if (curl_res != CURLE_OK) \
- { \
- if (vfd >= 0) \
- dprintf (vfd, \
- "Bad curl_easy_setopt: %s\n", \
- curl_easy_strerror(curl_res)); \
- rc = -EINVAL; \
- goto out2; \
- } \
- } while (0)
-
- /* Only allow http:// + https:// + file:// so we aren't being
- redirected to some unsupported protocol.
- libcurl will fail if we request a single protocol that is not
- available. https missing is the most likely issue */
-#if CURL_AT_LEAST_VERSION(7, 85, 0)
- curl_easy_setopt_ck(data[i].handle, CURLOPT_PROTOCOLS_STR,
- curl_has_https ? "https,http,file" : "http,file");
-#else
- curl_easy_setopt_ck(data[i].handle, CURLOPT_PROTOCOLS,
- ((curl_has_https ? CURLPROTO_HTTPS : 0) | CURLPROTO_HTTP | CURLPROTO_FILE));
-#endif
- curl_easy_setopt_ck(data[i].handle, CURLOPT_URL, data[i].url);
- if (vfd >= 0)
- curl_easy_setopt_ck(data[i].handle, CURLOPT_ERRORBUFFER,
- data[i].errbuf);
- curl_easy_setopt_ck(data[i].handle,
- CURLOPT_WRITEFUNCTION,
- debuginfod_write_callback);
- curl_easy_setopt_ck(data[i].handle, CURLOPT_WRITEDATA, (void*)&data[i]);
- if (timeout > 0)
- {
- /* Make sure there is at least some progress,
- try to get at least 100K per timeout seconds. */
- curl_easy_setopt_ck (data[i].handle, CURLOPT_LOW_SPEED_TIME,
- timeout);
- curl_easy_setopt_ck (data[i].handle, CURLOPT_LOW_SPEED_LIMIT,
- 100 * 1024L);
- }
- curl_easy_setopt_ck(data[i].handle, CURLOPT_FILETIME, (long) 1);
- curl_easy_setopt_ck(data[i].handle, CURLOPT_FOLLOWLOCATION, (long) 1);
- curl_easy_setopt_ck(data[i].handle, CURLOPT_FAILONERROR, (long) 1);
- curl_easy_setopt_ck(data[i].handle, CURLOPT_NOSIGNAL, (long) 1);
- curl_easy_setopt_ck(data[i].handle, CURLOPT_HEADERFUNCTION,
- header_callback);
- curl_easy_setopt_ck(data[i].handle, CURLOPT_HEADERDATA,
- (void *) &(data[i]));
-#if LIBCURL_VERSION_NUM >= 0x072a00 /* 7.42.0 */
- curl_easy_setopt_ck(data[i].handle, CURLOPT_PATH_AS_IS, (long) 1);
-#else
- /* On old curl; no big deal, canonicalization here is almost the
- same, except perhaps for ? # type decorations at the tail. */
-#endif
- curl_easy_setopt_ck(data[i].handle, CURLOPT_AUTOREFERER, (long) 1);
- curl_easy_setopt_ck(data[i].handle, CURLOPT_ACCEPT_ENCODING, "");
- curl_easy_setopt_ck(data[i].handle, CURLOPT_HTTPHEADER, c->headers);
-
- curl_multi_add_handle(curlm, data[i].handle);
- }
-
- if (filename) curl_free(escaped_string);
- /* Query servers in parallel. */
- if (vfd >= 0)
- dprintf (vfd, "query %d urls in parallel\n", num_urls);
- int still_running;
- long loops = 0;
- int committed_to = -1;
- bool verbose_reported = false;
- struct timespec start_time, cur_time;
-
- free (c->winning_headers);
- c->winning_headers = NULL;
- if ( maxtime > 0 && clock_gettime(CLOCK_MONOTONIC_RAW, &start_time) == -1)
- {
- rc = -errno;
- goto out2;
- }
- long delta = 0;
- do
- {
- /* Check to see how long querying is taking. */
- if (maxtime > 0)
- {
- if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time) == -1)
- {
- rc = -errno;
- goto out2;
- }
- delta = cur_time.tv_sec - start_time.tv_sec;
- if ( delta > maxtime)
- {
- dprintf(vfd, "Timeout with max time=%lds and transfer time=%lds\n", maxtime, delta );
- rc = -ETIME;
- goto out2;
- }
- }
- /* Wait 1 second, the minimum DEBUGINFOD_TIMEOUT. */
- curl_multi_wait(curlm, NULL, 0, 1000, NULL);
- CURLMcode curlm_res = curl_multi_perform(curlm, &still_running);
-
- /* If the target file has been found, abort the other queries. */
- if (target_handle != NULL)
- {
- for (int i = 0; i < num_urls; i++)
- if (data[i].handle != target_handle)
- curl_multi_remove_handle(curlm, data[i].handle);
- else
- {
- committed_to = i;
- if (c->winning_headers == NULL)
- {
- c->winning_headers = data[committed_to].response_data;
- data[committed_to].response_data = NULL;
- data[committed_to].response_data_size = 0;
- }
-
- }
- }
-
- if (vfd >= 0 && !verbose_reported && committed_to >= 0)
- {
- bool pnl = (c->default_progressfn_printed_p && vfd == STDERR_FILENO);
- dprintf (vfd, "%scommitted to url %d\n", pnl ? "\n" : "",
- committed_to);
- if (pnl)
- c->default_progressfn_printed_p = 0;
- verbose_reported = true;
- }
-
- if (curlm_res != CURLM_OK)
- {
- switch (curlm_res)
- {
- case CURLM_CALL_MULTI_PERFORM: continue;
- case CURLM_OUT_OF_MEMORY: rc = -ENOMEM; break;
- default: rc = -ENETUNREACH; break;
- }
- goto out2;
- }
-
- long dl_size = -1;
- if (target_handle && (c->progressfn || maxsize > 0))
- {
- /* Get size of file being downloaded. NB: If going through
- deflate-compressing proxies, this number is likely to be
- unavailable, so -1 may show. */
- CURLcode curl_res;
-#if CURL_AT_LEAST_VERSION(7, 55, 0)
- curl_off_t cl;
- curl_res = curl_easy_getinfo(target_handle,
- CURLINFO_CONTENT_LENGTH_DOWNLOAD_T,
- &cl);
- if (curl_res == CURLE_OK && cl >= 0)
- dl_size = (cl > LONG_MAX ? LONG_MAX : (long)cl);
-#else
- double cl;
- curl_res = curl_easy_getinfo(target_handle,
- CURLINFO_CONTENT_LENGTH_DOWNLOAD,
- &cl);
- if (curl_res == CURLE_OK && cl >= 0)
- dl_size = (cl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)cl);
-#endif
- /* If Content-Length is -1, try to get the size from
- X-Debuginfod-Size */
- if (dl_size == -1 && c->winning_headers != NULL)
- {
- long xdl;
- char *hdr = strcasestr(c->winning_headers, "x-debuginfod-size");
- size_t off = strlen("x-debuginfod-size:");
-
- if (hdr != NULL && sscanf(hdr + off, "%ld", &xdl) == 1)
- dl_size = xdl;
- }
- }
-
- if (c->progressfn) /* inform/check progress callback */
- {
- loops ++;
- long pa = loops; /* default param for progress callback */
- if (target_handle) /* we've committed to a server; report its download progress */
- {
- /* PR30809: Check actual size of cached file. This same
- fd is shared by all the multi-curl handles (but only
- one will end up writing to it). Another way could be
- to tabulate totals in debuginfod_write_callback(). */
- struct stat cached;
- int statrc = fstat(fd, &cached);
- if (statrc == 0)
- pa = (long) cached.st_size;
- else
- {
- /* Otherwise, query libcurl for its tabulated total.
- However, that counts http body length, not
- decoded/decompressed content length, so does not
- measure quite the same thing as dl. */
- CURLcode curl_res;
-#if CURL_AT_LEAST_VERSION(7, 55, 0)
- curl_off_t dl;
- curl_res = curl_easy_getinfo(target_handle,
- CURLINFO_SIZE_DOWNLOAD_T,
- &dl);
- if (curl_res == 0 && dl >= 0)
- pa = (dl > LONG_MAX ? LONG_MAX : (long)dl);
-#else
- double dl;
- curl_res = curl_easy_getinfo(target_handle,
- CURLINFO_SIZE_DOWNLOAD,
- &dl);
- if (curl_res == 0)
- pa = (dl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)dl);
-#endif
- }
- }
-
- if ((*c->progressfn) (c, pa, dl_size == -1 ? 0 : dl_size))
- {
- c->progressfn_cancel = true;
- break;
- }
+ if (filename) /* must start with / */
+ {
+ /* PR28034 escape characters in completed url to %hh format. */
+ snprintf(data[i].url, PATH_MAX, "%s/%s/%s/%s", server_url,
+ build_id_bytes, type, escaped_string);
}
+ else if (section)
+ snprintf(data[i].url, PATH_MAX, "%s/%s/%s/%s", server_url,
+ build_id_bytes, type, section);
+ else
+ snprintf(data[i].url, PATH_MAX, "%s/%s/%s", server_url, build_id_bytes, type);
- /* Check to see if we are downloading something which exceeds maxsize, if set.*/
- if (target_handle && dl_size > maxsize && maxsize > 0)
+ r = init_handle(c, debuginfod_write_callback, header_callback, &data[i], i, timeout, vfd);
+ if (0 != r)
{
- if (vfd >=0)
- dprintf(vfd, "Content-Length too large.\n");
- rc = -EFBIG;
+ rc = r;
+ if (filename) curl_free (escaped_string);
goto out2;
}
- } while (still_running);
+
+ curl_multi_add_handle(curlm, data[i].handle);
+ }
+
+ if (filename) curl_free(escaped_string);
+
+ /* Query servers in parallel. */
+ if (vfd >= 0)
+ dprintf (vfd, "query %d urls in parallel\n", num_urls);
+ int committed_to;
+ r = perform_queries(curlm, &target_handle, data, c, num_urls, maxtime, maxsize, true, vfd, &committed_to);
+ if (0 != r)
+ {
+ rc = r;
+ goto out2;
+ }
/* Check whether a query was successful. If so, assign its handle
to verified_handle. */
@@ -2180,6 +2306,7 @@ debuginfod_query_server (debuginfod_client *c,
curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */
curl_easy_cleanup (data[i].handle);
free(data[i].response_data);
+ data[i].response_data = NULL;
}
free(c->winning_headers);
c->winning_headers = NULL;
@@ -2427,7 +2554,7 @@ debuginfod_find_debuginfo (debuginfod_client *client,
const unsigned char *build_id, int build_id_len,
char **path)
{
- return debuginfod_query_server(client, build_id, build_id_len,
+ return debuginfod_query_server_by_buildid(client, build_id, build_id_len,
"debuginfo", NULL, path);
}
@@ -2438,7 +2565,7 @@ debuginfod_find_executable(debuginfod_client *client,
const unsigned char *build_id, int build_id_len,
char **path)
{
- return debuginfod_query_server(client, build_id, build_id_len,
+ return debuginfod_query_server_by_buildid(client, build_id, build_id_len,
"executable", NULL, path);
}
@@ -2447,7 +2574,7 @@ int debuginfod_find_source(debuginfod_client *client,
const unsigned char *build_id, int build_id_len,
const char *filename, char **path)
{
- return debuginfod_query_server(client, build_id, build_id_len,
+ return debuginfod_query_server_by_buildid(client, build_id, build_id_len,
"source", filename, path);
}
@@ -2456,8 +2583,8 @@ debuginfod_find_section (debuginfod_client *client,
const unsigned char *build_id, int build_id_len,
const char *section, char **path)
{
- int rc = debuginfod_query_server(client, build_id, build_id_len,
- "section", section, path);
+ int rc = debuginfod_query_server_by_buildid(client, build_id, build_id_len,
+ "section", section, path);
if (rc != -EINVAL && rc != -ENOSYS)
return rc;
/* NB: we fall through in case of ima:enforcing-filtered DEBUGINFOD_URLS servers,
@@ -2508,6 +2635,383 @@ debuginfod_find_section (debuginfod_client *client,
return rc;
}
+
+int debuginfod_find_metadata (debuginfod_client *client,
+ const char* key, char* value, char **path)
+{
+ (void) client;
+ (void) key;
+ (void) value;
+ (void) path;
+
+ char *server_urls = NULL;
+ char *urls_envvar = NULL;
+ char *cache_path = NULL;
+ char *target_cache_dir = NULL;
+ char *target_cache_path = NULL;
+ char *target_cache_tmppath = NULL;
+ char *target_file_name = NULL;
+ char *key_and_value = NULL;
+ int rc = 0, r;
+ int vfd = client->verbose_fd;
+ struct handle_data *data = NULL;
+
+ json_object *json_metadata = json_object_new_object();
+ json_bool json_metadata_complete = true;
+ json_object *json_metadata_arr = json_object_new_array();
+ if (NULL == json_metadata)
+ {
+ rc = -ENOMEM;
+ goto out;
+ }
+ json_object_object_add(json_metadata, "results",
+ json_metadata_arr ?: json_object_new_array() /* Empty array */);
+
+ if (NULL == value || NULL == key)
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (vfd >= 0)
+ dprintf (vfd, "debuginfod_find_metadata %s %s\n", key, value);
+
+ /* Without a queryable URL, we can stop here. */
+ urls_envvar = getenv(DEBUGINFOD_URLS_ENV_VAR);
+ if (vfd >= 0)
+ dprintf (vfd, "server urls \"%s\"\n",
+ urls_envvar != NULL ? urls_envvar : "");
+ if (urls_envvar == NULL || urls_envvar[0] == '\0')
+ {
+ rc = -ENOSYS;
+ goto out;
+ }
+
+ /* set paths needed to perform the query
+ example format:
+ cache_path: $HOME/.cache
+ target_cache_dir: $HOME/.cache/metadata
+ target_cache_path: $HOME/.cache/metadata/KEYENCODED_VALUEENCODED
+ target_cache_tmppath: $HOME/.cache/metadata/KEYENCODED_VALUEENCODED.XXXXXX
+ */
+
+ // libcurl > 7.62ish has curl_url_set()/etc. to construct these things more properly.
+ // curl_easy_escape() is older
+ {
+ CURL *c = curl_easy_init();
+ if (!c)
+ {
+ rc = -ENOMEM;
+ goto out;
+ }
+ char *key_escaped = curl_easy_escape(c, key, 0);
+ char *value_escaped = curl_easy_escape(c, value, 0);
+
+ // fallback to unescaped values in unlikely case of error
+ xalloc_str (key_and_value, "key=%s&value=%s", key_escaped ?: key, value_escaped ?: value);
+ xalloc_str (target_file_name, "%s_%s", key_escaped ?: key, value_escaped ?: value);
+ curl_free(value_escaped);
+ curl_free(key_escaped);
+ curl_easy_cleanup(c);
+ }
+
+ /* Check if we have a recent result already in the cache. */
+ cache_path = make_cache_path();
+ if (! cache_path)
+ goto out;
+ xalloc_str (target_cache_dir, "%s/metadata", cache_path);
+ (void) mkdir (target_cache_dir, 0700);
+ xalloc_str (target_cache_path, "%s/%s", target_cache_dir, target_file_name);
+ xalloc_str (target_cache_tmppath, "%s/%s.XXXXXX", target_cache_dir, target_file_name);
+
+ int fd = open(target_cache_path, O_RDONLY);
+ if (fd >= 0)
+ {
+ struct stat st;
+ int metadata_retention = 0;
+ time_t now = time(NULL);
+ char *metadata_retention_path = 0;
+
+ xalloc_str (metadata_retention_path, "%s/%s", cache_path, metadata_retention_filename);
+ if (metadata_retention_path)
+ {
+ rc = debuginfod_config_cache(client, metadata_retention_path,
+ metadata_retention_default_s, &st);
+ free (metadata_retention_path);
+ if (rc < 0)
+ rc = 0;
+ }
+ else
+ rc = 0;
+ metadata_retention = rc;
+
+ if (fstat(fd, &st) != 0)
+ {
+ rc = -errno;
+ close (fd);
+ goto out;
+ }
+
+ if (metadata_retention > 0 && (now - st.st_mtime <= metadata_retention))
+ {
+ if (client && client->verbose_fd >= 0)
+ dprintf (client->verbose_fd, "cached metadata %s", target_file_name);
+
+ if (path != NULL)
+ {
+ *path = target_cache_path; // pass over the pointer
+ target_cache_path = NULL; // prevent free() in our own cleanup
+ }
+
+ /* Success!!!! */
+ rc = fd;
+ goto out;
+ }
+
+ /* We don't have to clear the likely-expired cached object here
+ by unlinking. We will shortly make a new request and save
+ results right on top. Erasing here could trigger a TOCTOU
+ race with another thread just finishing a query and passing
+ its results back.
+ */
+ // (void) unlink (target_cache_path);
+
+ close (fd);
+ }
+
+ /* No valid cached metadata found: time to make the queries. */
+
+ free (client->url);
+ client->url = NULL;
+
+ long maxtime = 0;
+ const char *maxtime_envvar;
+ maxtime_envvar = getenv(DEBUGINFOD_MAXTIME_ENV_VAR);
+ if (maxtime_envvar != NULL)
+ maxtime = atol (maxtime_envvar);
+ if (maxtime && vfd >= 0)
+ dprintf(vfd, "using max time %lds\n", maxtime);
+
+ long timeout = default_timeout;
+ const char* timeout_envvar = getenv(DEBUGINFOD_TIMEOUT_ENV_VAR);
+ if (timeout_envvar != NULL)
+ timeout = atoi (timeout_envvar);
+ if (vfd >= 0)
+ dprintf (vfd, "using timeout %ld\n", timeout);
+
+ add_default_headers(client);
+
+ /* Make a copy of the envvar so it can be safely modified. */
+ server_urls = strdup(urls_envvar);
+ if (server_urls == NULL)
+ {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* Thereafter, goto out1 on error. */
+
+ char **server_url_list = NULL;
+ ima_policy_t* url_ima_policies = NULL;
+ char *server_url;
+ int num_urls = 0;
+ r = init_server_urls("metadata", NULL, server_urls, &server_url_list, &url_ima_policies, &num_urls, vfd);
+ if (0 != r)
+ {
+ rc = r;
+ goto out1;
+ }
+
+ CURLM *curlm = client->server_mhandle;
+ assert (curlm != NULL);
+
+ CURL *target_handle = NULL;
+ data = malloc(sizeof(struct handle_data) * num_urls);
+ if (data == NULL)
+ {
+ rc = -ENOMEM;
+ goto out1;
+ }
+
+ /* thereafter, goto out2 on error. */
+
+ /* Initialize handle_data */
+ for (int i = 0; i < num_urls; i++)
+ {
+ if ((server_url = server_url_list[i]) == NULL)
+ break;
+ if (vfd >= 0)
+ dprintf (vfd, "init server %d %s\n", i, server_url);
+
+ data[i].errbuf[0] = '\0';
+ data[i].target_handle = &target_handle;
+ data[i].client = client;
+ data[i].metadata = NULL;
+ data[i].metadata_size = 0;
+ data[i].response_data = NULL;
+ data[i].response_data_size = 0;
+
+ snprintf(data[i].url, PATH_MAX, "%s?%s", server_url, key_and_value);
+
+ r = init_handle(client, metadata_callback, header_callback, &data[i], i, timeout, vfd);
+ if (0 != r)
+ {
+ rc = r;
+ goto out2;
+ }
+ curl_multi_add_handle(curlm, data[i].handle);
+ }
+
+ /* Query servers */
+ if (vfd >= 0)
+ dprintf (vfd, "Starting %d queries\n",num_urls);
+ int committed_to;
+ r = perform_queries(curlm, NULL, data, client, num_urls, maxtime, 0, false, vfd, &committed_to);
+ if (0 != r)
+ {
+ rc = r;
+ goto out2;
+ }
+
+ /* NOTE: We don't check the return codes of the curl messages since
+ a metadata query failing silently is just fine. We want to know what's
+ available from servers which can be connected with no issues.
+ If running with additional verbosity, the failure will be noted in stderr */
+
+ /* Build the new JSON array from all the upstream data, and
+ clean up while at it.
+ */
+ for (int i = 0; i < num_urls; i++)
+ {
+ curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */
+ curl_easy_cleanup (data[i].handle);
+ free (data[i].response_data);
+
+ if (NULL == data[i].metadata)
+ {
+ if (vfd >= 0)
+ dprintf (vfd, "Query to %s failed with error message:\n\t\"%s\"\n",
+ data[i].url, data[i].errbuf);
+ json_metadata_complete = false;
+ continue;
+ }
+
+ json_object *upstream_metadata = json_tokener_parse(data[i].metadata);
+ json_object *upstream_complete;
+ json_object *upstream_metadata_arr;
+ if (NULL == upstream_metadata ||
+ !json_object_object_get_ex(upstream_metadata, "results", &upstream_metadata_arr) ||
+ !json_object_object_get_ex(upstream_metadata, "complete", &upstream_complete))
+ continue;
+ json_metadata_complete &= json_object_get_boolean(upstream_complete);
+ // Combine the upstream metadata into the json array
+ for (int j = 0, n = json_object_array_length(upstream_metadata_arr); j < n; j++)
+ {
+ json_object *entry = json_object_array_get_idx(upstream_metadata_arr, j);
+ json_object_get(entry); // increment reference count
+ json_object_array_add(json_metadata_arr, entry);
+ }
+ json_object_put(upstream_metadata);
+
+ free (data[i].metadata);
+ }
+
+ /* Because of race with cache cleanup / rmdir, try to mkdir/mkstemp up to twice. */
+ for (int i=0; i<2; i++)
+ {
+ /* (re)create target directory in cache */
+ (void) mkdir(target_cache_dir, 0700); /* files will be 0400 later */
+
+ /* NB: write to a temporary file first, to avoid race condition of
+ multiple clients checking the cache, while a partially-written or empty
+ file is in there, being written from libcurl. */
+ fd = mkstemp (target_cache_tmppath);
+ if (fd >= 0) break;
+ }
+ if (fd < 0) /* Still failed after two iterations. */
+ {
+ rc = -errno;
+ goto out1;
+ }
+
+ /* Plop the complete json_metadata object into the cache. */
+ json_object_object_add(json_metadata, "complete", json_object_new_boolean(json_metadata_complete));
+ const char* json_string = json_object_to_json_string_ext(json_metadata, JSON_C_TO_STRING_PRETTY);
+ if (json_string == NULL)
+ {
+ rc = -ENOMEM;
+ goto out1;
+ }
+ ssize_t res = write_retry (fd, json_string, strlen(json_string));
+ (void) lseek(fd, 0, SEEK_SET); // rewind file so client can read it from the top
+
+ /* NB: json_string is auto deleted when json_metadata object is nuked */
+ if (res < 0 || (size_t) res != strlen(json_string))
+ {
+ rc = -EIO;
+ goto out1;
+ }
+ /* PR27571: make cache files casually unwriteable; dirs are already 0700 */
+ (void) fchmod(fd, 0400);
+
+ /* rename tmp->real */
+ rc = rename (target_cache_tmppath, target_cache_path);
+ if (rc < 0)
+ {
+ rc = -errno;
+ goto out1;
+ /* Perhaps we need not give up right away; could retry or something ... */
+ }
+
+ /* don't close fd - we're returning it */
+ /* don't unlink the tmppath; it's already been renamed. */
+ if (path != NULL)
+ *path = strdup(target_cache_path);
+
+ rc = fd;
+ goto out1;
+
+/* error exits */
+out2:
+ /* remove all handles from multi */
+ for (int i = 0; i < num_urls; i++)
+ {
+ if (data[i].handle != NULL)
+ {
+ curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */
+ curl_easy_cleanup (data[i].handle);
+ free (data[i].response_data);
+ free (data[i].metadata);
+ }
+ }
+
+out1:
+ free(data);
+
+ for (int i = 0; i < num_urls; ++i)
+ free(server_url_list[i]);
+ free(server_url_list);
+ free(url_ima_policies);
+
+out:
+ free (server_urls);
+ json_object_put(json_metadata);
+ /* Reset sent headers */
+ curl_slist_free_all (client->headers);
+ client->headers = NULL;
+ client->user_agent_set_p = 0;
+
+ free (target_cache_dir);
+ free (target_cache_path);
+ free (target_cache_tmppath);
+ free (key_and_value);
+ free (target_file_name);
+ free (cache_path);
+
+ return rc;
+}
+
+
/* Add an outgoing HTTP header. */
int debuginfod_add_http_header (debuginfod_client *client, const char* header)
{
diff --git a/debuginfod/debuginfod-find.c b/debuginfod/debuginfod-find.c
index 080dd8f2c6a3..b0a7c2360dd8 100644
--- a/debuginfod/debuginfod-find.c
+++ b/debuginfod/debuginfod-find.c
@@ -1,6 +1,6 @@
/* Command-line frontend for retrieving ELF / DWARF / source files
from the debuginfod.
- Copyright (C) 2019-2020 Red Hat, Inc.
+ Copyright (C) 2019-2023 Red Hat, Inc.
This file is part of elfutils.
This file is free software; you can redistribute it and/or modify
@@ -30,7 +30,7 @@
#include <fcntl.h>
#include <gelf.h>
#include <libdwelf.h>
-
+#include <json-c/json.h>
/* Name and version of program. */
ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
@@ -49,9 +49,10 @@ static const char args_doc[] = N_("debuginfo BUILDID\n"
"executable PATH\n"
"source BUILDID /FILENAME\n"
"source PATH /FILENAME\n"
- "section BUILDID SECTION-NAME\n"
- "section PATH SECTION-NAME\n");
-
+ "section BUILDID SECTION-NAME\n"
+ "section PATH SECTION-NAME\n"
+ "metadata (glob|file|KEY) (GLOB|FILENAME|VALUE)\n"
+ );
/* Definitions of arguments for argp functions. */
static const struct argp_option options[] =
@@ -145,49 +146,60 @@ main(int argc, char** argv)
/* If we were passed an ELF file name in the BUILDID slot, look in there. */
unsigned char* build_id = (unsigned char*) argv[remaining+1];
int build_id_len = 0; /* assume text */
-
- int any_non_hex = 0;
- int i;
- for (i = 0; build_id[i] != '\0'; i++)
- if ((build_id[i] >= '0' && build_id[i] <= '9') ||
- (build_id[i] >= 'a' && build_id[i] <= 'f'))
- ;
- else
- any_non_hex = 1;
-
- int fd = -1;
Elf* elf = NULL;
- if (any_non_hex) /* raw build-id */
- {
- fd = open ((char*) build_id, O_RDONLY);
- if (fd < 0)
- fprintf (stderr, "Cannot open %s: %s\n", build_id, strerror(errno));
- }
- if (fd >= 0)
- {
- elf = dwelf_elf_begin (fd);
- if (elf == NULL)
- fprintf (stderr, "Cannot open as ELF file %s: %s\n", build_id,
- elf_errmsg (-1));
- }
- if (elf != NULL)
+
+ /* Process optional buildid given via ELF file name, for some query types only. */
+ if (strcmp(argv[remaining], "debuginfo") == 0
+ || strcmp(argv[remaining], "executable") == 0
+ || strcmp(argv[remaining], "source") == 0
+ || strcmp(argv[remaining], "section") == 0)
{
- const void *extracted_build_id;
- ssize_t s = dwelf_elf_gnu_build_id(elf, &extracted_build_id);
- if (s > 0)
+ int any_non_hex = 0;
+ int i;
+ for (i = 0; build_id[i] != '\0'; i++)
+ if ((build_id[i] >= '0' && build_id[i] <= '9') ||
+ (build_id[i] >= 'a' && build_id[i] <= 'f'))
+ ;
+ else
+ any_non_hex = 1;
+
+ int fd = -1;
+ if (any_non_hex) /* raw build-id */
{
- /* Success: replace the build_id pointer/len with the binary blob
- that elfutils is keeping for us. It'll remain valid until elf_end(). */
- build_id = (unsigned char*) extracted_build_id;
- build_id_len = s;
+ fd = open ((char*) build_id, O_RDONLY);
+ if (fd < 0)
+ fprintf (stderr, "Cannot open %s: %s\n", build_id, strerror(errno));
+ }
+ if (fd >= 0)
+ {
+ elf = dwelf_elf_begin (fd);
+ if (elf == NULL)
+ fprintf (stderr, "Cannot open as ELF file %s: %s\n", build_id,
+ elf_errmsg (-1));
+ }
+ if (elf != NULL)
+ {
+ const void *extracted_build_id;
+ ssize_t s = dwelf_elf_gnu_build_id(elf, &extracted_build_id);
+ if (s > 0)
+ {
+ /* Success: replace the build_id pointer/len with the binary blob
+ that elfutils is keeping for us. It'll remain valid until elf_end(). */
+ build_id = (unsigned char*) extracted_build_id;
+ build_id_len = s;
+ }
+ else
+ fprintf (stderr, "Cannot extract build-id from %s: %s\n", build_id, elf_errmsg(-1));
}
- else
- fprintf (stderr, "Cannot extract build-id from %s: %s\n", build_id, elf_errmsg(-1));
}
char *cache_name;
int rc = 0;
+ /* By default the stdout output is the path of the cached file.
+ Some requests (e.g. metadata queries) may instead choose a different
+ output, in that case a stringified JSON object. */
+ bool print_cached_file = true;
/* Check whether FILETYPE is valid and call the appropriate
debuginfod_find_* function. If FILETYPE is "source"
then ensure a FILENAME was also supplied as an argument. */
@@ -221,6 +233,35 @@ main(int argc, char** argv)
rc = debuginfod_find_section(client, build_id, build_id_len,
argv[remaining+2], &cache_name);
}
+ else if (strcmp(argv[remaining], "metadata") == 0) /* no buildid! */
+ {
+ if (remaining+2 == argc)
+ {
+ fprintf(stderr, "Require KEY and VALUE for \"metadata\"\n");
+ return 1;
+ }
+
+ rc = debuginfod_find_metadata (client, argv[remaining+1], argv[remaining+2],
+ &cache_name);
+ /* We output a pretty-printed JSON object, not the regular debuginfod-find cached file path */
+ print_cached_file = false;
+ json_object *metadata = json_object_from_file(cache_name);
+ if(metadata)
+ {
+ printf("%s\n", json_object_to_json_string_ext(metadata,
+ JSON_C_TO_STRING_PRETTY
+#ifdef JSON_C_TO_STRING_NOSLASHESCAPE /* json-c 0.15 */
+ | JSON_C_TO_STRING_NOSLASHESCAPE
+#endif
+ ));
+ json_object_put(metadata);
+ }
+ else
+ {
+ fprintf(stderr, "%s does not contain a valid JSON format object\n", cache_name);
+ return 1;
+ }
+ }
else
{
argp_help (&argp, stderr, ARGP_HELP_USAGE, argv[0]);
@@ -240,8 +281,6 @@ main(int argc, char** argv)
debuginfod_end (client);
if (elf)
elf_end(elf);
- if (fd >= 0)
- close (fd);
if (rc < 0)
{
@@ -251,7 +290,7 @@ main(int argc, char** argv)
else
close (rc);
- printf("%s\n", cache_name);
+ if(print_cached_file) printf("%s\n", cache_name);
free (cache_name);
return 0;
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index d9259ad26bb8..305edde81021 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -76,6 +76,7 @@ extern "C" {
#include <netdb.h>
#include <math.h>
#include <float.h>
+#include <fnmatch.h>
/* If fts.h is included before config.h, its indirect inclusions may not
@@ -148,6 +149,7 @@ extern "C" {
#include "printversion.h"
#include "system.h"
}
+#include <json-c/json.h>
inline bool
@@ -220,7 +222,7 @@ static const char DEBUGINFOD_SQLITE_DDL[] =
" foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
" primary key (buildid, file, mtime)\n"
" ) " WITHOUT_ROWID ";\n"
- // Index for faster delete by file identifier
+ // Index for faster delete by file identifier and metadata searches
"create index if not exists " BUILDIDS "_f_de_idx on " BUILDIDS "_f_de (file, mtime);\n"
"create table if not exists " BUILDIDS "_f_s (\n"
" buildid integer not null,\n"
@@ -246,6 +248,8 @@ static const char DEBUGINFOD_SQLITE_DDL[] =
" ) " WITHOUT_ROWID ";\n"
// Index for faster delete by archive file identifier
"create index if not exists " BUILDIDS "_r_de_idx on " BUILDIDS "_r_de (file, mtime);\n"
+ // Index for metadata searches
+ "create index if not exists " BUILDIDS "_r_de_idx2 on " BUILDIDS "_r_de (content);\n"
"create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm
" buildid integer not null,\n"
" artifactsrc integer not null,\n"
@@ -454,6 +458,9 @@ static const struct argp_option options[] =
#define ARGP_KEY_KOJI_SIGCACHE 0x100B
{ "koji-sigcache", ARGP_KEY_KOJI_SIGCACHE, NULL, 0, "Do a koji specific mapping of rpm paths to get IMA signatures.", 0 },
#endif
+#define ARGP_KEY_METADATA_MAXTIME 0x100C
+ { "metadata-maxtime", ARGP_KEY_METADATA_MAXTIME, "SECONDS", 0,
+ "Number of seconds to limit metadata query run time, 0=unlimited.", 0 },
{ NULL, 0, NULL, 0, NULL, 0 },
};
@@ -509,6 +516,7 @@ static long scan_checkpoint = 256;
#ifdef ENABLE_IMA_VERIFICATION
static bool requires_koji_sigcache_mapping = false;
#endif
+static unsigned metadata_maxtime_s = 5;
static void set_metric(const string& key, double value);
static void inc_metric(const string& key);
@@ -711,7 +719,10 @@ parse_opt (int key, char *arg,
case ARGP_SCAN_CHECKPOINT:
scan_checkpoint = atol (arg);
if (scan_checkpoint < 0)
- argp_failure(state, 1, EINVAL, "scan checkpoint");
+ argp_failure(state, 1, EINVAL, "scan checkpoint");
+ break;
+ case ARGP_KEY_METADATA_MAXTIME:
+ metadata_maxtime_s = (unsigned) atoi(arg);
break;
#ifdef ENABLE_IMA_VERIFICATION
case ARGP_KEY_KOJI_SIGCACHE:
@@ -2382,6 +2393,58 @@ handle_buildid_r_match (bool internal_req_p,
return r;
}
+void
+add_client_federation_headers(debuginfod_client *client, MHD_Connection* conn){
+ // Transcribe incoming User-Agent:
+ string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
+ string ua_complete = string("User-Agent: ") + ua;
+ debuginfod_add_http_header (client, ua_complete.c_str());
+
+ // Compute larger XFF:, for avoiding info loss during
+ // federation, and for future cyclicity detection.
+ string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
+ if (xff != "")
+ xff += string(", "); // comma separated list
+
+ unsigned int xff_count = 0;
+ for (auto&& i : xff){
+ if (i == ',') xff_count++;
+ }
+
+ // if X-Forwarded-For: exceeds N hops,
+ // do not delegate a local lookup miss to upstream debuginfods.
+ if (xff_count >= forwarded_ttl_limit)
+ throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \
+and will not query the upstream servers");
+
+ // Compute the client's numeric IP address only - so can't merge with conninfo()
+ const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
+ MHD_CONNECTION_INFO_CLIENT_ADDRESS);
+ struct sockaddr *so = u ? u->client_addr : 0;
+ char hostname[256] = ""; // RFC1035
+ if (so && so->sa_family == AF_INET) {
+ (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
+ NI_NUMERICHOST);
+ } else if (so && so->sa_family == AF_INET6) {
+ struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
+ if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
+ struct sockaddr_in addr4;
+ memset (&addr4, 0, sizeof(addr4));
+ addr4.sin_family = AF_INET;
+ addr4.sin_port = addr6->sin6_port;
+ memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
+ (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
+ hostname, sizeof (hostname), NULL, 0,
+ NI_NUMERICHOST);
+ } else {
+ (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
+ NI_NUMERICHOST);
+ }
+ }
+
+ string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
+ debuginfod_add_http_header (client, xff_complete.c_str());
+}
static struct MHD_Response*
handle_buildid_match (bool internal_req_p,
@@ -2615,58 +2678,8 @@ handle_buildid (MHD_Connection* conn,
debuginfod_set_progressfn (client, & debuginfod_find_progress);
if (conn)
- {
- // Transcribe incoming User-Agent:
- string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
- string ua_complete = string("User-Agent: ") + ua;
- debuginfod_add_http_header (client, ua_complete.c_str());
-
- // Compute larger XFF:, for avoiding info loss during
- // federation, and for future cyclicity detection.
- string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
- if (xff != "")
- xff += string(", "); // comma separated list
-
- unsigned int xff_count = 0;
- for (auto&& i : xff){
- if (i == ',') xff_count++;
- }
+ add_client_federation_headers(client, conn);
- // if X-Forwarded-For: exceeds N hops,
- // do not delegate a local lookup miss to upstream debuginfods.
- if (xff_count >= forwarded_ttl_limit)
- throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \
-and will not query the upstream servers");
-
- // Compute the client's numeric IP address only - so can't merge with conninfo()
- const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
- MHD_CONNECTION_INFO_CLIENT_ADDRESS);
- struct sockaddr *so = u ? u->client_addr : 0;
- char hostname[256] = ""; // RFC1035
- if (so && so->sa_family == AF_INET) {
- (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
- NI_NUMERICHOST);
- } else if (so && so->sa_family == AF_INET6) {
- struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
- if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
- struct sockaddr_in addr4;
- memset (&addr4, 0, sizeof(addr4));
- addr4.sin_family = AF_INET;
- addr4.sin_port = addr6->sin6_port;
- memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
- (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
- hostname, sizeof (hostname), NULL, 0,
- NI_NUMERICHOST);
- } else {
- (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
- NI_NUMERICHOST);
- }
- }
-
- string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
- debuginfod_add_http_header (client, xff_complete.c_str());
- }
-
if (artifacttype == "debuginfo")
fd = debuginfod_find_debuginfo (client,
(const unsigned char*) buildid.c_str(),
@@ -2873,6 +2886,225 @@ handle_metrics (off_t* size)
return r;
}
+
+static struct MHD_Response*
+handle_metadata (MHD_Connection* conn,
+ string key, string value, off_t* size)
+{
+ MHD_Response* r;
+ sqlite3 *thisdb = dbq;
+
+ // Query locally for matching e, d files
+ string op;
+ if (key == "glob")
+ op = "glob";
+ else if (key == "file")
+ op = "=";
+ else
+ throw reportable_exception("/metadata webapi error, unsupported key");
+
+ // Since PR30378, the file names are segmented into two tables. We
+ // could do a glob/= search over the _files_v view that combines
+ // them, but that means that the entire _files_v thing has to be
+ // materialized & scanned to do the query. Slow! Instead, we can
+ // segment the incoming file/glob pattern into dirname / basename
+ // parts, and apply them to the corresponding table. This is done
+ // by splitting the value at the last "/". If absent, the same
+ // convention as is used in register_file_name().
+
+ string dirname, bname; // basename is a "poisoned" identifier on some distros
+ size_t slash = value.rfind('/');
+ if (slash == std::string::npos) {
+ dirname = "";
+ bname = value;
+ } else {
+ dirname = value.substr(0, slash);
+ bname = value.substr(slash+1);
+ }
+
+ // NB: further optimization is possible: replacing the 'glob' op
+ // with simple equality, if the corresponding value segment lacks
+ // metacharacters. sqlite may or may not be smart enough to do so,
+ // so we help out.
+ string metacharacters = "[]*?";
+ string dop = (op == "glob" && dirname.find_first_of(metacharacters) == string::npos) ? "=" : op;
+ string bop = (op == "glob" && bname.find_first_of(metacharacters) == string::npos) ? "=" : op;
+
+ string sql = string(
+ // explicit query r_de and f_de once here, rather than the query_d and query_e
+ // separately, because they scan the same tables, so we'd double the work
+ "select d1.executable_p, d1.debuginfo_p, 0 as source_p, "
+ " b1.hex, f1d.name || '/' || f1b.name as file, a1.name as archive "
+ "from " BUILDIDS "_r_de d1, " BUILDIDS "_files f1, " BUILDIDS "_fileparts f1b, " BUILDIDS "_fileparts f1d, "
+ BUILDIDS "_buildids b1, " BUILDIDS "_files_v a1 "
+ "where f1.id = d1.content and a1.id = d1.file and d1.buildid = b1.id "
+ " and f1d.name " + dop + " ? and f1b.name " + bop + " ? and f1.dirname = f1d.id and f1.basename = f1b.id "
+ "union all \n"
+ "select d2.executable_p, d2.debuginfo_p, 0, "
+ " b2.hex, f2d.name || '/' || f2b.name, NULL "
+ "from " BUILDIDS "_f_de d2, " BUILDIDS "_files f2, " BUILDIDS "_fileparts f2b, " BUILDIDS "_fileparts f2d, "
+ BUILDIDS "_buildids b2 "
+ "where f2.id = d2.file and d2.buildid = b2.id "
+ " and f2d.name " + dop + " ? and f2b.name " + bop + " ? "
+ " and f2.dirname = f2d.id and f2.basename = f2b.id");
+
+ // NB: we could query source file names too, thusly:
+ //
+ // select * from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f1, " BUILDIDS "_r_sref sr
+ // where b.id = sr.buildid and f1.id = sr.artifactsrc and f1.name " + op + "?"
+ // UNION ALL something with BUILDIDS "_f_s"
+ //
+ // But the first part of this query cannot run fast without the same index temp-created
+ // during "maxigroom":
+ // create index " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);
+ // and unfortunately this index is HUGE. It's similar to the size of the _r_sref
+ // table, which is already the largest part of a debuginfod index. Adding that index
+ // would nearly double the .sqlite db size.
+
+ sqlite_ps *pp = new sqlite_ps (thisdb, "mhd-query-meta-glob", sql);
+ pp->reset();
+ pp->bind(1, dirname);
+ pp->bind(2, bname);
+ pp->bind(3, dirname);
+ pp->bind(4, bname);
+ unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
+
+ json_object *metadata = json_object_new_object();
+ if (!metadata) throw libc_exception(ENOMEM, "json allocation");
+ defer_dtor<json_object*,int> metadata_d(metadata, json_object_put);
+ json_object *metadata_arr = json_object_new_array();
+ if (!metadata_arr) throw libc_exception(ENOMEM, "json allocation");
+ json_object_object_add(metadata, "results", metadata_arr);
+ // consume all the rows
+ struct timespec ts_start;
+ clock_gettime (CLOCK_MONOTONIC, &ts_start);
+
+ int rc;
+ bool metadata_complete = true;
+ while (SQLITE_DONE != (rc = pp->step()))
+ {
+ // break out of loop if we have searched too long
+ struct timespec ts_end;
+ clock_gettime (CLOCK_MONOTONIC, &ts_end);
+ double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
+ if (metadata_maxtime_s > 0 && deltas > metadata_maxtime_s)
+ {
+ metadata_complete = false;
+ break;
+ }
+
+ if (rc != SQLITE_ROW) throw sqlite_exception(rc, "step");
+
+ int m_executable_p = sqlite3_column_int (*pp, 0);
+ int m_debuginfo_p = sqlite3_column_int (*pp, 1);
+ int m_source_p = sqlite3_column_int (*pp, 2);
+ string m_buildid = (const char*) sqlite3_column_text (*pp, 3) ?: ""; // should always be non-null
+ string m_file = (const char*) sqlite3_column_text (*pp, 4) ?: "";
+ string m_archive = (const char*) sqlite3_column_text (*pp, 5) ?: "";
+
+ // Confirm that m_file matches in the fnmatch(FNM_PATHNAME)
+ // sense, since sqlite's GLOB operator is a looser filter.
+ if (key == "glob" && fnmatch(value.c_str(), m_file.c_str(), FNM_PATHNAME) != 0)
+ continue;
+
+ auto add_metadata = [metadata_arr, m_buildid, m_file, m_archive](const string& type) {
+ json_object* entry = json_object_new_object();
+ if (NULL == entry) throw libc_exception (ENOMEM, "cannot allocate json");
+ defer_dtor<json_object*,int> entry_d(entry, json_object_put);
+
+ auto add_entry_metadata = [entry](const char* k, string v) {
+ json_object* s;
+ if(v != "") {
+ s = json_object_new_string(v.c_str());
+ if (NULL == s) throw libc_exception (ENOMEM, "cannot allocate json");
+ json_object_object_add(entry, k, s);
+ }
+ };
+
+ add_entry_metadata("type", type.c_str());
+ add_entry_metadata("buildid", m_buildid);
+ add_entry_metadata("file", m_file);
+ if (m_archive != "") add_entry_metadata("archive", m_archive);
+ if (verbose > 3)
+ obatched(clog) << "metadata found local "
+ << json_object_to_json_string_ext(entry,
+ JSON_C_TO_STRING_PRETTY)
+ << endl;
+
+ // Increase ref count to switch its ownership
+ json_object_array_add(metadata_arr, json_object_get(entry));
+ };
+
+ if (m_executable_p) add_metadata("executable");
+ if (m_debuginfo_p) add_metadata("debuginfo");
+ if (m_source_p) add_metadata("source");
+ }
+ pp->reset();
+
+ unsigned num_local_results = json_object_array_length(metadata_arr);
+
+ // Query upstream as well
+ debuginfod_client *client = debuginfod_pool_begin();
+ if (client != NULL)
+ {
+ add_client_federation_headers(client, conn);
+
+ int upstream_metadata_fd;
+ char *upstream_metadata_file = NULL;
+ upstream_metadata_fd = debuginfod_find_metadata(client, key.c_str(), (char*)value.c_str(),
+ &upstream_metadata_file);
+ if (upstream_metadata_fd >= 0) {
+ /* json-c >= 0.13 has json_object_from_fd(). */
+ json_object *upstream_metadata_json = json_object_from_file(upstream_metadata_file);
+ free (upstream_metadata_file);
+ json_object *upstream_metadata_json_arr;
+ json_object *upstream_complete;
+ if (NULL != upstream_metadata_json &&
+ json_object_object_get_ex(upstream_metadata_json, "results", &upstream_metadata_json_arr) &&
+ json_object_object_get_ex(upstream_metadata_json, "complete", &upstream_complete))
+ {
+ metadata_complete &= json_object_get_boolean(upstream_complete);
+ for (int i = 0, n = json_object_array_length(upstream_metadata_json_arr); i < n; i++)
+ {
+ json_object *entry = json_object_array_get_idx(upstream_metadata_json_arr, i);
+ if (verbose > 3)
+ obatched(clog) << "metadata found remote "
+ << json_object_to_json_string_ext(entry,
+ JSON_C_TO_STRING_PRETTY)
+ << endl;
+
+ json_object_get(entry); // increment reference count
+ json_object_array_add(metadata_arr, entry);
+ }
+ json_object_put(upstream_metadata_json);
+ }
+ close(upstream_metadata_fd);
+ }
+ debuginfod_pool_end (client);
+ }
+
+ unsigned num_total_results = json_object_array_length(metadata_arr);
+
+ if (verbose > 2)
+ obatched(clog) << "metadata found local=" << num_local_results
+ << " remote=" << (num_total_results-num_local_results)
+ << " total=" << num_total_results
+ << endl;
+
+ json_object_object_add(metadata, "complete", json_object_new_boolean(metadata_complete));
+ const char* metadata_str = json_object_to_json_string(metadata);
+ if (!metadata_str)
+ throw libc_exception (ENOMEM, "cannot allocate json");
+ r = MHD_create_response_from_buffer (strlen(metadata_str),
+ (void*) metadata_str,
+ MHD_RESPMEM_MUST_COPY);
+ *size = strlen(metadata_str);
+ if (r)
+ add_mhd_response_header(r, "Content-Type", "application/json");
+ return r;
+}
+
+
static struct MHD_Response*
handle_root (off_t* size)
{
@@ -2939,6 +3171,7 @@ handler_cb (void * /*cls*/,
clock_gettime (CLOCK_MONOTONIC, &ts_start);
double afteryou = 0.0;
string artifacttype, suffix;
+ string urlargs; // for logging
try
{
@@ -3007,6 +3240,19 @@ handler_cb (void * /*cls*/,
inc_metric("http_requests_total", "type", artifacttype);
r = handle_metrics(& http_size);
}
+ else if (url1 == "/metadata")
+ {
+ tmp_inc_metric m ("thread_busy", "role", "http-metadata");
+ const char* key = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "key");
+ const char* value = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "value");
+ if (NULL == value || NULL == key)
+ throw reportable_exception("/metadata webapi error, need key and value");
+
+ urlargs = string("?key=") + string(key) + string("&value=") + string(value); // apprx., for logging
+ artifacttype = "metadata";
+ inc_metric("http_requests_total", "type", artifacttype);
+ r = handle_metadata(connection, key, value, &http_size);
+ }
else if (url1 == "/")
{
artifacttype = "/";
@@ -3043,7 +3289,7 @@ handler_cb (void * /*cls*/,
// afteryou: delay waiting for other client's identical query to complete
// deltas: total latency, including afteryou waiting
obatched(clog) << conninfo(connection)
- << ' ' << method << ' ' << url
+ << ' ' << method << ' ' << url << urlargs
<< ' ' << http_code << ' ' << http_size
<< ' ' << (int)(afteryou*1000) << '+' << (int)((deltas-afteryou)*1000) << "ms"
<< endl;
@@ -3396,6 +3642,7 @@ register_file_name(sqlite_ps& ps_upsert_fileparts,
dirname = name.substr(0, slash);
filename = name.substr(slash+1);
}
+ // NB: see also handle_metadata()
// intern the two substrings
ps_upsert_fileparts
@@ -4379,12 +4626,13 @@ void groom()
if (interrupted) return;
// NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G
- sqlite_ps g1 (db, "incremental vacuum", "pragma incremental_vacuum");
- g1.reset().step_ok_done();
- sqlite_ps g2 (db, "optimize", "pragma optimize");
- g2.reset().step_ok_done();
- sqlite_ps g3 (db, "wal checkpoint", "pragma wal_checkpoint=truncate");
- g3.reset().step_ok_done();
+ { sqlite_ps g (db, "incremental vacuum", "pragma incremental_vacuum"); g.reset().step_ok_done(); }
+ // https://www.sqlite.org/lang_analyze.html#approx
+ { sqlite_ps g (db, "analyze setup", "pragma analysis_limit = 1000;\n"); g.reset().step_ok_done(); }
+ { sqlite_ps g (db, "analyze", "analyze"); g.reset().step_ok_done(); }
+ { sqlite_ps g (db, "analyze reload", "analyze sqlite_schema"); g.reset().step_ok_done(); }
+ { sqlite_ps g (db, "optimize", "pragma optimize"); g.reset().step_ok_done(); }
+ { sqlite_ps g (db, "wal checkpoint", "pragma wal_checkpoint=truncate"); g.reset().step_ok_done(); }
database_stats_report();
@@ -4769,6 +5017,8 @@ main (int argc, char *argv[])
if (maxigroom)
{
obatched(clog) << "maxigrooming database, please wait." << endl;
+ // NB: this index alone can nearly double the database size!
+ // NB: this index would be necessary to run source-file metadata searches fast
extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);");
extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);");
extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;");
diff --git a/debuginfod/debuginfod.h.in b/debuginfod/debuginfod.h.in
index 73f633f0b8e9..3936b17b97cf 100644
--- a/debuginfod/debuginfod.h.in
+++ b/debuginfod/debuginfod.h.in
@@ -63,9 +63,9 @@ debuginfod_client *debuginfod_begin (void);
it is a binary blob of given length.
If successful, return a file descriptor to the target, otherwise
- return a posix error code. If successful, set *path to a
- strdup'd copy of the name of the same file in the cache.
- Caller must free() it later. */
+ return a negative POSIX error code. If successful, set *path to a
+ strdup'd copy of the name of the same file in the cache. Caller
+ must free() it later. */
int debuginfod_find_debuginfo (debuginfod_client *client,
const unsigned char *build_id,
@@ -89,6 +89,27 @@ int debuginfod_find_section (debuginfod_client *client,
const char *section,
char **path);
+/* Query the urls contained in $DEBUGINFOD_URLS for metadata
+ with given query key/value.
+
+ If successful, return a file descriptor to the JSON document
+ describing matches, otherwise return a negative POSIX error code. If
+ successful, set *path to a strdup'd copy of the name of the same
+ file in the cache. Caller must free() it later.
+
+ key can be one of 'glob' or 'file' corresponding to querying for value
+ by exact name or using a pattern matching approach.
+
+ The JSON document will be of the form {results: [{...}, ...], complete: <bool>},
+ where the results are JSON objects containing metadata and complete is true iff
+ all of the federation of servers responded with complete results (as opposed to 1+
+ failing to return or having an issue)
+ */
+int debuginfod_find_metadata (debuginfod_client *client,
+ const char *key,
+ char* value,
+ char **path);
+
typedef int (*debuginfod_progressfn_t)(debuginfod_client *c, long a, long b);
void debuginfod_set_progressfn(debuginfod_client *c,
debuginfod_progressfn_t fn);
diff --git a/debuginfod/libdebuginfod.map b/debuginfod/libdebuginfod.map
index 6334373f01b0..9cee91cd79aa 100644
--- a/debuginfod/libdebuginfod.map
+++ b/debuginfod/libdebuginfod.map
@@ -22,3 +22,6 @@ ELFUTILS_0.188 {
debuginfod_get_headers;
debuginfod_find_section;
} ELFUTILS_0.183;
+ELFUTILS_0.192 {
+ debuginfod_find_metadata;
+} ELFUTILS_0.188;
diff --git a/doc/debuginfod-client-config.7 b/doc/debuginfod-client-config.7
index f16612084e9b..bb33fb0b8b6e 100644
--- a/doc/debuginfod-client-config.7
+++ b/doc/debuginfod-client-config.7
@@ -167,3 +167,11 @@ are short-circuited (returning an immediate failure instead of sending
a new query to servers). This accelerates queries that probably would
still fail. The default is 600, 10 minutes. 0 means "forget
immediately".
+
+.TP
+.B metadata_retention_s
+This control file sets how long to remember the results of a metadata
+query. New queries for the same artifacts within this time window are
+short-circuited (repeating the same results). This accelerates
+queries that probably would probably have the same results. The
+default is 3600, 1 hour. 0 means "do not retain".
diff --git a/doc/debuginfod-find.1 b/doc/debuginfod-find.1
index d7db1bfdd838..8c63b2c5a5e0 100644
--- a/doc/debuginfod-find.1
+++ b/doc/debuginfod-find.1
@@ -29,6 +29,8 @@ debuginfod-find \- request debuginfo-related data
.B debuginfod-find [\fIOPTION\fP]... source \fIBUILDID\fP \fI/FILENAME\fP
.br
.B debuginfod-find [\fIOPTION\fP]... source \fIPATH\fP \fI/FILENAME\fP
+.br
+.B debuginfod-find [\fIOPTION\fP]... metadata \fIKEY\fP \fIVALUE\fP
.SH DESCRIPTION
\fBdebuginfod-find\fP queries one or more \fBdebuginfod\fP servers for
@@ -119,6 +121,63 @@ l l.
\../bar/foo.c AT_comp_dir=/zoo/ source BUILDID /zoo//../bar/foo.c
.TE
+.SS metadata \fIKEY\fP \fIVALUE\fP
+
+All designated debuginfod servers are queried for metadata about files
+in their index. Different search keys may be supported by different
+servers.
+
+.TS
+l l l .
+KEY VALUE DESCRIPTION
+
+\fBfile\fP path exact match \fIpath\fP, including in archives
+\fBglob\fP pattern glob match \fIpattern\fP, including in archives
+.TE
+
+The resulting output will look something like the following:
+{
+ "results":[
+ {
+ "type":"executable",
+ "buildid":"f0aa15b8aba4f3c28cac3c2a73801fefa644a9f2",
+ "file":"/usr/local/bin/hello",
+ "archive":"/opt/elfutils/tests/test-2290642/R/rhel7/hello2-1.0-2.x86_64.rpm"
+ },
+ {
+ "type":"executable",
+ "buildid":"bc1febfd03ca05e030f0d205f7659db29f8a4b30",
+ "file":"hello2"
+ }
+ ],
+ "complete":true
+}
+
+The results of the search are output to \fBstdout\fP as a JSON object
+containing an array of objects, supplying metadata about each match, as
+well as a boolean value corresponding to the completeness of the result.
+The result is considered complete if all of the queries to upstream servers
+returned complete results and the local query succeeded. This metadata report
+may be cached. It may be incomplete and may contain duplicates.
+Additional JSON object fields may be present.
+
+.TS
+l l l .
+NAME TYPE DESCRIPTION
+
+\fBbuildid\fP string hexadecimal buildid associated with the file
+\fBtype\fP string one of \fBdebuginfo\fP or \fBexecutable\fP
+\fBfile\fP string matched file name, outside or inside the archive
+\fBarchive\fP string archive containing matched file name, if any
+.TE
+
+It's worth noting that \fBtype\fP cannot be \fBsource\fP since in order
+to perform such a search fast enough additional indexing would need to be added to
+the database which would nearly double its size.
+
+The search also always combines both files and archives in the results
+and at this time further granularity is not available.
+
.SH "OPTIONS"
.TP
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index 577f58b6ee2e..f35ce6c1a9ca 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -132,6 +132,14 @@ scanner/groomer server and multiple passive ones, thereby sharing
service load. Archive pattern options must still be given, so
debuginfod can recognize file name extensions for unpacking.
+.TP
+.B "\-\-metadata\-maxtime=SECONDS"
+Impose a limit on the runtime of metadata webapi queries. These
+queries, especially broad "glob" wildcards, can take a large amount of
+time and produce large results. Public-facing servers may need to
+throttle them. The default limit is 5 seconds. Set 0 to disable this
+limit.
+
.TP
.B "\-D SQL" "\-\-ddl=SQL"
Execute given sqlite statement after the database is opened and
@@ -421,6 +429,16 @@ variety of statistics about the operation of the debuginfod server.
The exact set of metrics and their meanings may change in future
versions.
+.SS /metadata?key=\fIKEY\fP&value=\fIVALUE\fP
+
+This endpoint triggers a search of the files in the index plus any
+upstream federated servers, based on given key and value. If
+successful, the result is an application/json textual object, listing
+metadata for the matched files. See \fIdebuginfod-find(1)\fP for
+documentation of the common key/value search parameters, and the
+resulting data schema.
+
+
.SH DATA MANAGEMENT
debuginfod stores its index in an sqlite database in a densely packed
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 4547d95de76c..3cc9ded43b6a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -266,12 +266,13 @@ TESTS += run-debuginfod-dlopen.sh \
run-debuginfod-federation-sqlite.sh \
run-debuginfod-federation-link.sh \
run-debuginfod-percent-escape.sh \
- run-debuginfod-x-forwarded-for.sh \
- run-debuginfod-response-headers.sh \
- run-debuginfod-extraction-passive.sh \
+ run-debuginfod-x-forwarded-for.sh \
+ run-debuginfod-response-headers.sh \
+ run-debuginfod-extraction-passive.sh \
run-debuginfod-webapi-concurrency.sh \
run-debuginfod-section.sh \
- run-debuginfod-IXr.sh
+ run-debuginfod-IXr.sh \
+ run-debuginfod-find-metadata.sh
endif
if !OLD_LIBMICROHTTPD
# Will crash on too old libmicrohttpd
@@ -603,7 +604,8 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \
run-debuginfod-webapi-concurrency.sh \
run-debuginfod-section.sh \
run-debuginfod-IXr.sh \
- run-debuginfod-ima-verification.sh \
+ run-debuginfod-ima-verification.sh \
+ run-debuginfod-find-metadata.sh \
debuginfod-rpms/fedora30/hello2-1.0-2.src.rpm \
debuginfod-rpms/fedora30/hello2-1.0-2.x86_64.rpm \
debuginfod-rpms/fedora30/hello2-debuginfo-1.0-2.x86_64.rpm \
diff --git a/tests/debuginfod-subr.sh b/tests/debuginfod-subr.sh
index c3b0603ddb2e..000e27708192 100755
--- a/tests/debuginfod-subr.sh
+++ b/tests/debuginfod-subr.sh
@@ -26,6 +26,7 @@ type curl 2>/dev/null || (echo "need curl"; exit 77)
type rpm2cpio 2>/dev/null || (echo "need rpm2cpio"; exit 77)
type cpio 2>/dev/null || (echo "need cpio"; exit 77)
type bzcat 2>/dev/null || (echo "need bzcat"; exit 77)
+type ss 2>/dev/null || (echo "need ss"; exit 77)
bsdtar --version | grep -q zstd && zstd=true || zstd=false
echo "zstd=$zstd bsdtar=`bsdtar --version`"
diff --git a/tests/run-debuginfod-find-metadata.sh b/tests/run-debuginfod-find-metadata.sh
new file mode 100755
index 000000000000..f19c5a6e6942
--- /dev/null
+++ b/tests/run-debuginfod-find-metadata.sh
@@ -0,0 +1,113 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2022 Red Hat, Inc.
+# This file is part of elfutils.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# elfutils is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+. $srcdir/debuginfod-subr.sh
+
+# for test case debugging, uncomment:
+set -x
+unset VALGRIND_CMD
+# VALGRIND_CMD="valgrind --enable-debuginfod=no"
+
+type curl 2>/dev/null || { echo "need curl"; exit 77; }
+type jq 2>/dev/null || { echo "need jq"; exit 77; }
+
+pkg-config json-c libcurl || { echo "one or more libraries are missing (libjson-c, libcurl)"; exit 77; }
+
+DB=${PWD}/.debuginfod_tmp.sqlite
+export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache
+tempfiles $DB ${DB}_2
+
+# This variable is essential and ensures no time-race for claiming ports occurs
+# set base to a unique multiple of 100 not used in any other 'run-debuginfod-*' test
+base=13100
+get_ports
+mkdir R D
+cp -rvp ${abs_srcdir}/debuginfod-rpms/rhel7 R
+cp -rvp ${abs_srcdir}/debuginfod-debs/*deb D
+
+env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${VALGRIND_CMD} ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -R \
+ -d $DB -p $PORT1 -t0 -g0 R > vlog$PORT1 2>&1 &
+PID1=$!
+tempfiles vlog$PORT1
+errfiles vlog$PORT1
+
+wait_ready $PORT1 'ready' 1
+wait_ready $PORT1 'thread_work_total{role="traverse"}' 1
+wait_ready $PORT1 'thread_work_pending{role="scan"}' 0
+wait_ready $PORT1 'thread_busy{role="scan"}' 0
+
+env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS="http://127.0.0.1:$PORT1 https://bad/url.web" ${VALGRIND_CMD} ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -U \
+ -d ${DB}_2 -p $PORT2 -t0 -g0 D > vlog$PORT2 2>&1 &
+PID2=$!
+tempfiles vlog$PORT2
+errfiles vlog$PORT2
+
+wait_ready $PORT2 'ready' 1
+wait_ready $PORT2 'thread_work_total{role="traverse"}' 1
+wait_ready $PORT2 'thread_work_pending{role="scan"}' 0
+wait_ready $PORT2 'thread_busy{role="scan"}' 0
+
+# have clients contact the new server
+export DEBUGINFOD_URLS=http://127.0.0.1:$PORT2
+
+tempfiles json.txt
+# Check that we find correct number of files, both via local and federated links
+RESULTJ=`env LD_LIBRARY_PATH=$ldpath ${VALGRIND_CMD} ${abs_builddir}/../debuginfod/debuginfod-find metadata glob "/u?r/bin/*"`
+echo $RESULTJ
+N_FOUND=`echo $RESULTJ | jq '.results | length'`
+test $N_FOUND -eq 1
+RESULTJ=`env LD_LIBRARY_PATH=$ldpath ${VALGRIND_CMD} ${abs_builddir}/../debuginfod/debuginfod-find metadata glob "/usr/lo?al/bin/*"`
+echo $RESULTJ
+N_FOUND=`echo $RESULTJ | jq '.results | length'`
+test $N_FOUND -eq 2
+
+
+# Query via the webapi as well
+curl http://127.0.0.1:$PORT2'/metadata?key=glob&value=/usr/bin/*hi*'
+test `curl -s http://127.0.0.1:$PORT2'/metadata?key=glob&value=/usr/bin/*hi*' | jq '.results[0].buildid == "f17a29b5a25bd4960531d82aa6b07c8abe84fa66"'` = 'true'
+test `curl -s http://127.0.0.1:$PORT2'/metadata?key=glob&value=/usr/bin/*hi*' | jq '.results[0].file == "/usr/bin/hithere"'` = 'true'
+test `curl -s http://127.0.0.1:$PORT2'/metadata?key=glob&value=/usr/bin/*hi*' | jq '.results[0].archive | test(".*hithere.*deb")'` = 'true'
+# Note we query the upstream server too, since the downstream will have an incomplete result due to the badurl
+test `curl -s http://127.0.0.1:$PORT1'/metadata?key=glob&value=/usr/bin/*hi*' | jq '.complete == true'` = 'true'
+test `curl -s http://127.0.0.1:$PORT2'/metadata?key=glob&value=/usr/bin/*hi*' | jq '.complete == false'` = 'true'
+
+# An empty array is returned on server error or if the file DNE
+RESULTJ=`env LD_LIBRARY_PATH=$ldpath ${VALGRIND_CMD} ${abs_builddir}/../debuginfod/debuginfod-find metadata file "/this/isnt/there"`
+echo $RESULTJ
+test `echo $RESULTJ | jq ".results == [ ]" ` = 'true'
+
+kill $PID1
+kill $PID2
+wait $PID1
+wait $PID2
+PID1=0
+PID2=0
+
+# check it's still in cache
+RESULTJ=`env LD_LIBRARY_PATH=$ldpath ${VALGRIND_CMD} ${abs_builddir}/../debuginfod/debuginfod-find metadata file "/usr/bin/hithere"`
+echo $RESULTJ
+test `echo $RESULTJ | jq ".results == [ ]" ` = 'true'
+
+# invalidate cache, retry previously successful query to now-dead servers
+echo 0 > $DEBUGINFOD_CACHE_PATH/metadata_retention_s
+RESULTJ=`env LD_LIBRARY_PATH=$ldpath ${VALGRIND_CMD} ${abs_builddir}/../debuginfod/debuginfod-find metadata glob "/u?r/bin/*"`
+echo $RESULTJ
+test `echo $RESULTJ | jq ".results == [ ]" ` = 'true'
+test `echo $RESULTJ | jq ".complete == false" ` = 'true'
+
+exit 0
More information about the Elfutils-devel
mailing list