PR25369 slice 3/3: debuginfod header relay

Frank Ch. Eigler fche@redhat.com
Wed Mar 25 03:49:36 GMT 2020


Hi -

This is the last of three bits for the month-ago PR25369 patchset.

Author: Frank Ch. Eigler <fche@redhat.com>
Date:   Tue Mar 24 23:46:30 2020 -0400

    debuginfod: User-Agent and X-Forwarded-For header relay
    
    Extend the debuginfod client API with a function to stuff outgoing
    headers into libcurl.  Use this from debuginfod so federated trees
    of debuginfod/httpd can trace back to to the originating client
    for administrative purposes.  Docs & test included.
    
    Signed-off-by: Frank Ch. Eigler <fche@redhat.com>

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 00e7ec63232e..44c1349bfd80 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,15 @@
+2020-03-24  Frank Ch. Eigler  <fche@redhat.com>
+
+	* debuginfod.h, libdebuginfod.map: New functions for _add_url_header.
+	* debuginfod-client.c (struct debuginfod_client): Add headers fields.
+	(debuginfod_add_http_header): New client api to add outgoing headers.
+	(add_default_headers): Renamed from add_extra_headers, skip if flag.
+	(debuginfod_query_server): Pass accumulated headers to libcurl.
+	(debuginfod_end): Clean accumulated headers.
+	(debuginfod_find_*): Add default headers at this point.
+	* debuginfod.cxx (handle_buildid): Add conn pointer.  Use it to relay
+	incoming UA and XFF headers to federated upstream debuginfods.
+
 2020-03-24  Frank Ch. Eigler  <fche@redhat.com>
 
 	* debuginfod-find.c (main): Correct /source full-pathness check for
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c
index 58a04b9a734b..6517cb72432f 100644
--- a/debuginfod/debuginfod-client.c
+++ b/debuginfod/debuginfod-client.c
@@ -85,6 +85,10 @@ struct debuginfod_client
   /* Stores current/last url, if any. */
   char* url;
 
+  /* Accumulates outgoing http header names/values. */
+  int user_agent_set_p; /* affects add_default_headers */
+  struct curl_slist *headers;
+
   /* Can contain all other context, like cache_path, server_urls,
      timeout or other info gotten from environment variables, the
      handle data, etc. So those don't have to be reparsed and
@@ -311,8 +315,11 @@ debuginfod_clean_cache(debuginfod_client *c,
 
 
 static void
-add_extra_headers(CURL *handle)
+add_default_headers(debuginfod_client *client)
 {
+  if (client->user_agent_set_p)
+    return;
+
   /* Compute a User-Agent: string to send.  The more accurately this
      describes this host, the likelier that the debuginfod servers
      might be able to locate debuginfo for us. */
@@ -372,7 +379,7 @@ add_extra_headers(CURL *handle)
     }
 
   char *ua = NULL;
-  rc = asprintf(& ua, "%s/%s,%s,%s/%s",
+  rc = asprintf(& ua, "User-Agent: %s/%s,%s,%s/%s",
                 PACKAGE_NAME, PACKAGE_VERSION,
                 utspart ?: "",
                 id ?: "",
@@ -381,7 +388,7 @@ add_extra_headers(CURL *handle)
     ua = NULL;
 
   if (ua)
-    curl_easy_setopt(handle, CURLOPT_USERAGENT, (void*) ua); /* implicit strdup */
+    (void) debuginfod_add_http_header (client, ua);
 
   free (ua);
   free (id);
@@ -683,7 +690,7 @@ debuginfod_query_server (debuginfod_client *c,
       curl_easy_setopt(data[i].handle, CURLOPT_NOSIGNAL, (long) 1);
       curl_easy_setopt(data[i].handle, CURLOPT_AUTOREFERER, (long) 1);
       curl_easy_setopt(data[i].handle, CURLOPT_ACCEPT_ENCODING, "");
-      add_extra_headers(data[i].handle);
+      curl_easy_setopt(data[i].handle, CURLOPT_HTTPHEADER, c->headers);
 
       curl_multi_add_handle(curlm, data[i].handle);
       server_url = strtok_r(NULL, url_delim, &strtok_saveptr);
@@ -969,6 +976,8 @@ debuginfod_get_url(debuginfod_client *client)
 void
 debuginfod_end (debuginfod_client *client)
 {
+  if (client) /* make it safe to be invoked with NULL */
+    curl_slist_free_all (client->headers);
   free (client->url);
   free (client);
 }
@@ -978,6 +987,7 @@ debuginfod_find_debuginfo (debuginfod_client *client,
 			   const unsigned char *build_id, int build_id_len,
                            char **path)
 {
+  add_default_headers(client);
   return debuginfod_query_server(client, build_id, build_id_len,
                                  "debuginfo", NULL, path);
 }
@@ -989,6 +999,7 @@ debuginfod_find_executable(debuginfod_client *client,
 			   const unsigned char *build_id, int build_id_len,
                            char **path)
 {
+  add_default_headers(client);
   return debuginfod_query_server(client, build_id, build_id_len,
                                  "executable", NULL, path);
 }
@@ -998,11 +1009,29 @@ int debuginfod_find_source(debuginfod_client *client,
 			   const unsigned char *build_id, int build_id_len,
                            const char *filename, char **path)
 {
+  add_default_headers(client);
   return debuginfod_query_server(client, build_id, build_id_len,
                                  "source", filename, path);
 }
 
 
+/* Add an outgoing HTTP header.  */
+int debuginfod_add_http_header (debuginfod_client *client, const char* header)
+{
+  struct curl_slist *temp = curl_slist_append (client->headers, header);
+  if (temp == NULL)
+    return -ENOMEM;
+
+  /* Track if User-Agent: is being set.  If so, signal not to add the
+     default one. */
+  if (strncmp (header, "User-Agent:", 11) == 0)
+    client->user_agent_set_p = 1;
+
+  client->headers = temp;
+  return 0;
+}
+
+
 void
 debuginfod_set_progressfn(debuginfod_client *client,
 			  debuginfod_progressfn_t fn)
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 7c7e85eb6d14..96fbe8bc87fb 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -1347,11 +1347,12 @@ debuginfod_find_progress (debuginfod_client *, long a, long b)
 }
 
 
-static struct MHD_Response* handle_buildid (const string& buildid /* unsafe */,
-                                            const string& artifacttype /* unsafe */,
-                                            const string& suffix /* unsafe */,
-                                            int *result_fd
-                                            )
+static struct MHD_Response*
+handle_buildid (MHD_Connection* conn,
+                const string& buildid /* unsafe */,
+                const string& artifacttype /* unsafe */,
+                const string& suffix /* unsafe */,
+                int *result_fd)
 {
   // validate artifacttype
   string atype_code;
@@ -1435,6 +1436,35 @@ static struct MHD_Response* handle_buildid (const string& buildid /* unsafe */,
     {
       debuginfod_set_progressfn (client, & debuginfod_find_progress);
 
+      if (conn)
+        {
+          // Transcribe incoming User-Agent:
+          string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
+          string ua_complete = string("User-Agent: ") + ua;
+          debuginfod_add_http_header (client, ua_complete.c_str());
+
+          // Compute larger XFF:, for avoiding info loss during
+          // federation, and for future cyclicity detection.
+          string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
+          if (xff != "")
+            xff += string(", "); // comma separated list
+
+          // Compute the client's numeric IP address only - so can't merge with conninfo()
+          const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
+                                                                       MHD_CONNECTION_INFO_CLIENT_ADDRESS);
+          struct sockaddr *so = u ? u->client_addr : 0;
+          char hostname[256] = ""; // RFC1035
+          if (so && so->sa_family == AF_INET)
+            (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
+                                NI_NUMERICHOST);
+          else if (so && so->sa_family == AF_INET6)
+            (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
+                                NI_NUMERICHOST);
+
+          string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
+          debuginfod_add_http_header (client, xff_complete.c_str());
+        }
+
       if (artifacttype == "debuginfo")
 	fd = debuginfod_find_debuginfo (client,
 					(const unsigned char*) buildid.c_str(),
@@ -1632,7 +1662,8 @@ handler_cb (void * /*cls*/,
             }
 
           inc_metric("http_requests_total", "type", artifacttype);
-          r = handle_buildid(buildid, artifacttype, suffix, 0); // NB: don't care about result-fd
+          r = handle_buildid(connection, buildid,
+                             artifacttype, suffix, 0); // NB: don't care about result-fd
         }
       else if (url1 == "/metrics")
         {
@@ -1701,7 +1732,7 @@ dwarf_extract_source_paths (Elf *elf, set<string>& debug_sourcefiles)
           struct MHD_Response *r = 0;
           try
             {
-              r = handle_buildid (buildid, "debuginfo", "", &alt_fd);
+              r = handle_buildid (0, buildid, "debuginfo", "", &alt_fd);
             }
           catch (const reportable_exception& e)
             {
diff --git a/debuginfod/debuginfod.h b/debuginfod/debuginfod.h
index 2ace5f350488..daedb9bf31eb 100644
--- a/debuginfod/debuginfod.h
+++ b/debuginfod/debuginfod.h
@@ -84,6 +84,9 @@ void* debuginfod_get_user_data (debuginfod_client *client);
 /* Get the current or last active URL, if known.  */
 const char* debuginfod_get_url (debuginfod_client *client);
 
+/* Add an outgoing HTTP request  "Header: Value".  Copies string.  */
+int debuginfod_add_http_header (debuginfod_client *client, const char* header);
+  
 /* Release debuginfod client connection context handle.  */
 void debuginfod_end (debuginfod_client *client);
 
diff --git a/debuginfod/libdebuginfod.map b/debuginfod/libdebuginfod.map
index d84e8924f7f8..b8edfb016054 100644
--- a/debuginfod/libdebuginfod.map
+++ b/debuginfod/libdebuginfod.map
@@ -13,4 +13,5 @@ ELFUTILS_0.179 {
   debuginfod_set_user_data;
   debuginfod_get_user_data;
   debuginfod_get_url;
+  debuginfod_add_http_header;
 };
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 59809ea8a1e2..91e3e321a2ea 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,9 @@
+2020-03-24  Frank Ch. Eigler  <fche@redhat.com>
+
+	* debuginfod_add_http_header.3: New function, documented ...
+	* debuginfod_find_debuginfo.3: ... here.
+	* Makefile.am (notrans_dist_*_man3): Add it.
+
 2020-03-22  Frank Ch. Eigler  <fche@redhat.com>
 
 	* debuginfod_get_url.3: New function, documented ...
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 38b8226d775c..f0c7e55dfa41 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -24,6 +24,7 @@ notrans_dist_man1_MANS=
 
 if DEBUGINFOD
 notrans_dist_man8_MANS += debuginfod.8
+notrans_dist_man3_MANS += debuginfod_add_http_header.3
 notrans_dist_man3_MANS += debuginfod_begin.3
 notrans_dist_man3_MANS += debuginfod_end.3
 notrans_dist_man3_MANS += debuginfod_find_debuginfo.3
diff --git a/doc/debuginfod_add_http_header.3 b/doc/debuginfod_add_http_header.3
new file mode 100644
index 000000000000..16279936e2ea
--- /dev/null
+++ b/doc/debuginfod_add_http_header.3
@@ -0,0 +1 @@
+.so man3/debuginfod_find_debuginfo.3
diff --git a/doc/debuginfod_find_debuginfo.3 b/doc/debuginfod_find_debuginfo.3
index f7ec6cc134ba..828824fb0dc3 100644
--- a/doc/debuginfod_find_debuginfo.3
+++ b/doc/debuginfod_find_debuginfo.3
@@ -54,6 +54,8 @@ OPTIONAL FUNCTIONS
 .BI "                              void *" data ");"
 .BI "void* debuginfod_get_user_data(debuginfod_client *" client ");"
 .BI "const char* debuginfod_get_url(debuginfod_client *" client ");"
+.BI "int debuginfod_add_http_header(debuginfod_client *" client ","
+.BI "                               const char* " header ");"
 
 .SH DESCRIPTION
 
@@ -159,6 +161,21 @@ The resulting string is owned by the library, and must not be modified
 or freed.  The caller should copy it if it is needed beyond the release
 of the client object.
 
+.SS "HTTP HEADER"
+
+Before a lookup function is initiated, a client application may
+add HTTP request headers to future downloads.
+.BR \%debuginfod_add_http_header ()
+may be called with strings of the form
+.BR \%"Header:\~value" .
+These strings are copied by the library.  A zero return value
+indicates success, but out-of-memory conditions may result in
+a non-zero \fI-ENOMEM\fP.
+
+By default, the library adds a descriptive \fIUser-Agent:\fP
+header to outgoing requests.  If the client application adds
+a header with the same name, this default is suppressed.
+
 .SH "CACHE"
 If the query is successful, the \fBdebuginfod_find_*\fP() functions save
 the target file to a local cache. The location of the cache is controlled
diff --git a/tests/ChangeLog b/tests/ChangeLog
index d0d32a87315a..ea8ae1223578 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,8 @@
+2020-03-24  Frank Ch. Eigler  <fche@redhat.com>
+
+	* run-debuginfod-find.sh: Test relay of UA and XFF headers across
+	federating debuginfods.
+
 2020-03-23  Mark Wielaard  <mark@klomp.org>
 
 	* getphdrnum.c: Include config.h.
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index b64130282d86..a86572c87656 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -93,8 +93,9 @@ wait_ready()
   fi
 }
 
-env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 -Z .tar.xz -Z .tar.bz2=bzcat R F Z L &
+env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog4 2>&1 &
 PID1=$!
+tempfiles vlog4
 # Server must become ready
 wait_ready $PORT1 'ready' 1
 export DEBUGINFOD_URLS=http://127.0.0.1:$PORT1/   # or without trailing /
@@ -366,6 +367,14 @@ fi
 rm -rf $DEBUGINFOD_CACHE_PATH
 testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $BUILDID
 
+# send a request to stress XFF and User-Agent federation relay;
+# we'll grep for the two patterns in vlog4
+curl -s -H 'User-Agent: TESTCURL' -H 'X-Forwarded-For: TESTXFF' $DEBUGINFOD_URLS/buildid/deaddeadbeef00000000/debuginfo -o /dev/null || true
+
+grep -q UA:TESTCURL vlog4
+grep -q XFF:TESTXFF vlog4
+
+
 # confirm that first server can't resolve symlinked info in L/ but second can
 BUILDID=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \
          -a L/foo | grep 'Build ID' | cut -d ' ' -f 7`



More information about the Elfutils-devel mailing list