PATCH: PR27701: debuginfod clients with long-lived curl handles

Frank Ch. Eigler fche@redhat.com
Fri Apr 23 19:43:09 GMT 2021


Hi -

Author: Frank Ch. Eigler <fche@redhat.com>
Date:   Fri Apr 23 13:04:26 2021 -0400

    PR27701: debuginfod client: encourage reused debuginfod_client objects
    
    Client objects now carry long-lived curl handles for outgoing
    connections.  This makes it more efficient for multiple sequential
    queries, because the TCP connections and/or TLS state info are kept
    around awhile, avoiding O(100ms) setup latencies.  debuginfod is
    adjusted to take advantage of this for federation.  Other clients
    should gradually do this too, perhaps including elfutils itself (in
    the libdwfl->debuginfod_client hooks).
    
    A large gdb session with 117 debuginfo downloads was observed to run
    twice as fast (45s vs. 1m30s wall-clock time), just in nuking this
    extra setup latency.  This was tested via a debuginfod intermediary:
    it should be even faster once gdb reuses its own debuginfod_client.
    
    Signed-off-by: Frank Ch. Eigler <fche@redhat.com>

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index ed2f77cfaece..ad9dbc0a8a74 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,19 @@
+2021-04-23  Frank Ch. Eigler <fche@redhat.com>
+
+	PR27701
+	* debuginfod-client.c (struct debuginfod_client): Add long-lived
+	CURL easy and multi handles.
+	(debuginfod_begin,debuginfod_end): ctor/dtor for these.
+	(debuginfod_query_server): Rework to reuse easy & multi handles.
+	(*_envvar): Just use the DEBUGINFOD_*_ENV_VAR directly instead.
+
+	* debuginfod.cxx (dc_pool): New pile of reusable debuginfod_client
+	objects for upstream federation connections.
+	(debuginfod_pool_{begin,end,groom}): New functions.
+	(handle_buildid): Use them.
+	(handler_cb): Fix keep-alive given libmicrohttpd convention of multiple
+	callbacks.
+
 2021-04-15  Frank Ch. Eigler <fche@redhat.com>
 
 	* debuginfod.cxx (groom): Only update database stats once.
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c
index d5e7bbdfbe1b..7fdc6c9f30ec 100644
--- a/debuginfod/debuginfod-client.c
+++ b/debuginfod/debuginfod-client.c
@@ -119,6 +119,11 @@ struct debuginfod_client
   /* File descriptor to output any verbose messages if > 0.  */
   int verbose_fd;
 
+  /* Count DEBUGINFOD_URLS elements and corresponding curl handles. */
+  int num_urls;
+  CURL **server_handles;
+  CURLM *server_mhandle;
+  
   /* Can contain all other context, like cache_path, server_urls,
      timeout or other info gotten from environment variables, the
      handle data, etc. So those don't have to be reparsed and
@@ -140,15 +145,12 @@ static const time_t cache_default_max_unused_age_s = 604800; /* 1 week */
    The default parent directory is $HOME, or '/' if $HOME doesn't exist.  */
 static const char *cache_default_name = ".debuginfod_client_cache";
 static const char *cache_xdg_name = "debuginfod_client";
-static const char *cache_path_envvar = DEBUGINFOD_CACHE_PATH_ENV_VAR;
 
 /* URLs of debuginfods, separated by url_delim. */
-static const char *server_urls_envvar = DEBUGINFOD_URLS_ENV_VAR;
 static const char *url_delim =  " ";
 static const char url_delim_char = ' ';
 
 /* Timeout for debuginfods, in seconds (to get at least 100K). */
-static const char *server_timeout_envvar = DEBUGINFOD_TIMEOUT_ENV_VAR;
 static const long default_timeout = 90;
 
 
@@ -523,7 +525,13 @@ debuginfod_query_server (debuginfod_client *c,
 
   /* Is there any server we can query?  If not, don't do any work,
      just return with ENOSYS.  Don't even access the cache.  */
-  urls_envvar = getenv(server_urls_envvar);
+  if (c->num_urls == 0)
+    {
+      rc = -ENOSYS;
+      goto out;
+    }
+  
+  urls_envvar = getenv(DEBUGINFOD_URLS_ENV_VAR);
   if (vfd >= 0)
     dprintf (vfd, "server urls \"%s\"\n",
 	     urls_envvar != NULL ? urls_envvar : "");
@@ -611,7 +619,7 @@ debuginfod_query_server (debuginfod_client *c,
 
   /* Determine location of the cache. The path specified by the debuginfod
      cache environment variable takes priority.  */
-  char *cache_var = getenv(cache_path_envvar);
+  char *cache_var = getenv(DEBUGINFOD_CACHE_PATH_ENV_VAR);
   if (cache_var != NULL && strlen (cache_var) > 0)
     xalloc_str (cache_path, "%s", cache_var);
   else
@@ -691,7 +699,7 @@ debuginfod_query_server (debuginfod_client *c,
     }
 
   long timeout = default_timeout;
-  const char* timeout_envvar = getenv(server_timeout_envvar);
+  const char* timeout_envvar = getenv(DEBUGINFOD_TIMEOUT_ENV_VAR);
   if (timeout_envvar != NULL)
     timeout = atoi (timeout_envvar);
 
@@ -725,24 +733,13 @@ debuginfod_query_server (debuginfod_client *c,
       goto out0;
     }
 
-  /* Count number of URLs.  */
-  int num_urls = 0;
-  for (int i = 0; server_urls[i] != '\0'; i++)
-    if (server_urls[i] != url_delim_char
-        && (i == 0 || server_urls[i - 1] == url_delim_char))
-      num_urls++;
-
-  CURLM *curlm = curl_multi_init();
-  if (curlm == NULL)
-    {
-      rc = -ENETUNREACH;
-      goto out0;
-    }
-
+  CURLM *curlm = c->server_mhandle;
+  assert (curlm != NULL);
+  
   /* Tracks which handle should write to fd. Set to the first
      handle that is ready to write the target file to the cache.  */
   CURL *target_handle = NULL;
-  struct handle_data *data = malloc(sizeof(struct handle_data) * num_urls);
+  struct handle_data *data = malloc(sizeof(struct handle_data) * c->num_urls);
   if (data == NULL)
     {
       rc = -ENOMEM;
@@ -752,7 +749,7 @@ debuginfod_query_server (debuginfod_client *c,
   /* thereafter, goto out1 on error.  */
 
   /* Initialize handle_data with default values. */
-  for (int i = 0; i < num_urls; i++)
+  for (int i = 0; i < c->num_urls; i++)
     {
       data[i].handle = NULL;
       data[i].fd = -1;
@@ -763,14 +760,16 @@ debuginfod_query_server (debuginfod_client *c,
   char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
 
   /* Initialize each handle.  */
-  for (int i = 0; i < num_urls && server_url != NULL; i++)
+  for (int i = 0; i < c->num_urls && server_url != NULL; i++)
     {
       if (vfd >= 0)
 	dprintf (vfd, "init server %d %s\n", i, server_url);
 
       data[i].fd = fd;
       data[i].target_handle = &target_handle;
-      data[i].handle = curl_easy_init();
+      data[i].handle = c->server_handles[i];
+      assert (data[i].handle != NULL);
+      curl_easy_reset(data[i].handle); // esp. previously sent http headers
       data[i].client = c;
 
       if (data[i].handle == NULL)
@@ -833,7 +832,7 @@ debuginfod_query_server (debuginfod_client *c,
 
   /* Query servers in parallel.  */
   if (vfd >= 0)
-    dprintf (vfd, "query %d urls in parallel\n", num_urls);
+    dprintf (vfd, "query %d urls in parallel\n", c->num_urls);
   int still_running;
   long loops = 0;
   int committed_to = -1;
@@ -846,7 +845,7 @@ debuginfod_query_server (debuginfod_client *c,
       /* If the target file has been found, abort the other queries.  */
       if (target_handle != NULL)
 	{
-	  for (int i = 0; i < num_urls; i++)
+	  for (int i = 0; i < c->num_urls; i++)
 	    if (data[i].handle != target_handle)
 	      curl_multi_remove_handle(curlm, data[i].handle);
 	    else
@@ -943,7 +942,7 @@ debuginfod_query_server (debuginfod_client *c,
 		       curl_easy_strerror (msg->data.result));
 	      if (pnl)
 		c->default_progressfn_printed_p = 0;
-	      for (int i = 0; i < num_urls; i++)
+	      for (int i = 0; i < c->num_urls; i++)
 		if (msg->easy_handle == data[i].handle)
 		  {
 		    if (strlen (data[i].errbuf) > 0)
@@ -1063,11 +1062,8 @@ debuginfod_query_server (debuginfod_client *c,
       /* Perhaps we need not give up right away; could retry or something ... */
     }
 
-  /* Success!!!! */
-  for (int i = 0; i < num_urls; i++)
-    curl_easy_cleanup(data[i].handle);
-
-  curl_multi_cleanup (curlm);
+  curl_multi_remove_handle(curlm, verified_handle);
+  assert (verified_handle == target_handle);
   free (data);
   free (server_urls);
 
@@ -1081,10 +1077,6 @@ debuginfod_query_server (debuginfod_client *c,
 
 /* error exits */
  out1:
-  for (int i = 0; i < num_urls; i++)
-    curl_easy_cleanup(data[i].handle);
-
-  curl_multi_cleanup(curlm);
   unlink (target_cache_tmppath);
   close (fd); /* before the rmdir, otherwise it'll fail */
   (void) rmdir (target_cache_dir); /* nop if not empty */
@@ -1095,6 +1087,11 @@ debuginfod_query_server (debuginfod_client *c,
 
 /* general purpose exit */
  out:
+  /* Reset sent headers */
+  curl_slist_free_all (c->headers);
+  c->headers = NULL;
+  c->user_agent_set_p = 0;
+  
   /* Conclude the last \r status line */
   /* Another possibility is to use the ANSI CSI n K EL "Erase in Line"
      code.  That way, the previously printed messages would be erased,
@@ -1127,7 +1124,9 @@ debuginfod_begin (void)
 {
   debuginfod_client *client;
   size_t size = sizeof (struct debuginfod_client);
+  const char* server_urls = NULL;
   client = (debuginfod_client *) calloc (1, size);
+
   if (client != NULL)
     {
       if (getenv(DEBUGINFOD_PROGRESS_ENV_VAR))
@@ -1137,6 +1136,51 @@ debuginfod_begin (void)
       else
 	client->verbose_fd = -1;
     }
+
+  /* Count the DEBUGINFOD_URLS and create the long-lived curl handles. */
+  client->num_urls = 0;
+  server_urls = getenv (DEBUGINFOD_URLS_ENV_VAR);
+  if (server_urls != NULL)
+    for (int i = 0; server_urls[i] != '\0'; i++)
+      if (server_urls[i] != url_delim_char
+          && (i == 0 || server_urls[i - 1] == url_delim_char))
+        client->num_urls++;
+
+  client->server_handles = calloc (client->num_urls, sizeof(CURL *));
+  if (client->server_handles == NULL)
+    goto out1;
+
+  // allocate N curl easy handles
+  for (int i=0; i<client->num_urls; i++)
+    {
+      client->server_handles[i] = curl_easy_init ();
+      if (client->server_handles[i] == NULL)
+        {
+          for (i--; i >= 0; i--)
+            curl_easy_cleanup (client->server_handles[i]);
+          goto out2;
+        }
+    }
+
+  // allocate 1 curl multi handle
+  client->server_mhandle = curl_multi_init ();
+  if (client->server_mhandle == NULL)
+    goto out3;
+  
+  goto out;
+
+ out3:
+  for (int i=0; i<client->num_urls; i++)
+    curl_easy_cleanup (client->server_handles[i]);
+  
+ out2:
+  free (client->server_handles);
+  
+ out1:
+  free (client);
+  client = NULL;
+
+ out:  
   return client;
 }
 
@@ -1165,6 +1209,11 @@ debuginfod_end (debuginfod_client *client)
   if (client == NULL)
     return;
 
+  // assume that all the easy handles have already been removed from the multi handle
+  for (int i=0; i<client->num_urls; i++)
+    curl_easy_cleanup (client->server_handles[i]);
+  free (client->server_handles);
+  curl_multi_cleanup (client->server_mhandle);
   curl_slist_free_all (client->headers);
   free (client->url);
   free (client);
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 50777f1fa193..0e6fd13fd20b 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -1573,6 +1573,46 @@ debuginfod_find_progress (debuginfod_client *, long a, long b)
 }
 
 
+// a little lru pool of debuginfod_client*s for reuse between query threads
+
+mutex dc_pool_lock;
+deque<debuginfod_client*> dc_pool;
+
+debuginfod_client* debuginfod_pool_begin()
+{
+  unique_lock<mutex> lock(dc_pool_lock);
+  if (dc_pool.size() > 0)
+    {
+      inc_metric("dc_pool_op_count","op","begin-reuse");
+      debuginfod_client *c = dc_pool.front();
+      dc_pool.pop_front();
+      return c;
+    }
+  inc_metric("dc_pool_op_count","op","begin-new");
+  return debuginfod_begin();
+}
+
+
+void debuginfod_pool_groom()
+{
+  unique_lock<mutex> lock(dc_pool_lock);
+  while (dc_pool.size() > 0)
+    {
+      inc_metric("dc_pool_op_count","op","end");
+      debuginfod_end(dc_pool.front());
+      dc_pool.pop_front();
+    }
+}
+
+
+void debuginfod_pool_end(debuginfod_client* c)
+{
+  unique_lock<mutex> lock(dc_pool_lock);
+  inc_metric("dc_pool_op_count","op","end-save");
+  dc_pool.push_front(c); // accelerate reuse, vs. push_back
+}
+
+
 static struct MHD_Response*
 handle_buildid (MHD_Connection* conn,
                 const string& buildid /* unsafe */,
@@ -1672,7 +1712,7 @@ handle_buildid (MHD_Connection* conn,
   // is to defer to other debuginfo servers.
 
   int fd = -1;
-  debuginfod_client *client = debuginfod_begin ();
+  debuginfod_client *client = debuginfod_pool_begin ();
   if (client != NULL)
     {
       debuginfod_set_progressfn (client, & debuginfod_find_progress);
@@ -1721,7 +1761,7 @@ handle_buildid (MHD_Connection* conn,
     }
   else
     fd = -errno; /* Set by debuginfod_begin.  */
-  debuginfod_end (client);
+  debuginfod_pool_end (client);
 
   if (fd >= 0)
     {
@@ -1892,11 +1932,25 @@ handler_cb (void * /*cls*/,
             const char * /*version*/,
             const char * /*upload_data*/,
             size_t * /*upload_data_size*/,
-            void ** /*con_cls*/)
+            void ** ptr)
 {
   struct MHD_Response *r = NULL;
   string url_copy = url;
 
+  /* libmicrohttpd always makes (at least) two callbacks: once just
+     past the headers, and one after the request body is finished
+     being received.  If we process things early (first callback) and
+     queue a response, libmicrohttpd would suppress http keep-alive
+     (via connection->read_closed = true). */
+  static int aptr; /* just some random object to use as a flag */
+  if (&aptr != *ptr)
+    {
+      /* never respond on the first call */
+      *ptr = &aptr;
+      return MHD_YES;
+    }
+  *ptr = NULL;                     /* reset when done */
+  
 #if MHD_VERSION >= 0x00097002
   enum MHD_Result rc;
 #else
@@ -3163,6 +3217,7 @@ void groom()
 
   sqlite3_db_release_memory(db); // shrink the process if possible
   sqlite3_db_release_memory(dbq); // ... for both connections
+  debuginfod_pool_groom(); // and release any debuginfod_client objects we've been holding onto
 
   fdcache.limit(0,0); // release the fdcache contents
   fdcache.limit(fdcache_fds,fdcache_mbs); // restore status quo parameters
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 5cd4fe1593d2..05fcd23d5465 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,9 @@
+2021-04-23  Frank Ch. Eigler <fche@redhat.com>
+
+	PR27701
+	* debuginfod_find_debuginfo.3: Specify sequential reuse policy of
+	debuginfod_client objects.
+
 2021-02-04  Frank Ch. Eigler <fche@redhat.com>
 
 	* debuginfod.8: Mention new --fdcache-mintmp option.
diff --git a/doc/debuginfod_find_debuginfo.3 b/doc/debuginfod_find_debuginfo.3
index cfddb5428c80..5ae44a9834be 100644
--- a/doc/debuginfod_find_debuginfo.3
+++ b/doc/debuginfod_find_debuginfo.3
@@ -105,6 +105,8 @@ as a debuginfod server begins transferring the target file all of the
 connections to the other servers are closed.
 
 A \fBclient\fP handle should be used from only one thread at a time.
+A handle may be reused for a series of lookups, which can improve
+performance due to retention of connections and caches.
 
 .SH "RETURN VALUE"
 
@@ -175,8 +177,8 @@ of the client object.
 
 .SS "HTTP HEADER"
 
-Before a lookup function is initiated, a client application may
-add HTTP request headers to future downloads.
+Before each lookup function is initiated, a client application may
+add HTTP request headers.  These are reset after each lookup function.
 .BR \%debuginfod_add_http_header ()
 may be called with strings of the form
 .BR \%"Header:\~value" .
diff --git a/tests/ChangeLog b/tests/ChangeLog
index ea44d20c424a..ac1963650693 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2021-04-23  Frank Ch. Eigler  <fche@redhat.com>
+
+	* run-debuginfod-find.sh: Add a tiny test for client object reuse.
+	Add an "errfiles" test construct to ask the framework to print
+	various files in the case of an error.
+
 2021-03-30  Frank Ch. Eigler  <fche@redhat.com>
 
 	* run-debuginfod-find.sh: Add thread comm checks.
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 8213c8a42877..b1911a92f909 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -50,6 +50,29 @@ cleanup()
 # clean up trash if we were aborted early
 trap cleanup 0 1 2 3 5 9 15
 
+errfiles_list=
+err() {
    for port in $PORT1 $PORT2
+    do
+        echo $port metrics
+        curl -s http://127.0.0.1:$port/metrics
+        echo
+    done
+    for x in $errfiles_list
+    do
+        echo "$x"
+        cat $x
+        echo
+    done
+}
+trap err ERR
+
+errfiles() {
+    errfiles_list="$errfiles_list $*"
+}
+
+
+
 # find an unused port number
 while true; do
     PORT1=`expr '(' $RANDOM % 1000 ')' + 9000`
@@ -90,10 +113,6 @@ wait_ready()
   done;
 
   if [ $timeout -eq 0 ]; then
-      echo "metric $what never changed to $value on port $port"
-      curl -s http://127.0.0.1:$port/metrics
-      echo "logs for debuginfod with port $port"
-      cat vlog$port
     exit 1;
   fi
 }
@@ -106,6 +125,7 @@ ln -s R/nothing.rpm R/nothing.rpm
 env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 --fdcache-mintmp 0 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog$PORT1 2>&1 &
 PID1=$!
 tempfiles vlog$PORT1
+errfiles vlog$PORT1
 # Server must become ready
 wait_ready $PORT1 'ready' 1
 export DEBUGINFOD_URLS=http://127.0.0.1:$PORT1/   # or without trailing /
@@ -414,6 +434,7 @@ mkdir -p $DEBUGINFOD_CACHE_PATH
 env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -U -d ${DB}_2 -p $PORT2 -L L D > vlog$PORT2 2>&1 &
 PID2=$!
 tempfiles vlog$PORT2
+errfiles vlog$PORT2
 tempfiles ${DB}_2
 wait_ready $PORT2 'ready' 1
 wait_ready $PORT2 'thread_work_total{role="traverse"}' 1
@@ -502,6 +523,9 @@ curl -s http://127.0.0.1:$PORT2/buildid/deadbeef/debuginfo > /dev/null || true
 curl -s http://127.0.0.1:$PORT2/buildid/deadbeef/badtype > /dev/null || true
 (curl -s http://127.0.0.1:$PORT2/metrics | grep 'badtype') && false
 
+# Confirm that some debuginfod client pools are being used
+curl -s http://127.0.0.1:$PORT2/metrics | grep 'dc_pool_op.*reuse'
+
 ########################################################################
 # Corrupt the sqlite database and get debuginfod to trip across its errors
 curl -s http://127.0.0.1:$PORT1/metrics | grep 'sqlite3.*reset'



More information about the Elfutils-devel mailing list