From 8016e84c514d96797d3452f07899fad2a6a03f4e Mon Sep 17 00:00:00 2001 From: Noah Sanci Date: Fri, 9 Jul 2021 14:53:10 -0400 Subject: [PATCH] debuginfod: PR27983 - ignore duplicate urls Gazing at server logs, one sees a minority of clients who appear to have duplicate query traffic coming in: the same URL, milliseconds apart. Chances are the user accidentally doubled her $DEBUGINFOD_URLS somehow, and the client library is dutifully asking the servers TWICE. Bug #27863 reduces the pain on the servers' CPU, but dupe network traffic is still being paid. We should reject sending outright duplicate concurrent traffic. The urls are now simply removed upon finding a duplicate after url construction. https://sourceware.org/bugzilla/show_bug.cgi?id=27983 Signed-off-by: Noah Sanci --- debuginfod/ChangeLog | 8 +++ debuginfod/debuginfod-client.c | 95 ++++++++++++++++++++++++---------- tests/ChangeLog | 5 ++ tests/run-debuginfod-find.sh | 11 ++++ 4 files changed, 91 insertions(+), 28 deletions(-) diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index d9d11737..23544d3f 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,11 @@ +2021-07-09 Noah Sanci + + * debuginfod-client.c (debuginfod_query_server): As full-length + urls are generated with standardized formats, ignore duplicates. + Created out1 and changed out2 error gotos. Updated url creation print + statements. + (globals): Removed url_delim_char, as it was no longer used. + 2021-06-18 Mark Wielaard * debuginfod-client.c (debuginfod_begin): Don't use client if diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c index f417b40a..d0343ed7 100644 --- a/debuginfod/debuginfod-client.c +++ b/debuginfod/debuginfod-client.c @@ -152,7 +152,6 @@ static const char *cache_xdg_name = "debuginfod_client"; /* URLs of debuginfods, separated by url_delim. */ static const char *url_delim = " "; -static const char url_delim_char = ' '; /* Timeout for debuginfods, in seconds (to get at least 100K). */ static const long default_timeout = 90; @@ -763,13 +762,59 @@ debuginfod_query_server (debuginfod_client *c, goto out0; } + + /* Initialize the memory to zero */ + char *strtok_saveptr; + char **server_url_list = NULL; + char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr); /* Count number of URLs. */ int num_urls = 0; - for (int i = 0; server_urls[i] != '\0'; i++) - if (server_urls[i] != url_delim_char - && (i == 0 || server_urls[i - 1] == url_delim_char)) - num_urls++; - + + while (server_url != NULL) + { + /* PR 27983: If the url is already set to be used use, skip it */ + char *slashbuildid; + if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/') + slashbuildid = "buildid"; + else + slashbuildid = "/buildid"; + + char *tmp_url; + if (asprintf(&tmp_url, "%s%s", server_url, slashbuildid) == -1) + { + rc = -ENOMEM; + goto out1; + } + int url_index; + for (url_index = 0; url_index < num_urls; ++url_index) + { + if(strcmp(tmp_url, server_url_list[url_index]) == 0) + { + url_index = -1; + break; + } + } + if (url_index == -1) + { + if (vfd >= 0) + dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url); + free(tmp_url); + } + else + { + num_urls++; + server_url_list = reallocarray(server_url_list, num_urls, + sizeof(char*)); + if (server_url_list == NULL) + { + rc = -ENOMEM; + goto out1; + } + server_url_list[num_urls-1] = tmp_url; + } + server_url = strtok_r(NULL, url_delim, &strtok_saveptr); + } + CURLM *curlm = c->server_mhandle; assert (curlm != NULL); @@ -793,12 +838,11 @@ debuginfod_query_server (debuginfod_client *c, data[i].errbuf[0] = '\0'; } - char *strtok_saveptr; - char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr); - /* Initialize each handle. */ - for (int i = 0; i < num_urls && server_url != NULL; i++) + for (int i = 0; i < num_urls; i++) { + if ((server_url = server_url_list[i]) == NULL) + break; if (vfd >= 0) dprintf (vfd, "init server %d %s\n", i, server_url); @@ -808,25 +852,16 @@ debuginfod_query_server (debuginfod_client *c, if (data[i].handle == NULL) { rc = -ENETUNREACH; - goto out1; + goto out2; } data[i].client = c; /* Build handle url. Tolerate both http://foo:999 and http://foo:999/ forms */ - char *slashbuildid; - if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/') - slashbuildid = "buildid"; - else - slashbuildid = "/buildid"; - - if (filename) /* must start with / */ - snprintf(data[i].url, PATH_MAX, "%s%s/%s/%s%s", server_url, - slashbuildid, build_id_bytes, type, filename); + if (filename) + snprintf(data[i].url, PATH_MAX, "%s/%s/%s%s", server_url, build_id_bytes, type, filename); else - snprintf(data[i].url, PATH_MAX, "%s%s/%s/%s", server_url, - slashbuildid, build_id_bytes, type); - + snprintf(data[i].url, PATH_MAX, "%s/%s/%s", server_url, build_id_bytes, type); if (vfd >= 0) dprintf (vfd, "url %d %s\n", i, data[i].url); @@ -861,7 +896,6 @@ debuginfod_query_server (debuginfod_client *c, curl_easy_setopt(data[i].handle, CURLOPT_HTTPHEADER, c->headers); curl_multi_add_handle(curlm, data[i].handle); - server_url = strtok_r(NULL, url_delim, &strtok_saveptr); } /* Query servers in parallel. */ @@ -905,7 +939,7 @@ debuginfod_query_server (debuginfod_client *c, case CURLM_OUT_OF_MEMORY: rc = -ENOMEM; break; default: rc = -ENETUNREACH; break; } - goto out1; + goto out2; } if (c->progressfn) /* inform/check progress callback */ @@ -1075,7 +1109,7 @@ debuginfod_query_server (debuginfod_client *c, } if (verified_handle == NULL) - goto out1; + goto out2; if (vfd >= 0) { @@ -1104,7 +1138,7 @@ debuginfod_query_server (debuginfod_client *c, if (rc < 0) { rc = -errno; - goto out1; + goto out2; /* Perhaps we need not give up right away; could retry or something ... */ } @@ -1127,7 +1161,7 @@ debuginfod_query_server (debuginfod_client *c, goto out; /* error exits */ - out1: + out2: /* remove all handles from multi */ for (int i = 0; i < num_urls; i++) { @@ -1140,6 +1174,11 @@ debuginfod_query_server (debuginfod_client *c, (void) rmdir (target_cache_dir); /* nop if not empty */ free(data); + out1: + for (int i = 0; i < num_urls; ++i) + free(server_url_list[i]); + free(server_url_list); + out0: free (server_urls); diff --git a/tests/ChangeLog b/tests/ChangeLog index b65cbeb7..5747d658 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,8 @@ +2021-07-09 Noah Sanci + + * run-debuginfod-find.sh: Wrote test to ensure duplicate urls are in + fact not checked. + 2021-07-02 Mark Wielaard * run-debuginfo-find.sh: unset VALGRIND_CMD before testing debuginfod diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh index 74a5ceff..f1848550 100755 --- a/tests/run-debuginfod-find.sh +++ b/tests/run-debuginfod-find.sh @@ -359,6 +359,17 @@ rm -rf extracted wait_ready $PORT1 'found_sourcerefs_total{source=".rpm archive"}' $sourcefiles +######################################################################## +# PR27983 ensure no duplicate urls are used in when querying servers for files +rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests +export DEBUGINFOD_URLS="http://127.0.0.1:$PORT1 http://127.0.0.1:$PORT1 http://127.0.0.1:$PORT1 http:127.0.0.1:7999" +env LD_LIBRARY_PATH=$ldpath ${abs_top_builddir}/debuginfod/debuginfod-find -v executable $BUILDID2 > vlog4 2>&1 || true +tempfiles vlog4 +if [ $( grep -c 'duplicate url: http://127.0.0.1:'$PORT1'.*' vlog4 ) -ne 2 ]; then + echo "Duplicated servers undetected"; + err +fi +######################################################################## # Run a bank of queries against the debuginfod-rpms / debuginfod-debs test cases archive_test() { -- 2.31.1