[Bug debuginfod/28034] %-escape url characters

Mark Wielaard mark@klomp.org
Tue Jul 20 17:50:11 GMT 2021


Hi Noah,

On Mon, Jul 19, 2021 at 10:53:17AM -0400, Noah Sanci via Elfutils-devel wrote:
> When requesting some source files, some URL-inconvenient chars
> sometimes pop up.  Example from f33 libstdc++:
> /buildid/44d8485cb75512c2ca5c8f70afbd475cae30af4f/source/usr/src/debug/
> gcc-10.3.1-1.fc33.x86_64/obj-x86_64-redhat-linux/x86_64-redhat-linux/
> libstdc++-v3/src/c++11/../../../../../libstdc++-v3/src/c++11/
> condition_variable.cc
> As this URL is passed into debuginfod's handler_cb, it appears that the
> + signs are helpfully unescaped to spaces by libmicrohttpd, which
> 'course breaks everything.  We need to suppress this HTTP URL
> processing step.  Also worth checking that %HEX decoding is also
> suppressed.

I find this description slightly confusing. It ends with "Also worth
checking..." but that is actually what is done in this patch. The part
before that about what debuginfod/microhttpd does on the server side
is interesting, but not really what this patch is about (just why this
patch is necessary, but it seems necessary on the client side always
whatever server is used).

Could you rewrite the commit message to describe what is done in this
patch?

> https://sourceware.org/bugzilla/show_bug.cgi?id=28034

> +2021-07-16  Noah Sanci  <nsanci@redhat.com>
> +
> +	PR28034
> +	* debuginfod-client.c (debuginfod_query_server): % escape filename
> +	so the completed url is processed properly.

> diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c
> index f12f609c..e30f73eb 100644
> --- a/debuginfod/debuginfod-client.c
> +++ b/debuginfod/debuginfod-client.c
> @@ -831,9 +831,15 @@ debuginfod_query_server (debuginfod_client *c,
>        else
>          slashbuildid = "/buildid";
>  
> -      if (filename) /* must start with / */
> -        snprintf(data[i].url, PATH_MAX, "%s%s/%s/%s%s", server_url,
> -                 slashbuildid, build_id_bytes, type, filename);
> +      if (filename)/* must start with / */
> +        {
> +          /* PR28034 escape characters in completed url to %hh format. */
> +          char *escaped_string;
> +          escaped_string = curl_easy_escape(data[i].handle, filename, 0);

curl_easy_escape () can return NULL on failure.

> +          snprintf(data[i].url, PATH_MAX, "%s%s/%s/%s%s", server_url,
> +                   slashbuildid, build_id_bytes, type, escaped_string);
> +          curl_free(escaped_string);
> +        }

I note that filename is actually the full path component of the URL so
includes slashes ('/'). curl_easy_escape seems to convert these to %2F
(if I am correct). Is this intended?

>        else
>          snprintf(data[i].url, PATH_MAX, "%s%s/%s/%s", server_url,
>                   slashbuildid, build_id_bytes, type);
> diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
> index 1adf703a..90cdcf94 100644
> --- a/doc/debuginfod.8
> +++ b/doc/debuginfod.8
> @@ -299,6 +299,7 @@ l l.
>  \../bar/foo.c AT_comp_dir=/zoo/	/buildid/BUILDID/source/zoo//../bar/foo.c
>  .TE
>  
> +Note: the client should %-escape characters in /SOURCE/FILE that are not shown as "unreserved" in section 2.3 of RFC3986.

This is a very long line. Could you break it up?
Also, maybe just give the information instead of only a reference.
(The "unreserved" characters are "a"-"z"", "A"-"Z", "0"-"9", "-", ".", "_" and "~")

Also same question as above. slash ('/') is not an unreserved
character, should it be encoded?

>  .SS /metrics
>  
>  This endpoint returns a Prometheus formatted text/plain dump of a
> diff --git a/tests/ChangeLog b/tests/ChangeLog
> index 1460b422..cfa0dee4 100644
> --- a/tests/ChangeLog
> +++ b/tests/ChangeLog
> @@ -1,3 +1,9 @@
> +2021-07-16  Noah Sanci  <nsanci@redhat.com>
> +
> +	PR28034
> +	* run-debuginfod-find.sh: Added a test case ensuring files with %
> +	escapable characters in their paths are accessible.

There are also a couple of changes (fixes?) to the testcases.
Could those be split out?

>  2021-07-06  Alice Zhang  <alizhang@redhat.com>
>  
>          PR27531
> diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
> index 73bbe65b..11c1a1a1 100755
> --- a/tests/run-debuginfod-find.sh
> +++ b/tests/run-debuginfod-find.sh
> @@ -44,6 +44,7 @@ cleanup()
>    if [ $PID2 -ne 0 ]; then kill $PID2; wait $PID2; fi
>    if [ $PID3 -ne 0 ]; then kill $PID3; wait $PID3; fi
>    if [ $PID4 -ne 0 ]; then kill $PID4; wait $PID4; fi
> +  sleep 5; # Give time to ensure debuginfod cleans and closes sqlite databases.
>    rm -rf F R D L Z ${PWD}/foobar ${PWD}/mocktree ${PWD}/.client_cache* ${PWD}/tmp*
>    exit_cleanup
>  }

This seems a testsuite fixup?

> @@ -54,7 +55,7 @@ trap cleanup 0 1 2 3 5 9 15
>  errfiles_list=
>  err() {
>      echo ERROR REPORTS
> -    for ports in $PORT1 $PORT2
> +    for ports in $PORT1 $PORT2 $PORT3
>      do
>          echo ERROR REPORT $port metrics
>          curl -s http://127.0.0.1:$port/metrics

As is this?

> @@ -149,18 +150,18 @@ ps -q $PID1 -e -L -o '%p %c %a' | grep traverse
>  # Compile a simple program, strip its debuginfo and save the build-id.
>  # Also move the debuginfo into another directory so that elfutils
>  # cannot find it without debuginfod.
> -echo "int main() { return 0; }" > ${PWD}/prog.c
> -tempfiles prog.c
> +echo "int main() { return 0; }" > ${PWD}/p+r%o\$g.c
> +tempfiles p+r%o\$g.c
>  # Create a subdirectory to confound source path names
>  mkdir foobar
> -gcc -Wl,--build-id -g -o prog ${PWD}/foobar///./../prog.c
> -testrun ${abs_top_builddir}/src/strip -g -f prog.debug ${PWD}/prog
> +gcc -Wl,--build-id -g -o p+r%o\$g ${PWD}/foobar///./../p+r%o\$g.c
> +testrun ${abs_top_builddir}/src/strip -g -f p+r%o\$g.debug ${PWD}/p+r%o\$g
>  BUILDID=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \
> -          -a prog | grep 'Build ID' | cut -d ' ' -f 7`
> +          -a p+r%o\\$g | grep 'Build ID' | cut -d ' ' -f 7`
>  
>  wait_ready $PORT1 'thread_work_total{role="traverse"}' 1
> -mv prog F
> -mv prog.debug F
> +mv p+r%o\$g F
> +mv p+r%o\$g.debug F
>  kill -USR1 $PID1
>  # Wait till both files are in the index.
>  wait_ready $PORT1 'thread_work_total{role="traverse"}' 2
> @@ -171,7 +172,7 @@ wait_ready $PORT1 'thread_busy{role="scan"}' 0
>  
>  # Test whether elfutils, via the debuginfod client library dlopen hooks,
>  # is able to fetch debuginfo from the local debuginfod.
> -testrun ${abs_builddir}/debuginfod_build_id_find -e F/prog 1
> +testrun ${abs_builddir}/debuginfod_build_id_find -e F/p+r%o\$g 1
>  
>  ########################################################################
>  
> @@ -213,22 +214,22 @@ fi
>  # Test whether debuginfod-find is able to fetch those files.
>  rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests
>  filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $BUILDID`
> -cmp $filename F/prog.debug
> +cmp $filename F/p+r%o\$g.debug
>  if [ -w $filename ]; then
>      echo "cache file writable, boo"
>      err
>  fi
>  
> -filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable F/prog`
> -cmp $filename F/prog
> +filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable F/p+r%o\\$g`
> +cmp $filename F/p+r%o\$g
>  
>  # raw source filename
> -filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source $BUILDID ${PWD}/foobar///./../prog.c`
> -cmp $filename  ${PWD}/prog.c
> +filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source $BUILDID ${PWD}/foobar///./../p+r%o\\$g.c`
> +cmp $filename  ${PWD}/p+r%o\$g.c
>  
>  # and also the canonicalized one
> -filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source $BUILDID ${PWD}/prog.c`
> -cmp $filename  ${PWD}/prog.c
> +filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source $BUILDID ${PWD}/p+r%o\\$g.c`
> +cmp $filename  ${PWD}/p+r%o\$g.c
>  
>  
>  ########################################################################
> @@ -672,7 +673,7 @@ touch -d '1970-01-01' $DEBUGINFOD_CACHE_PATH/00000000 # old enough to guarantee
>  echo 0 > $DEBUGINFOD_CACHE_PATH/cache_clean_interval_s
>  echo 0 > $DEBUGINFOD_CACHE_PATH/max_unused_age_s
>  
> -testrun ${abs_builddir}/debuginfod_build_id_find -e F/prog 1
> +testrun ${abs_builddir}/debuginfod_build_id_find -e F/p+r%o\$g 1
>  
>  testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $BUILDID2 && false || true
>  
> @@ -725,6 +726,7 @@ PID4=$!
>  wait_ready $PORT3 'ready' 1
>  tempfiles vlog$PORT3
>  errfiles vlog$PORT3
> +tempfiles $DB.backup*

???

>  kill -USR2 $PID4
>  wait_ready $PORT3 'thread_work_total{role="groom"}' 1
> @@ -738,6 +740,7 @@ wait_ready $PORT3 'groom{statistic="files scanned (#)"}' 0
>  wait_ready $PORT3 'groom{statistic="files scanned (mb)"}' 0
>  
>  kill $PID4
> +PID4=0

And this?
Might need to be

  wait $PID
  PID4=0

? And maybe double check the other kills in the test?

Cheers,

Mark



More information about the Elfutils-devel mailing list