[PATCH v2] Fix leb128 reading

Mark Wielaard mark@klomp.org
Thu Oct 29 23:06:20 GMT 2020


Hi Tom,

On Wed, 2020-10-28 at 17:26 -0600, Tom Tromey wrote:
> PR 26773 points out that some sleb128 values are decoded incorrectly.
> 
> This version of the fix only examines the sleb128 conversion.
> Overlong encodings are not handled, and the uleb128 decoders are not
> touched.  The approach taken here is to do the work in an unsigned
> type, and then rely on an implementation-defined cast to convert to
> signed.

I like this variant, it still handles longer than necessary encodings,
just not those that use more than 10 bytes. It also removes all the
tricky cases I had questions about. And keeps the parts I already acked
last time.

> Signed-off-by: Tom Tromey <tom@tromey.com>

> diff --git a/.gitignore b/.gitignore
> index c9790941..d737b14d 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -151,6 +151,7 @@ Makefile.in
>  /tests/get-units-invalid
>  /tests/get-units-split
>  /tests/hash
> +/tests/leb128
>  /tests/line2addr
>  /tests/low_high_pc
>  /tests/msg_tst
> diff --git a/ChangeLog b/ChangeLog
> index 72e8397c..4c8699f8 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,7 @@
> +2020-10-28  Tom Tromey  <tom@tromey.com>
> +
> +	* .gitignore: Add /tests/leb128.
> +
>  2020-10-01  Frank Ch. Eigler  <fche@redhat.com>

Ack.

> diff --git a/libdw/ChangeLog b/libdw/ChangeLog
> index 731c7e79..289bb4c9 100644
> --- a/libdw/ChangeLog
> +++ b/libdw/ChangeLog
> @@ -1,3 +1,13 @@
> +2020-10-28  Tom Tromey  <tom@tromey.com>
> +
> +	PR26773
> +	* dwarf_getlocation.c (store_implicit_value): Use
> +	__libdw_get_uleb128_unchecked.
> +	* memory-access.h (get_sleb128_step): Assume unsigned type for
> +	'var'.
> +	(__libdw_get_sleb128, __libdw_get_sleb128_unchecked): Work in
> +	unsigned type.  Handle final byte.
> +
>  2020-10-19  Mark Wielaard  <mark@klomp.org>
>  
>  	* dwarf_frame_register.c (dwarf_frame_register): Declare ops_mem
> diff --git a/libdw/dwarf_getlocation.c b/libdw/dwarf_getlocation.c
> index 4617f9e9..f2bad5a9 100644
> --- a/libdw/dwarf_getlocation.c
> +++ b/libdw/dwarf_getlocation.c
> @@ -130,9 +130,8 @@ store_implicit_value (Dwarf *dbg, void **cache, Dwarf_Op *op)
>    struct loc_block_s *block = libdw_alloc (dbg, struct loc_block_s,
>  					   sizeof (struct loc_block_s), 1);
>    const unsigned char *data = (const unsigned char *) (uintptr_t) op->number2;
> -  uint64_t len = __libdw_get_uleb128 (&data, data + len_leb128 (Dwarf_Word));
> -  if (unlikely (len != op->number))
> -    return -1;
> +  /* Skip the block length.  */
> +  __libdw_get_uleb128_unchecked (&data);
>    block->addr = op;
>    block->data = (unsigned char *) data;
>    block->length = op->number;

Ack.

> diff --git a/libdw/memory-access.h b/libdw/memory-access.h
> index 14436a71..8b2386ee 100644
> --- a/libdw/memory-access.h
> +++ b/libdw/memory-access.h
> @@ -113,19 +113,22 @@ __libdw_get_uleb128_unchecked (const unsigned char **addrp)
>  #define get_sleb128_step(var, addr, nth)				      \
>    do {									      \
>      unsigned char __b = *(addr)++;					      \
> +    (var) |= (typeof (var)) (__b & 0x7f) << ((nth) * 7);		      \
>      if (likely ((__b & 0x80) == 0))					      \
>        {									      \
> -	struct { signed int i:7; } __s = { .i = __b };			      \
> -	(var) |= (typeof (var)) __s.i * ((typeof (var)) 1 << ((nth) * 7));    \
> +	if ((__b & 0x40) != 0)						      \
> +	  (var) |= - ((typeof (var)) 1 << (((nth) + 1) * 7));		      \
>  	return (var);							      \
>        }									      \
> -    (var) |= (typeof (var)) (__b & 0x7f) << ((nth) * 7);		      \
>    } while (0)

Here it should be noted that var is already the unsigned acc from
__libdw_get_sleb, so the left shift is always fine. The value
extraction is now the same for both the continuation path and the last
byte path.

>  static inline int64_t
>  __libdw_get_sleb128 (const unsigned char **addrp, const unsigned char *end)
>  {
> -  int64_t acc = 0;
> +  /* Do the work in an unsigned type, but use implementation-defined
> +     behavior to cast to signed on return.  This avoids some undefined
> +     behavior when shifting.  */
> +  uint64_t acc = 0;
>  
>    /* Unroll the first step to help the compiler optimize
>       for the common single-byte case.  */
> @@ -134,6 +137,20 @@ __libdw_get_sleb128 (const unsigned char **addrp, const unsigned char *end)
>    const size_t max = __libdw_max_len_sleb128 (*addrp - 1, end);
>    for (size_t i = 1; i < max; ++i)
>      get_sleb128_step (acc, *addrp, i);
> +  if (*addrp == end)
> +    return INT64_MAX;

OK, we keep the __libdw_max_len_sleb128 as is, so it is one smaller
than the actual max number of bytes, but reaching the end of data is
still overflow.

> +  /* There might be one extra byte.  */
> +  unsigned char b = **addrp;
> +  ++*addrp;
> +  if (likely ((b & 0x80) == 0))
> +    {
> +      /* We only need the low bit of the final byte, and as it is the
> +	 sign bit, we don't need to do anything else here.  */
> +      acc |= ((typeof (acc)) b) << 7 * max;
> +      return acc;
> +    }
> +
>    /* Other implementations set VALUE to INT_MAX in this
>       case.  So we better do this as well.  */
>    return INT64_MAX;

Nice and simple.

> @@ -142,7 +159,10 @@ __libdw_get_sleb128 (const unsigned char **addrp, const unsigned char *end)
>  static inline int64_t
>  __libdw_get_sleb128_unchecked (const unsigned char **addrp)
>  {
> -  int64_t acc = 0;
> +  /* Do the work in an unsigned type, but use implementation-defined
> +     behavior to cast to signed on return.  This avoids some undefined
> +     behavior when shifting.  */
> +  uint64_t acc = 0;
>  
>    /* Unroll the first step to help the compiler optimize
>       for the common single-byte case.  */
> @@ -152,6 +172,18 @@ __libdw_get_sleb128_unchecked (const unsigned char **addrp)
>    const size_t max = len_leb128 (int64_t) - 1;
>    for (size_t i = 1; i < max; ++i)
>      get_sleb128_step (acc, *addrp, i);

Same as above, we just substract one here when using len_leb128 instead
of using __libdw_max_len_sleb128.

> +  /* There might be one extra byte.  */
> +  unsigned char b = **addrp;
> +  ++*addrp;
> +  if (likely ((b & 0x80) == 0))
> +    {
> +      /* We only need the low bit of the final byte, and as it is the
> +	 sign bit, we don't need to do anything else here.  */
> +      acc |= ((typeof (acc)) b) << 7 * max;
> +      return acc;
> +    }

OK, as above.

>    /* Other implementations set VALUE to INT_MAX in this
>       case.  So we better do this as well.  */
>    return INT64_MAX;
> diff --git a/tests/ChangeLog b/tests/ChangeLog
> index e9d1e260..91aeadaf 100644
> --- a/tests/ChangeLog
> +++ b/tests/ChangeLog
> @@ -1,3 +1,10 @@
> +2020-10-28  Tom Tromey  <tom@tromey.com>
> +
> +	PR26773
> +	* Makefile.am (check_PROGRAMS, TESTS): Add leb128.
> +	(leb128_LDADD): New variable.
> +	* leb128.c: New file.
> +
>  2020-10-19  Mark Wielaard  <mark@klomp.org>
>  
>  	* addrcfi.c (print_register): Make ops_mem 3 elements.
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index bc5d034f..1b51ab8d 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -63,7 +63,7 @@ check_PROGRAMS = arextract arsymtest newfile saridx scnnames sectiondump \
>  		  all-dwarf-ranges unit-info next_cfi \
>  		  elfcopy addsections xlate_notes elfrdwrnop \
>  		  dwelf_elf_e_machine_string \
> -		  getphdrnum
> +		  getphdrnum leb128
>  
>  asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \
>  	    asm-tst6 asm-tst7 asm-tst8 asm-tst9
> @@ -185,7 +185,8 @@ TESTS = run-arextract.sh run-arsymtest.sh run-ar.sh newfile test-nlist \
>  	run-elfclassify.sh run-elfclassify-self.sh \
>  	run-disasm-riscv64.sh \
>  	run-pt_gnu_prop-tests.sh \
> -	run-getphdrnum.sh run-test-includes.sh
> +	run-getphdrnum.sh run-test-includes.sh \
> +	leb128
>  
>  if !BIARCH
>  export ELFUTILS_DISABLE_BIARCH = 1
> @@ -694,6 +695,7 @@ xlate_notes_LDADD = $(libelf)
>  elfrdwrnop_LDADD = $(libelf)
>  dwelf_elf_e_machine_string_LDADD = $(libelf) $(libdw)
>  getphdrnum_LDADD = $(libelf) $(libdw)
> +leb128_LDADD = $(libelf) $(libdw)

ack

>  # We want to test the libelf header against the system elf.h header.
>  # Don't include any -I CPPFLAGS. Except when we install our own elf.h.
> diff --git a/tests/leb128.c b/tests/leb128.c
> new file mode 100644
> index 00000000..6994a436
> --- /dev/null
> +++ b/tests/leb128.c
> @@ -0,0 +1,140 @@
> +/* Test program for leb128
> +   Copyright (C) 2020 Tom Tromey
> +   This file is part of elfutils.
> +
> +   This file is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   elfutils is distributed in the hope that it will be useful, but
> +   WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
> +
> +#include <config.h>
> +#include <stddef.h>
> +#include <stdbool.h>
> +#include <libdw.h>
> +#include "../libdw/libdwP.h"
> +#include "../libdw/memory-access.h"
> +
> +#define OK 0
> +#define FAIL 1
> +
> +static const unsigned char v0[] = { 0x0 };
> +static const unsigned char v23[] = { 23 };
> +static const unsigned char vm_2[] = { 0x7e };
> +static const unsigned char v128[] = { 0x80, 0x01 };
> +static const unsigned char vm_128[] = { 0x80, 0x7f };
> +static const unsigned char vhuge[] =
> +  {
> +    0xff, 0xff, 0xff, 0xff, 0xff,
> +    0xff, 0xff, 0xff, 0xff, 0x0
> +  };
> +static const unsigned char most_positive[] =
> +  {
> +    0xff, 0xff, 0xff, 0xff, 0xff,
> +    0xff, 0xff, 0xff, 0x3f
> +  };
> +static const unsigned char most_negative[] =
> +  {
> +    0x80, 0x80, 0x80, 0x80, 0x80,
> +    0x80, 0x80, 0x80, 0x40
> +  };
> +static const unsigned char minus_one[] =
> +  {
> +    0xff, 0xff, 0xff, 0xff, 0xff,
> +    0xff, 0xff, 0xff, 0x7f
> +  };
> +
> +static int
> +test_one_sleb (const unsigned char *data, size_t len, int64_t expect)
> +{
> +  int64_t value;
> +  const unsigned char *p;
> +
> +  p = data;
> +  get_sleb128 (value, p, p + len);
> +  if (value != expect || p != data + len)
> +    return FAIL;
> +
> +  p = data;
> +  get_sleb128_unchecked (value, p);
> +  if (value != expect || p != data + len)
> +    return FAIL;
> +
> +  return OK;
> +}
> +
> +static int
> +test_sleb (void)
> +{
> +#define TEST(ARRAY, V)				      \
> +  if (test_one_sleb (ARRAY, sizeof (ARRAY), V) != OK) \
> +    return FAIL;
> +
> +  TEST (v0, 0);
> +  TEST (v23, 23);
> +  TEST (vm_2, -2);
> +  TEST (v128, 128);
> +  TEST (vm_128, -128);
> +  TEST (vhuge, 9223372036854775807ll);
> +  TEST (most_positive, 4611686018427387903ll);
> +  TEST (most_negative, -4611686018427387904ll);
> +  TEST (minus_one, -1);
> +
> +#undef TEST
> +
> +  return OK;
> +}
> +
> +static int
> +test_one_uleb (const unsigned char *data, size_t len, uint64_t expect)
> +{
> +  uint64_t value;
> +  const unsigned char *p;
> +
> +  p = data;
> +  get_uleb128 (value, p, p + len);
> +  if (value != expect || p != data + len)
> +    return FAIL;
> +
> +  p = data;
> +  get_uleb128_unchecked (value, p);
> +  if (value != expect || p != data + len)
> +    return FAIL;
> +
> +  return OK;
> +}
> +
> +static int
> +test_uleb (void)
> +{
> +#define TEST(ARRAY, V)				      \
> +  if (test_one_uleb (ARRAY, sizeof (ARRAY), V) != OK) \
> +    return FAIL;
> +
> +  TEST (v0, 0);
> +  TEST (v23, 23);
> +  TEST (vm_2, 126);
> +  TEST (v128, 128);
> +  TEST (vm_128, 16256);
> +  TEST (vhuge, 9223372036854775807ull);
> +  TEST (most_positive, 4611686018427387903ull);
> +  TEST (most_negative, 4611686018427387904ull);
> +  TEST (minus_one, 9223372036854775807ull);
> +
> +#undef TEST
> +
> +  return OK;
> +}
> +
> +int
> +main (void)
> +{
> +  return test_sleb () || test_uleb ();
> +}

Looks good. While reviewing I did try out some extra corner cases that
I added (see attached). It also shows that some values can have
multiple representations. I added them to your commit before I pushed
it. Hope you don't mind.

Thanks,

Mark
-------------- next part --------------
A non-text attachment was scrubbed...
Name: extra-tests.diff
Type: text/x-patch
Size: 2552 bytes
Desc: not available
URL: <https://sourceware.org/pipermail/elfutils-devel/attachments/20201030/104731f3/attachment.bin>


More information about the Elfutils-devel mailing list