[PATCH] Fix leb128 reading
Mark Wielaard
mark@klomp.org
Sat Oct 24 21:54:51 GMT 2020
Hi Tom,
On Fri, Oct 23, 2020 at 09:58:34PM -0600, Tom Tromey wrote:
> PR 26773 points out that some sleb128 values are decoded incorrectly.
> Looking into this, I found some other unusual cases as well.
>
> In this patch, I chose to try to handle weird leb128 encodings by
> preserving their values when possible; or returning the maximum value
> on overflow. It isn't clear to me that this is necessarily the best
> choice.
I think it is a good thing to accept "padded" leb128 numbers, but
probably up to a point. The padding will probably be either mod-4 or
mod-8 bytes. So lets at most read up to 16 bytes total.
Note we use a Signed-off-by like as described in the CONTRIBUTING
document.
> ---
> .gitignore | 1 +
> ChangeLog | 4 +
> libdw/ChangeLog | 11 +++
> libdw/dwarf_getlocation.c | 2 +-
> libdw/memory-access.h | 153 +++++++++++++++++++-----------------
> tests/ChangeLog | 7 ++
> tests/Makefile.am | 8 +-
> tests/leb128.c | 161 ++++++++++++++++++++++++++++++++++++++
> 8 files changed, 272 insertions(+), 75 deletions(-)
> create mode 100644 tests/leb128.c
>
> diff --git a/.gitignore b/.gitignore
> index c9790941..d737b14d 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -151,6 +151,7 @@ Makefile.in
> /tests/get-units-invalid
> /tests/get-units-split
> /tests/hash
> +/tests/leb128
> /tests/line2addr
> /tests/low_high_pc
> /tests/msg_tst
> diff --git a/ChangeLog b/ChangeLog
> index 72e8397c..0b50578d 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,7 @@
> +2020-10-23 Tom Tromey <tom@tromey.com>
> +
> + * .gitignore: Add /tests/leb128.
> +
> 2020-10-01 Frank Ch. Eigler <fche@redhat.com>
Nice, I always forget that.
> PR25461
> diff --git a/libdw/ChangeLog b/libdw/ChangeLog
> index 8b0b583a..23081c12 100644
> --- a/libdw/ChangeLog
> +++ b/libdw/ChangeLog
> @@ -1,3 +1,14 @@
> +2020-10-23 Tom Tromey <tom@tromey.com>
> +
> + PR26773
> + * dwarf_getlocation.c (store_implicit_value): Use
> + __libdw_get_uleb128_unchecked.
> + * memory-access.h (__libdw_max_len_leb128)
> + (__libdw_max_len_uleb128, __libdw_max_len_sleb128): Remove.
> + (__libdw_get_uleb128, __libdw_get_uleb128_unchecked): Rewrite.
> + (get_sleb128_step): Remove.
> + (__libdw_get_sleb128, __libdw_get_sleb128_unchecked): Rewrite.
> +
> 2020-09-03 Mark Wielaard <mark@klomp.org>
>
> * dwarf.h: Add DW_CFA_AARCH64_negate_ra_state.
> diff --git a/libdw/dwarf_getlocation.c b/libdw/dwarf_getlocation.c
> index 4617f9e9..0ce2e08a 100644
> --- a/libdw/dwarf_getlocation.c
> +++ b/libdw/dwarf_getlocation.c
> @@ -130,7 +130,7 @@ store_implicit_value (Dwarf *dbg, void **cache, Dwarf_Op *op)
> struct loc_block_s *block = libdw_alloc (dbg, struct loc_block_s,
> sizeof (struct loc_block_s), 1);
> const unsigned char *data = (const unsigned char *) (uintptr_t) op->number2;
> - uint64_t len = __libdw_get_uleb128 (&data, data + len_leb128 (Dwarf_Word));
> + uint64_t len = __libdw_get_uleb128_unchecked (&data);
> if (unlikely (len != op->number))
> return -1;
> block->addr = op;
Well spotted. So when we read the actual Dwarf_Op we have already read
len as op->number from the same data location. So this time we can
read it "unchecked". Note that we didn't used to do the sanity check
len == op->number before commit commit b7a5bc8aa ("libdw: Detect bad
DWARF in store_implicit_value.") I think we can also simply drop it
again. It really is only an internal sanity check, I don't believe it
can actually catch bad DWARF.
> diff --git a/libdw/memory-access.h b/libdw/memory-access.h
> index a39ad6d2..70317d81 100644
> --- a/libdw/memory-access.h
> +++ b/libdw/memory-access.h
> @@ -39,29 +39,6 @@
>
> #define len_leb128(var) ((8 * sizeof (var) + 6) / 7)
>
> -static inline size_t
> -__libdw_max_len_leb128 (const size_t type_len,
> - const unsigned char *addr, const unsigned char *end)
> -{
> - const size_t pointer_len = likely (addr < end) ? end - addr : 0;
> - return likely (type_len <= pointer_len) ? type_len : pointer_len;
> -}
> -
> -static inline size_t
> -__libdw_max_len_uleb128 (const unsigned char *addr, const unsigned char *end)
> -{
> - const size_t type_len = len_leb128 (uint64_t);
> - return __libdw_max_len_leb128 (type_len, addr, end);
> -}
> -
> -static inline size_t
> -__libdw_max_len_sleb128 (const unsigned char *addr, const unsigned char *end)
> -{
> - /* Subtract one step, so we don't shift into sign bit. */
> - const size_t type_len = len_leb128 (int64_t) - 1;
> - return __libdw_max_len_leb128 (type_len, addr, end);
> -}
OK, we don't need any of these anymore.
> #define get_uleb128_step(var, addr, nth) \
> do { \
> unsigned char __b = *(addr)++; \
> @@ -79,12 +56,39 @@ __libdw_get_uleb128 (const unsigned char **addrp, const unsigned char *end)
> for the common single-byte case. */
> get_uleb128_step (acc, *addrp, 0);
>
> - const size_t max = __libdw_max_len_uleb128 (*addrp - 1, end);
> - for (size_t i = 1; i < max; ++i)
> + const size_t max = len_leb128 (acc) - 1;
> + for (size_t i = 1; i < max && *addrp < end; ++i)
> get_uleb128_step (acc, *addrp, i);
> - /* Other implementations set VALUE to UINT_MAX in this
> - case. So we better do this as well. */
> - return UINT64_MAX;
> +
> + /* A single step to check the final byte, which could cause
> + overflow. */
> + if (*addrp < end)
> + {
> + const unsigned mask = (1u << (8 * sizeof (acc) - 7 * max)) - 1;
Shouldn't this be a const unsigned char?
Also, this seems to actually me a constant (since *addrp < end, we must have ended up at max).
Is the really just (1u << (8 * 8 - 7 * 9)) - 1 = (1u << 1) - 1 = 1
So we are really only interested in the lowest bit?
> + if (unlikely ((**addrp & mask) != 0))
> + {
> + /* Overflow. */
> + ++*addrp;
> + return UINT64_MAX;
> + }
> + get_uleb128_step (acc, *addrp, max);
> + }
I must admit I don't fully follow the logic of this last step, but
that might be because I am confused about the mask.
> + /* Now we can read any number of bytes, as long as the payload is 0.
> + Anything else would overflow. */
> + while (*addrp < end)
> + {
> + unsigned char b = *(*addrp)++;
> + if (b == 0x0)
> + break;
> + if (b != 0x80)
> + {
> + /* Overflow. */
> + return UINT64_MAX;
> + }
> + }
So I would also restrict this to i < 16 (assuming you keep i above).
> + return acc;
> }
>
> static inline uint64_t
> @@ -96,65 +100,72 @@ __libdw_get_uleb128_unchecked (const unsigned char **addrp)
> for the common single-byte case. */
> get_uleb128_step (acc, *addrp, 0);
>
> - const size_t max = len_leb128 (uint64_t);
> + const size_t max = len_leb128 (acc) - 1;
> for (size_t i = 1; i < max; ++i)
> get_uleb128_step (acc, *addrp, i);
> - /* Other implementations set VALUE to UINT_MAX in this
> - case. So we better do this as well. */
> - return UINT64_MAX;
> +
> + /* A single step to check the final byte, which could cause
> + overflow. */
> + const unsigned mask = (1u << (8 * sizeof (acc) - 7 * max)) - 1;
> + if (unlikely ((**addrp & mask) != 0))
> + {
> + /* Overflow. */
> + ++*addrp;
> + return UINT64_MAX;
> + }
> + get_uleb128_step (acc, *addrp, max);
> +
> + /* Now we can read any number of bytes, as long as the payload is 0.
> + Anything else would overflow. */
> + while (true)
> + {
> + unsigned char b = *(*addrp)++;
> + if (b == 0x0)
> + break;
> + if (b != 0x80)
> + {
> + /* Overflow. */
> + return UINT64_MAX;
> + }
> + }
> +
> + return acc;
> }
Same comments as above.
> /* Note, addr needs to me smaller than end. */
> #define get_uleb128(var, addr, end) ((var) = __libdw_get_uleb128 (&(addr), end))
> #define get_uleb128_unchecked(var, addr) ((var) = __libdw_get_uleb128_unchecked (&(addr)))
>
> -/* The signed case is similar, but we sign-extend the result. */
> -
> -#define get_sleb128_step(var, addr, nth) \
> - do { \
> - unsigned char __b = *(addr)++; \
> - if (likely ((__b & 0x80) == 0)) \
> - { \
> - struct { signed int i:7; } __s = { .i = __b }; \
> - (var) |= (typeof (var)) __s.i * ((typeof (var)) 1 << ((nth) * 7)); \
> - return (var); \
> - } \
> - (var) |= (typeof (var)) (__b & 0x7f) << ((nth) * 7); \
> - } while (0)
> -
> static inline int64_t
> __libdw_get_sleb128 (const unsigned char **addrp, const unsigned char *end)
> {
> - int64_t acc = 0;
> -
> - /* Unroll the first step to help the compiler optimize
> - for the common single-byte case. */
> - get_sleb128_step (acc, *addrp, 0);
> -
> - const size_t max = __libdw_max_len_sleb128 (*addrp - 1, end);
> - for (size_t i = 1; i < max; ++i)
> - get_sleb128_step (acc, *addrp, i);
> - /* Other implementations set VALUE to INT_MAX in this
> - case. So we better do this as well. */
> - return INT64_MAX;
> + const unsigned char *start = *addrp;
> + uint64_t value = __libdw_get_uleb128 (addrp, end);
> + if (unlikely (value == UINT64_MAX))
> + return INT64_MAX;
Here you need to help me out a bit, wouldn't UINT64_MAX also be the
representation of -1?
> + if (((*addrp)[-1] & 0x40) != 0)
> + {
> + size_t nbytes = *addrp - start;
> + if (7 * nbytes < 8 * sizeof (value))
> + value |= -(((typeof (value)) 1) << 7 * nbytes);
> + }
> + return value;
> }
OK, you can always safely use [-1] because addrp will have been
increased at least by one by __libdw_get_uleb128. value is uint64_t so
the left shift will always be fine. Negating 1 shifted left one beyond
what we believe is the largest but (7 * nbytes) will negate But nbytes
can be so large that shifting left will go beyond the end, which we
don't want because then we get zero. OK
But what I don't fully understand is how this works with a "padded"
leb128, won't we possibly forget to negate in that case?
> static inline int64_t
> __libdw_get_sleb128_unchecked (const unsigned char **addrp)
> {
> - int64_t acc = 0;
> -
> - /* Unroll the first step to help the compiler optimize
> - for the common single-byte case. */
> - get_sleb128_step (acc, *addrp, 0);
> -
> - /* Subtract one step, so we don't shift into sign bit. */
> - const size_t max = len_leb128 (int64_t) - 1;
> - for (size_t i = 1; i < max; ++i)
> - get_sleb128_step (acc, *addrp, i);
> - /* Other implementations set VALUE to INT_MAX in this
> - case. So we better do this as well. */
> - return INT64_MAX;
> + const unsigned char *start = *addrp;
> + uint64_t value = __libdw_get_uleb128_unchecked (addrp);
> + if (unlikely (value == UINT64_MAX))
> + return INT64_MAX;
> + if (((*addrp)[-1] & 0x40) != 0)
> + {
> + size_t nbytes = *addrp - start;
> + if (7 * nbytes < 8 * sizeof (value))
> + value |= -(((typeof (value)) 1) << 7 * nbytes);
> + }
> + return value;
> }
Same comments as above.
> #define get_sleb128(var, addr, end) ((var) = __libdw_get_sleb128 (&(addr), end))
> diff --git a/tests/ChangeLog b/tests/ChangeLog
> index aa68ffd3..5f075df8 100644
> --- a/tests/ChangeLog
> +++ b/tests/ChangeLog
> @@ -1,3 +1,10 @@
> +2020-10-22 Tom Tromey <tom@tromey.com>
> +
> + PR26773
> + * Makefile.am (check_PROGRAMS, TESTS): Add leb128.
> + (leb128_LDADD): New variable.
> + * leb128.c: New file.
> +
> 2020-10-20 Frank Ch. Eigler <fche@redhat.com>
>
> PR26756: more prometheus metrics
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 9d0707da..568a9f8d 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -1,6 +1,6 @@
> ## Process this file with automake to create Makefile.in
> ##
> -## Copyright (C) 1996-2019 Red Hat, Inc.
> +## Copyright (C) 1996-2020 Red Hat, Inc.
You may claim your own copyright.
> ## This file is part of elfutils.
> ##
> ## This file is free software; you can redistribute it and/or modify
> @@ -63,7 +63,7 @@ check_PROGRAMS = arextract arsymtest newfile saridx scnnames sectiondump \
> all-dwarf-ranges unit-info next_cfi \
> elfcopy addsections xlate_notes elfrdwrnop \
> dwelf_elf_e_machine_string \
> - getphdrnum
> + getphdrnum leb128
>
> asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \
> asm-tst6 asm-tst7 asm-tst8 asm-tst9
> @@ -185,7 +185,8 @@ TESTS = run-arextract.sh run-arsymtest.sh run-ar.sh newfile test-nlist \
> run-elfclassify.sh run-elfclassify-self.sh \
> run-disasm-riscv64.sh \
> run-pt_gnu_prop-tests.sh \
> - run-getphdrnum.sh run-test-includes.sh
> + run-getphdrnum.sh run-test-includes.sh \
> + leb128
>
> if !BIARCH
> export ELFUTILS_DISABLE_BIARCH = 1
> @@ -694,6 +695,7 @@ xlate_notes_LDADD = $(libelf)
> elfrdwrnop_LDADD = $(libelf)
> dwelf_elf_e_machine_string_LDADD = $(libelf) $(libdw)
> getphdrnum_LDADD = $(libelf) $(libdw)
> +leb128_LDADD = $(libelf) $(libdw)
Boilerplate looks good.
> # We want to test the libelf header against the system elf.h header.
> # Don't include any -I CPPFLAGS. Except when we install our own elf.h.
> diff --git a/tests/leb128.c b/tests/leb128.c
> new file mode 100644
> index 00000000..856a2b89
> --- /dev/null
> +++ b/tests/leb128.c
> @@ -0,0 +1,161 @@
> +/* Test program for leb128
> + Copyright (C) 2020 Tom Tromey
> + This file is part of elfutils.
> +
> + This file is free software; you can redistribute it and/or modify
> + it under the terms of the GNU General Public License as published by
> + the Free Software Foundation; either version 3 of the License, or
> + (at your option) any later version.
> +
> + elfutils is distributed in the hope that it will be useful, but
> + WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + GNU General Public License for more details.
> +
> + You should have received a copy of the GNU General Public License
> + along with this program. If not, see <http://www.gnu.org/licenses/>. */
> +
> +#include <config.h>
> +#include <stddef.h>
> +#include <stdbool.h>
> +#include <libdw.h>
> +#include <libdwfl.h>
> +#include "../libdwelf/libdwelf.h"
> +#include "../libdw/libdwP.h"
> +#include "../libdwfl/libdwflP.h"
> +#include "../libdw/memory-access.h"
Slightly surprised you need almost all internal headers, but OK.
> +#define OK 0
> +#define FAIL 1
> +
> +static const unsigned char v0[] = { 0x0 };
> +static const unsigned char v23[] = { 23 };
> +static const unsigned char vm_2[] = { 0x7e };
> +static const unsigned char v128[] = { 0x80, 0x01 };
> +static const unsigned char vm_128[] = { 0x80, 0x7f };
> +/* This is pathological but nothing seems to prevent it, and maybe
> + it would be useful for a (hypothetical) LEB128 relocation. */
> +static const unsigned char v0000[] =
> + {
> + 0x80, 0x80, 0x80, 0x80, 0x80,
> + 0x80, 0x80, 0x80, 0x80, 0x80,
> + 0x80, 0x80, 0x80, 0x80, 0x80,
> + 0x0
> + };
> +static const unsigned char vhuge[] =
> + {
> + 0xff, 0xff, 0xff, 0xff, 0xff,
> + 0xff, 0xff, 0xff, 0xff, 0x0
> + };
> +static const unsigned char most_positive[] =
> + {
> + 0xff, 0xff, 0xff, 0xff, 0xff,
> + 0xff, 0xff, 0xff, 0x3f
> + };
> +static const unsigned char most_negative[] =
> + {
> + 0x80, 0x80, 0x80, 0x80, 0x80,
> + 0x80, 0x80, 0x80, 0x40
> + };
> +static const unsigned char minus_one[] =
> + {
> + 0xff, 0xff, 0xff, 0xff, 0xff,
> + 0xff, 0xff, 0xff, 0x7f
> + };
> +static const unsigned char overflow[] =
> + {
> + 0xff, 0xff, 0xff, 0xff, 0xff,
> + 0xff, 0xff, 0xff, 0xbf, 0x01
> + };
> +
> +static int
> +test_one_sleb (const unsigned char *data, size_t len, int64_t expect)
> +{
> + int64_t value;
> + const unsigned char *p;
> +
> + p = data;
> + get_sleb128 (value, p, p + len);
> + if (value != expect || p != data + len)
> + return FAIL;
> +
> + p = data;
> + get_sleb128_unchecked (value, p);
> + if (value != expect || p != data + len)
> + return FAIL;
> +
> + return OK;
> +}
> +
> +static int
> +test_sleb (void)
> +{
> +#define TEST(ARRAY, V) \
> + if (test_one_sleb (ARRAY, sizeof (ARRAY), V) != OK) \
> + return FAIL;
> +
> + TEST (v0, 0);
> + TEST (v23, 23);
> + TEST (vm_2, -2);
> + TEST (v128, 128);
> + TEST (vm_128, -128);
> + TEST (v0000, 0);
> + TEST (vhuge, 9223372036854775807ll);
> + TEST (most_positive, 4611686018427387903ll);
> + TEST (most_negative, -4611686018427387904ll);
> + TEST (minus_one, -1);
> + TEST (overflow, INT64_MAX);
> +
> +#undef TEST
> +
> + return OK;
> +}
> +
> +static int
> +test_one_uleb (const unsigned char *data, size_t len, uint64_t expect)
> +{
> + uint64_t value;
> + const unsigned char *p;
> +
> + p = data;
> + get_uleb128 (value, p, p + len);
> + if (value != expect || p != data + len)
> + return FAIL;
> +
> + p = data;
> + get_uleb128_unchecked (value, p);
> + if (value != expect || p != data + len)
> + return FAIL;
> +
> + return OK;
> +}
> +
> +static int
> +test_uleb (void)
> +{
> +#define TEST(ARRAY, V) \
> + if (test_one_uleb (ARRAY, sizeof (ARRAY), V) != OK) \
> + return FAIL;
> +
> + TEST (v0, 0);
> + TEST (v23, 23);
> + TEST (vm_2, 126);
> + TEST (v128, 128);
> + TEST (vm_128, 16256);
> + TEST (v0000, 0);
> + TEST (vhuge, 9223372036854775807ull);
> + TEST (most_positive, 4611686018427387903ull);
> + TEST (most_negative, 4611686018427387904ull);
> + TEST (minus_one, 9223372036854775807ull);
> + TEST (overflow, UINT64_MAX);
> +
> +#undef TEST
> +
> + return OK;
> +}
> +
> +int
> +main (void)
> +{
> + return test_sleb () || test_uleb ();
> +}
Very nice to have tests for this.
Maybe we can have some extra "padding" variants for most_postitive/most_negative?
Thanks,
Mark
More information about the Elfutils-devel
mailing list