[PATCH] readelf: support zstd compressed debug sections [PR 29640]
Martin Liška
mliska@suse.cz
Fri Oct 14 07:47:00 GMT 2022
On 10/14/22 05:35, Fangrui Song wrote:
> On 2022-10-13, Martin Liška wrote:
>> On 10/1/22 08:20, Fangrui Song wrote:
>>> ---
>>> binutils/Makefile.am | 6 +--
>>> binutils/Makefile.in | 6 +--
>>> binutils/readelf.c | 112 +++++++++++++++++++++++++++----------------
>>> 3 files changed, 78 insertions(+), 46 deletions(-)
>>>
>>> diff --git a/binutils/Makefile.am b/binutils/Makefile.am
>>> index 751fbacce12..b249af721ae 100644
>>> --- a/binutils/Makefile.am
>>> +++ b/binutils/Makefile.am
>>> @@ -54,8 +54,8 @@ DEBUGINFOD_LIBS = @DEBUGINFOD_LIBS@
>>> WARN_CFLAGS = @WARN_CFLAGS@
>>> WARN_CFLAGS_FOR_BUILD = @WARN_CFLAGS_FOR_BUILD@
>>> NO_WERROR = @NO_WERROR@
>>> -AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC)
>>> -AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC)
>>> +AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC) $(ZSTD_CFLAGS)
>>> +AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC) $(ZSTD_CFLAGS)
>>> LIBICONV = @LIBICONV@
>>>
>>> # these two are almost the same program
>>> @@ -256,7 +256,7 @@ objcopy_SOURCES = objcopy.c not-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>>> strings_SOURCES = strings.c $(BULIBS)
>>>
>>> readelf_SOURCES = readelf.c version.c unwind-ia64.c dwarf.c demanguse.c $(ELFLIBS)
>>> -readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>> +readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(ZSTD_LIBS) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>>
>>> elfedit_SOURCES = elfedit.c version.c $(ELFLIBS)
>>> elfedit_LDADD = $(LIBINTL) $(LIBIBERTY)
>>> diff --git a/binutils/Makefile.in b/binutils/Makefile.in
>>> index 6de4e239408..7d9039e0f2f 100644
>>> --- a/binutils/Makefile.in
>>> +++ b/binutils/Makefile.in
>>> @@ -651,8 +651,8 @@ am__skipyacc =
>>> # case both are empty.
>>> ZLIB = @zlibdir@ -lz
>>> ZLIBINC = @zlibinc@
>>> -AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC)
>>> -AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC)
>>> +AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC) $(ZSTD_CFLAGS)
>>> +AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC) $(ZSTD_CFLAGS)
>>>
>>> # these two are almost the same program
>>> AR_PROG = ar
>>> @@ -790,7 +790,7 @@ size_SOURCES = size.c $(BULIBS)
>>> objcopy_SOURCES = objcopy.c not-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>>> strings_SOURCES = strings.c $(BULIBS)
>>> readelf_SOURCES = readelf.c version.c unwind-ia64.c dwarf.c demanguse.c $(ELFLIBS)
>>> -readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>> +readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(ZSTD_LIBS) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>> elfedit_SOURCES = elfedit.c version.c $(ELFLIBS)
>>> elfedit_LDADD = $(LIBINTL) $(LIBIBERTY)
>>> strip_new_SOURCES = objcopy.c is-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>>> diff --git a/binutils/readelf.c b/binutils/readelf.c
>>> index 351571c8abb..04cda213f33 100644
>>> --- a/binutils/readelf.c
>>> +++ b/binutils/readelf.c
>>> @@ -44,6 +44,9 @@
>>> #include <assert.h>
>>> #include <time.h>
>>> #include <zlib.h>
>>> +#ifdef HAVE_ZSTD
>>> +#include <zstd.h>
>>> +#endif
>>> #include <wchar.h>
>>>
>>> #if defined HAVE_MSGPACK
>>> @@ -15170,48 +15173,56 @@ get_section_contents (Elf_Internal_Shdr * section, Filedata * filedata)
>>> _("section contents"));
>>> }
>>>
>>> -/* Uncompresses a section that was compressed using zlib, in place. */
>>> +/* Uncompresses a section that was compressed using zlib/zstd, in place. */
>>>
>>> static bool
>>> -uncompress_section_contents (unsigned char **buffer,
>>> - uint64_t uncompressed_size,
>>> - uint64_t *size)
>>> +uncompress_section_contents (bool is_zstd, unsigned char **buffer,
>>> + uint64_t uncompressed_size, uint64_t *size)
>>> {
>>> uint64_t compressed_size = *size;
>>> unsigned char *compressed_buffer = *buffer;
>>> - unsigned char *uncompressed_buffer;
>>> + unsigned char *uncompressed_buffer = xmalloc (uncompressed_size);
>>> z_stream strm;
>>> int rc;
>>>
>>> - /* It is possible the section consists of several compressed
>>> - buffers concatenated together, so we uncompress in a loop. */
>>> - /* PR 18313: The state field in the z_stream structure is supposed
>>> - to be invisible to the user (ie us), but some compilers will
>>> - still complain about it being used without initialisation. So
>>> - we first zero the entire z_stream structure and then set the fields
>>> - that we need. */
>>> - memset (& strm, 0, sizeof strm);
>>> - strm.avail_in = compressed_size;
>>> - strm.next_in = (Bytef *) compressed_buffer;
>>> - strm.avail_out = uncompressed_size;
>>> - uncompressed_buffer = (unsigned char *) xmalloc (uncompressed_size);
>>> + if (is_zstd)
>>> + {
>>> +#ifdef HAVE_ZSTD
>>> + size_t ret = ZSTD_decompress (uncompressed_buffer, uncompressed_size,
>>> + compressed_buffer, compressed_size);
>>> + if (ZSTD_isError (ret))
>>> + goto fail;
>>> +#endif
>>> + }
>>> + else
>>> + {
>>> + /* It is possible the section consists of several compressed
>>> + buffers concatenated together, so we uncompress in a loop. */
>>> + /* PR 18313: The state field in the z_stream structure is supposed
>>> + to be invisible to the user (ie us), but some compilers will
>>> + still complain about it being used without initialisation. So
>>> + we first zero the entire z_stream structure and then set the fields
>>> + that we need. */
>>> + memset (&strm, 0, sizeof strm);
>>> + strm.avail_in = compressed_size;
>>> + strm.next_in = (Bytef *)compressed_buffer;
>>> + strm.avail_out = uncompressed_size;
>>>
>>> - rc = inflateInit (& strm);
>>> - while (strm.avail_in > 0)
>>> - {
>>> - if (rc != Z_OK)
>>> - break;
>>> - strm.next_out = ((Bytef *) uncompressed_buffer
>>> - + (uncompressed_size - strm.avail_out));
>>> - rc = inflate (&strm, Z_FINISH);
>>> - if (rc != Z_STREAM_END)
>>> - break;
>>> - rc = inflateReset (& strm);
>>> + rc = inflateInit (&strm);
>>> + while (strm.avail_in > 0)
>>> + {
>>> + if (rc != Z_OK)
>>> + break;
>>> + strm.next_out = ((Bytef *)uncompressed_buffer
>>> + + (uncompressed_size - strm.avail_out));
>>> + rc = inflate (&strm, Z_FINISH);
>>> + if (rc != Z_STREAM_END)
>>> + break;
>>> + rc = inflateReset (&strm);
>>> + }
>>> + if (inflateEnd (&strm) != Z_OK || rc != Z_OK || strm.avail_out != 0)
>>> + goto fail;
>>> }
>>> - if (inflateEnd (& strm) != Z_OK
>>> - || rc != Z_OK
>>> - || strm.avail_out != 0)
>>> - goto fail;
>>>
>>> *buffer = uncompressed_buffer;
>>> *size = uncompressed_size;
>>> @@ -15254,6 +15265,7 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>>> {
>>> uint64_t new_size = num_bytes;
>>> uint64_t uncompressed_size = 0;
>>> + bool is_zstd = false;
>>>
>>> if ((section->sh_flags & SHF_COMPRESSED) != 0)
>>> {
>>> @@ -15266,7 +15278,13 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>>> by get_compression_header. */
>>> goto error_out;
>>>
>>> - if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>>> + if (chdr.ch_type == ELFCOMPRESS_ZLIB)
>>> + ;
>>> +#ifdef HAVE_ZSTD
>>> + else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
>>> + is_zstd = true;
>>> +#endif
>>> + else
>>> {
>>> warn (_("section '%s' has unsupported compress type: %d\n"),
>>> printable_section_name (filedata, section), chdr.ch_type);
>>> @@ -15295,8 +15313,8 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>>>
>>> if (uncompressed_size)
>>> {
>>> - if (uncompress_section_contents (& start,
>>> - uncompressed_size, & new_size))
>>> + if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>>> + &new_size))
>>> num_bytes = new_size;
>>> else
>>> {
>>> @@ -15470,6 +15488,7 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>>> {
>>> uint64_t new_size = section_size;
>>> uint64_t uncompressed_size = 0;
>>> + bool is_zstd = false;
>>>
>>> if ((section->sh_flags & SHF_COMPRESSED) != 0)
>>> {
>>> @@ -15482,7 +15501,13 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>>> by get_compression_header. */
>>> goto error_out;
>>>
>>> - if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>>> + if (chdr.ch_type == ELFCOMPRESS_ZLIB)
>>> + ;
>>> +#ifdef HAVE_ZSTD
>>> + else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
>>> + is_zstd = true;
>>> +#endif
>>> + else
>>> {
>>> warn (_("section '%s' has unsupported compress type: %d\n"),
>>> printable_section_name (filedata, section), chdr.ch_type);
>>> @@ -15511,8 +15536,8 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>>>
>>> if (uncompressed_size)
>>> {
>>> - if (uncompress_section_contents (& start, uncompressed_size,
>>> - & new_size))
>>> + if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>>> + &new_size))
>>> {
>>> section_size = new_size;
>>> }
>>> @@ -15848,6 +15873,7 @@ load_specific_debug_section (enum dwarf_section_display_enum debug,
>>> unsigned char *start = section->start;
>>> uint64_t size = sec->sh_size;
>>> uint64_t uncompressed_size = 0;
>>> + bool is_zstd = false;
>>>
>>> if ((sec->sh_flags & SHF_COMPRESSED) != 0)
>>> {
>>> @@ -15869,7 +15895,13 @@ load_specific_debug_section (enum dwarf_section_display_enum debug,
>>> by get_compression_header. */
>>> return false;
>>>
>>> - if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>>> + if (chdr.ch_type == ELFCOMPRESS_ZLIB)
>>> + ;
>>> +#ifdef HAVE_ZSTD
>>> + else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
>>> + is_zstd = true;
>>> +#endif
>>> + else
>>> {
>>> warn (_("section '%s' has unsupported compress type: %d\n"),
>>> section->name, chdr.ch_type);
>>> @@ -15898,7 +15930,7 @@ load_specific_debug_section (enum dwarf_section_display_enum debug,
>>>
>>> if (uncompressed_size)
>>> {
>>> - if (uncompress_section_contents (&start, uncompressed_size,
>>> + if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>>> &size))
>>> {
>>> /* Free the compressed buffer, update the section buffer
>>
>> Hi.
>>
>> I noticed the following issue (dunno if caused by objcopy or the patches readelf) with this patch:
>>
>> $ echo 'int main() {return 0;}' | gcc -O2 -g -c -o a.o -x c -
>> $ /home/marxin/Programming/binutils/objdir/ld/../binutils/objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zstd
>> $ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a-x32.o >/dev/null
>> readelf: Error: Unable to decompress section .debug_info
>> readelf: Error: No comp units in .debug_info section ?
>>
>> while zlib is fine:
>>
>> $ /home/marxin/Programming/binutils/objdir/ld/../binutils/objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zlib
>> $ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a-x32.o >/dev/null
>>
>> Can you please take a look?
>
> objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zstd uses
> ch_type=1 (ELFCOMPRESS_ZLIB), but I don't know where goes wrong.
>
> Converting EI_CLASS looks very hacky to me.
Ok, I've got it. It's unrelated to '-O elf32-x86-64', but to the fact we move from zlib-gabi compression of a.o
to zstd for the objcopy output file.
Simpler reproducer:
$ echo 'int main() {return 0;}' | gcc -O2 -g -c -o a.o -x c - -Wa,--compress-debug-sections=zlib
$ /home/marxin/Programming/binutils/objdir//binutils/objcopy a.o a2.o --compress-debug-sections=zstd
$ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a2.o
readelf: Error: Unable to decompress section .debug_info
readelf: Error: No comp units in .debug_info section ?
As seen the .debug_info section in a2.o claims to be compressed:
$ readelf -SW a2.o | grep C
[ 5] .debug_info PROGBITS 0000000000000000 000043 000050 00 C 0 0 1
But it's not as it's not beneficial and the following code in in bfd/compress.c is taken:
/* If compression didn't make the section smaller, keep it uncompressed. */
if (compressed_size >= uncompressed_size)
{
memcpy (buffer, input_buffer, uncompressed_size);
sec->compress_status = COMPRESS_SECTION_NONE;
}
Apparently, we miss removal of the compression header :/ Moreover, the following is fine:
$ /home/marxin/Programming/binutils/objdir//binutils/objcopy a.o a2.o --compress-debug-sections=none
$ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a2.o >/dev/null
Lemme try preparing a fix.
Martin
More information about the Binutils
mailing list