This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
RFC: Displaying multibyte symbol names in readelf
- From: Nick Clifton <nickc at redhat dot com>
- To: binutils at sourceware dot org
- Date: Wed, 23 May 2012 12:14:09 +0100
- Subject: RFC: Displaying multibyte symbol names in readelf
Hi Guys,
I am planning to apply the patch below to enhance readelf so that it
can display symbol names that contain multibyte characters. Currently
such characters are displayed as a sequence of hex-bytes. With the
patch applied the actual multibyte characters will be displayed
provided that the user's terminal and environment settings support
them.
Does anyone have any objections or questions concerning adding such a
feature?
Cheers
Nick
binutils/ChangeLog
2012-05-23 Nick Clifton <nickc@redhat.com>
* readelf.c (print_symbol): Display multibyte characters.
Index: binutils/readelf.c
===================================================================
RCS file: /cvs/src/src/binutils/readelf.c,v
retrieving revision 1.573
diff -u -3 -p -r1.573 readelf.c
--- binutils/readelf.c 15 May 2012 12:55:49 -0000 1.573
+++ binutils/readelf.c 23 May 2012 07:33:30 -0000
@@ -48,6 +48,7 @@
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif
+#include <wchar.h>
#if __GNUC__ >= 2
/* Define BFD64 here, even if our default architecture is 32 bit ELF
@@ -383,25 +384,27 @@ print_vma (bfd_vma vma, print_mode mode)
return 0;
}
-/* Display a symbol on stdout. Handles the display of non-printing characters.
+/* Display a symbol on stdout. Handles the display of control characters and
+ multibyte characters.
If DO_WIDE is not true then format the symbol to be at most WIDTH characters,
truncating as necessary. If WIDTH is negative then format the string to be
- exactly - WIDTH characters, truncating or padding as necessary.
+ exactly (- WIDTH) characters, truncating or padding as necessary.
Returns the number of emitted characters. */
static unsigned int
print_symbol (int width, const char *symbol)
{
- const char *c;
bfd_boolean extra_padding = FALSE;
unsigned int num_printed = 0;
+ bfd_boolean multibyte_ok;
+ mbstate_t state;
if (do_wide)
{
- /* Set the width to a very large value. This simplifies the
- code below. */
+ /* Set the width to a very large value.
+ This simplifies the code below. */
width = INT_MAX;
}
else if (width < 0)
@@ -411,65 +414,68 @@ print_symbol (int width, const char *sym
extra_padding = TRUE;
}
+ /* Check to see that we can display the characters in the symbol.
+ This test can fail if the name is encoded in a character set
+ that does not match the current locale (LC_CTYPE) setting. */
+ multibyte_ok = (mbstowcs (NULL, symbol, width) != (size_t) -1);
+ /* Initialise the multibyte conversion state. */
+ memset (& state, 0, sizeof (state));
+
while (width)
{
- int len;
-
- c = symbol;
-
- /* Look for non-printing symbols inside the symbol's name.
- This test is triggered in particular by the names generated
- by the assembler for local labels. */
- while (ISPRINT (*c))
- c++;
-
- len = c - symbol;
-
- if (len)
- {
- if (len > width)
- len = width;
-
- printf ("%.*s", len, symbol);
+ const char c = *symbol++;
- width -= len;
- num_printed += len;
- }
-
- if (*c == 0 || width == 0)
+ if (c == 0)
break;
- /* Now display the non-printing character, if
- there is room left in which to dipslay it. */
- if ((unsigned char) *c < 32)
+ /* mbrtowc might accept control characters, but we do not
+ want to print them as that can affect terminal settings.
+ Control characters usually appear in the names generated
+ by the assembler for local labels. */
+ if (ISCNTRL (c))
{
if (width < 2)
break;
- printf ("^%c", *c + 0x40);
-
+ printf ("^%c", c + 0x40);
width -= 2;
num_printed += 2;
}
- else
+ else if (multibyte_ok)
+ {
+ size_t n;
+ wchar_t w;
+
+ n = mbrtowc (& w, symbol - 1, MB_CUR_MAX, & state);
+ assert (n > 0);
+ printf ("%lc", (wint_t) w);
+ width -= 1;
+ num_printed += 1;
+ /* Move symbol past the number of bytes consumed. */
+ symbol += n - 1;
+ }
+ else if (ISPRINT (c))
+ {
+ putchar (c);
+ width --;
+ num_printed ++;
+ }
+ else /* If all else fails, print in hex. */
{
if (width < 6)
break;
- printf ("<0x%.2x>", (unsigned char) *c);
-
+ printf ("<0x%.2x>", (unsigned char) c);
width -= 6;
num_printed += 6;
}
-
- symbol = c + 1;
}
if (extra_padding && width > 0)
{
/* Fill in the remaining spaces. */
printf ("%-*s", width, " ");
- num_printed += 2;
+ num_printed += width;
}
return num_printed;