RFC: Displaying multibyte symbol names in readelf

Nick Clifton nickc@redhat.com
Wed May 23 11:18:00 GMT 2012


Hi Guys,

  I am planning to apply the patch below to enhance readelf so that it
  can display symbol names that contain multibyte characters.  Currently
  such characters are displayed as a sequence of hex-bytes.  With the
  patch applied the actual multibyte characters will be displayed
  provided that the user's terminal and environment settings support
  them.

  Does anyone have any objections or questions concerning adding such a
  feature?

Cheers
  Nick
    
binutils/ChangeLog
2012-05-23  Nick Clifton  <nickc@redhat.com>

	* readelf.c (print_symbol): Display multibyte characters.
  
Index: binutils/readelf.c
===================================================================
RCS file: /cvs/src/src/binutils/readelf.c,v
retrieving revision 1.573
diff -u -3 -p -r1.573 readelf.c
--- binutils/readelf.c	15 May 2012 12:55:49 -0000	1.573
+++ binutils/readelf.c	23 May 2012 07:33:30 -0000
@@ -48,6 +48,7 @@
 #ifdef HAVE_ZLIB_H
 #include <zlib.h>
 #endif
+#include <wchar.h>
 
 #if __GNUC__ >= 2
 /* Define BFD64 here, even if our default architecture is 32 bit ELF
@@ -383,25 +384,27 @@ print_vma (bfd_vma vma, print_mode mode)
   return 0;
 }
 
-/* Display a symbol on stdout.  Handles the display of non-printing characters.
+/* Display a symbol on stdout.  Handles the display of control characters and
+   multibyte characters.
 
    If DO_WIDE is not true then format the symbol to be at most WIDTH characters,
    truncating as necessary.  If WIDTH is negative then format the string to be
-   exactly - WIDTH characters, truncating or padding as necessary.
+   exactly (- WIDTH) characters, truncating or padding as necessary.
 
    Returns the number of emitted characters.  */
 
 static unsigned int
 print_symbol (int width, const char *symbol)
 {
-  const char *c;
   bfd_boolean extra_padding = FALSE;
   unsigned int num_printed = 0;
+  bfd_boolean multibyte_ok;
+  mbstate_t state;
 
   if (do_wide)
     {
-      /* Set the width to a very large value.  This simplifies the
-	 code below.  */
+      /* Set the width to a very large value.
+	 This simplifies the code below.  */
       width = INT_MAX;
     }
   else if (width < 0)
@@ -411,65 +414,68 @@ print_symbol (int width, const char *sym
       extra_padding = TRUE;
     }
 
+  /* Check to see that we can display the characters in the symbol.
+     This test can fail if the name is encoded in a character set
+     that does not match the current locale's LC_CTYPE setting.  */
+  multibyte_ok = (mbstowcs (NULL, symbol, width) != (size_t) -1);
+  /* Initialise the multibyte conversion state.  */
+  memset (& state, 0, sizeof (state));
+
   while (width)
     {
-      int len;
-
-      c = symbol;
-
-      /* Look for non-printing symbols inside the symbol's name.
-	 This test is triggered in particular by the names generated
-	 by the assembler for local labels.  */
-      while (ISPRINT (*c))
-	c++;
-
-      len = c - symbol;
-
-      if (len)
-	{
-	  if (len > width)
-	    len = width;
-
-	  printf ("%.*s", len, symbol);
+      const char c = *symbol++;
 
-	  width -= len;
-	  num_printed += len;
-	}
-
-      if (*c == 0 || width == 0)
+      if (c == 0)
 	break;
 
-      /* Now display the non-printing character, if
-	 there is room left in which to dipslay it.  */
-      if ((unsigned char) *c < 32)
+      /* mbrtowc might accept control characters, but we do not
+	 want to print them as that can affect terminal settings.
+	 Control characters usually appear in the names generated
+	 by the assembler for local labels.  */
+      if (ISCNTRL (c))
 	{
 	  if (width < 2)
 	    break;
 
-	  printf ("^%c", *c + 0x40);
-
+	  printf ("^%c", c + 0x40);
 	  width -= 2;
 	  num_printed += 2;
 	}
-      else
+      else if (multibyte_ok)
+	{
+	  size_t  n;
+	  wchar_t w;
+   
+	  n = mbrtowc (& w, symbol - 1, MB_CUR_MAX, & state);
+	  assert (n > 0);
+	  printf ("%lc", (wint_t) w);
+	  width -= 1;
+	  num_printed += 1;
+	  /* Move symbol past the number of bytes consumed.  */
+	  symbol += n - 1;
+	}
+      else if (ISPRINT (c))
+	{
+	  putchar (c);
+	  width --;
+	  num_printed ++;
+	}
+      else /* If all else fails, print in hex.  */
 	{
 	  if (width < 6)
 	    break;
 
-	  printf ("<0x%.2x>", (unsigned char) *c);
-
+	  printf ("<0x%.2x>", (unsigned char) c);
 	  width -= 6;
 	  num_printed += 6;
 	}
-
-      symbol = c + 1;
     }
 
   if (extra_padding && width > 0)
     {
       /* Fill in the remaining spaces.  */
       printf ("%-*s", width, " ");
-      num_printed += 2;
+      num_printed += width;
     }
 
   return num_printed;



More information about the Binutils mailing list