[Patch] multibyte encodings in strings

Alan Modra amodra@gmail.com
Sat Dec 8 02:51:00 GMT 2018


On Fri, Nov 09, 2018 at 12:49:28PM +0000, Nick Clifton wrote:
> Hi Mathias,
> 
> > I am sending you a patch fixing an issue in the binary 'strings'.
> > The issue concerned finding multibyte encoded strings at odd offsets.
> 
> There is one issue however that I would like to check with you.
> The original (unpatched) decoding of the test binary that you
> supplied produces the output: "String2" whereas the patched
> version of string produces: "String1" and "tring2".  Is this
> correct ?  I was kind of expecting the output to be "String1"
> and "String2".

Yeah, the output should be exactly "String1\nString2".  Anything else
is broken.  I saw a blank line output before the expected output, and
in one other instance:

in
String1
String2

git commit 71f5e3f7b624 obviously wasn't tested on a big-endian host,
and the test fail message resulted in tcl errors.  This patch should
fix the issues I found.

	* strings.c (unget_part_char): New function.
	(print_strings): Use unget_part_char.  Formatting.
	* testsuite/binutils-all/strings.exp (test_multibyte): Don't
	use square brackets in fail message.  Expect "String1\nString2".

diff --git a/binutils/strings.c b/binutils/strings.c
index e1fecc0932..fedfee9057 100644
--- a/binutils/strings.c
+++ b/binutils/strings.c
@@ -502,6 +502,57 @@ get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
 
   return r;
 }
+
+/* Throw away one byte of a (possibly) multi-byte char C, updating
+   address and buffer to suit.  */
+
+static void
+unget_part_char (long c, file_ptr *address, int *magiccount, char **magic)
+{
+  static char tmp[4];
+
+  if (encoding_bytes > 1)
+    {
+      *address -= encoding_bytes - 1;
+
+      if (*magiccount == 0)
+	{
+	  /* If no magic buffer exists, use temp buffer.  */
+	  switch (encoding)
+	    {
+	    default:
+	      break;
+	    case 'b':
+	      tmp[0] = c & 0xff;
+	      *magiccount = 1;
+	      break;
+	    case 'l':
+	      tmp[0] = (c >> 8) & 0xff;
+	      *magiccount = 1;
+	      break;
+	    case 'B':
+	      tmp[0] = (c >> 16) & 0xff;
+	      tmp[1] = (c >> 8) & 0xff;
+	      tmp[2] = c & 0xff;
+	      *magiccount = 3;
+	      break;
+	    case 'L':
+	      tmp[0] = (c >> 8) & 0xff;
+	      tmp[1] = (c >> 16) & 0xff;
+	      tmp[2] = (c >> 24) & 0xff;
+	      *magiccount = 3;
+	      break;
+	    }
+	  *magic = tmp;
+	}
+      else
+	{
+	  /* If magic buffer exists, rewind.  */
+	  *magic -= encoding_bytes - 1;
+	  *magiccount += encoding_bytes - 1;
+	}
+    }
+}
 
 /* Find the strings in file FILENAME, read from STREAM.
    Assume that STREAM is positioned so that the next byte read
@@ -543,43 +594,8 @@ print_strings (const char *filename, FILE *stream, file_ptr address,
 
 	  if (! STRING_ISGRAPHIC (c))
 	    {
-	      /* Found a non-graphic. Try again starting with next char.  */
-	      if (encoding_bytes > 1)
-		{
-		  /* In case of multibyte encodings rewind using magic buffer.  */
-		  if (magiccount == 0)
-		    {
-		      /* If no magic buffer exists: use memory of c.  */
-		      switch (encoding)
-			{
-			default:
-			  break;
-			case 'b':
-			  c = c & 0xff;
-			  magiccount += 1;
-			  break;
-			case 'l':
-			case 'L':
-			  c = c >> 8;
-			  magiccount += (encoding_bytes -1);
-			  break;
-			case 'B':
-			  c = (( c & 0xff0000) >> 16) | ( c & 0xff00)
-			    | (( c & 0xff) << 16);
-			  magiccount += 3;
-			  break;
-			}
-		      magic = (char *) &c;
-		    }
-		  else
-		    {
-		      /* If magic buffer exists: rewind.  */
-		      magic = magic - (encoding_bytes -1);
-		    }
-
-		  address = address - (encoding_bytes -1);
-		}
-
+	      /* Found a non-graphic.  Try again starting with next byte.  */
+	      unget_part_char (c, &address, &magiccount, &magic);
 	      goto tryline;
 	    }
 	  buf[i] = c;
@@ -598,18 +614,18 @@ print_strings (const char *filename, FILE *stream, file_ptr address,
 	    if (sizeof (start) > sizeof (long))
 	      {
 # ifndef __MSVCRT__
-	        printf ("%7llo ", (unsigned long long) start);
+		printf ("%7llo ", (unsigned long long) start);
 # else
-	        printf ("%7I64o ", (unsigned long long) start);
+		printf ("%7I64o ", (unsigned long long) start);
 # endif
 	      }
 	    else
 #elif !BFD_HOST_64BIT_LONG
-	    if (start != (unsigned long) start)
-	      printf ("++%7lo ", (unsigned long) start);
-	    else
+	      if (start != (unsigned long) start)
+		printf ("++%7lo ", (unsigned long) start);
+	      else
 #endif
-	      printf ("%7lo ", (unsigned long) start);
+		printf ("%7lo ", (unsigned long) start);
 	    break;
 
 	  case 10:
@@ -617,18 +633,18 @@ print_strings (const char *filename, FILE *stream, file_ptr address,
 	    if (sizeof (start) > sizeof (long))
 	      {
 # ifndef __MSVCRT__
-	        printf ("%7lld ", (unsigned long long) start);
+		printf ("%7lld ", (unsigned long long) start);
 # else
-	        printf ("%7I64d ", (unsigned long long) start);
+		printf ("%7I64d ", (unsigned long long) start);
 # endif
 	      }
 	    else
 #elif !BFD_HOST_64BIT_LONG
-	    if (start != (unsigned long) start)
-	      printf ("++%7ld ", (unsigned long) start);
-	    else
+	      if (start != (unsigned long) start)
+		printf ("++%7ld ", (unsigned long) start);
+	      else
 #endif
-	      printf ("%7ld ", (long) start);
+		printf ("%7ld ", (long) start);
 	    break;
 
 	  case 16:
@@ -636,19 +652,19 @@ print_strings (const char *filename, FILE *stream, file_ptr address,
 	    if (sizeof (start) > sizeof (long))
 	      {
 # ifndef __MSVCRT__
-	        printf ("%7llx ", (unsigned long long) start);
+		printf ("%7llx ", (unsigned long long) start);
 # else
-	        printf ("%7I64x ", (unsigned long long) start);
+		printf ("%7I64x ", (unsigned long long) start);
 # endif
 	      }
 	    else
 #elif !BFD_HOST_64BIT_LONG
-	    if (start != (unsigned long) start)
-	      printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
-		      (unsigned long) (start & 0xffffffff));
-	    else
+	      if (start != (unsigned long) start)
+		printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
+			(unsigned long) (start & 0xffffffff));
+	      else
 #endif
-	      printf ("%7lx ", (unsigned long) start);
+		printf ("%7lx ", (unsigned long) start);
 	    break;
 	  }
 
@@ -662,49 +678,16 @@ print_strings (const char *filename, FILE *stream, file_ptr address,
 	    break;
 	  if (! STRING_ISGRAPHIC (c))
 	    {
-	      if (encoding_bytes > 1)
-		{
-		  /* In case of multibyte encodings rewind using magic buffer.  */
-		  if (magiccount == 0)
-		    {
-		      /* If no magic buffer exists: use memory of c.  */
-		      switch (encoding)
-			{
-			default:
-			  break;
-			case 'b':
-			  c = c & 0xff;
-			  magiccount += 1;
-			  break;
-			case 'l':
-			case 'L':
-			  c = c >> 8;
-			  magiccount += (encoding_bytes -1);
-			  break;
-			case 'B':
-			  c = (( c & 0xff0000) >> 16) | ( c & 0xff00)
-			    | (( c & 0xff) << 16);
-			  magiccount += 3;
-			  break;
-			}
-		      magic = (char *) &c;
-		    }
-		  else
-		    {
-		      /* If magic buffer exists: rewind.  */
-		      magic = magic - (encoding_bytes -1);
-		    }
-		  address = address - (encoding_bytes -1);
-		}
+	      unget_part_char (c, &address, &magiccount, &magic);
 	      break;
 	    }
 	  putchar (c);
 	}
 
       if (output_separator)
-        fputs (output_separator, stdout);
+	fputs (output_separator, stdout);
       else
-        putchar ('\n');
+	putchar ('\n');
     }
   free (buf);
 }
diff --git a/binutils/testsuite/binutils-all/strings.exp b/binutils/testsuite/binutils-all/strings.exp
index c4bbf69181..83bd2113bb 100644
--- a/binutils/testsuite/binutils-all/strings.exp
+++ b/binutils/testsuite/binutils-all/strings.exp
@@ -4,12 +4,12 @@
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
-# 
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
@@ -17,19 +17,13 @@
 proc test_multibyte {testfile} {
     global STRINGS
     global STRINGSFLAGS
-    
+
     set testname "strings: decoding little-endian multibyte strings"
     set got [binutils_run $STRINGS "$STRINGSFLAGS --encoding=l $testfile"]
 
-    set want "String1"
+    set want "String1\nString2"
     if ![regexp $want $got] then {
-	fail "$testname [String1]"
-	return
-    }
-
-    set want "tring2"
-    if ![regexp $want $got] then {
-	fail "$testname [tring2]"
+	fail "$testname"
 	return
     }
 
@@ -37,5 +31,3 @@ proc test_multibyte {testfile} {
 }
 
 test_multibyte $srcdir/$subdir/strings-1.bin
-
-

-- 
Alan Modra
Australia Development Lab, IBM



More information about the Binutils mailing list