This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: possible vfscanf problem?



Franz Sirl writes:

> diff -u -p -r1.80 vfscanf.c
> --- stdio-common/vfscanf.c      2000/08/23 16:53:42     1.80
> +++ stdio-common/vfscanf.c      2000/08/28 20:55:08
> @@ -102,7 +102,7 @@
>  #  undef EOF
>  #  define EOF            WEOF
>  # else
> -#  define ungetc(c, s) ((void) ((int) c == EOF                               \
> +#  define ungetc(c, s) ((void) ((int) (signed char) c == EOF                >                  \
>                                  || (--read_in,                               \
>                                      _IO_sputbackc (s, (unsigned char) c))))
>  #  define inchar()     (c == EOF ? EOF                                       \


I don't think this patch is correct. Casting a 'char' to 'unsigned char'
before comparing it with EOF is asking for trouble if c == 0xff
(in ISO-8859-1: Latin small letter y with diaeresis). The previous
ungetc macro was correct.

Here is another patch which

- backs out the troublesome cast,

- To avoid the warnings, adds an optimized macro 'ungetc_not_eof'. (It
  is a shame that gcc cries whenever it makes a clever optimization. Now
  we have to do the optimization by hand.)

- Before every comparison of a 'char' with 'int' (i.e. 'unsigned char'
  or EOF), casts the 'char' to 'unsigned char'.

- fixes the upper bound of the 'wp' array. It is accessed with an
  'unsigned char' index and therefore needs to have length 256, not 255.

Bruno


2000-08-29  Bruno Haible  <haible@clisp.cons.org>

	* stdio-common/vfscanf.c (_IO_vfscanf): Back out last ungetc change.
	When comparing a char with an int, always cast the char to
	'unsigned char'. New macro ungetc_not_eof, to avoid warnings when
	compiling with -funsigned-char.	Use UCHAR_MAX+1 instead of UCHAR_MAX.

*** glibc-cvs/stdio-common/vfscanf.c.bak	Tue Aug 29 13:08:21 2000
--- glibc-cvs/stdio-common/vfscanf.c	Tue Aug 29 21:43:08 2000
***************
*** 76,81 ****
--- 76,83 ----
  #  define ungetc(c, s)	((void) (c == WEOF				      \
  				 || (--read_in,				      \
  				     _IO_sputbackwc (s, c))))
+ #  define ungetc_not_eof(c, s)	((void) (--read_in,			      \
+ 					 _IO_sputbackwc (s, c)))
  #  define inchar()	(c == WEOF ? WEOF				      \
  			 : ((c = _IO_getwc_unlocked (s)),		      \
  			    (void) (c != WEOF && ++read_in), c))
***************
*** 102,110 ****
  #  undef EOF
  #  define EOF		  WEOF
  # else
! #  define ungetc(c, s)	((void) ((int) (signed char) c == EOF		      \
  				 || (--read_in,				      \
  				     _IO_sputbackc (s, (unsigned char) c))))
  #  define inchar()	(c == EOF ? EOF					      \
  			 : ((c = _IO_getc_unlocked (s)),		      \
  			    (void) (c != EOF && ++read_in), c))
--- 104,114 ----
  #  undef EOF
  #  define EOF		  WEOF
  # else
! #  define ungetc(c, s)	((void) ((int) c == EOF				      \
  				 || (--read_in,				      \
  				     _IO_sputbackc (s, (unsigned char) c))))
+ #  define ungetc_not_eof(c, s)	((void) (--read_in,			      \
+ 					 _IO_sputbackc (s, (unsigned char) c)))
  #  define inchar()	(c == EOF ? EOF					      \
  			 : ((c = _IO_getc_unlocked (s)),		      \
  			    (void) (c != EOF && ++read_in), c))
***************
*** 170,175 ****
--- 174,180 ----
    __libc_cleanup_region_end (0)
  #else
  # define ungetc(c, s)	((void) (c != EOF && --read_in), ungetc (c, s))
+ # define ungetc_not_eof(c, s)	(--read_in, (ungetc) (c, s))
  # define inchar()	(c == EOF ? EOF					      \
  			 : ((c = getc (s)), (void) (c != EOF && ++read_in), c))
  # define MEMCPY(d, s, n)  memcpy (d, s, n)
***************
*** 320,326 ****
        if (wpsize == wpmax)						    \
  	{								    \
  	  CHAR_T *old = wp;						    \
! 	  wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax;	    \
  	  wp = (CHAR_T *) alloca (wpmax * sizeof (wchar_t));		    \
  	  if (old != NULL)						    \
  	    MEMCPY (wp, old, wpsize);					    \
--- 325,331 ----
        if (wpsize == wpmax)						    \
  	{								    \
  	  CHAR_T *old = wp;						    \
! 	  wpmax = (UCHAR_MAX + 1 > 2 * wpmax ? UCHAR_MAX + 1 : 2 * wpmax);  \
  	  wp = (CHAR_T *) alloca (wpmax * sizeof (wchar_t));		    \
  	  if (old != NULL)						    \
  	    MEMCPY (wp, old, wpsize);					    \
***************
*** 403,409 ****
  #endif
  
  #ifndef COMPILE_WSCANF
!       if (!isascii (*f))
  	{
  	  /* Non-ASCII, may be a multibyte.  */
  	  int len = __mbrlen (f, strlen (f), &state);
--- 408,414 ----
  #endif
  
  #ifndef COMPILE_WSCANF
!       if (!isascii ((unsigned char) *f))
  	{
  	  /* Non-ASCII, may be a multibyte.  */
  	  int len = __mbrlen (f, strlen (f), &state);
***************
*** 414,422 ****
  		  c = inchar ();
  		  if (c == EOF)
  		    input_error ();
! 		  else if (c != *f++)
  		    {
! 		      ungetc (c, s);
  		      conv_error ();
  		    }
  		}
--- 419,427 ----
  		  c = inchar ();
  		  if (c == EOF)
  		    input_error ();
! 		  else if (c != (unsigned char) *f++)
  		    {
! 		      ungetc_not_eof (c, s);
  		      conv_error ();
  		    }
  		}
***************
*** 475,485 ****
        wpsize = 0;
  
        /* Check for a positional parameter specification.  */
!       if (ISDIGIT (*f))
  	{
! 	  argpos = *f++ - L_('0');
! 	  while (ISDIGIT (*f))
! 	    argpos = argpos * 10 + (*f++ - L_('0'));
  	  if (*f == L_('$'))
  	    ++f;
  	  else
--- 480,490 ----
        wpsize = 0;
  
        /* Check for a positional parameter specification.  */
!       if (ISDIGIT ((UCHAR_T) *f))
  	{
! 	  argpos = (UCHAR_T) *f++ - L_('0');
! 	  while (ISDIGIT ((UCHAR_T) *f))
! 	    argpos = argpos * 10 + ((UCHAR_T) *f++ - L_('0'));
  	  if (*f == L_('$'))
  	    ++f;
  	  else
***************
*** 509,523 ****
  	  }
  
        /* We have seen width. */
!       if (ISDIGIT (*f))
  	flags |= WIDTH;
  
        /* Find the maximum field width.  */
        width = 0;
!       while (ISDIGIT (*f))
  	{
  	  width *= 10;
! 	  width += *f++ - L_('0');
  	}
      got_width:
        if (width == 0)
--- 514,528 ----
  	  }
  
        /* We have seen width. */
!       if (ISDIGIT ((UCHAR_T) *f))
  	flags |= WIDTH;
  
        /* Find the maximum field width.  */
        width = 0;
!       while (ISDIGIT ((UCHAR_T) *f))
  	{
  	  width *= 10;
! 	  width += (UCHAR_T) *f++ - L_('0');
  	}
      got_width:
        if (width == 0)
***************
*** 617,623 ****
  	    input_error ();
  	  if (c != fc)
  	    {
! 	      ungetc (c, s);
  	      conv_error ();
  	    }
  	  break;
--- 622,628 ----
  	    input_error ();
  	  if (c != fc)
  	    {
! 	      ungetc_not_eof (c, s);
  	      conv_error ();
  	    }
  	  break;
***************
*** 837,843 ****
  		{
  		  if (ISSPACE (c))
  		    {
! 		      ungetc (c, s);
  		      break;
  		    }
  
--- 842,848 ----
  		{
  		  if (ISSPACE (c))
  		    {
! 		      ungetc_not_eof (c, s);
  		      break;
  		    }
  
***************
*** 937,943 ****
  	      if (!(flags & SUPPRESS))
  		{
  #ifdef COMPILE_WSCANF
! 		  /* We have to emit the code to get into the intial
  		     state.  */
  		  char buf[MB_LEN_MAX];
  		  size_t n = __wcrtomb (buf, L'\0', &state);
--- 942,948 ----
  	      if (!(flags & SUPPRESS))
  		{
  #ifdef COMPILE_WSCANF
! 		  /* We have to emit the code to get into the initial
  		     state.  */
  		  char buf[MB_LEN_MAX];
  		  size_t n = __wcrtomb (buf, L'\0', &state);
***************
*** 1002,1008 ****
  	      {
  		if (ISSPACE (c))
  		  {
! 		    ungetc (c, s);
  		    break;
  		  }
  
--- 1007,1013 ----
  	      {
  		if (ISSPACE (c))
  		  {
! 		    ungetc_not_eof (c, s);
  		    break;
  		  }
  
***************
*** 1263,1269 ****
  			mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
  
  		      cmpp = mbdigits[n];
! 		      while (*cmpp == c && avail > 0)
  			{
  			  if (*++cmpp == '\0')
  			    break;
--- 1268,1274 ----
  			mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
  
  		      cmpp = mbdigits[n];
! 		      while ((unsigned char) *cmpp == c && avail > 0)
  			{
  			  if (*++cmpp == '\0')
  			    break;
***************
*** 1288,1295 ****
  			{
  			  ungetc (c, s);
  			  while (--cmpp > mbdigits[n])
! 			    ungetc (*cmpp, s);
! 			  c = *cmpp;
  			}
  
  		      /* Advance the pointer to the next string.  */
--- 1293,1300 ----
  			{
  			  ungetc (c, s);
  			  while (--cmpp > mbdigits[n])
! 			    ungetc_not_eof ((unsigned char) *cmpp, s);
! 			  c = (unsigned char) *cmpp;
  			}
  
  		      /* Advance the pointer to the next string.  */
***************
*** 1316,1322 ****
  			      int avail = width > 0 ? width : INT_MAX;
  
  			      cmpp = mbdigits[n];
! 			      while (*cmpp == c && avail > 0)
  				{
  				  if (*++cmpp == '\0')
  				    break;
--- 1321,1327 ----
  			      int avail = width > 0 ? width : INT_MAX;
  
  			      cmpp = mbdigits[n];
! 			      while ((unsigned char) *cmpp == c && avail > 0)
  				{
  				  if (*++cmpp == '\0')
  				    break;
***************
*** 1340,1347 ****
  				{
  				  ungetc (c, s);
  				  while (--cmpp > mbdigits[n])
! 				    ungetc (*cmpp, s);
! 				  c = *cmpp;
  				}
  
  			      /* Advance the pointer to the next string.  */
--- 1345,1352 ----
  				{
  				  ungetc (c, s);
  				  while (--cmpp > mbdigits[n])
! 				    ungetc_not_eof ((unsigned char) *cmpp, s);
! 				  c = (unsigned char) *cmpp;
  				}
  
  			      /* Advance the pointer to the next string.  */
***************
*** 1377,1383 ****
  		      const char *cmpp = thousands;
  		      int avail = width > 0 ? width : INT_MAX;
  
! 		      while (*cmpp == c && avail > 0)
  			{
  			  ADDW (c);
  			  if (*++cmpp == '\0')
--- 1382,1388 ----
  		      const char *cmpp = thousands;
  		      int avail = width > 0 ? width : INT_MAX;
  
! 		      while ((unsigned char) *cmpp == c && avail > 0)
  			{
  			  ADDW (c);
  			  if (*++cmpp == '\0')
***************
*** 1398,1405 ****
  			      wpsize -= cmpp - thousands;
  			      ungetc (c, s);
  			      while (--cmpp > thousands)
! 				ungetc (*cmpp, s);
! 			      c = *cmpp;
  			    }
  			  break;
  			}
--- 1403,1410 ----
  			      wpsize -= cmpp - thousands;
  			      ungetc (c, s);
  			      while (--cmpp > thousands)
! 				ungetc_not_eof ((unsigned char) *cmpp, s);
! 			      c = (unsigned char) *cmpp;
  			    }
  			  break;
  			}
***************
*** 1449,1455 ****
  			const char *cmpp = thousands;
  			int avail = width > 0 ? width : INT_MAX;
  
! 			while (*cmpp == c && avail > 0)
  			  {
  			    ADDW (c);
  			    if (*++cmpp == '\0')
--- 1454,1460 ----
  			const char *cmpp = thousands;
  			int avail = width > 0 ? width : INT_MAX;
  
! 			while ((unsigned char) *cmpp == c && avail > 0)
  			  {
  			    ADDW (c);
  			    if (*++cmpp == '\0')
***************
*** 1470,1477 ****
  				wpsize -= cmpp - thousands;
  				ungetc (c, s);
  				while (--cmpp > thousands)
! 				  ungetc (*cmpp, s);
! 				c = *cmpp;
  			      }
  			    break;
  			  }
--- 1475,1482 ----
  				wpsize -= cmpp - thousands;
  				ungetc (c, s);
  				while (--cmpp > thousands)
! 				  ungetc_not_eof ((unsigned char) *cmpp, s);
! 				c = (unsigned char) *cmpp;
  			      }
  			    break;
  			  }
***************
*** 1611,1617 ****
  		  const char *cmpp = decimal;
  		  int avail = width > 0 ? width : INT_MAX;
  
! 		  while (*cmpp == c && avail > 0)
  		    if (*++cmpp == '\0')
  		      break;
  		    else
--- 1616,1622 ----
  		  const char *cmpp = decimal;
  		  int avail = width > 0 ? width : INT_MAX;
  
! 		  while ((unsigned char) *cmpp == c && avail > 0)
  		    if (*++cmpp == '\0')
  		      break;
  		    else
***************
*** 1629,1635 ****
  			  ungetc (c, s);
  			  if (cmpp == decimal)
  			    break;
! 			  c = *--cmpp;
  			}
  
  		      conv_error ();
--- 1634,1640 ----
  			  ungetc (c, s);
  			  if (cmpp == decimal)
  			    break;
! 			  c = (unsigned char) *--cmpp;
  			}
  
  		      conv_error ();
***************
*** 1779,1785 ****
  
  		  if (! got_dot)
  		    {
! 		      while (*cmpp == c && avail > 0)
  			if (*++cmpp == '\0')
  			  break;
  			else
--- 1784,1790 ----
  
  		  if (! got_dot)
  		    {
! 		      while ((unsigned char) *cmpp == c && avail > 0)
  			if (*++cmpp == '\0')
  			  break;
  			else
***************
*** 1794,1800 ****
  		    {
  		      /* Add all the characters.  */
  		      for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
! 			ADDW (*cmpp);
  		      if (width > 0)
  			width = avail;
  		      got_dot = 1;
--- 1799,1805 ----
  		    {
  		      /* Add all the characters.  */
  		      for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
! 			ADDW ((unsigned char) *cmpp);
  		      if (width > 0)
  			width = avail;
  		      got_dot = 1;
***************
*** 1815,1821 ****
  			    ++cmp2p;
  			  if (cmp2p == cmpp)
  			    {
! 			      while (*cmp2p == c && avail > 0)
  				if (*++cmp2p == '\0')
  				  break;
  				else
--- 1820,1826 ----
  			    ++cmp2p;
  			  if (cmp2p == cmpp)
  			    {
! 			      while ((unsigned char) *cmp2p == c && avail > 0)
  				if (*++cmp2p == '\0')
  				  break;
  				else
***************
*** 1831,1837 ****
  			{
  			  /* Add all the characters.  */
  			  for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
! 			    ADDW (*cmpp);
  			  if (width > 0)
  			    width = avail;
  			}
--- 1836,1842 ----
  			{
  			  /* Add all the characters.  */
  			  for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
! 			    ADDW ((unsigned char) *cmpp);
  			  if (width > 0)
  			    width = avail;
  			}
***************
*** 1924,1935 ****
  #else
  	  /* Fill WP with byte flags indexed by character.
  	     We will use this flag map for matching input characters.  */
! 	  if (wpmax < UCHAR_MAX)
  	    {
! 	      wpmax = UCHAR_MAX;
  	      wp = (char *) alloca (wpmax);
  	    }
! 	  memset (wp, '\0', UCHAR_MAX);
  
  	  fc = *f;
  	  if (fc == ']' || fc == '-')
--- 1929,1940 ----
  #else
  	  /* Fill WP with byte flags indexed by character.
  	     We will use this flag map for matching input characters.  */
! 	  if (wpmax < UCHAR_MAX + 1)
  	    {
! 	      wpmax = UCHAR_MAX + 1;
  	      wp = (char *) alloca (wpmax);
  	    }
! 	  memset (wp, '\0', UCHAR_MAX + 1);
  
  	  fc = *f;
  	  if (fc == ']' || fc == '-')
***************
*** 1947,1953 ****
  	      {
  		/* Add all characters from the one before the '-'
  		   up to (but not including) the next format char.  */
! 		for (fc = f[-2]; fc < *f; ++fc)
  		  wp[fc] = 1;
  	      }
  	    else
--- 1952,1958 ----
  	      {
  		/* Add all characters from the one before the '-'
  		   up to (but not including) the next format char.  */
! 		for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc)
  		  wp[fc] = 1;
  	      }
  	    else
***************
*** 2069,2075 ****
  
  		  if (wp[c] == not_in)
  		    {
! 		      ungetc (c, s);
  		      break;
  		    }
  
--- 2074,2080 ----
  
  		  if (wp[c] == not_in)
  		    {
! 		      ungetc_not_eof (c, s);
  		      break;
  		    }
  
***************
*** 2270,2276 ****
  
  		  if (wp[c] == not_in)
  		    {
! 		      ungetc (c, s);
  		      break;
  		    }
  
--- 2275,2281 ----
  
  		  if (wp[c] == not_in)
  		    {
! 		      ungetc_not_eof (c, s);
  		      break;
  		    }
  
***************
*** 2323,2329 ****
  	      if (!(flags & SUPPRESS))
  		{
  #ifdef COMPILE_WSCANF
! 		  /* We have to emit the code to get into the intial
  		     state.  */
  		  char buf[MB_LEN_MAX];
  		  size_t n = __wcrtomb (buf, L'\0', &state);
--- 2328,2334 ----
  	      if (!(flags & SUPPRESS))
  		{
  #ifdef COMPILE_WSCANF
! 		  /* We have to emit the code to get into the initial
  		     state.  */
  		  char buf[MB_LEN_MAX];
  		  size_t n = __wcrtomb (buf, L'\0', &state);

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]