scanf and "-0x", "-nan", "inf"

Jeff Johnston jjohnstn@redhat.com
Wed May 23 20:52:00 GMT 2007


Ok. 

-- Jeff J.

Eric Blake wrote:
> A couple more scanf bugs.  OK to apply this?  (and ping on my other scanf patch 
> for pos args)
>
> sscanf("-nan", "%e", &float1) was failing, instead of setting float1 to NaN
>
> sscanf("-0x", "%i%c", &int1, &ch) was failing, instead of setting int1 to 0 and 
> ch to 'x'
>
> sscanf("infinity", "%e", &float1) was failing, instead of setting float1 to 
> infinity
>
> This patch also fixes a bug only triggered by %S in multibyte locales where 
> 0xff is considered part of an incomplete multibyte whitespace sequence and 
> where char is signed (is there such a locale?).  The code lacked a cast to 
> unsigned char, so it was calling the equivalent of ungetc(EOF) instead of the 
> intended ungetc(0xff).  I audited all the other uses of ungetc and confirmed
> that no cast is necessary there, because the only characters we could possibly
> be pushing back are 7-bit ASCII.
>
> Bugs that I am still aware of:
>
> sscanf("nan():", "%e%c", &float1, &ch) populates ch with '(' instead of ':' 
> (ie. n-char-sequence nans are not parsed)
>
> sscanf("33554430.999999999999", "%e", &float1) populates float1 with 33554432
> instead of 33554430 (i.e. double rounding occurs because strtod is used; even
> once scanf is changed to use strtof, the fix depends on strtof doing the parse
> itself rather than wrapping strtod)
>
> sscanf("0x1p0", "%e", &float1) populates float1 with 0 instead of 1 (i.e. hex
> floats are not parsed) - could be made conditional on
> --enable-newlib-io-c99-formats
>
> 2007-05-22  Eric Blake  <ebb9@byu.net>
>
> 	* libc/stdio/vfscanf.c (__SVFSCANF_R): Fix %i scanning of "-0x".
> 	Support "-nan" and "inf" for %e.  Audit usage of ungetc to fix bug
> 	in %S in multibyte locales.
>
> --- libc/stdio/vfscanf.c	21 May 2007 12:39:03 -0000
> +++ libc/stdio/vfscanf.c	22 May 2007 14:46:02 -0000
> @@ -825,10 +825,10 @@
>                      *wcp = L'\0';
>                    if (mbslen != (size_t)-2) /* Incomplete sequence */
>                      {
> -                      if (iswspace(*wcp)) 
> +                      if (iswspace(*wcp))
>                          {
>                            while (n != 0)
> -                            ungetc (buf[--n], fp);
> +                            ungetc ((unsigned char) buf[--n], fp);
>                            break;
>                          }
>                        nread += n;
> @@ -987,15 +987,15 @@
>  		    }
>  		  break;
>  
> -		  /* x ok iff flag still set & 2nd char */
> +		  /* x ok iff flag still set & single 0 seen */
>  		case 'x':
>  		case 'X':
> -		  if (flags & PFXOK && p == buf + 1)
> +		  if ((flags & (PFXOK | NZDIGITS)) == PFXOK)
>  		    {
>  		      base = 16;/* if %i */
>  		      flags &= ~PFXOK;
>  		      /* We must reset the NZDIGITS and NDIGITS
> -		         flags that would have been unset by seeing
> +			 flags that would have been unset by seeing
>  			 the zero that preceded the X or x.  */
>  		      flags |= NZDIGITS | NDIGITS;
>  		      goto ok;
> @@ -1024,18 +1024,16 @@
>  	   * If we had only a sign, it is no good; push back the sign.
>  	   * If the number ends in `x', it was [sign] '0' 'x', so push back
>  	   * the x and treat it as [sign] '0'.
> +	   * Use of ungetc here and below assumes ASCII encoding; we are only
> +	   * pushing back 7-bit characters, so casting to unsigned char is
> +	   * not necessary.
>  	   */
>  	  if (flags & NDIGITS)
>  	    {
>  	      if (p > buf)
> -		_CAST_VOID ungetc (*(u_char *)-- p, fp);
> -	      goto match_failure;
> -	    }
> -	  c = ((u_char *) p)[-1];
> -	  if (c == 'x' || c == 'X')
> -	    {
> -	      --p;
> -	      /*(void)*/ ungetc (c, fp);
> +		ungetc (*--p, fp); /* [-+xX] */
> +	      if (p == buf)
> +		goto match_failure;
>  	    }
>  	  if ((flags & SUPPRESS) == 0)
>  	    {
> @@ -1096,7 +1094,8 @@
>  	  long zeroes, exp_adjust;
>  	  char *exp_start = NULL;
>  	  unsigned width_left = 0;
> -	  int nancount = 0;
> +	  unsigned char nancount = 0;
> +	  unsigned char infcount = 0;
>  #ifdef hardway
>  	  if (width == 0 || width > sizeof (buf) - 1)
>  #else
> @@ -1141,7 +1140,7 @@
>  		case '7':
>  		case '8':
>  		case '9':
> -		  if (nancount == 0)
> +		  if (nancount + infcount == 0)
>  		    {
>  		      flags &= ~(SIGNOK | NDIGITS);
>  		      goto fok;
> @@ -1159,18 +1158,23 @@
>  		case 'n':
>  		case 'N':
>  		  if (nancount == 0
> -		      && (flags & (SIGNOK | NDIGITS | DPTOK | EXPOK)) ==
> -				  (SIGNOK | NDIGITS | DPTOK | EXPOK))
> +		      && (flags & (NDIGITS | DPTOK | EXPOK)) ==
> +				  (NDIGITS | DPTOK | EXPOK))
>  		    {
>  		      flags &= ~(SIGNOK | DPTOK | EXPOK | NDIGITS);
>  		      nancount = 1;
>  		      goto fok;
>  		    }
> -		  else if (nancount == 2)
> +		  if (nancount == 2)
>  		    {
>  		      nancount = 3;
>  		      goto fok;
>  		    }
> +		  if (infcount == 1 || infcount == 4)
> +		    {
> +		      infcount++;
> +		      goto fok;
> +		    }
>  		  break;
>  		case 'a':
>  		case 'A':
> @@ -1180,6 +1184,46 @@
>  		      goto fok;
>  		    }
>  		  break;
> +		case 'i':
> +		case 'I':
> +		  if (infcount == 0
> +		      && (flags & (NDIGITS | DPTOK | EXPOK)) ==
> +				  (NDIGITS | DPTOK | EXPOK))
> +		    {
> +		      flags &= ~(SIGNOK | DPTOK | EXPOK | NDIGITS);
> +		      infcount = 1;
> +		      goto fok;
> +		    }
> +		  if (infcount == 3 || infcount == 5)
> +		    {
> +		      infcount++;
> +		      goto fok;
> +		    }
> +		  break;
> +		case 'f':
> +		case 'F':
> +		  if (infcount == 2)
> +		    {
> +		      infcount = 3;
> +		      goto fok;
> +		    }
> +		  break;
> +		case 't':
> +		case 'T':
> +		  if (infcount == 6)
> +		    {
> +		      infcount = 7;
> +		      goto fok;
> +		    }
> +		  break;
> +		case 'y':
> +		case 'Y':
> +		  if (infcount == 7)
> +		    {
> +		      infcount = 8;
> +		      goto fok;
> +		    }
> +		  break;
>  		case '.':
>  		  if (flags & DPTOK)
>  		    {
> @@ -1212,7 +1256,7 @@
>  	      *p++ = c;
>  	    fskip:
>  	      width--;
> -              ++nread;
> +	      ++nread;
>  	      if (--fp->_r > 0)
>  		fp->_p++;
>  	      else
> @@ -1221,24 +1265,48 @@
>  	    }
>  	  if (zeroes)
>  	    flags &= ~NDIGITS;
> -          /* We may have a 'N' or possibly even a 'Na' as the start of 'NaN', 
> -	     only to run out of chars before it was complete (or having 
> -	     encountered a non- matching char).  So check here if we have an 
> -	     outstanding nancount, and if so put back the chars we did 
> -	     swallow and treat as a failed match. */
> -          if (nancount && nancount != 3)
> -            {
> -              /* Ok... what are we supposed to do in the event that the
> -              __srefill call above was triggered in the middle of the partial
> -              'NaN' and so we can't put it all back? */
> -              while (nancount-- && (p > buf))
> -                {
> -                  ungetc (*(u_char *)--p, fp);
> -                  --nread;
> -                }
> -              goto match_failure;
> -            }
> -          /*
> +	  /* We may have 'N' or possibly even [sign] 'N' 'a' as the
> +	     start of 'NaN', only to run out of chars before it was
> +	     complete (or having encountered a non-matching char).  So
> +	     check here if we have an outstanding nancount, and if so
> +	     put back the chars we did swallow and treat as a failed
> +	     match.
> +
> +	     FIXME - we still don't handle NAN([0xdigits]).  */
> +	  if (nancount - 1 < 2) /* nancount != 0 && nancount < 3 */
> +	    {
> +	      /* Newlib's ungetc works even if we called __srefill in
> +		 the middle of a partial parse, but POSIX does not
> +		 guarantee that in all implementations of ungetc.  */
> +	      while (p > buf)
> +		{
> +		  ungetc (*--p, fp); /* [-+nNaA] */
> +		  --nread;
> +		}
> +	      goto match_failure;
> +	    }
> +	  /* Likewise for 'inf' and 'infinity'.	 But be careful that
> +	     'infinite' consumes only 3 characters, leaving the stream
> +	     at the second 'i'.	 */
> +	  if (infcount - 1 < 7) /* infcount != 0 && infcount < 8 */
> +	    {
> +	      if (infcount >= 3) /* valid 'inf', but short of 'infinity' */
> +		while (infcount-- > 3)
> +		  {
> +		    ungetc (*--p, fp); /* [iInNtT] */
> +		    --nread;
> +		  }
> +	      else
> +		{
> +		  while (p > buf)
> +		    {
> +		      ungetc (*--p, fp); /* [-+iInN] */
> +		      --nread;
> +		    }
> +		  goto match_failure;
> +		}
> +	    }
> +	  /*
>  	   * If no digits, might be missing exponent digits
>  	   * (just give back the exponent) or might be missing
>  	   * regular digits, but had sign and/or decimal point.
> @@ -1249,22 +1317,22 @@
>  		{
>  		  /* no digits at all */
>  		  while (p > buf)
> -                    {
> -		      ungetc (*(u_char *)--p, fp);
> -                      --nread;
> -                    }
> +		    {
> +		      ungetc (*--p, fp); /* [-+.] */
> +		      --nread;
> +		    }
>  		  goto match_failure;
>  		}
>  	      /* just a bad exponent (e and maybe sign) */
> -	      c = *(u_char *)-- p;
> -              --nread;
> +	      c = *--p;
> +	      --nread;
>  	      if (c != 'e' && c != 'E')
>  		{
> -		  _CAST_VOID ungetc (c, fp);	/* sign */
> -		  c = *(u_char *)-- p;
> -                  --nread;
> +		  ungetc (c, fp); /* [-+] */
> +		  c = *--p;
> +		  --nread;
>  		}
> -	      _CAST_VOID ungetc (c, fp);
> +	      ungetc (c, fp); /* [eE] */
>  	    }
>  	  if ((flags & SUPPRESS) == 0)
>  	    {
>
>
>   



More information about the Newlib mailing list