scanf and "-0x", "-nan", "inf"

Eric Blake ebb9@byu.net
Thu May 24 12:35:00 GMT 2007


Jeff Johnston <jjohnstn <at> redhat.com> writes:

> 
> Ok. 
> 

Further testing hammered out a couple of issues in my earlier posting: ungetc
is not reentrant, so I switched to _ungetc_r.  Also, POSIX requires that a read
failure return EOF no matter what, but we were sometimes returning a
non-negative result even after ferror() was set, because of a read error after
a match.  Here's what I committed:

2007-05-23  Eric Blake  <ebb9@byu.net>

	* libc/stdio/vfprintf.c (_VFPRINTF_R): Don't truncate %p when
	sizeof(void*) is 8 but sizeof(long) is 4.
	* libc/stdio/vfscanf.c (__SVFSCANF_R): Likewise.  Fix %i scanning
	of "-0x".  Support "-nan" and "inf" for %e.  Audit usage of ungetc
	to fix reentrancy and bug on encoding error in multibyte locales.
	Always return EOF on read error.

Index: libc/stdio/vfprintf.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdio/vfprintf.c,v
retrieving revision 1.61
diff -u -p -r1.61 vfprintf.c
--- libc/stdio/vfprintf.c	23 May 2007 20:36:28 -0000	1.61
+++ libc/stdio/vfprintf.c	24 May 2007 04:17:01 -0000
@@ -1017,7 +1017,7 @@ reswitch:	switch (ch) {
 			 *	-- ANSI X3J11
 			 */
 			/* NOSTRICT */
-			_uquad = (u_long)(unsigned _POINTER_INT)GET_ARG (N, ap, void_ptr_t);
+			_uquad = (uintptr_t) GET_ARG (N, ap, void_ptr_t);
 			base = HEX;
 			xdigs = "0123456789abcdef";
 			flags |= HEXPREFIX;
Index: libc/stdio/vfscanf.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdio/vfscanf.c,v
retrieving revision 1.37
diff -u -p -r1.37 vfscanf.c
--- libc/stdio/vfscanf.c	23 May 2007 20:36:28 -0000	1.37
+++ libc/stdio/vfscanf.c	24 May 2007 04:17:01 -0000
@@ -825,10 +825,10 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
                     *wcp = L'\0';
                   if (mbslen != (size_t)-2) /* Incomplete sequence */
                     {
-                      if (iswspace(*wcp)) 
+                      if (iswspace(*wcp))
                         {
                           while (n != 0)
-                            ungetc (buf[--n], fp);
+                            _ungetc_r (rptr, (unsigned char) buf[--n], fp);
                           break;
                         }
                       nread += n;
@@ -987,15 +987,15 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 		    }
 		  break;
 
-		  /* x ok iff flag still set & 2nd char */
+		  /* x ok iff flag still set & single 0 seen */
 		case 'x':
 		case 'X':
-		  if (flags & PFXOK && p == buf + 1)
+		  if ((flags & (PFXOK | NZDIGITS)) == PFXOK)
 		    {
 		      base = 16;/* if %i */
 		      flags &= ~PFXOK;
 		      /* We must reset the NZDIGITS and NDIGITS
-		         flags that would have been unset by seeing
+			 flags that would have been unset by seeing
 			 the zero that preceded the X or x.  */
 		      flags |= NZDIGITS | NDIGITS;
 		      goto ok;
@@ -1024,18 +1024,16 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 	   * If we had only a sign, it is no good; push back the sign.
 	   * If the number ends in `x', it was [sign] '0' 'x', so push back
 	   * the x and treat it as [sign] '0'.
+	   * Use of ungetc here and below assumes ASCII encoding; we are only
+	   * pushing back 7-bit characters, so casting to unsigned char is
+	   * not necessary.
 	   */
 	  if (flags & NDIGITS)
 	    {
 	      if (p > buf)
-		_CAST_VOID ungetc (*(u_char *)-- p, fp);
-	      goto match_failure;
-	    }
-	  c = ((u_char *) p)[-1];
-	  if (c == 'x' || c == 'X')
-	    {
-	      --p;
-	      /*(void)*/ ungetc (c, fp);
+		_ungetc_r (rptr, *--p, fp); /* [-+xX] */
+	      if (p == buf)
+		goto match_failure;
 	    }
 	  if ((flags & SUPPRESS) == 0)
 	    {
@@ -1044,7 +1042,19 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 	      *p = 0;
 	      res = (*ccfn) (rptr, buf, (char **) NULL, base);
 	      if (flags & POINTER)
-		*(GET_ARG (N, ap, _PTR *)) = (_PTR) (unsigned _POINTER_INT) res;
+		{
+		  void **vp = GET_ARG (N, ap, void **);
+#ifndef _NO_LONGLONG
+		  if (sizeof (uintptr_t) > sizeof (u_long))
+		    {
+		      u_long_long resll;
+		      resll = _strtoull_r (rptr, buf, (char **) NULL, base);
+		      *vp = (void *) (uintptr_t) resll;
+		    }
+		  else
+#endif /* !_NO_LONGLONG */
+		    *vp = (void *) (uintptr_t) res;
+		}
 #ifdef _WANT_IO_C99_FORMATS
 	      else if (flags & CHAR)
 		{
@@ -1096,7 +1106,8 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 	  long zeroes, exp_adjust;
 	  char *exp_start = NULL;
 	  unsigned width_left = 0;
-	  int nancount = 0;
+	  char nancount = 0;
+	  char infcount = 0;
 #ifdef hardway
 	  if (width == 0 || width > sizeof (buf) - 1)
 #else
@@ -1141,7 +1152,7 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 		case '7':
 		case '8':
 		case '9':
-		  if (nancount == 0)
+		  if (nancount + infcount == 0)
 		    {
 		      flags &= ~(SIGNOK | NDIGITS);
 		      goto fok;
@@ -1159,18 +1170,23 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 		case 'n':
 		case 'N':
 		  if (nancount == 0
-		      && (flags & (SIGNOK | NDIGITS | DPTOK | EXPOK)) ==
-				  (SIGNOK | NDIGITS | DPTOK | EXPOK))
+		      && (flags & (NDIGITS | DPTOK | EXPOK)) ==
+				  (NDIGITS | DPTOK | EXPOK))
 		    {
 		      flags &= ~(SIGNOK | DPTOK | EXPOK | NDIGITS);
 		      nancount = 1;
 		      goto fok;
 		    }
-		  else if (nancount == 2)
+		  if (nancount == 2)
 		    {
 		      nancount = 3;
 		      goto fok;
 		    }
+		  if (infcount == 1 || infcount == 4)
+		    {
+		      infcount++;
+		      goto fok;
+		    }
 		  break;
 		case 'a':
 		case 'A':
@@ -1180,6 +1196,46 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 		      goto fok;
 		    }
 		  break;
+		case 'i':
+		case 'I':
+		  if (infcount == 0
+		      && (flags & (NDIGITS | DPTOK | EXPOK)) ==
+				  (NDIGITS | DPTOK | EXPOK))
+		    {
+		      flags &= ~(SIGNOK | DPTOK | EXPOK | NDIGITS);
+		      infcount = 1;
+		      goto fok;
+		    }
+		  if (infcount == 3 || infcount == 5)
+		    {
+		      infcount++;
+		      goto fok;
+		    }
+		  break;
+		case 'f':
+		case 'F':
+		  if (infcount == 2)
+		    {
+		      infcount = 3;
+		      goto fok;
+		    }
+		  break;
+		case 't':
+		case 'T':
+		  if (infcount == 6)
+		    {
+		      infcount = 7;
+		      goto fok;
+		    }
+		  break;
+		case 'y':
+		case 'Y':
+		  if (infcount == 7)
+		    {
+		      infcount = 8;
+		      goto fok;
+		    }
+		  break;
 		case '.':
 		  if (flags & DPTOK)
 		    {
@@ -1212,7 +1268,7 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 	      *p++ = c;
 	    fskip:
 	      width--;
-              ++nread;
+	      ++nread;
 	      if (--fp->_r > 0)
 		fp->_p++;
 	      else
@@ -1221,24 +1277,48 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 	    }
 	  if (zeroes)
 	    flags &= ~NDIGITS;
-          /* We may have a 'N' or possibly even a 'Na' as the start of 'NaN', 
-	     only to run out of chars before it was complete (or having 
-	     encountered a non- matching char).  So check here if we have an 
-	     outstanding nancount, and if so put back the chars we did 
-	     swallow and treat as a failed match. */
-          if (nancount && nancount != 3)
-            {
-              /* Ok... what are we supposed to do in the event that the
-              __srefill call above was triggered in the middle of the partial
-              'NaN' and so we can't put it all back? */
-              while (nancount-- && (p > buf))
-                {
-                  ungetc (*(u_char *)--p, fp);
-                  --nread;
-                }
-              goto match_failure;
-            }
-          /*
+	  /* We may have a 'N' or possibly even [sign] 'N' 'a' as the
+	     start of 'NaN', only to run out of chars before it was
+	     complete (or having encountered a non-matching char).  So
+	     check here if we have an outstanding nancount, and if so
+	     put back the chars we did swallow and treat as a failed
+	     match.
+
+	     FIXME - we still don't handle NAN([0xdigits]).  */
+	  if (nancount - 1U < 2U) /* nancount && nancount < 3 */
+	    {
+	      /* Newlib's ungetc works even if we called __srefill in
+		 the middle of a partial parse, but POSIX does not
+		 guarantee that in all implementations of ungetc.  */
+	      while (p > buf)
+		{
+		  _ungetc_r (rptr, *--p, fp); /* [-+nNaA] */
+		  --nread;
+		}
+	      goto match_failure;
+	    }
+	  /* Likewise for 'inf' and 'infinity'.	 But be careful that
+	     'infinite' consumes only 3 characters, leaving the stream
+	     at the second 'i'.	 */
+	  if (infcount - 1U < 7U) /* infcount && infcount < 8 */
+	    {
+	      if (infcount >= 3) /* valid 'inf', but short of 'infinity' */
+		while (infcount-- > 3)
+		  {
+		    _ungetc_r (rptr, *--p, fp); /* [iInNtT] */
+		    --nread;
+		  }
+	      else
+		{
+		  while (p > buf)
+		    {
+		      _ungetc_r (rptr, *--p, fp); /* [-+iInN] */
+		      --nread;
+		    }
+		  goto match_failure;
+		}
+	    }
+	  /*
 	   * If no digits, might be missing exponent digits
 	   * (just give back the exponent) or might be missing
 	   * regular digits, but had sign and/or decimal point.
@@ -1249,22 +1329,22 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 		{
 		  /* no digits at all */
 		  while (p > buf)
-                    {
-		      ungetc (*(u_char *)--p, fp);
-                      --nread;
-                    }
+		    {
+		      _ungetc_r (rptr, *--p, fp); /* [-+.] */
+		      --nread;
+		    }
 		  goto match_failure;
 		}
 	      /* just a bad exponent (e and maybe sign) */
-	      c = *(u_char *)-- p;
-              --nread;
+	      c = *--p;
+	      --nread;
 	      if (c != 'e' && c != 'E')
 		{
-		  _CAST_VOID ungetc (c, fp);	/* sign */
-		  c = *(u_char *)-- p;
-                  --nread;
+		  _ungetc_r (rptr, c, fp); /* [-+] */
+		  c = *--p;
+		  --nread;
 		}
-	      _CAST_VOID ungetc (c, fp);
+	      _ungetc_r (rptr, c, fp); /* [eE] */
 	    }
 	  if ((flags & SUPPRESS) == 0)
 	    {
@@ -1332,10 +1412,15 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 	}
     }
 input_failure:
+  /* On read failure, return EOF failure regardless of matches; errno
+     should have been set prior to here.  On EOF failure (including
+     invalid format string), return EOF if no matches yet, else number
+     of matches made prior to failure.  */
   _funlockfile (fp);
-  return nassigned ? nassigned : -1;
+  return nassigned && !(fp->_flags & __SERR) ? nassigned : EOF;
 match_failure:
 all_done:
+  /* Return number of matches, which can be 0 on match failure.  */
   _funlockfile (fp);
   return nassigned;
 }




More information about the Newlib mailing list