This is the mail archive of the newlib@sourceware.org mailing list for the newlib project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: scanf and "-0x", "-nan", "inf"


Ok.

-- Jeff J.

Eric Blake wrote:
A couple more scanf bugs. OK to apply this? (and ping on my other scanf patch for pos args)

sscanf("-nan", "%e", &float1) was failing, instead of setting float1 to NaN

sscanf("-0x", "%i%c", &int1, &ch) was failing, instead of setting int1 to 0 and ch to 'x'

sscanf("infinity", "%e", &float1) was failing, instead of setting float1 to infinity

This patch also fixes a bug only triggered by %S in multibyte locales where 0xff is considered part of an incomplete multibyte whitespace sequence and where char is signed (is there such a locale?). The code lacked a cast to unsigned char, so it was calling the equivalent of ungetc(EOF) instead of the intended ungetc(0xff). I audited all the other uses of ungetc to ensure that no cast was necessary, because the only thing we could possibly be unget'ting were 7-bit ASCII characters.

Bugs that I am still aware of:

sscanf("nan():", "%e%c", &float1, &ch) populates ch with '(' instead of ':' (ie. n-char-sequence nans are not parsed)

sscanf("33554430.999999999999", "%e", &float1) populates float1 with 33554432 instead of 33554430 (ie. double-rounding occurred because strtod was used, but even fixing scanf to use strtof depends on strtof to be fixed to do the parse itself rather than wrapping strtod)

sscanf("0x1p0", "%e", &float1) populates float1 with 0 instead of 1 (ie. hex floats are not parsed) - could be made conditional on --enable-newlib-io-c99-formats


2007-05-22 Eric Blake <ebb9@byu.net>

	* libc/stdio/vfscanf.c (__SVFSCANF_R): Fix %i scanning of "-0x".
	Support "-nan" and "inf" for %e.  Audit usage of ungetc to fix bug
	in %S in multibyte locales.

--- libc/stdio/vfscanf.c 21 May 2007 12:39:03 -0000
+++ libc/stdio/vfscanf.c 22 May 2007 14:46:02 -0000
@@ -825,10 +825,10 @@
*wcp = L'\0';
if (mbslen != (size_t)-2) /* Incomplete sequence */
{
- if (iswspace(*wcp))
+ if (iswspace(*wcp))
{
while (n != 0)
- ungetc (buf[--n], fp);
+ ungetc ((unsigned char) buf[--n], fp);
break;
}
nread += n;
@@ -987,15 +987,15 @@
}
break;
- /* x ok iff flag still set & 2nd char */
+ /* x ok iff flag still set & single 0 seen */
case 'x':
case 'X':
- if (flags & PFXOK && p == buf + 1)
+ if ((flags & (PFXOK | NZDIGITS)) == PFXOK)
{
base = 16;/* if %i */
flags &= ~PFXOK;
/* We must reset the NZDIGITS and NDIGITS
- flags that would have been unset by seeing
+ flags that would have been unset by seeing
the zero that preceded the X or x. */
flags |= NZDIGITS | NDIGITS;
goto ok;
@@ -1024,18 +1024,16 @@
* If we had only a sign, it is no good; push back the sign.
* If the number ends in `x', it was [sign] '0' 'x', so push back
* the x and treat it as [sign] '0'.
+ * Use of ungetc here and below assumes ASCII encoding; we are only
+ * pushing back 7-bit characters, so casting to unsigned char is
+ * not necessary.
*/
if (flags & NDIGITS)
{
if (p > buf)
- _CAST_VOID ungetc (*(u_char *)-- p, fp);
- goto match_failure;
- }
- c = ((u_char *) p)[-1];
- if (c == 'x' || c == 'X')
- {
- --p;
- /*(void)*/ ungetc (c, fp);
+ ungetc (*--p, fp); /* [-+xX] */
+ if (p == buf)
+ goto match_failure;
}
if ((flags & SUPPRESS) == 0)
{
@@ -1096,7 +1094,8 @@
long zeroes, exp_adjust;
char *exp_start = NULL;
unsigned width_left = 0;
- int nancount = 0;
+ unsigned char nancount = 0;
+ unsigned char infcount = 0;
#ifdef hardway
if (width == 0 || width > sizeof (buf) - 1)
#else
@@ -1141,7 +1140,7 @@
case '7':
case '8':
case '9':
- if (nancount == 0)
+ if (nancount + infcount == 0)
{
flags &= ~(SIGNOK | NDIGITS);
goto fok;
@@ -1159,18 +1158,23 @@
case 'n':
case 'N':
if (nancount == 0
- && (flags & (SIGNOK | NDIGITS | DPTOK | EXPOK)) ==
- (SIGNOK | NDIGITS | DPTOK | EXPOK))
+ && (flags & (NDIGITS | DPTOK | EXPOK)) ==
+ (NDIGITS | DPTOK | EXPOK))
{
flags &= ~(SIGNOK | DPTOK | EXPOK | NDIGITS);
nancount = 1;
goto fok;
}
- else if (nancount == 2)
+ if (nancount == 2)
{
nancount = 3;
goto fok;
}
+ if (infcount == 1 || infcount == 4)
+ {
+ infcount++;
+ goto fok;
+ }
break;
case 'a':
case 'A':
@@ -1180,6 +1184,46 @@
goto fok;
}
break;
+ case 'i':
+ case 'I':
+ if (infcount == 0
+ && (flags & (NDIGITS | DPTOK | EXPOK)) ==
+ (NDIGITS | DPTOK | EXPOK))
+ {
+ flags &= ~(SIGNOK | DPTOK | EXPOK | NDIGITS);
+ infcount = 1;
+ goto fok;
+ }
+ if (infcount == 3 || infcount == 5)
+ {
+ infcount++;
+ goto fok;
+ }
+ break;
+ case 'f':
+ case 'F':
+ if (infcount == 2)
+ {
+ infcount = 3;
+ goto fok;
+ }
+ break;
+ case 't':
+ case 'T':
+ if (infcount == 6)
+ {
+ infcount = 7;
+ goto fok;
+ }
+ break;
+ case 'y':
+ case 'Y':
+ if (infcount == 7)
+ {
+ infcount = 8;
+ goto fok;
+ }
+ break;
case '.':
if (flags & DPTOK)
{
@@ -1212,7 +1256,7 @@
*p++ = c;
fskip:
width--;
- ++nread;
+ ++nread;
if (--fp->_r > 0)
fp->_p++;
else
@@ -1221,24 +1265,48 @@
}
if (zeroes)
flags &= ~NDIGITS;
- /* We may have a 'N' or possibly even a 'Na' as the start of 'NaN',
- only to run out of chars before it was complete (or having
- encountered a non- matching char). So check here if we have an
- outstanding nancount, and if so put back the chars we did
- swallow and treat as a failed match. */
- if (nancount && nancount != 3)
- {
- /* Ok... what are we supposed to do in the event that the
- __srefill call above was triggered in the middle of the partial
- 'NaN' and so we can't put it all back? */
- while (nancount-- && (p > buf))
- {
- ungetc (*(u_char *)--p, fp);
- --nread;
- }
- goto match_failure;
- }
- /*
+ /* We may have 'N' or possibly even [sign] 'N' 'a' as the
+ start of 'NaN', only to run out of chars before it was
+ complete (or having encountered a non-matching char). So
+ check here if we have an outstanding nancount, and if so
+ put back the chars we did swallow and treat as a failed
+ match.
+
+ FIXME - we still don't handle NAN([0xdigits]). */
+ if (nancount - 1 < 2) /* nancount != 0 && nancount < 3 */
+ {
+ /* Newlib's ungetc works even if we called __srefill in
+ the middle of a partial parse, but POSIX does not
+ guarantee that in all implementations of ungetc. */
+ while (p > buf)
+ {
+ ungetc (*--p, fp); /* [-+nNaA] */
+ --nread;
+ }
+ goto match_failure;
+ }
+ /* Likewise for 'inf' and 'infinity'. But be careful that
+ 'infinite' consumes only 3 characters, leaving the stream
+ at the second 'i'. */
+ if (infcount - 1 < 7) /* infcount != 0 && infcount < 8 */
+ {
+ if (infcount >= 3) /* valid 'inf', but short of 'infinity' */
+ while (infcount-- > 3)
+ {
+ ungetc (*--p, fp); /* [iInNtT] */
+ --nread;
+ }
+ else
+ {
+ while (p > buf)
+ {
+ ungetc (*--p, fp); /* [-+iInN] */
+ --nread;
+ }
+ goto match_failure;
+ }
+ }
+ /*
* If no digits, might be missing exponent digits
* (just give back the exponent) or might be missing
* regular digits, but had sign and/or decimal point.
@@ -1249,22 +1317,22 @@
{
/* no digits at all */
while (p > buf)
- {
- ungetc (*(u_char *)--p, fp);
- --nread;
- }
+ {
+ ungetc (*--p, fp); /* [-+.] */
+ --nread;
+ }
goto match_failure;
}
/* just a bad exponent (e and maybe sign) */
- c = *(u_char *)-- p;
- --nread;
+ c = *--p;
+ --nread;
if (c != 'e' && c != 'E')
{
- _CAST_VOID ungetc (c, fp); /* sign */
- c = *(u_char *)-- p;
- --nread;
+ ungetc (c, fp); /* [-+] */
+ c = *--p;
+ --nread;
}
- _CAST_VOID ungetc (c, fp);
+ ungetc (c, fp); /* [eE] */
}
if ((flags & SUPPRESS) == 0)
{





Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]