scanf and "-0x", "-nan", "inf"
Jeff Johnston
jjohnstn@redhat.com
Wed May 23 20:52:00 GMT 2007
Ok.
-- Jeff J.
Eric Blake wrote:
> A couple more scanf bugs. OK to apply this? (and ping on my other scanf patch
> for pos args)
>
> sscanf("-nan", "%e", &float1) was failing, instead of setting float1 to NaN
>
> sscanf("-0x", "%i%c", &int1, &ch) was failing, instead of setting int1 to 0 and
> ch to 'x'
>
> sscanf("infinity", "%e", &float1) was failing, instead of setting float1 to
> infinity
>
> This patch also fixes a bug only triggered by %S in multibyte locales where
> 0xff is considered part of an incomplete multibyte whitespace sequence and
> where char is signed (is there such a locale?). The code lacked a cast to
> unsigned char, so it was calling the equivalent of ungetc(EOF) instead of the
> intended ungetc(0xff). I audited all the other uses of ungetc to confirm that
> no cast is necessary there, because the only characters we could possibly be
> pushing back are 7-bit ASCII.
>
> Bugs that I am still aware of:
>
> sscanf("nan():", "%e%c", &float1, &ch) populates ch with '(' instead of ':'
> (i.e., n-char-sequence NaNs are not parsed)
>
> sscanf("33554430.999999999999", "%e", &float1) populates float1 with 33554432
> instead of 33554430 (i.e., double rounding occurs because strtod is used; but
> even switching scanf to strtof would not help until strtof is fixed to do the
> parse itself rather than wrapping strtod)
>
> sscanf("0x1p0", "%e", &float1) populates float1 with 0 instead of 1 (i.e., hex
> floats are not parsed) - could be made conditional on
> --enable-newlib-io-c99-formats
>
> 2007-05-22 Eric Blake <ebb9@byu.net>
>
> * libc/stdio/vfscanf.c (__SVFSCANF_R): Fix %i scanning of "-0x".
> Support "-nan" and "inf" for %e. Audit usage of ungetc to fix bug
> in %S in multibyte locales.
>
> --- libc/stdio/vfscanf.c 21 May 2007 12:39:03 -0000
> +++ libc/stdio/vfscanf.c 22 May 2007 14:46:02 -0000
> @@ -825,10 +825,10 @@
> *wcp = L'\0';
> if (mbslen != (size_t)-2) /* Incomplete sequence */
> {
> - if (iswspace(*wcp))
> + if (iswspace(*wcp))
> {
> while (n != 0)
> - ungetc (buf[--n], fp);
> + ungetc ((unsigned char) buf[--n], fp);
> break;
> }
> nread += n;
> @@ -987,15 +987,15 @@
> }
> break;
>
> - /* x ok iff flag still set & 2nd char */
> + /* x ok iff flag still set & single 0 seen */
> case 'x':
> case 'X':
> - if (flags & PFXOK && p == buf + 1)
> + if ((flags & (PFXOK | NZDIGITS)) == PFXOK)
> {
> base = 16;/* if %i */
> flags &= ~PFXOK;
> /* We must reset the NZDIGITS and NDIGITS
> - flags that would have been unset by seeing
> + flags that would have been unset by seeing
> the zero that preceded the X or x. */
> flags |= NZDIGITS | NDIGITS;
> goto ok;
> @@ -1024,18 +1024,16 @@
> * If we had only a sign, it is no good; push back the sign.
> * If the number ends in `x', it was [sign] '0' 'x', so push back
> * the x and treat it as [sign] '0'.
> + * Use of ungetc here and below assumes ASCII encoding; we are only
> + * pushing back 7-bit characters, so casting to unsigned char is
> + * not necessary.
> */
> if (flags & NDIGITS)
> {
> if (p > buf)
> - _CAST_VOID ungetc (*(u_char *)-- p, fp);
> - goto match_failure;
> - }
> - c = ((u_char *) p)[-1];
> - if (c == 'x' || c == 'X')
> - {
> - --p;
> - /*(void)*/ ungetc (c, fp);
> + ungetc (*--p, fp); /* [-+xX] */
> + if (p == buf)
> + goto match_failure;
> }
> if ((flags & SUPPRESS) == 0)
> {
> @@ -1096,7 +1094,8 @@
> long zeroes, exp_adjust;
> char *exp_start = NULL;
> unsigned width_left = 0;
> - int nancount = 0;
> + unsigned char nancount = 0;
> + unsigned char infcount = 0;
> #ifdef hardway
> if (width == 0 || width > sizeof (buf) - 1)
> #else
> @@ -1141,7 +1140,7 @@
> case '7':
> case '8':
> case '9':
> - if (nancount == 0)
> + if (nancount + infcount == 0)
> {
> flags &= ~(SIGNOK | NDIGITS);
> goto fok;
> @@ -1159,18 +1158,23 @@
> case 'n':
> case 'N':
> if (nancount == 0
> - && (flags & (SIGNOK | NDIGITS | DPTOK | EXPOK)) ==
> - (SIGNOK | NDIGITS | DPTOK | EXPOK))
> + && (flags & (NDIGITS | DPTOK | EXPOK)) ==
> + (NDIGITS | DPTOK | EXPOK))
> {
> flags &= ~(SIGNOK | DPTOK | EXPOK | NDIGITS);
> nancount = 1;
> goto fok;
> }
> - else if (nancount == 2)
> + if (nancount == 2)
> {
> nancount = 3;
> goto fok;
> }
> + if (infcount == 1 || infcount == 4)
> + {
> + infcount++;
> + goto fok;
> + }
> break;
> case 'a':
> case 'A':
> @@ -1180,6 +1184,46 @@
> goto fok;
> }
> break;
> + case 'i':
> + case 'I':
> + if (infcount == 0
> + && (flags & (NDIGITS | DPTOK | EXPOK)) ==
> + (NDIGITS | DPTOK | EXPOK))
> + {
> + flags &= ~(SIGNOK | DPTOK | EXPOK | NDIGITS);
> + infcount = 1;
> + goto fok;
> + }
> + if (infcount == 3 || infcount == 5)
> + {
> + infcount++;
> + goto fok;
> + }
> + break;
> + case 'f':
> + case 'F':
> + if (infcount == 2)
> + {
> + infcount = 3;
> + goto fok;
> + }
> + break;
> + case 't':
> + case 'T':
> + if (infcount == 6)
> + {
> + infcount = 7;
> + goto fok;
> + }
> + break;
> + case 'y':
> + case 'Y':
> + if (infcount == 7)
> + {
> + infcount = 8;
> + goto fok;
> + }
> + break;
> case '.':
> if (flags & DPTOK)
> {
> @@ -1212,7 +1256,7 @@
> *p++ = c;
> fskip:
> width--;
> - ++nread;
> + ++nread;
> if (--fp->_r > 0)
> fp->_p++;
> else
> @@ -1221,24 +1265,48 @@
> }
> if (zeroes)
> flags &= ~NDIGITS;
> - /* We may have a 'N' or possibly even a 'Na' as the start of 'NaN',
> - only to run out of chars before it was complete (or having
> - encountered a non- matching char). So check here if we have an
> - outstanding nancount, and if so put back the chars we did
> - swallow and treat as a failed match. */
> - if (nancount && nancount != 3)
> - {
> - /* Ok... what are we supposed to do in the event that the
> - __srefill call above was triggered in the middle of the partial
> - 'NaN' and so we can't put it all back? */
> - while (nancount-- && (p > buf))
> - {
> - ungetc (*(u_char *)--p, fp);
> - --nread;
> - }
> - goto match_failure;
> - }
> - /*
> + /* We may have 'N' or possibly even [sign] 'N' 'a' as the
> + start of 'NaN', only to run out of chars before it was
> + complete (or having encountered a non-matching char). So
> + check here if we have an outstanding nancount, and if so
> + put back the chars we did swallow and treat as a failed
> + match.
> +
> + FIXME - we still don't handle NAN([0xdigits]). */
> + if (nancount - 1 < 2) /* nancount != 0 && nancount < 3 */
> + {
> + /* Newlib's ungetc works even if we called __srefill in
> + the middle of a partial parse, but POSIX does not
> + guarantee that in all implementations of ungetc. */
> + while (p > buf)
> + {
> + ungetc (*--p, fp); /* [-+nNaA] */
> + --nread;
> + }
> + goto match_failure;
> + }
> + /* Likewise for 'inf' and 'infinity'. But be careful that
> + 'infinite' consumes only 3 characters, leaving the stream
> + at the second 'i'. */
> + if (infcount - 1 < 7) /* infcount != 0 && infcount < 8 */
> + {
> + if (infcount >= 3) /* valid 'inf', but short of 'infinity' */
> + while (infcount-- > 3)
> + {
> + ungetc (*--p, fp); /* [iInNtT] */
> + --nread;
> + }
> + else
> + {
> + while (p > buf)
> + {
> + ungetc (*--p, fp); /* [-+iInN] */
> + --nread;
> + }
> + goto match_failure;
> + }
> + }
> + /*
> * If no digits, might be missing exponent digits
> * (just give back the exponent) or might be missing
> * regular digits, but had sign and/or decimal point.
> @@ -1249,22 +1317,22 @@
> {
> /* no digits at all */
> while (p > buf)
> - {
> - ungetc (*(u_char *)--p, fp);
> - --nread;
> - }
> + {
> + ungetc (*--p, fp); /* [-+.] */
> + --nread;
> + }
> goto match_failure;
> }
> /* just a bad exponent (e and maybe sign) */
> - c = *(u_char *)-- p;
> - --nread;
> + c = *--p;
> + --nread;
> if (c != 'e' && c != 'E')
> {
> - _CAST_VOID ungetc (c, fp); /* sign */
> - c = *(u_char *)-- p;
> - --nread;
> + ungetc (c, fp); /* [-+] */
> + c = *--p;
> + --nread;
> }
> - _CAST_VOID ungetc (c, fp);
> + ungetc (c, fp); /* [eE] */
> }
> if ((flags & SUPPRESS) == 0)
> {
>
>
>
More information about the Newlib
mailing list