]> sourceware.org Git - glibc.git/blob - stdio-common/vfscanf.c
Fix parsing of decimal point after +-.
[glibc.git] / stdio-common / vfscanf.c
1 /* Copyright (C) 1991-2002, 2003, 2004 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, write to the Free
16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17 02111-1307 USA. */
18
19 #include <assert.h>
20 #include <errno.h>
21 #include <limits.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stdio.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <wchar.h>
29 #include <wctype.h>
30 #include <bits/libc-lock.h>
31 #include <locale/localeinfo.h>
32
/* Choose what LONGLONG expands to: `long long' when __GNUC__ is defined
   (HAVE_LONGLONG is defined as well in that case), plain `long' otherwise. */
33 #ifdef __GNUC__
34 # define HAVE_LONGLONG
35 # define LONGLONG long long
36 #else
37 # define LONGLONG long
38 #endif
39
40 /* Determine whether we have to handle `long long' at all: need_longlong is 0 when LONG_MAX equals LONG_LONG_MAX and 1 otherwise. */
41 #if LONG_MAX == LONG_LONG_MAX
42 # define need_longlong 0
43 #else
44 # define need_longlong 1
45 #endif
46
47 /* Determine whether we have to handle `long': need_long is 0 when INT_MAX equals LONG_MAX and 1 otherwise. */
48 #if INT_MAX == LONG_MAX
49 # define need_long 0
50 #else
51 # define need_long 1
52 #endif
53
54 /* These are the flags of a conversion specification.  Each one is a distinct bit; the scanner ORs them into its `flags' variable while parsing the format. */
55 #define LONG 0x001 /* l: long or double */
56 #define LONGDBL 0x002 /* L: long long or long double */
57 #define SHORT 0x004 /* h: short */
58 #define SUPPRESS 0x008 /* *: suppress assignment */
59 #define POINTER 0x010 /* weird %p pointer (`fake hex') */
60 #define NOSKIP 0x020 /* do not skip blanks */
61 #define WIDTH 0x040 /* width was given */
62 #define GROUP 0x080 /* ': group numbers */
63 #define MALLOC 0x100 /* a: malloc strings */
64 #define CHAR 0x200 /* hh: char */
65 #define I18N 0x400 /* I: use locale's digits */
66
67
68 #include <locale/localeinfo.h>
69 #include <libioP.h>
70 #include <libio.h>
71
/* From here on `va_list' expands to _IO_va_list. */
72 #undef va_list
73 #define va_list _IO_va_list
74
/* Character-access layer.  The same function body is compiled either for
   wide streams (COMPILE_WSCANF defined) or for byte streams; the macros
   below hide the difference.  ungetc, ungetc_not_eof and inchar are not
   self-contained: they read and modify the enclosing function's locals
   `c', `s', `read_in' and `inchar_errno'. */
75 #ifdef COMPILE_WSCANF
/* Push C back onto S and decrement read_in, unless C is WEOF (then do
   nothing). */
76 # define ungetc(c, s) ((void) (c == WEOF \
77 || (--read_in, \
78 INTUSE(_IO_sputbackwc) (s, c))))
/* Same as ungetc but without the WEOF test: always decrements read_in and
   pushes C back. */
79 # define ungetc_not_eof(c, s) ((void) (--read_in, \
80 INTUSE(_IO_sputbackwc) (s, c)))
/* Fetch the next wide character into `c' and yield it.  If `c' already
   holds WEOF the stream is not touched: errno is restored from
   inchar_errno and WEOF is yielded again.  Otherwise a successful read
   increments read_in, and a read returning WEOF saves errno into
   inchar_errno. */
81 # define inchar() (c == WEOF ? ((errno = inchar_errno), WEOF) \
82 : ((c = _IO_getwc_unlocked (s)), \
83 (void) (c != WEOF \
84 ? ++read_in \
85 : (size_t) (inchar_errno = errno)), c))
86
/* Wide-character versions of the copy and classification helpers. */
87 # define MEMCPY(d, s, n) __wmemcpy (d, s, n)
88 # define ISSPACE(Ch) iswspace (Ch)
89 # define ISDIGIT(Ch) iswdigit (Ch)
90 # define ISXDIGIT(Ch) iswxdigit (Ch)
91 # define TOLOWER(Ch) towlower (Ch)
/* Return WEOF from the enclosing function unless _IO_fwide (s, 1)
   yields 1. */
92 # define ORIENT if (_IO_fwide (s, 1) != 1) return WEOF
/* Redirect the __strto*_internal names to their __wcsto*_internal
   counterparts. */
93 # define __strtoll_internal __wcstoll_internal
94 # define __strtoull_internal __wcstoull_internal
95 # define __strtol_internal __wcstol_internal
96 # define __strtoul_internal __wcstoul_internal
97 # define __strtold_internal __wcstold_internal
98 # define __strtod_internal __wcstod_internal
99 # define __strtof_internal __wcstof_internal
100
/* L_ makes a wide literal; CHAR_T, UCHAR_T and WINT_T are the character
   types used by the shared code, and EOF is redefined to WEOF. */
101 # define L_(Str) L##Str
102 # define CHAR_T wchar_t
103 # define UCHAR_T unsigned int
104 # define WINT_T wint_t
105 # undef EOF
106 # define EOF WEOF
107 #else
/* Push C back onto S (as an unsigned char) and decrement read_in, unless
   (int) C equals EOF (then do nothing). */
108 # define ungetc(c, s) ((void) ((int) c == EOF \
109 || (--read_in, \
110 INTUSE(_IO_sputbackc) (s, (unsigned char) c))))
/* Same as ungetc but without the EOF test. */
111 # define ungetc_not_eof(c, s) ((void) (--read_in, \
112 INTUSE(_IO_sputbackc) (s, (unsigned char) c)))
/* Fetch the next byte into `c' and yield it.  If `c' already holds EOF the
   stream is not touched: errno is restored from inchar_errno and EOF is
   yielded again.  Otherwise a successful read increments read_in, and a
   read returning EOF saves errno into inchar_errno. */
113 # define inchar() (c == EOF ? ((errno = inchar_errno), EOF) \
114 : ((c = _IO_getc_unlocked (s)), \
115 (void) (c != EOF \
116 ? ++read_in \
117 : (size_t) (inchar_errno = errno)), c))
/* Byte versions of the copy and classification helpers; the __*_l calls
   take `loc', a local of the enclosing function. */
118 # define MEMCPY(d, s, n) memcpy (d, s, n)
119 # define ISSPACE(Ch) __isspace_l (Ch, loc)
120 # define ISDIGIT(Ch) __isdigit_l (Ch, loc)
121 # define ISXDIGIT(Ch) __isxdigit_l (Ch, loc)
122 # define TOLOWER(Ch) __tolower_l ((unsigned char) (Ch), loc)
/* Return EOF from the enclosing function when _IO_vtable_offset (s) is 0
   and _IO_fwide (s, -1) does not yield -1. */
123 # define ORIENT if (_IO_vtable_offset (s) == 0 \
124 && _IO_fwide (s, -1) != -1) \
125 return EOF
126
/* L_ leaves the literal unchanged; CHAR_T, UCHAR_T and WINT_T are the
   character types used by the shared code. */
127 # define L_(Str) Str
128 # define CHAR_T char
129 # define UCHAR_T unsigned char
130 # define WINT_T int
131 #endif
132
/* Error exits.  Each macro records what went wrong in the enclosing
   function's locals and then jumps to its `errout' label. */

/* Encoding error: errval = 4, errno = EILSEQ. */
133 #define encode_error() do { \
134 errval = 4; \
135 __set_errno (EILSEQ); \
136 goto errout; \
137 } while (0)
/* Conversion (matching) error: errval = 2; `done' is left untouched. */
138 #define conv_error() do { \
139 errval = 2; \
140 goto errout; \
141 } while (0)
/* Input error: errval = 1; `done' becomes EOF only if no assignment has
   been counted yet (done == 0). */
142 #define input_error() do { \
143 errval = 1; \
144 if (done == 0) done = EOF; \
145 goto errout; \
146 } while (0)
/* Out of memory: errno = ENOMEM and `done' is forced to EOF; errval is not
   modified. */
147 #define memory_error() do { \
148 __set_errno (ENOMEM); \
149 done = EOF; \
150 goto errout; \
151 } while (0)
/* Validate the arguments before any scanning is done.  CHECK_FILE is
   applied to S first.  If S has _IO_NO_READS set, errno becomes EBADF and
   the enclosing function returns EOF; if FORMAT is NULL, MAYBE_SET_EINVAL
   is invoked and the enclosing function returns EOF. */
152 #define ARGCHECK(s, format) \
153 do \
154 { \
155 /* Check file argument for consistency. */ \
156 CHECK_FILE (s, EOF); \
157 if (s->_flags & _IO_NO_READS) \
158 { \
159 __set_errno (EBADF); \
160 return EOF; \
161 } \
162 else if (format == NULL) \
163 { \
164 MAYBE_SET_EINVAL; \
165 return EOF; \
166 } \
167 } while (0)
/* Lock stream S: start a cleanup region with _IO_funlockfile as its
   handler, then take the stream lock.
   NOTE(review): this expands to two statements with no do/while wrapper,
   so it must not be used as the body of an unbraced `if'.  It presumably
   has to be paired with UNLOCK_STREAM in the same block -- confirm against
   bits/libc-lock.h. */
168 #define LOCK_STREAM(S) \
169 __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \
170 _IO_flockfile (S)
/* Unlock stream S and end the cleanup region (the 0 argument is passed to
   __libc_cleanup_region_end).  Also two statements, see above. */
171 #define UNLOCK_STREAM(S) \
172 _IO_funlockfile (S); \
173 __libc_cleanup_region_end (0)
174
175
176 /* Read formatted input from S according to the format string
177 FORMAT, using the argument list in ARG.
178 Return the number of assignments made, or -1 for an input error. */
179 #ifdef COMPILE_WSCANF
180 int
181 _IO_vfwscanf (s, format, argptr, errp)
182 _IO_FILE *s;
183 const wchar_t *format;
184 _IO_va_list argptr;
185 int *errp;
186 #else
187 int
188 _IO_vfscanf (s, format, argptr, errp)
189 _IO_FILE *s;
190 const char *format;
191 _IO_va_list argptr;
192 int *errp;
193 #endif
194 {
195 va_list arg;
196 register const CHAR_T *f = format;
197 register UCHAR_T fc; /* Current character of the format. */
198 register WINT_T done = 0; /* Assignments done. */
199 register size_t read_in = 0; /* Chars read in. */
200 register WINT_T c = 0; /* Last char read. */
201 register int width; /* Maximum field width. */
202 register int flags; /* Modifiers for current format element. */
203 int errval = 0;
204 #ifndef COMPILE_WSCANF
205 __locale_t loc = _NL_CURRENT_LOCALE;
206 struct locale_data *const curctype = loc->__locales[LC_CTYPE];
207 #endif
208
209 /* Errno of last failed inchar call. */
210 int inchar_errno = 0;
211 /* Status for reading F-P nums. */
212 char got_dot, got_e, negative;
213 /* If a [...] is a [^...]. */
214 CHAR_T not_in;
215 #define exp_char not_in
216 /* Base for integral numbers. */
217 int base;
218 /* Signedness for integral numbers. */
219 int number_signed;
220 #define is_hexa number_signed
221 /* Decimal point character. */
222 #ifdef COMPILE_WSCANF
223 wint_t decimal;
224 #else
225 const char *decimal;
226 #endif
227 /* The thousands character of the current locale. */
228 #ifdef COMPILE_WSCANF
229 wint_t thousands;
230 #else
231 const char *thousands;
232 #endif
233 /* State for the conversions. */
234 mbstate_t state;
235 /* Integral holding variables. */
236 union
237 {
238 long long int q;
239 unsigned long long int uq;
240 long int l;
241 unsigned long int ul;
242 } num;
243 /* Character-buffer pointer. */
244 char *str = NULL;
245 wchar_t *wstr = NULL;
246 char **strptr = NULL;
247 ssize_t strsize = 0;
248 /* We must not react on white spaces immediately because they can
249 possibly be matched even if in the input stream no character is
250 available anymore. */
251 int skip_space = 0;
252 /* Nonzero if we are reading a pointer. */
253 int read_pointer;
254 /* Workspace. */
255 CHAR_T *tw; /* Temporary pointer. */
256 CHAR_T *wp = NULL; /* Workspace. */
257 size_t wpmax = 0; /* Maximal size of workspace. */
258 size_t wpsize; /* Currently used bytes in workspace. */
259 #define ADDW(Ch) \
260 do \
261 { \
262 if (wpsize == wpmax) \
263 { \
264 CHAR_T *old = wp; \
265 wpmax = (UCHAR_MAX + 1 > 2 * wpmax ? UCHAR_MAX + 1 : 2 * wpmax); \
266 wp = (CHAR_T *) alloca (wpmax * sizeof (wchar_t)); \
267 if (old != NULL) \
268 MEMCPY (wp, old, wpsize); \
269 } \
270 wp[wpsize++] = (Ch); \
271 } \
272 while (0)
273
274 #ifdef __va_copy
275 __va_copy (arg, argptr);
276 #else
277 arg = (va_list) argptr;
278 #endif
279
280 #ifdef ORIENT
281 ORIENT;
282 #endif
283
284 ARGCHECK (s, format);
285
286 {
287 #ifndef COMPILE_WSCANF
288 struct locale_data *const curnumeric = loc->__locales[LC_NUMERIC];
289 #endif
290
291 /* Figure out the decimal point character. */
292 #ifdef COMPILE_WSCANF
293 decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
294 #else
295 decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string;
296 #endif
297 /* Figure out the thousands separator character. */
298 #ifdef COMPILE_WSCANF
299 thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
300 #else
301 thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string;
302 if (*thousands == '\0')
303 thousands = NULL;
304 #endif
305 }
306
307 /* Lock the stream. */
308 LOCK_STREAM (s);
309
310
311 #ifndef COMPILE_WSCANF
312 /* From now on we use `state' to convert the format string. */
313 memset (&state, '\0', sizeof (state));
314 #endif
315
316 /* Run through the format string. */
317 while (*f != '\0')
318 {
319 unsigned int argpos;
320 /* Extract the next argument, which is of type TYPE.
321 For a %N$... spec, this is the Nth argument from the beginning;
322 otherwise it is the next argument after the state now in ARG. */
323 #ifdef __va_copy
324 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
325 ({ unsigned int pos = argpos; \
326 va_list arg; \
327 __va_copy (arg, argptr); \
328 while (--pos > 0) \
329 (void) va_arg (arg, void *); \
330 va_arg (arg, type); \
331 }))
332 #else
333 # if 0
334 /* XXX Possible optimization. */
335 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
336 ({ va_list arg = (va_list) argptr; \
337 arg = (va_list) ((char *) arg \
338 + (argpos - 1) \
339 * __va_rounded_size (void *)); \
340 va_arg (arg, type); \
341 }))
342 # else
343 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
344 ({ unsigned int pos = argpos; \
345 va_list arg = (va_list) argptr; \
346 while (--pos > 0) \
347 (void) va_arg (arg, void *); \
348 va_arg (arg, type); \
349 }))
350 # endif
351 #endif
352
353 #ifndef COMPILE_WSCANF
354 if (!isascii ((unsigned char) *f))
355 {
356 /* Non-ASCII, may be a multibyte. */
357 int len = __mbrlen (f, strlen (f), &state);
358 if (len > 0)
359 {
360 do
361 {
362 c = inchar ();
363 if (c == EOF)
364 input_error ();
365 else if (c != (unsigned char) *f++)
366 {
367 ungetc_not_eof (c, s);
368 conv_error ();
369 }
370 }
371 while (--len > 0);
372 continue;
373 }
374 }
375 #endif
376
377 fc = *f++;
378 if (fc != '%')
379 {
380 /* Remember to skip spaces. */
381 if (ISSPACE (fc))
382 {
383 skip_space = 1;
384 continue;
385 }
386
387 /* Read a character. */
388 c = inchar ();
389
390 /* Characters other than format specs must just match. */
391 if (c == EOF)
392 input_error ();
393
394 /* We saw white space char as the last character in the format
395 string. Now it's time to skip all leading white space. */
396 if (skip_space)
397 {
398 while (ISSPACE (c))
399 if (inchar () == EOF)
400 input_error ();
401 skip_space = 0;
402 }
403
404 if (c != fc)
405 {
406 ungetc (c, s);
407 conv_error ();
408 }
409
410 continue;
411 }
412
413 /* This is the start of the conversion string. */
414 flags = 0;
415
416 /* Not yet decided whether we read a pointer or not. */
417 read_pointer = 0;
418
419 /* Initialize state of modifiers. */
420 argpos = 0;
421
422 /* Prepare temporary buffer. */
423 wpsize = 0;
424
425 /* Check for a positional parameter specification. */
426 if (ISDIGIT ((UCHAR_T) *f))
427 {
428 argpos = (UCHAR_T) *f++ - L_('0');
429 while (ISDIGIT ((UCHAR_T) *f))
430 argpos = argpos * 10 + ((UCHAR_T) *f++ - L_('0'));
431 if (*f == L_('$'))
432 ++f;
433 else
434 {
435 /* Oops; that was actually the field width. */
436 width = argpos;
437 flags |= WIDTH;
438 argpos = 0;
439 goto got_width;
440 }
441 }
442
443 /* Check for the assignment-suppressing, the number grouping flag,
444 and the signal to use the locale's digit representation. */
445 while (*f == L_('*') || *f == L_('\'') || *f == L_('I'))
446 switch (*f++)
447 {
448 case L_('*'):
449 flags |= SUPPRESS;
450 break;
451 case L_('\''):
452 flags |= GROUP;
453 break;
454 case L_('I'):
455 flags |= I18N;
456 break;
457 }
458
459 /* We have seen width. */
460 if (ISDIGIT ((UCHAR_T) *f))
461 flags |= WIDTH;
462
463 /* Find the maximum field width. */
464 width = 0;
465 while (ISDIGIT ((UCHAR_T) *f))
466 {
467 width *= 10;
468 width += (UCHAR_T) *f++ - L_('0');
469 }
470 got_width:
471 if (width == 0)
472 width = -1;
473
474 /* Check for type modifiers. */
475 switch (*f++)
476 {
477 case L_('h'):
478 /* ints are short ints or chars. */
479 if (*f == L_('h'))
480 {
481 ++f;
482 flags |= CHAR;
483 }
484 else
485 flags |= SHORT;
486 break;
487 case L_('l'):
488 if (*f == L_('l'))
489 {
490 /* A double `l' is equivalent to an `L'. */
491 ++f;
492 flags |= LONGDBL | LONG;
493 }
494 else
495 /* ints are long ints. */
496 flags |= LONG;
497 break;
498 case L_('q'):
499 case L_('L'):
500 /* doubles are long doubles, and ints are long long ints. */
501 flags |= LONGDBL | LONG;
502 break;
503 case L_('a'):
504 /* The `a' is used as a flag only if followed by `s', `S' or
505 `['. */
506 if (*f != L_('s') && *f != L_('S') && *f != L_('['))
507 {
508 --f;
509 break;
510 }
511 /* String conversions (%s, %[) take a `char **'
512 arg and fill it in with a malloc'd pointer. */
513 flags |= MALLOC;
514 break;
515 case L_('z'):
516 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
517 flags |= LONGDBL;
518 else if (sizeof (size_t) > sizeof (unsigned int))
519 flags |= LONG;
520 break;
521 case L_('j'):
522 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
523 flags |= LONGDBL;
524 else if (sizeof (uintmax_t) > sizeof (unsigned int))
525 flags |= LONG;
526 break;
527 case L_('t'):
528 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
529 flags |= LONGDBL;
530 else if (sizeof (ptrdiff_t) > sizeof (int))
531 flags |= LONG;
532 break;
533 default:
534 /* Not a recognized modifier. Backup. */
535 --f;
536 break;
537 }
538
539 /* End of the format string? */
540 if (*f == L_('\0'))
541 conv_error ();
542
543 /* Find the conversion specifier. */
544 fc = *f++;
545 if (skip_space || (fc != L_('[') && fc != L_('c')
546 && fc != L_('C') && fc != L_('n')))
547 {
548 /* Eat whitespace. */
549 int save_errno = errno;
550 errno = 0;
551 do
552 if (inchar () == EOF && errno == EINTR)
553 input_error ();
554 while (ISSPACE (c));
555 errno = save_errno;
556 ungetc (c, s);
557 skip_space = 0;
558 }
559
560 switch (fc)
561 {
562 case L_('%'): /* Must match a literal '%'. */
563 c = inchar ();
564 if (c == EOF)
565 input_error ();
566 if (c != fc)
567 {
568 ungetc_not_eof (c, s);
569 conv_error ();
570 }
571 break;
572
573 case L_('n'): /* Answer number of assignments done. */
574 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
575 with the 'n' conversion specifier. */
576 if (!(flags & SUPPRESS))
577 {
578 /* Don't count the read-ahead. */
579 if (need_longlong && (flags & LONGDBL))
580 *ARG (long long int *) = read_in;
581 else if (need_long && (flags & LONG))
582 *ARG (long int *) = read_in;
583 else if (flags & SHORT)
584 *ARG (short int *) = read_in;
585 else if (!(flags & CHAR))
586 *ARG (int *) = read_in;
587 else
588 *ARG (char *) = read_in;
589
590 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
591 /* We have a severe problem here. The ISO C standard
592 contradicts itself in explaining the effect of the %n
593 format in `scanf'. While in ISO C:1990 and the ISO C
594 Amendment 1:1995 the result is described as
595
596 Execution of a %n directive does not affect the
597 assignment count returned at the completion of
598 execution of the f(w)scanf function.
599
600 in ISO C Corrigendum 1:1994 the following was added:
601
602 Subclause 7.9.6.2
603 Add the following fourth example:
604 In:
605 #include <stdio.h>
606 int d1, d2, n1, n2, i;
607 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
608 the value 123 is assigned to d1 and the value 3 to n1.
609 Because %n can never get an input failure the value
610 of 3 is also assigned to n2. The value of d2 is not
611 affected. The value 3 is assigned to i.
612
613 We go for now with the historically correct code from ISO C,
614 i.e., we don't count the %n assignments. If this should ever
615 prove to be wrong, just remove the #ifdef above. */
616 ++done;
617 #endif
618 }
619 break;
620
621 case L_('c'): /* Match characters. */
622 if ((flags & LONG) == 0)
623 {
624 if (!(flags & SUPPRESS))
625 {
626 str = ARG (char *);
627 if (str == NULL)
628 conv_error ();
629 }
630
631 c = inchar ();
632 if (c == EOF)
633 input_error ();
634
635 if (width == -1)
636 width = 1;
637
638 #ifdef COMPILE_WSCANF
639 /* We have to convert the wide character(s) into multibyte
640 characters and store the result. */
641 memset (&state, '\0', sizeof (state));
642
643 do
644 {
645 size_t n;
646
647 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
648 if (n == (size_t) -1)
649 /* No valid wide character. */
650 input_error ();
651
652 /* Increment the output pointer. Even if we don't
653 write anything. */
654 str += n;
655 }
656 while (--width > 0 && inchar () != EOF);
657 #else
658 if (!(flags & SUPPRESS))
659 {
660 do
661 *str++ = c;
662 while (--width > 0 && inchar () != EOF);
663 }
664 else
665 while (--width > 0 && inchar () != EOF);
666 #endif
667
668 if (!(flags & SUPPRESS))
669 ++done;
670
671 break;
672 }
673 /* FALLTHROUGH */
674 case L_('C'):
675 if (!(flags & SUPPRESS))
676 {
677 wstr = ARG (wchar_t *);
678 if (wstr == NULL)
679 conv_error ();
680 }
681
682 c = inchar ();
683 if (c == EOF)
684 input_error ();
685
686 #ifdef COMPILE_WSCANF
687 /* Just store the incoming wide characters. */
688 if (!(flags & SUPPRESS))
689 {
690 do
691 *wstr++ = c;
692 while (--width > 0 && inchar () != EOF);
693 }
694 else
695 while (--width > 0 && inchar () != EOF);
696 #else
697 {
698 /* We have to convert the multibyte input sequence to wide
699 characters. */
700 char buf[1];
701 mbstate_t cstate;
702
703 memset (&cstate, '\0', sizeof (cstate));
704
705 do
706 {
707 /* This is what we present the mbrtowc function first. */
708 buf[0] = c;
709
710 while (1)
711 {
712 size_t n;
713
714 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
715 buf, 1, &cstate);
716
717 if (n == (size_t) -2)
718 {
719 /* Possibly correct character, just not enough
720 input. */
721 if (inchar () == EOF)
722 encode_error ();
723
724 buf[0] = c;
725 continue;
726 }
727
728 if (n != 1)
729 encode_error ();
730
731 /* We have a match. */
732 break;
733 }
734
735 /* Advance the result pointer. */
736 ++wstr;
737 }
738 while (--width > 0 && inchar () != EOF);
739 }
740 #endif
741
742 if (!(flags & SUPPRESS))
743 ++done;
744
745 break;
746
747 case L_('s'): /* Read a string. */
748 if (!(flags & LONG))
749 {
750 #define STRING_ARG(Str, Type) \
751 do if (!(flags & SUPPRESS)) \
752 { \
753 if (flags & MALLOC) \
754 { \
755 /* The string is to be stored in a malloc'd buffer. */ \
756 strptr = ARG (char **); \
757 if (strptr == NULL) \
758 conv_error (); \
759 /* Allocate an initial buffer. */ \
760 strsize = 100; \
761 *strptr = (char *) malloc (strsize * sizeof (Type)); \
762 Str = (Type *) *strptr; \
763 } \
764 else \
765 Str = ARG (Type *); \
766 if (Str == NULL) \
767 conv_error (); \
768 } while (0)
769 STRING_ARG (str, char);
770
771 c = inchar ();
772 if (c == EOF)
773 input_error ();
774
775 #ifdef COMPILE_WSCANF
776 memset (&state, '\0', sizeof (state));
777 #endif
778
779 do
780 {
781 if (ISSPACE (c))
782 {
783 ungetc_not_eof (c, s);
784 break;
785 }
786
787 #ifdef COMPILE_WSCANF
788 /* This is quite complicated. We have to convert the
789 wide characters into multibyte characters and then
790 store them. */
791 {
792 size_t n;
793
794 if (!(flags & SUPPRESS) && (flags & MALLOC)
795 && str + MB_CUR_MAX >= *strptr + strsize)
796 {
797 /* We have to enlarge the buffer if the `a' flag
798 was given. */
799 size_t strleng = str - *strptr;
800 char *newstr;
801
802 newstr = (char *) realloc (*strptr, strsize * 2);
803 if (newstr == NULL)
804 {
805 /* Can't allocate that much. Last-ditch
806 effort. */
807 newstr = (char *) realloc (*strptr,
808 strleng + MB_CUR_MAX);
809 if (newstr == NULL)
810 {
811 /* We lose. Oh well. Terminate the
812 string and stop converting,
813 so at least we don't skip any input. */
814 ((char *) (*strptr))[strleng] = '\0';
815 ++done;
816 conv_error ();
817 }
818 else
819 {
820 *strptr = newstr;
821 str = newstr + strleng;
822 strsize = strleng + MB_CUR_MAX;
823 }
824 }
825 else
826 {
827 *strptr = newstr;
828 str = newstr + strleng;
829 strsize *= 2;
830 }
831 }
832
833 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c,
834 &state);
835 if (n == (size_t) -1)
836 encode_error ();
837
838 assert (n <= MB_CUR_MAX);
839 str += n;
840 }
841 #else
842 /* This is easy. */
843 if (!(flags & SUPPRESS))
844 {
845 *str++ = c;
846 if ((flags & MALLOC)
847 && (char *) str == *strptr + strsize)
848 {
849 /* Enlarge the buffer. */
850 str = (char *) realloc (*strptr, 2 * strsize);
851 if (str == NULL)
852 {
853 /* Can't allocate that much. Last-ditch
854 effort. */
855 str = (char *) realloc (*strptr, strsize + 1);
856 if (str == NULL)
857 {
858 /* We lose. Oh well. Terminate the
859 string and stop converting,
860 so at least we don't skip any input. */
861 ((char *) (*strptr))[strsize - 1] = '\0';
862 ++done;
863 conv_error ();
864 }
865 else
866 {
867 *strptr = (char *) str;
868 str += strsize;
869 ++strsize;
870 }
871 }
872 else
873 {
874 *strptr = (char *) str;
875 str += strsize;
876 strsize *= 2;
877 }
878 }
879 }
880 #endif
881 }
882 while ((width <= 0 || --width > 0) && inchar () != EOF);
883
884 if (!(flags & SUPPRESS))
885 {
886 #ifdef COMPILE_WSCANF
887 /* We have to emit the code to get into the initial
888 state. */
889 char buf[MB_LEN_MAX];
890 size_t n = __wcrtomb (buf, L'\0', &state);
891 if (n > 0 && (flags & MALLOC)
892 && str + n >= *strptr + strsize)
893 {
894 /* Enlarge the buffer. */
895 size_t strleng = str - *strptr;
896 char *newstr;
897
898 newstr = (char *) realloc (*strptr, strleng + n + 1);
899 if (newstr == NULL)
900 {
901 /* We lose. Oh well. Terminate the string
902 and stop converting, so at least we don't
903 skip any input. */
904 ((char *) (*strptr))[strleng] = '\0';
905 ++done;
906 conv_error ();
907 }
908 else
909 {
910 *strptr = newstr;
911 str = newstr + strleng;
912 strsize = strleng + n + 1;
913 }
914 }
915
916 str = __mempcpy (str, buf, n);
917 #endif
918 *str++ = '\0';
919
920 if ((flags & MALLOC) && str - *strptr != strsize)
921 {
922 char *cp = (char *) realloc (*strptr, str - *strptr);
923 if (cp != NULL)
924 *strptr = cp;
925 }
926
927 ++done;
928 }
929 break;
930 }
931 /* FALLTHROUGH */
932
933 case L_('S'):
934 {
935 #ifndef COMPILE_WSCANF
936 mbstate_t cstate;
937 #endif
938
939 /* Wide character string. */
940 STRING_ARG (wstr, wchar_t);
941
942 c = inchar ();
943 if (c == EOF)
944 input_error ();
945
946 #ifndef COMPILE_WSCANF
947 memset (&cstate, '\0', sizeof (cstate));
948 #endif
949
950 do
951 {
952 if (ISSPACE (c))
953 {
954 ungetc_not_eof (c, s);
955 break;
956 }
957
958 #ifdef COMPILE_WSCANF
959 /* This is easy. */
960 if (!(flags & SUPPRESS))
961 {
962 *wstr++ = c;
963 if ((flags & MALLOC)
964 && wstr == (wchar_t *) *strptr + strsize)
965 {
966 /* Enlarge the buffer. */
967 wstr = (wchar_t *) realloc (*strptr,
968 (2 * strsize)
969 * sizeof (wchar_t));
970 if (wstr == NULL)
971 {
972 /* Can't allocate that much. Last-ditch
973 effort. */
974 wstr = (wchar_t *) realloc (*strptr,
975 (strsize + 1)
976 * sizeof (wchar_t));
977 if (wstr == NULL)
978 {
979 /* We lose. Oh well. Terminate the string
980 and stop converting, so at least we don't
981 skip any input. */
982 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
983 ++done;
984 conv_error ();
985 }
986 else
987 {
988 *strptr = (char *) wstr;
989 wstr += strsize;
990 ++strsize;
991 }
992 }
993 else
994 {
995 *strptr = (char *) wstr;
996 wstr += strsize;
997 strsize *= 2;
998 }
999 }
1000 }
1001 #else
1002 {
1003 char buf[1];
1004
1005 buf[0] = c;
1006
1007 while (1)
1008 {
1009 size_t n;
1010
1011 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
1012 buf, 1, &cstate);
1013
1014 if (n == (size_t) -2)
1015 {
1016 /* Possibly correct character, just not enough
1017 input. */
1018 if (inchar () == EOF)
1019 encode_error ();
1020
1021 buf[0] = c;
1022 continue;
1023 }
1024
1025 if (n != 1)
1026 encode_error ();
1027
1028 /* We have a match. */
1029 ++wstr;
1030 break;
1031 }
1032
1033 if (!(flags & SUPPRESS) && (flags & MALLOC)
1034 && wstr == (wchar_t *) *strptr + strsize)
1035 {
1036 /* Enlarge the buffer. */
1037 wstr = (wchar_t *) realloc (*strptr,
1038 (2 * strsize
1039 * sizeof (wchar_t)));
1040 if (wstr == NULL)
1041 {
1042 /* Can't allocate that much. Last-ditch effort. */
1043 wstr = (wchar_t *) realloc (*strptr,
1044 ((strsize + 1)
1045 * sizeof (wchar_t)));
1046 if (wstr == NULL)
1047 {
1048 /* We lose. Oh well. Terminate the
1049 string and stop converting, so at
1050 least we don't skip any input. */
1051 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1052 ++done;
1053 conv_error ();
1054 }
1055 else
1056 {
1057 *strptr = (char *) wstr;
1058 wstr += strsize;
1059 ++strsize;
1060 }
1061 }
1062 else
1063 {
1064 *strptr = (char *) wstr;
1065 wstr += strsize;
1066 strsize *= 2;
1067 }
1068 }
1069 }
1070 #endif
1071 }
1072 while ((width <= 0 || --width > 0) && inchar () != EOF);
1073
1074 if (!(flags & SUPPRESS))
1075 {
1076 *wstr++ = L'\0';
1077
1078 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1079 {
1080 wchar_t *cp = (wchar_t *) realloc (*strptr,
1081 ((wstr
1082 - (wchar_t *) *strptr)
1083 * sizeof(wchar_t)));
1084 if (cp != NULL)
1085 *strptr = (char *) cp;
1086 }
1087
1088 ++done;
1089 }
1090 }
1091 break;
1092
1093 case L_('x'): /* Hexadecimal integer. */
1094 case L_('X'): /* Ditto. */
1095 base = 16;
1096 number_signed = 0;
1097 goto number;
1098
1099 case L_('o'): /* Octal integer. */
1100 base = 8;
1101 number_signed = 0;
1102 goto number;
1103
1104 case L_('u'): /* Unsigned decimal integer. */
1105 base = 10;
1106 number_signed = 0;
1107 goto number;
1108
1109 case L_('d'): /* Signed decimal integer. */
1110 base = 10;
1111 number_signed = 1;
1112 goto number;
1113
1114 case L_('i'): /* Generic number. */
1115 base = 0;
1116 number_signed = 1;
1117
1118 number:
1119 c = inchar ();
1120 if (c == EOF)
1121 input_error ();
1122
1123 /* Check for a sign. */
1124 if (c == L_('-') || c == L_('+'))
1125 {
1126 ADDW (c);
1127 if (width > 0)
1128 --width;
1129 c = inchar ();
1130 }
1131
1132 /* Look for a leading indication of base. */
1133 if (width != 0 && c == L_('0'))
1134 {
1135 if (width > 0)
1136 --width;
1137
1138 ADDW (c);
1139 c = inchar ();
1140
1141 if (width != 0 && TOLOWER (c) == L_('x'))
1142 {
1143 if (base == 0)
1144 base = 16;
1145 if (base == 16)
1146 {
1147 if (width > 0)
1148 --width;
1149 c = inchar ();
1150 }
1151 }
1152 else if (base == 0)
1153 base = 8;
1154 }
1155
1156 if (base == 0)
1157 base = 10;
1158
1159 if (base == 10 && (flags & I18N) != 0)
1160 {
1161 int from_level;
1162 int to_level;
1163 int level;
1164 #ifdef COMPILE_WSCANF
1165 const wchar_t *wcdigits[10];
1166 const wchar_t *wcdigits_extended[10];
1167 #else
1168 const char *mbdigits[10];
1169 const char *mbdigits_extended[10];
1170 #endif
1171 /* "to_inpunct" is a map from ASCII digits to their
1172 equivalent in locale. This is defined for locales
1173 which use an extra digits set. */
1174 wctrans_t map = __wctrans ("to_inpunct");
1175 int n;
1176
1177 from_level = 0;
1178 #ifdef COMPILE_WSCANF
1179 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1180 _NL_CTYPE_INDIGITS_WC_LEN) - 1;
1181 #else
1182 to_level = (uint32_t) curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
1183 #endif
1184
1185 /* Get the alternative digit forms if there are any. */
1186 if (__builtin_expect (map != NULL, 0))
1187 {
1188 /* Adding new level for extra digits set in locale file. */
1189 ++to_level;
1190
1191 for (n = 0; n < 10; ++n)
1192 {
1193 #ifdef COMPILE_WSCANF
1194 wcdigits[n] = (const wchar_t *)
1195 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1196
1197 wchar_t *wc_extended = (wchar_t *)
1198 alloca ((to_level + 2) * sizeof (wchar_t));
1199 __wmemcpy (wc_extended, wcdigits[n], to_level);
1200 wc_extended[to_level] = __towctrans (L'0' + n, map);
1201 wc_extended[to_level + 1] = '\0';
1202 wcdigits_extended[n] = wc_extended;
1203 #else
1204 mbdigits[n]
1205 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1206
1207 /* Get the equivalent wide char in map. */
1208 wint_t extra_wcdigit = __towctrans (L'0' + n, map);
1209
1210 /* Convert it to multibyte representation. */
1211 mbstate_t state;
1212 memset (&state, '\0', sizeof (state));
1213
1214 char extra_mbdigit[MB_LEN_MAX];
1215 size_t mblen
1216 = __wcrtomb (extra_mbdigit, extra_wcdigit, &state);
1217
1218 if (mblen == (size_t) -1)
1219 {
1220 /* Ignore this new level. */
1221 map = NULL;
1222 break;
1223 }
1224
1225 /* Calculate the length of mbdigits[n]. */
1226 const char *last_char = mbdigits[n];
1227 for (level = 0; level < to_level; ++level)
1228 last_char = strchr (last_char, '\0') + 1;
1229
1230 size_t mbdigits_len = last_char - mbdigits[n];
1231
1232 /* Allocate memory for extended multibyte digit. */
1233 char *mb_extended;
1234 mb_extended = (char *) alloca (mbdigits_len + mblen + 1);
1235
1236 /* And get the mbdigits + extra_digit string. */
1237 *(char *) __mempcpy (__mempcpy (mb_extended, mbdigits[n],
1238 mbdigits_len),
1239 extra_mbdigit, mblen) = '\0';
1240 mbdigits_extended[n] = mb_extended;
1241 #endif
1242 }
1243 }
1244
1245 /* Read the number into workspace. */
1246 while (c != EOF && width != 0)
1247 {
1248 /* In this round we get the pointer to the digit strings
1249 and also perform the first round of comparisons. */
1250 for (n = 0; n < 10; ++n)
1251 {
1252 /* Get the string for the digits with value N. */
1253 #ifdef COMPILE_WSCANF
1254 if (__builtin_expect (map != NULL, 0))
1255 wcdigits[n] = wcdigits_extended[n];
1256 else
1257 wcdigits[n] = (const wchar_t *)
1258 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1259 wcdigits[n] += from_level;
1260
1261 if (c == (wint_t) *wcdigits[n])
1262 {
1263 to_level = from_level;
1264 break;
1265 }
1266
1267 /* Advance the pointer to the next string. */
1268 ++wcdigits[n];
1269 #else
1270 const char *cmpp;
1271 int avail = width > 0 ? width : INT_MAX;
1272
1273 if (__builtin_expect (map != NULL, 0))
1274 mbdigits[n] = mbdigits_extended[n];
1275 else
1276 mbdigits[n]
1277 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1278
1279 for (level = 0; level < from_level; level++)
1280 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1281
1282 cmpp = mbdigits[n];
1283 while ((unsigned char) *cmpp == c && avail > 0)
1284 {
1285 if (*++cmpp == '\0')
1286 break;
1287 else
1288 {
1289 if ((c = inchar ()) == EOF)
1290 break;
1291 --avail;
1292 }
1293 }
1294
1295 if (*cmpp == '\0')
1296 {
1297 if (width > 0)
1298 width = avail;
1299 to_level = from_level;
1300 break;
1301 }
1302
1303 /* We are pushing all read characters back. */
1304 if (cmpp > mbdigits[n])
1305 {
1306 ungetc (c, s);
1307 while (--cmpp > mbdigits[n])
1308 ungetc_not_eof ((unsigned char) *cmpp, s);
1309 c = (unsigned char) *cmpp;
1310 }
1311
1312 /* Advance the pointer to the next string. */
1313 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1314 #endif
1315 }
1316
1317 if (n == 10)
1318 {
1319 /* Have not yet found the digit. */
1320 for (level = from_level + 1; level <= to_level; ++level)
1321 {
1322 /* Search all ten digits of this level. */
1323 for (n = 0; n < 10; ++n)
1324 {
1325 #ifdef COMPILE_WSCANF
1326 if (c == (wint_t) *wcdigits[n])
1327 break;
1328
1329 /* Advance the pointer to the next string. */
1330 ++wcdigits[n];
1331 #else
1332 const char *cmpp;
1333 int avail = width > 0 ? width : INT_MAX;
1334
1335 cmpp = mbdigits[n];
1336 while ((unsigned char) *cmpp == c && avail > 0)
1337 {
1338 if (*++cmpp == '\0')
1339 break;
1340 else
1341 {
1342 if ((c = inchar ()) == EOF)
1343 break;
1344 --avail;
1345 }
1346 }
1347
1348 if (*cmpp == '\0')
1349 {
1350 if (width > 0)
1351 width = avail;
1352 break;
1353 }
1354
1355 /* We are pushing all read characters back. */
1356 if (cmpp > mbdigits[n])
1357 {
1358 ungetc (c, s);
1359 while (--cmpp > mbdigits[n])
1360 ungetc_not_eof ((unsigned char) *cmpp, s);
1361 c = (unsigned char) *cmpp;
1362 }
1363
1364 /* Advance the pointer to the next string. */
1365 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1366 #endif
1367 }
1368
1369 if (n < 10)
1370 {
1371 /* Found it. */
1372 from_level = level;
1373 to_level = level;
1374 break;
1375 }
1376 }
1377 }
1378
1379 if (n < 10)
1380 c = L_('0') + n;
1381 else if ((flags & GROUP)
1382 #ifdef COMPILE_WSCANF
1383 && thousands != L'\0'
1384 #else
1385 && thousands != NULL
1386 #endif
1387 )
1388 {
1389 /* Try matching against the thousands separator. */
1390 #ifdef COMPILE_WSCANF
1391 if (c != thousands)
1392 break;
1393 #else
1394 const char *cmpp = thousands;
1395 int avail = width > 0 ? width : INT_MAX;
1396
1397 while ((unsigned char) *cmpp == c && avail > 0)
1398 {
1399 ADDW (c);
1400 if (*++cmpp == '\0')
1401 break;
1402 else
1403 {
1404 if ((c = inchar ()) == EOF)
1405 break;
1406 --avail;
1407 }
1408 }
1409
1410 if (*cmpp != '\0')
1411 {
1412 /* We are pushing all read characters back. */
1413 if (cmpp > thousands)
1414 {
1415 wpsize -= cmpp - thousands;
1416 ungetc (c, s);
1417 while (--cmpp > thousands)
1418 ungetc_not_eof ((unsigned char) *cmpp, s);
1419 c = (unsigned char) *cmpp;
1420 }
1421 break;
1422 }
1423
1424 if (width > 0)
1425 width = avail;
1426
1427 /* The last thousands character will be added back by
1428 the ADDW below. */
1429 --wpsize;
1430 #endif
1431 }
1432 else
1433 break;
1434
1435 ADDW (c);
1436 if (width > 0)
1437 --width;
1438
1439 c = inchar ();
1440 }
1441 }
1442 else
1443 /* Read the number into workspace. */
1444 while (c != EOF && width != 0)
1445 {
1446 if (base == 16)
1447 {
1448 if (!ISXDIGIT (c))
1449 break;
1450 }
1451 else if (!ISDIGIT (c) || (int) (c - L_('0')) >= base)
1452 {
1453 if (base == 10 && (flags & GROUP)
1454 #ifdef COMPILE_WSCANF
1455 && thousands != L'\0'
1456 #else
1457 && thousands != NULL
1458 #endif
1459 )
1460 {
1461 /* Try matching against the thousands separator. */
1462 #ifdef COMPILE_WSCANF
1463 if (c != thousands)
1464 break;
1465 #else
1466 const char *cmpp = thousands;
1467 int avail = width > 0 ? width : INT_MAX;
1468
1469 while ((unsigned char) *cmpp == c && avail > 0)
1470 {
1471 ADDW (c);
1472 if (*++cmpp == '\0')
1473 break;
1474 else
1475 {
1476 if ((c = inchar ()) == EOF)
1477 break;
1478 --avail;
1479 }
1480 }
1481
1482 if (*cmpp != '\0')
1483 {
1484 /* We are pushing all read characters back. */
1485 if (cmpp > thousands)
1486 {
1487 wpsize -= cmpp - thousands;
1488 ungetc (c, s);
1489 while (--cmpp > thousands)
1490 ungetc_not_eof ((unsigned char) *cmpp, s);
1491 c = (unsigned char) *cmpp;
1492 }
1493 break;
1494 }
1495
1496 if (width > 0)
1497 width = avail;
1498
1499 /* The last thousands character will be added back by
1500 the ADDW below. */
1501 --wpsize;
1502 #endif
1503 }
1504 else
1505 break;
1506 }
1507 ADDW (c);
1508 if (width > 0)
1509 --width;
1510
1511 c = inchar ();
1512 }
1513
1514 if (wpsize == 0
1515 || (wpsize == 1 && (wp[0] == L_('+') || wp[0] == L_('-'))))
1516 {
1517 /* There was no number. If we are supposed to read a pointer
1518 we must recognize "(nil)" as well. */
1519 if (wpsize == 0 && read_pointer && (width < 0 || width >= 0)
1520 && c == '('
1521 && TOLOWER (inchar ()) == L_('n')
1522 && TOLOWER (inchar ()) == L_('i')
1523 && TOLOWER (inchar ()) == L_('l')
1524 && inchar () == L_(')'))
1525 /* We must produce the value of a NULL pointer. A single
1526 '0' digit is enough. */
1527 ADDW (L_('0'));
1528 else
1529 {
1530 /* The last read character is not part of the number
1531 anymore. */
1532 ungetc (c, s);
1533
1534 conv_error ();
1535 }
1536 }
1537 else
1538 /* The just read character is not part of the number anymore. */
1539 ungetc (c, s);
1540
1541 /* Convert the number. */
1542 ADDW (L_('\0'));
1543 if (need_longlong && (flags & LONGDBL))
1544 {
1545 if (number_signed)
1546 num.q = __strtoll_internal (wp, &tw, base, flags & GROUP);
1547 else
1548 num.uq = __strtoull_internal (wp, &tw, base, flags & GROUP);
1549 }
1550 else
1551 {
1552 if (number_signed)
1553 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
1554 else
1555 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
1556 }
1557 if (wp == tw)
1558 conv_error ();
1559
1560 if (!(flags & SUPPRESS))
1561 {
1562 if (! number_signed)
1563 {
1564 if (need_longlong && (flags & LONGDBL))
1565 *ARG (unsigned LONGLONG int *) = num.uq;
1566 else if (need_long && (flags & LONG))
1567 *ARG (unsigned long int *) = num.ul;
1568 else if (flags & SHORT)
1569 *ARG (unsigned short int *)
1570 = (unsigned short int) num.ul;
1571 else if (!(flags & CHAR))
1572 *ARG (unsigned int *) = (unsigned int) num.ul;
1573 else
1574 *ARG (unsigned char *) = (unsigned char) num.ul;
1575 }
1576 else
1577 {
1578 if (need_longlong && (flags & LONGDBL))
1579 *ARG (LONGLONG int *) = num.q;
1580 else if (need_long && (flags & LONG))
1581 *ARG (long int *) = num.l;
1582 else if (flags & SHORT)
1583 *ARG (short int *) = (short int) num.l;
1584 else if (!(flags & CHAR))
1585 *ARG (int *) = (int) num.l;
1586 else
1587 *ARG (signed char *) = (signed char) num.ul;
1588 }
1589 ++done;
1590 }
1591 break;
1592
1593 case L_('e'): /* Floating-point numbers. */
1594 case L_('E'):
1595 case L_('f'):
1596 case L_('F'):
1597 case L_('g'):
1598 case L_('G'):
1599 case L_('a'):
1600 case L_('A'):
1601 c = inchar ();
1602 if (c == EOF)
1603 input_error ();
1604
1605 got_dot = got_e = 0;
1606
1607 /* Check for a sign. */
1608 if (c == L_('-') || c == L_('+'))
1609 {
1610 negative = c == L_('-');
1611 if (width == 0 || inchar () == EOF)
1612 /* EOF is only an input error before we read any chars. */
1613 conv_error ();
1614 if (! ISDIGIT (c) && TOLOWER (c) != L_('i')
1615 && TOLOWER (c) != L_('n'))
1616 {
1617 #ifdef COMPILE_WSCANF
1618 if (c != decimal)
1619 {
1620 /* This is no valid number. */
1621 ungetc (c, s);
1622 conv_error ();
1623 }
1624 #else
1625 /* Match against the decimal point. At this point
1626 we are taking advantage of the fact that we can
1627 push more than one character back. This is
1628 (almost) never necessary since the decimal point
1629 string hopefully never contains more than one
1630 byte. */
1631 const char *cmpp = decimal;
1632 int avail = width > 0 ? width : INT_MAX;
1633
1634 while ((unsigned char) *cmpp == c && avail > 0)
1635 if (*++cmpp == '\0')
1636 break;
1637 else
1638 {
1639 if (inchar () == EOF)
1640 break;
1641 --avail;
1642 }
1643
1644 if (*cmpp != '\0')
1645 {
1646 /* This is no valid number. */
1647 while (1)
1648 {
1649 ungetc (c, s);
1650 if (cmpp == decimal)
1651 break;
1652 c = (unsigned char) *--cmpp;
1653 }
1654
1655 conv_error ();
1656 }
1657 else
1658 {
1659 /* Add all the characters. */
1660 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
1661 ADDW ((unsigned char) *cmpp);
1662 if (width > 0)
1663 width = avail;
1664 got_dot = 1;
1665
1666 c = inchar ();
1667 }
1668 if (width > 0)
1669 width = avail;
1670 #endif
1671 }
1672 if (width > 0)
1673 --width;
1674 }
1675 else
1676 negative = 0;
1677
1678 /* Take care of the special arguments "nan" and "inf". */
1679 if (TOLOWER (c) == L_('n'))
1680 {
1681 /* Maybe "nan". */
1682 ADDW (c);
1683 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('a'))
1684 conv_error ();
1685 if (width > 0)
1686 --width;
1687 ADDW (c);
1688 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('n'))
1689 conv_error ();
1690 if (width > 0)
1691 --width;
1692 ADDW (c);
1693 /* It is "nan". */
1694 goto scan_float;
1695 }
1696 else if (TOLOWER (c) == L_('i'))
1697 {
1698 /* Maybe "inf" or "infinity". */
1699 ADDW (c);
1700 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('n'))
1701 conv_error ();
1702 if (width > 0)
1703 --width;
1704 ADDW (c);
1705 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('f'))
1706 conv_error ();
1707 if (width > 0)
1708 --width;
1709 ADDW (c);
1710 /* It is at least "inf". */
1711 if (width != 0 && inchar () != EOF)
1712 {
1713 if (TOLOWER (c) == L_('i'))
1714 {
1715 if (width > 0)
1716 --width;
1717 /* Now we have to read the rest as well. */
1718 ADDW (c);
1719 if (width == 0 || inchar () == EOF
1720 || TOLOWER (c) != L_('n'))
1721 conv_error ();
1722 if (width > 0)
1723 --width;
1724 ADDW (c);
1725 if (width == 0 || inchar () == EOF
1726 || TOLOWER (c) != L_('i'))
1727 conv_error ();
1728 if (width > 0)
1729 --width;
1730 ADDW (c);
1731 if (width == 0 || inchar () == EOF
1732 || TOLOWER (c) != L_('t'))
1733 conv_error ();
1734 if (width > 0)
1735 --width;
1736 ADDW (c);
1737 if (width == 0 || inchar () == EOF
1738 || TOLOWER (c) != L_('y'))
1739 conv_error ();
1740 if (width > 0)
1741 --width;
1742 ADDW (c);
1743 }
1744 else
1745 /* Never mind. */
1746 ungetc (c, s);
1747 }
1748 goto scan_float;
1749 }
1750
1751 is_hexa = 0;
1752 exp_char = L_('e');
1753 if (width != 0 && c == L_('0'))
1754 {
1755 ADDW (c);
1756 c = inchar ();
1757 if (width > 0)
1758 --width;
1759 if (width != 0 && TOLOWER (c) == L_('x'))
1760 {
1761 /* It is a number in hexadecimal format. */
1762 ADDW (c);
1763
1764 is_hexa = 1;
1765 exp_char = L_('p');
1766
1767 /* Grouping is not allowed. */
1768 flags &= ~GROUP;
1769 c = inchar ();
1770 if (width > 0)
1771 --width;
1772 }
1773 }
1774
1775 do
1776 {
1777 if (ISDIGIT (c))
1778 ADDW (c);
1779 else if (!got_e && is_hexa && ISXDIGIT (c))
1780 ADDW (c);
1781 else if (got_e && wp[wpsize - 1] == exp_char
1782 && (c == L_('-') || c == L_('+')))
1783 ADDW (c);
1784 else if (wpsize > 0 && !got_e
1785 && (CHAR_T) TOLOWER (c) == exp_char)
1786 {
1787 ADDW (exp_char);
1788 got_e = got_dot = 1;
1789 }
1790 else
1791 {
1792 #ifdef COMPILE_WSCANF
1793 if (! got_dot && c == decimal)
1794 {
1795 ADDW (c);
1796 got_dot = 1;
1797 }
1798 else if ((flags & GROUP) != 0 && thousands != L'\0'
1799 && ! got_dot && c == thousands)
1800 ADDW (c);
1801 else
1802 {
1803 /* The last read character is not part of the number
1804 anymore. */
1805 ungetc (c, s);
1806 break;
1807 }
1808 #else
1809 const char *cmpp = decimal;
1810 int avail = width > 0 ? width : INT_MAX;
1811
1812 if (! got_dot)
1813 {
1814 while ((unsigned char) *cmpp == c && avail > 0)
1815 if (*++cmpp == '\0')
1816 break;
1817 else
1818 {
1819 if (inchar () == EOF)
1820 break;
1821 --avail;
1822 }
1823 }
1824
1825 if (*cmpp == '\0')
1826 {
1827 /* Add all the characters. */
1828 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
1829 ADDW ((unsigned char) *cmpp);
1830 if (width > 0)
1831 width = avail;
1832 got_dot = 1;
1833 }
1834 else
1835 {
1836 /* Figure out whether it is a thousands separator.
1837 There is one problem: we possibly read more than
1838 one character. We cannot push them back but since
1839 we know that parts of the `decimal' string matched,
1840 we can compare against it. */
1841 const char *cmp2p = thousands;
1842
1843 if ((flags & GROUP) != 0 && thousands != NULL
1844 && ! got_dot)
1845 {
1846 while (cmp2p - thousands < cmpp - decimal
1847 && *cmp2p == decimal[cmp2p - thousands])
1848 ++cmp2p;
1849 if (cmp2p - thousands == cmpp - decimal)
1850 {
1851 while ((unsigned char) *cmp2p == c && avail > 0)
1852 if (*++cmp2p == '\0')
1853 break;
1854 else
1855 {
1856 if (inchar () == EOF)
1857 break;
1858 --avail;
1859 }
1860 }
1861 }
1862
1863 if (cmp2p != NULL && *cmp2p == '\0')
1864 {
1865 /* Add all the characters. */
1866 for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
1867 ADDW ((unsigned char) *cmpp);
1868 if (width > 0)
1869 width = avail;
1870 }
1871 else
1872 {
1873 /* The last read character is not part of the number
1874 anymore. */
1875 ungetc (c, s);
1876 break;
1877 }
1878 }
1879 #endif
1880 }
1881 if (width > 0)
1882 --width;
1883 }
1884 while (width != 0 && inchar () != EOF);
1885
1886 /* Have we read any character? If we try to read a number
1887 in hexadecimal notation and we have read only the `0x'
1888 prefix or no exponent this is an error. */
1889 if (wpsize == 0 || (is_hexa && (wpsize == 2 || ! got_e)))
1890 conv_error ();
1891
1892 scan_float:
1893 /* Convert the number. */
1894 ADDW (L_('\0'));
1895 if (flags & LONGDBL)
1896 {
1897 long double d = __strtold_internal (wp, &tw, flags & GROUP);
1898 if (!(flags & SUPPRESS) && tw != wp)
1899 *ARG (long double *) = negative ? -d : d;
1900 }
1901 else if (flags & LONG)
1902 {
1903 double d = __strtod_internal (wp, &tw, flags & GROUP);
1904 if (!(flags & SUPPRESS) && tw != wp)
1905 *ARG (double *) = negative ? -d : d;
1906 }
1907 else
1908 {
1909 float d = __strtof_internal (wp, &tw, flags & GROUP);
1910 if (!(flags & SUPPRESS) && tw != wp)
1911 *ARG (float *) = negative ? -d : d;
1912 }
1913
1914 if (tw == wp)
1915 conv_error ();
1916
1917 if (!(flags & SUPPRESS))
1918 ++done;
1919 break;
1920
1921 case L_('['): /* Character class. */
1922 if (flags & LONG)
1923 STRING_ARG (wstr, wchar_t);
1924 else
1925 STRING_ARG (str, char);
1926
1927 if (*f == L_('^'))
1928 {
1929 ++f;
1930 not_in = 1;
1931 }
1932 else
1933 not_in = 0;
1934
1935 if (width < 0)
1936 /* There is no width given so there is also no limit on the
1937 number of characters we read. Therefore we set width to
1938 a very high value to make the algorithm easier. */
1939 width = INT_MAX;
1940
1941 #ifdef COMPILE_WSCANF
1942 /* Find the beginning and the end of the scanlist. We are not
1943 creating a lookup table since it would have to be too large.
1944 Instead we search each time through the string. This is not
1945 a constant lookup time but who uses this feature deserves to
1946 be punished. */
1947 tw = (wchar_t *) f; /* Marks the beginning. */
1948
1949 if (*f == L']')
1950 ++f;
1951
1952 while ((fc = *f++) != L'\0' && fc != L']');
1953
1954 if (fc == L'\0')
1955 conv_error ();
1956 wp = (wchar_t *) f - 1;
1957 #else
1958 /* Fill WP with byte flags indexed by character.
1959 We will use this flag map for matching input characters. */
1960 if (wpmax < UCHAR_MAX + 1)
1961 {
1962 wpmax = UCHAR_MAX + 1;
1963 wp = (char *) alloca (wpmax);
1964 }
1965 memset (wp, '\0', UCHAR_MAX + 1);
1966
1967 fc = *f;
1968 if (fc == ']' || fc == '-')
1969 {
1970 /* If ] or - appears before any char in the set, it is not
1971 the terminator or separator, but the first char in the
1972 set. */
1973 wp[fc] = 1;
1974 ++f;
1975 }
1976
1977 while ((fc = *f++) != '\0' && fc != ']')
1978 if (fc == '-' && *f != '\0' && *f != ']'
1979 && (unsigned char) f[-2] <= (unsigned char) *f)
1980 {
1981 /* Add all characters from the one before the '-'
1982 up to (but not including) the next format char. */
1983 for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc)
1984 wp[fc] = 1;
1985 }
1986 else
1987 /* Add the character to the flag map. */
1988 wp[fc] = 1;
1989
1990 if (fc == '\0')
1991 conv_error();
1992 #endif
1993
1994 if (flags & LONG)
1995 {
1996 size_t now = read_in;
1997 #ifdef COMPILE_WSCANF
1998 if (inchar () == WEOF)
1999 input_error ();
2000
2001 do
2002 {
2003 wchar_t *runp;
2004
2005 /* Test whether it's in the scanlist. */
2006 runp = tw;
2007 while (runp < wp)
2008 {
2009 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
2010 && runp != tw
2011 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2012 {
2013 /* Match against all characters in between the
2014 first and last character of the sequence. */
2015 wchar_t wc;
2016
2017 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2018 if ((wint_t) wc == c)
2019 break;
2020
2021 if (wc <= runp[1] && !not_in)
2022 break;
2023 if (wc <= runp[1] && not_in)
2024 {
2025 /* The current character is not in the
2026 scanset. */
2027 ungetc (c, s);
2028 goto out;
2029 }
2030
2031 runp += 2;
2032 }
2033 else
2034 {
2035 if ((wint_t) *runp == c && !not_in)
2036 break;
2037 if ((wint_t) *runp == c && not_in)
2038 {
2039 ungetc (c, s);
2040 goto out;
2041 }
2042
2043 ++runp;
2044 }
2045 }
2046
2047 if (runp == wp && !not_in)
2048 {
2049 ungetc (c, s);
2050 goto out;
2051 }
2052
2053 if (!(flags & SUPPRESS))
2054 {
2055 *wstr++ = c;
2056
2057 if ((flags & MALLOC)
2058 && wstr == (wchar_t *) *strptr + strsize)
2059 {
2060 /* Enlarge the buffer. */
2061 wstr = (wchar_t *) realloc (*strptr,
2062 (2 * strsize)
2063 * sizeof (wchar_t));
2064 if (wstr == NULL)
2065 {
2066 /* Can't allocate that much. Last-ditch
2067 effort. */
2068 wstr = (wchar_t *)
2069 realloc (*strptr, (strsize + 1)
2070 * sizeof (wchar_t));
2071 if (wstr == NULL)
2072 {
2073 /* We lose. Oh well. Terminate the string
2074 and stop converting, so at least we don't
2075 skip any input. */
2076 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2077 ++done;
2078 conv_error ();
2079 }
2080 else
2081 {
2082 *strptr = (char *) wstr;
2083 wstr += strsize;
2084 ++strsize;
2085 }
2086 }
2087 else
2088 {
2089 *strptr = (char *) wstr;
2090 wstr += strsize;
2091 strsize *= 2;
2092 }
2093 }
2094 }
2095 }
2096 while (--width > 0 && inchar () != WEOF);
2097 out:
2098 #else
2099 char buf[MB_LEN_MAX];
2100 size_t cnt = 0;
2101 mbstate_t cstate;
2102
2103 if (inchar () == EOF)
2104 input_error ();
2105
2106 memset (&cstate, '\0', sizeof (cstate));
2107
2108 do
2109 {
2110 if (wp[c] == not_in)
2111 {
2112 ungetc_not_eof (c, s);
2113 break;
2114 }
2115
2116 /* This is easy. */
2117 if (!(flags & SUPPRESS))
2118 {
2119 size_t n;
2120
2121 /* Convert it into a wide character. */
2122 buf[0] = c;
2123 n = __mbrtowc (wstr, buf, 1, &cstate);
2124
2125 if (n == (size_t) -2)
2126 {
2127 /* Possibly correct character, just not enough
2128 input. */
2129 ++cnt;
2130 assert (cnt < MB_CUR_MAX);
2131 continue;
2132 }
2133 cnt = 0;
2134
2135 ++wstr;
2136 if ((flags & MALLOC)
2137 && wstr == (wchar_t *) *strptr + strsize)
2138 {
2139 /* Enlarge the buffer. */
2140 wstr = (wchar_t *) realloc (*strptr,
2141 (2 * strsize
2142 * sizeof (wchar_t)));
2143 if (wstr == NULL)
2144 {
2145 /* Can't allocate that much. Last-ditch
2146 effort. */
2147 wstr = (wchar_t *)
2148 realloc (*strptr, ((strsize + 1)
2149 * sizeof (wchar_t)));
2150 if (wstr == NULL)
2151 {
2152 /* We lose. Oh well. Terminate the
2153 string and stop converting,
2154 so at least we don't skip any input. */
2155 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2156 ++done;
2157 conv_error ();
2158 }
2159 else
2160 {
2161 *strptr = (char *) wstr;
2162 wstr += strsize;
2163 ++strsize;
2164 }
2165 }
2166 else
2167 {
2168 *strptr = (char *) wstr;
2169 wstr += strsize;
2170 strsize *= 2;
2171 }
2172 }
2173 }
2174
2175 if (--width <= 0)
2176 break;
2177 }
2178 while (inchar () != EOF);
2179
2180 if (cnt != 0)
2181 /* We stopped in the middle of recognizing another
2182 character. That's a problem. */
2183 encode_error ();
2184 #endif
2185
2186 if (now == read_in)
2187 /* We haven't successfully read any character. */
2188 conv_error ();
2189
2190 if (!(flags & SUPPRESS))
2191 {
2192 *wstr++ = L'\0';
2193
2194 if ((flags & MALLOC)
2195 && wstr - (wchar_t *) *strptr != strsize)
2196 {
2197 wchar_t *cp = (wchar_t *)
2198 realloc (*strptr, ((wstr - (wchar_t *) *strptr)
2199 * sizeof(wchar_t)));
2200 if (cp != NULL)
2201 *strptr = (char *) cp;
2202 }
2203
2204 ++done;
2205 }
2206 }
2207 else
2208 {
2209 size_t now = read_in;
2210
2211 if (inchar () == EOF)
2212 input_error ();
2213
2214 #ifdef COMPILE_WSCANF
2215
2216 memset (&state, '\0', sizeof (state));
2217
2218 do
2219 {
2220 wchar_t *runp;
2221 size_t n;
2222
2223 /* Test whether it's in the scanlist. */
2224 runp = tw;
2225 while (runp < wp)
2226 {
2227 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
2228 && runp != tw
2229 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2230 {
2231 /* Match against all characters in between the
2232 first and last character of the sequence. */
2233 wchar_t wc;
2234
2235 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2236 if ((wint_t) wc == c)
2237 break;
2238
2239 if (wc <= runp[1] && !not_in)
2240 break;
2241 if (wc <= runp[1] && not_in)
2242 {
2243 /* The current character is not in the
2244 scanset. */
2245 ungetc (c, s);
2246 goto out2;
2247 }
2248
2249 runp += 2;
2250 }
2251 else
2252 {
2253 if ((wint_t) *runp == c && !not_in)
2254 break;
2255 if ((wint_t) *runp == c && not_in)
2256 {
2257 ungetc (c, s);
2258 goto out2;
2259 }
2260
2261 ++runp;
2262 }
2263 }
2264
2265 if (runp == wp && !not_in)
2266 {
2267 ungetc (c, s);
2268 goto out2;
2269 }
2270
2271 if (!(flags & SUPPRESS))
2272 {
2273 if ((flags & MALLOC)
2274 && str + MB_CUR_MAX >= *strptr + strsize)
2275 {
2276 /* Enlarge the buffer. */
2277 size_t strleng = str - *strptr;
2278 char *newstr;
2279
2280 newstr = (char *) realloc (*strptr, 2 * strsize);
2281 if (newstr == NULL)
2282 {
2283 /* Can't allocate that much. Last-ditch
2284 effort. */
2285 newstr = (char *) realloc (*strptr,
2286 strleng + MB_CUR_MAX);
2287 if (newstr == NULL)
2288 {
2289 /* We lose. Oh well. Terminate the string
2290 and stop converting, so at least we don't
2291 skip any input. */
2292 ((char *) (*strptr))[strleng] = '\0';
2293 ++done;
2294 conv_error ();
2295 }
2296 else
2297 {
2298 *strptr = newstr;
2299 str = newstr + strleng;
2300 strsize = strleng + MB_CUR_MAX;
2301 }
2302 }
2303 else
2304 {
2305 *strptr = newstr;
2306 str = newstr + strleng;
2307 strsize *= 2;
2308 }
2309 }
2310 }
2311
2312 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
2313 if (n == (size_t) -1)
2314 encode_error ();
2315
2316 assert (n <= MB_CUR_MAX);
2317 str += n;
2318 }
2319 while (--width > 0 && inchar () != WEOF);
2320 out2:
2321 #else
2322 do
2323 {
2324 if (wp[c] == not_in)
2325 {
2326 ungetc_not_eof (c, s);
2327 break;
2328 }
2329
2330 /* This is easy. */
2331 if (!(flags & SUPPRESS))
2332 {
2333 *str++ = c;
2334 if ((flags & MALLOC)
2335 && (char *) str == *strptr + strsize)
2336 {
2337 /* Enlarge the buffer. */
2338 size_t newsize = 2 * strsize;
2339
2340 allocagain:
2341 str = (char *) realloc (*strptr, newsize);
2342 if (str == NULL)
2343 {
2344 /* Can't allocate that much. Last-ditch
2345 effort. */
2346 if (newsize > strsize + 1)
2347 {
2348 newsize = strsize + 1;
2349 goto allocagain;
2350 }
2351 /* We lose. Oh well. Terminate the
2352 string and stop converting,
2353 so at least we don't skip any input. */
2354 ((char *) (*strptr))[strsize - 1] = '\0';
2355 ++done;
2356 conv_error ();
2357 }
2358 else
2359 {
2360 *strptr = (char *) str;
2361 str += strsize;
2362 strsize = newsize;
2363 }
2364 }
2365 }
2366 }
2367 while (--width > 0 && inchar () != EOF);
2368 #endif
2369
2370 if (now == read_in)
2371 /* We haven't successfully read any character. */
2372 conv_error ();
2373
2374 if (!(flags & SUPPRESS))
2375 {
2376 #ifdef COMPILE_WSCANF
2377 /* We have to emit the code to get into the initial
2378 state. */
2379 char buf[MB_LEN_MAX];
2380 size_t n = __wcrtomb (buf, L'\0', &state);
2381 if (n > 0 && (flags & MALLOC)
2382 && str + n >= *strptr + strsize)
2383 {
2384 /* Enlarge the buffer. */
2385 size_t strleng = str - *strptr;
2386 char *newstr;
2387
2388 newstr = (char *) realloc (*strptr, strleng + n + 1);
2389 if (newstr == NULL)
2390 {
2391 /* We lose. Oh well. Terminate the string
2392 and stop converting, so at least we don't
2393 skip any input. */
2394 ((char *) (*strptr))[strleng] = '\0';
2395 ++done;
2396 conv_error ();
2397 }
2398 else
2399 {
2400 *strptr = newstr;
2401 str = newstr + strleng;
2402 strsize = strleng + n + 1;
2403 }
2404 }
2405
2406 str = __mempcpy (str, buf, n);
2407 #endif
2408 *str++ = '\0';
2409
2410 if ((flags & MALLOC) && str - *strptr != strsize)
2411 {
2412 char *cp = (char *) realloc (*strptr, str - *strptr);
2413 if (cp != NULL)
2414 *strptr = cp;
2415 }
2416
2417 ++done;
2418 }
2419 }
2420 break;
2421
2422 case L_('p'): /* Generic pointer. */
2423 base = 16;
2424 /* A PTR must be the same size as a `long int'. */
2425 flags &= ~(SHORT|LONGDBL);
2426 if (need_long)
2427 flags |= LONG;
2428 number_signed = 0;
2429 read_pointer = 1;
2430 goto number;
2431
2432 default:
2433 /* If this is an unknown format character punt. */
2434 conv_error ();
2435 }
2436 }
2437
2438 /* The last thing we saw in the format string was a white space.
2439 Consume the last white spaces. */
2440 if (skip_space)
2441 {
2442 do
2443 c = inchar ();
2444 while (ISSPACE (c));
2445 ungetc (c, s);
2446 }
2447
2448 errout:
2449 /* Unlock stream. */
2450 UNLOCK_STREAM (s);
2451
2452 if (errp != NULL)
2453 *errp |= errval;
2454
2455 return done;
2456 }
2457
2458 #ifdef COMPILE_WSCANF
/* Wide-character entry point, built only when COMPILE_WSCANF is defined.
   Passes the stream S, the wide format string FORMAT and the argument
   list ARGPTR unchanged to _IO_vfwscanf and returns that function's
   result.
   NOTE(review): the trailing NULL looks like the ERRP out-parameter that
   the scanner tests with `errp != NULL' before storing its error flags,
   i.e. no error flags are requested here -- confirm against the
   _IO_vfwscanf prototype.  */
2459 int
2460 __vfwscanf (FILE *s, const wchar_t *format, va_list argptr)
2461 {
2462 return _IO_vfwscanf (s, format, argptr, NULL);
2463 }
2464 #else
/* Narrow-character entry point, built when COMPILE_WSCANF is not defined.
   Passes the stream S, the format string FORMAT and the argument list
   ARGPTR unchanged to _IO_vfscanf (reached through the INTUSE macro) and
   returns that function's result.
   NOTE(review): the trailing NULL looks like the ERRP out-parameter that
   the scanner tests with `errp != NULL' before storing its error flags,
   i.e. no error flags are requested here -- confirm against the
   _IO_vfscanf prototype.  */
2465 int
2466 __vfscanf (FILE *s, const char *format, va_list argptr)
2467 {
2468 return INTUSE(_IO_vfscanf) (s, format, argptr, NULL);
2469 }
/* NOTE(review): libc_hidden_def is defined outside this file; presumably
   it provides the hidden definition used for calls from inside the
   library -- confirm in the symbol-handling headers.  */
2470 libc_hidden_def (__vfscanf)
2471 #endif
2472
/* Attach the standard name to whichever entry point was compiled above:
   vfwscanf for the wide build, vfscanf for the narrow build.
   NOTE(review): weak_alias and INTDEF are macros defined outside this
   file; presumably weak_alias makes its second argument a weak alias of
   the first, and INTDEF pairs with the INTUSE reference used by
   __vfscanf -- confirm in the symbol-handling headers.  */
2473 #ifdef COMPILE_WSCANF
2474 weak_alias (__vfwscanf, vfwscanf)
2475 #else
2476 weak_alias (__vfscanf, vfscanf)
2477 INTDEF(_IO_vfscanf)
2478 #endif
This page took 0.201259 seconds and 5 git commands to generate.