[PATCH] printf: Enable grouping decimal values
Corinna Vinschen
vinschen@redhat.com
Mon Feb 1 20:48:00 GMT 2010
Hi,
the patch below is derived from NetBSD code. It enables printf/wprintf
grouping using the thousands grouping character from the locale
information when the ' (single quote) flag character has been specified
in decimal conversions (i, d, u, f, F, g, G), as specified by POSIX.1-2008.
The grouping flag character was originally guarded by the
_WANT_IO_C99_FORMATS macro, so I left that intact. The entire grouping
code is still only available with _WANT_IO_C99_FORMATS set.
A new macro has been introduced from the NetBSD code, called PRINTANDPAD.
It made it possible to simplify the printing of float values and to take
over the NetBSD code almost unchanged. The most important change was to allow
thousands_sep to be a multibyte character in _VFPRINTF_R. NetBSD erroneously
assumes that thousands_sep is a single-byte char.
Tested on Cygwin in several locales with different decimal points,
thousands separators (including multibyte ones), and different groupings.
Ok to apply?
Thanks,
Corinna
* libc/stdio/vfprintf.c: Include locale.h also if _WANT_IO_C99_FORMATS
is defined. Raise conversion buffer size to make sure it has enough
room for numbers plus grouping character. Define GROUPING flag.
(_VFPRINTF_R): Add PRINTANDPAD macro. Handle grouping flag character.
Handle grouping for decimal integer and float values.
* libc/stdio/vfwprintf.c: Ditto.
Index: libc/stdio/vfprintf.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdio/vfprintf.c,v
retrieving revision 1.77
diff -u -p -r1.77 vfprintf.c
--- libc/stdio/vfprintf.c 14 Jan 2010 12:48:58 -0000 1.77
+++ libc/stdio/vfprintf.c 1 Feb 2010 20:40:44 -0000
@@ -375,8 +375,10 @@ _DEFUN(__sbprintf, (rptr, fp, fmt, ap),
#endif /* !STRING_ONLY */
-#ifdef FLOATING_POINT
+#if defined (FLOATING_POINT) || defined (_WANT_IO_C99_FORMATS)
# include <locale.h>
+#endif
+#ifdef FLOATING_POINT
# include <math.h>
/* For %La, an exponent of 15 bits occupies the exponent character, a
@@ -423,8 +425,16 @@ static int exponent(char *, int, int);
reentrant storage shared with mprec. All other formats that use
buf get by with fewer characters. Making BUF slightly bigger
reduces the need for malloc in %.*a and %S, when large precision or
- long strings are processed. */
+ long strings are processed.
+ The bigger size of 100 bytes is used on systems which allow number
+ strings using the locale's grouping character. Since that's a multibyte
+ value, we should use a conservative value.
+ */
+#ifdef _WANT_IO_C99_FORMATS
+#define BUF 100
+#else
#define BUF 40
+#endif
#if defined _MB_CAPABLE && MB_LEN_MAX > BUF
# undef BUF
# define BUF MB_LEN_MAX
@@ -508,6 +518,9 @@ _EXFUN(get_arg, (struct _reent *data, in
#else /* define as 0, to make SARG and UARG occupy fewer instructions */
# define CHARINT 0
#endif
+#ifdef _WANT_IO_C99_FORMATS
+# define GROUPING 0x400 /* use grouping ("'" flag) */
+#endif
int _EXFUN(_VFPRINTF_R, (struct _reent *, FILE *, _CONST char *, va_list));
@@ -552,6 +565,12 @@ _DEFUN(_VFPRINTF_R, (data, fp, fmt0, ap)
int width; /* width from format (%8d), or 0 */
int prec; /* precision from format (%.3d), or -1 */
char sign; /* sign prefix (' ', '+', '-', or \0) */
+#ifdef _WANT_IO_C99_FORMATS
+ /* locale specific numeric grouping */
+ char *thousands_sep;
+ size_t thsnd_len;
+ const char *grouping;
+#endif
#ifdef FLOATING_POINT
char *decimal_point = _localeconv_r (data)->decimal_point;
size_t decp_len = strlen (decimal_point);
@@ -560,9 +579,16 @@ _DEFUN(_VFPRINTF_R, (data, fp, fmt0, ap)
# define _fpvalue (_double_.fp)
int expt; /* integer value of exponent */
int expsize = 0; /* character count for expstr */
- int ndig = 0; /* actual number of digits returned by cvt */
char expstr[MAXEXPLEN]; /* buffer for exponent string */
+ int lead; /* sig figs before decimal or group sep */
#endif /* FLOATING_POINT */
+#if defined (FLOATING_POINT) || defined (_WANT_IO_C99_FORMATS)
+ int ndig = 0; /* actual number of digits returned by cvt */
+#endif
+#ifdef _WANT_IO_C99_FORMATS
+ int nseps; /* number of group separators with ' */
+ int nrepeats; /* number of repeats of the last group */
+#endif
u_quad_t _uquad; /* integer arguments %[diouxX] */
enum { OCT, DEC, HEX } base;/* base for [diouxX] conversion */
int dprec; /* a copy of prec if [diouxX], 0 otherwise */
@@ -617,6 +643,14 @@ _DEFUN(_VFPRINTF_R, (data, fp, fmt0, ap)
PRINT (with, n); \
} \
}
+#define PRINTANDPAD(p, ep, len, with) { \
+ int n = (ep) - (p); \
+ if (n > (len)) \
+ n = (len); \
+ if (n > 0) \
+ PRINT((p), n); \
+ PAD((len) - (n > 0 ? n : 0), (with)); \
+}
#define FLUSH() { \
if (uio.uio_resid && __SPRINT(data, fp, &uio)) \
goto error; \
@@ -757,6 +791,12 @@ _DEFUN(_VFPRINTF_R, (data, fp, fmt0, ap)
width = 0;
prec = -1;
sign = '\0';
+#ifdef FLOATING_POINT
+ lead = 0;
+#endif
+#ifdef _WANT_IO_C99_FORMATS
+ nseps = nrepeats = 0;
+#endif
#ifndef _NO_POS_ARGS
N = arg_index;
is_pos_arg = 0;
@@ -766,12 +806,12 @@ rflag: ch = *fmt++;
reswitch: switch (ch) {
#ifdef _WANT_IO_C99_FORMATS
case '\'':
- /* The ' flag is required by POSIX, but not C99.
- In the C locale, LC_NUMERIC requires
- thousands_sep to be the empty string. And since
- no other locales are supported (yet), this flag
- is currently a no-op. */
- goto rflag;
+ thousands_sep = _localeconv_r (data)->thousands_sep;
+ thsnd_len = strlen (thousands_sep);
+ grouping = _localeconv_r (data)->grouping;
+ if (thsnd_len > 0 && grouping && *grouping)
+ flags |= GROUPING;
+ goto rflag;
#endif
case ' ':
/*
@@ -1140,22 +1180,46 @@ reswitch: switch (ch) {
size = expsize + ndig;
if (ndig > 1 || flags & ALT)
++size;
- } else if (ch == 'f') { /* f fmt */
- if (expt > 0) {
+# ifdef _WANT_IO_C99_FORMATS
+ flags &= ~GROUPING;
+# endif
+ } else {
+ if (ch == 'f') { /* f fmt */
+ if (expt > 0) {
+ size = expt;
+ if (prec || flags & ALT)
+ size += prec + 1;
+ } else /* "0.X" */
+ size = (prec || flags & ALT)
+ ? prec + 2
+ : 1;
+ } else if (expt >= ndig) { /* fixed g fmt */
size = expt;
- if (prec || flags & ALT)
- size += prec + 1;
- } else /* "0.X" */
- size = (prec || flags & ALT)
- ? prec + 2
- : 1;
- } else if (expt >= ndig) { /* fixed g fmt */
- size = expt;
- if (flags & ALT)
- ++size;
- } else
- size = ndig + (expt > 0 ?
- 1 : 2 - expt);
+ if (flags & ALT)
+ ++size;
+ } else
+ size = ndig + (expt > 0 ?
+ 1 : 2 - expt);
+# ifdef _WANT_IO_C99_FORMATS
+ if ((flags & GROUPING) && expt > 0) {
+ /* space for thousands' grouping */
+ nseps = nrepeats = 0;
+ lead = expt;
+ while (*grouping != CHAR_MAX) {
+ if (lead <= *grouping)
+ break;
+ lead -= *grouping;
+ if (grouping[1]) {
+ nseps++;
+ grouping++;
+ } else
+ nrepeats++;
+ }
+ size += (nseps + nrepeats) * thsnd_len;
+ } else
+# endif
+ lead = expt;
+ }
if (softsign)
sign = '-';
@@ -1184,6 +1248,9 @@ reswitch: switch (ch) {
case 'o':
_uquad = UARG ();
base = OCT;
+#ifdef _WANT_IO_C99_FORMATS
+ flags &= ~GROUPING;
+#endif
goto nosign;
case 'p':
/*
@@ -1320,6 +1387,9 @@ hex: _uquad = UARG ();
flags |= HEXPREFIX;
}
+#ifdef _WANT_IO_C99_FORMATS
+ flags &= ~GROUPING;
+#endif
/* unsigned conversions */
nosign: sign = '\0';
/*
@@ -1355,11 +1425,37 @@ number: if ((dprec = prec) >= 0)
case DEC:
/* many numbers are 1 digit */
- while (_uquad >= 10) {
- *--cp = to_char (_uquad % 10);
- _uquad /= 10;
+ if (_uquad < 10) {
+ *--cp = to_char(_uquad);
+ break;
}
- *--cp = to_char (_uquad);
+#ifdef _WANT_IO_C99_FORMATS
+ ndig = 0;
+#endif
+ do {
+ *--cp = to_char (_uquad % 10);
+#ifdef _WANT_IO_C99_FORMATS
+ ndig++;
+ /* If (*grouping == CHAR_MAX) then no
+ more grouping */
+ if ((flags & GROUPING)
+ && ndig == *grouping
+ && *grouping != CHAR_MAX
+ && _uquad > 9) {
+ cp -= thsnd_len;
+ strncpy (cp, thousands_sep,
+ thsnd_len);
+ ndig = 0;
+ /* If (grouping[1] == '\0') then we
+ have to use *grouping character
+ (last grouping rule) for all
+ next cases. */
+ if (grouping[1] != '\0')
+ grouping++;
+ }
+#endif
+ _uquad /= 10;
+ } while (_uquad != 0);
break;
case HEX:
@@ -1459,16 +1555,33 @@ number: if ((dprec = prec) >= 0)
PAD (-expt, zeroes);
PRINT (cp, ndig);
}
- } else if (expt >= ndig) {
- PRINT (cp, ndig);
- PAD (expt - ndig, zeroes);
- if (flags & ALT)
- PRINT (decimal_point, decp_len);
} else {
- PRINT (cp, expt);
- cp += expt;
- PRINT (decimal_point, decp_len);
- PRINT (cp, ndig - expt);
+ char *convbuf = cp;
+ PRINTANDPAD(cp, convbuf + ndig,
+ lead, zeroes);
+ cp += lead;
+#ifdef _WANT_IO_C99_FORMATS
+ if (flags & GROUPING) {
+ while (nseps > 0 || nrepeats > 0) {
+ if (nrepeats > 0)
+ nrepeats--;
+ else {
+ grouping--;
+ nseps--;
+ }
+ PRINT(thousands_sep, thsnd_len);
+ PRINTANDPAD (cp, convbuf + ndig,
+ *grouping, zeroes);
+ cp += *grouping;
+ }
+ if (cp > convbuf + ndig)
+ cp = convbuf + ndig;
+ }
+#endif
+ if (prec || flags & ALT)
+ PRINT (decimal_point, decp_len);
+ PRINTANDPAD (cp, convbuf + ndig,
+ ndig - expt, zeroes);
}
} else { /* 'a', 'A', 'e', or 'E' */
if (ndig > 1 || flags & ALT) {
Index: libc/stdio/vfwprintf.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdio/vfwprintf.c,v
retrieving revision 1.2
diff -u -p -r1.2 vfwprintf.c
--- libc/stdio/vfwprintf.c 12 Mar 2009 10:27:10 -0000 1.2
+++ libc/stdio/vfwprintf.c 1 Feb 2010 20:40:44 -0000
@@ -201,8 +201,10 @@ _DEFUN(__sbwprintf, (rptr, fp, fmt, ap),
#endif /* !STRING_ONLY */
-#ifdef FLOATING_POINT
+#if defined (FLOATING_POINT) || defined (_WANT_IO_C99_FORMATS)
# include <locale.h>
+#endif
+#ifdef FLOATING_POINT
# include <math.h>
/* For %La, an exponent of 15 bits occupies the exponent character, a
@@ -249,8 +251,16 @@ static int wexponent(wchar_t *, int, int
reentrant storage shared with mprec. All other formats that use
buf get by with fewer characters. Making BUF slightly bigger
reduces the need for malloc in %.*a and %ls/%S, when large precision or
- long strings are processed. */
+ long strings are processed.
+ The bigger size of 100 bytes is used on systems which allow number
+ strings using the locale's grouping character. Since that's a multibyte
+ value, we should use a conservative value.
+ */
+#ifdef _WANT_IO_C99_FORMATS
+#define BUF 100
+#else
#define BUF 40
+#endif
#if defined _MB_CAPABLE && MB_LEN_MAX > BUF
# undef BUF
# define BUF MB_LEN_MAX
@@ -336,6 +346,9 @@ _EXFUN(get_arg, (struct _reent *data, in
#else /* define as 0, to make SARG and UARG occupy fewer instructions */
# define CHARINT 0
#endif
+#ifdef _WANT_IO_C99_FORMATS
+# define GROUPING 0x400 /* use grouping ("'" flag) */
+#endif
#ifndef STRING_ONLY
int
@@ -378,19 +391,31 @@ _DEFUN(_VFWPRINTF_R, (data, fp, fmt0, ap
int width; /* width from format (%8d), or 0 */
int prec; /* precision from format (%.3d), or -1 */
wchar_t sign; /* sign prefix (' ', '+', '-', or \0) */
-#ifdef FLOATING_POINT
- wchar_t decimal_point;
+#ifdef _WANT_IO_C99_FORMATS
+ /* locale specific numeric grouping */
+ wchar_t thousands_sep;
+ const char *grouping;
+#endif
#ifdef _MB_CAPABLE
mbstate_t state; /* mbtowc calls from library must not change state */
#endif
+#ifdef FLOATING_POINT
+ wchar_t decimal_point;
wchar_t softsign; /* temporary negative sign for floats */
union { int i; _PRINTF_FLOAT_TYPE fp; } _double_ = {0};
# define _fpvalue (_double_.fp)
int expt; /* integer value of exponent */
int expsize = 0; /* character count for expstr */
- int ndig = 0; /* actual number of digits returned by wcvt */
wchar_t expstr[MAXEXPLEN]; /* buffer for exponent string */
+ int lead; /* sig figs before decimal or group sep */
#endif /* FLOATING_POINT */
+#if defined (FLOATING_POINT) || defined (_WANT_IO_C99_FORMATS)
+ int ndig = 0; /* actual number of digits returned by cvt */
+#endif
+#ifdef _WANT_IO_C99_FORMATS
+ int nseps; /* number of group separators with ' */
+ int nrepeats; /* number of repeats of the last group */
+#endif
u_quad_t _uquad; /* integer arguments %[diouxX] */
enum { OCT, DEC, HEX } base;/* base for [diouxX] conversion */
int dprec; /* a copy of prec if [diouxX], 0 otherwise */
@@ -419,9 +444,16 @@ _DEFUN(_VFWPRINTF_R, (data, fp, fmt0, ap
#ifdef FLOATING_POINT
#ifdef _MB_CAPABLE
- memset (&state, '\0', sizeof (state));
- _mbrtowc_r (data, &decimal_point, _localeconv_r (data)->decimal_point,
- MB_CUR_MAX, &state);
+ {
+ size_t nconv;
+
+ memset (&state, '\0', sizeof (state));
+ nconv = _mbrtowc_r (data, &decimal_point,
+ _localeconv_r (data)->decimal_point,
+ MB_CUR_MAX, &state);
+ if (nconv == (size_t) -1 || nconv == (size_t) -2)
+ decimal_point = L'.';
+ }
#else
decimal_point = (wchar_t) *_localeconv_r (data)->decimal_point;
#endif
@@ -449,6 +481,14 @@ _DEFUN(_VFWPRINTF_R, (data, fp, fmt0, ap
PRINT (with, n); \
} \
}
+#define PRINTANDPAD(p, ep, len, with) { \
+ int n = (ep) - (p); \
+ if (n > (len)) \
+ n = (len); \
+ if (n > 0) \
+ PRINT((p), n); \
+ PAD((len) - (n > 0 ? n : 0), (with)); \
+}
#define FLUSH() { \
if (uio.uio_resid && __SPRINT(data, fp, &uio)) \
goto error; \
@@ -570,6 +610,12 @@ _DEFUN(_VFWPRINTF_R, (data, fp, fmt0, ap
width = 0;
prec = -1;
sign = L'\0';
+#ifdef FLOATING_POINT
+ lead = 0;
+#endif
+#ifdef _WANT_IO_C99_FORMATS
+ nseps = nrepeats = 0;
+#endif
#ifndef _NO_POS_ARGS
N = arg_index;
is_pos_arg = 0;
@@ -579,8 +625,23 @@ rflag: ch = *fmt++;
reswitch: switch (ch) {
#ifdef _WANT_IO_C99_FORMATS
case L'\'':
- /* The ' flag is required by POSIX, but not C99.
- FIXME: this flag is currently a no-op. */
+#ifdef _MB_CAPABLE
+ {
+ size_t nconv;
+
+ memset (&state, '\0', sizeof (state));
+ nconv = _mbrtowc_r (data, &thousands_sep,
+ _localeconv_r (data)->thousands_sep,
+ MB_CUR_MAX, &state);
+ if (nconv == (size_t) -1 || nconv == (size_t) -2)
+ thousands_sep = L'\0';
+ }
+#else
+ thousands_sep = (wchar_t) *_localeconv_r(data)->thousands_sep;
+#endif
+ grouping = _localeconv_r (data)->grouping;
+ if (thousands_sep && grouping && *grouping)
+ flags |= GROUPING;
goto rflag;
#endif
case L' ':
@@ -942,23 +1003,46 @@ reswitch: switch (ch) {
size = expsize + ndig;
if (ndig > 1 || flags & ALT)
++size;
- } else if (ch == L'f') { /* f fmt */
- if (expt > 0) {
+# ifdef _WANT_IO_C99_FORMATS
+ flags &= ~GROUPING;
+# endif
+ } else {
+ if (ch == L'f') { /* f fmt */
+ if (expt > 0) {
+ size = expt;
+ if (prec || flags & ALT)
+ size += prec + 1;
+ } else /* "0.X" */
+ size = (prec || flags & ALT)
+ ? prec + 2
+ : 1;
+ } else if (expt >= ndig) { /* fixed g fmt */
size = expt;
- if (prec || flags & ALT)
- size += prec + 1;
- } else /* "0.X" */
- size = (prec || flags & ALT)
- ? prec + 2
- : 1;
- } else if (expt >= ndig) { /* fixed g fmt */
- size = expt;
- if (flags & ALT)
- ++size;
- } else
- size = ndig + (expt > 0 ?
- 1 : 2 - expt);
-
+ if (flags & ALT)
+ ++size;
+ } else
+ size = ndig + (expt > 0 ?
+ 1 : 2 - expt);
+# ifdef _WANT_IO_C99_FORMATS
+ if ((flags & GROUPING) && expt > 0) {
+ /* space for thousands' grouping */
+ nseps = nrepeats = 0;
+ lead = expt;
+ while (*grouping != CHAR_MAX) {
+ if (lead <= *grouping)
+ break;
+ lead -= *grouping;
+ if (grouping[1]) {
+ nseps++;
+ grouping++;
+ } else
+ nrepeats++;
+ }
+ size += nseps + nrepeats;
+ } else
+# endif
+ lead = expt;
+ }
if (softsign)
sign = L'-';
break;
@@ -983,6 +1067,9 @@ reswitch: switch (ch) {
case L'o':
_uquad = UARG ();
base = OCT;
+#ifdef _WANT_IO_C99_FORMATS
+ flags &= ~GROUPING;
+#endif
goto nosign;
case L'p':
/*
@@ -1106,6 +1193,9 @@ hex: _uquad = UARG ();
flags |= HEXPREFIX;
}
+#ifdef _WANT_IO_C99_FORMATS
+ flags &= ~GROUPING;
+#endif
/* unsigned conversions */
nosign: sign = L'\0';
/*
@@ -1141,11 +1231,35 @@ number: if ((dprec = prec) >= 0)
case DEC:
/* many numbers are 1 digit */
- while (_uquad >= 10) {
- *--cp = to_char (_uquad % 10);
- _uquad /= 10;
+ if (_uquad < 10) {
+ *--cp = to_char(_uquad);
+ break;
}
- *--cp = to_char (_uquad);
+#ifdef _WANT_IO_C99_FORMATS
+ ndig = 0;
+#endif
+ do {
+ *--cp = to_char (_uquad % 10);
+#ifdef _WANT_IO_C99_FORMATS
+ ndig++;
+ /* If (*grouping == CHAR_MAX) then no
+ more grouping */
+ if ((flags & GROUPING)
+ && ndig == *grouping
+ && *grouping != CHAR_MAX
+ && _uquad > 9) {
+ *--cp = thousands_sep;
+ ndig = 0;
+ /* If (grouping[1] == '\0') then we
+ have to use *grouping character
+ (last grouping rule) for all
+ next cases. */
+ if (grouping[1] != '\0')
+ grouping++;
+ }
+#endif
+ _uquad /= 10;
+ } while (_uquad != 0);
break;
case HEX:
@@ -1245,17 +1359,35 @@ number: if ((dprec = prec) >= 0)
PAD (-expt, zeroes);
PRINT (cp, ndig);
}
- } else if (expt >= ndig) {
- PRINT (cp, ndig);
- PAD (expt - ndig, zeroes);
- if (flags & ALT)
- PRINT (&decimal_point, 1);
} else {
- PRINT (cp, expt);
- cp += expt;
- PRINT (&decimal_point, 1);
- PRINT (cp, ndig - expt);
+ wchar_t *convbuf = cp;
+ PRINTANDPAD(cp, convbuf + ndig,
+ lead, zeroes);
+ cp += lead;
+#ifdef _WANT_IO_C99_FORMATS
+ if (flags & GROUPING) {
+ while (nseps > 0 || nrepeats > 0) {
+ if (nrepeats > 0)
+ nrepeats--;
+ else {
+ grouping--;
+ nseps--;
+ }
+ PRINT (&thousands_sep, 1);
+ PRINTANDPAD (cp, convbuf + ndig,
+ *grouping, zeroes);
+ cp += *grouping;
+ }
+ if (cp > convbuf + ndig)
+ cp = convbuf + ndig;
+ }
+#endif
+ if (prec || flags & ALT)
+ PRINT (&decimal_point, 1);
+ PRINTANDPAD (cp, convbuf + ndig,
+ ndig - expt, zeroes);
}
+
} else { /* 'a', 'A', 'e', or 'E' */
if (ndig > 1 || flags & ALT) {
PRINT (cp, 1);
--
Corinna Vinschen
Cygwin Project Co-Leader
Red Hat
More information about the Newlib
mailing list