[PATCH] handle multibyte decimalpoint
Corinna Vinschen
vinschen@redhat.com
Tue Jun 16 15:56:00 GMT 2009
Ping?
On Jun 8 14:38, Corinna Vinschen wrote:
> Hi,
>
>
> This patch sort of got lost. I'd like to suggest (in part again) the
> below patch to vfprintf, strtod, wcstod, and gethex, to make the
> handling of the radix character multibyte aware.
>
> The decimal point in vfprintf is now printed with the correct number of
> bytes, not just assuming it's 1 byte long.
>
> I changed strtod and wcstod accordingly to cope correctly with multibyte
> decimal points.
>
> The patch to gethex is maintaining the right to left reading order in
> the main loop, while allowing a multibyte decimal point to be specified. In
> the original code, decpt is pointing to the character succeeding the
> decimal point. The pointer s is always >= decpt. The below code also
> sets decpt to the next char succeeding the multibyte decimal point and s
> is still >= decpt. So the calculation of e is still the same as in the
> original code.
> The main loop now checks for the trailing byte in the decimal point char
> and if it hits that char, it first checks if the strncmp would still be
> within the s0 margin. If the decimal point has been detected, s1 is set
> back to the leading byte of the decimal point character. Then the loop
> continues with the next *--s1 which points to the next digit preceding
> the decimal point.
>
> Additionally I removed the USE_LOCALE define in gdtoa-gethex.c and
> strtod.c. It's not checked or set anywhere in newlib's configury and
> only used in these two files. However, _localeconv_r and the lconv
> structure are defined on all systems anyway.
>
> I tested this patch on Cygwin with singlebyte, doublebyte, and
> triplebyte radix chars. Note that you can only perform this test when
> you de-constify struct lconv in libc/locale/locale.c. Otherwise you
> just can't substitute the decimal_point element without a segfault.
> However, I didn't apply this change. It doesn't make sense as long as
> newlib doesn't really support setting of the LC_NUMERIC locale category.
>
> wcstod obviously only works correctly if the multibyte radix character
> is a *valid* multibyte character. Using something like "$%" for testing
> will work with strtod, but wcstod will choke on the non-convertible
> character for hopefully obvious reasons.
>
>
> Corinna
>
>
> * libc/stdio/vfprintf.c (_VFPRINTF_R): Use actual length of
> radix char instead of assuming length 1.
> * libc/stdlib/gdtoa-gethex.c: Remove use of USE_LOCALE.
> (gethex): Allow multibyte decimal point.
> Fix compiler warnings due to different signedness of pointer types.
> * libc/stdlib/strtod.c: Remove use of USE_LOCALE.
> (_strtod_r): Allow multibyte decimal point.
> * libc/stdlib/wcstod.c (_wcstod_r): Evaluate correct wide char
> endptr position if the decimal point is a multibyte char.
>
>
> Index: libc/stdio/vfprintf.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/stdio/vfprintf.c,v
> retrieving revision 1.74
> diff -u -p -r1.74 vfprintf.c
> --- libc/stdio/vfprintf.c 11 Mar 2009 11:53:22 -0000 1.74
> +++ libc/stdio/vfprintf.c 8 Jun 2009 12:02:52 -0000
> @@ -553,6 +553,7 @@ _DEFUN(_VFPRINTF_R, (data, fp, fmt0, ap)
> char sign; /* sign prefix (' ', '+', '-', or \0) */
> #ifdef FLOATING_POINT
> char *decimal_point = _localeconv_r (data)->decimal_point;
> + size_t decp_len = strlen (decimal_point);
> char softsign; /* temporary negative sign for floats */
> union { int i; _PRINTF_FLOAT_TYPE fp; } _double_ = {0};
> # define _fpvalue (_double_.fp)
> @@ -1441,13 +1442,13 @@ number: if ((dprec = prec) >= 0)
> /* kludge for __dtoa irregularity */
> PRINT ("0", 1);
> if (expt < ndig || flags & ALT) {
> - PRINT (decimal_point, 1);
> + PRINT (decimal_point, decp_len);
> PAD (ndig - 1, zeroes);
> }
> } else if (expt <= 0) {
> PRINT ("0", 1);
> if (expt || ndig || flags & ALT) {
> - PRINT (decimal_point, 1);
> + PRINT (decimal_point, decp_len);
> PAD (-expt, zeroes);
> PRINT (cp, ndig);
> }
> @@ -1455,18 +1456,18 @@ number: if ((dprec = prec) >= 0)
> PRINT (cp, ndig);
> PAD (expt - ndig, zeroes);
> if (flags & ALT)
> - PRINT (decimal_point, 1);
> + PRINT (decimal_point, decp_len);
> } else {
> PRINT (cp, expt);
> cp += expt;
> - PRINT (decimal_point, 1);
> + PRINT (decimal_point, decp_len);
> PRINT (cp, ndig - expt);
> }
> } else { /* 'a', 'A', 'e', or 'E' */
> if (ndig > 1 || flags & ALT) {
> PRINT (cp, 1);
> cp++;
> - PRINT (decimal_point, 1);
> + PRINT (decimal_point, decp_len);
> if (_fpvalue) {
> PRINT (cp, ndig - 1);
> } else /* 0.[0..] */
> Index: libc/stdlib/gdtoa-gethex.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/stdlib/gdtoa-gethex.c,v
> retrieving revision 1.3
> diff -u -p -r1.3 gdtoa-gethex.c
> --- libc/stdlib/gdtoa-gethex.c 26 Mar 2009 10:04:40 -0000 1.3
> +++ libc/stdlib/gdtoa-gethex.c 8 Jun 2009 12:02:52 -0000
> @@ -35,10 +35,7 @@ THIS SOFTWARE.
> #include "mprec.h"
> #include "gdtoa.h"
> #include "gd_qnan.h"
> -
> -#ifdef USE_LOCALE
> #include "locale.h"
> -#endif
>
> unsigned char hexdig[256];
>
> @@ -151,11 +148,10 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, s
> int esign, havedig, irv, k, n, nbits, up, zret;
> __ULong L, lostbits, *x;
> Long e, e1;
> -#ifdef USE_LOCALE
> - unsigned char decimalpoint = *localeconv()->decimal_point;
> -#else
> -#define decimalpoint '.'
> -#endif
> + unsigned char *decimalpoint = (unsigned char *)
> + _localeconv_r (ptr)->decimal_point;
> + size_t decp_len = strlen ((const char *) decimalpoint);
> + unsigned char decp_end = decimalpoint[decp_len - 1];
>
> if (!hexdig['0'])
> hexdig_init();
> @@ -170,9 +166,10 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, s
> e = 0;
> if (!hexdig[*s]) {
> zret = 1;
> - if (*s != decimalpoint)
> + if (strncmp ((const char *) s, (const char *) decimalpoint,
> + decp_len) != 0)
> goto pcheck;
> - decpt = ++s;
> + decpt = (s += decp_len);
> if (!hexdig[*s])
> goto pcheck;
> while(*s == '0')
> @@ -184,8 +181,10 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, s
> }
> while(hexdig[*s])
> s++;
> - if (*s == decimalpoint && !decpt) {
> - decpt = ++s;
> + if (strncmp ((const char *) s, (const char *) decimalpoint,
> + decp_len) == 0
> + && !decpt) {
> + decpt = (s += decp_len);
> while(hexdig[*s])
> s++;
> }
> @@ -226,8 +225,12 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, s
> n = 0;
> L = 0;
> while(s1 > s0) {
> - if (*--s1 == decimalpoint)
> + if (*--s1 == decp_end && s1 - decp_len + 1 >= s0
> + && strncmp ((const char *) s1 - decp_len + 1,
> + (const char *) decimalpoint, decp_len) == 0) {
> + s1 -= decp_len - 1; /* Note the --s1 above! */
> continue;
> + }
> if (n == 32) {
> *x++ = L;
> L = 0;
> Index: libc/stdlib/strtod.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/stdlib/strtod.c,v
> retrieving revision 1.14
> diff -u -p -r1.14 strtod.c
> --- libc/stdlib/strtod.c 26 Mar 2009 10:04:40 -0000 1.14
> +++ libc/stdlib/strtod.c 8 Jun 2009 12:02:52 -0000
> @@ -122,9 +122,7 @@ THIS SOFTWARE.
> /* #include <fenv.h> */
> /* #endif */
>
> -#ifdef USE_LOCALE
> #include "locale.h"
> -#endif
>
> #ifdef IEEE_Arith
> #ifndef NO_IEEE_Scale
> @@ -307,14 +305,11 @@ _DEFUN (_strtod_r, (ptr, s00, se),
> else if (nd < 16)
> z = 10*z + c - '0';
> nd0 = nd;
> -#ifdef USE_LOCALE
> - if (c == *localeconv()->decimal_point)
> -#else
> - if (c == '.')
> -#endif
> + if (strncmp (s, _localeconv_r (ptr)->decimal_point,
> + strlen (_localeconv_r (ptr)->decimal_point)) == 0)
> {
> decpt = 1;
> - c = *++s;
> + c = *(s += strlen (_localeconv_r (ptr)->decimal_point));
> if (!nd) {
> for(; c == '0'; c = *++s)
> nz++;
> Index: libc/stdlib/wcstod.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/stdlib/wcstod.c,v
> retrieving revision 1.5
> diff -u -p -r1.5 wcstod.c
> --- libc/stdlib/wcstod.c 26 Mar 2009 10:04:40 -0000 1.5
> +++ libc/stdlib/wcstod.c 8 Jun 2009 12:02:52 -0000
> @@ -116,8 +116,10 @@ Supporting OS subroutines required: <<cl
> #include <_ansi.h>
> #include <errno.h>
> #include <stdlib.h>
> +#include <string.h>
> #include <wchar.h>
> #include <wctype.h>
> +#include <locale.h>
> #include <math.h>
>
> double
> @@ -167,9 +169,26 @@ _DEFUN (_wcstod_r, (ptr, nptr, endptr),
> * where it ended, count multibyte characters to find the
> * corresponding position in the wide char string.
> */
> - if (endptr != NULL)
> - /* XXX Assume each wide char is one byte. */
> + if (endptr != NULL) {
> + /* The only valid multibyte char in a float converted by
> + strtod/wcstod is the radix char. What we do here is,
> + figure out if the radix char was in the valid leading
> + float sequence in the incoming string. If so, the
> + multibyte float string is strlen(radix char) - 1 bytes
> + longer than the incoming wide char string has characters.
> + To fix endptr, reposition end as if the radix char was
> + just one byte long. The resulting difference (end - buf)
> + is then equivalent to the number of valid wide characters
> + in the input string. */
> + len = strlen (_localeconv_r (ptr)->decimal_point);
> + if (len > 1) {
> + char *d = strstr (buf,
> + _localeconv_r (ptr)->decimal_point);
> + if (d && d < end)
> + end -= len - 1;
> + }
> *endptr = (wchar_t *)nptr + (end - buf);
> + }
>
> _free_r(ptr, buf);
>
--
Corinna Vinschen
Cygwin Project Co-Leader
Red Hat
More information about the Newlib
mailing list