[PATCH] handle multibyte decimalpoint
Jeff Johnston
jjohnstn@redhat.com
Tue Jun 16 16:09:00 GMT 2009
OK to check in. It would be nice if the decimal point length were stored
somewhere so we didn't have to recalculate it on every call.
-- Jeff J.
Corinna Vinschen wrote:
> Ping?
>
> On Jun 8 14:38, Corinna Vinschen wrote:
>
>> Hi,
>>
>>
>> This patch sort of got lost. I'd like to suggest (in part again) the
>> below patch to vfprintf, strtod, wcstod, and gethex, to make the
>> handling of the radix character multibyte aware.
>>
>> The decimal point in vfprintf is now printed with the correct number of
>> bytes, not just assuming it's 1 byte long.
>>
>> I changed strtod and wcstod accordingly to cope correctly with multibyte
>> decimal points.
>>
>> The patch to gethex maintains the right-to-left reading order in
>> the main loop, while allowing a multibyte decimal point to be specified. In
>> the original code, decpt is pointing to the character succeeding the
>> decimal point. The pointer s is always >= decpt. The below code also
>> sets decpt to the next char succeeding the multibyte decimal point and s
>> is still >= decpt. So the calculation of e is still the same as in the
>> original code.
>> The main loop now checks for the trailing byte in the decimal point char
>> and if it hits that char, it first checks if the strncmp would still be
>> within the s0 margin. If the decimal point has been detected, s1 is set
>> back to the leading byte of the decimal point character. Then the loop
>> continues with the next *--s1 which points to the next digit preceding
>> the decimal point.
>>
>> Additionally I removed the USE_LOCALE define in gdtoa-gethex.c and
>> strtod.c. It's not checked or set anywhere in newlib's configury and
>> only used in these two files. However, _localeconv_r and the lconv
>> structure are defined on all systems anyway.
>>
>> I tested this patch on Cygwin with singlebyte, doublebyte, and
>> triplebyte radix chars. Note that you can only perform this test when
>> you de-constify struct lconv in libc/locale/locale.c. Otherwise you
>> just can't substitute the decimal_point element without a segfault.
>> However, I didn't apply this change. It doesn't make sense as long as
>> newlib doesn't really support setting of the LC_NUMERIC locale category.
>>
>> wcstod obviously only works correctly if the multibyte radix character
>> is a *valid* multibyte character. Using something like "$%" for testing
>> will work with strtod, but wcstod will choke on the non-convertible
>> character for hopefully obvious reasons.
>>
>>
>> Corinna
>>
>>
>> * libc/stdio/vfprintf.c (_VFPRINTF_R): Use actual length of
>> radix char instead of assuming length 1.
>> * libc/stdlib/gdtoa-gethex.c: Remove use of USE_LOCALE.
>> (gethex): Allow multibyte decimal point.
>> Fix compiler warnings due to different signedness of pointer types.
>> * libc/stdlib/strtod.c: Remove use of USE_LOCALE.
>> (_strtod_r): Allow multibyte decimal point.
>> * libc/stdlib/wcstod.c (_wcstod_r): Evaluate correct wide char
>> endptr position if the decimal point is a multibyte char.
>>
>>
>> Index: libc/stdio/vfprintf.c
>> ===================================================================
>> RCS file: /cvs/src/src/newlib/libc/stdio/vfprintf.c,v
>> retrieving revision 1.74
>> diff -u -p -r1.74 vfprintf.c
>> --- libc/stdio/vfprintf.c 11 Mar 2009 11:53:22 -0000 1.74
>> +++ libc/stdio/vfprintf.c 8 Jun 2009 12:02:52 -0000
>> @@ -553,6 +553,7 @@ _DEFUN(_VFPRINTF_R, (data, fp, fmt0, ap)
>> char sign; /* sign prefix (' ', '+', '-', or \0) */
>> #ifdef FLOATING_POINT
>> char *decimal_point = _localeconv_r (data)->decimal_point;
>> + size_t decp_len = strlen (decimal_point);
>> char softsign; /* temporary negative sign for floats */
>> union { int i; _PRINTF_FLOAT_TYPE fp; } _double_ = {0};
>> # define _fpvalue (_double_.fp)
>> @@ -1441,13 +1442,13 @@ number: if ((dprec = prec) >= 0)
>> /* kludge for __dtoa irregularity */
>> PRINT ("0", 1);
>> if (expt < ndig || flags & ALT) {
>> - PRINT (decimal_point, 1);
>> + PRINT (decimal_point, decp_len);
>> PAD (ndig - 1, zeroes);
>> }
>> } else if (expt <= 0) {
>> PRINT ("0", 1);
>> if (expt || ndig || flags & ALT) {
>> - PRINT (decimal_point, 1);
>> + PRINT (decimal_point, decp_len);
>> PAD (-expt, zeroes);
>> PRINT (cp, ndig);
>> }
>> @@ -1455,18 +1456,18 @@ number: if ((dprec = prec) >= 0)
>> PRINT (cp, ndig);
>> PAD (expt - ndig, zeroes);
>> if (flags & ALT)
>> - PRINT (decimal_point, 1);
>> + PRINT (decimal_point, decp_len);
>> } else {
>> PRINT (cp, expt);
>> cp += expt;
>> - PRINT (decimal_point, 1);
>> + PRINT (decimal_point, decp_len);
>> PRINT (cp, ndig - expt);
>> }
>> } else { /* 'a', 'A', 'e', or 'E' */
>> if (ndig > 1 || flags & ALT) {
>> PRINT (cp, 1);
>> cp++;
>> - PRINT (decimal_point, 1);
>> + PRINT (decimal_point, decp_len);
>> if (_fpvalue) {
>> PRINT (cp, ndig - 1);
>> } else /* 0.[0..] */
>> Index: libc/stdlib/gdtoa-gethex.c
>> ===================================================================
>> RCS file: /cvs/src/src/newlib/libc/stdlib/gdtoa-gethex.c,v
>> retrieving revision 1.3
>> diff -u -p -r1.3 gdtoa-gethex.c
>> --- libc/stdlib/gdtoa-gethex.c 26 Mar 2009 10:04:40 -0000 1.3
>> +++ libc/stdlib/gdtoa-gethex.c 8 Jun 2009 12:02:52 -0000
>> @@ -35,10 +35,7 @@ THIS SOFTWARE.
>> #include "mprec.h"
>> #include "gdtoa.h"
>> #include "gd_qnan.h"
>> -
>> -#ifdef USE_LOCALE
>> #include "locale.h"
>> -#endif
>>
>> unsigned char hexdig[256];
>>
>> @@ -151,11 +148,10 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, s
>> int esign, havedig, irv, k, n, nbits, up, zret;
>> __ULong L, lostbits, *x;
>> Long e, e1;
>> -#ifdef USE_LOCALE
>> - unsigned char decimalpoint = *localeconv()->decimal_point;
>> -#else
>> -#define decimalpoint '.'
>> -#endif
>> + unsigned char *decimalpoint = (unsigned char *)
>> + _localeconv_r (ptr)->decimal_point;
>> + size_t decp_len = strlen ((const char *) decimalpoint);
>> + unsigned char decp_end = decimalpoint[decp_len - 1];
>>
>> if (!hexdig['0'])
>> hexdig_init();
>> @@ -170,9 +166,10 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, s
>> e = 0;
>> if (!hexdig[*s]) {
>> zret = 1;
>> - if (*s != decimalpoint)
>> + if (strncmp ((const char *) s, (const char *) decimalpoint,
>> + decp_len) != 0)
>> goto pcheck;
>> - decpt = ++s;
>> + decpt = (s += decp_len);
>> if (!hexdig[*s])
>> goto pcheck;
>> while(*s == '0')
>> @@ -184,8 +181,10 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, s
>> }
>> while(hexdig[*s])
>> s++;
>> - if (*s == decimalpoint && !decpt) {
>> - decpt = ++s;
>> + if (strncmp ((const char *) s, (const char *) decimalpoint,
>> + decp_len) == 0
>> + && !decpt) {
>> + decpt = (s += decp_len);
>> while(hexdig[*s])
>> s++;
>> }
>> @@ -226,8 +225,12 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, s
>> n = 0;
>> L = 0;
>> while(s1 > s0) {
>> - if (*--s1 == decimalpoint)
>> + if (*--s1 == decp_end && s1 - decp_len + 1 >= s0
>> + && strncmp ((const char *) s1 - decp_len + 1,
>> + (const char *) decimalpoint, decp_len) == 0) {
>> + s1 -= decp_len - 1; /* Note the --s1 above! */
>> continue;
>> + }
>> if (n == 32) {
>> *x++ = L;
>> L = 0;
>> Index: libc/stdlib/strtod.c
>> ===================================================================
>> RCS file: /cvs/src/src/newlib/libc/stdlib/strtod.c,v
>> retrieving revision 1.14
>> diff -u -p -r1.14 strtod.c
>> --- libc/stdlib/strtod.c 26 Mar 2009 10:04:40 -0000 1.14
>> +++ libc/stdlib/strtod.c 8 Jun 2009 12:02:52 -0000
>> @@ -122,9 +122,7 @@ THIS SOFTWARE.
>> /* #include <fenv.h> */
>> /* #endif */
>>
>> -#ifdef USE_LOCALE
>> #include "locale.h"
>> -#endif
>>
>> #ifdef IEEE_Arith
>> #ifndef NO_IEEE_Scale
>> @@ -307,14 +305,11 @@ _DEFUN (_strtod_r, (ptr, s00, se),
>> else if (nd < 16)
>> z = 10*z + c - '0';
>> nd0 = nd;
>> -#ifdef USE_LOCALE
>> - if (c == *localeconv()->decimal_point)
>> -#else
>> - if (c == '.')
>> -#endif
>> + if (strncmp (s, _localeconv_r (ptr)->decimal_point,
>> + strlen (_localeconv_r (ptr)->decimal_point)) == 0)
>> {
>> decpt = 1;
>> - c = *++s;
>> + c = *(s += strlen (_localeconv_r (ptr)->decimal_point));
>> if (!nd) {
>> for(; c == '0'; c = *++s)
>> nz++;
>> Index: libc/stdlib/wcstod.c
>> ===================================================================
>> RCS file: /cvs/src/src/newlib/libc/stdlib/wcstod.c,v
>> retrieving revision 1.5
>> diff -u -p -r1.5 wcstod.c
>> --- libc/stdlib/wcstod.c 26 Mar 2009 10:04:40 -0000 1.5
>> +++ libc/stdlib/wcstod.c 8 Jun 2009 12:02:52 -0000
>> @@ -116,8 +116,10 @@ Supporting OS subroutines required: <<cl
>> #include <_ansi.h>
>> #include <errno.h>
>> #include <stdlib.h>
>> +#include <string.h>
>> #include <wchar.h>
>> #include <wctype.h>
>> +#include <locale.h>
>> #include <math.h>
>>
>> double
>> @@ -167,9 +169,26 @@ _DEFUN (_wcstod_r, (ptr, nptr, endptr),
>> * where it ended, count multibyte characters to find the
>> * corresponding position in the wide char string.
>> */
>> - if (endptr != NULL)
>> - /* XXX Assume each wide char is one byte. */
>> + if (endptr != NULL) {
>> + /* The only valid multibyte char in a float converted by
>> + strtod/wcstod is the radix char. What we do here is,
>> + figure out if the radix char was in the valid leading
>> + float sequence in the incoming string. If so, the
>> + multibyte float string is strlen(radix char) - 1 bytes
>> + longer than the incoming wide char string has characters.
>> + To fix endptr, reposition end as if the radix char was
>> + just one byte long. The resulting difference (end - buf)
>> + is then equivalent to the number of valid wide characters
>> + in the input string. */
>> + len = strlen (_localeconv_r (ptr)->decimal_point);
>> + if (len > 1) {
>> + char *d = strstr (buf,
>> + _localeconv_r (ptr)->decimal_point);
>> + if (d && d < end)
>> + end -= len - 1;
>> + }
>> *endptr = (wchar_t *)nptr + (end - buf);
>> + }
>>
>> _free_r(ptr, buf);
>>
>>
>
>
More information about the Newlib
mailing list