This is the mail archive of the newlib@sourceware.org mailing list for the newlib project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] *scanf: Allow language-dependent radix character


Hi,


if possible, I'd like to have this patch in this year's newlib snapshot,
but I'd prefer to have another set of eyes having a good look.

For quite some time, newlib's *printf family of functions prints the
radix character in a language-dependent way, as required by POSIX.

Unfortunately the *scanf family of functions neglected to do the same
for input strings, even though required by POSIX, too.

So I created a patch to handle the radix char in a language-dependent way
in vfscanf.c and vfwscanf.c.  The only code difference in vfwscanf and in
the !_MB_CAPABLE case of vfscanf is that, rather than testing for the
constant '.', I added a default case which checks for the decimal_point
character from the locale information, which is always available
anyway, see vfprintf.c.

The _MB_CAPABLE case in vfscanf is a bit more tricky.  It reads one byte
of the radix char at a time, but it does not push them to the output
buffer.  Rather, it collects all bytes of the multibyte char and only if
the character is complete, it pushes the whole multibyte char to the output
buffer.  If the input loop ends with an incomplete radix multibyte sequence,
the whole sequence is ungetc'ed and treated as the end of the floating point
input.

If you wonder about the (unsigned char) casts, these are necessary
because the variable 'c' is of type signed int, which breaks the
comparison for non-ASCII char values without the cast.

The code creating the decpt value at the start of vfwscanf is the same
code as in vfwprintf.


Thanks for any review,
Corinna


	* vfscanf.c (__SVFSCANF_R): Handle radix char language-dependent
	per POSIX.
	* vfwscanf.c (__SVFWSCANF_R): Ditto.


Index: libc/stdio/vfscanf.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdio/vfscanf.c,v
retrieving revision 1.54
diff -u -p -r1.54 vfscanf.c
--- libc/stdio/vfscanf.c	29 Apr 2013 21:06:23 -0000	1.54
+++ libc/stdio/vfscanf.c	17 Dec 2013 21:51:21 -0000
@@ -162,6 +162,7 @@ Supporting OS subroutines required:
 #ifdef FLOATING_POINT
 #include <math.h>
 #include <float.h>
+#include <locale.h>
 
 /* Currently a test is made to see if long double processing is warranted.
    This could be changed in the future should the _ldtoa_r code be
@@ -1288,6 +1289,10 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 	  unsigned width_left = 0;
 	  char nancount = 0;
 	  char infcount = 0;
+	  const char *decpt = _localeconv_r (rptr)->decimal_point;
+#ifdef _MB_CAPABLE
+	  int decptpos = 0;
+#endif
 #ifdef hardway
 	  if (width == 0 || width > sizeof (buf) - 1)
 #else
@@ -1416,14 +1421,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 		      goto fok;
 		    }
 		  break;
-		case '.':
-		  if (flags & DPTOK)
-		    {
-		      flags &= ~(SIGNOK | DPTOK);
-		      leading_zeroes = zeroes;
-		      goto fok;
-		    }
-		  break;
 		case 'e':
 		case 'E':
 		  /* no exponent without some digits */
@@ -1442,6 +1439,52 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 		      goto fok;
 		    }
 		  break;
+		default:
+#ifndef _MB_CAPABLE
+		  if ((unsigned char) c == (unsigned char) decpt[0]
+		      && (flags & DPTOK))
+		    {
+		      flags &= ~(SIGNOK | DPTOK);
+		      leading_zeroes = zeroes;
+		      goto fok;
+		    }
+		  break;
+#else
+		  if (flags & DPTOK)
+		    {
+		      while ((unsigned char) c
+			     == (unsigned char) decpt[decptpos])
+			{
+			  if (decpt[++decptpos] == '\0')
+			    {
+			      /* We read the complete decpt seq. */
+			      flags &= ~(SIGNOK | DPTOK);
+			      leading_zeroes = zeroes;
+			      p = stpncpy (p, decpt, decptpos);
+			      decptpos = 0;
+			      goto fskip;
+			    }
+			  ++nread;
+			  if (--fp->_r > 0)
+			    fp->_p++;
+			  else if (__srefill_r (rptr, fp))
+			    break;		/* EOF */
+			  c = *fp->_p;
+			}
+		      if (decptpos > 0)
+			{
+			  /* We read part of a multibyte decimal point,
+			     but the rest is invalid or we're at EOF,
+			     so back off. */
+			  while (decptpos-- > 0)
+			    {
+			      _ungetc_r (rptr, decpt[decptpos], fp);
+			      --nread;
+			    }
+			}
+		    }
+		  break;
+#endif
 		}
 	      break;
 	    fok:
Index: libc/stdio/vfwscanf.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdio/vfwscanf.c,v
retrieving revision 1.5
diff -u -p -r1.5 vfwscanf.c
--- libc/stdio/vfwscanf.c	26 Nov 2013 17:21:00 -0000	1.5
+++ libc/stdio/vfwscanf.c	17 Dec 2013 21:51:22 -0000
@@ -161,6 +161,9 @@ C99, POSIX-1.2008
 #ifdef FLOATING_POINT
 #include <math.h>
 #include <float.h>
+#ifdef __HAVE_LOCALE_INFO_EXTENDED__
+#include "../locale/lnumeric.h"
+#endif
 
 /* Currently a test is made to see if long double processing is warranted.
    This could be changed in the future should the _ldtoa_r code be
@@ -414,6 +417,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, a
   float *flp;
   _LONG_DOUBLE *ldp;
   double *dp;
+  wchar_t decpt;
 #endif
   long *lp;
 #ifndef _NO_LONGLONG
@@ -440,6 +444,27 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, a
 # define GET_ARG(n, ap, type) (va_arg (ap, type))
 #endif
 
+#ifdef FLOATING_POINT
+#ifdef _MB_CAPABLE
+#ifdef __HAVE_LOCALE_INFO_EXTENDED__
+	  decpt = *__get_current_numeric_locale ()->wdecimal_point;
+#else
+	  {
+	    size_t nconv;
+
+	    memset (&state, '\0', sizeof (state));
+	    nconv = _mbrtowc_r (data, &decpt,
+				_localeconv_r (data)->decimal_point,
+				MB_CUR_MAX, &state);
+	    if (nconv == (size_t) -1 || nconv == (size_t) -2)
+	      decpt = L'.';
+	  }
+#endif /* !__HAVE_LOCALE_INFO_EXTENDED__ */
+#else
+	  decpt = (wchar_t) *_localeconv_r (data)->decimal_point;
+#endif /* !_MB_CAPABLE */
+#endif /* FLOATING_POINT */
+
   _newlib_flockfile_start (fp);
 
   ORIENT (fp, 1);
@@ -1271,14 +1296,6 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, a
 		      goto fok;
 		    }
 		  break;
-		case L'.':
-		  if (flags & DPTOK)
-		    {
-		      flags &= ~(SIGNOK | DPTOK);
-		      leading_zeroes = zeroes;
-		      goto fok;
-		    }
-		  break;
 		case L'e':
 		case L'E':
 		  /* no exponent without some digits */
@@ -1297,6 +1314,14 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, a
 		      goto fok;
 		    }
 		  break;
+		default:
+		  if ((wchar_t) c == decpt && (flags & DPTOK))
+		    {
+		      flags &= ~(SIGNOK | DPTOK);
+		      leading_zeroes = zeroes;
+		      goto fok;
+		    }
+		  break;
 		}
 	      if (c != WEOF)
 		_ungetwc_r (rptr, c, fp);


-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat

Attachment: pgp4czTFFPDFj.pgp
Description: PGP signature


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]