This is the mail archive of the newlib@sourceware.org mailing list for the newlib project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [patch] *scanf: Allow language-dependent radix character


In addition to anyone else's comments, one issue I would note is that the value of
BUF, which is used as the size of buf for floating point, assumes a single-character decimal point.
I believe that should be incremented by MB_LEN_MAX in the MB case to compensate for the decimal point.

A minor issue is that _ungetc_r can possibly fail and there is no check.

-- Jeff J.

----- Original Message -----
From: "Corinna Vinschen" <vinschen@redhat.com>
To: newlib@sourceware.org
Sent: Tuesday, December 17, 2013 4:54:15 PM
Subject: [patch] *scanf: Allow language-dependent radix character

Hi,


if possible, I'd like to have this patch in this year's newlib snapshot,
but I'd prefer to have another set of eyes having a good look.

For quite some time, newlib's *printf family of functions prints the
radix character in a language-dependent way, as required by POSIX.

Unfortunately the *scanf family of functions neglected to do the same
for input strings, even though required by POSIX, too.

So I created a patch to handle the radix char language-dependent in
vfscanf.c and vfwscanf.c.  The only code difference in vfwscanf and in
the !_MB_CAPABLE case of vfscanf is that, rather than testing for the
constant '.', I added a default case and check for the decimal_point
character from the locale information, which is always available
anyway, see vfprintf.c.

The _MB_CAPABLE case in vfscanf is a bit more tricky.  It reads one byte
of the radix char at a time, but it does not push them to the output
buffer.  Rather, it collects all bytes of the multibyte char and only if
the character is complete, it pushes the whole multibyte char to the output
buffer.  If the input loop ends with an incomplete radix multibyte sequence,
the whole sequence is ungetc'ed and treated as the end of the floating point
input.

If you wonder about the (unsigned char) casts, these are necessary
because the variable 'c' is of type signed int, which breaks the
comparison for non-ASCII char values without the cast.

The code creating the decpt value at the start of vfwscanf is the same
code as in vfwprintf.


Thanks for any review,
Corinna


	* vfscanf.c (__SVFSCANF_R): Handle radix char language-dependent
	per POSIX.
	(__SVFWSCANF_R): Ditto.


Index: libc/stdio/vfscanf.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdio/vfscanf.c,v
retrieving revision 1.54
diff -u -p -r1.54 vfscanf.c
--- libc/stdio/vfscanf.c	29 Apr 2013 21:06:23 -0000	1.54
+++ libc/stdio/vfscanf.c	17 Dec 2013 21:51:21 -0000
@@ -162,6 +162,7 @@ Supporting OS subroutines required:
 #ifdef FLOATING_POINT
 #include <math.h>
 #include <float.h>
+#include <locale.h>
 
 /* Currently a test is made to see if long double processing is warranted.
    This could be changed in the future should the _ldtoa_r code be
@@ -1288,6 +1289,10 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 	  unsigned width_left = 0;
 	  char nancount = 0;
 	  char infcount = 0;
+	  const char *decpt = _localeconv_r (rptr)->decimal_point;
+#ifdef _MB_CAPABLE
+	  int decptpos = 0;
+#endif
 #ifdef hardway
 	  if (width == 0 || width > sizeof (buf) - 1)
 #else
@@ -1416,14 +1421,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 		      goto fok;
 		    }
 		  break;
-		case '.':
-		  if (flags & DPTOK)
-		    {
-		      flags &= ~(SIGNOK | DPTOK);
-		      leading_zeroes = zeroes;
-		      goto fok;
-		    }
-		  break;
 		case 'e':
 		case 'E':
 		  /* no exponent without some digits */
@@ -1442,6 +1439,52 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap
 		      goto fok;
 		    }
 		  break;
+		default:
+#ifndef _MB_CAPABLE
+		  if ((unsigned char) c == (unsigned char) decpt[0]
+		      && (flags & DPTOK))
+		    {
+		      flags &= ~(SIGNOK | DPTOK);
+		      leading_zeroes = zeroes;
+		      goto fok;
+		    }
+		  break;
+#else
+		  if (flags & DPTOK)
+		    {
+		      while ((unsigned char) c
+			     == (unsigned char) decpt[decptpos])
+			{
+			  if (decpt[++decptpos] == '\0')
+			    {
+			      /* We read the complete decpt seq. */
+			      flags &= ~(SIGNOK | DPTOK);
+			      leading_zeroes = zeroes;
+			      p = stpncpy (p, decpt, decptpos);
+			      decptpos = 0;
+			      goto fskip;
+			    }
+			  ++nread;
+			  if (--fp->_r > 0)
+			    fp->_p++;
+			  else if (__srefill_r (rptr, fp))
+			    break;		/* EOF */
+			  c = *fp->_p;
+			}
+		      if (decptpos > 0)
+			{
+			  /* We read part of a multibyte decimal point,
+			     but the rest is invalid or we're at EOF,
+			     so back off. */
+			  while (decptpos-- > 0)
+			    {
+			      _ungetc_r (rptr, decpt[decptpos], fp);
+			      --nread;
+			    }
+			}
+		    }
+		  break;
+#endif
 		}
 	      break;
 	    fok:
Index: libc/stdio/vfwscanf.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdio/vfwscanf.c,v
retrieving revision 1.5
diff -u -p -r1.5 vfwscanf.c
--- libc/stdio/vfwscanf.c	26 Nov 2013 17:21:00 -0000	1.5
+++ libc/stdio/vfwscanf.c	17 Dec 2013 21:51:22 -0000
@@ -161,6 +161,9 @@ C99, POSIX-1.2008
 #ifdef FLOATING_POINT
 #include <math.h>
 #include <float.h>
+#ifdef __HAVE_LOCALE_INFO_EXTENDED__
+#include "../locale/lnumeric.h"
+#endif
 
 /* Currently a test is made to see if long double processing is warranted.
    This could be changed in the future should the _ldtoa_r code be
@@ -414,6 +417,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, a
   float *flp;
   _LONG_DOUBLE *ldp;
   double *dp;
+  wchar_t decpt;
 #endif
   long *lp;
 #ifndef _NO_LONGLONG
@@ -440,6 +444,27 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, a
 # define GET_ARG(n, ap, type) (va_arg (ap, type))
 #endif
 
+#ifdef FLOATING_POINT
+#ifdef _MB_CAPABLE
+#ifdef __HAVE_LOCALE_INFO_EXTENDED__
+	  decpt = *__get_current_numeric_locale ()->wdecimal_point;
+#else
+	  {
+	    size_t nconv;
+
+	    memset (&state, '\0', sizeof (state));
+	    nconv = _mbrtowc_r (data, &decpt,
+				_localeconv_r (data)->decimal_point,
+				MB_CUR_MAX, &state);
+	    if (nconv == (size_t) -1 || nconv == (size_t) -2)
+	      decpt = L'.';
+	  }
+#endif /* !__HAVE_LOCALE_INFO_EXTENDED__ */
+#else
+	  decpt = (wchar_t) *_localeconv_r (data)->decimal_point;
+#endif /* !_MB_CAPABLE */
+#endif /* FLOATING_POINT */
+
   _newlib_flockfile_start (fp);
 
   ORIENT (fp, 1);
@@ -1271,14 +1296,6 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, a
 		      goto fok;
 		    }
 		  break;
-		case L'.':
-		  if (flags & DPTOK)
-		    {
-		      flags &= ~(SIGNOK | DPTOK);
-		      leading_zeroes = zeroes;
-		      goto fok;
-		    }
-		  break;
 		case L'e':
 		case L'E':
 		  /* no exponent without some digits */
@@ -1297,6 +1314,14 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, a
 		      goto fok;
 		    }
 		  break;
+		default:
+		  if ((wchar_t) c == decpt && (flags & DPTOK))
+		    {
+		      flags &= ~(SIGNOK | DPTOK);
+		      leading_zeroes = zeroes;
+		      goto fok;
+		    }
+		  break;
 		}
 	      if (c != WEOF)
 		_ungetwc_r (rptr, c, fp);


-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]