This is the mail archive of the mailing list for the glibc project.

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Reading localized digits in floating-point numbers

The following patch adds support for reading localized digits in
floating-point numbers.  It has almost no penalty when the 'I' flag is
not used or when the needed map is not defined in the locale file.  It
also handles the situations where the decimal point or thousands-sep is
not defined in the locale file.

Attached is the patch with a test-case.


2005-08-02  Hamed Malek  <>
	* stdio-common/vfscanf.c: Add support for reading localized
	digits in floating-point numbers.

Index: libc/stdio-common/vfscanf.c
RCS file: /cvs/glibc/libc/stdio-common/vfscanf.c,v
retrieving revision 1.113
diff -u -r1.113 vfscanf.c
--- libc/stdio-common/vfscanf.c	26 Apr 2005 20:52:22 -0000	1.113
+++ libc/stdio-common/vfscanf.c	28 Jun 2005 12:22:37 -0000
@@ -1602,6 +1602,53 @@
 	  if (c == EOF)
 	    input_error ();
+          /* "to_inpunct" is a map from ASCII digits to their
+              equivalent in locale. This is defined for locales
+              which use an extra digit set.  */
+          wctrans_t map = __wctrans ("to_inpunct");
+          wint_t wcdigit[12];
+          char *mbdigits[12];
+          if (((flags & I18N) != 0) && __builtin_expect (map != NULL, 0))
+            {
+              int n;
+              for (n = 0; n < 12; ++n)
+                {
+		  if (n < 10)
+		    wcdigit[n] = __towctrans (L'0' + n, map);
+		  else if (n == 10)
+		    wcdigit[n] = __towctrans (L'.', map);
+		  else
+		    wcdigit[n] = __towctrans (L',', map);
+		  mbdigits[n] = (char *) alloca (MB_LEN_MAX * sizeof (char));
+                  mbstate_t state;
+                  memset (&state, '\0', sizeof (state));
+                  size_t mblen;
+                  if ((mblen = __wcrtomb (mbdigits[n], wcdigit[n], &state)) == (size_t) -1)
+                    {
+		      if (n == 10)
+			memcpy (mbdigits[n], decimal, strlen (decimal) + 1);
+		      else if (n == 11)
+			memcpy (mbdigits[n], thousands, strlen (thousands) + 1);
+		      else
+			{
+			  /* Ignore checking against localized digits.  */
+			  map = NULL;
+			  break;
+			}
+                    }
+                  else
+                    mbdigits[n][mblen] = '\0';
+                }
+            }
 	  got_dot = got_e = 0;
 	  /* Check for a sign.  */
@@ -1617,9 +1664,12 @@
 		  if (c != decimal)
-		      /* This is no valid number.  */
-		      ungetc (c, s);
-		      conv_error ();
+                      if (((flags & I18N) == 0) || __builtin_expect (map == NULL, 1))
+		        {
+			  /* This is no valid number.  */
+			  ungetc (c, s);
+			  conv_error ();
+			}
 		  /* Match against the decimal point.  At this point
@@ -1651,7 +1701,10 @@
 			  c = (unsigned char) *--cmpp;
-		      conv_error ();
+                      if (((flags & I18N) == 0) || __builtin_expect (map == NULL, 1))
+                        conv_error ();
+		      else
+			c = inchar ();
@@ -1882,6 +1935,128 @@
 	  while (width != 0 && inchar () != EOF);
+          /* We may have localized digits, if we have not read 
+	     any character or have just read decimal point.  */
+          if (((flags & I18N) != 0) && __builtin_expect (map != NULL, 0))
+            {
+	      if (wpsize == 0 || (got_dot && wpsize == 1 && wcdigit[10] == decimal))
+	      if (wpsize == 0 || (got_dot && wpsize == strlen (decimal) && 
+				  strcmp (decimal, mbdigits[10]) == 0))
+		{
+		  while (width != 0 && inchar () != EOF)
+		    {
+		      if (got_e && wp[wpsize - 1] == exp_char
+			  && (c == L_('-') || c == L_('+')))
+                        ADDW (c);
+		      else if (wpsize > 0 && !got_e
+			       && (CHAR_T) TOLOWER (c) == exp_char)
+			{
+			  ADDW (exp_char);
+			  got_e = got_dot = 1;
+			}
+		      else
+			{
+			  /* Checking against localized digits plus
+			     decimal point and thousands-sep.  */
+			  int n;
+			  for (n = 0; n < 12; ++n)
+			    {
+			      if (c == wcdigit[n])
+				{
+				  if (n < 10)
+				    ADDW (L_('0') + n);
+				  else if (n == 10 && !got_dot)
+				    {
+				      ADDW (decimal);
+				      got_dot = 1;
+				    }
+				  else if (n == 11 && (flags & GROUP) != 0
+					   && thousands != L'\0'  && ! got_dot)
+				    ADDW (thousands);
+				  else
+				    {
+				      /* The last read character is not part of the number 
+					 anymore.  */
+				      n = 12;
+				    }
+				  break;
+				}
+			      const char *cmpp = mbdigits[n];
+			      int avail = width > 0 ? width : INT_MAX;
+			      while ((unsigned char) *cmpp == c && avail > 0)
+				{
+				  if (*++cmpp == '\0')
+				    break;
+				  else
+				    {
+				      if (inchar () == EOF)
+					break;
+				      --avail;
+				    }
+				}
+			      if (*cmpp == '\0')
+				{
+				  if (width > 0)
+				    width = avail;
+				  if (n < 10)
+				    ADDW (L_('0') + n);
+				  else if (n == 10 && !got_dot)
+				    {
+				      /* Add all the characters.  */
+				      for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
+					ADDW ((unsigned char) *cmpp);
+				      got_dot = 1;
+				    }
+				  else if (n == 11 && (flags & GROUP) != 0
+					   && thousands != NULL && ! got_dot)
+				    {
+				      /* Add all the characters.  */
+				      for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
+					ADDW ((unsigned char) *cmpp);
+				    }
+				  else
+				    {
+				      /* The last read character is not part of the number 
+					 anymore.  */
+				      n = 12;
+				    }
+				  break;
+				}
+			      /* We are pushing all read characters back.  */
+			      if (cmpp > mbdigits[n])
+				{
+				  ungetc (c, s);
+				  while (--cmpp > mbdigits[n])
+				    ungetc_not_eof ((unsigned char) *cmpp, s);
+				  c = (unsigned char) *cmpp;
+				}
+			    }
+			  if (n == 12)
+			    {
+			      /* The last read character is not part of the number
+				 anymore.  */
+			      ungetc (c, s);
+			      break;
+			    }
+			}
+		      if (width > 0)
+			--width;
+		    }
+		}
+            }
 	  /* Have we read any character?  If we try to read a number
 	     in hexadecimal notation and we have read only the `0x'
 	     prefix or no exponent this is an error.  */
#include <stdio.h>
#include <locale.h>
#include <assert.h>

/* UTF-8 byte sequences for the Extended Arabic-Indic digits U+06F0
   through U+06F9; the checks in main use them as the digits 0..9.  */
#define P0	"\xDB\xB0"
#define P1	"\xDB\xB1"
#define P2	"\xDB\xB2"
#define P3	"\xDB\xB3"
#define P4	"\xDB\xB4"
#define P5	"\xDB\xB5"
#define P6	"\xDB\xB6"
#define P7	"\xDB\xB7"
#define P8	"\xDB\xB8"
#define P9	"\xDB\xB9"

/* UTF-8 byte sequences for the Arabic decimal separator (U+066B) and
   the Arabic thousands separator (U+066C).  */
#define PD	"\xD9\xAB"
#define PT	"\xD9\xAC"

/* Scan S with sscanf using FORMAT, which is expected to contain exactly
   one float conversion, and print a diagnostic on stdout when the
   conversion does not happen or the scanned value differs from N.  */
void
check_sscanf (const char *s, const char *format, const float n)
{
  /* Initialized so that a failed conversion never leaves F
     indeterminate when it is read below.  */
  float f = 0.0f;
  int matched = sscanf (s, format, &f);

  if (matched != 1)
    printf ("sscanf with format \"%s\" returned %d, expected 1\n",
	    format, matched);
  else if (f != n)
    printf ("got %f expected %f \n", f, n);
}


main ()
  setlocale (LC_ALL, "fa_IR");

  check_sscanf (P3 PD P1 P4, "%I8f", 3.14);
  check_sscanf (P3 PT P1 P4 P5, "%I'f", 3145);
  check_sscanf (P3 PD P1 P4 P1 P5 P9, "%If", 3.14159);
  check_sscanf ("-" P3 PD P1 P4 P1 P5, "%If", -3.1415);
  check_sscanf ("+" PD P1 P4 P1 P5, "%If", +.1415);
  check_sscanf (P3 PD P1 P4 P1 P5 "e+" P2, "%Ie", 3.1415e+2);

  return 0;

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]