This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Reading localized digits in floating-point


The following patch adds support for reading localized digits in
floating-point numbers.  It has almost no penalty when the 'I' flag is
not used or the needed map is not defined in the locale file.  It also
handles the situations where decimal-point or thousands-sep is not
defined in the locale file.

Attached is the patch with a test-case.

Hamed


2005-06-28  Hamed Malek <hamed@bamdad.org>

        * stdio-common/vfscanf.c: Add support for reading localized
        digits in floating-point numbers.

Index: libc/stdio-common/vfscanf.c
===================================================================
RCS file: /cvs/glibc/libc/stdio-common/vfscanf.c,v
retrieving revision 1.113
diff -u -r1.113 vfscanf.c
--- libc/stdio-common/vfscanf.c	26 Apr 2005 20:52:22 -0000	1.113
+++ libc/stdio-common/vfscanf.c	28 Jun 2005 12:22:37 -0000
@@ -1602,6 +1602,53 @@
 	  if (c == EOF)
 	    input_error ();
 
+          /* "to_inpunct" is a map from ASCII digits to their
+              equivalent in locale. This is defined for locales
+              which use an extra digit set.  */
+          wctrans_t map = __wctrans ("to_inpunct");
+          wint_t wcdigit[12];
+#ifndef COMPILE_WSCANF
+          char *mbdigits[12];
+#endif
+          if (((flags & I18N) != 0) && __builtin_expect (map != NULL, 0))
+            {
+              int n;
+
+              for (n = 0; n < 12; ++n)
+                {
+		  if (n < 10)
+		    wcdigit[n] = __towctrans (L'0' + n, map);
+		  else if (n == 10)
+		    wcdigit[n] = __towctrans (L'.', map);
+		  else
+		    wcdigit[n] = __towctrans (L',', map);
+
+#ifndef COMPILE_WSCANF
+		  mbdigits[n] = (char *) alloca (MB_LEN_MAX * sizeof (char));
+
+                  mbstate_t state;
+                  memset (&state, '\0', sizeof (state));
+
+                  size_t mblen;
+                  if ((mblen = __wcrtomb (mbdigits[n], wcdigit[n], &state)) == (size_t) -1)
+                    {
+		      if (n == 10)
+			memcpy (mbdigits[n], decimal, strlen (decimal) + 1);
+		      else if (n == 11)
+			memcpy (mbdigits[n], thousands, strlen (thousands) + 1);
+		      else
+			{
+			  /* Ignore checking against localized digits.  */
+			  map = NULL;
+			  break;
+			}
+                    }
+                  else
+                    mbdigits[n][mblen] = '\0';
+#endif
+                }
+            }
+
 	  got_dot = got_e = 0;
 
 	  /* Check for a sign.  */
@@ -1617,9 +1664,12 @@
 #ifdef COMPILE_WSCANF
 		  if (c != decimal)
 		    {
-		      /* This is no valid number.  */
-		      ungetc (c, s);
-		      conv_error ();
+                      if (((flags & I18N) == 0) || __builtin_expect (map == NULL, 1))
+		        {
+			  /* This is no valid number.  */
+			  ungetc (c, s);
+			  conv_error ();
+			}
 		    }
 #else
 		  /* Match against the decimal point.  At this point
@@ -1651,7 +1701,10 @@
 			  c = (unsigned char) *--cmpp;
 			}
 
-		      conv_error ();
+                      if (((flags & I18N) == 0) || __builtin_expect (map == NULL, 1))
+                        conv_error ();
+		      else
+			c = inchar ();
 		    }
 		  else
 		    {
@@ -1882,6 +1935,128 @@
 	    }
 	  while (width != 0 && inchar () != EOF);
 
+          /* We may have localized digits, if we have not read 
+	     any character or have just read decimal point.  */
+
+          if (((flags & I18N) != 0) && __builtin_expect (map != NULL, 0))
+            {
+#ifdef COMPILE_WSCANF
+	      if (wpsize == 0 || (got_dot && wpsize == 1 && wcdigit[10] == decimal))
+#else
+	      if (wpsize == 0 || (got_dot && wpsize == strlen (decimal) && 
+				  strcmp (decimal, mbdigits[10]) == 0))
+#endif
+		{
+		  while (width != 0 && inchar () != EOF)
+		    {
+		      if (got_e && wp[wpsize - 1] == exp_char
+			  && (c == L_('-') || c == L_('+')))
+                        ADDW (c);
+		      else if (wpsize > 0 && !got_e
+			       && (CHAR_T) TOLOWER (c) == exp_char)
+			{
+			  ADDW (exp_char);
+			  got_e = got_dot = 1;
+			}
+		      else
+			{
+			  /* Checking against localized digits plus
+			     decimal point and thousands-sep.  */
+			  int n;
+			  for (n = 0; n < 12; ++n)
+			    {
+#ifdef COMPILE_WSCANF
+			      if (c == wcdigit[n])
+				{
+				  if (n < 10)
+				    ADDW (L_('0') + n);
+				  else if (n == 10 && !got_dot)
+				    {
+				      ADDW (decimal);
+				      got_dot = 1;
+				    }
+				  else if (n == 11 && (flags & GROUP) != 0
+					   && thousands != L'\0'  && ! got_dot)
+				    ADDW (thousands);
+				  else
+				    {
+				      /* The last read character is not part of the number 
+					 anymore.  */
+				      n = 12;
+				    }
+				  break;
+				}
+#else
+			      const char *cmpp = mbdigits[n];
+			      int avail = width > 0 ? width : INT_MAX;
+
+			      while ((unsigned char) *cmpp == c && avail > 0)
+				{
+				  if (*++cmpp == '\0')
+				    break;
+				  else
+				    {
+				      if (inchar () == EOF)
+					break;
+				      --avail;
+				    }
+				}
+
+			      if (*cmpp == '\0')
+				{
+				  if (width > 0)
+				    width = avail;
+
+				  if (n < 10)
+				    ADDW (L_('0') + n);
+				  else if (n == 10 && !got_dot)
+				    {
+				      /* Add all the characters.  */
+				      for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
+					ADDW ((unsigned char) *cmpp);
+
+				      got_dot = 1;
+				    }
+				  else if (n == 11 && (flags & GROUP) != 0
+					   && thousands != NULL && ! got_dot)
+				    {
+				      /* Add all the characters.  */
+				      for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
+					ADDW ((unsigned char) *cmpp);
+				    }
+				  else
+				    {
+				      /* The last read character is not part of the number 
+					 anymore.  */
+				      n = 12;
+				    }
+				  break;
+				}
+
+			      /* We are pushing all read characters back.  */
+			      if (cmpp > mbdigits[n])
+				{
+				  ungetc (c, s);
+				  while (--cmpp > mbdigits[n])
+				    ungetc_not_eof ((unsigned char) *cmpp, s);
+				  c = (unsigned char) *cmpp;
+				}
+#endif
+			    }
+			  if (n == 12)
+			    {
+			      /* The last read character is not part of the number
+				 anymore.  */
+			      ungetc (c, s);
+			      break;
+			    }
+			}
+		      if (width > 0)
+			--width;
+		    }
+		}
+            }
+
 	  /* Have we read any character?  If we try to read a number
 	     in hexadecimal notation and we have read only the `0x'
 	     prefix or no exponent this is an error.  */
#include <stdio.h>
#include <locale.h>
#include <assert.h>

/* UTF-8 byte sequences used to build the localized test inputs.
   P0..P9 encode the code points U+06F0..U+06F9 (Extended Arabic-Indic
   digits zero through nine), one macro per digit.  */
#define P0 "\xDB\xB0"
#define P1 "\xDB\xB1"
#define P2 "\xDB\xB2"
#define P3 "\xDB\xB3"
#define P4 "\xDB\xB4"
#define P5 "\xDB\xB5"
#define P6 "\xDB\xB6"
#define P7 "\xDB\xB7"
#define P8 "\xDB\xB8"
#define P9 "\xDB\xB9"
/* PD encodes U+066B (Arabic decimal separator); the tests below use it
   where a decimal point is expected.  */
#define PD "\xd9\xab"
/* PT encodes U+066C (Arabic thousands separator); the tests below use
   it together with the ' (grouping) flag.  */
#define PT "\xd9\xac"

/* Parse S with sscanf using FORMAT, which is expected to contain a
   single float conversion, and compare the result against N.

   Nothing is printed when the parsed value equals N.  A line is written
   to stdout when the conversion itself fails or when the parsed value
   differs from N.  */
void
check_sscanf (const char *s, const char *format, const float n)
{
  /* Initialized so that a failed conversion can never leave an
     indeterminate value behind.  */
  float f = 0.0f;

  /* sscanf returns the number of items assigned; anything other than 1
     means F was not stored, so comparing it against N is meaningless.  */
  if (sscanf (s, format, &f) != 1)
    {
      printf ("conversion failed for format \"%s\", expected %f \n",
              format, n);
      return;
    }

  if (f != n)
    printf ("got %f expected %f \n", f, n);
}

/* Run the localized floating-point scanf checks under the fa_IR locale.

   Returns 1 if that locale cannot be selected and 0 otherwise.
   Individual mismatches are reported on stdout by check_sscanf and do
   not affect the exit status.  */
int
main (void)
{
  /* Every input below is written with fa_IR digits and separators, so
     without that locale the checks would all fail for an unrelated
     reason.  Report it and stop instead.  */
  if (setlocale (LC_ALL, "fa_IR") == NULL)
    {
      puts ("cannot set locale fa_IR");
      return 1;
    }

  /* Field width of 8 with a localized decimal separator.  */
  check_sscanf (P3 PD P1 P4, "%I8f", 3.14);
  /* Grouping flag with a localized thousands separator.  */
  check_sscanf (P3 PT P1 P4 P5, "%I'f", 3145);
  check_sscanf (P3 PD P1 P4 P1 P5 P9, "%If", 3.14159);
  /* ASCII sign followed by localized digits.  */
  check_sscanf ("-" P3 PD P1 P4 P1 P5, "%If", -3.1415);
  /* No integer part: sign directly followed by the decimal separator.  */
  check_sscanf ("+" PD P1 P4 P1 P5, "%If", +.1415);
  /* ASCII exponent marker and sign with a localized exponent digit.  */
  check_sscanf (P3 PD P1 P4 P1 P5 "e+" P2, "%Ie", 3.1415e+2);

  return 0;
}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]