This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[PATCH] Reading localized digits in scanf

From: Hamed Malek <hamed at bamdad dot org>
To: libc-alpha at sources dot redhat dot com
Cc: Behdad Esfahbod <behdad at cs dot toronto dot edu>
Date: Sat, 11 Sep 2004 08:49:53 +0430
Subject: [PATCH] Reading localized digits in scanf
Organization: Sharif FarsiWeb, Inc.

Second try:

The attached patch reads localized digits from a map in the locale file
(to_inpunct) and adds them as a new level of digit set in wcdigits[]. This
lets the 'I' flag accept localized digits as an additional digit set
without breaking any standard.

Hamed

Index: libc/stdio-common/vfscanf.c
===================================================================
RCS file: /cvs/glibc/libc/stdio-common/vfscanf.c,v
retrieving revision 1.110
diff -u -r1.110 vfscanf.c
--- libc/stdio-common/vfscanf.c	20 Apr 2004 18:51:32 -0000	1.110
+++ libc/stdio-common/vfscanf.c	4 Sep 2004 12:04:34 -0000
@@ -1163,9 +1163,16 @@
 	      int level;
 #ifdef COMPILE_WSCANF
 	      const wchar_t *wcdigits[10];
+	      /*  Extra array to extend wcdigits[] for new digits in locale.  */
+	      const wchar_t *wcdigits_extended[10];
 #else
 	      const char *mbdigits[10];
+	      const char *mbdigits_extended[10];
 #endif
+	      /*  "to_inpunct" is a map from ASCII digits to their 
+		  equivalent in locale. This is defined for locales 
+		  which use an extra digits set.  */
+	      wctrans_t map = __wctrans ("to_inpunct");
 	      int n;
 
 	      from_level = 0;
@@ -1173,9 +1180,68 @@
 	      to_level = _NL_CURRENT_WORD (LC_CTYPE,
 					   _NL_CTYPE_INDIGITS_WC_LEN) - 1;
 #else
-	      to_level = (uint32_t) curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
+	      to_level = (uint32_t) 
+		curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
 #endif
 
+	      if (map)
+		{
+		  /*  Adding new level for extra digits set in locale file.  */
+		  to_level++;
+
+		  for (n = 0; n < 10; n++)
+		    {
+#ifdef COMPILE_WSCANF
+		      wcdigits[n] = (const wchar_t *)
+                        _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
+
+		      wchar_t *wc_extended = (wchar_t *) 
+			alloca ((to_level + 2) * sizeof (wchar_t));
+		      MEMCPY (wc_extended, wcdigits[n], to_level);
+		      wc_extended[to_level] = __towctrans (L'0' + n, map);
+		      wc_extended[to_level + 1] = '\0';
+		      wcdigits_extended[n] = wc_extended;
+#else
+		      mbdigits[n]
+                        = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
+
+		      /*  Get the equivalent wide char in map.  */
+		      wint_t extra_wcdigit = __towctrans (L'0' + n, map);
+
+		      /*  Convert it to multibyte representation.  */
+		      mbstate_t state;
+		      memset (&state, '\0', sizeof (state));
+
+		      char extra_mbdigit[MB_LEN_MAX];
+		      size_t mblen
+			= __wcrtomb (extra_mbdigit, extra_wcdigit, &state);
+
+		      if (mblen == (size_t) -1)
+			{
+			  /*  Ignore this new level.  */
+			  map = NULL;
+			  break;
+			}
+
+		      /*  Calculate the length of mbdigits[n].  */
+		      const char *last_char = mbdigits[n];
+		      for (level = 0; level < to_level; level++)
+			last_char = strchr (last_char, '\0') + 1;
+
+		      size_t mbdigits_len = last_char - mbdigits[n];
+
+		      /*  Allocate memory for extended multibyte digit.  */
+		      mb_extended = (char *) alloca (mbdigits_len + mblen + 1);
+
+		      /*  And get the mbdigits + extra_digit string.  */
+		      MEMCPY (mb_extended, mbdigits[n], mbdigits_len);
+		      MEMCPY (mb_extended + mbdigits_len, extra_mbdigit, mblen);
+		      mb_extended[mbdigits_len + mblen] = '\0';
+		      mbdigits_extended[n] = mb_extended;
+#endif
+		    }
+		}
+
 	      /* Read the number into workspace.  */
 	      while (c != EOF && width != 0)
 		{
@@ -1185,8 +1251,12 @@
 		    {
 		      /* Get the string for the digits with value N.  */
 #ifdef COMPILE_WSCANF
-		      wcdigits[n] = (const wchar_t *)
-			_NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
+		     if (map)
+		       wcdigits[n] = wcdigits_extended[n];
+		     else
+		       wcdigits[n] = (const wchar_t *)
+		 	 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
+			
 		      wcdigits[n] += from_level;
 
 		      if (c == (wint_t) *wcdigits[n])
@@ -1201,8 +1271,11 @@
 		      const char *cmpp;
 		      int avail = width > 0 ? width : INT_MAX;
 
-		      mbdigits[n]
-			= curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
+		      if (map)
+			mbdigits[n] = mbdigits_extended[n];
+		      else
+		        mbdigits[n]
+			  = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
 
 		      for (level = 0; level < from_level; level++)
 			mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
Index: libc/localedata/locales/fa_IR
===================================================================
RCS file: /cvs/glibc/libc/localedata/locales/fa_IR,v
retrieving revision 1.11
diff -u -r1.11 fa_IR
--- libc/localedata/locales/fa_IR	17 Mar 2004 17:22:43 -0000	1.11
+++ libc/localedata/locales/fa_IR	4 Sep 2004 12:06:47 -0000
@@ -10,8 +10,8 @@
 % Fax: +98 21 6019568
 % Language: fa
 % Territory: IR
-% Revision: 2.3
-% Date: 2004-03-16
+% Revision: 2.4
+% Date: 2004-09-04
 % Users: general
 % Repertoiremap:
 % Charset: UTF-8
@@ -50,6 +50,20 @@
 
 outdigit <U06F0>..<U06F9>
 
+map to_inpunct; /
+  (<U0030>,<U06F0>); /
+  (<U0031>,<U06F1>); /
+  (<U0032>,<U06F2>); /
+  (<U0033>,<U06F3>); /
+  (<U0034>,<U06F4>); /
+  (<U0035>,<U06F5>); /
+  (<U0036>,<U06F6>); /
+  (<U0037>,<U06F7>); /
+  (<U0038>,<U06F8>); /
+  (<U0039>,<U06F9>); /
+  (<U002E>,<U066B>); /
+  (<U002C>,<U066C>)
+
 map to_outpunct; /
   (<U002E>,<U066B>); /
   (<U002C>,<U066C>)

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]