[RFA] Refresh towlower and towupper (was Re: Update wctype functions to Unicode 5.2?)
Jeff Johnston
jjohnstn@redhat.com
Thu Feb 18 18:38:00 GMT 2010
On 13/02/10 03:51 PM, Corinna Vinschen wrote:
> On Feb 12 21:57, Corinna Vinschen wrote:
>> Additionally, the functions iswblank, iswspace, towlower and towupper
>> could use some revamping. If an update of the aforementioned tables to
>> Unicode 5.2 is not a big deal, I'd volunteer to update these functions
>> as required.
>
> And here are the revamped towlower and towupper. I tested them against
> Unicode 5.2 by performing the following tests:
>
> $ cat > test-towfuncs.c << EOF
> #include <stdio.h>
> #include <wctype.h>
>
> #if defined (TEST_TOLOWER)
> #define towfunc towlower
> #elif defined (TEST_TOUPPER)
> #define towfunc towupper
> #endif
>
> int
> main ()
> {
> wint_t upc, lwc;
>
> for (upc = 0; upc< 0x1fffff; ++upc)
> if ((lwc = towfunc (upc)) != upc)
> printf ("%04X %04X\n", upc, lwc);
> return 0;
> }
> EOF
> $ gcc -DTEST_TOLOWER test-towfuncs.c -o test-towlower
> $ gcc -DTEST_TOUPPER test-towfuncs.c -o test-towupper
> $ ./test-towlower > towlower.newlib
> $ ./test-towupper > towupper.newlib
> $ wget http://www.unicode.org/Public/5.2.0/ucd/UnicodeData.txt
> $ awk -F\; '{ if ( $14 != "" ) print $1 " " $14; }' UnicodeData.txt > towlower.unicode
> $ awk -F\; '{ if ( $13 != "" ) print $1 " " $13; }' UnicodeData.txt > towupper.unicode
> $ diff towupper.newlib towupper.unicode
> $ diff towlower.newlib towlower.unicode
>
> Ok to apply?
>
Yes, please go ahead.
-- Jeff J.
> Thanks,
> Corinna
>
>
> * libc/ctype/towlower.c (towlower): Update to Unicode 5.2.
> * libc/ctype/towupper.c (towupper): Ditto.
>
>
> Index: libc/ctype/towlower.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/towlower.c,v
> retrieving revision 1.7
> diff -u -p -r1.7 towlower.c
> --- libc/ctype/towlower.c 14 May 2009 20:16:21 -0000 1.7
> +++ libc/ctype/towlower.c 13 Feb 2010 20:50:45 -0000
> @@ -71,15 +71,14 @@ _DEFUN(towlower,(c), wint_t c)
> {
> #ifdef _MB_CAPABLE
> c = _jp2uc (c);
> + /* Based on and tested against Unicode 5.2 */
> if (c< 0x100)
> {
> if ((c>= 0x0041&& c<= 0x005a) ||
> - (c>= 0x00c0&& c<= 0x00de))
> + (c>= 0x00c0&& c<= 0x00d6) ||
> + (c>= 0x00d8&& c<= 0x00de))
> return (c + 0x20);
>
> - if (c == 0x00b5)
> - return 0x03bc;
> -
> return c;
> }
> else if (c< 0x300)
> @@ -96,8 +95,11 @@ _DEFUN(towlower,(c), wint_t c)
> return c;
> }
>
> + if (c == 0x0130)
> + return 0x0069;
> +
> if ((c>= 0x0139&& c<= 0x0147) ||
> - (c>= 0x01cd&& c<= 0x91db))
> + (c>= 0x01cd&& c<= 0x01db))
> {
> if (c& 0x01)
> return (c + 1);
> @@ -146,9 +148,6 @@ _DEFUN(towlower,(c), wint_t c)
> case 0x01f4:
> k = c + 1;
> break;
> - case 0x017f:
> - k = 0x0073;
> - break;
> case 0x0181:
> k = 0x0253;
> break;
> @@ -227,17 +226,56 @@ _DEFUN(towlower,(c), wint_t c)
> if (k != 0)
> return k;
> }
> -
> - if (c == 0x0220)
> - return 0x019e;
> + else if (c == 0x0220)
> + return 0x019e;
> + else if (c>= 0x023a&& c<= 0x024e)
> + {
> + wint_t k;
> + switch (c)
> + {
> + case 0x023a:
> + k = 0x2c65;
> + break;
> + case 0x023b:
> + case 0x0241:
> + case 0x0246:
> + case 0x0248:
> + case 0x024a:
> + case 0x024c:
> + case 0x024e:
> + k = c + 1;
> + break;
> + case 0x023d:
> + k = 0x019a;
> + break;
> + case 0x023e:
> + k = 0x2c66;
> + break;
> + case 0x0243:
> + k = 0x0180;
> + break;
> + case 0x0244:
> + k = 0x0289;
> + break;
> + case 0x0245:
> + k = 0x028c;
> + break;
> + default:
> + k = 0;
> + }
> + if (k != 0)
> + return k;
> + }
> }
> else if (c< 0x0400)
> {
> + if (c == 0x0370 || c == 0x0372 || c == 0x0376)
> + return (c + 1);
> if (c>= 0x0391&& c<= 0x03ab&& c != 0x03a2)
> return (c + 0x20);
> if (c>= 0x03d8&& c<= 0x03ee&& !(c& 0x01))
> return (c + 1);
> - if (c>= 0x0386&& c<= 0x03f5)
> + if (c>= 0x0386&& c<= 0x03ff)
> {
> wint_t k;
> switch (c)
> @@ -261,37 +299,31 @@ _DEFUN(towlower,(c), wint_t c)
> k = 0x03cd;
> break;
> case 0x038f:
> - k = 0x038f;
> + k = 0x03ce;
> break;
> - case 0x03c2:
> - k = 0x03c3;
> + case 0x03cf:
> + k = 0x03d7;
> break;
> - case 0x03d0:
> - k = 0x03b2;
> - break;
> - case 0x03d1:
> + case 0x03f4:
> k = 0x03b8;
> break;
> - case 0x03d5:
> - k = 0x03c6;
> - break;
> - case 0x03d6:
> - k = 0x03c0;
> + case 0x03f7:
> + k = 0x03f8;
> break;
> - case 0x03f0:
> - k = 0x03ba;
> + case 0x03f9:
> + k = 0x03f2;
> break;
> - case 0x03f1:
> - k = 0x03c1;
> + case 0x03fa:
> + k = 0x03fb;
> break;
> - case 0x03f2:
> - k = 0x03c3;
> + case 0x03fd:
> + k = 0x037b;
> break;
> - case 0x03f4:
> - k = 0x03b8;
> + case 0x03fe:
> + k = 0x037c;
> break;
> - case 0x03f5:
> - k = 0x03b5;
> + case 0x03ff:
> + k = 0x037d;
> break;
> default:
> k = 0;
> @@ -299,9 +331,6 @@ _DEFUN(towlower,(c), wint_t c)
> if (k != 0)
> return k;
> }
> -
> - if (c == 0x0345)
> - return 0x03b9;
> }
> else if (c< 0x500)
> {
> @@ -313,14 +342,16 @@ _DEFUN(towlower,(c), wint_t c)
>
> if ((c>= 0x0460&& c<= 0x0480) ||
> (c>= 0x048a&& c<= 0x04be) ||
> - (c>= 0x04d0&& c<= 0x04f4) ||
> - (c == 0x04f8))
> + (c>= 0x04d0&& c<= 0x04fe))
> {
> if (!(c& 0x01))
> return (c + 1);
> return c;
> }
>
> + if (c == 0x04c0)
> + return 0x04cf;
> +
> if (c>= 0x04c1&& c<= 0x04cd)
> {
> if (c& 0x01)
> @@ -331,6 +362,7 @@ _DEFUN(towlower,(c), wint_t c)
> else if (c< 0x1f00)
> {
> if ((c>= 0x0500&& c<= 0x050e) ||
> + (c>= 0x0510&& c<= 0x0524) ||
> (c>= 0x1e00&& c<= 0x1e94) ||
> (c>= 0x1ea0&& c<= 0x1ef8))
> {
> @@ -342,8 +374,14 @@ _DEFUN(towlower,(c), wint_t c)
> if (c>= 0x0531&& c<= 0x0556)
> return (c + 0x30);
>
> - if (c == 0x1e9b)
> - return 0x1e61;
> + if (c>= 0x10a0&& c<= 0x10c5)
> + return (c + 0x1c60);
> +
> + if (c == 0x1e9e)
> + return 0x00df;
> +
> + if (c>= 0x1efa&& c<= 0x1efe&& !(c& 0x01))
> + return (c + 1);
> }
> else if (c< 0x2000)
> {
> @@ -385,9 +423,6 @@ _DEFUN(towlower,(c), wint_t c)
> case 0x1fbc:
> k = 0x1fb3;
> break;
> - case 0x1fbe:
> - k = 0x03b9;
> - break;
> case 0x1fc8:
> case 0x1fc9:
> case 0x1fca:
> @@ -408,6 +443,10 @@ _DEFUN(towlower,(c), wint_t c)
> case 0x1fec:
> k = 0x1fe5;
> break;
> + case 0x1ff8:
> + case 0x1ff9:
> + k = c - 0x80;
> + break;
> case 0x1ffa:
> case 0x1ffb:
> k = c - 0x7e;
> @@ -422,26 +461,100 @@ _DEFUN(towlower,(c), wint_t c)
> return k;
> }
> }
> - else
> + else if (c< 0x2c00)
> {
> if (c>= 0x2160&& c<= 0x216f)
> return (c + 0x10);
> -
> +
> if (c>= 0x24b6&& c<= 0x24cf)
> return (c + 0x1a);
>
> + switch (c)
> + {
> + case 0x2126:
> + return 0x03c9;
> + case 0x212a:
> + return 0x006b;
> + case 0x212b:
> + return 0x00e5;
> + case 0x2132:
> + return 0x214e;
> + case 0x2183:
> + return 0x2184;
> + }
> + }
> + else if (c< 0x2d00)
> + {
> + if (c>= 0x2c00&& c<= 0x2c2e)
> + return (c + 0x30);
> +
> + if (c>= 0x2c80&& c<= 0x2ce2&& !(c& 0x01))
> + return (c + 1);
> +
> + switch (c)
> + {
> + case 0x2c60:
> + return 0x2c61;
> + case 0x2c62:
> + return 0x026b;
> + case 0x2c63:
> + return 0x1d7d;
> + case 0x2c64:
> + return 0x027d;
> + case 0x2c67:
> + case 0x2c69:
> + case 0x2c6b:
> + case 0x2c72:
> + case 0x2c75:
> + case 0x2ceb:
> + case 0x2ced:
> + return c + 1;
> + case 0x2c6d:
> + return 0x0251;
> + case 0x2c6e:
> + return 0x0271;
> + case 0x2c6f:
> + return 0x0250;
> + case 0x2c70:
> + return 0x0252;
> + case 0x2c7e:
> + return 0x023f;
> + case 0x2c7f:
> + return 0x0240;
> + }
> + }
> + else if (c>= 0xa600&& c< 0xa800)
> + {
> + if ((c>= 0xa640&& c<= 0xa65e) ||
> + (c>= 0xa662&& c<= 0xa66c) ||
> + (c>= 0xa680&& c<= 0xa696) ||
> + (c>= 0xa722&& c<= 0xa72e) ||
> + (c>= 0xa732&& c<= 0xa76e) ||
> + (c>= 0xa77f&& c<= 0xa786))
> + {
> + if (!(c& 1))
> + return (c + 1);
> + return c;
> + }
> +
> + switch (c)
> + {
> + case 0xa779:
> + case 0xa77b:
> + case 0xa77e:
> + case 0xa78b:
> + return (c + 1);
> + case 0xa77d:
> + return 0x1d79;
> + }
> + }
> + else
> + {
> if (c>= 0xff21&& c<= 0xff3a)
> return (c + 0x20);
>
> - if (c>= 0x10400&& c<= 0x10425)
> + if (c>= 0x10400&& c<= 0x10427)
> return (c + 0x28);
> -
> - if (c == 0x2126)
> - return 0x03c9;
> - if (c == 0x212a)
> - return 0x006b;
> - if (c == 0x212b)
> - return 0x00e5;
> }
> return c;
> #else
> Index: libc/ctype/towupper.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/towupper.c,v
> retrieving revision 1.7
> diff -u -p -r1.7 towupper.c
> --- libc/ctype/towupper.c 14 May 2009 20:16:21 -0000 1.7
> +++ libc/ctype/towupper.c 13 Feb 2010 20:50:45 -0000
> @@ -71,12 +71,13 @@ _DEFUN(towupper,(c), wint_t c)
> {
> #ifdef _MB_CAPABLE
> c = _jp2uc (c);
> + /* Based on and tested against Unicode 5.2 */
> if (c< 0x100)
> {
> if (c == 0x00b5)
> return 0x039c;
>
> - if ((c>= 0x00e0&& c<= 0x00fe) ||
> + if ((c>= 0x00e0&& c<= 0x00fe&& c != 0x00f7) ||
> (c>= 0x0061&& c<= 0x007a))
> return (c - 0x20);
>
> @@ -92,7 +93,8 @@ _DEFUN(towupper,(c), wint_t c)
> (c>= 0x014b&& c<= 0x0177) ||
> (c>= 0x01df&& c<= 0x01ef) ||
> (c>= 0x01f9&& c<= 0x021f) ||
> - (c>= 0x0223&& c<= 0x0233))
> + (c>= 0x0223&& c<= 0x0233) ||
> + (c>= 0x0247&& c<= 0x024f))
> {
> if (c& 0x01)
> return (c - 1);
> @@ -100,7 +102,8 @@ _DEFUN(towupper,(c), wint_t c)
> }
>
> if ((c>= 0x013a&& c<= 0x0148) ||
> - (c>= 0x01ce&& c<= 0x1dc))
> + (c>= 0x01ce&& c<= 0x01dc) ||
> + c == 0x023c || c == 0x0242)
> {
> if (!(c& 0x01))
> return (c - 1);
> @@ -121,6 +124,9 @@ _DEFUN(towupper,(c), wint_t c)
> case 0x017f:
> k = 0x0053;
> break;
> + case 0x0180:
> + k = 0x0243;
> + break;
> case 0x0183:
> k = 0x0182;
> break;
> @@ -142,6 +148,9 @@ _DEFUN(towupper,(c), wint_t c)
> case 0x0199:
> k = 0x0198;
> break;
> + case 0x019a:
> + k = 0x023d;
> + break;
> case 0x019e:
> k = 0x0220;
> break;
> @@ -176,6 +185,21 @@ _DEFUN(towupper,(c), wint_t c)
> case 0x01f3:
> k = 0x01f1;
> break;
> + case 0x023f:
> + k = 0x2c7e;
> + break;
> + case 0x0240:
> + k = 0x2c7f;
> + break;
> + case 0x0250:
> + k = 0x2c6f;
> + break;
> + case 0x0251:
> + k = 0x2c6d;
> + break;
> + case 0x0252:
> + k = 0x2c70;
> + break;
> case 0x0253:
> k = 0x0181;
> break;
> @@ -206,15 +230,24 @@ _DEFUN(towupper,(c), wint_t c)
> case 0x0269:
> k = 0x0196;
> break;
> + case 0x026b:
> + k = 0x2c62;
> + break;
> case 0x026f:
> k = 0x019c;
> break;
> + case 0x0271:
> + k = 0x2c6e;
> + break;
> case 0x0272:
> k = 0x019d;
> break;
> case 0x0275:
> k = 0x019f;
> break;
> + case 0x027d:
> + k = 0x2c64;
> + break;
> case 0x0280:
> k = 0x01a6;
> break;
> @@ -224,12 +257,18 @@ _DEFUN(towupper,(c), wint_t c)
> case 0x0288:
> k = 0x01ae;
> break;
> + case 0x0289:
> + k = 0x0244;
> + break;
> case 0x028a:
> k = 0x01b1;
> break;
> case 0x028b:
> k = 0x01b2;
> break;
> + case 0x028c:
> + k = 0x0245;
> + break;
> case 0x0292:
> k = 0x01b7;
> break;
> @@ -242,86 +281,91 @@ _DEFUN(towupper,(c), wint_t c)
> }
> else if (c< 0x0400)
> {
> - if (c == 0x03ac)
> - return 0x0386;
> -
> - if ((c& 0xfff0) == 0x03a0&& c>= 0x03ad)
> - return (c - 0x15);
> -
> + wint_t k;
> +
> + if (c>= 0x03ad&& c<= 0x03af)
> + return (c - 0x25);
> +
> if (c>= 0x03b1&& c<= 0x03cb&& c != 0x03c2)
> return (c - 0x20);
>
> - if (c == 0x03c2)
> - return 0x03a3;
> -
> - if (c>= 0x03cc&& c<= 0x03f5)
> + if (c>= 0x03d9&& c<= 0x03ef&& (c& 1))
> + return (c - 1);
> +
> + switch (c)
> {
> - wint_t k;
> - switch (c)
> - {
> - case 0x03cc:
> - k = 0x038c;
> - break;
> - case 0x03cd:
> - case 0x03ce:
> - k = c - 0x3f;
> - break;
> - case 0x03d0:
> - k = 0x0392;
> - break;
> - case 0x03d1:
> - k = 0x0398;
> - break;
> - case 0x03d5:
> - k = 0x03a6;
> - break;
> - case 0x03d6:
> - k = 0x03a0;
> - break;
> - case 0x03d9:
> - case 0x03db:
> - case 0x03dd:
> - case 0x03df:
> - case 0x03e1:
> - case 0x03e3:
> - case 0x03e5:
> - case 0x03e7:
> - case 0x03e9:
> - case 0x03eb:
> - case 0x03ed:
> - case 0x03ef:
> - k = c - 1;
> - break;
> - case 0x03f0:
> - k = 0x039a;
> - break;
> - case 0x03f1:
> - k = 0x03a1;
> - break;
> - case 0x03f2:
> - k = 0x03a3;
> - break;
> - case 0x03f5:
> - k = 0x0395;
> - break;
> - default:
> - k = 0;
> - }
> - if (k != 0)
> - return k;
> + case 0x0345:
> + k = 0x0399;
> + break;
> + case 0x0371:
> + case 0x0373:
> + case 0x0377:
> + case 0x03f8:
> + case 0x03fb:
> + k = c - 1;
> + break;
> + case 0x037b:
> + case 0x037c:
> + case 0x037d:
> + k = c + 0x82;
> + break;
> + case 0x03ac:
> + k = 0x0386;
> + break;
> + case 0x03c2:
> + k = 0x03a3;
> + break;
> + case 0x03cc:
> + k = 0x038c;
> + break;
> + case 0x03cd:
> + case 0x03ce:
> + k = c - 0x3f;
> + break;
> + case 0x03d0:
> + k = 0x0392;
> + break;
> + case 0x03d1:
> + k = 0x0398;
> + break;
> + case 0x03d5:
> + k = 0x03a6;
> + break;
> + case 0x03d6:
> + k = 0x03a0;
> + break;
> + case 0x03d7:
> + k = 0x03cf;
> + break;
> + case 0x03f0:
> + k = 0x039a;
> + break;
> + case 0x03f1:
> + k = 0x03a1;
> + break;
> + case 0x03f2:
> + k = 0x03f9;
> + break;
> + case 0x03f5:
> + k = 0x0395;
> + break;
> + default:
> + k = 0;
> }
> + if (k != 0)
> + return k;
> }
> else if (c< 0x500)
> {
> - if (c>= 0x0450&& c<= 0x045f)
> - return (c - 0x50);
> -
> if (c>= 0x0430&& c<= 0x044f)
> return (c - 0x20);
>
> + if (c>= 0x0450&& c<= 0x045f)
> + return (c - 0x50);
> +
> if ((c>= 0x0461&& c<= 0x0481) ||
> (c>= 0x048b&& c<= 0x04bf) ||
> - (c>= 0x04d1&& c<= 0x04f5))
> + (c>= 0x04d1&& c<= 0x04ff))
> {
> if (c& 0x01)
> return (c - 1);
> @@ -335,23 +379,36 @@ _DEFUN(towupper,(c), wint_t c)
> return c;
> }
>
> - if (c == 0x04f9)
> - return 0x04f8;
> + if (c == 0x04cf)
> + return 0x04c0;
> +
> + if (c>= 0x04f7&& c<= 0x04f9)
> + return (c - 1);
> + }
> + else if (c< 0x0600)
> + {
> + if (c>= 0x0501&& c<= 0x0525&& (c& 1))
> + return c - 1;
> +
> + if (c>= 0x0561&& c<= 0x0586)
> + return (c - 0x30);
> }
> else if (c< 0x1f00)
> {
> - if ((c>= 0x0501&& c<= 0x050f) ||
> - (c>= 0x1e01&& c<= 0x1e95) ||
> - (c>= 0x1ea1&& c<= 0x1ef9))
> + if (c == 0x1d79)
> + return 0xa77d;
> +
> + if (c == 0x1d7d)
> + return 0x2c63;
> +
> + if ((c>= 0x1e01&& c<= 0x1e95) ||
> + (c>= 0x1ea1&& c<= 0x1eff))
> {
> if (c& 0x01)
> return (c - 1);
> return c;
> }
>
> - if (c>= 0x0561&& c<= 0x0586)
> - return (c - 0x30);
> -
> if (c == 0x1e9b)
> return 0x1e60;
> }
> @@ -407,6 +464,9 @@ _DEFUN(towupper,(c), wint_t c)
> case 0x1f75:
> k = 0x1fcb;
> break;
> + case 0x1fc3:
> + k = 0x1fcc;
> + break;
> case 0x1fd0:
> k = 0x1fd8;
> break;
> @@ -456,18 +516,65 @@ _DEFUN(towupper,(c), wint_t c)
> return k;
> }
> }
> - else
> + else if (c< 0x3000)
> {
> + if (c == 0x214e)
> + return 0x2132;
> +
> + if (c == 0x2184)
> + return 0x2183;
> +
> if (c>= 0x2170&& c<= 0x217f)
> return (c - 0x10);
>
> if (c>= 0x24d0&& c<= 0x24e9)
> return (c - 0x1a);
>
> + if (c>= 0x2c30&& c<= 0x2c5e)
> + return (c - 0x30);
> +
> + if ((c>= 0x2c68&& c<= 0x2c6c&& !(c& 1)) ||
> + (c>= 0x2c81&& c<= 0x2ce3&& (c& 1)) ||
> + c == 0x2c73 || c == 0x2c76 ||
> + c == 0x2cec || c == 0x2cee)
> + return (c - 1);
> +
> + if (c>= 0x2c81&& c<= 0x2ce3&& (c& 1))
> + return (c - 1);
> +
> + if (c>= 0x2d00&& c<= 0x2d25)
> + return (c - 0x1c60);
> +
> + switch (c)
> + {
> + case 0x2c61:
> + return 0x2c60;
> + case 0x2c65:
> + return 0x023a;
> + case 0x2c66:
> + return 0x023e;
> + }
> + }
> + else if (c>= 0xa000&& c< 0xb000)
> + {
> + if (((c>= 0xa641&& c<= 0xa65f) ||
> + (c>= 0xa663&& c<= 0xa66d) ||
> + (c>= 0xa681&& c<= 0xa697) ||
> + (c>= 0xa723&& c<= 0xa72f) ||
> + (c>= 0xa733&& c<= 0xa76f) ||
> + (c>= 0xa77f&& c<= 0xa787))&&
> + (c& 1))
> + return (c - 1);
> +
> + if (c == 0xa77a || c == 0xa77c || c == 0xa78c)
> + return (c - 1);
> + }
> + else
> + {
> if (c>= 0xff41&& c<= 0xff5a)
> return (c - 0x20);
>
> - if (c>= 0x10428&& c<= 0x1044d)
> + if (c>= 0x10428&& c<= 0x1044f)
> return (c - 0x28);
> }
> return c;
>
>
More information about the Newlib mailing list