locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Library General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Library General Public License for more details.
  14
  15    You should have received a copy of the GNU Library General Public
  16    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18    Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <alloca.h>
  25 #include <endian.h>
  26 #include <limits.h>
  27 #include <string.h>
  28
  29 #include "locales.h"
  30 #include "localeinfo.h"
  31 #include "langinfo.h"
  32 #include "locfile-token.h"
  33 #include "stringtrans.h"
  34
  35 /* Uncomment the following line in the production version.  */
  36 /* define NDEBUG 1 */
  37 #include <assert.h>
  38
  39
  40 void *xmalloc (size_t __n);
  41 void *xcalloc (size_t __n, size_t __s);
  42 void *xrealloc (void *__ptr, size_t __n);
  43
  44
/* The bit used for representing a special class.  BITPOS yields the
   distance of the class token from `tok_upper'; BIT turns that into a
   one-bit mask of type `int'.  Both are only meaningful for tokens
   that denote character classes.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (1 << BITPOS (class))

/* Expand to an lvalue for the entry of character VALUE in the table
   `CTYPE->COLLECTION IDX'.  IDX is pasted verbatim after the member
   names, so it is either empty or a subscript such as `[n]'.  The
   lookup goes through find_idx, which receives the table together
   with its `_max' and `_act' companions.  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
             &ctype->collection##_act idx, value)

/* Reverse the byte order of a 32-bit value.  The operand is evaluated
   several times and is not masked to 32 bits, so it should be a
   side-effect free expression of an unsigned 32-bit type.  */
#define SWAPU32(w) \
  (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))

/* Exchange the two low-order bytes of W; higher bits are dropped.  */
#define SWAPU16(w) \
  ((((w)  >> 8) & 0xff) | (((w) & 0xff) << 8))


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t u_int16_t
#define CHAR_CLASS_TRANS SWAPU16
#define char_class32_t u_int32_t
#define CHAR_CLASS32_TRANS SWAPU32
  67
  68
/* The real definition of the struct for the LC_CTYPE locale.  */
struct locale_ctype_t
{
  /* Character value stored at each table index.  ctype_startup fills
     the first 256 entries with the identity mapping; find_idx appends
     further values.  `charnames_max' is the allocated number of
     entries, `charnames_act' the number in use.  */
  unsigned int *charnames;
  size_t charnames_max;
  size_t charnames_act;

  /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes.  */
#define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
  /* Number of registered classes and their names.  The index of a
     name is the bit position used for that class.  */
  size_t nr_charclass;
  const char *classnames[MAX_NR_CHARCLASS];
  /* Mask of the class currently being defined, zero outside a class
     definition.  */
  unsigned long int current_class_mask;
  /* Character most recently given by ctype_class_from, used as the
     start of a range by ctype_class_to; ILLEGAL_CHAR_VALUE if none.  */
  unsigned int last_class_char;
  /* Class bit mask per character, indexed like `charnames', with the
     allocated size and the companion counter passed to find_idx.  */
  u_int32_t *class_collection;
  size_t class_collection_max;
  size_t class_collection_act;
  /* Bits of the classes for which ctype_class_start was called.  */
  unsigned long int class_done;

  /* If the following number ever turns out to be too small simply
     increase it.  But I doubt it will.  --drepper@gnu */
#define MAX_NR_CHARMAP 16
  /* Names and tables of the character maps.  Entries 0 and 1 are
     `toupper' and `tolower' (registered in that order by
     ctype_startup).  Each table has its own allocated size and
     companion counter for find_idx.  */
  const char *mapnames[MAX_NR_CHARMAP];
  u_int32_t *map_collection[MAX_NR_CHARMAP];
  size_t map_collection_max[MAX_NR_CHARMAP];
  size_t map_collection_act[MAX_NR_CHARMAP];
  size_t map_collection_nr;
  /* Index of the map currently being defined; MAX_NR_CHARMAP outside
     a map definition.  */
  size_t last_map_idx;
  /* Source character of the pair being read by ctype_map_from and
     ctype_map_to; ILLEGAL_CHAR_VALUE if none is pending.  */
  unsigned int from_map_char;
  /* Set by ctype_map_start when the respective map is defined.  */
  int toupper_done;
  int tolower_done;

  /* The arrays for the binary representation.  They are written out by
     ctype_output; presumably allocate_arrays fills them in (that
     function is not visible here -- confirm).  The `_eb' and `_el'
     members are emitted under the correspondingly named locale
     items.  */
  u_int32_t plane_size;
  u_int32_t plane_cnt;
  char_class_t *ctype_b;
  char_class32_t *ctype32_b;
  u_int32_t *names_el;
  u_int32_t *names_eb;
  u_int32_t **map_eb;
  u_int32_t **map_el;
  u_int32_t *class_name_ptr;
  u_int32_t *map_name_ptr;
  unsigned char *width;
  u_int32_t mb_cur_max;
  const char *codeset_name;
};
 115
 116
 117 /* Prototypes for local functions.  */
 118 static void ctype_class_newP (struct linereader *lr,
 119                               struct locale_ctype_t *ctype, const char *name);
 120 static void ctype_map_newP (struct linereader *lr,
 121                             struct locale_ctype_t *ctype,
 122                             const char *name, struct charset_t *charset);
 123 static u_int32_t *find_idx (struct locale_ctype_t *ctype, u_int32_t **table,
 124                             size_t *max, size_t *act, unsigned int idx);
 125 static void set_class_defaults (struct locale_ctype_t *ctype,
 126                                 struct charset_t *charset);
 127 static void allocate_arrays (struct locale_ctype_t *ctype,
 128                              struct charset_t *charset);
 129
 130
 131 void
 132 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 133                struct charset_t *charset)
 134 {
 135   unsigned int cnt;
 136   struct locale_ctype_t *ctype;
 137
 138   /* We have a definition for LC_CTYPE.  */
 139   copy_posix.mask &= ~(1 << LC_CTYPE);
 140
 141   /* It is important that we always use UCS1 encoding for strings now.  */
 142   encoding_method = ENC_UCS1;
 143
 144   /* Allocate the needed room.  */
 145   locale->categories[LC_CTYPE].ctype = ctype =
 146     (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
 147
 148   /* We have no names seen yet.  */
 149   ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
 150   ctype->charnames =
 151     (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
 152   for (cnt = 0; cnt < 256; ++cnt)
 153     ctype->charnames[cnt] = cnt;
 154   ctype->charnames_act = 256;
 155
 156   /* Fill character class information.  */
 157   ctype->nr_charclass = 0;
 158   ctype->current_class_mask = 0;
 159   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 160   /* The order of the following instructions determines the bit
 161      positions!  */
 162   ctype_class_newP (lr, ctype, "upper");
 163   ctype_class_newP (lr, ctype, "lower");
 164   ctype_class_newP (lr, ctype, "alpha");
 165   ctype_class_newP (lr, ctype, "digit");
 166   ctype_class_newP (lr, ctype, "xdigit");
 167   ctype_class_newP (lr, ctype, "space");
 168   ctype_class_newP (lr, ctype, "print");
 169   ctype_class_newP (lr, ctype, "graph");
 170   ctype_class_newP (lr, ctype, "blank");
 171   ctype_class_newP (lr, ctype, "cntrl");
 172   ctype_class_newP (lr, ctype, "punct");
 173   ctype_class_newP (lr, ctype, "alnum");
 174
 175   ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
 176   ctype->class_collection
 177     = (u_int32_t *) xmalloc (sizeof (unsigned long int)
 178                              * ctype->class_collection_max);
 179   memset (ctype->class_collection, '\0',
 180           sizeof (unsigned long int) * ctype->class_collection_max);
 181   ctype->class_collection_act = 256;
 182
 183   /* Fill character map information.  */
 184   ctype->map_collection_nr = 0;
 185   ctype->last_map_idx = MAX_NR_CHARMAP;
 186   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 187   ctype_map_newP (lr, ctype, "toupper", charset);
 188   ctype_map_newP (lr, ctype, "tolower", charset);
 189
 190   /* Fill first 256 entries in `toupper' and `tolower' arrays.  */
 191   for (cnt = 0; cnt < 256; ++cnt)
 192     {
 193       ctype->map_collection[0][cnt] = cnt;
 194       ctype->map_collection[1][cnt] = cnt;
 195     }
 196 }
 197
 198
 199 void
 200 ctype_finish (struct localedef_t *locale, struct charset_t *charset)
 201 {
 202   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 203 #define NCLASS 12
 204   static const struct
 205   {
 206     const char *name;
 207     const char allow[NCLASS];
 208   }
 209   valid_table[NCLASS] =
 210   {
 211     /* The order is important.  See token.h for more information.
 212        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 213     { "upper",  "--MX-XDDXXX-" },
 214     { "lower",  "--MX-XDDXXX-" },
 215     { "alpha",  "---X-XDDXXX-" },
 216     { "digit",  "XXX--XDDXXX-" },
 217     { "xdigit", "-----XDDXXX-" },
 218     { "space",  "XXXXX------X" },
 219     { "print",  "---------X--" },
 220     { "graph",  "---------X--" },
 221     { "blank",  "XXXXXM-----X" },
 222     { "cntrl",  "XXXXX-XX--XX" },
 223     { "punct",  "XXXXX-DD-X-X" },
 224     { "alnum",  "-----XDDXXX-" }
 225   };
 226   size_t cnt;
 227   int cls1, cls2;
 228   unsigned int space_value;
 229   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 230
 231   /* Set default value for classes not specified.  */
 232   set_class_defaults (ctype, charset);
 233
 234   /* Check according to table.  */
 235   for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
 236     {
 237       unsigned long int tmp;
 238
 239       tmp = ctype->class_collection[cnt];
 240       if (tmp == 0)
 241         continue;
 242
 243       for (cls1 = 0; cls1 < NCLASS; ++cls1)
 244         if ((tmp & (1 << cls1)) != 0)
 245           for (cls2 = 0; cls2 < NCLASS; ++cls2)
 246             if (valid_table[cls1].allow[cls2] != '-')
 247               {
 248                 int eq = (tmp & (1 << cls2)) != 0;
 249                 switch (valid_table[cls1].allow[cls2])
 250                   {
 251                   case 'M':
 252                     if (!eq)
 253                       {
 254                         char buf[17];
 255                         char *cp = buf;
 256                         unsigned int value;
 257
 258                         value = ctype->charnames[cnt];
 259
 260                         if ((value & 0xff000000) != 0)
 261                           cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
 262                         if ((value & 0xffff0000) != 0)
 263                           cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
 264                         if ((value & 0xffffff00) != 0)
 265                           cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
 266                         sprintf (cp, "\\%o", value & 0xff);
 267
 268                         if (!be_quiet)
 269                           error (0, 0, _("\
 270 character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
 271                                  buf, valid_table[cls1].name,
 272                                  valid_table[cls2].name);
 273                       }
 274                     break;
 275
 276                   case 'X':
 277                     if (eq)
 278                       {
 279                         char buf[17];
 280                         char *cp = buf;
 281                         unsigned int value;
 282
 283                         value = ctype->charnames[cnt];
 284
 285                         if ((value & 0xff000000) != 0)
 286                           cp += sprintf (cp, "\\%o", value >> 24);
 287                         if ((value & 0xffff0000) != 0)
 288                           cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
 289                         if ((value & 0xffffff00) != 0)
 290                           cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
 291                         sprintf (cp, "\\%o", value & 0xff);
 292
 293                         if (!be_quiet)
 294                           error (0, 0, _("\
 295 character %s'%s' in class `%s' must not be in class `%s'"),
 296                                  value > 256 ? "L" : "", buf,
 297                                  valid_table[cls1].name,
 298                                  valid_table[cls2].name);
 299                       }
 300                     break;
 301
 302                   case 'D':
 303                     ctype->class_collection[cnt] |= 1 << cls2;
 304                     break;
 305
 306                   default:
 307                     error (5, 0, _("internal error in %s, line %u"),
 308                            __FUNCTION__, __LINE__);
 309                   }
 310               }
 311     }
 312
 313   /* ... and now test <SP> as a special case.  */
 314   space_value = charset_find_value (&charset->char_table, "SP", 2);
 315   if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE)
 316     space_value = charset_find_value (&charset->char_table, "space", 5);
 317   if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE)
 318     {
 319       if (!be_quiet)
 320         error (0, 0, _("character <SP> not defined in character map"));
 321     }
 322   else if (((cnt = BITPOS (tok_space),
 323              (ELEM (ctype, class_collection, , space_value)
 324               & BIT (tok_space)) == 0)
 325             || (cnt = BITPOS (tok_blank),
 326                 (ELEM (ctype, class_collection, , space_value)
 327                  & BIT (tok_blank)) == 0)))
 328     {
 329       if (!be_quiet)
 330         error (0, 0, _("<SP> character not in class `%s'"),
 331                valid_table[cnt].name);
 332     }
 333   else if (((cnt = BITPOS (tok_punct),
 334              (ELEM (ctype, class_collection, , space_value)
 335               & BIT (tok_punct)) != 0)
 336             || (cnt = BITPOS (tok_graph),
 337                 (ELEM (ctype, class_collection, , space_value)
 338                  & BIT (tok_graph))
 339                 != 0)))
 340     {
 341       if (!be_quiet)
 342         error (0, 0, _("<SP> character must not be in class `%s'"),
 343                valid_table[cnt].name);
 344     }
 345   else
 346     ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
 347
 348   /* Now that the tests are done make sure the name array contains all
 349      characters which are handled in the WIDTH section of the
 350      character set definition file.  */
 351   if (charset->width_rules != NULL)
 352     for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
 353       {
 354         size_t inner;
 355         for (inner = charset->width_rules[cnt].from;
 356              inner <= charset->width_rules[cnt].to; ++inner)
 357           (void) find_idx (ctype, NULL, NULL, NULL, inner);
 358       }
 359 }
 360
 361
 362 void
 363 ctype_output (struct localedef_t *locale, struct charset_t *charset,
 364               const char *output_path)
 365 {
 366   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 367   const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
 368                          + 2 * (ctype->map_collection_nr - 2));
 369   struct iovec iov[2 + nelems + ctype->nr_charclass
 370                   + ctype->map_collection_nr];
 371   struct locale_file data;
 372   u_int32_t idx[nelems];
 373   size_t elem, cnt, offset, total;
 374
 375
 376   if ((locale->binary & (1 << LC_CTYPE)) != 0)
 377     {
 378       iov[0].iov_base = ctype;
 379       iov[0].iov_len = locale->len[LC_CTYPE];
 380
 381       write_locale_data (output_path, "LC_CTYPE", 1, iov);
 382
 383       return;
 384     }
 385
 386
 387   /* Now prepare the output: Find the sizes of the table we can use.  */
 388   allocate_arrays (ctype, charset);
 389
 390   data.magic = LIMAGIC (LC_CTYPE);
 391   data.n = nelems;
 392   iov[0].iov_base = (void *) &data;
 393   iov[0].iov_len = sizeof (data);
 394
 395   iov[1].iov_base = (void *) idx;
 396   iov[1].iov_len = sizeof (idx);
 397
 398   idx[0] = iov[0].iov_len + iov[1].iov_len;
 399   offset = 0;
 400
 401   for (elem = 0; elem < nelems; ++elem)
 402     {
 403       if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
 404         switch (elem)
 405           {
 406 #define CTYPE_DATA(name, base, len)                                           \
 407           case _NL_ITEM_INDEX (name):                                         \
 408             iov[2 + elem + offset].iov_base = (base);                         \
 409             iov[2 + elem + offset].iov_len = (len);                           \
 410             if (elem + 1 < nelems)                                            \
 411               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;     \
 412             break
 413
 414           CTYPE_DATA (_NL_CTYPE_CLASS,
 415                       ctype->ctype_b,
 416                       (256 + 128) * sizeof (char_class_t));
 417
 418           CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
 419                       ctype->map_eb[0],
 420                       (ctype->plane_size * ctype->plane_cnt + 128)
 421                       * sizeof (u_int32_t));
 422           CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
 423                       ctype->map_eb[1],
 424                       (ctype->plane_size * ctype->plane_cnt + 128)
 425                       * sizeof (u_int32_t));
 426
 427           CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
 428                       ctype->map_el[0],
 429                       (ctype->plane_size * ctype->plane_cnt + 128)
 430                       * sizeof (u_int32_t));
 431           CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
 432                       ctype->map_el[1],
 433                       (ctype->plane_size * ctype->plane_cnt + 128)
 434                       * sizeof (u_int32_t));
 435
 436           CTYPE_DATA (_NL_CTYPE_CLASS32,
 437                       ctype->ctype32_b,
 438                       (ctype->plane_size * ctype->plane_cnt
 439                        * sizeof (char_class32_t)));
 440
 441           CTYPE_DATA (_NL_CTYPE_NAMES_EB,
 442                       ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
 443                                         * sizeof (u_int32_t)));
 444           CTYPE_DATA (_NL_CTYPE_NAMES_EL,
 445                       ctype->names_el, (ctype->plane_size * ctype->plane_cnt
 446                                         * sizeof (u_int32_t)));
 447
 448           CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
 449                       &ctype->plane_size, sizeof (u_int32_t));
 450           CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
 451                       &ctype->plane_cnt, sizeof (u_int32_t));
 452
 453           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 454             /* The class name array.  */
 455             total = 0;
 456             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
 457               {
 458                 iov[2 + elem + offset].iov_base
 459                   = (void *) ctype->classnames[cnt];
 460                 iov[2 + elem + offset].iov_len
 461                   = strlen (ctype->classnames[cnt]) + 1;
 462                 total += iov[2 + elem + offset].iov_len;
 463               }
 464             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 465             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 466             total += 1 + (4 - ((total + 1) % 4));
 467
 468             if (elem + 1 < nelems)
 469               idx[elem + 1] = idx[elem] + total;
 470             break;
 471
 472           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
 473             /* The class name array.  */
 474             total = 0;
 475             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
 476               {
 477                 iov[2 + elem + offset].iov_base
 478                   = (void *) ctype->mapnames[cnt];
 479                 iov[2 + elem + offset].iov_len
 480                   = strlen (ctype->mapnames[cnt]) + 1;
 481                 total += iov[2 + elem + offset].iov_len;
 482               }
 483             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 484             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 485             total += 1 + (4 - ((total + 1) % 4));
 486
 487             if (elem + 1 < nelems)
 488               idx[elem + 1] = idx[elem] + total;
 489             break;
 490
 491           CTYPE_DATA (_NL_CTYPE_WIDTH,
 492                       ctype->width, ctype->plane_size * ctype->plane_cnt);
 493
 494           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
 495                       &ctype->mb_cur_max, sizeof (u_int32_t));
 496
 497           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
 498             total = strlen (ctype->codeset_name) + 1;
 499             if (total % 4 == 0)
 500               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
 501             else
 502               {
 503                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
 504                 memset (mempcpy (iov[2 + elem + offset].iov_base,
 505                                  ctype->codeset_name, total),
 506                         '\0', 4 - (total & 3));
 507                 total = (total + 3) & ~3;
 508               }
 509             iov[2 + elem + offset].iov_len = total;
 510             if (elem + 1 < nelems)
 511               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 512             break;
 513
 514           default:
 515             assert (! "unknown CTYPE element");
 516           }
 517       else
 518         {
 519           /* Handle extra maps.  */
 520           size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
 521
 522           if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
 523             iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
 524           else
 525             iov[2 + elem + offset].iov_base = ctype->map_el[nr];
 526
 527           iov[2 + elem + offset].iov_len = ((ctype->plane_size
 528                                              * ctype->plane_cnt + 128)
 529                                             * sizeof (u_int32_t));
 530
 531           if (elem + 1 < nelems)
 532             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 533         }
 534     }
 535
 536   assert (2 + elem + offset == (nelems + ctype->nr_charclass
 537                                 + ctype->map_collection_nr + 2));
 538
 539   write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
 540 }
 541
 542
 543 /* Character class handling.  */
 544 void
 545 ctype_class_new (struct linereader *lr, struct localedef_t *locale,
 546                  enum token_t tok, struct token *code,
 547                  struct charset_t *charset)
 548 {
 549   ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
 550                     code->val.str.start);
 551 }
 552
 553
 554 int
 555 ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
 556                     const char *name)
 557 {
 558   size_t cnt;
 559
 560   for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
 561     if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
 562         == 0)
 563       return 1;
 564
 565   return 0;
 566 }
 567
 568
 569 void
 570 ctype_class_start (struct linereader *lr, struct localedef_t *locale,
 571                    enum token_t tok, const char *str,
 572                    struct charset_t *charset)
 573 {
 574   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 575   size_t cnt;
 576
 577   switch (tok)
 578     {
 579     case tok_upper:
 580       str = "upper";
 581       break;
 582     case tok_lower:
 583       str = "lower";
 584       break;
 585     case tok_alpha:
 586       str = "alpha";
 587       break;
 588     case tok_digit:
 589       str = "digit";
 590       break;
 591     case tok_xdigit:
 592       str = "xdigit";
 593       break;
 594     case tok_space:
 595       str = "space";
 596       break;
 597     case tok_print:
 598       str = "print";
 599       break;
 600     case tok_graph:
 601       str = "graph";
 602       break;
 603     case tok_blank:
 604       str = "blank";
 605       break;
 606     case tok_cntrl:
 607       str = "cntrl";
 608       break;
 609     case tok_punct:
 610       str = "punct";
 611       break;
 612     case tok_alnum:
 613       str = "alnum";
 614       break;
 615     case tok_ident:
 616       break;
 617     default:
 618       assert (! "illegal token as class name: should not happen");
 619     }
 620
 621   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
 622     if (strcmp (str, ctype->classnames[cnt]) == 0)
 623       break;
 624
 625   if (cnt >= ctype->nr_charclass)
 626     assert (! "unknown class in class definition: should not happen");
 627
 628   ctype->class_done |= BIT (tok);
 629
 630   ctype->current_class_mask = 1 << cnt;
 631   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 632 }
 633
 634
 635 void
 636 ctype_class_from (struct linereader *lr, struct localedef_t *locale,
 637                   struct token *code, struct charset_t *charset)
 638 {
 639   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 640   unsigned int value;
 641
 642   value = charset_find_value (&charset->char_table, code->val.str.start,
 643                               code->val.str.len);
 644
 645   ctype->last_class_char = value;
 646
 647   if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
 648     /* In the LC_CTYPE category it is no error when a character is
 649        not found.  This has to be ignored silently.  */
 650     return;
 651
 652   *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
 653              &ctype->class_collection_act, value)
 654     |= ctype->current_class_mask;
 655 }
 656
 657
 658 void
 659 ctype_class_to (struct linereader *lr, struct localedef_t *locale,
 660                 struct token *code, struct charset_t *charset)
 661 {
 662   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 663   unsigned int value, cnt;
 664
 665   value = charset_find_value (&charset->char_table, code->val.str.start,
 666                               code->val.str.len);
 667
 668   /* In the LC_CTYPE category it is no error when a character is
 669      not found.  This has to be ignored silently.  */
 670   if ((wchar_t) ctype->last_class_char != ILLEGAL_CHAR_VALUE
 671       && (wchar_t) value != ILLEGAL_CHAR_VALUE)
 672     for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
 673       *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
 674                  &ctype->class_collection_act, cnt)
 675         |= ctype->current_class_mask;
 676
 677   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 678 }
 679
 680
 681 void
 682 ctype_class_end (struct linereader *lr, struct localedef_t *locale)
 683 {
 684   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 685
 686   /* We have no special actions to perform here.  */
 687   ctype->current_class_mask = 0;
 688   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 689 }
 690
 691
 692 /* Character map handling.  */
 693 void
 694 ctype_map_new (struct linereader *lr, struct localedef_t *locale,
 695                enum token_t tok, struct token *code,
 696                struct charset_t *charset)
 697 {
 698   ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
 699                   code->val.str.start, charset);
 700 }
 701
 702
 703 int
 704 ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
 705                    const char *name)
 706 {
 707   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 708   size_t cnt;
 709
 710   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
 711     if (strcmp (name, ctype->mapnames[cnt]) == 0)
 712       return 1;
 713
 714   return 0;
 715 }
 716
 717
 718 void
 719 ctype_map_start (struct linereader *lr, struct localedef_t *locale,
 720                  enum token_t tok, const char *name, struct charset_t *charset)
 721 {
 722   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 723   size_t cnt;
 724
 725   switch (tok)
 726     {
 727     case tok_toupper:
 728       ctype->toupper_done = 1;
 729       name = "toupper";
 730       break;
 731     case tok_tolower:
 732       ctype->tolower_done = 1;
 733       name = "tolower";
 734       break;
 735     case tok_ident:
 736       break;
 737     default:
 738       assert (! "unknown token in category `LC_CTYPE' should not happen");
 739     }
 740
 741   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
 742     if (strcmp (name, ctype->mapnames[cnt]) == 0)
 743       break;
 744
 745   if (cnt == ctype->map_collection_nr)
 746     assert (! "unknown token in category `LC_CTYPE' should not happen");
 747
 748   ctype->last_map_idx = cnt;
 749   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 750 }
 751
 752
 753 void
 754 ctype_map_from (struct linereader *lr, struct localedef_t *locale,
 755                 struct token *code, struct charset_t *charset)
 756 {
 757   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 758   unsigned int value;
 759
 760   value = charset_find_value (&charset->char_table, code->val.str.start,
 761                               code->val.str.len);
 762
 763   if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
 764     /* In the LC_CTYPE category it is no error when a character is
 765        not found.  This has to be ignored silently.  */
 766     return;
 767
 768   assert (ctype->last_map_idx < ctype->map_collection_nr);
 769
 770   ctype->from_map_char = value;
 771 }
 772
 773
 774 void
 775 ctype_map_to (struct linereader *lr, struct localedef_t *locale,
 776               struct token *code, struct charset_t *charset)
 777 {
 778   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 779   unsigned int value;
 780
 781   value = charset_find_value (&charset->char_table, code->val.str.start,
 782                               code->val.str.len);
 783
 784   if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE
 785       || (wchar_t) value == ILLEGAL_CHAR_VALUE)
 786     {
 787       /* In the LC_CTYPE category it is no error when a character is
 788          not found.  This has to be ignored silently.  */
 789       ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 790       return;
 791     }
 792
 793   *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
 794              &ctype->map_collection_max[ctype->last_map_idx],
 795              &ctype->map_collection_act[ctype->last_map_idx],
 796              ctype->from_map_char) = value;
 797
 798   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 799 }
 800
 801
 802 void
 803 ctype_map_end (struct linereader *lr, struct localedef_t *locale)
 804 {
 805   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 806
 807   ctype->last_map_idx = MAX_NR_CHARMAP;
 808   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 809 }
 810
 811
 812 /* Local functions.  */
 813 static void
 814 ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
 815                   const char *name)
 816 {
 817   size_t cnt;
 818
 819   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
 820     if (strcmp (ctype->classnames[cnt], name) == 0)
 821       break;
 822
 823   if (cnt < ctype->nr_charclass)
 824     {
 825       lr_error (lr, _("character class `%s' already defined"), name);
 826       return;
 827     }
 828
 829   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
 830     /* Exit code 2 is prescribed in P1003.2b.  */
 831     error (2, 0, _("\
 832 implementation limit: no more than %d character classes allowed"),
 833            MAX_NR_CHARCLASS);
 834
 835   ctype->classnames[ctype->nr_charclass++] = name;
 836 }
 837
 838
 839 static void
 840 ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
 841                 const char *name, struct charset_t *charset)
 842 {
 843   size_t max_chars = 0;
 844   size_t cnt;
 845
 846   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
 847     {
 848       if (strcmp (ctype->mapnames[cnt], name) == 0)
 849         break;
 850
 851       if (max_chars < ctype->map_collection_max[cnt])
 852         max_chars = ctype->map_collection_max[cnt];
 853     }
 854
 855   if (cnt < ctype->map_collection_nr)
 856     {
 857       lr_error (lr, _("character map `%s' already defined"), name);
 858       return;
 859     }
 860
 861   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
 862     /* Exit code 2 is prescribed in P1003.2b.  */
 863     error (2, 0, _("\
 864 implementation limit: no more than %d character maps allowed"),
 865            MAX_NR_CHARMAP);
 866
 867   ctype->mapnames[cnt] = name;
 868
 869   if (max_chars == 0)
 870     ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
 871   else
 872     ctype->map_collection_max[cnt] = max_chars;
 873
 874   ctype->map_collection[cnt] = (u_int32_t *)
 875     xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
 876   memset (ctype->map_collection[cnt], '\0',
 877           sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
 878   ctype->map_collection_act[cnt] = 256;
 879
 880   ++ctype->map_collection_nr;
 881 }
 882
 883
 884 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
 885    is possible if we only want to extend the name array.  */
 886 static u_int32_t *
 887 find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
 888           size_t *act, unsigned int idx)
 889 {
 890   size_t cnt;
 891
 892   if (idx < 256)
 893     return table == NULL ? NULL : &(*table)[idx];
 894
 895   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
 896     if (ctype->charnames[cnt] == idx)
 897       break;
 898
 899   /* We have to distinguish two cases: the name is found or not.  */
 900   if (cnt == ctype->charnames_act)
 901     {
 902       /* Extend the name array.  */
 903       if (ctype->charnames_act == ctype->charnames_max)
 904         {
 905           ctype->charnames_max *= 2;
 906           ctype->charnames = (unsigned int *)
 907             xrealloc (ctype->charnames,
 908                       sizeof (unsigned int) * ctype->charnames_max);
 909         }
 910       ctype->charnames[ctype->charnames_act++] = idx;
 911     }
 912
 913   if (table == NULL)
 914     /* We have done everything we are asked to do.  */
 915     return NULL;
 916
 917   if (cnt >= *act)
 918     {
 919       if (cnt >= *max)
 920         {
 921           size_t old_max = *max;
 922           do
 923             *max *= 2;
 924           while (*max <= cnt);
 925
 926           *table =
 927             (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
 928           memset (&(*table)[old_max], '\0',
 929                   (*max - old_max) * sizeof (u_int32_t));
 930         }
 931
 932       (*table)[cnt] = 0;
 933       *act = cnt;
 934     }
 935
 936   return &(*table)[cnt];
 937 }
 938
 939
 940 static void
 941 set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
 942 {
 943   /* These function defines the default values for the classes and conversions
 944      according to POSIX.2 2.5.2.1.
 945      It may seem that the order of these if-blocks is arbitrary but it is NOT.
 946      Don't move them unless you know what you do!  */
 947
 948   void set_default (int bit, int from, int to)
 949     {
 950       char tmp[2];
 951       int ch;
 952       /* Define string.  */
 953       strcpy (tmp, "?");
 954
 955       for (ch = from; ch <= to; ++ch)
 956         {
 957           unsigned int value;
 958           tmp[0] = ch;
 959
 960           value = charset_find_value (&charset->char_table, tmp, 1);
 961           if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
 962             {
 963               if (!be_quiet)
 964                 error (0, 0, _("\
 965 character `%s' not defined while needed as default value"),
 966                        tmp);
 967               continue;
 968             }
 969           else
 970             ELEM (ctype, class_collection, , value) |= bit;
 971         }
 972     }
 973
 974   /* Set default values if keyword was not present.  */
 975   if ((ctype->class_done & BIT (tok_upper)) == 0)
 976     /* "If this keyword [lower] is not specified, the lowercase letters
 977         `A' through `Z', ..., shall automatically belong to this class,
 978         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
 979     set_default (BIT (tok_upper), 'A', 'Z');
 980
 981   if ((ctype->class_done & BIT (tok_lower)) == 0)
 982     /* "If this keyword [lower] is not specified, the lowercase letters
 983         `a' through `z', ..., shall automatically belong to this class,
 984         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
 985     set_default (BIT (tok_lower), 'a', 'z');
 986
 987   if ((ctype->class_done & BIT (tok_alpha)) == 0)
 988     {
 989       /* Table 2-6 in P1003.2 says that characters in class `upper' or
 990          class `lower' *must* be in class `alpha'.  */
 991       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
 992       size_t cnt;
 993
 994       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 995         if ((ctype->class_collection[cnt] & mask) != 0)
 996           ctype->class_collection[cnt] |= BIT (tok_alpha);
 997     }
 998
 999   if ((ctype->class_done & BIT (tok_digit)) == 0)
1000     /* "If this keyword [digit] is not specified, the digits `0' through
1001         `9', ..., shall automatically belong to this class, with
1002         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
1003     set_default (BIT (tok_digit), '0', '9');
1004
1005   /* "Only characters specified for the `alpha' and `digit' keyword
1006      shall be specified.  Characters specified for the keyword `alpha'
1007      and `digit' are automatically included in this class.  */
1008   {
1009     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
1010     size_t cnt;
1011
1012     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1013       if ((ctype->class_collection[cnt] & mask) != 0)
1014         ctype->class_collection[cnt] |= BIT (tok_alnum);
1015   }
1016
1017   if ((ctype->class_done & BIT (tok_space)) == 0)
1018     /* "If this keyword [space] is not specified, the characters <space>,
1019         <form-feed>, <newline>, <carriage-return>, <tab>, and
1020         <vertical-tab>, ..., shall automatically belong to this class,
1021         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
1022     {
1023       unsigned int value;
1024
1025       value = charset_find_value (&charset->char_table, "space", 5);
1026       if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1027         {
1028           if (!be_quiet)
1029             error (0, 0, _("\
1030 character `%s' not defined while needed as default value"),
1031                    "<space>");
1032         }
1033       else
1034         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1035
1036       value = charset_find_value (&charset->char_table, "form-feed", 9);
1037       if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1038         {
1039           if (!be_quiet)
1040             error (0, 0, _("\
1041 character `%s' not defined while needed as default value"),
1042                    "<form-feed>");
1043         }
1044       else
1045         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1046
1047       value = charset_find_value (&charset->char_table, "newline", 7);
1048       if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1049         {
1050           if (!be_quiet)
1051             error (0, 0, _("\
1052 character `%s' not defined while needed as default value"),
1053                    "<newline>");
1054         }
1055       else
1056         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1057
1058       value = charset_find_value (&charset->char_table, "carriage-return", 15);
1059       if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1060         {
1061           if (!be_quiet)
1062             error (0, 0, _("\
1063 character `%s' not defined while needed as default value"),
1064                    "<carriage-return>");
1065         }
1066       else
1067         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1068
1069       value = charset_find_value (&charset->char_table, "tab", 3);
1070       if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1071         {
1072           if (!be_quiet)
1073             error (0, 0, _("\
1074 character `%s' not defined while needed as default value"),
1075                    "<tab>");
1076         }
1077       else
1078         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1079
1080       value = charset_find_value (&charset->char_table, "vertical-tab", 12);
1081       if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1082         {
1083           if (!be_quiet)
1084             error (0, 0, _("\
1085 character `%s' not defined while needed as default value"),
1086                    "<vertical-tab>");
1087         }
1088       else
1089         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1090     }
1091
1092   if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1093     /* "If this keyword is not specified, the digits `0' to `9', the
1094         uppercase letters `A' through `F', and the lowercase letters `a'
1095         through `f', ..., shell automatically belong to this class, with
1096         implementation defined character values."  [P1003.2, 2.5.2.1]  */
1097     {
1098       set_default (BIT (tok_xdigit), '0', '9');
1099       set_default (BIT (tok_xdigit), 'A', 'F');
1100       set_default (BIT (tok_xdigit), 'a', 'f');
1101     }
1102
1103   if ((ctype->class_done & BIT (tok_blank)) == 0)
1104     /* "If this keyword [blank] is unspecified, the characters <space> and
1105        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
1106    {
1107       unsigned int value;
1108
1109       value = charset_find_value (&charset->char_table, "space", 5);
1110       if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1111         {
1112           if (!be_quiet)
1113             error (0, 0, _("\
1114 character `%s' not defined while needed as default value"),
1115                    "<space>");
1116         }
1117       else
1118         ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1119
1120       value = charset_find_value (&charset->char_table, "tab", 3);
1121       if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1122         {
1123           if (!be_quiet)
1124             error (0, 0, _("\
1125 character `%s' not defined while needed as default value"),
1126                    "<tab>");
1127         }
1128       else
1129         ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1130     }
1131
1132   if ((ctype->class_done & BIT (tok_graph)) == 0)
1133     /* "If this keyword [graph] is not specified, characters specified for
1134         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1135         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
1136     {
1137       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1138         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1139       size_t cnt;
1140
1141       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1142         if ((ctype->class_collection[cnt] & mask) != 0)
1143           ctype->class_collection[cnt] |= BIT (tok_graph);
1144     }
1145
1146   if ((ctype->class_done & BIT (tok_print)) == 0)
1147     /* "If this keyword [print] is not provided, characters specified for
1148         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1149         and the <space> character shall belong to this character class."
1150         [P1003.2, 2.5.2.1]  */
1151     {
1152       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1153         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1154       size_t cnt;
1155       wchar_t space;
1156
1157       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1158         if ((ctype->class_collection[cnt] & mask) != 0)
1159           ctype->class_collection[cnt] |= BIT (tok_print);
1160
1161       space = charset_find_value (&charset->char_table, "space", 5);
1162       if (space == ILLEGAL_CHAR_VALUE)
1163         {
1164           if (!be_quiet)
1165             error (0, 0, _("\
1166 character `%s' not defined while needed as default value"),
1167                    "<space>");
1168         }
1169       else
1170         ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1171     }
1172
1173   if (ctype->toupper_done == 0)
1174     /* "If this keyword [toupper] is not specified, the lowercase letters
1175         `a' through `z', and their corresponding uppercase letters `A' to
1176         `Z', ..., shall automatically be included, with implementation-
1177         defined character values."  [P1003.2, 2.5.2.1]  */
1178     {
1179       char tmp[4];
1180       int ch;
1181
1182       strcpy (tmp, "<?>");
1183
1184       for (ch = 'a'; ch <= 'z'; ++ch)
1185         {
1186           unsigned int value_from, value_to;
1187
1188           tmp[1] = (char) ch;
1189
1190           value_from = charset_find_value (&charset->char_table, &tmp[1], 1);
1191           if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE)
1192             {
1193               if (!be_quiet)
1194                 error (0, 0, _("\
1195 character `%s' not defined while needed as default value"),
1196                        tmp);
1197               continue;
1198             }
1199
1200           /* This conversion is implementation defined.  */
1201           tmp[1] = (char) (ch + ('A' - 'a'));
1202           value_to = charset_find_value (&charset->char_table, &tmp[1], 1);
1203           if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE)
1204             {
1205               if (!be_quiet)
1206                 error (0, 0, _("\
1207 character `%s' not defined while needed as default value"),
1208                        tmp);
1209               continue;
1210             }
1211
1212           /* The index [0] is determined by the order of the
1213              `ctype_map_newP' calls in `ctype_startup'.  */
1214           ELEM (ctype, map_collection, [0], value_from) = value_to;
1215         }
1216     }
1217
1218   if (ctype->tolower_done == 0)
1219     /* "If this keyword [tolower] is not specified, the mapping shall be
1220        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
1221     {
1222       size_t cnt;
1223
1224       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1225         if (ctype->map_collection[0][cnt] != 0)
1226           ELEM (ctype, map_collection, [1],
1227                 ctype->map_collection[0][cnt])
1228             = ctype->charnames[cnt];
1229     }
1230 }
1231
1232
1233 static void
1234 allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
1235 {
1236   size_t idx;
1237
1238   /* First we have to decide how we organize the arrays.  It is easy
1239      for a one-byte character set.  But multi-byte character set
1240      cannot be stored flat because the chars might be sparsely used.
1241      So we determine an optimal hashing function for the used
1242      characters.
1243
1244      We use a very trivial hashing function to store the sparse
1245      table.  CH % TABSIZE is used as an index.  To solve multiple hits
1246      we have N planes.  This guarantees a fixed search time for a
1247      character [N / 2].  In the following code we determine the minmum
1248      value for TABSIZE * N, where TABSIZE >= 256.  */
1249   size_t min_total = UINT_MAX;
1250   size_t act_size = 256;
1251
1252   if (!be_quiet)
1253     fputs (_("\
1254 Computing table size for character classes might take a while..."),
1255            stderr);
1256
1257   while (act_size < min_total)
1258     {
1259       size_t cnt[act_size];
1260       size_t act_planes = 1;
1261
1262       memset (cnt, '\0', sizeof cnt);
1263
1264       for (idx = 0; idx < 256; ++idx)
1265         cnt[idx] = 1;
1266
1267       for (idx = 0; idx < ctype->charnames_act; ++idx)
1268         if (ctype->charnames[idx] >= 256)
1269           {
1270             size_t nr = ctype->charnames[idx] % act_size;
1271
1272             if (++cnt[nr] > act_planes)
1273               {
1274                 act_planes = cnt[nr];
1275                 if (act_size * act_planes >= min_total)
1276                   break;
1277               }
1278           }
1279
1280       if (act_size * act_planes < min_total)
1281         {
1282           min_total = act_size * act_planes;
1283           ctype->plane_size = act_size;
1284           ctype->plane_cnt = act_planes;
1285         }
1286
1287       ++act_size;
1288     }
1289
1290   if (!be_quiet)
1291     fputs (_(" done\n"), stderr);
1292
1293
1294 #if __BYTE_ORDER == __LITTLE_ENDIAN
1295 # define NAMES_B1 ctype->names_el
1296 # define NAMES_B2 ctype->names_eb
1297 #else
1298 # define NAMES_B1 ctype->names_eb
1299 # define NAMES_B2 ctype->names_el
1300 #endif
1301
1302   ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1303                                            * ctype->plane_cnt,
1304                                            sizeof (u_int32_t));
1305   ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1306                                            * ctype->plane_cnt,
1307                                            sizeof (u_int32_t));
1308
1309   for (idx = 1; idx < 256; ++idx)
1310     NAMES_B1[idx] = idx;
1311
1312   /* Trick: change the 0th entry's name to 1 to mark the cell occupied.  */
1313   NAMES_B1[0] = 1;
1314
1315   for (idx = 256; idx < ctype->charnames_act; ++idx)
1316     {
1317       size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1318       size_t depth = 0;
1319
1320       while (NAMES_B1[nr + depth * ctype->plane_size])
1321         ++depth;
1322       assert (depth < ctype->plane_cnt);
1323
1324       NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1325
1326       /* Now for faster access remember the index in the NAMES_B array.  */
1327       ctype->charnames[idx] = nr + depth * ctype->plane_size;
1328     }
1329   NAMES_B1[0] = 0;
1330
1331   for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1332     NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1333
1334
1335   /* You wonder about this amount of memory?  This is only because some
1336      users do not manage to address the array with unsigned values or
1337      data types with range >= 256.  '\200' would result in the array
1338      index -128.  To help these poor people we duplicate the entries for
1339      128 up to 255 below the entry for \0.  */
1340   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1341                                              sizeof (char_class_t));
1342   ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1343                                                  * ctype->plane_cnt,
1344                                                  sizeof (char_class32_t));
1345
1346   /* Fill in the character class information.  */
1347 #if __BYTE_ORDER == __LITTLE_ENDIAN
1348 # define TRANS(w) CHAR_CLASS_TRANS (w)
1349 # define TRANS32(w) CHAR_CLASS32_TRANS (w)
1350 #else
1351 # define TRANS(w) (w)
1352 # define TRANS32(w) (w)
1353 #endif
1354
1355   for (idx = 0; idx < ctype->class_collection_act; ++idx)
1356     if (ctype->charnames[idx] < 256)
1357       ctype->ctype_b[128 + ctype->charnames[idx]]
1358         = TRANS (ctype->class_collection[idx]);
1359
1360   /* Mirror first 127 entries.  We must take care that entry -1 is not
1361      mirrored because EOF == -1.  */
1362   for (idx = 0; idx < 127; ++idx)
1363     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1364
1365   /* The 32 bit array contains all characters.  */
1366   for (idx = 0; idx < ctype->class_collection_act; ++idx)
1367     ctype->ctype32_b[ctype->charnames[idx]]
1368       = TRANS32 (ctype->class_collection[idx]);
1369
1370   /* Room for table of mappings.  */
1371   ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1372                                           * sizeof (u_int32_t *));
1373   ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1374                                           * sizeof (u_int32_t *));
1375
1376   /* Fill in all mappings.  */
1377   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1378     {
1379       unsigned int idx2;
1380
1381       /* Allocate table.  */
1382       ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1383                                                    * ctype->plane_cnt + 128)
1384                                                   * sizeof (u_int32_t));
1385       ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1386                                                    * ctype->plane_cnt + 128)
1387                                                   * sizeof (u_int32_t));
1388
1389 #if __BYTE_ORDER == __LITTLE_ENDIAN
1390 # define MAP_B1 ctype->map_el
1391 # define MAP_B2 ctype->map_eb
1392 #else
1393 # define MAP_B1 ctype->map_eb
1394 # define MAP_B2 ctype->map_el
1395 #endif
1396
1397       /* Copy default value (identity mapping).  */
1398       memcpy (&MAP_B1[idx][128], NAMES_B1,
1399               ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
1400
1401       /* Copy values from collection.  */
1402       for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1403         if (ctype->map_collection[idx][idx2] != 0)
1404           MAP_B1[idx][128 + ctype->charnames[idx2]] =
1405             ctype->map_collection[idx][idx2];
1406
1407       /* Mirror first 127 entries.  We must take care not to map entry
1408          -1 because EOF == -1.  */
1409       for (idx2 = 0; idx2 < 127; ++idx2)
1410         MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1411
1412       /* EOF must map to EOF.  */
1413       MAP_B1[idx][127] = EOF;
1414
1415       /* And now the other byte order.  */
1416       for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1417         MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1418     }
1419
1420   /* Extra array for class and map names.  */
1421   ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1422                                                  * sizeof (u_int32_t));
1423   ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1424                                                * sizeof (u_int32_t));
1425
1426   /* Array for width information.  Because the expected width are very
1427      small we use only one single byte.  This save space and we need
1428      not provide the information twice with both endianesses.  */
1429   ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1430                                             * ctype->plane_cnt);
1431   /* Initialize with default width value.  */
1432   memset (ctype->width, charset->width_default,
1433           ctype->plane_size * ctype->plane_cnt);
1434   if (charset->width_rules != NULL)
1435     {
1436       size_t cnt;
1437
1438       for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1439         if (charset->width_rules[cnt].width != charset->width_default)
1440           for (idx = charset->width_rules[cnt].from;
1441                idx <= charset->width_rules[cnt].to; ++idx)
1442             {
1443               size_t nr = idx % ctype->plane_size;
1444               size_t depth = 0;
1445
1446               while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1447                 ++depth;
1448               assert (depth < ctype->plane_cnt);
1449
1450               ctype->width[nr + depth * ctype->plane_size]
1451                 = charset->width_rules[cnt].width;
1452             }
1453     }
1454
1455   /* Compute MB_CUR_MAX.  */
1456   ctype->mb_cur_max = charset->mb_cur_max;
1457
1458   /* We need the name of the currently used 8-bit character set to
1459      make correct conversion between this 8-bit representation and the
1460      ISO 10646 character set used internally for wide characters.  */
1461   ctype->codeset_name = charset->code_set_name ? : "";
1462 }