locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Library General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Library General Public License for more details.
  14
  15    You should have received a copy of the GNU Library General Public
  16    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18    Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <alloca.h>
  25 #include <byteswap.h>
  26 #include <endian.h>
  27 #include <errno.h>
  28 #include <limits.h>
  29 #include <obstack.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <wchar.h>
  33 #include <wctype.h>
  34 #include <sys/uio.h>
  35
  36 #include "charmap.h"
  37 #include "localeinfo.h"
  38 #include "langinfo.h"
  39 #include "linereader.h"
  40 #include "locfile-token.h"
  41 #include "locfile.h"
  42 #include "localedef.h"
  43
  44 #include <assert.h>
  45
  46
  47 #ifdef PREDEFINED_CLASSES
  48 /* These are the extra bits not in wctype.h since these are not preallocated
  49    classes.  The constants are unsigned so that setting bit 31 is well defined.  */
  50 # define _ISwspecial1   (1U << 29)
  51 # define _ISwspecial2   (1U << 30)
  52 # define _ISwspecial3   (1U << 31)
  53 #endif
  54
  55
  56 /* Bit for a class token: BITPOS is its offset from tok_upper; BIT and BITw feed that to _ISbit and _ISwbit.  */
  57 #define BITPOS(class) ((class) - tok_upper)
  58 #define BIT(class) (_ISbit (BITPOS (class)))
  59 #define BITw(class) (_ISwbit (BITPOS (class)))
  60 /* ELEM dereferences what find_idx returns for VALUE in COLLECTION, passing the matching _max and _act fields.  */
  61 #define ELEM(ctype, collection, idx, value)                                   \
  62   *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
  63              &ctype->collection##_act idx, value)
  64
  65
  66 /* To be compatible with former implementations we for now restrict
  67    the number of bits for character classes to 16.  When compatibility
  68    is not necessary anymore increase the number to 32.  */
  69 #define char_class_t uint16_t
  70 #define char_class32_t uint32_t
  71
  72
  73 /* Type to describe a transliteration action.  We have a possibly
  74    multiple character from-string and a set of multiple character
  75    to-strings.  All are 32bit values since this is what is used in
  76    the gconv functions.  */
  77 struct translit_to_t
  78 {
  79   uint32_t *str;                /* One to-string, as 32bit values.  */
  80   /* Link to a further to-string; presumably NULL ends the chain -- TODO confirm.  */
  81   struct translit_to_t *next;
  82 };
  83 /* One transliteration rule: a from-string together with its chain of to-strings.  */
  84 struct translit_t
  85 {
  86   uint32_t *from;               /* The from-string, as 32bit values.  */
  87   /* Presumably the file and line where the rule was defined -- TODO confirm.  */
  88   const char *fname;
  89   size_t lineno;
  90   /* First element of the chain of to-strings.  */
  91   struct translit_to_t *to;
  92   /* Link to a further rule; presumably NULL ends the chain -- TODO confirm.  */
  93   struct translit_t *next;
  94 };
  95 /* Element type of the translit_ignore chain kept in struct locale_ctype_t.  */
  96 struct translit_ignore_t
  97 {
  98   uint32_t from;                /* Presumably a character range FROM..TO  */
  99   uint32_t to;                  /* taken in increments of STEP  */
 100   uint32_t step;                /* -- TODO confirm against the parser.  */
 101   /* Presumably the file and line where the entry was defined -- TODO confirm.  */
 102   const char *fname;
 103   size_t lineno;
 104   /* Link to a further entry.  */
 105   struct translit_ignore_t *next;
 106 };
 107
 108
 109 /* The real definition of the struct for the LC_CTYPE locale.  */
 110 struct locale_ctype_t
 111 {
 112   uint32_t *charnames;          /* Character value belonging to each slot of class_collection.  */
 113   size_t charnames_max;
 114   size_t charnames_act;
 115   /* Repertoire consulted by ctype_finish to turn symbolic names into UCS4 values.  */
 116   struct repertoire_t *repertoire;
 117
 118   /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
 119 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
 120   size_t nr_charclass;
 121   const char *classnames[MAX_NR_CHARCLASS];
 122   uint32_t last_class_char;
 123   uint32_t class256_collection[256];    /* Class bits indexed by byte value.  */
 124   uint32_t *class_collection;           /* Class bits, one entry per charnames slot.  */
 125   size_t class_collection_max;
 126   size_t class_collection_act;
 127   uint32_t class_done;
 128   /* Input digits in multibyte and wide form; ctype_finish keeps them in groups of ten.  */
 129   struct charseq **mbdigits;
 130   size_t mbdigits_act;
 131   size_t mbdigits_max;
 132   uint32_t *wcdigits;
 133   size_t wcdigits_act;
 134   size_t wcdigits_max;
 135   /* The ten output digits in multibyte and wide form.  */
 136   struct charseq *mboutdigits[10];
 137   uint32_t wcoutdigits[10];
 138   size_t outdigits_act;
 139
 140   /* If the following number ever turns out to be too small simply
 141      increase it.  But I doubt it will.  --drepper@gnu */
 142 #define MAX_NR_CHARMAP 16
 143   const char *mapnames[MAX_NR_CHARMAP];
 144   uint32_t *map_collection[MAX_NR_CHARMAP];
 145   uint32_t map256_collection[2][256];
 146   size_t map_collection_max[MAX_NR_CHARMAP];
 147   size_t map_collection_act[MAX_NR_CHARMAP];
 148   size_t map_collection_nr;
 149   size_t last_map_idx;
 150   int tomap_done[MAX_NR_CHARMAP];
 151
 152   /* Transliteration information.  */
 153   const char *translit_copy_locale;
 154   const char *translit_copy_repertoire;
 155   struct translit_t *translit;
 156   struct translit_ignore_t *translit_ignore;
 157   /* Presumably the `default_missing' transliteration and where it was defined -- TODO confirm.  */
 158   uint32_t *default_missing;
 159   const char *default_missing_file;
 160   size_t default_missing_lineno;
 161
 162   /* The arrays for the binary representation.  */
 163   uint32_t plane_size;
 164   uint32_t plane_cnt;
 165   char_class_t *ctype_b;
 166   char_class32_t *ctype32_b;
 167   uint32_t *names;
 168   uint32_t **map;
 169   uint32_t **map32;
 170   uint32_t *class_name_ptr;
 171   uint32_t *map_name_ptr;
 172   unsigned char *width;
 173   uint32_t mb_cur_max;
 174   const char *codeset_name;     /* Set by ctype_finish from the charmap's code_set_name.  */
 175   uint32_t translit_hash_size;
 176   uint32_t translit_hash_layers;
 177   uint32_t *translit_from_idx;
 178   uint32_t *translit_from_tbl;
 179   uint32_t *translit_to_idx;
 180   uint32_t *translit_to_tbl;
 181   size_t translit_idx_size;
 182   size_t translit_from_tbl_size;
 183   size_t translit_to_tbl_size;
 184   /* Obstack initialised at the end of ctype_startup.  */
 185   struct obstack mempool;
 186 };
 187
 188 /* obstack.h allocates and releases its chunks through these two macros.  */
 189 #define obstack_chunk_alloc xmalloc
 190 #define obstack_chunk_free free
 191
 192
 193 /* Prototypes for local functions.  */
 194 static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
 195                            struct charmap_t *charmap, int ignore_content);
 196 static void ctype_class_new (struct linereader *lr,
 197                              struct locale_ctype_t *ctype, const char *name);
 198 static void ctype_map_new (struct linereader *lr,
 199                            struct locale_ctype_t *ctype,
 200                            const char *name, struct charmap_t *charmap);
 201 static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
 202                            size_t *max, size_t *act, unsigned int idx);
 203 static void set_class_defaults (struct locale_ctype_t *ctype,
 204                                 struct charmap_t *charmap,
 205                                 struct repertoire_t *repertoire);
 206 static void allocate_arrays (struct locale_ctype_t *ctype,
 207                              struct charmap_t *charmap,
 208                              struct repertoire_t *repertoire);
 209
 210 /* Fallback input digits tried by ctype_finish: the ASCII characters and their spelled-out names.  */
 211 static const char *longnames[] =
 212 {
 213   "zero", "one", "two", "three", "four",
 214   "five", "six", "seven", "eight", "nine"
 215 };
 216 static const unsigned char digits[] = "0123456789";
 217 /* Unless IGNORE_CONTENT is set, allocate the LC_CTYPE data of LOCALE and register
 218    the predefined classes and maps; the first 256 table entries start as identity.  */
 219 static void
 220 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 221                struct charmap_t *charmap, int ignore_content)
 222 {
 223   unsigned int cnt;
 224   struct locale_ctype_t *ctype;
 225
 226   if (!ignore_content)
 227     {
 228       /* Allocate the needed room.  */
 229       locale->categories[LC_CTYPE].ctype = ctype =
 230         (struct locale_ctype_t *) xcalloc (1, sizeof (struct locale_ctype_t));
 231
 232       /* We have seen no names yet; slots 0..255 simply name themselves.  */
 233       ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
 234       ctype->charnames =
 235         (uint32_t *) xmalloc (ctype->charnames_max
 236                               * sizeof (uint32_t));
 237       for (cnt = 0; cnt < 256; ++cnt)
 238         ctype->charnames[cnt] = cnt;
 239       ctype->charnames_act = 256;
 240
 241       /* Fill character class information.  */
 242       ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 243       /* The order of the following instructions determines the bit
 244          positions!  */
 245       ctype_class_new (lr, ctype, "upper");
 246       ctype_class_new (lr, ctype, "lower");
 247       ctype_class_new (lr, ctype, "alpha");
 248       ctype_class_new (lr, ctype, "digit");
 249       ctype_class_new (lr, ctype, "xdigit");
 250       ctype_class_new (lr, ctype, "space");
 251       ctype_class_new (lr, ctype, "print");
 252       ctype_class_new (lr, ctype, "graph");
 253       ctype_class_new (lr, ctype, "blank");
 254       ctype_class_new (lr, ctype, "cntrl");
 255       ctype_class_new (lr, ctype, "punct");
 256       ctype_class_new (lr, ctype, "alnum");
 257 #ifdef PREDEFINED_CLASSES
 258       /* The following are extensions from ISO 14652.  */
 259       ctype_class_new (lr, ctype, "left_to_right");
 260       ctype_class_new (lr, ctype, "right_to_left");
 261       ctype_class_new (lr, ctype, "num_terminator");
 262       ctype_class_new (lr, ctype, "num_separator");
 263       ctype_class_new (lr, ctype, "segment_separator");
 264       ctype_class_new (lr, ctype, "block_separator");
 265       ctype_class_new (lr, ctype, "direction_control");
 266       ctype_class_new (lr, ctype, "sym_swap_layout");
 267       ctype_class_new (lr, ctype, "char_shape_selector");
 268       ctype_class_new (lr, ctype, "num_shape_selector");
 269       ctype_class_new (lr, ctype, "non_spacing");
 270       ctype_class_new (lr, ctype, "non_spacing_level3");
 271       ctype_class_new (lr, ctype, "normal_connect");
 272       ctype_class_new (lr, ctype, "r_connect");
 273       ctype_class_new (lr, ctype, "no_connect");
 274       ctype_class_new (lr, ctype, "no_connect-space");
 275       ctype_class_new (lr, ctype, "vowel_connect");
 276 #endif
 277       /* The class table holds uint32_t entries, so size the zeroed block by that type.  */
 278       ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
 279       ctype->class_collection
 280         = (uint32_t *) xcalloc (ctype->class_collection_max,
 281                                 sizeof (uint32_t));
 282       ctype->class_collection_act = 256;
 283
 284       /* Fill character map information.  */
 285       ctype->last_map_idx = MAX_NR_CHARMAP;
 286       ctype_map_new (lr, ctype, "toupper", charmap);
 287       ctype_map_new (lr, ctype, "tolower", charmap);
 288 #ifdef PREDEFINED_CLASSES
 289       ctype_map_new (lr, ctype, "tosymmetric", charmap);
 290 #endif
 291
 292       /* Fill first 256 entries in `toXXX' arrays.  */
 293       for (cnt = 0; cnt < 256; ++cnt)
 294         {
 295           ctype->map_collection[0][cnt] = cnt;
 296           ctype->map_collection[1][cnt] = cnt;
 297 #ifdef PREDEFINED_CLASSES
 298           ctype->map_collection[2][cnt] = cnt;
 299 #endif
 300           ctype->map256_collection[0][cnt] = cnt;
 301           ctype->map256_collection[1][cnt] = cnt;
 302         }
 303
 304       obstack_init (&ctype->mempool);
 305     }
 306 }
 307 /* Validate and complete the LC_CTYPE data of LOCALE for CHARMAP: resolve `copy',
 308    apply class defaults, check class combinations and <SP>, and fix up the digits.  */
 309 void
 310 ctype_finish (struct localedef_t *locale, struct charmap_t *charmap)
 311 {
 312   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 313 #define NCLASS 12
 314   static const struct
 315   {
 316     const char *name;
 317     const char allow[NCLASS];
 318   }
 319   valid_table[NCLASS] =
 320   {
 321     /* The order is important.  See token.h for more information.
 322        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 323     { "upper",  "--MX-XDDXXX-" },
 324     { "lower",  "--MX-XDDXXX-" },
 325     { "alpha",  "---X-XDDXXX-" },
 326     { "digit",  "XXX--XDDXXX-" },
 327     { "xdigit", "-----XDDXXX-" },
 328     { "space",  "XXXXX------X" },
 329     { "print",  "---------X--" },
 330     { "graph",  "---------X--" },
 331     { "blank",  "XXXXXM-----X" },
 332     { "cntrl",  "XXXXX-XX--XX" },
 333     { "punct",  "XXXXX-DD-X-X" },
 334     { "alnum",  "-----XDDXXX-" }
 335   };
 336   size_t cnt;
 337   int cls1, cls2;
 338   uint32_t space_value;
 339   struct charseq *space_seq;
 340   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 341   int warned;
 342
 343   /* Now resolve copying and also handle completely missing definitions.  */
 344   if (ctype == NULL)
 345     {
 346       const char *repertoire_name;
 347
 348       /* First see whether we were supposed to copy.  If yes, find the
 349          actual definition.  */
 350       if (locale->copy_name[LC_CTYPE] != NULL)
 351         {
 352           /* Find the copying locale.  This has to happen transitively since
 353              the locale we are copying from might also be copying another one.  */
 354           struct localedef_t *from = locale;
 355
 356           do
 357             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 358                                 from->repertoire_name, charmap);
 359           while (from->categories[LC_CTYPE].ctype == NULL
 360                  && from->copy_name[LC_CTYPE] != NULL);
 361
 362           ctype = locale->categories[LC_CTYPE].ctype
 363             = from->categories[LC_CTYPE].ctype;
 364         }
 365
 366       /* If there is still no definition issue a warning and create an
 367          empty one.  */
 368       if (ctype == NULL)
 369         {
 370           if (! be_quiet)
 371             error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
 372           ctype_startup (NULL, locale, charmap, 0);
 373           ctype = locale->categories[LC_CTYPE].ctype;
 374         }
 375
 376       /* Get the repertoire we have to use.  */
 377       repertoire_name = locale->repertoire_name ?: repertoire_global;
 378       if (repertoire_name != NULL)
 379         ctype->repertoire = repertoire_read (repertoire_name);
 380     }
 381
 382   /* We need the name of the currently used 8-bit character set to
 383      make correct conversion between this 8-bit representation and the
 384      ISO 10646 character set used internally for wide characters.  */
 385   ctype->codeset_name = charmap->code_set_name;
 386   if (ctype->codeset_name == NULL)
 387     {
 388       if (! be_quiet)
 389         error (0, 0, "no character set name specified in charmap");
 390       ctype->codeset_name = "//UNKNOWN//";
 391     }
 392
 393   /* Set default value for classes not specified.  */
 394   set_class_defaults (ctype, charmap, ctype->repertoire);
 395
 396   /* Check according to table.  */
 397   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 398     {
 399       uint32_t tmp = ctype->class_collection[cnt];
 400
 401       if (tmp != 0)
 402         {
 403           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 404             if ((tmp & _ISwbit (cls1)) != 0)
 405               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 406                 if (valid_table[cls1].allow[cls2] != '-')
 407                   {
 408                     int eq = (tmp & _ISwbit (cls2)) != 0;
 409                     switch (valid_table[cls1].allow[cls2])
 410                       {
 411                       case 'M':
 412                         if (!eq)
 413                           {
 414                             uint32_t value = ctype->charnames[cnt];
 415
 416                             if (!be_quiet)
 417                               error (0, 0, _("\
 418 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 419                                      value > 0xffff ? 8 : 4, value,
 420                                      valid_table[cls1].name,
 421                                      valid_table[cls2].name);
 422                           }
 423                         break;
 424
 425                       case 'X':
 426                         if (eq)
 427                           {
 428                             uint32_t value = ctype->charnames[cnt];
 429
 430                             if (!be_quiet)
 431                               error (0, 0, _("\
 432 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 433                                      value > 0xffff ? 8 : 4, value,
 434                                      valid_table[cls1].name,
 435                                      valid_table[cls2].name);
 436                           }
 437                         break;
 438
 439                       case 'D':
 440                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 441                         break;
 442
 443                       default:
 444                         error (5, 0, _("internal error in %s, line %u"),
 445                                __FUNCTION__, __LINE__);
 446                       }
 447                   }
 448         }
 449     }
 450   /* The same table check again, this time for the byte-indexed class table.  */
 451   for (cnt = 0; cnt < 256; ++cnt)
 452     {
 453       uint32_t tmp = ctype->class256_collection[cnt];
 454
 455       if (tmp != 0)
 456         {
 457           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 458             if ((tmp & _ISbit (cls1)) != 0)
 459               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 460                 if (valid_table[cls1].allow[cls2] != '-')
 461                   {
 462                     int eq = (tmp & _ISbit (cls2)) != 0;
 463                     switch (valid_table[cls1].allow[cls2])
 464                       {
 465                       case 'M':
 466                         if (!eq)
 467                           {
 468                             char buf[17];
 469
 470                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 471
 472                             if (!be_quiet)
 473                               error (0, 0, _("\
 474 character '%s' in class `%s' must be in class `%s'"),
 475                                      buf, valid_table[cls1].name,
 476                                      valid_table[cls2].name);
 477                           }
 478                         break;
 479
 480                       case 'X':
 481                         if (eq)
 482                           {
 483                             char buf[17];
 484
 485                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 486
 487                             if (!be_quiet)
 488                               error (0, 0, _("\
 489 character '%s' in class `%s' must not be in class `%s'"),
 490                                      buf, valid_table[cls1].name,
 491                                      valid_table[cls2].name);
 492                           }
 493                         break;
 494
 495                       case 'D':
 496                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 497                         break;
 498
 499                       default:
 500                         error (5, 0, _("internal error in %s, line %u"),
 501                                __FUNCTION__, __LINE__);
 502                       }
 503                   }
 504         }
 505     }
 506
 507   /* ... and now test <SP> as a special case.  */
 508   space_value = 32;
 509   if (((cnt = BITPOS (tok_space),
 510         (ELEM (ctype, class_collection, , space_value)
 511          & BITw (tok_space)) == 0)
 512        || (cnt = BITPOS (tok_blank),
 513            (ELEM (ctype, class_collection, , space_value)
 514             & BITw (tok_blank)) == 0)))
 515     {
 516       if (!be_quiet)
 517         error (0, 0, _("<SP> character not in class `%s'"),
 518                valid_table[cnt].name);
 519     }
 520   else if (((cnt = BITPOS (tok_punct),
 521              (ELEM (ctype, class_collection, , space_value)
 522               & BITw (tok_punct)) != 0)
 523             || (cnt = BITPOS (tok_graph),
 524                 (ELEM (ctype, class_collection, , space_value)
 525                  & BITw (tok_graph))
 526                 != 0)))
 527     {
 528       if (!be_quiet)
 529         error (0, 0, _("<SP> character must not be in class `%s'"),
 530                valid_table[cnt].name);
 531     }
 532   else
 533     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 534   /* Look <SP> up in the charmap under each of its names; every length must cover the whole name.  */
 535   space_seq = charmap_find_value (charmap, "SP", 2);
 536   if (space_seq == NULL)
 537     space_seq = charmap_find_value (charmap, "space", 5);
 538   if (space_seq == NULL)
 539     space_seq = charmap_find_value (charmap, "U00000020", 9);
 540   if (space_seq == NULL || space_seq->nbytes != 1)
 541     {
 542       if (!be_quiet)
 543         error (0, 0, _("character <SP> not defined in character map"));
 544     }
 545   else if (((cnt = BITPOS (tok_space),
 546              (ctype->class256_collection[space_seq->bytes[0]]
 547               & BIT (tok_space)) == 0)
 548             || (cnt = BITPOS (tok_blank),
 549                 (ctype->class256_collection[space_seq->bytes[0]]
 550                  & BIT (tok_blank)) == 0)))
 551     {
 552       if (!be_quiet)
 553         error (0, 0, _("<SP> character not in class `%s'"),
 554                valid_table[cnt].name);
 555     }
 556   else if (((cnt = BITPOS (tok_punct),
 557              (ctype->class256_collection[space_seq->bytes[0]]
 558               & BIT (tok_punct)) != 0)
 559             || (cnt = BITPOS (tok_graph),
 560                 (ctype->class256_collection[space_seq->bytes[0]]
 561                  & BIT (tok_graph)) != 0)))
 562     {
 563       if (!be_quiet)
 564         error (0, 0, _("<SP> character must not be in class `%s'"),
 565                valid_table[cnt].name);
 566     }
 567   else
 568     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 569
 570   /* Now that the tests are done make sure the name array contains all
 571      characters which are handled in the WIDTH section of the
 572      character set definition file.  */
 573   if (charmap->width_rules != NULL)
 574     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 575       {
 576         unsigned char bytes[charmap->mb_cur_max];
 577         int nbytes = charmap->width_rules[cnt].from->nbytes;
 578
 579         /* We have the range of character for which the width is
 580            specified described using byte sequences of the multibyte
 581            charset.  We have to convert this to UCS4 now.  And we
 582            cannot simply convert the beginning and the end of the
 583            sequence, we have to iterate over the byte sequence and
 584            convert it for every single character.  */
 585         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 586
 587         while (nbytes < charmap->width_rules[cnt].to->nbytes
 588                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 589                           nbytes) <= 0)
 590           {
 591             /* Find the UCS value for `bytes'.  */
 592             int inner;
 593             uint32_t wch;
 594             struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
 595
 596             if (seq == NULL)
 597               wch = ILLEGAL_CHAR_VALUE;
 598             else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 599               wch = seq->ucs4;
 600             else
 601               wch = repertoire_find_value (ctype->repertoire, seq->name,
 602                                            strlen (seq->name));
 603
 604             if (wch != ILLEGAL_CHAR_VALUE)
 605               /* We are only interested in the side-effects of the
 606                  `find_idx' call.  It will add appropriate entries in
 607                  the name array if this is necessary.  */
 608               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 609
 610             /* "Increment" the bytes sequence.  */
 611             inner = nbytes - 1;
 612             while (inner >= 0 && bytes[inner] == 0xff)
 613               --inner;
 614
 615             if (inner < 0)
 616               {
 617                 /* We have to extend the byte sequence.  */
 618                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 619                   break;
 620
 621                 bytes[0] = 1;
 622                 memset (&bytes[1], 0, nbytes);
 623                 ++nbytes;
 624               }
 625             else
 626               {
 627                 ++bytes[inner];
 628                 while (++inner < nbytes)
 629                   bytes[inner] = 0;
 630               }
 631           }
 632       }
 633
 634   /* There must be a multiple of 10 digits.  */
 635   if (ctype->mbdigits_act % 10 != 0)
 636     {
 637       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 638       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 639       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 640       error (0, 0, _("`digit' category has not entries in groups of ten"));
 641     }
 642
 643   /* Check the input digits.  There must be a multiple of ten available.
 644      In each group it could be that one or the other character is missing.
 645      In this case the whole group must be removed.  */
 646   cnt = 0;
 647   while (cnt < ctype->mbdigits_act)
 648     {
 649       size_t inner;
 650       for (inner = 0; inner < 10; ++inner)
 651         if (ctype->mbdigits[cnt + inner] == NULL)
 652           break;
 653
 654       if (inner == 10)
 655         cnt += 10;
 656       else
 657         {
 658           /* Remove the group; the tail length comes from mbdigits_act, the count this loop shrinks.  */
 659           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 660                    ((ctype->mbdigits_act - cnt - 10)
 661                     * sizeof (ctype->mbdigits[0])));
 662           ctype->mbdigits_act -= 10;
 663         }
 664     }
 665
 666   /* If no input digits are given use the default.  NOTE(review): longnames go through charmap_find_symbol, used above for byte sequences -- confirm.  */
 667   if (ctype->mbdigits_act == 0)
 668     {
 669       if (ctype->mbdigits_max == 0)
 670         {
 671           ctype->mbdigits = obstack_alloc (&charmap->mem_pool,
 672                                            10 * sizeof (struct charseq *));
 673           ctype->mbdigits_max = 10;
 674         }
 675
 676       for (cnt = 0; cnt < 10; ++cnt)
 677         {
 678           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 679                                                       digits + cnt, 1);
 680           if (ctype->mbdigits[cnt] == NULL)
 681             {
 682               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 683                                                           longnames[cnt],
 684                                                           strlen (longnames[cnt]));
 685               if (ctype->mbdigits[cnt] == NULL)
 686                 {
 687                   /* Hum, this ain't good.  */
 688                   error (0, 0, _("\
 689 no input digits defined and none of the standard names in the charmap"));
 690
 691                   ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool,
 692                                                         sizeof (struct charseq) + 1);
 693
 694                   /* This is better than nothing.  */
 695                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 696                   ctype->mbdigits[cnt]->nbytes = 1;
 697                 }
 698             }
 699         }
 700
 701       ctype->mbdigits_act = 10;
 702     }
 703
 704   /* Check the wide character input digits.  There must be a multiple
 705      of ten available.  In each group it could be that one or the other
 706      character is missing.  In this case the whole group must be
 707      removed.  */
 708   cnt = 0;
 709   while (cnt < ctype->wcdigits_act)
 710     {
 711       size_t inner;
 712       for (inner = 0; inner < 10; ++inner)
 713         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 714           break;
 715
 716       if (inner == 10)
 717         cnt += 10;
 718       else
 719         {
 720           /* Remove the group.  */
 721           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 722                    ((ctype->wcdigits_act - cnt - 10)
 723                     * sizeof (ctype->wcdigits[0])));
 724           ctype->wcdigits_act -= 10;
 725         }
 726     }
 727
 728   /* If no input digits are given use the default.  */
 729   if (ctype->wcdigits_act == 0)
 730     {
 731       if (ctype->wcdigits_max == 0)
 732         {
 733           ctype->wcdigits = obstack_alloc (&charmap->mem_pool,
 734                                            10 * sizeof (uint32_t));
 735           ctype->wcdigits_max = 10;
 736         }
 737
 738       for (cnt = 0; cnt < 10; ++cnt)
 739         ctype->wcdigits[cnt] = L'0' + cnt;
 740       /* Record the ten wide defaults in the wide count; the multibyte count was settled above.  */
 741       ctype->wcdigits_act = 10;
 742     }
 743
 744   /* Check the outdigits.  */
 745   warned = 0;
 746   for (cnt = 0; cnt < 10; ++cnt)
 747     if (ctype->mboutdigits[cnt] == NULL)
 748       {
 749         static struct charseq replace[2];
 750
 751         if (!warned)
 752           {
 753             error (0, 0, _("\
 754 not all characters used in `outdigit' are available in the charmap"));
 755             warned = 1;
 756           }
 757         /* Presumably replace[1] only supplies storage behind replace[0] for the bytes stored below -- TODO confirm.  */
 758         replace[0].nbytes = 1;
 759         replace[0].bytes[0] = '?';
 760         replace[0].bytes[1] = '\0';
 761         ctype->mboutdigits[cnt] = &replace[0];
 762       }
 763
 764   warned = 0;
 765   for (cnt = 0; cnt < 10; ++cnt)
 766     if (ctype->wcoutdigits[cnt] == 0)
 767       {
 768         if (!warned)
 769           {
 770             error (0, 0, _("\
 771 not all characters used in `outdigit' are available in the repertoire"));
 772             warned = 1;
 773           }
 774
 775         ctype->wcoutdigits[cnt] = L'?';
 776       }
 777 }
 778
 779
 780 void
 781 ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
 782               const char *output_path)
 783 {
 784   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 785   const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
 786                          + (ctype->map_collection_nr - 2));
 787   struct iovec iov[2 + nelems + ctype->nr_charclass
 788                   + ctype->map_collection_nr];
 789   struct locale_file data;
 790   uint32_t idx[nelems + 1];
 791   size_t elem, cnt, offset, total;
 792   char *cp;
 793
 794   /* Now prepare the output: Find the sizes of the table we can use.  */
 795   allocate_arrays (ctype, charmap, ctype->repertoire);
 796
 797   data.magic = LIMAGIC (LC_CTYPE);
 798   data.n = nelems;
 799   iov[0].iov_base = (void *) &data;
 800   iov[0].iov_len = sizeof (data);
 801
 802   iov[1].iov_base = (void *) idx;
 803   iov[1].iov_len = nelems * sizeof (uint32_t);
 804
 805   idx[0] = iov[0].iov_len + iov[1].iov_len;
 806   offset = 0;
 807
 808   for (elem = 0; elem < nelems; ++elem)
 809     {
 810       if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
 811         switch (elem)
 812           {
 813 #define CTYPE_DATA(name, base, len)                                           \
 814           case _NL_ITEM_INDEX (name):                                         \
 815             iov[2 + elem + offset].iov_base = (base);                         \
 816             iov[2 + elem + offset].iov_len = (len);                           \
 817             if (elem + 1 < nelems)                                            \
 818               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;     \
 819             break
 820
 821           CTYPE_DATA (_NL_CTYPE_CLASS,
 822                       ctype->ctype_b,
 823                       (256 + 128) * sizeof (char_class_t));
 824
 825           CTYPE_DATA (_NL_CTYPE_TOUPPER,
 826                       ctype->map[0],
 827                       (256 + 128) * sizeof (uint32_t));
 828           CTYPE_DATA (_NL_CTYPE_TOLOWER,
 829                       ctype->map[1],
 830                       (256 + 128) * sizeof (uint32_t));
 831
 832           CTYPE_DATA (_NL_CTYPE_TOUPPER32,
 833                       ctype->map32[0],
 834                       (ctype->plane_size * ctype->plane_cnt)
 835                       * sizeof (uint32_t));
 836           CTYPE_DATA (_NL_CTYPE_TOLOWER32,
 837                       ctype->map32[1],
 838                       (ctype->plane_size * ctype->plane_cnt)
 839                       * sizeof (uint32_t));
 840
 841           CTYPE_DATA (_NL_CTYPE_CLASS32,
 842                       ctype->ctype32_b,
 843                       (ctype->plane_size * ctype->plane_cnt
 844                        * sizeof (char_class32_t)));
 845
 846           CTYPE_DATA (_NL_CTYPE_NAMES,
 847                       ctype->names, (ctype->plane_size * ctype->plane_cnt
 848                                      * sizeof (uint32_t)));
 849
 850           CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE,
 851                       &ctype->translit_hash_size, sizeof (uint32_t));
 852           CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS,
 853                       &ctype->translit_hash_layers, sizeof (uint32_t));
 854
 855           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
 856                       ctype->translit_from_idx,
 857                       ctype->translit_idx_size);
 858
 859           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
 860                       ctype->translit_from_tbl,
 861                       ctype->translit_from_tbl_size);
 862
 863           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
 864                       ctype->translit_to_idx,
 865                       ctype->translit_idx_size);
 866
 867           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
 868                       ctype->translit_to_tbl, ctype->translit_to_tbl_size);
 869
 870           CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
 871                       &ctype->plane_size, sizeof (uint32_t));
 872           CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
 873                       &ctype->plane_cnt, sizeof (uint32_t));
 874
 875           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 876             /* The class name array.  */
 877             total = 0;
 878             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
 879               {
 880                 iov[2 + elem + offset].iov_base
 881                   = (void *) ctype->classnames[cnt];
 882                 iov[2 + elem + offset].iov_len
 883                   = strlen (ctype->classnames[cnt]) + 1;
 884                 total += iov[2 + elem + offset].iov_len;
 885               }
 886             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 887             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 888             total += 1 + (4 - ((total + 1) % 4));
 889
 890             idx[elem + 1] = idx[elem] + total;
 891             break;
 892
 893           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
 894             /* The class name array.  */
 895             total = 0;
 896             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
 897               {
 898                 iov[2 + elem + offset].iov_base
 899                   = (void *) ctype->mapnames[cnt];
 900                 iov[2 + elem + offset].iov_len
 901                   = strlen (ctype->mapnames[cnt]) + 1;
 902                 total += iov[2 + elem + offset].iov_len;
 903               }
 904             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 905             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 906             total += 1 + (4 - ((total + 1) % 4));
 907
 908             idx[elem + 1] = idx[elem] + total;
 909             break;
 910
 911           CTYPE_DATA (_NL_CTYPE_WIDTH,
 912                       ctype->width,
 913                       (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul);
 914
 915           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
 916                       &ctype->mb_cur_max, sizeof (uint32_t));
 917
 918           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
 919             total = strlen (ctype->codeset_name) + 1;
 920             if (total % 4 == 0)
 921               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
 922             else
 923               {
 924                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
 925                 memset (mempcpy (iov[2 + elem + offset].iov_base,
 926                                  ctype->codeset_name, total),
 927                         '\0', 4 - (total & 3));
 928                 total = (total + 3) & ~3;
 929               }
 930             iov[2 + elem + offset].iov_len = total;
 931             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 932             break;
 933
 934           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
 935             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
 936             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
 937             *(uint32_t *) iov[2 + elem + offset].iov_base =
 938               ctype->mbdigits_act / 10;
 939             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
 940             break;
 941
 942           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
 943             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
 944             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
 945             *(uint32_t *) iov[2 + elem + offset].iov_base =
 946               ctype->wcdigits_act / 10;
 947             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
 948             break;
 949
 950           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
 951             /* Compute the length of all possible characters.  For INDIGITS
 952                there might be more than one.  We simply concatenate all of
 953                them with a NUL byte following.  The NUL byte wouldn't be
 954                necessary but it makes it easier for the user.  */
 955             total = 0;
 956             for (cnt = elem - _NL_CTYPE_INDIGITS0_MB;
 957                  cnt < ctype->mbdigits_act; cnt += 10)
 958               total += ctype->mbdigits[cnt]->nbytes + 1;
 959             iov[2 + elem + offset].iov_base = (char *) alloca (total);
 960             iov[2 + elem + offset].iov_len = total;
 961
 962             cp = iov[2 + elem + offset].iov_base;
 963             for (cnt = elem - _NL_CTYPE_INDIGITS0_MB;
 964                  cnt < ctype->mbdigits_act; cnt += 10)
 965               {
 966                 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
 967                               ctype->mbdigits[cnt]->nbytes);
 968                 *cp++ = '\0';
 969               }
 970             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 971             break;
 972
 973           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
 974             /* Compute the length of all possible characters.  For INDIGITS
 975                there might be more than one.  We simply concatenate all of
 976                them with a NUL byte following.  The NUL byte wouldn't be
 977                necessary but it makes it easier for the user.  */
 978             cnt = elem - _NL_CTYPE_OUTDIGIT0_MB;
 979             total = ctype->mboutdigits[cnt]->nbytes + 1;
 980             iov[2 + elem + offset].iov_base = (char *) alloca (total);
 981             iov[2 + elem + offset].iov_len = total;
 982
 983             *(char *) mempcpy (iov[2 + elem + offset].iov_base,
 984                                ctype->mbdigits[cnt]->bytes,
 985                                ctype->mbdigits[cnt]->nbytes) = '\0';
 986             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 987             break;
 988
 989           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
 990             total = ctype->wcdigits_act / 10;
 991
 992             iov[2 + elem + offset].iov_base =
 993               (uint32_t *) alloca (total * sizeof (uint32_t));
 994             iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
 995
 996             for (cnt = elem - _NL_CTYPE_INDIGITS0_WC;
 997                  cnt < ctype->wcdigits_act; cnt += 10)
 998               ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
 999                 = ctype->wcdigits[cnt];
1000             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1001             break;
1002
1003           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1004             cnt = elem - _NL_CTYPE_OUTDIGIT0_WC;
1005             iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1006             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1007             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1008             break;
1009
1010           default:
1011             assert (! "unknown CTYPE element");
1012           }
1013       else
1014         {
1015           /* Handle extra maps.  */
1016           size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) + 2;
1017
1018           iov[2 + elem + offset].iov_base = ctype->map32[nr];
1019           iov[2 + elem + offset].iov_len = ((ctype->plane_size
1020                                              * ctype->plane_cnt)
1021                                             * sizeof (uint32_t));
1022
1023           idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1024         }
1025     }
1026
1027   assert (2 + elem + offset == (nelems + ctype->nr_charclass
1028                                 + ctype->map_collection_nr + 2));
1029
1030   write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
1031 }
1032
1033
1034 /* Local functions.  */
1035 static void
1036 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1037                  const char *name)
1038 {
1039   size_t cnt;
1040
1041   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1042     if (strcmp (ctype->classnames[cnt], name) == 0)
1043       break;
1044
1045   if (cnt < ctype->nr_charclass)
1046     {
1047       lr_error (lr, _("character class `%s' already defined"), name);
1048       return;
1049     }
1050
1051   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1052     /* Exit code 2 is prescribed in P1003.2b.  */
1053     error (2, 0, _("\
1054 implementation limit: no more than %Zd character classes allowed"),
1055            MAX_NR_CHARCLASS);
1056
1057   ctype->classnames[ctype->nr_charclass++] = name;
1058 }
1059
1060
1061 static void
1062 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1063                const char *name, struct charmap_t *charmap)
1064 {
1065   size_t max_chars = 0;
1066   size_t cnt;
1067
1068   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1069     {
1070       if (strcmp (ctype->mapnames[cnt], name) == 0)
1071         break;
1072
1073       if (max_chars < ctype->map_collection_max[cnt])
1074         max_chars = ctype->map_collection_max[cnt];
1075     }
1076
1077   if (cnt < ctype->map_collection_nr)
1078     {
1079       lr_error (lr, _("character map `%s' already defined"), name);
1080       return;
1081     }
1082
1083   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1084     /* Exit code 2 is prescribed in P1003.2b.  */
1085     error (2, 0, _("\
1086 implementation limit: no more than %d character maps allowed"),
1087            MAX_NR_CHARMAP);
1088
1089   ctype->mapnames[cnt] = name;
1090
1091   if (max_chars == 0)
1092     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1093   else
1094     ctype->map_collection_max[cnt] = max_chars;
1095
1096   ctype->map_collection[cnt] = (uint32_t *)
1097     xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1098   ctype->map_collection_act[cnt] = 256;
1099
1100   ++ctype->map_collection_nr;
1101 }
1102
1103
1104 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
1105    is possible if we only want to extend the name array.  */
1106 static uint32_t *
1107 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1108           size_t *act, uint32_t idx)
1109 {
1110   size_t cnt;
1111
1112   if (idx < 256)
1113     return table == NULL ? NULL : &(*table)[idx];
1114
1115   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1116     if (ctype->charnames[cnt] == idx)
1117       break;
1118
1119   /* We have to distinguish two cases: the name is found or not.  */
1120   if (cnt == ctype->charnames_act)
1121     {
1122       /* Extend the name array.  */
1123       if (ctype->charnames_act == ctype->charnames_max)
1124         {
1125           ctype->charnames_max *= 2;
1126           ctype->charnames = (uint32_t *)
1127             xrealloc (ctype->charnames,
1128                       sizeof (uint32_t) * ctype->charnames_max);
1129         }
1130       ctype->charnames[ctype->charnames_act++] = idx;
1131     }
1132
1133   if (table == NULL)
1134     /* We have done everything we are asked to do.  */
1135     return NULL;
1136
1137   if (cnt >= *act)
1138     {
1139       if (cnt >= *max)
1140         {
1141           size_t old_max = *max;
1142           do
1143             *max *= 2;
1144           while (*max <= cnt);
1145
1146           *table =
1147             (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1148           memset (&(*table)[old_max], '\0',
1149                   (*max - old_max) * sizeof (uint32_t));
1150         }
1151
1152       *act = cnt + 1;
1153     }
1154
1155   return &(*table)[cnt];
1156 }
1157
1158
1159 static int
1160 get_character (struct token *now, struct charmap_t *charmap,
1161                struct repertoire_t *repertoire,
1162                struct charseq **seqp, uint32_t *wchp)
1163 {
1164   if (now->tok == tok_bsymbol)
1165     {
1166       /* This will hopefully be the normal case.  */
1167       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1168                                      now->val.str.lenmb);
1169       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1170                                   now->val.str.lenmb);
1171     }
1172   else if (now->tok == tok_ucs4)
1173     {
1174       char utmp[10];
1175
1176       snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1177       *seqp = charmap_find_value (charmap, utmp, 9);
1178
1179       if (*seqp == NULL)
1180         *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1181
1182       if (*seqp == NULL)
1183         {
1184           /* Compute the value in the charmap from the UCS value.  */
1185           const char *symbol = repertoire_find_symbol (repertoire,
1186                                                        now->val.ucs4);
1187
1188           if (symbol == NULL)
1189             *seqp = NULL;
1190           else
1191             *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1192
1193           if (*seqp == NULL)
1194             {
1195               if (repertoire != NULL)
1196                 {
1197                   /* Insert a negative entry.  */
1198                   static const struct charseq negative
1199                     = { .ucs4 = ILLEGAL_CHAR_VALUE };
1200                   uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1201                                                   sizeof (uint32_t));
1202                   *newp = now->val.ucs4;
1203
1204                   insert_entry (&repertoire->seq_table, newp,
1205                                 sizeof (uint32_t), (void *) &negative);
1206                 }
1207             }
1208           else
1209             (*seqp)->ucs4 = now->val.ucs4;
1210         }
1211       else if ((*seqp)->ucs4 != now->val.ucs4)
1212         *seqp = NULL;
1213
1214       *wchp = now->val.ucs4;
1215     }
1216   else if (now->tok == tok_charcode)
1217     {
1218       /* We must map from the byte code to UCS4.  */
1219       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1220                                    now->val.str.lenmb);
1221
1222       if (*seqp == NULL)
1223         *wchp = ILLEGAL_CHAR_VALUE;
1224       else
1225         {
1226           if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1227             (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1228                                                    strlen ((*seqp)->name));
1229           *wchp = (*seqp)->ucs4;
1230         }
1231     }
1232   else
1233     return 1;
1234
1235   return 0;
1236 }
1237
1238
1239 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1240    the .(2). counterparts.  */
1241 static void
1242 charclass_symbolic_ellipsis (struct linereader *ldfile,
1243                              struct locale_ctype_t *ctype,
1244                              struct charmap_t *charmap,
1245                              struct repertoire_t *repertoire,
1246                              struct token *now,
1247                              const char *last_str,
1248                              unsigned long int class256_bit,
1249                              unsigned long int class_bit, int base,
1250                              int ignore_content, int handle_digits, int step)
1251 {
1252   const char *nowstr = now->val.str.startmb;
1253   char tmp[now->val.str.lenmb + 1];
1254   const char *cp;
1255   char *endp;
1256   unsigned long int from;
1257   unsigned long int to;
1258
1259   /* We have to compute the ellipsis values using the symbolic names.  */
1260   assert (last_str != NULL);
1261
1262   if (strlen (last_str) != now->val.str.lenmb)
1263     {
1264     invalid_range:
1265       lr_error (ldfile,
1266                 _("`%s' and `%.*s' are no valid names for symbolic range"),
1267                 last_str, (int) now->val.str.lenmb, nowstr);
1268       return;
1269     }
1270
1271   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1272     /* Nothing to do, the names are the same.  */
1273     return;
1274
1275   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1276     ;
1277
1278   errno = 0;
1279   from = strtoul (cp, &endp, base);
1280   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1281     goto invalid_range;
1282
1283   to = strtoul (nowstr + (cp - last_str), &endp, base);
1284   if ((to == UINT_MAX && errno == ERANGE)
1285       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1286     goto invalid_range;
1287
1288   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1289   if (!ignore_content)
1290     {
1291       now->val.str.startmb = tmp;
1292       while ((from += step) <= to)
1293         {
1294           struct charseq *seq;
1295           uint32_t wch;
1296
1297           sprintf (tmp, (base == 10 ? "%.*s%0*d" : "%.*s%0*X"), cp - last_str,
1298                    last_str, now->val.str.lenmb - (cp - last_str), from);
1299
1300           get_character (now, charmap, repertoire, &seq, &wch);
1301
1302           if (seq != NULL && seq->nbytes == 1)
1303             /* Yep, we can store information about this byte sequence.  */
1304             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1305
1306           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1307             /* We have the UCS4 position.  */
1308             *find_idx (ctype, &ctype->class_collection,
1309                        &ctype->class_collection_max,
1310                        &ctype->class_collection_act, wch) |= class_bit;
1311
1312           if (handle_digits == 1)
1313             {
1314               /* We must store the digit values.  */
1315               if (ctype->mbdigits_act == ctype->mbdigits_max)
1316                 {
1317                   ctype->mbdigits_max *= 2;
1318                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1319                                               (ctype->mbdigits_max
1320                                                * sizeof (char *)));
1321                   ctype->wcdigits_max *= 2;
1322                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1323                                               (ctype->wcdigits_max
1324                                                * sizeof (uint32_t)));
1325                 }
1326
1327               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1328               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1329             }
1330           else if (handle_digits == 2)
1331             {
1332               /* We must store the digit values.  */
1333               if (ctype->outdigits_act >= 10)
1334                 {
1335                   lr_error (ldfile, _("\
1336 %s: field `%s' does not contain exactly ten entries"),
1337                             "LC_CTYPE", "outdigit");
1338                   return;
1339                 }
1340
1341               ctype->mboutdigits[ctype->outdigits_act] = seq;
1342               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1343               ++ctype->outdigits_act;
1344             }
1345         }
1346     }
1347 }
1348
1349
1350 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.  */
1351 static void
1352 charclass_ucs4_ellipsis (struct linereader *ldfile,
1353                          struct locale_ctype_t *ctype,
1354                          struct charmap_t *charmap,
1355                          struct repertoire_t *repertoire,
1356                          struct token *now, uint32_t last_wch,
1357                          unsigned long int class256_bit,
1358                          unsigned long int class_bit, int ignore_content,
1359                          int handle_digits, int step)
1360 {
1361   if (last_wch > now->val.ucs4)
1362     {
1363       lr_error (ldfile, _("\
1364 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1365                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1366                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1367       return;
1368     }
1369
1370   if (!ignore_content)
1371     while ((last_wch += step) <= now->val.ucs4)
1372       {
1373         /* We have to find out whether there is a byte sequence corresponding
1374            to this UCS4 value.  */
1375         struct charseq *seq;
1376         char utmp[10];
1377
1378         snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1379         seq = charmap_find_value (charmap, utmp, 9);
1380         if (seq == NULL)
1381           {
1382             snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1383             seq = charmap_find_value (charmap, utmp, 5);
1384           }
1385
1386         if (seq == NULL)
1387           /* Try looking in the repertoire map.  */
1388           seq = repertoire_find_seq (repertoire, last_wch);
1389
1390         /* If this is the first time we look for this sequence create a new
1391            entry.  */
1392         if (seq == NULL)
1393           {
1394             static const struct charseq negative
1395               = { .ucs4 = ILLEGAL_CHAR_VALUE };
1396
1397             /* Find the symbolic name for this UCS4 value.  */
1398             if (repertoire != NULL)
1399               {
1400                 const char *symbol = repertoire_find_symbol (repertoire,
1401                                                              last_wch);
1402                 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1403                                                 sizeof (uint32_t));
1404                 *newp = last_wch;
1405
1406                 if (symbol != NULL)
1407                   /* We have a name, now search the multibyte value.  */
1408                   seq = charmap_find_value (charmap, symbol, strlen (symbol));
1409
1410                 if (seq == NULL)
1411                   /* We have to create a fake entry.  */
1412                   seq = (struct charseq *) &negative;
1413                 else
1414                   seq->ucs4 = last_wch;
1415
1416                 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1417                               seq);
1418               }
1419             else
1420               /* We have to create a fake entry.  */
1421               seq = (struct charseq *) &negative;
1422           }
1423
1424         /* We have a name, now search the multibyte value.  */
1425         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1426           /* Yep, we can store information about this byte sequence.  */
1427           ctype->class256_collection[(size_t) seq->bytes[0]]
1428             |= class256_bit;
1429
1430         /* And of course we have the UCS4 position.  */
1431         if (class_bit != 0)
1432           *find_idx (ctype, &ctype->class_collection,
1433                      &ctype->class_collection_max,
1434                      &ctype->class_collection_act, last_wch) |= class_bit;
1435
1436         if (handle_digits == 1)
1437           {
1438             /* We must store the digit values.  */
1439             if (ctype->mbdigits_act == ctype->mbdigits_max)
1440               {
1441                 ctype->mbdigits_max *= 2;
1442                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1443                                             (ctype->mbdigits_max
1444                                              * sizeof (char *)));
1445                 ctype->wcdigits_max *= 2;
1446                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1447                                             (ctype->wcdigits_max
1448                                              * sizeof (uint32_t)));
1449               }
1450
1451             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1452                                                       ? seq : NULL);
1453             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1454           }
1455         else if (handle_digits == 2)
1456           {
1457             /* We must store the digit values.  */
1458             if (ctype->outdigits_act >= 10)
1459               {
1460                 lr_error (ldfile, _("\
1461 %s: field `%s' does not contain exactly ten entries"),
1462                           "LC_CTYPE", "outdigit");
1463                 return;
1464               }
1465
1466             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1467                                                         ? seq : NULL);
1468             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1469             ++ctype->outdigits_act;
1470           }
1471       }
1472 }
1473
1474
1475 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1476 static void
1477 charclass_charcode_ellipsis (struct linereader *ldfile,
1478                              struct locale_ctype_t *ctype,
1479                              struct charmap_t *charmap,
1480                              struct repertoire_t *repertoire,
1481                              struct token *now, char *last_charcode,
1482                              uint32_t last_charcode_len,
1483                              unsigned long int class256_bit,
1484                              unsigned long int class_bit, int ignore_content,
1485                              int handle_digits)
1486 {
1487   /* First check whether the to-value is larger.  */
1488   if (now->val.charcode.nbytes != last_charcode_len)
1489     {
1490       lr_error (ldfile, _("\
1491 start end end character sequence of range must have the same length"));
1492       return;
1493     }
1494
1495   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1496     {
1497       lr_error (ldfile, _("\
1498 to-value character sequence is smaller than from-value sequence"));
1499       return;
1500     }
1501
1502   if (!ignore_content)
1503     {
1504       do
1505         {
1506           /* Increment the byte sequence value.  */
1507           struct charseq *seq;
1508           uint32_t wch;
1509           int i;
1510
1511           for (i = last_charcode_len - 1; i >= 0; --i)
1512             if (++last_charcode[i] != 0)
1513               break;
1514
1515           if (last_charcode_len == 1)
1516             /* Of course we have the charcode value.  */
1517             ctype->class256_collection[(size_t) last_charcode[0]]
1518               |= class256_bit;
1519
1520           /* Find the symbolic name.  */
1521           seq = charmap_find_symbol (charmap, last_charcode,
1522                                      last_charcode_len);
1523           if (seq != NULL)
1524             {
1525               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1526                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1527                                                    strlen (seq->name));
1528               wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1529
1530               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1531                 *find_idx (ctype, &ctype->class_collection,
1532                            &ctype->class_collection_max,
1533                            &ctype->class_collection_act, wch) |= class_bit;
1534             }
1535           else
1536             wch = ILLEGAL_CHAR_VALUE;
1537
1538           if (handle_digits == 1)
1539             {
1540               /* We must store the digit values.  */
1541               if (ctype->mbdigits_act == ctype->mbdigits_max)
1542                 {
1543                   ctype->mbdigits_max *= 2;
1544                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1545                                               (ctype->mbdigits_max
1546                                                * sizeof (char *)));
1547                   ctype->wcdigits_max *= 2;
1548                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1549                                               (ctype->wcdigits_max
1550                                                * sizeof (uint32_t)));
1551                 }
1552
1553               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1554               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1555               seq->nbytes = last_charcode_len;
1556
1557               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1558               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1559             }
1560           else if (handle_digits == 2)
1561             {
1562               struct charseq *seq;
1563               /* We must store the digit values.  */
1564               if (ctype->outdigits_act >= 10)
1565                 {
1566                   lr_error (ldfile, _("\
1567 %s: field `%s' does not contain exactly ten entries"),
1568                             "LC_CTYPE", "outdigit");
1569                   return;
1570                 }
1571
1572               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1573               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1574               seq->nbytes = last_charcode_len;
1575
1576               ctype->mboutdigits[ctype->outdigits_act] = seq;
1577               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1578               ++ctype->outdigits_act;
1579             }
1580         }
1581       while (memcmp (last_charcode, now->val.charcode.bytes,
1582                      last_charcode_len) != 0);
1583     }
1584 }
1585
1586
1587 /* Read one transliteration entry.  */
1588 static uint32_t *
1589 read_widestring (struct linereader *ldfile, struct token *now,
1590                  struct charmap_t *charmap, struct repertoire_t *repertoire)
1591 {
1592   uint32_t *wstr;
1593
1594   if (now->tok == tok_default_missing)
1595     /* The special name "" will denote this case.  */
1596     wstr = ((uint32_t *) { 0 });
1597   else if (now->tok == tok_bsymbol)
1598     {
1599       /* Get the value from the repertoire.  */
1600       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1601       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1602                                        now->val.str.lenmb);
1603       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1604         {
1605           /* We cannot proceed, we don't know the UCS4 value.  */
1606           free (wstr);
1607           return NULL;
1608         }
1609
1610       wstr[1] = 0;
1611     }
1612   else if (now->tok == tok_ucs4)
1613     {
1614       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1615       wstr[0] = now->val.ucs4;
1616       wstr[1] = 0;
1617     }
1618   else if (now->tok == tok_charcode)
1619     {
1620       /* Argh, we have to convert to the symbol name first and then to the
1621          UCS4 value.  */
1622       struct charseq *seq = charmap_find_symbol (charmap,
1623                                                  now->val.str.startmb,
1624                                                  now->val.str.lenmb);
1625       if (seq == NULL)
1626         /* Cannot find the UCS4 value.  */
1627         return NULL;
1628
1629       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1630         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1631                                            strlen (seq->name));
1632       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1633         /* We cannot proceed, we don't know the UCS4 value.  */
1634         return NULL;
1635
1636       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1637       wstr[0] = seq->ucs4;
1638       wstr[1] = 0;
1639     }
1640   else if (now->tok == tok_string)
1641     {
1642       wstr = now->val.str.startwc;
1643       if (wstr == NULL || wstr[0] == 0)
1644         return NULL;
1645     }
1646   else
1647     {
1648       if (now->tok != tok_eol && now->tok != tok_eof)
1649         lr_ignore_rest (ldfile, 0);
1650       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1651       return (uint32_t *) -1l;
1652     }
1653
1654   return wstr;
1655 }
1656
1657
1658 static void
1659 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1660                      struct token *now, struct charmap_t *charmap,
1661                      struct repertoire_t *repertoire)
1662 {
1663   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1664   struct translit_t *result;
1665   struct translit_to_t **top;
1666   struct obstack *ob = &ctype->mempool;
1667   int first;
1668   int ignore;
1669
1670   if (from_wstr == NULL)
1671     /* There is no valid from string.  */
1672     return;
1673
1674   result = (struct translit_t *) obstack_alloc (ob,
1675                                                 sizeof (struct translit_t));
1676   result->from = from_wstr;
1677   result->fname = ldfile->fname;
1678   result->lineno = ldfile->lineno;
1679   result->next = NULL;
1680   result->to = NULL;
1681   top = &result->to;
1682   first = 1;
1683   ignore = 0;
1684
1685   while (1)
1686     {
1687       uint32_t *to_wstr;
1688
1689       /* Next we have one or more transliterations.  They are
1690          separated by semicolons.  */
1691       now = lr_token (ldfile, charmap, repertoire);
1692
1693       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1694         {
1695           /* One string read.  */
1696           const uint32_t zero = 0;
1697
1698           if (!ignore)
1699             {
1700               obstack_grow (ob, &zero, 4);
1701               to_wstr = obstack_finish (ob);
1702
1703               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1704               (*top)->str = to_wstr;
1705               (*top)->next = NULL;
1706             }
1707
1708           if (now->tok == tok_eol)
1709             {
1710               result->next = ctype->translit;
1711               ctype->translit = result;
1712               return;
1713             }
1714
1715           if (!ignore)
1716             top = &(*top)->next;
1717           ignore = 0;
1718         }
1719       else
1720         {
1721           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1722           if (to_wstr == (uint32_t *) -1l)
1723             {
1724               /* An error occurred.  */
1725               obstack_free (ob, result);
1726               return;
1727             }
1728
1729           if (to_wstr == NULL)
1730             ignore = 1;
1731           else
1732             /* This value is usable.  */
1733             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
1734
1735           first = 0;
1736         }
1737     }
1738 }
1739
1740
1741 static void
1742 read_translit_ignore_entry (struct linereader *ldfile,
1743                             struct locale_ctype_t *ctype,
1744                             struct charmap_t *charmap,
1745                             struct repertoire_t *repertoire)
1746 {
1747   /* We expect a semicolon-separated list of characters we ignore.  We are
1748      only interested in the wide character definitions.  These must be
1749      single characters, possibly defining a range when an ellipsis is used.  */
1750   while (1)
1751     {
1752       struct token *now = lr_token (ldfile, charmap, repertoire);
1753       struct translit_ignore_t *newp;
1754       uint32_t from;
1755
1756       if (now->tok == tok_eol || now->tok == tok_eof)
1757         {
1758           lr_error (ldfile,
1759                     _("premature end of `translit_ignore' definition"));
1760           return;
1761         }
1762
1763       if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1764         {
1765           lr_error (ldfile, _("syntax error"));
1766           lr_ignore_rest (ldfile, 0);
1767           return;
1768         }
1769
1770       if (now->tok == tok_ucs4)
1771         from = now->val.ucs4;
1772       else
1773         /* Try to get the value.  */
1774         from = repertoire_find_value (repertoire, now->val.str.startmb,
1775                                       now->val.str.lenmb);
1776
1777       if (from == ILLEGAL_CHAR_VALUE)
1778         {
1779           lr_error (ldfile, "invalid character name");
1780           newp = NULL;
1781         }
1782       else
1783         {
1784           newp = (struct translit_ignore_t *)
1785             obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1786           newp->from = from;
1787           newp->to = from;
1788           newp->step = 1;
1789
1790           newp->next = ctype->translit_ignore;
1791           ctype->translit_ignore = newp;
1792         }
1793
1794       /* Now we expect either a semicolon, an ellipsis, or the end of the
1795          line.  */
1796       now = lr_token (ldfile, charmap, repertoire);
1797
1798       if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
1799         {
1800           /* XXX Should we bother implementing `....'?  `...' certainly
1801              will not be implemented.  */
1802           uint32_t to;
1803           int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
1804
1805           now = lr_token (ldfile, charmap, repertoire);
1806
1807           if (now->tok == tok_eol || now->tok == tok_eof)
1808             {
1809               lr_error (ldfile,
1810                         _("premature end of `translit_ignore' definition"));
1811               return;
1812             }
1813
1814           if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1815             {
1816               lr_error (ldfile, _("syntax error"));
1817               lr_ignore_rest (ldfile, 0);
1818               return;
1819             }
1820
1821           if (now->tok == tok_ucs4)
1822             to = now->val.ucs4;
1823           else
1824             /* Try to get the value.  */
1825             to = repertoire_find_value (repertoire, now->val.str.startmb,
1826                                         now->val.str.lenmb);
1827
1828           if (to == ILLEGAL_CHAR_VALUE)
1829             lr_error (ldfile, "invalid character name");
1830           else
1831             {
1832               /* Make sure the `to'-value is larger.  */
1833               if (to >= from)
1834                 {
1835                   newp->to = to;
1836                   newp->step = step;
1837                 }
1838               else
1839                 lr_error (ldfile, _("\
1840 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1841                           (to | from) < 65536 ? 4 : 8, to,
1842                           (to | from) < 65536 ? 4 : 8, from);
1843             }
1844
1845           /* And the next token.  */
1846           now = lr_token (ldfile, charmap, repertoire);
1847         }
1848
1849       if (now->tok == tok_eol || now->tok == tok_eof)
1850         /* We are done.  */
1851         return;
1852
1853       if (now->tok == tok_semicolon)
1854         /* Next round.  */
1855         continue;
1856
1857       /* If we come here something is wrong.  */
1858       lr_error (ldfile, _("syntax error"));
1859       lr_ignore_rest (ldfile, 0);
1860       return;
1861     }
1862 }
1863
1864
1865 /* The parser for the LC_CTYPE section of the locale definition.  */
1866 void
1867 ctype_read (struct linereader *ldfile, struct localedef_t *result,
1868             struct charmap_t *charmap, const char *repertoire_name,
1869             int ignore_content)
1870 {
1871   struct repertoire_t *repertoire = NULL;
1872   struct locale_ctype_t *ctype;
1873   struct token *now;
1874   enum token_t nowtok;
1875   size_t cnt;
1876   struct charseq *last_seq;
1877   uint32_t last_wch = 0;
1878   enum token_t last_token;
1879   enum token_t ellipsis_token;
1880   int step;
1881   char last_charcode[16];
1882   size_t last_charcode_len = 0;
1883   const char *last_str = NULL;
1884   int mapidx;
1885
1886   /* Get the repertoire we have to use.  */
1887   if (repertoire_name != NULL)
1888     repertoire = repertoire_read (repertoire_name);
1889
1890   /* The rest of the line containing `LC_CTYPE' must be free.  */
1891   lr_ignore_rest (ldfile, 1);
1892
1893
1894   do
1895     {
1896       now = lr_token (ldfile, charmap, NULL);
1897       nowtok = now->tok;
1898     }
1899   while (nowtok == tok_eol);
1900
1901   /* If we see `copy' now we are almost done.  */
1902   if (nowtok == tok_copy)
1903     {
1904       handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_ctype,
1905                    LC_CTYPE, "LC_CTYPE", ignore_content);
1906       return;
1907     }
1908
1909   /* Prepare the data structures.  */
1910   ctype_startup (ldfile, result, charmap, ignore_content);
1911   ctype = result->categories[LC_CTYPE].ctype;
1912
1913   /* Remember the repertoire we use.  */
1914   if (!ignore_content)
1915     ctype->repertoire = repertoire;
1916
1917   while (1)
1918     {
1919       unsigned long int class_bit = 0;
1920       unsigned long int class256_bit = 0;
1921       int handle_digits = 0;
1922
1923       /* Of course we don't proceed beyond the end of file.  */
1924       if (nowtok == tok_eof)
1925         break;
1926
1927       /* Ingore empty lines.  */
1928       if (nowtok == tok_eol)
1929         {
1930           now = lr_token (ldfile, charmap, NULL);
1931           nowtok = now->tok;
1932           continue;
1933         }
1934
1935       switch (nowtok)
1936         {
1937         case tok_charclass:
1938           now = lr_token (ldfile, charmap, NULL);
1939           while (now->tok == tok_ident || now->tok == tok_string)
1940             {
1941               ctype_class_new (ldfile, ctype, now->val.str.startmb);
1942               now = lr_token (ldfile, charmap, NULL);
1943               if (now->tok != tok_semicolon)
1944                 break;
1945               now = lr_token (ldfile, charmap, NULL);
1946             }
1947           if (now->tok != tok_eol)
1948             SYNTAX_ERROR (_("\
1949 %s: syntax error in definition of new character class"), "LC_CTYPE");
1950           break;
1951
1952         case tok_charconv:
1953           now = lr_token (ldfile, charmap, NULL);
1954           while (now->tok == tok_ident || now->tok == tok_string)
1955             {
1956               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
1957               now = lr_token (ldfile, charmap, NULL);
1958               if (now->tok != tok_semicolon)
1959                 break;
1960               now = lr_token (ldfile, charmap, NULL);
1961             }
1962           if (now->tok != tok_eol)
1963             SYNTAX_ERROR (_("\
1964 %s: syntax error in definition of new character map"), "LC_CTYPE");
1965           break;
1966
1967         case tok_class:
1968           /* Ignore the rest of the line if we don't need the input of
1969              this line.  */
1970           if (ignore_content)
1971             {
1972               lr_ignore_rest (ldfile, 0);
1973               break;
1974             }
1975
1976           /* We simply forget the `class' keyword and use the following
1977              operand to determine the bit.  */
1978           now = lr_token (ldfile, charmap, NULL);
1979           if (now->tok == tok_ident || now->tok == tok_string)
1980             {
1981               /* Must can be one of the predefined class names.  */
1982               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1983                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
1984                   break;
1985               if (cnt >= ctype->nr_charclass)
1986                 {
1987 #ifdef PREDEFINED_CLASSES
1988                   if (now->val.str.lenmb == 8
1989                       && memcmp ("special1", now->val.str.startmb, 8) == 0)
1990                     class_bit = _ISwspecial1;
1991                   else if (now->val.str.lenmb == 8
1992                       && memcmp ("special2", now->val.str.startmb, 8) == 0)
1993                     class_bit = _ISwspecial2;
1994                   else if (now->val.str.lenmb == 8
1995                       && memcmp ("special3", now->val.str.startmb, 8) == 0)
1996                     class_bit = _ISwspecial3;
1997                   else
1998 #endif
1999                     {
2000                       /* OK, it's a new class.  */
2001                       ctype_class_new (ldfile, ctype, now->val.str.startmb);
2002
2003                       class_bit = _ISwbit (ctype->nr_charclass - 1);
2004                     }
2005                 }
2006               else
2007                 {
2008                   class_bit = _ISwbit (cnt);
2009
2010                   free (now->val.str.startmb);
2011                 }
2012             }
2013           else if (now->tok == tok_digit)
2014             goto handle_tok_digit;
2015           else if (now->tok < tok_upper || now->tok > tok_blank)
2016             goto err_label;
2017           else
2018             {
2019               class_bit = BITw (now->tok);
2020               class256_bit = BIT (now->tok);
2021             }
2022
2023           /* The next character must be a semicolon.  */
2024           now = lr_token (ldfile, charmap, NULL);
2025           if (now->tok != tok_semicolon)
2026             goto err_label;
2027           goto read_charclass;
2028
2029         case tok_upper:
2030         case tok_lower:
2031         case tok_alpha:
2032         case tok_alnum:
2033         case tok_space:
2034         case tok_cntrl:
2035         case tok_punct:
2036         case tok_graph:
2037         case tok_print:
2038         case tok_xdigit:
2039         case tok_blank:
2040           /* Ignore the rest of the line if we don't need the input of
2041              this line.  */
2042           if (ignore_content)
2043             {
2044               lr_ignore_rest (ldfile, 0);
2045               break;
2046             }
2047
2048           class_bit = BITw (now->tok);
2049           class256_bit = BIT (now->tok);
2050           handle_digits = 0;
2051         read_charclass:
2052           ctype->class_done |= class_bit;
2053           last_token = tok_none;
2054           ellipsis_token = tok_none;
2055           step = 1;
2056           now = lr_token (ldfile, charmap, NULL);
2057           while (now->tok != tok_eol && now->tok != tok_eof)
2058             {
2059               uint32_t wch;
2060               struct charseq *seq;
2061
2062               if (ellipsis_token == tok_none)
2063                 {
2064                   if (get_character (now, charmap, repertoire, &seq, &wch))
2065                     goto err_label;
2066
2067                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
2068                     /* Yep, we can store information about this byte
2069                        sequence.  */
2070                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2071
2072                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2073                       && class_bit != 0)
2074                     /* We have the UCS4 position.  */
2075                     *find_idx (ctype, &ctype->class_collection,
2076                                &ctype->class_collection_max,
2077                                &ctype->class_collection_act, wch) |= class_bit;
2078
2079                   last_token = now->tok;
2080                   /* Terminate the string.  */
2081                   if (last_token == tok_bsymbol)
2082                     {
2083                       now->val.str.startmb[now->val.str.lenmb] = '\0';
2084                       last_str = now->val.str.startmb;
2085                     }
2086                   else
2087                     last_str = NULL;
2088                   last_seq = seq;
2089                   last_wch = wch;
2090                   memcpy (last_charcode, now->val.charcode.bytes, 16);
2091                   last_charcode_len = now->val.charcode.nbytes;
2092
2093                   if (!ignore_content && handle_digits == 1)
2094                     {
2095                       /* We must store the digit values.  */
2096                       if (ctype->mbdigits_act == ctype->mbdigits_max)
2097                         {
2098                           ctype->mbdigits_max += 10;
2099                           ctype->mbdigits = xrealloc (ctype->mbdigits,
2100                                                       (ctype->mbdigits_max
2101                                                        * sizeof (char *)));
2102                           ctype->wcdigits_max += 10;
2103                           ctype->wcdigits = xrealloc (ctype->wcdigits,
2104                                                       (ctype->wcdigits_max
2105                                                        * sizeof (uint32_t)));
2106                         }
2107
2108                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
2109                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
2110                     }
2111                   else if (!ignore_content && handle_digits == 2)
2112                     {
2113                       /* We must store the digit values.  */
2114                       if (ctype->outdigits_act >= 10)
2115                         {
2116                           lr_error (ldfile, _("\
2117 %s: field `%s' does not contain exactly ten entries"),
2118                             "LC_CTYPE", "outdigit");
2119                           goto err_label;
2120                         }
2121
2122                       ctype->mboutdigits[ctype->outdigits_act] = seq;
2123                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
2124                       ++ctype->outdigits_act;
2125                     }
2126                 }
2127               else
2128                 {
2129                   /* Now it gets complicated.  We have to resolve the
2130                      ellipsis problem.  First we must distinguish between
2131                      the different kind of ellipsis and this must match the
2132                      tokens we have seen.  */
2133                   assert (last_token != tok_none);
2134
2135                   if (last_token != now->tok)
2136                     {
2137                       lr_error (ldfile, _("\
2138 ellipsis range must be marked by two operands of same type"));
2139                       lr_ignore_rest (ldfile, 0);
2140                       break;
2141                     }
2142
2143                   if (last_token == tok_bsymbol)
2144                     {
2145                       if (ellipsis_token == tok_ellipsis3)
2146                         lr_error (ldfile, _("with symbolic name range values \
2147 the absolute ellipsis `...' must not be used"));
2148
2149                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2150                                                    repertoire, now, last_str,
2151                                                    class256_bit, class_bit,
2152                                                    (ellipsis_token
2153                                                     == tok_ellipsis4
2154                                                     ? 10 : 16),
2155                                                    ignore_content,
2156                                                    handle_digits, step);
2157                     }
2158                   else if (last_token == tok_ucs4)
2159                     {
2160                       if (ellipsis_token != tok_ellipsis2)
2161                         lr_error (ldfile, _("\
2162 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2163
2164                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2165                                                repertoire, now, last_wch,
2166                                                class256_bit, class_bit,
2167                                                ignore_content, handle_digits,
2168                                                step);
2169                     }
2170                   else
2171                     {
2172                       assert (last_token == tok_charcode);
2173
2174                       if (ellipsis_token != tok_ellipsis3)
2175                         lr_error (ldfile, _("\
2176 with character code range values one must use the absolute ellipsis `...'"));
2177
2178                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
2179                                                    repertoire, now,
2180                                                    last_charcode,
2181                                                    last_charcode_len,
2182                                                    class256_bit, class_bit,
2183                                                    ignore_content,
2184                                                    handle_digits);
2185                     }
2186
2187                   /* Now we have used the last value.  */
2188                   last_token = tok_none;
2189                 }
2190
2191               /* Next we expect a semicolon or the end of the line.  */
2192               now = lr_token (ldfile, charmap, NULL);
2193               if (now->tok == tok_eol || now->tok == tok_eof)
2194                 break;
2195
2196               if (last_token != tok_none
2197                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2198                 {
2199                   if (now->tok == tok_ellipsis2_2)
2200                     {
2201                       now->tok = tok_ellipsis2;
2202                       step = 2;
2203                     }
2204                   else if (now->tok == tok_ellipsis4_2)
2205                     {
2206                       now->tok = tok_ellipsis4;
2207                       step = 2;
2208                     }
2209
2210                   ellipsis_token = now->tok;
2211
2212                   now = lr_token (ldfile, charmap, NULL);
2213                   continue;
2214                 }
2215
2216               if (now->tok != tok_semicolon)
2217                 goto err_label;
2218
2219               /* And get the next character.  */
2220               now = lr_token (ldfile, charmap, NULL);
2221
2222               ellipsis_token = tok_none;
2223               step = 1;
2224             }
2225           break;
2226
2227         case tok_digit:
2228           /* Ignore the rest of the line if we don't need the input of
2229              this line.  */
2230           if (ignore_content)
2231             {
2232               lr_ignore_rest (ldfile, 0);
2233               break;
2234             }
2235
2236         handle_tok_digit:
2237           class_bit = _ISwdigit;
2238           class256_bit = _ISdigit;
2239           handle_digits = 1;
2240           goto read_charclass;
2241
2242         case tok_outdigit:
2243           /* Ignore the rest of the line if we don't need the input of
2244              this line.  */
2245           if (ignore_content)
2246             {
2247               lr_ignore_rest (ldfile, 0);
2248               break;
2249             }
2250
2251           if (ctype->outdigits_act != 0)
2252             lr_error (ldfile, _("\
2253 %s: field `%s' declared more than once"),
2254                       "LC_CTYPE", "outdigit");
2255           class_bit = 0;
2256           class256_bit = 0;
2257           handle_digits = 2;
2258           goto read_charclass;
2259
2260         case tok_toupper:
2261           /* Ignore the rest of the line if we don't need the input of
2262              this line.  */
2263           if (ignore_content)
2264             {
2265               lr_ignore_rest (ldfile, 0);
2266               break;
2267             }
2268
2269           mapidx = 0;
2270           goto read_mapping;
2271
2272         case tok_tolower:
2273           /* Ignore the rest of the line if we don't need the input of
2274              this line.  */
2275           if (ignore_content)
2276             {
2277               lr_ignore_rest (ldfile, 0);
2278               break;
2279             }
2280
2281           mapidx = 1;
2282           goto read_mapping;
2283
2284         case tok_map:
2285           /* Ignore the rest of the line if we don't need the input of
2286              this line.  */
2287           if (ignore_content)
2288             {
2289               lr_ignore_rest (ldfile, 0);
2290               break;
2291             }
2292
2293           /* We simply forget the `map' keyword and use the following
2294              operand to determine the mapping.  */
2295           now = lr_token (ldfile, charmap, NULL);
2296           if (now->tok == tok_ident || now->tok == tok_string)
2297             {
2298               size_t cnt;
2299
2300               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2301                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2302                   break;
2303
2304               if (cnt < ctype->map_collection_nr)
2305                 free (now->val.str.startmb);
2306               else
2307                 /* OK, it's a new map.  */
2308                 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2309
2310               mapidx = cnt;
2311             }
2312           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2313             goto err_label;
2314           else
2315             mapidx = now->tok - tok_toupper;
2316
2317           now = lr_token (ldfile, charmap, NULL);
2318           /* This better should be a semicolon.  */
2319           if (now->tok != tok_semicolon)
2320             goto err_label;
2321
2322         read_mapping:
2323           /* Test whether this mapping was already defined.  */
2324           if (ctype->tomap_done[mapidx])
2325             {
2326               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2327                         ctype->mapnames[mapidx]);
2328               lr_ignore_rest (ldfile, 0);
2329               break;
2330             }
2331           ctype->tomap_done[mapidx] = 1;
2332
2333           now = lr_token (ldfile, charmap, NULL);
2334           while (now->tok != tok_eol && now->tok != tok_eof)
2335             {
2336               struct charseq *from_seq;
2337               uint32_t from_wch;
2338               struct charseq *to_seq;
2339               uint32_t to_wch;
2340
2341               /* Every pair starts with an opening brace.  */
2342               if (now->tok != tok_open_brace)
2343                 goto err_label;
2344
2345               /* Next comes the from-value.  */
2346               now = lr_token (ldfile, charmap, NULL);
2347               if (get_character (now, charmap, repertoire, &from_seq,
2348                                  &from_wch) != 0)
2349                 goto err_label;
2350
2351               /* The next is a comma.  */
2352               now = lr_token (ldfile, charmap, NULL);
2353               if (now->tok != tok_comma)
2354                 goto err_label;
2355
2356               /* And the other value.  */
2357               now = lr_token (ldfile, charmap, NULL);
2358               if (get_character (now, charmap, repertoire, &to_seq,
2359                                  &to_wch) != 0)
2360                 goto err_label;
2361
2362               /* And the last thing is the closing brace.  */
2363               now = lr_token (ldfile, charmap, NULL);
2364               if (now->tok != tok_close_brace)
2365                 goto err_label;
2366
2367               if (!ignore_content)
2368                 {
2369                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2370                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2371                     /* We can use this value.  */
2372                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2373                       = to_seq->bytes[0];
2374
2375                   if (from_wch != ILLEGAL_CHAR_VALUE
2376                       && to_wch != ILLEGAL_CHAR_VALUE)
2377                     /* Both correct values.  */
2378                     *find_idx (ctype, &ctype->map_collection[mapidx],
2379                                &ctype->map_collection_max[mapidx],
2380                                &ctype->map_collection_act[mapidx],
2381                                from_wch) = to_wch;
2382                 }
2383
2384               /* Now comes a semicolon or the end of the line/file.  */
2385               now = lr_token (ldfile, charmap, NULL);
2386               if (now->tok == tok_semicolon)
2387                 now = lr_token (ldfile, charmap, NULL);
2388             }
2389           break;
2390
2391         case tok_translit_start:
2392           /* Ignore the rest of the line if we don't need the input of
2393              this line.  */
2394           if (ignore_content)
2395             {
2396               lr_ignore_rest (ldfile, 0);
2397               break;
2398             }
2399
2400           /* The rest of the line better should be empty.  */
2401           lr_ignore_rest (ldfile, 1);
2402
2403           /* We count here the number of allocated entries in the `translit'
2404              array.  */
2405           cnt = 0;
2406
2407           /* We proceed until we see the `translit_end' token.  */
2408           while (now = lr_token (ldfile, charmap, repertoire),
2409                  now->tok != tok_translit_end && now->tok != tok_eof)
2410             {
2411               if (now->tok == tok_eol)
2412                 /* Ignore empty lines.  */
2413                 continue;
2414
2415               if (now->tok == tok_translit_end)
2416                 {
2417                   lr_ignore_rest (ldfile, 0);
2418                   break;
2419                 }
2420
2421               if (now->tok == tok_include)
2422                 {
2423                   /* We have to include locale.  */
2424                   const char *locale_name;
2425                   const char *repertoire_name;
2426
2427                   now = lr_token (ldfile, charmap, NULL);
2428                   /* This should be a string or an identifier.  In any
2429                      case something to name a locale.  */
2430                   if (now->tok != tok_string && now->tok != tok_ident)
2431                     {
2432                     translit_syntax:
2433                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2434                       lr_ignore_rest (ldfile, 0);
2435                       continue;
2436                     }
2437                   locale_name = now->val.str.startmb;
2438
2439                   /* Next should be a semicolon.  */
2440                   now = lr_token (ldfile, charmap, NULL);
2441                   if (now->tok != tok_semicolon)
2442                     goto translit_syntax;
2443
2444                   /* Now the repertoire name.  */
2445                   now = lr_token (ldfile, charmap, NULL);
2446                   if ((now->tok != tok_string && now->tok != tok_ident)
2447                       || now->val.str.startmb == NULL)
2448                     goto translit_syntax;
2449                   repertoire_name = now->val.str.startmb;
2450
2451                   /* We must not have more than one `include'.  */
2452                   if (ctype->translit_copy_locale != NULL)
2453                     {
2454                       lr_error (ldfile, _("\
2455 %s: only one `include' instruction allowed"), "LC_CTYPE");
2456                       lr_ignore_rest (ldfile, 0);
2457                       continue;
2458                     }
2459
2460                   ctype->translit_copy_locale = locale_name;
2461                   ctype->translit_copy_repertoire = repertoire_name;
2462
2463                   /* The rest of the line must be empty.  */
2464                   lr_ignore_rest (ldfile, 1);
2465
2466                   /* Make sure the locale is read.  */
2467                   add_to_readlist (LC_CTYPE, ctype->translit_copy_locale,
2468                                    repertoire_name, 1);
2469                   continue;
2470                 }
2471               else if (now->tok == tok_default_missing)
2472                 {
2473                   uint32_t *wstr;
2474
2475                   /* We expect a single character or string as the
2476                      argument.  */
2477                   now = lr_token (ldfile, charmap, NULL);
2478                   wstr = read_widestring (ldfile, now, charmap, repertoire);
2479
2480                   if (wstr != NULL)
2481                     {
2482                       if (ctype->default_missing != NULL)
2483                         {
2484                           lr_error (ldfile, _("\
2485 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2486                           error_at_line (0, 0, ctype->default_missing_file,
2487                                          ctype->default_missing_lineno,
2488                                          _("previous definition was here"));
2489                         }
2490                       else
2491                         {
2492                           ctype->default_missing = wstr;
2493                           ctype->default_missing_file = ldfile->fname;
2494                           ctype->default_missing_lineno = ldfile->lineno;
2495                         }
2496                     }
2497                   lr_ignore_rest (ldfile, 1);
2498                   continue;
2499                 }
2500               else if (now->tok == tok_translit_ignore)
2501                 {
2502                   read_translit_ignore_entry (ldfile, ctype, charmap,
2503                                               repertoire);
2504                   continue;
2505                 }
2506
2507               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2508             }
2509           break;
2510
2511         case tok_ident:
2512           /* Ignore the rest of the line if we don't need the input of
2513              this line.  */
2514           if (ignore_content)
2515             {
2516               lr_ignore_rest (ldfile, 0);
2517               break;
2518             }
2519
2520           /* This could mean one of several things.  First test whether
2521              it's a character class name.  */
2522           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2523             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2524               break;
2525           if (cnt < ctype->nr_charclass)
2526             {
2527               class_bit = _ISwbit (cnt);
2528               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2529               free (now->val.str.startmb);
2530               goto read_charclass;
2531             }
2532           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2533             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2534               break;
2535           if (cnt < ctype->map_collection_nr)
2536             {
2537               mapidx = cnt;
2538               free (now->val.str.startmb);
2539               goto read_mapping;
2540             }
2541 #ifdef PREDEFINED_CLASSES
2542           if (strcmp (now->val.str.startmb, "special1") == 0)
2543             {
2544               class_bit = _ISwspecial1;
2545               free (now->val.str.startmb);
2546               goto read_charclass;
2547             }
2548           if (strcmp (now->val.str.startmb, "special2") == 0)
2549             {
2550               class_bit = _ISwspecial2;
2551               free (now->val.str.startmb);
2552               goto read_charclass;
2553             }
2554           if (strcmp (now->val.str.startmb, "special3") == 0)
2555             {
2556               class_bit = _ISwspecial3;
2557               free (now->val.str.startmb);
2558               goto read_charclass;
2559             }
2560           if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2561             {
2562               mapidx = 2;
2563               goto read_mapping;
2564             }
2565 #endif
2566           break;
2567
2568         case tok_end:
2569           /* Next we assume `LC_CTYPE'.  */
2570           now = lr_token (ldfile, charmap, NULL);
2571           if (now->tok == tok_eof)
2572             break;
2573           if (now->tok == tok_eol)
2574             lr_error (ldfile, _("%s: incomplete `END' line"),
2575                       "LC_CTYPE");
2576           else if (now->tok != tok_lc_ctype)
2577             lr_error (ldfile, _("\
2578 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2579           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2580           return;
2581
2582         default:
2583         err_label:
2584           if (now->tok != tok_eof)
2585             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2586         }
2587
2588       /* Prepare for the next round.  */
2589       now = lr_token (ldfile, charmap, NULL);
2590       nowtok = now->tok;
2591     }
2592
2593   /* When we come here we reached the end of the file.  */
2594   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2595 }
2596
2597
2598 static void
2599 set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2600                     struct repertoire_t *repertoire)
2601 {
2602   size_t cnt;
2603
2604   /* These function defines the default values for the classes and conversions
2605      according to POSIX.2 2.5.2.1.
2606      It may seem that the order of these if-blocks is arbitrary but it is NOT.
2607      Don't move them unless you know what you do!  */
2608
2609   void set_default (int bitpos, int from, int to)
2610     {
2611       char tmp[2];
2612       int ch;
2613       int bit = _ISbit (bitpos);
2614       int bitw = _ISwbit (bitpos);
2615       /* Define string.  */
2616       strcpy (tmp, "?");
2617
2618       for (ch = from; ch <= to; ++ch)
2619         {
2620           struct charseq *seq;
2621           tmp[0] = ch;
2622
2623           seq = charmap_find_value (charmap, tmp, 1);
2624           if (seq == NULL)
2625             {
2626               if (!be_quiet)
2627                 error (0, 0, _("\
2628 %s: character `%s' not defined in charmap while needed as default value"),
2629                        "LC_CTYPE", tmp);
2630             }
2631           else if (seq->nbytes != 1)
2632             error (0, 0, _("\
2633 %s: character `%s' in charmap not representable with one byte"),
2634                    "LC_CTYPE", tmp);
2635           else
2636             ctype->class256_collection[seq->bytes[0]] |= bit;
2637
2638           /* No need to search here, the ASCII value is also the Unicode
2639              value.  */
2640           ELEM (ctype, class_collection, , ch) |= bitw;
2641         }
2642     }
2643
2644   /* Set default values if keyword was not present.  */
2645   if ((ctype->class_done & BITw (tok_upper)) == 0)
2646     /* "If this keyword [lower] is not specified, the lowercase letters
2647         `A' through `Z', ..., shall automatically belong to this class,
2648         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2649     set_default (BITPOS (tok_upper), 'A', 'Z');
2650
2651   if ((ctype->class_done & BITw (tok_lower)) == 0)
2652     /* "If this keyword [lower] is not specified, the lowercase letters
2653         `a' through `z', ..., shall automatically belong to this class,
2654         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2655     set_default (BITPOS (tok_lower), 'a', 'z');
2656
2657   if ((ctype->class_done & BITw (tok_alpha)) == 0)
2658     {
2659       /* Table 2-6 in P1003.2 says that characters in class `upper' or
2660          class `lower' *must* be in class `alpha'.  */
2661       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
2662       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2663
2664       for (cnt = 0; cnt < 256; ++cnt)
2665         if ((ctype->class256_collection[cnt] & mask) != 0)
2666           ctype->class256_collection[cnt] |= BIT (tok_alpha);
2667
2668       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2669         if ((ctype->class_collection[cnt] & maskw) != 0)
2670           ctype->class_collection[cnt] |= BITw (tok_alpha);
2671     }
2672
2673   if ((ctype->class_done & BITw (tok_digit)) == 0)
2674     /* "If this keyword [digit] is not specified, the digits `0' through
2675         `9', ..., shall automatically belong to this class, with
2676         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2677     set_default (BITPOS (tok_digit), '0', '9');
2678
2679   /* "Only characters specified for the `alpha' and `digit' keyword
2680      shall be specified.  Characters specified for the keyword `alpha'
2681      and `digit' are automatically included in this class.  */
2682   {
2683     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
2684     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2685
2686     for (cnt = 0; cnt < 256; ++cnt)
2687       if ((ctype->class256_collection[cnt] & mask) != 0)
2688         ctype->class256_collection[cnt] |= BIT (tok_alnum);
2689
2690     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2691       if ((ctype->class_collection[cnt] & maskw) != 0)
2692         ctype->class_collection[cnt] |= BITw (tok_alnum);
2693   }
2694
2695   if ((ctype->class_done & BITw (tok_space)) == 0)
2696     /* "If this keyword [space] is not specified, the characters <space>,
2697         <form-feed>, <newline>, <carriage-return>, <tab>, and
2698         <vertical-tab>, ..., shall automatically belong to this class,
2699         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2700     {
2701       struct charseq *seq;
2702
2703       seq = charmap_find_value (charmap, "space", 5);
2704       if (seq == NULL)
2705         seq = charmap_find_value (charmap, "SP", 2);
2706       if (seq == NULL)
2707         seq = charmap_find_value (charmap, "U00000020", 9);
2708       if (seq == NULL)
2709         {
2710           if (!be_quiet)
2711             error (0, 0, _("\
2712 %s: character `%s' not defined while needed as default value"),
2713                    "LC_CTYPE", "<space>");
2714         }
2715       else if (seq->nbytes != 1)
2716         error (0, 0, _("\
2717 %s: character `%s' in charmap not representable with one byte"),
2718                "LC_CTYPE", "<space>");
2719       else
2720         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2721
2722       /* No need to search.  */
2723       ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
2724
2725       seq = charmap_find_value (charmap, "form-feed", 9);
2726       if (seq == NULL)
2727         seq = charmap_find_value (charmap, "U0000000C", 9);
2728       if (seq == NULL)
2729         {
2730           if (!be_quiet)
2731             error (0, 0, _("\
2732 %s: character `%s' not defined while needed as default value"),
2733                    "LC_CTYPE", "<form-feed>");
2734         }
2735       else if (seq->nbytes != 1)
2736         error (0, 0, _("\
2737 %s: character `%s' in charmap not representable with one byte"),
2738                "LC_CTYPE", "<form-feed>");
2739       else
2740         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2741
2742       /* No need to search.  */
2743       ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
2744
2745
2746       seq = charmap_find_value (charmap, "newline", 7);
2747       if (seq == NULL)
2748         seq = charmap_find_value (charmap, "U0000000A", 9);
2749       if (seq == NULL)
2750         {
2751           if (!be_quiet)
2752             error (0, 0, _("\
2753 character `%s' not defined while needed as default value"),
2754                    "<newline>");
2755         }
2756       else if (seq->nbytes != 1)
2757         error (0, 0, _("\
2758 %s: character `%s' in charmap not representable with one byte"),
2759                "LC_CTYPE", "<newline>");
2760       else
2761         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2762
2763       /* No need to search.  */
2764       ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
2765
2766
2767       seq = charmap_find_value (charmap, "carriage-return", 15);
2768       if (seq == NULL)
2769         seq = charmap_find_value (charmap, "U0000000D", 9);
2770       if (seq == NULL)
2771         {
2772           if (!be_quiet)
2773             error (0, 0, _("\
2774 %s: character `%s' not defined while needed as default value"),
2775                    "LC_CTYPE", "<carriage-return>");
2776         }
2777       else if (seq->nbytes != 1)
2778         error (0, 0, _("\
2779 %s: character `%s' in charmap not representable with one byte"),
2780                "LC_CTYPE", "<carriage-return>");
2781       else
2782         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2783
2784       /* No need to search.  */
2785       ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
2786
2787
2788       seq = charmap_find_value (charmap, "tab", 3);
2789       if (seq == NULL)
2790         seq = charmap_find_value (charmap, "U00000009", 9);
2791       if (seq == NULL)
2792         {
2793           if (!be_quiet)
2794             error (0, 0, _("\
2795 %s: character `%s' not defined while needed as default value"),
2796                    "LC_CTYPE", "<tab>");
2797         }
2798       else if (seq->nbytes != 1)
2799         error (0, 0, _("\
2800 %s: character `%s' in charmap not representable with one byte"),
2801                "LC_CTYPE", "<tab>");
2802       else
2803         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2804
2805       /* No need to search.  */
2806       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
2807
2808
2809       seq = charmap_find_value (charmap, "vertical-tab", 12);
2810       if (seq == NULL)
2811         seq = charmap_find_value (charmap, "U0000000B", 9);
2812       if (seq == NULL)
2813         {
2814           if (!be_quiet)
2815             error (0, 0, _("\
2816 %s: character `%s' not defined while needed as default value"),
2817                    "LC_CTYPE", "<vertical-tab>");
2818         }
2819       else if (seq->nbytes != 1)
2820         error (0, 0, _("\
2821 %s: character `%s' in charmap not representable with one byte"),
2822                "LC_CTYPE", "<vertical-tab>");
2823       else
2824         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2825
2826       /* No need to search.  */
2827       ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
2828     }
2829
2830   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
2831     /* "If this keyword is not specified, the digits `0' to `9', the
2832         uppercase letters `A' through `F', and the lowercase letters `a'
2833         through `f', ..., shell automatically belong to this class, with
2834         implementation defined character values."  [P1003.2, 2.5.2.1]  */
2835     {
2836       set_default (BITPOS (tok_xdigit), '0', '9');
2837       set_default (BITPOS (tok_xdigit), 'A', 'F');
2838       set_default (BITPOS (tok_xdigit), 'a', 'f');
2839     }
2840
2841   if ((ctype->class_done & BITw (tok_blank)) == 0)
2842     /* "If this keyword [blank] is unspecified, the characters <space> and
2843        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
2844    {
2845       struct charseq *seq;
2846
2847       seq = charmap_find_value (charmap, "space", 5);
2848       if (seq == NULL)
2849         seq = charmap_find_value (charmap, "SP", 2);
2850       if (seq == NULL)
2851         seq = charmap_find_value (charmap, "U00000020", 9);
2852       if (seq == NULL)
2853         {
2854           if (!be_quiet)
2855             error (0, 0, _("\
2856 %s: character `%s' not defined while needed as default value"),
2857                    "LC_CTYPE", "<space>");
2858         }
2859       else if (seq->nbytes != 1)
2860         error (0, 0, _("\
2861 %s: character `%s' in charmap not representable with one byte"),
2862                "LC_CTYPE", "<space>");
2863       else
2864         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
2865
2866       /* No need to search.  */
2867       ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
2868
2869
2870       seq = charmap_find_value (charmap, "tab", 3);
2871       if (seq == NULL)
2872         seq = charmap_find_value (charmap, "U00000009", 9);
2873       if (seq == NULL)
2874         {
2875           if (!be_quiet)
2876             error (0, 0, _("\
2877 %s: character `%s' not defined while needed as default value"),
2878                    "LC_CTYPE", "<tab>");
2879         }
2880       else if (seq->nbytes != 1)
2881         error (0, 0, _("\
2882 %s: character `%s' in charmap not representable with one byte"),
2883                "LC_CTYPE", "<tab>");
2884       else
2885         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
2886
2887       /* No need to search.  */
2888       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
2889     }
2890
2891   if ((ctype->class_done & BITw (tok_graph)) == 0)
2892     /* "If this keyword [graph] is not specified, characters specified for
2893         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
2894         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
2895     {
2896       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
2897         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
2898       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
2899         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
2900         BITw (tok_punct);
2901       size_t cnt;
2902
2903       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2904         if ((ctype->class_collection[cnt] & maskw) != 0)
2905           ctype->class_collection[cnt] |= BITw (tok_graph);
2906
2907       for (cnt = 0; cnt < 256; ++cnt)
2908         if ((ctype->class256_collection[cnt] & mask) != 0)
2909           ctype->class256_collection[cnt] |= BIT (tok_graph);
2910     }
2911
2912   if ((ctype->class_done & BITw (tok_print)) == 0)
2913     /* "If this keyword [print] is not provided, characters specified for
2914         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
2915         and the <space> character shall belong to this character class."
2916         [P1003.2, 2.5.2.1]  */
2917     {
2918       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
2919         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
2920       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
2921         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
2922         BITw (tok_punct);
2923       size_t cnt;
2924       struct charseq *seq;
2925
2926       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2927         if ((ctype->class_collection[cnt] & maskw) != 0)
2928           ctype->class_collection[cnt] |= BITw (tok_print);
2929
2930       for (cnt = 0; cnt < 256; ++cnt)
2931         if ((ctype->class256_collection[cnt] & mask) != 0)
2932           ctype->class256_collection[cnt] |= BIT (tok_print);
2933
2934
2935       seq = charmap_find_value (charmap, "space", 5);
2936       if (seq == NULL)
2937         seq = charmap_find_value (charmap, "SP", 2);
2938       if (seq == NULL)
2939         seq = charmap_find_value (charmap, "U00000020", 9);
2940       if (seq == NULL)
2941         {
2942           if (!be_quiet)
2943             error (0, 0, _("\
2944 %s: character `%s' not defined while needed as default value"),
2945                    "LC_CTYPE", "<space>");
2946         }
2947       else if (seq->nbytes != 1)
2948         error (0, 0, _("\
2949 %s: character `%s' in charmap not representable with one byte"),
2950                "LC_CTYPE", "<space>");
2951       else
2952         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
2953
2954       /* No need to search.  */
2955       ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
2956     }
2957
2958   if (ctype->tomap_done[0] == 0)
2959     /* "If this keyword [toupper] is not specified, the lowercase letters
2960         `a' through `z', and their corresponding uppercase letters `A' to
2961         `Z', ..., shall automatically be included, with implementation-
2962         defined character values."  [P1003.2, 2.5.2.1]  */
2963     {
2964       char tmp[4];
2965       int ch;
2966
2967       strcpy (tmp, "<?>");
2968
2969       for (ch = 'a'; ch <= 'z'; ++ch)
2970         {
2971           struct charseq *seq_from, *seq_to;
2972
2973           tmp[1] = (char) ch;
2974
2975           seq_from = charmap_find_value (charmap, &tmp[1], 1);
2976           if (seq_from == NULL)
2977             {
2978               if (!be_quiet)
2979                 error (0, 0, _("\
2980 %s: character `%s' not defined while needed as default value"),
2981                        "LC_CTYPE", tmp);
2982             }
2983           else if (seq_from->nbytes != 1)
2984             {
2985               if (!be_quiet)
2986                 error (0, 0, _("\
2987 %s: character `%s' needed as default value not representable with one byte"),
2988                        "LC_CTYPE", tmp);
2989             }
2990           else
2991             {
2992               /* This conversion is implementation defined.  */
2993               tmp[1] = (char) (ch + ('A' - 'a'));
2994               seq_to = charmap_find_value (charmap, &tmp[1], 1);
2995               if (seq_to == NULL)
2996                 {
2997                   if (!be_quiet)
2998                     error (0, 0, _("\
2999 %s: character `%s' not defined while needed as default value"),
3000                            "LC_CTYPE", tmp);
3001                 }
3002               else if (seq_to->nbytes != 1)
3003                 {
3004                   if (!be_quiet)
3005                     error (0, 0, _("\
3006 %s: character `%s' needed as default value not representable with one byte"),
3007                            "LC_CTYPE", tmp);
3008                 }
3009               else
3010                 /* The index [0] is determined by the order of the
3011                    `ctype_map_newP' calls in `ctype_startup'.  */
3012                 ctype->map256_collection[0][seq_from->bytes[0]]
3013                   = seq_to->bytes[0];
3014             }
3015
3016           /* No need to search.  */
3017           ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3018         }
3019     }
3020
3021   if (ctype->tomap_done[1] == 0)
3022     /* "If this keyword [tolower] is not specified, the mapping shall be
3023        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
3024     {
3025       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3026         if (ctype->map_collection[0][cnt] != 0)
3027           ELEM (ctype, map_collection, [1],
3028                 ctype->map_collection[0][cnt])
3029             = ctype->charnames[cnt];
3030
3031       for (cnt = 0; cnt < 256; ++cnt)
3032         if (ctype->map256_collection[0][cnt] != 0)
3033           ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3034     }
3035
3036   if (ctype->outdigits_act == 0)
3037     {
3038       for (cnt = 0; cnt < 10; ++cnt)
3039         {
3040           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3041                                                          digits + cnt, 1);
3042
3043           if (ctype->mboutdigits[cnt] == NULL)
3044             {
3045               ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3046                                                              longnames[cnt],
3047                                                              strlen (longnames[cnt]));
3048
3049               if (ctype->mboutdigits[cnt] == NULL)
3050                 {
3051                   /* Provide a replacement.  */
3052                   error (0, 0, _("\
3053 no output digits defined and none of the standard names in the charmap"));
3054
3055                   ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool,
3056                                                            sizeof (struct charseq) + 1);
3057
3058                   /* This is better than nothing.  */
3059                   ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3060                   ctype->mboutdigits[cnt]->nbytes = 1;
3061                 }
3062             }
3063
3064           ctype->wcoutdigits[cnt] = repertoire_find_value (repertoire,
3065                                                            digits + cnt, 1);
3066
3067           if (ctype->wcoutdigits[cnt] == ILLEGAL_CHAR_VALUE)
3068             {
3069               ctype->wcoutdigits[cnt] = repertoire_find_value (repertoire,
3070                                                                longnames[cnt],
3071                                                                strlen (longnames[cnt]));
3072
3073               if (ctype->wcoutdigits[cnt] == ILLEGAL_CHAR_VALUE)
3074                 {
3075                   /* Provide a replacement.  */
3076                   error (0, 0, _("\
3077 no output digits defined and none of the standard names in the repertoire"));
3078
3079                   /* This is better than nothing.  */
3080                   ctype->wcoutdigits[cnt] = (uint32_t) digits[cnt];
3081                 }
3082             }
3083         }
3084
3085       ctype->outdigits_act = 10;
3086     }
3087 }
3088
3089
3090 static void
3091 allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
3092                  struct repertoire_t *repertoire)
3093 {
3094   size_t idx;
3095   size_t width_table_size;
3096
3097   /* First we have to decide how we organize the arrays.  It is easy
3098      for a one-byte character set.  But multi-byte character set
3099      cannot be stored flat because the chars might be sparsely used.
3100      So we determine an optimal hashing function for the used
3101      characters.
3102
3103      We use a very trivial hashing function to store the sparse
3104      table.  CH % TABSIZE is used as an index.  To solve multiple hits
3105      we have N planes.  This guarantees a fixed search time for a
3106      character [N / 2].  In the following code we determine the minimum
3107      value for TABSIZE * N, where TABSIZE >= 256.
3108
3109      Some people complained that this algorithm takes too long.  Well,
3110      go on, improve it.  But changing the step size is *not* an
3111      option.  Some people changed this to use only sizes of prime
3112      numbers.  Think again, do some math.  We are looking for the
3113      optimal solution, not something which works in general.  Unless
3114      somebody can provide a dynamic programming solution I think this
3115      implementation is as good as it can get.  */
3116   size_t min_total = UINT_MAX;
3117   size_t act_size = 256;
3118
3119   if (!be_quiet && ctype->charnames_act > 512)
3120     fputs (_("\
3121 Computing table size for character classes might take a while..."),
3122            stderr);
3123
3124   /* While we want to have a small total size we are willing to use a
3125      little bit larger table if this reduces the number of layers.
3126      Therefore we add a little penalty to the number of planes.
3127      Maybe this constant has to be adjusted a bit.  */
3128 #define PENALTY 128
3129   do
3130     {
3131       size_t cnt[act_size];
3132       size_t act_planes = 1;
3133
3134       memset (cnt, '\0', sizeof cnt);
3135
3136       for (idx = 0; idx < 256; ++idx)
3137         cnt[idx] = 1;
3138
3139       for (idx = 0; idx < ctype->charnames_act; ++idx)
3140         if (ctype->charnames[idx] >= 256)
3141           {
3142             size_t nr = ctype->charnames[idx] % act_size;
3143
3144             if (++cnt[nr] > act_planes)
3145               {
3146                 act_planes = cnt[nr];
3147                 if ((act_size + PENALTY) * act_planes >= min_total)
3148                   break;
3149               }
3150           }
3151
3152       if ((act_size + PENALTY) * act_planes < min_total)
3153         {
3154           min_total = (act_size + PENALTY) * act_planes;
3155           ctype->plane_size = act_size;
3156           ctype->plane_cnt = act_planes;
3157         }
3158
3159       ++act_size;
3160     }
3161   while (act_size < min_total);
3162
3163   if (!be_quiet && ctype->charnames_act > 512)
3164     fputs (_(" done\n"), stderr);
3165
3166
3167   ctype->names = (uint32_t *) xcalloc (ctype->plane_size
3168                                        * ctype->plane_cnt,
3169                                        sizeof (uint32_t));
3170
3171   for (idx = 1; idx < 256; ++idx)
3172     ctype->names[idx] = idx;
3173
3174   /* Trick: change the 0th entry's name to 1 to mark the cell occupied.  */
3175   ctype->names[0] = 1;
3176
3177   for (idx = 256; idx < ctype->charnames_act; ++idx)
3178     {
3179       size_t nr = (ctype->charnames[idx] % ctype->plane_size);
3180       size_t depth = 0;
3181
3182       while (ctype->names[nr + depth * ctype->plane_size])
3183         ++depth;
3184       assert (depth < ctype->plane_cnt);
3185
3186       ctype->names[nr + depth * ctype->plane_size] = ctype->charnames[idx];
3187
3188       /* Now for faster access remember the index in the NAMES_B array.  */
3189       ctype->charnames[idx] = nr + depth * ctype->plane_size;
3190     }
3191   ctype->names[0] = 0;
3192
3193
3194   /* You wonder about this amount of memory?  This is only because some
3195      users do not manage to address the array with unsigned values or
3196      data types with range >= 256.  '\200' would result in the array
3197      index -128.  To help these poor people we duplicate the entries for
3198      128 up to 255 below the entry for \0.  */
3199   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
3200                                              sizeof (char_class_t));
3201   ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
3202                                                  * ctype->plane_cnt,
3203                                                  sizeof (char_class32_t));
3204
3205   /* This is the array accessed using the multibyte string elements.  */
3206   for (idx = 0; idx < 256; ++idx)
3207     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3208
3209   /* Mirror the entries for 128 up to 254 below the entry for \0.  We must
3210      take care that entry -1 is not mirrored because EOF == -1.  */
3211   for (idx = 0; idx < 127; ++idx)
3212     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3213
3214   /* The 32 bit array contains all characters.  */
3215   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3216     ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3217
3218   /* Room for table of mappings.  */
3219   ctype->map = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3220   ctype->map32 = (uint32_t **) xmalloc (ctype->map_collection_nr
3221                                       * sizeof (uint32_t *));
3222
3223   /* Fill in all mappings.  */
3224   for (idx = 0; idx < 2; ++idx)
3225     {
3226       unsigned int idx2;
3227
3228       /* Allocate table.  */
3229       ctype->map[idx] = (uint32_t *) xmalloc ((256 + 128) * sizeof (uint32_t));
3230
3231       /* Copy values from collection.  */
3232       for (idx2 = 0; idx2 < 256; ++idx2)
3233         ctype->map[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3234
3235       /* Mirror the entries for 128 up to 254 below the entry for \0.  We
3236          must take care not to map entry -1 because EOF == -1.  */
3237       for (idx2 = 0; idx2 < 127; ++idx2)
3238         ctype->map[idx][idx2] = ctype->map[idx][256 + idx2];
3239
3240       /* EOF must map to EOF.  */
3241       ctype->map[idx][127] = EOF;
3242     }
3243
3244   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3245     {
3246       unsigned int idx2;
3247
3248       /* Allocate table.  */
3249       ctype->map32[idx] = (uint32_t *) xmalloc (ctype->plane_size
3250                                                 * ctype->plane_cnt
3251                                                 * sizeof (uint32_t));
3252
3253       /* Copy default value (identity mapping).  */
3254       memcpy (ctype->map32[idx], ctype->names,
3255               ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t));
3256
3257       /* Copy values from collection.  */
3258       for (idx2 = 0; idx2 < 256; ++idx2)
3259         if (ctype->map_collection[idx][idx2] != 0)
3260           ctype->map32[idx][idx2] = ctype->map_collection[idx][idx2];
3261
3262       while (idx2 < ctype->map_collection_act[idx])
3263         {
3264           if (ctype->map_collection[idx][idx2] != 0)
3265             ctype->map32[idx][ctype->charnames[idx2]] =
3266               ctype->map_collection[idx][idx2];
3267           ++idx2;
3268         }
3269     }
3270
3271   /* Extra array for class and map names.  */
3272   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3273                                                 * sizeof (uint32_t));
3274   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3275                                               * sizeof (uint32_t));
3276
3277   /* Array for width information.  Because the expected widths are very
3278      small we use only a single byte per entry.  This saves space and we
3279      need not provide the information twice with both endiannesses.  */
3280   width_table_size = (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul;
3281   ctype->width = (unsigned char *) xmalloc (width_table_size);
3282
3283   /* Initialize with default width value.  */
3284   memset (ctype->width, charmap->width_default, width_table_size);
3285   if (charmap->width_rules != NULL)
3286     {
3287       size_t cnt;
3288
3289       for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3290         {
3291           unsigned char bytes[charmap->mb_cur_max];
3292           int nbytes = charmap->width_rules[cnt].from->nbytes;
3293
3294           /* We have the range of characters for which the width is
3295              specified, described using byte sequences of the multibyte
3296              charset.  We have to convert this to UCS4 now.  And we
3297              cannot simply convert the beginning and the end of the
3298              sequence, we have to iterate over the byte sequence and
3299              convert it for every single character.  */
3300           memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3301
3302           while (nbytes < charmap->width_rules[cnt].to->nbytes
3303                  || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3304                             nbytes) <= 0)
3305             {
3306               /* Find the UCS value for `bytes'.  */
3307               int inner;
3308               uint32_t wch;
3309               struct charseq *seq =
3310                 charmap_find_symbol (charmap, bytes, nbytes);
3311
3312               if (seq == NULL)
3313                 wch = ILLEGAL_CHAR_VALUE;
3314               else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3315                 wch = seq->ucs4;
3316               else
3317                 wch = repertoire_find_value (ctype->repertoire, seq->name,
3318                                              strlen (seq->name));
3319
3320               if (wch != ILLEGAL_CHAR_VALUE)
3321                 {
3322                   /* Store the value.  */
3323                   size_t nr = wch % ctype->plane_size;
3324                   size_t depth = 0;
3325
3326                   while (ctype->names[nr + depth * ctype->plane_size] != wch)
3327                     ++depth;
3328                   assert (depth < ctype->plane_cnt);
3329
3330                   ctype->width[nr + depth * ctype->plane_size]
3331                     = charmap->width_rules[cnt].width;
3332                 }
3333
3334               /* "Increment" the bytes sequence.  */
3335               inner = nbytes - 1;
3336               while (inner >= 0 && bytes[inner] == 0xff)
3337                 --inner;
3338
3339               if (inner < 0)
3340                 {
3341                   /* We have to extend the byte sequence.  */
3342                   if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3343                     break;
3344
3345                   bytes[0] = 1;
3346                   memset (&bytes[1], 0, nbytes);
3347                   ++nbytes;
3348                 }
3349               else
3350                 {
3351                   ++bytes[inner];
3352                   while (++inner < nbytes)
3353                     bytes[inner] = 0;
3354                 }
3355             }
3356         }
3357     }
3358
3359   /* Set MB_CUR_MAX.  */
3360   ctype->mb_cur_max = charmap->mb_cur_max;
3361
3362   /* Now determine the table for the transliteration information.
3363
3364      XXX It is not yet clear to me whether it is worth implementing a
3365      complicated algorithm which uses a hash table to locate the entries.
3366      For now I'll use a simple array which can be searched using binary
3367      search.  */
3368   if (ctype->translit_copy_locale != NULL)
3369     {
3370       /* Fold in the transliteration information from the locale mentioned
3371          in the `include' statement.  */
3372       struct locale_ctype_t *here = ctype;
3373
3374       do
3375         {
3376           struct localedef_t *other = find_locale (LC_CTYPE,
3377                                                    here->translit_copy_locale,
3378                                                    repertoire->name, charmap);
3379
3380           if (other == NULL)
3381             {
3382               error (0, 0, _("\
3383 %s: transliteration data from locale `%s' not available"),
3384                      "LC_CTYPE", here->translit_copy_locale);
3385               break;
3386             }
3387
3388           here = other->categories[LC_CTYPE].ctype;
3389
3390           /* Enqueue the information if necessary.  */
3391           if (here->translit != NULL)
3392             {
3393               struct translit_t *endp = here->translit;
3394               while (endp->next != NULL)
3395                 endp = endp->next;
3396
3397               endp->next = ctype->translit;
3398               ctype->translit = here->translit;
3399             }
3400         }
3401       while (here->translit_copy_locale != NULL);
3402     }
3403
3404   if (ctype->translit != NULL)
3405     {
3406       /* First count how many entries we have.  This is the upper limit
3407          since some entries from the included files might be overwritten.  */
3408       size_t number = 0;
3409       size_t cnt;
3410       struct translit_t *runp = ctype->translit;
3411       struct translit_t **sorted;
3412       size_t from_len, to_len;
3413
3414       while (runp != NULL)
3415         {
3416           ++number;
3417           runp = runp->next;
3418         }
3419
3420       /* Next we allocate an array large enough and fill in the values.  */
3421       sorted = (struct translit_t **) alloca (number
3422                                               * sizeof (struct translit_t **));
3423       runp = ctype->translit;
3424       number = 0;
3425       do
3426         {
3427           /* Search for the place where to insert this string.
3428              XXX Better use a real sorting algorithm later.  */
3429           size_t idx = 0;
3430           int replace = 0;
3431
3432           while (idx < number)
3433             {
3434               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3435                                 (const wchar_t *) runp->from);
3436               if (res == 0)
3437                 {
3438                   replace = 1;
3439                   break;
3440                 }
3441               if (res > 0)
3442                 break;
3443               ++idx;
3444             }
3445
3446           if (replace)
3447             sorted[idx] = runp;
3448           else
3449             {
3450               memmove (&sorted[idx + 1], &sorted[idx],
3451                        (number - idx) * sizeof (struct translit_t *));
3452               sorted[idx] = runp;
3453               ++number;
3454             }
3455
3456           runp = runp->next;
3457         }
3458       while (runp != NULL);
3459
3460       /* The next step is putting all the possible transliteration
3461          strings in one memory block so that we can write it out.
3462          We need several different blocks:
3463          - index to the from-string array
3464          - from-string array
3465          - index to the to-string array
3466          - to-string array.
3467       */
3468       from_len = to_len = 0;
3469       for (cnt = 0; cnt < number; ++cnt)
3470         {
3471           struct translit_to_t *srunp;
3472           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3473           srunp = sorted[cnt]->to;
3474           while (srunp != NULL)
3475             {
3476               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
3477               srunp = srunp->next;
3478             }
3479           /* Plus one for the extra NUL character marking the end of
3480              the list for the current entry.  */
3481           ++to_len;
3482         }
3483
3484       /* We can allocate the arrays for the results.  */
3485       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3486       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3487       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3488       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
3489
3490       from_len = 0;
3491       to_len = 0;
3492       for (cnt = 0; cnt < number; ++cnt)
3493         {
3494           size_t len;
3495           struct translit_to_t *srunp;
3496
3497           ctype->translit_from_idx[cnt] = from_len;
3498           ctype->translit_to_idx[cnt] = to_len;
3499
3500           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3501           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
3502                    (const wchar_t *) sorted[cnt]->from, len);
3503           from_len += len;
3504
3505           ctype->translit_to_idx[cnt] = to_len;
3506           srunp = sorted[cnt]->to;
3507           while (srunp != NULL)
3508             {
3509               len = wcslen ((const wchar_t *) srunp->str) + 1;
3510               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
3511                        (const wchar_t *) srunp->str, len);
3512               to_len += len;
3513               srunp = srunp->next;
3514             }
3515           ctype->translit_to_tbl[to_len++] = L'\0';
3516         }
3517
3518       /* Store the information about the length.  */
3519       ctype->translit_idx_size = number * sizeof (uint32_t);
3520       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3521       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3522     }
3523   else
3524     {
3525       /* Provide some dummy pointers since we have nothing to write out.  */
3526       static uint32_t no_str = { 0 };
3527
3528       ctype->translit_from_idx = &no_str;
3529       ctype->translit_from_tbl = &no_str;
3530       ctype->translit_to_tbl = &no_str;
3531       ctype->translit_idx_size = 0;
3532       ctype->translit_from_tbl_size = 0;
3533       ctype->translit_to_tbl_size = 0;
3534     }
3535 }