locale/programs/ld-collate.c

   1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Library General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Library General Public License for more details.
  14
  15    You should have received a copy of the GNU Library General Public
  16    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18    Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <errno.h>
  25 #include <error.h>
  26 #include <stdlib.h>
  27 #include <wchar.h>
  28
  29 #include "charmap.h"
  30 #include "localeinfo.h"
  31 #include "linereader.h"
  32 #include "locfile.h"
  33 #include "localedef.h"
  34
  35 /* Uncomment the following line in the production version.  */
  36 /* #define NDEBUG 1 */
  37 #include <assert.h>
  38
  39 #define obstack_chunk_alloc malloc
  40 #define obstack_chunk_free free
  41
  42 /* Forward declaration.  */
  43 struct element_t;
  44
  45 /* Data type for list of strings.  */
  46 struct section_list
  47 {
  48   struct section_list *next;
  49   /* Name of the section.  */
  50   const char *name;
  51   /* First element of this section.  */
  52   struct element_t *first;
  53   /* Last element of this section.  */
  54   struct element_t *last;
  55   /* These are the rules for this section.  */
  56   enum coll_sort_rule *rules;
  57 };
  58
  59 struct element_t;
  60
  61 struct element_list_t
  62 {
  63   /* Number of elements.  */
  64   int cnt;
  65
  66   struct element_t **w;
  67 };
  68
  69 /* Data type for collating element.  */
  70 struct element_t
  71 {
  72   const char *name;
  73
  74   const char *mbs;
  75   const uint32_t *wcs;
  76   int mborder;
  77   int wcorder;
  78
  79   struct element_list_t *weights;
  80
  81   /* Where does the definition come from.  */
  82   const char *file;
  83   size_t line;
  84
  85   /* Which section does this belong to.  */
  86   struct section_list *section;
  87
  88   /* Predecessor and successor in the order list.  */
  89   struct element_t *last;
  90   struct element_t *next;
  91
  92   /* Next element in multibyte output list.  */
  93   struct element_t *mbnext;
  94 };
  95
  96 /* Special element value.  */
  97 #define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
  98 #define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
  99 #define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 100
 101 /* Data type for collating symbol.  */
 102 struct symbol_t
 103 {
 104   /* Point to place in the order list.  */
 105   struct element_t *order;
 106
 107   /* Where does the definition come from.  */
 108   const char *file;
 109   size_t line;
 110 };
 111
 112
 113 /* The real definition of the struct for the LC_COLLATE locale.  */
 114 struct locale_collate_t
 115 {
 116   int col_weight_max;
 117   int cur_weight_max;
 118
 119   /* List of known scripts.  */
 120   struct section_list *sections;
 121   /* Current section using definition.  */
 122   struct section_list *current_section;
 123   /* There always can be an unnamed section.  */
 124   struct section_list unnamed_section;
 125   /* To make handling of errors easier we have another section.  */
 126   struct section_list error_section;
 127
 128   /* Number of sorting rules given in order_start line.  */
 129   uint32_t nrules;
 130
 131   /* Start of the order list.  */
 132   struct element_t *start;
 133
 134   /* The undefined element.  */
 135   struct element_t undefined;
 136
 137   /* This is the cursor for `reorder_after' insertions.  */
 138   struct element_t *cursor;
 139
 140   /* This value is used when handling ellipsis.  */
 141   struct element_t ellipsis_weight;
 142
 143   /* Known collating elements.  */
 144   hash_table elem_table;
 145
 146   /* Known collating symbols.  */
 147   hash_table sym_table;
 148
 149   /* Known collation sequences.  */
 150   hash_table seq_table;
 151
 152   struct obstack mempool;
 153
 154   /* The LC_COLLATE category is a bit special as it is sometimes possible
 155      that the definitions from more than one input file contains information.
 156      Therefore we keep all relevant input in a list.  */
 157   struct locale_collate_t *next;
 158
 159   /* Arrays with heads of the list for each of the leading bytes in
 160      the multibyte sequences.  */
 161   struct element_t *mbheads[256];
 162 };
 163
 164
 165 /* We have a few global variables which are used for reading all
 166    LC_COLLATE category descriptions in all files.  */
 167 static int nrules;
 168
 169
 170 static struct section_list *
 171 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 172                    struct section_list *next)
 173 {
 174   struct section_list *newp;
 175
 176   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 177                                                 sizeof (*newp));
 178   newp->next = next;
 179   newp->name = string;
 180   newp->first = NULL;
 181
 182   return newp;
 183 }
 184
 185
 186 static struct element_t *
 187 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 188              const uint32_t *wcs, const char *name, size_t namelen)
 189 {
 190   struct element_t *newp;
 191
 192   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 193                                              sizeof (*newp));
 194   newp->name = name == NULL ? NULL : obstack_copy (&collate->mempool,
 195                                                    name, namelen);
 196   if (mbs != NULL)
 197     newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 198   else
 199     newp->mbs = NULL;
 200   if (wcs != NULL)
 201     {
 202       size_t nwcs = wcslen ((wchar_t *) wcs) + 1;
 203       uint32_t zero = 0;
 204       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 205       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 206       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 207     }
 208   else
 209     newp->wcs = NULL;
 210   newp->mborder = 0;
 211   newp->wcorder = 0;
 212
 213   /* Will be allocated later.  */
 214   newp->weights = NULL;
 215
 216   newp->file = NULL;
 217   newp->line = 0;
 218
 219   newp->section = NULL;
 220
 221   newp->last = NULL;
 222   newp->next = NULL;
 223
 224   newp->mbnext = NULL;
 225
 226   return newp;
 227 }
 228
 229
 230 static struct symbol_t *
 231 new_symbol (struct locale_collate_t *collate)
 232 {
 233   struct symbol_t *newp;
 234
 235   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 236
 237   newp->order = NULL;
 238
 239   newp->file = NULL;
 240   newp->line = 0;
 241
 242   return newp;
 243 }
 244
 245
 246 /* Test whether this name is already defined somewhere.  */
 247 static int
 248 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 249                  struct charmap_t *charmap, struct repertoire_t *repertoire,
 250                  const char *symbol, size_t symbol_len)
 251 {
 252   void *ignore = NULL;
 253
 254   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 255     {
 256       lr_error (ldfile, _("`%s' already defined in charmap"), symbol);
 257       return 1;
 258     }
 259
 260   if (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore) == 0)
 261     {
 262       lr_error (ldfile, _("`%s' already defined in repertoire"), symbol);
 263       return 1;
 264     }
 265
 266   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 267     {
 268       lr_error (ldfile, _("`%s' already defined as collating symbol"), symbol);
 269       return 1;
 270     }
 271
 272   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 273     {
 274       lr_error (ldfile, _("`%s' already defined as collating element"),
 275                 symbol);
 276       return 1;
 277     }
 278
 279   return 0;
 280 }
 281
 282
 283 /* Read the direction specification.  */
 284 static void
 285 read_directions (struct linereader *ldfile, struct token *arg,
 286                  struct charmap_t *charmap, struct repertoire_t *repertoire,
 287                  struct locale_collate_t *collate)
 288 {
 289   int cnt = 0;
 290   int max = nrules ?: 10;
 291   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 292   int warned = 0;
 293
 294   while (1)
 295     {
 296       int valid = 0;
 297
 298       if (arg->tok == tok_forward)
 299         {
 300           if (rules[cnt] & sort_backward)
 301             {
 302               if (! warned)
 303                 {
 304                   lr_error (ldfile, _("\
 305 %s: `forward' and `backward' are mutually excluding each other"),
 306                             "LC_COLLATE");
 307                   warned = 1;
 308                 }
 309             }
 310           else if (rules[cnt] & sort_forward)
 311             {
 312               if (! warned)
 313                 {
 314                   lr_error (ldfile, _("\
 315 %s: `%s' mentioned twice in definition of weight %d"),
 316                             "LC_COLLATE", "forward", cnt + 1);
 317                 }
 318             }
 319           else
 320             rules[cnt] |= sort_forward;
 321
 322           valid = 1;
 323         }
 324       else if (arg->tok == tok_backward)
 325         {
 326           if (rules[cnt] & sort_forward)
 327             {
 328               if (! warned)
 329                 {
 330                   lr_error (ldfile, _("\
 331 %s: `forward' and `backward' are mutually excluding each other"),
 332                             "LC_COLLATE");
 333                   warned = 1;
 334                 }
 335             }
 336           else if (rules[cnt] & sort_backward)
 337             {
 338               if (! warned)
 339                 {
 340                   lr_error (ldfile, _("\
 341 %s: `%s' mentioned twice in definition of weight %d"),
 342                             "LC_COLLATE", "backward", cnt + 1);
 343                 }
 344             }
 345           else
 346             rules[cnt] |= sort_backward;
 347
 348           valid = 1;
 349         }
 350       else if (arg->tok == tok_position)
 351         {
 352           if (rules[cnt] & sort_position)
 353             {
 354               if (! warned)
 355                 {
 356                   lr_error (ldfile, _("\
 357 %s: `%s' mentioned twice in definition of weight %d in category `%s'"),
 358                             "LC_COLLATE", "position", cnt + 1);
 359                 }
 360             }
 361           else
 362             rules[cnt] |= sort_position;
 363
 364           valid = 1;
 365         }
 366
 367       if (valid)
 368         arg = lr_token (ldfile, charmap, repertoire);
 369
 370       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 371           || arg->tok == tok_semicolon)
 372         {
 373           if (! valid && ! warned)
 374             {
 375               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 376               warned = 1;
 377             }
 378
 379           /* See whether we have to increment the counter.  */
 380           if (arg->tok != tok_comma && rules[cnt] != 0)
 381             ++cnt;
 382
 383           if (arg->tok == tok_eof || arg->tok == tok_eol)
 384             /* End of line or file, so we exit the loop.  */
 385             break;
 386
 387           if (nrules == 0)
 388             {
 389               /* See whether we have enough room in the array.  */
 390               if (cnt == max)
 391                 {
 392                   max += 10;
 393                   rules = (enum coll_sort_rule *) xrealloc (rules,
 394                                                             max
 395                                                             * sizeof (*rules));
 396                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 397                 }
 398             }
 399           else
 400             {
 401               if (cnt == nrules)
 402                 {
 403                   /* There must not be any more rule.  */
 404                   if (! warned)
 405                     {
 406                       lr_error (ldfile, _("\
 407 %s: too many rules; first entry only had %d"),
 408                                 "LC_COLLATE", nrules);
 409                       warned = 1;
 410                     }
 411
 412                   lr_ignore_rest (ldfile, 0);
 413                   break;
 414                 }
 415             }
 416         }
 417       else
 418         {
 419           if (! warned)
 420             {
 421               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 422               warned = 1;
 423             }
 424         }
 425
 426       arg = lr_token (ldfile, charmap, repertoire);
 427     }
 428
 429   if (nrules == 0)
 430     {
 431       /* Now we know how many rules we have.  */
 432       nrules = cnt;
 433       rules = (enum coll_sort_rule *) xrealloc (rules,
 434                                                 nrules * sizeof (*rules));
 435     }
 436   else
 437     {
 438       if (cnt < nrules)
 439         {
 440           /* Not enough rules in this specification.  */
 441           if (! warned)
 442             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 443
 444           do
 445             rules[cnt] = sort_forward;
 446           while (++cnt < nrules);
 447         }
 448     }
 449
 450   collate->current_section->rules = rules;
 451 }
 452
 453
 454 static struct element_t *
 455 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 456               const char *str, size_t len, uint32_t *wcstr)
 457 {
 458   struct element_t *result = NULL;
 459
 460   /* Search for the entries among the collation sequences already define.  */
 461   if (find_entry (&collate->seq_table, str, len, (void **) &result) != 0)
 462     {
 463       /* Nope, not define yet.  So we see whether it is a
 464          collation symbol.  */
 465       void *ptr;
 466
 467       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 468         {
 469           /* It's a collation symbol.  */
 470           struct symbol_t *sym = (struct symbol_t *) ptr;
 471           result = sym->order;
 472
 473           if (result == NULL)
 474             result = sym->order = new_element (collate, NULL, 0, NULL,
 475                                                NULL, 0);
 476         }
 477       else if (find_entry (&collate->elem_table, str, len,
 478                            (void **) &result) != 0)
 479         {
 480           /* It's also no collation element.  So it is an character
 481              element defined later.  */
 482           result = new_element (collate, NULL, 0, NULL, str, len);
 483           if (result != NULL)
 484             /* Insert it into the sequence table.  */
 485             insert_entry (&collate->seq_table, str, len, result);
 486         }
 487     }
 488
 489   return result;
 490 }
 491
 492
 493 static void
 494 unlink_element (struct locale_collate_t *collate)
 495 {
 496   if (collate->cursor->next != NULL)
 497     collate->cursor->next->last = collate->cursor->last;
 498   if (collate->cursor->last != NULL)
 499     collate->cursor->last->next = collate->cursor->next;
 500   collate->cursor = collate->cursor->last;
 501 }
 502
 503
 504 static void
 505 insert_weights (struct linereader *ldfile, struct element_t *elem,
 506                 struct charmap_t *charmap, struct repertoire_t *repertoire,
 507                 struct locale_collate_t *collate, enum token_t ellipsis)
 508 {
 509   int weight_cnt;
 510   struct token *arg;
 511
 512   /* Initialize all the fields.  */
 513   elem->file = ldfile->fname;
 514   elem->line = ldfile->lineno;
 515   elem->last = collate->cursor;
 516   elem->next = collate->cursor ? collate->cursor->next : NULL;
 517   if (collate->cursor != NULL)
 518     collate->cursor->next = elem;
 519   elem->weights = (struct element_list_t *)
 520     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 521   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 522
 523   if (collate->current_section->first == NULL)
 524     collate->current_section->first = elem;
 525   if (collate->current_section->last == collate->cursor)
 526     collate->current_section->last = elem;
 527
 528   collate->cursor = elem;
 529
 530   weight_cnt = 0;
 531
 532   arg = lr_token (ldfile, charmap, repertoire);
 533   do
 534     {
 535       if (arg->tok == tok_eof || arg->tok == tok_eol)
 536         break;
 537
 538       if (arg->tok == tok_ignore)
 539         {
 540           /* The weight for this level has to be ignored.  We use the
 541              null pointer to indicate this.  */
 542           elem->weights[weight_cnt].w = (struct element_t **)
 543             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 544           elem->weights[weight_cnt].w[0] = NULL;
 545           elem->weights[weight_cnt].cnt = 1;
 546         }
 547       else if (arg->tok == tok_bsymbol)
 548         {
 549           struct element_t *val = find_element (ldfile, collate,
 550                                                 arg->val.str.startmb,
 551                                                 arg->val.str.lenmb,
 552                                                 arg->val.str.startwc);
 553
 554           if (val == NULL)
 555             break;
 556
 557           elem->weights[weight_cnt].w = (struct element_t **)
 558             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 559           elem->weights[weight_cnt].w[0] = val;
 560           elem->weights[weight_cnt].cnt = 1;
 561         }
 562       else if (arg->tok == tok_string)
 563         {
 564           /* Split the string up in the individual characters and put
 565              the element definitions in the list.  */
 566           const char *cp = arg->val.str.startmb;
 567           int cnt = 0;
 568           struct element_t *charelem;
 569           void *base = obstack_base (&collate->mempool);
 570
 571           if (*cp == '\0')
 572             {
 573               lr_error (ldfile, _("%s: empty weight string not allowed"),
 574                         "LC_COLLATE");
 575               lr_ignore_rest (ldfile, 0);
 576               break;
 577             }
 578
 579           do
 580             {
 581               if (*cp == '<')
 582                 {
 583                   /* Ahh, it's a bsymbol.  That's what we want.  */
 584                   const char *startp = cp;
 585
 586                   while (*++cp != '>')
 587                     {
 588                       if (*cp == ldfile->escape_char)
 589                         ++cp;
 590                       if (*cp == '\0')
 591                         {
 592                           /* It's a syntax error.  */
 593                           obstack_free (&collate->mempool, base);
 594                           goto syntax;
 595                         }
 596                     }
 597
 598                     charelem = find_element (ldfile, collate, startp,
 599                                              cp - startp, NULL);
 600                     ++cp;
 601                 }
 602               else
 603                 {
 604                   /* People really shouldn't use characters directly in
 605                      the string.  Especially since it's not really clear
 606                      what this means.  We interpret all characters in the
 607                      string as if that would be bsymbols.  Otherwise we
 608                      would have to match back to bsymbols somehow and this
 609                      is also not what people normally expect.  */
 610                   charelem = find_element (ldfile, collate, cp++, 1, NULL);
 611                 }
 612
 613               if (charelem == NULL)
 614                 {
 615                   /* We ignore the rest of the line.  */
 616                   lr_ignore_rest (ldfile, 0);
 617                   break;
 618                 }
 619
 620               /* Add the pointer.  */
 621               obstack_ptr_grow (&collate->mempool, charelem);
 622               ++cnt;
 623             }
 624           while (*cp != '\0');
 625
 626           /* Now store the information.  */
 627           elem->weights[weight_cnt].w = (struct element_t **)
 628             obstack_finish (&collate->mempool);
 629           elem->weights[weight_cnt].cnt = cnt;
 630
 631           /* We don't need the string anymore.  */
 632           free (arg->val.str.startmb);
 633         }
 634       else if (ellipsis != tok_none
 635                && (arg->tok == tok_ellipsis2
 636                    || arg->tok == tok_ellipsis3
 637                    || arg->tok == tok_ellipsis4))
 638         {
 639           /* It must be the same ellipsis as used in the initial column.  */
 640           if (arg->tok != ellipsis)
 641             lr_error (ldfile, _("\
 642 %s: weights must use the same ellipsis symbol as the name"),
 643                       "LC_COLLATE");
 644
 645           /* The weight for this level has to be ignored.  We use the
 646              null pointer to indicate this.  */
 647           elem->weights[weight_cnt].w = (struct element_t **)
 648             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 649           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 650           elem->weights[weight_cnt].cnt = 1;
 651         }
 652       else
 653         {
 654         syntax:
 655           /* It's a syntax error.  */
 656           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 657           lr_ignore_rest (ldfile, 0);
 658           break;
 659         }
 660
 661       arg = lr_token (ldfile, charmap, repertoire);
 662       /* This better should be the end of the line or a semicolon.  */
 663       if (arg->tok == tok_semicolon)
 664         /* OK, ignore this and read the next token.  */
 665         arg = lr_token (ldfile, charmap, repertoire);
 666       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 667         {
 668           /* It's a syntax error.  */
 669           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 670           lr_ignore_rest (ldfile, 0);
 671           break;
 672         }
 673     }
 674   while (++weight_cnt < nrules);
 675
 676   if (weight_cnt < nrules)
 677     {
 678       /* This means the rest of the line uses the current element as
 679          the weight.  */
 680       do
 681         {
 682           elem->weights[weight_cnt].w = (struct element_t **)
 683             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 684           elem->weights[weight_cnt].w[0] = elem;
 685           elem->weights[weight_cnt].cnt = 1;
 686         }
 687       while (++weight_cnt < nrules);
 688     }
 689   else
 690     {
 691       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 692         {
 693           /* Too many rule values.  */
 694           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 695           lr_ignore_rest (ldfile, 0);
 696         }
 697       else
 698         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 699     }
 700 }
 701
 702
 703 static int
 704 insert_value (struct linereader *ldfile, struct token *arg,
 705               struct charmap_t *charmap, struct repertoire_t *repertoire,
 706               struct locale_collate_t *collate)
 707 {
 708   /* First find out what kind of symbol this is.  */
 709   struct charseq *seq;
 710   uint32_t wc;
 711   struct element_t *elem = NULL;
 712
 713   /* Try to find the character in the charmap.  */
 714   seq = charmap_find_value (charmap, arg->val.str.startmb, arg->val.str.lenmb);
 715
 716   /* Determine the wide character.  */
 717   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 718     {
 719       wc = repertoire_find_value (repertoire, arg->val.str.startmb,
 720                                   arg->val.str.lenmb);
 721       if (seq != NULL)
 722         seq->ucs4 = wc;
 723     }
 724   else
 725     wc = seq->ucs4;
 726
 727   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 728     {
 729       /* It's no character, so look through the collation elements and
 730          symbol list.  */
 731       void *result;
 732
 733       if (find_entry (&collate->sym_table, arg->val.str.startmb,
 734                       arg->val.str.lenmb, &result) == 0)
 735         {
 736           /* It's a collation symbol.  */
 737           struct symbol_t *sym = (struct symbol_t *) result;
 738           elem = sym->order;
 739
 740           if (elem == NULL)
 741             elem = sym->order = new_element (collate, NULL, 0, NULL, NULL, 0);
 742         }
 743       else if (find_entry (&collate->elem_table, arg->val.str.startmb,
 744                            arg->val.str.lenmb, (void **) &elem) != 0)
 745         {
 746           /* It's also no collation element.  Therefore ignore it.  */
 747           lr_ignore_rest (ldfile, 0);
 748           return 1;
 749         }
 750     }
 751   else
 752     {
 753       /* Otherwise the symbols stands for a character.  */
 754       if (find_entry (&collate->seq_table, arg->val.str.startmb,
 755                       arg->val.str.lenmb, (void **) &elem) != 0)
 756         {
 757           uint32_t wcs[2] = { wc, 0 };
 758
 759           /* We have to allocate an entry.  */
 760           elem = new_element (collate, seq != NULL ? seq->bytes : NULL,
 761                               seq != NULL ? seq->nbytes : 0,
 762                               wcs, arg->val.str.startmb, arg->val.str.lenmb);
 763
 764           /* And add it to the table.  */
 765           if (insert_entry (&collate->seq_table, arg->val.str.startmb,
 766                             arg->val.str.lenmb, elem) != 0)
 767             /* This cannot happen.  */
 768             assert (! "Internal error");
 769         }
 770     }
 771
 772   /* Test whether this element is not already in the list.  */
 773   if (elem->next != NULL || (collate->cursor != NULL
 774                              && elem->next == collate->cursor))
 775     {
 776       lr_error (ldfile, _("order for `%.*s' already defined at %s:%zu"),
 777                 arg->val.str.lenmb, arg->val.str.startmb,
 778                 elem->file, elem->line);
 779       lr_ignore_rest (ldfile, 0);
 780       return 1;
 781     }
 782
 783   insert_weights (ldfile, elem, charmap, repertoire, collate, tok_none);
 784
 785   return 0;
 786 }
 787
 788
 789 static void
 790 handle_ellipsis (struct linereader *ldfile, struct token *arg,
 791                  enum token_t ellipsis, struct charmap_t *charmap,
 792                  struct repertoire_t *repertoire,
 793                  struct locale_collate_t *collate)
 794 {
 795   struct element_t *startp;
 796   struct element_t *endp;
 797
 798   /* Unlink the entry added for the ellipsis.  */
 799   unlink_element (collate);
 800   startp = collate->cursor;
 801
 802   /* Process and add the end-entry.  */
 803   if (arg != NULL
 804       && insert_value (ldfile, arg, charmap, repertoire, collate))
 805     /* Something went wrong with inserting the to-value.  This means
 806        we cannot process the ellipsis.  */
 807     return;
 808
 809   /* Reset the cursor.  */
 810   collate->cursor = startp;
 811
 812   /* Now we have to handle many different situations:
 813      - we have to distinguish between the three different ellipsis forms
 814      - the is the ellipsis at the beginning, in the middle, or at the end.
 815   */
 816   endp = collate->cursor->next;
 817   assert (arg == NULL || endp != NULL);
 818
 819   /* Both, the start and the end symbol, must stand for characters.  */
 820   if ((startp == NULL || startp->name == NULL)
 821       || (endp == NULL || endp->name == NULL))
 822     {
 823       lr_error (ldfile, _("\
 824 %s: the start end the end symbol of a range must stand for characters"),
 825                 "LC_COLLATE");
 826       return;
 827     }
 828
 829   if (ellipsis == tok_ellipsis3)
 830     {
 831       /* One requirement we make here: the length of the byte
 832          sequences for the first and end character must be the same.
 833          This is mainly to prevent unwanted effects and this is often
 834          not what is wanted.  */
 835       size_t len = (startp->mbs != NULL ? strlen (startp->mbs)
 836                     : (endp->mbs != NULL ? strlen (endp->mbs) : 0));
 837       char mbcnt[len + 1];
 838       char mbend[len + 1];
 839
 840       /* Well, this should be caught somewhere else already.  Just to
 841          make sure.  */
 842       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
 843       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
 844
 845       if (startp != NULL && endp != NULL
 846           && startp->mbs != NULL && endp->mbs != NULL
 847           && strlen (startp->mbs) != strlen (endp->mbs))
 848         {
 849           lr_error (ldfile, _("\
 850 %s: byte sequences of first and last character must have the same length"),
 851                     "LC_COLLATE");
 852           return;
 853         }
 854
 855       /* Determine whether we have to generate multibyte sequences.  */
 856       if ((startp == NULL || startp->mbs != NULL)
 857           && (endp == NULL || endp->mbs != NULL))
 858         {
 859           int cnt;
 860           int ret;
 861
 862           /* Prepare the beginning byte sequence.  This is either from the
 863              beginning byte sequence or it is all nulls if it was an
 864              initial ellipsis.  */
 865           if (startp == NULL || startp->mbs == NULL)
 866             memset (mbcnt, '\0', len);
 867           else
 868             {
 869               memcpy (mbcnt, startp->mbs, len);
 870
 871               /* And increment it so that the value is the first one we will
 872                  try to insert.  */
 873               for (cnt = len - 1; cnt >= 0; --cnt)
 874                 if (++mbcnt[cnt] != '\0')
 875                   break;
 876             }
 877           mbcnt[len] = '\0';
 878
 879           /* And the end sequence.  */
 880           if (endp == NULL || endp->mbs == NULL)
 881             memset (mbend, '\0', len);
 882           else
 883             memcpy (mbend, endp->mbs, len);
 884           mbend[len] = '\0';
 885
 886           /* Test whether we have a correct range.  */
 887           ret = memcmp (mbcnt, mbend, len);
 888           if (ret >= 0)
 889             {
 890               if (ret > 0)
 891                 lr_error (ldfile, _("%s: byte sequence of first character of \
 892 sequence is not lower than that of the last character"), "LC_COLLATE");
 893               return;
 894             }
 895
 896           /* Generate the byte sequences data.  */
 897           while (1)
 898             {
 899               struct charseq *seq;
 900
 901               /* Quite a bit of work ahead.  We have to find the character
 902                  definition for the byte sequence and then determine the
 903                  wide character belonging to it.  */
 904               seq = charmap_find_symbol (charmap, mbcnt, len);
 905               if (seq != NULL)
 906                 {
 907                   struct element_t *elem;
 908                   size_t namelen;
 909
 910                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 911                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
 912                                                        strlen (seq->name));
 913
 914                   /* I don't this this can ever happen.  */
 915                   assert (seq->name != NULL);
 916                   namelen = strlen (seq->name);
 917
 918                   /* Now we are ready to insert the new value in the
 919                      sequence.  Find out whether the element is
 920                      already known.  */
 921                   if (find_entry (&collate->seq_table, seq->name, namelen,
 922                                   (void **) &elem) != 0)
 923                     {
 924                       uint32_t wcs[2] = { seq->ucs4, 0 };
 925
 926                       /* We have to allocate an entry.  */
 927                       elem = new_element (collate, mbcnt, len, wcs, seq->name,
 928                                           namelen);
 929
 930                       /* And add it to the table.  */
 931                       if (insert_entry (&collate->seq_table, seq->name,
 932                                         namelen, elem) != 0)
 933                         /* This cannot happen.  */
 934                         assert (! "Internal error");
 935                     }
 936
 937                   /* Test whether this element is not already in the list.  */
 938                   if (elem->next != NULL || (collate->cursor != NULL
 939                                              && elem->next == collate->cursor))
 940                     {
 941                       lr_error (ldfile, _("\
 942 order for `%.*s' already defined at %s:%zu"),
 943                                 namelen, seq->name, elem->file, elem->line);
 944                       goto increment;
 945                     }
 946
 947                   /* Enqueue the new element.  */
 948                   elem->last = collate->cursor;
 949                   elem->next = collate->cursor->next;
 950                   elem->last->next = elem;
 951                   if (elem->next != NULL)
 952                     elem->next->last = elem;
 953                   collate->cursor = elem;
 954
 955                  /* Add the weight value.  We take them from the
 956                     `ellipsis_weights' member of `collate'.  */
 957                   elem->weights = (struct element_list_t *)
 958                     obstack_alloc (&collate->mempool,
 959                                    nrules * sizeof (struct element_list_t));
 960                   for (cnt = 0; cnt < nrules; ++cnt)
 961                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
 962                         && (collate->ellipsis_weight.weights[cnt].w[0]
 963                             == ELEMENT_ELLIPSIS2))
 964                       {
 965                         elem->weights[cnt].w = (struct element_t **)
 966                           obstack_alloc (&collate->mempool,
 967                                          sizeof (struct element_t *));
 968                         elem->weights[cnt].w[0] = elem;
 969                         elem->weights[cnt].cnt = 1;
 970                       }
 971                     else
 972                       {
 973                         /* Simly use the weight from `ellipsis_weight'.  */
 974                         elem->weights[cnt].w =
 975                           collate->ellipsis_weight.weights[cnt].w;
 976                         elem->weights[cnt].cnt =
 977                           collate->ellipsis_weight.weights[cnt].cnt;
 978                       }
 979                 }
 980
 981               /* Increment for the next round.  */
 982             increment:
 983               for (cnt = len - 1; cnt >= 0; --cnt)
 984                 if (++mbcnt[cnt] != '\0')
 985                   break;
 986
 987               /* Find out whether this was all.  */
 988               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
 989                 /* Yep, that's all.  */
 990                 break;
 991             }
 992         }
 993     }
 994   else
 995     {
 996       /* For symbolic range we naturally must have a beginning and an
 997          end specified by the user.  */
 998       if (startp == NULL)
 999         lr_error (ldfile, _("\
1000 %s: symbolic range ellipsis must not directly follow `order_start'"),
1001                   "LC_COLLATE");
1002       else if (endp == NULL)
1003         lr_error (ldfile, _("\
1004 %s: symbolic range ellipsis must not be direct followed by `order_end'"),
1005                   "LC_COLLATE");
1006       else
1007         {
1008           /* Determine the range.  To do so we have to determine the
1009              common prefix of the both names and then the numeric
1010              values of both ends.  */
1011           size_t lenfrom = strlen (startp->name);
1012           size_t lento = strlen (endp->name);
1013           char buf[lento + 1];
1014           int preflen = 0;
1015           long int from;
1016           long int to;
1017           char *cp;
1018           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1019
1020           if (lenfrom != lento)
1021             {
1022             invalid_range:
1023               lr_error (ldfile, _("\
1024 `%s' and `%.*s' are no valid names for symbolic range"),
1025                         startp->name, lento, endp->name);
1026               return;
1027             }
1028
1029           while (startp->name[preflen] == endp->name[preflen])
1030             if (startp->name[preflen] == '\0')
1031               /* Nothing to be done.  The start and end point are identical
1032                  and while inserting the end point we have already given
1033                  the user an error message.  */
1034               return;
1035             else
1036               ++preflen;
1037
1038           errno = 0;
1039           from = strtol (startp->name + preflen, &cp, base);
1040           if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1041             goto invalid_range;
1042
1043           errno = 0;
1044           to = strtol (endp->name + preflen, &cp, base);
1045           if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1046             goto invalid_range;
1047
1048           /* Copy the prefix.  */
1049           memcpy (buf, startp->name, preflen);
1050
1051           /* Loop over all values.  */
1052           for (++from; from < to; ++from)
1053             {
1054               struct element_t *elem = NULL;
1055               struct charseq *seq;
1056               uint32_t wc;
1057               int cnt;
1058
1059               /* Generate the the name.  */
1060               sprintf (buf + preflen, base == 10 ? "%d" : "%x", from);
1061
1062               /* Look whether this name is already defined.  */
1063               if (find_entry (&collate->seq_table, arg->val.str.startmb,
1064                               arg->val.str.lenmb, (void **) &elem) == 0)
1065                 {
1066                   if (elem->next != NULL || (collate->cursor != NULL
1067                                              && elem->next == collate->cursor))
1068                     {
1069                       lr_error (ldfile, _("\
1070 %s: order for `%.*s' already defined at %s:%zu"),
1071                                 "LC_COLLATE", lenfrom, buf,
1072                                 elem->file, elem->line);
1073                       continue;
1074                     }
1075
1076                   if (elem->name == NULL)
1077                     {
1078                       lr_error (ldfile, _("%s: `%s' must be a charater"),
1079                                 "LC_COLLATE", buf);
1080                       continue;
1081                     }
1082                 }
1083
1084               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1085                 {
1086                   /* Search for a character of this name.  */
1087                   seq = charmap_find_value (charmap, buf, lenfrom);
1088                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1089                     {
1090                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1091
1092                       if (seq != NULL)
1093                         seq->ucs4 = wc;
1094                     }
1095                   else
1096                     wc = seq->ucs4;
1097
1098                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1099                     /* We don't know anything about a character with this
1100                        name.  XXX Should we warn?  */
1101                     continue;
1102
1103                   if (elem == NULL)
1104                     {
1105                       uint32_t wcs[2] = { wc, 0 };
1106
1107                       /* We have to allocate an entry.  */
1108                       elem = new_element (collate,
1109                                           seq != NULL ? seq->bytes : NULL,
1110                                           seq != NULL ? seq->nbytes : 0,
1111                                           wc == ILLEGAL_CHAR_VALUE
1112                                           ? NULL : wcs,
1113                                           buf, lenfrom);
1114                     }
1115                   else
1116                     {
1117                       /* Update the element.  */
1118                       if (seq != NULL)
1119                         elem->mbs = obstack_copy0 (&collate->mempool,
1120                                                    seq->bytes, seq->nbytes);
1121
1122                       if (wc != ILLEGAL_CHAR_VALUE)
1123                         {
1124                           uint32_t zero = 0;
1125
1126                           obstack_grow (&collate->mempool,
1127                                         &wc, sizeof (uint32_t));
1128                           obstack_grow (&collate->mempool,
1129                                         &zero, sizeof (uint32_t));
1130                           elem->wcs = obstack_finish (&collate->mempool);
1131                         }
1132                     }
1133
1134                   elem->file = ldfile->fname;
1135                   elem->line = ldfile->lineno;
1136                 }
1137
1138               /* Enqueue the new element.  */
1139               elem->last = collate->cursor;
1140               elem->next = collate->cursor->next;
1141               elem->last->next = elem;
1142               if (elem->next != NULL)
1143                 elem->next->last = elem;
1144               collate->cursor = elem;
1145
1146               /* Now add the weights.  They come from the `ellipsis_weights'
1147                  member of `collate'.  */
1148               elem->weights = (struct element_list_t *)
1149                 obstack_alloc (&collate->mempool,
1150                                nrules * sizeof (struct element_list_t));
1151               for (cnt = 0; cnt < nrules; ++cnt)
1152                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1153                     && (collate->ellipsis_weight.weights[cnt].w[0]
1154                         == ELEMENT_ELLIPSIS2))
1155                   {
1156                     elem->weights[cnt].w = (struct element_t **)
1157                       obstack_alloc (&collate->mempool,
1158                                      sizeof (struct element_t *));
1159                     elem->weights[cnt].w[0] = elem;
1160                     elem->weights[cnt].cnt = 1;
1161                   }
1162                 else
1163                   {
1164                     /* Simly use the weight from `ellipsis_weight'.  */
1165                     elem->weights[cnt].w =
1166                       collate->ellipsis_weight.weights[cnt].w;
1167                     elem->weights[cnt].cnt =
1168                       collate->ellipsis_weight.weights[cnt].cnt;
1169                   }
1170             }
1171         }
1172     }
1173 }
1174
1175
1176 static void
1177 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1178                  struct localedef_t *copy_locale, int ignore_content)
1179 {
1180   if (!ignore_content)
1181     {
1182       struct locale_collate_t *collate;
1183
1184       if (copy_locale == NULL)
1185         {
1186           collate = locale->categories[LC_COLLATE].collate =
1187             (struct locale_collate_t *)
1188             xcalloc (1, sizeof (struct locale_collate_t));
1189
1190           /* Init the various data structures.  */
1191           init_hash (&collate->elem_table, 100);
1192           init_hash (&collate->sym_table, 100);
1193           init_hash (&collate->seq_table, 500);
1194           obstack_init (&collate->mempool);
1195
1196           collate->col_weight_max = -1;
1197         }
1198       else
1199         collate = locale->categories[LC_COLLATE].collate =
1200           copy_locale->categories[LC_COLLATE].collate;
1201     }
1202
1203   ldfile->translate_strings = 0;
1204   ldfile->return_widestr = 0;
1205 }
1206
1207
1208 void
1209 collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
1210 {
1211   /* Now is the time when we can assign the individual collation
1212      values for all the symbols.  We have possibly different values
1213      for the wide- and the multibyte-character symbols.  This is done
1214      since it might make a difference in the encoding if there is in
1215      some cases no multibyte-character but there are wide-characters.
1216      (The other way around it is not important since theencoded
1217      collation value in the wide-character case is 32 bits wide and
1218      therefore requires no encoding).
1219
1220      The lowest collation value assigned is 2.  Zero is reserved for
1221      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1222      functions and 1 is used to separate the individual passes for the
1223      different rules.
1224
1225      We also have to construct is list with all the bytes/words which
1226      can come first in a sequence, followed by all the elements which
1227      also start with this byte/word.  The order is reverse which has
1228      among others the important effect that longer strings are located
1229      first in the list.  This is required for the output data since
1230      the algorithm used in `strcoll' etc depends on this.
1231
1232      The multibyte case is easy.  We simply sort into an array with
1233      256 elements.  */
1234   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1235   int mbact = 2;
1236   int wcact = 2;
1237   struct element_t *runp = collate->start;
1238
1239   while (runp != NULL)
1240     {
1241       if (runp->mbs != NULL)
1242         {
1243           struct element_t **eptr;
1244
1245           /* Determine the order.  */
1246           runp->mborder = mbact++;
1247
1248           /* Find the point where to insert in the list.  */
1249           eptr = &collate->mbheads[(unsigned int) runp->mbs[0]];
1250           while (*eptr != NULL)
1251             {
1252               /* Check which string is larger, the one we want to insert
1253                  or the current element of the list we are looking at.  */
1254               assert (runp->mbs[0] == (*eptr)->mbs[0]);
1255               if (strcmp (runp->mbs, (*eptr)->mbs) > 0)
1256                 break;
1257
1258               eptr = &(*eptr)->mbnext;
1259             }
1260
1261           /* Set the pointers.  */
1262           runp->mbnext = *eptr;
1263           *eptr = runp;
1264         }
1265
1266       if (runp->wcs != NULL)
1267         runp->wcorder = wcact++;
1268
1269       /* Up to the next entry.  */
1270       runp = runp->next;
1271     }
1272 }
1273
1274
/* Output stage for the LC_COLLATE category of LOCALE.

   In this revision the function is an empty stub: LOCALE, CHARMAP and
   OUTPUT_PATH are all ignored and nothing is written.
   NOTE(review): presumably meant to emit the collation data below
   OUTPUT_PATH once implemented -- confirm against the callers.  */
void
collate_output (struct localedef_t *locale, struct charmap_t *charmap,
                const char *output_path)
{
  /* Not implemented yet.  */
  return;
}
1280
1281
1282 void
1283 collate_read (struct linereader *ldfile, struct localedef_t *result,
1284               struct charmap_t *charmap, const char *repertoire_name,
1285               int ignore_content)
1286 {
1287   struct repertoire_t *repertoire = NULL;
1288   struct locale_collate_t *collate;
1289   struct token *now;
1290   struct token *arg = NULL;
1291   enum token_t nowtok;
1292   int state = 0;
1293   enum token_t was_ellipsis = tok_none;
1294   struct localedef_t *copy_locale = NULL;
1295
1296   /* Get the repertoire we have to use.  */
1297   if (repertoire_name != NULL)
1298     repertoire = repertoire_read (repertoire_name);
1299
1300   /* The rest of the line containing `LC_COLLATE' must be free.  */
1301   lr_ignore_rest (ldfile, 1);
1302
1303   do
1304     {
1305       now = lr_token (ldfile, charmap, NULL);
1306       nowtok = now->tok;
1307     }
1308   while (nowtok == tok_eol);
1309
1310   if (nowtok == tok_copy)
1311     {
1312       state = 2;
1313       now = lr_token (ldfile, charmap, NULL);
1314       if (now->tok != tok_string)
1315         {
1316           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
1317
1318         skip_category:
1319           do
1320             now = lr_token (ldfile, charmap, NULL);
1321           while (now->tok != tok_eof && now->tok != tok_end);
1322
1323           if (now->tok != tok_eof
1324               || (now = lr_token (ldfile, charmap, NULL), now->tok == tok_eof))
1325             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
1326           else if (now->tok != tok_lc_collate)
1327             {
1328               lr_error (ldfile, _("\
1329 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
1330               lr_ignore_rest (ldfile, 0);
1331             }
1332           else
1333             lr_ignore_rest (ldfile, 1);
1334
1335           return;
1336         }
1337
1338       /* Get the locale definition.  */
1339       copy_locale = find_locale (LC_COLLATE, now->val.str.startmb,
1340                                  repertoire_name, charmap);
1341       if ((copy_locale->avail & COLLATE_LOCALE) == 0)
1342         {
1343           /* Not yet loaded.  So do it now.  */
1344           if (locfile_read (copy_locale, charmap) != 0)
1345             goto skip_category;
1346         }
1347
1348       lr_ignore_rest (ldfile, 1);
1349
1350       now = lr_token (ldfile, charmap, NULL);
1351       nowtok = now->tok;
1352     }
1353
1354   /* Prepare the data structures.  */
1355   collate_startup (ldfile, result, copy_locale, ignore_content);
1356   collate = result->categories[LC_COLLATE].collate;
1357
1358   while (1)
1359     {
1360       /* Of course we don't proceed beyond the end of file.  */
1361       if (nowtok == tok_eof)
1362         break;
1363
1364       /* Ingore empty lines.  */
1365       if (nowtok == tok_eol)
1366         {
1367           now = lr_token (ldfile, charmap, NULL);
1368           nowtok = now->tok;
1369           continue;
1370         }
1371
1372       switch (nowtok)
1373         {
1374         case tok_coll_weight_max:
1375           /* Ignore the rest of the line if we don't need the input of
1376              this line.  */
1377           if (ignore_content)
1378             {
1379               lr_ignore_rest (ldfile, 0);
1380               break;
1381             }
1382
1383           if (state != 0)
1384             goto err_label;
1385
1386           arg = lr_token (ldfile, charmap, NULL);
1387           if (arg->tok != tok_number)
1388             goto err_label;
1389           if (collate->col_weight_max != -1)
1390             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
1391                       "LC_COLLATE", "col_weight_max");
1392           else
1393             collate->col_weight_max = arg->val.num;
1394           lr_ignore_rest (ldfile, 1);
1395           break;
1396
1397         case tok_section_symbol:
1398           /* Ignore the rest of the line if we don't need the input of
1399              this line.  */
1400           if (ignore_content)
1401             {
1402               lr_ignore_rest (ldfile, 0);
1403               break;
1404             }
1405
1406           if (state != 0)
1407             goto err_label;
1408
1409           arg = lr_token (ldfile, charmap, repertoire);
1410           if (arg->tok != tok_bsymbol)
1411             goto err_label;
1412           else if (!ignore_content)
1413             {
1414               /* Check whether this section is already known.  */
1415               struct section_list *known = collate->sections;
1416               while (known != NULL)
1417                 if (strcmp (known->name, arg->val.str.startmb) == 0)
1418                   break;
1419
1420               if (known != NULL)
1421                 {
1422                   lr_error (ldfile,
1423                             _("%s: duplicate declaration of section `%s'"),
1424                             "LC_COLLATE", arg->val.str.startmb);
1425                   free (arg->val.str.startmb);
1426                 }
1427               else
1428                 collate->sections = make_seclist_elem (collate,
1429                                                        arg->val.str.startmb,
1430                                                        collate->sections);
1431
1432               lr_ignore_rest (ldfile, known == NULL);
1433             }
1434           else
1435             {
1436               free (arg->val.str.startmb);
1437               lr_ignore_rest (ldfile, 0);
1438             }
1439           break;
1440
1441         case tok_collating_element:
1442           /* Ignore the rest of the line if we don't need the input of
1443              this line.  */
1444           if (ignore_content)
1445             {
1446               lr_ignore_rest (ldfile, 0);
1447               break;
1448             }
1449
1450           if (state != 0)
1451             goto err_label;
1452
1453           arg = lr_token (ldfile, charmap, repertoire);
1454           if (arg->tok != tok_bsymbol)
1455             goto err_label;
1456           else
1457             {
1458               const char *symbol = arg->val.str.startmb;
1459               size_t symbol_len = arg->val.str.lenmb;
1460
1461               /* Next the `from' keyword.  */
1462               arg = lr_token (ldfile, charmap, repertoire);
1463               if (arg->tok != tok_from)
1464                 {
1465                   free ((char *) symbol);
1466                   goto err_label;
1467                 }
1468
1469               ldfile->return_widestr = 1;
1470
1471               /* Finally the string with the replacement.  */
1472               arg = lr_token (ldfile, charmap, repertoire);
1473               ldfile->return_widestr = 0;
1474               if (arg->tok != tok_string)
1475                 goto err_label;
1476
1477               if (!ignore_content)
1478                 {
1479                   if (symbol == NULL)
1480                     lr_error (ldfile, _("\
1481 %s: unknown character in collating element name"),
1482                               "LC_COLLATE");
1483                   if (arg->val.str.startmb == NULL)
1484                     lr_error (ldfile, _("\
1485 %s: unknown character in collating element definition"),
1486                               "LC_COLLATE");
1487                   if (arg->val.str.startwc == NULL)
1488                     lr_error (ldfile, _("\
1489 %s: unknown wide character in collating element definition"),
1490                               "LC_COLLATE");
1491                   else if (arg->val.str.lenwc < 2)
1492                     lr_error (ldfile, _("\
1493 %s: substitution string in collating element definition must have at least two characters"),
1494                               "LC_COLLATE");
1495
1496                   if (symbol != NULL)
1497                     {
1498                       /* The name is already defined.  */
1499                       if (check_duplicate (ldfile, collate, charmap,
1500                                            repertoire, symbol, symbol_len))
1501                         goto col_elem_free;
1502
1503                       if (insert_entry (&collate->elem_table,
1504                                         symbol, symbol_len,
1505                                         new_element (collate,
1506                                                      NULL, 0, NULL, symbol,
1507                                                      symbol_len)) < 0)
1508                         lr_error (ldfile, _("\
1509 error while adding collating element"));
1510                     }
1511                   else
1512                     goto col_elem_free;
1513                 }
1514               else
1515                 {
1516                 col_elem_free:
1517                   if (symbol != NULL)
1518                     free ((char *) symbol);
1519                   if (arg->val.str.startmb != NULL)
1520                     free (arg->val.str.startmb);
1521                   if (arg->val.str.startwc != NULL)
1522                     free (arg->val.str.startwc);
1523                 }
1524               lr_ignore_rest (ldfile, 1);
1525             }
1526           break;
1527
1528         case tok_collating_symbol:
1529           /* Ignore the rest of the line if we don't need the input of
1530              this line.  */
1531           if (ignore_content)
1532             {
1533               lr_ignore_rest (ldfile, 0);
1534               break;
1535             }
1536
1537           if (state != 0)
1538             goto err_label;
1539
1540           arg = lr_token (ldfile, charmap, repertoire);
1541           if (arg->tok != tok_bsymbol)
1542             goto err_label;
1543           else
1544             {
1545               const char *symbol = arg->val.str.startmb;
1546               size_t symbol_len = arg->val.str.lenmb;
1547
1548               if (!ignore_content)
1549                 {
1550                   if (symbol == NULL)
1551                     lr_error (ldfile, _("\
1552 %s: unknown character in collating symbol name"),
1553                               "LC_COLLATE");
1554                   else
1555                     {
1556                       /* The name is already defined.  */
1557                       if (check_duplicate (ldfile, collate, charmap,
1558                                            repertoire, symbol, symbol_len))
1559                         goto col_sym_free;
1560
1561                       if (insert_entry (&collate->sym_table,
1562                                         symbol, symbol_len,
1563                                         new_symbol (collate)) < 0)
1564                         lr_error (ldfile, _("\
1565 error while adding collating symbol"));
1566                     }
1567                 }
1568               else
1569                 {
1570                 col_sym_free:
1571                   if (symbol != NULL)
1572                     free ((char *) symbol);
1573                 }
1574               lr_ignore_rest (ldfile, 1);
1575             }
1576           break;
1577
1578         case tok_symbol_equivalence:
1579           /* Ignore the rest of the line if we don't need the input of
1580              this line.  */
1581           if (ignore_content)
1582             {
1583               lr_ignore_rest (ldfile, 0);
1584               break;
1585             }
1586
1587           if (state != 0)
1588             goto err_label;
1589
1590           arg = lr_token (ldfile, charmap, repertoire);
1591           if (arg->tok != tok_bsymbol)
1592             goto err_label;
1593           else
1594             {
1595               const char *newname = arg->val.str.startmb;
1596               size_t newname_len = arg->val.str.lenmb;
1597               const char *symname;
1598               size_t symname_len;
1599               struct symbol_t *symval;
1600
1601               arg = lr_token (ldfile, charmap, repertoire);
1602               if (arg->tok != tok_bsymbol)
1603                 {
1604                   if (newname != NULL)
1605                     free ((char *) newname);
1606                   goto err_label;
1607                 }
1608
1609               symname = arg->val.str.startmb;
1610               symname_len = arg->val.str.lenmb;
1611
1612               if (!ignore_content)
1613                 {
1614                   if (newname == NULL)
1615                     {
1616                       lr_error (ldfile, _("\
1617 %s: unknown character in equivalent definition name"),
1618                                 "LC_COLLATE");
1619                       goto sym_equiv_free;
1620                     }
1621                   if (symname == NULL)
1622                     {
1623                       lr_error (ldfile, _("\
1624 %s: unknown character in equivalent definition value"),
1625                                 "LC_COLLATE");
1626                       goto sym_equiv_free;
1627                     }
1628                   /* The name is already defined.  */
1629                   if (check_duplicate (ldfile, collate, charmap,
1630                                        repertoire, symname, symname_len))
1631                     goto col_sym_free;
1632
1633                   /* See whether the symbol name is already defined.  */
1634                   if (find_entry (&collate->sym_table, symname, symname_len,
1635                                   (void **) &symval) != 0)
1636                     {
1637                       lr_error (ldfile, _("\
1638 %s: unknown symbol `%s' in equivalent definition"),
1639                                 "LC_COLLATE", symname);
1640                       goto col_sym_free;
1641                     }
1642
1643                   if (insert_entry (&collate->sym_table,
1644                                     newname, newname_len, symval) < 0)
1645                     {
1646                       lr_error (ldfile, _("\
1647 error while adding equivalent collating symbol"));
1648                       goto sym_equiv_free;
1649                     }
1650
1651                   free ((char *) symname);
1652                 }
1653               else
1654                 {
1655                 sym_equiv_free:
1656                   if (newname != NULL)
1657                     free ((char *) newname);
1658                   if (symname != NULL)
1659                     free ((char *) symname);
1660                 }
1661               lr_ignore_rest (ldfile, 1);
1662             }
1663           break;
1664
1665         case tok_order_start:
1666           /* Ignore the rest of the line if we don't need the input of
1667              this line.  */
1668           if (ignore_content)
1669             {
1670               lr_ignore_rest (ldfile, 0);
1671               break;
1672             }
1673
1674           if (state != 0 && state != 1)
1675             goto err_label;
1676           state = 1;
1677
1678           /* The 14652 draft does not specify whether all `order_start' lines
1679              must contain the same number of sort-rules, but 14651 does.  So
1680              we require this here as well.  */
1681           arg = lr_token (ldfile, charmap, repertoire);
1682           if (arg->tok == tok_bsymbol)
1683             {
1684               /* This better should be a section name.  */
1685               struct section_list *sp = collate->sections;
1686               while (sp != NULL
1687                      && strcmp (sp->name, arg->val.str.startmb) != 0)
1688                 sp = sp->next;
1689
1690               if (sp == NULL)
1691                 {
1692                   lr_error (ldfile, _("\
1693 %s: unknown section name `%s'"),
1694                             "LC_COLLATE", arg->val.str.startmb);
1695                   /* We use the error section.  */
1696                   collate->current_section = &collate->error_section;
1697                 }
1698               else
1699                 {
1700                   /* Remember this section.  */
1701                   collate->current_section = sp;
1702
1703                   /* One should not be allowed to open the same
1704                      section twice.  */
1705                   if (sp->first != NULL)
1706                     lr_error (ldfile, _("\
1707 %s: multiple order definitions for section `%s'"),
1708                               "LC_COLLATE", sp->name);
1709
1710                   /* Next should come the end of the line or a semicolon.  */
1711                   arg = lr_token (ldfile, charmap, repertoire);
1712                   if (arg->tok == tok_eol)
1713                     {
1714                       uint32_t cnt;
1715
1716                       /* This means we have exactly one rule: `forward'.  */
1717                       if (collate->nrules > 1)
1718                         lr_error (ldfile, _("\
1719 %s: invalid number of sorting rules"),
1720                                   "LC_COLLATE");
1721                       else
1722                         collate->nrules = 1;
1723                       sp->rules = obstack_alloc (&collate->mempool,
1724                                                  (sizeof (enum coll_sort_rule)
1725                                                   * collate->nrules));
1726                       for (cnt = 0; cnt < collate->nrules; ++cnt)
1727                         sp->rules[cnt] = sort_forward;
1728
1729                       /* Next line.  */
1730                       break;
1731                     }
1732
1733                   /* Get the next token.  */
1734                   arg = lr_token (ldfile, charmap, repertoire);
1735                 }
1736             }
1737           else
1738             {
1739               /* There is no section symbol.  Therefore we use the unnamed
1740                  section.  */
1741               collate->current_section = &collate->unnamed_section;
1742
1743               if (collate->unnamed_section.first != NULL)
1744                 lr_error (ldfile, _("\
1745 %s: multiple order definitions for unnamed section"),
1746                           "LC_COLLATE");
1747             }
1748
1749           /* Now read the direction names.  */
1750           read_directions (ldfile, arg, charmap, repertoire, collate);
1751
1752           /* From now on we need the strings untranslated.  */
1753           ldfile->translate_strings = 0;
1754           break;
1755
1756         case tok_order_end:
1757           /* Ignore the rest of the line if we don't need the input of
1758              this line.  */
1759           if (ignore_content)
1760             {
1761               lr_ignore_rest (ldfile, 0);
1762               break;
1763             }
1764
1765           if (state != 1)
1766             goto err_label;
1767
1768           /* Handle ellipsis at end of list.  */
1769           if (was_ellipsis != tok_none)
1770             {
1771               handle_ellipsis (ldfile, NULL, was_ellipsis, charmap, repertoire,
1772                                collate);
1773               was_ellipsis = tok_none;
1774             }
1775
1776           state = 2;
1777           lr_ignore_rest (ldfile, 1);
1778           break;
1779
1780         case tok_reorder_after:
1781           /* Ignore the rest of the line if we don't need the input of
1782              this line.  */
1783           if (ignore_content)
1784             {
1785               lr_ignore_rest (ldfile, 0);
1786               break;
1787             }
1788
1789           if (state == 1)
1790             {
1791               lr_error (ldfile, _("%s: missing `order_end' keyword"),
1792                         "LC_COLLATE");
1793               state = 2;
1794
1795               /* Handle ellipsis at end of list.  */
1796               if (was_ellipsis != tok_none)
1797                 {
1798                   handle_ellipsis (ldfile, arg, was_ellipsis, charmap,
1799                                    repertoire, collate);
1800                   was_ellipsis = tok_none;
1801                 }
1802             }
1803           else if (state != 2 && state != 3)
1804             goto err_label;
1805           state = 3;
1806
1807           arg = lr_token (ldfile, charmap, repertoire);
1808           if (arg->tok == tok_bsymbol)
1809             {
1810               /* Find this symbol in the sequence table.  */
1811               struct element_t *insp;
1812               int no_error = 1;
1813
1814               if (find_entry (&collate->seq_table, arg->val.str.startmb,
1815                               arg->val.str.lenmb, (void **) &insp) == 0)
1816                 /* Yes, the symbol exists.  Simply point the cursor
1817                    to it.  */
1818                   collate->cursor = insp;
1819               else
1820                 {
1821                   /* This is bad.  The symbol after which we have to
1822                      insert does not exist.  */
1823                   lr_error (ldfile, _("\
1824 %s: cannot reorder after %.*s: symbol not known"),
1825                             "LC_COLLATE", arg->val.str.lenmb,
1826                             arg->val.str.startmb);
1827                   collate->cursor = NULL;
1828                   no_error = 0;
1829                 }
1830
1831               lr_ignore_rest (ldfile, no_error);
1832             }
1833           else
1834             /* This must not happen.  */
1835             goto err_label;
1836           break;
1837
1838         case tok_reorder_end:
1839           /* Ignore the rest of the line if we don't need the input of
1840              this line.  */
1841           if (ignore_content)
1842             break;
1843
1844           if (state != 3)
1845             goto err_label;
1846           state = 4;
1847           lr_ignore_rest (ldfile, 1);
1848           break;
1849
1850         case tok_reorder_sections_after:
1851           /* Ignore the rest of the line if we don't need the input of
1852              this line.  */
1853           if (ignore_content)
1854             {
1855               lr_ignore_rest (ldfile, 0);
1856               break;
1857             }
1858
1859           if (state == 1)
1860             {
1861               lr_error (ldfile, _("%s: missing `order_end' keyword"),
1862                         "LC_COLLATE");
1863               state = 2;
1864
1865               /* Handle ellipsis at end of list.  */
1866               if (was_ellipsis != tok_none)
1867                 {
1868                   handle_ellipsis (ldfile, NULL, was_ellipsis, charmap,
1869                                    repertoire, collate);
1870                   was_ellipsis = tok_none;
1871                 }
1872             }
1873           else if (state == 3)
1874             {
1875               error (0, 0, _("%s: missing `reorder-end' keyword"),
1876                      "LC_COLLATE");
1877               state = 4;
1878             }
1879           else if (state != 2 && state != 4)
1880             goto err_label;
1881           state = 5;
1882
1883           /* Get the name of the sections we are adding after.  */
1884           arg = lr_token (ldfile, charmap, repertoire);
1885           if (arg->tok == tok_bsymbol)
1886             {
1887               /* Now find a section with this name.  */
1888               struct section_list *runp = collate->sections;
1889
1890               while (runp != NULL)
1891                 {
1892                   if (runp->name != NULL
1893                       && strlen (runp->name) == arg->val.str.lenmb
1894                       && memcmp (runp->name, arg->val.str.startmb,
1895                                  arg->val.str.lenmb) == 0)
1896                     break;
1897
1898                   runp = runp->next;
1899                 }
1900
1901               if (runp != NULL)
1902                 collate->current_section = runp;
1903               else
1904                 {
1905                   /* This is bad.  The section after which we have to
1906                      reorder does not exist.  Therefore we cannot
1907                      process the whole rest of this reorder
1908                      specification.  */
1909                   lr_error (ldfile, _("%s: section `%.*s' not known"),
1910                             "LC_COLLATE", arg->val.str.lenmb,
1911                             arg->val.str.startmb);
1912
1913                   do
1914                     {
1915                       lr_ignore_rest (ldfile, 0);
1916
1917                       now = lr_token (ldfile, charmap, NULL);
1918                     }
1919                   while (now->tok == tok_reorder_sections_after
1920                          || now->tok == tok_reorder_sections_end
1921                          || now->tok == tok_end);
1922
1923                   /* Process the token we just saw.  */
1924                   nowtok = now->tok;
1925                   continue;
1926                 }
1927             }
1928           else
1929             /* This must not happen.  */
1930             goto err_label;
1931           break;
1932
1933         case tok_reorder_sections_end:
1934           /* Ignore the rest of the line if we don't need the input of
1935              this line.  */
1936           if (ignore_content)
1937             break;
1938
1939           if (state != 5)
1940             goto err_label;
1941           state = 6;
1942           lr_ignore_rest (ldfile, 1);
1943           break;
1944
1945         case tok_bsymbol:
1946           /* Ignore the rest of the line if we don't need the input of
1947              this line.  */
1948           if (ignore_content)
1949             {
1950               lr_ignore_rest (ldfile, 0);
1951               break;
1952             }
1953
1954           if (state != 1 && state != 3)
1955             goto err_label;
1956
1957           if (state == 3)
1958             {
1959               /* It is possible that we already have this collation sequence.
1960                  In this case we move the entry.  */
1961               struct element_t *seqp;
1962
1963               /* If the symbol after which we have to insert was not found
1964                  ignore all entries.  */
1965               if (collate->cursor == NULL)
1966                 {
1967                   lr_ignore_rest (ldfile, 0);
1968                   break;
1969                 }
1970
1971               if (find_entry (&collate->seq_table, arg->val.str.startmb,
1972                               arg->val.str.lenmb, (void **) &seqp) == 0)
1973                 {
1974                   /* Remove the entry from the old position.  */
1975                   if (seqp->last == NULL)
1976                     collate->start = seqp->next;
1977                   else
1978                     seqp->last->next = seqp->next;
1979                   if (seqp->next != NULL)
1980                     seqp->next->last = seqp->last;
1981
1982                   /* We also have to check whether this entry is the
1983                      first or last of a section.  */
1984                   if (seqp->section->first == seqp)
1985                     {
1986                       if (seqp->section->first == seqp->section->last)
1987                         /* This section has no content anymore.  */
1988                         seqp->section->first = seqp->section->last = NULL;
1989                       else
1990                         seqp->section->first = seqp->next;
1991                     }
1992                   else if (seqp->section->last == seqp)
1993                     seqp->section->last = seqp->last;
1994
1995                   /* Now insert it in the new place.  */
1996                   seqp->next = collate->cursor->next;
1997                   seqp->last = collate->cursor;
1998                   collate->cursor->next = seqp;
1999                   if (seqp->next != NULL)
2000                     seqp->next->last = seqp;
2001
2002                   seqp->section = collate->cursor->section;
2003                   if (seqp->section->last == collate->cursor)
2004                     seqp->section->last = seqp;
2005
2006                   break;
2007                 }
2008
2009               /* Otherwise we just add a new entry.  */
2010             }
2011           else if (state == 5)
2012             {
2013               /* We are reordering sections.  Find the named section.  */
2014               struct section_list *runp = collate->sections;
2015               struct section_list *prevp = NULL;
2016
2017               while (runp != NULL)
2018                 {
2019                   if (runp->name != NULL
2020                       && strlen (runp->name) == arg->val.str.lenmb
2021                       && memcmp (runp->name, arg->val.str.startmb,
2022                                  arg->val.str.lenmb) == 0)
2023                     break;
2024
2025                   prevp = runp;
2026                   runp = runp->next;
2027                 }
2028
2029               if (runp == NULL)
2030                 {
2031                   lr_error (ldfile, _("%s: section `%.*s' not known"),
2032                             "LC_COLLATE", arg->val.str.lenmb,
2033                             arg->val.str.startmb);
2034                   lr_ignore_rest (ldfile, 0);
2035                 }
2036               else
2037                 {
2038                   if (runp != collate->current_section)
2039                     {
2040                       /* Remove the named section from the old place and
2041                          insert it in the new one.  */
2042                       prevp->next = runp->next;
2043
2044                       runp->next = collate->current_section->next;
2045                       collate->current_section->next = runp;
2046                       collate->current_section = runp;
2047                     }
2048
2049                   /* Process the rest of the line which might change
2050                      the collation rules.  */
2051                   arg = lr_token (ldfile, charmap, repertoire);
2052                   if (arg->tok != tok_eof && arg->tok != tok_eol)
2053                     read_directions (ldfile, arg, charmap, repertoire,
2054                                      collate);
2055                 }
2056               break;
2057             }
2058           else if (was_ellipsis != tok_none)
2059             {
2060               /* Using the information in the `ellipsis_weight'
2061                  element and this and the last value we have to handle
2062                  the ellipsis now.  */
2063               assert (state == 1);
2064
2065               handle_ellipsis (ldfile, arg, was_ellipsis, charmap, repertoire,
2066                                collate);
2067
2068               /* Remember that we processed the ellipsis.  */
2069               was_ellipsis = tok_none;
2070
2071               /* And don't add the value a second time.  */
2072               break;
2073             }
2074
2075           /* Now insert in the new place.  */
2076           insert_value (ldfile, arg, charmap, repertoire, collate);
2077           break;
2078
2079         case tok_undefined:
2080           /* Ignore the rest of the line if we don't need the input of
2081              this line.  */
2082           if (ignore_content)
2083             {
2084               lr_ignore_rest (ldfile, 0);
2085               break;
2086             }
2087
2088           if (state != 1)
2089             goto err_label;
2090
2091           if (was_ellipsis != tok_none)
2092             {
2093               lr_error (ldfile,
2094                         _("%s: cannot have `%s' as end of ellipsis range"),
2095                         "LC_COLLATE", "UNDEFINED");
2096
2097               unlink_element (collate);
2098               was_ellipsis = tok_none;
2099             }
2100
2101           /* See whether UNDEFINED already appeared somewhere.  */
2102           if (collate->undefined.next != NULL
2103               || (collate->cursor != NULL
2104                   && collate->undefined.next == collate->cursor))
2105             {
2106               lr_error (ldfile,
2107                         _("%s: order for `%.*s' already defined at %s:%zu"),
2108                         "LC_COLLATE", 9, "UNDEFINED", collate->undefined.file,
2109                         collate->undefined.line);
2110               lr_ignore_rest (ldfile, 0);
2111             }
2112           else
2113             /* Parse the weights.  */
2114              insert_weights (ldfile, &collate->undefined, charmap,
2115                              repertoire, collate, tok_none);
2116           break;
2117
2118         case tok_ellipsis2:
2119         case tok_ellipsis3:
2120         case tok_ellipsis4:
2121           /* This is the symbolic (decimal or hexadecimal) or absolute
2122              ellipsis.  */
2123           if (was_ellipsis != tok_none)
2124             goto err_label;
2125
2126           if (state != 1 && state != 3)
2127             goto err_label;
2128
2129           was_ellipsis = nowtok;
2130
2131           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
2132                           repertoire, collate, nowtok);
2133           break;
2134
2135         case tok_end:
2136           /* Next we assume `LC_COLLATE'.  */
2137           if (!ignore_content)
2138             {
2139               if (state == 0)
2140                 /* We must either see a copy statement or have
2141                    ordering values.  */
2142                 lr_error (ldfile,
2143                           _("%s: empty category description not allowed"),
2144                           "LC_COLLATE");
2145               else if (state == 1)
2146                 {
2147                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
2148                             "LC_COLLATE");
2149
2150                   /* Handle ellipsis at end of list.  */
2151                   if (was_ellipsis != tok_none)
2152                     {
2153                       handle_ellipsis (ldfile, NULL, was_ellipsis, charmap,
2154                                        repertoire, collate);
2155                       was_ellipsis = tok_none;
2156                     }
2157                 }
2158               else if (state == 3)
2159                 error (0, 0, _("%s: missing `reorder-end' keyword"),
2160                        "LC_COLLATE");
2161               else if (state == 5)
2162                 error (0, 0, _("%s: missing `reorder-sections-end' keyword"),
2163                        "LC_COLLATE");
2164             }
2165           arg = lr_token (ldfile, charmap, NULL);
2166           if (arg->tok == tok_eof)
2167             break;
2168           if (arg->tok == tok_eol)
2169             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
2170           else if (arg->tok != tok_lc_collate)
2171             lr_error (ldfile, _("\
2172 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2173           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
2174           return;
2175
2176         default:
2177         err_label:
2178           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2179         }
2180
2181       /* Prepare for the next round.  */
2182       now = lr_token (ldfile, charmap, NULL);
2183       nowtok = now->tok;
2184     }
2185
2186   /* When we come here we reached the end of the file.  */
2187   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2188 }