]> sourceware.org Git - glibc.git/blob - locale/programs/ld-collate.c
a0f1139cdee13b52bfbabb6b613cb9d564d44271
[glibc.git] / locale / programs / ld-collate.c
1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <errno.h>
25 #include <error.h>
26 #include <stdlib.h>
27 #include <wchar.h>
28
29 #include "charmap.h"
30 #include "localeinfo.h"
31 #include "linereader.h"
32 #include "locfile.h"
33 #include "localedef.h"
34
35 /* Uncomment the following line in the production version. */
36 /* #define NDEBUG 1 */
37 #include <assert.h>
38
39 #define obstack_chunk_alloc malloc
40 #define obstack_chunk_free free
41
/* Only pointers to elements are used before `struct element_t' is
   defined, so a forward declaration is sufficient here.  */
struct element_t;

/* One node in the linked list of sections.  */
struct section_list
{
  /* Following node of the section list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element belonging to this section.  */
  struct element_t *first;
  /* Last element belonging to this section.  */
  struct element_t *last;
  /* The sorting rules in effect for this section.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output
     file.  */
  int ruleidx;
};
60
struct element_t;

/* An array of element pointers together with its length.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* The element pointers themselves.  */
  struct element_t **w;
};
70
/* Data type for collating element.  */
struct element_t
{
  /* Name of the element, or NULL if it was created without one.  */
  const char *name;

  /* Multibyte sequence and its length in bytes.  */
  const char *mbs;
  size_t nmbs;
  /* Wide character sequence and its length in characters.  */
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* Bit mask with one bit per level; a bit is set if this element is
     used in the corresponding level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* The weights of this element, one list per sorting rule.  */
  struct element_list_t *weights;

  /* File and line the definition comes from.  */
  const char *file;
  size_t line;

  /* The section this element belongs to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
};

/* Special element values.  These are never dereferenced; they only
   mark a weight as being given by an ellipsis.  */
#define ELEMENT_ELLIPSIS2	((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3	((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4	((struct element_t *) 3)
112
/* Data type for collating symbol.  */
struct symbol_t
{
  /* The element standing for this symbol in the order list; NULL as
     long as no such element has been created.  */
  struct element_t *order;

  /* File and line the definition comes from.  */
  const char *file;
  size_t line;
};
123
124
125 /* The real definition of the struct for the LC_COLLATE locale. */
126 struct locale_collate_t
127 {
128 int col_weight_max;
129 int cur_weight_max;
130
131 /* List of known scripts. */
132 struct section_list *sections;
133 /* Current section using definition. */
134 struct section_list *current_section;
135 /* There always can be an unnamed section. */
136 struct section_list unnamed_section;
137 /* To make handling of errors easier we have another section. */
138 struct section_list error_section;
139
140 /* Number of sorting rules given in order_start line. */
141 uint32_t nrules;
142
143 /* Start of the order list. */
144 struct element_t *start;
145
146 /* The undefined element. */
147 struct element_t undefined;
148
149 /* This is the cursor for `reorder_after' insertions. */
150 struct element_t *cursor;
151
152 /* This value is used when handling ellipsis. */
153 struct element_t ellipsis_weight;
154
155 /* Known collating elements. */
156 hash_table elem_table;
157
158 /* Known collating symbols. */
159 hash_table sym_table;
160
161 /* Known collation sequences. */
162 hash_table seq_table;
163
164 struct obstack mempool;
165
166 /* The LC_COLLATE category is a bit special as it is sometimes possible
167 that the definitions from more than one input file contains information.
168 Therefore we keep all relevant input in a list. */
169 struct locale_collate_t *next;
170
171 /* Arrays with heads of the list for each of the leading bytes in
172 the multibyte sequences. */
173 struct element_t *mbheads[256];
174 };
175
176
/* Global state shared while reading the LC_COLLATE category
   descriptions of all files: the number of sorting rules.  It is zero
   as long as no direction specification has been processed.  */
static int nrules;
180
181
/* These are definitions used by some of the functions for handling
   UTF-8 encoding below.  encoding_mask[N - 2] has exactly those bits
   set which must be clear in a value that fits into an N byte
   sequence.  */
static const uint32_t encoding_mask[] =
{
  ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
};

/* encoding_byte[N - 2] is the marker stored in the first byte of an
   N byte sequence.  */
static const unsigned char encoding_byte[] =
{
  0xc0, 0xe0, 0xf0, 0xf8, 0xfc
};


/* We need UTF-8 encoding of numbers.  Store the encoding of VAL in BUF
   and return the number of bytes written (one to six).  BUF must
   provide room for six bytes; no terminating NUL byte is written.  */
static inline int
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      /* Find the shortest sequence able to represent VAL.  If none of
	 the masks matches STEP ends up as 6, the longest form.  */
      for (step = 2; step < 6; ++step)
	if ((val & encoding_mask[step - 2]) == 0)
	  break;
      retval = step;

      *buf = encoding_byte[step - 2];
      --step;
      /* Fill in the continuation bytes from the last one backwards,
	 six bits of VAL each.  */
      do
	{
	  buf[step] = 0x80 | (val & 0x3f);
	  val >>= 6;
	}
      while (--step > 0);
      /* The remaining bits go into the leading byte.  */
      *buf |= val;
    }

  /* BUF is not advanced in the multi-byte case, so a pointer difference
     cannot be used to determine the length; return the recorded one.  */
  return retval;
}
229
230
231 static struct section_list *
232 make_seclist_elem (struct locale_collate_t *collate, const char *string,
233 struct section_list *next)
234 {
235 struct section_list *newp;
236
237 newp = (struct section_list *) obstack_alloc (&collate->mempool,
238 sizeof (*newp));
239 newp->next = next;
240 newp->name = string;
241 newp->first = NULL;
242
243 return newp;
244 }
245
246
247 static struct element_t *
248 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
249 const uint32_t *wcs, const char *name, size_t namelen)
250 {
251 struct element_t *newp;
252
253 newp = (struct element_t *) obstack_alloc (&collate->mempool,
254 sizeof (*newp));
255 newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
256 name, namelen);
257 if (mbs != NULL)
258 {
259 newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
260 newp->nmbs = mbslen;
261 }
262 else
263 {
264 newp->mbs = NULL;
265 newp->nmbs = 0;
266 }
267 if (wcs != NULL)
268 {
269 size_t nwcs = wcslen ((wchar_t *) wcs);
270 uint32_t zero = 0;
271 obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
272 obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
273 newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
274 newp->nwcs = nwcs;
275 }
276 else
277 {
278 newp->wcs = NULL;
279 newp->nwcs = 0;
280 }
281 newp->mborder = NULL;
282 newp->wcorder = 0;
283 newp->used_in_level = 0;
284
285 /* Will be allocated later. */
286 newp->weights = NULL;
287
288 newp->file = NULL;
289 newp->line = 0;
290
291 newp->section = collate->current_section;
292
293 newp->last = NULL;
294 newp->next = NULL;
295
296 newp->mbnext = NULL;
297
298 return newp;
299 }
300
301
302 static struct symbol_t *
303 new_symbol (struct locale_collate_t *collate)
304 {
305 struct symbol_t *newp;
306
307 newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
308
309 newp->order = NULL;
310
311 newp->file = NULL;
312 newp->line = 0;
313
314 return newp;
315 }
316
317
318 /* Test whether this name is already defined somewhere. */
319 static int
320 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
321 struct charmap_t *charmap, struct repertoire_t *repertoire,
322 const char *symbol, size_t symbol_len)
323 {
324 void *ignore = NULL;
325
326 if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
327 {
328 lr_error (ldfile, _("`%s' already defined in charmap"), symbol);
329 return 1;
330 }
331
332 if (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore) == 0)
333 {
334 lr_error (ldfile, _("`%s' already defined in repertoire"), symbol);
335 return 1;
336 }
337
338 if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
339 {
340 lr_error (ldfile, _("`%s' already defined as collating symbol"), symbol);
341 return 1;
342 }
343
344 if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
345 {
346 lr_error (ldfile, _("`%s' already defined as collating element"),
347 symbol);
348 return 1;
349 }
350
351 return 0;
352 }
353
354
355 /* Read the direction specification. */
356 static void
357 read_directions (struct linereader *ldfile, struct token *arg,
358 struct charmap_t *charmap, struct repertoire_t *repertoire,
359 struct locale_collate_t *collate)
360 {
361 int cnt = 0;
362 int max = nrules ?: 10;
363 enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
364 int warned = 0;
365
366 while (1)
367 {
368 int valid = 0;
369
370 if (arg->tok == tok_forward)
371 {
372 if (rules[cnt] & sort_backward)
373 {
374 if (! warned)
375 {
376 lr_error (ldfile, _("\
377 %s: `forward' and `backward' are mutually excluding each other"),
378 "LC_COLLATE");
379 warned = 1;
380 }
381 }
382 else if (rules[cnt] & sort_forward)
383 {
384 if (! warned)
385 {
386 lr_error (ldfile, _("\
387 %s: `%s' mentioned twice in definition of weight %d"),
388 "LC_COLLATE", "forward", cnt + 1);
389 }
390 }
391 else
392 rules[cnt] |= sort_forward;
393
394 valid = 1;
395 }
396 else if (arg->tok == tok_backward)
397 {
398 if (rules[cnt] & sort_forward)
399 {
400 if (! warned)
401 {
402 lr_error (ldfile, _("\
403 %s: `forward' and `backward' are mutually excluding each other"),
404 "LC_COLLATE");
405 warned = 1;
406 }
407 }
408 else if (rules[cnt] & sort_backward)
409 {
410 if (! warned)
411 {
412 lr_error (ldfile, _("\
413 %s: `%s' mentioned twice in definition of weight %d"),
414 "LC_COLLATE", "backward", cnt + 1);
415 }
416 }
417 else
418 rules[cnt] |= sort_backward;
419
420 valid = 1;
421 }
422 else if (arg->tok == tok_position)
423 {
424 if (rules[cnt] & sort_position)
425 {
426 if (! warned)
427 {
428 lr_error (ldfile, _("\
429 %s: `%s' mentioned twice in definition of weight %d in category `%s'"),
430 "LC_COLLATE", "position", cnt + 1);
431 }
432 }
433 else
434 rules[cnt] |= sort_position;
435
436 valid = 1;
437 }
438
439 if (valid)
440 arg = lr_token (ldfile, charmap, repertoire);
441
442 if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
443 || arg->tok == tok_semicolon)
444 {
445 if (! valid && ! warned)
446 {
447 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
448 warned = 1;
449 }
450
451 /* See whether we have to increment the counter. */
452 if (arg->tok != tok_comma && rules[cnt] != 0)
453 ++cnt;
454
455 if (arg->tok == tok_eof || arg->tok == tok_eol)
456 /* End of line or file, so we exit the loop. */
457 break;
458
459 if (nrules == 0)
460 {
461 /* See whether we have enough room in the array. */
462 if (cnt == max)
463 {
464 max += 10;
465 rules = (enum coll_sort_rule *) xrealloc (rules,
466 max
467 * sizeof (*rules));
468 memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
469 }
470 }
471 else
472 {
473 if (cnt == nrules)
474 {
475 /* There must not be any more rule. */
476 if (! warned)
477 {
478 lr_error (ldfile, _("\
479 %s: too many rules; first entry only had %d"),
480 "LC_COLLATE", nrules);
481 warned = 1;
482 }
483
484 lr_ignore_rest (ldfile, 0);
485 break;
486 }
487 }
488 }
489 else
490 {
491 if (! warned)
492 {
493 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
494 warned = 1;
495 }
496 }
497
498 arg = lr_token (ldfile, charmap, repertoire);
499 }
500
501 if (nrules == 0)
502 {
503 /* Now we know how many rules we have. */
504 nrules = cnt;
505 rules = (enum coll_sort_rule *) xrealloc (rules,
506 nrules * sizeof (*rules));
507 }
508 else
509 {
510 if (cnt < nrules)
511 {
512 /* Not enough rules in this specification. */
513 if (! warned)
514 lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
515
516 do
517 rules[cnt] = sort_forward;
518 while (++cnt < nrules);
519 }
520 }
521
522 collate->current_section->rules = rules;
523 }
524
525
526 static struct element_t *
527 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
528 const char *str, size_t len, uint32_t *wcstr)
529 {
530 struct element_t *result = NULL;
531
532 /* Search for the entries among the collation sequences already define. */
533 if (find_entry (&collate->seq_table, str, len, (void **) &result) != 0)
534 {
535 /* Nope, not define yet. So we see whether it is a
536 collation symbol. */
537 void *ptr;
538
539 if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
540 {
541 /* It's a collation symbol. */
542 struct symbol_t *sym = (struct symbol_t *) ptr;
543 result = sym->order;
544
545 if (result == NULL)
546 result = sym->order = new_element (collate, NULL, 0, NULL,
547 NULL, 0);
548 }
549 else if (find_entry (&collate->elem_table, str, len,
550 (void **) &result) != 0)
551 {
552 /* It's also no collation element. So it is a character
553 element defined later. */
554 result = new_element (collate, NULL, 0, NULL, str, len);
555 if (result != NULL)
556 /* Insert it into the sequence table. */
557 insert_entry (&collate->seq_table, str, len, result);
558 }
559 }
560
561 return result;
562 }
563
564
565 static void
566 unlink_element (struct locale_collate_t *collate)
567 {
568 if (collate->cursor == collate->start)
569 {
570 assert (collate->cursor->next == NULL);
571 assert (collate->cursor->last == NULL);
572 collate->cursor = NULL;
573 }
574 else
575 {
576 if (collate->cursor->next != NULL)
577 collate->cursor->next->last = collate->cursor->last;
578 if (collate->cursor->last != NULL)
579 collate->cursor->last->next = collate->cursor->next;
580 collate->cursor = collate->cursor->last;
581 }
582 }
583
584
585 static void
586 insert_weights (struct linereader *ldfile, struct element_t *elem,
587 struct charmap_t *charmap, struct repertoire_t *repertoire,
588 struct locale_collate_t *collate, enum token_t ellipsis)
589 {
590 int weight_cnt;
591 struct token *arg;
592
593 /* Initialize all the fields. */
594 elem->file = ldfile->fname;
595 elem->line = ldfile->lineno;
596 elem->last = collate->cursor;
597 elem->next = collate->cursor ? collate->cursor->next : NULL;
598 elem->section = collate->current_section;
599 if (collate->cursor != NULL)
600 collate->cursor->next = elem;
601 if (collate->start == NULL)
602 {
603 assert (collate->cursor == NULL);
604 collate->start = elem;
605 }
606 elem->weights = (struct element_list_t *)
607 obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
608 memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
609
610 if (collate->current_section->first == NULL)
611 collate->current_section->first = elem;
612 if (collate->current_section->last == collate->cursor)
613 collate->current_section->last = elem;
614
615 collate->cursor = elem;
616
617 weight_cnt = 0;
618
619 arg = lr_token (ldfile, charmap, repertoire);
620 do
621 {
622 if (arg->tok == tok_eof || arg->tok == tok_eol)
623 break;
624
625 if (arg->tok == tok_ignore)
626 {
627 /* The weight for this level has to be ignored. We use the
628 null pointer to indicate this. */
629 elem->weights[weight_cnt].w = (struct element_t **)
630 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
631 elem->weights[weight_cnt].w[0] = NULL;
632 elem->weights[weight_cnt].cnt = 1;
633 }
634 else if (arg->tok == tok_bsymbol)
635 {
636 struct element_t *val = find_element (ldfile, collate,
637 arg->val.str.startmb,
638 arg->val.str.lenmb,
639 arg->val.str.startwc);
640
641 if (val == NULL)
642 break;
643
644 elem->weights[weight_cnt].w = (struct element_t **)
645 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
646 elem->weights[weight_cnt].w[0] = val;
647 elem->weights[weight_cnt].cnt = 1;
648 }
649 else if (arg->tok == tok_string)
650 {
651 /* Split the string up in the individual characters and put
652 the element definitions in the list. */
653 const char *cp = arg->val.str.startmb;
654 int cnt = 0;
655 struct element_t *charelem;
656 struct element_t **weights = NULL;
657 int max = 0;
658
659 if (*cp == '\0')
660 {
661 lr_error (ldfile, _("%s: empty weight string not allowed"),
662 "LC_COLLATE");
663 lr_ignore_rest (ldfile, 0);
664 break;
665 }
666
667 do
668 {
669 if (*cp == '<')
670 {
671 /* Ahh, it's a bsymbol. That's what we want. */
672 const char *startp = ++cp;
673
674 while (*cp != '>')
675 {
676 if (*cp == ldfile->escape_char)
677 ++cp;
678 if (*cp == '\0')
679 /* It's a syntax error. */
680 goto syntax;
681
682 ++cp;
683 }
684
685 charelem = find_element (ldfile, collate, startp,
686 cp - startp, NULL);
687 ++cp;
688 }
689 else
690 {
691 /* People really shouldn't use characters directly in
692 the string. Especially since it's not really clear
693 what this means. We interpret all characters in the
694 string as if that would be bsymbols. Otherwise we
695 would have to match back to bsymbols somehow and this
696 is normally not what people normally expect. */
697 charelem = find_element (ldfile, collate, cp++, 1, NULL);
698 }
699
700 if (charelem == NULL)
701 {
702 /* We ignore the rest of the line. */
703 lr_ignore_rest (ldfile, 0);
704 break;
705 }
706
707 /* Add the pointer. */
708 if (cnt >= max)
709 {
710 struct element_t **newp;
711 max += 10;
712 newp = (struct element_t **)
713 alloca (max * sizeof (struct element_t *));
714 memcpy (newp, weights, cnt * sizeof (struct element_t *));
715 weights = newp;
716 }
717 weights[cnt++] = charelem;
718 }
719 while (*cp != '\0');
720
721 /* Now store the information. */
722 elem->weights[weight_cnt].w = (struct element_t **)
723 obstack_alloc (&collate->mempool,
724 cnt * sizeof (struct element_t *));
725 memcpy (elem->weights[weight_cnt].w, weights,
726 cnt * sizeof (struct element_t *));
727 elem->weights[weight_cnt].cnt = cnt;
728
729 /* We don't need the string anymore. */
730 free (arg->val.str.startmb);
731 }
732 else if (ellipsis != tok_none
733 && (arg->tok == tok_ellipsis2
734 || arg->tok == tok_ellipsis3
735 || arg->tok == tok_ellipsis4))
736 {
737 /* It must be the same ellipsis as used in the initial column. */
738 if (arg->tok != ellipsis)
739 lr_error (ldfile, _("\
740 %s: weights must use the same ellipsis symbol as the name"),
741 "LC_COLLATE");
742
743 /* The weight for this level has to be ignored. We use the
744 null pointer to indicate this. */
745 elem->weights[weight_cnt].w = (struct element_t **)
746 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
747 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
748 elem->weights[weight_cnt].cnt = 1;
749 }
750 else
751 {
752 syntax:
753 /* It's a syntax error. */
754 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
755 lr_ignore_rest (ldfile, 0);
756 break;
757 }
758
759 arg = lr_token (ldfile, charmap, repertoire);
760 /* This better should be the end of the line or a semicolon. */
761 if (arg->tok == tok_semicolon)
762 /* OK, ignore this and read the next token. */
763 arg = lr_token (ldfile, charmap, repertoire);
764 else if (arg->tok != tok_eof && arg->tok != tok_eol)
765 {
766 /* It's a syntax error. */
767 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
768 lr_ignore_rest (ldfile, 0);
769 break;
770 }
771 }
772 while (++weight_cnt < nrules);
773
774 if (weight_cnt < nrules)
775 {
776 /* This means the rest of the line uses the current element as
777 the weight. */
778 do
779 {
780 elem->weights[weight_cnt].w = (struct element_t **)
781 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
782 elem->weights[weight_cnt].w[0] = elem;
783 elem->weights[weight_cnt].cnt = 1;
784 }
785 while (++weight_cnt < nrules);
786 }
787 else
788 {
789 if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
790 {
791 /* Too many rule values. */
792 lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
793 lr_ignore_rest (ldfile, 0);
794 }
795 else
796 lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
797 }
798 }
799
800
801 static int
802 insert_value (struct linereader *ldfile, struct token *arg,
803 struct charmap_t *charmap, struct repertoire_t *repertoire,
804 struct locale_collate_t *collate)
805 {
806 /* First find out what kind of symbol this is. */
807 struct charseq *seq;
808 uint32_t wc;
809 struct element_t *elem = NULL;
810
811 /* Try to find the character in the charmap. */
812 seq = charmap_find_value (charmap, arg->val.str.startmb, arg->val.str.lenmb);
813
814 /* Determine the wide character. */
815 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
816 {
817 wc = repertoire_find_value (repertoire, arg->val.str.startmb,
818 arg->val.str.lenmb);
819 if (seq != NULL)
820 seq->ucs4 = wc;
821 }
822 else
823 wc = seq->ucs4;
824
825 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
826 {
827 /* It's no character, so look through the collation elements and
828 symbol list. */
829 void *result;
830
831 if (find_entry (&collate->sym_table, arg->val.str.startmb,
832 arg->val.str.lenmb, &result) == 0)
833 {
834 /* It's a collation symbol. */
835 struct symbol_t *sym = (struct symbol_t *) result;
836 elem = sym->order;
837
838 if (elem == NULL)
839 elem = sym->order = new_element (collate, NULL, 0, NULL, NULL, 0);
840 }
841 else if (find_entry (&collate->elem_table, arg->val.str.startmb,
842 arg->val.str.lenmb, (void **) &elem) != 0)
843 {
844 /* It's also no collation element. Therefore ignore it. */
845 lr_ignore_rest (ldfile, 0);
846 return 1;
847 }
848 }
849 else
850 {
851 /* Otherwise the symbols stands for a character. */
852 if (find_entry (&collate->seq_table, arg->val.str.startmb,
853 arg->val.str.lenmb, (void **) &elem) != 0)
854 {
855 uint32_t wcs[2] = { wc, 0 };
856
857 /* We have to allocate an entry. */
858 elem = new_element (collate, seq != NULL ? seq->bytes : NULL,
859 seq != NULL ? seq->nbytes : 0,
860 wcs, arg->val.str.startmb, arg->val.str.lenmb);
861
862 /* And add it to the table. */
863 if (insert_entry (&collate->seq_table, arg->val.str.startmb,
864 arg->val.str.lenmb, elem) != 0)
865 /* This cannot happen. */
866 assert (! "Internal error");
867 }
868 else
869 {
870 /* Maybe the character was used before the definition. In this case
871 we have to insert the byte sequences now. */
872 if (elem->mbs == NULL && seq != NULL)
873 {
874 elem->mbs = obstack_copy0 (&collate->mempool,
875 seq->bytes, seq->nbytes);
876 elem->nmbs = seq->nbytes;
877 }
878
879 if (elem->wcs == NULL && seq != ILLEGAL_CHAR_VALUE)
880 {
881 uint32_t wcs[2] = { wc, 0 };
882
883 elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
884 elem->nwcs = 1;
885 }
886 }
887 }
888
889 /* Test whether this element is not already in the list. */
890 if (elem->next != NULL || (collate->cursor != NULL
891 && elem->next == collate->cursor))
892 {
893 lr_error (ldfile, _("order for `%.*s' already defined at %s:%zu"),
894 (int) arg->val.str.lenmb, arg->val.str.startmb,
895 elem->file, elem->line);
896 lr_ignore_rest (ldfile, 0);
897 return 1;
898 }
899
900 insert_weights (ldfile, elem, charmap, repertoire, collate, tok_none);
901
902 return 0;
903 }
904
905
906 static void
907 handle_ellipsis (struct linereader *ldfile, struct token *arg,
908 enum token_t ellipsis, struct charmap_t *charmap,
909 struct repertoire_t *repertoire,
910 struct locale_collate_t *collate)
911 {
912 struct element_t *startp;
913 struct element_t *endp;
914
915 /* Unlink the entry added for the ellipsis. */
916 unlink_element (collate);
917 startp = collate->cursor;
918
919 /* Process and add the end-entry. */
920 if (arg != NULL
921 && insert_value (ldfile, arg, charmap, repertoire, collate))
922 /* Something went wrong with inserting the to-value. This means
923 we cannot process the ellipsis. */
924 return;
925
926 /* Reset the cursor. */
927 collate->cursor = startp;
928
929 /* Now we have to handle many different situations:
930 - we have to distinguish between the three different ellipsis forms
931 - the is the ellipsis at the beginning, in the middle, or at the end.
932 */
933 endp = collate->cursor->next;
934 assert (arg == NULL || endp != NULL);
935
936 /* Both, the start and the end symbol, must stand for characters. */
937 if ((startp == NULL || startp->name == NULL)
938 || (endp == NULL || endp->name == NULL))
939 {
940 lr_error (ldfile, _("\
941 %s: the start end the end symbol of a range must stand for characters"),
942 "LC_COLLATE");
943 return;
944 }
945
946 if (ellipsis == tok_ellipsis3)
947 {
948 /* One requirement we make here: the length of the byte
949 sequences for the first and end character must be the same.
950 This is mainly to prevent unwanted effects and this is often
951 not what is wanted. */
952 size_t len = (startp->mbs != NULL ? startp->nmbs
953 : (endp->mbs != NULL ? endp->nmbs : 0));
954 char mbcnt[len + 1];
955 char mbend[len + 1];
956
957 /* Well, this should be caught somewhere else already. Just to
958 make sure. */
959 assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
960 assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
961
962 if (startp != NULL && endp != NULL
963 && startp->mbs != NULL && endp->mbs != NULL
964 && startp->nmbs != endp->nmbs)
965 {
966 lr_error (ldfile, _("\
967 %s: byte sequences of first and last character must have the same length"),
968 "LC_COLLATE");
969 return;
970 }
971
972 /* Determine whether we have to generate multibyte sequences. */
973 if ((startp == NULL || startp->mbs != NULL)
974 && (endp == NULL || endp->mbs != NULL))
975 {
976 int cnt;
977 int ret;
978
979 /* Prepare the beginning byte sequence. This is either from the
980 beginning byte sequence or it is all nulls if it was an
981 initial ellipsis. */
982 if (startp == NULL || startp->mbs == NULL)
983 memset (mbcnt, '\0', len);
984 else
985 {
986 memcpy (mbcnt, startp->mbs, len);
987
988 /* And increment it so that the value is the first one we will
989 try to insert. */
990 for (cnt = len - 1; cnt >= 0; --cnt)
991 if (++mbcnt[cnt] != '\0')
992 break;
993 }
994 mbcnt[len] = '\0';
995
996 /* And the end sequence. */
997 if (endp == NULL || endp->mbs == NULL)
998 memset (mbend, '\0', len);
999 else
1000 memcpy (mbend, endp->mbs, len);
1001 mbend[len] = '\0';
1002
1003 /* Test whether we have a correct range. */
1004 ret = memcmp (mbcnt, mbend, len);
1005 if (ret >= 0)
1006 {
1007 if (ret > 0)
1008 lr_error (ldfile, _("%s: byte sequence of first character of \
1009 sequence is not lower than that of the last character"), "LC_COLLATE");
1010 return;
1011 }
1012
1013 /* Generate the byte sequences data. */
1014 while (1)
1015 {
1016 struct charseq *seq;
1017
1018 /* Quite a bit of work ahead. We have to find the character
1019 definition for the byte sequence and then determine the
1020 wide character belonging to it. */
1021 seq = charmap_find_symbol (charmap, mbcnt, len);
1022 if (seq != NULL)
1023 {
1024 struct element_t *elem;
1025 size_t namelen;
1026
1027 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1028 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1029 strlen (seq->name));
1030
1031 /* I don't this this can ever happen. */
1032 assert (seq->name != NULL);
1033 namelen = strlen (seq->name);
1034
1035 /* Now we are ready to insert the new value in the
1036 sequence. Find out whether the element is
1037 already known. */
1038 if (find_entry (&collate->seq_table, seq->name, namelen,
1039 (void **) &elem) != 0)
1040 {
1041 uint32_t wcs[2] = { seq->ucs4, 0 };
1042
1043 /* We have to allocate an entry. */
1044 elem = new_element (collate, mbcnt, len, wcs, seq->name,
1045 namelen);
1046
1047 /* And add it to the table. */
1048 if (insert_entry (&collate->seq_table, seq->name,
1049 namelen, elem) != 0)
1050 /* This cannot happen. */
1051 assert (! "Internal error");
1052 }
1053
1054 /* Test whether this element is not already in the list. */
1055 if (elem->next != NULL || (collate->cursor != NULL
1056 && elem->next == collate->cursor))
1057 {
1058 lr_error (ldfile, _("\
1059 order for `%.*s' already defined at %s:%zu"),
1060 (int) namelen, seq->name,
1061 elem->file, elem->line);
1062 goto increment;
1063 }
1064
1065 /* Enqueue the new element. */
1066 elem->last = collate->cursor;
1067 if (collate->cursor != NULL)
1068 elem->next = NULL;
1069 else
1070 {
1071 elem->next = collate->cursor->next;
1072 elem->last->next = elem;
1073 if (elem->next != NULL)
1074 elem->next->last = elem;
1075 }
1076 if (collate->start == NULL)
1077 {
1078 assert (collate->cursor == NULL);
1079 collate->start = elem;
1080 }
1081 collate->cursor = elem;
1082
1083 /* Add the weight value. We take them from the
1084 `ellipsis_weights' member of `collate'. */
1085 elem->weights = (struct element_list_t *)
1086 obstack_alloc (&collate->mempool,
1087 nrules * sizeof (struct element_list_t));
1088 for (cnt = 0; cnt < nrules; ++cnt)
1089 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1090 && (collate->ellipsis_weight.weights[cnt].w[0]
1091 == ELEMENT_ELLIPSIS2))
1092 {
1093 elem->weights[cnt].w = (struct element_t **)
1094 obstack_alloc (&collate->mempool,
1095 sizeof (struct element_t *));
1096 elem->weights[cnt].w[0] = elem;
1097 elem->weights[cnt].cnt = 1;
1098 }
1099 else
1100 {
1101 /* Simly use the weight from `ellipsis_weight'. */
1102 elem->weights[cnt].w =
1103 collate->ellipsis_weight.weights[cnt].w;
1104 elem->weights[cnt].cnt =
1105 collate->ellipsis_weight.weights[cnt].cnt;
1106 }
1107 }
1108
1109 /* Increment for the next round. */
1110 increment:
1111 for (cnt = len - 1; cnt >= 0; --cnt)
1112 if (++mbcnt[cnt] != '\0')
1113 break;
1114
1115 /* Find out whether this was all. */
1116 if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1117 /* Yep, that's all. */
1118 break;
1119 }
1120 }
1121 }
1122 else
1123 {
1124 /* For symbolic range we naturally must have a beginning and an
1125 end specified by the user. */
1126 if (startp == NULL)
1127 lr_error (ldfile, _("\
1128 %s: symbolic range ellipsis must not directly follow `order_start'"),
1129 "LC_COLLATE");
1130 else if (endp == NULL)
1131 lr_error (ldfile, _("\
1132 %s: symbolic range ellipsis must not be direct followed by `order_end'"),
1133 "LC_COLLATE");
1134 else
1135 {
1136 /* Determine the range. To do so we have to determine the
1137 common prefix of the both names and then the numeric
1138 values of both ends. */
1139 size_t lenfrom = strlen (startp->name);
1140 size_t lento = strlen (endp->name);
1141 char buf[lento + 1];
1142 int preflen = 0;
1143 long int from;
1144 long int to;
1145 char *cp;
1146 int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1147
1148 if (lenfrom != lento)
1149 {
1150 invalid_range:
1151 lr_error (ldfile, _("\
1152 `%s' and `%.*s' are no valid names for symbolic range"),
1153 startp->name, (int) lento, endp->name);
1154 return;
1155 }
1156
1157 while (startp->name[preflen] == endp->name[preflen])
1158 if (startp->name[preflen] == '\0')
1159 /* Nothing to be done. The start and end point are identical
1160 and while inserting the end point we have already given
1161 the user an error message. */
1162 return;
1163 else
1164 ++preflen;
1165
1166 errno = 0;
1167 from = strtol (startp->name + preflen, &cp, base);
1168 if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1169 goto invalid_range;
1170
1171 errno = 0;
1172 to = strtol (endp->name + preflen, &cp, base);
1173 if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1174 goto invalid_range;
1175
1176 /* Copy the prefix. */
1177 memcpy (buf, startp->name, preflen);
1178
1179 /* Loop over all values. */
1180 for (++from; from < to; ++from)
1181 {
1182 struct element_t *elem = NULL;
1183 struct charseq *seq;
1184 uint32_t wc;
1185 int cnt;
1186
1187 /* Generate the name. */
1188 sprintf (buf + preflen, base == 10 ? "%d" : "%x", from);
1189
1190 /* Look whether this name is already defined. */
1191 if (find_entry (&collate->seq_table, arg->val.str.startmb,
1192 arg->val.str.lenmb, (void **) &elem) == 0)
1193 {
1194 if (elem->next != NULL || (collate->cursor != NULL
1195 && elem->next == collate->cursor))
1196 {
1197 lr_error (ldfile, _("\
1198 %s: order for `%.*s' already defined at %s:%zu"),
1199 "LC_COLLATE", (int) lenfrom, buf,
1200 elem->file, elem->line);
1201 continue;
1202 }
1203
1204 if (elem->name == NULL)
1205 {
1206 lr_error (ldfile, _("%s: `%s' must be a charater"),
1207 "LC_COLLATE", buf);
1208 continue;
1209 }
1210 }
1211
1212 if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1213 {
1214 /* Search for a character of this name. */
1215 seq = charmap_find_value (charmap, buf, lenfrom);
1216 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1217 {
1218 wc = repertoire_find_value (repertoire, buf, lenfrom);
1219
1220 if (seq != NULL)
1221 seq->ucs4 = wc;
1222 }
1223 else
1224 wc = seq->ucs4;
1225
1226 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1227 /* We don't know anything about a character with this
1228 name. XXX Should we warn? */
1229 continue;
1230
1231 if (elem == NULL)
1232 {
1233 uint32_t wcs[2] = { wc, 0 };
1234
1235 /* We have to allocate an entry. */
1236 elem = new_element (collate,
1237 seq != NULL ? seq->bytes : NULL,
1238 seq != NULL ? seq->nbytes : 0,
1239 wc == ILLEGAL_CHAR_VALUE
1240 ? NULL : wcs,
1241 buf, lenfrom);
1242 }
1243 else
1244 {
1245 /* Update the element. */
1246 if (seq != NULL)
1247 {
1248 elem->mbs = obstack_copy0 (&collate->mempool,
1249 seq->bytes, seq->nbytes);
1250 elem->nmbs = seq->nbytes;
1251 }
1252
1253 if (wc != ILLEGAL_CHAR_VALUE)
1254 {
1255 uint32_t zero = 0;
1256
1257 obstack_grow (&collate->mempool,
1258 &wc, sizeof (uint32_t));
1259 obstack_grow (&collate->mempool,
1260 &zero, sizeof (uint32_t));
1261 elem->wcs = obstack_finish (&collate->mempool);
1262 elem->nwcs = 1;
1263 }
1264 }
1265
1266 elem->file = ldfile->fname;
1267 elem->line = ldfile->lineno;
1268 elem->section = collate->current_section;
1269 }
1270
1271 /* Enqueue the new element. */
1272 elem->last = collate->cursor;
1273 elem->next = collate->cursor->next;
1274 elem->last->next = elem;
1275 if (elem->next != NULL)
1276 elem->next->last = elem;
1277 collate->cursor = elem;
1278
1279 /* Now add the weights. They come from the `ellipsis_weights'
1280 member of `collate'. */
1281 elem->weights = (struct element_list_t *)
1282 obstack_alloc (&collate->mempool,
1283 nrules * sizeof (struct element_list_t));
1284 for (cnt = 0; cnt < nrules; ++cnt)
1285 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1286 && (collate->ellipsis_weight.weights[cnt].w[0]
1287 == ELEMENT_ELLIPSIS2))
1288 {
1289 elem->weights[cnt].w = (struct element_t **)
1290 obstack_alloc (&collate->mempool,
1291 sizeof (struct element_t *));
1292 elem->weights[cnt].w[0] = elem;
1293 elem->weights[cnt].cnt = 1;
1294 }
1295 else
1296 {
1297 /* Simply use the weight from `ellipsis_weight'. */
1298 elem->weights[cnt].w =
1299 collate->ellipsis_weight.weights[cnt].w;
1300 elem->weights[cnt].cnt =
1301 collate->ellipsis_weight.weights[cnt].cnt;
1302 }
1303 }
1304 }
1305 }
1306 }
1307
1308
1309 static void
1310 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1311 struct localedef_t *copy_locale, int ignore_content)
1312 {
1313 if (!ignore_content)
1314 {
1315 struct locale_collate_t *collate;
1316
1317 if (copy_locale == NULL)
1318 {
1319 collate = locale->categories[LC_COLLATE].collate =
1320 (struct locale_collate_t *)
1321 xcalloc (1, sizeof (struct locale_collate_t));
1322
1323 /* Init the various data structures. */
1324 init_hash (&collate->elem_table, 100);
1325 init_hash (&collate->sym_table, 100);
1326 init_hash (&collate->seq_table, 500);
1327 obstack_init (&collate->mempool);
1328
1329 collate->col_weight_max = -1;
1330 }
1331 else
1332 collate = locale->categories[LC_COLLATE].collate =
1333 copy_locale->categories[LC_COLLATE].collate;
1334 }
1335
1336 ldfile->translate_strings = 0;
1337 ldfile->return_widestr = 0;
1338 }
1339
1340
1341 void
1342 collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
1343 {
1344 /* Now is the time when we can assign the individual collation
1345 values for all the symbols. We have possibly different values
1346 for the wide- and the multibyte-character symbols. This is done
1347 since it might make a difference in the encoding if there is in
1348 some cases no multibyte-character but there are wide-characters.
1349 (The other way around it is not important since theencoded
1350 collation value in the wide-character case is 32 bits wide and
1351 therefore requires no encoding).
1352
1353 The lowest collation value assigned is 2. Zero is reserved for
1354 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1355 functions and 1 is used to separate the individual passes for the
1356 different rules.
1357
1358 We also have to construct is list with all the bytes/words which
1359 can come first in a sequence, followed by all the elements which
1360 also start with this byte/word. The order is reverse which has
1361 among others the important effect that longer strings are located
1362 first in the list. This is required for the output data since
1363 the algorithm used in `strcoll' etc depends on this.
1364
1365 The multibyte case is easy. We simply sort into an array with
1366 256 elements. */
1367 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1368 int mbact[nrules];
1369 int wcact;
1370 struct element_t *runp;
1371 int i;
1372 int need_undefined = 0;
1373 struct section_list *sect;
1374 int ruleidx;
1375
1376 /* If this assertion is hit change the type in `element_t'. */
1377 assert (nrules <= sizeof (runp->used_in_level) * 8);
1378
1379 /* Find out which elements are used at which level. At the same
1380 time we find out whether we have any undefined symbols. */
1381 runp = collate->start;
1382 while (runp != NULL)
1383 {
1384 if (runp->mbs != NULL)
1385 {
1386 for (i = 0; i < nrules; ++i)
1387 {
1388 int j;
1389
1390 for (j = 0; j < runp->weights[i].cnt; ++j)
1391 /* A NULL pointer as the weight means IGNORE. */
1392 if (runp->weights[i].w[j] != NULL)
1393 {
1394 if (runp->weights[i].w[j]->weights == NULL)
1395 {
1396 error_at_line (0, 0, runp->file, runp->line,
1397 _("symbol `%s' not defined"),
1398 runp->weights[i].w[j]->name);
1399
1400 need_undefined = 1;
1401 runp->weights[i].w[j] = &collate->undefined;
1402 }
1403 else
1404 /* Set the bit for the level. */
1405 runp->weights[i].w[j]->used_in_level |= 1 << i;
1406 }
1407 }
1408 }
1409
1410 /* Up to the next entry. */
1411 runp = runp->next;
1412 }
1413
1414 /* Walk through the list of defined sequences and assign weights. Also
1415 create the data structure which will allow generating the single byte
1416 character based tables.
1417
1418 Since at each time only the weights for each of the rules are
1419 only compared to other weights for this rule it is possible to
1420 assign more compact weight values than simply counting all
1421 weights in sequence. We can assign weights from 3, one for each
1422 rule individually and only for those elements, which are actually
1423 used for this rule.
1424
1425 Why is this important? It is not for the wide char table. But
1426 it is for the singlebyte output since here larger numbers have to
1427 be encoded to make it possible to emit the value as a byte
1428 string. */
1429 for (i = 0; i < nrules; ++i)
1430 mbact[i] = 3;
1431 wcact = 3;
1432 runp = collate->start;
1433 while (runp != NULL)
1434 {
1435 /* Determine the order. */
1436 if (runp->used_in_level != 0)
1437 {
1438 runp->mborder = (int *) obstack_alloc (&collate->mempool,
1439 nrules * sizeof (int));
1440
1441 for (i = 0; i < nrules; ++i)
1442 if ((runp->used_in_level & (1 << i)) != 0)
1443 runp->mborder[i] = mbact[i]++;
1444 else
1445 runp->mborder[i] = 0;
1446 }
1447
1448 if (runp->mbs != NULL)
1449 {
1450 struct element_t **eptr;
1451
1452 /* Find the point where to insert in the list. */
1453 eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1454 while (*eptr != NULL)
1455 {
1456 if ((*eptr)->nmbs < runp->nmbs)
1457 break;
1458
1459 if ((*eptr)->nmbs == runp->nmbs)
1460 {
1461 int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1462
1463 if (c == 0)
1464 {
1465 /* This should not happen. It means that we have
1466 to symbols with the same byte sequence. It is
1467 of course an error. */
1468 error_at_line (0, 0, (*eptr)->file, (*eptr)->line,
1469 _("symbol `%s' has same encoding as"),
1470 (*eptr)->name);
1471 error_at_line (0, 0, runp->file, runp->line,
1472 _("symbol `%s'"), runp->name);
1473 goto dont_insert;
1474 }
1475 else if (c < 0)
1476 /* Insert it here. */
1477 break;
1478 }
1479
1480 /* To the next entry. */
1481 eptr = &(*eptr)->mbnext;
1482 }
1483
1484 /* Set the pointers. */
1485 runp->mbnext = *eptr;
1486 *eptr = runp;
1487 dont_insert:
1488 }
1489
1490 if (runp->wcs != NULL)
1491 runp->wcorder = wcact++;
1492
1493 /* Up to the next entry. */
1494 runp = runp->next;
1495 }
1496
1497 /* Find out whether any of the `mbheads' entries is unset. In this
1498 case we use the UNDEFINED entry. */
1499 for (i = 1; i < 256; ++i)
1500 if (collate->mbheads[i] == NULL)
1501 {
1502 need_undefined = 1;
1503 collate->mbheads[i] = &collate->undefined;
1504 }
1505
1506 /* Now determine whether the UNDEFINED entry is needed and if yes,
1507 whether it was defined. */
1508 collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1509 if (need_undefined && collate->undefined.file == NULL)
1510 {
1511 error (0, 0, _("no definition of `UNDEFINED'"));
1512
1513 /* Add UNDEFINED at the end. */
1514 collate->undefined.mborder =
1515 (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1516
1517 for (i = 0; i < nrules; ++i)
1518 collate->undefined.mborder[i] = mbact[i]++;
1519
1520 collate->undefined.wcorder = wcact++;
1521 }
1522
1523 /* Finally, try to unify the rules for the sections. Whenever the rules
1524 for a section are the same as those for another section give the
1525 ruleset the same index. Since there are never many section we can
1526 use an O(n^2) algorithm here. */
1527 sect = collate->sections;
1528 assert (sect != NULL);
1529 ruleidx = 0;
1530 do
1531 {
1532 struct section_list *osect = collate->sections;
1533
1534 while (osect != sect)
1535 if (memcmp (osect->rules, sect->rules, nrules) == 0)
1536 break;
1537 else
1538 osect = osect->next;
1539
1540 if (osect == sect)
1541 sect->ruleidx = ruleidx++;
1542 else
1543 sect->ruleidx = osect->ruleidx;
1544
1545 /* Next section. */
1546 sect = sect->next;
1547 }
1548 while (sect != NULL);
1549 /* We are currently not prepared for more than 256 rulesets. But this
1550 should never really be a problem. */
1551 assert (ruleidx <= 256);
1552 }
1553
1554
1555 static inline int32_t
1556 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1557 struct element_t *elem)
1558 {
1559 size_t cnt;
1560 int32_t retval;
1561
1562 /* Optimize the use of UNDEFINED. */
1563 if (elem == &collate->undefined)
1564 /* The weights are already inserted. */
1565 return 0;
1566
1567 /* This byte can start exactly one collation element and this is
1568 a single byte. We can directly give the index to the weights. */
1569 retval = obstack_object_size (pool);
1570
1571 /* Construct the weight. */
1572 for (cnt = 0; cnt < nrules; ++cnt)
1573 {
1574 char buf[elem->weights[cnt].cnt * 7];
1575 int len = 0;
1576 int i;
1577
1578 /* Add the direction. */
1579 obstack_1grow (pool, elem->section->rules[cnt]);
1580
1581 for (i = 0; i < elem->weights[cnt].cnt; ++i)
1582 /* Encode the weight value. */
1583 if (elem->weights[cnt].w[i] == NULL)
1584 {
1585 /* This entry was IGNORE. */
1586 buf[len++] = '\3';
1587 }
1588 else
1589 len += utf8_encode (&buf[len],
1590 elem->weights[cnt].w[i]->mborder[cnt]);
1591
1592 /* And add the buffer content. */
1593 obstack_grow (pool, buf, len);
1594 }
1595
1596 return retval;
1597 }
1598
1599
1600 void
1601 collate_output (struct localedef_t *locale, struct charmap_t *charmap,
1602 const char *output_path)
1603 {
1604 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1605 const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
1606 struct iovec iov[2 + nelems];
1607 struct locale_file data;
1608 uint32_t idx[nelems];
1609 size_t cnt;
1610 size_t ch;
1611 int32_t tablemb[256];
1612 struct obstack weightpool;
1613 struct obstack extrapool;
1614 struct section_list *sect;
1615 int i;
1616
1617 obstack_init (&weightpool);
1618 obstack_init (&extrapool);
1619
1620 data.magic = LIMAGIC (LC_COLLATE);
1621 data.n = nelems;
1622 iov[0].iov_base = (void *) &data;
1623 iov[0].iov_len = sizeof (data);
1624
1625 iov[1].iov_base = (void *) idx;
1626 iov[1].iov_len = sizeof (idx);
1627
1628 idx[0] = iov[0].iov_len + iov[1].iov_len;
1629 cnt = 0;
1630
1631 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
1632 iov[2 + cnt].iov_base = &collate->nrules;
1633 iov[2 + cnt].iov_len = sizeof (uint32_t);
1634 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1635 ++cnt;
1636
1637 /* Prepare the ruleset table. */
1638 for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
1639 if (sect->ruleidx == i)
1640 {
1641 obstack_grow (&weightpool, sect->rules, nrules);
1642 ++i;
1643 }
1644 /* And align the output. */
1645 i = (nrules * i) % __alignof__ (int32_t);
1646 if (i > 0)
1647 do
1648 obstack_1grow (&weightpool, '\0');
1649 while (++i < __alignof__ (int32_t));
1650
1651 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
1652 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
1653 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
1654 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1655 ++cnt;
1656
1657 /* Generate the 8-bit table. Walk through the lists of sequences
1658 starting with the same byte and add them one after the other to
1659 the table. In case we have more than one sequence starting with
1660 the same byte we have to use extra indirection.
1661
1662 First add a record for the NUL byte. This entry will never be used
1663 so it does not matter. */
1664 tablemb[0] = 0;
1665
1666 /* Now insert the `UNDEFINED' value if it is used. Since this value
1667 will probably be used more than once it is good to store the
1668 weights only once. */
1669 if (collate->undefined.used_in_level != 0)
1670 output_weight (&weightpool, collate, &collate->undefined);
1671
1672 for (ch = 1; ch < 256; ++ch)
1673 if (collate->mbheads[ch]->mbnext == NULL
1674 && collate->mbheads[ch]->nmbs == 1)
1675 {
1676 tablemb[ch] = output_weight (&weightpool, collate,
1677 collate->mbheads[ch]);
1678 }
1679 else
1680 {
1681 /* The entries in the list are sorted by length and then
1682 alphabetically. This is the order in which we will add the
1683 elements to the collation table. This allows to simply
1684 walk the table in sequence and stop at the first matching
1685 entry. Since the longer sequences are coming first in the
1686 list they have the possibility to match first, just as it
1687 has to be. In the worst case we are walking to the end of
1688 the list where we put, if no singlebyte sequence is defined
1689 in the locale definition, the weights for UNDEFINED.
1690
1691 To reduce the length of the search list we compress them a bit.
1692 This happens by collecting sequences of consecutive byte
1693 sequences in one entry (having and begin and end byte sequence)
1694 and add only one index into the weight table. We can find the
1695 consecutive entries since they are also consecutive in the list. */
1696 struct element_t *runp = collate->mbheads[ch];
1697 struct element_t *lastp;
1698
1699 tablemb[ch] = -obstack_object_size (&extrapool);
1700
1701 do
1702 {
1703 /* Store the current index in the weight table. We know that
1704 the current position in the `extrapool' is aligned on a
1705 32-bit address. */
1706 int32_t weightidx;
1707 int added;
1708
1709 /* Output the weight info. */
1710 weightidx = output_weight (&weightpool, collate, runp);
1711
1712 /* Find out wether this is a single entry or we have more than
1713 one consecutive entry. */
1714 if (runp->mbnext != NULL
1715 && runp->nmbs == runp->mbnext->nmbs
1716 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
1717 && (runp->mbs[runp->nmbs - 1] + 1
1718 == runp->mbnext->mbs[runp->nmbs - 1]))
1719 {
1720 int i;
1721
1722 /* More than one consecutive entry. We mark this by having
1723 a negative index into the weight table. */
1724 weightidx = -weightidx;
1725
1726 /* Now add first the initial byte sequence. */
1727 added = ((sizeof (int32_t) + 1 + 1 + 2 * (runp->nmbs - 1)
1728 + __alignof__ (int32_t) - 1)
1729 & ~(__alignof__ (int32_t) - 1));
1730 obstack_make_room (&extrapool, added);
1731
1732 if (sizeof (int32_t) == sizeof (int))
1733 obstack_int_grow_fast (&extrapool, weightidx);
1734 else
1735 obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
1736 obstack_1grow_fast (&extrapool, runp->section->ruleidx);
1737 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
1738 for (i = 1; i < runp->nmbs; ++i)
1739 obstack_1grow_fast (&extrapool, runp->mbs[i]);
1740
1741 /* Now find the end of the consecutive sequence. */
1742 do
1743 runp = runp->next;
1744 while (runp->mbnext != NULL
1745 && runp->nmbs == runp->mbnext->nmbs
1746 && memcmp (runp->mbs, runp->mbnext->mbs,
1747 runp->nmbs - 1) == 0
1748 && (runp->mbs[runp->nmbs - 1] + 1
1749 == runp->mbnext->mbs[runp->nmbs - 1]));
1750
1751 /* And add the end by sequence. Without length this time. */
1752 for (i = 1; i < runp->nmbs; ++i)
1753 obstack_1grow_fast (&extrapool, runp->mbs[i]);
1754 }
1755 else
1756 {
1757 /* A single entry. Simply add the index and the length and
1758 string (except for the first character which is already
1759 tested for). */
1760 int i;
1761
1762 added = ((sizeof (int32_t) + 1 + 1 + runp->nmbs - 1
1763 + __alignof__ (int32_t) - 1)
1764 & ~(__alignof__ (int32_t) - 1));
1765 obstack_make_room (&extrapool, added);
1766
1767 if (sizeof (int32_t) == sizeof (int))
1768 obstack_int_grow_fast (&extrapool, weightidx);
1769 else
1770 obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
1771 obstack_1grow_fast (&extrapool, runp->section->ruleidx);
1772 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
1773 for (i = 1; i < runp->nmbs; ++i)
1774 obstack_1grow_fast (&extrapool, runp->mbs[i]);
1775 }
1776
1777 /* Add alignment bytes if necessary. */
1778 i = added % __alignof__ (int32_t);
1779 if (i > 0)
1780 do
1781 obstack_1grow_fast (&extrapool, '\0');
1782 while (++i != __alignof__ (int32_t));
1783
1784 /* Next entry. */
1785 lastp = runp;
1786 runp = runp->mbnext;
1787 }
1788 while (runp != NULL);
1789
1790 /* If the final entry in the list is not a single character we
1791 add an UNDEFINED entry here. */
1792 if (lastp->nmbs != 1)
1793 {
1794 int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t))
1795 & ~(__alignof__ (int32_t) - 1));
1796 obstack_make_room (&extrapool, added);
1797
1798 if (sizeof (int32_t) == sizeof (int))
1799 obstack_int_grow_fast (&extrapool, 0);
1800 else
1801 {
1802 int32_t zero = 0;
1803 obstack_grow (&extrapool, &zero, sizeof (int32_t));
1804 }
1805 /* XXX What rule? We just pick the first. */
1806 obstack_1grow_fast (&extrapool, 0);
1807 /* Length is zero. */
1808 obstack_1grow_fast (&extrapool, 0);
1809
1810 /* Add alignment bytes if necessary. */
1811 i = added % __alignof__ (int32_t);
1812 if (i > 0)
1813 do
1814 obstack_1grow_fast (&extrapool, '\0');
1815 while (++i != __alignof__ (int32_t));
1816 }
1817 }
1818
1819 /* Now add the three tables. */
1820 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
1821 iov[2 + cnt].iov_base = tablemb;
1822 iov[2 + cnt].iov_len = sizeof (tablemb);
1823 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1824 ++cnt;
1825
1826 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
1827 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
1828 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
1829 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1830 ++cnt;
1831
1832 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
1833 iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
1834 iov[2 + cnt].iov_base = obstack_finish (&extrapool);
1835 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1836 ++cnt;
1837
1838
1839 assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
1840
1841 write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
1842 }
1843
1844
1845 void
1846 collate_read (struct linereader *ldfile, struct localedef_t *result,
1847 struct charmap_t *charmap, const char *repertoire_name,
1848 int ignore_content)
1849 {
1850 struct repertoire_t *repertoire = NULL;
1851 struct locale_collate_t *collate;
1852 struct token *now;
1853 struct token *arg = NULL;
1854 enum token_t nowtok;
1855 int state = 0;
1856 enum token_t was_ellipsis = tok_none;
1857 struct localedef_t *copy_locale = NULL;
1858
1859 /* Get the repertoire we have to use. */
1860 if (repertoire_name != NULL)
1861 repertoire = repertoire_read (repertoire_name);
1862
1863 /* The rest of the line containing `LC_COLLATE' must be free. */
1864 lr_ignore_rest (ldfile, 1);
1865
1866 do
1867 {
1868 now = lr_token (ldfile, charmap, NULL);
1869 nowtok = now->tok;
1870 }
1871 while (nowtok == tok_eol);
1872
1873 if (nowtok == tok_copy)
1874 {
1875 state = 2;
1876 now = lr_token (ldfile, charmap, NULL);
1877 if (now->tok != tok_string)
1878 {
1879 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
1880
1881 skip_category:
1882 do
1883 now = lr_token (ldfile, charmap, NULL);
1884 while (now->tok != tok_eof && now->tok != tok_end);
1885
1886 if (now->tok != tok_eof
1887 || (now = lr_token (ldfile, charmap, NULL), now->tok == tok_eof))
1888 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
1889 else if (now->tok != tok_lc_collate)
1890 {
1891 lr_error (ldfile, _("\
1892 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
1893 lr_ignore_rest (ldfile, 0);
1894 }
1895 else
1896 lr_ignore_rest (ldfile, 1);
1897
1898 return;
1899 }
1900
1901 /* Get the locale definition. */
1902 copy_locale = find_locale (LC_COLLATE, now->val.str.startmb,
1903 repertoire_name, charmap);
1904 if ((copy_locale->avail & COLLATE_LOCALE) == 0)
1905 {
1906 /* Not yet loaded. So do it now. */
1907 if (locfile_read (copy_locale, charmap) != 0)
1908 goto skip_category;
1909 }
1910
1911 lr_ignore_rest (ldfile, 1);
1912
1913 now = lr_token (ldfile, charmap, NULL);
1914 nowtok = now->tok;
1915 }
1916
1917 /* Prepare the data structures. */
1918 collate_startup (ldfile, result, copy_locale, ignore_content);
1919 collate = result->categories[LC_COLLATE].collate;
1920
1921 while (1)
1922 {
1923 /* Of course we don't proceed beyond the end of file. */
1924 if (nowtok == tok_eof)
1925 break;
1926
1927 /* Ignore empty lines. */
1928 if (nowtok == tok_eol)
1929 {
1930 now = lr_token (ldfile, charmap, NULL);
1931 nowtok = now->tok;
1932 continue;
1933 }
1934
1935 switch (nowtok)
1936 {
1937 case tok_coll_weight_max:
1938 /* Ignore the rest of the line if we don't need the input of
1939 this line. */
1940 if (ignore_content)
1941 {
1942 lr_ignore_rest (ldfile, 0);
1943 break;
1944 }
1945
1946 if (state != 0)
1947 goto err_label;
1948
1949 arg = lr_token (ldfile, charmap, NULL);
1950 if (arg->tok != tok_number)
1951 goto err_label;
1952 if (collate->col_weight_max != -1)
1953 lr_error (ldfile, _("%s: duplicate definition of `%s'"),
1954 "LC_COLLATE", "col_weight_max");
1955 else
1956 collate->col_weight_max = arg->val.num;
1957 lr_ignore_rest (ldfile, 1);
1958 break;
1959
1960 case tok_section_symbol:
1961 /* Ignore the rest of the line if we don't need the input of
1962 this line. */
1963 if (ignore_content)
1964 {
1965 lr_ignore_rest (ldfile, 0);
1966 break;
1967 }
1968
1969 if (state != 0)
1970 goto err_label;
1971
1972 arg = lr_token (ldfile, charmap, repertoire);
1973 if (arg->tok != tok_bsymbol)
1974 goto err_label;
1975 else if (!ignore_content)
1976 {
1977 /* Check whether this section is already known. */
1978 struct section_list *known = collate->sections;
1979 while (known != NULL)
1980 if (strcmp (known->name, arg->val.str.startmb) == 0)
1981 break;
1982
1983 if (known != NULL)
1984 {
1985 lr_error (ldfile,
1986 _("%s: duplicate declaration of section `%s'"),
1987 "LC_COLLATE", arg->val.str.startmb);
1988 free (arg->val.str.startmb);
1989 }
1990 else
1991 collate->sections = make_seclist_elem (collate,
1992 arg->val.str.startmb,
1993 collate->sections);
1994
1995 lr_ignore_rest (ldfile, known == NULL);
1996 }
1997 else
1998 {
1999 free (arg->val.str.startmb);
2000 lr_ignore_rest (ldfile, 0);
2001 }
2002 break;
2003
2004 case tok_collating_element:
2005 /* Ignore the rest of the line if we don't need the input of
2006 this line. */
2007 if (ignore_content)
2008 {
2009 lr_ignore_rest (ldfile, 0);
2010 break;
2011 }
2012
2013 if (state != 0)
2014 goto err_label;
2015
2016 arg = lr_token (ldfile, charmap, repertoire);
2017 if (arg->tok != tok_bsymbol)
2018 goto err_label;
2019 else
2020 {
2021 const char *symbol = arg->val.str.startmb;
2022 size_t symbol_len = arg->val.str.lenmb;
2023
2024 /* Next the `from' keyword. */
2025 arg = lr_token (ldfile, charmap, repertoire);
2026 if (arg->tok != tok_from)
2027 {
2028 free ((char *) symbol);
2029 goto err_label;
2030 }
2031
2032 ldfile->return_widestr = 1;
2033
2034 /* Finally the string with the replacement. */
2035 arg = lr_token (ldfile, charmap, repertoire);
2036 ldfile->return_widestr = 0;
2037 if (arg->tok != tok_string)
2038 goto err_label;
2039
2040 if (!ignore_content)
2041 {
2042 if (symbol == NULL)
2043 lr_error (ldfile, _("\
2044 %s: unknown character in collating element name"),
2045 "LC_COLLATE");
2046 if (arg->val.str.startmb == NULL)
2047 lr_error (ldfile, _("\
2048 %s: unknown character in collating element definition"),
2049 "LC_COLLATE");
2050 if (arg->val.str.startwc == NULL)
2051 lr_error (ldfile, _("\
2052 %s: unknown wide character in collating element definition"),
2053 "LC_COLLATE");
2054 else if (arg->val.str.lenwc < 2)
2055 lr_error (ldfile, _("\
2056 %s: substitution string in collating element definition must have at least two characters"),
2057 "LC_COLLATE");
2058
2059 if (symbol != NULL)
2060 {
2061 /* The name is already defined. */
2062 if (check_duplicate (ldfile, collate, charmap,
2063 repertoire, symbol, symbol_len))
2064 goto col_elem_free;
2065
2066 if (insert_entry (&collate->elem_table,
2067 symbol, symbol_len,
2068 new_element (collate,
2069 NULL, 0, NULL, symbol,
2070 symbol_len)) < 0)
2071 lr_error (ldfile, _("\
2072 error while adding collating element"));
2073 }
2074 else
2075 goto col_elem_free;
2076 }
2077 else
2078 {
2079 col_elem_free:
2080 if (symbol != NULL)
2081 free ((char *) symbol);
2082 if (arg->val.str.startmb != NULL)
2083 free (arg->val.str.startmb);
2084 if (arg->val.str.startwc != NULL)
2085 free (arg->val.str.startwc);
2086 }
2087 lr_ignore_rest (ldfile, 1);
2088 }
2089 break;
2090
2091 case tok_collating_symbol:
2092 /* Ignore the rest of the line if we don't need the input of
2093 this line. */
2094 if (ignore_content)
2095 {
2096 lr_ignore_rest (ldfile, 0);
2097 break;
2098 }
2099
2100 if (state != 0)
2101 goto err_label;
2102
2103 arg = lr_token (ldfile, charmap, repertoire);
2104 if (arg->tok != tok_bsymbol)
2105 goto err_label;
2106 else
2107 {
2108 const char *symbol = arg->val.str.startmb;
2109 size_t symbol_len = arg->val.str.lenmb;
2110
2111 if (!ignore_content)
2112 {
2113 if (symbol == NULL)
2114 lr_error (ldfile, _("\
2115 %s: unknown character in collating symbol name"),
2116 "LC_COLLATE");
2117 else
2118 {
2119 /* The name is already defined. */
2120 if (check_duplicate (ldfile, collate, charmap,
2121 repertoire, symbol, symbol_len))
2122 goto col_sym_free;
2123
2124 if (insert_entry (&collate->sym_table,
2125 symbol, symbol_len,
2126 new_symbol (collate)) < 0)
2127 lr_error (ldfile, _("\
2128 error while adding collating symbol"));
2129 }
2130 }
2131 else
2132 {
2133 col_sym_free:
2134 if (symbol != NULL)
2135 free ((char *) symbol);
2136 }
2137 lr_ignore_rest (ldfile, 1);
2138 }
2139 break;
2140
2141 case tok_symbol_equivalence:
2142 /* Ignore the rest of the line if we don't need the input of
2143 this line. */
2144 if (ignore_content)
2145 {
2146 lr_ignore_rest (ldfile, 0);
2147 break;
2148 }
2149
2150 if (state != 0)
2151 goto err_label;
2152
2153 arg = lr_token (ldfile, charmap, repertoire);
2154 if (arg->tok != tok_bsymbol)
2155 goto err_label;
2156 else
2157 {
2158 const char *newname = arg->val.str.startmb;
2159 size_t newname_len = arg->val.str.lenmb;
2160 const char *symname;
2161 size_t symname_len;
2162 struct symbol_t *symval;
2163
2164 arg = lr_token (ldfile, charmap, repertoire);
2165 if (arg->tok != tok_bsymbol)
2166 {
2167 if (newname != NULL)
2168 free ((char *) newname);
2169 goto err_label;
2170 }
2171
2172 symname = arg->val.str.startmb;
2173 symname_len = arg->val.str.lenmb;
2174
2175 if (!ignore_content)
2176 {
2177 if (newname == NULL)
2178 {
2179 lr_error (ldfile, _("\
2180 %s: unknown character in equivalent definition name"),
2181 "LC_COLLATE");
2182 goto sym_equiv_free;
2183 }
2184 if (symname == NULL)
2185 {
2186 lr_error (ldfile, _("\
2187 %s: unknown character in equivalent definition value"),
2188 "LC_COLLATE");
2189 goto sym_equiv_free;
2190 }
2191 /* The name is already defined. */
2192 if (check_duplicate (ldfile, collate, charmap,
2193 repertoire, symname, symname_len))
2194 goto col_sym_free;
2195
2196 /* See whether the symbol name is already defined. */
2197 if (find_entry (&collate->sym_table, symname, symname_len,
2198 (void **) &symval) != 0)
2199 {
2200 lr_error (ldfile, _("\
2201 %s: unknown symbol `%s' in equivalent definition"),
2202 "LC_COLLATE", symname);
2203 goto col_sym_free;
2204 }
2205
2206 if (insert_entry (&collate->sym_table,
2207 newname, newname_len, symval) < 0)
2208 {
2209 lr_error (ldfile, _("\
2210 error while adding equivalent collating symbol"));
2211 goto sym_equiv_free;
2212 }
2213
2214 free ((char *) symname);
2215 }
2216 else
2217 {
2218 sym_equiv_free:
2219 if (newname != NULL)
2220 free ((char *) newname);
2221 if (symname != NULL)
2222 free ((char *) symname);
2223 }
2224 lr_ignore_rest (ldfile, 1);
2225 }
2226 break;
2227
2228 case tok_order_start:
2229 /* Ignore the rest of the line if we don't need the input of
2230 this line. */
2231 if (ignore_content)
2232 {
2233 lr_ignore_rest (ldfile, 0);
2234 break;
2235 }
2236
2237 if (state != 0 && state != 1)
2238 goto err_label;
2239 state = 1;
2240
2241 /* The 14652 draft does not specify whether all `order_start' lines
2242 must contain the same number of sort-rules, but 14651 does. So
2243 we require this here as well. */
2244 arg = lr_token (ldfile, charmap, repertoire);
2245 if (arg->tok == tok_bsymbol)
2246 {
2247 /* This better should be a section name. */
2248 struct section_list *sp = collate->sections;
2249 while (sp != NULL
2250 && strcmp (sp->name, arg->val.str.startmb) != 0)
2251 sp = sp->next;
2252
2253 if (sp == NULL)
2254 {
2255 lr_error (ldfile, _("\
2256 %s: unknown section name `%s'"),
2257 "LC_COLLATE", arg->val.str.startmb);
2258 /* We use the error section. */
2259 collate->current_section = &collate->error_section;
2260
2261 if (collate->error_section.first == NULL)
2262 {
2263 collate->error_section.next = collate->sections;
2264 collate->sections = &collate->error_section;
2265 }
2266 }
2267 else
2268 {
2269 /* Remember this section. */
2270 collate->current_section = sp;
2271
2272 /* One should not be allowed to open the same
2273 section twice. */
2274 if (sp->first != NULL)
2275 lr_error (ldfile, _("\
2276 %s: multiple order definitions for section `%s'"),
2277 "LC_COLLATE", sp->name);
2278 else
2279 {
2280 sp->next = collate->sections;
2281 collate->sections = sp;
2282 }
2283
2284 /* Next should come the end of the line or a semicolon. */
2285 arg = lr_token (ldfile, charmap, repertoire);
2286 if (arg->tok == tok_eol)
2287 {
2288 uint32_t cnt;
2289
2290 /* This means we have exactly one rule: `forward'. */
2291 if (collate->nrules > 1)
2292 lr_error (ldfile, _("\
2293 %s: invalid number of sorting rules"),
2294 "LC_COLLATE");
2295 else
2296 collate->nrules = 1;
2297 sp->rules = obstack_alloc (&collate->mempool,
2298 (sizeof (enum coll_sort_rule)
2299 * collate->nrules));
2300 for (cnt = 0; cnt < collate->nrules; ++cnt)
2301 sp->rules[cnt] = sort_forward;
2302
2303 /* Next line. */
2304 break;
2305 }
2306
2307 /* Get the next token. */
2308 arg = lr_token (ldfile, charmap, repertoire);
2309 }
2310 }
2311 else
2312 {
2313 /* There is no section symbol. Therefore we use the unnamed
2314 section. */
2315 collate->current_section = &collate->unnamed_section;
2316
2317 if (collate->unnamed_section.first != NULL)
2318 lr_error (ldfile, _("\
2319 %s: multiple order definitions for unnamed section"),
2320 "LC_COLLATE");
2321 else
2322 {
2323 collate->unnamed_section.next = collate->sections;
2324 collate->sections = &collate->unnamed_section;
2325 }
2326 }
2327
2328 /* Now read the direction names. */
2329 read_directions (ldfile, arg, charmap, repertoire, collate);
2330
2331 /* From now on we need the strings untranslated. */
2332 ldfile->translate_strings = 0;
2333 break;
2334
2335 case tok_order_end:
2336 /* Ignore the rest of the line if we don't need the input of
2337 this line. */
2338 if (ignore_content)
2339 {
2340 lr_ignore_rest (ldfile, 0);
2341 break;
2342 }
2343
2344 if (state != 1)
2345 goto err_label;
2346
2347 /* Handle ellipsis at end of list. */
2348 if (was_ellipsis != tok_none)
2349 {
2350 handle_ellipsis (ldfile, NULL, was_ellipsis, charmap, repertoire,
2351 collate);
2352 was_ellipsis = tok_none;
2353 }
2354
2355 state = 2;
2356 lr_ignore_rest (ldfile, 1);
2357 break;
2358
2359 case tok_reorder_after:
2360 /* Ignore the rest of the line if we don't need the input of
2361 this line. */
2362 if (ignore_content)
2363 {
2364 lr_ignore_rest (ldfile, 0);
2365 break;
2366 }
2367
2368 if (state == 1)
2369 {
2370 lr_error (ldfile, _("%s: missing `order_end' keyword"),
2371 "LC_COLLATE");
2372 state = 2;
2373
2374 /* Handle ellipsis at end of list. */
2375 if (was_ellipsis != tok_none)
2376 {
2377 handle_ellipsis (ldfile, arg, was_ellipsis, charmap,
2378 repertoire, collate);
2379 was_ellipsis = tok_none;
2380 }
2381 }
2382 else if (state != 2 && state != 3)
2383 goto err_label;
2384 state = 3;
2385
2386 arg = lr_token (ldfile, charmap, repertoire);
2387 if (arg->tok == tok_bsymbol)
2388 {
2389 /* Find this symbol in the sequence table. */
2390 struct element_t *insp;
2391 int no_error = 1;
2392
2393 if (find_entry (&collate->seq_table, arg->val.str.startmb,
2394 arg->val.str.lenmb, (void **) &insp) == 0)
2395 /* Yes, the symbol exists. Simply point the cursor
2396 to it. */
2397 collate->cursor = insp;
2398 else
2399 {
2400 /* This is bad. The symbol after which we have to
2401 insert does not exist. */
2402 lr_error (ldfile, _("\
2403 %s: cannot reorder after %.*s: symbol not known"),
2404 "LC_COLLATE", (int) arg->val.str.lenmb,
2405 arg->val.str.startmb);
2406 collate->cursor = NULL;
2407 no_error = 0;
2408 }
2409
2410 lr_ignore_rest (ldfile, no_error);
2411 }
2412 else
2413 /* This must not happen. */
2414 goto err_label;
2415 break;
2416
2417 case tok_reorder_end:
2418 /* Ignore the rest of the line if we don't need the input of
2419 this line. */
2420 if (ignore_content)
2421 break;
2422
2423 if (state != 3)
2424 goto err_label;
2425 state = 4;
2426 lr_ignore_rest (ldfile, 1);
2427 break;
2428
2429 case tok_reorder_sections_after:
2430 /* Ignore the rest of the line if we don't need the input of
2431 this line. */
2432 if (ignore_content)
2433 {
2434 lr_ignore_rest (ldfile, 0);
2435 break;
2436 }
2437
2438 if (state == 1)
2439 {
2440 lr_error (ldfile, _("%s: missing `order_end' keyword"),
2441 "LC_COLLATE");
2442 state = 2;
2443
2444 /* Handle ellipsis at end of list. */
2445 if (was_ellipsis != tok_none)
2446 {
2447 handle_ellipsis (ldfile, NULL, was_ellipsis, charmap,
2448 repertoire, collate);
2449 was_ellipsis = tok_none;
2450 }
2451 }
2452 else if (state == 3)
2453 {
2454 error (0, 0, _("%s: missing `reorder-end' keyword"),
2455 "LC_COLLATE");
2456 state = 4;
2457 }
2458 else if (state != 2 && state != 4)
2459 goto err_label;
2460 state = 5;
2461
2462 /* Get the name of the sections we are adding after. */
2463 arg = lr_token (ldfile, charmap, repertoire);
2464 if (arg->tok == tok_bsymbol)
2465 {
2466 /* Now find a section with this name. */
2467 struct section_list *runp = collate->sections;
2468
2469 while (runp != NULL)
2470 {
2471 if (runp->name != NULL
2472 && strlen (runp->name) == arg->val.str.lenmb
2473 && memcmp (runp->name, arg->val.str.startmb,
2474 arg->val.str.lenmb) == 0)
2475 break;
2476
2477 runp = runp->next;
2478 }
2479
2480 if (runp != NULL)
2481 collate->current_section = runp;
2482 else
2483 {
2484 /* This is bad. The section after which we have to
2485 reorder does not exist. Therefore we cannot
2486 process the whole rest of this reorder
2487 specification. */
2488 lr_error (ldfile, _("%s: section `%.*s' not known"),
2489 "LC_COLLATE", (int) arg->val.str.lenmb,
2490 arg->val.str.startmb);
2491
2492 do
2493 {
2494 lr_ignore_rest (ldfile, 0);
2495
2496 now = lr_token (ldfile, charmap, NULL);
2497 }
2498 while (now->tok == tok_reorder_sections_after
2499 || now->tok == tok_reorder_sections_end
2500 || now->tok == tok_end);
2501
2502 /* Process the token we just saw. */
2503 nowtok = now->tok;
2504 continue;
2505 }
2506 }
2507 else
2508 /* This must not happen. */
2509 goto err_label;
2510 break;
2511
2512 case tok_reorder_sections_end:
2513 /* Ignore the rest of the line if we don't need the input of
2514 this line. */
2515 if (ignore_content)
2516 break;
2517
2518 if (state != 5)
2519 goto err_label;
2520 state = 6;
2521 lr_ignore_rest (ldfile, 1);
2522 break;
2523
2524 case tok_bsymbol:
2525 /* Ignore the rest of the line if we don't need the input of
2526 this line. */
2527 if (ignore_content)
2528 {
2529 lr_ignore_rest (ldfile, 0);
2530 break;
2531 }
2532
2533 if (state != 1 && state != 3)
2534 goto err_label;
2535
2536 if (state == 3)
2537 {
2538 /* It is possible that we already have this collation sequence.
2539 In this case we move the entry. */
2540 struct element_t *seqp;
2541
2542 /* If the symbol after which we have to insert was not found
2543 ignore all entries. */
2544 if (collate->cursor == NULL)
2545 {
2546 lr_ignore_rest (ldfile, 0);
2547 break;
2548 }
2549
2550 if (find_entry (&collate->seq_table, arg->val.str.startmb,
2551 arg->val.str.lenmb, (void **) &seqp) == 0)
2552 {
2553 /* Remove the entry from the old position. */
2554 if (seqp->last == NULL)
2555 collate->start = seqp->next;
2556 else
2557 seqp->last->next = seqp->next;
2558 if (seqp->next != NULL)
2559 seqp->next->last = seqp->last;
2560
2561 /* We also have to check whether this entry is the
2562 first or last of a section. */
2563 if (seqp->section->first == seqp)
2564 {
2565 if (seqp->section->first == seqp->section->last)
2566 /* This section has no content anymore. */
2567 seqp->section->first = seqp->section->last = NULL;
2568 else
2569 seqp->section->first = seqp->next;
2570 }
2571 else if (seqp->section->last == seqp)
2572 seqp->section->last = seqp->last;
2573
2574 /* Now insert it in the new place. */
2575 seqp->next = collate->cursor->next;
2576 seqp->last = collate->cursor;
2577 collate->cursor->next = seqp;
2578 if (seqp->next != NULL)
2579 seqp->next->last = seqp;
2580
2581 seqp->section = collate->cursor->section;
2582 if (seqp->section->last == collate->cursor)
2583 seqp->section->last = seqp;
2584
2585 break;
2586 }
2587
2588 /* Otherwise we just add a new entry. */
2589 }
2590 else if (state == 5)
2591 {
2592 /* We are reordering sections. Find the named section. */
2593 struct section_list *runp = collate->sections;
2594 struct section_list *prevp = NULL;
2595
2596 while (runp != NULL)
2597 {
2598 if (runp->name != NULL
2599 && strlen (runp->name) == arg->val.str.lenmb
2600 && memcmp (runp->name, arg->val.str.startmb,
2601 arg->val.str.lenmb) == 0)
2602 break;
2603
2604 prevp = runp;
2605 runp = runp->next;
2606 }
2607
2608 if (runp == NULL)
2609 {
2610 lr_error (ldfile, _("%s: section `%.*s' not known"),
2611 "LC_COLLATE", (int) arg->val.str.lenmb,
2612 arg->val.str.startmb);
2613 lr_ignore_rest (ldfile, 0);
2614 }
2615 else
2616 {
2617 if (runp != collate->current_section)
2618 {
2619 /* Remove the named section from the old place and
2620 insert it in the new one. */
2621 prevp->next = runp->next;
2622
2623 runp->next = collate->current_section->next;
2624 collate->current_section->next = runp;
2625 collate->current_section = runp;
2626 }
2627
2628 /* Process the rest of the line which might change
2629 the collation rules. */
2630 arg = lr_token (ldfile, charmap, repertoire);
2631 if (arg->tok != tok_eof && arg->tok != tok_eol)
2632 read_directions (ldfile, arg, charmap, repertoire,
2633 collate);
2634 }
2635 break;
2636 }
2637 else if (was_ellipsis != tok_none)
2638 {
2639 /* Using the information in the `ellipsis_weight'
2640 element and this and the last value we have to handle
2641 the ellipsis now. */
2642 assert (state == 1);
2643
2644 handle_ellipsis (ldfile, arg, was_ellipsis, charmap, repertoire,
2645 collate);
2646
2647 /* Remember that we processed the ellipsis. */
2648 was_ellipsis = tok_none;
2649
2650 /* And don't add the value a second time. */
2651 break;
2652 }
2653
2654 /* Now insert in the new place. */
2655 insert_value (ldfile, arg, charmap, repertoire, collate);
2656 break;
2657
2658 case tok_undefined:
2659 /* Ignore the rest of the line if we don't need the input of
2660 this line. */
2661 if (ignore_content)
2662 {
2663 lr_ignore_rest (ldfile, 0);
2664 break;
2665 }
2666
2667 if (state != 1)
2668 goto err_label;
2669
2670 if (was_ellipsis != tok_none)
2671 {
2672 lr_error (ldfile,
2673 _("%s: cannot have `%s' as end of ellipsis range"),
2674 "LC_COLLATE", "UNDEFINED");
2675
2676 unlink_element (collate);
2677 was_ellipsis = tok_none;
2678 }
2679
2680 /* See whether UNDEFINED already appeared somewhere. */
2681 if (collate->undefined.next != NULL
2682 || (collate->cursor != NULL
2683 && collate->undefined.next == collate->cursor))
2684 {
2685 lr_error (ldfile,
2686 _("%s: order for `%.*s' already defined at %s:%zu"),
2687 "LC_COLLATE", 9, "UNDEFINED", collate->undefined.file,
2688 collate->undefined.line);
2689 lr_ignore_rest (ldfile, 0);
2690 }
2691 else
2692 /* Parse the weights. */
2693 insert_weights (ldfile, &collate->undefined, charmap,
2694 repertoire, collate, tok_none);
2695 break;
2696
2697 case tok_ellipsis2:
2698 case tok_ellipsis3:
2699 case tok_ellipsis4:
2700 /* This is the symbolic (decimal or hexadecimal) or absolute
2701 ellipsis. */
2702 if (was_ellipsis != tok_none)
2703 goto err_label;
2704
2705 if (state != 1 && state != 3)
2706 goto err_label;
2707
2708 was_ellipsis = nowtok;
2709
2710 insert_weights (ldfile, &collate->ellipsis_weight, charmap,
2711 repertoire, collate, nowtok);
2712 break;
2713
2714 case tok_end:
2715 /* Next we assume `LC_COLLATE'. */
2716 if (!ignore_content)
2717 {
2718 if (state == 0)
2719 /* We must either see a copy statement or have
2720 ordering values. */
2721 lr_error (ldfile,
2722 _("%s: empty category description not allowed"),
2723 "LC_COLLATE");
2724 else if (state == 1)
2725 {
2726 lr_error (ldfile, _("%s: missing `order_end' keyword"),
2727 "LC_COLLATE");
2728
2729 /* Handle ellipsis at end of list. */
2730 if (was_ellipsis != tok_none)
2731 {
2732 handle_ellipsis (ldfile, NULL, was_ellipsis, charmap,
2733 repertoire, collate);
2734 was_ellipsis = tok_none;
2735 }
2736 }
2737 else if (state == 3)
2738 error (0, 0, _("%s: missing `reorder-end' keyword"),
2739 "LC_COLLATE");
2740 else if (state == 5)
2741 error (0, 0, _("%s: missing `reorder-sections-end' keyword"),
2742 "LC_COLLATE");
2743 }
2744 arg = lr_token (ldfile, charmap, NULL);
2745 if (arg->tok == tok_eof)
2746 break;
2747 if (arg->tok == tok_eol)
2748 lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
2749 else if (arg->tok != tok_lc_collate)
2750 lr_error (ldfile, _("\
2751 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2752 lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
2753 return;
2754
2755 default:
2756 err_label:
2757 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2758 }
2759
2760 /* Prepare for the next round. */
2761 now = lr_token (ldfile, charmap, NULL);
2762 nowtok = now->tok;
2763 }
2764
2765 /* When we come here we reached the end of the file. */
2766 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2767 }
This page took 0.158342 seconds and 4 git commands to generate.