]> sourceware.org Git - glibc.git/blob - locale/programs/ld-collate.c
Update.
[glibc.git] / locale / programs / ld-collate.c
1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
#include <errno.h>
#include <error.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
28
29 #include "charmap.h"
30 #include "localeinfo.h"
31 #include "linereader.h"
32 #include "locfile.h"
33 #include "localedef.h"
34
35 /* Uncomment the following line in the production version. */
36 /* #define NDEBUG 1 */
37 #include <assert.h>
38
39 #define obstack_chunk_alloc malloc
40 #define obstack_chunk_free free
41
42 /* Forward declaration. */
43 struct element_t;
44
/* One section of the collation order.  Sections are chained into a
   singly-linked list through NEXT.  */
struct section_list
{
  /* Following section in the list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section.  */
  enum coll_sort_rule *rules;
};
58
59 struct element_t;
60
/* A counted array of pointers to collating elements.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* The CNT element pointers.  */
  struct element_t **w;
};
68
/* Data type for collating element.  */
struct element_t
{
  /* Symbolic name, or NULL for elements which have none.  */
  const char *name;

  /* Multibyte and wide character representation; either may be NULL.  */
  const char *mbs;
  const uint32_t *wcs;
  int mborder;
  int wcorder;

  /* Array with one weight list per sorting rule.  */
  struct element_list_t *weights;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
};
95
/* Special element values.  These are never dereferenced; they only mark
   an ellipsis in a weight list.  */
#define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
100
/* Data type for collating symbol.  */
struct symbol_t
{
  /* Point to place in the order list.  NULL until an element has been
     created for the symbol.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
111
112
113 /* The real definition of the struct for the LC_COLLATE locale. */
114 struct locale_collate_t
115 {
116 int col_weight_max;
117 int cur_weight_max;
118
119 /* List of known scripts. */
120 struct section_list *sections;
121 /* Current section using definition. */
122 struct section_list *current_section;
123 /* There always can be an unnamed section. */
124 struct section_list unnamed_section;
125 /* To make handling of errors easier we have another section. */
126 struct section_list error_section;
127
128 /* Number of sorting rules given in order_start line. */
129 uint32_t nrules;
130
131 /* Start of the order list. */
132 struct element_t *start;
133
134 /* The undefined element. */
135 struct element_t undefined;
136
137 /* This is the cursor for `reorder_after' insertions. */
138 struct element_t *cursor;
139
140 /* This value is used when handling ellipsis. */
141 struct element_t ellipsis_weight;
142
143 /* Known collating elements. */
144 hash_table elem_table;
145
146 /* Known collating symbols. */
147 hash_table sym_table;
148
149 /* Known collation sequences. */
150 hash_table seq_table;
151
152 struct obstack mempool;
153
154 /* The LC_COLLATE category is a bit special as it is sometimes possible
155 that the definitions from more than one input file contains information.
156 Therefore we keep all relevant input in a list. */
157 struct locale_collate_t *next;
158
159 /* Arrays with heads of the list for each of the leading bytes in
160 the multibyte sequences. */
161 struct element_t *mbheads[256];
162 };
163
164
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  NRULES is zero until
   the first direction specification has been read.  */
static int nrules;
168
169
170 static struct section_list *
171 make_seclist_elem (struct locale_collate_t *collate, const char *string,
172 struct section_list *next)
173 {
174 struct section_list *newp;
175
176 newp = (struct section_list *) obstack_alloc (&collate->mempool,
177 sizeof (*newp));
178 newp->next = next;
179 newp->name = string;
180 newp->first = NULL;
181
182 return newp;
183 }
184
185
186 static struct element_t *
187 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
188 const uint32_t *wcs, const char *name, size_t namelen)
189 {
190 struct element_t *newp;
191
192 newp = (struct element_t *) obstack_alloc (&collate->mempool,
193 sizeof (*newp));
194 newp->name = name == NULL ? NULL : obstack_copy (&collate->mempool,
195 name, namelen);
196 if (mbs != NULL)
197 newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
198 else
199 newp->mbs = NULL;
200 if (wcs != NULL)
201 {
202 size_t nwcs = wcslen ((wchar_t *) wcs) + 1;
203 uint32_t zero = 0;
204 obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
205 obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
206 newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
207 }
208 else
209 newp->wcs = NULL;
210 newp->mborder = 0;
211 newp->wcorder = 0;
212
213 /* Will be allocated later. */
214 newp->weights = NULL;
215
216 newp->file = NULL;
217 newp->line = 0;
218
219 newp->section = NULL;
220
221 newp->last = NULL;
222 newp->next = NULL;
223
224 newp->mbnext = NULL;
225
226 return newp;
227 }
228
229
230 static struct symbol_t *
231 new_symbol (struct locale_collate_t *collate)
232 {
233 struct symbol_t *newp;
234
235 newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
236
237 newp->order = NULL;
238
239 newp->file = NULL;
240 newp->line = 0;
241
242 return newp;
243 }
244
245
246 /* Test whether this name is already defined somewhere. */
247 static int
248 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
249 struct charmap_t *charmap, struct repertoire_t *repertoire,
250 const char *symbol, size_t symbol_len)
251 {
252 void *ignore = NULL;
253
254 if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
255 {
256 lr_error (ldfile, _("`%s' already defined in charmap"), symbol);
257 return 1;
258 }
259
260 if (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore) == 0)
261 {
262 lr_error (ldfile, _("`%s' already defined in repertoire"), symbol);
263 return 1;
264 }
265
266 if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
267 {
268 lr_error (ldfile, _("`%s' already defined as collating symbol"), symbol);
269 return 1;
270 }
271
272 if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
273 {
274 lr_error (ldfile, _("`%s' already defined as collating element"),
275 symbol);
276 return 1;
277 }
278
279 return 0;
280 }
281
282
283 /* Read the direction specification. */
284 static void
285 read_directions (struct linereader *ldfile, struct token *arg,
286 struct charmap_t *charmap, struct repertoire_t *repertoire,
287 struct locale_collate_t *collate)
288 {
289 int cnt = 0;
290 int max = nrules ?: 10;
291 enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
292 int warned = 0;
293
294 while (1)
295 {
296 int valid = 0;
297
298 if (arg->tok == tok_forward)
299 {
300 if (rules[cnt] & sort_backward)
301 {
302 if (! warned)
303 {
304 lr_error (ldfile, _("\
305 %s: `forward' and `backward' are mutually excluding each other"),
306 "LC_COLLATE");
307 warned = 1;
308 }
309 }
310 else if (rules[cnt] & sort_forward)
311 {
312 if (! warned)
313 {
314 lr_error (ldfile, _("\
315 %s: `%s' mentioned twice in definition of weight %d"),
316 "LC_COLLATE", "forward", cnt + 1);
317 }
318 }
319 else
320 rules[cnt] |= sort_forward;
321
322 valid = 1;
323 }
324 else if (arg->tok == tok_backward)
325 {
326 if (rules[cnt] & sort_forward)
327 {
328 if (! warned)
329 {
330 lr_error (ldfile, _("\
331 %s: `forward' and `backward' are mutually excluding each other"),
332 "LC_COLLATE");
333 warned = 1;
334 }
335 }
336 else if (rules[cnt] & sort_backward)
337 {
338 if (! warned)
339 {
340 lr_error (ldfile, _("\
341 %s: `%s' mentioned twice in definition of weight %d"),
342 "LC_COLLATE", "backward", cnt + 1);
343 }
344 }
345 else
346 rules[cnt] |= sort_backward;
347
348 valid = 1;
349 }
350 else if (arg->tok == tok_position)
351 {
352 if (rules[cnt] & sort_position)
353 {
354 if (! warned)
355 {
356 lr_error (ldfile, _("\
357 %s: `%s' mentioned twice in definition of weight %d in category `%s'"),
358 "LC_COLLATE", "position", cnt + 1);
359 }
360 }
361 else
362 rules[cnt] |= sort_position;
363
364 valid = 1;
365 }
366
367 if (valid)
368 arg = lr_token (ldfile, charmap, repertoire);
369
370 if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
371 || arg->tok == tok_semicolon)
372 {
373 if (! valid && ! warned)
374 {
375 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
376 warned = 1;
377 }
378
379 /* See whether we have to increment the counter. */
380 if (arg->tok != tok_comma && rules[cnt] != 0)
381 ++cnt;
382
383 if (arg->tok == tok_eof || arg->tok == tok_eol)
384 /* End of line or file, so we exit the loop. */
385 break;
386
387 if (nrules == 0)
388 {
389 /* See whether we have enough room in the array. */
390 if (cnt == max)
391 {
392 max += 10;
393 rules = (enum coll_sort_rule *) xrealloc (rules,
394 max
395 * sizeof (*rules));
396 memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
397 }
398 }
399 else
400 {
401 if (cnt == nrules)
402 {
403 /* There must not be any more rule. */
404 if (! warned)
405 {
406 lr_error (ldfile, _("\
407 %s: too many rules; first entry only had %d"),
408 "LC_COLLATE", nrules);
409 warned = 1;
410 }
411
412 lr_ignore_rest (ldfile, 0);
413 break;
414 }
415 }
416 }
417 else
418 {
419 if (! warned)
420 {
421 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
422 warned = 1;
423 }
424 }
425
426 arg = lr_token (ldfile, charmap, repertoire);
427 }
428
429 if (nrules == 0)
430 {
431 /* Now we know how many rules we have. */
432 nrules = cnt;
433 rules = (enum coll_sort_rule *) xrealloc (rules,
434 nrules * sizeof (*rules));
435 }
436 else
437 {
438 if (cnt < nrules)
439 {
440 /* Not enough rules in this specification. */
441 if (! warned)
442 lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
443
444 do
445 rules[cnt] = sort_forward;
446 while (++cnt < nrules);
447 }
448 }
449
450 collate->current_section->rules = rules;
451 }
452
453
454 static struct element_t *
455 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
456 const char *str, size_t len, uint32_t *wcstr)
457 {
458 struct element_t *result = NULL;
459
460 /* Search for the entries among the collation sequences already define. */
461 if (find_entry (&collate->seq_table, str, len, (void **) &result) != 0)
462 {
463 /* Nope, not define yet. So we see whether it is a
464 collation symbol. */
465 void *ptr;
466
467 if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
468 {
469 /* It's a collation symbol. */
470 struct symbol_t *sym = (struct symbol_t *) ptr;
471 result = sym->order;
472
473 if (result == NULL)
474 result = sym->order = new_element (collate, NULL, 0, NULL,
475 NULL, 0);
476 }
477 else if (find_entry (&collate->elem_table, str, len,
478 (void **) &result) != 0)
479 {
480 /* It's also no collation element. So it is an character
481 element defined later. */
482 result = new_element (collate, NULL, 0, NULL, str, len);
483 if (result != NULL)
484 /* Insert it into the sequence table. */
485 insert_entry (&collate->seq_table, str, len, result);
486 }
487 }
488
489 return result;
490 }
491
492
493 static void
494 unlink_element (struct locale_collate_t *collate)
495 {
496 if (collate->cursor->next != NULL)
497 collate->cursor->next->last = collate->cursor->last;
498 if (collate->cursor->last != NULL)
499 collate->cursor->last->next = collate->cursor->next;
500 collate->cursor = collate->cursor->last;
501 }
502
503
504 static void
505 insert_weights (struct linereader *ldfile, struct element_t *elem,
506 struct charmap_t *charmap, struct repertoire_t *repertoire,
507 struct locale_collate_t *collate, enum token_t ellipsis)
508 {
509 int weight_cnt;
510 struct token *arg;
511
512 /* Initialize all the fields. */
513 elem->file = ldfile->fname;
514 elem->line = ldfile->lineno;
515 elem->last = collate->cursor;
516 elem->next = collate->cursor ? collate->cursor->next : NULL;
517 if (collate->cursor != NULL)
518 collate->cursor->next = elem;
519 elem->weights = (struct element_list_t *)
520 obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
521 memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
522
523 if (collate->current_section->first == NULL)
524 collate->current_section->first = elem;
525 if (collate->current_section->last == collate->cursor)
526 collate->current_section->last = elem;
527
528 collate->cursor = elem;
529
530 weight_cnt = 0;
531
532 arg = lr_token (ldfile, charmap, repertoire);
533 do
534 {
535 if (arg->tok == tok_eof || arg->tok == tok_eol)
536 break;
537
538 if (arg->tok == tok_ignore)
539 {
540 /* The weight for this level has to be ignored. We use the
541 null pointer to indicate this. */
542 elem->weights[weight_cnt].w = (struct element_t **)
543 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
544 elem->weights[weight_cnt].w[0] = NULL;
545 elem->weights[weight_cnt].cnt = 1;
546 }
547 else if (arg->tok == tok_bsymbol)
548 {
549 struct element_t *val = find_element (ldfile, collate,
550 arg->val.str.startmb,
551 arg->val.str.lenmb,
552 arg->val.str.startwc);
553
554 if (val == NULL)
555 break;
556
557 elem->weights[weight_cnt].w = (struct element_t **)
558 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
559 elem->weights[weight_cnt].w[0] = val;
560 elem->weights[weight_cnt].cnt = 1;
561 }
562 else if (arg->tok == tok_string)
563 {
564 /* Split the string up in the individual characters and put
565 the element definitions in the list. */
566 const char *cp = arg->val.str.startmb;
567 int cnt = 0;
568 struct element_t *charelem;
569 void *base = obstack_base (&collate->mempool);
570
571 if (*cp == '\0')
572 {
573 lr_error (ldfile, _("%s: empty weight string not allowed"),
574 "LC_COLLATE");
575 lr_ignore_rest (ldfile, 0);
576 break;
577 }
578
579 do
580 {
581 if (*cp == '<')
582 {
583 /* Ahh, it's a bsymbol. That's what we want. */
584 const char *startp = cp;
585
586 while (*++cp != '>')
587 {
588 if (*cp == ldfile->escape_char)
589 ++cp;
590 if (*cp == '\0')
591 {
592 /* It's a syntax error. */
593 obstack_free (&collate->mempool, base);
594 goto syntax;
595 }
596 }
597
598 charelem = find_element (ldfile, collate, startp,
599 cp - startp, NULL);
600 ++cp;
601 }
602 else
603 {
604 /* People really shouldn't use characters directly in
605 the string. Especially since it's not really clear
606 what this means. We interpret all characters in the
607 string as if that would be bsymbols. Otherwise we
608 would have to match back to bsymbols somehow and this
609 is also not what people normally expect. */
610 charelem = find_element (ldfile, collate, cp++, 1, NULL);
611 }
612
613 if (charelem == NULL)
614 {
615 /* We ignore the rest of the line. */
616 lr_ignore_rest (ldfile, 0);
617 break;
618 }
619
620 /* Add the pointer. */
621 obstack_ptr_grow (&collate->mempool, charelem);
622 ++cnt;
623 }
624 while (*cp != '\0');
625
626 /* Now store the information. */
627 elem->weights[weight_cnt].w = (struct element_t **)
628 obstack_finish (&collate->mempool);
629 elem->weights[weight_cnt].cnt = cnt;
630
631 /* We don't need the string anymore. */
632 free (arg->val.str.startmb);
633 }
634 else if (ellipsis != tok_none
635 && (arg->tok == tok_ellipsis2
636 || arg->tok == tok_ellipsis3
637 || arg->tok == tok_ellipsis4))
638 {
639 /* It must be the same ellipsis as used in the initial column. */
640 if (arg->tok != ellipsis)
641 lr_error (ldfile, _("\
642 %s: weights must use the same ellipsis symbol as the name"),
643 "LC_COLLATE");
644
645 /* The weight for this level has to be ignored. We use the
646 null pointer to indicate this. */
647 elem->weights[weight_cnt].w = (struct element_t **)
648 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
649 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
650 elem->weights[weight_cnt].cnt = 1;
651 }
652 else
653 {
654 syntax:
655 /* It's a syntax error. */
656 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
657 lr_ignore_rest (ldfile, 0);
658 break;
659 }
660
661 arg = lr_token (ldfile, charmap, repertoire);
662 /* This better should be the end of the line or a semicolon. */
663 if (arg->tok == tok_semicolon)
664 /* OK, ignore this and read the next token. */
665 arg = lr_token (ldfile, charmap, repertoire);
666 else if (arg->tok != tok_eof && arg->tok != tok_eol)
667 {
668 /* It's a syntax error. */
669 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
670 lr_ignore_rest (ldfile, 0);
671 break;
672 }
673 }
674 while (++weight_cnt < nrules);
675
676 if (weight_cnt < nrules)
677 {
678 /* This means the rest of the line uses the current element as
679 the weight. */
680 do
681 {
682 elem->weights[weight_cnt].w = (struct element_t **)
683 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
684 elem->weights[weight_cnt].w[0] = elem;
685 elem->weights[weight_cnt].cnt = 1;
686 }
687 while (++weight_cnt < nrules);
688 }
689 else
690 {
691 if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
692 {
693 /* Too many rule values. */
694 lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
695 lr_ignore_rest (ldfile, 0);
696 }
697 else
698 lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
699 }
700 }
701
702
703 static int
704 insert_value (struct linereader *ldfile, struct token *arg,
705 struct charmap_t *charmap, struct repertoire_t *repertoire,
706 struct locale_collate_t *collate)
707 {
708 /* First find out what kind of symbol this is. */
709 struct charseq *seq;
710 uint32_t wc;
711 struct element_t *elem = NULL;
712
713 /* Try to find the character in the charmap. */
714 seq = charmap_find_value (charmap, arg->val.str.startmb, arg->val.str.lenmb);
715
716 /* Determine the wide character. */
717 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
718 {
719 wc = repertoire_find_value (repertoire, arg->val.str.startmb,
720 arg->val.str.lenmb);
721 if (seq != NULL)
722 seq->ucs4 = wc;
723 }
724 else
725 wc = seq->ucs4;
726
727 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
728 {
729 /* It's no character, so look through the collation elements and
730 symbol list. */
731 void *result;
732
733 if (find_entry (&collate->sym_table, arg->val.str.startmb,
734 arg->val.str.lenmb, &result) == 0)
735 {
736 /* It's a collation symbol. */
737 struct symbol_t *sym = (struct symbol_t *) result;
738 elem = sym->order;
739
740 if (elem == NULL)
741 elem = sym->order = new_element (collate, NULL, 0, NULL, NULL, 0);
742 }
743 else if (find_entry (&collate->elem_table, arg->val.str.startmb,
744 arg->val.str.lenmb, (void **) &elem) != 0)
745 {
746 /* It's also no collation element. Therefore ignore it. */
747 lr_ignore_rest (ldfile, 0);
748 return 1;
749 }
750 }
751 else
752 {
753 /* Otherwise the symbols stands for a character. */
754 if (find_entry (&collate->seq_table, arg->val.str.startmb,
755 arg->val.str.lenmb, (void **) &elem) != 0)
756 {
757 uint32_t wcs[2] = { wc, 0 };
758
759 /* We have to allocate an entry. */
760 elem = new_element (collate, seq != NULL ? seq->bytes : NULL,
761 seq != NULL ? seq->nbytes : 0,
762 wcs, arg->val.str.startmb, arg->val.str.lenmb);
763
764 /* And add it to the table. */
765 if (insert_entry (&collate->seq_table, arg->val.str.startmb,
766 arg->val.str.lenmb, elem) != 0)
767 /* This cannot happen. */
768 assert (! "Internal error");
769 }
770 }
771
772 /* Test whether this element is not already in the list. */
773 if (elem->next != NULL || (collate->cursor != NULL
774 && elem->next == collate->cursor))
775 {
776 lr_error (ldfile, _("order for `%.*s' already defined at %s:%zu"),
777 arg->val.str.lenmb, arg->val.str.startmb,
778 elem->file, elem->line);
779 lr_ignore_rest (ldfile, 0);
780 return 1;
781 }
782
783 insert_weights (ldfile, elem, charmap, repertoire, collate, tok_none);
784
785 return 0;
786 }
787
788
789 static void
790 handle_ellipsis (struct linereader *ldfile, struct token *arg,
791 enum token_t ellipsis, struct charmap_t *charmap,
792 struct repertoire_t *repertoire,
793 struct locale_collate_t *collate)
794 {
795 struct element_t *startp;
796 struct element_t *endp;
797
798 /* Unlink the entry added for the ellipsis. */
799 unlink_element (collate);
800 startp = collate->cursor;
801
802 /* Process and add the end-entry. */
803 if (arg != NULL
804 && insert_value (ldfile, arg, charmap, repertoire, collate))
805 /* Something went wrong with inserting the to-value. This means
806 we cannot process the ellipsis. */
807 return;
808
809 /* Reset the cursor. */
810 collate->cursor = startp;
811
812 /* Now we have to handle many different situations:
813 - we have to distinguish between the three different ellipsis forms
814 - the is the ellipsis at the beginning, in the middle, or at the end.
815 */
816 endp = collate->cursor->next;
817 assert (arg == NULL || endp != NULL);
818
819 /* Both, the start and the end symbol, must stand for characters. */
820 if ((startp == NULL || startp->name == NULL)
821 || (endp == NULL || endp->name == NULL))
822 {
823 lr_error (ldfile, _("\
824 %s: the start end the end symbol of a range must stand for characters"),
825 "LC_COLLATE");
826 return;
827 }
828
829 if (ellipsis == tok_ellipsis3)
830 {
831 /* One requirement we make here: the length of the byte
832 sequences for the first and end character must be the same.
833 This is mainly to prevent unwanted effects and this is often
834 not what is wanted. */
835 size_t len = (startp->mbs != NULL ? strlen (startp->mbs)
836 : (endp->mbs != NULL ? strlen (endp->mbs) : 0));
837 char mbcnt[len + 1];
838 char mbend[len + 1];
839
840 /* Well, this should be caught somewhere else already. Just to
841 make sure. */
842 assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
843 assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
844
845 if (startp != NULL && endp != NULL
846 && startp->mbs != NULL && endp->mbs != NULL
847 && strlen (startp->mbs) != strlen (endp->mbs))
848 {
849 lr_error (ldfile, _("\
850 %s: byte sequences of first and last character must have the same length"),
851 "LC_COLLATE");
852 return;
853 }
854
855 /* Determine whether we have to generate multibyte sequences. */
856 if ((startp == NULL || startp->mbs != NULL)
857 && (endp == NULL || endp->mbs != NULL))
858 {
859 int cnt;
860 int ret;
861
862 /* Prepare the beginning byte sequence. This is either from the
863 beginning byte sequence or it is all nulls if it was an
864 initial ellipsis. */
865 if (startp == NULL || startp->mbs == NULL)
866 memset (mbcnt, '\0', len);
867 else
868 {
869 memcpy (mbcnt, startp->mbs, len);
870
871 /* And increment it so that the value is the first one we will
872 try to insert. */
873 for (cnt = len - 1; cnt >= 0; --cnt)
874 if (++mbcnt[cnt] != '\0')
875 break;
876 }
877 mbcnt[len] = '\0';
878
879 /* And the end sequence. */
880 if (endp == NULL || endp->mbs == NULL)
881 memset (mbend, '\0', len);
882 else
883 memcpy (mbend, endp->mbs, len);
884 mbend[len] = '\0';
885
886 /* Test whether we have a correct range. */
887 ret = memcmp (mbcnt, mbend, len);
888 if (ret >= 0)
889 {
890 if (ret > 0)
891 lr_error (ldfile, _("%s: byte sequence of first character of \
892 sequence is not lower than that of the last character"), "LC_COLLATE");
893 return;
894 }
895
896 /* Generate the byte sequences data. */
897 while (1)
898 {
899 struct charseq *seq;
900
901 /* Quite a bit of work ahead. We have to find the character
902 definition for the byte sequence and then determine the
903 wide character belonging to it. */
904 seq = charmap_find_symbol (charmap, mbcnt, len);
905 if (seq != NULL)
906 {
907 struct element_t *elem;
908 size_t namelen;
909
910 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
911 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
912 strlen (seq->name));
913
914 /* I don't this this can ever happen. */
915 assert (seq->name != NULL);
916 namelen = strlen (seq->name);
917
918 /* Now we are ready to insert the new value in the
919 sequence. Find out whether the element is
920 already known. */
921 if (find_entry (&collate->seq_table, seq->name, namelen,
922 (void **) &elem) != 0)
923 {
924 uint32_t wcs[2] = { seq->ucs4, 0 };
925
926 /* We have to allocate an entry. */
927 elem = new_element (collate, mbcnt, len, wcs, seq->name,
928 namelen);
929
930 /* And add it to the table. */
931 if (insert_entry (&collate->seq_table, seq->name,
932 namelen, elem) != 0)
933 /* This cannot happen. */
934 assert (! "Internal error");
935 }
936
937 /* Test whether this element is not already in the list. */
938 if (elem->next != NULL || (collate->cursor != NULL
939 && elem->next == collate->cursor))
940 {
941 lr_error (ldfile, _("\
942 order for `%.*s' already defined at %s:%zu"),
943 namelen, seq->name, elem->file, elem->line);
944 goto increment;
945 }
946
947 /* Enqueue the new element. */
948 elem->last = collate->cursor;
949 elem->next = collate->cursor->next;
950 elem->last->next = elem;
951 if (elem->next != NULL)
952 elem->next->last = elem;
953 collate->cursor = elem;
954
955 /* Add the weight value. We take them from the
956 `ellipsis_weights' member of `collate'. */
957 elem->weights = (struct element_list_t *)
958 obstack_alloc (&collate->mempool,
959 nrules * sizeof (struct element_list_t));
960 for (cnt = 0; cnt < nrules; ++cnt)
961 if (collate->ellipsis_weight.weights[cnt].cnt == 1
962 && (collate->ellipsis_weight.weights[cnt].w[0]
963 == ELEMENT_ELLIPSIS2))
964 {
965 elem->weights[cnt].w = (struct element_t **)
966 obstack_alloc (&collate->mempool,
967 sizeof (struct element_t *));
968 elem->weights[cnt].w[0] = elem;
969 elem->weights[cnt].cnt = 1;
970 }
971 else
972 {
973 /* Simly use the weight from `ellipsis_weight'. */
974 elem->weights[cnt].w =
975 collate->ellipsis_weight.weights[cnt].w;
976 elem->weights[cnt].cnt =
977 collate->ellipsis_weight.weights[cnt].cnt;
978 }
979 }
980
981 /* Increment for the next round. */
982 increment:
983 for (cnt = len - 1; cnt >= 0; --cnt)
984 if (++mbcnt[cnt] != '\0')
985 break;
986
987 /* Find out whether this was all. */
988 if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
989 /* Yep, that's all. */
990 break;
991 }
992 }
993 }
994 else
995 {
996 /* For symbolic range we naturally must have a beginning and an
997 end specified by the user. */
998 if (startp == NULL)
999 lr_error (ldfile, _("\
1000 %s: symbolic range ellipsis must not directly follow `order_start'"),
1001 "LC_COLLATE");
1002 else if (endp == NULL)
1003 lr_error (ldfile, _("\
1004 %s: symbolic range ellipsis must not be direct followed by `order_end'"),
1005 "LC_COLLATE");
1006 else
1007 {
1008 /* Determine the range. To do so we have to determine the
1009 common prefix of the both names and then the numeric
1010 values of both ends. */
1011 size_t lenfrom = strlen (startp->name);
1012 size_t lento = strlen (endp->name);
1013 char buf[lento + 1];
1014 int preflen = 0;
1015 long int from;
1016 long int to;
1017 char *cp;
1018 int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1019
1020 if (lenfrom != lento)
1021 {
1022 invalid_range:
1023 lr_error (ldfile, _("\
1024 `%s' and `%.*s' are no valid names for symbolic range"),
1025 startp->name, lento, endp->name);
1026 return;
1027 }
1028
1029 while (startp->name[preflen] == endp->name[preflen])
1030 if (startp->name[preflen] == '\0')
1031 /* Nothing to be done. The start and end point are identical
1032 and while inserting the end point we have already given
1033 the user an error message. */
1034 return;
1035 else
1036 ++preflen;
1037
1038 errno = 0;
1039 from = strtol (startp->name + preflen, &cp, base);
1040 if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1041 goto invalid_range;
1042
1043 errno = 0;
1044 to = strtol (endp->name + preflen, &cp, base);
1045 if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1046 goto invalid_range;
1047
1048 /* Copy the prefix. */
1049 memcpy (buf, startp->name, preflen);
1050
1051 /* Loop over all values. */
1052 for (++from; from < to; ++from)
1053 {
1054 struct element_t *elem = NULL;
1055 struct charseq *seq;
1056 uint32_t wc;
1057 int cnt;
1058
1059 /* Generate the the name. */
1060 sprintf (buf + preflen, base == 10 ? "%d" : "%x", from);
1061
1062 /* Look whether this name is already defined. */
1063 if (find_entry (&collate->seq_table, arg->val.str.startmb,
1064 arg->val.str.lenmb, (void **) &elem) == 0)
1065 {
1066 if (elem->next != NULL || (collate->cursor != NULL
1067 && elem->next == collate->cursor))
1068 {
1069 lr_error (ldfile, _("\
1070 %s: order for `%.*s' already defined at %s:%zu"),
1071 "LC_COLLATE", lenfrom, buf,
1072 elem->file, elem->line);
1073 continue;
1074 }
1075
1076 if (elem->name == NULL)
1077 {
1078 lr_error (ldfile, _("%s: `%s' must be a charater"),
1079 "LC_COLLATE", buf);
1080 continue;
1081 }
1082 }
1083
1084 if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1085 {
1086 /* Search for a character of this name. */
1087 seq = charmap_find_value (charmap, buf, lenfrom);
1088 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1089 {
1090 wc = repertoire_find_value (repertoire, buf, lenfrom);
1091
1092 if (seq != NULL)
1093 seq->ucs4 = wc;
1094 }
1095 else
1096 wc = seq->ucs4;
1097
1098 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1099 /* We don't know anything about a character with this
1100 name. XXX Should we warn? */
1101 continue;
1102
1103 if (elem == NULL)
1104 {
1105 uint32_t wcs[2] = { wc, 0 };
1106
1107 /* We have to allocate an entry. */
1108 elem = new_element (collate,
1109 seq != NULL ? seq->bytes : NULL,
1110 seq != NULL ? seq->nbytes : 0,
1111 wc == ILLEGAL_CHAR_VALUE
1112 ? NULL : wcs,
1113 buf, lenfrom);
1114 }
1115 else
1116 {
1117 /* Update the element. */
1118 if (seq != NULL)
1119 elem->mbs = obstack_copy0 (&collate->mempool,
1120 seq->bytes, seq->nbytes);
1121
1122 if (wc != ILLEGAL_CHAR_VALUE)
1123 {
1124 uint32_t zero = 0;
1125
1126 obstack_grow (&collate->mempool,
1127 &wc, sizeof (uint32_t));
1128 obstack_grow (&collate->mempool,
1129 &zero, sizeof (uint32_t));
1130 elem->wcs = obstack_finish (&collate->mempool);
1131 }
1132 }
1133
1134 elem->file = ldfile->fname;
1135 elem->line = ldfile->lineno;
1136 }
1137
1138 /* Enqueue the new element. */
1139 elem->last = collate->cursor;
1140 elem->next = collate->cursor->next;
1141 elem->last->next = elem;
1142 if (elem->next != NULL)
1143 elem->next->last = elem;
1144 collate->cursor = elem;
1145
1146 /* Now add the weights. They come from the `ellipsis_weights'
1147 member of `collate'. */
1148 elem->weights = (struct element_list_t *)
1149 obstack_alloc (&collate->mempool,
1150 nrules * sizeof (struct element_list_t));
1151 for (cnt = 0; cnt < nrules; ++cnt)
1152 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1153 && (collate->ellipsis_weight.weights[cnt].w[0]
1154 == ELEMENT_ELLIPSIS2))
1155 {
1156 elem->weights[cnt].w = (struct element_t **)
1157 obstack_alloc (&collate->mempool,
1158 sizeof (struct element_t *));
1159 elem->weights[cnt].w[0] = elem;
1160 elem->weights[cnt].cnt = 1;
1161 }
1162 else
1163 {
1164 /* Simly use the weight from `ellipsis_weight'. */
1165 elem->weights[cnt].w =
1166 collate->ellipsis_weight.weights[cnt].w;
1167 elem->weights[cnt].cnt =
1168 collate->ellipsis_weight.weights[cnt].cnt;
1169 }
1170 }
1171 }
1172 }
1173 }
1174
1175
1176 static void
1177 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1178 struct localedef_t *copy_locale, int ignore_content)
1179 {
1180 if (!ignore_content)
1181 {
1182 struct locale_collate_t *collate;
1183
1184 if (copy_locale == NULL)
1185 {
1186 collate = locale->categories[LC_COLLATE].collate =
1187 (struct locale_collate_t *)
1188 xcalloc (1, sizeof (struct locale_collate_t));
1189
1190 /* Init the various data structures. */
1191 init_hash (&collate->elem_table, 100);
1192 init_hash (&collate->sym_table, 100);
1193 init_hash (&collate->seq_table, 500);
1194 obstack_init (&collate->mempool);
1195
1196 collate->col_weight_max = -1;
1197 }
1198 else
1199 collate = locale->categories[LC_COLLATE].collate =
1200 copy_locale->categories[LC_COLLATE].collate;
1201 }
1202
1203 ldfile->translate_strings = 0;
1204 ldfile->return_widestr = 0;
1205 }
1206
1207
1208 void
1209 collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
1210 {
1211 /* Now is the time when we can assign the individual collation
1212 values for all the symbols. We have possibly different values
1213 for the wide- and the multibyte-character symbols. This is done
1214 since it might make a difference in the encoding if there is in
1215 some cases no multibyte-character but there are wide-characters.
1216 (The other way around it is not important since theencoded
1217 collation value in the wide-character case is 32 bits wide and
1218 therefore requires no encoding).
1219
1220 The lowest collation value assigned is 2. Zero is reserved for
1221 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1222 functions and 1 is used to separate the individual passes for the
1223 different rules.
1224
1225 We also have to construct is list with all the bytes/words which
1226 can come first in a sequence, followed by all the elements which
1227 also start with this byte/word. The order is reverse which has
1228 among others the important effect that longer strings are located
1229 first in the list. This is required for the output data since
1230 the algorithm used in `strcoll' etc depends on this.
1231
1232 The multibyte case is easy. We simply sort into an array with
1233 256 elements. */
1234 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1235 int mbact = 2;
1236 int wcact = 2;
1237 struct element_t *runp = collate->start;
1238
1239 while (runp != NULL)
1240 {
1241 if (runp->mbs != NULL)
1242 {
1243 struct element_t **eptr;
1244
1245 /* Determine the order. */
1246 runp->mborder = mbact++;
1247
1248 /* Find the point where to insert in the list. */
1249 eptr = &collate->mbheads[(unsigned int) runp->mbs[0]];
1250 while (*eptr != NULL)
1251 {
1252 /* Check which string is larger, the one we want to insert
1253 or the current element of the list we are looking at. */
1254 assert (runp->mbs[0] == (*eptr)->mbs[0]);
1255 if (strcmp (runp->mbs, (*eptr)->mbs) > 0)
1256 break;
1257
1258 eptr = &(*eptr)->mbnext;
1259 }
1260
1261 /* Set the pointers. */
1262 runp->mbnext = *eptr;
1263 *eptr = runp;
1264 }
1265
1266 if (runp->wcs != NULL)
1267 runp->wcorder = wcact++;
1268
1269 /* Up to the next entry. */
1270 runp = runp->next;
1271 }
1272 }
1273
1274
/* Write the LC_COLLATE data of LOCALE to OUTPUT_PATH.

   This is currently an empty stub: nothing is written and none of the
   arguments is examined.  */
void
collate_output (struct localedef_t *locale, struct charmap_t *charmap,
                const char *output_path)
{
}
1280
1281
1282 void
1283 collate_read (struct linereader *ldfile, struct localedef_t *result,
1284 struct charmap_t *charmap, const char *repertoire_name,
1285 int ignore_content)
1286 {
1287 struct repertoire_t *repertoire = NULL;
1288 struct locale_collate_t *collate;
1289 struct token *now;
1290 struct token *arg = NULL;
1291 enum token_t nowtok;
1292 int state = 0;
1293 enum token_t was_ellipsis = tok_none;
1294 struct localedef_t *copy_locale = NULL;
1295
1296 /* Get the repertoire we have to use. */
1297 if (repertoire_name != NULL)
1298 repertoire = repertoire_read (repertoire_name);
1299
1300 /* The rest of the line containing `LC_COLLATE' must be free. */
1301 lr_ignore_rest (ldfile, 1);
1302
1303 do
1304 {
1305 now = lr_token (ldfile, charmap, NULL);
1306 nowtok = now->tok;
1307 }
1308 while (nowtok == tok_eol);
1309
1310 if (nowtok == tok_copy)
1311 {
1312 state = 2;
1313 now = lr_token (ldfile, charmap, NULL);
1314 if (now->tok != tok_string)
1315 {
1316 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
1317
1318 skip_category:
1319 do
1320 now = lr_token (ldfile, charmap, NULL);
1321 while (now->tok != tok_eof && now->tok != tok_end);
1322
1323 if (now->tok != tok_eof
1324 || (now = lr_token (ldfile, charmap, NULL), now->tok == tok_eof))
1325 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
1326 else if (now->tok != tok_lc_collate)
1327 {
1328 lr_error (ldfile, _("\
1329 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
1330 lr_ignore_rest (ldfile, 0);
1331 }
1332 else
1333 lr_ignore_rest (ldfile, 1);
1334
1335 return;
1336 }
1337
1338 /* Get the locale definition. */
1339 copy_locale = find_locale (LC_COLLATE, now->val.str.startmb,
1340 repertoire_name, charmap);
1341 if ((copy_locale->avail & COLLATE_LOCALE) == 0)
1342 {
1343 /* Not yet loaded. So do it now. */
1344 if (locfile_read (copy_locale, charmap) != 0)
1345 goto skip_category;
1346 }
1347
1348 lr_ignore_rest (ldfile, 1);
1349
1350 now = lr_token (ldfile, charmap, NULL);
1351 nowtok = now->tok;
1352 }
1353
1354 /* Prepare the data structures. */
1355 collate_startup (ldfile, result, copy_locale, ignore_content);
1356 collate = result->categories[LC_COLLATE].collate;
1357
1358 while (1)
1359 {
1360 /* Of course we don't proceed beyond the end of file. */
1361 if (nowtok == tok_eof)
1362 break;
1363
1364 /* Ingore empty lines. */
1365 if (nowtok == tok_eol)
1366 {
1367 now = lr_token (ldfile, charmap, NULL);
1368 nowtok = now->tok;
1369 continue;
1370 }
1371
1372 switch (nowtok)
1373 {
1374 case tok_coll_weight_max:
1375 /* Ignore the rest of the line if we don't need the input of
1376 this line. */
1377 if (ignore_content)
1378 {
1379 lr_ignore_rest (ldfile, 0);
1380 break;
1381 }
1382
1383 if (state != 0)
1384 goto err_label;
1385
1386 arg = lr_token (ldfile, charmap, NULL);
1387 if (arg->tok != tok_number)
1388 goto err_label;
1389 if (collate->col_weight_max != -1)
1390 lr_error (ldfile, _("%s: duplicate definition of `%s'"),
1391 "LC_COLLATE", "col_weight_max");
1392 else
1393 collate->col_weight_max = arg->val.num;
1394 lr_ignore_rest (ldfile, 1);
1395 break;
1396
1397 case tok_section_symbol:
1398 /* Ignore the rest of the line if we don't need the input of
1399 this line. */
1400 if (ignore_content)
1401 {
1402 lr_ignore_rest (ldfile, 0);
1403 break;
1404 }
1405
1406 if (state != 0)
1407 goto err_label;
1408
1409 arg = lr_token (ldfile, charmap, repertoire);
1410 if (arg->tok != tok_bsymbol)
1411 goto err_label;
1412 else if (!ignore_content)
1413 {
1414 /* Check whether this section is already known. */
1415 struct section_list *known = collate->sections;
1416 while (known != NULL)
1417 if (strcmp (known->name, arg->val.str.startmb) == 0)
1418 break;
1419
1420 if (known != NULL)
1421 {
1422 lr_error (ldfile,
1423 _("%s: duplicate declaration of section `%s'"),
1424 "LC_COLLATE", arg->val.str.startmb);
1425 free (arg->val.str.startmb);
1426 }
1427 else
1428 collate->sections = make_seclist_elem (collate,
1429 arg->val.str.startmb,
1430 collate->sections);
1431
1432 lr_ignore_rest (ldfile, known == NULL);
1433 }
1434 else
1435 {
1436 free (arg->val.str.startmb);
1437 lr_ignore_rest (ldfile, 0);
1438 }
1439 break;
1440
1441 case tok_collating_element:
1442 /* Ignore the rest of the line if we don't need the input of
1443 this line. */
1444 if (ignore_content)
1445 {
1446 lr_ignore_rest (ldfile, 0);
1447 break;
1448 }
1449
1450 if (state != 0)
1451 goto err_label;
1452
1453 arg = lr_token (ldfile, charmap, repertoire);
1454 if (arg->tok != tok_bsymbol)
1455 goto err_label;
1456 else
1457 {
1458 const char *symbol = arg->val.str.startmb;
1459 size_t symbol_len = arg->val.str.lenmb;
1460
1461 /* Next the `from' keyword. */
1462 arg = lr_token (ldfile, charmap, repertoire);
1463 if (arg->tok != tok_from)
1464 {
1465 free ((char *) symbol);
1466 goto err_label;
1467 }
1468
1469 ldfile->return_widestr = 1;
1470
1471 /* Finally the string with the replacement. */
1472 arg = lr_token (ldfile, charmap, repertoire);
1473 ldfile->return_widestr = 0;
1474 if (arg->tok != tok_string)
1475 goto err_label;
1476
1477 if (!ignore_content)
1478 {
1479 if (symbol == NULL)
1480 lr_error (ldfile, _("\
1481 %s: unknown character in collating element name"),
1482 "LC_COLLATE");
1483 if (arg->val.str.startmb == NULL)
1484 lr_error (ldfile, _("\
1485 %s: unknown character in collating element definition"),
1486 "LC_COLLATE");
1487 if (arg->val.str.startwc == NULL)
1488 lr_error (ldfile, _("\
1489 %s: unknown wide character in collating element definition"),
1490 "LC_COLLATE");
1491 else if (arg->val.str.lenwc < 2)
1492 lr_error (ldfile, _("\
1493 %s: substitution string in collating element definition must have at least two characters"),
1494 "LC_COLLATE");
1495
1496 if (symbol != NULL)
1497 {
1498 /* The name is already defined. */
1499 if (check_duplicate (ldfile, collate, charmap,
1500 repertoire, symbol, symbol_len))
1501 goto col_elem_free;
1502
1503 if (insert_entry (&collate->elem_table,
1504 symbol, symbol_len,
1505 new_element (collate,
1506 NULL, 0, NULL, symbol,
1507 symbol_len)) < 0)
1508 lr_error (ldfile, _("\
1509 error while adding collating element"));
1510 }
1511 else
1512 goto col_elem_free;
1513 }
1514 else
1515 {
1516 col_elem_free:
1517 if (symbol != NULL)
1518 free ((char *) symbol);
1519 if (arg->val.str.startmb != NULL)
1520 free (arg->val.str.startmb);
1521 if (arg->val.str.startwc != NULL)
1522 free (arg->val.str.startwc);
1523 }
1524 lr_ignore_rest (ldfile, 1);
1525 }
1526 break;
1527
1528 case tok_collating_symbol:
1529 /* Ignore the rest of the line if we don't need the input of
1530 this line. */
1531 if (ignore_content)
1532 {
1533 lr_ignore_rest (ldfile, 0);
1534 break;
1535 }
1536
1537 if (state != 0)
1538 goto err_label;
1539
1540 arg = lr_token (ldfile, charmap, repertoire);
1541 if (arg->tok != tok_bsymbol)
1542 goto err_label;
1543 else
1544 {
1545 const char *symbol = arg->val.str.startmb;
1546 size_t symbol_len = arg->val.str.lenmb;
1547
1548 if (!ignore_content)
1549 {
1550 if (symbol == NULL)
1551 lr_error (ldfile, _("\
1552 %s: unknown character in collating symbol name"),
1553 "LC_COLLATE");
1554 else
1555 {
1556 /* The name is already defined. */
1557 if (check_duplicate (ldfile, collate, charmap,
1558 repertoire, symbol, symbol_len))
1559 goto col_sym_free;
1560
1561 if (insert_entry (&collate->sym_table,
1562 symbol, symbol_len,
1563 new_symbol (collate)) < 0)
1564 lr_error (ldfile, _("\
1565 error while adding collating symbol"));
1566 }
1567 }
1568 else
1569 {
1570 col_sym_free:
1571 if (symbol != NULL)
1572 free ((char *) symbol);
1573 }
1574 lr_ignore_rest (ldfile, 1);
1575 }
1576 break;
1577
1578 case tok_symbol_equivalence:
1579 /* Ignore the rest of the line if we don't need the input of
1580 this line. */
1581 if (ignore_content)
1582 {
1583 lr_ignore_rest (ldfile, 0);
1584 break;
1585 }
1586
1587 if (state != 0)
1588 goto err_label;
1589
1590 arg = lr_token (ldfile, charmap, repertoire);
1591 if (arg->tok != tok_bsymbol)
1592 goto err_label;
1593 else
1594 {
1595 const char *newname = arg->val.str.startmb;
1596 size_t newname_len = arg->val.str.lenmb;
1597 const char *symname;
1598 size_t symname_len;
1599 struct symbol_t *symval;
1600
1601 arg = lr_token (ldfile, charmap, repertoire);
1602 if (arg->tok != tok_bsymbol)
1603 {
1604 if (newname != NULL)
1605 free ((char *) newname);
1606 goto err_label;
1607 }
1608
1609 symname = arg->val.str.startmb;
1610 symname_len = arg->val.str.lenmb;
1611
1612 if (!ignore_content)
1613 {
1614 if (newname == NULL)
1615 {
1616 lr_error (ldfile, _("\
1617 %s: unknown character in equivalent definition name"),
1618 "LC_COLLATE");
1619 goto sym_equiv_free;
1620 }
1621 if (symname == NULL)
1622 {
1623 lr_error (ldfile, _("\
1624 %s: unknown character in equivalent definition value"),
1625 "LC_COLLATE");
1626 goto sym_equiv_free;
1627 }
1628 /* The name is already defined. */
1629 if (check_duplicate (ldfile, collate, charmap,
1630 repertoire, symname, symname_len))
1631 goto col_sym_free;
1632
1633 /* See whether the symbol name is already defined. */
1634 if (find_entry (&collate->sym_table, symname, symname_len,
1635 (void **) &symval) != 0)
1636 {
1637 lr_error (ldfile, _("\
1638 %s: unknown symbol `%s' in equivalent definition"),
1639 "LC_COLLATE", symname);
1640 goto col_sym_free;
1641 }
1642
1643 if (insert_entry (&collate->sym_table,
1644 newname, newname_len, symval) < 0)
1645 {
1646 lr_error (ldfile, _("\
1647 error while adding equivalent collating symbol"));
1648 goto sym_equiv_free;
1649 }
1650
1651 free ((char *) symname);
1652 }
1653 else
1654 {
1655 sym_equiv_free:
1656 if (newname != NULL)
1657 free ((char *) newname);
1658 if (symname != NULL)
1659 free ((char *) symname);
1660 }
1661 lr_ignore_rest (ldfile, 1);
1662 }
1663 break;
1664
1665 case tok_order_start:
1666 /* Ignore the rest of the line if we don't need the input of
1667 this line. */
1668 if (ignore_content)
1669 {
1670 lr_ignore_rest (ldfile, 0);
1671 break;
1672 }
1673
1674 if (state != 0 && state != 1)
1675 goto err_label;
1676 state = 1;
1677
1678 /* The 14652 draft does not specify whether all `order_start' lines
1679 must contain the same number of sort-rules, but 14651 does. So
1680 we require this here as well. */
1681 arg = lr_token (ldfile, charmap, repertoire);
1682 if (arg->tok == tok_bsymbol)
1683 {
1684 /* This better should be a section name. */
1685 struct section_list *sp = collate->sections;
1686 while (sp != NULL
1687 && strcmp (sp->name, arg->val.str.startmb) != 0)
1688 sp = sp->next;
1689
1690 if (sp == NULL)
1691 {
1692 lr_error (ldfile, _("\
1693 %s: unknown section name `%s'"),
1694 "LC_COLLATE", arg->val.str.startmb);
1695 /* We use the error section. */
1696 collate->current_section = &collate->error_section;
1697 }
1698 else
1699 {
1700 /* Remember this section. */
1701 collate->current_section = sp;
1702
1703 /* One should not be allowed to open the same
1704 section twice. */
1705 if (sp->first != NULL)
1706 lr_error (ldfile, _("\
1707 %s: multiple order definitions for section `%s'"),
1708 "LC_COLLATE", sp->name);
1709
1710 /* Next should come the end of the line or a semicolon. */
1711 arg = lr_token (ldfile, charmap, repertoire);
1712 if (arg->tok == tok_eol)
1713 {
1714 uint32_t cnt;
1715
1716 /* This means we have exactly one rule: `forward'. */
1717 if (collate->nrules > 1)
1718 lr_error (ldfile, _("\
1719 %s: invalid number of sorting rules"),
1720 "LC_COLLATE");
1721 else
1722 collate->nrules = 1;
1723 sp->rules = obstack_alloc (&collate->mempool,
1724 (sizeof (enum coll_sort_rule)
1725 * collate->nrules));
1726 for (cnt = 0; cnt < collate->nrules; ++cnt)
1727 sp->rules[cnt] = sort_forward;
1728
1729 /* Next line. */
1730 break;
1731 }
1732
1733 /* Get the next token. */
1734 arg = lr_token (ldfile, charmap, repertoire);
1735 }
1736 }
1737 else
1738 {
1739 /* There is no section symbol. Therefore we use the unnamed
1740 section. */
1741 collate->current_section = &collate->unnamed_section;
1742
1743 if (collate->unnamed_section.first != NULL)
1744 lr_error (ldfile, _("\
1745 %s: multiple order definitions for unnamed section"),
1746 "LC_COLLATE");
1747 }
1748
1749 /* Now read the direction names. */
1750 read_directions (ldfile, arg, charmap, repertoire, collate);
1751
1752 /* From now be need the strings untranslated. */
1753 ldfile->translate_strings = 0;
1754 break;
1755
1756 case tok_order_end:
1757 /* Ignore the rest of the line if we don't need the input of
1758 this line. */
1759 if (ignore_content)
1760 {
1761 lr_ignore_rest (ldfile, 0);
1762 break;
1763 }
1764
1765 if (state != 1)
1766 goto err_label;
1767
1768 /* Handle ellipsis at end of list. */
1769 if (was_ellipsis != tok_none)
1770 {
1771 handle_ellipsis (ldfile, NULL, was_ellipsis, charmap, repertoire,
1772 collate);
1773 was_ellipsis = tok_none;
1774 }
1775
1776 state = 2;
1777 lr_ignore_rest (ldfile, 1);
1778 break;
1779
1780 case tok_reorder_after:
1781 /* Ignore the rest of the line if we don't need the input of
1782 this line. */
1783 if (ignore_content)
1784 {
1785 lr_ignore_rest (ldfile, 0);
1786 break;
1787 }
1788
1789 if (state == 1)
1790 {
1791 lr_error (ldfile, _("%s: missing `order_end' keyword"),
1792 "LC_COLLATE");
1793 state = 2;
1794
1795 /* Handle ellipsis at end of list. */
1796 if (was_ellipsis != tok_none)
1797 {
1798 handle_ellipsis (ldfile, arg, was_ellipsis, charmap,
1799 repertoire, collate);
1800 was_ellipsis = tok_none;
1801 }
1802 }
1803 else if (state != 2 && state != 3)
1804 goto err_label;
1805 state = 3;
1806
1807 arg = lr_token (ldfile, charmap, repertoire);
1808 if (arg->tok == tok_bsymbol)
1809 {
1810 /* Find this symbol in the sequence table. */
1811 struct element_t *insp;
1812 int no_error = 1;
1813
1814 if (find_entry (&collate->seq_table, arg->val.str.startmb,
1815 arg->val.str.lenmb, (void **) &insp) == 0)
1816 /* Yes, the symbol exists. Simply point the cursor
1817 to it. */
1818 collate->cursor = insp;
1819 else
1820 {
1821 /* This is bad. The symbol after which we have to
1822 insert does not exist. */
1823 lr_error (ldfile, _("\
1824 %s: cannot reorder after %.*s: symbol not known"),
1825 "LC_COLLATE", arg->val.str.lenmb,
1826 arg->val.str.startmb);
1827 collate->cursor = NULL;
1828 no_error = 0;
1829 }
1830
1831 lr_ignore_rest (ldfile, no_error);
1832 }
1833 else
1834 /* This must not happen. */
1835 goto err_label;
1836 break;
1837
1838 case tok_reorder_end:
1839 /* Ignore the rest of the line if we don't need the input of
1840 this line. */
1841 if (ignore_content)
1842 break;
1843
1844 if (state != 3)
1845 goto err_label;
1846 state = 4;
1847 lr_ignore_rest (ldfile, 1);
1848 break;
1849
1850 case tok_reorder_sections_after:
1851 /* Ignore the rest of the line if we don't need the input of
1852 this line. */
1853 if (ignore_content)
1854 {
1855 lr_ignore_rest (ldfile, 0);
1856 break;
1857 }
1858
1859 if (state == 1)
1860 {
1861 lr_error (ldfile, _("%s: missing `order_end' keyword"),
1862 "LC_COLLATE");
1863 state = 2;
1864
1865 /* Handle ellipsis at end of list. */
1866 if (was_ellipsis != tok_none)
1867 {
1868 handle_ellipsis (ldfile, NULL, was_ellipsis, charmap,
1869 repertoire, collate);
1870 was_ellipsis = tok_none;
1871 }
1872 }
1873 else if (state == 3)
1874 {
1875 error (0, 0, _("%s: missing `reorder-end' keyword"),
1876 "LC_COLLATE");
1877 state = 4;
1878 }
1879 else if (state != 2 && state != 4)
1880 goto err_label;
1881 state = 5;
1882
1883 /* Get the name of the sections we are adding after. */
1884 arg = lr_token (ldfile, charmap, repertoire);
1885 if (arg->tok == tok_bsymbol)
1886 {
1887 /* Now find a section with this name. */
1888 struct section_list *runp = collate->sections;
1889
1890 while (runp != NULL)
1891 {
1892 if (runp->name != NULL
1893 && strlen (runp->name) == arg->val.str.lenmb
1894 && memcmp (runp->name, arg->val.str.startmb,
1895 arg->val.str.lenmb) == 0)
1896 break;
1897
1898 runp = runp->next;
1899 }
1900
1901 if (runp != NULL)
1902 collate->current_section = runp;
1903 else
1904 {
1905 /* This is bad. The section after which we have to
1906 reorder does not exist. Therefore we cannot
1907 process the whole rest of this reorder
1908 specification. */
1909 lr_error (ldfile, _("%s: section `%.*s' not known"),
1910 "LC_COLLATE", arg->val.str.lenmb,
1911 arg->val.str.startmb);
1912
1913 do
1914 {
1915 lr_ignore_rest (ldfile, 0);
1916
1917 now = lr_token (ldfile, charmap, NULL);
1918 }
1919 while (now->tok == tok_reorder_sections_after
1920 || now->tok == tok_reorder_sections_end
1921 || now->tok == tok_end);
1922
1923 /* Process the token we just saw. */
1924 nowtok = now->tok;
1925 continue;
1926 }
1927 }
1928 else
1929 /* This must not happen. */
1930 goto err_label;
1931 break;
1932
1933 case tok_reorder_sections_end:
1934 /* Ignore the rest of the line if we don't need the input of
1935 this line. */
1936 if (ignore_content)
1937 break;
1938
1939 if (state != 5)
1940 goto err_label;
1941 state = 6;
1942 lr_ignore_rest (ldfile, 1);
1943 break;
1944
1945 case tok_bsymbol:
1946 /* Ignore the rest of the line if we don't need the input of
1947 this line. */
1948 if (ignore_content)
1949 {
1950 lr_ignore_rest (ldfile, 0);
1951 break;
1952 }
1953
1954 if (state != 1 && state != 3)
1955 goto err_label;
1956
1957 if (state == 3)
1958 {
1959 /* It is possible that we already have this collation sequence.
1960 In this case we move the entry. */
1961 struct element_t *seqp;
1962
1963 /* If the symbol after which we have to insert was not found
1964 ignore all entries. */
1965 if (collate->cursor == NULL)
1966 {
1967 lr_ignore_rest (ldfile, 0);
1968 break;
1969 }
1970
1971 if (find_entry (&collate->seq_table, arg->val.str.startmb,
1972 arg->val.str.lenmb, (void **) &seqp) == 0)
1973 {
1974 /* Remove the entry from the old position. */
1975 if (seqp->last == NULL)
1976 collate->start = seqp->next;
1977 else
1978 seqp->last->next = seqp->next;
1979 if (seqp->next != NULL)
1980 seqp->next->last = seqp->last;
1981
1982 /* We also have to check whether this entry is the
1983 first or last of a section. */
1984 if (seqp->section->first == seqp)
1985 {
1986 if (seqp->section->first == seqp->section->last)
1987 /* This setion has no content anymore. */
1988 seqp->section->first = seqp->section->last = NULL;
1989 else
1990 seqp->section->first = seqp->next;
1991 }
1992 else if (seqp->section->last == seqp)
1993 seqp->section->last = seqp->last;
1994
1995 /* Now insert it in the new place. */
1996 seqp->next = collate->cursor->next;
1997 seqp->last = collate->cursor;
1998 collate->cursor->next = seqp;
1999 if (seqp->next != NULL)
2000 seqp->next->last = seqp;
2001
2002 seqp->section = collate->cursor->section;
2003 if (seqp->section->last == collate->cursor)
2004 seqp->section->last = seqp;
2005
2006 break;
2007 }
2008
2009 /* Otherwise we just add a new entry. */
2010 }
2011 else if (state == 5)
2012 {
2013 /* We are reordering sections. Find the named section. */
2014 struct section_list *runp = collate->sections;
2015 struct section_list *prevp = NULL;
2016
2017 while (runp != NULL)
2018 {
2019 if (runp->name != NULL
2020 && strlen (runp->name) == arg->val.str.lenmb
2021 && memcmp (runp->name, arg->val.str.startmb,
2022 arg->val.str.lenmb) == 0)
2023 break;
2024
2025 prevp = runp;
2026 runp = runp->next;
2027 }
2028
2029 if (runp == NULL)
2030 {
2031 lr_error (ldfile, _("%s: section `%.*s' not known"),
2032 "LC_COLLATE", arg->val.str.lenmb,
2033 arg->val.str.startmb);
2034 lr_ignore_rest (ldfile, 0);
2035 }
2036 else
2037 {
2038 if (runp != collate->current_section)
2039 {
2040 /* Remove the named section from the old place and
2041 insert it in the new one. */
2042 prevp->next = runp->next;
2043
2044 runp->next = collate->current_section->next;
2045 collate->current_section->next = runp;
2046 collate->current_section = runp;
2047 }
2048
2049 /* Process the rest of the line which might change
2050 the collation rules. */
2051 arg = lr_token (ldfile, charmap, repertoire);
2052 if (arg->tok != tok_eof && arg->tok != tok_eol)
2053 read_directions (ldfile, arg, charmap, repertoire,
2054 collate);
2055 }
2056 break;
2057 }
2058 else if (was_ellipsis != tok_none)
2059 {
2060 /* Using the information in the `ellipsis_weight'
2061 element and this and the last value we have to handle
2062 the ellipsis now. */
2063 assert (state == 1);
2064
2065 handle_ellipsis (ldfile, arg, was_ellipsis, charmap, repertoire,
2066 collate);
2067
2068 /* Remember that we processed the ellipsis. */
2069 was_ellipsis = tok_none;
2070
2071 /* And don't add the value a second time. */
2072 break;
2073 }
2074
2075 /* Now insert in the new place. */
2076 insert_value (ldfile, arg, charmap, repertoire, collate);
2077 break;
2078
2079 case tok_undefined:
2080 /* Ignore the rest of the line if we don't need the input of
2081 this line. */
2082 if (ignore_content)
2083 {
2084 lr_ignore_rest (ldfile, 0);
2085 break;
2086 }
2087
2088 if (state != 1)
2089 goto err_label;
2090
2091 if (was_ellipsis != tok_none)
2092 {
2093 lr_error (ldfile,
2094 _("%s: cannot have `%s' as end of ellipsis range"),
2095 "LC_COLLATE", "UNDEFINED");
2096
2097 unlink_element (collate);
2098 was_ellipsis = tok_none;
2099 }
2100
2101 /* See whether UNDEFINED already appeared somewhere. */
2102 if (collate->undefined.next != NULL
2103 || (collate->cursor != NULL
2104 && collate->undefined.next == collate->cursor))
2105 {
2106 lr_error (ldfile,
2107 _("%s: order for `%.*s' already defined at %s:%zu"),
2108 "LC_COLLATE", 9, "UNDEFINED", collate->undefined.file,
2109 collate->undefined.line);
2110 lr_ignore_rest (ldfile, 0);
2111 }
2112 else
2113 /* Parse the weights. */
2114 insert_weights (ldfile, &collate->undefined, charmap,
2115 repertoire, collate, tok_none);
2116 break;
2117
2118 case tok_ellipsis2:
2119 case tok_ellipsis3:
2120 case tok_ellipsis4:
2121 /* This is the symbolic (decimal or hexadecimal) or absolute
2122 ellipsis. */
2123 if (was_ellipsis != tok_none)
2124 goto err_label;
2125
2126 if (state != 1 && state != 3)
2127 goto err_label;
2128
2129 was_ellipsis = nowtok;
2130
2131 insert_weights (ldfile, &collate->ellipsis_weight, charmap,
2132 repertoire, collate, nowtok);
2133 break;
2134
2135 case tok_end:
2136 /* Next we assume `LC_COLLATE'. */
2137 if (!ignore_content)
2138 {
2139 if (state == 0)
2140 /* We must either see a copy statement or have
2141 ordering values. */
2142 lr_error (ldfile,
2143 _("%s: empty category description not allowed"),
2144 "LC_COLLATE");
2145 else if (state == 1)
2146 {
2147 lr_error (ldfile, _("%s: missing `order_end' keyword"),
2148 "LC_COLLATE");
2149
2150 /* Handle ellipsis at end of list. */
2151 if (was_ellipsis != tok_none)
2152 {
2153 handle_ellipsis (ldfile, NULL, was_ellipsis, charmap,
2154 repertoire, collate);
2155 was_ellipsis = tok_none;
2156 }
2157 }
2158 else if (state == 3)
2159 error (0, 0, _("%s: missing `reorder-end' keyword"),
2160 "LC_COLLATE");
2161 else if (state == 5)
2162 error (0, 0, _("%s: missing `reorder-sections-end' keyword"),
2163 "LC_COLLATE");
2164 }
2165 arg = lr_token (ldfile, charmap, NULL);
2166 if (arg->tok == tok_eof)
2167 break;
2168 if (arg->tok == tok_eol)
2169 lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
2170 else if (arg->tok != tok_lc_collate)
2171 lr_error (ldfile, _("\
2172 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2173 lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
2174 return;
2175
2176 default:
2177 err_label:
2178 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2179 }
2180
2181 /* Prepare for the next round. */
2182 now = lr_token (ldfile, charmap, NULL);
2183 nowtok = now->tok;
2184 }
2185
2186 /* When we come here we reached the end of the file. */
2187 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2188 }
This page took 0.137435 seconds and 6 git commands to generate.