]> sourceware.org Git - glibc.git/blob - locale/programs/ld-collate.c
Update.
[glibc.git] / locale / programs / ld-collate.c
1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <errno.h>
25 #include <error.h>
26 #include <stdlib.h>
27 #include <wchar.h>
28
29 #include "charmap.h"
30 #include "localeinfo.h"
31 #include "linereader.h"
32 #include "locfile.h"
33 #include "localedef.h"
34
35 /* Uncomment the following line in the production version. */
36 /* #define NDEBUG 1 */
37 #include <assert.h>
38
39 #define obstack_chunk_alloc malloc
40 #define obstack_chunk_free free
41
42 /* Forward declaration. */
43 struct element_t;
44
/* One section of the collation definition.  Sections are chained
   through NEXT.  */
struct section_list
{
  /* Following section in the chain.  */
  struct section_list *next;
  /* The section's name.  */
  const char *name;
  /* First and last element belonging to this section.  */
  struct element_t *first;
  struct element_t *last;
  /* Sorting rules which apply to this section.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output
     file.  */
  int ruleidx;
};
60
61 struct element_t;
62
/* A counted array of element pointers.  */
struct element_list_t
{
  /* How many entries W has.  */
  int cnt;

  /* The entries themselves.  */
  struct element_t **w;
};
70
/* Description of one collating element.  */
struct element_t
{
  /* Symbolic name, or NULL if the element has none.  */
  const char *name;

  /* Multibyte representation and its length in bytes.  */
  const char *mbs;
  size_t nmbs;
  /* Wide character representation and its length in characters.  */
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* Bit mask with one bit per level; a bit is set if the element is
     used in that level.  Interesting for the singlebyte weight
     computation.

     XXX The type restricts the number of levels to 32.  It could be
     changed if necessary, but this is unlikely to be needed.  */
  unsigned int used_in_level;

  /* Weights of the element, one list per level.  */
  struct element_list_t *weights;

  /* File and line of the definition.  */
  const char *file;
  size_t line;

  /* Section the element belongs to.  */
  struct section_list *section;

  /* Neighbours in the order list: LAST is the predecessor, NEXT the
     successor.  */
  struct element_t *last;
  struct element_t *next;

  /* Successor in the multibyte output list.  */
  struct element_t *mbnext;
};

/* Reserved pointer values which stand for the ellipsis forms instead of
   pointing to a real element.  */
#define ELEMENT_ELLIPSIS2	((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3	((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4	((struct element_t *) 3)
112
/* Description of one collating symbol.  */
struct symbol_t
{
  /* The symbol's place in the order list.  */
  struct element_t *order;

  /* File and line of the definition.  */
  const char *file;
  size_t line;
};
123
124
125 /* The real definition of the struct for the LC_COLLATE locale. */
126 struct locale_collate_t
127 {
128 int col_weight_max;
129 int cur_weight_max;
130
131 /* List of known scripts. */
132 struct section_list *sections;
133 /* Current section using definition. */
134 struct section_list *current_section;
135 /* There always can be an unnamed section. */
136 struct section_list unnamed_section;
137 /* To make handling of errors easier we have another section. */
138 struct section_list error_section;
139
140 /* Number of sorting rules given in order_start line. */
141 uint32_t nrules;
142
143 /* Start of the order list. */
144 struct element_t *start;
145
146 /* The undefined element. */
147 struct element_t undefined;
148
149 /* This is the cursor for `reorder_after' insertions. */
150 struct element_t *cursor;
151
152 /* This value is used when handling ellipsis. */
153 struct element_t ellipsis_weight;
154
155 /* Known collating elements. */
156 hash_table elem_table;
157
158 /* Known collating symbols. */
159 hash_table sym_table;
160
161 /* Known collation sequences. */
162 hash_table seq_table;
163
164 struct obstack mempool;
165
166 /* The LC_COLLATE category is a bit special as it is sometimes possible
167 that the definitions from more than one input file contains information.
168 Therefore we keep all relevant input in a list. */
169 struct locale_collate_t *next;
170
171 /* Arrays with heads of the list for each of the leading bytes in
172 the multibyte sequences. */
173 struct element_t *mbheads[256];
174 };
175
176
/* Global state shared while the LC_COLLATE descriptions of all files
   are read: the number of sorting rules, zero until it is known.  */
static int nrules = 0;
180
181
/* Tables used by the UTF-8 encoder below.  ENCODING_MASK[i] has all
   those bits set which do not fit into a sequence of i + 2 bytes;
   ENCODING_BYTE[i] is the marker placed in the first byte of such a
   sequence.  */
static const uint32_t encoding_mask[] =
{
  ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
};

static const unsigned char encoding_byte[] =
{
  0xc0, 0xe0, 0xf0, 0xf8, 0xfc
};


/* Store the UTF-8 encoding of VAL in BUF and return the number of
   bytes written (1 to 6).  BUF must provide room for six bytes; no
   terminating NUL byte is added.  Every value below 0x80 is stored as
   a single byte.  */
static inline int
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      /* Find the shortest sequence which can hold all bits of VAL.  If
	 none of the masks matches, six bytes are needed.  */
      for (step = 2; step < 6; ++step)
	if ((val & encoding_mask[step - 2]) == 0)
	  break;
      retval = step;

      /* Fill in the continuation bytes from the end, six bits each.  */
      *buf = encoding_byte[step - 2];
      --step;
      do
	{
	  buf[step] = 0x80 | (val & 0x3f);
	  val >>= 6;
	}
      while (--step > 0);

      /* What is left of VAL goes into the first byte.  */
      *buf |= val;
    }

  /* BUF is never advanced in the multibyte case, so the length must be
     reported from RETVAL and not from a pointer difference.  */
  return retval;
}
229
230
231 static struct section_list *
232 make_seclist_elem (struct locale_collate_t *collate, const char *string,
233 struct section_list *next)
234 {
235 struct section_list *newp;
236
237 newp = (struct section_list *) obstack_alloc (&collate->mempool,
238 sizeof (*newp));
239 newp->next = next;
240 newp->name = string;
241 newp->first = NULL;
242
243 return newp;
244 }
245
246
247 static struct element_t *
248 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
249 const uint32_t *wcs, const char *name, size_t namelen)
250 {
251 struct element_t *newp;
252
253 newp = (struct element_t *) obstack_alloc (&collate->mempool,
254 sizeof (*newp));
255 newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
256 name, namelen);
257 if (mbs != NULL)
258 {
259 newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
260 newp->nmbs = mbslen;
261 }
262 else
263 {
264 newp->mbs = NULL;
265 newp->nmbs = 0;
266 }
267 if (wcs != NULL)
268 {
269 size_t nwcs = wcslen ((wchar_t *) wcs);
270 uint32_t zero = 0;
271 obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
272 obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
273 newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
274 newp->nwcs = nwcs;
275 }
276 else
277 {
278 newp->wcs = NULL;
279 newp->nwcs = 0;
280 }
281 newp->mborder = NULL;
282 newp->wcorder = 0;
283 newp->used_in_level = 0;
284
285 /* Will be allocated later. */
286 newp->weights = NULL;
287
288 newp->file = NULL;
289 newp->line = 0;
290
291 newp->section = collate->current_section;
292
293 newp->last = NULL;
294 newp->next = NULL;
295
296 newp->mbnext = NULL;
297
298 return newp;
299 }
300
301
302 static struct symbol_t *
303 new_symbol (struct locale_collate_t *collate)
304 {
305 struct symbol_t *newp;
306
307 newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
308
309 newp->order = NULL;
310
311 newp->file = NULL;
312 newp->line = 0;
313
314 return newp;
315 }
316
317
318 /* Test whether this name is already defined somewhere. */
319 static int
320 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
321 struct charmap_t *charmap, struct repertoire_t *repertoire,
322 const char *symbol, size_t symbol_len)
323 {
324 void *ignore = NULL;
325
326 if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
327 {
328 lr_error (ldfile, _("`%s' already defined in charmap"), symbol);
329 return 1;
330 }
331
332 if (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore) == 0)
333 {
334 lr_error (ldfile, _("`%s' already defined in repertoire"), symbol);
335 return 1;
336 }
337
338 if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
339 {
340 lr_error (ldfile, _("`%s' already defined as collating symbol"), symbol);
341 return 1;
342 }
343
344 if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
345 {
346 lr_error (ldfile, _("`%s' already defined as collating element"),
347 symbol);
348 return 1;
349 }
350
351 return 0;
352 }
353
354
355 /* Read the direction specification. */
356 static void
357 read_directions (struct linereader *ldfile, struct token *arg,
358 struct charmap_t *charmap, struct repertoire_t *repertoire,
359 struct locale_collate_t *collate)
360 {
361 int cnt = 0;
362 int max = nrules ?: 10;
363 enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
364 int warned = 0;
365
366 while (1)
367 {
368 int valid = 0;
369
370 if (arg->tok == tok_forward)
371 {
372 if (rules[cnt] & sort_backward)
373 {
374 if (! warned)
375 {
376 lr_error (ldfile, _("\
377 %s: `forward' and `backward' are mutually excluding each other"),
378 "LC_COLLATE");
379 warned = 1;
380 }
381 }
382 else if (rules[cnt] & sort_forward)
383 {
384 if (! warned)
385 {
386 lr_error (ldfile, _("\
387 %s: `%s' mentioned twice in definition of weight %d"),
388 "LC_COLLATE", "forward", cnt + 1);
389 }
390 }
391 else
392 rules[cnt] |= sort_forward;
393
394 valid = 1;
395 }
396 else if (arg->tok == tok_backward)
397 {
398 if (rules[cnt] & sort_forward)
399 {
400 if (! warned)
401 {
402 lr_error (ldfile, _("\
403 %s: `forward' and `backward' are mutually excluding each other"),
404 "LC_COLLATE");
405 warned = 1;
406 }
407 }
408 else if (rules[cnt] & sort_backward)
409 {
410 if (! warned)
411 {
412 lr_error (ldfile, _("\
413 %s: `%s' mentioned twice in definition of weight %d"),
414 "LC_COLLATE", "backward", cnt + 1);
415 }
416 }
417 else
418 rules[cnt] |= sort_backward;
419
420 valid = 1;
421 }
422 else if (arg->tok == tok_position)
423 {
424 if (rules[cnt] & sort_position)
425 {
426 if (! warned)
427 {
428 lr_error (ldfile, _("\
429 %s: `%s' mentioned twice in definition of weight %d in category `%s'"),
430 "LC_COLLATE", "position", cnt + 1);
431 }
432 }
433 else
434 rules[cnt] |= sort_position;
435
436 valid = 1;
437 }
438
439 if (valid)
440 arg = lr_token (ldfile, charmap, repertoire);
441
442 if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
443 || arg->tok == tok_semicolon)
444 {
445 if (! valid && ! warned)
446 {
447 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
448 warned = 1;
449 }
450
451 /* See whether we have to increment the counter. */
452 if (arg->tok != tok_comma && rules[cnt] != 0)
453 ++cnt;
454
455 if (arg->tok == tok_eof || arg->tok == tok_eol)
456 /* End of line or file, so we exit the loop. */
457 break;
458
459 if (nrules == 0)
460 {
461 /* See whether we have enough room in the array. */
462 if (cnt == max)
463 {
464 max += 10;
465 rules = (enum coll_sort_rule *) xrealloc (rules,
466 max
467 * sizeof (*rules));
468 memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
469 }
470 }
471 else
472 {
473 if (cnt == nrules)
474 {
475 /* There must not be any more rule. */
476 if (! warned)
477 {
478 lr_error (ldfile, _("\
479 %s: too many rules; first entry only had %d"),
480 "LC_COLLATE", nrules);
481 warned = 1;
482 }
483
484 lr_ignore_rest (ldfile, 0);
485 break;
486 }
487 }
488 }
489 else
490 {
491 if (! warned)
492 {
493 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
494 warned = 1;
495 }
496 }
497
498 arg = lr_token (ldfile, charmap, repertoire);
499 }
500
501 if (nrules == 0)
502 {
503 /* Now we know how many rules we have. */
504 nrules = cnt;
505 rules = (enum coll_sort_rule *) xrealloc (rules,
506 nrules * sizeof (*rules));
507 }
508 else
509 {
510 if (cnt < nrules)
511 {
512 /* Not enough rules in this specification. */
513 if (! warned)
514 lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
515
516 do
517 rules[cnt] = sort_forward;
518 while (++cnt < nrules);
519 }
520 }
521
522 collate->current_section->rules = rules;
523 }
524
525
526 static struct element_t *
527 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
528 const char *str, size_t len, uint32_t *wcstr)
529 {
530 struct element_t *result = NULL;
531
532 /* Search for the entries among the collation sequences already define. */
533 if (find_entry (&collate->seq_table, str, len, (void **) &result) != 0)
534 {
535 /* Nope, not define yet. So we see whether it is a
536 collation symbol. */
537 void *ptr;
538
539 if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
540 {
541 /* It's a collation symbol. */
542 struct symbol_t *sym = (struct symbol_t *) ptr;
543 result = sym->order;
544
545 if (result == NULL)
546 result = sym->order = new_element (collate, NULL, 0, NULL,
547 NULL, 0);
548 }
549 else if (find_entry (&collate->elem_table, str, len,
550 (void **) &result) != 0)
551 {
552 /* It's also no collation element. So it is a character
553 element defined later. */
554 result = new_element (collate, NULL, 0, NULL, str, len);
555 if (result != NULL)
556 /* Insert it into the sequence table. */
557 insert_entry (&collate->seq_table, str, len, result);
558 }
559 }
560
561 return result;
562 }
563
564
565 static void
566 unlink_element (struct locale_collate_t *collate)
567 {
568 if (collate->cursor == collate->start)
569 {
570 assert (collate->cursor->next == NULL);
571 assert (collate->cursor->last == NULL);
572 collate->cursor = NULL;
573 }
574 else
575 {
576 if (collate->cursor->next != NULL)
577 collate->cursor->next->last = collate->cursor->last;
578 if (collate->cursor->last != NULL)
579 collate->cursor->last->next = collate->cursor->next;
580 collate->cursor = collate->cursor->last;
581 }
582 }
583
584
585 static void
586 insert_weights (struct linereader *ldfile, struct element_t *elem,
587 struct charmap_t *charmap, struct repertoire_t *repertoire,
588 struct locale_collate_t *collate, enum token_t ellipsis)
589 {
590 int weight_cnt;
591 struct token *arg;
592
593 /* Initialize all the fields. */
594 elem->file = ldfile->fname;
595 elem->line = ldfile->lineno;
596 elem->last = collate->cursor;
597 elem->next = collate->cursor ? collate->cursor->next : NULL;
598 elem->section = collate->current_section;
599 if (collate->cursor != NULL)
600 collate->cursor->next = elem;
601 if (collate->start == NULL)
602 {
603 assert (collate->cursor == NULL);
604 collate->start = elem;
605 }
606 elem->weights = (struct element_list_t *)
607 obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
608 memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
609
610 if (collate->current_section->first == NULL)
611 collate->current_section->first = elem;
612 if (collate->current_section->last == collate->cursor)
613 collate->current_section->last = elem;
614
615 collate->cursor = elem;
616
617 weight_cnt = 0;
618
619 arg = lr_token (ldfile, charmap, repertoire);
620 do
621 {
622 if (arg->tok == tok_eof || arg->tok == tok_eol)
623 break;
624
625 if (arg->tok == tok_ignore)
626 {
627 /* The weight for this level has to be ignored. We use the
628 null pointer to indicate this. */
629 elem->weights[weight_cnt].w = (struct element_t **)
630 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
631 elem->weights[weight_cnt].w[0] = NULL;
632 elem->weights[weight_cnt].cnt = 1;
633 }
634 else if (arg->tok == tok_bsymbol)
635 {
636 struct element_t *val = find_element (ldfile, collate,
637 arg->val.str.startmb,
638 arg->val.str.lenmb,
639 arg->val.str.startwc);
640
641 if (val == NULL)
642 break;
643
644 elem->weights[weight_cnt].w = (struct element_t **)
645 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
646 elem->weights[weight_cnt].w[0] = val;
647 elem->weights[weight_cnt].cnt = 1;
648 }
649 else if (arg->tok == tok_string)
650 {
651 /* Split the string up in the individual characters and put
652 the element definitions in the list. */
653 const char *cp = arg->val.str.startmb;
654 int cnt = 0;
655 struct element_t *charelem;
656 struct element_t **weights = NULL;
657 int max = 0;
658
659 if (*cp == '\0')
660 {
661 lr_error (ldfile, _("%s: empty weight string not allowed"),
662 "LC_COLLATE");
663 lr_ignore_rest (ldfile, 0);
664 break;
665 }
666
667 do
668 {
669 if (*cp == '<')
670 {
671 /* Ahh, it's a bsymbol. That's what we want. */
672 const char *startp = ++cp;
673
674 while (*cp != '>')
675 {
676 if (*cp == ldfile->escape_char)
677 ++cp;
678 if (*cp == '\0')
679 /* It's a syntax error. */
680 goto syntax;
681
682 ++cp;
683 }
684
685 charelem = find_element (ldfile, collate, startp,
686 cp - startp, NULL);
687 ++cp;
688 }
689 else
690 {
691 /* People really shouldn't use characters directly in
692 the string. Especially since it's not really clear
693 what this means. We interpret all characters in the
694 string as if that would be bsymbols. Otherwise we
695 would have to match back to bsymbols somehow and this
696 is normally not what people normally expect. */
697 charelem = find_element (ldfile, collate, cp++, 1, NULL);
698 }
699
700 if (charelem == NULL)
701 {
702 /* We ignore the rest of the line. */
703 lr_ignore_rest (ldfile, 0);
704 break;
705 }
706
707 /* Add the pointer. */
708 if (cnt >= max)
709 {
710 struct element_t **newp;
711 max += 10;
712 newp = (struct element_t **)
713 alloca (max * sizeof (struct element_t *));
714 memcpy (newp, weights, cnt * sizeof (struct element_t *));
715 weights = newp;
716 }
717 weights[cnt++] = charelem;
718 }
719 while (*cp != '\0');
720
721 /* Now store the information. */
722 elem->weights[weight_cnt].w = (struct element_t **)
723 obstack_alloc (&collate->mempool,
724 cnt * sizeof (struct element_t *));
725 memcpy (elem->weights[weight_cnt].w, weights,
726 cnt * sizeof (struct element_t *));
727 elem->weights[weight_cnt].cnt = cnt;
728
729 /* We don't need the string anymore. */
730 free (arg->val.str.startmb);
731 }
732 else if (ellipsis != tok_none
733 && (arg->tok == tok_ellipsis2
734 || arg->tok == tok_ellipsis3
735 || arg->tok == tok_ellipsis4))
736 {
737 /* It must be the same ellipsis as used in the initial column. */
738 if (arg->tok != ellipsis)
739 lr_error (ldfile, _("\
740 %s: weights must use the same ellipsis symbol as the name"),
741 "LC_COLLATE");
742
743 /* The weight for this level has to be ignored. We use the
744 null pointer to indicate this. */
745 elem->weights[weight_cnt].w = (struct element_t **)
746 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
747 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
748 elem->weights[weight_cnt].cnt = 1;
749 }
750 else
751 {
752 syntax:
753 /* It's a syntax error. */
754 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
755 lr_ignore_rest (ldfile, 0);
756 break;
757 }
758
759 arg = lr_token (ldfile, charmap, repertoire);
760 /* This better should be the end of the line or a semicolon. */
761 if (arg->tok == tok_semicolon)
762 /* OK, ignore this and read the next token. */
763 arg = lr_token (ldfile, charmap, repertoire);
764 else if (arg->tok != tok_eof && arg->tok != tok_eol)
765 {
766 /* It's a syntax error. */
767 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
768 lr_ignore_rest (ldfile, 0);
769 break;
770 }
771 }
772 while (++weight_cnt < nrules);
773
774 if (weight_cnt < nrules)
775 {
776 /* This means the rest of the line uses the current element as
777 the weight. */
778 do
779 {
780 elem->weights[weight_cnt].w = (struct element_t **)
781 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
782 elem->weights[weight_cnt].w[0] = elem;
783 elem->weights[weight_cnt].cnt = 1;
784 }
785 while (++weight_cnt < nrules);
786 }
787 else
788 {
789 if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
790 {
791 /* Too many rule values. */
792 lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
793 lr_ignore_rest (ldfile, 0);
794 }
795 else
796 lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
797 }
798 }
799
800
801 static int
802 insert_value (struct linereader *ldfile, struct token *arg,
803 struct charmap_t *charmap, struct repertoire_t *repertoire,
804 struct locale_collate_t *collate)
805 {
806 /* First find out what kind of symbol this is. */
807 struct charseq *seq;
808 uint32_t wc;
809 struct element_t *elem = NULL;
810
811 /* Try to find the character in the charmap. */
812 seq = charmap_find_value (charmap, arg->val.str.startmb, arg->val.str.lenmb);
813
814 /* Determine the wide character. */
815 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
816 {
817 wc = repertoire_find_value (repertoire, arg->val.str.startmb,
818 arg->val.str.lenmb);
819 if (seq != NULL)
820 seq->ucs4 = wc;
821 }
822 else
823 wc = seq->ucs4;
824
825 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
826 {
827 /* It's no character, so look through the collation elements and
828 symbol list. */
829 void *result;
830
831 if (find_entry (&collate->sym_table, arg->val.str.startmb,
832 arg->val.str.lenmb, &result) == 0)
833 {
834 /* It's a collation symbol. */
835 struct symbol_t *sym = (struct symbol_t *) result;
836 elem = sym->order;
837
838 if (elem == NULL)
839 elem = sym->order = new_element (collate, NULL, 0, NULL, NULL, 0);
840 }
841 else if (find_entry (&collate->elem_table, arg->val.str.startmb,
842 arg->val.str.lenmb, (void **) &elem) != 0)
843 {
844 /* It's also no collation element. Therefore ignore it. */
845 lr_ignore_rest (ldfile, 0);
846 return 1;
847 }
848 }
849 else
850 {
851 /* Otherwise the symbols stands for a character. */
852 if (find_entry (&collate->seq_table, arg->val.str.startmb,
853 arg->val.str.lenmb, (void **) &elem) != 0)
854 {
855 uint32_t wcs[2] = { wc, 0 };
856
857 /* We have to allocate an entry. */
858 elem = new_element (collate, seq != NULL ? seq->bytes : NULL,
859 seq != NULL ? seq->nbytes : 0,
860 wcs, arg->val.str.startmb, arg->val.str.lenmb);
861
862 /* And add it to the table. */
863 if (insert_entry (&collate->seq_table, arg->val.str.startmb,
864 arg->val.str.lenmb, elem) != 0)
865 /* This cannot happen. */
866 assert (! "Internal error");
867 }
868 else
869 {
870 /* Maybe the character was used before the definition. In this case
871 we have to insert the byte sequences now. */
872 if (elem->mbs == NULL && seq != NULL)
873 {
874 elem->mbs = obstack_copy0 (&collate->mempool,
875 seq->bytes, seq->nbytes);
876 elem->nmbs = seq->nbytes;
877 }
878
879 if (elem->wcs == NULL && seq != ILLEGAL_CHAR_VALUE)
880 {
881 uint32_t wcs[2] = { wc, 0 };
882
883 elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
884 elem->nwcs = 1;
885 }
886 }
887 }
888
889 /* Test whether this element is not already in the list. */
890 if (elem->next != NULL || (collate->cursor != NULL
891 && elem->next == collate->cursor))
892 {
893 lr_error (ldfile, _("order for `%.*s' already defined at %s:%zu"),
894 (int) arg->val.str.lenmb, arg->val.str.startmb,
895 elem->file, elem->line);
896 lr_ignore_rest (ldfile, 0);
897 return 1;
898 }
899
900 insert_weights (ldfile, elem, charmap, repertoire, collate, tok_none);
901
902 return 0;
903 }
904
905
906 static void
907 handle_ellipsis (struct linereader *ldfile, struct token *arg,
908 enum token_t ellipsis, struct charmap_t *charmap,
909 struct repertoire_t *repertoire,
910 struct locale_collate_t *collate)
911 {
912 struct element_t *startp;
913 struct element_t *endp;
914
915 /* Unlink the entry added for the ellipsis. */
916 unlink_element (collate);
917 startp = collate->cursor;
918
919 /* Process and add the end-entry. */
920 if (arg != NULL
921 && insert_value (ldfile, arg, charmap, repertoire, collate))
922 /* Something went wrong with inserting the to-value. This means
923 we cannot process the ellipsis. */
924 return;
925
926 /* Reset the cursor. */
927 collate->cursor = startp;
928
929 /* Now we have to handle many different situations:
930 - we have to distinguish between the three different ellipsis forms
931 - the is the ellipsis at the beginning, in the middle, or at the end.
932 */
933 endp = collate->cursor->next;
934 assert (arg == NULL || endp != NULL);
935
936 /* Both, the start and the end symbol, must stand for characters. */
937 if ((startp == NULL || startp->name == NULL)
938 || (endp == NULL || endp->name == NULL))
939 {
940 lr_error (ldfile, _("\
941 %s: the start end the end symbol of a range must stand for characters"),
942 "LC_COLLATE");
943 return;
944 }
945
946 if (ellipsis == tok_ellipsis3)
947 {
948 /* One requirement we make here: the length of the byte
949 sequences for the first and end character must be the same.
950 This is mainly to prevent unwanted effects and this is often
951 not what is wanted. */
952 size_t len = (startp->mbs != NULL ? startp->nmbs
953 : (endp->mbs != NULL ? endp->nmbs : 0));
954 char mbcnt[len + 1];
955 char mbend[len + 1];
956
957 /* Well, this should be caught somewhere else already. Just to
958 make sure. */
959 assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
960 assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
961
962 if (startp != NULL && endp != NULL
963 && startp->mbs != NULL && endp->mbs != NULL
964 && startp->nmbs != endp->nmbs)
965 {
966 lr_error (ldfile, _("\
967 %s: byte sequences of first and last character must have the same length"),
968 "LC_COLLATE");
969 return;
970 }
971
972 /* Determine whether we have to generate multibyte sequences. */
973 if ((startp == NULL || startp->mbs != NULL)
974 && (endp == NULL || endp->mbs != NULL))
975 {
976 int cnt;
977 int ret;
978
979 /* Prepare the beginning byte sequence. This is either from the
980 beginning byte sequence or it is all nulls if it was an
981 initial ellipsis. */
982 if (startp == NULL || startp->mbs == NULL)
983 memset (mbcnt, '\0', len);
984 else
985 {
986 memcpy (mbcnt, startp->mbs, len);
987
988 /* And increment it so that the value is the first one we will
989 try to insert. */
990 for (cnt = len - 1; cnt >= 0; --cnt)
991 if (++mbcnt[cnt] != '\0')
992 break;
993 }
994 mbcnt[len] = '\0';
995
996 /* And the end sequence. */
997 if (endp == NULL || endp->mbs == NULL)
998 memset (mbend, '\0', len);
999 else
1000 memcpy (mbend, endp->mbs, len);
1001 mbend[len] = '\0';
1002
1003 /* Test whether we have a correct range. */
1004 ret = memcmp (mbcnt, mbend, len);
1005 if (ret >= 0)
1006 {
1007 if (ret > 0)
1008 lr_error (ldfile, _("%s: byte sequence of first character of \
1009 sequence is not lower than that of the last character"), "LC_COLLATE");
1010 return;
1011 }
1012
1013 /* Generate the byte sequences data. */
1014 while (1)
1015 {
1016 struct charseq *seq;
1017
1018 /* Quite a bit of work ahead. We have to find the character
1019 definition for the byte sequence and then determine the
1020 wide character belonging to it. */
1021 seq = charmap_find_symbol (charmap, mbcnt, len);
1022 if (seq != NULL)
1023 {
1024 struct element_t *elem;
1025 size_t namelen;
1026
1027 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1028 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1029 strlen (seq->name));
1030
1031 /* I don't this this can ever happen. */
1032 assert (seq->name != NULL);
1033 namelen = strlen (seq->name);
1034
1035 /* Now we are ready to insert the new value in the
1036 sequence. Find out whether the element is
1037 already known. */
1038 if (find_entry (&collate->seq_table, seq->name, namelen,
1039 (void **) &elem) != 0)
1040 {
1041 uint32_t wcs[2] = { seq->ucs4, 0 };
1042
1043 /* We have to allocate an entry. */
1044 elem = new_element (collate, mbcnt, len, wcs, seq->name,
1045 namelen);
1046
1047 /* And add it to the table. */
1048 if (insert_entry (&collate->seq_table, seq->name,
1049 namelen, elem) != 0)
1050 /* This cannot happen. */
1051 assert (! "Internal error");
1052 }
1053
1054 /* Test whether this element is not already in the list. */
1055 if (elem->next != NULL || (collate->cursor != NULL
1056 && elem->next == collate->cursor))
1057 {
1058 lr_error (ldfile, _("\
1059 order for `%.*s' already defined at %s:%zu"),
1060 (int) namelen, seq->name,
1061 elem->file, elem->line);
1062 goto increment;
1063 }
1064
1065 /* Enqueue the new element. */
1066 elem->last = collate->cursor;
1067 if (collate->cursor != NULL)
1068 elem->next = NULL;
1069 else
1070 {
1071 elem->next = collate->cursor->next;
1072 elem->last->next = elem;
1073 if (elem->next != NULL)
1074 elem->next->last = elem;
1075 }
1076 if (collate->start == NULL)
1077 {
1078 assert (collate->cursor == NULL);
1079 collate->start = elem;
1080 }
1081 collate->cursor = elem;
1082
1083 /* Add the weight value. We take them from the
1084 `ellipsis_weights' member of `collate'. */
1085 elem->weights = (struct element_list_t *)
1086 obstack_alloc (&collate->mempool,
1087 nrules * sizeof (struct element_list_t));
1088 for (cnt = 0; cnt < nrules; ++cnt)
1089 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1090 && (collate->ellipsis_weight.weights[cnt].w[0]
1091 == ELEMENT_ELLIPSIS2))
1092 {
1093 elem->weights[cnt].w = (struct element_t **)
1094 obstack_alloc (&collate->mempool,
1095 sizeof (struct element_t *));
1096 elem->weights[cnt].w[0] = elem;
1097 elem->weights[cnt].cnt = 1;
1098 }
1099 else
1100 {
1101 /* Simly use the weight from `ellipsis_weight'. */
1102 elem->weights[cnt].w =
1103 collate->ellipsis_weight.weights[cnt].w;
1104 elem->weights[cnt].cnt =
1105 collate->ellipsis_weight.weights[cnt].cnt;
1106 }
1107 }
1108
1109 /* Increment for the next round. */
1110 increment:
1111 for (cnt = len - 1; cnt >= 0; --cnt)
1112 if (++mbcnt[cnt] != '\0')
1113 break;
1114
1115 /* Find out whether this was all. */
1116 if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1117 /* Yep, that's all. */
1118 break;
1119 }
1120 }
1121 }
1122 else
1123 {
1124 /* For symbolic range we naturally must have a beginning and an
1125 end specified by the user. */
1126 if (startp == NULL)
1127 lr_error (ldfile, _("\
1128 %s: symbolic range ellipsis must not directly follow `order_start'"),
1129 "LC_COLLATE");
1130 else if (endp == NULL)
1131 lr_error (ldfile, _("\
1132 %s: symbolic range ellipsis must not be direct followed by `order_end'"),
1133 "LC_COLLATE");
1134 else
1135 {
1136 /* Determine the range. To do so we have to determine the
1137 common prefix of the both names and then the numeric
1138 values of both ends. */
1139 size_t lenfrom = strlen (startp->name);
1140 size_t lento = strlen (endp->name);
1141 char buf[lento + 1];
1142 int preflen = 0;
1143 long int from;
1144 long int to;
1145 char *cp;
1146 int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1147
1148 if (lenfrom != lento)
1149 {
1150 invalid_range:
1151 lr_error (ldfile, _("\
1152 `%s' and `%.*s' are no valid names for symbolic range"),
1153 startp->name, (int) lento, endp->name);
1154 return;
1155 }
1156
1157 while (startp->name[preflen] == endp->name[preflen])
1158 if (startp->name[preflen] == '\0')
1159 /* Nothing to be done. The start and end point are identical
1160 and while inserting the end point we have already given
1161 the user an error message. */
1162 return;
1163 else
1164 ++preflen;
1165
1166 errno = 0;
1167 from = strtol (startp->name + preflen, &cp, base);
1168 if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1169 goto invalid_range;
1170
1171 errno = 0;
1172 to = strtol (endp->name + preflen, &cp, base);
1173 if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1174 goto invalid_range;
1175
1176 /* Copy the prefix. */
1177 memcpy (buf, startp->name, preflen);
1178
1179 /* Loop over all values. */
1180 for (++from; from < to; ++from)
1181 {
1182 struct element_t *elem = NULL;
1183 struct charseq *seq;
1184 uint32_t wc;
1185 int cnt;
1186
1187 /* Generate the the name. */
1188 sprintf (buf + preflen, base == 10 ? "%d" : "%x", from);
1189
1190 /* Look whether this name is already defined. */
1191 if (find_entry (&collate->seq_table, arg->val.str.startmb,
1192 arg->val.str.lenmb, (void **) &elem) == 0)
1193 {
1194 if (elem->next != NULL || (collate->cursor != NULL
1195 && elem->next == collate->cursor))
1196 {
1197 lr_error (ldfile, _("\
1198 %s: order for `%.*s' already defined at %s:%zu"),
1199 "LC_COLLATE", (int) lenfrom, buf,
1200 elem->file, elem->line);
1201 continue;
1202 }
1203
1204 if (elem->name == NULL)
1205 {
1206 lr_error (ldfile, _("%s: `%s' must be a charater"),
1207 "LC_COLLATE", buf);
1208 continue;
1209 }
1210 }
1211
1212 if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1213 {
1214 /* Search for a character of this name. */
1215 seq = charmap_find_value (charmap, buf, lenfrom);
1216 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1217 {
1218 wc = repertoire_find_value (repertoire, buf, lenfrom);
1219
1220 if (seq != NULL)
1221 seq->ucs4 = wc;
1222 }
1223 else
1224 wc = seq->ucs4;
1225
1226 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1227 /* We don't know anything about a character with this
1228 name. XXX Should we warn? */
1229 continue;
1230
1231 if (elem == NULL)
1232 {
1233 uint32_t wcs[2] = { wc, 0 };
1234
1235 /* We have to allocate an entry. */
1236 elem = new_element (collate,
1237 seq != NULL ? seq->bytes : NULL,
1238 seq != NULL ? seq->nbytes : 0,
1239 wc == ILLEGAL_CHAR_VALUE
1240 ? NULL : wcs,
1241 buf, lenfrom);
1242 }
1243 else
1244 {
1245 /* Update the element. */
1246 if (seq != NULL)
1247 {
1248 elem->mbs = obstack_copy0 (&collate->mempool,
1249 seq->bytes, seq->nbytes);
1250 elem->nmbs = seq->nbytes;
1251 }
1252
1253 if (wc != ILLEGAL_CHAR_VALUE)
1254 {
1255 uint32_t zero = 0;
1256
1257 obstack_grow (&collate->mempool,
1258 &wc, sizeof (uint32_t));
1259 obstack_grow (&collate->mempool,
1260 &zero, sizeof (uint32_t));
1261 elem->wcs = obstack_finish (&collate->mempool);
1262 elem->nwcs = 1;
1263 }
1264 }
1265
1266 elem->file = ldfile->fname;
1267 elem->line = ldfile->lineno;
1268 elem->section = collate->current_section;
1269 }
1270
1271 /* Enqueue the new element. */
1272 elem->last = collate->cursor;
1273 elem->next = collate->cursor->next;
1274 elem->last->next = elem;
1275 if (elem->next != NULL)
1276 elem->next->last = elem;
1277 collate->cursor = elem;
1278
1279 /* Now add the weights. They come from the `ellipsis_weights'
1280 member of `collate'. */
1281 elem->weights = (struct element_list_t *)
1282 obstack_alloc (&collate->mempool,
1283 nrules * sizeof (struct element_list_t));
1284 for (cnt = 0; cnt < nrules; ++cnt)
1285 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1286 && (collate->ellipsis_weight.weights[cnt].w[0]
1287 == ELEMENT_ELLIPSIS2))
1288 {
1289 elem->weights[cnt].w = (struct element_t **)
1290 obstack_alloc (&collate->mempool,
1291 sizeof (struct element_t *));
1292 elem->weights[cnt].w[0] = elem;
1293 elem->weights[cnt].cnt = 1;
1294 }
1295 else
1296 {
1297 /* Simly use the weight from `ellipsis_weight'. */
1298 elem->weights[cnt].w =
1299 collate->ellipsis_weight.weights[cnt].w;
1300 elem->weights[cnt].cnt =
1301 collate->ellipsis_weight.weights[cnt].cnt;
1302 }
1303 }
1304 }
1305 }
1306 }
1307
1308
1309 static void
1310 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1311 struct localedef_t *copy_locale, int ignore_content)
1312 {
1313 if (!ignore_content)
1314 {
1315 struct locale_collate_t *collate;
1316
1317 if (copy_locale == NULL)
1318 {
1319 collate = locale->categories[LC_COLLATE].collate =
1320 (struct locale_collate_t *)
1321 xcalloc (1, sizeof (struct locale_collate_t));
1322
1323 /* Init the various data structures. */
1324 init_hash (&collate->elem_table, 100);
1325 init_hash (&collate->sym_table, 100);
1326 init_hash (&collate->seq_table, 500);
1327 obstack_init (&collate->mempool);
1328
1329 collate->col_weight_max = -1;
1330 }
1331 else
1332 collate = locale->categories[LC_COLLATE].collate =
1333 copy_locale->categories[LC_COLLATE].collate;
1334 }
1335
1336 ldfile->translate_strings = 0;
1337 ldfile->return_widestr = 0;
1338 }
1339
1340
1341 void
1342 collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
1343 {
1344 /* Now is the time when we can assign the individual collation
1345 values for all the symbols. We have possibly different values
1346 for the wide- and the multibyte-character symbols. This is done
1347 since it might make a difference in the encoding if there is in
1348 some cases no multibyte-character but there are wide-characters.
1349 (The other way around it is not important since theencoded
1350 collation value in the wide-character case is 32 bits wide and
1351 therefore requires no encoding).
1352
1353 The lowest collation value assigned is 2. Zero is reserved for
1354 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1355 functions and 1 is used to separate the individual passes for the
1356 different rules.
1357
1358 We also have to construct is list with all the bytes/words which
1359 can come first in a sequence, followed by all the elements which
1360 also start with this byte/word. The order is reverse which has
1361 among others the important effect that longer strings are located
1362 first in the list. This is required for the output data since
1363 the algorithm used in `strcoll' etc depends on this.
1364
1365 The multibyte case is easy. We simply sort into an array with
1366 256 elements. */
1367 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1368 int mbact[nrules];
1369 int wcact;
1370 struct element_t *runp;
1371 int i;
1372 int need_undefined = 0;
1373 struct section_list *sect;
1374 int ruleidx;
1375
1376 /* If this assertion is hit change the type in `element_t'. */
1377 assert (nrules <= sizeof (runp->used_in_level) * 8);
1378
1379 /* Find out which elements are used at which level. At the same
1380 time we find out whether we have any undefined symbols. */
1381 runp = collate->start;
1382 while (runp != NULL)
1383 {
1384 if (runp->mbs != NULL)
1385 {
1386 for (i = 0; i < nrules; ++i)
1387 {
1388 int j;
1389
1390 for (j = 0; j < runp->weights[i].cnt; ++j)
1391 /* A NULL pointer as the weight means IGNORE. */
1392 if (runp->weights[i].w[j] != NULL)
1393 {
1394 if (runp->weights[i].w[j]->weights == NULL)
1395 {
1396 error_at_line (0, 0, runp->file, runp->line,
1397 _("symbol `%s' not defined"),
1398 runp->weights[i].w[j]->name);
1399
1400 need_undefined = 1;
1401 runp->weights[i].w[j] = &collate->undefined;
1402 }
1403 else
1404 /* Set the bit for the level. */
1405 runp->weights[i].w[j]->used_in_level |= 1 << i;
1406 }
1407 }
1408 }
1409
1410 /* Up to the next entry. */
1411 runp = runp->next;
1412 }
1413
1414 /* Walk through the list of defined sequences and assign weights. Also
1415 create the data structure which will allow generating the single byte
1416 character based tables.
1417
1418 Since at each time only the weights for each of the rules are
1419 only compared to other weights for this rule it is possible to
1420 assign more compact weight values than simply counting all
1421 weights in sequence. We can assign weights from 3, one for each
1422 rule individually and only for those elements, which are actually
1423 used for this rule.
1424
1425 Why is this important? It is not for the wide char table. But
1426 it is for the singlebyte output since here larger numbers have to
1427 be encoded to make it possible to emit the value as a byte
1428 string. */
1429 for (i = 0; i < nrules; ++i)
1430 mbact[i] = 3;
1431 wcact = 3;
1432 runp = collate->start;
1433 while (runp != NULL)
1434 {
1435 /* Determine the order. */
1436 if (runp->used_in_level != 0)
1437 {
1438 runp->mborder = (int *) obstack_alloc (&collate->mempool,
1439 nrules * sizeof (int));
1440
1441 for (i = 0; i < nrules; ++i)
1442 if ((runp->used_in_level & (1 << i)) != 0)
1443 runp->mborder[i] = mbact[i]++;
1444 else
1445 runp->mborder[i] = 0;
1446 }
1447
1448 if (runp->mbs != NULL)
1449 {
1450 struct element_t **eptr;
1451
1452 /* Find the point where to insert in the list. */
1453 eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1454 while (*eptr != NULL)
1455 {
1456 if ((*eptr)->nmbs < runp->nmbs)
1457 break;
1458
1459 if ((*eptr)->nmbs == runp->nmbs)
1460 {
1461 int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1462
1463 if (c == 0)
1464 {
1465 /* This should not happen. It means that we have
1466 to symbols with the same byte sequence. It is
1467 of course an error. */
1468 error_at_line (0, 0, (*eptr)->file, (*eptr)->line,
1469 _("symbol `%s' has same encoding as"),
1470 (*eptr)->name);
1471 error_at_line (0, 0, runp->file, runp->line,
1472 _("symbol `%s'"), runp->name);
1473 goto dont_insert;
1474 }
1475 else if (c < 0)
1476 /* Insert it here. */
1477 break;
1478 }
1479
1480 /* To the next entry. */
1481 eptr = &(*eptr)->mbnext;
1482 }
1483
1484 /* Set the pointers. */
1485 runp->mbnext = *eptr;
1486 *eptr = runp;
1487 dont_insert:
1488 }
1489
1490 if (runp->wcs != NULL)
1491 runp->wcorder = wcact++;
1492
1493 /* Up to the next entry. */
1494 runp = runp->next;
1495 }
1496
1497 /* Find out whether any of the `mbheads' entries is unset. In this
1498 case we use the UNDEFINED entry. */
1499 for (i = 1; i < 256; ++i)
1500 if (collate->mbheads[i] == NULL)
1501 {
1502 need_undefined = 1;
1503 collate->mbheads[i] = &collate->undefined;
1504 }
1505
1506 /* Now determine whether the UNDEFINED entry is needed and if yes,
1507 whether it was defined. */
1508 collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1509 if (need_undefined && collate->undefined.file == NULL)
1510 {
1511 error (0, 0, _("no definition of `UNDEFINED'"));
1512
1513 /* Add UNDEFINED at the end. */
1514 collate->undefined.mborder =
1515 (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1516
1517 for (i = 0; i < nrules; ++i)
1518 collate->undefined.mborder[i] = mbact[i]++;
1519
1520 collate->undefined.wcorder = wcact++;
1521 }
1522
1523 /* Finally, try to unify the rules for the sections. Whenever the rules
1524 for a section are the same as those for another section give the
1525 ruleset the same index. Since there are never many section we can
1526 use an O(n^2) algorithm here. */
1527 sect = collate->sections;
1528 assert (sect != NULL);
1529 ruleidx = 0;
1530 do
1531 {
1532 struct section_list *osect = collate->sections;
1533
1534 while (osect != sect)
1535 if (memcmp (osect->rules, sect->rules, nrules) == 0)
1536 break;
1537 else
1538 osect = osect->next;
1539
1540 if (osect == sect)
1541 sect->ruleidx = ruleidx++;
1542 else
1543 sect->ruleidx = osect->ruleidx;
1544
1545 /* Next section. */
1546 sect = sect->next;
1547 }
1548 while (sect != NULL);
1549 /* We are currently not prepared for more than 256 rulesets. But this
1550 should never really be a problem. */
1551 assert (ruleidx <= 256);
1552 }
1553
1554
1555 static inline int32_t
1556 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1557 struct element_t *elem)
1558 {
1559 size_t cnt;
1560 int32_t retval;
1561
1562 /* Optimize the use of UNDEFINED. */
1563 if (elem == &collate->undefined)
1564 /* The weights are already inserted. */
1565 return 0;
1566
1567 /* This byte can start exactly one collation element and this is
1568 a single byte. We can directly give the index to the weights. */
1569 retval = obstack_object_size (pool);
1570
1571 /* Construct the weight. */
1572 for (cnt = 0; cnt < nrules; ++cnt)
1573 {
1574 char buf[elem->weights[cnt].cnt * 7];
1575 int len = 0;
1576 int i;
1577
1578 /* Add the direction. */
1579 obstack_1grow (pool, elem->section->rules[cnt]);
1580
1581 for (i = 0; i < elem->weights[cnt].cnt; ++i)
1582 /* Encode the weight value. */
1583 if (elem->weights[cnt].w[i] == NULL)
1584 {
1585 /* This entry was IGNORE. */
1586 buf[len++] = IGNORE_CHAR;
1587 }
1588 else
1589 len += utf8_encode (&buf[len],
1590 elem->weights[cnt].w[i]->mborder[cnt]);
1591
1592 /* And add the buffer content. */
1593 obstack_grow (pool, buf, len);
1594 }
1595
1596 return retval;
1597 }
1598
1599
1600 void
1601 collate_output (struct localedef_t *locale, struct charmap_t *charmap,
1602 const char *output_path)
1603 {
1604 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1605 const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
1606 struct iovec iov[2 + nelems];
1607 struct locale_file data;
1608 uint32_t idx[nelems];
1609 size_t cnt;
1610 size_t ch;
1611 int32_t tablemb[256];
1612 struct obstack weightpool;
1613 struct obstack extrapool;
1614 struct section_list *sect;
1615 int i;
1616
1617 obstack_init (&weightpool);
1618 obstack_init (&extrapool);
1619
1620 data.magic = LIMAGIC (LC_COLLATE);
1621 data.n = nelems;
1622 iov[0].iov_base = (void *) &data;
1623 iov[0].iov_len = sizeof (data);
1624
1625 iov[1].iov_base = (void *) idx;
1626 iov[1].iov_len = sizeof (idx);
1627
1628 idx[0] = iov[0].iov_len + iov[1].iov_len;
1629 cnt = 0;
1630
1631 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
1632 iov[2 + cnt].iov_base = &collate->nrules;
1633 iov[2 + cnt].iov_len = sizeof (uint32_t);
1634 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1635 ++cnt;
1636
1637 /* Prepare the ruleset table. */
1638 for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
1639 if (sect->ruleidx == i)
1640 {
1641 obstack_grow (&weightpool, sect->rules, nrules);
1642 ++i;
1643 }
1644 /* And align the output. */
1645 i = (nrules * i) % __alignof__ (int32_t);
1646 if (i > 0)
1647 do
1648 obstack_1grow (&weightpool, '\0');
1649 while (++i < __alignof__ (int32_t));
1650
1651 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
1652 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
1653 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
1654 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1655 ++cnt;
1656
1657 /* Generate the 8-bit table. Walk through the lists of sequences
1658 starting with the same byte and add them one after the other to
1659 the table. In case we have more than one sequence starting with
1660 the same byte we have to use extra indirection.
1661
1662 First add a record for the NUL byte. This entry will never be used
1663 so it does not matter. */
1664 tablemb[0] = 0;
1665
1666 /* Now insert the `UNDEFINED' value if it is used. Since this value
1667 will probably be used more than once it is good to store the
1668 weights only once. */
1669 if (collate->undefined.used_in_level != 0)
1670 output_weight (&weightpool, collate, &collate->undefined);
1671
1672 for (ch = 1; ch < 256; ++ch)
1673 if (collate->mbheads[ch]->mbnext == NULL
1674 && collate->mbheads[ch]->nmbs == 1)
1675 {
1676 tablemb[ch] = output_weight (&weightpool, collate,
1677 collate->mbheads[ch]);
1678 }
1679 else
1680 {
1681 /* The entries in the list are sorted by length and then
1682 alphabetically. This is the order in which we will add the
1683 elements to the collation table. This allows to simply
1684 walk the table in sequence and stop at the first matching
1685 entry. Since the longer sequences are coming first in the
1686 list they have the possibility to match first, just as it
1687 has to be. In the worst case we are walking to the end of
1688 the list where we put, if no singlebyte sequence is defined
1689 in the locale definition, the weights for UNDEFINED.
1690
1691 To reduce the length of the search list we compress them a bit.
1692 This happens by collecting sequences of consecutive byte
1693 sequences in one entry (having and begin and end byte sequence)
1694 and add only one index into the weight table. We can find the
1695 consecutive entries since they are also consecutive in the list. */
1696 struct element_t *runp = collate->mbheads[ch];
1697 struct element_t *lastp;
1698
1699 tablemb[ch] = -obstack_object_size (&extrapool);
1700
1701 do
1702 {
1703 /* Store the current index in the weight table. We know that
1704 the current position in the `extrapool' is aligned on a
1705 32-bit address. */
1706 int32_t weightidx;
1707 int added;
1708
1709 /* Output the weight info. */
1710 weightidx = output_weight (&weightpool, collate, runp);
1711
1712 /* Find out wether this is a single entry or we have more than
1713 one consecutive entry. */
1714 if (runp->mbnext != NULL
1715 && runp->nmbs == runp->mbnext->nmbs
1716 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
1717 && (runp->mbs[runp->nmbs - 1] + 1
1718 == runp->mbnext->mbs[runp->nmbs - 1]))
1719 {
1720 int i;
1721
1722 /* More than one consecutive entry. We mark this by having
1723 a negative index into the weight table. */
1724 weightidx = -weightidx;
1725
1726 /* Now add first the initial byte sequence. */
1727 added = ((sizeof (int32_t) + 1 + 1 + 2 * (runp->nmbs - 1)
1728 + __alignof__ (int32_t) - 1)
1729 & ~(__alignof__ (int32_t) - 1));
1730 obstack_make_room (&extrapool, added);
1731
1732 if (sizeof (int32_t) == sizeof (int))
1733 obstack_int_grow_fast (&extrapool, weightidx);
1734 else
1735 obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
1736 obstack_1grow_fast (&extrapool, runp->section->ruleidx);
1737 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
1738 for (i = 1; i < runp->nmbs; ++i)
1739 obstack_1grow_fast (&extrapool, runp->mbs[i]);
1740
1741 /* Now find the end of the consecutive sequence. */
1742 do
1743 runp = runp->next;
1744 while (runp->mbnext != NULL
1745 && runp->nmbs == runp->mbnext->nmbs
1746 && memcmp (runp->mbs, runp->mbnext->mbs,
1747 runp->nmbs - 1) == 0
1748 && (runp->mbs[runp->nmbs - 1] + 1
1749 == runp->mbnext->mbs[runp->nmbs - 1]));
1750
1751 /* And add the end by sequence. Without length this time. */
1752 for (i = 1; i < runp->nmbs; ++i)
1753 obstack_1grow_fast (&extrapool, runp->mbs[i]);
1754 }
1755 else
1756 {
1757 /* A single entry. Simply add the index and the length and
1758 string (except for the first character which is already
1759 tested for). */
1760 int i;
1761
1762 added = ((sizeof (int32_t) + 1 + 1 + runp->nmbs - 1
1763 + __alignof__ (int32_t) - 1)
1764 & ~(__alignof__ (int32_t) - 1));
1765 obstack_make_room (&extrapool, added);
1766
1767 if (sizeof (int32_t) == sizeof (int))
1768 obstack_int_grow_fast (&extrapool, weightidx);
1769 else
1770 obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
1771 obstack_1grow_fast (&extrapool, runp->section->ruleidx);
1772 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
1773 for (i = 1; i < runp->nmbs; ++i)
1774 obstack_1grow_fast (&extrapool, runp->mbs[i]);
1775 }
1776
1777 /* Add alignment bytes if necessary. */
1778 i = added % __alignof__ (int32_t);
1779 if (i > 0)
1780 do
1781 obstack_1grow_fast (&extrapool, '\0');
1782 while (++i != __alignof__ (int32_t));
1783
1784 /* Next entry. */
1785 lastp = runp;
1786 runp = runp->mbnext;
1787 }
1788 while (runp != NULL);
1789
1790 /* If the final entry in the list is not a single character we
1791 add an UNDEFINED entry here. */
1792 if (lastp->nmbs != 1)
1793 {
1794 int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t))
1795 & ~(__alignof__ (int32_t) - 1));
1796 obstack_make_room (&extrapool, added);
1797
1798 if (sizeof (int32_t) == sizeof (int))
1799 obstack_int_grow_fast (&extrapool, 0);
1800 else
1801 {
1802 int32_t zero = 0;
1803 obstack_grow (&extrapool, &zero, sizeof (int32_t));
1804 }
1805 /* XXX What rule? We just pick the first. */
1806 obstack_1grow_fast (&extrapool, 0);
1807 /* Length is zero. */
1808 obstack_1grow_fast (&extrapool, 0);
1809
1810 /* Add alignment bytes if necessary. */
1811 i = added % __alignof__ (int32_t);
1812 if (i > 0)
1813 do
1814 obstack_1grow_fast (&extrapool, '\0');
1815 while (++i != __alignof__ (int32_t));
1816 }
1817 }
1818
1819 /* Now add the three tables. */
1820 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
1821 iov[2 + cnt].iov_base = tablemb;
1822 iov[2 + cnt].iov_len = sizeof (tablemb);
1823 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1824 ++cnt;
1825
1826 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
1827 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
1828 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
1829 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1830 ++cnt;
1831
1832 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
1833 iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
1834 iov[2 + cnt].iov_base = obstack_finish (&extrapool);
1835 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1836 ++cnt;
1837
1838
1839 assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
1840
1841 write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
1842
1843 obstack_free (&weightpool, NULL);
1844 obstack_free (&extrapool, NULL);
1845 }
1846
1847
1848 void
1849 collate_read (struct linereader *ldfile, struct localedef_t *result,
1850 struct charmap_t *charmap, const char *repertoire_name,
1851 int ignore_content)
1852 {
1853 struct repertoire_t *repertoire = NULL;
1854 struct locale_collate_t *collate;
1855 struct token *now;
1856 struct token *arg = NULL;
1857 enum token_t nowtok;
1858 int state = 0;
1859 enum token_t was_ellipsis = tok_none;
1860 struct localedef_t *copy_locale = NULL;
1861
1862 /* Get the repertoire we have to use. */
1863 if (repertoire_name != NULL)
1864 repertoire = repertoire_read (repertoire_name);
1865
1866 /* The rest of the line containing `LC_COLLATE' must be free. */
1867 lr_ignore_rest (ldfile, 1);
1868
1869 do
1870 {
1871 now = lr_token (ldfile, charmap, NULL);
1872 nowtok = now->tok;
1873 }
1874 while (nowtok == tok_eol);
1875
1876 if (nowtok == tok_copy)
1877 {
1878 state = 2;
1879 now = lr_token (ldfile, charmap, NULL);
1880 if (now->tok != tok_string)
1881 {
1882 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
1883
1884 skip_category:
1885 do
1886 now = lr_token (ldfile, charmap, NULL);
1887 while (now->tok != tok_eof && now->tok != tok_end);
1888
1889 if (now->tok != tok_eof
1890 || (now = lr_token (ldfile, charmap, NULL), now->tok == tok_eof))
1891 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
1892 else if (now->tok != tok_lc_collate)
1893 {
1894 lr_error (ldfile, _("\
1895 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
1896 lr_ignore_rest (ldfile, 0);
1897 }
1898 else
1899 lr_ignore_rest (ldfile, 1);
1900
1901 return;
1902 }
1903
1904 /* Get the locale definition. */
1905 copy_locale = find_locale (LC_COLLATE, now->val.str.startmb,
1906 repertoire_name, charmap);
1907 if ((copy_locale->avail & COLLATE_LOCALE) == 0)
1908 {
1909 /* Not yet loaded. So do it now. */
1910 if (locfile_read (copy_locale, charmap) != 0)
1911 goto skip_category;
1912 }
1913
1914 lr_ignore_rest (ldfile, 1);
1915
1916 now = lr_token (ldfile, charmap, NULL);
1917 nowtok = now->tok;
1918 }
1919
1920 /* Prepare the data structures. */
1921 collate_startup (ldfile, result, copy_locale, ignore_content);
1922 collate = result->categories[LC_COLLATE].collate;
1923
1924 while (1)
1925 {
1926 /* Of course we don't proceed beyond the end of file. */
1927 if (nowtok == tok_eof)
1928 break;
1929
1930 /* Ingore empty lines. */
1931 if (nowtok == tok_eol)
1932 {
1933 now = lr_token (ldfile, charmap, NULL);
1934 nowtok = now->tok;
1935 continue;
1936 }
1937
1938 switch (nowtok)
1939 {
1940 case tok_coll_weight_max:
1941 /* Ignore the rest of the line if we don't need the input of
1942 this line. */
1943 if (ignore_content)
1944 {
1945 lr_ignore_rest (ldfile, 0);
1946 break;
1947 }
1948
1949 if (state != 0)
1950 goto err_label;
1951
1952 arg = lr_token (ldfile, charmap, NULL);
1953 if (arg->tok != tok_number)
1954 goto err_label;
1955 if (collate->col_weight_max != -1)
1956 lr_error (ldfile, _("%s: duplicate definition of `%s'"),
1957 "LC_COLLATE", "col_weight_max");
1958 else
1959 collate->col_weight_max = arg->val.num;
1960 lr_ignore_rest (ldfile, 1);
1961 break;
1962
1963 case tok_section_symbol:
1964 /* Ignore the rest of the line if we don't need the input of
1965 this line. */
1966 if (ignore_content)
1967 {
1968 lr_ignore_rest (ldfile, 0);
1969 break;
1970 }
1971
1972 if (state != 0)
1973 goto err_label;
1974
1975 arg = lr_token (ldfile, charmap, repertoire);
1976 if (arg->tok != tok_bsymbol)
1977 goto err_label;
1978 else if (!ignore_content)
1979 {
1980 /* Check whether this section is already known. */
1981 struct section_list *known = collate->sections;
1982 while (known != NULL)
1983 if (strcmp (known->name, arg->val.str.startmb) == 0)
1984 break;
1985
1986 if (known != NULL)
1987 {
1988 lr_error (ldfile,
1989 _("%s: duplicate declaration of section `%s'"),
1990 "LC_COLLATE", arg->val.str.startmb);
1991 free (arg->val.str.startmb);
1992 }
1993 else
1994 collate->sections = make_seclist_elem (collate,
1995 arg->val.str.startmb,
1996 collate->sections);
1997
1998 lr_ignore_rest (ldfile, known == NULL);
1999 }
2000 else
2001 {
2002 free (arg->val.str.startmb);
2003 lr_ignore_rest (ldfile, 0);
2004 }
2005 break;
2006
2007 case tok_collating_element:
2008 /* Ignore the rest of the line if we don't need the input of
2009 this line. */
2010 if (ignore_content)
2011 {
2012 lr_ignore_rest (ldfile, 0);
2013 break;
2014 }
2015
2016 if (state != 0)
2017 goto err_label;
2018
2019 arg = lr_token (ldfile, charmap, repertoire);
2020 if (arg->tok != tok_bsymbol)
2021 goto err_label;
2022 else
2023 {
2024 const char *symbol = arg->val.str.startmb;
2025 size_t symbol_len = arg->val.str.lenmb;
2026
2027 /* Next the `from' keyword. */
2028 arg = lr_token (ldfile, charmap, repertoire);
2029 if (arg->tok != tok_from)
2030 {
2031 free ((char *) symbol);
2032 goto err_label;
2033 }
2034
2035 ldfile->return_widestr = 1;
2036
2037 /* Finally the string with the replacement. */
2038 arg = lr_token (ldfile, charmap, repertoire);
2039 ldfile->return_widestr = 0;
2040 if (arg->tok != tok_string)
2041 goto err_label;
2042
2043 if (!ignore_content)
2044 {
2045 if (symbol == NULL)
2046 lr_error (ldfile, _("\
2047 %s: unknown character in collating element name"),
2048 "LC_COLLATE");
2049 if (arg->val.str.startmb == NULL)
2050 lr_error (ldfile, _("\
2051 %s: unknown character in collating element definition"),
2052 "LC_COLLATE");
2053 if (arg->val.str.startwc == NULL)
2054 lr_error (ldfile, _("\
2055 %s: unknown wide character in collating element definition"),
2056 "LC_COLLATE");
2057 else if (arg->val.str.lenwc < 2)
2058 lr_error (ldfile, _("\
2059 %s: substitution string in collating element definition must have at least two characters"),
2060 "LC_COLLATE");
2061
2062 if (symbol != NULL)
2063 {
2064 /* The name is already defined. */
2065 if (check_duplicate (ldfile, collate, charmap,
2066 repertoire, symbol, symbol_len))
2067 goto col_elem_free;
2068
2069 if (insert_entry (&collate->elem_table,
2070 symbol, symbol_len,
2071 new_element (collate,
2072 NULL, 0, NULL, symbol,
2073 symbol_len)) < 0)
2074 lr_error (ldfile, _("\
2075 error while adding collating element"));
2076 }
2077 else
2078 goto col_elem_free;
2079 }
2080 else
2081 {
2082 col_elem_free:
2083 if (symbol != NULL)
2084 free ((char *) symbol);
2085 if (arg->val.str.startmb != NULL)
2086 free (arg->val.str.startmb);
2087 if (arg->val.str.startwc != NULL)
2088 free (arg->val.str.startwc);
2089 }
2090 lr_ignore_rest (ldfile, 1);
2091 }
2092 break;
2093
2094 case tok_collating_symbol:
2095 /* Ignore the rest of the line if we don't need the input of
2096 this line. */
2097 if (ignore_content)
2098 {
2099 lr_ignore_rest (ldfile, 0);
2100 break;
2101 }
2102
2103 if (state != 0)
2104 goto err_label;
2105
2106 arg = lr_token (ldfile, charmap, repertoire);
2107 if (arg->tok != tok_bsymbol)
2108 goto err_label;
2109 else
2110 {
2111 const char *symbol = arg->val.str.startmb;
2112 size_t symbol_len = arg->val.str.lenmb;
2113
2114 if (!ignore_content)
2115 {
2116 if (symbol == NULL)
2117 lr_error (ldfile, _("\
2118 %s: unknown character in collating symbol name"),
2119 "LC_COLLATE");
2120 else
2121 {
2122 /* The name is already defined. */
2123 if (check_duplicate (ldfile, collate, charmap,
2124 repertoire, symbol, symbol_len))
2125 goto col_sym_free;
2126
2127 if (insert_entry (&collate->sym_table,
2128 symbol, symbol_len,
2129 new_symbol (collate)) < 0)
2130 lr_error (ldfile, _("\
2131 error while adding collating symbol"));
2132 }
2133 }
2134 else
2135 {
2136 col_sym_free:
2137 if (symbol != NULL)
2138 free ((char *) symbol);
2139 }
2140 lr_ignore_rest (ldfile, 1);
2141 }
2142 break;
2143
2144 case tok_symbol_equivalence:
2145 /* Ignore the rest of the line if we don't need the input of
2146 this line. */
2147 if (ignore_content)
2148 {
2149 lr_ignore_rest (ldfile, 0);
2150 break;
2151 }
2152
2153 if (state != 0)
2154 goto err_label;
2155
2156 arg = lr_token (ldfile, charmap, repertoire);
2157 if (arg->tok != tok_bsymbol)
2158 goto err_label;
2159 else
2160 {
2161 const char *newname = arg->val.str.startmb;
2162 size_t newname_len = arg->val.str.lenmb;
2163 const char *symname;
2164 size_t symname_len;
2165 struct symbol_t *symval;
2166
2167 arg = lr_token (ldfile, charmap, repertoire);
2168 if (arg->tok != tok_bsymbol)
2169 {
2170 if (newname != NULL)
2171 free ((char *) newname);
2172 goto err_label;
2173 }
2174
2175 symname = arg->val.str.startmb;
2176 symname_len = arg->val.str.lenmb;
2177
2178 if (!ignore_content)
2179 {
2180 if (newname == NULL)
2181 {
2182 lr_error (ldfile, _("\
2183 %s: unknown character in equivalent definition name"),
2184 "LC_COLLATE");
2185 goto sym_equiv_free;
2186 }
2187 if (symname == NULL)
2188 {
2189 lr_error (ldfile, _("\
2190 %s: unknown character in equivalent definition value"),
2191 "LC_COLLATE");
2192 goto sym_equiv_free;
2193 }
2194 /* The name is already defined. */
2195 if (check_duplicate (ldfile, collate, charmap,
2196 repertoire, symname, symname_len))
2197 goto col_sym_free;
2198
2199 /* See whether the symbol name is already defined. */
2200 if (find_entry (&collate->sym_table, symname, symname_len,
2201 (void **) &symval) != 0)
2202 {
2203 lr_error (ldfile, _("\
2204 %s: unknown symbol `%s' in equivalent definition"),
2205 "LC_COLLATE", symname);
2206 goto col_sym_free;
2207 }
2208
2209 if (insert_entry (&collate->sym_table,
2210 newname, newname_len, symval) < 0)
2211 {
2212 lr_error (ldfile, _("\
2213 error while adding equivalent collating symbol"));
2214 goto sym_equiv_free;
2215 }
2216
2217 free ((char *) symname);
2218 }
2219 else
2220 {
2221 sym_equiv_free:
2222 if (newname != NULL)
2223 free ((char *) newname);
2224 if (symname != NULL)
2225 free ((char *) symname);
2226 }
2227 lr_ignore_rest (ldfile, 1);
2228 }
2229 break;
2230
2231 case tok_order_start:
2232 /* Ignore the rest of the line if we don't need the input of
2233 this line. */
2234 if (ignore_content)
2235 {
2236 lr_ignore_rest (ldfile, 0);
2237 break;
2238 }
2239
2240 if (state != 0 && state != 1)
2241 goto err_label;
2242 state = 1;
2243
2244 /* The 14652 draft does not specify whether all `order_start' lines
2245 must contain the same number of sort-rules, but 14651 does. So
2246 we require this here as well. */
2247 arg = lr_token (ldfile, charmap, repertoire);
2248 if (arg->tok == tok_bsymbol)
2249 {
2250 /* This better should be a section name. */
2251 struct section_list *sp = collate->sections;
2252 while (sp != NULL
2253 && strcmp (sp->name, arg->val.str.startmb) != 0)
2254 sp = sp->next;
2255
2256 if (sp == NULL)
2257 {
2258 lr_error (ldfile, _("\
2259 %s: unknown section name `%s'"),
2260 "LC_COLLATE", arg->val.str.startmb);
2261 /* We use the error section. */
2262 collate->current_section = &collate->error_section;
2263
2264 if (collate->error_section.first == NULL)
2265 {
2266 collate->error_section.next = collate->sections;
2267 collate->sections = &collate->error_section;
2268 }
2269 }
2270 else
2271 {
2272 /* Remember this section. */
2273 collate->current_section = sp;
2274
2275 /* One should not be allowed to open the same
2276 section twice. */
2277 if (sp->first != NULL)
2278 lr_error (ldfile, _("\
2279 %s: multiple order definitions for section `%s'"),
2280 "LC_COLLATE", sp->name);
2281 else
2282 {
2283 sp->next = collate->sections;
2284 collate->sections = sp;
2285 }
2286
2287 /* Next should come the end of the line or a semicolon. */
2288 arg = lr_token (ldfile, charmap, repertoire);
2289 if (arg->tok == tok_eol)
2290 {
2291 uint32_t cnt;
2292
2293 /* This means we have exactly one rule: `forward'. */
2294 if (collate->nrules > 1)
2295 lr_error (ldfile, _("\
2296 %s: invalid number of sorting rules"),
2297 "LC_COLLATE");
2298 else
2299 collate->nrules = 1;
2300 sp->rules = obstack_alloc (&collate->mempool,
2301 (sizeof (enum coll_sort_rule)
2302 * collate->nrules));
2303 for (cnt = 0; cnt < collate->nrules; ++cnt)
2304 sp->rules[cnt] = sort_forward;
2305
2306 /* Next line. */
2307 break;
2308 }
2309
2310 /* Get the next token. */
2311 arg = lr_token (ldfile, charmap, repertoire);
2312 }
2313 }
2314 else
2315 {
2316 /* There is no section symbol. Therefore we use the unnamed
2317 section. */
2318 collate->current_section = &collate->unnamed_section;
2319
2320 if (collate->unnamed_section.first != NULL)
2321 lr_error (ldfile, _("\
2322 %s: multiple order definitions for unnamed section"),
2323 "LC_COLLATE");
2324 else
2325 {
2326 collate->unnamed_section.next = collate->sections;
2327 collate->sections = &collate->unnamed_section;
2328 }
2329 }
2330
2331 /* Now read the direction names. */
2332 read_directions (ldfile, arg, charmap, repertoire, collate);
2333
2334 /* From now on we need the strings untranslated. */
2335 ldfile->translate_strings = 0;
2336 break;
2337
2338 case tok_order_end:
2339 /* Ignore the rest of the line if we don't need the input of
2340 this line. */
2341 if (ignore_content)
2342 {
2343 lr_ignore_rest (ldfile, 0);
2344 break;
2345 }
2346
2347 if (state != 1)
2348 goto err_label;
2349
2350 /* Handle ellipsis at end of list. */
2351 if (was_ellipsis != tok_none)
2352 {
2353 handle_ellipsis (ldfile, NULL, was_ellipsis, charmap, repertoire,
2354 collate);
2355 was_ellipsis = tok_none;
2356 }
2357
2358 state = 2;
2359 lr_ignore_rest (ldfile, 1);
2360 break;
2361
2362 case tok_reorder_after:
2363 /* Ignore the rest of the line if we don't need the input of
2364 this line. */
2365 if (ignore_content)
2366 {
2367 lr_ignore_rest (ldfile, 0);
2368 break;
2369 }
2370
2371 if (state == 1)
2372 {
2373 lr_error (ldfile, _("%s: missing `order_end' keyword"),
2374 "LC_COLLATE");
2375 state = 2;
2376
2377 /* Handle ellipsis at end of list. */
2378 if (was_ellipsis != tok_none)
2379 {
2380 handle_ellipsis (ldfile, arg, was_ellipsis, charmap,
2381 repertoire, collate);
2382 was_ellipsis = tok_none;
2383 }
2384 }
2385 else if (state != 2 && state != 3)
2386 goto err_label;
2387 state = 3;
2388
2389 arg = lr_token (ldfile, charmap, repertoire);
2390 if (arg->tok == tok_bsymbol)
2391 {
2392 /* Find this symbol in the sequence table. */
2393 struct element_t *insp;
2394 int no_error = 1;
2395
2396 if (find_entry (&collate->seq_table, arg->val.str.startmb,
2397 arg->val.str.lenmb, (void **) &insp) == 0)
2398 /* Yes, the symbol exists. Simply point the cursor
2399 to it. */
2400 collate->cursor = insp;
2401 else
2402 {
2403 /* This is bad. The symbol after which we have to
2404 insert does not exist. */
2405 lr_error (ldfile, _("\
2406 %s: cannot reorder after %.*s: symbol not known"),
2407 "LC_COLLATE", (int) arg->val.str.lenmb,
2408 arg->val.str.startmb);
2409 collate->cursor = NULL;
2410 no_error = 0;
2411 }
2412
2413 lr_ignore_rest (ldfile, no_error);
2414 }
2415 else
2416 /* This must not happen. */
2417 goto err_label;
2418 break;
2419
2420 case tok_reorder_end:
2421 /* Ignore the rest of the line if we don't need the input of
2422 this line. */
2423 if (ignore_content)
2424 break;
2425
2426 if (state != 3)
2427 goto err_label;
2428 state = 4;
2429 lr_ignore_rest (ldfile, 1);
2430 break;
2431
2432 case tok_reorder_sections_after:
2433 /* Ignore the rest of the line if we don't need the input of
2434 this line. */
2435 if (ignore_content)
2436 {
2437 lr_ignore_rest (ldfile, 0);
2438 break;
2439 }
2440
2441 if (state == 1)
2442 {
2443 lr_error (ldfile, _("%s: missing `order_end' keyword"),
2444 "LC_COLLATE");
2445 state = 2;
2446
2447 /* Handle ellipsis at end of list. */
2448 if (was_ellipsis != tok_none)
2449 {
2450 handle_ellipsis (ldfile, NULL, was_ellipsis, charmap,
2451 repertoire, collate);
2452 was_ellipsis = tok_none;
2453 }
2454 }
2455 else if (state == 3)
2456 {
2457 error (0, 0, _("%s: missing `reorder-end' keyword"),
2458 "LC_COLLATE");
2459 state = 4;
2460 }
2461 else if (state != 2 && state != 4)
2462 goto err_label;
2463 state = 5;
2464
2465 /* Get the name of the sections we are adding after. */
2466 arg = lr_token (ldfile, charmap, repertoire);
2467 if (arg->tok == tok_bsymbol)
2468 {
2469 /* Now find a section with this name. */
2470 struct section_list *runp = collate->sections;
2471
2472 while (runp != NULL)
2473 {
2474 if (runp->name != NULL
2475 && strlen (runp->name) == arg->val.str.lenmb
2476 && memcmp (runp->name, arg->val.str.startmb,
2477 arg->val.str.lenmb) == 0)
2478 break;
2479
2480 runp = runp->next;
2481 }
2482
2483 if (runp != NULL)
2484 collate->current_section = runp;
2485 else
2486 {
2487 /* This is bad. The section after which we have to
2488 reorder does not exist. Therefore we cannot
2489 process the whole rest of this reorder
2490 specification. */
2491 lr_error (ldfile, _("%s: section `%.*s' not known"),
2492 "LC_COLLATE", (int) arg->val.str.lenmb,
2493 arg->val.str.startmb);
2494
2495 do
2496 {
2497 lr_ignore_rest (ldfile, 0);
2498
2499 now = lr_token (ldfile, charmap, NULL);
2500 }
2501 while (now->tok == tok_reorder_sections_after
2502 || now->tok == tok_reorder_sections_end
2503 || now->tok == tok_end);
2504
2505 /* Process the token we just saw. */
2506 nowtok = now->tok;
2507 continue;
2508 }
2509 }
2510 else
2511 /* This must not happen. */
2512 goto err_label;
2513 break;
2514
2515 case tok_reorder_sections_end:
2516 /* Ignore the rest of the line if we don't need the input of
2517 this line. */
2518 if (ignore_content)
2519 break;
2520
2521 if (state != 5)
2522 goto err_label;
2523 state = 6;
2524 lr_ignore_rest (ldfile, 1);
2525 break;
2526
2527 case tok_bsymbol:
2528 /* Ignore the rest of the line if we don't need the input of
2529 this line. */
2530 if (ignore_content)
2531 {
2532 lr_ignore_rest (ldfile, 0);
2533 break;
2534 }
2535
2536 if (state != 1 && state != 3)
2537 goto err_label;
2538
2539 if (state == 3)
2540 {
2541 /* It is possible that we already have this collation sequence.
2542 In this case we move the entry. */
2543 struct element_t *seqp;
2544
2545 /* If the symbol after which we have to insert was not found
2546 ignore all entries. */
2547 if (collate->cursor == NULL)
2548 {
2549 lr_ignore_rest (ldfile, 0);
2550 break;
2551 }
2552
2553 if (find_entry (&collate->seq_table, arg->val.str.startmb,
2554 arg->val.str.lenmb, (void **) &seqp) == 0)
2555 {
2556 /* Remove the entry from the old position. */
2557 if (seqp->last == NULL)
2558 collate->start = seqp->next;
2559 else
2560 seqp->last->next = seqp->next;
2561 if (seqp->next != NULL)
2562 seqp->next->last = seqp->last;
2563
2564 /* We also have to check whether this entry is the
2565 first or last of a section. */
2566 if (seqp->section->first == seqp)
2567 {
2568 if (seqp->section->first == seqp->section->last)
2569 /* This section has no content anymore. */
2570 seqp->section->first = seqp->section->last = NULL;
2571 else
2572 seqp->section->first = seqp->next;
2573 }
2574 else if (seqp->section->last == seqp)
2575 seqp->section->last = seqp->last;
2576
2577 /* Now insert it in the new place. */
2578 seqp->next = collate->cursor->next;
2579 seqp->last = collate->cursor;
2580 collate->cursor->next = seqp;
2581 if (seqp->next != NULL)
2582 seqp->next->last = seqp;
2583
2584 seqp->section = collate->cursor->section;
2585 if (seqp->section->last == collate->cursor)
2586 seqp->section->last = seqp;
2587
2588 break;
2589 }
2590
2591 /* Otherwise we just add a new entry. */
2592 }
2593 else if (state == 5)
2594 {
2595 /* We are reordering sections. Find the named section. */
2596 struct section_list *runp = collate->sections;
2597 struct section_list *prevp = NULL;
2598
2599 while (runp != NULL)
2600 {
2601 if (runp->name != NULL
2602 && strlen (runp->name) == arg->val.str.lenmb
2603 && memcmp (runp->name, arg->val.str.startmb,
2604 arg->val.str.lenmb) == 0)
2605 break;
2606
2607 prevp = runp;
2608 runp = runp->next;
2609 }
2610
2611 if (runp == NULL)
2612 {
2613 lr_error (ldfile, _("%s: section `%.*s' not known"),
2614 "LC_COLLATE", (int) arg->val.str.lenmb,
2615 arg->val.str.startmb);
2616 lr_ignore_rest (ldfile, 0);
2617 }
2618 else
2619 {
2620 if (runp != collate->current_section)
2621 {
2622 /* Remove the named section from the old place and
2623 insert it in the new one. */
2624 prevp->next = runp->next;
2625
2626 runp->next = collate->current_section->next;
2627 collate->current_section->next = runp;
2628 collate->current_section = runp;
2629 }
2630
2631 /* Process the rest of the line which might change
2632 the collation rules. */
2633 arg = lr_token (ldfile, charmap, repertoire);
2634 if (arg->tok != tok_eof && arg->tok != tok_eol)
2635 read_directions (ldfile, arg, charmap, repertoire,
2636 collate);
2637 }
2638 break;
2639 }
2640 else if (was_ellipsis != tok_none)
2641 {
2642 /* Using the information in the `ellipsis_weight'
2643 element and this and the last value we have to handle
2644 the ellipsis now. */
2645 assert (state == 1);
2646
2647 handle_ellipsis (ldfile, arg, was_ellipsis, charmap, repertoire,
2648 collate);
2649
2650 /* Remember that we processed the ellipsis. */
2651 was_ellipsis = tok_none;
2652
2653 /* And don't add the value a second time. */
2654 break;
2655 }
2656
2657 /* Now insert in the new place. */
2658 insert_value (ldfile, arg, charmap, repertoire, collate);
2659 break;
2660
2661 case tok_undefined:
2662 /* Ignore the rest of the line if we don't need the input of
2663 this line. */
2664 if (ignore_content)
2665 {
2666 lr_ignore_rest (ldfile, 0);
2667 break;
2668 }
2669
2670 if (state != 1)
2671 goto err_label;
2672
2673 if (was_ellipsis != tok_none)
2674 {
2675 lr_error (ldfile,
2676 _("%s: cannot have `%s' as end of ellipsis range"),
2677 "LC_COLLATE", "UNDEFINED");
2678
2679 unlink_element (collate);
2680 was_ellipsis = tok_none;
2681 }
2682
2683 /* See whether UNDEFINED already appeared somewhere. */
2684 if (collate->undefined.next != NULL
2685 || (collate->cursor != NULL
2686 && collate->undefined.next == collate->cursor))
2687 {
2688 lr_error (ldfile,
2689 _("%s: order for `%.*s' already defined at %s:%zu"),
2690 "LC_COLLATE", 9, "UNDEFINED", collate->undefined.file,
2691 collate->undefined.line);
2692 lr_ignore_rest (ldfile, 0);
2693 }
2694 else
2695 /* Parse the weights. */
2696 insert_weights (ldfile, &collate->undefined, charmap,
2697 repertoire, collate, tok_none);
2698 break;
2699
2700 case tok_ellipsis2:
2701 case tok_ellipsis3:
2702 case tok_ellipsis4:
2703 /* This is the symbolic (decimal or hexadecimal) or absolute
2704 ellipsis. */
2705 if (was_ellipsis != tok_none)
2706 goto err_label;
2707
2708 if (state != 1 && state != 3)
2709 goto err_label;
2710
2711 was_ellipsis = nowtok;
2712
2713 insert_weights (ldfile, &collate->ellipsis_weight, charmap,
2714 repertoire, collate, nowtok);
2715 break;
2716
2717 case tok_end:
2718 /* Next we assume `LC_COLLATE'. */
2719 if (!ignore_content)
2720 {
2721 if (state == 0)
2722 /* We must either see a copy statement or have
2723 ordering values. */
2724 lr_error (ldfile,
2725 _("%s: empty category description not allowed"),
2726 "LC_COLLATE");
2727 else if (state == 1)
2728 {
2729 lr_error (ldfile, _("%s: missing `order_end' keyword"),
2730 "LC_COLLATE");
2731
2732 /* Handle ellipsis at end of list. */
2733 if (was_ellipsis != tok_none)
2734 {
2735 handle_ellipsis (ldfile, NULL, was_ellipsis, charmap,
2736 repertoire, collate);
2737 was_ellipsis = tok_none;
2738 }
2739 }
2740 else if (state == 3)
2741 error (0, 0, _("%s: missing `reorder-end' keyword"),
2742 "LC_COLLATE");
2743 else if (state == 5)
2744 error (0, 0, _("%s: missing `reorder-sections-end' keyword"),
2745 "LC_COLLATE");
2746 }
2747 arg = lr_token (ldfile, charmap, NULL);
2748 if (arg->tok == tok_eof)
2749 break;
2750 if (arg->tok == tok_eol)
2751 lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
2752 else if (arg->tok != tok_lc_collate)
2753 lr_error (ldfile, _("\
2754 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2755 lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
2756 return;
2757
2758 default:
2759 err_label:
2760 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2761 }
2762
2763 /* Prepare for the next round. */
2764 now = lr_token (ldfile, charmap, NULL);
2765 nowtok = now->tok;
2766 }
2767
2768 /* When we come here we reached the end of the file. */
2769 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2770 }
This page took 0.153957 seconds and 6 git commands to generate.