1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
30 #include "localeinfo.h"
31 #include "linereader.h"
33 #include "localedef.h"
35 /* Uncomment the following line in the production version. */
36 /* #define NDEBUG 1 */
39 #define obstack_chunk_alloc malloc
40 #define obstack_chunk_free free
42 /* Forward declaration. */
45 /* Data type for list of strings. */
48 struct section_list
*next
;
49 /* Name of the section. */
51 /* First element of this section. */
52 struct element_t
*first
;
53 /* Last element of this section. */
54 struct element_t
*last
;
55 /* These are the rules for this section. */
56 enum coll_sort_rule
*rules
;
63 /* Number of elements. */
69 /* Data type for collating element. */
79 struct element_list_t
*weights
;
81 /* Where does the definition come from. */
85 /* Which section does this belong to. */
86 struct section_list
*section
;
88 /* Predecessor and successor in the order list. */
89 struct element_t
*last
;
90 struct element_t
*next
;
92 /* Next element in multibyte output list. */
93 struct element_t
*mbnext
;
96 /* Special element value. */
97 #define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
98 #define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
99 #define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
101 /* Data type for collating symbol. */
104 /* Point to place in the order list. */
105 struct element_t
*order
;
107 /* Where does the definition come from. */
113 /* The real definition of the struct for the LC_COLLATE locale. */
114 struct locale_collate_t
119 /* List of known scripts. */
120 struct section_list
*sections
;
121 /* Current section using definition. */
122 struct section_list
*current_section
;
123 /* There always can be an unnamed section. */
124 struct section_list unnamed_section
;
125 /* To make handling of errors easier we have another section. */
126 struct section_list error_section
;
128 /* Number of sorting rules given in order_start line. */
131 /* Start of the order list. */
132 struct element_t
*start
;
134 /* The undefined element. */
135 struct element_t undefined
;
137 /* This is the cursor for `reorder_after' insertions. */
138 struct element_t
*cursor
;
140 /* This value is used when handling ellipsis. */
141 struct element_t ellipsis_weight
;
143 /* Known collating elements. */
144 hash_table elem_table
;
146 /* Known collating symbols. */
147 hash_table sym_table
;
149 /* Known collation sequences. */
150 hash_table seq_table
;
152 struct obstack mempool
;
154 /* The LC_COLLATE category is a bit special as it is sometimes possible
155 that the definitions from more than one input file contain information.
156 Therefore we keep all relevant input in a list. */
157 struct locale_collate_t
*next
;
159 /* Arrays with heads of the list for each of the leading bytes in
160 the multibyte sequences. */
161 struct element_t
*mbheads
[256];
165 /* We have a few global variables which are used for reading all
166 LC_COLLATE category descriptions in all files. */
170 static struct section_list
*
171 make_seclist_elem (struct locale_collate_t
*collate
, const char *string
,
172 struct section_list
*next
)
174 struct section_list
*newp
;
176 newp
= (struct section_list
*) obstack_alloc (&collate
->mempool
,
186 static struct element_t
*
187 new_element (struct locale_collate_t
*collate
, const char *mbs
, size_t mbslen
,
188 const uint32_t *wcs
, const char *name
, size_t namelen
)
190 struct element_t
*newp
;
192 newp
= (struct element_t
*) obstack_alloc (&collate
->mempool
,
194 newp
->name
= name
== NULL
? NULL
: obstack_copy (&collate
->mempool
,
197 newp
->mbs
= obstack_copy0 (&collate
->mempool
, mbs
, mbslen
);
202 size_t nwcs
= wcslen ((wchar_t *) wcs
) + 1;
204 obstack_grow (&collate
->mempool
, wcs
, nwcs
* sizeof (uint32_t));
205 obstack_grow (&collate
->mempool
, &zero
, sizeof (uint32_t));
206 newp
->wcs
= (uint32_t *) obstack_finish (&collate
->mempool
);
213 /* Will be allocated later. */
214 newp
->weights
= NULL
;
219 newp
->section
= NULL
;
230 static struct symbol_t
*
231 new_symbol (struct locale_collate_t
*collate
)
233 struct symbol_t
*newp
;
235 newp
= (struct symbol_t
*) obstack_alloc (&collate
->mempool
, sizeof (*newp
));
246 /* Test whether this name is already defined somewhere. */
248 check_duplicate (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
249 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
250 const char *symbol
, size_t symbol_len
)
254 if (find_entry (&charmap
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
256 lr_error (ldfile
, _("`%s' already defined in charmap"), symbol
);
260 if (find_entry (&repertoire
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
262 lr_error (ldfile
, _("`%s' already defined in repertoire"), symbol
);
266 if (find_entry (&collate
->sym_table
, symbol
, symbol_len
, &ignore
) == 0)
268 lr_error (ldfile
, _("`%s' already defined as collating symbol"), symbol
);
272 if (find_entry (&collate
->elem_table
, symbol
, symbol_len
, &ignore
) == 0)
274 lr_error (ldfile
, _("`%s' already defined as collating element"),
283 /* Read the direction specification. */
285 read_directions (struct linereader
*ldfile
, struct token
*arg
,
286 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
287 struct locale_collate_t
*collate
)
290 int max
= nrules
?: 10;
291 enum coll_sort_rule
*rules
= calloc (max
, sizeof (*rules
));
298 if (arg
->tok
== tok_forward
)
300 if (rules
[cnt
] & sort_backward
)
304 lr_error (ldfile
, _("\
305 %s: `forward' and `backward' are mutually excluding each other"),
310 else if (rules
[cnt
] & sort_forward
)
314 lr_error (ldfile
, _("\
315 %s: `%s' mentioned twice in definition of weight %d"),
316 "LC_COLLATE", "forward", cnt
+ 1);
320 rules
[cnt
] |= sort_forward
;
324 else if (arg
->tok
== tok_backward
)
326 if (rules
[cnt
] & sort_forward
)
330 lr_error (ldfile
, _("\
331 %s: `forward' and `backward' are mutually excluding each other"),
336 else if (rules
[cnt
] & sort_backward
)
340 lr_error (ldfile
, _("\
341 %s: `%s' mentioned twice in definition of weight %d"),
342 "LC_COLLATE", "backward", cnt
+ 1);
346 rules
[cnt
] |= sort_backward
;
350 else if (arg
->tok
== tok_position
)
352 if (rules
[cnt
] & sort_position
)
356 lr_error (ldfile
, _("\
357 %s: `%s' mentioned twice in definition of weight %d in category `%s'"),
358 "LC_COLLATE", "position", cnt
+ 1);
362 rules
[cnt
] |= sort_position
;
368 arg
= lr_token (ldfile
, charmap
, repertoire
);
370 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
|| arg
->tok
== tok_comma
371 || arg
->tok
== tok_semicolon
)
373 if (! valid
&& ! warned
)
375 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
379 /* See whether we have to increment the counter. */
380 if (arg
->tok
!= tok_comma
&& rules
[cnt
] != 0)
383 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
384 /* End of line or file, so we exit the loop. */
389 /* See whether we have enough room in the array. */
393 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
396 memset (&rules
[cnt
], '\0', (max
- cnt
) * sizeof (*rules
));
403 /* There must not be any more rules. */
406 lr_error (ldfile
, _("\
407 %s: too many rules; first entry only had %d"),
408 "LC_COLLATE", nrules
);
412 lr_ignore_rest (ldfile
, 0);
421 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
426 arg
= lr_token (ldfile
, charmap
, repertoire
);
431 /* Now we know how many rules we have. */
433 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
434 nrules
* sizeof (*rules
));
440 /* Not enough rules in this specification. */
442 lr_error (ldfile
, _("%s: not enough sorting rules"), "LC_COLLATE");
445 rules
[cnt
] = sort_forward
;
446 while (++cnt
< nrules
);
450 collate
->current_section
->rules
= rules
;
454 static struct element_t
*
455 find_element (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
456 const char *str
, size_t len
, uint32_t *wcstr
)
458 struct element_t
*result
= NULL
;
460 /* Search for the entries among the collation sequences already defined. */
461 if (find_entry (&collate
->seq_table
, str
, len
, (void **) &result
) != 0)
463 /* Nope, not defined yet. So we see whether it is a
467 if (find_entry (&collate
->sym_table
, str
, len
, &ptr
) == 0)
469 /* It's a collation symbol. */
470 struct symbol_t
*sym
= (struct symbol_t
*) ptr
;
474 result
= sym
->order
= new_element (collate
, NULL
, 0, NULL
,
477 else if (find_entry (&collate
->elem_table
, str
, len
,
478 (void **) &result
) != 0)
480 /* It's also no collation element. So it is a character
481 element defined later. */
482 result
= new_element (collate
, NULL
, 0, NULL
, str
, len
);
484 /* Insert it into the sequence table. */
485 insert_entry (&collate
->seq_table
, str
, len
, result
);
/* Remove the element COLLATE->cursor currently points to from the
   doubly linked order list (the `last'/`next' chain) and move the
   cursor back to that element's predecessor.  The cursor may become
   NULL if the removed element had no predecessor.  */
494 unlink_element (struct locale_collate_t
*collate
)
/* Let the successor, if any, point back to the predecessor.  */
496 if (collate
->cursor
->next
!= NULL
)
497 collate
->cursor
->next
->last
= collate
->cursor
->last
;
/* Let the predecessor, if any, point forward to the successor.  */
498 if (collate
->cursor
->last
!= NULL
)
499 collate
->cursor
->last
->next
= collate
->cursor
->next
;
/* The cursor now designates the predecessor of the removed element.  */
500 collate
->cursor
= collate
->cursor
->last
;
505 insert_weights (struct linereader
*ldfile
, struct element_t
*elem
,
506 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
507 struct locale_collate_t
*collate
, enum token_t ellipsis
)
512 /* Initialize all the fields. */
513 elem
->file
= ldfile
->fname
;
514 elem
->line
= ldfile
->lineno
;
515 elem
->last
= collate
->cursor
;
516 elem
->next
= collate
->cursor
? collate
->cursor
->next
: NULL
;
517 if (collate
->cursor
!= NULL
)
518 collate
->cursor
->next
= elem
;
519 elem
->weights
= (struct element_list_t
*)
520 obstack_alloc (&collate
->mempool
, nrules
* sizeof (struct element_list_t
));
521 memset (elem
->weights
, '\0', nrules
* sizeof (struct element_list_t
));
523 if (collate
->current_section
->first
== NULL
)
524 collate
->current_section
->first
= elem
;
525 if (collate
->current_section
->last
== collate
->cursor
)
526 collate
->current_section
->last
= elem
;
528 collate
->cursor
= elem
;
532 arg
= lr_token (ldfile
, charmap
, repertoire
);
535 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
538 if (arg
->tok
== tok_ignore
)
540 /* The weight for this level has to be ignored. We use the
541 null pointer to indicate this. */
542 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
543 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
544 elem
->weights
[weight_cnt
].w
[0] = NULL
;
545 elem
->weights
[weight_cnt
].cnt
= 1;
547 else if (arg
->tok
== tok_bsymbol
)
549 struct element_t
*val
= find_element (ldfile
, collate
,
550 arg
->val
.str
.startmb
,
552 arg
->val
.str
.startwc
);
557 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
558 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
559 elem
->weights
[weight_cnt
].w
[0] = val
;
560 elem
->weights
[weight_cnt
].cnt
= 1;
562 else if (arg
->tok
== tok_string
)
564 /* Split the string up in the individual characters and put
565 the element definitions in the list. */
566 const char *cp
= arg
->val
.str
.startmb
;
568 struct element_t
*charelem
;
569 void *base
= obstack_base (&collate
->mempool
);
573 lr_error (ldfile
, _("%s: empty weight string not allowed"),
575 lr_ignore_rest (ldfile
, 0);
583 /* Ahh, it's a bsymbol. That's what we want. */
584 const char *startp
= cp
;
588 if (*cp
== ldfile
->escape_char
)
592 /* It's a syntax error. */
593 obstack_free (&collate
->mempool
, base
);
598 charelem
= find_element (ldfile
, collate
, startp
,
604 /* People really shouldn't use characters directly in
605 the string. Especially since it's not really clear
606 what this means. We interpret all characters in the
607 string as if that would be bsymbols. Otherwise we
608 would have to match back to bsymbols somehow and this
609 is also not what people normally expect. */
610 charelem
= find_element (ldfile
, collate
, cp
++, 1, NULL
);
613 if (charelem
== NULL
)
615 /* We ignore the rest of the line. */
616 lr_ignore_rest (ldfile
, 0);
620 /* Add the pointer. */
621 obstack_ptr_grow (&collate
->mempool
, charelem
);
626 /* Now store the information. */
627 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
628 obstack_finish (&collate
->mempool
);
629 elem
->weights
[weight_cnt
].cnt
= cnt
;
631 /* We don't need the string anymore. */
632 free (arg
->val
.str
.startmb
);
634 else if (ellipsis
!= tok_none
635 && (arg
->tok
== tok_ellipsis2
636 || arg
->tok
== tok_ellipsis3
637 || arg
->tok
== tok_ellipsis4
))
639 /* It must be the same ellipsis as used in the initial column. */
640 if (arg
->tok
!= ellipsis
)
641 lr_error (ldfile
, _("\
642 %s: weights must use the same ellipsis symbol as the name"),
645 /* The weight for this level has to be ignored. We use the
646 null pointer to indicate this. */
647 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
648 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
649 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
650 elem
->weights
[weight_cnt
].cnt
= 1;
655 /* It's a syntax error. */
656 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
657 lr_ignore_rest (ldfile
, 0);
661 arg
= lr_token (ldfile
, charmap
, repertoire
);
662 /* This better should be the end of the line or a semicolon. */
663 if (arg
->tok
== tok_semicolon
)
664 /* OK, ignore this and read the next token. */
665 arg
= lr_token (ldfile
, charmap
, repertoire
);
666 else if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
668 /* It's a syntax error. */
669 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
670 lr_ignore_rest (ldfile
, 0);
674 while (++weight_cnt
< nrules
);
676 if (weight_cnt
< nrules
)
678 /* This means the rest of the line uses the current element as
682 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
683 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
684 elem
->weights
[weight_cnt
].w
[0] = elem
;
685 elem
->weights
[weight_cnt
].cnt
= 1;
687 while (++weight_cnt
< nrules
);
691 if (arg
->tok
== tok_ignore
|| arg
->tok
== tok_bsymbol
)
693 /* Too many rule values. */
694 lr_error (ldfile
, _("%s: too many values"), "LC_COLLATE");
695 lr_ignore_rest (ldfile
, 0);
698 lr_ignore_rest (ldfile
, arg
->tok
!= tok_eol
&& arg
->tok
!= tok_eof
);
704 insert_value (struct linereader
*ldfile
, struct token
*arg
,
705 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
706 struct locale_collate_t
*collate
)
708 /* First find out what kind of symbol this is. */
711 struct element_t
*elem
= NULL
;
713 /* Try to find the character in the charmap. */
714 seq
= charmap_find_value (charmap
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
716 /* Determine the wide character. */
717 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
719 wc
= repertoire_find_value (repertoire
, arg
->val
.str
.startmb
,
727 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
729 /* It's no character, so look through the collation elements and
733 if (find_entry (&collate
->sym_table
, arg
->val
.str
.startmb
,
734 arg
->val
.str
.lenmb
, &result
) == 0)
736 /* It's a collation symbol. */
737 struct symbol_t
*sym
= (struct symbol_t
*) result
;
741 elem
= sym
->order
= new_element (collate
, NULL
, 0, NULL
, NULL
, 0);
743 else if (find_entry (&collate
->elem_table
, arg
->val
.str
.startmb
,
744 arg
->val
.str
.lenmb
, (void **) &elem
) != 0)
746 /* It's also no collation element. Therefore ignore it. */
747 lr_ignore_rest (ldfile
, 0);
753 /* Otherwise the symbol stands for a character. */
754 if (find_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
755 arg
->val
.str
.lenmb
, (void **) &elem
) != 0)
757 uint32_t wcs
[2] = { wc
, 0 };
759 /* We have to allocate an entry. */
760 elem
= new_element (collate
, seq
!= NULL
? seq
->bytes
: NULL
,
761 seq
!= NULL
? seq
->nbytes
: 0,
762 wcs
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
764 /* And add it to the table. */
765 if (insert_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
766 arg
->val
.str
.lenmb
, elem
) != 0)
767 /* This cannot happen. */
768 assert (! "Internal error");
772 /* Test whether this element is not already in the list. */
773 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
774 && elem
->next
== collate
->cursor
))
776 lr_error (ldfile
, _("order for `%.*s' already defined at %s:%zu"),
777 arg
->val
.str
.lenmb
, arg
->val
.str
.startmb
,
778 elem
->file
, elem
->line
);
779 lr_ignore_rest (ldfile
, 0);
783 insert_weights (ldfile
, elem
, charmap
, repertoire
, collate
, tok_none
);
790 handle_ellipsis (struct linereader
*ldfile
, struct token
*arg
,
791 enum token_t ellipsis
, struct charmap_t
*charmap
,
792 struct repertoire_t
*repertoire
,
793 struct locale_collate_t
*collate
)
795 struct element_t
*startp
;
796 struct element_t
*endp
;
798 /* Unlink the entry added for the ellipsis. */
799 unlink_element (collate
);
800 startp
= collate
->cursor
;
802 /* Process and add the end-entry. */
804 && insert_value (ldfile
, arg
, charmap
, repertoire
, collate
))
805 /* Something went wrong with inserting the to-value. This means
806 we cannot process the ellipsis. */
809 /* Reset the cursor. */
810 collate
->cursor
= startp
;
812 /* Now we have to handle many different situations:
813 - we have to distinguish between the three different ellipsis forms
814 - whether the ellipsis is at the beginning, in the middle, or at the end.
816 endp
= collate
->cursor
->next
;
817 assert (arg
== NULL
|| endp
!= NULL
);
819 /* Both, the start and the end symbol, must stand for characters. */
820 if ((startp
== NULL
|| startp
->name
== NULL
)
821 || (endp
== NULL
|| endp
->name
== NULL
))
823 lr_error (ldfile
, _("\
824 %s: the start end the end symbol of a range must stand for characters"),
829 if (ellipsis
== tok_ellipsis3
)
831 /* One requirement we make here: the length of the byte
832 sequences for the first and end character must be the same.
833 This is mainly to prevent unwanted effects and this is often
834 not what is wanted. */
835 size_t len
= (startp
->mbs
!= NULL
? strlen (startp
->mbs
)
836 : (endp
->mbs
!= NULL
? strlen (endp
->mbs
) : 0));
840 /* Well, this should be caught somewhere else already. Just to
842 assert (startp
== NULL
|| startp
->wcs
== NULL
|| startp
->wcs
[1] == 0);
843 assert (endp
== NULL
|| endp
->wcs
== NULL
|| endp
->wcs
[1] == 0);
845 if (startp
!= NULL
&& endp
!= NULL
846 && startp
->mbs
!= NULL
&& endp
->mbs
!= NULL
847 && strlen (startp
->mbs
) != strlen (endp
->mbs
))
849 lr_error (ldfile
, _("\
850 %s: byte sequences of first and last character must have the same length"),
855 /* Determine whether we have to generate multibyte sequences. */
856 if ((startp
== NULL
|| startp
->mbs
!= NULL
)
857 && (endp
== NULL
|| endp
->mbs
!= NULL
))
862 /* Prepare the beginning byte sequence. This is either from the
863 beginning byte sequence or it is all nulls if it was an
865 if (startp
== NULL
|| startp
->mbs
== NULL
)
866 memset (mbcnt
, '\0', len
);
869 memcpy (mbcnt
, startp
->mbs
, len
);
871 /* And increment it so that the value is the first one we will
873 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
874 if (++mbcnt
[cnt
] != '\0')
879 /* And the end sequence. */
880 if (endp
== NULL
|| endp
->mbs
== NULL
)
881 memset (mbend
, '\0', len
);
883 memcpy (mbend
, endp
->mbs
, len
);
886 /* Test whether we have a correct range. */
887 ret
= memcmp (mbcnt
, mbend
, len
);
891 lr_error (ldfile
, _("%s: byte sequence of first character of \
892 sequence is not lower than that of the last character"), "LC_COLLATE");
896 /* Generate the byte sequences data. */
901 /* Quite a bit of work ahead. We have to find the character
902 definition for the byte sequence and then determine the
903 wide character belonging to it. */
904 seq
= charmap_find_symbol (charmap
, mbcnt
, len
);
907 struct element_t
*elem
;
910 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
911 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
914 /* I don't think this can ever happen. */
915 assert (seq
->name
!= NULL
);
916 namelen
= strlen (seq
->name
);
918 /* Now we are ready to insert the new value in the
919 sequence. Find out whether the element is
921 if (find_entry (&collate
->seq_table
, seq
->name
, namelen
,
922 (void **) &elem
) != 0)
924 uint32_t wcs
[2] = { seq
->ucs4
, 0 };
926 /* We have to allocate an entry. */
927 elem
= new_element (collate
, mbcnt
, len
, wcs
, seq
->name
,
930 /* And add it to the table. */
931 if (insert_entry (&collate
->seq_table
, seq
->name
,
933 /* This cannot happen. */
934 assert (! "Internal error");
937 /* Test whether this element is not already in the list. */
938 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
939 && elem
->next
== collate
->cursor
))
941 lr_error (ldfile
, _("\
942 order for `%.*s' already defined at %s:%zu"),
943 namelen
, seq
->name
, elem
->file
, elem
->line
);
947 /* Enqueue the new element. */
948 elem
->last
= collate
->cursor
;
949 elem
->next
= collate
->cursor
->next
;
950 elem
->last
->next
= elem
;
951 if (elem
->next
!= NULL
)
952 elem
->next
->last
= elem
;
953 collate
->cursor
= elem
;
955 /* Add the weight value. We take them from the
956 `ellipsis_weights' member of `collate'. */
957 elem
->weights
= (struct element_list_t
*)
958 obstack_alloc (&collate
->mempool
,
959 nrules
* sizeof (struct element_list_t
));
960 for (cnt
= 0; cnt
< nrules
; ++cnt
)
961 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
962 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
963 == ELEMENT_ELLIPSIS2
))
965 elem
->weights
[cnt
].w
= (struct element_t
**)
966 obstack_alloc (&collate
->mempool
,
967 sizeof (struct element_t
*));
968 elem
->weights
[cnt
].w
[0] = elem
;
969 elem
->weights
[cnt
].cnt
= 1;
973 /* Simply use the weight from `ellipsis_weight'. */
974 elem
->weights
[cnt
].w
=
975 collate
->ellipsis_weight
.weights
[cnt
].w
;
976 elem
->weights
[cnt
].cnt
=
977 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
981 /* Increment for the next round. */
983 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
984 if (++mbcnt
[cnt
] != '\0')
987 /* Find out whether this was all. */
988 if (cnt
< 0 || memcmp (mbcnt
, mbend
, len
) >= 0)
989 /* Yep, that's all. */
996 /* For symbolic range we naturally must have a beginning and an
997 end specified by the user. */
999 lr_error (ldfile
, _("\
1000 %s: symbolic range ellipsis must not directly follow `order_start'"),
1002 else if (endp
== NULL
)
1003 lr_error (ldfile
, _("\
1004 %s: symbolic range ellipsis must not be direct followed by `order_end'"),
1008 /* Determine the range. To do so we have to determine the
1009 common prefix of both names and then the numeric
1010 values of both ends. */
1011 size_t lenfrom
= strlen (startp
->name
);
1012 size_t lento
= strlen (endp
->name
);
1013 char buf
[lento
+ 1];
1018 int base
= ellipsis
== tok_ellipsis2
? 16 : 10;
1020 if (lenfrom
!= lento
)
1023 lr_error (ldfile
, _("\
1024 `%s' and `%.*s' are no valid names for symbolic range"),
1025 startp
->name
, lento
, endp
->name
);
1029 while (startp
->name
[preflen
] == endp
->name
[preflen
])
1030 if (startp
->name
[preflen
] == '\0')
1031 /* Nothing to be done. The start and end point are identical
1032 and while inserting the end point we have already given
1033 the user an error message. */
1039 from
= strtol (startp
->name
+ preflen
, &cp
, base
);
1040 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1044 to
= strtol (endp
->name
+ preflen
, &cp
, base
);
1045 if ((to
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1048 /* Copy the prefix. */
1049 memcpy (buf
, startp
->name
, preflen
);
1051 /* Loop over all values. */
1052 for (++from
; from
< to
; ++from
)
1054 struct element_t
*elem
= NULL
;
1055 struct charseq
*seq
;
1059 /* Generate the name. */
1060 sprintf (buf
+ preflen
, base
== 10 ? "%d" : "%x", from
);
1062 /* Look whether this name is already defined. */
1063 if (find_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
1064 arg
->val
.str
.lenmb
, (void **) &elem
) == 0)
1066 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1067 && elem
->next
== collate
->cursor
))
1069 lr_error (ldfile
, _("\
1070 %s: order for `%.*s' already defined at %s:%zu"),
1071 "LC_COLLATE", lenfrom
, buf
,
1072 elem
->file
, elem
->line
);
1076 if (elem
->name
== NULL
)
1078 lr_error (ldfile
, _("%s: `%s' must be a charater"),
1084 if (elem
== NULL
|| (elem
->mbs
== NULL
&& elem
->wcs
== NULL
))
1086 /* Search for a character of this name. */
1087 seq
= charmap_find_value (charmap
, buf
, lenfrom
);
1088 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1090 wc
= repertoire_find_value (repertoire
, buf
, lenfrom
);
1098 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
1099 /* We don't know anything about a character with this
1100 name. XXX Should we warn? */
1105 uint32_t wcs
[2] = { wc
, 0 };
1107 /* We have to allocate an entry. */
1108 elem
= new_element (collate
,
1109 seq
!= NULL
? seq
->bytes
: NULL
,
1110 seq
!= NULL
? seq
->nbytes
: 0,
1111 wc
== ILLEGAL_CHAR_VALUE
1117 /* Update the element. */
1119 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1120 seq
->bytes
, seq
->nbytes
);
1122 if (wc
!= ILLEGAL_CHAR_VALUE
)
1126 obstack_grow (&collate
->mempool
,
1127 &wc
, sizeof (uint32_t));
1128 obstack_grow (&collate
->mempool
,
1129 &zero
, sizeof (uint32_t));
1130 elem
->wcs
= obstack_finish (&collate
->mempool
);
1134 elem
->file
= ldfile
->fname
;
1135 elem
->line
= ldfile
->lineno
;
1138 /* Enqueue the new element. */
1139 elem
->last
= collate
->cursor
;
1140 elem
->next
= collate
->cursor
->next
;
1141 elem
->last
->next
= elem
;
1142 if (elem
->next
!= NULL
)
1143 elem
->next
->last
= elem
;
1144 collate
->cursor
= elem
;
1146 /* Now add the weights. They come from the `ellipsis_weights'
1147 member of `collate'. */
1148 elem
->weights
= (struct element_list_t
*)
1149 obstack_alloc (&collate
->mempool
,
1150 nrules
* sizeof (struct element_list_t
));
1151 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1152 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1153 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1154 == ELEMENT_ELLIPSIS2
))
1156 elem
->weights
[cnt
].w
= (struct element_t
**)
1157 obstack_alloc (&collate
->mempool
,
1158 sizeof (struct element_t
*));
1159 elem
->weights
[cnt
].w
[0] = elem
;
1160 elem
->weights
[cnt
].cnt
= 1;
1164 /* Simply use the weight from `ellipsis_weight'. */
1165 elem
->weights
[cnt
].w
=
1166 collate
->ellipsis_weight
.weights
[cnt
].w
;
1167 elem
->weights
[cnt
].cnt
=
1168 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
/* Set up the LC_COLLATE data of LOCALE before the category is read
   from LDFILE.  When IGNORE_CONTENT is zero the collate data is either
   freshly allocated and initialized (COPY_LOCALE == NULL) or taken over
   from COPY_LOCALE.  The line reader's `translate_strings' and
   `return_widestr' members are cleared.
   NOTE(review): some lines of this function (braces, presumably an
   `else') are not visible in this listing; the branch structure
   described here should be confirmed against the complete source.  */
1177 collate_startup (struct linereader
*ldfile
, struct localedef_t
*locale
,
1178 struct localedef_t
*copy_locale
, int ignore_content
)
1180 if (!ignore_content
)
1182 struct locale_collate_t
*collate
;
1184 if (copy_locale
== NULL
)
/* No locale to copy from: allocate the collate data with xcalloc and
   store it in LOCALE.  */
1186 collate
= locale
->categories
[LC_COLLATE
].collate
=
1187 (struct locale_collate_t
*)
1188 xcalloc (1, sizeof (struct locale_collate_t
));
1190 /* Init the various data structures. */
1191 init_hash (&collate
->elem_table
, 100);
1192 init_hash (&collate
->sym_table
, 100);
1193 init_hash (&collate
->seq_table
, 500);
1194 obstack_init (&collate
->mempool
);
/* -1 means `col_weight_max' has not been defined yet; the reader
   reports a duplicate definition when it finds any other value.  */
1196 collate
->col_weight_max
= -1;
/* Share the collate data of COPY_LOCALE with LOCALE (presumably the
   branch taken when COPY_LOCALE is not NULL -- TODO confirm).  */
1199 collate
= locale
->categories
[LC_COLLATE
].collate
=
1200 copy_locale
->categories
[LC_COLLATE
].collate
;
/* Reset the string translation and wide string flags of the reader.  */
1203 ldfile
->translate_strings
= 0;
1204 ldfile
->return_widestr
= 0;
1209 collate_finish (struct localedef_t
*locale
, struct charmap_t
*charmap
)
1211 /* Now is the time when we can assign the individual collation
1212 values for all the symbols. We have possibly different values
1213 for the wide- and the multibyte-character symbols. This is done
1214 since it might make a difference in the encoding if there is in
1215 some cases no multibyte-character but there are wide-characters.
1216 (The other way around it is not important since the encoded
1217 collation value in the wide-character case is 32 bits wide and
1218 therefore requires no encoding).
1220 The lowest collation value assigned is 2. Zero is reserved for
1221 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1222 functions and 1 is used to separate the individual passes for the
1225 We also have to construct a list with all the bytes/words which
1226 can come first in a sequence, followed by all the elements which
1227 also start with this byte/word. The order is reverse which has
1228 among others the important effect that longer strings are located
1229 first in the list. This is required for the output data since
1230 the algorithm used in `strcoll' etc depends on this.
1232 The multibyte case is easy. We simply sort into an array with
1234 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1237 struct element_t
*runp
= collate
->start
;
1239 while (runp
!= NULL
)
1241 if (runp
->mbs
!= NULL
)
1243 struct element_t
**eptr
;
1245 /* Determine the order. */
1246 runp
->mborder
= mbact
++;
1248 /* Find the point where to insert in the list. */
1249 eptr
= &collate
->mbheads
[(unsigned int) runp
->mbs
[0]];
1250 while (*eptr
!= NULL
)
1252 /* Check which string is larger, the one we want to insert
1253 or the current element of the list we are looking at. */
1254 assert (runp
->mbs
[0] == (*eptr
)->mbs
[0]);
1255 if (strcmp (runp
->mbs
, (*eptr
)->mbs
) > 0)
1258 eptr
= &(*eptr
)->mbnext
;
1261 /* Set the pointers. */
1262 runp
->mbnext
= *eptr
;
1266 if (runp
->wcs
!= NULL
)
1267 runp
->wcorder
= wcact
++;
1269 /* Up to the next entry. */
1276 collate_output (struct localedef_t
*locale
, struct charmap_t
*charmap
,
1277 const char *output_path
)
1283 collate_read (struct linereader
*ldfile
, struct localedef_t
*result
,
1284 struct charmap_t
*charmap
, const char *repertoire_name
,
1287 struct repertoire_t
*repertoire
= NULL
;
1288 struct locale_collate_t
*collate
;
1290 struct token
*arg
= NULL
;
1291 enum token_t nowtok
;
1293 enum token_t was_ellipsis
= tok_none
;
1294 struct localedef_t
*copy_locale
= NULL
;
1296 /* Get the repertoire we have to use. */
1297 if (repertoire_name
!= NULL
)
1298 repertoire
= repertoire_read (repertoire_name
);
1300 /* The rest of the line containing `LC_COLLATE' must be free. */
1301 lr_ignore_rest (ldfile
, 1);
1305 now
= lr_token (ldfile
, charmap
, NULL
);
1308 while (nowtok
== tok_eol
);
1310 if (nowtok
== tok_copy
)
1313 now
= lr_token (ldfile
, charmap
, NULL
);
1314 if (now
->tok
!= tok_string
)
1316 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
1320 now
= lr_token (ldfile
, charmap
, NULL
);
1321 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
1323 if (now
->tok
!= tok_eof
1324 || (now
= lr_token (ldfile
, charmap
, NULL
), now
->tok
== tok_eof
))
1325 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");
1326 else if (now
->tok
!= tok_lc_collate
)
1328 lr_error (ldfile
, _("\
1329 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
1330 lr_ignore_rest (ldfile
, 0);
1333 lr_ignore_rest (ldfile
, 1);
1338 /* Get the locale definition. */
1339 copy_locale
= find_locale (LC_COLLATE
, now
->val
.str
.startmb
,
1340 repertoire_name
, charmap
);
1341 if ((copy_locale
->avail
& COLLATE_LOCALE
) == 0)
1343 /* Not yet loaded. So do it now. */
1344 if (locfile_read (copy_locale
, charmap
) != 0)
1348 lr_ignore_rest (ldfile
, 1);
1350 now
= lr_token (ldfile
, charmap
, NULL
);
1354 /* Prepare the data structures. */
1355 collate_startup (ldfile
, result
, copy_locale
, ignore_content
);
1356 collate
= result
->categories
[LC_COLLATE
].collate
;
1360 /* Of course we don't proceed beyond the end of file. */
1361 if (nowtok
== tok_eof
)
1364 /* Ignore empty lines. */
1365 if (nowtok
== tok_eol
)
1367 now
= lr_token (ldfile
, charmap
, NULL
);
1374 case tok_coll_weight_max
:
1375 /* Ignore the rest of the line if we don't need the input of
1379 lr_ignore_rest (ldfile
, 0);
1386 arg
= lr_token (ldfile
, charmap
, NULL
);
1387 if (arg
->tok
!= tok_number
)
1389 if (collate
->col_weight_max
!= -1)
1390 lr_error (ldfile
, _("%s: duplicate definition of `%s'"),
1391 "LC_COLLATE", "col_weight_max");
1393 collate
->col_weight_max
= arg
->val
.num
;
1394 lr_ignore_rest (ldfile
, 1);
1397 case tok_section_symbol
:
1398 /* Ignore the rest of the line if we don't need the input of
1402 lr_ignore_rest (ldfile
, 0);
1409 arg
= lr_token (ldfile
, charmap
, repertoire
);
1410 if (arg
->tok
!= tok_bsymbol
)
1412 else if (!ignore_content
)
1414 /* Check whether this section is already known. */
1415 struct section_list
*known
= collate
->sections
;
1416 while (known
!= NULL
)
1417 if (strcmp (known
->name
, arg
->val
.str
.startmb
) == 0)
1423 _("%s: duplicate declaration of section `%s'"),
1424 "LC_COLLATE", arg
->val
.str
.startmb
);
1425 free (arg
->val
.str
.startmb
);
1428 collate
->sections
= make_seclist_elem (collate
,
1429 arg
->val
.str
.startmb
,
1432 lr_ignore_rest (ldfile
, known
== NULL
);
1436 free (arg
->val
.str
.startmb
);
1437 lr_ignore_rest (ldfile
, 0);
1441 case tok_collating_element
:
1442 /* Ignore the rest of the line if we don't need the input of
1446 lr_ignore_rest (ldfile
, 0);
1453 arg
= lr_token (ldfile
, charmap
, repertoire
);
1454 if (arg
->tok
!= tok_bsymbol
)
1458 const char *symbol
= arg
->val
.str
.startmb
;
1459 size_t symbol_len
= arg
->val
.str
.lenmb
;
1461 /* Next the `from' keyword. */
1462 arg
= lr_token (ldfile
, charmap
, repertoire
);
1463 if (arg
->tok
!= tok_from
)
1465 free ((char *) symbol
);
1469 ldfile
->return_widestr
= 1;
1471 /* Finally the string with the replacement. */
1472 arg
= lr_token (ldfile
, charmap
, repertoire
);
1473 ldfile
->return_widestr
= 0;
1474 if (arg
->tok
!= tok_string
)
1477 if (!ignore_content
)
1480 lr_error (ldfile
, _("\
1481 %s: unknown character in collating element name"),
1483 if (arg
->val
.str
.startmb
== NULL
)
1484 lr_error (ldfile
, _("\
1485 %s: unknown character in collating element definition"),
1487 if (arg
->val
.str
.startwc
== NULL
)
1488 lr_error (ldfile
, _("\
1489 %s: unknown wide character in collating element definition"),
1491 else if (arg
->val
.str
.lenwc
< 2)
1492 lr_error (ldfile
, _("\
1493 %s: substitution string in collating element definition must have at least two characters"),
1498 /* The name is already defined. */
1499 if (check_duplicate (ldfile
, collate
, charmap
,
1500 repertoire
, symbol
, symbol_len
))
1503 if (insert_entry (&collate
->elem_table
,
1505 new_element (collate
,
1506 NULL
, 0, NULL
, symbol
,
1508 lr_error (ldfile
, _("\
1509 error while adding collating element"));
1518 free ((char *) symbol
);
1519 if (arg
->val
.str
.startmb
!= NULL
)
1520 free (arg
->val
.str
.startmb
);
1521 if (arg
->val
.str
.startwc
!= NULL
)
1522 free (arg
->val
.str
.startwc
);
1524 lr_ignore_rest (ldfile
, 1);
1528 case tok_collating_symbol
:
1529 /* Ignore the rest of the line if we don't need the input of
1533 lr_ignore_rest (ldfile
, 0);
1540 arg
= lr_token (ldfile
, charmap
, repertoire
);
1541 if (arg
->tok
!= tok_bsymbol
)
1545 const char *symbol
= arg
->val
.str
.startmb
;
1546 size_t symbol_len
= arg
->val
.str
.lenmb
;
1548 if (!ignore_content
)
1551 lr_error (ldfile
, _("\
1552 %s: unknown character in collating symbol name"),
1556 /* The name is already defined. */
1557 if (check_duplicate (ldfile
, collate
, charmap
,
1558 repertoire
, symbol
, symbol_len
))
1561 if (insert_entry (&collate
->sym_table
,
1563 new_symbol (collate
)) < 0)
1564 lr_error (ldfile
, _("\
1565 error while adding collating symbol"));
1572 free ((char *) symbol
);
1574 lr_ignore_rest (ldfile
, 1);
1578 case tok_symbol_equivalence
:
1579 /* Ignore the rest of the line if we don't need the input of
1583 lr_ignore_rest (ldfile
, 0);
1590 arg
= lr_token (ldfile
, charmap
, repertoire
);
1591 if (arg
->tok
!= tok_bsymbol
)
1595 const char *newname
= arg
->val
.str
.startmb
;
1596 size_t newname_len
= arg
->val
.str
.lenmb
;
1597 const char *symname
;
1599 struct symbol_t
*symval
;
1601 arg
= lr_token (ldfile
, charmap
, repertoire
);
1602 if (arg
->tok
!= tok_bsymbol
)
1604 if (newname
!= NULL
)
1605 free ((char *) newname
);
1609 symname
= arg
->val
.str
.startmb
;
1610 symname_len
= arg
->val
.str
.lenmb
;
1612 if (!ignore_content
)
1614 if (newname
== NULL
)
1616 lr_error (ldfile
, _("\
1617 %s: unknown character in equivalent definition name"),
1619 goto sym_equiv_free
;
1621 if (symname
== NULL
)
1623 lr_error (ldfile
, _("\
1624 %s: unknown character in equivalent definition value"),
1626 goto sym_equiv_free
;
1628 /* The name is already defined. */
1629 if (check_duplicate (ldfile
, collate
, charmap
,
1630 repertoire
, symname
, symname_len
))
1633 /* See whether the symbol name is already defined. */
1634 if (find_entry (&collate
->sym_table
, symname
, symname_len
,
1635 (void **) &symval
) != 0)
1637 lr_error (ldfile
, _("\
1638 %s: unknown symbol `%s' in equivalent definition"),
1639 "LC_COLLATE", symname
);
1643 if (insert_entry (&collate
->sym_table
,
1644 newname
, newname_len
, symval
) < 0)
1646 lr_error (ldfile
, _("\
1647 error while adding equivalent collating symbol"));
1648 goto sym_equiv_free
;
1651 free ((char *) symname
);
1656 if (newname
!= NULL
)
1657 free ((char *) newname
);
1658 if (symname
!= NULL
)
1659 free ((char *) symname
);
1661 lr_ignore_rest (ldfile
, 1);
1665 case tok_order_start
:
1666 /* Ignore the rest of the line if we don't need the input of
1670 lr_ignore_rest (ldfile
, 0);
1674 if (state
!= 0 && state
!= 1)
1678 /* The 14652 draft does not specify whether all `order_start' lines
1679 must contain the same number of sort-rules, but 14651 does. So
1680 we require this here as well. */
1681 arg
= lr_token (ldfile
, charmap
, repertoire
);
1682 if (arg
->tok
== tok_bsymbol
)
1684 /* This had better be a section name. */
1685 struct section_list
*sp
= collate
->sections
;
1687 && strcmp (sp
->name
, arg
->val
.str
.startmb
) != 0)
1692 lr_error (ldfile
, _("\
1693 %s: unknown section name `%s'"),
1694 "LC_COLLATE", arg
->val
.str
.startmb
);
1695 /* We use the error section. */
1696 collate
->current_section
= &collate
->error_section
;
1700 /* Remember this section. */
1701 collate
->current_section
= sp
;
1703 /* One should not be allowed to open the same
1705 if (sp
->first
!= NULL
)
1706 lr_error (ldfile
, _("\
1707 %s: multiple order definitions for section `%s'"),
1708 "LC_COLLATE", sp
->name
);
1710 /* Next should come the end of the line or a semicolon. */
1711 arg
= lr_token (ldfile
, charmap
, repertoire
);
1712 if (arg
->tok
== tok_eol
)
1716 /* This means we have exactly one rule: `forward'. */
1717 if (collate
->nrules
> 1)
1718 lr_error (ldfile
, _("\
1719 %s: invalid number of sorting rules"),
1722 collate
->nrules
= 1;
1723 sp
->rules
= obstack_alloc (&collate
->mempool
,
1724 (sizeof (enum coll_sort_rule
)
1725 * collate
->nrules
));
1726 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1727 sp
->rules
[cnt
] = sort_forward
;
1733 /* Get the next token. */
1734 arg
= lr_token (ldfile
, charmap
, repertoire
);
1739 /* There is no section symbol. Therefore we use the unnamed
1741 collate
->current_section
= &collate
->unnamed_section
;
1743 if (collate
->unnamed_section
.first
!= NULL
)
1744 lr_error (ldfile
, _("\
1745 %s: multiple order definitions for unnamed section"),
1749 /* Now read the direction names. */
1750 read_directions (ldfile
, arg
, charmap
, repertoire
, collate
);
1752 /* From now on we need the strings untranslated. */
1753 ldfile
->translate_strings
= 0;
1757 /* Ignore the rest of the line if we don't need the input of
1761 lr_ignore_rest (ldfile
, 0);
1768 /* Handle ellipsis at end of list. */
1769 if (was_ellipsis
!= tok_none
)
1771 handle_ellipsis (ldfile
, NULL
, was_ellipsis
, charmap
, repertoire
,
1773 was_ellipsis
= tok_none
;
1777 lr_ignore_rest (ldfile
, 1);
1780 case tok_reorder_after
:
1781 /* Ignore the rest of the line if we don't need the input of
1785 lr_ignore_rest (ldfile
, 0);
1791 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
1795 /* Handle ellipsis at end of list. */
1796 if (was_ellipsis
!= tok_none
)
1798 handle_ellipsis (ldfile
, arg
, was_ellipsis
, charmap
,
1799 repertoire
, collate
);
1800 was_ellipsis
= tok_none
;
1803 else if (state
!= 2 && state
!= 3)
1807 arg
= lr_token (ldfile
, charmap
, repertoire
);
1808 if (arg
->tok
== tok_bsymbol
)
1810 /* Find this symbol in the sequence table. */
1811 struct element_t
*insp
;
1814 if (find_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
1815 arg
->val
.str
.lenmb
, (void **) &insp
) == 0)
1816 /* Yes, the symbol exists. Simply point the cursor
1818 collate
->cursor
= insp
;
1821 /* This is bad. The symbol after which we have to
1822 insert does not exist. */
1823 lr_error (ldfile
, _("\
1824 %s: cannot reorder after %.*s: symbol not known"),
1825 "LC_COLLATE", arg
->val
.str
.lenmb
,
1826 arg
->val
.str
.startmb
);
1827 collate
->cursor
= NULL
;
1831 lr_ignore_rest (ldfile
, no_error
);
1834 /* This must not happen. */
1838 case tok_reorder_end
:
1839 /* Ignore the rest of the line if we don't need the input of
1847 lr_ignore_rest (ldfile
, 1);
1850 case tok_reorder_sections_after
:
1851 /* Ignore the rest of the line if we don't need the input of
1855 lr_ignore_rest (ldfile
, 0);
1861 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
1865 /* Handle ellipsis at end of list. */
1866 if (was_ellipsis
!= tok_none
)
1868 handle_ellipsis (ldfile
, NULL
, was_ellipsis
, charmap
,
1869 repertoire
, collate
);
1870 was_ellipsis
= tok_none
;
1873 else if (state
== 3)
1875 error (0, 0, _("%s: missing `reorder-end' keyword"),
1879 else if (state
!= 2 && state
!= 4)
1883 /* Get the name of the sections we are adding after. */
1884 arg
= lr_token (ldfile
, charmap
, repertoire
);
1885 if (arg
->tok
== tok_bsymbol
)
1887 /* Now find a section with this name. */
1888 struct section_list
*runp
= collate
->sections
;
1890 while (runp
!= NULL
)
1892 if (runp
->name
!= NULL
1893 && strlen (runp
->name
) == arg
->val
.str
.lenmb
1894 && memcmp (runp
->name
, arg
->val
.str
.startmb
,
1895 arg
->val
.str
.lenmb
) == 0)
1902 collate
->current_section
= runp
;
1905 /* This is bad. The section after which we have to
1906 reorder does not exist. Therefore we cannot
1907 process the whole rest of this reorder
1909 lr_error (ldfile
, _("%s: section `%.*s' not known"),
1910 "LC_COLLATE", arg
->val
.str
.lenmb
,
1911 arg
->val
.str
.startmb
);
1915 lr_ignore_rest (ldfile
, 0);
1917 now
= lr_token (ldfile
, charmap
, NULL
);
1919 while (now
->tok
== tok_reorder_sections_after
1920 || now
->tok
== tok_reorder_sections_end
1921 || now
->tok
== tok_end
);
1923 /* Process the token we just saw. */
1929 /* This must not happen. */
1933 case tok_reorder_sections_end
:
1934 /* Ignore the rest of the line if we don't need the input of
1942 lr_ignore_rest (ldfile
, 1);
1946 /* Ignore the rest of the line if we don't need the input of
1950 lr_ignore_rest (ldfile
, 0);
1954 if (state
!= 1 && state
!= 3)
1959 /* It is possible that we already have this collation sequence.
1960 In this case we move the entry. */
1961 struct element_t
*seqp
;
1963 /* If the symbol after which we have to insert was not found
1964 ignore all entries. */
1965 if (collate
->cursor
== NULL
)
1967 lr_ignore_rest (ldfile
, 0);
1971 if (find_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
1972 arg
->val
.str
.lenmb
, (void **) &seqp
) == 0)
1974 /* Remove the entry from the old position. */
1975 if (seqp
->last
== NULL
)
1976 collate
->start
= seqp
->next
;
1978 seqp
->last
->next
= seqp
->next
;
1979 if (seqp
->next
!= NULL
)
1980 seqp
->next
->last
= seqp
->last
;
1982 /* We also have to check whether this entry is the
1983 first or last of a section. */
1984 if (seqp
->section
->first
== seqp
)
1986 if (seqp
->section
->first
== seqp
->section
->last
)
1987 /* This section has no content anymore. */
1988 seqp
->section
->first
= seqp
->section
->last
= NULL
;
1990 seqp
->section
->first
= seqp
->next
;
1992 else if (seqp
->section
->last
== seqp
)
1993 seqp
->section
->last
= seqp
->last
;
1995 /* Now insert it in the new place. */
1996 seqp
->next
= collate
->cursor
->next
;
1997 seqp
->last
= collate
->cursor
;
1998 collate
->cursor
->next
= seqp
;
1999 if (seqp
->next
!= NULL
)
2000 seqp
->next
->last
= seqp
;
2002 seqp
->section
= collate
->cursor
->section
;
2003 if (seqp
->section
->last
== collate
->cursor
)
2004 seqp
->section
->last
= seqp
;
2009 /* Otherwise we just add a new entry. */
2011 else if (state
== 5)
2013 /* We are reordering sections. Find the named section. */
2014 struct section_list
*runp
= collate
->sections
;
2015 struct section_list
*prevp
= NULL
;
2017 while (runp
!= NULL
)
2019 if (runp
->name
!= NULL
2020 && strlen (runp
->name
) == arg
->val
.str
.lenmb
2021 && memcmp (runp
->name
, arg
->val
.str
.startmb
,
2022 arg
->val
.str
.lenmb
) == 0)
2031 lr_error (ldfile
, _("%s: section `%.*s' not known"),
2032 "LC_COLLATE", arg
->val
.str
.lenmb
,
2033 arg
->val
.str
.startmb
);
2034 lr_ignore_rest (ldfile
, 0);
2038 if (runp
!= collate
->current_section
)
2040 /* Remove the named section from the old place and
2041 insert it in the new one. */
2042 prevp
->next
= runp
->next
;
2044 runp
->next
= collate
->current_section
->next
;
2045 collate
->current_section
->next
= runp
;
2046 collate
->current_section
= runp
;
2049 /* Process the rest of the line which might change
2050 the collation rules. */
2051 arg
= lr_token (ldfile
, charmap
, repertoire
);
2052 if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
2053 read_directions (ldfile
, arg
, charmap
, repertoire
,
2058 else if (was_ellipsis
!= tok_none
)
2060 /* Using the information in the `ellipsis_weight'
2061 element and this and the last value we have to handle
2062 the ellipsis now. */
2063 assert (state
== 1);
2065 handle_ellipsis (ldfile
, arg
, was_ellipsis
, charmap
, repertoire
,
2068 /* Remember that we processed the ellipsis. */
2069 was_ellipsis
= tok_none
;
2071 /* And don't add the value a second time. */
2075 /* Now insert in the new place. */
2076 insert_value (ldfile
, arg
, charmap
, repertoire
, collate
);
2080 /* Ignore the rest of the line if we don't need the input of
2084 lr_ignore_rest (ldfile
, 0);
2091 if (was_ellipsis
!= tok_none
)
2094 _("%s: cannot have `%s' as end of ellipsis range"),
2095 "LC_COLLATE", "UNDEFINED");
2097 unlink_element (collate
);
2098 was_ellipsis
= tok_none
;
2101 /* See whether UNDEFINED already appeared somewhere. */
2102 if (collate
->undefined
.next
!= NULL
2103 || (collate
->cursor
!= NULL
2104 && collate
->undefined
.next
== collate
->cursor
))
2107 _("%s: order for `%.*s' already defined at %s:%zu"),
2108 "LC_COLLATE", 9, "UNDEFINED", collate
->undefined
.file
,
2109 collate
->undefined
.line
);
2110 lr_ignore_rest (ldfile
, 0);
2113 /* Parse the weights. */
2114 insert_weights (ldfile
, &collate
->undefined
, charmap
,
2115 repertoire
, collate
, tok_none
);
2121 /* This is the symbolic (decimal or hexadecimal) or absolute
2123 if (was_ellipsis
!= tok_none
)
2126 if (state
!= 1 && state
!= 3)
2129 was_ellipsis
= nowtok
;
2131 insert_weights (ldfile
, &collate
->ellipsis_weight
, charmap
,
2132 repertoire
, collate
, nowtok
);
2136 /* Next we assume `LC_COLLATE'. */
2137 if (!ignore_content
)
2140 /* We must either see a copy statement or have
2143 _("%s: empty category description not allowed"),
2145 else if (state
== 1)
2147 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
2150 /* Handle ellipsis at end of list. */
2151 if (was_ellipsis
!= tok_none
)
2153 handle_ellipsis (ldfile
, NULL
, was_ellipsis
, charmap
,
2154 repertoire
, collate
);
2155 was_ellipsis
= tok_none
;
2158 else if (state
== 3)
2159 error (0, 0, _("%s: missing `reorder-end' keyword"),
2161 else if (state
== 5)
2162 error (0, 0, _("%s: missing `reorder-sections-end' keyword"),
2165 arg
= lr_token (ldfile
, charmap
, NULL
);
2166 if (arg
->tok
== tok_eof
)
2168 if (arg
->tok
== tok_eol
)
2169 lr_error (ldfile
, _("%s: incomplete `END' line"), "LC_COLLATE");
2170 else if (arg
->tok
!= tok_lc_collate
)
2171 lr_error (ldfile
, _("\
2172 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2173 lr_ignore_rest (ldfile
, arg
->tok
== tok_lc_collate
);
2178 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2181 /* Prepare for the next round. */
2182 now
= lr_token (ldfile
, charmap
, NULL
);
2186 /* When we come here we reached the end of the file. */
2187 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");