1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
30 #include "localeinfo.h"
31 #include "linereader.h"
33 #include "localedef.h"
35 /* Uncomment the following line in the production version. */
36 /* #define NDEBUG 1 */
39 #define obstack_chunk_alloc malloc
40 #define obstack_chunk_free free
42 /* Forward declaration. */
45 /* Data type for list of strings. */
48 struct section_list
*next
;
49 /* Name of the section. */
51 /* First element of this section. */
52 struct element_t
*first
;
53 /* Last element of this section. */
54 struct element_t
*last
;
55 /* These are the rules for this section. */
56 enum coll_sort_rule
*rules
;
57 /* Index of the rule set in the appropriate section of the output file. */
65 /* Number of elements. */
71 /* Data type for collating element. */
83 /* The following is a bit mask whose bits are set if this element is
84 used in the appropriate level. Interesting for the singlebyte
87 XXX The type here restricts the number of levels to 32. It could
88 be changed if necessary but I doubt this is necessary. */
89 unsigned int used_in_level
;
91 struct element_list_t
*weights
;
93 /* Where does the definition come from. */
97 /* Which section does this belong to. */
98 struct section_list
*section
;
100 /* Predecessor and successor in the order list. */
101 struct element_t
*last
;
102 struct element_t
*next
;
104 /* Next element in multibyte output list. */
105 struct element_t
*mbnext
;
108 /* Special element value. */
109 #define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
110 #define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
111 #define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
113 /* Data type for collating symbol. */
116 /* Point to place in the order list. */
117 struct element_t
*order
;
119 /* Where does the definition come from. */
125 /* The real definition of the struct for the LC_COLLATE locale. */
126 struct locale_collate_t
131 /* List of known scripts. */
132 struct section_list
*sections
;
133 /* Current section using definition. */
134 struct section_list
*current_section
;
135 /* There always can be an unnamed section. */
136 struct section_list unnamed_section
;
137 /* To make handling of errors easier we have another section. */
138 struct section_list error_section
;
140 /* Number of sorting rules given in order_start line. */
143 /* Start of the order list. */
144 struct element_t
*start
;
146 /* The undefined element. */
147 struct element_t undefined
;
149 /* This is the cursor for `reorder_after' insertions. */
150 struct element_t
*cursor
;
152 /* This value is used when handling ellipsis. */
153 struct element_t ellipsis_weight
;
155 /* Known collating elements. */
156 hash_table elem_table
;
158 /* Known collating symbols. */
159 hash_table sym_table
;
161 /* Known collation sequences. */
162 hash_table seq_table
;
164 struct obstack mempool
;
166 /* The LC_COLLATE category is a bit special as it is sometimes possible
167 that the definitions from more than one input file contain information.
168 Therefore we keep all relevant input in a list. */
169 struct locale_collate_t
*next
;
171 /* Arrays with heads of the list for each of the leading bytes in
172 the multibyte sequences. */
173 struct element_t
*mbheads
[256];
177 /* We have a few global variables which are used for reading all
178 LC_COLLATE category descriptions in all files. */
182 /* These are definitions used by some of the functions for handling
183 UTF-8 encoding below. */
184 static const uint32_t encoding_mask
[] =
186 ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
189 static const unsigned char encoding_byte
[] =
191 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
195 /* We need UTF-8 encoding of numbers. */
197 utf8_encode (char *buf
, int val
)
211 for (step
= 2; step
< 6; ++step
)
212 if ((val
& encoding_mask
[step
- 2]) == 0)
216 *buf
= encoding_byte
[step
- 2];
220 buf
[step
] = 0x80 | (val
& 0x3f);
231 static struct section_list
*
232 make_seclist_elem (struct locale_collate_t
*collate
, const char *string
,
233 struct section_list
*next
)
235 struct section_list
*newp
;
237 newp
= (struct section_list
*) obstack_alloc (&collate
->mempool
,
247 static struct element_t
*
248 new_element (struct locale_collate_t
*collate
, const char *mbs
, size_t mbslen
,
249 const uint32_t *wcs
, const char *name
, size_t namelen
)
251 struct element_t
*newp
;
253 newp
= (struct element_t
*) obstack_alloc (&collate
->mempool
,
255 newp
->name
= name
== NULL
? NULL
: obstack_copy0 (&collate
->mempool
,
259 newp
->mbs
= obstack_copy0 (&collate
->mempool
, mbs
, mbslen
);
269 size_t nwcs
= wcslen ((wchar_t *) wcs
);
271 obstack_grow (&collate
->mempool
, wcs
, nwcs
* sizeof (uint32_t));
272 obstack_grow (&collate
->mempool
, &zero
, sizeof (uint32_t));
273 newp
->wcs
= (uint32_t *) obstack_finish (&collate
->mempool
);
281 newp
->mborder
= NULL
;
283 newp
->used_in_level
= 0;
285 /* Will be allocated later. */
286 newp
->weights
= NULL
;
291 newp
->section
= collate
->current_section
;
302 static struct symbol_t
*
303 new_symbol (struct locale_collate_t
*collate
)
305 struct symbol_t
*newp
;
307 newp
= (struct symbol_t
*) obstack_alloc (&collate
->mempool
, sizeof (*newp
));
318 /* Test whether this name is already defined somewhere. */
320 check_duplicate (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
321 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
322 const char *symbol
, size_t symbol_len
)
326 if (find_entry (&charmap
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
328 lr_error (ldfile
, _("`%s' already defined in charmap"), symbol
);
332 if (find_entry (&repertoire
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
334 lr_error (ldfile
, _("`%s' already defined in repertoire"), symbol
);
338 if (find_entry (&collate
->sym_table
, symbol
, symbol_len
, &ignore
) == 0)
340 lr_error (ldfile
, _("`%s' already defined as collating symbol"), symbol
);
344 if (find_entry (&collate
->elem_table
, symbol
, symbol_len
, &ignore
) == 0)
346 lr_error (ldfile
, _("`%s' already defined as collating element"),
355 /* Read the direction specification. */
357 read_directions (struct linereader
*ldfile
, struct token
*arg
,
358 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
359 struct locale_collate_t
*collate
)
362 int max
= nrules
?: 10;
363 enum coll_sort_rule
*rules
= calloc (max
, sizeof (*rules
));
370 if (arg
->tok
== tok_forward
)
372 if (rules
[cnt
] & sort_backward
)
376 lr_error (ldfile
, _("\
377 %s: `forward' and `backward' are mutually excluding each other"),
382 else if (rules
[cnt
] & sort_forward
)
386 lr_error (ldfile
, _("\
387 %s: `%s' mentioned twice in definition of weight %d"),
388 "LC_COLLATE", "forward", cnt
+ 1);
392 rules
[cnt
] |= sort_forward
;
396 else if (arg
->tok
== tok_backward
)
398 if (rules
[cnt
] & sort_forward
)
402 lr_error (ldfile
, _("\
403 %s: `forward' and `backward' are mutually excluding each other"),
408 else if (rules
[cnt
] & sort_backward
)
412 lr_error (ldfile
, _("\
413 %s: `%s' mentioned twice in definition of weight %d"),
414 "LC_COLLATE", "backward", cnt
+ 1);
418 rules
[cnt
] |= sort_backward
;
422 else if (arg
->tok
== tok_position
)
424 if (rules
[cnt
] & sort_position
)
428 lr_error (ldfile
, _("\
429 %s: `%s' mentioned twice in definition of weight %d in category `%s'"),
430 "LC_COLLATE", "position", cnt
+ 1);
434 rules
[cnt
] |= sort_position
;
440 arg
= lr_token (ldfile
, charmap
, repertoire
);
442 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
|| arg
->tok
== tok_comma
443 || arg
->tok
== tok_semicolon
)
445 if (! valid
&& ! warned
)
447 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
451 /* See whether we have to increment the counter. */
452 if (arg
->tok
!= tok_comma
&& rules
[cnt
] != 0)
455 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
456 /* End of line or file, so we exit the loop. */
461 /* See whether we have enough room in the array. */
465 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
468 memset (&rules
[cnt
], '\0', (max
- cnt
) * sizeof (*rules
));
475 /* There must not be any more rule. */
478 lr_error (ldfile
, _("\
479 %s: too many rules; first entry only had %d"),
480 "LC_COLLATE", nrules
);
484 lr_ignore_rest (ldfile
, 0);
493 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
498 arg
= lr_token (ldfile
, charmap
, repertoire
);
503 /* Now we know how many rules we have. */
505 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
506 nrules
* sizeof (*rules
));
512 /* Not enough rules in this specification. */
514 lr_error (ldfile
, _("%s: not enough sorting rules"), "LC_COLLATE");
517 rules
[cnt
] = sort_forward
;
518 while (++cnt
< nrules
);
522 collate
->current_section
->rules
= rules
;
526 static struct element_t
*
527 find_element (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
528 const char *str
, size_t len
, uint32_t *wcstr
)
530 struct element_t
*result
= NULL
;
532 /* Search for the entry among the collation sequences already defined. */
533 if (find_entry (&collate
->seq_table
, str
, len
, (void **) &result
) != 0)
535 /* Nope, not defined yet. So we see whether it is a
539 if (find_entry (&collate
->sym_table
, str
, len
, &ptr
) == 0)
541 /* It's a collation symbol. */
542 struct symbol_t
*sym
= (struct symbol_t
*) ptr
;
546 result
= sym
->order
= new_element (collate
, NULL
, 0, NULL
,
549 else if (find_entry (&collate
->elem_table
, str
, len
,
550 (void **) &result
) != 0)
552 /* It's also no collation element. So it is a character
553 element defined later. */
554 result
= new_element (collate
, NULL
, 0, NULL
, str
, len
);
556 /* Insert it into the sequence table. */
557 insert_entry (&collate
->seq_table
, str
, len
, result
);
566 unlink_element (struct locale_collate_t
*collate
)
568 if (collate
->cursor
== collate
->start
)
570 assert (collate
->cursor
->next
== NULL
);
571 assert (collate
->cursor
->last
== NULL
);
572 collate
->cursor
= NULL
;
576 if (collate
->cursor
->next
!= NULL
)
577 collate
->cursor
->next
->last
= collate
->cursor
->last
;
578 if (collate
->cursor
->last
!= NULL
)
579 collate
->cursor
->last
->next
= collate
->cursor
->next
;
580 collate
->cursor
= collate
->cursor
->last
;
586 insert_weights (struct linereader
*ldfile
, struct element_t
*elem
,
587 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
588 struct locale_collate_t
*collate
, enum token_t ellipsis
)
593 /* Initialize all the fields. */
594 elem
->file
= ldfile
->fname
;
595 elem
->line
= ldfile
->lineno
;
596 elem
->last
= collate
->cursor
;
597 elem
->next
= collate
->cursor
? collate
->cursor
->next
: NULL
;
598 elem
->section
= collate
->current_section
;
599 if (collate
->cursor
!= NULL
)
600 collate
->cursor
->next
= elem
;
601 if (collate
->start
== NULL
)
603 assert (collate
->cursor
== NULL
);
604 collate
->start
= elem
;
606 elem
->weights
= (struct element_list_t
*)
607 obstack_alloc (&collate
->mempool
, nrules
* sizeof (struct element_list_t
));
608 memset (elem
->weights
, '\0', nrules
* sizeof (struct element_list_t
));
610 if (collate
->current_section
->first
== NULL
)
611 collate
->current_section
->first
= elem
;
612 if (collate
->current_section
->last
== collate
->cursor
)
613 collate
->current_section
->last
= elem
;
615 collate
->cursor
= elem
;
619 arg
= lr_token (ldfile
, charmap
, repertoire
);
622 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
625 if (arg
->tok
== tok_ignore
)
627 /* The weight for this level has to be ignored. We use the
628 null pointer to indicate this. */
629 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
630 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
631 elem
->weights
[weight_cnt
].w
[0] = NULL
;
632 elem
->weights
[weight_cnt
].cnt
= 1;
634 else if (arg
->tok
== tok_bsymbol
)
636 struct element_t
*val
= find_element (ldfile
, collate
,
637 arg
->val
.str
.startmb
,
639 arg
->val
.str
.startwc
);
644 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
645 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
646 elem
->weights
[weight_cnt
].w
[0] = val
;
647 elem
->weights
[weight_cnt
].cnt
= 1;
649 else if (arg
->tok
== tok_string
)
651 /* Split the string up in the individual characters and put
652 the element definitions in the list. */
653 const char *cp
= arg
->val
.str
.startmb
;
655 struct element_t
*charelem
;
656 struct element_t
**weights
= NULL
;
661 lr_error (ldfile
, _("%s: empty weight string not allowed"),
663 lr_ignore_rest (ldfile
, 0);
671 /* Ahh, it's a bsymbol. That's what we want. */
672 const char *startp
= ++cp
;
676 if (*cp
== ldfile
->escape_char
)
679 /* It's a syntax error. */
685 charelem
= find_element (ldfile
, collate
, startp
,
691 /* People really shouldn't use characters directly in
692 the string. Especially since it's not really clear
693 what this means. We interpret all characters in the
694 string as if that would be bsymbols. Otherwise we
695 would have to match back to bsymbols somehow and this
696 is not what people normally expect. */
697 charelem
= find_element (ldfile
, collate
, cp
++, 1, NULL
);
700 if (charelem
== NULL
)
702 /* We ignore the rest of the line. */
703 lr_ignore_rest (ldfile
, 0);
707 /* Add the pointer. */
710 struct element_t
**newp
;
712 newp
= (struct element_t
**)
713 alloca (max
* sizeof (struct element_t
*));
714 memcpy (newp
, weights
, cnt
* sizeof (struct element_t
*));
717 weights
[cnt
++] = charelem
;
721 /* Now store the information. */
722 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
723 obstack_alloc (&collate
->mempool
,
724 cnt
* sizeof (struct element_t
*));
725 memcpy (elem
->weights
[weight_cnt
].w
, weights
,
726 cnt
* sizeof (struct element_t
*));
727 elem
->weights
[weight_cnt
].cnt
= cnt
;
729 /* We don't need the string anymore. */
730 free (arg
->val
.str
.startmb
);
732 else if (ellipsis
!= tok_none
733 && (arg
->tok
== tok_ellipsis2
734 || arg
->tok
== tok_ellipsis3
735 || arg
->tok
== tok_ellipsis4
))
737 /* It must be the same ellipsis as used in the initial column. */
738 if (arg
->tok
!= ellipsis
)
739 lr_error (ldfile
, _("\
740 %s: weights must use the same ellipsis symbol as the name"),
743 /* The weight for this level has to be ignored. We use the
744 null pointer to indicate this. */
745 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
746 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
747 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
748 elem
->weights
[weight_cnt
].cnt
= 1;
753 /* It's a syntax error. */
754 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
755 lr_ignore_rest (ldfile
, 0);
759 arg
= lr_token (ldfile
, charmap
, repertoire
);
760 /* This better should be the end of the line or a semicolon. */
761 if (arg
->tok
== tok_semicolon
)
762 /* OK, ignore this and read the next token. */
763 arg
= lr_token (ldfile
, charmap
, repertoire
);
764 else if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
766 /* It's a syntax error. */
767 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
768 lr_ignore_rest (ldfile
, 0);
772 while (++weight_cnt
< nrules
);
774 if (weight_cnt
< nrules
)
776 /* This means the rest of the line uses the current element as
780 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
781 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
782 elem
->weights
[weight_cnt
].w
[0] = elem
;
783 elem
->weights
[weight_cnt
].cnt
= 1;
785 while (++weight_cnt
< nrules
);
789 if (arg
->tok
== tok_ignore
|| arg
->tok
== tok_bsymbol
)
791 /* Too many rule values. */
792 lr_error (ldfile
, _("%s: too many values"), "LC_COLLATE");
793 lr_ignore_rest (ldfile
, 0);
796 lr_ignore_rest (ldfile
, arg
->tok
!= tok_eol
&& arg
->tok
!= tok_eof
);
802 insert_value (struct linereader
*ldfile
, struct token
*arg
,
803 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
804 struct locale_collate_t
*collate
)
806 /* First find out what kind of symbol this is. */
809 struct element_t
*elem
= NULL
;
811 /* Try to find the character in the charmap. */
812 seq
= charmap_find_value (charmap
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
814 /* Determine the wide character. */
815 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
817 wc
= repertoire_find_value (repertoire
, arg
->val
.str
.startmb
,
825 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
827 /* It's no character, so look through the collation elements and
831 if (find_entry (&collate
->sym_table
, arg
->val
.str
.startmb
,
832 arg
->val
.str
.lenmb
, &result
) == 0)
834 /* It's a collation symbol. */
835 struct symbol_t
*sym
= (struct symbol_t
*) result
;
839 elem
= sym
->order
= new_element (collate
, NULL
, 0, NULL
, NULL
, 0);
841 else if (find_entry (&collate
->elem_table
, arg
->val
.str
.startmb
,
842 arg
->val
.str
.lenmb
, (void **) &elem
) != 0)
844 /* It's also no collation element. Therefore ignore it. */
845 lr_ignore_rest (ldfile
, 0);
851 /* Otherwise the symbols stands for a character. */
852 if (find_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
853 arg
->val
.str
.lenmb
, (void **) &elem
) != 0)
855 uint32_t wcs
[2] = { wc
, 0 };
857 /* We have to allocate an entry. */
858 elem
= new_element (collate
, seq
!= NULL
? seq
->bytes
: NULL
,
859 seq
!= NULL
? seq
->nbytes
: 0,
860 wcs
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
862 /* And add it to the table. */
863 if (insert_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
864 arg
->val
.str
.lenmb
, elem
) != 0)
865 /* This cannot happen. */
866 assert (! "Internal error");
870 /* Maybe the character was used before the definition. In this case
871 we have to insert the byte sequences now. */
872 if (elem
->mbs
== NULL
&& seq
!= NULL
)
874 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
875 seq
->bytes
, seq
->nbytes
);
876 elem
->nmbs
= seq
->nbytes
;
879 if (elem
->wcs
== NULL
&& seq
!= ILLEGAL_CHAR_VALUE
)
881 uint32_t wcs
[2] = { wc
, 0 };
883 elem
->wcs
= obstack_copy (&collate
->mempool
, wcs
, sizeof (wcs
));
889 /* Test whether this element is not already in the list. */
890 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
891 && elem
->next
== collate
->cursor
))
893 lr_error (ldfile
, _("order for `%.*s' already defined at %s:%zu"),
894 (int) arg
->val
.str
.lenmb
, arg
->val
.str
.startmb
,
895 elem
->file
, elem
->line
);
896 lr_ignore_rest (ldfile
, 0);
900 insert_weights (ldfile
, elem
, charmap
, repertoire
, collate
, tok_none
);
907 handle_ellipsis (struct linereader
*ldfile
, struct token
*arg
,
908 enum token_t ellipsis
, struct charmap_t
*charmap
,
909 struct repertoire_t
*repertoire
,
910 struct locale_collate_t
*collate
)
912 struct element_t
*startp
;
913 struct element_t
*endp
;
915 /* Unlink the entry added for the ellipsis. */
916 unlink_element (collate
);
917 startp
= collate
->cursor
;
919 /* Process and add the end-entry. */
921 && insert_value (ldfile
, arg
, charmap
, repertoire
, collate
))
922 /* Something went wrong with inserting the to-value. This means
923 we cannot process the ellipsis. */
926 /* Reset the cursor. */
927 collate
->cursor
= startp
;
929 /* Now we have to handle many different situations:
930 - we have to distinguish between the three different ellipsis forms
931 - is the ellipsis at the beginning, in the middle, or at the end.
933 endp
= collate
->cursor
->next
;
934 assert (arg
== NULL
|| endp
!= NULL
);
936 /* Both, the start and the end symbol, must stand for characters. */
937 if ((startp
== NULL
|| startp
->name
== NULL
)
938 || (endp
== NULL
|| endp
->name
== NULL
))
940 lr_error (ldfile
, _("\
941 %s: the start end the end symbol of a range must stand for characters"),
946 if (ellipsis
== tok_ellipsis3
)
948 /* One requirement we make here: the length of the byte
949 sequences for the first and end character must be the same.
950 This is mainly to prevent unwanted effects and this is often
951 not what is wanted. */
952 size_t len
= (startp
->mbs
!= NULL
? startp
->nmbs
953 : (endp
->mbs
!= NULL
? endp
->nmbs
: 0));
957 /* Well, this should be caught somewhere else already. Just to
959 assert (startp
== NULL
|| startp
->wcs
== NULL
|| startp
->wcs
[1] == 0);
960 assert (endp
== NULL
|| endp
->wcs
== NULL
|| endp
->wcs
[1] == 0);
962 if (startp
!= NULL
&& endp
!= NULL
963 && startp
->mbs
!= NULL
&& endp
->mbs
!= NULL
964 && startp
->nmbs
!= endp
->nmbs
)
966 lr_error (ldfile
, _("\
967 %s: byte sequences of first and last character must have the same length"),
972 /* Determine whether we have to generate multibyte sequences. */
973 if ((startp
== NULL
|| startp
->mbs
!= NULL
)
974 && (endp
== NULL
|| endp
->mbs
!= NULL
))
979 /* Prepare the beginning byte sequence. This is either from the
980 beginning byte sequence or it is all nulls if it was an
982 if (startp
== NULL
|| startp
->mbs
== NULL
)
983 memset (mbcnt
, '\0', len
);
986 memcpy (mbcnt
, startp
->mbs
, len
);
988 /* And increment it so that the value is the first one we will
990 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
991 if (++mbcnt
[cnt
] != '\0')
996 /* And the end sequence. */
997 if (endp
== NULL
|| endp
->mbs
== NULL
)
998 memset (mbend
, '\0', len
);
1000 memcpy (mbend
, endp
->mbs
, len
);
1003 /* Test whether we have a correct range. */
1004 ret
= memcmp (mbcnt
, mbend
, len
);
1008 lr_error (ldfile
, _("%s: byte sequence of first character of \
1009 sequence is not lower than that of the last character"), "LC_COLLATE");
1013 /* Generate the byte sequences data. */
1016 struct charseq
*seq
;
1018 /* Quite a bit of work ahead. We have to find the character
1019 definition for the byte sequence and then determine the
1020 wide character belonging to it. */
1021 seq
= charmap_find_symbol (charmap
, mbcnt
, len
);
1024 struct element_t
*elem
;
1027 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1028 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1029 strlen (seq
->name
));
1031 /* I don't think this can ever happen. */
1032 assert (seq
->name
!= NULL
);
1033 namelen
= strlen (seq
->name
);
1035 /* Now we are ready to insert the new value in the
1036 sequence. Find out whether the element is
1038 if (find_entry (&collate
->seq_table
, seq
->name
, namelen
,
1039 (void **) &elem
) != 0)
1041 uint32_t wcs
[2] = { seq
->ucs4
, 0 };
1043 /* We have to allocate an entry. */
1044 elem
= new_element (collate
, mbcnt
, len
, wcs
, seq
->name
,
1047 /* And add it to the table. */
1048 if (insert_entry (&collate
->seq_table
, seq
->name
,
1049 namelen
, elem
) != 0)
1050 /* This cannot happen. */
1051 assert (! "Internal error");
1054 /* Test whether this element is not already in the list. */
1055 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1056 && elem
->next
== collate
->cursor
))
1058 lr_error (ldfile
, _("\
1059 order for `%.*s' already defined at %s:%zu"),
1060 (int) namelen
, seq
->name
,
1061 elem
->file
, elem
->line
);
1065 /* Enqueue the new element. */
1066 elem
->last
= collate
->cursor
;
1067 if (collate
->cursor
!= NULL
)
1071 elem
->next
= collate
->cursor
->next
;
1072 elem
->last
->next
= elem
;
1073 if (elem
->next
!= NULL
)
1074 elem
->next
->last
= elem
;
1076 if (collate
->start
== NULL
)
1078 assert (collate
->cursor
== NULL
);
1079 collate
->start
= elem
;
1081 collate
->cursor
= elem
;
1083 /* Add the weight value. We take them from the
1084 `ellipsis_weights' member of `collate'. */
1085 elem
->weights
= (struct element_list_t
*)
1086 obstack_alloc (&collate
->mempool
,
1087 nrules
* sizeof (struct element_list_t
));
1088 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1089 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1090 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1091 == ELEMENT_ELLIPSIS2
))
1093 elem
->weights
[cnt
].w
= (struct element_t
**)
1094 obstack_alloc (&collate
->mempool
,
1095 sizeof (struct element_t
*));
1096 elem
->weights
[cnt
].w
[0] = elem
;
1097 elem
->weights
[cnt
].cnt
= 1;
1101 /* Simply use the weight from `ellipsis_weight'. */
1102 elem
->weights
[cnt
].w
=
1103 collate
->ellipsis_weight
.weights
[cnt
].w
;
1104 elem
->weights
[cnt
].cnt
=
1105 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1109 /* Increment for the next round. */
1111 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1112 if (++mbcnt
[cnt
] != '\0')
1115 /* Find out whether this was all. */
1116 if (cnt
< 0 || memcmp (mbcnt
, mbend
, len
) >= 0)
1117 /* Yep, that's all. */
1124 /* For symbolic range we naturally must have a beginning and an
1125 end specified by the user. */
1127 lr_error (ldfile
, _("\
1128 %s: symbolic range ellipsis must not directly follow `order_start'"),
1130 else if (endp
== NULL
)
1131 lr_error (ldfile
, _("\
1132 %s: symbolic range ellipsis must not be direct followed by `order_end'"),
1136 /* Determine the range. To do so we have to determine the
1137 common prefix of both names and then the numeric
1138 values of both ends. */
1139 size_t lenfrom
= strlen (startp
->name
);
1140 size_t lento
= strlen (endp
->name
);
1141 char buf
[lento
+ 1];
1146 int base
= ellipsis
== tok_ellipsis2
? 16 : 10;
1148 if (lenfrom
!= lento
)
1151 lr_error (ldfile
, _("\
1152 `%s' and `%.*s' are no valid names for symbolic range"),
1153 startp
->name
, (int) lento
, endp
->name
);
1157 while (startp
->name
[preflen
] == endp
->name
[preflen
])
1158 if (startp
->name
[preflen
] == '\0')
1159 /* Nothing to be done. The start and end point are identical
1160 and while inserting the end point we have already given
1161 the user an error message. */
1167 from
= strtol (startp
->name
+ preflen
, &cp
, base
);
1168 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1172 to
= strtol (endp
->name
+ preflen
, &cp
, base
);
1173 if ((to
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1176 /* Copy the prefix. */
1177 memcpy (buf
, startp
->name
, preflen
);
1179 /* Loop over all values. */
1180 for (++from
; from
< to
; ++from
)
1182 struct element_t
*elem
= NULL
;
1183 struct charseq
*seq
;
1187 /* Generate the name. */
1188 sprintf (buf
+ preflen
, base
== 10 ? "%d" : "%x", from
);
1190 /* Look whether this name is already defined. */
1191 if (find_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
1192 arg
->val
.str
.lenmb
, (void **) &elem
) == 0)
1194 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1195 && elem
->next
== collate
->cursor
))
1197 lr_error (ldfile
, _("\
1198 %s: order for `%.*s' already defined at %s:%zu"),
1199 "LC_COLLATE", (int) lenfrom
, buf
,
1200 elem
->file
, elem
->line
);
1204 if (elem
->name
== NULL
)
1206 lr_error (ldfile
, _("%s: `%s' must be a charater"),
1212 if (elem
== NULL
|| (elem
->mbs
== NULL
&& elem
->wcs
== NULL
))
1214 /* Search for a character of this name. */
1215 seq
= charmap_find_value (charmap
, buf
, lenfrom
);
1216 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1218 wc
= repertoire_find_value (repertoire
, buf
, lenfrom
);
1226 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
1227 /* We don't know anything about a character with this
1228 name. XXX Should we warn? */
1233 uint32_t wcs
[2] = { wc
, 0 };
1235 /* We have to allocate an entry. */
1236 elem
= new_element (collate
,
1237 seq
!= NULL
? seq
->bytes
: NULL
,
1238 seq
!= NULL
? seq
->nbytes
: 0,
1239 wc
== ILLEGAL_CHAR_VALUE
1245 /* Update the element. */
1248 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1249 seq
->bytes
, seq
->nbytes
);
1250 elem
->nmbs
= seq
->nbytes
;
1253 if (wc
!= ILLEGAL_CHAR_VALUE
)
1257 obstack_grow (&collate
->mempool
,
1258 &wc
, sizeof (uint32_t));
1259 obstack_grow (&collate
->mempool
,
1260 &zero
, sizeof (uint32_t));
1261 elem
->wcs
= obstack_finish (&collate
->mempool
);
1266 elem
->file
= ldfile
->fname
;
1267 elem
->line
= ldfile
->lineno
;
1268 elem
->section
= collate
->current_section
;
1271 /* Enqueue the new element. */
1272 elem
->last
= collate
->cursor
;
1273 elem
->next
= collate
->cursor
->next
;
1274 elem
->last
->next
= elem
;
1275 if (elem
->next
!= NULL
)
1276 elem
->next
->last
= elem
;
1277 collate
->cursor
= elem
;
1279 /* Now add the weights. They come from the `ellipsis_weights'
1280 member of `collate'. */
1281 elem
->weights
= (struct element_list_t
*)
1282 obstack_alloc (&collate
->mempool
,
1283 nrules
* sizeof (struct element_list_t
));
1284 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1285 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1286 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1287 == ELEMENT_ELLIPSIS2
))
1289 elem
->weights
[cnt
].w
= (struct element_t
**)
1290 obstack_alloc (&collate
->mempool
,
1291 sizeof (struct element_t
*));
1292 elem
->weights
[cnt
].w
[0] = elem
;
1293 elem
->weights
[cnt
].cnt
= 1;
1297 /* Simply use the weight from `ellipsis_weight'. */
1298 elem
->weights
[cnt
].w
=
1299 collate
->ellipsis_weight
.weights
[cnt
].w
;
1300 elem
->weights
[cnt
].cnt
=
1301 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1310 collate_startup (struct linereader
*ldfile
, struct localedef_t
*locale
,
1311 struct localedef_t
*copy_locale
, int ignore_content
)
1313 if (!ignore_content
)
1315 struct locale_collate_t
*collate
;
1317 if (copy_locale
== NULL
)
1319 collate
= locale
->categories
[LC_COLLATE
].collate
=
1320 (struct locale_collate_t
*)
1321 xcalloc (1, sizeof (struct locale_collate_t
));
1323 /* Init the various data structures. */
1324 init_hash (&collate
->elem_table
, 100);
1325 init_hash (&collate
->sym_table
, 100);
1326 init_hash (&collate
->seq_table
, 500);
1327 obstack_init (&collate
->mempool
);
1329 collate
->col_weight_max
= -1;
1332 collate
= locale
->categories
[LC_COLLATE
].collate
=
1333 copy_locale
->categories
[LC_COLLATE
].collate
;
1336 ldfile
->translate_strings
= 0;
1337 ldfile
->return_widestr
= 0;
1342 collate_finish (struct localedef_t
*locale
, struct charmap_t
*charmap
)
1344 /* Now is the time when we can assign the individual collation
1345 values for all the symbols. We have possibly different values
1346 for the wide- and the multibyte-character symbols. This is done
1347 since it might make a difference in the encoding if there is in
1348 some cases no multibyte-character but there are wide-characters.
1349 (The other way around it is not important since the encoded
1350 collation value in the wide-character case is 32 bits wide and
1351 therefore requires no encoding).
1353 The lowest collation value assigned is 2. Zero is reserved for
1354 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1355 functions and 1 is used to separate the individual passes for the
1358 We also have to construct a list with all the bytes/words which
1359 can come first in a sequence, followed by all the elements which
1360 also start with this byte/word. The order is reverse which has
1361 among others the important effect that longer strings are located
1362 first in the list. This is required for the output data since
1363 the algorithm used in `strcoll' etc depends on this.
1365 The multibyte case is easy. We simply sort into an array with
1367 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1370 struct element_t
*runp
;
1372 int need_undefined
= 0;
1373 struct section_list
*sect
;
1376 /* If this assertion is hit change the type in `element_t'. */
1377 assert (nrules
<= sizeof (runp
->used_in_level
) * 8);
1379 /* Find out which elements are used at which level. At the same
1380 time we find out whether we have any undefined symbols. */
1381 runp
= collate
->start
;
1382 while (runp
!= NULL
)
1384 if (runp
->mbs
!= NULL
)
1386 for (i
= 0; i
< nrules
; ++i
)
1390 for (j
= 0; j
< runp
->weights
[i
].cnt
; ++j
)
1391 /* A NULL pointer as the weight means IGNORE. */
1392 if (runp
->weights
[i
].w
[j
] != NULL
)
1394 if (runp
->weights
[i
].w
[j
]->weights
== NULL
)
1396 error_at_line (0, 0, runp
->file
, runp
->line
,
1397 _("symbol `%s' not defined"),
1398 runp
->weights
[i
].w
[j
]->name
);
1401 runp
->weights
[i
].w
[j
] = &collate
->undefined
;
1404 /* Set the bit for the level. */
1405 runp
->weights
[i
].w
[j
]->used_in_level
|= 1 << i
;
1410 /* Up to the next entry. */
1414 /* Walk through the list of defined sequences and assign weights. Also
1415 create the data structure which will allow generating the single byte
1416 character based tables.
1418 Since at each time only the weights for each of the rules are
1419 only compared to other weights for this rule it is possible to
1420 assign more compact weight values than simply counting all
1421 weights in sequence. We can assign weights from 3, one for each
1422 rule individually and only for those elements, which are actually
1425 Why is this important? It is not for the wide char table. But
1426 it is for the singlebyte output since here larger numbers have to
1427 be encoded to make it possible to emit the value as a byte
1429 for (i
= 0; i
< nrules
; ++i
)
1432 runp
= collate
->start
;
1433 while (runp
!= NULL
)
1435 /* Determine the order. */
1436 if (runp
->used_in_level
!= 0)
1438 runp
->mborder
= (int *) obstack_alloc (&collate
->mempool
,
1439 nrules
* sizeof (int));
1441 for (i
= 0; i
< nrules
; ++i
)
1442 if ((runp
->used_in_level
& (1 << i
)) != 0)
1443 runp
->mborder
[i
] = mbact
[i
]++;
1445 runp
->mborder
[i
] = 0;
1448 if (runp
->mbs
!= NULL
)
1450 struct element_t
**eptr
;
1452 /* Find the point where to insert in the list. */
1453 eptr
= &collate
->mbheads
[((unsigned char *) runp
->mbs
)[0]];
1454 while (*eptr
!= NULL
)
1456 if ((*eptr
)->nmbs
< runp
->nmbs
)
1459 if ((*eptr
)->nmbs
== runp
->nmbs
)
1461 int c
= memcmp ((*eptr
)->mbs
, runp
->mbs
, runp
->nmbs
);
1465 /* This should not happen. It means that we have
1466 to symbols with the same byte sequence. It is
1467 of course an error. */
1468 error_at_line (0, 0, (*eptr
)->file
, (*eptr
)->line
,
1469 _("symbol `%s' has same encoding as"),
1471 error_at_line (0, 0, runp
->file
, runp
->line
,
1472 _("symbol `%s'"), runp
->name
);
1476 /* Insert it here. */
1480 /* To the next entry. */
1481 eptr
= &(*eptr
)->mbnext
;
1484 /* Set the pointers. */
1485 runp
->mbnext
= *eptr
;
1490 if (runp
->wcs
!= NULL
)
1491 runp
->wcorder
= wcact
++;
1493 /* Up to the next entry. */
1497 /* Find out whether any of the `mbheads' entries is unset. In this
1498 case we use the UNDEFINED entry. */
1499 for (i
= 1; i
< 256; ++i
)
1500 if (collate
->mbheads
[i
] == NULL
)
1503 collate
->mbheads
[i
] = &collate
->undefined
;
1506 /* Now determine whether the UNDEFINED entry is needed and if yes,
1507 whether it was defined. */
1508 collate
->undefined
.used_in_level
= need_undefined
? ~0ul : 0;
1509 if (need_undefined
&& collate
->undefined
.file
== NULL
)
1511 error (0, 0, _("no definition of `UNDEFINED'"));
1513 /* Add UNDEFINED at the end. */
1514 collate
->undefined
.mborder
=
1515 (int *) obstack_alloc (&collate
->mempool
, nrules
* sizeof (int));
1517 for (i
= 0; i
< nrules
; ++i
)
1518 collate
->undefined
.mborder
[i
] = mbact
[i
]++;
1520 collate
->undefined
.wcorder
= wcact
++;
1523 /* Finally, try to unify the rules for the sections. Whenever the rules
1524 for a section are the same as those for another section give the
1525 ruleset the same index. Since there are never many section we can
1526 use an O(n^2) algorithm here. */
1527 sect
= collate
->sections
;
1528 assert (sect
!= NULL
);
1532 struct section_list
*osect
= collate
->sections
;
1534 while (osect
!= sect
)
1535 if (memcmp (osect
->rules
, sect
->rules
, nrules
) == 0)
1538 osect
= osect
->next
;
1541 sect
->ruleidx
= ruleidx
++;
1543 sect
->ruleidx
= osect
->ruleidx
;
1548 while (sect
!= NULL
);
1549 /* We are currently not prepared for more than 256 rulesets. But this
1550 should never really be a problem. */
1551 assert (ruleidx
<= 256);
/* Append the weight data of ELEM to the object growing in POOL.
   `retval' is set to the size of that object before anything is
   added.  For each of the NRULES levels the visible code adds one
   byte holding the rule of ELEM's section for that level, followed by
   the `utf8_encode'd `mborder' values of ELEM's weights at that level;
   the temporary buffer reserves seven bytes per weight.  According to
   the comments below the UNDEFINED element is special-cased because
   its weights are already in the pool, and a NULL weight stands for
   IGNORE; the statements handling these two cases and the return
   statements are not shown in this listing.  */
1555 static inline int32_t
1556 output_weight (struct obstack
*pool
, struct locale_collate_t
*collate
,
1557 struct element_t
*elem
)
1562 /* Optimize the use of UNDEFINED. */
1563 if (elem
== &collate
->undefined
)
1564 /* The weights are already inserted. */
1567 /* This byte can start exactly one collation element and this is
1568 a single byte. We can directly give the index to the weights. */
1569 retval
= obstack_object_size (pool
);
1571 /* Construct the weight. */
1572 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1574 char buf
[elem
->weights
[cnt
].cnt
* 7];
1578 /* Add the direction. */
1579 obstack_1grow (pool
, elem
->section
->rules
[cnt
]);
1581 for (i
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1582 /* Encode the weight value. */
1583 if (elem
->weights
[cnt
].w
[i
] == NULL
)
1585 /* This entry was IGNORE. */
1589 len
+= utf8_encode (&buf
[len
],
1590 elem
->weights
[cnt
].w
[i
]->mborder
[cnt
]);
1592 /* And add the buffer content. */
1593 obstack_grow (pool
, buf
, len
);
/* Write the LC_COLLATE category of LOCALE to OUTPUT_PATH by means of
   `write_locale_data'.  The data is described by the array `iov':
   entry 0 is the `struct locale_file' header, entry 1 the index table
   `idx', and the following entries are, in the order checked by the
   assertions, the number of rules, the ruleset table, the table
   `tablemb', the weight pool and the extra pool.  `idx[0]' is the
   combined length of the first two entries and every later `idx'
   value adds the length of the corresponding data entry.

   For the bytes 1..255 `tablemb' holds either the value returned by
   `output_weight' (the `mbheads' list of the byte consists of one
   single-byte element) or the negated size of `extrapool' at the
   point where the entries for that byte start.  Such an entry
   consists of a weight index, the rule index of the section, the
   number of remaining bytes and those bytes, padded to the alignment
   of int32_t; a negative weight index marks a range of consecutive
   sequences, for which the bytes of the last sequence follow as well.

   NOTE(review): CHARMAP is not referenced in any visible statement.
   NOTE(review): the ruleset table is filled by copying NRULES bytes
   from `rules', which points to `enum coll_sort_rule' objects; confirm
   that one rule occupies exactly one byte.  */
1601 collate_output (struct localedef_t
*locale
, struct charmap_t
*charmap
,
1602 const char *output_path
)
1604 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1605 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
1606 struct iovec iov
[2 + nelems
];
1607 struct locale_file data
;
1608 uint32_t idx
[nelems
];
1611 int32_t tablemb
[256];
1612 struct obstack weightpool
;
1613 struct obstack extrapool
;
1614 struct section_list
*sect
;
1617 obstack_init (&weightpool
);
1618 obstack_init (&extrapool
);
1620 data
.magic
= LIMAGIC (LC_COLLATE
);
1622 iov
[0].iov_base
= (void *) &data
;
1623 iov
[0].iov_len
= sizeof (data
);
1625 iov
[1].iov_base
= (void *) idx
;
1626 iov
[1].iov_len
= sizeof (idx
);
1628 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
1631 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_NRULES
));
1632 iov
[2 + cnt
].iov_base
= &collate
->nrules
;
1633 iov
[2 + cnt
].iov_len
= sizeof (uint32_t);
1634 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
1637 /* Prepare the ruleset table. */
1638 for (sect
= collate
->sections
, i
= 0; sect
!= NULL
; sect
= sect
->next
)
1639 if (sect
->ruleidx
== i
)
1641 obstack_grow (&weightpool
, sect
->rules
, nrules
);
1644 /* And align the output. */
1645 i
= (nrules
* i
) % __alignof__ (int32_t);
1648 obstack_1grow (&weightpool
, '\0');
1649 while (++i
< __alignof__ (int32_t));
1651 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_RULESETS
));
1652 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
1653 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
1654 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
1657 /* Generate the 8-bit table. Walk through the lists of sequences
1658 starting with the same byte and add them one after the other to
1659 the table. In case we have more than one sequence starting with
1660 the same byte we have to use extra indirection.
1662 First add a record for the NUL byte. This entry will never be used
1663 so it does not matter. */
1666 /* Now insert the `UNDEFINED' value if it is used. Since this value
1667 will probably be used more than once it is good to store the
1668 weights only once. */
1669 if (collate
->undefined
.used_in_level
!= 0)
1670 output_weight (&weightpool
, collate
, &collate
->undefined
);
1672 for (ch
= 1; ch
< 256; ++ch
)
1673 if (collate
->mbheads
[ch
]->mbnext
== NULL
1674 && collate
->mbheads
[ch
]->nmbs
== 1)
1676 tablemb
[ch
] = output_weight (&weightpool
, collate
,
1677 collate
->mbheads
[ch
]);
1681 /* The entries in the list are sorted by length and then
1682 alphabetically. This is the order in which we will add the
1683 elements to the collation table. This allows to simply
1684 walk the table in sequence and stop at the first matching
1685 entry. Since the longer sequences are coming first in the
1686 list they have the possibility to match first, just as it
1687 has to be. In the worst case we are walking to the end of
1688 the list where we put, if no singlebyte sequence is defined
1689 in the locale definition, the weights for UNDEFINED.
1691 To reduce the length of the search list we compress them a bit.
1692 This happens by collecting sequences of consecutive byte
1693 sequences in one entry (having a begin and an end byte sequence)
1694 and add only one index into the weight table. We can find the
1695 consecutive entries since they are also consecutive in the list. */
1696 struct element_t
*runp
= collate
->mbheads
[ch
];
1697 struct element_t
*lastp
;
1699 tablemb
[ch
] = -obstack_object_size (&extrapool
);
1703 /* Store the current index in the weight table. We know that
1704 the current position in the `extrapool' is aligned on a
1709 /* Output the weight info. */
1710 weightidx
= output_weight (&weightpool
, collate
, runp
);
1712 /* Find out whether this is a single entry or we have more than
1713 one consecutive entry. */
1714 if (runp
->mbnext
!= NULL
1715 && runp
->nmbs
== runp
->mbnext
->nmbs
1716 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
, runp
->nmbs
- 1) == 0
1717 && (runp
->mbs
[runp
->nmbs
- 1] + 1
1718 == runp
->mbnext
->mbs
[runp
->nmbs
- 1]))
1722 /* More than one consecutive entry. We mark this by having
1723 a negative index into the weight table. */
1724 weightidx
= -weightidx
;
1726 /* Now add first the initial byte sequence. */
1727 added
= ((sizeof (int32_t) + 1 + 1 + 2 * (runp
->nmbs
- 1)
1728 + __alignof__ (int32_t) - 1)
1729 & ~(__alignof__ (int32_t) - 1));
1730 obstack_make_room (&extrapool
, added
);
1732 if (sizeof (int32_t) == sizeof (int))
1733 obstack_int_grow_fast (&extrapool
, weightidx
);
1735 obstack_grow (&extrapool
, &weightidx
, sizeof (int32_t));
1736 obstack_1grow_fast (&extrapool
, runp
->section
->ruleidx
);
1737 obstack_1grow_fast (&extrapool
, runp
->nmbs
- 1);
1738 for (i
= 1; i
< runp
->nmbs
; ++i
)
1739 obstack_1grow_fast (&extrapool
, runp
->mbs
[i
]);
1741 /* Now find the end of the consecutive sequence. */
1744 while (runp
->mbnext
!= NULL
1745 && runp
->nmbs
== runp
->mbnext
->nmbs
1746 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
,
1747 runp
->nmbs
- 1) == 0
1748 && (runp
->mbs
[runp
->nmbs
- 1] + 1
1749 == runp
->mbnext
->mbs
[runp
->nmbs
- 1]));
1751 /* And add the end byte sequence. Without length this time. */
1752 for (i
= 1; i
< runp
->nmbs
; ++i
)
1753 obstack_1grow_fast (&extrapool
, runp
->mbs
[i
]);
1757 /* A single entry. Simply add the index and the length and
1758 string (except for the first character which is already
1762 added
= ((sizeof (int32_t) + 1 + 1 + runp
->nmbs
- 1
1763 + __alignof__ (int32_t) - 1)
1764 & ~(__alignof__ (int32_t) - 1));
1765 obstack_make_room (&extrapool
, added
);
1767 if (sizeof (int32_t) == sizeof (int))
1768 obstack_int_grow_fast (&extrapool
, weightidx
);
1770 obstack_grow (&extrapool
, &weightidx
, sizeof (int32_t));
1771 obstack_1grow_fast (&extrapool
, runp
->section
->ruleidx
);
1772 obstack_1grow_fast (&extrapool
, runp
->nmbs
- 1);
1773 for (i
= 1; i
< runp
->nmbs
; ++i
)
1774 obstack_1grow_fast (&extrapool
, runp
->mbs
[i
]);
1777 /* Add alignment bytes if necessary. */
1778 i
= added
% __alignof__ (int32_t);
1781 obstack_1grow_fast (&extrapool
, '\0');
1782 while (++i
!= __alignof__ (int32_t));
1786 runp
= runp
->mbnext
;
1788 while (runp
!= NULL
);
1790 /* If the final entry in the list is not a single character we
1791 add an UNDEFINED entry here. */
1792 if (lastp
->nmbs
!= 1)
1794 int added
= ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t))
1795 & ~(__alignof__ (int32_t) - 1));
1796 obstack_make_room (&extrapool
, added
);
1798 if (sizeof (int32_t) == sizeof (int))
1799 obstack_int_grow_fast (&extrapool
, 0);
1803 obstack_grow (&extrapool
, &zero
, sizeof (int32_t));
1805 /* XXX What rule? We just pick the first. */
1806 obstack_1grow_fast (&extrapool
, 0);
1807 /* Length is zero. */
1808 obstack_1grow_fast (&extrapool
, 0);
1810 /* Add alignment bytes if necessary. */
1811 i
= added
% __alignof__ (int32_t);
1814 obstack_1grow_fast (&extrapool
, '\0');
1815 while (++i
!= __alignof__ (int32_t));
1819 /* Now add the three tables. */
1820 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB
));
1821 iov
[2 + cnt
].iov_base
= tablemb
;
1822 iov
[2 + cnt
].iov_len
= sizeof (tablemb
);
1823 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
1826 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB
));
1827 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
1828 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
1829 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
1832 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB
));
1833 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
1834 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
1835 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
1839 assert (cnt
== _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
));
1841 write_locale_data (output_path
, "LC_COLLATE", 2 + cnt
, iov
);
1846 collate_read (struct linereader
*ldfile
, struct localedef_t
*result
,
1847 struct charmap_t
*charmap
, const char *repertoire_name
,
1850 struct repertoire_t
*repertoire
= NULL
;
1851 struct locale_collate_t
*collate
;
1853 struct token
*arg
= NULL
;
1854 enum token_t nowtok
;
1856 enum token_t was_ellipsis
= tok_none
;
1857 struct localedef_t
*copy_locale
= NULL
;
1859 /* Get the repertoire we have to use. */
1860 if (repertoire_name
!= NULL
)
1861 repertoire
= repertoire_read (repertoire_name
);
1863 /* The rest of the line containing `LC_COLLATE' must be free. */
1864 lr_ignore_rest (ldfile
, 1);
1868 now
= lr_token (ldfile
, charmap
, NULL
);
1871 while (nowtok
== tok_eol
);
1873 if (nowtok
== tok_copy
)
1876 now
= lr_token (ldfile
, charmap
, NULL
);
1877 if (now
->tok
!= tok_string
)
1879 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
1883 now
= lr_token (ldfile
, charmap
, NULL
);
1884 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
1886 if (now
->tok
!= tok_eof
1887 || (now
= lr_token (ldfile
, charmap
, NULL
), now
->tok
== tok_eof
))
1888 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");
1889 else if (now
->tok
!= tok_lc_collate
)
1891 lr_error (ldfile
, _("\
1892 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
1893 lr_ignore_rest (ldfile
, 0);
1896 lr_ignore_rest (ldfile
, 1);
1901 /* Get the locale definition. */
1902 copy_locale
= find_locale (LC_COLLATE
, now
->val
.str
.startmb
,
1903 repertoire_name
, charmap
);
1904 if ((copy_locale
->avail
& COLLATE_LOCALE
) == 0)
1906 /* Not yet loaded. So do it now. */
1907 if (locfile_read (copy_locale
, charmap
) != 0)
1911 lr_ignore_rest (ldfile
, 1);
1913 now
= lr_token (ldfile
, charmap
, NULL
);
1917 /* Prepare the data structures. */
1918 collate_startup (ldfile
, result
, copy_locale
, ignore_content
);
1919 collate
= result
->categories
[LC_COLLATE
].collate
;
1923 /* Of course we don't proceed beyond the end of file. */
1924 if (nowtok
== tok_eof
)
1927 /* Ignore empty lines. */
1928 if (nowtok
== tok_eol
)
1930 now
= lr_token (ldfile
, charmap
, NULL
);
1937 case tok_coll_weight_max
:
1938 /* Ignore the rest of the line if we don't need the input of
1942 lr_ignore_rest (ldfile
, 0);
1949 arg
= lr_token (ldfile
, charmap
, NULL
);
1950 if (arg
->tok
!= tok_number
)
1952 if (collate
->col_weight_max
!= -1)
1953 lr_error (ldfile
, _("%s: duplicate definition of `%s'"),
1954 "LC_COLLATE", "col_weight_max");
1956 collate
->col_weight_max
= arg
->val
.num
;
1957 lr_ignore_rest (ldfile
, 1);
1960 case tok_section_symbol
:
1961 /* Ignore the rest of the line if we don't need the input of
1965 lr_ignore_rest (ldfile
, 0);
1972 arg
= lr_token (ldfile
, charmap
, repertoire
);
1973 if (arg
->tok
!= tok_bsymbol
)
1975 else if (!ignore_content
)
1977 /* Check whether this section is already known. */
1978 struct section_list
*known
= collate
->sections
;
1979 while (known
!= NULL
)
1980 if (strcmp (known
->name
, arg
->val
.str
.startmb
) == 0)
1986 _("%s: duplicate declaration of section `%s'"),
1987 "LC_COLLATE", arg
->val
.str
.startmb
);
1988 free (arg
->val
.str
.startmb
);
1991 collate
->sections
= make_seclist_elem (collate
,
1992 arg
->val
.str
.startmb
,
1995 lr_ignore_rest (ldfile
, known
== NULL
);
1999 free (arg
->val
.str
.startmb
);
2000 lr_ignore_rest (ldfile
, 0);
2004 case tok_collating_element
:
2005 /* Ignore the rest of the line if we don't need the input of
2009 lr_ignore_rest (ldfile
, 0);
2016 arg
= lr_token (ldfile
, charmap
, repertoire
);
2017 if (arg
->tok
!= tok_bsymbol
)
2021 const char *symbol
= arg
->val
.str
.startmb
;
2022 size_t symbol_len
= arg
->val
.str
.lenmb
;
2024 /* Next the `from' keyword. */
2025 arg
= lr_token (ldfile
, charmap
, repertoire
);
2026 if (arg
->tok
!= tok_from
)
2028 free ((char *) symbol
);
2032 ldfile
->return_widestr
= 1;
2034 /* Finally the string with the replacement. */
2035 arg
= lr_token (ldfile
, charmap
, repertoire
);
2036 ldfile
->return_widestr
= 0;
2037 if (arg
->tok
!= tok_string
)
2040 if (!ignore_content
)
2043 lr_error (ldfile
, _("\
2044 %s: unknown character in collating element name"),
2046 if (arg
->val
.str
.startmb
== NULL
)
2047 lr_error (ldfile
, _("\
2048 %s: unknown character in collating element definition"),
2050 if (arg
->val
.str
.startwc
== NULL
)
2051 lr_error (ldfile
, _("\
2052 %s: unknown wide character in collating element definition"),
2054 else if (arg
->val
.str
.lenwc
< 2)
2055 lr_error (ldfile
, _("\
2056 %s: substitution string in collating element definition must have at least two characters"),
2061 /* The name is already defined. */
2062 if (check_duplicate (ldfile
, collate
, charmap
,
2063 repertoire
, symbol
, symbol_len
))
2066 if (insert_entry (&collate
->elem_table
,
2068 new_element (collate
,
2069 NULL
, 0, NULL
, symbol
,
2071 lr_error (ldfile
, _("\
2072 error while adding collating element"));
2081 free ((char *) symbol
);
2082 if (arg
->val
.str
.startmb
!= NULL
)
2083 free (arg
->val
.str
.startmb
);
2084 if (arg
->val
.str
.startwc
!= NULL
)
2085 free (arg
->val
.str
.startwc
);
2087 lr_ignore_rest (ldfile
, 1);
2091 case tok_collating_symbol
:
2092 /* Ignore the rest of the line if we don't need the input of
2096 lr_ignore_rest (ldfile
, 0);
2103 arg
= lr_token (ldfile
, charmap
, repertoire
);
2104 if (arg
->tok
!= tok_bsymbol
)
2108 const char *symbol
= arg
->val
.str
.startmb
;
2109 size_t symbol_len
= arg
->val
.str
.lenmb
;
2111 if (!ignore_content
)
2114 lr_error (ldfile
, _("\
2115 %s: unknown character in collating symbol name"),
2119 /* The name is already defined. */
2120 if (check_duplicate (ldfile
, collate
, charmap
,
2121 repertoire
, symbol
, symbol_len
))
2124 if (insert_entry (&collate
->sym_table
,
2126 new_symbol (collate
)) < 0)
2127 lr_error (ldfile
, _("\
2128 error while adding collating symbol"));
2135 free ((char *) symbol
);
2137 lr_ignore_rest (ldfile
, 1);
2141 case tok_symbol_equivalence
:
2142 /* Ignore the rest of the line if we don't need the input of
2146 lr_ignore_rest (ldfile
, 0);
2153 arg
= lr_token (ldfile
, charmap
, repertoire
);
2154 if (arg
->tok
!= tok_bsymbol
)
2158 const char *newname
= arg
->val
.str
.startmb
;
2159 size_t newname_len
= arg
->val
.str
.lenmb
;
2160 const char *symname
;
2162 struct symbol_t
*symval
;
2164 arg
= lr_token (ldfile
, charmap
, repertoire
);
2165 if (arg
->tok
!= tok_bsymbol
)
2167 if (newname
!= NULL
)
2168 free ((char *) newname
);
2172 symname
= arg
->val
.str
.startmb
;
2173 symname_len
= arg
->val
.str
.lenmb
;
2175 if (!ignore_content
)
2177 if (newname
== NULL
)
2179 lr_error (ldfile
, _("\
2180 %s: unknown character in equivalent definition name"),
2182 goto sym_equiv_free
;
2184 if (symname
== NULL
)
2186 lr_error (ldfile
, _("\
2187 %s: unknown character in equivalent definition value"),
2189 goto sym_equiv_free
;
2191 /* The name is already defined. */
2192 if (check_duplicate (ldfile
, collate
, charmap
,
2193 repertoire
, symname
, symname_len
))
2196 /* See whether the symbol name is already defined. */
2197 if (find_entry (&collate
->sym_table
, symname
, symname_len
,
2198 (void **) &symval
) != 0)
2200 lr_error (ldfile
, _("\
2201 %s: unknown symbol `%s' in equivalent definition"),
2202 "LC_COLLATE", symname
);
2206 if (insert_entry (&collate
->sym_table
,
2207 newname
, newname_len
, symval
) < 0)
2209 lr_error (ldfile
, _("\
2210 error while adding equivalent collating symbol"));
2211 goto sym_equiv_free
;
2214 free ((char *) symname
);
2219 if (newname
!= NULL
)
2220 free ((char *) newname
);
2221 if (symname
!= NULL
)
2222 free ((char *) symname
);
2224 lr_ignore_rest (ldfile
, 1);
2228 case tok_order_start
:
2229 /* Ignore the rest of the line if we don't need the input of
2233 lr_ignore_rest (ldfile
, 0);
2237 if (state
!= 0 && state
!= 1)
2241 /* The 14652 draft does not specify whether all `order_start' lines
2242 must contain the same number of sort-rules, but 14651 does. So
2243 we require this here as well. */
2244 arg
= lr_token (ldfile
, charmap
, repertoire
);
2245 if (arg
->tok
== tok_bsymbol
)
2247 /* This better should be a section name. */
2248 struct section_list
*sp
= collate
->sections
;
2250 && strcmp (sp
->name
, arg
->val
.str
.startmb
) != 0)
2255 lr_error (ldfile
, _("\
2256 %s: unknown section name `%s'"),
2257 "LC_COLLATE", arg
->val
.str
.startmb
);
2258 /* We use the error section. */
2259 collate
->current_section
= &collate
->error_section
;
2261 if (collate
->error_section
.first
== NULL
)
2263 collate
->error_section
.next
= collate
->sections
;
2264 collate
->sections
= &collate
->error_section
;
2269 /* Remember this section. */
2270 collate
->current_section
= sp
;
2272 /* One should not be allowed to open the same
2274 if (sp
->first
!= NULL
)
2275 lr_error (ldfile
, _("\
2276 %s: multiple order definitions for section `%s'"),
2277 "LC_COLLATE", sp
->name
);
2280 sp
->next
= collate
->sections
;
2281 collate
->sections
= sp
;
2284 /* Next should come the end of the line or a semicolon. */
2285 arg
= lr_token (ldfile
, charmap
, repertoire
);
2286 if (arg
->tok
== tok_eol
)
2290 /* This means we have exactly one rule: `forward'. */
2291 if (collate
->nrules
> 1)
2292 lr_error (ldfile
, _("\
2293 %s: invalid number of sorting rules"),
2296 collate
->nrules
= 1;
2297 sp
->rules
= obstack_alloc (&collate
->mempool
,
2298 (sizeof (enum coll_sort_rule
)
2299 * collate
->nrules
));
2300 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
2301 sp
->rules
[cnt
] = sort_forward
;
2307 /* Get the next token. */
2308 arg
= lr_token (ldfile
, charmap
, repertoire
);
2313 /* There is no section symbol. Therefore we use the unnamed
2315 collate
->current_section
= &collate
->unnamed_section
;
2317 if (collate
->unnamed_section
.first
!= NULL
)
2318 lr_error (ldfile
, _("\
2319 %s: multiple order definitions for unnamed section"),
2323 collate
->unnamed_section
.next
= collate
->sections
;
2324 collate
->sections
= &collate
->unnamed_section
;
2328 /* Now read the direction names. */
2329 read_directions (ldfile
, arg
, charmap
, repertoire
, collate
);
2331 /* From now on we need the strings untranslated. */
2332 ldfile
->translate_strings
= 0;
2336 /* Ignore the rest of the line if we don't need the input of
2340 lr_ignore_rest (ldfile
, 0);
2347 /* Handle ellipsis at end of list. */
2348 if (was_ellipsis
!= tok_none
)
2350 handle_ellipsis (ldfile
, NULL
, was_ellipsis
, charmap
, repertoire
,
2352 was_ellipsis
= tok_none
;
2356 lr_ignore_rest (ldfile
, 1);
2359 case tok_reorder_after
:
2360 /* Ignore the rest of the line if we don't need the input of
2364 lr_ignore_rest (ldfile
, 0);
2370 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
2374 /* Handle ellipsis at end of list. */
2375 if (was_ellipsis
!= tok_none
)
2377 handle_ellipsis (ldfile
, arg
, was_ellipsis
, charmap
,
2378 repertoire
, collate
);
2379 was_ellipsis
= tok_none
;
2382 else if (state
!= 2 && state
!= 3)
2386 arg
= lr_token (ldfile
, charmap
, repertoire
);
2387 if (arg
->tok
== tok_bsymbol
)
2389 /* Find this symbol in the sequence table. */
2390 struct element_t
*insp
;
2393 if (find_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
2394 arg
->val
.str
.lenmb
, (void **) &insp
) == 0)
2395 /* Yes, the symbol exists. Simply point the cursor
2397 collate
->cursor
= insp
;
2400 /* This is bad. The symbol after which we have to
2401 insert does not exist. */
2402 lr_error (ldfile
, _("\
2403 %s: cannot reorder after %.*s: symbol not known"),
2404 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
2405 arg
->val
.str
.startmb
);
2406 collate
->cursor
= NULL
;
2410 lr_ignore_rest (ldfile
, no_error
);
2413 /* This must not happen. */
2417 case tok_reorder_end
:
2418 /* Ignore the rest of the line if we don't need the input of
2426 lr_ignore_rest (ldfile
, 1);
2429 case tok_reorder_sections_after
:
2430 /* Ignore the rest of the line if we don't need the input of
2434 lr_ignore_rest (ldfile
, 0);
2440 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
2444 /* Handle ellipsis at end of list. */
2445 if (was_ellipsis
!= tok_none
)
2447 handle_ellipsis (ldfile
, NULL
, was_ellipsis
, charmap
,
2448 repertoire
, collate
);
2449 was_ellipsis
= tok_none
;
2452 else if (state
== 3)
2454 error (0, 0, _("%s: missing `reorder-end' keyword"),
2458 else if (state
!= 2 && state
!= 4)
2462 /* Get the name of the sections we are adding after. */
2463 arg
= lr_token (ldfile
, charmap
, repertoire
);
2464 if (arg
->tok
== tok_bsymbol
)
2466 /* Now find a section with this name. */
2467 struct section_list
*runp
= collate
->sections
;
2469 while (runp
!= NULL
)
2471 if (runp
->name
!= NULL
2472 && strlen (runp
->name
) == arg
->val
.str
.lenmb
2473 && memcmp (runp
->name
, arg
->val
.str
.startmb
,
2474 arg
->val
.str
.lenmb
) == 0)
2481 collate
->current_section
= runp
;
2484 /* This is bad. The section after which we have to
2485 reorder does not exist. Therefore we cannot
2486 process the whole rest of this reorder
2488 lr_error (ldfile
, _("%s: section `%.*s' not known"),
2489 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
2490 arg
->val
.str
.startmb
);
2494 lr_ignore_rest (ldfile
, 0);
2496 now
= lr_token (ldfile
, charmap
, NULL
);
2498 while (now
->tok
== tok_reorder_sections_after
2499 || now
->tok
== tok_reorder_sections_end
2500 || now
->tok
== tok_end
);
2502 /* Process the token we just saw. */
2508 /* This must not happen. */
2512 case tok_reorder_sections_end
:
2513 /* Ignore the rest of the line if we don't need the input of
2521 lr_ignore_rest (ldfile
, 1);
2525 /* Ignore the rest of the line if we don't need the input of
2529 lr_ignore_rest (ldfile
, 0);
2533 if (state
!= 1 && state
!= 3)
2538 /* It is possible that we already have this collation sequence.
2539 In this case we move the entry. */
2540 struct element_t
*seqp
;
2542 /* If the symbol after which we have to insert was not found
2543 ignore all entries. */
2544 if (collate
->cursor
== NULL
)
2546 lr_ignore_rest (ldfile
, 0);
2550 if (find_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
2551 arg
->val
.str
.lenmb
, (void **) &seqp
) == 0)
2553 /* Remove the entry from the old position. */
2554 if (seqp
->last
== NULL
)
2555 collate
->start
= seqp
->next
;
2557 seqp
->last
->next
= seqp
->next
;
2558 if (seqp
->next
!= NULL
)
2559 seqp
->next
->last
= seqp
->last
;
2561 /* We also have to check whether this entry is the
2562 first or last of a section. */
2563 if (seqp
->section
->first
== seqp
)
2565 if (seqp
->section
->first
== seqp
->section
->last
)
2566 /* This section has no content anymore. */
2567 seqp
->section
->first
= seqp
->section
->last
= NULL
;
2569 seqp
->section
->first
= seqp
->next
;
2571 else if (seqp
->section
->last
== seqp
)
2572 seqp
->section
->last
= seqp
->last
;
2574 /* Now insert it in the new place. */
2575 seqp
->next
= collate
->cursor
->next
;
2576 seqp
->last
= collate
->cursor
;
2577 collate
->cursor
->next
= seqp
;
2578 if (seqp
->next
!= NULL
)
2579 seqp
->next
->last
= seqp
;
2581 seqp
->section
= collate
->cursor
->section
;
2582 if (seqp
->section
->last
== collate
->cursor
)
2583 seqp
->section
->last
= seqp
;
2588 /* Otherwise we just add a new entry. */
2590 else if (state
== 5)
2592 /* We are reordering sections. Find the named section. */
2593 struct section_list
*runp
= collate
->sections
;
2594 struct section_list
*prevp
= NULL
;
2596 while (runp
!= NULL
)
2598 if (runp
->name
!= NULL
2599 && strlen (runp
->name
) == arg
->val
.str
.lenmb
2600 && memcmp (runp
->name
, arg
->val
.str
.startmb
,
2601 arg
->val
.str
.lenmb
) == 0)
2610 lr_error (ldfile
, _("%s: section `%.*s' not known"),
2611 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
2612 arg
->val
.str
.startmb
);
2613 lr_ignore_rest (ldfile
, 0);
2617 if (runp
!= collate
->current_section
)
2619 /* Remove the named section from the old place and
2620 insert it in the new one. */
2621 prevp
->next
= runp
->next
;
2623 runp
->next
= collate
->current_section
->next
;
2624 collate
->current_section
->next
= runp
;
2625 collate
->current_section
= runp
;
2628 /* Process the rest of the line which might change
2629 the collation rules. */
2630 arg
= lr_token (ldfile
, charmap
, repertoire
);
2631 if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
2632 read_directions (ldfile
, arg
, charmap
, repertoire
,
2637 else if (was_ellipsis
!= tok_none
)
2639 /* Using the information in the `ellipsis_weight'
2640 element and this and the last value we have to handle
2641 the ellipsis now. */
2642 assert (state
== 1);
2644 handle_ellipsis (ldfile
, arg
, was_ellipsis
, charmap
, repertoire
,
2647 /* Remember that we processed the ellipsis. */
2648 was_ellipsis
= tok_none
;
2650 /* And don't add the value a second time. */
2654 /* Now insert in the new place. */
2655 insert_value (ldfile
, arg
, charmap
, repertoire
, collate
);
2659 /* Ignore the rest of the line if we don't need the input of
2663 lr_ignore_rest (ldfile
, 0);
2670 if (was_ellipsis
!= tok_none
)
2673 _("%s: cannot have `%s' as end of ellipsis range"),
2674 "LC_COLLATE", "UNDEFINED");
2676 unlink_element (collate
);
2677 was_ellipsis
= tok_none
;
2680 /* See whether UNDEFINED already appeared somewhere. */
2681 if (collate
->undefined
.next
!= NULL
2682 || (collate
->cursor
!= NULL
2683 && collate
->undefined
.next
== collate
->cursor
))
2686 _("%s: order for `%.*s' already defined at %s:%zu"),
2687 "LC_COLLATE", 9, "UNDEFINED", collate
->undefined
.file
,
2688 collate
->undefined
.line
);
2689 lr_ignore_rest (ldfile
, 0);
2692 /* Parse the weights. */
2693 insert_weights (ldfile
, &collate
->undefined
, charmap
,
2694 repertoire
, collate
, tok_none
);
2700 /* This is the symbolic (decimal or hexadecimal) or absolute
2702 if (was_ellipsis
!= tok_none
)
2705 if (state
!= 1 && state
!= 3)
2708 was_ellipsis
= nowtok
;
2710 insert_weights (ldfile
, &collate
->ellipsis_weight
, charmap
,
2711 repertoire
, collate
, nowtok
);
2715 /* Next we assume `LC_COLLATE'. */
2716 if (!ignore_content
)
2719 /* We must either see a copy statement or have
2722 _("%s: empty category description not allowed"),
2724 else if (state
== 1)
2726 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
2729 /* Handle ellipsis at end of list. */
2730 if (was_ellipsis
!= tok_none
)
2732 handle_ellipsis (ldfile
, NULL
, was_ellipsis
, charmap
,
2733 repertoire
, collate
);
2734 was_ellipsis
= tok_none
;
2737 else if (state
== 3)
2738 error (0, 0, _("%s: missing `reorder-end' keyword"),
2740 else if (state
== 5)
2741 error (0, 0, _("%s: missing `reorder-sections-end' keyword"),
2744 arg
= lr_token (ldfile
, charmap
, NULL
);
2745 if (arg
->tok
== tok_eof
)
2747 if (arg
->tok
== tok_eol
)
2748 lr_error (ldfile
, _("%s: incomplete `END' line"), "LC_COLLATE");
2749 else if (arg
->tok
!= tok_lc_collate
)
2750 lr_error (ldfile
, _("\
2751 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2752 lr_ignore_rest (ldfile
, arg
->tok
== tok_lc_collate
);
2757 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2760 /* Prepare for the next round. */
2761 now
= lr_token (ldfile
, charmap
, NULL
);
2765 /* When we come here we reached the end of the file. */
2766 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");