1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
30 #include "localeinfo.h"
31 #include "linereader.h"
33 #include "localedef.h"
35 /* Uncomment the following line in the production version. */
36 /* #define NDEBUG 1 */
39 #define obstack_chunk_alloc malloc
40 #define obstack_chunk_free free
42 /* Forward declaration. */
45 /* Data type for list of strings. */
48 struct section_list
*next
;
49 /* Name of the section. */
51 /* First element of this section. */
52 struct element_t
*first
;
53 /* Last element of this section. */
54 struct element_t
*last
;
55 /* These are the rules for this section. */
56 enum coll_sort_rule
*rules
;
57 /* Index of the rule set in the appropriate section of the output file. */
65 /* Number of elements. */
71 /* Data type for collating element. */
83 /* The following is a bit mask whose bits are set if this element is
84 used in the appropriate level. Interesting for the singlebyte
87 XXX The type here restricts the number of levels to 32. It could
88 be changed if necessary but I doubt this is necessary. */
89 unsigned int used_in_level
;
91 struct element_list_t
*weights
;
93 /* Where does the definition come from. */
97 /* Which section does this belong to. */
98 struct section_list
*section
;
100 /* Predecessor and successor in the order list. */
101 struct element_t
*last
;
102 struct element_t
*next
;
104 /* Next element in multibyte output list. */
105 struct element_t
*mbnext
;
108 /* Special element value. */
109 #define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
110 #define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
111 #define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
113 /* Data type for collating symbol. */
116 /* Point to place in the order list. */
117 struct element_t
*order
;
119 /* Where does the definition come from. */
125 /* The real definition of the struct for the LC_COLLATE locale. */
126 struct locale_collate_t
131 /* List of known scripts. */
132 struct section_list
*sections
;
133 /* Current section using definition. */
134 struct section_list
*current_section
;
135 /* There always can be an unnamed section. */
136 struct section_list unnamed_section
;
137 /* To make handling of errors easier we have another section. */
138 struct section_list error_section
;
140 /* Number of sorting rules given in order_start line. */
143 /* Start of the order list. */
144 struct element_t
*start
;
146 /* The undefined element. */
147 struct element_t undefined
;
149 /* This is the cursor for `reorder_after' insertions. */
150 struct element_t
*cursor
;
152 /* This value is used when handling ellipsis. */
153 struct element_t ellipsis_weight
;
155 /* Known collating elements. */
156 hash_table elem_table
;
158 /* Known collating symbols. */
159 hash_table sym_table
;
161 /* Known collation sequences. */
162 hash_table seq_table
;
164 struct obstack mempool
;
166 /* The LC_COLLATE category is a bit special as it is sometimes possible
167 that the definitions from more than one input file contain information.
168 Therefore we keep all relevant input in a list. */
169 struct locale_collate_t
*next
;
171 /* Arrays with heads of the list for each of the leading bytes in
172 the multibyte sequences. */
173 struct element_t
*mbheads
[256];
177 /* We have a few global variables which are used for reading all
178 LC_COLLATE category descriptions in all files. */
182 /* These are definitions used by some of the functions for handling
183 UTF-8 encoding below. */
184 static const uint32_t encoding_mask
[] =
186 ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
189 static const unsigned char encoding_byte
[] =
191 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
195 /* We need UTF-8 encoding of numbers. */
197 utf8_encode (char *buf
, int val
)
211 for (step
= 2; step
< 6; ++step
)
212 if ((val
& encoding_mask
[step
- 2]) == 0)
216 *buf
= encoding_byte
[step
- 2];
220 buf
[step
] = 0x80 | (val
& 0x3f);
231 static struct section_list
*
232 make_seclist_elem (struct locale_collate_t
*collate
, const char *string
,
233 struct section_list
*next
)
235 struct section_list
*newp
;
237 newp
= (struct section_list
*) obstack_alloc (&collate
->mempool
,
247 static struct element_t
*
248 new_element (struct locale_collate_t
*collate
, const char *mbs
, size_t mbslen
,
249 const uint32_t *wcs
, const char *name
, size_t namelen
)
251 struct element_t
*newp
;
253 newp
= (struct element_t
*) obstack_alloc (&collate
->mempool
,
255 newp
->name
= name
== NULL
? NULL
: obstack_copy0 (&collate
->mempool
,
259 newp
->mbs
= obstack_copy0 (&collate
->mempool
, mbs
, mbslen
);
269 size_t nwcs
= wcslen ((wchar_t *) wcs
);
271 obstack_grow (&collate
->mempool
, wcs
, nwcs
* sizeof (uint32_t));
272 obstack_grow (&collate
->mempool
, &zero
, sizeof (uint32_t));
273 newp
->wcs
= (uint32_t *) obstack_finish (&collate
->mempool
);
281 newp
->mborder
= NULL
;
283 newp
->used_in_level
= 0;
285 /* Will be allocated later. */
286 newp
->weights
= NULL
;
291 newp
->section
= collate
->current_section
;
302 static struct symbol_t
*
303 new_symbol (struct locale_collate_t
*collate
)
305 struct symbol_t
*newp
;
307 newp
= (struct symbol_t
*) obstack_alloc (&collate
->mempool
, sizeof (*newp
));
318 /* Test whether this name is already defined somewhere. */
320 check_duplicate (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
321 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
322 const char *symbol
, size_t symbol_len
)
326 if (find_entry (&charmap
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
328 lr_error (ldfile
, _("`%s' already defined in charmap"), symbol
);
332 if (find_entry (&repertoire
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
334 lr_error (ldfile
, _("`%s' already defined in repertoire"), symbol
);
338 if (find_entry (&collate
->sym_table
, symbol
, symbol_len
, &ignore
) == 0)
340 lr_error (ldfile
, _("`%s' already defined as collating symbol"), symbol
);
344 if (find_entry (&collate
->elem_table
, symbol
, symbol_len
, &ignore
) == 0)
346 lr_error (ldfile
, _("`%s' already defined as collating element"),
355 /* Read the direction specification. */
357 read_directions (struct linereader
*ldfile
, struct token
*arg
,
358 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
359 struct locale_collate_t
*collate
)
362 int max
= nrules
?: 10;
363 enum coll_sort_rule
*rules
= calloc (max
, sizeof (*rules
));
370 if (arg
->tok
== tok_forward
)
372 if (rules
[cnt
] & sort_backward
)
376 lr_error (ldfile
, _("\
377 %s: `forward' and `backward' are mutually excluding each other"),
382 else if (rules
[cnt
] & sort_forward
)
386 lr_error (ldfile
, _("\
387 %s: `%s' mentioned twice in definition of weight %d"),
388 "LC_COLLATE", "forward", cnt
+ 1);
392 rules
[cnt
] |= sort_forward
;
396 else if (arg
->tok
== tok_backward
)
398 if (rules
[cnt
] & sort_forward
)
402 lr_error (ldfile
, _("\
403 %s: `forward' and `backward' are mutually excluding each other"),
408 else if (rules
[cnt
] & sort_backward
)
412 lr_error (ldfile
, _("\
413 %s: `%s' mentioned twice in definition of weight %d"),
414 "LC_COLLATE", "backward", cnt
+ 1);
418 rules
[cnt
] |= sort_backward
;
422 else if (arg
->tok
== tok_position
)
424 if (rules
[cnt
] & sort_position
)
428 lr_error (ldfile
, _("\
429 %s: `%s' mentioned twice in definition of weight %d in category `%s'"),
430 "LC_COLLATE", "position", cnt
+ 1);
434 rules
[cnt
] |= sort_position
;
440 arg
= lr_token (ldfile
, charmap
, repertoire
);
442 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
|| arg
->tok
== tok_comma
443 || arg
->tok
== tok_semicolon
)
445 if (! valid
&& ! warned
)
447 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
451 /* See whether we have to increment the counter. */
452 if (arg
->tok
!= tok_comma
&& rules
[cnt
] != 0)
455 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
456 /* End of line or file, so we exit the loop. */
461 /* See whether we have enough room in the array. */
465 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
468 memset (&rules
[cnt
], '\0', (max
- cnt
) * sizeof (*rules
));
475 /* There must not be any more rule. */
478 lr_error (ldfile
, _("\
479 %s: too many rules; first entry only had %d"),
480 "LC_COLLATE", nrules
);
484 lr_ignore_rest (ldfile
, 0);
493 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
498 arg
= lr_token (ldfile
, charmap
, repertoire
);
503 /* Now we know how many rules we have. */
505 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
506 nrules
* sizeof (*rules
));
512 /* Not enough rules in this specification. */
514 lr_error (ldfile
, _("%s: not enough sorting rules"), "LC_COLLATE");
517 rules
[cnt
] = sort_forward
;
518 while (++cnt
< nrules
);
522 collate
->current_section
->rules
= rules
;
526 static struct element_t
*
527 find_element (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
528 const char *str
, size_t len
, uint32_t *wcstr
)
530 struct element_t
*result
= NULL
;
532 /* Search for the entries among the collation sequences already defined. */
533 if (find_entry (&collate
->seq_table
, str
, len
, (void **) &result
) != 0)
535 /* Nope, not defined yet. So we see whether it is a
539 if (find_entry (&collate
->sym_table
, str
, len
, &ptr
) == 0)
541 /* It's a collation symbol. */
542 struct symbol_t
*sym
= (struct symbol_t
*) ptr
;
546 result
= sym
->order
= new_element (collate
, NULL
, 0, NULL
,
549 else if (find_entry (&collate
->elem_table
, str
, len
,
550 (void **) &result
) != 0)
552 /* It's also no collation element. So it is a character
553 element defined later. */
554 result
= new_element (collate
, NULL
, 0, NULL
, str
, len
);
556 /* Insert it into the sequence table. */
557 insert_entry (&collate
->seq_table
, str
, len
, result
);
566 unlink_element (struct locale_collate_t
*collate
)
568 if (collate
->cursor
== collate
->start
)
570 assert (collate
->cursor
->next
== NULL
);
571 assert (collate
->cursor
->last
== NULL
);
572 collate
->cursor
= NULL
;
576 if (collate
->cursor
->next
!= NULL
)
577 collate
->cursor
->next
->last
= collate
->cursor
->last
;
578 if (collate
->cursor
->last
!= NULL
)
579 collate
->cursor
->last
->next
= collate
->cursor
->next
;
580 collate
->cursor
= collate
->cursor
->last
;
586 insert_weights (struct linereader
*ldfile
, struct element_t
*elem
,
587 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
588 struct locale_collate_t
*collate
, enum token_t ellipsis
)
593 /* Initialize all the fields. */
594 elem
->file
= ldfile
->fname
;
595 elem
->line
= ldfile
->lineno
;
596 elem
->last
= collate
->cursor
;
597 elem
->next
= collate
->cursor
? collate
->cursor
->next
: NULL
;
598 elem
->section
= collate
->current_section
;
599 if (collate
->cursor
!= NULL
)
600 collate
->cursor
->next
= elem
;
601 if (collate
->start
== NULL
)
603 assert (collate
->cursor
== NULL
);
604 collate
->start
= elem
;
606 elem
->weights
= (struct element_list_t
*)
607 obstack_alloc (&collate
->mempool
, nrules
* sizeof (struct element_list_t
));
608 memset (elem
->weights
, '\0', nrules
* sizeof (struct element_list_t
));
610 if (collate
->current_section
->first
== NULL
)
611 collate
->current_section
->first
= elem
;
612 if (collate
->current_section
->last
== collate
->cursor
)
613 collate
->current_section
->last
= elem
;
615 collate
->cursor
= elem
;
619 arg
= lr_token (ldfile
, charmap
, repertoire
);
622 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
625 if (arg
->tok
== tok_ignore
)
627 /* The weight for this level has to be ignored. We use the
628 null pointer to indicate this. */
629 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
630 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
631 elem
->weights
[weight_cnt
].w
[0] = NULL
;
632 elem
->weights
[weight_cnt
].cnt
= 1;
634 else if (arg
->tok
== tok_bsymbol
)
636 struct element_t
*val
= find_element (ldfile
, collate
,
637 arg
->val
.str
.startmb
,
639 arg
->val
.str
.startwc
);
644 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
645 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
646 elem
->weights
[weight_cnt
].w
[0] = val
;
647 elem
->weights
[weight_cnt
].cnt
= 1;
649 else if (arg
->tok
== tok_string
)
651 /* Split the string up in the individual characters and put
652 the element definitions in the list. */
653 const char *cp
= arg
->val
.str
.startmb
;
655 struct element_t
*charelem
;
656 struct element_t
**weights
= NULL
;
661 lr_error (ldfile
, _("%s: empty weight string not allowed"),
663 lr_ignore_rest (ldfile
, 0);
671 /* Ahh, it's a bsymbol. That's what we want. */
672 const char *startp
= ++cp
;
676 if (*cp
== ldfile
->escape_char
)
679 /* It's a syntax error. */
685 charelem
= find_element (ldfile
, collate
, startp
,
691 /* People really shouldn't use characters directly in
692 the string. Especially since it's not really clear
693 what this means. We interpret all characters in the
694 string as if that would be bsymbols. Otherwise we
695 would have to match back to bsymbols somehow and this
696 is not what people normally expect. */
697 charelem
= find_element (ldfile
, collate
, cp
++, 1, NULL
);
700 if (charelem
== NULL
)
702 /* We ignore the rest of the line. */
703 lr_ignore_rest (ldfile
, 0);
707 /* Add the pointer. */
710 struct element_t
**newp
;
712 newp
= (struct element_t
**)
713 alloca (max
* sizeof (struct element_t
*));
714 memcpy (newp
, weights
, cnt
* sizeof (struct element_t
*));
717 weights
[cnt
++] = charelem
;
721 /* Now store the information. */
722 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
723 obstack_alloc (&collate
->mempool
,
724 cnt
* sizeof (struct element_t
*));
725 memcpy (elem
->weights
[weight_cnt
].w
, weights
,
726 cnt
* sizeof (struct element_t
*));
727 elem
->weights
[weight_cnt
].cnt
= cnt
;
729 /* We don't need the string anymore. */
730 free (arg
->val
.str
.startmb
);
732 else if (ellipsis
!= tok_none
733 && (arg
->tok
== tok_ellipsis2
734 || arg
->tok
== tok_ellipsis3
735 || arg
->tok
== tok_ellipsis4
))
737 /* It must be the same ellipsis as used in the initial column. */
738 if (arg
->tok
!= ellipsis
)
739 lr_error (ldfile
, _("\
740 %s: weights must use the same ellipsis symbol as the name"),
743 /* The weight for this level is taken from the ellipsis. We use the
744 special value ELEMENT_ELLIPSIS2 to indicate this. */
745 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
746 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
747 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
748 elem
->weights
[weight_cnt
].cnt
= 1;
753 /* It's a syntax error. */
754 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
755 lr_ignore_rest (ldfile
, 0);
759 arg
= lr_token (ldfile
, charmap
, repertoire
);
760 /* This better should be the end of the line or a semicolon. */
761 if (arg
->tok
== tok_semicolon
)
762 /* OK, ignore this and read the next token. */
763 arg
= lr_token (ldfile
, charmap
, repertoire
);
764 else if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
766 /* It's a syntax error. */
767 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
768 lr_ignore_rest (ldfile
, 0);
772 while (++weight_cnt
< nrules
);
774 if (weight_cnt
< nrules
)
776 /* This means the rest of the line uses the current element as
780 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
781 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
782 elem
->weights
[weight_cnt
].w
[0] = elem
;
783 elem
->weights
[weight_cnt
].cnt
= 1;
785 while (++weight_cnt
< nrules
);
789 if (arg
->tok
== tok_ignore
|| arg
->tok
== tok_bsymbol
)
791 /* Too many rule values. */
792 lr_error (ldfile
, _("%s: too many values"), "LC_COLLATE");
793 lr_ignore_rest (ldfile
, 0);
796 lr_ignore_rest (ldfile
, arg
->tok
!= tok_eol
&& arg
->tok
!= tok_eof
);
802 insert_value (struct linereader
*ldfile
, struct token
*arg
,
803 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
804 struct locale_collate_t
*collate
)
806 /* First find out what kind of symbol this is. */
809 struct element_t
*elem
= NULL
;
811 /* Try to find the character in the charmap. */
812 seq
= charmap_find_value (charmap
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
814 /* Determine the wide character. */
815 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
817 wc
= repertoire_find_value (repertoire
, arg
->val
.str
.startmb
,
825 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
827 /* It's no character, so look through the collation elements and
831 if (find_entry (&collate
->sym_table
, arg
->val
.str
.startmb
,
832 arg
->val
.str
.lenmb
, &result
) == 0)
834 /* It's a collation symbol. */
835 struct symbol_t
*sym
= (struct symbol_t
*) result
;
839 elem
= sym
->order
= new_element (collate
, NULL
, 0, NULL
, NULL
, 0);
841 else if (find_entry (&collate
->elem_table
, arg
->val
.str
.startmb
,
842 arg
->val
.str
.lenmb
, (void **) &elem
) != 0)
844 /* It's also no collation element. Therefore ignore it. */
845 lr_ignore_rest (ldfile
, 0);
851 /* Otherwise the symbols stands for a character. */
852 if (find_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
853 arg
->val
.str
.lenmb
, (void **) &elem
) != 0)
855 uint32_t wcs
[2] = { wc
, 0 };
857 /* We have to allocate an entry. */
858 elem
= new_element (collate
, seq
!= NULL
? seq
->bytes
: NULL
,
859 seq
!= NULL
? seq
->nbytes
: 0,
860 wcs
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
862 /* And add it to the table. */
863 if (insert_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
864 arg
->val
.str
.lenmb
, elem
) != 0)
865 /* This cannot happen. */
866 assert (! "Internal error");
870 /* Maybe the character was used before the definition. In this case
871 we have to insert the byte sequences now. */
872 if (elem
->mbs
== NULL
&& seq
!= NULL
)
874 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
875 seq
->bytes
, seq
->nbytes
);
876 elem
->nmbs
= seq
->nbytes
;
879 if (elem
->wcs
== NULL
&& seq
!= ILLEGAL_CHAR_VALUE
)
881 uint32_t wcs
[2] = { wc
, 0 };
883 elem
->wcs
= obstack_copy (&collate
->mempool
, wcs
, sizeof (wcs
));
889 /* Test whether this element is not already in the list. */
890 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
891 && elem
->next
== collate
->cursor
))
893 lr_error (ldfile
, _("order for `%.*s' already defined at %s:%zu"),
894 (int) arg
->val
.str
.lenmb
, arg
->val
.str
.startmb
,
895 elem
->file
, elem
->line
);
896 lr_ignore_rest (ldfile
, 0);
900 insert_weights (ldfile
, elem
, charmap
, repertoire
, collate
, tok_none
);
907 handle_ellipsis (struct linereader
*ldfile
, struct token
*arg
,
908 enum token_t ellipsis
, struct charmap_t
*charmap
,
909 struct repertoire_t
*repertoire
,
910 struct locale_collate_t
*collate
)
912 struct element_t
*startp
;
913 struct element_t
*endp
;
915 /* Unlink the entry added for the ellipsis. */
916 unlink_element (collate
);
917 startp
= collate
->cursor
;
919 /* Process and add the end-entry. */
921 && insert_value (ldfile
, arg
, charmap
, repertoire
, collate
))
922 /* Something went wrong with inserting the to-value. This means
923 we cannot process the ellipsis. */
926 /* Reset the cursor. */
927 collate
->cursor
= startp
;
929 /* Now we have to handle many different situations:
930 - we have to distinguish between the three different ellipsis forms
931 - whether the ellipsis is at the beginning, in the middle, or at the end.
933 endp
= collate
->cursor
->next
;
934 assert (arg
== NULL
|| endp
!= NULL
);
936 /* Both, the start and the end symbol, must stand for characters. */
937 if ((startp
== NULL
|| startp
->name
== NULL
)
938 || (endp
== NULL
|| endp
->name
== NULL
))
940 lr_error (ldfile
, _("\
941 %s: the start end the end symbol of a range must stand for characters"),
946 if (ellipsis
== tok_ellipsis3
)
948 /* One requirement we make here: the length of the byte
949 sequences for the first and end character must be the same.
950 This is mainly to prevent unwanted effects and this is often
951 not what is wanted. */
952 size_t len
= (startp
->mbs
!= NULL
? startp
->nmbs
953 : (endp
->mbs
!= NULL
? endp
->nmbs
: 0));
957 /* Well, this should be caught somewhere else already. Just to
959 assert (startp
== NULL
|| startp
->wcs
== NULL
|| startp
->wcs
[1] == 0);
960 assert (endp
== NULL
|| endp
->wcs
== NULL
|| endp
->wcs
[1] == 0);
962 if (startp
!= NULL
&& endp
!= NULL
963 && startp
->mbs
!= NULL
&& endp
->mbs
!= NULL
964 && startp
->nmbs
!= endp
->nmbs
)
966 lr_error (ldfile
, _("\
967 %s: byte sequences of first and last character must have the same length"),
972 /* Determine whether we have to generate multibyte sequences. */
973 if ((startp
== NULL
|| startp
->mbs
!= NULL
)
974 && (endp
== NULL
|| endp
->mbs
!= NULL
))
979 /* Prepare the beginning byte sequence. This is either from the
980 beginning byte sequence or it is all nulls if it was an
982 if (startp
== NULL
|| startp
->mbs
== NULL
)
983 memset (mbcnt
, '\0', len
);
986 memcpy (mbcnt
, startp
->mbs
, len
);
988 /* And increment it so that the value is the first one we will
990 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
991 if (++mbcnt
[cnt
] != '\0')
996 /* And the end sequence. */
997 if (endp
== NULL
|| endp
->mbs
== NULL
)
998 memset (mbend
, '\0', len
);
1000 memcpy (mbend
, endp
->mbs
, len
);
1003 /* Test whether we have a correct range. */
1004 ret
= memcmp (mbcnt
, mbend
, len
);
1008 lr_error (ldfile
, _("%s: byte sequence of first character of \
1009 sequence is not lower than that of the last character"), "LC_COLLATE");
1013 /* Generate the byte sequences data. */
1016 struct charseq
*seq
;
1018 /* Quite a bit of work ahead. We have to find the character
1019 definition for the byte sequence and then determine the
1020 wide character belonging to it. */
1021 seq
= charmap_find_symbol (charmap
, mbcnt
, len
);
1024 struct element_t
*elem
;
1027 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1028 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1029 strlen (seq
->name
));
1031 /* I don't think this can ever happen. */
1032 assert (seq
->name
!= NULL
);
1033 namelen
= strlen (seq
->name
);
1035 /* Now we are ready to insert the new value in the
1036 sequence. Find out whether the element is
1038 if (find_entry (&collate
->seq_table
, seq
->name
, namelen
,
1039 (void **) &elem
) != 0)
1041 uint32_t wcs
[2] = { seq
->ucs4
, 0 };
1043 /* We have to allocate an entry. */
1044 elem
= new_element (collate
, mbcnt
, len
, wcs
, seq
->name
,
1047 /* And add it to the table. */
1048 if (insert_entry (&collate
->seq_table
, seq
->name
,
1049 namelen
, elem
) != 0)
1050 /* This cannot happen. */
1051 assert (! "Internal error");
1054 /* Test whether this element is not already in the list. */
1055 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1056 && elem
->next
== collate
->cursor
))
1058 lr_error (ldfile
, _("\
1059 order for `%.*s' already defined at %s:%zu"),
1060 (int) namelen
, seq
->name
,
1061 elem
->file
, elem
->line
);
1065 /* Enqueue the new element. */
1066 elem
->last
= collate
->cursor
;
1067 if (collate
->cursor
!= NULL
)
1071 elem
->next
= collate
->cursor
->next
;
1072 elem
->last
->next
= elem
;
1073 if (elem
->next
!= NULL
)
1074 elem
->next
->last
= elem
;
1076 if (collate
->start
== NULL
)
1078 assert (collate
->cursor
== NULL
);
1079 collate
->start
= elem
;
1081 collate
->cursor
= elem
;
1083 /* Add the weight value. We take them from the
1084 `ellipsis_weight' member of `collate'. */
1085 elem
->weights
= (struct element_list_t
*)
1086 obstack_alloc (&collate
->mempool
,
1087 nrules
* sizeof (struct element_list_t
));
1088 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1089 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1090 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1091 == ELEMENT_ELLIPSIS2
))
1093 elem
->weights
[cnt
].w
= (struct element_t
**)
1094 obstack_alloc (&collate
->mempool
,
1095 sizeof (struct element_t
*));
1096 elem
->weights
[cnt
].w
[0] = elem
;
1097 elem
->weights
[cnt
].cnt
= 1;
1101 /* Simply use the weight from `ellipsis_weight'. */
1102 elem
->weights
[cnt
].w
=
1103 collate
->ellipsis_weight
.weights
[cnt
].w
;
1104 elem
->weights
[cnt
].cnt
=
1105 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1109 /* Increment for the next round. */
1111 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1112 if (++mbcnt
[cnt
] != '\0')
1115 /* Find out whether this was all. */
1116 if (cnt
< 0 || memcmp (mbcnt
, mbend
, len
) >= 0)
1117 /* Yep, that's all. */
1124 /* For symbolic range we naturally must have a beginning and an
1125 end specified by the user. */
1127 lr_error (ldfile
, _("\
1128 %s: symbolic range ellipsis must not directly follow `order_start'"),
1130 else if (endp
== NULL
)
1131 lr_error (ldfile
, _("\
1132 %s: symbolic range ellipsis must not be direct followed by `order_end'"),
1136 /* Determine the range. To do so we have to determine the
1137 common prefix of both names and then the numeric
1138 values of both ends. */
1139 size_t lenfrom
= strlen (startp
->name
);
1140 size_t lento
= strlen (endp
->name
);
1141 char buf
[lento
+ 1];
1146 int base
= ellipsis
== tok_ellipsis2
? 16 : 10;
1148 if (lenfrom
!= lento
)
1151 lr_error (ldfile
, _("\
1152 `%s' and `%.*s' are no valid names for symbolic range"),
1153 startp
->name
, (int) lento
, endp
->name
);
1157 while (startp
->name
[preflen
] == endp
->name
[preflen
])
1158 if (startp
->name
[preflen
] == '\0')
1159 /* Nothing to be done. The start and end point are identical
1160 and while inserting the end point we have already given
1161 the user an error message. */
1167 from
= strtol (startp
->name
+ preflen
, &cp
, base
);
1168 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1172 to
= strtol (endp
->name
+ preflen
, &cp
, base
);
1173 if ((to
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1176 /* Copy the prefix. */
1177 memcpy (buf
, startp
->name
, preflen
);
1179 /* Loop over all values. */
1180 for (++from
; from
< to
; ++from
)
1182 struct element_t
*elem
= NULL
;
1183 struct charseq
*seq
;
1187 /* Generate the name. */
1188 sprintf (buf
+ preflen
, base
== 10 ? "%d" : "%x", from
);
1190 /* Look whether this name is already defined. */
1191 if (find_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
1192 arg
->val
.str
.lenmb
, (void **) &elem
) == 0)
1194 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1195 && elem
->next
== collate
->cursor
))
1197 lr_error (ldfile
, _("\
1198 %s: order for `%.*s' already defined at %s:%zu"),
1199 "LC_COLLATE", (int) lenfrom
, buf
,
1200 elem
->file
, elem
->line
);
1204 if (elem
->name
== NULL
)
1206 lr_error (ldfile
, _("%s: `%s' must be a charater"),
1212 if (elem
== NULL
|| (elem
->mbs
== NULL
&& elem
->wcs
== NULL
))
1214 /* Search for a character of this name. */
1215 seq
= charmap_find_value (charmap
, buf
, lenfrom
);
1216 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1218 wc
= repertoire_find_value (repertoire
, buf
, lenfrom
);
1226 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
1227 /* We don't know anything about a character with this
1228 name. XXX Should we warn? */
1233 uint32_t wcs
[2] = { wc
, 0 };
1235 /* We have to allocate an entry. */
1236 elem
= new_element (collate
,
1237 seq
!= NULL
? seq
->bytes
: NULL
,
1238 seq
!= NULL
? seq
->nbytes
: 0,
1239 wc
== ILLEGAL_CHAR_VALUE
1245 /* Update the element. */
1248 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1249 seq
->bytes
, seq
->nbytes
);
1250 elem
->nmbs
= seq
->nbytes
;
1253 if (wc
!= ILLEGAL_CHAR_VALUE
)
1257 obstack_grow (&collate
->mempool
,
1258 &wc
, sizeof (uint32_t));
1259 obstack_grow (&collate
->mempool
,
1260 &zero
, sizeof (uint32_t));
1261 elem
->wcs
= obstack_finish (&collate
->mempool
);
1266 elem
->file
= ldfile
->fname
;
1267 elem
->line
= ldfile
->lineno
;
1268 elem
->section
= collate
->current_section
;
1271 /* Enqueue the new element. */
1272 elem
->last
= collate
->cursor
;
1273 elem
->next
= collate
->cursor
->next
;
1274 elem
->last
->next
= elem
;
1275 if (elem
->next
!= NULL
)
1276 elem
->next
->last
= elem
;
1277 collate
->cursor
= elem
;
1279 /* Now add the weights. They come from the `ellipsis_weight'
1280 member of `collate'. */
1281 elem
->weights
= (struct element_list_t
*)
1282 obstack_alloc (&collate
->mempool
,
1283 nrules
* sizeof (struct element_list_t
));
1284 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1285 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1286 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1287 == ELEMENT_ELLIPSIS2
))
1289 elem
->weights
[cnt
].w
= (struct element_t
**)
1290 obstack_alloc (&collate
->mempool
,
1291 sizeof (struct element_t
*));
1292 elem
->weights
[cnt
].w
[0] = elem
;
1293 elem
->weights
[cnt
].cnt
= 1;
1297 /* Simply use the weight from `ellipsis_weight'. */
1298 elem
->weights
[cnt
].w
=
1299 collate
->ellipsis_weight
.weights
[cnt
].w
;
1300 elem
->weights
[cnt
].cnt
=
1301 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1310 collate_startup (struct linereader
*ldfile
, struct localedef_t
*locale
,
1311 struct localedef_t
*copy_locale
, int ignore_content
)
1313 if (!ignore_content
)
1315 struct locale_collate_t
*collate
;
1317 if (copy_locale
== NULL
)
1319 collate
= locale
->categories
[LC_COLLATE
].collate
=
1320 (struct locale_collate_t
*)
1321 xcalloc (1, sizeof (struct locale_collate_t
));
1323 /* Init the various data structures. */
1324 init_hash (&collate
->elem_table
, 100);
1325 init_hash (&collate
->sym_table
, 100);
1326 init_hash (&collate
->seq_table
, 500);
1327 obstack_init (&collate
->mempool
);
1329 collate
->col_weight_max
= -1;
1332 collate
= locale
->categories
[LC_COLLATE
].collate
=
1333 copy_locale
->categories
[LC_COLLATE
].collate
;
1336 ldfile
->translate_strings
= 0;
1337 ldfile
->return_widestr
= 0;
1342 collate_finish (struct localedef_t
*locale
, struct charmap_t
*charmap
)
1344 /* Now is the time when we can assign the individual collation
1345 values for all the symbols. We have possibly different values
1346 for the wide- and the multibyte-character symbols. This is done
1347 since it might make a difference in the encoding if there is in
1348 some cases no multibyte-character but there are wide-characters.
1349 (The other way around it is not important since the encoded
1350 collation value in the wide-character case is 32 bits wide and
1351 therefore requires no encoding).
1353 The lowest collation value assigned is 2. Zero is reserved for
1354 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1355 functions and 1 is used to separate the individual passes for the
1358 We also have to construct a list with all the bytes/words which
1359 can come first in a sequence, followed by all the elements which
1360 also start with this byte/word. The order is reverse which has
1361 among others the important effect that longer strings are located
1362 first in the list. This is required for the output data since
1363 the algorithm used in `strcoll' etc depends on this.
1365 The multibyte case is easy. We simply sort into an array with
1367 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1370 struct element_t
*runp
;
1372 int need_undefined
= 0;
1373 struct section_list
*sect
;
1376 /* If this assertion is hit change the type in `element_t'. */
1377 assert (nrules
<= sizeof (runp
->used_in_level
) * 8);
1379 /* Find out which elements are used at which level. At the same
1380 time we find out whether we have any undefined symbols. */
1381 runp
= collate
->start
;
1382 while (runp
!= NULL
)
1384 if (runp
->mbs
!= NULL
)
1386 for (i
= 0; i
< nrules
; ++i
)
1390 for (j
= 0; j
< runp
->weights
[i
].cnt
; ++j
)
1391 /* A NULL pointer as the weight means IGNORE. */
1392 if (runp
->weights
[i
].w
[j
] != NULL
)
1394 if (runp
->weights
[i
].w
[j
]->weights
== NULL
)
1396 error_at_line (0, 0, runp
->file
, runp
->line
,
1397 _("symbol `%s' not defined"),
1398 runp
->weights
[i
].w
[j
]->name
);
1401 runp
->weights
[i
].w
[j
] = &collate
->undefined
;
1404 /* Set the bit for the level. */
1405 runp
->weights
[i
].w
[j
]->used_in_level
|= 1 << i
;
1410 /* Up to the next entry. */
1414 /* Walk through the list of defined sequences and assign weights. Also
1415 create the data structure which will allow generating the single byte
1416 character based tables.
1418 Since at each time only the weights for each of the rules are
1419 only compared to other weights for this rule it is possible to
1420 assign more compact weight values than simply counting all
1421 weights in sequence. We can assign weights from 3, one for each
1422 rule individually and only for those elements, which are actually
1425 Why is this important? It is not for the wide char table. But
1426 it is for the singlebyte output since here larger numbers have to
1427 be encoded to make it possible to emit the value as a byte
1429 for (i
= 0; i
< nrules
; ++i
)
1432 runp
= collate
->start
;
1433 while (runp
!= NULL
)
1435 /* Determine the order. */
1436 if (runp
->used_in_level
!= 0)
1438 runp
->mborder
= (int *) obstack_alloc (&collate
->mempool
,
1439 nrules
* sizeof (int));
1441 for (i
= 0; i
< nrules
; ++i
)
1442 if ((runp
->used_in_level
& (1 << i
)) != 0)
1443 runp
->mborder
[i
] = mbact
[i
]++;
1445 runp
->mborder
[i
] = 0;
1448 if (runp
->mbs
!= NULL
)
1450 struct element_t
**eptr
;
1452 /* Find the point where to insert in the list. */
1453 eptr
= &collate
->mbheads
[((unsigned char *) runp
->mbs
)[0]];
1454 while (*eptr
!= NULL
)
1456 if ((*eptr
)->nmbs
< runp
->nmbs
)
1459 if ((*eptr
)->nmbs
== runp
->nmbs
)
1461 int c
= memcmp ((*eptr
)->mbs
, runp
->mbs
, runp
->nmbs
);
1465 /* This should not happen. It means that we have
1466 to symbols with the same byte sequence. It is
1467 of course an error. */
1468 error_at_line (0, 0, (*eptr
)->file
, (*eptr
)->line
,
1469 _("symbol `%s' has same encoding as"),
1471 error_at_line (0, 0, runp
->file
, runp
->line
,
1472 _("symbol `%s'"), runp
->name
);
1476 /* Insert it here. */
1480 /* To the next entry. */
1481 eptr
= &(*eptr
)->mbnext
;
1484 /* Set the pointers. */
1485 runp
->mbnext
= *eptr
;
1490 if (runp
->wcs
!= NULL
)
1491 runp
->wcorder
= wcact
++;
1493 /* Up to the next entry. */
1497 /* Find out whether any of the `mbheads' entries is unset. In this
1498 case we use the UNDEFINED entry. */
1499 for (i
= 1; i
< 256; ++i
)
1500 if (collate
->mbheads
[i
] == NULL
)
1503 collate
->mbheads
[i
] = &collate
->undefined
;
1506 /* Now determine whether the UNDEFINED entry is needed and if yes,
1507 whether it was defined. */
1508 collate
->undefined
.used_in_level
= need_undefined
? ~0ul : 0;
1509 if (need_undefined
&& collate
->undefined
.file
== NULL
)
1511 error (0, 0, _("no definition of `UNDEFINED'"));
1513 /* Add UNDEFINED at the end. */
1514 collate
->undefined
.mborder
=
1515 (int *) obstack_alloc (&collate
->mempool
, nrules
* sizeof (int));
1517 for (i
= 0; i
< nrules
; ++i
)
1518 collate
->undefined
.mborder
[i
] = mbact
[i
]++;
1520 collate
->undefined
.wcorder
= wcact
++;
1523 /* Finally, try to unify the rules for the sections. Whenever the rules
1524 for a section are the same as those for another section give the
1525 ruleset the same index. Since there are never many section we can
1526 use an O(n^2) algorithm here. */
1527 sect
= collate
->sections
;
1528 assert (sect
!= NULL
);
1532 struct section_list
*osect
= collate
->sections
;
1534 while (osect
!= sect
)
1535 if (memcmp (osect
->rules
, sect
->rules
, nrules
) == 0)
1538 osect
= osect
->next
;
1541 sect
->ruleidx
= ruleidx
++;
1543 sect
->ruleidx
= osect
->ruleidx
;
1548 while (sect
!= NULL
);
1549 /* We are currently not prepared for more than 256 rulesets. But this
1550 should never really be a problem. */
1551 assert (ruleidx
<= 256);
/* output_weight: append the weight record for ELEM to the obstack POOL.
   In the visible code `retval' is set to obstack_object_size (pool)
   before anything is appended.  Then, for each of the `nrules' rules,
   one byte holding elem->section->rules[cnt] is grown onto POOL,
   followed by the encoded weights of that rule: IGNORE_CHAR for a NULL
   weight pointer, otherwise the utf8_encode output for the referenced
   element's mborder[cnt].
   ELEM == &collate->undefined is special-cased first ("The weights are
   already inserted"); the statement executed for that case and the
   final return are not shown in this listing.
   NOTE(review): `buf' is a variable-length array of
   elem->weights[cnt].cnt * 7 bytes, i.e. it presumes utf8_encode emits
   at most 7 bytes per value and that the count is non-zero; confirm
   both against utf8_encode and the callers.  */
1555 static inline int32_t
1556 output_weight (struct obstack
*pool
, struct locale_collate_t
*collate
,
1557 struct element_t
*elem
)
1562 /* Optimize the use of UNDEFINED. */
1563 if (elem
== &collate
->undefined
)
1564 /* The weights are already inserted. */
1567 /* This byte can start exactly one collation element and this is
1568 a single byte. We can directly give the index to the weights. */
1569 retval
= obstack_object_size (pool
);
1571 /* Construct the weight. */
1572 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1574 char buf
[elem
->weights
[cnt
].cnt
* 7];
1578 /* Add the direction. */
1579 obstack_1grow (pool
, elem
->section
->rules
[cnt
]);
1581 for (i
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1582 /* Encode the weight value. */
1583 if (elem
->weights
[cnt
].w
[i
] == NULL
)
1585 /* This entry was IGNORE. */
1586 buf
[len
++] = IGNORE_CHAR
;
1589 len
+= utf8_encode (&buf
[len
],
1590 elem
->weights
[cnt
].w
[i
]->mborder
[cnt
]);
1592 /* And add the buffer content. */
1593 obstack_grow (pool
, buf
, len
);
/* collate_output: assemble the LC_COLLATE data of LOCALE and hand it to
   write_locale_data (output_path, "LC_COLLATE", ...).
   Layout as far as the visible code shows: iov[0] is the
   `struct locale_file' header, iov[1] the idx[] offset table, and the
   following slots are filled in the order checked by the assertions on
   _NL_ITEM_INDEX: _NL_COLLATE_NRULES, _NL_COLLATE_RULESETS,
   _NL_COLLATE_TABLEMB, _NL_COLLATE_WEIGHTMB and _NL_COLLATE_EXTRAMB.
   After each item idx[1 + cnt] is set to idx[cnt] plus the length of
   the item just added.
   tablemb[ch] receives the output_weight result when the byte starts
   exactly one single-byte element; otherwise it receives the negated
   current size of `extrapool', and the sequences starting with that
   byte are appended to `extrapool'.
   The two obstacks `weightpool' and `extrapool' are initialised at the
   top and released with obstack_free after the data was written.
   NOTE(review): this listing has gaps in its embedded line numbering
   (braces, `else'/`do' keywords and several declarations are not
   shown).  `charmap' is not referenced in the visible lines.  */
1601 collate_output (struct localedef_t
*locale
, struct charmap_t
*charmap
,
1602 const char *output_path
)
1604 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1605 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
1606 struct iovec iov
[2 + nelems
];
1607 struct locale_file data
;
1608 uint32_t idx
[nelems
];
1611 int32_t tablemb
[256];
1612 struct obstack weightpool
;
1613 struct obstack extrapool
;
1614 struct section_list
*sect
;
1617 obstack_init (&weightpool
);
1618 obstack_init (&extrapool
);
1620 data
.magic
= LIMAGIC (LC_COLLATE
);
1622 iov
[0].iov_base
= (void *) &data
;
1623 iov
[0].iov_len
= sizeof (data
);
1625 iov
[1].iov_base
= (void *) idx
;
1626 iov
[1].iov_len
= sizeof (idx
);
1628 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
1631 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_NRULES
));
1632 iov
[2 + cnt
].iov_base
= &collate
->nrules
;
1633 iov
[2 + cnt
].iov_len
= sizeof (uint32_t);
1634 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
1637 /* Prepare the ruleset table. */
1638 for (sect
= collate
->sections
, i
= 0; sect
!= NULL
; sect
= sect
->next
)
1639 if (sect
->ruleidx
== i
)
/* NOTE(review): this copies `nrules' bytes from sect->rules, which is
   declared `enum coll_sort_rule *'; output_weight stores each rule as a
   single byte with obstack_1grow instead.  Confirm that a raw byte copy
   of the enum array yields the intended one-byte-per-rule table.  */
1641 obstack_grow (&weightpool
, sect
->rules
, nrules
);
1644 /* And align the output. */
1645 i
= (nrules
* i
) % __alignof__ (int32_t);
1648 obstack_1grow (&weightpool
, '\0');
1649 while (++i
< __alignof__ (int32_t));
1651 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_RULESETS
));
1652 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
1653 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
1654 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
1657 /* Generate the 8-bit table. Walk through the lists of sequences
1658 starting with the same byte and add them one after the other to
1659 the table. In case we have more than one sequence starting with
1660 the same byte we have to use extra indirection.
1662 First add a record for the NUL byte. This entry will never be used
1663 so it does not matter. */
1666 /* Now insert the `UNDEFINED' value if it is used. Since this value
1667 will probably be used more than once it is good to store the
1668 weights only once. */
1669 if (collate
->undefined
.used_in_level
!= 0)
1670 output_weight (&weightpool
, collate
, &collate
->undefined
);
1672 for (ch
= 1; ch
< 256; ++ch
)
1673 if (collate
->mbheads
[ch
]->mbnext
== NULL
1674 && collate
->mbheads
[ch
]->nmbs
== 1)
1676 tablemb
[ch
] = output_weight (&weightpool
, collate
,
1677 collate
->mbheads
[ch
]);
1681 /* The entries in the list are sorted by length and then
1682 alphabetically. This is the order in which we will add the
1683 elements to the collation table. This allows to simply
1684 walk the table in sequence and stop at the first matching
1685 entry. Since the longer sequences are coming first in the
1686 list they have the possibility to match first, just as it
1687 has to be. In the worst case we are walking to the end of
1688 the list where we put, if no singlebyte sequence is defined
1689 in the locale definition, the weights for UNDEFINED.
1691 To reduce the length of the search list we compress them a bit.
1692 This happens by collecting sequences of consecutive byte
1693 sequences in one entry (having a begin and end byte sequence)
1694 and add only one index into the weight table. We can find the
1695 consecutive entries since they are also consecutive in the list. */
1696 struct element_t
*runp
= collate
->mbheads
[ch
];
1697 struct element_t
*lastp
;
1699 tablemb
[ch
] = -obstack_object_size (&extrapool
);
1703 /* Store the current index in the weight table. We know that
1704 the current position in the `extrapool' is aligned on a
1709 /* Output the weight info. */
1710 weightidx
= output_weight (&weightpool
, collate
, runp
);
1712 /* Find out whether this is a single entry or we have more than
1713 one consecutive entry. */
1714 if (runp
->mbnext
!= NULL
1715 && runp
->nmbs
== runp
->mbnext
->nmbs
1716 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
, runp
->nmbs
- 1) == 0
1717 && (runp
->mbs
[runp
->nmbs
- 1] + 1
1718 == runp
->mbnext
->mbs
[runp
->nmbs
- 1]))
1722 /* More than one consecutive entry. We mark this by having
1723 a negative index into the weight table. */
1724 weightidx
= -weightidx
;
1726 /* Now add first the initial byte sequence. */
1727 added
= ((sizeof (int32_t) + 1 + 1 + 2 * (runp
->nmbs
- 1)
1728 + __alignof__ (int32_t) - 1)
1729 & ~(__alignof__ (int32_t) - 1));
1730 obstack_make_room (&extrapool
, added
);
1732 if (sizeof (int32_t) == sizeof (int))
1733 obstack_int_grow_fast (&extrapool
, weightidx
);
1735 obstack_grow (&extrapool
, &weightidx
, sizeof (int32_t));
1736 obstack_1grow_fast (&extrapool
, runp
->section
->ruleidx
);
1737 obstack_1grow_fast (&extrapool
, runp
->nmbs
- 1);
1738 for (i
= 1; i
< runp
->nmbs
; ++i
)
1739 obstack_1grow_fast (&extrapool
, runp
->mbs
[i
]);
1741 /* Now find the end of the consecutive sequence. */
1744 while (runp
->mbnext
!= NULL
1745 && runp
->nmbs
== runp
->mbnext
->nmbs
1746 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
,
1747 runp
->nmbs
- 1) == 0
1748 && (runp
->mbs
[runp
->nmbs
- 1] + 1
1749 == runp
->mbnext
->mbs
[runp
->nmbs
- 1]));
1751 /* And add the end byte sequence. Without length this time. */
1752 for (i
= 1; i
< runp
->nmbs
; ++i
)
1753 obstack_1grow_fast (&extrapool
, runp
->mbs
[i
]);
1757 /* A single entry. Simply add the index and the length and
1758 string (except for the first character which is already
1762 added
= ((sizeof (int32_t) + 1 + 1 + runp
->nmbs
- 1
1763 + __alignof__ (int32_t) - 1)
1764 & ~(__alignof__ (int32_t) - 1));
1765 obstack_make_room (&extrapool
, added
);
1767 if (sizeof (int32_t) == sizeof (int))
1768 obstack_int_grow_fast (&extrapool
, weightidx
);
1770 obstack_grow (&extrapool
, &weightidx
, sizeof (int32_t));
1771 obstack_1grow_fast (&extrapool
, runp
->section
->ruleidx
);
1772 obstack_1grow_fast (&extrapool
, runp
->nmbs
- 1);
1773 for (i
= 1; i
< runp
->nmbs
; ++i
)
1774 obstack_1grow_fast (&extrapool
, runp
->mbs
[i
]);
1777 /* Add alignment bytes if necessary. */
1778 i
= added
% __alignof__ (int32_t);
1781 obstack_1grow_fast (&extrapool
, '\0');
1782 while (++i
!= __alignof__ (int32_t));
1786 runp
= runp
->mbnext
;
1788 while (runp
!= NULL
);
1790 /* If the final entry in the list is not a single character we
1791 add an UNDEFINED entry here. */
1792 if (lastp
->nmbs
!= 1)
1794 int added
= ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t))
1795 & ~(__alignof__ (int32_t) - 1));
1796 obstack_make_room (&extrapool
, added
);
1798 if (sizeof (int32_t) == sizeof (int))
1799 obstack_int_grow_fast (&extrapool
, 0);
1803 obstack_grow (&extrapool
, &zero
, sizeof (int32_t));
1805 /* XXX What rule? We just pick the first. */
1806 obstack_1grow_fast (&extrapool
, 0);
1807 /* Length is zero. */
1808 obstack_1grow_fast (&extrapool
, 0);
1810 /* Add alignment bytes if necessary. */
1811 i
= added
% __alignof__ (int32_t);
1814 obstack_1grow_fast (&extrapool
, '\0');
1815 while (++i
!= __alignof__ (int32_t));
1819 /* Now add the three tables. */
1820 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB
));
1821 iov
[2 + cnt
].iov_base
= tablemb
;
1822 iov
[2 + cnt
].iov_len
= sizeof (tablemb
);
1823 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
1826 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB
));
1827 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
1828 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
1829 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
1832 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB
));
1833 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
1834 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
1835 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
1839 assert (cnt
== _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
));
1841 write_locale_data (output_path
, "LC_COLLATE", 2 + cnt
, iov
);
1843 obstack_free (&weightpool
, NULL
);
1844 obstack_free (&extrapool
, NULL
);
1849 collate_read (struct linereader
*ldfile
, struct localedef_t
*result
,
1850 struct charmap_t
*charmap
, const char *repertoire_name
,
1853 struct repertoire_t
*repertoire
= NULL
;
1854 struct locale_collate_t
*collate
;
1856 struct token
*arg
= NULL
;
1857 enum token_t nowtok
;
1859 enum token_t was_ellipsis
= tok_none
;
1860 struct localedef_t
*copy_locale
= NULL
;
1862 /* Get the repertoire we have to use. */
1863 if (repertoire_name
!= NULL
)
1864 repertoire
= repertoire_read (repertoire_name
);
1866 /* The rest of the line containing `LC_COLLATE' must be free. */
1867 lr_ignore_rest (ldfile
, 1);
1871 now
= lr_token (ldfile
, charmap
, NULL
);
1874 while (nowtok
== tok_eol
);
1876 if (nowtok
== tok_copy
)
1879 now
= lr_token (ldfile
, charmap
, NULL
);
1880 if (now
->tok
!= tok_string
)
1882 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
1886 now
= lr_token (ldfile
, charmap
, NULL
);
1887 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
1889 if (now
->tok
!= tok_eof
1890 || (now
= lr_token (ldfile
, charmap
, NULL
), now
->tok
== tok_eof
))
1891 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");
1892 else if (now
->tok
!= tok_lc_collate
)
1894 lr_error (ldfile
, _("\
1895 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
1896 lr_ignore_rest (ldfile
, 0);
1899 lr_ignore_rest (ldfile
, 1);
1904 /* Get the locale definition. */
1905 copy_locale
= find_locale (LC_COLLATE
, now
->val
.str
.startmb
,
1906 repertoire_name
, charmap
);
1907 if ((copy_locale
->avail
& COLLATE_LOCALE
) == 0)
1909 /* Not yet loaded. So do it now. */
1910 if (locfile_read (copy_locale
, charmap
) != 0)
1914 lr_ignore_rest (ldfile
, 1);
1916 now
= lr_token (ldfile
, charmap
, NULL
);
1920 /* Prepare the data structures. */
1921 collate_startup (ldfile
, result
, copy_locale
, ignore_content
);
1922 collate
= result
->categories
[LC_COLLATE
].collate
;
1926 /* Of course we don't proceed beyond the end of file. */
1927 if (nowtok
== tok_eof
)
1930 /* Ignore empty lines. */
1931 if (nowtok
== tok_eol
)
1933 now
= lr_token (ldfile
, charmap
, NULL
);
1940 case tok_coll_weight_max
:
1941 /* Ignore the rest of the line if we don't need the input of
1945 lr_ignore_rest (ldfile
, 0);
1952 arg
= lr_token (ldfile
, charmap
, NULL
);
1953 if (arg
->tok
!= tok_number
)
1955 if (collate
->col_weight_max
!= -1)
1956 lr_error (ldfile
, _("%s: duplicate definition of `%s'"),
1957 "LC_COLLATE", "col_weight_max");
1959 collate
->col_weight_max
= arg
->val
.num
;
1960 lr_ignore_rest (ldfile
, 1);
1963 case tok_section_symbol
:
1964 /* Ignore the rest of the line if we don't need the input of
1968 lr_ignore_rest (ldfile
, 0);
1975 arg
= lr_token (ldfile
, charmap
, repertoire
);
1976 if (arg
->tok
!= tok_bsymbol
)
1978 else if (!ignore_content
)
1980 /* Check whether this section is already known. */
1981 struct section_list
*known
= collate
->sections
;
1982 while (known
!= NULL
)
1983 if (strcmp (known
->name
, arg
->val
.str
.startmb
) == 0)
1989 _("%s: duplicate declaration of section `%s'"),
1990 "LC_COLLATE", arg
->val
.str
.startmb
);
1991 free (arg
->val
.str
.startmb
);
1994 collate
->sections
= make_seclist_elem (collate
,
1995 arg
->val
.str
.startmb
,
1998 lr_ignore_rest (ldfile
, known
== NULL
);
2002 free (arg
->val
.str
.startmb
);
2003 lr_ignore_rest (ldfile
, 0);
2007 case tok_collating_element
:
2008 /* Ignore the rest of the line if we don't need the input of
2012 lr_ignore_rest (ldfile
, 0);
2019 arg
= lr_token (ldfile
, charmap
, repertoire
);
2020 if (arg
->tok
!= tok_bsymbol
)
2024 const char *symbol
= arg
->val
.str
.startmb
;
2025 size_t symbol_len
= arg
->val
.str
.lenmb
;
2027 /* Next the `from' keyword. */
2028 arg
= lr_token (ldfile
, charmap
, repertoire
);
2029 if (arg
->tok
!= tok_from
)
2031 free ((char *) symbol
);
2035 ldfile
->return_widestr
= 1;
2037 /* Finally the string with the replacement. */
2038 arg
= lr_token (ldfile
, charmap
, repertoire
);
2039 ldfile
->return_widestr
= 0;
2040 if (arg
->tok
!= tok_string
)
2043 if (!ignore_content
)
2046 lr_error (ldfile
, _("\
2047 %s: unknown character in collating element name"),
2049 if (arg
->val
.str
.startmb
== NULL
)
2050 lr_error (ldfile
, _("\
2051 %s: unknown character in collating element definition"),
2053 if (arg
->val
.str
.startwc
== NULL
)
2054 lr_error (ldfile
, _("\
2055 %s: unknown wide character in collating element definition"),
2057 else if (arg
->val
.str
.lenwc
< 2)
2058 lr_error (ldfile
, _("\
2059 %s: substitution string in collating element definition must have at least two characters"),
2064 /* The name is already defined. */
2065 if (check_duplicate (ldfile
, collate
, charmap
,
2066 repertoire
, symbol
, symbol_len
))
2069 if (insert_entry (&collate
->elem_table
,
2071 new_element (collate
,
2072 NULL
, 0, NULL
, symbol
,
2074 lr_error (ldfile
, _("\
2075 error while adding collating element"));
2084 free ((char *) symbol
);
2085 if (arg
->val
.str
.startmb
!= NULL
)
2086 free (arg
->val
.str
.startmb
);
2087 if (arg
->val
.str
.startwc
!= NULL
)
2088 free (arg
->val
.str
.startwc
);
2090 lr_ignore_rest (ldfile
, 1);
2094 case tok_collating_symbol
:
2095 /* Ignore the rest of the line if we don't need the input of
2099 lr_ignore_rest (ldfile
, 0);
2106 arg
= lr_token (ldfile
, charmap
, repertoire
);
2107 if (arg
->tok
!= tok_bsymbol
)
2111 const char *symbol
= arg
->val
.str
.startmb
;
2112 size_t symbol_len
= arg
->val
.str
.lenmb
;
2114 if (!ignore_content
)
2117 lr_error (ldfile
, _("\
2118 %s: unknown character in collating symbol name"),
2122 /* The name is already defined. */
2123 if (check_duplicate (ldfile
, collate
, charmap
,
2124 repertoire
, symbol
, symbol_len
))
2127 if (insert_entry (&collate
->sym_table
,
2129 new_symbol (collate
)) < 0)
2130 lr_error (ldfile
, _("\
2131 error while adding collating symbol"));
2138 free ((char *) symbol
);
2140 lr_ignore_rest (ldfile
, 1);
2144 case tok_symbol_equivalence
:
2145 /* Ignore the rest of the line if we don't need the input of
2149 lr_ignore_rest (ldfile
, 0);
2156 arg
= lr_token (ldfile
, charmap
, repertoire
);
2157 if (arg
->tok
!= tok_bsymbol
)
2161 const char *newname
= arg
->val
.str
.startmb
;
2162 size_t newname_len
= arg
->val
.str
.lenmb
;
2163 const char *symname
;
2165 struct symbol_t
*symval
;
2167 arg
= lr_token (ldfile
, charmap
, repertoire
);
2168 if (arg
->tok
!= tok_bsymbol
)
2170 if (newname
!= NULL
)
2171 free ((char *) newname
);
2175 symname
= arg
->val
.str
.startmb
;
2176 symname_len
= arg
->val
.str
.lenmb
;
2178 if (!ignore_content
)
2180 if (newname
== NULL
)
2182 lr_error (ldfile
, _("\
2183 %s: unknown character in equivalent definition name"),
2185 goto sym_equiv_free
;
2187 if (symname
== NULL
)
2189 lr_error (ldfile
, _("\
2190 %s: unknown character in equivalent definition value"),
2192 goto sym_equiv_free
;
2194 /* The name is already defined. */
2195 if (check_duplicate (ldfile
, collate
, charmap
,
2196 repertoire
, symname
, symname_len
))
2199 /* See whether the symbol name is already defined. */
2200 if (find_entry (&collate
->sym_table
, symname
, symname_len
,
2201 (void **) &symval
) != 0)
2203 lr_error (ldfile
, _("\
2204 %s: unknown symbol `%s' in equivalent definition"),
2205 "LC_COLLATE", symname
);
2209 if (insert_entry (&collate
->sym_table
,
2210 newname
, newname_len
, symval
) < 0)
2212 lr_error (ldfile
, _("\
2213 error while adding equivalent collating symbol"));
2214 goto sym_equiv_free
;
2217 free ((char *) symname
);
2222 if (newname
!= NULL
)
2223 free ((char *) newname
);
2224 if (symname
!= NULL
)
2225 free ((char *) symname
);
2227 lr_ignore_rest (ldfile
, 1);
2231 case tok_order_start
:
2232 /* Ignore the rest of the line if we don't need the input of
2236 lr_ignore_rest (ldfile
, 0);
2240 if (state
!= 0 && state
!= 1)
2244 /* The 14652 draft does not specify whether all `order_start' lines
2245 must contain the same number of sort-rules, but 14651 does. So
2246 we require this here as well. */
2247 arg
= lr_token (ldfile
, charmap
, repertoire
);
2248 if (arg
->tok
== tok_bsymbol
)
2250 /* This better should be a section name. */
2251 struct section_list
*sp
= collate
->sections
;
2253 && strcmp (sp
->name
, arg
->val
.str
.startmb
) != 0)
2258 lr_error (ldfile
, _("\
2259 %s: unknown section name `%s'"),
2260 "LC_COLLATE", arg
->val
.str
.startmb
);
2261 /* We use the error section. */
2262 collate
->current_section
= &collate
->error_section
;
2264 if (collate
->error_section
.first
== NULL
)
2266 collate
->error_section
.next
= collate
->sections
;
2267 collate
->sections
= &collate
->error_section
;
2272 /* Remember this section. */
2273 collate
->current_section
= sp
;
2275 /* One should not be allowed to open the same
2277 if (sp
->first
!= NULL
)
2278 lr_error (ldfile
, _("\
2279 %s: multiple order definitions for section `%s'"),
2280 "LC_COLLATE", sp
->name
);
2283 sp
->next
= collate
->sections
;
2284 collate
->sections
= sp
;
2287 /* Next should come the end of the line or a semicolon. */
2288 arg
= lr_token (ldfile
, charmap
, repertoire
);
2289 if (arg
->tok
== tok_eol
)
2293 /* This means we have exactly one rule: `forward'. */
2294 if (collate
->nrules
> 1)
2295 lr_error (ldfile
, _("\
2296 %s: invalid number of sorting rules"),
2299 collate
->nrules
= 1;
2300 sp
->rules
= obstack_alloc (&collate
->mempool
,
2301 (sizeof (enum coll_sort_rule
)
2302 * collate
->nrules
));
2303 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
2304 sp
->rules
[cnt
] = sort_forward
;
2310 /* Get the next token. */
2311 arg
= lr_token (ldfile
, charmap
, repertoire
);
2316 /* There is no section symbol. Therefore we use the unnamed
2318 collate
->current_section
= &collate
->unnamed_section
;
2320 if (collate
->unnamed_section
.first
!= NULL
)
2321 lr_error (ldfile
, _("\
2322 %s: multiple order definitions for unnamed section"),
2326 collate
->unnamed_section
.next
= collate
->sections
;
2327 collate
->sections
= &collate
->unnamed_section
;
2331 /* Now read the direction names. */
2332 read_directions (ldfile
, arg
, charmap
, repertoire
, collate
);
2334 /* From now on we need the strings untranslated. */
2335 ldfile
->translate_strings
= 0;
2339 /* Ignore the rest of the line if we don't need the input of
2343 lr_ignore_rest (ldfile
, 0);
2350 /* Handle ellipsis at end of list. */
2351 if (was_ellipsis
!= tok_none
)
2353 handle_ellipsis (ldfile
, NULL
, was_ellipsis
, charmap
, repertoire
,
2355 was_ellipsis
= tok_none
;
2359 lr_ignore_rest (ldfile
, 1);
2362 case tok_reorder_after
:
2363 /* Ignore the rest of the line if we don't need the input of
2367 lr_ignore_rest (ldfile
, 0);
2373 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
2377 /* Handle ellipsis at end of list. */
2378 if (was_ellipsis
!= tok_none
)
2380 handle_ellipsis (ldfile
, arg
, was_ellipsis
, charmap
,
2381 repertoire
, collate
);
2382 was_ellipsis
= tok_none
;
2385 else if (state
!= 2 && state
!= 3)
2389 arg
= lr_token (ldfile
, charmap
, repertoire
);
2390 if (arg
->tok
== tok_bsymbol
)
2392 /* Find this symbol in the sequence table. */
2393 struct element_t
*insp
;
2396 if (find_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
2397 arg
->val
.str
.lenmb
, (void **) &insp
) == 0)
2398 /* Yes, the symbol exists. Simply point the cursor
2400 collate
->cursor
= insp
;
2403 /* This is bad. The symbol after which we have to
2404 insert does not exist. */
2405 lr_error (ldfile
, _("\
2406 %s: cannot reorder after %.*s: symbol not known"),
2407 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
2408 arg
->val
.str
.startmb
);
2409 collate
->cursor
= NULL
;
2413 lr_ignore_rest (ldfile
, no_error
);
2416 /* This must not happen. */
2420 case tok_reorder_end
:
2421 /* Ignore the rest of the line if we don't need the input of
2429 lr_ignore_rest (ldfile
, 1);
2432 case tok_reorder_sections_after
:
2433 /* Ignore the rest of the line if we don't need the input of
2437 lr_ignore_rest (ldfile
, 0);
2443 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
2447 /* Handle ellipsis at end of list. */
2448 if (was_ellipsis
!= tok_none
)
2450 handle_ellipsis (ldfile
, NULL
, was_ellipsis
, charmap
,
2451 repertoire
, collate
);
2452 was_ellipsis
= tok_none
;
2455 else if (state
== 3)
2457 error (0, 0, _("%s: missing `reorder-end' keyword"),
2461 else if (state
!= 2 && state
!= 4)
2465 /* Get the name of the sections we are adding after. */
2466 arg
= lr_token (ldfile
, charmap
, repertoire
);
2467 if (arg
->tok
== tok_bsymbol
)
2469 /* Now find a section with this name. */
2470 struct section_list
*runp
= collate
->sections
;
2472 while (runp
!= NULL
)
2474 if (runp
->name
!= NULL
2475 && strlen (runp
->name
) == arg
->val
.str
.lenmb
2476 && memcmp (runp
->name
, arg
->val
.str
.startmb
,
2477 arg
->val
.str
.lenmb
) == 0)
2484 collate
->current_section
= runp
;
2487 /* This is bad. The section after which we have to
2488 reorder does not exist. Therefore we cannot
2489 process the whole rest of this reorder
2491 lr_error (ldfile
, _("%s: section `%.*s' not known"),
2492 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
2493 arg
->val
.str
.startmb
);
2497 lr_ignore_rest (ldfile
, 0);
2499 now
= lr_token (ldfile
, charmap
, NULL
);
2501 while (now
->tok
== tok_reorder_sections_after
2502 || now
->tok
== tok_reorder_sections_end
2503 || now
->tok
== tok_end
);
2505 /* Process the token we just saw. */
2511 /* This must not happen. */
2515 case tok_reorder_sections_end
:
2516 /* Ignore the rest of the line if we don't need the input of
2524 lr_ignore_rest (ldfile
, 1);
2528 /* Ignore the rest of the line if we don't need the input of
2532 lr_ignore_rest (ldfile
, 0);
2536 if (state
!= 1 && state
!= 3)
2541 /* It is possible that we already have this collation sequence.
2542 In this case we move the entry. */
2543 struct element_t
*seqp
;
2545 /* If the symbol after which we have to insert was not found
2546 ignore all entries. */
2547 if (collate
->cursor
== NULL
)
2549 lr_ignore_rest (ldfile
, 0);
2553 if (find_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
2554 arg
->val
.str
.lenmb
, (void **) &seqp
) == 0)
2556 /* Remove the entry from the old position. */
2557 if (seqp
->last
== NULL
)
2558 collate
->start
= seqp
->next
;
2560 seqp
->last
->next
= seqp
->next
;
2561 if (seqp
->next
!= NULL
)
2562 seqp
->next
->last
= seqp
->last
;
2564 /* We also have to check whether this entry is the
2565 first or last of a section. */
2566 if (seqp
->section
->first
== seqp
)
2568 if (seqp
->section
->first
== seqp
->section
->last
)
2569 /* This section has no content anymore. */
2570 seqp
->section
->first
= seqp
->section
->last
= NULL
;
2572 seqp
->section
->first
= seqp
->next
;
2574 else if (seqp
->section
->last
== seqp
)
2575 seqp
->section
->last
= seqp
->last
;
2577 /* Now insert it in the new place. */
2578 seqp
->next
= collate
->cursor
->next
;
2579 seqp
->last
= collate
->cursor
;
2580 collate
->cursor
->next
= seqp
;
2581 if (seqp
->next
!= NULL
)
2582 seqp
->next
->last
= seqp
;
2584 seqp
->section
= collate
->cursor
->section
;
2585 if (seqp
->section
->last
== collate
->cursor
)
2586 seqp
->section
->last
= seqp
;
2591 /* Otherwise we just add a new entry. */
2593 else if (state
== 5)
2595 /* We are reordering sections. Find the named section. */
2596 struct section_list
*runp
= collate
->sections
;
2597 struct section_list
*prevp
= NULL
;
2599 while (runp
!= NULL
)
2601 if (runp
->name
!= NULL
2602 && strlen (runp
->name
) == arg
->val
.str
.lenmb
2603 && memcmp (runp
->name
, arg
->val
.str
.startmb
,
2604 arg
->val
.str
.lenmb
) == 0)
2613 lr_error (ldfile
, _("%s: section `%.*s' not known"),
2614 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
2615 arg
->val
.str
.startmb
);
2616 lr_ignore_rest (ldfile
, 0);
2620 if (runp
!= collate
->current_section
)
2622 /* Remove the named section from the old place and
2623 insert it in the new one. */
2624 prevp
->next
= runp
->next
;
2626 runp
->next
= collate
->current_section
->next
;
2627 collate
->current_section
->next
= runp
;
2628 collate
->current_section
= runp
;
2631 /* Process the rest of the line which might change
2632 the collation rules. */
2633 arg
= lr_token (ldfile
, charmap
, repertoire
);
2634 if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
2635 read_directions (ldfile
, arg
, charmap
, repertoire
,
2640 else if (was_ellipsis
!= tok_none
)
2642 /* Using the information in the `ellipsis_weight'
2643 element and this and the last value we have to handle
2644 the ellipsis now. */
2645 assert (state
== 1);
2647 handle_ellipsis (ldfile
, arg
, was_ellipsis
, charmap
, repertoire
,
2650 /* Remember that we processed the ellipsis. */
2651 was_ellipsis
= tok_none
;
2653 /* And don't add the value a second time. */
2657 /* Now insert in the new place. */
2658 insert_value (ldfile
, arg
, charmap
, repertoire
, collate
);
2662 /* Ignore the rest of the line if we don't need the input of
2666 lr_ignore_rest (ldfile
, 0);
2673 if (was_ellipsis
!= tok_none
)
2676 _("%s: cannot have `%s' as end of ellipsis range"),
2677 "LC_COLLATE", "UNDEFINED");
2679 unlink_element (collate
);
2680 was_ellipsis
= tok_none
;
2683 /* See whether UNDEFINED already appeared somewhere. */
2684 if (collate
->undefined
.next
!= NULL
2685 || (collate
->cursor
!= NULL
2686 && collate
->undefined
.next
== collate
->cursor
))
2689 _("%s: order for `%.*s' already defined at %s:%zu"),
2690 "LC_COLLATE", 9, "UNDEFINED", collate
->undefined
.file
,
2691 collate
->undefined
.line
);
2692 lr_ignore_rest (ldfile
, 0);
2695 /* Parse the weights. */
2696 insert_weights (ldfile
, &collate
->undefined
, charmap
,
2697 repertoire
, collate
, tok_none
);
2703 /* This is the symbolic (decimal or hexadecimal) or absolute
2705 if (was_ellipsis
!= tok_none
)
2708 if (state
!= 1 && state
!= 3)
2711 was_ellipsis
= nowtok
;
2713 insert_weights (ldfile
, &collate
->ellipsis_weight
, charmap
,
2714 repertoire
, collate
, nowtok
);
2718 /* Next we assume `LC_COLLATE'. */
2719 if (!ignore_content
)
2722 /* We must either see a copy statement or have
2725 _("%s: empty category description not allowed"),
2727 else if (state
== 1)
2729 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
2732 /* Handle ellipsis at end of list. */
2733 if (was_ellipsis
!= tok_none
)
2735 handle_ellipsis (ldfile
, NULL
, was_ellipsis
, charmap
,
2736 repertoire
, collate
);
2737 was_ellipsis
= tok_none
;
2740 else if (state
== 3)
2741 error (0, 0, _("%s: missing `reorder-end' keyword"),
2743 else if (state
== 5)
2744 error (0, 0, _("%s: missing `reorder-sections-end' keyword"),
2747 arg
= lr_token (ldfile
, charmap
, NULL
);
2748 if (arg
->tok
== tok_eof
)
2750 if (arg
->tok
== tok_eol
)
2751 lr_error (ldfile
, _("%s: incomplete `END' line"), "LC_COLLATE");
2752 else if (arg
->tok
!= tok_lc_collate
)
2753 lr_error (ldfile
, _("\
2754 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2755 lr_ignore_rest (ldfile
, arg
->tok
== tok_lc_collate
);
2760 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2763 /* Prepare for the next round. */
2764 now
= lr_token (ldfile
, charmap
, NULL
);
2768 /* When we come here we reached the end of the file. */
2769 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");