X-Git-Url: https://sourceware.org/git/?a=blobdiff_plain;f=locale%2Fprograms%2Fld-collate.c;h=2cbea388b2f3783dc8e8b19f7e833ff547385557;hb=ac8295d23b59e34d2f7c5757ea71336eab2c9e6e;hp=982462f7b340a3d4f8618868ac3becd1a93c5f93;hpb=d980842cab63c6ccc760aa89341a3ed9006cba12;p=glibc.git diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c index 982462f7b3..2cbea388b2 100644 --- a/locale/programs/ld-collate.c +++ b/locale/programs/ld-collate.c @@ -21,14 +21,18 @@ # include #endif +#include #include #include +#include +#include #include "charmap.h" #include "localeinfo.h" #include "linereader.h" #include "locfile.h" #include "localedef.h" +#include "elem-hash.h" /* Uncomment the following line in the production version. */ /* #define NDEBUG 1 */ @@ -52,6 +56,8 @@ struct section_list struct element_t *last; /* These are the rules for this section. */ enum coll_sort_rule *rules; + /* Index of the rule set in the appropriate section of the output file. */ + int ruleidx; }; struct element_t; @@ -67,12 +73,28 @@ struct element_list_t /* Data type for collating element. */ struct element_t { + const char *name; + const char *mbs; + size_t nmbs; const uint32_t *wcs; - int order; + size_t nwcs; + int *mborder; + int wcorder; + + /* The following is a bit mask which bits are set if this element is + used in the appropriate level. Interesting for the singlebyte + weight computation. + + XXX The type here restricts the number of levels to 32. It could + we changed if necessary but I doubt this is necessary. */ + unsigned int used_in_level; struct element_list_t *weights; + /* Nonzero if this is a real character definition. */ + int is_character; + /* Where does the definition come from. */ const char *file; size_t line; @@ -83,8 +105,19 @@ struct element_t /* Predecessor and successor in the order list. */ struct element_t *last; struct element_t *next; + + /* Next element in multibyte output list. */ + struct element_t *mbnext; + + /* Next element in wide character output list. */ + struct element_t *wcnext; }; +/* Special element value. */ +#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1) +#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2) +#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3) + /* Data type for collating symbol. */ struct symbol_t { @@ -112,9 +145,6 @@ struct locale_collate_t /* To make handling of errors easier we have another section. */ struct section_list error_section; - /* Number of sorting rules given in order_start line. */ - uint32_t nrules; - /* Start of the order list. */ struct element_t *start; @@ -124,8 +154,8 @@ struct locale_collate_t /* This is the cursor for `reorder_after' insertions. */ struct element_t *cursor; - /* Remember whether last weight was an ellipsis. */ - int was_ellipsis; + /* This value is used when handling ellipsis. */ + struct element_t ellipsis_weight; /* Known collating elements. */ hash_table elem_table; @@ -142,12 +172,73 @@ struct locale_collate_t that the definitions from more than one input file contains information. Therefore we keep all relevant input in a list. */ struct locale_collate_t *next; + + /* Arrays with heads of the list for each of the leading bytes in + the multibyte sequences. */ + struct element_t *mbheads[256]; + + /* Table size of wide character hash table. */ + size_t plane_size; + size_t plane_cnt; + + /* Arrays with heads of the list for each of the leading bytes in + the multibyte sequences. */ + struct element_t **wcheads; }; /* We have a few global variables which are used for reading all LC_COLLATE category descriptions in all files. */ -static int nrules; +static uint32_t nrules; + + +/* These are definitions used by some of the functions for handling + UTF-8 encoding below. */ +static const uint32_t encoding_mask[] = +{ + ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff +}; + +static const unsigned char encoding_byte[] = +{ + 0xc0, 0xe0, 0xf0, 0xf8, 0xfc +}; + + +/* We need UTF-8 encoding of numbers. */ +static inline int +utf8_encode (char *buf, int val) +{ + char *startp = buf; + int retval; + + if (val < 0x80) + { + *buf++ = (char) val; + retval = 1; + } + else + { + int step; + + for (step = 2; step < 6; ++step) + if ((val & encoding_mask[step - 2]) == 0) + break; + retval = step; + + *buf = encoding_byte[step - 2]; + --step; + do + { + buf[step] = 0x80 | (val & 0x3f); + val >>= 6; + } + while (--step > 0); + *buf |= val; + } + + return buf - startp; +} static struct section_list * @@ -167,16 +258,44 @@ make_seclist_elem (struct locale_collate_t *collate, const char *string, static struct element_t * -new_element (struct locale_collate_t *collate, const char *mbs, - size_t len, const uint32_t *wcs) +new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen, + const uint32_t *wcs, const char *name, size_t namelen, + int is_character) { struct element_t *newp; newp = (struct element_t *) obstack_alloc (&collate->mempool, sizeof (*newp)); - newp->mbs = obstack_copy0 (&collate->mempool, mbs, len); - newp->wcs = wcs; - newp->order = 0; + newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool, + name, namelen); + if (mbs != NULL) + { + newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen); + newp->nmbs = mbslen; + } + else + { + newp->mbs = NULL; + newp->nmbs = 0; + } + if (wcs != NULL) + { + size_t nwcs = wcslen ((wchar_t *) wcs); + uint32_t zero = 0; + obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t)); + obstack_grow (&collate->mempool, &zero, sizeof (uint32_t)); + newp->wcs = (uint32_t *) obstack_finish (&collate->mempool); + newp->nwcs = nwcs; + } + else + { + newp->wcs = NULL; + newp->nwcs = 0; + } + newp->mborder = NULL; + newp->wcorder = 0; + newp->used_in_level = 0; + newp->is_character = is_character; /* Will be allocated later. */ newp->weights = NULL; @@ -184,11 +303,13 @@ new_element (struct locale_collate_t *collate, const char *mbs, newp->file = NULL; newp->line = 0; - newp->section = NULL; + newp->section = collate->current_section; newp->last = NULL; newp->next = NULL; + newp->mbnext = NULL; + return newp; } @@ -278,7 +399,7 @@ read_directions (struct linereader *ldfile, struct token *arg, if (! warned) { lr_error (ldfile, _("\ -%s: `%s' mentioned twice in definition of weight %d"), +%s: `%s' mentioned more than once in definition of weight %d"), "LC_COLLATE", "forward", cnt + 1); } } @@ -304,7 +425,7 @@ read_directions (struct linereader *ldfile, struct token *arg, if (! warned) { lr_error (ldfile, _("\ -%s: `%s' mentioned twice in definition of weight %d"), +%s: `%s' mentioned more than once in definition of weight %d"), "LC_COLLATE", "backward", cnt + 1); } } @@ -320,7 +441,7 @@ read_directions (struct linereader *ldfile, struct token *arg, if (! warned) { lr_error (ldfile, _("\ -%s: `%s' mentioned twice in definition of weight %d in category `%s'"), +%s: `%s' mentioned more than once in definition of weight %d"), "LC_COLLATE", "position", cnt + 1); } } @@ -344,7 +465,13 @@ read_directions (struct linereader *ldfile, struct token *arg, /* See whether we have to increment the counter. */ if (arg->tok != tok_comma && rules[cnt] != 0) - ++cnt; + { + /* Add the default `forward' if we have seen only `position'. */ + if (rules[cnt] == sort_position) + rules[cnt] = sort_position | sort_forward; + + ++cnt; + } if (arg->tok == tok_eof || arg->tok == tok_eol) /* End of line or file, so we exit the loop. */ @@ -437,14 +564,15 @@ find_element (struct linereader *ldfile, struct locale_collate_t *collate, result = sym->order; if (result == NULL) - result = sym->order = new_element (collate, str, len, NULL); + result = sym->order = new_element (collate, NULL, 0, NULL, + NULL, 0, 0); } else if (find_entry (&collate->elem_table, str, len, (void **) &result) != 0) { - /* It's also no collation element. So it is an element defined - later. */ - result = new_element (collate, str, len, wcstr); + /* It's also no collation element. So it is a character + element defined later. */ + result = new_element (collate, NULL, 0, NULL, str, len, 1); if (result != NULL) /* Insert it into the sequence table. */ insert_entry (&collate->seq_table, str, len, result); @@ -455,10 +583,30 @@ find_element (struct linereader *ldfile, struct locale_collate_t *collate, } +static void +unlink_element (struct locale_collate_t *collate) +{ + if (collate->cursor == collate->start) + { + assert (collate->cursor->next == NULL); + assert (collate->cursor->last == NULL); + collate->cursor = NULL; + } + else + { + if (collate->cursor->next != NULL) + collate->cursor->next->last = collate->cursor->last; + if (collate->cursor->last != NULL) + collate->cursor->last->next = collate->cursor->next; + collate->cursor = collate->cursor->last; + } +} + + static void insert_weights (struct linereader *ldfile, struct element_t *elem, struct charmap_t *charmap, struct repertoire_t *repertoire, - struct locale_collate_t *collate) + struct locale_collate_t *collate, enum token_t ellipsis) { int weight_cnt; struct token *arg; @@ -468,6 +616,14 @@ insert_weights (struct linereader *ldfile, struct element_t *elem, elem->line = ldfile->lineno; elem->last = collate->cursor; elem->next = collate->cursor ? collate->cursor->next : NULL; + elem->section = collate->current_section; + if (collate->cursor != NULL) + collate->cursor->next = elem; + if (collate->start == NULL) + { + assert (collate->cursor == NULL); + collate->start = elem; + } elem->weights = (struct element_list_t *) obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t)); memset (elem->weights, '\0', nrules * sizeof (struct element_list_t)); @@ -494,7 +650,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem, elem->weights[weight_cnt].w = (struct element_t **) obstack_alloc (&collate->mempool, sizeof (struct element_t *)); elem->weights[weight_cnt].w[0] = NULL; - elem->weights[weight_cnt].cnt = 0; + elem->weights[weight_cnt].cnt = 1; } else if (arg->tok == tok_bsymbol) { @@ -518,7 +674,8 @@ insert_weights (struct linereader *ldfile, struct element_t *elem, const char *cp = arg->val.str.startmb; int cnt = 0; struct element_t *charelem; - void *base = obstack_base (&collate->mempool); + struct element_t **weights = NULL; + int max = 0; if (*cp == '\0') { @@ -533,18 +690,17 @@ insert_weights (struct linereader *ldfile, struct element_t *elem, if (*cp == '<') { /* Ahh, it's a bsymbol. That's what we want. */ - const char *startp = cp; + const char *startp = ++cp; - while (*++cp != '>') + while (*cp != '>') { if (*cp == ldfile->escape_char) ++cp; if (*cp == '\0') - { - /* It's a syntax error. */ - obstack_free (&collate->mempool, base); - goto syntax; - } + /* It's a syntax error. */ + goto syntax; + + ++cp; } charelem = find_element (ldfile, collate, startp, @@ -558,7 +714,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem, what this means. We interpret all characters in the string as if that would be bsymbols. Otherwise we would have to match back to bsymbols somehow and this - is also not what people normally expect. */ + is normally not what people normally expect. */ charelem = find_element (ldfile, collate, cp++, 1, NULL); } @@ -570,19 +726,48 @@ insert_weights (struct linereader *ldfile, struct element_t *elem, } /* Add the pointer. */ - obstack_ptr_grow (&collate->mempool, charelem); - ++cnt; + if (cnt >= max) + { + struct element_t **newp; + max += 10; + newp = (struct element_t **) + alloca (max * sizeof (struct element_t *)); + memcpy (newp, weights, cnt * sizeof (struct element_t *)); + weights = newp; + } + weights[cnt++] = charelem; } while (*cp != '\0'); /* Now store the information. */ elem->weights[weight_cnt].w = (struct element_t **) - obstack_finish (&collate->mempool); + obstack_alloc (&collate->mempool, + cnt * sizeof (struct element_t *)); + memcpy (elem->weights[weight_cnt].w, weights, + cnt * sizeof (struct element_t *)); elem->weights[weight_cnt].cnt = cnt; /* We don't need the string anymore. */ free (arg->val.str.startmb); } + else if (ellipsis != tok_none + && (arg->tok == tok_ellipsis2 + || arg->tok == tok_ellipsis3 + || arg->tok == tok_ellipsis4)) + { + /* It must be the same ellipsis as used in the initial column. */ + if (arg->tok != ellipsis) + lr_error (ldfile, _("\ +%s: weights must use the same ellipsis symbol as the name"), + "LC_COLLATE"); + + /* The weight for this level has to be ignored. We use the + null pointer to indicate this. */ + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, sizeof (struct element_t *)); + elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2; + elem->weights[weight_cnt].cnt = 1; + } else { syntax: @@ -634,7 +819,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem, } -static void +static int insert_value (struct linereader *ldfile, struct token *arg, struct charmap_t *charmap, struct repertoire_t *repertoire, struct locale_collate_t *collate) @@ -644,14 +829,20 @@ insert_value (struct linereader *ldfile, struct token *arg, uint32_t wc; struct element_t *elem = NULL; - /* First determine the wide character. There must be such a value, - otherwise we ignore it (if it is no collatio symbol or element). */ - wc = repertoire_find_value (repertoire, arg->val.str.startmb, - arg->val.str.lenmb); - /* Try to find the character in the charmap. */ seq = charmap_find_value (charmap, arg->val.str.startmb, arg->val.str.lenmb); + /* Determine the wide character. */ + if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + { + wc = repertoire_find_value (repertoire, arg->val.str.startmb, + arg->val.str.lenmb); + if (seq != NULL) + seq->ucs4 = wc; + } + else + wc = seq->ucs4; + if (wc == ILLEGAL_CHAR_VALUE && seq == NULL) { /* It's no character, so look through the collation elements and @@ -666,16 +857,15 @@ insert_value (struct linereader *ldfile, struct token *arg, elem = sym->order; if (elem == NULL) - elem = sym->order = new_element (collate, arg->val.str.startmb, - arg->val.str.lenmb, - arg->val.str.startwc); + elem = sym->order = new_element (collate, NULL, 0, NULL, NULL, 0, + 0); } else if (find_entry (&collate->elem_table, arg->val.str.startmb, arg->val.str.lenmb, (void **) &elem) != 0) { /* It's also no collation element. Therefore ignore it. */ lr_ignore_rest (ldfile, 0); - return; + return 1; } } else @@ -684,16 +874,38 @@ insert_value (struct linereader *ldfile, struct token *arg, if (find_entry (&collate->seq_table, arg->val.str.startmb, arg->val.str.lenmb, (void **) &elem) != 0) { + uint32_t wcs[2] = { wc, 0 }; + /* We have to allocate an entry. */ - elem = new_element (collate, arg->val.str.startmb, - arg->val.str.lenmb, - arg->val.str.startwc); + elem = new_element (collate, seq != NULL ? seq->bytes : NULL, + seq != NULL ? seq->nbytes : 0, + wc == ILLEGAL_CHAR_VALUE ? NULL : wcs, + arg->val.str.startmb, arg->val.str.lenmb, 1); /* And add it to the table. */ if (insert_entry (&collate->seq_table, arg->val.str.startmb, arg->val.str.lenmb, elem) != 0) /* This cannot happen. */ - abort (); + assert (! "Internal error"); + } + else + { + /* Maybe the character was used before the definition. In this case + we have to insert the byte sequences now. */ + if (elem->mbs == NULL && seq != NULL) + { + elem->mbs = obstack_copy0 (&collate->mempool, + seq->bytes, seq->nbytes); + elem->nmbs = seq->nbytes; + } + + if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE) + { + uint32_t wcs[2] = { wc, 0 }; + + elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs)); + elem->nwcs = 1; + } } } @@ -701,2927 +913,2501 @@ insert_value (struct linereader *ldfile, struct token *arg, if (elem->next != NULL || (collate->cursor != NULL && elem->next == collate->cursor)) { - lr_error (ldfile, _("order for `%.*s' already defined at %s:%Z"), - arg->val.str.lenmb, arg->val.str.startmb, + lr_error (ldfile, _("order for `%.*s' already defined at %s:%zu"), + (int) arg->val.str.lenmb, arg->val.str.startmb, elem->file, elem->line); lr_ignore_rest (ldfile, 0); - return; + return 1; } - insert_weights (ldfile, elem, charmap, repertoire, collate); + insert_weights (ldfile, elem, charmap, repertoire, collate, tok_none); + + return 0; } static void -collate_startup (struct linereader *ldfile, struct localedef_t *locale, - struct localedef_t *copy_locale, int ignore_content) +handle_ellipsis (struct linereader *ldfile, struct token *arg, + enum token_t ellipsis, struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct locale_collate_t *collate) { - if (!ignore_content) + struct element_t *startp; + struct element_t *endp; + + /* Unlink the entry added for the ellipsis. */ + unlink_element (collate); + startp = collate->cursor; + + /* Process and add the end-entry. */ + if (arg != NULL + && insert_value (ldfile, arg, charmap, repertoire, collate)) + /* Something went wrong with inserting the to-value. This means + we cannot process the ellipsis. */ + return; + + /* Reset the cursor. */ + collate->cursor = startp; + + /* Now we have to handle many different situations: + - we have to distinguish between the three different ellipsis forms + - the is the ellipsis at the beginning, in the middle, or at the end. + */ + endp = collate->cursor->next; + assert (arg == NULL || endp != NULL); + + /* Both, the start and the end symbol, must stand for characters. */ + if ((startp != NULL && (startp->name == NULL || ! startp->is_character)) + || (endp != NULL && (endp->name == NULL|| ! endp->is_character))) { - struct locale_collate_t *collate; - - if (copy_locale == NULL) - collate = locale->categories[LC_COLLATE].collate = - (struct locale_collate_t *) xcalloc (1, - sizeof (struct locale_collate_t)); - else - collate = locale->categories[LC_COLLATE].collate = - copy_locale->categories[LC_COLLATE].collate; - - /* Init the various data structures. */ - init_hash (&collate->elem_table, 100); - init_hash (&collate->sym_table, 100); - init_hash (&collate->seq_table, 500); - obstack_init (&collate->mempool); - - collate->col_weight_max = -1; + lr_error (ldfile, _("\ +%s: the start end the end symbol of a range must stand for characters"), + "LC_COLLATE"); + return; } - ldfile->translate_strings = 0; - ldfile->return_widestr = 0; -} + if (ellipsis == tok_ellipsis3) + { + /* One requirement we make here: the length of the byte + sequences for the first and end character must be the same. + This is mainly to prevent unwanted effects and this is often + not what is wanted. */ + size_t len = (startp->mbs != NULL ? startp->nmbs + : (endp->mbs != NULL ? endp->nmbs : 0)); + char mbcnt[len + 1]; + char mbend[len + 1]; + + /* Well, this should be caught somewhere else already. Just to + make sure. */ + assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0); + assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0); + + if (startp != NULL && endp != NULL + && startp->mbs != NULL && endp->mbs != NULL + && startp->nmbs != endp->nmbs) + { + lr_error (ldfile, _("\ +%s: byte sequences of first and last character must have the same length"), + "LC_COLLATE"); + return; + } + + /* Determine whether we have to generate multibyte sequences. */ + if ((startp == NULL || startp->mbs != NULL) + && (endp == NULL || endp->mbs != NULL)) + { + int cnt; + int ret; + + /* Prepare the beginning byte sequence. This is either from the + beginning byte sequence or it is all nulls if it was an + initial ellipsis. */ + if (startp == NULL || startp->mbs == NULL) + memset (mbcnt, '\0', len); + else + { + memcpy (mbcnt, startp->mbs, len); + /* And increment it so that the value is the first one we will + try to insert. */ + for (cnt = len - 1; cnt >= 0; --cnt) + if (++mbcnt[cnt] != '\0') + break; + } + mbcnt[len] = '\0'; -void -collate_finish (struct localedef_t *locale, struct charmap_t *charmap) -{ -} + /* And the end sequence. */ + if (endp == NULL || endp->mbs == NULL) + memset (mbend, '\0', len); + else + memcpy (mbend, endp->mbs, len); + mbend[len] = '\0'; + /* Test whether we have a correct range. */ + ret = memcmp (mbcnt, mbend, len); + if (ret >= 0) + { + if (ret > 0) + lr_error (ldfile, _("%s: byte sequence of first character of \ +sequence is not lower than that of the last character"), "LC_COLLATE"); + return; + } -void -collate_output (struct localedef_t *locale, struct charmap_t *charmap, - const char *output_path) -{ -} + /* Generate the byte sequences data. */ + while (1) + { + struct charseq *seq; + /* Quite a bit of work ahead. We have to find the character + definition for the byte sequence and then determine the + wide character belonging to it. */ + seq = charmap_find_symbol (charmap, mbcnt, len); + if (seq != NULL) + { + struct element_t *elem; + size_t namelen; + + if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + seq->ucs4 = repertoire_find_value (repertoire, seq->name, + strlen (seq->name)); + + /* I don't this this can ever happen. */ + assert (seq->name != NULL); + namelen = strlen (seq->name); + + /* Now we are ready to insert the new value in the + sequence. Find out whether the element is + already known. */ + if (find_entry (&collate->seq_table, seq->name, namelen, + (void **) &elem) != 0) + { + uint32_t wcs[2] = { seq->ucs4, 0 }; + + /* We have to allocate an entry. */ + elem = new_element (collate, mbcnt, len, + seq->ucs4 == ILLEGAL_CHAR_VALUE + ? NULL : wcs, seq->name, + namelen, 1); + + /* And add it to the table. */ + if (insert_entry (&collate->seq_table, seq->name, + namelen, elem) != 0) + /* This cannot happen. */ + assert (! "Internal error"); + } -void -collate_read (struct linereader *ldfile, struct localedef_t *result, - struct charmap_t *charmap, const char *repertoire_name, - int ignore_content) -{ - struct repertoire_t *repertoire = NULL; - struct locale_collate_t *collate; - struct token *now; - struct token *arg = NULL; - enum token_t nowtok; - int state = 0; - int was_ellipsis = 0; - struct localedef_t *copy_locale = NULL; + /* Test whether this element is not already in the list. */ + if (elem->next != NULL || (collate->cursor != NULL + && elem->next == collate->cursor)) + { + lr_error (ldfile, _("\ +order for `%.*s' already defined at %s:%zu"), + (int) namelen, seq->name, + elem->file, elem->line); + goto increment; + } - /* Get the repertoire we have to use. */ - if (repertoire_name != NULL) - repertoire = repertoire_read (repertoire_name); + /* Enqueue the new element. */ + elem->last = collate->cursor; + if (collate->cursor != NULL) + elem->next = NULL; + else + { + elem->next = collate->cursor->next; + elem->last->next = elem; + if (elem->next != NULL) + elem->next->last = elem; + } + if (collate->start == NULL) + { + assert (collate->cursor == NULL); + collate->start = elem; + } + collate->cursor = elem; + + /* Add the weight value. We take them from the + `ellipsis_weights' member of `collate'. */ + elem->weights = (struct element_list_t *) + obstack_alloc (&collate->mempool, + nrules * sizeof (struct element_list_t)); + for (cnt = 0; cnt < nrules; ++cnt) + if (collate->ellipsis_weight.weights[cnt].cnt == 1 + && (collate->ellipsis_weight.weights[cnt].w[0] + == ELEMENT_ELLIPSIS2)) + { + elem->weights[cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, + sizeof (struct element_t *)); + elem->weights[cnt].w[0] = elem; + elem->weights[cnt].cnt = 1; + } + else + { + /* Simly use the weight from `ellipsis_weight'. */ + elem->weights[cnt].w = + collate->ellipsis_weight.weights[cnt].w; + elem->weights[cnt].cnt = + collate->ellipsis_weight.weights[cnt].cnt; + } + } - /* The rest of the line containing `LC_COLLATE' must be free. */ - lr_ignore_rest (ldfile, 1); + /* Increment for the next round. */ + increment: + for (cnt = len - 1; cnt >= 0; --cnt) + if (++mbcnt[cnt] != '\0') + break; - do - { - now = lr_token (ldfile, charmap, NULL); - nowtok = now->tok; + /* Find out whether this was all. */ + if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0) + /* Yep, that's all. */ + break; + } + } } - while (nowtok == tok_eol); - - if (nowtok == tok_copy) + else { - state = 2; - now = lr_token (ldfile, charmap, NULL); - if (now->tok != tok_string) + /* For symbolic range we naturally must have a beginning and an + end specified by the user. */ + if (startp == NULL) + lr_error (ldfile, _("\ +%s: symbolic range ellipsis must not directly follow `order_start'"), + "LC_COLLATE"); + else if (endp == NULL) + lr_error (ldfile, _("\ +%s: symbolic range ellipsis must not be direct followed by `order_end'"), + "LC_COLLATE"); + else { - SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE"); - - skip_category: - do - now = lr_token (ldfile, charmap, NULL); - while (now->tok != tok_eof && now->tok != tok_end); - - if (now->tok != tok_eof - || (now = lr_token (ldfile, charmap, NULL), now->tok == tok_eof)) - lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE"); - else if (now->tok != tok_lc_collate) + /* Determine the range. To do so we have to determine the + common prefix of the both names and then the numeric + values of both ends. */ + size_t lenfrom = strlen (startp->name); + size_t lento = strlen (endp->name); + char buf[lento + 1]; + int preflen = 0; + long int from; + long int to; + char *cp; + int base = ellipsis == tok_ellipsis2 ? 16 : 10; + + if (lenfrom != lento) { + invalid_range: lr_error (ldfile, _("\ -%1$s: definition does not end with `END %1$s'"), "LC_COLLATE"); - lr_ignore_rest (ldfile, 0); +`%s' and `%.*s' are no valid names for symbolic range"), + startp->name, (int) lento, endp->name); + return; } - else - lr_ignore_rest (ldfile, 1); - - return; - } - - /* Get the locale definition. */ - copy_locale = find_locale (LC_COLLATE, now->val.str.startmb, - repertoire_name, charmap); - if ((copy_locale->avail & COLLATE_LOCALE) == 0) - { - /* Not yet loaded. So do it now. */ - if (locfile_read (copy_locale, charmap) != 0) - goto skip_category; - } - - lr_ignore_rest (ldfile, 1); - now = lr_token (ldfile, charmap, NULL); - nowtok = now->tok; - } + while (startp->name[preflen] == endp->name[preflen]) + if (startp->name[preflen] == '\0') + /* Nothing to be done. The start and end point are identical + and while inserting the end point we have already given + the user an error message. */ + return; + else + ++preflen; - /* Prepare the data structures. */ - collate_startup (ldfile, result, copy_locale, ignore_content); - collate = result->categories[LC_COLLATE].collate; + errno = 0; + from = strtol (startp->name + preflen, &cp, base); + if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0') + goto invalid_range; - while (1) - { - /* Of course we don't proceed beyond the end of file. */ - if (nowtok == tok_eof) - break; + errno = 0; + to = strtol (endp->name + preflen, &cp, base); + if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0') + goto invalid_range; - /* Ingore empty lines. */ - if (nowtok == tok_eol) - { - now = lr_token (ldfile, charmap, NULL); - nowtok = now->tok; - continue; - } + /* Copy the prefix. */ + memcpy (buf, startp->name, preflen); - switch (nowtok) - { - case tok_coll_weight_max: - /* Ignore the rest of the line if we don't need the input of - this line. */ - if (ignore_content) + /* Loop over all values. */ + for (++from; from < to; ++from) { - lr_ignore_rest (ldfile, 0); - break; - } + struct element_t *elem = NULL; + struct charseq *seq; + uint32_t wc; + int cnt; - if (state != 0) - goto err_label; + /* Generate the the name. */ + sprintf (buf + preflen, base == 10 ? "%d" : "%x", from); - arg = lr_token (ldfile, charmap, NULL); - if (arg->tok != tok_number) - goto err_label; - if (collate->col_weight_max != -1) - lr_error (ldfile, _("%s: duplicate definition of `%s'"), - "LC_COLLATE", "col_weight_max"); - else - collate->col_weight_max = arg->val.num; - lr_ignore_rest (ldfile, 1); - break; + /* Look whether this name is already defined. */ + if (find_entry (&collate->seq_table, arg->val.str.startmb, + arg->val.str.lenmb, (void **) &elem) == 0) + { + if (elem->next != NULL || (collate->cursor != NULL + && elem->next == collate->cursor)) + { + lr_error (ldfile, _("\ +%s: order for `%.*s' already defined at %s:%zu"), + "LC_COLLATE", (int) lenfrom, buf, + elem->file, elem->line); + continue; + } - case tok_section_symbol: - /* Ignore the rest of the line if we don't need the input of - this line. */ - if (ignore_content) - { - lr_ignore_rest (ldfile, 0); - break; - } + if (elem->name == NULL) + { + lr_error (ldfile, _("%s: `%s' must be a charater"), + "LC_COLLATE", buf); + continue; + } + } - if (state != 0) - goto err_label; + if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL)) + { + /* Search for a character of this name. */ + seq = charmap_find_value (charmap, buf, lenfrom); + if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + { + wc = repertoire_find_value (repertoire, buf, lenfrom); - arg = lr_token (ldfile, charmap, repertoire); - if (arg->tok != tok_bsymbol) - goto err_label; - else if (!ignore_content) - { - /* Check whether this section is already known. */ - struct section_list *known = collate->sections; - while (known != NULL) - if (strcmp (known->name, arg->val.str.startmb) == 0) - break; + if (seq != NULL) + seq->ucs4 = wc; + } + else + wc = seq->ucs4; - if (known != NULL) - { - lr_error (ldfile, - _("%s: duplicate declaration of section `%s'"), - "LC_COLLATE", arg->val.str.startmb); - free (arg->val.str.startmb); + if (wc == ILLEGAL_CHAR_VALUE && seq == NULL) + /* We don't know anything about a character with this + name. XXX Should we warn? */ + continue; + + if (elem == NULL) + { + uint32_t wcs[2] = { wc, 0 }; + + /* We have to allocate an entry. */ + elem = new_element (collate, + seq != NULL ? seq->bytes : NULL, + seq != NULL ? seq->nbytes : 0, + wc == ILLEGAL_CHAR_VALUE + ? NULL : wcs, buf, lenfrom, 1); + } + else + { + /* Update the element. */ + if (seq != NULL) + { + elem->mbs = obstack_copy0 (&collate->mempool, + seq->bytes, seq->nbytes); + elem->nmbs = seq->nbytes; + } + + if (wc != ILLEGAL_CHAR_VALUE) + { + uint32_t zero = 0; + + obstack_grow (&collate->mempool, + &wc, sizeof (uint32_t)); + obstack_grow (&collate->mempool, + &zero, sizeof (uint32_t)); + elem->wcs = obstack_finish (&collate->mempool); + elem->nwcs = 1; + } + } + + elem->file = ldfile->fname; + elem->line = ldfile->lineno; + elem->section = collate->current_section; } - else - collate->sections = make_seclist_elem (collate, - arg->val.str.startmb, - collate->sections); - lr_ignore_rest (ldfile, known == NULL); - } - else - { - free (arg->val.str.startmb); - lr_ignore_rest (ldfile, 0); + /* Enqueue the new element. */ + elem->last = collate->cursor; + elem->next = collate->cursor->next; + elem->last->next = elem; + if (elem->next != NULL) + elem->next->last = elem; + collate->cursor = elem; + + /* Now add the weights. They come from the `ellipsis_weights' + member of `collate'. */ + elem->weights = (struct element_list_t *) + obstack_alloc (&collate->mempool, + nrules * sizeof (struct element_list_t)); + for (cnt = 0; cnt < nrules; ++cnt) + if (collate->ellipsis_weight.weights[cnt].cnt == 1 + && (collate->ellipsis_weight.weights[cnt].w[0] + == ELEMENT_ELLIPSIS2)) + { + elem->weights[cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, + sizeof (struct element_t *)); + elem->weights[cnt].w[0] = elem; + elem->weights[cnt].cnt = 1; + } + else + { + /* Simly use the weight from `ellipsis_weight'. */ + elem->weights[cnt].w = + collate->ellipsis_weight.weights[cnt].w; + elem->weights[cnt].cnt = + collate->ellipsis_weight.weights[cnt].cnt; + } } + } + } +} + + +static void +collate_startup (struct linereader *ldfile, struct localedef_t *locale, + struct localedef_t *copy_locale, int ignore_content) +{ + if (!ignore_content) + { + struct locale_collate_t *collate; + + if (copy_locale == NULL) + { + collate = locale->categories[LC_COLLATE].collate = + (struct locale_collate_t *) + xcalloc (1, sizeof (struct locale_collate_t)); + + /* Init the various data structures. */ + init_hash (&collate->elem_table, 100); + init_hash (&collate->sym_table, 100); + init_hash (&collate->seq_table, 500); + obstack_init (&collate->mempool); + + collate->col_weight_max = -1; + } + else + collate = locale->categories[LC_COLLATE].collate = + copy_locale->categories[LC_COLLATE].collate; + } + + ldfile->translate_strings = 0; + ldfile->return_widestr = 0; +} + + +void +collate_finish (struct localedef_t *locale, struct charmap_t *charmap) +{ + /* Now is the time when we can assign the individual collation + values for all the symbols. We have possibly different values + for the wide- and the multibyte-character symbols. This is done + since it might make a difference in the encoding if there is in + some cases no multibyte-character but there are wide-characters. + (The other way around it is not important since theencoded + collation value in the wide-character case is 32 bits wide and + therefore requires no encoding). + + The lowest collation value assigned is 2. Zero is reserved for + the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm' + functions and 1 is used to separate the individual passes for the + different rules. + + We also have to construct is list with all the bytes/words which + can come first in a sequence, followed by all the elements which + also start with this byte/word. The order is reverse which has + among others the important effect that longer strings are located + first in the list. This is required for the output data since + the algorithm used in `strcoll' etc depends on this. + + The multibyte case is easy. We simply sort into an array with + 256 elements. */ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + int mbact[nrules]; + int wcact; + struct element_t *runp; + int i; + int need_undefined = 0; + struct section_list *sect; + int ruleidx; + int nr_wide_elems = 0; + size_t min_total; + size_t act_size; + + if (collate == NULL) + { + /* No data, no check. */ + if (! be_quiet) + error (0, 0, _("No definition for %s category found"), "LC_COLLATE"); + return; + } + + /* If this assertion is hit change the type in `element_t'. */ + assert (nrules <= sizeof (runp->used_in_level) * 8); + + /* Make sure that the `position' rule is used either in all sections + or in none. */ + for (i = 0; i < nrules; ++i) + for (sect = collate->sections; sect != NULL; sect = sect->next) + if ((sect->rules[i] & sort_position) + != (collate->sections->rules[i] & sort_position)) + { + error (0, 0, _("\ +%s: `position' must be used for a specific level in all sections or none"), + "LC_COLLATE"); break; + } - case tok_collating_element: - /* Ignore the rest of the line if we don't need the input of - this line. */ - if (ignore_content) + /* Find out which elements are used at which level. At the same + time we find out whether we have any undefined symbols. */ + runp = collate->start; + while (runp != NULL) + { + if (runp->mbs != NULL) + { + for (i = 0; i < nrules; ++i) { - lr_ignore_rest (ldfile, 0); - break; + int j; + + for (j = 0; j < runp->weights[i].cnt; ++j) + /* A NULL pointer as the weight means IGNORE. */ + if (runp->weights[i].w[j] != NULL) + { + if (runp->weights[i].w[j]->weights == NULL) + { + error_at_line (0, 0, runp->file, runp->line, + _("symbol `%s' not defined"), + runp->weights[i].w[j]->name); + + need_undefined = 1; + runp->weights[i].w[j] = &collate->undefined; + } + else + /* Set the bit for the level. */ + runp->weights[i].w[j]->used_in_level |= 1 << i; + } } + } - if (state != 0) - goto err_label; + /* Up to the next entry. */ + runp = runp->next; + } - arg = lr_token (ldfile, charmap, repertoire); - if (arg->tok != tok_bsymbol) - goto err_label; - else - { - const char *symbol = arg->val.str.startmb; - size_t symbol_len = arg->val.str.lenmb; + /* Walk through the list of defined sequences and assign weights. Also + create the data structure which will allow generating the single byte + character based tables. + + Since at each time only the weights for each of the rules are + only compared to other weights for this rule it is possible to + assign more compact weight values than simply counting all + weights in sequence. We can assign weights from 3, one for each + rule individually and only for those elements, which are actually + used for this rule. + + Why is this important? It is not for the wide char table. But + it is for the singlebyte output since here larger numbers have to + be encoded to make it possible to emit the value as a byte + string. */ + for (i = 0; i < nrules; ++i) + mbact[i] = 2; + wcact = 2; + runp = collate->start; + while (runp != NULL) + { + /* Determine the order. */ + if (runp->used_in_level != 0) + { + runp->mborder = (int *) obstack_alloc (&collate->mempool, + nrules * sizeof (int)); - /* Next the `from' keyword. */ - arg = lr_token (ldfile, charmap, repertoire); - if (arg->tok != tok_from) - { - free ((char *) symbol); - goto err_label; - } + for (i = 0; i < nrules; ++i) + if ((runp->used_in_level & (1 << i)) != 0) + runp->mborder[i] = mbact[i]++; + else + runp->mborder[i] = 0; + } - ldfile->return_widestr = 1; + if (runp->mbs != NULL) + { + struct element_t **eptr; - /* Finally the string with the replacement. */ - arg = lr_token (ldfile, charmap, repertoire); - ldfile->return_widestr = 0; - if (arg->tok != tok_string) - goto err_label; + /* Find the point where to insert in the list. */ + eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]]; + while (*eptr != NULL) + { + if ((*eptr)->nmbs < runp->nmbs) + break; - if (!ignore_content) + if ((*eptr)->nmbs == runp->nmbs) { - if (symbol == NULL) - lr_error (ldfile, _("\ -%s: unknown character in collating element name"), - "LC_COLLATE"); - if (arg->val.str.startmb == NULL) - lr_error (ldfile, _("\ -%s: unknown character in collating element definition"), - "LC_COLLATE"); - if (arg->val.str.startwc == NULL) - lr_error (ldfile, _("\ -%s: unknown wide character in collating element definition"), - "LC_COLLATE"); - else if (arg->val.str.lenwc < 2) - lr_error (ldfile, _("\ -%s: substitution string in collating element definition must have at least two characters"), - "LC_COLLATE"); + int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs); - if (symbol != NULL) + if (c == 0) { - /* The name is already defined. */ - if (check_duplicate (ldfile, collate, charmap, - repertoire, symbol, symbol_len)) - goto col_elem_free; - - if (insert_entry (&collate->elem_table, - symbol, symbol_len, - new_element (collate, - arg->val.str.startmb, - arg->val.str.lenmb, - arg->val.str.startwc)) - < 0) - lr_error (ldfile, _("\ -error while adding collating element")); + /* This should not happen. It means that we have + to symbols with the same byte sequence. It is + of course an error. */ + error_at_line (0, 0, (*eptr)->file, (*eptr)->line, + _("symbol `%s' has same encoding as"), + (*eptr)->name); + error_at_line (0, 0, runp->file, runp->line, + _("symbol `%s'"), runp->name); + goto dont_insert; } - else - goto col_elem_free; - } - else - { - col_elem_free: - if (symbol != NULL) - free ((char *) symbol); - if (arg->val.str.startmb != NULL) - free (arg->val.str.startmb); - if (arg->val.str.startwc != NULL) - free (arg->val.str.startwc); + else if (c < 0) + /* Insert it here. */ + break; } - lr_ignore_rest (ldfile, 1); - } - break; - case tok_collating_symbol: - /* Ignore the rest of the line if we don't need the input of - this line. */ - if (ignore_content) - { - lr_ignore_rest (ldfile, 0); - break; + /* To the next entry. */ + eptr = &(*eptr)->mbnext; } - if (state != 0) - goto err_label; + /* Set the pointers. */ + runp->mbnext = *eptr; + *eptr = runp; + dont_insert: + } - arg = lr_token (ldfile, charmap, repertoire); - if (arg->tok != tok_bsymbol) - goto err_label; - else + if (runp->wcs != NULL) + { + runp->wcorder = wcact++; + + /* We take the opportunity to count the elements which have + wide characters. */ + ++nr_wide_elems; + } + + /* Up to the next entry. */ + runp = runp->next; + } + + /* Find out whether any of the `mbheads' entries is unset. In this + case we use the UNDEFINED entry. */ + for (i = 1; i < 256; ++i) + if (collate->mbheads[i] == NULL) + { + need_undefined = 1; + collate->mbheads[i] = &collate->undefined; + } + + /* Now to the wide character case. Here we have to find first a good + mapping function to get the wide range of wide character values + (0x00000000 to 0x7fffffff) to a managable table. This might take + some time so we issue a warning. + + We use a very trivial hashing function to store the sparse + table. CH % TABSIZE is used as an index. To solve multiple hits + we have N planes. This guarantees a fixed search time for a + character [N / 2]. In the following code we determine the minimum + value for TABSIZE * N, where TABSIZE >= 256. + + Some people complained that this algorithm takes too long. Well, + go on, improve it. But changing the step size is *not* an + option. Some people changed this to use only sizes of prime + numbers. Think again, do some math. We are looking for the + optimal solution, not something which works in general. Unless + somebody can provide a dynamic programming solution I think this + implementation is as good as it can get. */ + if (nr_wide_elems > 512 && !be_quiet) + fputs (_("\ +Computing table size for collation table might take a while..."), + stderr); + + min_total = UINT_MAX; + act_size = 256; + + /* While we want to have a small total size we are willing to use a + little bit larger table if this reduces the number of layers. + Therefore we add a little penalty to the number of planes. + Maybe this constant has to be adjusted a bit. */ +#define PENALTY 128 + do + { + size_t cnt[act_size]; + struct element_t *elem[act_size]; + size_t act_planes = 1; + + memset (cnt, '\0', sizeof cnt); + memset (elem, '\0', sizeof elem); + + runp = collate->start; + while (runp != NULL) + { + if (runp->wcs != NULL) { - const char *symbol = arg->val.str.startmb; - size_t symbol_len = arg->val.str.lenmb; + size_t nr = runp->wcs[0] % act_size; + struct element_t *elemp = elem[nr]; - if (!ignore_content) + while (elemp != NULL) { - if (symbol == NULL) - lr_error (ldfile, _("\ -%s: unknown character in collating symbol name"), - "LC_COLLATE"); - else - { - /* The name is already defined. */ - if (check_duplicate (ldfile, collate, charmap, - repertoire, symbol, symbol_len)) - goto col_sym_free; - - if (insert_entry (&collate->sym_table, - symbol, symbol_len, - new_symbol (collate)) < 0) - lr_error (ldfile, _("\ -error while adding collating symbol")); - } + if (elemp->wcs[0] == runp->wcs[0]) + break; + elemp = elemp->wcnext; } - else + + if (elemp == NULL && ++cnt[nr] > act_planes) { - col_sym_free: - if (symbol != NULL) - free ((char *) symbol); + act_planes = cnt[nr]; + + runp->wcnext = elem[nr]; + elem[nr] = runp; + + if ((act_size + PENALTY) * act_planes >= min_total) + break; } - lr_ignore_rest (ldfile, 1); } - break; - case tok_symbol_equivalence: - /* Ignore the rest of the line if we don't need the input of - this line. */ - if (ignore_content) - { - lr_ignore_rest (ldfile, 0); - break; - } + /* Up to the next entry. */ + runp = runp->next; + } - if (state != 0) - goto err_label; + if ((act_size + PENALTY) * act_planes < min_total) + { + min_total = (act_size + PENALTY) * act_planes; + collate->plane_size = act_size; + collate->plane_cnt = act_planes; + } - arg = lr_token (ldfile, charmap, repertoire); - if (arg->tok != tok_bsymbol) - goto err_label; - else + ++act_size; + } + while (act_size < min_total); + + if (nr_wide_elems > 512 && !be_quiet) + fputs (_(" done\n"), stderr); + + /* Now that we know how large the table has to be we are able to + allocate the array and start adding the characters to the lists + in the same way we did it for the multibyte characters. */ + collate->wcheads = (struct element_t **) + obstack_alloc (&collate->mempool, (collate->plane_size + * collate->plane_cnt + * sizeof (struct element_t *))); + memset (collate->wcheads, '\0', (collate->plane_size + * collate->plane_cnt + * sizeof (struct element_t *))); + + /* Start adding. */ + runp = collate->start; + while (runp != NULL) + { + if (runp->wcs != NULL) + { + struct element_t **eptr; + size_t idx; + + /* Find a free index. */ + idx = runp->wcs[0] % collate->plane_size; + while (collate->wcheads[idx] != NULL) { - const char *newname = arg->val.str.startmb; - size_t newname_len = arg->val.str.lenmb; - const char *symname; - size_t symname_len; - struct symbol_t *symval; + /* Stop if this is an entry with the same starting character. */ + if (collate->wcheads[idx]->wcs[0] == runp->wcs[0]) + break; - arg = lr_token (ldfile, charmap, repertoire); - if (arg->tok != tok_bsymbol) - { - if (newname != NULL) - free ((char *) newname); - goto err_label; - } + idx += collate->plane_size; + } - symname = arg->val.str.startmb; - symname_len = arg->val.str.lenmb; + /* Find the point where to insert in the list. */ + eptr = &collate->wcheads[idx]; + while (*eptr != NULL) + { + if ((*eptr)->nwcs < runp->nwcs) + break; - if (!ignore_content) + if ((*eptr)->nwcs == runp->nwcs) { - if (newname == NULL) - { - lr_error (ldfile, _("\ -%s: unknown character in equivalent definition name"), - "LC_COLLATE"); - goto sym_equiv_free; - } - if (symname == NULL) - { - lr_error (ldfile, _("\ -%s: unknown character in equivalent definition value"), - "LC_COLLATE"); - goto sym_equiv_free; - } - /* The name is already defined. */ - if (check_duplicate (ldfile, collate, charmap, - repertoire, symname, symname_len)) - goto col_sym_free; - - /* See whether the symbol name is already defined. */ - if (find_entry (&collate->sym_table, symname, symname_len, - (void **) &symval) != 0) - { - lr_error (ldfile, _("\ -%s: unknown symbol `%s' in equivalent definition"), - "LC_COLLATE", symname); - goto col_sym_free; - } + int c = wmemcmp ((wchar_t *) (*eptr)->wcs, + (wchar_t *) runp->wcs, runp->nwcs); - if (insert_entry (&collate->sym_table, - newname, newname_len, symval) < 0) + if (c == 0) { - lr_error (ldfile, _("\ -error while adding equivalent collating symbol")); - goto sym_equiv_free; + /* This should not happen. It means that we have + to symbols with the same byte sequence. It is + of course an error. */ + error_at_line (0, 0, (*eptr)->file, (*eptr)->line, + _("symbol `%s' has same encoding as"), + (*eptr)->name); + error_at_line (0, 0, runp->file, runp->line, + _("symbol `%s'"), runp->name); + goto dont_insertwc; } - - free ((char *) symname); - } - else - { - sym_equiv_free: - if (newname != NULL) - free ((char *) newname); - if (symname != NULL) - free ((char *) symname); + else if (c < 0) + /* Insert it here. */ + break; } - lr_ignore_rest (ldfile, 1); - } - break; - case tok_order_start: - /* Ignore the rest of the line if we don't need the input of - this line. */ - if (ignore_content) - { - lr_ignore_rest (ldfile, 0); - break; + /* To the next entry. */ + eptr = &(*eptr)->wcnext; } - if (state != 0 && state != 1) - goto err_label; - state = 1; + /* Set the pointers. */ + runp->wcnext = *eptr; + *eptr = runp; + dont_insertwc: + } - /* The 14652 draft does not specify whether all `order_start' lines - must contain the same number of sort-rules, but 14651 does. So - we require this here as well. */ - arg = lr_token (ldfile, charmap, repertoire); - if (arg->tok == tok_bsymbol) - { - /* This better should be a section name. */ - struct section_list *sp = collate->sections; - while (sp != NULL - && strcmp (sp->name, arg->val.str.startmb) != 0) - sp = sp->next; + /* Up to the next entry. */ + runp = runp->next; + } - if (sp == NULL) - { - lr_error (ldfile, _("\ -%s: unknown section name `%s'"), - "LC_COLLATE", arg->val.str.startmb); - /* We use the error section. */ - collate->current_section = &collate->error_section; - } - else - { - /* Remember this section. */ - collate->current_section = sp; + /* Now determine whether the UNDEFINED entry is needed and if yes, + whether it was defined. */ + collate->undefined.used_in_level = need_undefined ? ~0ul : 0; + if (collate->undefined.file == NULL) + { + if (need_undefined) + { + error (0, 0, _("no definition of `UNDEFINED'")); - /* One should not be allowed to open the same - section twice. */ - if (sp->first != NULL) - lr_error (ldfile, _("\ -%s: multiple order definitions for section `%s'"), - "LC_COLLATE", sp->name); + /* Add UNDEFINED at the end. */ + collate->undefined.mborder = + (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int)); - /* Next should come the end of the line or a semicolon. */ - arg = lr_token (ldfile, charmap, repertoire); - if (arg->tok == tok_eol) - { - uint32_t cnt; + for (i = 0; i < nrules; ++i) + collate->undefined.mborder[i] = mbact[i]++; + } - /* This means we have exactly one rule: `forward'. */ - if (collate->nrules > 1) - lr_error (ldfile, _("\ -%s: invalid number of sorting rules"), - "LC_COLLATE"); - else - collate->nrules = 1; - sp->rules = obstack_alloc (&collate->mempool, - (sizeof (enum coll_sort_rule) - * collate->nrules)); - for (cnt = 0; cnt < collate->nrules; ++cnt) - sp->rules[cnt] = sort_forward; + /* In any case we will need the definition for the wide character + case. But we will not complain that it is missing since the + specification strangely enough does not seem to account for + this. */ + collate->undefined.wcorder = wcact++; + } - /* Next line. */ - break; - } + /* Finally, try to unify the rules for the sections. Whenever the rules + for a section are the same as those for another section give the + ruleset the same index. Since there are never many section we can + use an O(n^2) algorithm here. */ + sect = collate->sections; + assert (sect != NULL); + ruleidx = 0; + do + { + struct section_list *osect = collate->sections; - /* Get the next token. */ - arg = lr_token (ldfile, charmap, repertoire); - } - } - else - { - /* There is no section symbol. Therefore we use the unnamed - section. */ - collate->current_section = &collate->unnamed_section; + while (osect != sect) + if (memcmp (osect->rules, sect->rules, nrules) == 0) + break; + else + osect = osect->next; - if (collate->unnamed_section.first != NULL) - lr_error (ldfile, _("\ -%s: multiple order definitions for unnamed section"), - "LC_COLLATE"); - } + if (osect == sect) + sect->ruleidx = ruleidx++; + else + sect->ruleidx = osect->ruleidx; - /* Now read the direction names. */ - read_directions (ldfile, arg, charmap, repertoire, collate); + /* Next section. */ + sect = sect->next; + } + while (sect != NULL); + /* We are currently not prepared for more than 256 rulesets. But this + should never really be a problem. */ + assert (ruleidx <= 256); +} - /* From now be need the strings untranslated. */ - ldfile->translate_strings = 0; - break; - case tok_order_end: - /* Ignore the rest of the line if we don't need the input of - this line. */ - if (ignore_content) - { - lr_ignore_rest (ldfile, 0); - break; - } +static int32_t +output_weight (struct obstack *pool, struct locale_collate_t *collate, + struct element_t *elem) +{ + size_t cnt; + int32_t retval; - if (state != 1) - goto err_label; - state = 2; - lr_ignore_rest (ldfile, 1); - break; + /* Optimize the use of UNDEFINED. */ + if (elem == &collate->undefined) + /* The weights are already inserted. */ + return 0; - case tok_reorder_after: - /* Ignore the rest of the line if we don't need the input of - this line. */ - if (ignore_content) - { - lr_ignore_rest (ldfile, 0); - break; - } + /* This byte can start exactly one collation element and this is + a single byte. We can directly give the index to the weights. */ + retval = obstack_object_size (pool); - if (state != 2 && state != 3) - goto err_label; - state = 3; + /* Construct the weight. */ + for (cnt = 0; cnt < nrules; ++cnt) + { + char buf[elem->weights[cnt].cnt * 7]; + int len = 0; + int i; + + for (i = 0; i < elem->weights[cnt].cnt; ++i) + /* Encode the weight value. We do nothing for IGNORE entries. */ + if (elem->weights[cnt].w[i] != NULL) + len += utf8_encode (&buf[len], + elem->weights[cnt].w[i]->mborder[cnt]); + + /* And add the buffer content. */ + obstack_1grow (pool, len); + obstack_grow (pool, buf, len); + } - arg = lr_token (ldfile, charmap, repertoire); - if (arg->tok == tok_bsymbol) - { - /* Find this symbol in the sequence table. */ - struct element_t *insp; - int no_error = 1; + return retval | ((elem->section->ruleidx & 0x7f) << 24); +} - if (find_entry (&collate->seq_table, arg->val.str.startmb, - arg->val.str.lenmb, (void **) &insp) == 0) - /* Yes, the symbol exists. Simply point the cursor - to it. */ - collate->cursor = insp; - else - { - /* This is bad. The symbol after which we have to - insert does not exist. */ - lr_error (ldfile, _("\ -%s: cannot reorder after %.*s: symbol not known"), - "LC_COLLATE", arg->val.str.lenmb, - arg->val.str.startmb); - collate->cursor = NULL; - no_error = 0; - } - lr_ignore_rest (ldfile, no_error); - } - else - /* This must not happen. */ - goto err_label; - break; +static int32_t +output_weightwc (struct obstack *pool, struct locale_collate_t *collate, + struct element_t *elem) +{ + size_t cnt; + int32_t retval; - case tok_reorder_end: - /* Ignore the rest of the line if we don't need the input of - this line. */ - if (ignore_content) - break; + /* Optimize the use of UNDEFINED. */ + if (elem == &collate->undefined) + /* The weights are already inserted. */ + return 0; - if (state != 3) - goto err_label; - state = 4; - lr_ignore_rest (ldfile, 1); - break; + /* This byte can start exactly one collation element and this is + a single byte. We can directly give the index to the weights. */ + retval = obstack_object_size (pool); - case tok_bsymbol: - /* Ignore the rest of the line if we don't need the input of - this line. */ - if (ignore_content) - { - lr_ignore_rest (ldfile, 0); - break; - } + /* Construct the weight. */ + for (cnt = 0; cnt < nrules; ++cnt) + { + int32_t buf[elem->weights[cnt].cnt]; + int32_t i; - if (state != 1 && state != 3) - goto err_label; + for (i = 0; i < elem->weights[cnt].cnt; ++i) + if (elem->weights[cnt].w[i] != NULL) + buf[i] = elem->weights[cnt].w[i]->wcorder; - if (state == 3) - { - /* It is possible that we already have this collation sequence. - In this case we move the entry. */ - struct element_t *seqp; - - /* If the symbol after which we have to insert was not found - ignore all entries. */ - if (collate->cursor == NULL) - { - lr_ignore_rest (ldfile, 0); - break; - } + /* And add the buffer content. */ + if (sizeof (int) == sizeof (int32_t)) + obstack_int_grow (pool, i); + else + obstack_grow (pool, &i, sizeof (int32_t)); - if (find_entry (&collate->seq_table, arg->val.str.startmb, - arg->val.str.lenmb, (void **) &seqp) == 0) - { - /* Remove the entry from the old position. */ - if (seqp->last == NULL) - collate->start = seqp->next; - else - seqp->last->next = seqp->next; - if (seqp->next != NULL) - seqp->next->last = seqp->last; + obstack_grow (pool, buf, i * sizeof (int32_t)); + } - /* We also have to check whether this entry is the - first or last of a section. */ - if (seqp->section->first == seqp) - { - if (seqp->section->first == seqp->section->last) - /* This setion has no content anymore. */ - seqp->section->first = seqp->section->last = NULL; - else - seqp->section->first = seqp->next; - } - else if (seqp->section->last == seqp) - seqp->section->last = seqp->last; + return retval | ((elem->section->ruleidx & 0x7f) << 24); +} - /* Now insert it in the new place. */ - seqp->next = collate->cursor->next; - seqp->last = collate->cursor; - collate->cursor->next = seqp; - if (seqp->next != NULL) - seqp->next->last = seqp; - seqp->section = collate->cursor->section; - if (seqp->section->last == collate->cursor) - seqp->section->last = seqp; +void +collate_output (struct localedef_t *locale, struct charmap_t *charmap, + const char *output_path) +{ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE); + struct iovec iov[2 + nelems]; + struct locale_file data; + uint32_t idx[nelems]; + size_t cnt; + size_t ch; + int32_t tablemb[256]; + struct obstack weightpool; + struct obstack extrapool; + struct obstack indirectpool; + struct section_list *sect; + uint32_t *names; + uint32_t *tablewc; + size_t table_size; + uint32_t elem_size; + uint32_t *elem_table; + int i; + struct element_t *runp; - break; - } + data.magic = LIMAGIC (LC_COLLATE); + data.n = nelems; + iov[0].iov_base = (void *) &data; + iov[0].iov_len = sizeof (data); - /* Otherwise we just add a new entry. */ - } + iov[1].iov_base = (void *) idx; + iov[1].iov_len = sizeof (idx); - /* Now insert in the new place. */ - insert_value (ldfile, arg, charmap, repertoire, collate); - break; + idx[0] = iov[0].iov_len + iov[1].iov_len; + cnt = 0; - case tok_undefined: - /* Ignore the rest of the line if we don't need the input of - this line. */ - if (ignore_content) - { - lr_ignore_rest (ldfile, 0); - break; - } + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES)); + iov[2 + cnt].iov_base = &nrules; + iov[2 + cnt].iov_len = sizeof (uint32_t); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; - if (state != 1) - goto err_label; + /* If we have no LC_COLLATE data emit only the number of rules as zero. */ + if (collate == NULL) + { + int32_t dummy = 0; - /* See whether UNDEFINED already appeared somewhere. */ - if (collate->undefined.next != NULL - || (collate->cursor != NULL - && collate->undefined.next == collate->cursor)) + while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE)) + { + /* The words have to be handled specially. */ + if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE) + || cnt == _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS) + || cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB)) { - lr_error (ldfile, _("order for `%.*s' already defined at %s:%Z"), - 9, "UNDEFINED", collate->undefined.file, - collate->undefined.line); - lr_ignore_rest (ldfile, 0); + iov[2 + cnt].iov_base = &dummy; + iov[2 + cnt].iov_len = sizeof (int32_t); } else - /* Parse the weights. */ - insert_weights (ldfile, &collate->undefined, charmap, - repertoire, collate); - break; - - case tok_ellipsis3: - /* Ignore the rest of the line if we don't need the input of - this line. */ - if (ignore_content) { - lr_ignore_rest (ldfile, 0); - break; + iov[2 + cnt].iov_base = (char *) ""; + iov[2 + cnt].iov_len = 0; } - if (state != 1 && state != 3) - goto err_label; + if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE)) + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + } - was_ellipsis = 1; - /* XXX Read the remainder of the line and remember what are - the weights. */ - break; + assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE)); - case tok_end: - /* Next we assume `LC_COLLATE'. */ - if (!ignore_content) - { - if (state == 0) - /* We must either see a copy statement or have - ordering values. */ - lr_error (ldfile, - _("%s: empty category description not allowed"), - "LC_COLLATE"); - else if (state == 1) - lr_error (ldfile, _("%s: missing `order_end' keyword"), - "LC_COLLATE"); - else if (state == 3) - error (0, 0, _("%s: missing `reorder-end' keyword"), - "LC_COLLATE"); - } - arg = lr_token (ldfile, charmap, NULL); - if (arg->tok == tok_eof) - break; - if (arg->tok == tok_eol) - lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE"); - else if (arg->tok != tok_lc_collate) - lr_error (ldfile, _("\ -%1$s: definition does not end with `END %1$s'"), "LC_COLLATE"); - lr_ignore_rest (ldfile, arg->tok == tok_lc_collate); - return; + write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov); - default: - err_label: - SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE"); - } + return; + } - /* Prepare for the next round. */ - now = lr_token (ldfile, charmap, NULL); - nowtok = now->tok; + obstack_init (&weightpool); + obstack_init (&extrapool); + obstack_init (&indirectpool); + + /* Since we are using the sign of an integer to mark indirection the + offsets in the arrays we are indirectly referring to must not be + zero since -0 == 0. Therefore we add a bit of dummy content. */ + if (sizeof (int) == sizeof (int32_t)) + { + obstack_int_grow (&extrapool, 0); + obstack_int_grow (&indirectpool, 0); + } + else + { + int32_t zero = 0; + obstack_grow (&extrapool, &zero, sizeof (zero)); + obstack_grow (&indirectpool, &zero, sizeof (zero)); } - /* When we come here we reached the end of the file. */ - lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE"); -} + /* Prepare the ruleset table. */ + for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next) + if (sect->ruleidx == i) + { + int j; + obstack_make_room (&weightpool, nrules); -#if 0 + for (j = 0; j < nrules; ++j) + obstack_1grow_fast (&weightpool, sect->rules[j]); + ++i; + } + /* And align the output. */ + i = (nrules * i) % __alignof__ (int32_t); + if (i > 0) + do + obstack_1grow (&weightpool, '\0'); + while (++i < __alignof__ (int32_t)); + + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS)); + iov[2 + cnt].iov_len = obstack_object_size (&weightpool); + iov[2 + cnt].iov_base = obstack_finish (&weightpool); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + /* Generate the 8-bit table. Walk through the lists of sequences + starting with the same byte and add them one after the other to + the table. In case we have more than one sequence starting with + the same byte we have to use extra indirection. + + First add a record for the NUL byte. This entry will never be used + so it does not matter. */ + tablemb[0] = 0; + + /* Now insert the `UNDEFINED' value if it is used. Since this value + will probably be used more than once it is good to store the + weights only once. */ + if (collate->undefined.used_in_level != 0) + output_weight (&weightpool, collate, &collate->undefined); + + for (ch = 1; ch < 256; ++ch) + if (collate->mbheads[ch]->mbnext == NULL + && collate->mbheads[ch]->nmbs == 1) + { + tablemb[ch] = output_weight (&weightpool, collate, + collate->mbheads[ch]); + } + else + { + /* The entries in the list are sorted by length and then + alphabetically. This is the order in which we will add the + elements to the collation table. This allows to simply + walk the table in sequence and stop at the first matching + entry. Since the longer sequences are coming first in the + list they have the possibility to match first, just as it + has to be. In the worst case we are walking to the end of + the list where we put, if no singlebyte sequence is defined + in the locale definition, the weights for UNDEFINED. + + To reduce the length of the search list we compress them a bit. + This happens by collecting sequences of consecutive byte + sequences in one entry (having and begin and end byte sequence) + and add only one index into the weight table. We can find the + consecutive entries since they are also consecutive in the list. */ + struct element_t *runp = collate->mbheads[ch]; + struct element_t *lastp; + + tablemb[ch] = -obstack_object_size (&extrapool); + + do + { + /* Store the current index in the weight table. We know that + the current position in the `extrapool' is aligned on a + 32-bit address. */ + int32_t weightidx; + int added; + + /* Output the weight info. */ + weightidx = output_weight (&weightpool, collate, runp); + + /* Find out wether this is a single entry or we have more than + one consecutive entry. */ + if (runp->mbnext != NULL + && runp->nmbs == runp->mbnext->nmbs + && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0 + && (runp->mbs[runp->nmbs - 1] + 1 + == runp->mbnext->mbs[runp->nmbs - 1])) + { + int i; + + /* Now add first the initial byte sequence. */ + added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1) + + __alignof__ (int32_t) - 1) + & ~(__alignof__ (int32_t) - 1)); + obstack_make_room (&extrapool, added); + + /* More than one consecutive entry. We mark this by having + a negative index into the indirect table. */ + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow_fast (&extrapool, + obstack_object_size (&indirectpool) + / sizeof (int32_t)); + else + { + int32_t i = (obstack_object_size (&indirectpool) + / sizeof (int32_t)); + obstack_grow (&extrapool, &i, sizeof (int32_t)); + } + obstack_1grow_fast (&extrapool, runp->nmbs - 1); + for (i = 1; i < runp->nmbs; ++i) + obstack_1grow_fast (&extrapool, runp->mbs[i]); -/* What kind of symbols get defined? */ -enum coll_symbol -{ - undefined, - ellipsis, - character, - element, - symbol -}; + /* Now find the end of the consecutive sequence and + add all the indeces in the indirect pool. */ + while (1) + { + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow (&extrapool, weightidx); + else + obstack_grow (&extrapool, &weightidx, sizeof (int32_t)); + + runp = runp->next; + if (runp->mbnext == NULL + || runp->nmbs != runp->mbnext->nmbs + || memcmp (runp->mbs, runp->mbnext->mbs, + runp->nmbs - 1) != 0 + || (runp->mbs[runp->nmbs - 1] + 1 + != runp->mbnext->mbs[runp->nmbs - 1])) + break; + /* Insert the weight. */ + weightidx = output_weight (&weightpool, collate, runp); + } -typedef struct patch_t -{ - const char *fname; - size_t lineno; - const char *token; - union - { - unsigned int *pos; - size_t idx; - } where; - struct patch_t *next; -} patch_t; - - -typedef struct element_t -{ - const char *namemb; - const uint32_t *namewc; - unsigned int this_weight; + /* And add the end byte sequence. Without length this + time. */ + for (i = 1; i < runp->nmbs; ++i) + obstack_1grow_fast (&extrapool, runp->mbs[i]); - struct element_t *next; + weightidx = output_weight (&weightpool, collate, runp); + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow (&extrapool, weightidx); + else + obstack_grow (&extrapool, &weightidx, sizeof (int32_t)); + } + else + { + /* A single entry. Simply add the index and the length and + string (except for the first character which is already + tested for). */ + int i; + + added = ((sizeof (int32_t) + 1 + runp->nmbs - 1 + + __alignof__ (int32_t) - 1) + & ~(__alignof__ (int32_t) - 1)); + obstack_make_room (&extrapool, added); + + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow_fast (&extrapool, weightidx); + else + obstack_grow (&extrapool, &weightidx, sizeof (int32_t)); + obstack_1grow_fast (&extrapool, runp->nmbs - 1); + for (i = 1; i < runp->nmbs; ++i) + obstack_1grow_fast (&extrapool, runp->mbs[i]); + } + + /* Add alignment bytes if necessary. */ + i = added % __alignof__ (int32_t); + if (i > 0) + do + obstack_1grow_fast (&extrapool, '\0'); + while (++i != __alignof__ (int32_t)); - unsigned int *ordering; - size_t ordering_len; -} element_t; + /* Next entry. */ + lastp = runp; + runp = runp->mbnext; + } + while (runp != NULL); + /* If the final entry in the list is not a single character we + add an UNDEFINED entry here. */ + if (lastp->nmbs != 1) + { + int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t)) + & ~(__alignof__ (int32_t) - 1)); + obstack_make_room (&extrapool, added); -/* The real definition of the struct for the LC_COLLATE locale. */ -struct locale_collate_t -{ - /* Collate symbol table. Simple mapping to number. */ - hash_table symbols; + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow_fast (&extrapool, 0); + else + { + int32_t zero = 0; + obstack_grow (&extrapool, &zero, sizeof (int32_t)); + } + /* XXX What rule? We just pick the first. */ + obstack_1grow_fast (&extrapool, 0); + /* Length is zero. */ + obstack_1grow_fast (&extrapool, 0); + + /* Add alignment bytes if necessary. */ + i = added % __alignof__ (int32_t); + if (i > 0) + do + obstack_1grow_fast (&extrapool, '\0'); + while (++i != __alignof__ (int32_t)); + } + } - /* The collation elements. */ - hash_table elements; - struct obstack element_mem; + /* Now add the four tables. */ + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB)); + iov[2 + cnt].iov_base = tablemb; + iov[2 + cnt].iov_len = sizeof (tablemb); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB)); + iov[2 + cnt].iov_len = obstack_object_size (&weightpool); + iov[2 + cnt].iov_base = obstack_finish (&weightpool); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB)); + iov[2 + cnt].iov_len = obstack_object_size (&extrapool); + iov[2 + cnt].iov_base = obstack_finish (&extrapool); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB)); + iov[2 + cnt].iov_len = obstack_object_size (&indirectpool); + iov[2 + cnt].iov_base = obstack_finish (&indirectpool); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + + /* Now the same for the wide character table. We need to store some + more information here. */ + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)); + iov[2 + cnt].iov_base = &collate->plane_size; + iov[2 + cnt].iov_len = sizeof (collate->plane_size); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)); + iov[2 + cnt].iov_base = &collate->plane_cnt; + iov[2 + cnt].iov_len = sizeof (collate->plane_cnt); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + /* Construct a table with the names. The size of the table is the same + as the table with the pointers. */ + table_size = collate->plane_size * collate->plane_cnt; + names = (uint32_t *) alloca (table_size * sizeof (uint32_t)); + for (ch = 0; ch < table_size; ++ch) + if (collate->wcheads[ch] == NULL) + names[ch] = 0; + else + names[ch] = collate->wcheads[ch]->wcs[0]; + + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NAMES)); + iov[2 + cnt].iov_base = names; + iov[2 + cnt].iov_len = table_size * sizeof (uint32_t); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + /* Since we are using the sign of an integer to mark indirection the + offsets in the arrays we are indirectly referring to must not be + zero since -0 == 0. Therefore we add a bit of dummy content. */ + if (sizeof (int) == sizeof (int32_t)) + { + obstack_int_grow (&extrapool, 0); + obstack_int_grow (&indirectpool, 0); + } + else + { + int32_t zero = 0; + obstack_grow (&extrapool, &zero, sizeof (zero)); + obstack_grow (&indirectpool, &zero, sizeof (zero)); + } - /* The result tables. */ - hash_table resultmb; - hash_table resultwc; + /* Now insert the `UNDEFINED' value if it is used. Since this value + will probably be used more than once it is good to store the + weights only once. */ + output_weightwc (&weightpool, collate, &collate->undefined); + + /* Generate the table. Walk through the lists of sequences + starting with the same byte and add them one after the other to + the table. In case we have more than one sequence starting with + the same byte we have to use extra indirection. */ + tablewc = (uint32_t *) alloca (table_size * sizeof (uint32_t)); + for (ch = 0; ch < table_size; ++ch) + if (collate->wcheads[ch] == NULL) + { + /* Set the entry to zero. */ + tablewc[ch] = 0; + } + else if (collate->wcheads[ch]->wcnext == NULL + && collate->wcheads[ch]->nwcs == 1) + { + tablewc[ch] = output_weightwc (&weightpool, collate, + collate->wcheads[ch]); + } + else + { + /* As for the singlebyte table, we recognize sequences and + compress them. */ + struct element_t *runp = collate->wcheads[ch]; + struct element_t *lastp; - /* Sorting rules given in order_start line. */ - uint32_t nrules; - enum coll_sort_rule *rules; + tablewc[ch] = -obstack_object_size (&extrapool); - /* Used while recognizing symbol composed of multiple tokens - (collating-element). */ - const char *combine_token; - size_t combine_token_len; - - /* How many sorting order specifications so far. */ - unsigned int order_cnt; - - /* Was lastline ellipsis? */ - int was_ellipsis; - /* Value of last entry if was character. */ - uint32_t last_char; - /* Current element. */ - element_t *current_element; - /* What kind of symbol is current element. */ - enum coll_symbol kind; - - /* Patch lists. */ - patch_t *current_patch; - patch_t *all_patches; - - /* Room for the UNDEFINED information. */ - element_t undefined; - unsigned int undefined_len; - - /* Script information. */ - const char **scripts; - unsigned int nscripts; -}; + do + { + /* Store the current index in the weight table. We know that + the current position in the `extrapool' is aligned on a + 32-bit address. */ + int32_t weightidx; + int added; + + /* Output the weight info. */ + weightidx = output_weightwc (&weightpool, collate, runp); + + /* Find out wether this is a single entry or we have more than + one consecutive entry. */ + if (runp->wcnext != NULL + && runp->nwcs == runp->wcnext->nwcs + && wmemcmp ((wchar_t *) runp->wcs, + (wchar_t *)runp->wcnext->wcs, runp->nwcs - 1) == 0 + && (runp->wcs[runp->nwcs - 1] + 1 + == runp->wcnext->wcs[runp->nwcs - 1])) + { + int i; + /* Now add first the initial byte sequence. */ + added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t); + if (sizeof (int32_t) == sizeof (int)) + obstack_make_room (&extrapool, added); -/* Be verbose? Defined in localedef.c. */ -extern int verbose; + /* More than one consecutive entry. We mark this by having + a negative index into the indirect table. */ + if (sizeof (int32_t) == sizeof (int)) + { + obstack_int_grow_fast (&extrapool, + obstack_object_size (&indirectpool) + / sizeof (int32_t)); + obstack_int_grow_fast (&extrapool, runp->nwcs - 1); + } + else + { + int32_t i = (obstack_object_size (&indirectpool) + / sizeof (int32_t)); + obstack_grow (&extrapool, &i, sizeof (int32_t)); + i = runp->nwcs - 1; + obstack_grow (&extrapool, &i, sizeof (int32_t)); + } + for (i = 1; i < runp->nwcs; ++i) + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow_fast (&extrapool, runp->wcs[i]); + else + obstack_grow (&extrapool, &runp->wcs[i], sizeof (int32_t)); + /* Now find the end of the consecutive sequence and + add all the indeces in the indirect pool. */ + while (1) + { + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow (&extrapool, weightidx); + else + obstack_grow (&extrapool, &weightidx, sizeof (int32_t)); + + runp = runp->next; + if (runp->wcnext == NULL + || runp->nwcs != runp->wcnext->nwcs + || wmemcmp ((wchar_t *) runp->wcs, + (wchar_t *) runp->wcnext->wcs, + runp->nwcs - 1) != 0 + || (runp->wcs[runp->nwcs - 1] + 1 + != runp->wcnext->wcs[runp->nwcs - 1])) + break; + /* Insert the weight. */ + weightidx = output_weightwc (&weightpool, collate, runp); + } -#define obstack_chunk_alloc malloc -#define obstack_chunk_free free + /* And add the end byte sequence. Without length this + time. */ + for (i = 1; i < runp->nwcs; ++i) + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow (&extrapool, runp->wcs[i]); + else + obstack_grow (&extrapool, &runp->wcs[i], sizeof (int32_t)); + weightidx = output_weightwc (&weightpool, collate, runp); + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow (&extrapool, weightidx); + else + obstack_grow (&extrapool, &weightidx, sizeof (int32_t)); + } + else + { + /* A single entry. Simply add the index and the length and + string (except for the first character which is already + tested for). */ + int i; -/* Prototypes for local functions. */ -static void collate_startup (struct linereader *ldfile, - struct localedef_t *locale, - struct charmap_t *charmap, int ignore_content); + added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t); + if (sizeof (int) == sizeof (int32_t)) + obstack_make_room (&extrapool, added); + if (sizeof (int32_t) == sizeof (int)) + { + obstack_int_grow_fast (&extrapool, weightidx); + obstack_int_grow_fast (&extrapool, runp->nwcs - 1); + } + else + { + int32_t l = runp->nwcs - 1; + obstack_grow (&extrapool, &weightidx, sizeof (int32_t)); + obstack_grow (&extrapool, &l, sizeof (int32_t)); + } + for (i = 1; i < runp->nwcs; ++i) + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow_fast (&extrapool, runp->wcs[i]); + else + obstack_grow (&extrapool, &runp->wcs[i], sizeof (int32_t)); + } -static void -collate_startup (struct linereader *ldfile, struct localedef_t *locale, - struct charmap_t *charset, int ignore_content) -{ - struct locale_collate_t *collate; + /* Next entry. */ + lastp = runp; + runp = runp->wcnext; + } + while (runp != NULL); + } - /* Allocate the needed room. */ - locale->categories[LC_COLLATE].collate = collate = - (struct locale_collate_t *) xmalloc (sizeof (struct locale_collate_t)); + /* Now add the four tables. */ + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC)); + iov[2 + cnt].iov_base = tablewc; + iov[2 + cnt].iov_len = table_size * sizeof (int32_t); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC)); + iov[2 + cnt].iov_len = obstack_object_size (&weightpool); + iov[2 + cnt].iov_base = obstack_finish (&weightpool); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC)); + iov[2 + cnt].iov_len = obstack_object_size (&extrapool); + iov[2 + cnt].iov_base = obstack_finish (&extrapool); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC)); + iov[2 + cnt].iov_len = obstack_object_size (&indirectpool); + iov[2 + cnt].iov_base = obstack_finish (&indirectpool); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + + + /* Finally write the table with collation element names out. It is + a hash table with a simple function which gets the name of the + character as the input. One character might have many names. The + value associated with the name is an index into the weight table + where we are then interested in the first-level weight value. + + To determine how large the table should be we are counting the + elements have to put in. Since we are using internal chaining + using a secondary hash function we have to make the table a bit + larger to avoid extremely long search times. We can achieve + good results with a 40% larger table than there are entries. */ + elem_size = 0; + runp = collate->start; + while (runp != NULL) + { + if (runp->mbs != NULL && runp->weights != NULL) + /* Yep, the element really counts. */ + ++elem_size; + + runp = runp->next; + } + /* Add 40% and find the next prime number. */ + elem_size = MIN (next_prime (elem_size * 1.4), 257); + + /* Allocate the table. Each entry consists of two words: the hash + value and an index in a secondary table which provides the index + into the weight table and the string itself (so that a match can + be determined). */ + elem_table = (uint32_t *) obstack_alloc (&extrapool, + elem_size * 2 * sizeof (uint32_t)); + memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t)); + + /* Now add the elements. */ + runp = collate->start; + while (runp != NULL) + { + if (runp->mbs != NULL && runp->weights != NULL) + { + /* Compute the hash value of the name. */ + uint32_t namelen = strlen (runp->name); + uint32_t hash = elem_hash (runp->name, namelen); + size_t idx = hash % elem_size; + + if (elem_table[idx * 2] != 0) + { + /* The spot is already take. Try iterating using the value + from the secondary hashing function. */ + size_t iter = hash % (elem_size - 2); - /* Allocate hash table for collating elements. */ - if (init_hash (&collate->elements, 512)) - error (4, 0, _("memory exhausted")); - collate->combine_token = NULL; - obstack_init (&collate->element_mem); + do + { + idx += iter; + if (idx >= elem_size) + idx -= elem_size; + } + while (elem_table[idx * 2] != 0); + + /* This is the spot where we will insert the value. */ + elem_table[idx * 2] = hash; + elem_table[idx * 2 + 1] = obstack_object_size (&extrapool); + + /* The the string itself including length. */ + obstack_1grow (&extrapool, namelen); + obstack_grow (&extrapool, runp->name, namelen); + + /* And the multibyte representation. */ + obstack_1grow (&extrapool, runp->nmbs); + obstack_grow (&extrapool, runp->mbs, runp->nmbs); + + /* And align again to 32 bits. */ + if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0) + obstack_grow (&extrapool, "\0\0", + (sizeof (int32_t) + - ((1 + namelen + 1 + runp->nmbs) + % sizeof (int32_t)))); + } + } - /* Allocate hash table for collating elements. */ - if (init_hash (&collate->symbols, 64)) - error (4, 0, _("memory exhausted")); + runp = runp->next; + } - /* Allocate hash table for result. */ - if (init_hash (&collate->result, 512)) - error (4, 0, _("memory exhausted")); + /* Prepare to write out this data. */ + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB)); + iov[2 + cnt].iov_base = &elem_size; + iov[2 + cnt].iov_len = sizeof (int32_t); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; - collate->nrules = 0; - collate->nrules_max = 10; - collate->rules - = (enum coll_sort_rule *) xmalloc (collate->nrules_max - * sizeof (enum coll_sort_rule)); + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB)); + iov[2 + cnt].iov_base = elem_table; + iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; - collate->order_cnt = 1; /* The smallest weight is 2. */ + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB)); + iov[2 + cnt].iov_len = obstack_object_size (&extrapool); + iov[2 + cnt].iov_base = obstack_finish (&extrapool); + ++cnt; - collate->was_ellipsis = 0; - collate->last_char = L'\0'; /* 0 because leading ellipsis is allowed. */ - collate->all_patches = NULL; + assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE)); - /* This tells us no UNDEFINED entry was found until now. */ - memset (&collate->undefined, '\0', sizeof (collate->undefined)); + write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov); - ldfile->translate_strings = 0; - ldfile->return_widestr = 0; + obstack_free (&weightpool, NULL); + obstack_free (&extrapool, NULL); + obstack_free (&indirectpool, NULL); } void -collate_finish (struct localedef_t *locale, struct charset_t *charset, - struct repertoire_t *repertoire) +collate_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) { - struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; - patch_t *patch; - size_t cnt; + struct repertoire_t *repertoire = NULL; + struct locale_collate_t *collate; + struct token *now; + struct token *arg = NULL; + enum token_t nowtok; + int state = 0; + enum token_t was_ellipsis = tok_none; + struct localedef_t *copy_locale = NULL; - /* Patch the constructed table so that forward references are - correctly filled. */ - for (patch = collate->all_patches; patch != NULL; patch = patch->next) - { - uint32_t wch; - size_t toklen = strlen (patch->token); - void *ptmp; - unsigned int value = 0; + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); - wch = charset_find_value (&charset->char_table, patch->token, toklen); - if (wch != ILLEGAL_CHAR_VALUE) - { - element_t *runp; - - if (find_entry (&collate->result, &wch, sizeof (uint32_t), - (void *) &runp) < 0) - runp = NULL; - for (; runp != NULL; runp = runp->next) - if (runp->name[0] == wch && runp->name[1] == L'\0') - break; - - value = runp == NULL ? 0 : runp->this_weight; - } - else if (find_entry (&collate->elements, patch->token, toklen, &ptmp) - >= 0) - { - value = ((element_t *) ptmp)->this_weight; - } - else if (find_entry (&collate->symbols, patch->token, toklen, &ptmp) - >= 0) - { - value = (unsigned long int) ptmp; - } - else - value = 0; + /* The rest of the line containing `LC_COLLATE' must be free. */ + lr_ignore_rest (ldfile, 1); - if (value == 0) - { - if (!be_quiet) - error_at_line (0, 0, patch->fname, patch->lineno, - _("no weight defined for symbol `%s'"), - patch->token); - } - else - *patch->where.pos = value; + do + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; } + while (nowtok == tok_eol); - /* If no definition for UNDEFINED is given, all characters in the - given charset must be specified. */ - if (collate->undefined.ordering == NULL) + if (nowtok == tok_copy) { - /**************************************************************\ - |* XXX We should test whether really an unspecified character *| - |* exists before giving the message. *| - \**************************************************************/ - uint32_t weight; + state = 2; + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_string) + { + SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE"); + + skip_category: + do + now = lr_token (ldfile, charmap, NULL); + while (now->tok != tok_eof && now->tok != tok_end); - if (!be_quiet) - error (0, 0, _("no definition of `UNDEFINED'")); + if (now->tok != tok_eof + || (now = lr_token (ldfile, charmap, NULL), now->tok == tok_eof)) + lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE"); + else if (now->tok != tok_lc_collate) + { + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + } + else + lr_ignore_rest (ldfile, 1); - collate->undefined.ordering_len = collate->nrules; - weight = ++collate->order_cnt; + return; + } - for (cnt = 0; cnt < collate->nrules; ++cnt) + /* Get the locale definition. */ + copy_locale = load_locale (LC_COLLATE, now->val.str.startmb, + repertoire_name, charmap); + if ((copy_locale->avail & COLLATE_LOCALE) == 0) { - uint32_t one = 1; - obstack_grow (&collate->element_mem, &one, sizeof (one)); + /* Not yet loaded. So do it now. */ + if (locfile_read (copy_locale, charmap) != 0) + goto skip_category; } - for (cnt = 0; cnt < collate->nrules; ++cnt) - obstack_grow (&collate->element_mem, &weight, sizeof (weight)); + lr_ignore_rest (ldfile, 1); - collate->undefined.ordering = obstack_finish (&collate->element_mem); + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; } - collate->undefined_len = 2; /* For the name: 1 x uint32_t + L'\0'. */ - for (cnt = 0; cnt < collate->nrules; ++cnt) - collate->undefined_len += 1 + collate->undefined.ordering[cnt]; -} - - + /* Prepare the data structures. */ + collate_startup (ldfile, result, copy_locale, ignore_content); + collate = result->categories[LC_COLLATE].collate; -void -collate_output (struct localedef_t *locale, struct charset_t *charset, - struct repertoire_t *repertoire, const char *output_path) -{ - struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; - uint32_t table_size, table_best, level_best, sum_best; - void *last; - element_t *pelem; - uint32_t *name; - size_t len; - const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE); - struct iovec iov[2 + nelems]; - struct locale_file data; - uint32_t idx[nelems]; - struct obstack non_simple; - struct obstack string_pool; - size_t cnt, entry_size; - uint32_t undefined_offset = UINT_MAX; - uint32_t *table, *extra, *table2, *extra2; - size_t extra_len; - uint32_t element_hash_tab_size; - uint32_t *element_hash_tab; - uint32_t *element_hash_tab_ob; - uint32_t element_string_pool_size; - char *element_string_pool; - uint32_t element_value_size; - uint32_t *element_value; - uint32_t *element_value_ob; - uint32_t symbols_hash_tab_size; - uint32_t *symbols_hash_tab; - uint32_t *symbols_hash_tab_ob; - uint32_t symbols_string_pool_size; - char *symbols_string_pool; - uint32_t symbols_class_size; - uint32_t *symbols_class; - uint32_t *symbols_class_ob; - hash_table *hash_tab; - unsigned int dummy_weights[collate->nrules + 1]; - - sum_best = UINT_MAX; - table_best = 0xffff; - level_best = 0xffff; - - /* Compute table size. */ - if (!be_quiet) - fputs (_("\ -Computing table size for collation information might take a while..."), - stderr); - for (table_size = 256; table_size < sum_best; ++table_size) + while (1) { - size_t hits[table_size]; - unsigned int worst = 1; - size_t cnt; - - last = NULL; - - for (cnt = 0; cnt < 256; ++cnt) - hits[cnt] = 1; - memset (&hits[256], '\0', sizeof (hits) - 256 * sizeof (size_t)); - - while (iterate_table (&collate->result, &last, (const void **) &name, - &len, (void **) &pelem) >= 0) - if (pelem->ordering != NULL && pelem->name[0] > 0xff) - if (++hits[(unsigned int) pelem->name[0] % table_size] > worst) - { - worst = hits[(unsigned int) pelem->name[0] % table_size]; - if (table_size * worst > sum_best) - break; - } + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; - if (table_size * worst < sum_best) + /* Ingore empty lines. */ + if (nowtok == tok_eol) { - sum_best = table_size * worst; - table_best = table_size; - level_best = worst; + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; } - } - assert (table_best != 0xffff || level_best != 0xffff); - if (!be_quiet) - fputs (_(" done\n"), stderr); - - obstack_init (&non_simple); - obstack_init (&string_pool); - - data.magic = LIMAGIC (LC_COLLATE); - data.n = nelems; - iov[0].iov_base = (void *) &data; - iov[0].iov_len = sizeof (data); - - iov[1].iov_base = (void *) idx; - iov[1].iov_len = sizeof (idx); - - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_base = &collate->nrules; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (uint32_t); - - table = (uint32_t *) alloca (collate->nrules * sizeof (uint32_t)); - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_base = table; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_len - = collate->nrules * sizeof (uint32_t); - /* Another trick here. Describing the collation method needs only a - few bits (3, to be exact). But the binary file should be - accessible by machines with both endianesses and so we store both - forms in the same word. */ - for (cnt = 0; cnt < collate->nrules; ++cnt) - table[cnt] = collate->rules[cnt] | bswap_32 (collate->rules[cnt]); - - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_base = &table_best; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (uint32_t); - - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_base = &level_best; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_len - = sizeof (uint32_t); - - entry_size = 1 + MAX (collate->nrules, 2); - - table = (uint32_t *) alloca (table_best * level_best * entry_size - * sizeof (table[0])); - memset (table, '\0', table_best * level_best * entry_size - * sizeof (table[0])); - - - /* Macros for inserting in output table. */ -#define ADD_VALUE(expr) \ - do { \ - uint32_t to_write = (uint32_t) expr; \ - obstack_grow (&non_simple, &to_write, sizeof (to_write)); \ - } while (0) - -#define ADD_ELEMENT(pelem, len) \ - do { \ - size_t cnt, idx; \ - \ - ADD_VALUE (len); \ - \ - wlen = wcslen (pelem->name); \ - obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (uint32_t)); \ - \ - idx = collate->nrules; \ - for (cnt = 0; cnt < collate->nrules; ++cnt) \ - { \ - size_t disp; \ - \ - ADD_VALUE (pelem->ordering[cnt]); \ - for (disp = 0; disp < pelem->ordering[cnt]; ++disp) \ - ADD_VALUE (pelem->ordering[idx++]); \ - } \ - } while (0) - -#define ADD_FORWARD(pelem) \ - do { \ - /* We leave a reference in the main table and put all \ - information in the table for the extended entries. */ \ - element_t *runp; \ - element_t *has_simple = NULL; \ - size_t wlen; \ - \ - table[(level * table_best + slot) * entry_size + 1] \ - = FORWARD_CHAR; \ - table[(level * table_best + slot) * entry_size + 2] \ - = obstack_object_size (&non_simple) / sizeof (uint32_t); \ - \ - /* Here we have to construct the non-simple table entry. First \ - compute the total length of this entry. */ \ - for (runp = (pelem); runp != NULL; runp = runp->next) \ - if (runp->ordering != NULL) \ - { \ - uint32_t value; \ - size_t cnt; \ - \ - value = 1 + wcslen (runp->name) + 1; \ - \ - for (cnt = 0; cnt < collate->nrules; ++cnt) \ - /* We have to take care for entries without ordering \ - information. While reading them they get inserted in the \ - table and later not removed when something goes wrong with \ - reading its weights. */ \ - value += 1 + runp->ordering[cnt]; \ - \ - if (runp->name[1] == L'\0') \ - has_simple = runp; \ - \ - ADD_ELEMENT (runp, value); \ - } \ - \ - if (has_simple == NULL) \ - { \ - size_t idx, cnt; \ - \ - ADD_VALUE (collate->undefined_len + 1); \ - \ - /* Add the name. */ \ - ADD_VALUE ((pelem)->name[0]); \ - ADD_VALUE (0); \ - \ - idx = collate->nrules; \ - for (cnt = 0; cnt < collate->nrules; ++cnt) \ - { \ - size_t disp; \ - \ - ADD_VALUE (collate->undefined.ordering[cnt]); \ - for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \ - { \ - if ((uint32_t) collate->undefined.ordering[idx] \ - == ELLIPSIS_CHAR) \ - ADD_VALUE ((pelem)->name[0]); \ - else \ - ADD_VALUE (collate->undefined.ordering[idx++]); \ - ++idx; \ - } \ - } \ - } \ - } while (0) - - - - /* Fill the table now. First we look for all the characters which - fit into one single byte. This speeds up the 8-bit string - functions. */ - last = NULL; - while (iterate_table (&collate->result, &last, (const void **) &name, - &len, (void **) &pelem) >= 0) - if (pelem->name[0] <= 0xff) - { - /* We have a single byte name. Now we must distinguish - between entries in simple form (i.e., only one value per - weight and no collation element starting with the same - character) and those which are not. */ - size_t slot = ((size_t) pelem->name[0]); - const size_t level = 0; - - table[slot * entry_size] = pelem->name[0]; - - if (pelem->name[1] == L'\0' && pelem->next == NULL - && pelem->ordering_len == collate->nrules) - { - /* Yes, we have a simple one. Lucky us. */ - size_t cnt; - - for (cnt = 0; cnt < collate->nrules; ++cnt) - table[slot * entry_size + 1 + cnt] - = pelem->ordering[collate->nrules + cnt]; - } - else - ADD_FORWARD (pelem); - } - - /* Now check for missing single byte entries. If one exist we fill - with the UNDEFINED entry. */ - for (cnt = 0; cnt < 256; ++cnt) - /* The first weight is never 0 for existing entries. */ - if (table[cnt * entry_size + 1] == 0) - { - /* We have to fill in the information from the UNDEFINED - entry. */ - table[cnt * entry_size] = (uint32_t) cnt; - - if (collate->undefined.ordering_len == collate->nrules) - { - size_t inner; - - for (inner = 0; inner < collate->nrules; ++inner) - if ((uint32_t)collate->undefined.ordering[collate->nrules - + inner] - == ELLIPSIS_CHAR) - table[cnt * entry_size + 1 + inner] = cnt; - else - table[cnt * entry_size + 1 + inner] - = collate->undefined.ordering[collate->nrules + inner]; - } - else - { - if (undefined_offset != UINT_MAX) - { - table[cnt * entry_size + 1] = FORWARD_CHAR; - table[cnt * entry_size + 2] = undefined_offset; - } - else - { - const size_t slot = cnt; - const size_t level = 0; - - ADD_FORWARD (&collate->undefined); - undefined_offset = table[cnt * entry_size + 2]; - } - } - } - - /* Now we are ready for inserting the whole rest. */ - last = NULL; - while (iterate_table (&collate->result, &last, (const void **) &name, - &len, (void **) &pelem) >= 0) - if (pelem->name[0] > 0xff) - { - /* Find the position. */ - size_t slot = ((size_t) pelem->name[0]) % table_best; - size_t level = 0; - - while (table[(level * table_best + slot) * entry_size + 1] != 0) - ++level; - assert (level < level_best); - - if (pelem->name[1] == L'\0' && pelem->next == NULL - && pelem->ordering_len == collate->nrules) - { - /* Again a simple entry. */ - size_t inner; - - for (inner = 0; inner < collate->nrules; ++inner) - table[(level * table_best + slot) * entry_size + 1 + inner] - = pelem->ordering[collate->nrules + inner]; - } - else - ADD_FORWARD (pelem); - } - /* Add the UNDEFINED entry. */ - { - /* Here we have to construct the non-simple table entry. */ - size_t idx, cnt; - - undefined_offset = obstack_object_size (&non_simple); - - idx = collate->nrules; - for (cnt = 0; cnt < collate->nrules; ++cnt) - { - size_t disp; - - ADD_VALUE (collate->undefined.ordering[cnt]); - for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) - ADD_VALUE (collate->undefined.ordering[idx++]); - } - } - - /* Finish the extra block. */ - extra_len = obstack_object_size (&non_simple); - extra = (uint32_t *) obstack_finish (&non_simple); - assert ((extra_len % sizeof (uint32_t)) == 0); - - /* Now we have to build the two array for the other byte ordering. */ - table2 = (uint32_t *) alloca (table_best * level_best * entry_size - * sizeof (table[0])); - extra2 = (uint32_t *) alloca (extra_len); - - for (cnt = 0; cnt < table_best * level_best * entry_size; ++cnt) - table2[cnt] = bswap_32 (table[cnt]); - - for (cnt = 0; cnt < extra_len / sizeof (uint32_t); ++cnt) - extra2[cnt] = bswap_32 (extra2[cnt]); - - /* We need a simple hashing table to get a collation-element->chars - mapping. We again use internal hashing using a secondary hashing - function. - - Each string has an associate hashing value V, computed by a - fixed function. To locate the string we use open addressing with - double hashing. The first index will be V % M, where M is the - size of the hashing table. If no entry is found, iterating with - a second, independent hashing function takes place. This second - value will be 1 + V % (M - 2). The approximate number of probes - will be - - for unsuccessful search: (1 - N / M) ^ -1 - for successful search: - (N / M) ^ -1 * ln (1 - N / M) - - where N is the number of keys. - - If we now choose M to be the next prime bigger than 4 / 3 * N, - we get the values 4 and 1.85 resp. Because unsuccessful searches - are unlikely this is a good value. Formulas: [Knuth, The Art of - Computer Programming, Volume 3, Sorting and Searching, 1973, - Addison Wesley] */ - if (collate->elements.filled == 0) - { - /* We don't need any element table since there are no collating - elements. */ - element_hash_tab_size = 0; - element_hash_tab = NULL; - element_hash_tab_ob = NULL; - element_string_pool_size = 0; - element_string_pool = NULL; - element_value_size = 0; - element_value = NULL; - element_value_ob = NULL; - } - else - { - void *ptr; /* Running pointer. */ - const char *key; /* Key for current bucket. */ - size_t keylen; /* Length of key data. */ - const element_t *data; /* Data, i.e., the character sequence. */ - - element_hash_tab_size = next_prime ((collate->elements.filled * 4) / 3); - if (element_hash_tab_size < 7) - /* We need a minimum to make the following code work. */ - element_hash_tab_size = 7; - - element_hash_tab = obstack_alloc (&non_simple, (2 * element_hash_tab_size - * sizeof (uint32_t))); - memset (element_hash_tab, '\377', (2 * element_hash_tab_size - * sizeof (uint32_t))); - - ptr = NULL; - while (iterate_table (&collate->elements, &ptr, (const void **) &key, - &keylen, (void **) &data) == 0) + switch (nowtok) { - size_t hash_val = hash_string (key, keylen); - size_t idx = hash_val % element_hash_tab_size; - - if (element_hash_tab[2 * idx] != (~((uint32_t) 0))) + case tok_coll_weight_max: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) { - /* We need the second hashing function. */ - size_t c = 1 + (hash_val % (element_hash_tab_size - 2)); - - do - if (idx >= element_hash_tab_size - c) - idx -= element_hash_tab_size - c; - else - idx += c; - while (element_hash_tab[2 * idx] != (~((uint32_t) 0))); + lr_ignore_rest (ldfile, 0); + break; } - element_hash_tab[2 * idx] = obstack_object_size (&non_simple); - element_hash_tab[2 * idx + 1] = (obstack_object_size (&string_pool) - / sizeof (uint32_t)); + if (state != 0) + goto err_label; - obstack_grow0 (&non_simple, key, keylen); - obstack_grow (&string_pool, data->name, - (wcslen (data->name) + 1) * sizeof (uint32_t)); - } + arg = lr_token (ldfile, charmap, NULL); + if (arg->tok != tok_number) + goto err_label; + if (collate->col_weight_max != -1) + lr_error (ldfile, _("%s: duplicate definition of `%s'"), + "LC_COLLATE", "col_weight_max"); + else + collate->col_weight_max = arg->val.num; + lr_ignore_rest (ldfile, 1); + break; - if (obstack_object_size (&non_simple) % 4 != 0) - obstack_blank (&non_simple, - 4 - (obstack_object_size (&non_simple) % 4)); - element_string_pool_size = obstack_object_size (&non_simple); - element_string_pool = obstack_finish (&non_simple); - - element_value_size = obstack_object_size (&string_pool); - element_value = obstack_finish (&string_pool); - - /* Create the tables for the other byte order. */ - element_hash_tab_ob = obstack_alloc (&non_simple, - (2 * element_hash_tab_size - * sizeof (uint32_t))); - for (cnt = 0; cnt < 2 * element_hash_tab_size; ++cnt) - element_hash_tab_ob[cnt] = bswap_U32 (element_hash_tab[cnt]); - - element_value_ob = obstack_alloc (&string_pool, element_value_size); - for (cnt = 0; cnt < element_value_size / 4; ++cnt) - element_value_ob[cnt] = bswap_32 (element_value[cnt]); - } + case tok_section_symbol: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } - /* Store collation elements as map to collation class. There are - three kinds of symbols: - - simple characters - - collation elements - - collation symbols - We need to make a table which lets the user to access the primary - weight based on the symbol string. */ - symbols_hash_tab_size = next_prime ((4 * (charset->char_table.filled - + collate->elements.filled - + collate->symbols.filled)) / 3); - symbols_hash_tab = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size - * sizeof (uint32_t))); - memset (symbols_hash_tab, '\377', (2 * symbols_hash_tab_size - * sizeof (uint32_t))); - - /* Now fill the array. First the symbols from the character set, - then the collation elements and last the collation symbols. */ - hash_tab = &charset->char_table; - while (1) - { - void *ptr; /* Running pointer. */ - const char *key; /* Key for current bucket. */ - size_t keylen; /* Length of key data. */ - void *data; /* Data. */ - - ptr = NULL; - while (iterate_table (hash_tab, &ptr, (const void **) &key, - &keylen, (void **) &data) == 0) - { - size_t hash_val; - size_t idx; - uint32_t word; - unsigned int *weights; + if (state != 0) + goto err_label; - if (hash_tab == &charset->char_table - || hash_tab == &collate->elements) + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_bsymbol) + goto err_label; + else if (!ignore_content) { - element_t *lastp, *firstp; - uint32_t dummy_name[2]; - const uint32_t *name; - size_t name_len; + /* Check whether this section is already known. */ + struct section_list *known = collate->sections; + while (known != NULL) + if (strcmp (known->name, arg->val.str.startmb) == 0) + break; - if (hash_tab == &charset->char_table) - { - dummy_name[0] = (uint32_t) ((unsigned long int) data); - dummy_name[1] = L'\0'; - name = dummy_name; - name_len = sizeof (uint32_t); - } - else + if (known != NULL) { - element_t *elemp = (element_t *) data; - name = elemp->name; - name_len = wcslen (name) * sizeof (uint32_t); + lr_error (ldfile, + _("%s: duplicate declaration of section `%s'"), + "LC_COLLATE", arg->val.str.startmb); + free (arg->val.str.startmb); } - - /* First check whether this character is used at all. */ - if (find_entry (&collate->result, name, name_len, - (void *) &firstp) < 0) - /* The symbol is not directly mentioned in the collation. - I.e., we use the value for UNDEFINED. */ - lastp = &collate->undefined; else - { - /* The entry for the simple character is always found at - the end. */ - lastp = firstp; - while (lastp->next != NULL && wcscmp (name, lastp->name)) - lastp = lastp->next; - } + collate->sections = make_seclist_elem (collate, + arg->val.str.startmb, + collate->sections); - weights = lastp->ordering; + lr_ignore_rest (ldfile, known == NULL); } else { - dummy_weights[0] = 1; - dummy_weights[collate->nrules] - = (unsigned int) ((unsigned long int) data); - - weights = dummy_weights; - } - - /* In LASTP->ordering we now have the collation class. - Determine the place in the hashing table next. */ - hash_val = hash_string (key, keylen); - idx = hash_val % symbols_hash_tab_size; - - if (symbols_hash_tab[2 * idx] != (~((uint32_t) 0))) - { - /* We need the second hashing function. */ - size_t c = 1 + (hash_val % (symbols_hash_tab_size - 2)); - - do - if (idx >= symbols_hash_tab_size - c) - idx -= symbols_hash_tab_size - c; - else - idx += c; - while (symbols_hash_tab[2 * idx] != (~((uint32_t) 0))); + free (arg->val.str.startmb); + lr_ignore_rest (ldfile, 0); } + break; - symbols_hash_tab[2 * idx] = obstack_object_size (&string_pool); - symbols_hash_tab[2 * idx + 1] = (obstack_object_size (&non_simple) - / sizeof (uint32_t)); - - obstack_grow0 (&string_pool, key, keylen); - /* Adding the first weight looks complicated. We have to deal - with the kind it is stored and with the fact that original - form uses `unsigned int's while we need `uint32_t' here. */ - word = weights[0]; - obstack_grow (&non_simple, &word, sizeof (uint32_t)); - for (cnt = 0; cnt < weights[0]; ++cnt) + case tok_collating_element: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) { - word = weights[collate->nrules + cnt]; - obstack_grow (&non_simple, &word, sizeof (uint32_t)); + lr_ignore_rest (ldfile, 0); + break; } - } - - if (hash_tab == &charset->char_table) - hash_tab = &collate->elements; - else if (hash_tab == &collate->elements) - hash_tab = &collate->symbols; - else - break; - } - - /* Now we have the complete tables. */ - if (obstack_object_size (&string_pool) % 4 != 0) - obstack_blank (&non_simple, 4 - (obstack_object_size (&string_pool) % 4)); - symbols_string_pool_size = obstack_object_size (&string_pool); - symbols_string_pool = obstack_finish (&string_pool); - - symbols_class_size = obstack_object_size (&non_simple); - symbols_class = obstack_finish (&non_simple); - /* Generate tables with other byte order. */ - symbols_hash_tab_ob = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size - * sizeof (uint32_t))); - for (cnt = 0; cnt < 2 * symbols_hash_tab_size; ++cnt) - symbols_hash_tab_ob[cnt] = bswap_32 (symbols_hash_tab[cnt]); - - symbols_class_ob = obstack_alloc (&non_simple, symbols_class_size); - for (cnt = 0; cnt < symbols_class_size / 4; ++cnt) - symbols_class_ob[cnt] = bswap_32 (symbols_class[cnt]); - - - /* Store table addresses and lengths. */ -#if __BYTE_ORDER == __BIG_ENDIAN - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len - = table_best * level_best * entry_size * sizeof (table[0]); + if (state != 0) + goto err_label; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table2; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len - = table_best * level_best * entry_size * sizeof (table[0]); + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + const char *symbol = arg->val.str.startmb; + size_t symbol_len = arg->val.str.lenmb; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len; + /* Next the `from' keyword. */ + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_from) + { + free ((char *) symbol); + goto err_label; + } - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra2; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len; -#else - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table2; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len - = table_best * level_best * entry_size * sizeof (table[0]); + ldfile->return_widestr = 1; + ldfile->translate_strings = 1; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len - = table_best * level_best * entry_size * sizeof (table[0]); + /* Finally the string with the replacement. */ + arg = lr_token (ldfile, charmap, repertoire); - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra2; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len; + ldfile->return_widestr = 0; + ldfile->translate_strings = 0; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len; -#endif + if (arg->tok != tok_string) + goto err_label; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_base = &undefined_offset; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (uint32_t); - - - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_base - = &element_hash_tab_size; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_len - = sizeof (uint32_t); - -#if __BYTE_ORDER == __BIG_ENDIAN - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base - = element_hash_tab; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_len - = 2 * element_hash_tab_size * sizeof (uint32_t); - - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base - = element_hash_tab_ob; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len - = 2 * element_hash_tab_size * sizeof (uint32_t); -#else - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base - = element_hash_tab; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len - = 2 * element_hash_tab_size * sizeof (uint32_t); - - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base - = element_hash_tab_ob; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_len - = 2 * element_hash_tab_size * sizeof (uint32_t); -#endif + if (!ignore_content && symbol != NULL) + { + /* The name is already defined. */ + if (check_duplicate (ldfile, collate, charmap, + repertoire, symbol, symbol_len)) + goto col_elem_free; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL)].iov_base - = element_string_pool; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL)].iov_len - = element_string_pool_size; - -#if __BYTE_ORDER == __BIG_ENDIAN - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_base - = element_value; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_len - = element_value_size; - - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_base - = element_value_ob; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_len - = element_value_size; -#else - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_base - = element_value; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_len - = element_value_size; - - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_base - = element_value_ob; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_len - = element_value_size; -#endif - - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_base - = &symbols_hash_tab_size; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_len - = sizeof (uint32_t); - -#if __BYTE_ORDER == __BIG_ENDIAN - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base - = symbols_hash_tab; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len - = 2 * symbols_hash_tab_size * sizeof (uint32_t); - - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base - = symbols_hash_tab_ob; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len - = 2 * symbols_hash_tab_size * sizeof (uint32_t); -#else - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base - = symbols_hash_tab; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len - = 2 * symbols_hash_tab_size * sizeof (uint32_t); - - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base - = symbols_hash_tab_ob; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len - = 2 * symbols_hash_tab_size * sizeof (uint32_t); -#endif - - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL)].iov_base - = symbols_string_pool; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL)].iov_len - = symbols_string_pool_size; - -#if __BYTE_ORDER == __BIG_ENDIAN - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_base - = symbols_class; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_len - = symbols_class_size; - - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_base - = symbols_class_ob; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_len - = symbols_class_size; -#else - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_base - = symbols_class; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_len - = symbols_class_size; - - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_base - = symbols_class_ob; - iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_len - = symbols_class_size; -#endif - - /* Update idx array. */ - idx[0] = iov[0].iov_len + iov[1].iov_len; - for (cnt = 1; cnt < nelems; ++cnt) - idx[cnt] = idx[cnt - 1] + iov[1 + cnt].iov_len; - - write_locale_data (output_path, "LC_COLLATE", 2 + nelems, iov); - - obstack_free (&non_simple, NULL); - obstack_free (&string_pool, NULL); -} - - -static int -collate_element_to (struct linereader *ldfile, - struct locale_collate_t *collate, - struct token *code, struct charmap_t *charmap, - struct repertoire_t *repertoire) -{ - struct charseq *seq; - uint32_t value; - void *not_used; - - seq = charmap_find_value (charmap, code->val.str.start, code->val.str.len); - if (seq != NULL) - { - lr_error (ldfile, _("symbol for multicharacter collating element " - "`%.*s' duplicates symbolic name in charmap"), - (int) code->val.str.len, code->val.str.start); - return 1; - } - - value = repertoire_find_value (repertoire, code->val.str.start, - code->val.str.len); - if (value != ILLEGAL_CHAR_VALUE) - { - lr_error (ldfile, _("symbol for multicharacter collating element " - "`%.*s' duplicates symbolic name in repertoire"), - (int) code->val.str.len, code->val.str.start); - return 1; - } - - if (find_entry (&collate->elements, code->val.str.start, code->val.str.len, - ¬_used) >= 0) - { - lr_error (ldfile, _("symbol for multicharacter collating element " - "`%.*s' duplicates other element definition"), - (int) code->val.str.len, code->val.str.start); - return 1; - } - - if (find_entry (&collate->elements, code->val.str.start, code->val.str.len, - ¬_used) >= 0) - { - lr_error (ldfile, _("symbol for multicharacter collating element " - "`%.*s' duplicates symbol definition"), - (int) code->val.str.len, code->val.str.start); - return 1; - } - - return 0; -} - - -static void -collate_element_from (struct linereader *ldfile, - struct locale_collate_t *collate, - const char *to_str, struct token *code, - struct charmap_t *charmap, - struct repertoire_t *repertoire) -{ - element_t *elemp, *runp; - - /* CODE is a string. */ - elemp = (element_t *) obstack_alloc (&collate->element_mem, - sizeof (element_t)); - - /* We have to translate the string. It may contain <...> character - names. */ - elemp->namemb = code->val.str.startmb; - elemp->namewc = code->val.str.startwc; - elemp->this_weight = 0; - elemp->ordering = NULL; - elemp->ordering_len = 0; - - if (elemp->namemb == NULL && elemp->namewc == NULL) - { - /* The string contains characters which are not in the charmap nor - in the repertoire. Ignore the string. */ - if (verbose) - lr_error (ldfile, _("\ -`from' string in collation element declaration contains unknown character")); - return; - } - - /* The entries in the linked lists of RESULT are sorting in - descending order. The order is important for the `strcoll' and - `wcscoll' functions. */ - if (find_entry (&collate->resultwc, elemp->namewc, sizeof (uint32_t), - (void *) &runp) >= 0) - { - /* We already have an entry with this key. Check whether it is - identical. */ - element_t *prevp = NULL; - int cmpres; - - do - { - cmpres = wcscmp (elemp->namewc, runp->namewc); - if (cmpres <= 0) - break; - prevp = runp; - } - while ((runp = runp->next) != NULL); + if (insert_entry (&collate->elem_table, + symbol, symbol_len, + new_element (collate, + arg->val.str.startmb, + arg->val.str.lenmb - 1, + arg->val.str.startwc, + symbol, symbol_len, 0)) < 0) + lr_error (ldfile, _("\ +error while adding collating element")); + } + else + { + col_elem_free: + if (symbol != NULL) + free ((char *) symbol); + if (arg->val.str.startmb != NULL) + free (arg->val.str.startmb); + if (arg->val.str.startwc != NULL) + free (arg->val.str.startwc); + } + lr_ignore_rest (ldfile, 1); + } + break; - if (cmpres == 0) - lr_error (ldfile, _("\ -duplicate collating element definition (repertoire)")); - else - { - elemp->next = runp; - if (prevp == NULL) + case tok_collating_symbol: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) { - if (set_entry (&collate->resultwc, elemp->namewc, - sizeof (uint32_t), elemp) < 0) - error (EXIT_FAILURE, 0, _("\ -error while inserting collation element into hash table")); + lr_ignore_rest (ldfile, 0); + break; } - else - prevp->next = elemp; - } - } - else - { - elemp->next = NULL; - if (insert_entry (&collate->resultwc, elemp->namewc, sizeof (uint32_t), - elemp) < 0) - error (EXIT_FAILURE, errno, _("error while inserting to hash table")); - } - /* Now also insert the element definition in the multibyte table. */ - if (find_entry (&collate->resultmb, elemp->namemb, 1, (void *) &runp) >= 0) - { - /* We already have an entry with this key. Check whether it is - identical. */ - element_t *prevp = NULL; - int cmpres; - - do - { - cmpres = strcmp (elemp->namemb, runp->namemb); - if (cmpres <= 0) - break; - prevp = runp; - } - while ((runp = runp->next) != NULL); + if (state != 0) + goto err_label; - if (cmpres == 0) - lr_error (ldfile, _("\ -duplicate collating element definition (charmap)")); - else - { - elemp->next = runp; - if (prevp == NULL) - { - if (set_entry (&collate->resultmb, elemp->namemb, 1, elemp) < 0) - error (EXIT_FAILURE, 0, _("\ -error while inserting collation element into hash table")); - } + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_bsymbol) + goto err_label; else - prevp->next = elemp; - } - } - else - { - elemp->next = NULL; - if (insert_entry (&collate->resultmb, elemp->namemb, 1, elemp) < 0) - error (EXIT_FAILURE, errno, _("error while inserting to hash table")); - } - - /* Finally install the mapping from the `to'-name to the `from'-name. */ - if (insert_entry (&collate->elements, to_str, strlen (to_str), - (void *) elemp) < 0) - lr_error (ldfile, _("cannot insert new collating symbol definition: %s"), - strerror (errno)); -} + { + const char *symbol = arg->val.str.startmb; + size_t symbol_len = arg->val.str.lenmb; + if (!ignore_content) + { + if (symbol == NULL) + lr_error (ldfile, _("\ +%s: unknown character in collating symbol name"), + "LC_COLLATE"); + else + { + /* The name is already defined. */ + if (check_duplicate (ldfile, collate, charmap, + repertoire, symbol, symbol_len)) + goto col_sym_free; -static void -collate_symbol (struct linereader *ldfile, struct locale_collate_t *collate, - struct token *code, struct charmap_t *charmap, - struct repertoire_t *repertoire) -{ - uint32_t value; - struct charseq *seq; - void *not_used; + if (insert_entry (&collate->sym_table, + symbol, symbol_len, + new_symbol (collate)) < 0) + lr_error (ldfile, _("\ +error while adding collating symbol")); + } + } + else + { + col_sym_free: + if (symbol != NULL) + free ((char *) symbol); + } + lr_ignore_rest (ldfile, 1); + } + break; - seq = charset_find_value (charmap, code->val.str.start, code->val.str.len); - if (seq != NULL) - { - lr_error (ldfile, _("symbol for multicharacter collating element " - "`%.*s' duplicates symbolic name in charmap"), - (int) code->val.str.len, code->val.str.start); - return; - } + case tok_symbol_equivalence: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } - value = repertoire (repertoire, code->val.str.start, code->val.str.len); - if (value != ILLEGAL_CHAR_VALUE) - { - lr_error (ldfile, _("symbol for multicharacter collating element " - "`%.*s' duplicates symbolic name in repertoire"), - (int) code->val.str.len, code->val.str.start); - return; - } + if (state != 0) + goto err_label; - if (find_entry (&collate->elements, code->val.str.start, code->val.str.len, - ¬_used) >= 0) - { - lr_error (ldfile, _("symbol for multicharacter collating element " - "`%.*s' duplicates element definition"), - (int) code->val.str.len, code->val.str.start); - return; - } + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + const char *newname = arg->val.str.startmb; + size_t newname_len = arg->val.str.lenmb; + const char *symname; + size_t symname_len; + struct symbol_t *symval; - if (find_entry (&collate->symbols, code->val.str.start, code->val.str.len, - ¬_used) >= 0) - { - lr_error (ldfile, _("symbol for multicharacter collating element " - "`%.*s' duplicates other symbol definition"), - (int) code->val.str.len, code->val.str.start); - return; - } + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_bsymbol) + { + if (newname != NULL) + free ((char *) newname); + goto err_label; + } - if (insert_entry (&collate->symbols, code->val.str.start, code->val.str.len, - (void *) 0) < 0) - lr_error (ldfile, _("cannot insert new collating symbol definition: %s"), - strerror (errno)); -} + symname = arg->val.str.startmb; + symname_len = arg->val.str.lenmb; + if (!ignore_content) + { + if (newname == NULL) + { + lr_error (ldfile, _("\ +%s: unknown character in equivalent definition name"), + "LC_COLLATE"); + goto sym_equiv_free; + } + if (symname == NULL) + { + lr_error (ldfile, _("\ +%s: unknown character in equivalent definition value"), + "LC_COLLATE"); + goto sym_equiv_free; + } + /* The name is already defined. */ + if (check_duplicate (ldfile, collate, charmap, + repertoire, symname, symname_len)) + goto col_sym_free; -void -collate_new_order (struct linereader *ldfile, struct localedef_t *locale, - enum coll_sort_rule sort_rule) -{ - struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + /* See whether the symbol name is already defined. */ + if (find_entry (&collate->sym_table, symname, symname_len, + (void **) &symval) != 0) + { + lr_error (ldfile, _("\ +%s: unknown symbol `%s' in equivalent definition"), + "LC_COLLATE", symname); + goto col_sym_free; + } - if (collate->nrules >= collate->nrules_max) - { - collate->nrules_max *= 2; - collate->rules - = (enum coll_sort_rule *) xrealloc (collate->rules, - collate->nrules_max - * sizeof (enum coll_sort_rule)); - } + if (insert_entry (&collate->sym_table, + newname, newname_len, symval) < 0) + { + lr_error (ldfile, _("\ +error while adding equivalent collating symbol")); + goto sym_equiv_free; + } - collate->rules[collate->nrules++] = sort_rule; -} + free ((char *) symname); + } + else + { + sym_equiv_free: + if (newname != NULL) + free ((char *) newname); + if (symname != NULL) + free ((char *) symname); + } + lr_ignore_rest (ldfile, 1); + } + break; + case tok_order_start: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } -void -collate_build_arrays (struct linereader *ldfile, struct localedef_t *locale) -{ - struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + if (state != 0 && state != 1) + goto err_label; + state = 1; - collate->rules - = (enum coll_sort_rule *) xrealloc (collate->rules, - collate->nrules - * sizeof (enum coll_sort_rule)); + /* The 14652 draft does not specify whether all `order_start' lines + must contain the same number of sort-rules, but 14651 does. So + we require this here as well. */ + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok == tok_bsymbol) + { + /* This better should be a section name. */ + struct section_list *sp = collate->sections; + while (sp != NULL + && strcmp (sp->name, arg->val.str.startmb) != 0) + sp = sp->next; - /* Allocate arrays for temporary weights. */ - collate->weight_cnt = (int *) xmalloc (collate->nrules * sizeof (int)); + if (sp == NULL) + { + lr_error (ldfile, _("\ +%s: unknown section name `%s'"), + "LC_COLLATE", arg->val.str.startmb); + /* We use the error section. */ + collate->current_section = &collate->error_section; - /* Choose arbitrary start value for table size. */ - collate->nweight_max = 5 * collate->nrules; - collate->weight = (int *) xmalloc (collate->nweight_max * sizeof (int)); -} + if (collate->error_section.first == NULL) + { + collate->error_section.next = collate->sections; + collate->sections = &collate->error_section; + } + } + else + { + /* Remember this section. */ + collate->current_section = sp; + /* One should not be allowed to open the same + section twice. */ + if (sp->first != NULL) + lr_error (ldfile, _("\ +%s: multiple order definitions for section `%s'"), + "LC_COLLATE", sp->name); + else + { + sp->next = collate->sections; + collate->sections = sp; + } -int -collate_order_elem (struct linereader *ldfile, struct localedef_t *locale, - struct token *code, struct charset_t *charset) -{ - const uint32_t zero = L'\0'; - struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; - int result = 0; - uint32_t value; - void *tmp; - unsigned int i; + /* Next should come the end of the line or a semicolon. */ + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok == tok_eol) + { + uint32_t cnt; - switch (code->tok) - { - case tok_bsymbol: - /* We have a string to find in one of the three hashing tables. */ - value = charset_find_value (&charset->char_table, code->val.str.start, - code->val.str.len); - if (value != ILLEGAL_CHAR_VALUE) - { - element_t *lastp, *firstp; + /* This means we have exactly one rule: `forward'. */ + if (nrules > 1) + lr_error (ldfile, _("\ +%s: invalid number of sorting rules"), + "LC_COLLATE"); + else + nrules = 1; + sp->rules = obstack_alloc (&collate->mempool, + (sizeof (enum coll_sort_rule) + * nrules)); + for (cnt = 0; cnt < nrules; ++cnt) + sp->rules[cnt] = sort_forward; - collate->kind = character; + /* Next line. */ + break; + } - if (find_entry (&collate->result, &value, sizeof (uint32_t), - (void *) &firstp) < 0) - firstp = lastp = NULL; + /* Get the next token. */ + arg = lr_token (ldfile, charmap, repertoire); + } + } else { - /* The entry for the simple character is always found at - the end. */ - lastp = firstp; - while (lastp->next != NULL) - lastp = lastp->next; + /* There is no section symbol. Therefore we use the unnamed + section. */ + collate->current_section = &collate->unnamed_section; - if (lastp->name[0] == value && lastp->name[1] == L'\0') + if (collate->unnamed_section.first != NULL) + lr_error (ldfile, _("\ +%s: multiple order definitions for unnamed section"), + "LC_COLLATE"); + else { - lr_error (ldfile, - _("duplicate definition for character `%.*s'"), - (int) code->val.str.len, code->val.str.start); - lr_ignore_rest (ldfile, 0); - result = -1; - break; + collate->unnamed_section.next = collate->sections; + collate->sections = &collate->unnamed_section; } } - collate->current_element - = (element_t *) obstack_alloc (&collate->element_mem, - sizeof (element_t)); - - obstack_grow (&collate->element_mem, &value, sizeof (value)); - obstack_grow (&collate->element_mem, &zero, sizeof (zero)); - - collate->current_element->name = - (const uint32_t *) obstack_finish (&collate->element_mem); - - collate->current_element->this_weight = ++collate->order_cnt; - - collate->current_element->next = NULL; + /* Now read the direction names. */ + read_directions (ldfile, arg, charmap, repertoire, collate); - if (firstp == NULL) - { - if (insert_entry (&collate->result, &value, sizeof (uint32_t), - (void *) collate->current_element) < 0) - { - lr_error (ldfile, _("cannot insert collation element `%.*s'"), - (int) code->val.str.len, code->val.str.start); - exit (4); - } - } - else - lastp->next = collate->current_element; - } - else if (find_entry (&collate->elements, code->val.str.start, - code->val.str.len, &tmp) >= 0) - { - collate->current_element = (element_t *) tmp; + /* From now be need the strings untranslated. */ + ldfile->translate_strings = 0; + break; - if (collate->current_element->this_weight != 0) + case tok_order_end: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) { - lr_error (ldfile, _("\ -collation element `%.*s' appears more than once: ignore line"), - (int) code->val.str.len, code->val.str.start); lr_ignore_rest (ldfile, 0); - result = -1; break; } - collate->kind = element; - collate->current_element->this_weight = ++collate->order_cnt; - } - else if (find_entry (&collate->symbols, code->val.str.start, - code->val.str.len, &tmp) >= 0) - { - unsigned int order = ++collate->order_cnt; + if (state != 1) + goto err_label; - if ((unsigned long int) tmp != 0ul) + /* Handle ellipsis at end of list. */ + if (was_ellipsis != tok_none) { - lr_error (ldfile, _("\ -collation symbol `%.*s' appears more than once: ignore line"), - (int) code->val.str.len, code->val.str.start); - lr_ignore_rest (ldfile, 0); - result = -1; - break; + handle_ellipsis (ldfile, NULL, was_ellipsis, charmap, repertoire, + collate); + was_ellipsis = tok_none; } - collate->kind = symbol; + state = 2; + lr_ignore_rest (ldfile, 1); + break; - if (set_entry (&collate->symbols, code->val.str.start, - code->val.str.len, (void *) order) < 0) + case tok_reorder_after: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) { - lr_error (ldfile, _("cannot process order specification")); - exit (4); + lr_ignore_rest (ldfile, 0); + break; } - } - else - { - if (verbose) - lr_error (ldfile, _("unknown symbol `%.*s': line ignored"), - (int) code->val.str.len, code->val.str.start); - lr_ignore_rest (ldfile, 0); - - result = -1; - } - break; - - case tok_undefined: - collate->kind = undefined; - collate->current_element = &collate->undefined; - break; - - case tok_ellipsis: - if (collate->was_ellipsis) - { - lr_error (ldfile, _("\ -two lines in a row containing `...' are not allowed")); - result = -1; - } - else if (collate->kind != character) - { - /* An ellipsis requires the previous line to be an - character definition. */ - lr_error (ldfile, _("\ -line before ellipsis does not contain definition for character constant")); - lr_ignore_rest (ldfile, 0); - result = -1; - } - else - collate->kind = ellipsis; - break; - default: - assert (! "illegal token in `collate_order_elem'"); - } - - /* Now it's time to handle the ellipsis in the previous line. We do - this only when the last line contained an definition for a - character, the current line also defines an character, the - character code for the later is bigger than the former. */ - if (collate->was_ellipsis) - { - if (collate->kind != character) - { - lr_error (ldfile, _("\ -line after ellipsis must contain character definition")); - lr_ignore_rest (ldfile, 0); - result = -1; - } - else if (collate->last_char > value) - { - lr_error (ldfile, _("end point of ellipsis range is bigger then start")); - lr_ignore_rest (ldfile, 0); - result = -1; - } - else - { - /* We can fill the arrays with the information we need. */ - uint32_t name[2]; - unsigned int *data; - size_t *ptr; - size_t cnt; - - name[0] = collate->last_char + 1; - name[1] = L'\0'; - - data = (unsigned int *) alloca ((collate->nrules + collate->nweight) - * sizeof (unsigned int)); - ptr = (size_t *) alloca (collate->nrules * sizeof (size_t)); - - /* Prepare data. Because the characters covered by an - ellipsis all have equal values we prepare the data once - and only change the variable number (if there are any). - PTR[...] will point to the entries which will have to be - fixed during the output loop. */ - for (cnt = 0; cnt < collate->nrules; ++cnt) + if (state == 1) { - data[cnt] = collate->weight_cnt[cnt]; - ptr[cnt] = (cnt == 0 - ? collate->nweight - : ptr[cnt - 1] + collate->weight_cnt[cnt - 1]); - } - - for (cnt = 0; cnt < collate->nweight; ++cnt) - data[collate->nrules + cnt] = collate->weight[cnt]; - - for (cnt = 0; cnt < collate->nrules; ++cnt) - if ((uint32_t) data[ptr[cnt]] != ELLIPSIS_CHAR) - ptr[cnt] = 0; + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + state = 2; - while (name[0] <= value) - { - element_t *pelem; - - pelem = (element_t *) obstack_alloc (&collate->element_mem, - sizeof (element_t)); - pelem->name - = (const uint32_t *) obstack_copy (&collate->element_mem, - name, 2 * sizeof (uint32_t)); - pelem->this_weight = ++collate->order_cnt; - - pelem->ordering_len = collate->nweight; - pelem->ordering - = (unsigned int *) obstack_copy (&collate->element_mem, data, - (collate->nrules - + pelem->ordering_len) - * sizeof (unsigned int)); - - /* `...' weights need to be adjusted. */ - for (cnt = 0; cnt < collate->nrules; ++cnt) - if (ptr[cnt] != 0) - pelem->ordering[ptr[cnt]] = pelem->this_weight; - - /* Insert new entry into result table. */ - if (find_entry (&collate->result, name, sizeof (uint32_t), - (void *) &pelem->next) >= 0) + /* Handle ellipsis at end of list. */ + if (was_ellipsis != tok_none) { - if (set_entry (&collate->result, name, sizeof (uint32_t), - (void *) pelem) < 0) - error (4, 0, _("cannot insert into result table")); + handle_ellipsis (ldfile, arg, was_ellipsis, charmap, + repertoire, collate); + was_ellipsis = tok_none; } + } + else if (state != 2 && state != 3) + goto err_label; + state = 3; + + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok == tok_bsymbol) + { + /* Find this symbol in the sequence table. */ + struct element_t *insp; + int no_error = 1; + + if (find_entry (&collate->seq_table, arg->val.str.startmb, + arg->val.str.lenmb, (void **) &insp) == 0) + /* Yes, the symbol exists. Simply point the cursor + to it. */ + collate->cursor = insp; else { - pelem->next = NULL; - if (insert_entry (&collate->result, name, sizeof (uint32_t), - (void *) pelem) < 0) - error (4, 0, _("cannot insert into result table")); + /* This is bad. The symbol after which we have to + insert does not exist. */ + lr_error (ldfile, _("\ +%s: cannot reorder after %.*s: symbol not known"), + "LC_COLLATE", (int) arg->val.str.lenmb, + arg->val.str.startmb); + collate->cursor = NULL; + no_error = 0; } - /* Increment counter. */ - ++name[0]; + lr_ignore_rest (ldfile, no_error); } - } - } - - /* Reset counters for weights. */ - collate->weight_idx = 0; - collate->nweight = 0; - for (i = 0; i < collate->nrules; ++i) - collate->weight_cnt[i] = 0; - collate->current_patch = NULL; - - return result; -} - - -int -collate_weight_bsymbol (struct linereader *ldfile, struct localedef_t *locale, - struct token *code, struct charset_t *charset) -{ - struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; - unsigned int here_weight; - uint32_t value; - void *tmp; - - assert (code->tok == tok_bsymbol); - - value = charset_find_value (&charset->char_table, code->val.str.start, - code->val.str.len); - if (value != ILLEGAL_CHAR_VALUE) - { - element_t *runp; - - if (find_entry (&collate->result, &value, sizeof (uint32_t), - (void *)&runp) < 0) - runp = NULL; - - while (runp != NULL - && (runp->name[0] != value || runp->name[1] != L'\0')) - runp = runp->next; - - here_weight = runp == NULL ? 0 : runp->this_weight; - } - else if (find_entry (&collate->elements, code->val.str.start, - code->val.str.len, &tmp) >= 0) - { - element_t *runp = (element_t *) tmp; - - here_weight = runp->this_weight; - } - else if (find_entry (&collate->symbols, code->val.str.start, - code->val.str.len, &tmp) >= 0) - { - here_weight = (unsigned int) tmp; - } - else - { - if (verbose) - lr_error (ldfile, _("unknown symbol `%.*s': line ignored"), - (int) code->val.str.len, code->val.str.start); - lr_ignore_rest (ldfile, 0); - return -1; - } - - /* When we currently work on a collation symbol we do not expect any - weight. */ - if (collate->kind == symbol) - { - lr_error (ldfile, _("\ -specification of sorting weight for collation symbol does not make sense")); - lr_ignore_rest (ldfile, 0); - return -1; - } - - /* Add to the current collection of weights. */ - if (collate->nweight >= collate->nweight_max) - { - collate->nweight_max *= 2; - collate->weight = (unsigned int *) xrealloc (collate->weight, - collate->nweight_max); - } - - /* If the weight is currently not known, we remember to patch the - resulting tables. */ - if (here_weight == 0) - { - patch_t *newp; - - newp = (patch_t *) obstack_alloc (&collate->element_mem, - sizeof (patch_t)); - newp->fname = ldfile->fname; - newp->lineno = ldfile->lineno; - newp->token = (const char *) obstack_copy0 (&collate->element_mem, - code->val.str.start, - code->val.str.len); - newp->where.idx = collate->nweight++; - newp->next = collate->current_patch; - collate->current_patch = newp; - } - else - collate->weight[collate->nweight++] = here_weight; - ++collate->weight_cnt[collate->weight_idx]; - - return 0; -} - - -int -collate_next_weight (struct linereader *ldfile, struct localedef_t *locale) -{ - struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; - - if (collate->kind == symbol) - { - lr_error (ldfile, _("\ -specification of sorting weight for collation symbol does not make sense")); - lr_ignore_rest (ldfile, 0); - return -1; - } - - ++collate->weight_idx; - if (collate->weight_idx >= collate->nrules) - { - lr_error (ldfile, _("too many weights")); - lr_ignore_rest (ldfile, 0); - return -1; - } - - return 0; -} - - -int -collate_simple_weight (struct linereader *ldfile, struct localedef_t *locale, - struct token *code, struct charset_t *charset) -{ - struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; - unsigned int value = 0; - - /* There current tokens can be `IGNORE', `...', or a string. */ - switch (code->tok) - { - case tok_ignore: - /* This token is allowed in all situations. */ - value = IGNORE_CHAR; - break; - - case tok_ellipsis: - /* The ellipsis is only allowed for the `...' or `UNDEFINED' - entry. */ - if (collate->kind != ellipsis && collate->kind != undefined) - { - lr_error (ldfile, _("\ -`...' must only be used in `...' and `UNDEFINED' entries")); - lr_ignore_rest (ldfile, 0); - return -1; - } - value = ELLIPSIS_CHAR; - break; - - case tok_string: - /* This can become difficult. We have to get the weights which - correspond to the single wide chars in the string. But some - of the `chars' might not be real characters, but collation - elements or symbols. And so the string decoder might have - signaled errors. The string at this point is not translated. - I.e., all <...> sequences are still there. */ - { - char *runp = code->val.str.start; - void *tmp; - - while (*runp != '\0') - { - char *startp = (char *) runp; - char *putp = (char *) runp; - uint32_t wch; - - /* Lookup weight for char and store it. */ - if (*runp == '<') - { - while (*++runp != '\0' && *runp != '>') - { - if (*runp == ldfile->escape_char) - if (*++runp == '\0') - { - lr_error (ldfile, _("unterminated weight name")); - lr_ignore_rest (ldfile, 0); - return -1; - } - *putp++ = *runp; - } - if (*runp == '>') - ++runp; - - if (putp == startp) - { - lr_error (ldfile, _("empty weight name: line ignored")); - lr_ignore_rest (ldfile, 0); - return -1; - } - - wch = charset_find_value (&charset->char_table, startp, - putp - startp); - if (wch != ILLEGAL_CHAR_VALUE) - { - element_t *pelem; - - if (find_entry (&collate->result, &wch, sizeof (uint32_t), - (void *)&pelem) < 0) - pelem = NULL; - - while (pelem != NULL - && (pelem->name[0] != wch - || pelem->name[1] != L'\0')) - pelem = pelem->next; - - value = pelem == NULL ? 0 : pelem->this_weight; - } - else if (find_entry (&collate->elements, startp, putp - startp, - &tmp) >= 0) - { - element_t *pelem = (element_t *) tmp; - - value = pelem->this_weight; - } - else if (find_entry (&collate->symbols, startp, putp - startp, - &tmp) >= 0) - { - value = (unsigned int) tmp; - } - else - { - if (verbose) - lr_error (ldfile, _("unknown symbol `%.*s': line ignored"), - (int) (putp - startp), startp); - lr_ignore_rest (ldfile, 0); - return -1; - } - } - else - { - element_t *wp; - uint32_t wch; - - if (*runp == ldfile->escape_char) - { - static const char digits[] = "0123456789abcdef"; - const char *dp; - int base; - - ++runp; - if (tolower (*runp) == 'x') - { - ++runp; - base = 16; - } - else if (tolower (*runp) == 'd') - { - ++runp; - base = 10; - } - else - base = 8; - - dp = strchr (digits, tolower (*runp)); - if (dp == NULL || (dp - digits) >= base) - { - illegal_char: - lr_error (ldfile, _("\ -illegal character constant in string")); - lr_ignore_rest (ldfile, 0); - return -1; - } - wch = dp - digits; - ++runp; - - dp = strchr (digits, tolower (*runp)); - if (dp == NULL || (dp - digits) >= base) - goto illegal_char; - wch *= base; - wch += dp - digits; - ++runp; - - if (base != 16) - { - dp = strchr (digits, tolower (*runp)); - if (dp != NULL && (dp - digits < base)) - { - wch *= base; - wch += dp - digits; - ++runp; - } - } - } - else - wch = (uint32_t) *runp++; - - /* Lookup the weight for WCH. */ - if (find_entry (&collate->result, &wch, sizeof (wch), - (void *)&wp) < 0) - wp = NULL; - - while (wp != NULL - && (wp->name[0] != wch || wp->name[1] != L'\0')) - wp = wp->next; - - value = wp == NULL ? 0 : wp->this_weight; - - /* To get the correct name for the error message. */ - putp = runp; + else + /* This must not happen. */ + goto err_label; + break; - /**************************************************\ - |* I know here is something wrong. Characters in *| - |* the string which are not in the <...> form *| - |* cannot be declared forward for now!!! *| - \**************************************************/ - } + case tok_reorder_end: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + break; - /* Store in weight array. */ - if (collate->nweight >= collate->nweight_max) - { - collate->nweight_max *= 2; - collate->weight - = (unsigned int *) xrealloc (collate->weight, - collate->nweight_max); - } + if (state != 3) + goto err_label; + state = 4; + lr_ignore_rest (ldfile, 1); + break; - if (value == 0) - { - patch_t *newp; - - newp = (patch_t *) obstack_alloc (&collate->element_mem, - sizeof (patch_t)); - newp->fname = ldfile->fname; - newp->lineno = ldfile->lineno; - newp->token - = (const char *) obstack_copy0 (&collate->element_mem, - startp, putp - startp); - newp->where.idx = collate->nweight++; - newp->next = collate->current_patch; - collate->current_patch = newp; - } - else - collate->weight[collate->nweight++] = value; - ++collate->weight_cnt[collate->weight_idx]; - } - } - return 0; + case tok_reorder_sections_after: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } - default: - assert (! "should not happen"); - } + if (state == 1) + { + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + state = 2; + /* Handle ellipsis at end of list. */ + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, NULL, was_ellipsis, charmap, + repertoire, collate); + was_ellipsis = tok_none; + } + } + else if (state == 3) + { + error (0, 0, _("%s: missing `reorder-end' keyword"), + "LC_COLLATE"); + state = 4; + } + else if (state != 2 && state != 4) + goto err_label; + state = 5; - if (collate->nweight >= collate->nweight_max) - { - collate->nweight_max *= 2; - collate->weight = (unsigned int *) xrealloc (collate->weight, - collate->nweight_max); - } + /* Get the name of the sections we are adding after. */ + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok == tok_bsymbol) + { + /* Now find a section with this name. */ + struct section_list *runp = collate->sections; - collate->weight[collate->nweight++] = value; - ++collate->weight_cnt[collate->weight_idx]; + while (runp != NULL) + { + if (runp->name != NULL + && strlen (runp->name) == arg->val.str.lenmb + && memcmp (runp->name, arg->val.str.startmb, + arg->val.str.lenmb) == 0) + break; - return 0; -} + runp = runp->next; + } + if (runp != NULL) + collate->current_section = runp; + else + { + /* This is bad. The section after which we have to + reorder does not exist. Therefore we cannot + process the whole rest of this reorder + specification. */ + lr_error (ldfile, _("%s: section `%.*s' not known"), + "LC_COLLATE", (int) arg->val.str.lenmb, + arg->val.str.startmb); -void -collate_end_weight (struct linereader *ldfile, struct localedef_t *locale) -{ - struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; - element_t *pelem = collate->current_element; + do + { + lr_ignore_rest (ldfile, 0); - if (collate->kind == symbol) - { - /* We don't have to do anything. */ - collate->was_ellipsis = 0; - return; - } + now = lr_token (ldfile, charmap, NULL); + } + while (now->tok == tok_reorder_sections_after + || now->tok == tok_reorder_sections_end + || now->tok == tok_end); - if (collate->kind == ellipsis) - { - /* Before the next line is processed the ellipsis is handled. */ - collate->was_ellipsis = 1; - return; - } + /* Process the token we just saw. */ + nowtok = now->tok; + continue; + } + } + else + /* This must not happen. */ + goto err_label; + break; - assert (collate->kind == character || collate->kind == element - || collate->kind == undefined); + case tok_reorder_sections_end: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + break; - /* Fill in the missing weights. */ - while (++collate->weight_idx < collate->nrules) - { - collate->weight[collate->nweight++] = pelem->this_weight; - ++collate->weight_cnt[collate->weight_idx]; - } + if (state != 5) + goto err_label; + state = 6; + lr_ignore_rest (ldfile, 1); + break; - /* Now we know how many ordering weights the current - character/element has. Allocate room in the element structure - and copy information. */ - pelem->ordering_len = collate->nweight; + case tok_bsymbol: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } - /* First we write an array with the number of values for each - weight. */ - obstack_grow (&collate->element_mem, collate->weight_cnt, - collate->nrules * sizeof (unsigned int)); + if (state != 1 && state != 3) + goto err_label; - /* Now the weights itselves. */ - obstack_grow (&collate->element_mem, collate->weight, - collate->nweight * sizeof (unsigned int)); + if (state == 3) + { + /* It is possible that we already have this collation sequence. + In this case we move the entry. */ + struct element_t *seqp; - /* Get result. */ - pelem->ordering = obstack_finish (&collate->element_mem); + /* If the symbol after which we have to insert was not found + ignore all entries. */ + if (collate->cursor == NULL) + { + lr_ignore_rest (ldfile, 0); + break; + } - /* Now we handle the "patches". */ - while (collate->current_patch != NULL) - { - patch_t *this_patch; + if (find_entry (&collate->seq_table, arg->val.str.startmb, + arg->val.str.lenmb, (void **) &seqp) == 0) + { + /* Remove the entry from the old position. */ + if (seqp->last == NULL) + collate->start = seqp->next; + else + seqp->last->next = seqp->next; + if (seqp->next != NULL) + seqp->next->last = seqp->last; - this_patch = collate->current_patch; + /* We also have to check whether this entry is the + first or last of a section. */ + if (seqp->section->first == seqp) + { + if (seqp->section->first == seqp->section->last) + /* This setion has no content anymore. */ + seqp->section->first = seqp->section->last = NULL; + else + seqp->section->first = seqp->next; + } + else if (seqp->section->last == seqp) + seqp->section->last = seqp->last; - this_patch->where.pos = &pelem->ordering[collate->nrules - + this_patch->where.idx]; + /* Now insert it in the new place. */ + seqp->next = collate->cursor->next; + seqp->last = collate->cursor; + collate->cursor->next = seqp; + if (seqp->next != NULL) + seqp->next->last = seqp; - collate->current_patch = this_patch->next; - this_patch->next = collate->all_patches; - collate->all_patches = this_patch; - } + seqp->section = collate->cursor->section; + if (seqp->section->last == collate->cursor) + seqp->section->last = seqp; - /* Set information for next round. */ - collate->was_ellipsis = 0; - if (collate->kind != undefined) - collate->last_char = pelem->name[0]; -} + break; + } + /* Otherwise we just add a new entry. */ + } + else if (state == 5) + { + /* We are reordering sections. Find the named section. */ + struct section_list *runp = collate->sections; + struct section_list *prevp = NULL; -/* The parser for the LC_CTYPE section of the locale definition. */ -void -read_lc_collate (struct linereader *ldfile, struct localedef_t *result, - struct charmap_t *charmap, struct repertoire_t *repertoire, - int ignore_content) -{ - struct locale_collate_t *collate; - int did_copy = 0; - const char *save_str; + while (runp != NULL) + { + if (runp->name != NULL + && strlen (runp->name) == arg->val.str.lenmb + && memcmp (runp->name, arg->val.str.startmb, + arg->val.str.lenmb) == 0) + break; + + prevp = runp; + runp = runp->next; + } - /* The rest of the line containing `LC_COLLATE' must be free. */ - lr_ignore_rest (ldfile, 1); + if (runp == NULL) + { + lr_error (ldfile, _("%s: section `%.*s' not known"), + "LC_COLLATE", (int) arg->val.str.lenmb, + arg->val.str.startmb); + lr_ignore_rest (ldfile, 0); + } + else + { + if (runp != collate->current_section) + { + /* Remove the named section from the old place and + insert it in the new one. */ + prevp->next = runp->next; - now = lr_token (ldfile, charmap, NULL); - nowtok = now->tok; + runp->next = collate->current_section->next; + collate->current_section->next = runp; + collate->current_section = runp; + } - /* If we see `copy' now we are almost done. */ - if (nowtok == tok_copy) - { - handle_copy (ldfile, charmap, repertoire, result, tok_lc_collate, - LC_COLLATE, "LC_COLLATE", ignore_content); - did_copy = 1; - } + /* Process the rest of the line which might change + the collation rules. */ + arg = lr_token (ldfile, charmap, repertoire); + if (arg->tok != tok_eof && arg->tok != tok_eol) + read_directions (ldfile, arg, charmap, repertoire, + collate); + } + break; + } + else if (was_ellipsis != tok_none) + { + /* Using the information in the `ellipsis_weight' + element and this and the last value we have to handle + the ellipsis now. */ + assert (state == 1); - /* Prepare the data structures. */ - collate_startup (ldfile, result, charmap, ignore_content); - collate = result->categories[LC_COLLATE].collate; + handle_ellipsis (ldfile, arg, was_ellipsis, charmap, repertoire, + collate); - while (1) - { - /* Of course we don't proceed beyond the end of file. */ - if (nowtok == tok_eof) - break; + /* Remember that we processed the ellipsis. */ + was_ellipsis = tok_none; - /* Ignore empty lines. */ - if (nowtok == tok_eol) - { - now = lr_token (ldfile, charmap, NULL); - nowtok = now->tok; - continue; - } + /* And don't add the value a second time. */ + break; + } - switch (nowtok) - { - case tok_coll_weight_max: - if (did_copy) - goto err_label; - /* The rest of the line must be a single integer value. */ - now = lr_token (ldfile, charmap, NULL); - if (now->tok != tok_number) - goto err_label; - /* We simply forget about the value we just read, the implementation - has no fixed limits. */ - lr_ignore_rest (ldfile, 1); + /* Now insert in the new place. */ + insert_value (ldfile, arg, charmap, repertoire, collate); break; - case tok_script: - if (did_copy) - goto err_label; - /* We expect the name of the script in brackets. */ - now = lr_token (ldfile, charmap, NULL); - if (now->tok != tok_bsymbol && now->tok != tok_ucs4) - goto err_label; - if (now->tok != tok_bsymbol) + case tok_undefined: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) { - lr_error (ldfile, _("\ -script name `%s' must not duplicate any known name"), - tok->val.str.startmb); lr_ignore_rest (ldfile, 0); break; } - collate->scripts = xmalloc (collate->scripts, - (collate->nscripts - * sizeof (const char *))); - collate->scripts[collate->nscripts++] = tok->val.str.startmb; - lr_ignore_rest (ldfile, 1); - break; - case tok_collating_element: - if (did_copy) - goto err_label; - /* Get the first argument, a symbol in brackets. */ - now = lr_token (ldfile, charmap, NULL); - if (now->tok != tok_bsymbol) + if (state != 1) goto err_label; - /* Test it. */ - if (collate_element_to (ldfile, collate, now, charmap, repertoire)) + + if (was_ellipsis != tok_none) + { + lr_error (ldfile, + _("%s: cannot have `%s' as end of ellipsis range"), + "LC_COLLATE", "UNDEFINED"); + + unlink_element (collate); + was_ellipsis = tok_none; + } + + /* See whether UNDEFINED already appeared somewhere. */ + if (collate->undefined.next != NULL + || (collate->cursor != NULL + && collate->undefined.next == collate->cursor)) { - /* An error occurred. */ + lr_error (ldfile, + _("%s: order for `%.*s' already defined at %s:%zu"), + "LC_COLLATE", 9, "UNDEFINED", collate->undefined.file, + collate->undefined.line); lr_ignore_rest (ldfile, 0); - break; } - save_str = tok->val.str.startmb; - /* Next comes `from'. */ - now = lr_token (ldfile, charmap, NULL); - if (now->tok != tok_from) - goto err_label; - /* Now comes a string. */ - now = lr_token (ldfile, charmap, repertoire); - if (now->tok != tok_string) - goto err_label; - collate_element_from (ldfile, collate, save_str, now, charmap, - repertoire); - /* The rest of the line should be empty. */ - lr_ignore_rest (ldfile, 1); + else + /* Parse the weights. */ + insert_weights (ldfile, &collate->undefined, charmap, + repertoire, collate, tok_none); break; - case tok_collating_symbol: - if (did_copy) - goto err_label; - /* Get the argument, a single symbol in brackets. */ - now = lr_token (ldfile, charmap, NULL); - if (now->tok != tok_bsymbol) + case tok_ellipsis2: + case tok_ellipsis3: + case tok_ellipsis4: + /* This is the symbolic (decimal or hexadecimal) or absolute + ellipsis. */ + if (was_ellipsis != tok_none) goto err_label; - collate_symbol (ldfile, collate, now, charmap, repertoire); - break; - case tok_order_start: - if (did_copy) + if (state != 1 && state != 3) goto err_label; - /* We expect now a scripting symbol or start right away - with the order keywords. Or we have no argument at all - in which means `forward'. */ - now = lr_token (ldfile, charmap, NULL); - if (now->tok == tok_eol) - { - static enum coll_sort_rule default_rule = sort_forward; - /* Use a single `forward' rule. */ - collate->nrules = 1; - collate->rules = &default_rule; - } - else - { - /* XXX We don't recognize the ISO 14651 extensions yet. */ - uint32_t nrules = 0; - uint32_t nrules_max = 32; - enum coll_sort_rule *rules = alloca (nrules_max - * sizeof (*rules)); - int saw_semicolon = 0; - - memset (rules, '\0', nrules_max * sizeof (*rules)); - do - { - if (now->tok != tok_forward && now->tok != tok_backward - && now->tok != tok_position) - goto err_label; + was_ellipsis = nowtok; - if (saw_semicolon) - { - if (nrules == nrules_max) - { - newp = alloca (nrules_max * 2 * sizeof (*rules)); - rules = memcpy (newp, rules, - nrules_max * sizeof (*rules)); - memset (&rules[nrules_max], '\0', - nrules_max * sizeof (*rules)); - nrules_max *= 2; - } - ++nrules; - } + insert_weights (ldfile, &collate->ellipsis_weight, charmap, + repertoire, collate, nowtok); + break; - switch (now->tok) - { - case tok_forward: - if ((rules[nrules] & sort_backward) != 0) - { - lr_error (ldfile, _("\ -`forward' and `backward' order exclude each other")); - lr_ignore_rest (ldfile, 0); - goto error_sort; - } - rules[nrules] |= sort_forward; - break; - case tok_backward: - if ((rules[nrules] & sort_forward) != 0) - { - lr_error (ldfile, _("\ -`forward' and `backward' order exclude each other")); - lr_ignore_rest (ldfile, 0); - goto error_sort; - } - rules[nrules] |= sort_backward; - break; - case tok_position: - rules[nrules] |= tok_position; - break; - } + case tok_end: + /* Next we assume `LC_COLLATE'. */ + if (!ignore_content) + { + if (state == 0) + /* We must either see a copy statement or have + ordering values. */ + lr_error (ldfile, + _("%s: empty category description not allowed"), + "LC_COLLATE"); + else if (state == 1) + { + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); - /* Get the next token. This is either the end of the line, - a comma or a semicolon. */ - now = lr_token (ldfile, charmap, NULL); - if (now->tok == tok_comma || now->tok == tok_semicolon) + /* Handle ellipsis at end of list. */ + if (was_ellipsis != tok_none) { - saw_semicolon = now->tok == tok_semicolon; - now = lr_token (ldfile, charmap, NULL); + handle_ellipsis (ldfile, NULL, was_ellipsis, charmap, + repertoire, collate); + was_ellipsis = tok_none; } } - while (now->tok != tok_eol || now->tok != tok_eof); - - error_sort: - collate->nrules = nrules; - collate->rules = memcpy (xmalloc (nrules * sizeof (*rules)), - rules, nrules * sizeof (*rules)); + else if (state == 3) + error (0, 0, _("%s: missing `reorder-end' keyword"), + "LC_COLLATE"); + else if (state == 5) + error (0, 0, _("%s: missing `reorder-sections-end' keyword"), + "LC_COLLATE"); } + arg = lr_token (ldfile, charmap, NULL); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE"); + else if (arg->tok != tok_lc_collate) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_COLLATE"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_collate); + return; - /* Now read the rules. */ - read_rules (ldfile, collate, charmap, repertoire); - break; - - case tok_reorder_after: - break; - - case tok_reorder_script_after: - break; - - default: - err_label: - if (now->tok != tok_eof) - SYNTAX_ERROR (_("syntax error in %s locale definition"), - "LC_COLLATE"); + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE"); } /* Prepare for the next round. */ @@ -3630,8 +3416,5 @@ script name `%s' must not duplicate any known name"), } /* When we come here we reached the end of the file. */ - lr_error (ldfile, _("premature end of file while reading category `%s'"), - "LC_COLLATE"); + lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE"); } - -#endif