]> sourceware.org Git - glibc.git/blob - locale/programs/ld-collate.c
Wed May 22 22:10:01 1996 Roland McGrath <roland@delasyd.gnu.ai.mit.edu>
[glibc.git] / locale / programs / ld-collate.c
/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.

The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.

The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Library General Public License for more details.

You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB.  If
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <endian.h>
25 #include <errno.h>
26 #include <limits.h>
27 #include <locale.h>
28 #include <obstack.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <wchar.h>
32
33 #include "localeinfo.h"
34 #include "locales.h"
35 #include "simple-hash.h"
36 #include "stringtrans.h"
37
38 /* Uncomment the following line in the production version. */
39 /* #define NDEBUG 1 */
40 #include <assert.h>
41
42
/* Larger of A and B.  Both arguments are evaluated twice, so they must
   not have side effects.  */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Reverse the byte order of the 32-bit unsigned value W.  W is evaluated
   several times and must not have side effects; the result is only
   meaningful when W has an unsigned 32-bit type.  */
#define SWAPU32(w) \
  (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
47
48
/* What kind of symbols get defined?  The value is stored in the `kind'
   member of struct locale_collate_t and records what the current line
   of the order specification defined.  */
enum coll_symbol
{
  undefined,	/* The UNDEFINED keyword.  */
  ellipsis,	/* A `...' line.  */
  character,	/* A character known to the charset.  */
  element,	/* A collating element.  */
  symbol	/* A collating symbol.  */
};
58
59
/* Record of a forward reference: a weight which named a symbol that had
   no weight yet when it was read.  collate_finish walks the list of
   these records and stores the final weight through WHERE.POS.  */
typedef struct patch_t
{
  /* Source position, used for the diagnostic when the symbol never
     received a weight.  */
  const char *fname;
  size_t lineno;
  /* NUL-terminated name of the referenced symbol.  */
  const char *token;
  union
  {
    /* Location which receives the resolved weight.  */
    unsigned int *pos;
    /* NOTE(review): not used in the part of the file visible here;
       presumably an index used while the target array can still move.  */
    size_t idx;
  } where;
  struct patch_t *next;
} patch_t;
72
73
/* One entry of the result table: a single character or a
   multi-character collating element together with its weights.  */
typedef struct element_t
{
  /* L'\0'-terminated wide character name.  */
  const wchar_t *name;
  /* Position in the collation sequence; 0 means none assigned yet.  */
  unsigned int this_weight;

  /* Next entry whose name starts with the same character.  */
  struct element_t *next;

  /* Weight data as consumed by collate_output: first one count per
     sorting rule, then the weights of all rules back to back.
     ORDERING_LEN is the number of weights, so the array holds
     (number of rules + ORDERING_LEN) words.  NULL while no weights are
     known.  */
  unsigned int *ordering;
  size_t ordering_len;
} element_t;
84
85
86 /* The real definition of the struct for the LC_CTYPE locale. */
87 struct locale_collate_t
88 {
89 /* Collate symbol table. Simple mapping to number. */
90 hash_table symbols;
91
92 /* The collation elements. */
93 hash_table elements;
94 struct obstack element_mem;
95
96 /* The result table. */
97 hash_table result;
98
99 /* Sorting rules given in order_start line. */
100 int nrules;
101 int nrules_max;
102 enum coll_sort_rule *rules;
103
104 /* Used while recognizing symbol composed of multiple tokens
105 (collating-element). */
106 const char *combine_token;
107 size_t combine_token_len;
108
109 /* How many sorting order specifications so far. */
110 unsigned int order_cnt;
111
112 /* Was lastline ellipsis? */
113 int was_ellipsis;
114 /* Value of last entry if was character. */
115 wchar_t last_char;
116 /* Current element. */
117 element_t *current_element;
118 /* What kind of symbol is current element. */
119 enum coll_symbol kind;
120
121 /* While collecting the weigths we need some temporary space. */
122 unsigned int current_order;
123 int *weight_cnt;
124 int weight_idx;
125 unsigned int *weight;
126 int nweight;
127 int nweight_max;
128
129 /* Patch lists. */
130 patch_t *current_patch;
131 patch_t *all_patches;
132
133 /* Room for the UNDEFINED information. */
134 element_t undefined;
135 unsigned int undefined_len;
136 };
137
138
/* Be verbose?  Defined in localedef.c.  */
extern int verbose;


/* Allocation wrappers defined elsewhere.  The code in this file never
   checks their result for NULL.  */
void *xmalloc (size_t n);
void *xrealloc (void *p, size_t n);


/* Allocation primitives used by the obstack macros in this file.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
149
150
151 void
152 collate_startup (struct linereader *lr, struct localedef_t *locale,
153 struct charset_t *charset)
154 {
155 struct locale_collate_t *collate;
156
157 /* It is important that we always use UCS4 encoding for strings now. */
158 encoding_method = ENC_UCS4;
159
160 /* Allocate the needed room. */
161 locale->categories[LC_COLLATE].collate = collate =
162 (struct locale_collate_t *) xmalloc (sizeof (struct locale_collate_t));
163
164 /* Allocate hash table for collating elements. */
165 if (init_hash (&collate->elements, 512))
166 error (4, 0, _("memory exhausted"));
167 collate->combine_token = NULL;
168 obstack_init (&collate->element_mem);
169
170 /* Allocate hash table for collating elements. */
171 if (init_hash (&collate->symbols, 64))
172 error (4, 0, _("memory exhausted"));
173
174 /* Allocate hash table for result. */
175 if (init_hash (&collate->result, 512))
176 error (4, 0, _("memory exhausted"));
177
178 collate->nrules = 0;
179 collate->nrules_max = 10;
180 collate->rules
181 = (enum coll_sort_rule *) xmalloc (collate->nrules_max
182 * sizeof (enum coll_sort_rule));
183
184 collate->order_cnt = 1; /* The smallest weight is 2. */
185
186 collate->was_ellipsis = 0;
187 collate->last_char = L'\0'; /* 0 because leading ellipsis is allowed. */
188
189 collate->all_patches = NULL;
190
191 /* This tells us no UNDEFINED entry was found until now. */
192 collate->undefined.this_weight = 0;
193
194 lr->translate_strings = 0;
195 }
196
197
198 void
199 collate_finish (struct localedef_t *locale, struct charset_t *charset)
200 {
201 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
202 patch_t *patch;
203 size_t cnt;
204
205 /* Patch the constructed table so that forward references are
206 correctly filled. */
207 for (patch = collate->all_patches; patch != NULL; patch = patch->next)
208 {
209 wchar_t wch;
210 size_t toklen = strlen (patch->token);
211 void *ptmp;
212 unsigned int value = 0;
213
214 wch = charset_find_value (charset, patch->token, toklen);
215 if (wch != ILLEGAL_CHAR_VALUE)
216 {
217 element_t *runp;
218
219 if (find_entry (&collate->result, &wch, sizeof (wchar_t),
220 (void *) &runp) < 0)
221 runp = NULL;
222 for (; runp != NULL; runp = runp->next)
223 if (runp->name[0] == wch && runp->name[1] == L'\0')
224 break;
225
226 value = runp == NULL ? 0 : runp->this_weight;
227 }
228 else if (find_entry (&collate->elements, patch->token, toklen, &ptmp)
229 >= 0)
230 {
231 value = ((element_t *) ptmp)->this_weight;
232 }
233 else if (find_entry (&collate->symbols, patch->token, toklen, &ptmp)
234 >= 0)
235 {
236 value = (unsigned long int) ptmp;
237 }
238 else
239 value = 0;
240
241 if (value == 0)
242 error_at_line (0, 0, patch->fname, patch->lineno,
243 _("no weight defined for symbol `%s'"), patch->token);
244 else
245 *patch->where.pos = value;
246 }
247
248 /* If no definition for UNDEFINED is given, all characters in the
249 given charset must be specified. */
250 if (collate->undefined.ordering == NULL)
251 {
252 /**************************************************************\
253 |* XXX We should test whether really an unspecified character *|
254 |* exists before giving the message. *|
255 \**************************************************************/
256 u_int32_t weight;
257
258 error (0, 0, _("no definition of `UNDEFINED'"));
259
260 collate->undefined.ordering_len = collate->nrules;
261 weight = ++collate->order_cnt;
262
263 for (cnt = 0; cnt < collate->nrules; ++cnt)
264 {
265 u_int32_t one = 1;
266 obstack_grow (&collate->element_mem, &one, sizeof (one));
267 }
268
269 for (cnt = 0; cnt < collate->nrules; ++cnt)
270 obstack_grow (&collate->element_mem, &weight, sizeof (weight));
271
272 collate->undefined.ordering = obstack_finish (&collate->element_mem);
273 }
274
275 collate->undefined_len = 2; /* For the name: 1 x wchar_t + L'\0'. */
276 for (cnt = 0; cnt < collate->nrules; ++cnt)
277 collate->undefined_len += 1 + collate->undefined.ordering[cnt];
278
279 /* Collating symbols are not used anymore. */
280 (void) delete_hash (&collate->symbols);
281 }
282
283
284
285 void
286 collate_output (struct localedef_t *locale, const char *output_path)
287 {
288 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
289 u_int32_t table_size, table_best, level_best, sum_best;
290 void *last;
291 element_t *pelem;
292 wchar_t *name;
293 size_t len;
294 const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
295 struct iovec iov[2 + nelems];
296 struct locale_file data;
297 u_int32_t idx[nelems];
298 struct obstack non_simple;
299 size_t cnt, entry_size;
300 u_int32_t undefined_offset = UINT_MAX;
301 u_int32_t *table, *extra, *table2, *extra2;
302 size_t extra_len;
303
304 sum_best = UINT_MAX;
305 table_best = 0xffff;
306 level_best = 0xffff;
307
308 /* Compute table size. */
309 fputs (_("\
310 Computing table size for collation information might take a while..."),
311 stderr);
312 for (table_size = 256; table_size < sum_best; ++table_size)
313 {
314 size_t hits[table_size];
315 unsigned int worst = 1;
316 size_t cnt;
317
318 last = NULL;
319
320 for (cnt = 0; cnt < 256; ++cnt)
321 hits[cnt] = 1;
322 memset (&hits[256], '\0', sizeof (hits) - 256 * sizeof (size_t));
323
324 while (iterate_table (&collate->result, &last, (const void **) &name,
325 &len, (void **) &pelem) >= 0)
326 if (pelem->ordering != NULL && pelem->name[0] > 0xff)
327 if (++hits[(unsigned int) pelem->name[0] % table_size] > worst)
328 {
329 worst = hits[(unsigned int) pelem->name[0] % table_size];
330 if (table_size * worst > sum_best)
331 break;
332 }
333
334 if (table_size * worst < sum_best)
335 {
336 sum_best = table_size * worst;
337 table_best = table_size;
338 level_best = worst;
339 }
340 }
341 assert (table_best != 0xffff || level_best != 0xffff);
342 fputs (_(" done\n"), stderr);
343
344 obstack_init (&non_simple);
345
346 data.magic = LIMAGIC (LC_COLLATE);
347 data.n = nelems;
348 iov[0].iov_base = (void *) &data;
349 iov[0].iov_len = sizeof (data);
350
351 iov[1].iov_base = (void *) idx;
352 iov[1].iov_len = sizeof (idx);
353
354 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_base = &collate->nrules;
355 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (u_int32_t);
356
357 table = (u_int32_t *) alloca (collate->nrules * sizeof (u_int32_t));
358 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_base = table;
359 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_len
360 = collate->nrules * sizeof (u_int32_t);
361 /* Another trick here. Describing the collation method needs only a
362 few bits (3, to be exact). But the binary file should be
363 accessible by maschines with both endianesses and so we store both
364 information in the same word. */
365 for (cnt = 0; cnt < collate->nrules; ++cnt)
366 table[cnt] = collate->rules[cnt] | SWAPU32 (collate->rules[cnt]);
367
368 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_base = &table_best;
369 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (u_int32_t);
370
371 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_base = &level_best;
372 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_len
373 = sizeof (u_int32_t);
374
375 entry_size = 1 + MAX (collate->nrules, 2);
376
377 table = (u_int32_t *) alloca (table_best * level_best * entry_size
378 * sizeof (table[0]));
379 memset (table, '\0', table_best * level_best * entry_size
380 * sizeof (table[0]));
381
382
383 /* Macros for inserting in output table. */
384 #define ADD_VALUE(expr) \
385 do { \
386 u_int32_t to_write = (u_int32_t) expr; \
387 obstack_grow (&non_simple, &to_write, sizeof (to_write)); \
388 } while (0)
389
390 #define ADD_ELEMENT(pelem, len) \
391 do { \
392 size_t cnt, idx; \
393 \
394 ADD_VALUE (len); \
395 \
396 wlen = wcslen (pelem->name); \
397 obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (u_int32_t)); \
398 \
399 idx = collate->nrules; \
400 for (cnt = 0; cnt < collate->nrules; ++cnt) \
401 { \
402 size_t disp; \
403 \
404 ADD_VALUE (pelem->ordering[cnt]); \
405 for (disp = 0; disp < pelem->ordering[cnt]; ++disp) \
406 ADD_VALUE (pelem->ordering[idx++]); \
407 } \
408 } while (0)
409
410 #define ADD_FORWARD(pelem) \
411 do { \
412 /* We leave a reference in the main table and put all \
413 information in the table for the extended entries. */ \
414 element_t *runp; \
415 element_t *has_simple = NULL; \
416 size_t wlen; \
417 \
418 table[(level * table_best + slot) * entry_size + 1] \
419 = FORWARD_CHAR; \
420 table[(level * table_best + slot) * entry_size + 2] \
421 = obstack_object_size (&non_simple) / sizeof (u_int32_t); \
422 \
423 /* Here we have to construct the non-simple table entry. First \
424 compute the total length of this entry. */ \
425 for (runp = (pelem); runp != NULL; runp = runp->next) \
426 if (runp->ordering != NULL) \
427 { \
428 u_int32_t value; \
429 size_t cnt; \
430 \
431 value = 1 + wcslen (runp->name) + 1; \
432 \
433 for (cnt = 0; cnt < collate->nrules; ++cnt) \
434 /* We have to take care for entries without ordering \
435 information. While reading them they get inserted in the \
436 table and later not removed when something goes wrong with \
437 reading its weights. */ \
438 { \
439 value += 1 + runp->ordering[cnt]; \
440 \
441 if (runp->name[1] == L'\0') \
442 has_simple = runp; \
443 } \
444 \
445 ADD_ELEMENT (runp, value); \
446 } \
447 \
448 if (has_simple == NULL) \
449 { \
450 size_t idx, cnt; \
451 \
452 ADD_VALUE (collate->undefined_len + 1); \
453 \
454 /* Add the name. */ \
455 ADD_VALUE ((pelem)->name[0]); \
456 ADD_VALUE (0); \
457 \
458 idx = collate->nrules; \
459 for (cnt = 0; cnt < collate->nrules; ++cnt) \
460 { \
461 size_t disp; \
462 \
463 ADD_VALUE (collate->undefined.ordering[cnt]); \
464 for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \
465 { \
466 if (collate->undefined.ordering[idx] == ELLIPSIS_CHAR) \
467 ADD_VALUE ((pelem)->name[0]); \
468 else \
469 ADD_VALUE (collate->undefined.ordering[idx++]); \
470 ++idx; \
471 } \
472 } \
473 } \
474 } while (0)
475
476
477
478 /* Fill the table now. First we look for all the characters which
479 fit into one single byte. This speeds up the 8-bit string
480 functions. */
481 last = NULL;
482 while (iterate_table (&collate->result, &last, (const void **) &name,
483 &len, (void **) &pelem) >= 0)
484 if (pelem->name[0] <= 0xff)
485 {
486 /* We have a single byte name. Now we must distinguish
487 between entries in simple form (i.e., only one value per
488 weight and no collation element starting with the same
489 character) and those which are not. */
490 size_t slot = ((size_t) pelem->name[0]);
491 const size_t level = 0;
492
493 table[slot * entry_size] = pelem->name[0];
494
495 if (pelem->name[1] == L'\0' && pelem->next == NULL
496 && pelem->ordering_len == collate->nrules)
497 {
498 /* Yes, we have a simple one. Lucky us. */
499 size_t cnt;
500
501 for (cnt = 0; cnt < collate->nrules; ++cnt)
502 table[slot * entry_size + 1 + cnt]
503 = pelem->ordering[collate->nrules + cnt];
504 }
505 else
506 ADD_FORWARD (pelem);
507 }
508
509 /* Now check for missing single byte entries. If one exist we fill
510 with the UNDEFINED entry. */
511 for (cnt = 0; cnt < 256; ++cnt)
512 /* The first weight is never 0 for existing entries. */
513 if (table[cnt * entry_size + 1] == 0)
514 {
515 /* We have to fill in the information from the UNDEFINED
516 entry. */
517 table[cnt * entry_size] = (u_int32_t) cnt;
518
519 if (collate->undefined.ordering_len == collate->nrules)
520 {
521 size_t inner;
522
523 for (inner = 0; inner < collate->nrules; ++inner)
524 if (collate->undefined.ordering[collate->nrules + inner]
525 == ELLIPSIS_CHAR)
526 table[cnt * entry_size + 1 + inner] = cnt;
527 else
528 table[cnt * entry_size + 1 + inner]
529 = collate->undefined.ordering[collate->nrules + inner];
530 }
531 else
532 {
533 if (undefined_offset != UINT_MAX)
534 {
535 table[cnt * entry_size + 1] = FORWARD_CHAR;
536 table[cnt * entry_size + 2] = undefined_offset;
537 }
538 else
539 {
540 const size_t slot = cnt;
541 const size_t level = 0;
542
543 ADD_FORWARD (&collate->undefined);
544 undefined_offset = table[cnt * entry_size + 2];
545 }
546 }
547 }
548
549 /* Now we are ready for inserting the whole rest. */
550 last = NULL;
551 while (iterate_table (&collate->result, &last, (const void **) &name,
552 &len, (void **) &pelem) >= 0)
553 if (pelem->name[0] > 0xff)
554 {
555 /* Find the position. */
556 size_t slot = ((size_t) pelem->name[0]) % table_best;
557 size_t level = 0;
558
559 while (table[(level * table_best + slot) * entry_size + 1] != 0)
560 ++level;
561 assert (level < level_best);
562
563 if (pelem->name[1] == L'\0' && pelem->next == NULL
564 && pelem->ordering_len == collate->nrules)
565 {
566 /* Again a simple entry. */
567 size_t inner;
568
569 for (inner = 0; inner < collate->nrules; ++inner)
570 table[(level * table_best + slot) * entry_size + 1 + inner]
571 = pelem->ordering[collate->nrules + inner];
572 }
573 else
574 ADD_FORWARD (pelem);
575 }
576
577 /* Add the UNDEFINED entry. */
578 {
579 /* Here we have to construct the non-simple table entry. */
580 size_t idx, cnt;
581
582 undefined_offset = obstack_object_size (&non_simple);
583
584 idx = collate->nrules;
585 for (cnt = 0; cnt < collate->nrules; ++cnt)
586 {
587 size_t disp;
588
589 ADD_VALUE (collate->undefined.ordering[cnt]);
590 for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp)
591 ADD_VALUE (collate->undefined.ordering[idx++]);
592 }
593 }
594
595 /* Finish the extra block. */
596 extra_len = obstack_object_size (&non_simple);
597 extra = (u_int32_t *) obstack_finish (&non_simple);
598 assert ((extra_len % sizeof (u_int32_t)) == 0);
599
600 /* Now we have to build the two array for the other byte ordering. */
601 table2 = (u_int32_t *) alloca (table_best * level_best * entry_size
602 * sizeof (table[0]));
603 extra2 = (u_int32_t *) alloca (extra_len);
604
605 for (cnt = 0; cnt < table_best * level_best * entry_size; ++cnt)
606 table2[cnt] = SWAPU32 (table[cnt]);
607
608 for (cnt = 0; cnt < extra_len / sizeof (u_int32_t); ++cnt)
609 extra2[cnt] = SWAPU32 (extra2[cnt]);
610
611 /* Store table adresses and lengths. */
612 #if __BYTE_ORDER == __BIG_ENDIAN
613 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table;
614 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
615 = table_best * level_best * entry_size * sizeof (table[0]);
616
617 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table2;
618 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
619 = table_best * level_best * entry_size * sizeof (table[0]);
620
621 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra;
622 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
623
624 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra2;
625 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
626 #else
627 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table2;
628 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
629 = table_best * level_best * entry_size * sizeof (table[0]);
630
631 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table;
632 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
633 = table_best * level_best * entry_size * sizeof (table[0]);
634
635 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra2;
636 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
637
638 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra;
639 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
640 #endif
641
642 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_base = &undefined_offset;
643 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (u_int32_t);
644
645 /* Update idx array. */
646 idx[0] = iov[0].iov_len + iov[1].iov_len;
647 for (cnt = 1; cnt < nelems; ++cnt)
648 idx[cnt] = idx[cnt - 1] + iov[1 + cnt].iov_len;
649
650 write_locale_data (output_path, "LC_COLLATE", 2 + nelems, iov);
651 }
652
653
654 void
655 collate_element_to (struct linereader *lr, struct localedef_t *locale,
656 struct token *code, struct charset_t *charset)
657 {
658 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
659 unsigned int value;
660 void *not_used;
661
662 if (collate->combine_token != NULL)
663 {
664 free ((void *) collate->combine_token);
665 collate->combine_token = NULL;
666 }
667
668 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
669 if (value != ILLEGAL_CHAR_VALUE)
670 {
671 lr_error (lr, _("symbol for multicharacter collating element "
672 "`%.*s' duplicates symbolic name in charset"),
673 code->val.str.len, code->val.str.start);
674 return;
675 }
676
677 if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
678 &not_used) >= 0)
679 {
680 lr_error (lr, _("symbol for multicharacter collating element "
681 "`%.*s' duplicates other element definition"),
682 code->val.str.len, code->val.str.start);
683 return;
684 }
685
686 if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
687 &not_used) >= 0)
688 {
689 lr_error (lr, _("symbol for multicharacter collating element "
690 "`%.*s' duplicates symbol definition"),
691 code->val.str.len, code->val.str.start);
692 return;
693 }
694
695 collate->combine_token = code->val.str.start;
696 collate->combine_token_len = code->val.str.len;
697 }
698
699
700 void
701 collate_element_from (struct linereader *lr, struct localedef_t *locale,
702 struct token *code, struct charset_t *charset)
703 {
704 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
705 element_t *elemp, *runp;
706
707 /* CODE is a string. */
708 elemp = (element_t *) obstack_alloc (&collate->element_mem,
709 sizeof (element_t));
710
711 /* We have to translate the string. It may contain <...> character
712 names. */
713 elemp->name = (wchar_t *) translate_string (code->val.str.start, charset);
714 elemp->this_weight = 0;
715 elemp->ordering = NULL;
716 elemp->ordering_len = 0;
717
718 free (code->val.str.start);
719
720 if (elemp->name == NULL)
721 {
722 /* At least one character in the string is not defined. We simply
723 do nothing. */
724 if (verbose)
725 lr_error (lr, _("\
726 `from' string in collation element declaration contains unknown character"));
727 return;
728 }
729
730 if (elemp->name[0] == L'\0' || elemp->name[1] == L'\0')
731 {
732 lr_error (lr, _("illegal colltion element"));
733 return;
734 }
735
736 /* The entries in the linked lists of RESULT are sorting in
737 descending order. The order is important for the `strcoll' and
738 `wcscoll' functions. */
739 if (find_entry (&collate->result, elemp->name, sizeof (wchar_t),
740 (void *) &runp) >= 0)
741 {
742 /* We already have an entry with this key. Check whether it is
743 identical. */
744 element_t *prevp = NULL;
745 int cmpres;
746
747 do
748 {
749 cmpres = wcscmp (elemp->name, runp->name);
750 if (cmpres <= 0)
751 break;
752 prevp = runp;
753 }
754 while ((runp = runp->next) != NULL);
755
756 if (cmpres == 0)
757 lr_error (lr, _("duplicate collating element definition"));
758 else
759 {
760 elemp->next = runp;
761 if (prevp == NULL)
762 {
763 if (set_entry (&collate->result, elemp->name, sizeof (wchar_t),
764 elemp) < 0)
765 error (EXIT_FAILURE, 0,
766 _("\
767 error while inserting collation element into hash table"));
768 }
769 else
770 prevp->next = elemp;
771 }
772 }
773 else
774 {
775 elemp->next = NULL;
776 if (insert_entry (&collate->result, elemp->name, sizeof (wchar_t), elemp)
777 < 0)
778 error (EXIT_FAILURE, errno, _("error while inserting to hash table"));
779 }
780
781 if (insert_entry (&collate->elements, collate->combine_token,
782 collate->combine_token_len, (void *) elemp) < 0)
783 lr_error (lr, _("cannot insert new collating symbol definition: %s"),
784 strerror (errno));
785 }
786
787
788 void
789 collate_symbol (struct linereader *lr, struct localedef_t *locale,
790 struct token *code, struct charset_t *charset)
791 {
792 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
793 wchar_t value;
794 void *not_used;
795
796 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
797 if (value != ILLEGAL_CHAR_VALUE)
798 {
799 lr_error (lr, _("symbol for multicharacter collating element "
800 "`%.*s' duplicates symbolic name in charset"),
801 code->val.str.len, code->val.str.start);
802 return;
803 }
804
805 if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
806 &not_used) >= 0)
807 {
808 lr_error (lr, _("symbol for multicharacter collating element "
809 "`%.*s' duplicates element definition"),
810 code->val.str.len, code->val.str.start);
811 return;
812 }
813
814 if (find_entry (&collate->symbols, code->val.str.start, code->val.str.len,
815 &not_used) >= 0)
816 {
817 lr_error (lr, _("symbol for multicharacter collating element "
818 "`%.*s' duplicates other symbol definition"),
819 code->val.str.len, code->val.str.start);
820 return;
821 }
822
823 if (insert_entry (&collate->symbols, code->val.str.start, code->val.str.len,
824 (void *) 0) < 0)
825 lr_error (lr, _("cannot insert new collating symbol definition: %s"),
826 strerror (errno));
827 }
828
829
830 void
831 collate_new_order (struct linereader *lr, struct localedef_t *locale,
832 enum coll_sort_rule sort_rule)
833 {
834 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
835
836 if (collate->nrules >= collate->nrules_max)
837 {
838 collate->nrules_max *= 2;
839 collate->rules
840 = (enum coll_sort_rule *) xrealloc (collate->rules,
841 collate->nrules_max
842 * sizeof (enum coll_sort_rule));
843 }
844
845 collate->rules[collate->nrules++] = sort_rule;
846 }
847
848
849 void
850 collate_build_arrays (struct linereader *lr, struct localedef_t *locale)
851 {
852 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
853
854 collate->rules
855 = (enum coll_sort_rule *) xrealloc (collate->rules,
856 collate->nrules
857 * sizeof (enum coll_sort_rule));
858
859 /* Allocate arrays for temporary weights. */
860 collate->weight_cnt = (int *) xmalloc (collate->nrules * sizeof (int));
861
862 /* Choose arbitrary start value for table size. */
863 collate->nweight_max = 5 * collate->nrules;
864 collate->weight = (int *) xmalloc (collate->nweight_max * sizeof (int));
865 }
866
867
868 int
869 collate_order_elem (struct linereader *lr, struct localedef_t *locale,
870 struct token *code, struct charset_t *charset)
871 {
872 const wchar_t zero = L'\0';
873 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
874 int result = 0;
875 wchar_t value;
876 void *tmp;
877 int i;
878
879 switch (code->tok)
880 {
881 case tok_bsymbol:
882 /* We have a string to find in one of the three hashing tables. */
883 value = charset_find_value (charset, code->val.str.start,
884 code->val.str.len);
885 if (value != ILLEGAL_CHAR_VALUE)
886 {
887 element_t *lastp, *firstp;
888
889 collate->kind = character;
890
891 if (find_entry (&collate->result, &value, sizeof (wchar_t),
892 (void *) &firstp) < 0)
893 firstp = lastp = NULL;
894 else
895 {
896 /* The entry for the simple character is always found at
897 the end. */
898 lastp = firstp;
899 while (lastp->next != NULL)
900 lastp = lastp->next;
901
902 if (lastp->name[0] == value && lastp->name[1] == L'\0')
903 {
904 lr_error (lr, _("duplicate definition for character `%.*s'"),
905 code->val.str.len, code->val.str.start);
906 lr_ignore_rest (lr, 0);
907 result = -1;
908 break;
909 }
910 }
911
912 collate->current_element
913 = (element_t *) obstack_alloc (&collate->element_mem,
914 sizeof (element_t));
915
916 obstack_grow (&collate->element_mem, &value, sizeof (value));
917 obstack_grow (&collate->element_mem, &zero, sizeof (zero));
918
919 collate->current_element->name =
920 (const wchar_t *) obstack_finish (&collate->element_mem);
921
922 collate->current_element->this_weight = ++collate->order_cnt;
923
924 collate->current_element->next = NULL;
925
926 if (firstp == NULL)
927 {
928 if (insert_entry (&collate->result, &value, sizeof (wchar_t),
929 (void *) collate->current_element) < 0)
930 {
931 lr_error (lr, _("cannot insert collation element `%.*s'"),
932 code->val.str.len, code->val.str.start);
933 exit (4);
934 }
935 }
936 else
937 lastp->next = collate->current_element;
938 }
939 else if (find_entry (&collate->elements, code->val.str.start,
940 code->val.str.len, &tmp) >= 0)
941 {
942 collate->current_element = (element_t *) tmp;
943
944 if (collate->current_element->this_weight != 0)
945 {
946 lr_error (lr, _("\
947 collation element `%.*s' appears more than once: ignore line"),
948 code->val.str.len, code->val.str.start);
949 lr_ignore_rest (lr, 0);
950 result = -1;
951 break;
952 }
953
954 collate->kind = element;
955 collate->current_element->this_weight = ++collate->order_cnt;
956 }
957 else if (find_entry (&collate->symbols, code->val.str.start,
958 code->val.str.len, &tmp) >= 0)
959 {
960 unsigned int order = ++collate->order_cnt;
961
962 if ((unsigned int) tmp != 0)
963 {
964 lr_error (lr, _("\
965 collation symbol `.*s' appears more than once: ignore line"),
966 code->val.str.len, code->val.str.start);
967 lr_ignore_rest (lr, 0);
968 result = -1;
969 break;
970 }
971
972 collate->kind = symbol;
973
974 if (set_entry (&collate->symbols, code->val.str.start,
975 code->val.str.len, (void *) order) < 0)
976 {
977 lr_error (lr, _("cannot process order specification"));
978 exit (4);
979 }
980 }
981 else
982 {
983 if (verbose)
984 lr_error (lr, _("unknown symbol `%.*s': line ignored"),
985 code->val.str.len, code->val.str.start);
986 lr_ignore_rest (lr, 0);
987
988 result = -1;
989 }
990 break;
991
992 case tok_undefined:
993 collate->kind = undefined;
994 collate->current_element = &collate->undefined;
995 break;
996
997 case tok_ellipsis:
998 if (collate->was_ellipsis)
999 {
1000 lr_error (lr, _("\
1001 two lines in a row containing `...' are not allowed"));
1002 result = -1;
1003 }
1004 else if (collate->kind != character)
1005 {
1006 /* An ellipsis requires the previous line to be an
1007 character definition. */
1008 lr_error (lr, _("\
1009 line before ellipsis does not contain definition for character constant"));
1010 lr_ignore_rest (lr, 0);
1011 result = -1;
1012 }
1013 else
1014 collate->kind = ellipsis;
1015 break;
1016
1017 default:
1018 assert (! "illegal token in `collate_order_elem'");
1019 }
1020
1021 /* Now it's time to handle the ellipsis in the previous line. We do
1022 this only when the last line contained an definition for an
1023 character, the current line also defines an character, the
1024 character code for the later is bigger than the former. */
1025 if (collate->was_ellipsis)
1026 {
1027 if (collate->kind != character)
1028 {
1029 lr_error (lr, _("\
1030 line after ellipsis must contain character definition"));
1031 lr_ignore_rest (lr, 0);
1032 result = -1;
1033 }
1034 else if (collate->last_char > value)
1035 {
1036 lr_error (lr, _("end point of ellipsis range is bigger then start"));
1037 lr_ignore_rest (lr, 0);
1038 result = -1;
1039 }
1040 else
1041 {
1042 /* We can fill the arrays with the information we need. */
1043 wchar_t name[2];
1044 unsigned int *data;
1045 size_t *ptr;
1046 size_t cnt;
1047
1048 name[0] = collate->last_char + 1;
1049 name[1] = L'\0';
1050
1051 data = (unsigned int *) alloca ((collate->nrules + collate->nweight)
1052 * sizeof (unsigned int));
1053 ptr = (size_t *) alloca (collate->nrules * sizeof (size_t));
1054
1055 if (data == NULL || ptr == NULL)
1056 error (4, 0, _("memory exhausted"));
1057
1058 /* Prepare data. Because the characters covered by an
1059 ellipsis all have equal values we prepare the data once
1060 and only change the variable number (if there are any).
1061 PTR[...] will point to the entries which will have to be
1062 fixed during the output loop. */
1063 for (cnt = 0; cnt < collate->nrules; ++cnt)
1064 {
1065 data[cnt] = collate->weight_cnt[cnt];
1066 ptr[cnt] = (cnt == 0
1067 ? collate->nweight
1068 : ptr[cnt - 1] + collate->weight_cnt[cnt - 1]);
1069 }
1070
1071 for (cnt = 0; cnt < collate->nweight; ++cnt)
1072 data[collate->nrules + cnt] = collate->weight[cnt];
1073
1074 for (cnt = 0; cnt < collate->nrules; ++cnt)
1075 if (data[ptr[cnt]] != ELLIPSIS_CHAR)
1076 ptr[cnt] = 0;
1077
1078 while (name[0] <= value)
1079 {
1080 element_t *pelem;
1081
1082 pelem = (element_t *) obstack_alloc (&collate->element_mem,
1083 sizeof (element_t));
1084 if (pelem == NULL)
1085 error (4, 0, _("memory exhausted"));
1086
1087 pelem->name
1088 = (const wchar_t *) obstack_copy (&collate->element_mem,
1089 name, 2 * sizeof (wchar_t));
1090 pelem->this_weight = ++collate->order_cnt;
1091
1092 pelem->ordering_len = collate->nweight;
1093 pelem->ordering
1094 = (unsigned int *) obstack_copy (&collate->element_mem, data,
1095 (collate->nrules
1096 * pelem->ordering_len)
1097 * sizeof (unsigned int));
1098
1099 /* `...' weights need to be adjusted. */
1100 for (cnt = 0; cnt < collate->nrules; ++cnt)
1101 if (ptr[cnt] != 0)
1102 pelem->ordering[ptr[cnt]] = pelem->this_weight;
1103
1104 /* Insert new entry into result table. */
1105 if (find_entry (&collate->result, name, sizeof (wchar_t),
1106 (void *) &pelem->next) >= 0)
1107 {
1108 if (set_entry (&collate->result, name, sizeof (wchar_t),
1109 (void *) pelem->next) < 0)
1110 error (4, 0, _("cannot insert into result table"));
1111 }
1112 else
1113 if (insert_entry (&collate->result, name, sizeof (wchar_t),
1114 (void *) pelem->next) < 0)
1115 error (4, 0, _("cannot insert into result table"));
1116
1117 /* Increment counter. */
1118 ++name[0];
1119 }
1120 }
1121 }
1122
1123 /* Reset counters for weights. */
1124 collate->weight_idx = 0;
1125 collate->nweight = 0;
1126 for (i = 0; i < collate->nrules; ++i)
1127 collate->weight_cnt[i] = 0;
1128 collate->current_patch = NULL;
1129
1130 return result;
1131 }
1132
1133
1134 int
1135 collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale,
1136 struct token *code, struct charset_t *charset)
1137 {
1138 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1139 unsigned int here_weight;
1140 wchar_t value;
1141 void *tmp;
1142
1143 assert (code->tok == tok_bsymbol);
1144
1145 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
1146 if (value != ILLEGAL_CHAR_VALUE)
1147 {
1148 element_t *runp;
1149
1150 if (find_entry (&collate->result, &value, sizeof (wchar_t),
1151 (void *)&runp) < 0)
1152 runp = NULL;
1153
1154 while (runp != NULL
1155 && (runp->name[0] != value || runp->name[1] != L'\0'))
1156 runp = runp->next;
1157
1158 here_weight = runp == NULL ? 0 : runp->this_weight;
1159 }
1160 else if (find_entry (&collate->elements, code->val.str.start,
1161 code->val.str.len, &tmp) >= 0)
1162 {
1163 element_t *runp = (element_t *) tmp;
1164
1165 here_weight = runp->this_weight;
1166 }
1167 else if (find_entry (&collate->symbols, code->val.str.start,
1168 code->val.str.len, &tmp) >= 0)
1169 {
1170 here_weight = (unsigned int) tmp;
1171 }
1172 else
1173 {
1174 if (verbose)
1175 lr_error (lr, _("unknown symbol `%.*s': line ignored"),
1176 code->val.str.len, code->val.str.start);
1177 lr_ignore_rest (lr, 0);
1178 return -1;
1179 }
1180
1181 /* When we currently work on a collation symbol we do not expect any
1182 weight. */
1183 if (collate->kind == symbol)
1184 {
1185 lr_error (lr, _("\
1186 specification of sorting weight for collation symbol does not make sense"));
1187 lr_ignore_rest (lr, 0);
1188 return -1;
1189 }
1190
1191 /* Add to the current collection of weights. */
1192 if (collate->nweight >= collate->nweight_max)
1193 {
1194 collate->nweight_max *= 2;
1195 collate->weight = (unsigned int *) xrealloc (collate->weight,
1196 collate->nweight_max);
1197 }
1198
1199 /* If the weight is currently not known, we remember to patch the
1200 resulting tables. */
1201 if (here_weight == 0)
1202 {
1203 patch_t *newp;
1204
1205 newp = (patch_t *) obstack_alloc (&collate->element_mem,
1206 sizeof (patch_t));
1207 newp->fname = lr->fname;
1208 newp->lineno = lr->lineno;
1209 newp->token = (const char *) obstack_copy0 (&collate->element_mem,
1210 code->val.str.start,
1211 code->val.str.len);
1212 newp->where.idx = collate->nweight++;
1213 newp->next = collate->current_patch;
1214 collate->current_patch = newp;
1215 }
1216 else
1217 collate->weight[collate->nweight++] = here_weight;
1218 ++collate->weight_cnt[collate->weight_idx];
1219
1220 return 0;
1221 }
1222
1223
1224 int
1225 collate_next_weight (struct linereader *lr, struct localedef_t *locale)
1226 {
1227 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1228
1229 if (collate->kind == symbol)
1230 {
1231 lr_error (lr, _("\
1232 specification of sorting weight for collation symbol does not make sense"));
1233 lr_ignore_rest (lr, 0);
1234 return -1;
1235 }
1236
1237 ++collate->weight_idx;
1238 if (collate->weight_idx >= collate->nrules)
1239 {
1240 lr_error (lr, _("too many weights"));
1241 lr_ignore_rest (lr, 0);
1242 return -1;
1243 }
1244
1245 return 0;
1246 }
1247
1248
1249 int
1250 collate_simple_weight (struct linereader *lr, struct localedef_t *locale,
1251 struct token *code, struct charset_t *charset)
1252 {
1253 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1254 unsigned int value = 0;
1255
1256 /* There current tokens can be `IGNORE', `...', or a string. */
1257 switch (code->tok)
1258 {
1259 case tok_ignore:
1260 /* This token is allowed in all situations. */
1261 value = IGNORE_CHAR;
1262 break;
1263
1264 case tok_ellipsis:
1265 /* The ellipsis is only allowed for the `...' or `UNDEFINED'
1266 entry. */
1267 if (collate->kind != ellipsis && collate->kind != undefined)
1268 {
1269 lr_error (lr, _("\
1270 `...' must only be used in `...' and `UNDEFINED' entries"));
1271 lr_ignore_rest (lr, 0);
1272 return -1;
1273 }
1274 value = ELLIPSIS_CHAR;
1275 break;
1276
1277 case tok_string:
1278 /* This can become difficult. We have to get the weights which
1279 correspind the the single wide chars in the string. But some
1280 of the `chars' might not be real characters, but collation
1281 elements or symbols. And so the string decoder might have
1282 signaled errors. The string at this point is not translated.
1283 I.e., all <...> sequences are still there. */
1284 {
1285 char *runp = code->val.str.start;
1286 void *tmp;
1287
1288 while (*runp != '\0')
1289 {
1290 char *startp = (char *) runp;
1291 char *putp = (char *) runp;
1292 wchar_t wch;
1293
1294 /* Lookup weight for char and store it. */
1295 if (*runp == '<')
1296 {
1297 while (*++runp != '\0' && *runp != '>')
1298 {
1299 if (*runp == lr->escape_char)
1300 if (*++runp == '\0')
1301 {
1302 lr_error (lr, _("unterminated weight name"));
1303 lr_ignore_rest (lr, 0);
1304 return -1;
1305 }
1306 *putp++ = *runp;
1307 }
1308 if (*runp == '>')
1309 ++runp;
1310
1311 if (putp == startp)
1312 {
1313 lr_error (lr, _("empty weight name: line ignored"));
1314 lr_ignore_rest (lr, 0);
1315 return -1;
1316 }
1317
1318 wch = charset_find_value (charset, startp, putp - startp);
1319 if (wch != ILLEGAL_CHAR_VALUE)
1320 {
1321 element_t *pelem;
1322
1323 if (find_entry (&collate->result, &wch, sizeof (wchar_t),
1324 (void *)&pelem) < 0)
1325 pelem = NULL;
1326
1327 while (pelem != NULL
1328 && (pelem->name[0] != wch
1329 || pelem->name[1] != L'\0'))
1330 pelem = pelem->next;
1331
1332 value = pelem == NULL ? 0 : pelem->this_weight;
1333 }
1334 else if (find_entry (&collate->elements, startp, putp - startp,
1335 &tmp) >= 0)
1336 {
1337 element_t *pelem = (element_t *) tmp;
1338
1339 value = pelem->this_weight;
1340 }
1341 else if (find_entry (&collate->symbols, startp, putp - startp,
1342 &tmp) >= 0)
1343 {
1344 value = (unsigned int) tmp;
1345 }
1346 else
1347 {
1348 if (verbose)
1349 lr_error (lr, _("unknown symbol `%.*s': line ignored"),
1350 putp - startp, startp);
1351 lr_ignore_rest (lr, 0);
1352 return -1;
1353 }
1354 }
1355 else
1356 {
1357 element_t *wp;
1358 wchar_t wch;
1359
1360 if (*runp == lr->escape_char)
1361 {
1362 static char digits[] = "0123456789abcdef";
1363 char *dp;
1364 int base;
1365
1366 ++runp;
1367 if (tolower (*runp) == 'x')
1368 {
1369 ++runp;
1370 base = 16;
1371 }
1372 else if (tolower (*runp) == 'd')
1373 {
1374 ++runp;
1375 base = 10;
1376 }
1377 else
1378 base = 8;
1379
1380 dp = strchr (digits, tolower (*runp));
1381 if (dp == NULL || (dp - digits) >= base)
1382 {
1383 illegal_char:
1384 lr_error (lr, _("\
1385 illegal character constant in string"));
1386 lr_ignore_rest (lr, 0);
1387 return -1;
1388 }
1389 wch = dp - digits;
1390 ++runp;
1391
1392 dp = strchr (digits, tolower (*runp));
1393 if (dp == NULL || (dp - digits) >= base)
1394 goto illegal_char;
1395 wch *= base;
1396 wch += dp - digits;
1397 ++runp;
1398
1399 if (base != 16)
1400 {
1401 dp = strchr (digits, tolower (*runp));
1402 if (dp != NULL && (dp - digits < base))
1403 {
1404 wch *= base;
1405 wch += dp - digits;
1406 ++runp;
1407 }
1408 }
1409 }
1410 else
1411 wch = (wchar_t) *runp++;
1412
1413 /* Lookup the weight for WCH. */
1414 if (find_entry (&collate->result, &wch, sizeof (wch),
1415 (void *)&wp) < 0)
1416 wp = NULL;
1417
1418 while (wp != NULL
1419 && (wp->name[0] != wch || wp->name[1] != L'\0'))
1420 wp = wp->next;
1421
1422 value = wp == NULL ? 0 : wp->this_weight;
1423
1424 /* To get the correct name for the error message. */
1425 putp = runp;
1426
1427 /**************************************************\
1428 |* I know here is something wrong. Characters in *|
1429 |* the string which are not in the <...> form *|
1430 |* cannot be declared forward for now!!! *|
1431 \**************************************************/
1432 }
1433
1434 /* Store in weight array. */
1435 if (collate->nweight >= collate->nweight_max)
1436 {
1437 collate->nweight_max *= 2;
1438 collate->weight
1439 = (unsigned int *) xrealloc (collate->weight,
1440 collate->nweight_max);
1441 }
1442
1443 if (value == 0)
1444 {
1445 patch_t *newp;
1446
1447 newp = (patch_t *) obstack_alloc (&collate->element_mem,
1448 sizeof (patch_t));
1449 newp->fname = lr->fname;
1450 newp->lineno = lr->lineno;
1451 newp->token
1452 = (const char *) obstack_copy0 (&collate->element_mem,
1453 startp, putp - startp);
1454 newp->where.idx = collate->nweight++;
1455 newp->next = collate->current_patch;
1456 collate->current_patch = newp;
1457 }
1458 else
1459 collate->weight[collate->nweight++] = value;
1460 ++collate->weight_cnt[collate->weight_idx];
1461 }
1462 }
1463 return 0;
1464
1465 default:
1466 assert (! "should not happen");
1467 }
1468
1469
1470 if (collate->nweight >= collate->nweight_max)
1471 {
1472 collate->nweight_max *= 2;
1473 collate->weight = (unsigned int *) xrealloc (collate->weight,
1474 collate->nweight_max);
1475 }
1476
1477 collate->weight[collate->nweight++] = value;
1478 ++collate->weight_cnt[collate->weight_idx];
1479
1480 return 0;
1481 }
1482
1483
1484 void
1485 collate_end_weight (struct linereader *lr, struct localedef_t *locale)
1486 {
1487 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1488 element_t *pelem = collate->current_element;
1489
1490 if (collate->kind == symbol)
1491 {
1492 /* We don't have to do anything. */
1493 collate->was_ellipsis = 0;
1494 return;
1495 }
1496
1497 if (collate->kind == ellipsis)
1498 {
1499 /* Before the next line is processed the ellipsis is handled. */
1500 collate->was_ellipsis = 1;
1501 return;
1502 }
1503
1504 assert (collate->kind == character || collate->kind == element
1505 || collate->kind == undefined);
1506
1507 /* Fill in the missing weights. */
1508 while (++collate->weight_idx < collate->nrules)
1509 {
1510 collate->weight[collate->nweight++] = pelem->this_weight;
1511 ++collate->weight_cnt[collate->weight_idx];
1512 }
1513
1514 /* Now we know how many ordering weights the current
1515 character/element has. Allocate room in the element structure
1516 and copy information. */
1517 pelem->ordering_len = collate->nweight;
1518
1519 /* First we write an array with the number of values for each
1520 weight. */
1521 obstack_grow (&collate->element_mem, collate->weight_cnt,
1522 collate->nrules * sizeof (unsigned int));
1523
1524 /* Now the weights itselves. */
1525 obstack_grow (&collate->element_mem, collate->weight,
1526 collate->nweight * sizeof (unsigned int));
1527
1528 /* Get result. */
1529 pelem->ordering = obstack_finish (&collate->element_mem);
1530
1531 /* Now we handle the "patches". */
1532 while (collate->current_patch != NULL)
1533 {
1534 patch_t *this_patch;
1535
1536 this_patch = collate->current_patch;
1537
1538 this_patch->where.pos = &pelem->ordering[collate->nrules
1539 + this_patch->where.idx];
1540
1541 collate->current_patch = this_patch->next;
1542 this_patch->next = collate->all_patches;
1543 collate->all_patches = this_patch;
1544 }
1545
1546 /* Set information for next round. */
1547 collate->was_ellipsis = 0;
1548 if (collate->kind != undefined)
1549 collate->last_char = pelem->name[0];
1550 }
This page took 1.395323 seconds and 6 git commands to generate.