/* Extracted from the sourceware.org Git web view:
   glibc.git / locale / programs / ld-ctype.c (blob).
   Commit message shown by the viewer: "Update."  */
1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <alloca.h>
25 #include <endian.h>
26 #include <limits.h>
27 #include <string.h>
28
29 #include "locales.h"
30 #include "localeinfo.h"
31 #include "langinfo.h"
32 #include "locfile-token.h"
33 #include "stringtrans.h"
34
35 /* Uncomment the following line in the production version. */
36 /* define NDEBUG 1 */
37 #include <assert.h>
38
39
/* Allocation helpers used throughout this file; they are only declared
   here, their definitions live in another translation unit.  */
extern void *xmalloc (size_t size);
extern void *xcalloc (size_t nmemb, size_t size);
extern void *xrealloc (void *ptr, size_t size);
43
44
/* The bit used for representing a special class.  BITPOS is the distance
   of the token CLASS from `tok_upper'; BIT is the matching single-bit
   mask.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (1 << BITPOS (class))

/* Yield an lvalue for the entry belonging to character VALUE in the table
   CTYPE->COLLECTION (optionally subscripted by IDX, which is pasted in
   verbatim and may be empty).  The lookup is done by `find_idx' using the
   companion members COLLECTION_max and COLLECTION_act.  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
             &ctype->collection##_act idx, value)

/* Reverse the byte order of a 32-bit quantity.  The operand is converted
   to `u_int32_t' first so that narrow operands promoted to `int' cannot
   overflow in the left shift and wider operands cannot leak bits above
   bit 31 into the result.  W is evaluated more than once.  */
#define SWAPU32(w) \
  ((((u_int32_t) (w) & 0x000000ffu) << 24)                                   \
   | (((u_int32_t) (w) & 0x0000ff00u) << 8)                                  \
   | (((u_int32_t) (w) & 0x00ff0000u) >> 8)                                  \
   | (((u_int32_t) (w) & 0xff000000u) >> 24))

/* Exchange the two low bytes of W.  */
#define SWAPU16(w) \
  ((((w) >> 8) & 0xff) | (((w) & 0xff) << 8))


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t u_int16_t
#define CHAR_CLASS_TRANS SWAPU16
#define char_class32_t u_int32_t
#define CHAR_CLASS32_TRANS SWAPU32
67
68
69 /* The real definition of the struct for the LC_CTYPE locale. */
70 struct locale_ctype_t
71 {
72 unsigned int *charnames;
73 size_t charnames_max;
74 size_t charnames_act;
75
76 /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes. */
77 #define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
78 size_t nr_charclass;
79 const char *classnames[MAX_NR_CHARCLASS];
80 unsigned long int current_class_mask;
81 unsigned int last_class_char;
82 u_int32_t *class_collection;
83 size_t class_collection_max;
84 size_t class_collection_act;
85 unsigned long int class_done;
86
87 /* If the following number ever turns out to be too small simply
88 increase it. But I doubt it will. --drepper@gnu */
89 #define MAX_NR_CHARMAP 16
90 const char *mapnames[MAX_NR_CHARMAP];
91 u_int32_t *map_collection[MAX_NR_CHARMAP];
92 size_t map_collection_max[MAX_NR_CHARMAP];
93 size_t map_collection_act[MAX_NR_CHARMAP];
94 size_t map_collection_nr;
95 size_t last_map_idx;
96 unsigned int from_map_char;
97 int toupper_done;
98 int tolower_done;
99
100 /* The arrays for the binary representation. */
101 u_int32_t plane_size;
102 u_int32_t plane_cnt;
103 char_class_t *ctype_b;
104 char_class32_t *ctype32_b;
105 u_int32_t *names_el;
106 u_int32_t *names_eb;
107 u_int32_t **map_eb;
108 u_int32_t **map_el;
109 u_int32_t *class_name_ptr;
110 u_int32_t *map_name_ptr;
111 unsigned char *width;
112 u_int32_t mb_cur_max;
113 const char *codeset_name;
114 };
115
116
/* Prototypes for local functions.  */

/* Register the class NAME in CTYPE.  */
static void ctype_class_newP (struct linereader *lr,
                              struct locale_ctype_t *ctype,
                              const char *name);

/* Register the map NAME in CTYPE and allocate its table.  */
static void ctype_map_newP (struct linereader *lr,
                            struct locale_ctype_t *ctype,
                            const char *name,
                            struct charset_t *charset);

/* Locate (creating it when necessary) the slot for character IDX.  */
static u_int32_t *find_idx (struct locale_ctype_t *ctype,
                            u_int32_t **table,
                            size_t *max,
                            size_t *act,
                            unsigned int idx);

/* Supply the default members of classes the locale did not define.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                struct charset_t *charset);

/* Prepare the tables of the binary representation.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
                             struct charset_t *charset);
129
130
131 void
132 ctype_startup (struct linereader *lr, struct localedef_t *locale,
133 struct charset_t *charset)
134 {
135 unsigned int cnt;
136 struct locale_ctype_t *ctype;
137
138 /* We have a definition for LC_CTYPE. */
139 copy_posix.mask &= ~(1 << LC_CTYPE);
140
141 /* It is important that we always use UCS1 encoding for strings now. */
142 encoding_method = ENC_UCS1;
143
144 /* Allocate the needed room. */
145 locale->categories[LC_CTYPE].ctype = ctype =
146 (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
147
148 /* We have no names seen yet. */
149 ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
150 ctype->charnames =
151 (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
152 for (cnt = 0; cnt < 256; ++cnt)
153 ctype->charnames[cnt] = cnt;
154 ctype->charnames_act = 256;
155
156 /* Fill character class information. */
157 ctype->nr_charclass = 0;
158 ctype->current_class_mask = 0;
159 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
160 /* The order of the following instructions determines the bit
161 positions! */
162 ctype_class_newP (lr, ctype, "upper");
163 ctype_class_newP (lr, ctype, "lower");
164 ctype_class_newP (lr, ctype, "alpha");
165 ctype_class_newP (lr, ctype, "digit");
166 ctype_class_newP (lr, ctype, "xdigit");
167 ctype_class_newP (lr, ctype, "space");
168 ctype_class_newP (lr, ctype, "print");
169 ctype_class_newP (lr, ctype, "graph");
170 ctype_class_newP (lr, ctype, "blank");
171 ctype_class_newP (lr, ctype, "cntrl");
172 ctype_class_newP (lr, ctype, "punct");
173 ctype_class_newP (lr, ctype, "alnum");
174
175 ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
176 ctype->class_collection
177 = (u_int32_t *) xmalloc (sizeof (unsigned long int)
178 * ctype->class_collection_max);
179 memset (ctype->class_collection, '\0',
180 sizeof (unsigned long int) * ctype->class_collection_max);
181 ctype->class_collection_act = 256;
182
183 /* Fill character map information. */
184 ctype->map_collection_nr = 0;
185 ctype->last_map_idx = MAX_NR_CHARMAP;
186 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
187 ctype_map_newP (lr, ctype, "toupper", charset);
188 ctype_map_newP (lr, ctype, "tolower", charset);
189
190 /* Fill first 256 entries in `toupper' and `tolower' arrays. */
191 for (cnt = 0; cnt < 256; ++cnt)
192 {
193 ctype->map_collection[0][cnt] = cnt;
194 ctype->map_collection[1][cnt] = cnt;
195 }
196 }
197
198
199 void
200 ctype_finish (struct localedef_t *locale, struct charset_t *charset)
201 {
202 /* See POSIX.2, table 2-6 for the meaning of the following table. */
203 #define NCLASS 12
204 static const struct
205 {
206 const char *name;
207 const char allow[NCLASS];
208 }
209 valid_table[NCLASS] =
210 {
211 /* The order is important. See token.h for more information.
212 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
213 { "upper", "--MX-XDDXXX-" },
214 { "lower", "--MX-XDDXXX-" },
215 { "alpha", "---X-XDDXXX-" },
216 { "digit", "XXX--XDDXXX-" },
217 { "xdigit", "-----XDDXXX-" },
218 { "space", "XXXXX------X" },
219 { "print", "---------X--" },
220 { "graph", "---------X--" },
221 { "blank", "XXXXXM-----X" },
222 { "cntrl", "XXXXX-XX--XX" },
223 { "punct", "XXXXX-DD-X-X" },
224 { "alnum", "-----XDDXXX-" }
225 };
226 size_t cnt;
227 int cls1, cls2;
228 unsigned int space_value;
229 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
230
231 /* Set default value for classes not specified. */
232 set_class_defaults (ctype, charset);
233
234 /* Check according to table. */
235 for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
236 {
237 unsigned long int tmp;
238
239 tmp = ctype->class_collection[cnt];
240 if (tmp == 0)
241 continue;
242
243 for (cls1 = 0; cls1 < NCLASS; ++cls1)
244 if ((tmp & (1 << cls1)) != 0)
245 for (cls2 = 0; cls2 < NCLASS; ++cls2)
246 if (valid_table[cls1].allow[cls2] != '-')
247 {
248 int eq = (tmp & (1 << cls2)) != 0;
249 switch (valid_table[cls1].allow[cls2])
250 {
251 case 'M':
252 if (!eq)
253 {
254 char buf[17];
255 char *cp = buf;
256 unsigned int value;
257
258 value = ctype->charnames[cnt];
259
260 if ((value & 0xff000000) != 0)
261 cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
262 if ((value & 0xffff0000) != 0)
263 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
264 if ((value & 0xffffff00) != 0)
265 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
266 sprintf (cp, "\\%o", value & 0xff);
267
268 if (!be_quiet)
269 error (0, 0, _("\
270 character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
271 buf, valid_table[cls1].name,
272 valid_table[cls2].name);
273 }
274 break;
275
276 case 'X':
277 if (eq)
278 {
279 char buf[17];
280 char *cp = buf;
281 unsigned int value;
282
283 value = ctype->charnames[cnt];
284
285 if ((value & 0xff000000) != 0)
286 cp += sprintf (cp, "\\%o", value >> 24);
287 if ((value & 0xffff0000) != 0)
288 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
289 if ((value & 0xffffff00) != 0)
290 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
291 sprintf (cp, "\\%o", value & 0xff);
292
293 if (!be_quiet)
294 error (0, 0, _("\
295 character %s'%s' in class `%s' must not be in class `%s'"),
296 value > 256 ? "L" : "", buf,
297 valid_table[cls1].name,
298 valid_table[cls2].name);
299 }
300 break;
301
302 case 'D':
303 ctype->class_collection[cnt] |= 1 << cls2;
304 break;
305
306 default:
307 error (5, 0, _("internal error in %s, line %u"),
308 __FUNCTION__, __LINE__);
309 }
310 }
311 }
312
313 /* ... and now test <SP> as a special case. */
314 space_value = charset_find_value (&charset->char_table, "SP", 2);
315 if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE)
316 space_value = charset_find_value (&charset->char_table, "space", 5);
317 if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE)
318 {
319 if (!be_quiet)
320 error (0, 0, _("character <SP> not defined in character map"));
321 }
322 else if (((cnt = BITPOS (tok_space),
323 (ELEM (ctype, class_collection, , space_value)
324 & BIT (tok_space)) == 0)
325 || (cnt = BITPOS (tok_blank),
326 (ELEM (ctype, class_collection, , space_value)
327 & BIT (tok_blank)) == 0)))
328 {
329 if (!be_quiet)
330 error (0, 0, _("<SP> character not in class `%s'"),
331 valid_table[cnt].name);
332 }
333 else if (((cnt = BITPOS (tok_punct),
334 (ELEM (ctype, class_collection, , space_value)
335 & BIT (tok_punct)) != 0)
336 || (cnt = BITPOS (tok_graph),
337 (ELEM (ctype, class_collection, , space_value)
338 & BIT (tok_graph))
339 != 0)))
340 {
341 if (!be_quiet)
342 error (0, 0, _("<SP> character must not be in class `%s'"),
343 valid_table[cnt].name);
344 }
345 else
346 ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
347
348 /* Now that the tests are done make sure the name array contains all
349 characters which are handled in the WIDTH section of the
350 character set definition file. */
351 if (charset->width_rules != NULL)
352 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
353 {
354 size_t inner;
355 for (inner = charset->width_rules[cnt].from;
356 inner <= charset->width_rules[cnt].to; ++inner)
357 (void) find_idx (ctype, NULL, NULL, NULL, inner);
358 }
359 }
360
361
362 void
363 ctype_output (struct localedef_t *locale, struct charset_t *charset,
364 const char *output_path)
365 {
366 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
367 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
368 + 2 * (ctype->map_collection_nr - 2));
369 struct iovec iov[2 + nelems + ctype->nr_charclass
370 + ctype->map_collection_nr];
371 struct locale_file data;
372 u_int32_t idx[nelems];
373 size_t elem, cnt, offset, total;
374
375
376 if ((locale->binary & (1 << LC_CTYPE)) != 0)
377 {
378 iov[0].iov_base = ctype;
379 iov[0].iov_len = locale->len[LC_CTYPE];
380
381 write_locale_data (output_path, "LC_CTYPE", 1, iov);
382
383 return;
384 }
385
386
387 /* Now prepare the output: Find the sizes of the table we can use. */
388 allocate_arrays (ctype, charset);
389
390 data.magic = LIMAGIC (LC_CTYPE);
391 data.n = nelems;
392 iov[0].iov_base = (void *) &data;
393 iov[0].iov_len = sizeof (data);
394
395 iov[1].iov_base = (void *) idx;
396 iov[1].iov_len = sizeof (idx);
397
398 idx[0] = iov[0].iov_len + iov[1].iov_len;
399 offset = 0;
400
401 for (elem = 0; elem < nelems; ++elem)
402 {
403 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
404 switch (elem)
405 {
406 #define CTYPE_DATA(name, base, len) \
407 case _NL_ITEM_INDEX (name): \
408 iov[2 + elem + offset].iov_base = (base); \
409 iov[2 + elem + offset].iov_len = (len); \
410 if (elem + 1 < nelems) \
411 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
412 break
413
414 CTYPE_DATA (_NL_CTYPE_CLASS,
415 ctype->ctype_b,
416 (256 + 128) * sizeof (char_class_t));
417
418 CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
419 ctype->map_eb[0],
420 (ctype->plane_size * ctype->plane_cnt + 128)
421 * sizeof (u_int32_t));
422 CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
423 ctype->map_eb[1],
424 (ctype->plane_size * ctype->plane_cnt + 128)
425 * sizeof (u_int32_t));
426
427 CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
428 ctype->map_el[0],
429 (ctype->plane_size * ctype->plane_cnt + 128)
430 * sizeof (u_int32_t));
431 CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
432 ctype->map_el[1],
433 (ctype->plane_size * ctype->plane_cnt + 128)
434 * sizeof (u_int32_t));
435
436 CTYPE_DATA (_NL_CTYPE_CLASS32,
437 ctype->ctype32_b,
438 (ctype->plane_size * ctype->plane_cnt
439 * sizeof (char_class32_t)));
440
441 CTYPE_DATA (_NL_CTYPE_NAMES_EB,
442 ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
443 * sizeof (u_int32_t)));
444 CTYPE_DATA (_NL_CTYPE_NAMES_EL,
445 ctype->names_el, (ctype->plane_size * ctype->plane_cnt
446 * sizeof (u_int32_t)));
447
448 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
449 &ctype->plane_size, sizeof (u_int32_t));
450 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
451 &ctype->plane_cnt, sizeof (u_int32_t));
452
453 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
454 /* The class name array. */
455 total = 0;
456 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
457 {
458 iov[2 + elem + offset].iov_base
459 = (void *) ctype->classnames[cnt];
460 iov[2 + elem + offset].iov_len
461 = strlen (ctype->classnames[cnt]) + 1;
462 total += iov[2 + elem + offset].iov_len;
463 }
464 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
465 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
466 total += 1 + (4 - ((total + 1) % 4));
467
468 if (elem + 1 < nelems)
469 idx[elem + 1] = idx[elem] + total;
470 break;
471
472 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
473 /* The class name array. */
474 total = 0;
475 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
476 {
477 iov[2 + elem + offset].iov_base
478 = (void *) ctype->mapnames[cnt];
479 iov[2 + elem + offset].iov_len
480 = strlen (ctype->mapnames[cnt]) + 1;
481 total += iov[2 + elem + offset].iov_len;
482 }
483 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
484 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
485 total += 1 + (4 - ((total + 1) % 4));
486
487 if (elem + 1 < nelems)
488 idx[elem + 1] = idx[elem] + total;
489 break;
490
491 CTYPE_DATA (_NL_CTYPE_WIDTH,
492 ctype->width, ctype->plane_size * ctype->plane_cnt);
493
494 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
495 &ctype->mb_cur_max, sizeof (u_int32_t));
496
497 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
498 total = strlen (ctype->codeset_name) + 1;
499 if (total % 4 == 0)
500 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
501 else
502 {
503 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
504 memset (mempcpy (iov[2 + elem + offset].iov_base,
505 ctype->codeset_name, total),
506 '\0', 4 - (total & 3));
507 total = (total + 3) & ~3;
508 }
509 iov[2 + elem + offset].iov_len = total;
510 if (elem + 1 < nelems)
511 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
512 break;
513
514 default:
515 assert (! "unknown CTYPE element");
516 }
517 else
518 {
519 /* Handle extra maps. */
520 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
521
522 if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
523 iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
524 else
525 iov[2 + elem + offset].iov_base = ctype->map_el[nr];
526
527 iov[2 + elem + offset].iov_len = ((ctype->plane_size
528 * ctype->plane_cnt + 128)
529 * sizeof (u_int32_t));
530
531 if (elem + 1 < nelems)
532 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
533 }
534 }
535
536 assert (2 + elem + offset == (nelems + ctype->nr_charclass
537 + ctype->map_collection_nr + 2));
538
539 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
540 }
541
542
543 /* Character class handling. */
544 void
545 ctype_class_new (struct linereader *lr, struct localedef_t *locale,
546 enum token_t tok, struct token *code,
547 struct charset_t *charset)
548 {
549 ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
550 code->val.str.start);
551 }
552
553
554 int
555 ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
556 const char *name)
557 {
558 size_t cnt;
559
560 for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
561 if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
562 == 0)
563 return 1;
564
565 return 0;
566 }
567
568
569 void
570 ctype_class_start (struct linereader *lr, struct localedef_t *locale,
571 enum token_t tok, const char *str,
572 struct charset_t *charset)
573 {
574 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
575 size_t cnt;
576
577 switch (tok)
578 {
579 case tok_upper:
580 str = "upper";
581 break;
582 case tok_lower:
583 str = "lower";
584 break;
585 case tok_alpha:
586 str = "alpha";
587 break;
588 case tok_digit:
589 str = "digit";
590 break;
591 case tok_xdigit:
592 str = "xdigit";
593 break;
594 case tok_space:
595 str = "space";
596 break;
597 case tok_print:
598 str = "print";
599 break;
600 case tok_graph:
601 str = "graph";
602 break;
603 case tok_blank:
604 str = "blank";
605 break;
606 case tok_cntrl:
607 str = "cntrl";
608 break;
609 case tok_punct:
610 str = "punct";
611 break;
612 case tok_alnum:
613 str = "alnum";
614 break;
615 case tok_ident:
616 break;
617 default:
618 assert (! "illegal token as class name: should not happen");
619 }
620
621 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
622 if (strcmp (str, ctype->classnames[cnt]) == 0)
623 break;
624
625 if (cnt >= ctype->nr_charclass)
626 assert (! "unknown class in class definition: should not happen");
627
628 ctype->class_done |= BIT (tok);
629
630 ctype->current_class_mask = 1 << cnt;
631 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
632 }
633
634
635 void
636 ctype_class_from (struct linereader *lr, struct localedef_t *locale,
637 struct token *code, struct charset_t *charset)
638 {
639 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
640 unsigned int value;
641
642 value = charset_find_value (&charset->char_table, code->val.str.start,
643 code->val.str.len);
644
645 ctype->last_class_char = value;
646
647 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
648 /* In the LC_CTYPE category it is no error when a character is
649 not found. This has to be ignored silently. */
650 return;
651
652 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
653 &ctype->class_collection_act, value)
654 |= ctype->current_class_mask;
655 }
656
657
658 void
659 ctype_class_to (struct linereader *lr, struct localedef_t *locale,
660 struct token *code, struct charset_t *charset)
661 {
662 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
663 unsigned int value, cnt;
664
665 value = charset_find_value (&charset->char_table, code->val.str.start,
666 code->val.str.len);
667
668 /* In the LC_CTYPE category it is no error when a character is
669 not found. This has to be ignored silently. */
670 if ((wchar_t) ctype->last_class_char != ILLEGAL_CHAR_VALUE
671 && (wchar_t) value != ILLEGAL_CHAR_VALUE)
672 for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
673 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
674 &ctype->class_collection_act, cnt)
675 |= ctype->current_class_mask;
676
677 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
678 }
679
680
681 void
682 ctype_class_end (struct linereader *lr, struct localedef_t *locale)
683 {
684 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
685
686 /* We have no special actions to perform here. */
687 ctype->current_class_mask = 0;
688 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
689 }
690
691
692 /* Character map handling. */
693 void
694 ctype_map_new (struct linereader *lr, struct localedef_t *locale,
695 enum token_t tok, struct token *code,
696 struct charset_t *charset)
697 {
698 ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
699 code->val.str.start, charset);
700 }
701
702
703 int
704 ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
705 const char *name)
706 {
707 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
708 size_t cnt;
709
710 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
711 if (strcmp (name, ctype->mapnames[cnt]) == 0)
712 return 1;
713
714 return 0;
715 }
716
717
718 void
719 ctype_map_start (struct linereader *lr, struct localedef_t *locale,
720 enum token_t tok, const char *name, struct charset_t *charset)
721 {
722 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
723 size_t cnt;
724
725 switch (tok)
726 {
727 case tok_toupper:
728 ctype->toupper_done = 1;
729 name = "toupper";
730 break;
731 case tok_tolower:
732 ctype->tolower_done = 1;
733 name = "tolower";
734 break;
735 case tok_ident:
736 break;
737 default:
738 assert (! "unknown token in category `LC_CTYPE' should not happen");
739 }
740
741 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
742 if (strcmp (name, ctype->mapnames[cnt]) == 0)
743 break;
744
745 if (cnt == ctype->map_collection_nr)
746 assert (! "unknown token in category `LC_CTYPE' should not happen");
747
748 ctype->last_map_idx = cnt;
749 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
750 }
751
752
753 void
754 ctype_map_from (struct linereader *lr, struct localedef_t *locale,
755 struct token *code, struct charset_t *charset)
756 {
757 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
758 unsigned int value;
759
760 value = charset_find_value (&charset->char_table, code->val.str.start,
761 code->val.str.len);
762
763 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
764 /* In the LC_CTYPE category it is no error when a character is
765 not found. This has to be ignored silently. */
766 return;
767
768 assert (ctype->last_map_idx < ctype->map_collection_nr);
769
770 ctype->from_map_char = value;
771 }
772
773
774 void
775 ctype_map_to (struct linereader *lr, struct localedef_t *locale,
776 struct token *code, struct charset_t *charset)
777 {
778 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
779 unsigned int value;
780
781 value = charset_find_value (&charset->char_table, code->val.str.start,
782 code->val.str.len);
783
784 if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE
785 || (wchar_t) value == ILLEGAL_CHAR_VALUE)
786 {
787 /* In the LC_CTYPE category it is no error when a character is
788 not found. This has to be ignored silently. */
789 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
790 return;
791 }
792
793 *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
794 &ctype->map_collection_max[ctype->last_map_idx],
795 &ctype->map_collection_act[ctype->last_map_idx],
796 ctype->from_map_char) = value;
797
798 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
799 }
800
801
802 void
803 ctype_map_end (struct linereader *lr, struct localedef_t *locale)
804 {
805 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
806
807 ctype->last_map_idx = MAX_NR_CHARMAP;
808 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
809 }
810
811
812 /* Local functions. */
813 static void
814 ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
815 const char *name)
816 {
817 size_t cnt;
818
819 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
820 if (strcmp (ctype->classnames[cnt], name) == 0)
821 break;
822
823 if (cnt < ctype->nr_charclass)
824 {
825 lr_error (lr, _("character class `%s' already defined"), name);
826 return;
827 }
828
829 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
830 /* Exit code 2 is prescribed in P1003.2b. */
831 error (2, 0, _("\
832 implementation limit: no more than %d character classes allowed"),
833 MAX_NR_CHARCLASS);
834
835 ctype->classnames[ctype->nr_charclass++] = name;
836 }
837
838
839 static void
840 ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
841 const char *name, struct charset_t *charset)
842 {
843 size_t max_chars = 0;
844 size_t cnt;
845
846 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
847 {
848 if (strcmp (ctype->mapnames[cnt], name) == 0)
849 break;
850
851 if (max_chars < ctype->map_collection_max[cnt])
852 max_chars = ctype->map_collection_max[cnt];
853 }
854
855 if (cnt < ctype->map_collection_nr)
856 {
857 lr_error (lr, _("character map `%s' already defined"), name);
858 return;
859 }
860
861 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
862 /* Exit code 2 is prescribed in P1003.2b. */
863 error (2, 0, _("\
864 implementation limit: no more than %d character maps allowed"),
865 MAX_NR_CHARMAP);
866
867 ctype->mapnames[cnt] = name;
868
869 if (max_chars == 0)
870 ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
871 else
872 ctype->map_collection_max[cnt] = max_chars;
873
874 ctype->map_collection[cnt] = (u_int32_t *)
875 xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
876 memset (ctype->map_collection[cnt], '\0',
877 sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
878 ctype->map_collection_act[cnt] = 256;
879
880 ++ctype->map_collection_nr;
881 }
882
883
884 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
885 is possible if we only want to extend the name array. */
886 static u_int32_t *
887 find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
888 size_t *act, unsigned int idx)
889 {
890 size_t cnt;
891
892 if (idx < 256)
893 return table == NULL ? NULL : &(*table)[idx];
894
895 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
896 if (ctype->charnames[cnt] == idx)
897 break;
898
899 /* We have to distinguish two cases: the name is found or not. */
900 if (cnt == ctype->charnames_act)
901 {
902 /* Extend the name array. */
903 if (ctype->charnames_act == ctype->charnames_max)
904 {
905 ctype->charnames_max *= 2;
906 ctype->charnames = (unsigned int *)
907 xrealloc (ctype->charnames,
908 sizeof (unsigned int) * ctype->charnames_max);
909 }
910 ctype->charnames[ctype->charnames_act++] = idx;
911 }
912
913 if (table == NULL)
914 /* We have done everything we are asked to do. */
915 return NULL;
916
917 if (cnt >= *act)
918 {
919 if (cnt >= *max)
920 {
921 size_t old_max = *max;
922 do
923 *max *= 2;
924 while (*max <= cnt);
925
926 *table =
927 (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
928 memset (&(*table)[old_max], '\0',
929 (*max - old_max) * sizeof (u_int32_t));
930 }
931
932 (*table)[cnt] = 0;
933 *act = cnt;
934 }
935
936 return &(*table)[cnt];
937 }
938
939
940 static void
941 set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
942 {
943 /* These function defines the default values for the classes and conversions
944 according to POSIX.2 2.5.2.1.
945 It may seem that the order of these if-blocks is arbitrary but it is NOT.
946 Don't move them unless you know what you do! */
947
948 void set_default (int bit, int from, int to)
949 {
950 char tmp[2];
951 int ch;
952 /* Define string. */
953 strcpy (tmp, "?");
954
955 for (ch = from; ch <= to; ++ch)
956 {
957 unsigned int value;
958 tmp[0] = ch;
959
960 value = charset_find_value (&charset->char_table, tmp, 1);
961 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
962 {
963 if (!be_quiet)
964 error (0, 0, _("\
965 character `%s' not defined while needed as default value"),
966 tmp);
967 continue;
968 }
969 else
970 ELEM (ctype, class_collection, , value) |= bit;
971 }
972 }
973
974 /* Set default values if keyword was not present. */
975 if ((ctype->class_done & BIT (tok_upper)) == 0)
    /* "If this keyword [upper] is not specified, the uppercase letters
       `A' through `Z', ..., shall automatically belong to this class,
       with implementation defined character values."  [P1003.2, 2.5.2.1]  */
979 set_default (BIT (tok_upper), 'A', 'Z');
980
981 if ((ctype->class_done & BIT (tok_lower)) == 0)
982 /* "If this keyword [lower] is not specified, the lowercase letters
983 `a' through `z', ..., shall automatically belong to this class,
984 with implementation defined character values." [P1003.2, 2.5.2.1] */
985 set_default (BIT (tok_lower), 'a', 'z');
986
987 if ((ctype->class_done & BIT (tok_alpha)) == 0)
988 {
989 /* Table 2-6 in P1003.2 says that characters in class `upper' or
990 class `lower' *must* be in class `alpha'. */
991 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
992 size_t cnt;
993
994 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
995 if ((ctype->class_collection[cnt] & mask) != 0)
996 ctype->class_collection[cnt] |= BIT (tok_alpha);
997 }
998
999 if ((ctype->class_done & BIT (tok_digit)) == 0)
1000 /* "If this keyword [digit] is not specified, the digits `0' through
1001 `9', ..., shall automatically belong to this class, with
1002 implementation-defined character values." [P1003.2, 2.5.2.1] */
1003 set_default (BIT (tok_digit), '0', '9');
1004
1005 /* "Only characters specified for the `alpha' and `digit' keyword
1006 shall be specified. Characters specified for the keyword `alpha'
1007 and `digit' are automatically included in this class. */
1008 {
1009 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
1010 size_t cnt;
1011
1012 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1013 if ((ctype->class_collection[cnt] & mask) != 0)
1014 ctype->class_collection[cnt] |= BIT (tok_alnum);
1015 }
1016
1017 if ((ctype->class_done & BIT (tok_space)) == 0)
1018 /* "If this keyword [space] is not specified, the characters <space>,
1019 <form-feed>, <newline>, <carriage-return>, <tab>, and
1020 <vertical-tab>, ..., shall automatically belong to this class,
1021 with implementation-defined character values." [P1003.2, 2.5.2.1] */
1022 {
1023 unsigned int value;
1024
1025 value = charset_find_value (&charset->char_table, "space", 5);
1026 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1027 {
1028 if (!be_quiet)
1029 error (0, 0, _("\
1030 character `%s' not defined while needed as default value"),
1031 "<space>");
1032 }
1033 else
1034 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1035
1036 value = charset_find_value (&charset->char_table, "form-feed", 9);
1037 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1038 {
1039 if (!be_quiet)
1040 error (0, 0, _("\
1041 character `%s' not defined while needed as default value"),
1042 "<form-feed>");
1043 }
1044 else
1045 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1046
1047 value = charset_find_value (&charset->char_table, "newline", 7);
1048 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1049 {
1050 if (!be_quiet)
1051 error (0, 0, _("\
1052 character `%s' not defined while needed as default value"),
1053 "<newline>");
1054 }
1055 else
1056 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1057
1058 value = charset_find_value (&charset->char_table, "carriage-return", 15);
1059 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1060 {
1061 if (!be_quiet)
1062 error (0, 0, _("\
1063 character `%s' not defined while needed as default value"),
1064 "<carriage-return>");
1065 }
1066 else
1067 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1068
1069 value = charset_find_value (&charset->char_table, "tab", 3);
1070 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1071 {
1072 if (!be_quiet)
1073 error (0, 0, _("\
1074 character `%s' not defined while needed as default value"),
1075 "<tab>");
1076 }
1077 else
1078 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1079
1080 value = charset_find_value (&charset->char_table, "vertical-tab", 12);
1081 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1082 {
1083 if (!be_quiet)
1084 error (0, 0, _("\
1085 character `%s' not defined while needed as default value"),
1086 "<vertical-tab>");
1087 }
1088 else
1089 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1090 }
1091
1092 if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1093 /* "If this keyword is not specified, the digits `0' to `9', the
1094 uppercase letters `A' through `F', and the lowercase letters `a'
1095 through `f', ..., shall automatically belong to this class, with
1096 implementation defined character values." [P1003.2, 2.5.2.1] */
1097 {
1098 set_default (BIT (tok_xdigit), '0', '9');
1099 set_default (BIT (tok_xdigit), 'A', 'F');
1100 set_default (BIT (tok_xdigit), 'a', 'f');
1101 }
1102
1103 if ((ctype->class_done & BIT (tok_blank)) == 0)
1104 /* "If this keyword [blank] is unspecified, the characters <space> and
1105 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
1106 {
1107 unsigned int value;
1108
1109 value = charset_find_value (&charset->char_table, "space", 5);
1110 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1111 {
1112 if (!be_quiet)
1113 error (0, 0, _("\
1114 character `%s' not defined while needed as default value"),
1115 "<space>");
1116 }
1117 else
1118 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1119
1120 value = charset_find_value (&charset->char_table, "tab", 3);
1121 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1122 {
1123 if (!be_quiet)
1124 error (0, 0, _("\
1125 character `%s' not defined while needed as default value"),
1126 "<tab>");
1127 }
1128 else
1129 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1130 }
1131
1132 if ((ctype->class_done & BIT (tok_graph)) == 0)
1133 /* "If this keyword [graph] is not specified, characters specified for
1134 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1135 shall belong to this character class." [P1003.2, 2.5.2.1] */
1136 {
1137 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1138 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1139 size_t cnt;
1140
1141 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1142 if ((ctype->class_collection[cnt] & mask) != 0)
1143 ctype->class_collection[cnt] |= BIT (tok_graph);
1144 }
1145
1146 if ((ctype->class_done & BIT (tok_print)) == 0)
1147 /* "If this keyword [print] is not provided, characters specified for
1148 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1149 and the <space> character shall belong to this character class."
1150 [P1003.2, 2.5.2.1] */
1151 {
1152 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1153 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1154 size_t cnt;
1155 wchar_t space;
1156
1157 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1158 if ((ctype->class_collection[cnt] & mask) != 0)
1159 ctype->class_collection[cnt] |= BIT (tok_print);
1160
1161 space = charset_find_value (&charset->char_table, "space", 5);
1162 if (space == ILLEGAL_CHAR_VALUE)
1163 {
1164 if (!be_quiet)
1165 error (0, 0, _("\
1166 character `%s' not defined while needed as default value"),
1167 "<space>");
1168 }
1169 else
1170 ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1171 }
1172
1173 if (ctype->toupper_done == 0)
1174 /* "If this keyword [toupper] is not specified, the lowercase letters
1175 `a' through `z', and their corresponding uppercase letters `A' to
1176 `Z', ..., shall automatically be included, with implementation-
1177 defined character values." [P1003.2, 2.5.2.1] */
1178 {
1179 char tmp[4];
1180 int ch;
1181
1182 strcpy (tmp, "<?>");
1183
1184 for (ch = 'a'; ch <= 'z'; ++ch)
1185 {
1186 unsigned int value_from, value_to;
1187
1188 tmp[1] = (char) ch;
1189
1190 value_from = charset_find_value (&charset->char_table, &tmp[1], 1);
1191 if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE)
1192 {
1193 if (!be_quiet)
1194 error (0, 0, _("\
1195 character `%s' not defined while needed as default value"),
1196 tmp);
1197 continue;
1198 }
1199
1200 /* This conversion is implementation defined. */
1201 tmp[1] = (char) (ch + ('A' - 'a'));
1202 value_to = charset_find_value (&charset->char_table, &tmp[1], 1);
1203 if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE)
1204 {
1205 if (!be_quiet)
1206 error (0, 0, _("\
1207 character `%s' not defined while needed as default value"),
1208 tmp);
1209 continue;
1210 }
1211
1212 /* The index [0] is determined by the order of the
1213 `ctype_map_newP' calls in `ctype_startup'. */
1214 ELEM (ctype, map_collection, [0], value_from) = value_to;
1215 }
1216 }
1217
1218 if (ctype->tolower_done == 0)
1219 /* "If this keyword [tolower] is not specified, the mapping shall be
1220 the reverse mapping of the one specified to `toupper'." [P1003.2] */
1221 {
1222 size_t cnt;
1223
1224 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1225 if (ctype->map_collection[0][cnt] != 0)
1226 ELEM (ctype, map_collection, [1],
1227 ctype->map_collection[0][cnt])
1228 = ctype->charnames[cnt];
1229 }
1230 }
1231
1232
1233 static void
1234 allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
1235 {
1236 size_t idx;
1237
1238 /* First we have to decide how we organize the arrays. It is easy
1239 for a one-byte character set. But multi-byte character set
1240 cannot be stored flat because the chars might be sparsely used.
1241 So we determine an optimal hashing function for the used
1242 characters.
1243
1244 We use a very trivial hashing function to store the sparse
1245 table. CH % TABSIZE is used as an index. To solve multiple hits
1246 we have N planes. This guarantees a fixed search time for a
1247 character [N / 2]. In the following code we determine the minmum
1248 value for TABSIZE * N, where TABSIZE >= 256. */
1249 size_t min_total = UINT_MAX;
1250 size_t act_size = 256;
1251
1252 if (!be_quiet)
1253 fputs (_("\
1254 Computing table size for character classes might take a while..."),
1255 stderr);
1256
1257 while (act_size < min_total)
1258 {
1259 size_t cnt[act_size];
1260 size_t act_planes = 1;
1261
1262 memset (cnt, '\0', sizeof cnt);
1263
1264 for (idx = 0; idx < 256; ++idx)
1265 cnt[idx] = 1;
1266
1267 for (idx = 0; idx < ctype->charnames_act; ++idx)
1268 if (ctype->charnames[idx] >= 256)
1269 {
1270 size_t nr = ctype->charnames[idx] % act_size;
1271
1272 if (++cnt[nr] > act_planes)
1273 {
1274 act_planes = cnt[nr];
1275 if (act_size * act_planes >= min_total)
1276 break;
1277 }
1278 }
1279
1280 if (act_size * act_planes < min_total)
1281 {
1282 min_total = act_size * act_planes;
1283 ctype->plane_size = act_size;
1284 ctype->plane_cnt = act_planes;
1285 }
1286
1287 ++act_size;
1288 }
1289
1290 if (!be_quiet)
1291 fputs (_(" done\n"), stderr);
1292
1293
1294 #if __BYTE_ORDER == __LITTLE_ENDIAN
1295 # define NAMES_B1 ctype->names_el
1296 # define NAMES_B2 ctype->names_eb
1297 #else
1298 # define NAMES_B1 ctype->names_eb
1299 # define NAMES_B2 ctype->names_el
1300 #endif
1301
1302 ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1303 * ctype->plane_cnt,
1304 sizeof (u_int32_t));
1305 ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1306 * ctype->plane_cnt,
1307 sizeof (u_int32_t));
1308
1309 for (idx = 1; idx < 256; ++idx)
1310 NAMES_B1[idx] = idx;
1311
1312 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
1313 NAMES_B1[0] = 1;
1314
1315 for (idx = 256; idx < ctype->charnames_act; ++idx)
1316 {
1317 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1318 size_t depth = 0;
1319
1320 while (NAMES_B1[nr + depth * ctype->plane_size])
1321 ++depth;
1322 assert (depth < ctype->plane_cnt);
1323
1324 NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1325
1326 /* Now for faster access remember the index in the NAMES_B array. */
1327 ctype->charnames[idx] = nr + depth * ctype->plane_size;
1328 }
1329 NAMES_B1[0] = 0;
1330
1331 for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1332 NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1333
1334
1335 /* You wonder about this amount of memory? This is only because some
1336 users do not manage to address the array with unsigned values or
1337 data types with range >= 256. '\200' would result in the array
1338 index -128. To help these poor people we duplicate the entries for
1339 128 up to 255 below the entry for \0. */
1340 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1341 sizeof (char_class_t));
1342 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1343 * ctype->plane_cnt,
1344 sizeof (char_class32_t));
1345
1346 /* Fill in the character class information. */
1347 #if __BYTE_ORDER == __LITTLE_ENDIAN
1348 # define TRANS(w) CHAR_CLASS_TRANS (w)
1349 # define TRANS32(w) CHAR_CLASS32_TRANS (w)
1350 #else
1351 # define TRANS(w) (w)
1352 # define TRANS32(w) (w)
1353 #endif
1354
1355 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1356 if (ctype->charnames[idx] < 256)
1357 ctype->ctype_b[128 + ctype->charnames[idx]]
1358 = TRANS (ctype->class_collection[idx]);
1359
1360 /* Mirror first 127 entries. We must take care that entry -1 is not
1361 mirrored because EOF == -1. */
1362 for (idx = 0; idx < 127; ++idx)
1363 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1364
1365 /* The 32 bit array contains all characters. */
1366 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1367 ctype->ctype32_b[ctype->charnames[idx]]
1368 = TRANS32 (ctype->class_collection[idx]);
1369
1370 /* Room for table of mappings. */
1371 ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1372 * sizeof (u_int32_t *));
1373 ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1374 * sizeof (u_int32_t *));
1375
1376 /* Fill in all mappings. */
1377 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1378 {
1379 unsigned int idx2;
1380
1381 /* Allocate table. */
1382 ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1383 * ctype->plane_cnt + 128)
1384 * sizeof (u_int32_t));
1385 ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1386 * ctype->plane_cnt + 128)
1387 * sizeof (u_int32_t));
1388
1389 #if __BYTE_ORDER == __LITTLE_ENDIAN
1390 # define MAP_B1 ctype->map_el
1391 # define MAP_B2 ctype->map_eb
1392 #else
1393 # define MAP_B1 ctype->map_eb
1394 # define MAP_B2 ctype->map_el
1395 #endif
1396
1397 /* Copy default value (identity mapping). */
1398 memcpy (&MAP_B1[idx][128], NAMES_B1,
1399 ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
1400
1401 /* Copy values from collection. */
1402 for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1403 if (ctype->map_collection[idx][idx2] != 0)
1404 MAP_B1[idx][128 + ctype->charnames[idx2]] =
1405 ctype->map_collection[idx][idx2];
1406
1407 /* Mirror first 127 entries. We must take care not to map entry
1408 -1 because EOF == -1. */
1409 for (idx2 = 0; idx2 < 127; ++idx2)
1410 MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1411
1412 /* EOF must map to EOF. */
1413 MAP_B1[idx][127] = EOF;
1414
1415 /* And now the other byte order. */
1416 for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1417 MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1418 }
1419
1420 /* Extra array for class and map names. */
1421 ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1422 * sizeof (u_int32_t));
1423 ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1424 * sizeof (u_int32_t));
1425
1426 /* Array for width information. Because the expected width are very
1427 small we use only one single byte. This save space and we need
1428 not provide the information twice with both endianesses. */
1429 ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1430 * ctype->plane_cnt);
1431 /* Initialize with default width value. */
1432 memset (ctype->width, charset->width_default,
1433 ctype->plane_size * ctype->plane_cnt);
1434 if (charset->width_rules != NULL)
1435 {
1436 size_t cnt;
1437
1438 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1439 if (charset->width_rules[cnt].width != charset->width_default)
1440 for (idx = charset->width_rules[cnt].from;
1441 idx <= charset->width_rules[cnt].to; ++idx)
1442 {
1443 size_t nr = idx % ctype->plane_size;
1444 size_t depth = 0;
1445
1446 while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1447 ++depth;
1448 assert (depth < ctype->plane_cnt);
1449
1450 ctype->width[nr + depth * ctype->plane_size]
1451 = charset->width_rules[cnt].width;
1452 }
1453 }
1454
1455 /* Compute MB_CUR_MAX. */
1456 ctype->mb_cur_max = charset->mb_cur_max;
1457
1458 /* We need the name of the currently used 8-bit character set to
1459 make correct conversion between this 8-bit representation and the
1460 ISO 10646 character set used internally for wide characters. */
1461 ctype->codeset_name = charset->code_set_name ? : "";
1462 }
This page took 0.107835 seconds and 5 git commands to generate.