]>
Commit | Line | Data |
---|---|---|
4b10dd6c | 1 | /* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. |
6d52618b | 2 | This file is part of the GNU C Library. |
5d08d218 | 3 | Written by Ulrich Drepper, <drepper@cygnus.com>. |
19bc17a9 | 4 | |
6d52618b UD |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Library General Public License as | |
7 | published by the Free Software Foundation; either version 2 of the | |
8 | License, or (at your option) any later version. | |
19bc17a9 | 9 | |
6d52618b UD |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Library General Public License for more details. | |
19bc17a9 | 14 | |
6d52618b UD |
15 | You should have received a copy of the GNU Library General Public |
16 | License along with the GNU C Library; see the file COPYING.LIB. If not, | |
17 | write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
18 | Boston, MA 02111-1307, USA. */ | |
19bc17a9 RM |
19 | |
20 | #include <alloca.h> | |
77f855ed | 21 | #include <errno.h> |
0393dfd6 RM |
22 | #include <langinfo.h> |
23 | #include "localeinfo.h" | |
19bc17a9 RM |
24 | |
25 | #ifndef STRING_TYPE | |
26 | # error STRING_TYPE not defined | |
27 | #endif | |
28 | ||
29 | #ifndef USTRING_TYPE | |
30 | # error USTRING_TYPE not defined | |
31 | #endif | |
32 | ||
33 | typedef struct weight_t | |
34 | { | |
35 | struct weight_t *prev; | |
36 | struct weight_t *next; | |
0393dfd6 RM |
37 | struct data_pair |
38 | { | |
7a12c6bb | 39 | int number; |
4b10dd6c | 40 | const uint32_t *value; |
0393dfd6 | 41 | } data[0]; |
19bc17a9 RM |
42 | } weight_t; |
43 | ||
44 | ||
6d52618b UD |
45 | /* The following five macros grant access to the values in the |
46 | collate locale file that do not depend on byte order. */ | |
c84142e8 UD |
47 | #ifndef USE_IN_EXTENDED_LOCALE_MODEL |
48 | # define collate_nrules \ | |
19bc17a9 | 49 | (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES)) |
c84142e8 | 50 | # define collate_hash_size \ |
19bc17a9 | 51 | (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE)) |
c84142e8 | 52 | # define collate_hash_layers \ |
19bc17a9 | 53 | (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS)) |
c84142e8 | 54 | # define collate_undefined \ |
4b10dd6c | 55 | (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_UNDEFINED_WC)) |
c84142e8 | 56 | # define collate_rules \ |
4b10dd6c | 57 | ((uint32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_RULES)) |
19bc17a9 | 58 | |
5a97622d UD |
59 | static __inline void get_weight (const STRING_TYPE **str, weight_t *result); |
60 | static __inline void | |
19bc17a9 | 61 | get_weight (const STRING_TYPE **str, weight_t *result) |
c84142e8 UD |
62 | #else |
63 | # define collate_nrules \ | |
64 | current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word | |
65 | # define collate_hash_size \ | |
66 | current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].word | |
67 | # define collate_hash_layers \ | |
68 | current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].word | |
69 | # define collate_undefined \ | |
4b10dd6c | 70 | current->values[_NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED_WC)].word |
c84142e8 | 71 | # define collate_rules \ |
4b10dd6c | 72 | ((uint32_t *) current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULES)].string) |
c84142e8 | 73 | |
5a97622d UD |
74 | static __inline void get_weight (const STRING_TYPE **str, weight_t *result, |
75 | struct locale_data *current, | |
4b10dd6c UD |
76 | const uint32_t *__collate_tablewc, |
77 | const uint32_t *__collate_extrawc); | |
5a97622d | 78 | static __inline void |
c84142e8 | 79 | get_weight (const STRING_TYPE **str, weight_t *result, |
4b10dd6c UD |
80 | struct locale_data *current, const uint32_t *__collate_tablewc, |
81 | const uint32_t *__collate_extrawc) | |
c84142e8 | 82 | #endif |
19bc17a9 RM |
83 | { |
84 | unsigned int ch = *((USTRING_TYPE *) (*str))++; | |
85 | size_t slot; | |
86 | ||
87 | if (sizeof (STRING_TYPE) == 1) | |
88 | slot = ch * (collate_nrules + 1); | |
89 | else | |
90 | { | |
91 | const size_t level_size = collate_hash_size * (collate_nrules + 1); | |
92 | size_t level; | |
93 | ||
00de59a6 | 94 | slot = (ch % collate_hash_size) * (collate_nrules + 1); |
19bc17a9 RM |
95 | |
96 | level = 0; | |
4b10dd6c | 97 | while (__collate_tablewc[slot] != (uint32_t) ch) |
19bc17a9 | 98 | { |
4b10dd6c | 99 | if (__collate_tablewc[slot + 1] == 0 |
19bc17a9 RM |
100 | || ++level >= collate_hash_layers) |
101 | { | |
102 | size_t idx = collate_undefined; | |
103 | size_t cnt; | |
104 | ||
105 | for (cnt = 0; cnt < collate_nrules; ++cnt) | |
106 | { | |
4b10dd6c UD |
107 | result->data[cnt].number = __collate_extrawc[idx++]; |
108 | result->data[cnt].value = &__collate_extrawc[idx]; | |
19bc17a9 RM |
109 | idx += result->data[cnt].number; |
110 | } | |
5d08d218 UD |
111 | /* The Unix standard requires that a character outside |
112 | the domain is signalled by setting `errno'. */ | |
113 | __set_errno (EINVAL); | |
5a97622d | 114 | return; |
19bc17a9 RM |
115 | } |
116 | slot += level_size; | |
117 | } | |
118 | } | |
119 | ||
4b10dd6c | 120 | if (__collate_tablewc[slot + 1] != (uint32_t) FORWARD_CHAR) |
19bc17a9 | 121 | { |
d17a729b | 122 | /* We have a simple form. One value for each weight. */ |
19bc17a9 RM |
123 | size_t cnt; |
124 | ||
125 | for (cnt = 0; cnt < collate_nrules; ++cnt) | |
126 | { | |
127 | result->data[cnt].number = 1; | |
4b10dd6c | 128 | result->data[cnt].value = &__collate_tablewc[slot + 1 + cnt]; |
19bc17a9 | 129 | } |
5a97622d | 130 | return; |
19bc17a9 RM |
131 | } |
132 | ||
133 | /* We now look for any collation element which starts with CH. | |
134 | There might be none, but the last list member is a catch-all case | |
135 | because it is simply the character CH. The value of this entry | |
136 | might be the same as UNDEFINED. */ | |
4b10dd6c | 137 | slot = __collate_tablewc[slot + 2]; |
19bc17a9 RM |
138 | |
139 | while (1) | |
140 | { | |
141 | size_t idx; | |
142 | ||
4b10dd6c | 143 | /* This is a comparison between a uint32_t array (aka wchar_t) and |
19bc17a9 | 144 | an 8-bit string. */ |
4b10dd6c UD |
145 | for (idx = 0; __collate_extrawc[slot + 2 + idx] != 0; ++idx) |
146 | if (__collate_extrawc[slot + 2 + idx] != (uint32_t) (*str)[idx]) | |
19bc17a9 RM |
147 | break; |
148 | ||
4b10dd6c | 149 | /* When the loop finished with all characters of the collation |
19bc17a9 | 150 | element used, we found the longest prefix. */ |
4b10dd6c | 151 | if (__collate_extrawc[slot + 2 + idx] == 0) |
19bc17a9 RM |
152 | { |
153 | size_t cnt; | |
154 | ||
7e3be507 | 155 | *str += idx; |
19bc17a9 RM |
156 | idx += slot + 3; |
157 | for (cnt = 0; cnt < collate_nrules; ++cnt) | |
158 | { | |
4b10dd6c UD |
159 | result->data[cnt].number = __collate_extrawc[idx++]; |
160 | result->data[cnt].value = &__collate_extrawc[idx]; | |
19bc17a9 RM |
161 | idx += result->data[cnt].number; |
162 | } | |
5a97622d | 163 | return; |
19bc17a9 RM |
164 | } |
165 | ||
166 | /* To next entry in list. */ | |
4b10dd6c | 167 | slot += __collate_extrawc[slot]; |
19bc17a9 | 168 | } |
19bc17a9 RM |
169 | } |
170 | ||
171 | ||
172 | /* To process a string efficiently we retrieve all information about | |
173 | the string at once. The following macro constructs a doubly linked | |
174 | list of this information. It is a macro because we use `alloca' | |
175 | and we use a doubly linked list because of the backward collation | |
c84142e8 UD |
176 | order. |
177 | ||
178 | We have this strange extra macro since the functions which use the | |
5a97622d | 179 | given locale (not the global one) cannot use the global tables. */ |
c84142e8 | 180 | #ifndef USE_IN_EXTENDED_LOCALE_MODEL |
4b10dd6c | 181 | # define call_get_weight(strp, newp) get_weight ((strp), (newp)) |
c84142e8 UD |
182 | #else |
183 | # define call_get_weight(strp, newp) \ | |
4b10dd6c | 184 | get_weight ((strp), (newp), current, collate_table, collate_extra) |
c84142e8 UD |
185 | #endif |
186 | ||
5a97622d | 187 | #define get_string(str, forw, backw) \ |
19bc17a9 RM |
188 | do \ |
189 | { \ | |
190 | weight_t *newp; \ | |
5a97622d | 191 | while (*str != '\0') \ |
19bc17a9 RM |
192 | { \ |
193 | newp = (weight_t *) alloca (sizeof (weight_t) \ | |
194 | + (collate_nrules \ | |
195 | * sizeof (struct data_pair))); \ | |
196 | \ | |
197 | newp->prev = backw; \ | |
198 | if (backw == NULL) \ | |
199 | forw = newp; \ | |
200 | else \ | |
201 | backw->next = newp; \ | |
202 | newp->next = NULL; \ | |
203 | backw = newp; \ | |
5a97622d | 204 | call_get_weight (&str, newp); \ |
19bc17a9 | 205 | } \ |
19bc17a9 RM |
206 | } \ |
207 | while (0) |