]>
Commit | Line | Data |
---|---|---|
55985355 UD |
1 | /* Transliteration using the locale's data. |
2 | Copyright (C) 2000 Free Software Foundation, Inc. | |
3 | This file is part of the GNU C Library. | |
4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Library General Public License as | |
8 | published by the Free Software Foundation; either version 2 of the | |
9 | License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Library General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Library General Public | |
17 | License along with the GNU C Library; see the file COPYING.LIB. If not, | |
18 | write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
19 | Boston, MA 02111-1307, USA. */ | |
20 | ||
d6204268 | 21 | #include <assert.h> |
f1d5c60d | 22 | #include <dlfcn.h> |
d6204268 | 23 | #include <search.h> |
55985355 | 24 | #include <stdint.h> |
d6204268 | 25 | #include <string.h> |
7884bf47 | 26 | #include <stdlib.h> |
55985355 | 27 | |
d6204268 | 28 | #include <bits/libc-lock.h> |
55985355 UD |
29 | #include "gconv_int.h" |
30 | #include "../locale/localeinfo.h" | |
31 | ||
32 | ||
33 | int | |
f1d5c60d UD |
34 | __gconv_transliterate (struct __gconv_step *step, |
35 | struct __gconv_step_data *step_data, | |
d6204268 | 36 | void *trans_data __attribute__ ((unused)), |
f1d5c60d UD |
37 | const unsigned char *inbufstart, |
38 | const unsigned char **inbufp, | |
39 | const unsigned char *inbufend, | |
40 | unsigned char **outbufstart, size_t *irreversible) | |
55985355 UD |
41 | { |
42 | /* Find out about the locale's transliteration. */ | |
f1d5c60d | 43 | uint_fast32_t size; |
17427edd UD |
44 | const uint32_t *from_idx; |
45 | const uint32_t *from_tbl; | |
46 | const uint32_t *to_idx; | |
47 | const uint32_t *to_tbl; | |
48 | const uint32_t *winbuf; | |
49 | const uint32_t *winbufend; | |
f1d5c60d UD |
50 | uint_fast32_t low; |
51 | uint_fast32_t high; | |
55985355 | 52 | |
d5055a20 | 53 | /* The input buffer. There are actually 4-byte values. */ |
17427edd UD |
54 | winbuf = (const uint32_t *) *inbufp; |
55 | winbufend = (const uint32_t *) inbufend; | |
d5055a20 | 56 | |
55985355 UD |
57 | /* If there is no transliteration information in the locale don't do |
58 | anything and return the error. */ | |
04fbc779 | 59 | size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE); |
55985355 | 60 | if (size == 0) |
1d96d74d | 61 | goto no_rules; |
55985355 | 62 | |
f1d5c60d | 63 | /* Get the rest of the values. */ |
17427edd UD |
64 | from_idx = |
65 | (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX); | |
66 | from_tbl = | |
67 | (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL); | |
68 | to_idx = | |
69 | (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX); | |
70 | to_tbl = | |
71 | (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL); | |
f1d5c60d | 72 | |
f1d5c60d UD |
73 | /* Test whether there is enough input. */ |
74 | if (winbuf + 1 > winbufend) | |
75 | return (winbuf == winbufend | |
76 | ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); | |
77 | ||
78 | /* The array starting at FROM_IDX contains indeces to the string table | |
79 | in FROM_TBL. The indeces are sorted wrt to the strings. I.e., we | |
80 | are doing binary search. */ | |
81 | low = 0; | |
82 | high = size; | |
83 | while (low < high) | |
84 | { | |
85 | uint_fast32_t med = (low + high) / 2; | |
86 | uint32_t idx; | |
87 | int cnt; | |
88 | ||
89 | /* Compare the string at this index with the string at the current | |
90 | position in the input buffer. */ | |
91 | idx = from_idx[med]; | |
92 | cnt = 0; | |
93 | do | |
94 | { | |
95 | if (from_tbl[idx + cnt] != winbuf[cnt]) | |
96 | /* Does not match. */ | |
97 | break; | |
98 | ++cnt; | |
99 | } | |
100 | while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend); | |
101 | ||
102 | if (cnt > 0 && from_tbl[idx + cnt] == L'\0') | |
103 | { | |
104 | /* Found a matching input sequence. Now try to convert the | |
105 | possible replacements. */ | |
106 | uint32_t idx2 = to_idx[med]; | |
107 | ||
108 | do | |
109 | { | |
110 | /* Determine length of replacement. */ | |
111 | uint_fast32_t len = 0; | |
112 | int res; | |
113 | const unsigned char *toinptr; | |
114 | ||
115 | while (to_tbl[idx2 + len] != L'\0') | |
116 | ++len; | |
117 | ||
118 | /* Try this input text. */ | |
119 | toinptr = (const unsigned char *) &to_tbl[idx2]; | |
120 | res = DL_CALL_FCT (step->__fct, | |
121 | (step, step_data, &toinptr, | |
122 | (const unsigned char *) &to_tbl[idx2 + len], | |
123 | (unsigned char **) outbufstart, | |
b572c2da | 124 | NULL, 0, 0)); |
f1d5c60d UD |
125 | if (res != __GCONV_ILLEGAL_INPUT) |
126 | { | |
127 | /* If the conversion succeeds we have to increment the | |
128 | input buffer. */ | |
129 | if (res == __GCONV_EMPTY_INPUT) | |
130 | { | |
131 | *inbufp += cnt * sizeof (uint32_t); | |
132 | ++*irreversible; | |
a8e4c924 | 133 | res = __GCONV_OK; |
f1d5c60d UD |
134 | } |
135 | ||
136 | return res; | |
137 | } | |
138 | ||
139 | /* Next replacement. */ | |
140 | idx2 += len + 1; | |
141 | } | |
142 | while (to_tbl[idx2] != L'\0'); | |
143 | ||
144 | /* Nothing found, continue searching. */ | |
145 | } | |
a8e4c924 UD |
146 | else if (cnt > 0) |
147 | /* This means that the input buffer contents matches a prefix of | |
148 | an entry. Since we cannot match it unless we get more input, | |
149 | we will tell the caller about it. */ | |
150 | return __GCONV_INCOMPLETE_INPUT; | |
f1d5c60d UD |
151 | |
152 | if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt]) | |
04fbc779 | 153 | low = med + 1; |
f1d5c60d | 154 | else |
04fbc779 | 155 | high = med; |
f1d5c60d UD |
156 | } |
157 | ||
1d96d74d | 158 | no_rules: |
a8e4c924 UD |
159 | /* Maybe the character is supposed to be ignored. */ |
160 | if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0) | |
161 | { | |
162 | int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN); | |
17427edd UD |
163 | const uint32_t *ranges = |
164 | (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE); | |
165 | const uint32_t wc = *(const uint32_t *) (*inbufp); | |
a8e4c924 UD |
166 | int i; |
167 | ||
168 | /* Test whether there is enough input. */ | |
169 | if (winbuf + 1 > winbufend) | |
170 | return (winbuf == winbufend | |
171 | ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); | |
172 | ||
173 | for (i = 0; i < n; ranges += 3, ++i) | |
174 | if (ranges[0] <= wc && wc <= ranges[1] | |
175 | && (wc - ranges[0]) % ranges[2] == 0) | |
176 | { | |
177 | /* Matches the range. Ignore it. */ | |
178 | *inbufp += 4; | |
179 | ++*irreversible; | |
180 | return __GCONV_OK; | |
181 | } | |
182 | else if (wc < ranges[0]) | |
183 | /* There cannot be any other matching range since they are | |
184 | sorted. */ | |
185 | break; | |
186 | } | |
187 | ||
188 | /* One last chance: use the default replacement. */ | |
fb46e8d2 | 189 | if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN) != 0) |
1d96d74d | 190 | { |
17427edd | 191 | const uint32_t *default_missing = (const uint32_t *) |
fb46e8d2 | 192 | _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING); |
1d96d74d UD |
193 | const unsigned char *toinptr = (const unsigned char *) default_missing; |
194 | uint32_t len = _NL_CURRENT_WORD (LC_CTYPE, | |
195 | _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN); | |
196 | int res; | |
197 | ||
a8e4c924 UD |
198 | /* Test whether there is enough input. */ |
199 | if (winbuf + 1 > winbufend) | |
200 | return (winbuf == winbufend | |
201 | ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); | |
202 | ||
1d96d74d UD |
203 | res = DL_CALL_FCT (step->__fct, |
204 | (step, step_data, &toinptr, | |
205 | (const unsigned char *) (default_missing + len), | |
206 | (unsigned char **) outbufstart, | |
b572c2da | 207 | NULL, 0, 0)); |
1d96d74d UD |
208 | |
209 | if (res != __GCONV_ILLEGAL_INPUT) | |
210 | { | |
211 | /* If the conversion succeeds we have to increment the | |
212 | input buffer. */ | |
213 | if (res == __GCONV_EMPTY_INPUT) | |
214 | { | |
a8e4c924 | 215 | /* This worked but is not reversible. */ |
1d96d74d | 216 | ++*irreversible; |
a8e4c924 UD |
217 | *inbufp += 4; |
218 | res = __GCONV_OK; | |
1d96d74d UD |
219 | } |
220 | ||
221 | return res; | |
222 | } | |
223 | } | |
224 | ||
f1d5c60d | 225 | /* Haven't found a match. */ |
55985355 UD |
226 | return __GCONV_ILLEGAL_INPUT; |
227 | } | |
d6204268 UD |
228 | |
229 | ||
/* Structure to represent results of found (or not) transliteration
   modules.  One object is created per module name which was looked up
   and it is kept in SEARCH_TREE, whether or not a module was found.  */
struct known_trans
{
  /* This structure must remain the first member: the tree is searched
     with a pointer to a plain `struct trans_struct' as the key and the
     comparison function accesses it as a `struct known_trans'.  */
  struct trans_struct info;

  /* File name the module is loaded from, or NULL if no module was found
     in any directory of the search path.  */
  char *fname;
  /* Handle returned by __libc_dlopen.  NULL means the module is not
     available; the lookup code treats (void *) -1 as "was unloaded and
     must be opened again".  */
  void *handle;
  /* Set to 1 when the module is opened, incremented when it is
     reopened by the lookup function.  */
  int open_count;
};
241 | ||
242 | ||
/* Tree with results of previous calls to __gconv_translit_find.  The
   nodes are `struct known_trans' objects ordered by trans_compare.  */
static void *search_tree;

/* We modify global data.  __gconv_translit_find holds this lock while it
   reads or updates SEARCH_TREE.  */
__libc_lock_define_initialized (static, lock);
248 | ||
249 | ||
250 | /* Compare two transliteration entries. */ | |
251 | static int | |
252 | trans_compare (const void *p1, const void *p2) | |
253 | { | |
17427edd UD |
254 | const struct known_trans *s1 = (const struct known_trans *) p1; |
255 | const struct known_trans *s2 = (const struct known_trans *) p2; | |
d6204268 UD |
256 | |
257 | return strcmp (s1->info.name, s2->info.name); | |
258 | } | |
259 | ||
260 | ||
261 | /* Open (maybe reopen) the module named in the struct. Get the function | |
262 | and data structure pointers we need. */ | |
263 | static int | |
264 | open_translit (struct known_trans *trans) | |
265 | { | |
266 | __gconv_trans_query_fct queryfct; | |
267 | ||
268 | trans->handle = __libc_dlopen (trans->fname); | |
269 | if (trans->handle == NULL) | |
270 | /* Not available. */ | |
271 | return 1; | |
272 | ||
273 | /* Find the required symbol. */ | |
274 | queryfct = __libc_dlsym (trans->handle, "gconv_trans_context"); | |
275 | if (queryfct == NULL) | |
276 | { | |
277 | /* We cannot live with that. */ | |
278 | close_and_out: | |
279 | __libc_dlclose (trans->handle); | |
280 | trans->handle = NULL; | |
281 | return 1; | |
282 | } | |
283 | ||
284 | /* Get the context. */ | |
285 | if (queryfct (trans->info.name, &trans->info.csnames, &trans->info.ncsnames) | |
286 | != 0) | |
287 | goto close_and_out; | |
288 | ||
289 | /* Of course we also have to have the actual function. */ | |
290 | trans->info.trans_fct = __libc_dlsym (trans->handle, "gconv_trans"); | |
291 | if (trans->info.trans_fct == NULL) | |
292 | goto close_and_out; | |
293 | ||
294 | /* Now the optional functions. */ | |
295 | trans->info.trans_init_fct = | |
296 | __libc_dlsym (trans->handle, "gconv_trans_init"); | |
297 | trans->info.trans_context_fct = | |
298 | __libc_dlsym (trans->handle, "gconv_trans_context"); | |
299 | trans->info.trans_end_fct = | |
300 | __libc_dlsym (trans->handle, "gconv_trans_end"); | |
301 | ||
302 | trans->open_count = 1; | |
303 | ||
304 | return 0; | |
305 | } | |
306 | ||
307 | ||
308 | int | |
309 | internal_function | |
310 | __gconv_translit_find (struct trans_struct *trans) | |
311 | { | |
312 | struct known_trans **found; | |
313 | const struct path_elem *runp; | |
314 | int res = 1; | |
315 | ||
316 | /* We have to have a name. */ | |
317 | assert (trans->name != NULL); | |
318 | ||
319 | /* Acquire the lock. */ | |
320 | __libc_lock_lock (lock); | |
321 | ||
322 | /* See whether we know this module already. */ | |
323 | found = __tfind (trans, &search_tree, trans_compare); | |
324 | if (found != NULL) | |
325 | { | |
326 | /* Is this module available? */ | |
327 | if ((*found)->handle != NULL) | |
328 | { | |
329 | /* Maybe we have to reopen the file. */ | |
330 | if ((*found)->handle != (void *) -1) | |
331 | /* The object is not unloaded. */ | |
332 | res = 0; | |
333 | else if (open_translit (*found) == 0) | |
334 | { | |
335 | /* Copy the data. */ | |
336 | *trans = (*found)->info; | |
b79f74cd | 337 | (*found)->open_count++; |
d6204268 UD |
338 | res = 0; |
339 | } | |
340 | } | |
341 | } | |
342 | else | |
343 | { | |
344 | size_t name_len = strlen (trans->name) + 1; | |
345 | int need_so = 0; | |
346 | struct known_trans *newp; | |
347 | ||
348 | /* We have to continue looking for the module. */ | |
349 | if (__gconv_path_elem == NULL) | |
350 | __gconv_get_path (); | |
351 | ||
352 | /* See whether we have to append .so. */ | |
b79f74cd | 353 | if (name_len <= 4 || memcmp (&trans->name[name_len - 4], ".so", 3) != 0) |
d6204268 UD |
354 | need_so = 1; |
355 | ||
356 | /* Create a new entry. */ | |
357 | newp = (struct known_trans *) malloc (sizeof (struct known_trans) | |
358 | + (__gconv_max_path_elem_len | |
359 | + name_len + 3) | |
360 | + name_len); | |
361 | if (newp != NULL) | |
362 | { | |
363 | char *cp; | |
364 | ||
365 | /* Clear the struct. */ | |
366 | memset (newp, '\0', sizeof (struct known_trans)); | |
367 | ||
368 | /* Store a copy of the module name. */ | |
369 | newp->info.name = (char *) (newp + 1); | |
370 | cp = __mempcpy ((char *) newp->info.name, trans->name, name_len); | |
371 | ||
372 | newp->fname = cp; | |
373 | ||
b79f74cd | 374 | /* Search in all the directories. */ |
d6204268 UD |
375 | for (runp = __gconv_path_elem; runp->name != NULL; ++runp) |
376 | { | |
377 | cp = __mempcpy (__stpcpy ((char *) newp->fname, runp->name), | |
378 | trans->name, name_len); | |
379 | if (need_so) | |
380 | memcpy (cp, ".so", sizeof (".so")); | |
381 | ||
382 | if (open_translit (newp) == 0) | |
383 | { | |
384 | /* We found a module. */ | |
385 | res = 0; | |
386 | break; | |
387 | } | |
388 | } | |
389 | ||
b79f74cd UD |
390 | if (res) |
391 | newp->fname = NULL; | |
392 | ||
d6204268 UD |
393 | /* In any case we'll add the entry to our search tree. */ |
394 | if (__tsearch (newp, &search_tree, trans_compare) == NULL) | |
395 | { | |
396 | /* Yickes, this should not happen. Unload the object. */ | |
397 | res = 1; | |
398 | /* XXX unload here. */ | |
399 | } | |
400 | } | |
401 | } | |
402 | ||
403 | __libc_lock_unlock (lock); | |
404 | ||
405 | return res; | |
406 | } |