]> sourceware.org Git - glibc.git/blame - iconv/gconv_trans.c
Update.
[glibc.git] / iconv / gconv_trans.c
CommitLineData
55985355
UD
1/* Transliteration using the locale's data.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
d6204268 21#include <assert.h>
f1d5c60d 22#include <dlfcn.h>
d6204268 23#include <search.h>
55985355 24#include <stdint.h>
d6204268 25#include <string.h>
7884bf47 26#include <stdlib.h>
55985355 27
d6204268 28#include <bits/libc-lock.h>
55985355
UD
29#include "gconv_int.h"
30#include "../locale/localeinfo.h"
31
32
33int
f1d5c60d
UD
34__gconv_transliterate (struct __gconv_step *step,
35 struct __gconv_step_data *step_data,
d6204268 36 void *trans_data __attribute__ ((unused)),
f1d5c60d
UD
37 const unsigned char *inbufstart,
38 const unsigned char **inbufp,
39 const unsigned char *inbufend,
40 unsigned char **outbufstart, size_t *irreversible)
55985355
UD
41{
42 /* Find out about the locale's transliteration. */
f1d5c60d 43 uint_fast32_t size;
17427edd
UD
44 const uint32_t *from_idx;
45 const uint32_t *from_tbl;
46 const uint32_t *to_idx;
47 const uint32_t *to_tbl;
48 const uint32_t *winbuf;
49 const uint32_t *winbufend;
f1d5c60d
UD
50 uint_fast32_t low;
51 uint_fast32_t high;
55985355 52
d5055a20 53 /* The input buffer. There are actually 4-byte values. */
17427edd
UD
54 winbuf = (const uint32_t *) *inbufp;
55 winbufend = (const uint32_t *) inbufend;
d5055a20 56
55985355
UD
57 /* If there is no transliteration information in the locale don't do
58 anything and return the error. */
04fbc779 59 size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE);
55985355 60 if (size == 0)
1d96d74d 61 goto no_rules;
55985355 62
f1d5c60d 63 /* Get the rest of the values. */
17427edd
UD
64 from_idx =
65 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
66 from_tbl =
67 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
68 to_idx =
69 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
70 to_tbl =
71 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL);
f1d5c60d 72
f1d5c60d
UD
73 /* Test whether there is enough input. */
74 if (winbuf + 1 > winbufend)
75 return (winbuf == winbufend
76 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
77
78 /* The array starting at FROM_IDX contains indeces to the string table
79 in FROM_TBL. The indeces are sorted wrt to the strings. I.e., we
80 are doing binary search. */
81 low = 0;
82 high = size;
83 while (low < high)
84 {
85 uint_fast32_t med = (low + high) / 2;
86 uint32_t idx;
87 int cnt;
88
89 /* Compare the string at this index with the string at the current
90 position in the input buffer. */
91 idx = from_idx[med];
92 cnt = 0;
93 do
94 {
95 if (from_tbl[idx + cnt] != winbuf[cnt])
96 /* Does not match. */
97 break;
98 ++cnt;
99 }
100 while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend);
101
102 if (cnt > 0 && from_tbl[idx + cnt] == L'\0')
103 {
104 /* Found a matching input sequence. Now try to convert the
105 possible replacements. */
106 uint32_t idx2 = to_idx[med];
107
108 do
109 {
110 /* Determine length of replacement. */
111 uint_fast32_t len = 0;
112 int res;
113 const unsigned char *toinptr;
114
115 while (to_tbl[idx2 + len] != L'\0')
116 ++len;
117
118 /* Try this input text. */
119 toinptr = (const unsigned char *) &to_tbl[idx2];
120 res = DL_CALL_FCT (step->__fct,
121 (step, step_data, &toinptr,
122 (const unsigned char *) &to_tbl[idx2 + len],
123 (unsigned char **) outbufstart,
b572c2da 124 NULL, 0, 0));
f1d5c60d
UD
125 if (res != __GCONV_ILLEGAL_INPUT)
126 {
127 /* If the conversion succeeds we have to increment the
128 input buffer. */
129 if (res == __GCONV_EMPTY_INPUT)
130 {
131 *inbufp += cnt * sizeof (uint32_t);
132 ++*irreversible;
a8e4c924 133 res = __GCONV_OK;
f1d5c60d
UD
134 }
135
136 return res;
137 }
138
139 /* Next replacement. */
140 idx2 += len + 1;
141 }
142 while (to_tbl[idx2] != L'\0');
143
144 /* Nothing found, continue searching. */
145 }
a8e4c924
UD
146 else if (cnt > 0)
147 /* This means that the input buffer contents matches a prefix of
148 an entry. Since we cannot match it unless we get more input,
149 we will tell the caller about it. */
150 return __GCONV_INCOMPLETE_INPUT;
f1d5c60d
UD
151
152 if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
04fbc779 153 low = med + 1;
f1d5c60d 154 else
04fbc779 155 high = med;
f1d5c60d
UD
156 }
157
1d96d74d 158 no_rules:
a8e4c924
UD
159 /* Maybe the character is supposed to be ignored. */
160 if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0)
161 {
162 int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN);
17427edd
UD
163 const uint32_t *ranges =
164 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE);
165 const uint32_t wc = *(const uint32_t *) (*inbufp);
a8e4c924
UD
166 int i;
167
168 /* Test whether there is enough input. */
169 if (winbuf + 1 > winbufend)
170 return (winbuf == winbufend
171 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
172
173 for (i = 0; i < n; ranges += 3, ++i)
174 if (ranges[0] <= wc && wc <= ranges[1]
175 && (wc - ranges[0]) % ranges[2] == 0)
176 {
177 /* Matches the range. Ignore it. */
178 *inbufp += 4;
179 ++*irreversible;
180 return __GCONV_OK;
181 }
182 else if (wc < ranges[0])
183 /* There cannot be any other matching range since they are
184 sorted. */
185 break;
186 }
187
188 /* One last chance: use the default replacement. */
fb46e8d2 189 if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN) != 0)
1d96d74d 190 {
17427edd 191 const uint32_t *default_missing = (const uint32_t *)
fb46e8d2 192 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING);
1d96d74d
UD
193 const unsigned char *toinptr = (const unsigned char *) default_missing;
194 uint32_t len = _NL_CURRENT_WORD (LC_CTYPE,
195 _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN);
196 int res;
197
a8e4c924
UD
198 /* Test whether there is enough input. */
199 if (winbuf + 1 > winbufend)
200 return (winbuf == winbufend
201 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
202
1d96d74d
UD
203 res = DL_CALL_FCT (step->__fct,
204 (step, step_data, &toinptr,
205 (const unsigned char *) (default_missing + len),
206 (unsigned char **) outbufstart,
b572c2da 207 NULL, 0, 0));
1d96d74d
UD
208
209 if (res != __GCONV_ILLEGAL_INPUT)
210 {
211 /* If the conversion succeeds we have to increment the
212 input buffer. */
213 if (res == __GCONV_EMPTY_INPUT)
214 {
a8e4c924 215 /* This worked but is not reversible. */
1d96d74d 216 ++*irreversible;
a8e4c924
UD
217 *inbufp += 4;
218 res = __GCONV_OK;
1d96d74d
UD
219 }
220
221 return res;
222 }
223 }
224
f1d5c60d 225 /* Haven't found a match. */
55985355
UD
226 return __GCONV_ILLEGAL_INPUT;
227}
d6204268
UD
228
229
230/* Structure to represent results of found (or not) transliteration
231 modules. */
232struct known_trans
233{
234 /* This structure must remain the first member. */
235 struct trans_struct info;
236
17427edd 237 char *fname;
d6204268
UD
238 void *handle;
239 int open_count;
240};
241
242
243/* Tree with results of previous calls to __gconv_translit_find. */
244static void *search_tree;
245
246/* We modify global data. */
247__libc_lock_define_initialized (static, lock);
248
249
250/* Compare two transliteration entries. */
251static int
252trans_compare (const void *p1, const void *p2)
253{
17427edd
UD
254 const struct known_trans *s1 = (const struct known_trans *) p1;
255 const struct known_trans *s2 = (const struct known_trans *) p2;
d6204268
UD
256
257 return strcmp (s1->info.name, s2->info.name);
258}
259
260
261/* Open (maybe reopen) the module named in the struct. Get the function
262 and data structure pointers we need. */
263static int
264open_translit (struct known_trans *trans)
265{
266 __gconv_trans_query_fct queryfct;
267
268 trans->handle = __libc_dlopen (trans->fname);
269 if (trans->handle == NULL)
270 /* Not available. */
271 return 1;
272
273 /* Find the required symbol. */
274 queryfct = __libc_dlsym (trans->handle, "gconv_trans_context");
275 if (queryfct == NULL)
276 {
277 /* We cannot live with that. */
278 close_and_out:
279 __libc_dlclose (trans->handle);
280 trans->handle = NULL;
281 return 1;
282 }
283
284 /* Get the context. */
285 if (queryfct (trans->info.name, &trans->info.csnames, &trans->info.ncsnames)
286 != 0)
287 goto close_and_out;
288
289 /* Of course we also have to have the actual function. */
290 trans->info.trans_fct = __libc_dlsym (trans->handle, "gconv_trans");
291 if (trans->info.trans_fct == NULL)
292 goto close_and_out;
293
294 /* Now the optional functions. */
295 trans->info.trans_init_fct =
296 __libc_dlsym (trans->handle, "gconv_trans_init");
297 trans->info.trans_context_fct =
298 __libc_dlsym (trans->handle, "gconv_trans_context");
299 trans->info.trans_end_fct =
300 __libc_dlsym (trans->handle, "gconv_trans_end");
301
302 trans->open_count = 1;
303
304 return 0;
305}
306
307
308int
309internal_function
310__gconv_translit_find (struct trans_struct *trans)
311{
312 struct known_trans **found;
313 const struct path_elem *runp;
314 int res = 1;
315
316 /* We have to have a name. */
317 assert (trans->name != NULL);
318
319 /* Acquire the lock. */
320 __libc_lock_lock (lock);
321
322 /* See whether we know this module already. */
323 found = __tfind (trans, &search_tree, trans_compare);
324 if (found != NULL)
325 {
326 /* Is this module available? */
327 if ((*found)->handle != NULL)
328 {
329 /* Maybe we have to reopen the file. */
330 if ((*found)->handle != (void *) -1)
331 /* The object is not unloaded. */
332 res = 0;
333 else if (open_translit (*found) == 0)
334 {
335 /* Copy the data. */
336 *trans = (*found)->info;
b79f74cd 337 (*found)->open_count++;
d6204268
UD
338 res = 0;
339 }
340 }
341 }
342 else
343 {
344 size_t name_len = strlen (trans->name) + 1;
345 int need_so = 0;
346 struct known_trans *newp;
347
348 /* We have to continue looking for the module. */
349 if (__gconv_path_elem == NULL)
350 __gconv_get_path ();
351
352 /* See whether we have to append .so. */
b79f74cd 353 if (name_len <= 4 || memcmp (&trans->name[name_len - 4], ".so", 3) != 0)
d6204268
UD
354 need_so = 1;
355
356 /* Create a new entry. */
357 newp = (struct known_trans *) malloc (sizeof (struct known_trans)
358 + (__gconv_max_path_elem_len
359 + name_len + 3)
360 + name_len);
361 if (newp != NULL)
362 {
363 char *cp;
364
365 /* Clear the struct. */
366 memset (newp, '\0', sizeof (struct known_trans));
367
368 /* Store a copy of the module name. */
369 newp->info.name = (char *) (newp + 1);
370 cp = __mempcpy ((char *) newp->info.name, trans->name, name_len);
371
372 newp->fname = cp;
373
b79f74cd 374 /* Search in all the directories. */
d6204268
UD
375 for (runp = __gconv_path_elem; runp->name != NULL; ++runp)
376 {
377 cp = __mempcpy (__stpcpy ((char *) newp->fname, runp->name),
378 trans->name, name_len);
379 if (need_so)
380 memcpy (cp, ".so", sizeof (".so"));
381
382 if (open_translit (newp) == 0)
383 {
384 /* We found a module. */
385 res = 0;
386 break;
387 }
388 }
389
b79f74cd
UD
390 if (res)
391 newp->fname = NULL;
392
d6204268
UD
393 /* In any case we'll add the entry to our search tree. */
394 if (__tsearch (newp, &search_tree, trans_compare) == NULL)
395 {
396 /* Yickes, this should not happen. Unload the object. */
397 res = 1;
398 /* XXX unload here. */
399 }
400 }
401 }
402
403 __libc_lock_unlock (lock);
404
405 return res;
406}
This page took 0.091228 seconds and 5 git commands to generate.