]> sourceware.org Git - glibc.git/blob - libidn/toutf8.c
d16efdb5b95c093406ac96dc58a4751096dc8403
[glibc.git] / libidn / toutf8.c
1 /* toutf8.c Convert strings from system locale into UTF-8.
2 * Copyright (C) 2002, 2003, 2004 Simon Josefsson
3 *
4 * This file is part of GNU Libidn.
5 *
6 * GNU Libidn is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
21
22 #if HAVE_CONFIG_H
23 # include "config.h"
24 #endif
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <errno.h>
30 #include <sys/param.h>
31
32 #include "stringprep.h"
33
34 #ifdef _LIBC
35 # define HAVE_ICONV 1
36 # define LOCALE_WORKS 1
37 # define ICONV_CONST
38 #endif
39
40 #ifdef HAVE_ICONV
41 # include <iconv.h>
42
43 # if LOCALE_WORKS
44 # include <langinfo.h>
45 # include <locale.h>
46 # endif
47
48 # ifndef _LIBC
/* Work out the character set of the SYSTEM locale (the one selected by
   the environment), without relying on whatever locale the program has
   switched itself into.

   Lookup order:
     1. the CHARSET environment variable, if set and non-empty;
     2. when LOCALE_WORKS, the codeset reported after temporarily
        switching LC_CTYPE to the environment's locale;
     3. the literal "ASCII".

   Never returns NULL.  A result obtained in step 2 is a heap copy that
   is intentionally never freed: stringprep_locale_charset() stores the
   pointer in its cache and hands it out for the rest of the process.  */
static const char *
stringprep_locale_charset_slow (void)
{
  const char *charset = getenv ("CHARSET");	/* flawfinder: ignore */

  if (charset && *charset)
    return charset;

# if LOCALE_WORKS
  {
    const char *current = setlocale (LC_CTYPE, NULL);
    char *saved = NULL;
    char *result = NULL;

    /* The string returned by setlocale() may be overwritten by the next
       setlocale() call, so keep a private copy to restore from.  */
    if (current)
      {
	size_t len = strlen (current) + 1;

	saved = malloc (len);
	if (saved)
	  memcpy (saved, current, len);
      }

    /* Only switch locales when we are certain we can switch back.  */
    if (saved)
      {
	setlocale (LC_CTYPE, "");

	charset = nl_langinfo (CODESET);
	if (charset && *charset)
	  {
	    /* nl_langinfo()'s buffer may be invalidated by the
	       setlocale() call below; copy it before restoring.  */
	    size_t len = strlen (charset) + 1;

	    result = malloc (len);
	    if (result)
	      memcpy (result, charset, len);
	  }

	setlocale (LC_CTYPE, saved);
	free (saved);

	if (result)
	  return result;
      }
  }
# endif

  return "ASCII";
}
76
77 static const char *stringprep_locale_charset_cache;
78 # endif
79
80 /**
81 * stringprep_locale_charset:
82 *
83 * Find out system locale charset.
84 *
85 * Note that this function returns what it believes the SYSTEM is using
86 * as a locale, not what locale the program is currently in (modified,
87 * e.g., by a setlocale(LC_CTYPE, "ISO-8859-1")). The reason is that
88 * data read from argv[], stdin etc comes from the system, and is more
89 * likely to be encoded using the system locale than the program
90 * locale.
91 *
92 * You can set the environment variable CHARSET to override the value
93 * returned. Note that this function caches the result, so you will
94 * have to modify CHARSET before calling (even indirectly) any
95 * stringprep functions, e.g., by setting it when invoking the
96 * application.
97 *
98 * Return value: Return the character set used by the system locale.
99 * It will never return NULL, but use "ASCII" as a fallback.
100 **/
101 # ifdef _LIBC
102 # define stringprep_locale_charset() nl_langinfo (CODESET)
103 # else
104 const char *
105 stringprep_locale_charset (void)
106 {
107 if (!stringprep_locale_charset_cache)
108 stringprep_locale_charset_cache = stringprep_locale_charset_slow ();
109
110 return stringprep_locale_charset_cache;
111 }
112 # endif
113
114 /**
115 * stringprep_convert:
116 * @str: input zero-terminated string.
117 * @to_codeset: name of destination character set.
118 * @from_codeset: name of origin character set, as used by @str.
119 *
120 * Convert the string from one character set to another using the
121 * system's iconv() function.
122 *
123 * Return value: Returns newly allocated zero-terminated string which
124 * is @str transcoded into to_codeset.
125 **/
/* Fallback for builds whose configuration does not say whether iconv()
   takes a const-qualified input pointer.  */
#ifndef ICONV_CONST
# define ICONV_CONST
#endif

/* Double the output buffer *DEST (currently *SIZE bytes, next free byte
   at *OUTP) and recompute *OUTBYTES_REMAINING, always keeping one byte
   in reserve for the terminating NUL.  Returns 0 on success; on failure
   returns -1 and leaves the existing buffer untouched.  */
static int
stringprep_convert_grow (char **dest, char **outp, size_t *size,
			 size_t *outbytes_remaining)
{
  size_t used = (size_t) (*outp - *dest);
  size_t newsize;
  char *newdest;

  if (*size > (size_t) -1 / 2)
    return -1;			/* Doubling would wrap around.  */
  newsize = *size * 2;

  newdest = realloc (*dest, newsize);
  if (newdest == NULL)
    return -1;

  *dest = newdest;
  *outp = newdest + used;
  *size = newsize;
  *outbytes_remaining = newsize - used - 1;	/* -1 for NUL */
  return 0;
}

/* Convert the zero-terminated string STR from FROM_CODESET to
   TO_CODESET using the system's iconv().

   Returns a newly allocated (malloc) zero-terminated string that the
   caller must free(), or NULL when any argument is NULL, the conversion
   is not supported, the input is invalid or incomplete in FROM_CODESET,
   or memory runs out.  When both codeset names compare equal the input
   is simply duplicated.  */
char *
stringprep_convert (const char *str,
		    const char *to_codeset, const char *from_codeset)
{
  iconv_t cd;
  char *dest = NULL;
  char *outp;
  ICONV_CONST char *inp;
  size_t inbytes_remaining;
  size_t outbytes_remaining;
  size_t outbuf_size;
  size_t expansion;
  int have_error = 0;

  if (str == NULL || to_codeset == NULL || from_codeset == NULL)
    return NULL;

  if (strcmp (to_codeset, from_codeset) == 0)
    {
#if defined HAVE_STRDUP || defined _LIBC
      return strdup (str);
#else
      size_t len = strlen (str) + 1;
      char *copy = malloc (len);

      if (copy == NULL)
	return NULL;
      return memcpy (copy, str, len);
#endif
    }

  cd = iconv_open (to_codeset, from_codeset);
  if (cd == (iconv_t) -1)
    return NULL;

  inp = (ICONV_CONST char *) str;
  inbytes_remaining = strlen (str);

  /* Guess the maximum length the output string can have, refusing
     inputs for which the estimate itself would overflow size_t.  */
  expansion = MAX (7, MB_CUR_MAX);
  if (inbytes_remaining >= (size_t) -1 / expansion)
    goto out;			/* dest is still NULL.  */
  outbuf_size = (inbytes_remaining + 1) * expansion;

  outp = dest = malloc (outbuf_size);
  if (dest == NULL)
    goto out;
  outbytes_remaining = outbuf_size - 1;	/* -1 for NUL */

  /* Convert the text.  E2BIG only means the guess above was too small,
     so enlarge the buffer and resume.  Every other failure (EILSEQ,
     EINVAL for a truncated trailing sequence, ...) leaves input
     unconsumed and is reported to the caller as NULL.  */
  while (iconv (cd, (ICONV_CONST char **) &inp, &inbytes_remaining,
		&outp, &outbytes_remaining) == (size_t) -1)
    {
      if (errno != E2BIG
	  || stringprep_convert_grow (&dest, &outp, &outbuf_size,
				      &outbytes_remaining) != 0)
	{
	  have_error = 1;
	  goto out;
	}
    }

  /* Flush the converter: for stateful target encodings this emits the
     byte sequence that returns the output to its initial shift state.
     It is a no-op for stateless encodings.  */
  while (iconv (cd, NULL, NULL, &outp, &outbytes_remaining) == (size_t) -1)
    {
      if (errno != E2BIG
	  || stringprep_convert_grow (&dest, &outp, &outbuf_size,
				      &outbytes_remaining) != 0)
	{
	  have_error = 1;
	  goto out;
	}
    }

  *outp = '\0';

 out:
  iconv_close (cd);

  if (have_error)
    {
      free (dest);
      dest = NULL;
    }

  return dest;
}
229
230 #else /* HAVE_ICONV */
231
/* Without iconv no charset detection is attempted: always report the
   "ASCII" fallback.  Takes no arguments.  */
const char *
stringprep_locale_charset (void)
{
  return "ASCII";
}
237
/* iconv-less stand-in: warn on stderr that no conversion is possible
   and hand back an unconverted, newly allocated copy of STR (NULL if
   the allocation fails).  TO_CODESET and FROM_CODESET are ignored.  */
char *
stringprep_convert (const char *str,
		    const char *to_codeset, const char *from_codeset)
{
  size_t nbytes;
  char *copy;

  fprintf (stderr, "libidn: warning: libiconv not installed, cannot "
	   "convert data to UTF-8\n");

  nbytes = strlen (str) + 1;	/* Include the terminating NUL.  */
  copy = malloc (nbytes);
  if (copy != NULL)
    memcpy (copy, str, nbytes);

  return copy;
}
251
252 #endif /* HAVE_ICONV */
253
254 /**
255 * stringprep_locale_to_utf8:
256 * @str: input zero terminated string.
257 *
258 * Convert string encoded in the locale's character set into UTF-8 by
259 * using stringprep_convert().
260 *
261 * Return value: Returns newly allocated zero-terminated string which
262 * is @str transcoded into UTF-8.
263 **/
char *
stringprep_locale_to_utf8 (const char *str)
{
  /* The source encoding is whatever the locale lookup reports; the
     target is always UTF-8.  */
  const char *locale_codeset = stringprep_locale_charset ();

  return stringprep_convert (str, "UTF-8", locale_codeset);
}
269
270 /**
271 * stringprep_utf8_to_locale:
272 * @str: input zero terminated string.
273 *
274 * Convert string encoded in UTF-8 into the locale's character set by
275 * using stringprep_convert().
276 *
277 * Return value: Returns newly allocated zero-terminated string which
278 * is @str transcoded into the locale's character set.
279 **/
char *
stringprep_utf8_to_locale (const char *str)
{
  /* The input is taken to be UTF-8; the target is whatever encoding
     the locale lookup reports.  */
  const char *locale_codeset = stringprep_locale_charset ();

  return stringprep_convert (str, locale_codeset, "UTF-8");
}
This page took 0.047815 seconds and 5 git commands to generate.