]> sourceware.org Git - glibc.git/blob - libidn/toutf8.c
450cb6db50d1b1fdac7c959993f91c12900b65a7
[glibc.git] / libidn / toutf8.c
1 /* toutf8.c Convert strings from system locale into UTF-8.
2 * Copyright (C) 2002, 2003, 2004 Simon Josefsson
3 *
4 * This file is part of GNU Libidn.
5 *
6 * GNU Libidn is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
21
22 #if HAVE_CONFIG_H
23 # include "config.h"
24 #endif
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <errno.h>
30
31 #include "stringprep.h"
32
33 #ifdef _LIBC
34 # define HAVE_ICONV 1
35 # define LOCALE_WORKS 1
36 # define ICONV_CONST
37 #endif
38
39 #ifdef HAVE_ICONV
40 # include <iconv.h>
41
42 # if LOCALE_WORKS
43 # include <langinfo.h>
44 # include <locale.h>
45 # endif
46
# ifndef _LIBC
/* Work out the character set of the system locale from scratch
   (no caching is done here).

   Lookup order:
     1. the CHARSET environment variable, when set and non-empty;
     2. when LOCALE_WORKS is enabled, nl_langinfo (CODESET) as seen
        after temporarily switching LC_CTYPE to the locale named by
        the environment;
     3. the literal "ASCII".

   Never returns NULL.  A result obtained through step 2 is a heap
   copy that is intentionally never freed, because the pointer is
   expected to be kept for the rest of the process lifetime.  */
static const char *
stringprep_locale_charset_slow (void)
{
  const char *charset = getenv ("CHARSET");	/* flawfinder: ignore */

  if (charset && *charset)
    return charset;

#  if LOCALE_WORKS
  {
    const char *current = setlocale (LC_CTYPE, NULL);
    char *saved = NULL;

    /* The string returned by setlocale() may be overwritten by the
       next setlocale() call, so keep a private copy of the name we
       need to restore afterwards.  */
    if (current)
      {
        saved = malloc (strlen (current) + 1);
        if (saved)
          strcpy (saved, current);
      }

    /* Only touch the program's locale when we are able to put it
       back again.  */
    if (saved)
      {
        char *copy = NULL;

        setlocale (LC_CTYPE, "");

        /* nl_langinfo()'s result may likewise be invalidated by the
           setlocale() call below; copy it before restoring.  */
        charset = nl_langinfo (CODESET);
        if (charset && *charset)
          {
            copy = malloc (strlen (charset) + 1);
            if (copy)
              strcpy (copy, charset);
          }

        setlocale (LC_CTYPE, saved);
        free (saved);

        if (copy)
          return copy;
      }
  }
#  endif

  return "ASCII";
}

/* Result of the first stringprep_locale_charset_slow() lookup; NULL
   until that lookup has been made.  */
static const char *stringprep_locale_charset_cache;
# endif
78
79 /**
80 * stringprep_locale_charset:
81 *
82 * Find out system locale charset.
83 *
84 * Note that this function return what it believe the SYSTEM is using
85 * as a locale, not what locale the program is currently in (modified,
86 * e.g., by a setlocale(LC_CTYPE, "ISO-8859-1")). The reason is that
87 * data read from argv[], stdin etc comes from the system, and is more
88 * likely to be encoded using the system locale than the program
89 * locale.
90 *
91 * You can set the environment variable CHARSET to override the value
92 * returned. Note that this function caches the result, so you will
93 * have to modify CHARSET before calling (even indirectly) any
94 * stringprep functions, e.g., by setting it when invoking the
95 * application.
96 *
97 * Return value: Return the character set used by the system locale.
98 * It will never return NULL, but use "ASCII" as a fallback.
99 **/
100 # ifdef _LIBC
101 # define stringprep_locale_charset() nl_langinfo (CODESET)
102 # else
103 const char *
104 stringprep_locale_charset (void)
105 {
106 if (!stringprep_locale_charset_cache)
107 stringprep_locale_charset_cache = stringprep_locale_charset_slow ();
108
109 return stringprep_locale_charset_cache;
110 }
111 # endif
112
/* ICONV_CONST is the qualifier this file puts on iconv()'s input
   buffer.  Fall back to "no qualifier" when the build configuration
   did not define it.  */
#ifndef ICONV_CONST
# define ICONV_CONST
#endif

/**
 * stringprep_convert:
 * @str: input zero-terminated string.
 * @to_codeset: name of destination character set.
 * @from_codeset: name of origin character set, as used by @str.
 *
 * Convert the string from one character set to another using the
 * system's iconv() function.  When @to_codeset and @from_codeset are
 * the same string, @str is simply duplicated.
 *
 * Return value: Returns newly allocated zero-terminated string which
 *   is @str transcoded into to_codeset, to be released with free().
 *   Returns NULL when the conversion is not available, when @str
 *   cannot be converted completely, or when memory runs out.
 **/
char *
stringprep_convert (const char *str,
                    const char *to_codeset, const char *from_codeset)
{
  iconv_t cd;
  char *dest = NULL;
  char *outp;
  ICONV_CONST char *p;
  size_t inbytes_remaining;
  size_t outbytes_remaining;
  size_t outbuf_size;
  size_t expansion;
  int flushing = 0;
  int have_error = 0;

  if (strcmp (to_codeset, from_codeset) == 0)
    {
      /* Same encoding on both sides: a plain copy is all we need.  */
#if defined HAVE_STRDUP || defined _LIBC
      return strdup (str);
#else
      char *copy = malloc (strlen (str) + 1);
      if (!copy)
        return NULL;
      return strcpy (copy, str);
#endif
    }

  cd = iconv_open (to_codeset, from_codeset);

  if (cd == (iconv_t) -1)
    return NULL;

  p = (ICONV_CONST char *) str;

  inbytes_remaining = strlen (p);

  /* Guess the maximum length the output string can have: allow
     max (7, MB_CUR_MAX) output bytes per input byte, plus the NUL.
     Refuse inputs for which that product does not fit in size_t.  */
  expansion = MB_CUR_MAX > 7 ? MB_CUR_MAX : 7;
  if (inbytes_remaining >= (size_t) -1 / expansion)
    {
      have_error = 1;
      goto out;
    }
  outbuf_size = (inbytes_remaining + 1) * expansion;

  dest = malloc (outbuf_size);
  if (dest == NULL)
    goto out;
  outp = dest;
  outbytes_remaining = outbuf_size - 1;	/* -1 for NUL */

  for (;;)
    {
      size_t err;

      if (flushing)
        /* Input is exhausted: let iconv() emit whatever is needed to
           bring the output back to its initial shift state.  */
        err = iconv (cd, NULL, NULL, &outp, &outbytes_remaining);
      else
        err = iconv (cd, (ICONV_CONST char **) &p, &inbytes_remaining,
                     &outp, &outbytes_remaining);

      if (err != (size_t) -1)
        {
          if (flushing)
            break;
          flushing = 1;
          continue;
        }

      if (errno == E2BIG)
        {
          /* Output buffer exhausted: double it and carry on from
             where the conversion stopped.  */
          size_t used = outp - dest;
          char *newdest;

          if (outbuf_size > (size_t) -1 / 2)
            {
              have_error = 1;
              goto out;
            }
          outbuf_size *= 2;

          newdest = realloc (dest, outbuf_size);
          if (newdest == NULL)
            {
              have_error = 1;
              goto out;
            }
          dest = newdest;

          outp = dest + used;
          outbytes_remaining = outbuf_size - used - 1;	/* -1 for NUL */
          continue;
        }

      /* EINVAL while converting means the input ends in an incomplete
         sequence; that is not flagged here but is caught by the
         unconsumed-input check below.  Everything else (EILSEQ and
         unexpected errors, or any failure while flushing) is fatal.  */
      if (flushing || errno != EINVAL)
        have_error = 1;
      break;
    }

  *outp = '\0';

  /* Anything left unread means the string was not fully converted.  */
  if (*p != '\0')
    have_error = 1;

out:
  iconv_close (cd);

  if (have_error)
    {
      free (dest);
      dest = NULL;
    }

  return dest;
}
228
229 #else /* HAVE_ICONV */
230
/* Fallback used when iconv is unavailable: no charset detection is
   attempted and the answer is always "ASCII".  */
const char *
stringprep_locale_charset (void)
{
  return "ASCII";
}
236
/* Fallback used when iconv is unavailable.  No conversion takes
   place: a warning is written to stderr and STR is duplicated as is;
   TO_CODESET and FROM_CODESET are ignored.  Returns a malloc'ed copy
   of STR, or NULL if the allocation fails.  */
char *
stringprep_convert (const char *str,
                    const char *to_codeset, const char *from_codeset)
{
  size_t size;
  char *copy;

  fputs ("libidn: warning: libiconv not installed, cannot "
         "convert data to UTF-8\n", stderr);

  size = strlen (str) + 1;	/* include the terminating NUL */
  copy = malloc (size);
  if (copy != NULL)
    memcpy (copy, str, size);

  return copy;
}
250
251 #endif /* HAVE_ICONV */
252
/**
 * stringprep_locale_to_utf8:
 * @str: input zero terminated string.
 *
 * Transcode @str from the character set reported by
 * stringprep_locale_charset() into UTF-8.  The work is done by
 * stringprep_convert().
 *
 * Return value: Whatever stringprep_convert() returns for this
 *   conversion: a newly allocated zero-terminated string holding
 *   @str in UTF-8.
 **/
char *
stringprep_locale_to_utf8 (const char *str)
{
  const char *locale_charset = stringprep_locale_charset ();

  return stringprep_convert (str, "UTF-8", locale_charset);
}
268
/**
 * stringprep_utf8_to_locale:
 * @str: input zero terminated string.
 *
 * Transcode @str from UTF-8 into the character set reported by
 * stringprep_locale_charset().  The work is done by
 * stringprep_convert().
 *
 * Return value: Whatever stringprep_convert() returns for this
 *   conversion: a newly allocated zero-terminated string holding
 *   @str in the locale's character set.
 **/
char *
stringprep_utf8_to_locale (const char *str)
{
  const char *locale_charset = stringprep_locale_charset ();

  return stringprep_convert (str, locale_charset, "UTF-8");
}
This page took 0.043872 seconds and 5 git commands to generate.