]> sourceware.org Git - glibc.git/blame - libidn/toutf8.c
ChangeLog for libidn.
[glibc.git] / libidn / toutf8.c
CommitLineData
01859b1c
UD
1/* toutf8.c Convert strings from system locale into UTF-8.
2 * Copyright (C) 2002, 2003, 2004 Simon Josefsson
3 *
4 * This file is part of GNU Libidn.
5 *
6 * GNU Libidn is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
21
22#if HAVE_CONFIG_H
23# include "config.h"
24#endif
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <errno.h>
30
31#include "stringprep.h"
32
33#ifdef _LIBC
34# define HAVE_ICONV 1
35# define LOCALE_WORKS 1
36# define ICONV_CONST
37#endif
38
39#ifdef HAVE_ICONV
40# include <iconv.h>
41
42# if LOCALE_WORKS
43# include <langinfo.h>
44# include <locale.h>
45# endif
46
47# ifndef _LIBC
/* Work out the charset of the system locale without using the cache.

   Lookup order:
     1. the CHARSET environment variable, when set and non-empty;
     2. when LOCALE_WORKS is defined, the codeset reported by
        nl_langinfo (CODESET) while LC_CTYPE is temporarily switched to
        the environment's locale ("");
     3. the literal "ASCII".

   Never returns NULL.  The returned string must not be freed or
   modified by the caller.  */
static const char *
stringprep_locale_charset_slow (void)
{
  const char *charset = getenv ("CHARSET");	/* flawfinder: ignore */

  if (charset && *charset)
    return charset;

# ifdef LOCALE_WORKS
  {
    /* Private copy of the codeset name: the string returned by
       nl_langinfo() may be invalidated once the locale is switched
       back below.  */
    static char codeset_copy[64];
    const char *current;
    char *saved = NULL;

    /* The string returned by setlocale() may be overwritten by the
       next setlocale() call, so keep our own copy for the restore.  */
    current = setlocale (LC_CTYPE, NULL);
    if (current)
      {
	saved = malloc (strlen (current) + 1);
	if (saved)
	  strcpy (saved, current);
      }

    /* Only switch locales when we are certain we can switch back.  */
    if (saved)
      {
	codeset_copy[0] = '\0';
	setlocale (LC_CTYPE, "");

	charset = nl_langinfo (CODESET);
	if (charset && *charset && strlen (charset) < sizeof codeset_copy)
	  strcpy (codeset_copy, charset);

	setlocale (LC_CTYPE, saved);
	free (saved);

	if (codeset_copy[0])
	  return codeset_copy;
      }
  }
# endif

  return "ASCII";
}

/* Charset name remembered by stringprep_locale_charset(); NULL until
   the first lookup has been performed.  */
static const char *stringprep_locale_charset_cache;
77# endif
78
79/**
80 * stringprep_locale_charset:
81 *
82 * Find out system locale charset.
83 *
84 * Note that this function returns what it believes the SYSTEM is using
85 * as a locale, not what locale the program is currently in (modified,
86 * e.g., by a setlocale(LC_CTYPE, "ISO-8859-1")). The reason is that
87 * data read from argv[], stdin etc comes from the system, and is more
88 * likely to be encoded using the system locale than the program
89 * locale.
90 *
91 * You can set the environment variable CHARSET to override the value
92 * returned. Note that this function caches the result, so you will
93 * have to modify CHARSET before calling (even indirectly) any
94 * stringprep functions, e.g., by setting it when invoking the
95 * application.
96 *
97 * Return value: Return the character set used by the system locale.
98 * It will never return NULL, but use "ASCII" as a fallback.
99 **/
100# ifdef _LIBC
101# define stringprep_locale_charset() nl_langinfo (CODESET)
102# else
103const char *
104stringprep_locale_charset (void)
105{
106 if (!stringprep_locale_charset_cache)
107 stringprep_locale_charset_cache = stringprep_locale_charset_slow ();
108
109 return stringprep_locale_charset_cache;
110}
111# endif
112
/* Fallback so the casts below stay well-formed when the build
   configuration did not provide ICONV_CONST.  */
#ifndef ICONV_CONST
# define ICONV_CONST
#endif

/**
 * stringprep_convert:
 * @str: input zero-terminated string.
 * @to_codeset: name of destination character set.
 * @from_codeset: name of origin character set, as used by @str.
 *
 * Convert the string from one character set to another using the
 * system's iconv() function.  When both character set names compare
 * equal, a plain copy of @str is returned without invoking iconv().
 *
 * Return value: Returns newly allocated zero-terminated string which
 * is @str transcoded into to_codeset, to be released with free() by
 * the caller.  Returns NULL when the conversion is not supported, when
 * @str contains an invalid or incomplete sequence, or when memory
 * cannot be allocated.
 **/
char *
stringprep_convert (const char *str,
		    const char *to_codeset, const char *from_codeset)
{
  iconv_t cd;
  char *dest = NULL;
  char *outp;
  ICONV_CONST char *p;
  size_t inbytes_remaining;
  size_t outbytes_remaining;
  size_t outbuf_size;
  size_t expansion;
  int flushing = 0;
  int have_error = 0;

  if (strcmp (to_codeset, from_codeset) == 0)
    {
#if defined HAVE_STRDUP || defined _LIBC
      return strdup (str);
#else
      char *copy = malloc (strlen (str) + 1);
      if (!copy)
	return NULL;
      return strcpy (copy, str);
#endif
    }

  cd = iconv_open (to_codeset, from_codeset);

  if (cd == (iconv_t) -1)
    return NULL;

  p = (ICONV_CONST char *) str;

  inbytes_remaining = strlen (p);

  /* Guess the maximum length the output string can have: at least 7
     output bytes per input byte, more if the locale needs it.  */
  expansion = MB_CUR_MAX > 7 ? MB_CUR_MAX : 7;

  /* Refuse inputs whose size estimate would wrap around size_t.  */
  if (inbytes_remaining >= (size_t) -1 / expansion)
    {
      have_error = 1;
      goto out;
    }
  outbuf_size = (inbytes_remaining + 1) * expansion;

  outp = dest = malloc (outbuf_size);
  if (dest == NULL)
    goto out;
  outbytes_remaining = outbuf_size - 1;	/* -1 for NUL */

  for (;;)
    {
      size_t err;

      if (!flushing)
	err = iconv (cd, (ICONV_CONST char **) &p, &inbytes_remaining,
		     &outp, &outbytes_remaining);
      else
	/* A NULL input asks iconv() to emit whatever is needed to bring
	   a stateful target encoding back to its initial shift state.  */
	err = iconv (cd, NULL, NULL, &outp, &outbytes_remaining);

      if (err != (size_t) -1)
	{
	  if (flushing)
	    break;

	  /* All input consumed; one more round to flush the state.  */
	  flushing = 1;
	  continue;
	}

      if (errno == E2BIG)
	{
	  /* Output buffer exhausted: double it and resume where the
	     conversion (or the flush) stopped.  */
	  size_t used = outp - dest;
	  char *newdest;

	  if (outbuf_size > (size_t) -1 / 2)
	    {
	      have_error = 1;
	      break;
	    }

	  outbuf_size *= 2;
	  newdest = realloc (dest, outbuf_size);
	  if (newdest == NULL)
	    {
	      have_error = 1;
	      break;
	    }
	  dest = newdest;

	  outp = dest + used;
	  outbytes_remaining = outbuf_size - used - 1;	/* -1 for NUL */
	  continue;
	}

      /* EILSEQ (invalid input), EINVAL (input ends in the middle of a
         sequence, so part of @str was left unconverted) and any other
         failure all make the result unusable.  */
      have_error = 1;
      break;
    }

  /* One byte was always held back for the terminator.  */
  *outp = '\0';

 out:
  iconv_close (cd);

  if (have_error)
    {
      free (dest);
      dest = NULL;
    }

  return dest;
}
228
229#else /* HAVE_ICONV */
230
/* Without iconv no charset conversion is possible, so the locale
   charset is always reported as "ASCII".  Never returns NULL.  */
const char *
stringprep_locale_charset (void)
{
  return "ASCII";
}
236
/* Stand-in used when iconv is unavailable: prints a warning on stderr
   and returns an unconverted, newly allocated copy of STR (NULL when
   the allocation fails).  TO_CODESET and FROM_CODESET are ignored.  */
char *
stringprep_convert (const char *str,
		    const char *to_codeset, const char *from_codeset)
{
  size_t size;
  char *copy;

  fprintf (stderr, "libidn: warning: libiconv not installed, cannot "
	   "convert data to UTF-8\n");

  size = strlen (str) + 1;	/* include the terminating NUL */
  copy = malloc (size);
  if (copy)
    memcpy (copy, str, size);

  return copy;
}
250
251#endif /* HAVE_ICONV */
252
/**
 * stringprep_locale_to_utf8:
 * @str: input zero terminated string.
 *
 * Transcode @str from the character set reported by
 * stringprep_locale_charset() into UTF-8, delegating the work to
 * stringprep_convert().
 *
 * Return value: Returns whatever stringprep_convert() returns: a newly
 * allocated zero-terminated UTF-8 version of @str, or NULL when that
 * function fails.
 **/
char *
stringprep_locale_to_utf8 (const char *str)
{
  const char *locale_codeset = stringprep_locale_charset ();
  char *utf8;

  utf8 = stringprep_convert (str, "UTF-8", locale_codeset);
  return utf8;
}
268
/**
 * stringprep_utf8_to_locale:
 * @str: input zero terminated string.
 *
 * Transcode @str from UTF-8 into the character set reported by
 * stringprep_locale_charset(), delegating the work to
 * stringprep_convert().
 *
 * Return value: Returns whatever stringprep_convert() returns: a newly
 * allocated zero-terminated version of @str in the locale's character
 * set, or NULL when that function fails.
 **/
char *
stringprep_utf8_to_locale (const char *str)
{
  const char *locale_codeset = stringprep_locale_charset ();
  char *converted;

  converted = stringprep_convert (str, locale_codeset, "UTF-8");
  return converted;
}
This page took 0.05212 seconds and 5 git commands to generate.