]> sourceware.org Git - glibc.git/blame - iconv/gconv_simple.c
Update.
[glibc.git] / iconv / gconv_simple.c
CommitLineData
6973fc01 1/* Simple transformations functions.
bd32e4a6 2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
6973fc01
UD
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
f1fa8b68
UD
21#include <byteswap.h>
22#include <endian.h>
f4017d20 23#include <errno.h>
6973fc01 24#include <gconv.h>
d2374599 25#include <stdint.h>
6973fc01
UD
26#include <stdlib.h>
27#include <string.h>
28#include <wchar.h>
29#include <sys/param.h>
30
a904b5d9
UD
31#ifndef EILSEQ
32# define EILSEQ EINVAL
33#endif
34
35
/* Lookup tables shared by the UTF-8 conversions below.  Both are
   indexed by (sequence length - 2).

   encoding_mask[n] has a bit set for every bit position a code point
   must leave clear in order to fit into an (n + 2)-byte sequence.  */
static const uint32_t encoding_mask[] =
{
  ~UINT32_C (0x7ff),		/* 2 bytes.  */
  ~UINT32_C (0xffff),		/* 3 bytes.  */
  ~UINT32_C (0x1fffff),		/* 4 bytes.  */
  ~UINT32_C (0x3ffffff)		/* 5 bytes.  */
};

/* encoding_byte[n] is the length marker placed in the first byte of an
   (n + 2)-byte sequence; the payload bits are OR-ed in afterwards.  */
static const unsigned char encoding_byte[] =
{
  0xc0,				/* 2 bytes.  */
  0xe0,				/* 3 bytes.  */
  0xf0,				/* 4 bytes.  */
  0xf8,				/* 5 bytes.  */
  0xfc				/* 6 bytes.  */
};
47
48
f1fa8b68
UD
/* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal ucs4 format and the real UCS4
   format is, if any, the endianness.  The Unicode/ISO 10646 standard
   says that unless some higher protocol specifies it differently, the
   byte order is big endian.  */
8619129f
UD
54#define DEFINE_INIT 0
55#define DEFINE_FINI 0
56#define MIN_NEEDED_FROM 4
57#define MIN_NEEDED_TO 4
58#define FROM_DIRECTION 1
59#define FROM_LOOP internal_ucs4_loop
60#define TO_LOOP internal_ucs4_loop /* This is not used. */
61#define FUNCTION_NAME __gconv_transform_internal_ucs4
62
63
/* Convert as many complete 4-byte units as fit from the internal
   (host byte order) representation to big endian UCS4.

   INPTRP/INEND delimit the input, OUTPTRP/OUTEND the output buffer;
   *INPTRP and *OUTPTRP are advanced past the converted data.  STATE and
   DATA are unused here.  If CONVERTED is not NULL the number of
   converted units is added to *CONVERTED.

   Returns __GCONV_FULL_OUTPUT if the output buffer is exactly full or
   cannot hold the next pending unit, __GCONV_EMPTY_INPUT if all input
   was consumed, and __GCONV_INCOMPLETE_INPUT if fewer than four input
   bytes remain.  */
static inline int
internal_ucs4_loop (const unsigned char **inptrp, const unsigned char *inend,
		    unsigned char **outptrp, unsigned char *outend,
		    mbstate_t *state, void *data, size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  /* Store first, then advance: incrementing the result of a cast is
     not valid C.  */
  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    *(uint32_t *) outptr = bswap_32 (*(const uint32_t *) inptr);

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*outptrp == outend)
    result = __GCONV_FULL_OUTPUT;
  else if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (inend - *inptrp >= 4)
    /* A whole unit is still pending, so it was the output buffer
       (with one to three bytes left) that stopped us.  */
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  /* Update the caller's counter, not the local pointer.  */
  if (converted != NULL)
    *converted += n_convert;

  return result;
}
d2374599 104
8619129f 105#include <iconv/skeleton.c>
d2374599 106
d2374599 107
8619129f
UD
108/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
109#define DEFINE_INIT 0
110#define DEFINE_FINI 0
111#define MIN_NEEDED_FROM 1
112#define MIN_NEEDED_TO 4
113#define FROM_DIRECTION 1
114#define FROM_LOOP ascii_internal_loop
115#define TO_LOOP ascii_internal_loop /* This is not used. */
116#define FUNCTION_NAME __gconv_transform_ascii_internal
117
118#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
119#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
120#define LOOPFCT FROM_LOOP
/* One conversion step: widen a single ISO 646-IRV byte to a 32-bit
   unit.  Bytes above 0x7f set RESULT to __GCONV_ILLEGAL_INPUT and leave
   the enclosing loop without consuming anything.  INPTR, OUTPTR and
   RESULT are presumably supplied by the including loop template (not
   visible in this file).  */
#define BODY \
  {									      \
    if (*inptr > '\x7f')						      \
      {									      \
	/* This is no correct ANSI_X3.4-1968 character.  */		      \
	result = __GCONV_ILLEGAL_INPUT;					      \
	break;								      \
      }									      \
									      \
    /* It's an one byte sequence.  Store first, then advance: the	      \
       result of a cast is not an lvalue and cannot be incremented.  */     \
    *((uint32_t *) outptr) = *inptr++;					      \
    outptr += sizeof (uint32_t);					      \
  }
133#include <iconv/loop.c>
134#include <iconv/skeleton.c>
135
136
137/* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
138#define DEFINE_INIT 0
139#define DEFINE_FINI 0
140#define MIN_NEEDED_FROM 4
141#define MIN_NEEDED_TO 1
142#define FROM_DIRECTION 1
143#define FROM_LOOP internal_ascii_loop
144#define TO_LOOP internal_ascii_loop /* This is not used. */
145#define FUNCTION_NAME __gconv_transform_internal_ascii
146
147#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
148#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
149#define LOOPFCT FROM_LOOP
/* One conversion step: narrow a single 32-bit unit to an ISO 646-IRV
   byte.  Values above 0x7f set RESULT to __GCONV_ILLEGAL_INPUT and
   leave the enclosing loop without consuming anything.  INPTR, OUTPTR
   and RESULT are presumably supplied by the including loop template
   (not visible in this file).  */
#define BODY \
  {									      \
    if (*((const uint32_t *) inptr) > 0x7f)				      \
      {									      \
	/* This is no correct ANSI_X3.4-1968 character.  */		      \
	result = __GCONV_ILLEGAL_INPUT;					      \
	break;								      \
      }									      \
									      \
    /* It's an one byte sequence.  Read first, then advance: the	      \
       result of a cast is not an lvalue and cannot be incremented.  */     \
    *outptr++ = (unsigned char) *((const uint32_t *) inptr);		      \
    inptr += sizeof (uint32_t);						      \
  }
162#include <iconv/loop.c>
163#include <iconv/skeleton.c>
164
165
166/* Convert from the internal (UCS4-like) format to UTF-8. */
167#define DEFINE_INIT 0
168#define DEFINE_FINI 0
169#define MIN_NEEDED_FROM 4
170#define MIN_NEEDED_TO 1
171#define MAX_NEEDED_TO 6
172#define FROM_DIRECTION 1
173#define FROM_LOOP internal_utf8_loop
174#define TO_LOOP internal_utf8_loop /* This is not used. */
175#define FUNCTION_NAME __gconv_transform_internal_utf8
176
177#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
178#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
5aa8ff62 179#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
8619129f
UD
180#define LOOPFCT FROM_LOOP
/* One conversion step: encode a single 32-bit unit as UTF-8.

   Values below 0x80 are copied as one byte.  Otherwise the sequence
   length (2..6) is looked up with encoding_mask, the length marker from
   encoding_byte is stored, and the payload is filled in six bits at a
   time from the last byte backwards.  If the sequence does not fit
   before OUTEND, RESULT is set to __GCONV_FULL_OUTPUT and the enclosing
   loop is left without consuming the input.  INPTR, OUTPTR, OUTEND and
   RESULT are presumably supplied by the including loop template (not
   visible in this file).  */
#define BODY \
  {									      \
    uint32_t wc = *((const uint32_t *) inptr);				      \
									      \
    /* Since we control every character we read this cannot happen.  */     \
    assert (wc <= 0x7fffffff);						      \
									      \
    if (wc < 0x80)							      \
      /* It's an one byte sequence.  */					      \
      *outptr++ = (unsigned char) wc;					      \
    else								      \
      {									      \
	size_t step;							      \
	unsigned char *start;						      \
									      \
	/* Find the shortest sequence length that can hold WC.  */	      \
	for (step = 2; step < 6; ++step)				      \
	  if ((wc & encoding_mask[step - 2]) == 0)			      \
	    break;							      \
									      \
	/* A sequence ending exactly at OUTEND still fits, so only a	      \
	   strictly larger end position is an overflow.  */		      \
	if (outptr + step > outend)					      \
	  {								      \
	    /* Too long.  */						      \
	    result = __GCONV_FULL_OUTPUT;				      \
	    break;							      \
	  }								      \
									      \
	start = outptr;							      \
	*outptr = encoding_byte[step - 2];				      \
	outptr += step;							      \
	--step;								      \
	/* Fill the continuation bytes from the back.  */		      \
	do								      \
	  {								      \
	    start[step] = 0x80 | (wc & 0x3f);				      \
	    wc >>= 6;							      \
	  }								      \
	while (--step > 0);						      \
	/* The remaining high bits go into the first byte.  */		      \
	start[0] |= wc;							      \
      }									      \
									      \
    inptr += 4;								      \
  }
222#include <iconv/loop.c>
223#include <iconv/skeleton.c>
224
225
226/* Convert from UTF-8 to the internal (UCS4-like) format. */
227#define DEFINE_INIT 0
228#define DEFINE_FINI 0
229#define MIN_NEEDED_FROM 1
230#define MAX_NEEDED_FROM 6
231#define MIN_NEEDED_TO 4
232#define FROM_DIRECTION 1
233#define FROM_LOOP utf8_internal_loop
234#define TO_LOOP utf8_internal_loop /* This is not used. */
235#define FUNCTION_NAME __gconv_transform_utf8_internal
236
237#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
5aa8ff62 238#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
8619129f
UD
239#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
240#define LOOPFCT FROM_LOOP
/* One conversion step: decode a single UTF-8 sequence into a 32-bit
   unit.

   The first byte selects the expected sequence length (1..6).  RESULT
   is set and the enclosing loop is left, without consuming anything,
   when
     - the first byte is not a valid lead byte (this includes 0xc0 and
       0xc1)                                   -> __GCONV_ILLEGAL_INPUT
     - NEED_LENGTH_TEST is set and the sequence extends beyond INEND
                                               -> __GCONV_INCOMPLETE_INPUT
     - a trail byte is not in the range 0x80..0xbf, or a sequence of
       three or more bytes encodes a value that would have fit into a
       shorter one                             -> __GCONV_ILLEGAL_INPUT
   INPTR, INEND, OUTPTR, RESULT and NEED_LENGTH_TEST are presumably
   supplied by the including loop template (not visible in this
   file).  */
#define BODY \
  {									      \
    uint32_t ch;							      \
    uint_fast32_t cnt;							      \
    uint_fast32_t i;							      \
									      \
    /* Next input byte.  */						      \
    ch = *inptr;							      \
									      \
    if (ch < 0x80)							      \
      {									      \
	/* One byte sequence.  */					      \
	cnt = 1;							      \
	++inptr;							      \
      }									      \
    else								      \
      {									      \
	if (ch >= 0xc2 && ch < 0xe0)					      \
	  {								      \
	    /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1,  \
	       otherwise the wide character could have been represented     \
	       using a single byte.  */					      \
	    cnt = 2;							      \
	    ch &= 0x1f;							      \
	  }								      \
	else if ((ch & 0xf0) == 0xe0)					      \
	  {								      \
	    /* We expect three bytes.  */				      \
	    cnt = 3;							      \
	    ch &= 0x0f;							      \
	  }								      \
	else if ((ch & 0xf8) == 0xf0)					      \
	  {								      \
	    /* We expect four bytes.  */				      \
	    cnt = 4;							      \
	    ch &= 0x07;							      \
	  }								      \
	else if ((ch & 0xfc) == 0xf8)					      \
	  {								      \
	    /* We expect five bytes.  */				      \
	    cnt = 5;							      \
	    ch &= 0x03;							      \
	  }								      \
	else if ((ch & 0xfe) == 0xfc)					      \
	  {								      \
	    /* We expect six bytes.  */					      \
	    cnt = 6;							      \
	    ch &= 0x01;							      \
	  }								      \
	else								      \
	  {								      \
	    /* This is an illegal encoding.  */				      \
	    result = __GCONV_ILLEGAL_INPUT;				      \
	    break;							      \
	  }								      \
									      \
	if (NEED_LENGTH_TEST && inptr + cnt > inend)			      \
	  {								      \
	    /* We don't have enough input.  */				      \
	    result = __GCONV_INCOMPLETE_INPUT;				      \
	    break;							      \
	  }								      \
									      \
	/* Read the possible remaining bytes.  */			      \
	for (i = 1; i < cnt; ++i)					      \
	  {								      \
	    uint32_t byte = inptr[i];					      \
									      \
	    if ((byte & 0xc0) != 0x80)					      \
	      /* This is an illegal encoding.  */			      \
	      break;							      \
									      \
	    ch <<= 6;							      \
	    ch |= byte & 0x3f;						      \
	  }								      \
									      \
	/* If i < cnt, some trail byte was not >= 0x80, < 0xc0.	      \
	   If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could     \
	   have been represented with fewer than cnt bytes.  */		      \
	if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))	      \
	  {								      \
	    /* This is an illegal encoding.  */				      \
	    result = __GCONV_ILLEGAL_INPUT;				      \
	    break;							      \
	  }								      \
									      \
	inptr += cnt;							      \
      }									      \
									      \
    /* Now store the result and adjust the output pointer.  Store	      \
       first, then advance: the result of a cast is not an lvalue and	      \
       cannot be incremented.  */					      \
    *((uint32_t *) outptr) = ch;					      \
    outptr += sizeof (uint32_t);					      \
  }
333#include <iconv/loop.c>
334#include <iconv/skeleton.c>
335
336
337/* Convert from UCS2 to the internal (UCS4-like) format. */
338#define DEFINE_INIT 0
339#define DEFINE_FINI 0
340#define MIN_NEEDED_FROM 2
341#define MIN_NEEDED_TO 4
342#define FROM_DIRECTION 1
343#define FROM_LOOP ucs2_internal_loop
344#define TO_LOOP ucs2_internal_loop /* This is not used. */
345#define FUNCTION_NAME __gconv_transform_ucs2_internal
346
347#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
348#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
349#define LOOPFCT FROM_LOOP
/* One conversion step: widen a single big endian UCS2 unit to a 32-bit
   unit in host byte order.  On little endian hosts the two input bytes
   are swapped first.  The store and the pointer updates are separate
   statements because the result of a cast is not an lvalue and cannot
   be incremented.  INPTR and OUTPTR are presumably supplied by the
   including loop template (not visible in this file).  */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define BODY \
  {									      \
    *((uint32_t *) outptr) = bswap_16 (*(const uint16_t *) inptr);	      \
    outptr += sizeof (uint32_t);					      \
    inptr += 2;								      \
  }
#else
# define BODY \
  {									      \
    *((uint32_t *) outptr) = *((const uint16_t *) inptr);		      \
    outptr += sizeof (uint32_t);					      \
    inptr += 2;								      \
  }
#endif
8619129f
UD
358#include <iconv/loop.c>
359#include <iconv/skeleton.c>
360
361
362/* Convert from the internal (UCS4-like) format to UCS2. */
363#define DEFINE_INIT 0
364#define DEFINE_FINI 0
365#define MIN_NEEDED_FROM 4
366#define MIN_NEEDED_TO 2
367#define FROM_DIRECTION 1
368#define FROM_LOOP internal_ucs2_loop
369#define TO_LOOP internal_ucs2_loop /* This is not used. */
370#define FUNCTION_NAME __gconv_transform_internal_ucs2
371
372#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
373#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
374#define LOOPFCT FROM_LOOP
/* One conversion step: narrow a single 32-bit unit to a big endian
   UCS2 unit.  Values of 0x10000 and above set RESULT to
   __GCONV_ILLEGAL_INPUT and leave the enclosing loop without consuming
   anything.  The store and the pointer updates are separate statements
   because the result of a cast is not an lvalue and cannot be
   incremented.  INPTR, OUTPTR and RESULT are presumably supplied by
   the including loop template (not visible in this file).  */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define BODY \
  {									      \
    if (*((const uint32_t *) inptr) >= 0x10000)				      \
      {									      \
	result = __GCONV_ILLEGAL_INPUT;					      \
	break;								      \
      }									      \
    /* Please note that we use the `uint32_t' from-pointer as an	      \
       `uint16_t' pointer which works since we are on a little endian     \
       machine: the low 16 bits come first.  */				      \
    *((uint16_t *) outptr) = bswap_16 (*((const uint16_t *) inptr));	      \
    outptr += sizeof (uint16_t);					      \
    inptr += 4;								      \
  }
#else
# define BODY \
  {									      \
    if (*((const uint32_t *) inptr) >= 0x10000)				      \
      {									      \
	result = __GCONV_ILLEGAL_INPUT;					      \
	break;								      \
      }									      \
    *((uint16_t *) outptr) = *((const uint32_t *) inptr);		      \
    outptr += sizeof (uint16_t);					      \
    inptr += 4;								      \
  }
#endif
8619129f
UD
399#include <iconv/loop.c>
400#include <iconv/skeleton.c>
9b26f5c4
UD
401
402
403/* Convert from UCS2 in little endian to the internal (UCS4-like) format. */
404#define DEFINE_INIT 0
405#define DEFINE_FINI 0
406#define MIN_NEEDED_FROM 2
407#define MIN_NEEDED_TO 4
408#define FROM_DIRECTION 1
409#define FROM_LOOP ucs2little_internal_loop
410#define TO_LOOP ucs2little_internal_loop /* This is not used.*/
411#define FUNCTION_NAME __gconv_transform_ucs2little_internal
412
413#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
414#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
415#define LOOPFCT FROM_LOOP
/* One conversion step: widen a single little endian UCS2 unit to a
   32-bit unit in host byte order.  On big endian hosts the two input
   bytes are swapped first.  The store and the pointer updates are
   separate statements because the result of a cast is not an lvalue
   and cannot be incremented.  INPTR and OUTPTR are presumably supplied
   by the including loop template (not visible in this file).  */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define BODY \
  {									      \
    *((uint32_t *) outptr) = *((const uint16_t *) inptr);		      \
    outptr += sizeof (uint32_t);					      \
    inptr += 2;								      \
  }
#else
# define BODY \
  {									      \
    *((uint32_t *) outptr) = bswap_16 (*(const uint16_t *) inptr);	      \
    outptr += sizeof (uint32_t);					      \
    inptr += 2;								      \
  }
#endif
424#include <iconv/loop.c>
425#include <iconv/skeleton.c>
426
427
428/* Convert from the internal (UCS4-like) format to UCS2 in little endian. */
429#define DEFINE_INIT 0
430#define DEFINE_FINI 0
431#define MIN_NEEDED_FROM 4
432#define MIN_NEEDED_TO 2
433#define FROM_DIRECTION 1
434#define FROM_LOOP internal_ucs2little_loop
435#define TO_LOOP internal_ucs2little_loop /* This is not used.*/
436#define FUNCTION_NAME __gconv_transform_internal_ucs2little
437
438#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
439#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
440#define LOOPFCT FROM_LOOP
/* One conversion step: narrow a single 32-bit unit to a little endian
   UCS2 unit.  Values of 0x10000 and above set RESULT to
   __GCONV_ILLEGAL_INPUT and leave the enclosing loop without consuming
   anything.  The store and the pointer updates are separate statements
   because the result of a cast is not an lvalue and cannot be
   incremented.  INPTR, OUTPTR and RESULT are presumably supplied by
   the including loop template (not visible in this file).  */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define BODY \
  {									      \
    if (*((const uint32_t *) inptr) >= 0x10000)				      \
      {									      \
	result = __GCONV_ILLEGAL_INPUT;					      \
	break;								      \
      }									      \
    *((uint16_t *) outptr) = *((const uint32_t *) inptr);		      \
    outptr += sizeof (uint16_t);					      \
    inptr += 4;								      \
  }
#else
# define BODY \
  {									      \
    if (*((const uint32_t *) inptr) >= 0x10000)				      \
      {									      \
	result = __GCONV_ILLEGAL_INPUT;					      \
	break;								      \
      }									      \
    /* On a big endian machine the low 16 bits are the second	      \
       `uint16_t' of the 32-bit unit.  */				      \
    *((uint16_t *) outptr) = bswap_16 (((const uint16_t *) inptr)[1]);     \
    outptr += sizeof (uint16_t);					      \
    inptr += 4;								      \
  }
#endif
463#include <iconv/loop.c>
464#include <iconv/skeleton.c>
This page took 0.119688 seconds and 5 git commands to generate.