]> sourceware.org Git - glibc.git/blame - iconv/gconv_simple.c
Report error if setaffinity wrapper fails (Bug 32040)
[glibc.git] / iconv / gconv_simple.c
CommitLineData
6973fc01 1/* Simple transformations functions.
dff8da6b 2 Copyright (C) 1997-2024 Free Software Foundation, Inc.
6973fc01 3 This file is part of the GNU C Library.
6973fc01
UD
4
5 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
6973fc01
UD
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 13 Lesser General Public License for more details.
6973fc01 14
41bdb6e2 15 You should have received a copy of the GNU Lesser General Public
59ba27a6 16 License along with the GNU C Library; if not, see
5a82c748 17 <https://www.gnu.org/licenses/>. */
6973fc01 18
f1fa8b68 19#include <byteswap.h>
55985355 20#include <dlfcn.h>
f1fa8b68 21#include <endian.h>
f4017d20 22#include <errno.h>
6973fc01 23#include <gconv.h>
d2374599 24#include <stdint.h>
6973fc01
UD
25#include <stdlib.h>
26#include <string.h>
27#include <wchar.h>
28#include <sys/param.h>
f9ad060c 29#include <gconv_int.h>
6973fc01 30
17427edd 31#define BUILTIN_ALIAS(s1, s2) /* nothing */
f9ad060c
UD
32#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
33 MinF, MaxF, MinT, MaxT) \
17427edd 34 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
a784e502 35 const unsigned char **, const unsigned char *, \
17427edd
UD
36 unsigned char **, size_t *, int, int);
37#include "gconv_builtin.h"
38
39
a904b5d9
UD
40#ifndef EILSEQ
41# define EILSEQ EINVAL
42#endif
43
44
f9ad060c
UD
/* Single-byte to INTERNAL conversion that accepts ASCII only: a byte
   below 0x80 is returned unchanged as a wide character, every other byte
   yields WEOF.  STEP is not used.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  return c < 0x80 ? (wint_t) c : WEOF;
}
55
56
f1fa8b68
UD
/* Transform from the internal, UCS4-like format to UCS4.  The two formats
   differ, if at all, only in byte order: Unicode/ISO 10646 says the byte
   order is big endian unless a higher protocol specifies otherwise, so
   the data is byte-swapped on little-endian hosts and copied verbatim on
   big-endian hosts.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs4_loop
#define TO_LOOP                 internal_ucs4_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4
#define ONE_DIRECTION           0


/* Convert as many complete 4-byte units as fit into both buffers, advance
   *INPTRP and *OUTPTRP past them and report why the conversion stopped.
   STEP, STEP_DATA and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, const unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Whole 32-bit units available in the smaller of the two buffers.  */
  size_t nunits = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order is the reverse of the target order: swap unit by unit.  */
  for (size_t done = 0; done < nunits; ++done)
    {
      put32 (dst, __builtin_bswap32 (get32 (src)));
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Host order already matches: a plain copy is enough.  */
  *inptrp = src + nunits * 4;
  *outptrp = __mempcpy (dst, src, nunits * 4);
#else
# error "This endianness is not supported."
#endif

  /* Input exhausted, no room for another unit, or 1-3 trailing bytes.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
c1db8b0d 116
fd1b5c0f
UD
117
/* Complete one character whose leading bytes were stored in the
   conversion state.  The low three bits of state->__count hold the number
   of bytes already buffered in state->__value.__wchb; more bytes are taken
   from *INPTRP until four are available.  With fewer than four the new
   count is stored and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the
   four bytes are written to *OUTPTRP in big-endian order, the count is
   cleared and __GCONV_OK is returned.  OUTEND is not consulted here.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;

  /* Top up the buffered bytes from the new input.  */
  for (; have < 4 && *inptrp < inend; ++have)
    state->__value.__wchb[have] = *(*inptrp)++;

  if (__glibc_unlikely (have < 4))
    {
      /* Still short: remember how many bytes are buffered now.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  unsigned char *out = *outptrp;
  const char *saved = state->__value.__wchb;
#if __BYTE_ORDER == __LITTLE_ENDIAN
  for (int i = 0; i < 4; ++i)
    out[i] = saved[3 - i];
#elif __BYTE_ORDER == __BIG_ENDIAN
  for (int i = 0; i < 4; ++i)
    out[i] = saved[i];
#else
# error "This endianness is not supported."
#endif
  *outptrp = out + 4;

  /* The buffered character has been emitted.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
165
8619129f 166#include <iconv/skeleton.c>
d2374599 167
d2374599 168
4a069c33
UD
/* Transform from UCS4 (big endian) to the internal, UCS4-like format.
   Unlike the opposite direction the input values must be validated.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs4_internal_loop
#define TO_LOOP                 ucs4_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs4_internal
#define ONE_DIRECTION           0


/* Convert 4-byte units while both buffers have room for one.  Values
   above 0x7fffffff are illegal: with IRREVERSIBLE == NULL the function
   returns __GCONV_ILLEGAL_INPUT at once (without updating the caller's
   pointers); with __GCONV_IGNORE_ERRORS set the unit is skipped and
   *IRREVERSIBLE incremented; otherwise the pointers are stored back and
   __GCONV_ILLEGAL_INPUT is returned.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, const unsigned char *outend,
                    size_t *irreversible)
{
  const int flags = step_data->__flags;
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;

  while (src + 4 <= inend && dst + 4 <= outend)
    {
      uint32_t code = get32 (src);
#if __BYTE_ORDER == __LITTLE_ENDIAN
      code = __builtin_bswap32 (code);
#endif

      if (__glibc_likely (code <= 0x7fffffff))
        {
          put32 (dst, code);
          dst += sizeof (uint32_t);
          src += 4;
          continue;
        }

      /* The value is too large.  No transliteration is attempted: this
         is not a representation problem but a genuine bug in the input,
         since UCS4 does not allow such values.  */
      if (irreversible == NULL)
        /* We are transliterating, don't try to correct anything.  */
        return __GCONV_ILLEGAL_INPUT;

      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          *inptrp = src;
          *outptrp = dst;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Just ignore this character.  */
      ++*irreversible;
      src += 4;
    }

  *inptrp = src;
  *outptrp = dst;

  /* Input exhausted, no room for another unit, or 1-3 trailing bytes.  */
  if (src == inend)
    return __GCONV_EMPTY_INPUT;
  if (dst + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
241
4a069c33
UD
242
/* Complete one big-endian UCS4 character whose leading bytes were stored
   in the conversion state.  The low three bits of state->__count hold the
   number of bytes already buffered in state->__value.__wchb; more bytes
   are taken from *INPTRP until four are available.

   Returns __GCONV_INCOMPLETE_INPUT (after storing the new byte count) if
   fewer than four bytes are available, __GCONV_ILLEGAL_INPUT for a value
   above 0x7fffffff unless __GCONV_IGNORE_ERRORS is set, and __GCONV_OK
   otherwise.  An ignored illegal value produces no output.  OUTEND is not
   consulted here; the caller presumably guarantees room for four bytes.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Byte 0 is the most significant byte of the big-endian value, so the
     value exceeds 0x7fffffff exactly when that byte exceeds 0x7f.  This
     matches the range check in ucs4_internal_loop.  */
  if (__glibc_unlikely (((unsigned char *) state->__value.__wchb)[0] > 0x7f))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes consumed by this call; state->__count
             still holds the count from before the call.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
304
305#include <iconv/skeleton.c>
306
307
/* Transform from the internal, UCS4-like format to little-endian UCS4:
   byte-swap on big-endian hosts, plain copy on little-endian hosts.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs4le_loop
#define TO_LOOP                 internal_ucs4le_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4le
#define ONE_DIRECTION           0


/* Convert as many complete 4-byte units as fit into both buffers, advance
   *INPTRP and *OUTPTRP past them and report why the conversion stopped.
   STEP, STEP_DATA and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, const unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Whole 32-bit units available in the smaller of the two buffers.  */
  size_t nunits = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Host order is the reverse of the target order: swap unit by unit.  */
  for (size_t done = 0; done < nunits; ++done)
    {
      put32 (dst, __builtin_bswap32 (get32 (src)));
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order already matches: a plain copy is enough.  */
  *inptrp = src + nunits * 4;
  *outptrp = __mempcpy (dst, src, nunits * 4);
#else
# error "This endianness is not supported."
#endif

  /* Input exhausted, no room for another unit, or 1-3 trailing bytes.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
363
fd1b5c0f
UD
364
/* Complete one character whose leading bytes were stored in the
   conversion state.  The low three bits of state->__count hold the number
   of bytes already buffered in state->__value.__wchb; more bytes are taken
   from *INPTRP until four are available.  With fewer than four the new
   count is stored and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the
   four bytes are written to *OUTPTRP in little-endian order, the count is
   cleared and __GCONV_OK is returned.  OUTEND is not consulted here.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;

  /* Top up the buffered bytes from the new input.  */
  for (; have < 4 && *inptrp < inend; ++have)
    state->__value.__wchb[have] = *(*inptrp)++;

  if (__glibc_unlikely (have < 4))
    {
      /* Still short: remember how many bytes are buffered now.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  unsigned char *out = *outptrp;
  const char *saved = state->__value.__wchb;
#if __BYTE_ORDER == __BIG_ENDIAN
  for (int i = 0; i < 4; ++i)
    out[i] = saved[3 - i];
#else
  for (int i = 0; i < 4; ++i)
    out[i] = saved[i];
#endif
  *outptrp = out + 4;

  /* The buffered character has been emitted.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
412
8d617a71
UD
413#include <iconv/skeleton.c>
414
415
4a069c33
UD
/* Transform from little-endian UCS4 to the internal, UCS4-like format,
   validating every value on the way.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs4le_internal_loop
#define TO_LOOP                 ucs4le_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs4le_internal
#define ONE_DIRECTION           0


/* Convert 4-byte units while both buffers have room for one.  Values
   above 0x7fffffff are illegal: with IRREVERSIBLE == NULL the function
   returns __GCONV_ILLEGAL_INPUT at once (without updating the caller's
   pointers); with __GCONV_IGNORE_ERRORS set the unit is skipped and
   *IRREVERSIBLE incremented; otherwise the pointers are stored back and
   __GCONV_ILLEGAL_INPUT is returned.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, const unsigned char *outend,
                      size_t *irreversible)
{
  const int flags = step_data->__flags;
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;

  while (src + 4 <= inend && dst + 4 <= outend)
    {
      uint32_t code = get32 (src);
#if __BYTE_ORDER == __BIG_ENDIAN
      code = __builtin_bswap32 (code);
#endif

      if (__glibc_likely (code <= 0x7fffffff))
        {
          put32 (dst, code);
          dst += sizeof (uint32_t);
          src += 4;
          continue;
        }

      /* The value is too large.  No transliteration is attempted: this
         is not a representation problem but a genuine bug in the input,
         since UCS4 does not allow such values.  */
      if (irreversible == NULL)
        /* We are transliterating, don't try to correct anything.  */
        return __GCONV_ILLEGAL_INPUT;

      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          *inptrp = src;
          *outptrp = dst;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Just ignore this character.  */
      ++*irreversible;
      src += 4;
    }

  *inptrp = src;
  *outptrp = dst;

  /* Input exhausted, 1-3 trailing bytes, or (the only case left) no room
     for another output unit.  */
  if (src == inend)
    return __GCONV_EMPTY_INPUT;
  if (src + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  assert (dst + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
490
4a069c33
UD
491
/* Complete one little-endian UCS4 character whose leading bytes were
   stored in the conversion state.  The low three bits of state->__count
   hold the number of bytes already buffered in state->__value.__wchb;
   more bytes are taken from *INPTRP until four are available.

   Returns __GCONV_INCOMPLETE_INPUT (after storing the new byte count) if
   fewer than four bytes are available, __GCONV_ILLEGAL_INPUT for a value
   above 0x7fffffff unless __GCONV_IGNORE_ERRORS is set, and __GCONV_OK
   otherwise.  An ignored illegal value produces no output.  OUTEND is not
   consulted here; the caller presumably guarantees room for four bytes.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Byte 3 is the most significant byte of the little-endian value, so
     the value exceeds 0x7fffffff exactly when that byte exceeds 0x7f.
     This matches the range check in ucs4le_internal_loop.  */
  if (__glibc_unlikely (((unsigned char *) state->__value.__wchb)[3] > 0x7f))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes consumed by this call, as the big-endian
             counterpart ucs4_internal_loop_single does; state->__count
             still holds the count from before the call.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
550
551#include <iconv/skeleton.c>
552
553
8619129f
UD
554/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
555#define DEFINE_INIT 0
556#define DEFINE_FINI 0
557#define MIN_NEEDED_FROM 1
558#define MIN_NEEDED_TO 4
559#define FROM_DIRECTION 1
560#define FROM_LOOP ascii_internal_loop
561#define TO_LOOP ascii_internal_loop /* This is not used. */
562#define FUNCTION_NAME __gconv_transform_ascii_internal
fd1b5c0f 563#define ONE_DIRECTION 1
8619129f
UD
564
565#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
566#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
567#define LOOPFCT FROM_LOOP
568#define BODY \
569 { \
a1ffb40e 570 if (__glibc_unlikely (*inptr > '\x7f')) \
8619129f 571 { \
55985355
UD
572 /* The value is too large. We don't try transliteration here since \
573 this is not an error because of the lack of possibilities to \
574 represent the result. This is a genuine bug in the input since \
575 ASCII does not allow such values. */ \
e438a468 576 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
85830c4c
UD
577 } \
578 else \
5deca9bb
UD
579 { \
580 /* It's an one byte sequence. */ \
581 *((uint32_t *) outptr) = *inptr++; \
582 outptr += sizeof (uint32_t); \
583 } \
8619129f 584 }
55985355 585#define LOOP_NEED_FLAGS
8619129f
UD
586#include <iconv/loop.c>
587#include <iconv/skeleton.c>
588
589
590/* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
591#define DEFINE_INIT 0
592#define DEFINE_FINI 0
593#define MIN_NEEDED_FROM 4
594#define MIN_NEEDED_TO 1
595#define FROM_DIRECTION 1
596#define FROM_LOOP internal_ascii_loop
597#define TO_LOOP internal_ascii_loop /* This is not used. */
598#define FUNCTION_NAME __gconv_transform_internal_ascii
fd1b5c0f 599#define ONE_DIRECTION 1
8619129f
UD
600
601#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
602#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
603#define LOOPFCT FROM_LOOP
604#define BODY \
605 { \
a1ffb40e 606 if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \
8619129f 607 { \
601d2942 608 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
e438a468 609 STANDARD_TO_LOOP_ERR_HANDLER (4); \
85830c4c
UD
610 } \
611 else \
5deca9bb
UD
612 { \
613 /* It's an one byte sequence. */ \
614 *outptr++ = *((const uint32_t *) inptr); \
615 inptr += sizeof (uint32_t); \
616 } \
8619129f 617 }
55985355 618#define LOOP_NEED_FLAGS
8619129f
UD
619#include <iconv/loop.c>
620#include <iconv/skeleton.c>
621
622
623/* Convert from the internal (UCS4-like) format to UTF-8. */
624#define DEFINE_INIT 0
625#define DEFINE_FINI 0
626#define MIN_NEEDED_FROM 4
627#define MIN_NEEDED_TO 1
628#define MAX_NEEDED_TO 6
629#define FROM_DIRECTION 1
630#define FROM_LOOP internal_utf8_loop
631#define TO_LOOP internal_utf8_loop /* This is not used. */
632#define FUNCTION_NAME __gconv_transform_internal_utf8
fd1b5c0f 633#define ONE_DIRECTION 1
8619129f
UD
634
635#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
636#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
5aa8ff62 637#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
8619129f
UD
638#define LOOPFCT FROM_LOOP
639#define BODY \
640 { \
17427edd 641 uint32_t wc = *((const uint32_t *) inptr); \
8619129f 642 \
a1ffb40e 643 if (__glibc_likely (wc < 0x80)) \
8619129f
UD
644 /* It's an one byte sequence. */ \
645 *outptr++ = (unsigned char) wc; \
7ab1de21
SL
646 else if (__glibc_likely (wc <= 0x7fffffff \
647 && (wc < 0xd800 || wc > 0xdfff))) \
8619129f
UD
648 { \
649 size_t step; \
3cc4a097 650 unsigned char *start; \
8619129f
UD
651 \
652 for (step = 2; step < 6; ++step) \
b79f74cd 653 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
8619129f
UD
654 break; \
655 \
a1ffb40e 656 if (__glibc_unlikely (outptr + step > outend)) \
8619129f
UD
657 { \
658 /* Too long. */ \
d64b6ad0 659 result = __GCONV_FULL_OUTPUT; \
8619129f
UD
660 break; \
661 } \
662 \
663 start = outptr; \
b79f74cd 664 *outptr = (unsigned char) (~0xff >> step); \
8619129f 665 outptr += step; \
8619129f
UD
666 do \
667 { \
347bace2 668 start[--step] = 0x80 | (wc & 0x3f); \
8619129f
UD
669 wc >>= 6; \
670 } \
347bace2 671 while (step > 1); \
8619129f 672 start[0] |= wc; \
db2d05f9
UD
673 } \
674 else \
675 { \
e438a468 676 STANDARD_TO_LOOP_ERR_HANDLER (4); \
8619129f
UD
677 } \
678 \
679 inptr += 4; \
680 }
db2d05f9 681#define LOOP_NEED_FLAGS
8619129f
UD
682#include <iconv/loop.c>
683#include <iconv/skeleton.c>
684
685
686/* Convert from UTF-8 to the internal (UCS4-like) format. */
687#define DEFINE_INIT 0
688#define DEFINE_FINI 0
689#define MIN_NEEDED_FROM 1
690#define MAX_NEEDED_FROM 6
691#define MIN_NEEDED_TO 4
692#define FROM_DIRECTION 1
693#define FROM_LOOP utf8_internal_loop
694#define TO_LOOP utf8_internal_loop /* This is not used. */
695#define FUNCTION_NAME __gconv_transform_utf8_internal
fd1b5c0f 696#define ONE_DIRECTION 1
8619129f
UD
697
698#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
5aa8ff62 699#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
8619129f
UD
700#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
701#define LOOPFCT FROM_LOOP
702#define BODY \
703 { \
8619129f 704 /* Next input byte. */ \
26a51060 705 uint32_t ch = *inptr; \
8619129f 706 \
a1ffb40e 707 if (__glibc_likely (ch < 0x80)) \
8619129f 708 { \
5aa8ff62 709 /* One byte sequence. */ \
5aa8ff62 710 ++inptr; \
8619129f
UD
711 } \
712 else \
713 { \
535e935a
NG
714 unsigned int cnt; \
715 unsigned int i; \
26a51060 716 \
9ea2c194 717 if (ch >= 0xc2 && ch < 0xe0) \
5aa8ff62 718 { \
9ea2c194
AJ
719 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
720 otherwise the wide character could have been represented \
721 using a single byte. */ \
5aa8ff62
UD
722 cnt = 2; \
723 ch &= 0x1f; \
724 } \
a1ffb40e 725 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
5aa8ff62
UD
726 { \
727 /* We expect three bytes. */ \
728 cnt = 3; \
729 ch &= 0x0f; \
730 } \
a1ffb40e 731 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
5aa8ff62
UD
732 { \
733 /* We expect four bytes. */ \
734 cnt = 4; \
735 ch &= 0x07; \
736 } \
a1ffb40e 737 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
5aa8ff62
UD
738 { \
739 /* We expect five bytes. */ \
740 cnt = 5; \
741 ch &= 0x03; \
742 } \
a1ffb40e 743 else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \
5aa8ff62
UD
744 { \
745 /* We expect six bytes. */ \
746 cnt = 6; \
747 ch &= 0x01; \
748 } \
749 else \
8619129f 750 { \
85830c4c
UD
751 /* Search the end of this ill-formed UTF-8 character. This \
752 is the next byte with (x & 0xc0) != 0x80. */ \
347bace2 753 i = 0; \
e438a468 754 do \
347bace2
UD
755 ++i; \
756 while (inptr + i < inend \
757 && (*(inptr + i) & 0xc0) == 0x80 \
758 && i < 5); \
85830c4c 759 \
347bace2
UD
760 errout: \
761 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
8619129f
UD
762 } \
763 \
a1ffb40e 764 if (__glibc_unlikely (inptr + cnt > inend)) \
5aa8ff62 765 { \
fd1b5c0f
UD
766 /* We don't have enough input. But before we report that check \
767 that all the bytes are correct. */ \
768 for (i = 1; inptr + i < inend; ++i) \
769 if ((inptr[i] & 0xc0) != 0x80) \
770 break; \
85830c4c 771 \
a1ffb40e 772 if (__glibc_likely (inptr + i == inend)) \
85830c4c
UD
773 { \
774 result = __GCONV_INCOMPLETE_INPUT; \
775 break; \
776 } \
777 \
347bace2 778 goto errout; \
5aa8ff62
UD
779 } \
780 \
781 /* Read the possible remaining bytes. */ \
782 for (i = 1; i < cnt; ++i) \
783 { \
784 uint32_t byte = inptr[i]; \
785 \
786 if ((byte & 0xc0) != 0x80) \
bd32e4a6
UD
787 /* This is an illegal encoding. */ \
788 break; \
5aa8ff62
UD
789 \
790 ch <<= 6; \
791 ch |= byte & 0x3f; \
792 } \
85830c4c 793 \
bd32e4a6
UD
794 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
795 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
796 have been represented with fewer than cnt bytes. */ \
9c32c895
UD
797 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
798 /* Do not accept UTF-16 surrogates. */ \
799 || (ch >= 0xd800 && ch <= 0xdfff)) \
bd32e4a6
UD
800 { \
801 /* This is an illegal encoding. */ \
347bace2 802 goto errout; \
bd32e4a6
UD
803 } \
804 \
5aa8ff62 805 inptr += cnt; \
8619129f
UD
806 } \
807 \
808 /* Now adjust the pointers and store the result. */ \
cdda3d7d
AJ
809 *((uint32_t *) outptr) = ch; \
810 outptr += sizeof (uint32_t); \
8619129f 811 }
55985355 812#define LOOP_NEED_FLAGS
fd1b5c0f
UD
813
814#define STORE_REST \
815 { \
816 /* We store the remaining bytes while converting them into the UCS4 \
817 format. We can assume that the first byte in the buffer is \
818 correct and that it requires a larger number of bytes than there \
819 are in the input buffer. */ \
820 wint_t ch = **inptrp; \
ea31b613 821 size_t cnt, r; \
fd1b5c0f
UD
822 \
823 state->__count = inend - *inptrp; \
824 \
9954432e 825 assert (ch != 0xc0 && ch != 0xc1); \
fd1b5c0f
UD
826 if (ch >= 0xc2 && ch < 0xe0) \
827 { \
828 /* We expect two bytes. The first byte cannot be 0xc0 or \
829 0xc1, otherwise the wide character could have been \
830 represented using a single byte. */ \
831 cnt = 2; \
832 ch &= 0x1f; \
833 } \
a1ffb40e 834 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
fd1b5c0f
UD
835 { \
836 /* We expect three bytes. */ \
837 cnt = 3; \
838 ch &= 0x0f; \
839 } \
a1ffb40e 840 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
fd1b5c0f
UD
841 { \
842 /* We expect four bytes. */ \
843 cnt = 4; \
844 ch &= 0x07; \
845 } \
a1ffb40e 846 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
fd1b5c0f
UD
847 { \
848 /* We expect five bytes. */ \
849 cnt = 5; \
850 ch &= 0x03; \
851 } \
852 else \
853 { \
854 /* We expect six bytes. */ \
855 cnt = 6; \
856 ch &= 0x01; \
857 } \
858 \
859 /* The first byte is already consumed. */ \
ea31b613 860 r = cnt - 1; \
fd1b5c0f
UD
861 while (++(*inptrp) < inend) \
862 { \
863 ch <<= 6; \
864 ch |= **inptrp & 0x3f; \
ea31b613 865 --r; \
fd1b5c0f
UD
866 } \
867 \
868 /* Shift for the so far missing bytes. */ \
ea31b613
UD
869 ch <<= r * 6; \
870 \
871 /* Store the number of bytes expected for the entire sequence. */ \
41f112ad 872 state->__count |= cnt << 8; \
fd1b5c0f
UD
873 \
874 /* Store the value. */ \
875 state->__value.__wch = ch; \
876 }
877
878#define UNPACK_BYTES \
879 { \
ea31b613 880 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
fd1b5c0f 881 wint_t wch = state->__value.__wch; \
41f112ad 882 size_t ntotal = state->__count >> 8; \
ea31b613 883 \
41f112ad 884 inlen = state->__count & 255; \
fd1b5c0f 885 \
ea31b613 886 bytebuf[0] = inmask[ntotal - 2]; \
fd1b5c0f 887 \
cd201e38
UD
888 do \
889 { \
890 if (--ntotal < inlen) \
891 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
892 wch >>= 6; \
893 } \
894 while (ntotal > 1); \
fd1b5c0f
UD
895 \
896 bytebuf[0] |= wch; \
897 }
898
41f112ad
UD
899#define CLEAR_STATE \
900 state->__count = 0
901
902
8619129f
UD
903#include <iconv/loop.c>
904#include <iconv/skeleton.c>
905
906
907/* Convert from UCS2 to the internal (UCS4-like) format. */
908#define DEFINE_INIT 0
909#define DEFINE_FINI 0
910#define MIN_NEEDED_FROM 2
911#define MIN_NEEDED_TO 4
912#define FROM_DIRECTION 1
913#define FROM_LOOP ucs2_internal_loop
914#define TO_LOOP ucs2_internal_loop /* This is not used. */
915#define FUNCTION_NAME __gconv_transform_ucs2_internal
fd1b5c0f 916#define ONE_DIRECTION 1
8619129f
UD
917
918#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
919#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
920#define LOOPFCT FROM_LOOP
428bcea4 921#define BODY \
755104ed 922 { \
606135cf 923 uint16_t u1 = get16 (inptr); \
755104ed 924 \
a1ffb40e 925 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
755104ed
UD
926 { \
927 /* Surrogate characters in UCS-2 input are not valid. Reject \
928 them. (Catching this here is not security relevant.) */ \
e438a468 929 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
755104ed
UD
930 } \
931 \
cdda3d7d
AJ
932 *((uint32_t *) outptr) = u1; \
933 outptr += sizeof (uint32_t); \
755104ed
UD
934 inptr += 2; \
935 }
936#define LOOP_NEED_FLAGS
8619129f
UD
937#include <iconv/loop.c>
938#include <iconv/skeleton.c>
939
940
941/* Convert from the internal (UCS4-like) format to UCS2. */
942#define DEFINE_INIT 0
943#define DEFINE_FINI 0
944#define MIN_NEEDED_FROM 4
945#define MIN_NEEDED_TO 2
946#define FROM_DIRECTION 1
947#define FROM_LOOP internal_ucs2_loop
948#define TO_LOOP internal_ucs2_loop /* This is not used. */
949#define FUNCTION_NAME __gconv_transform_internal_ucs2
fd1b5c0f 950#define ONE_DIRECTION 1
8619129f
UD
951
952#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
953#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
954#define LOOPFCT FROM_LOOP
428bcea4 955#define BODY \
8619129f 956 { \
17427edd 957 uint32_t val = *((const uint32_t *) inptr); \
755104ed 958 \
a1ffb40e 959 if (__glibc_unlikely (val >= 0x10000)) \
8619129f 960 { \
601d2942 961 UNICODE_TAG_HANDLER (val, 4); \
e438a468 962 STANDARD_TO_LOOP_ERR_HANDLER (4); \
8619129f 963 } \
a1ffb40e 964 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
755104ed
UD
965 { \
966 /* Surrogate characters in UCS-4 input are not valid. \
967 We must catch this, because the UCS-2 output might be \
968 interpreted as UTF-16 by other programs. If we let \
969 surrogates pass through, attackers could make a security \
970 hole exploit by synthesizing any desired plane 1-16 \
971 character. */ \
e438a468 972 result = __GCONV_ILLEGAL_INPUT; \
755104ed 973 if (! ignore_errors_p ()) \
e438a468 974 break; \
755104ed
UD
975 inptr += 4; \
976 ++*irreversible; \
977 continue; \
978 } \
9ea2c194 979 else \
755104ed 980 { \
606135cf 981 put16 (outptr, val); \
db6af3eb 982 outptr += sizeof (uint16_t); \
755104ed
UD
983 inptr += 4; \
984 } \
8619129f 985 }
55985355 986#define LOOP_NEED_FLAGS
8619129f
UD
987#include <iconv/loop.c>
988#include <iconv/skeleton.c>
9b26f5c4
UD
989
990
428bcea4 991/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
9b26f5c4
UD
992#define DEFINE_INIT 0
993#define DEFINE_FINI 0
994#define MIN_NEEDED_FROM 2
995#define MIN_NEEDED_TO 4
996#define FROM_DIRECTION 1
428bcea4
UD
997#define FROM_LOOP ucs2reverse_internal_loop
998#define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
8d617a71 999#define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
fd1b5c0f 1000#define ONE_DIRECTION 1
9b26f5c4
UD
1001
1002#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1003#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1004#define LOOPFCT FROM_LOOP
428bcea4 1005#define BODY \
755104ed 1006 { \
606135cf 1007 uint16_t u1 = bswap_16 (get16 (inptr)); \
755104ed 1008 \
a1ffb40e 1009 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
755104ed
UD
1010 { \
1011 /* Surrogate characters in UCS-2 input are not valid. Reject \
1012 them. (Catching this here is not security relevant.) */ \
1013 if (! ignore_errors_p ()) \
1014 { \
1015 result = __GCONV_ILLEGAL_INPUT; \
1016 break; \
1017 } \
1018 inptr += 2; \
1019 ++*irreversible; \
1020 continue; \
1021 } \
1022 \
cdda3d7d
AJ
1023 *((uint32_t *) outptr) = u1; \
1024 outptr += sizeof (uint32_t); \
755104ed
UD
1025 inptr += 2; \
1026 }
1027#define LOOP_NEED_FLAGS
9b26f5c4
UD
1028#include <iconv/loop.c>
1029#include <iconv/skeleton.c>
1030
1031
428bcea4 1032/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
9b26f5c4
UD
1033#define DEFINE_INIT 0
1034#define DEFINE_FINI 0
1035#define MIN_NEEDED_FROM 4
1036#define MIN_NEEDED_TO 2
1037#define FROM_DIRECTION 1
428bcea4
UD
1038#define FROM_LOOP internal_ucs2reverse_loop
1039#define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
8d617a71 1040#define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
fd1b5c0f 1041#define ONE_DIRECTION 1
9b26f5c4
UD
1042
1043#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1044#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1045#define LOOPFCT FROM_LOOP
428bcea4 1046#define BODY \
9b26f5c4 1047 { \
17427edd 1048 uint32_t val = *((const uint32_t *) inptr); \
a1ffb40e 1049 if (__glibc_unlikely (val >= 0x10000)) \
9b26f5c4 1050 { \
601d2942 1051 UNICODE_TAG_HANDLER (val, 4); \
e438a468 1052 STANDARD_TO_LOOP_ERR_HANDLER (4); \
9b26f5c4 1053 } \
a1ffb40e 1054 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
755104ed
UD
1055 { \
1056 /* Surrogate characters in UCS-4 input are not valid. \
1057 We must catch this, because the UCS-2 output might be \
1058 interpreted as UTF-16 by other programs. If we let \
1059 surrogates pass through, attackers could make a security \
1060 hole exploit by synthesizing any desired plane 1-16 \
1061 character. */ \
1062 if (! ignore_errors_p ()) \
1063 { \
1064 result = __GCONV_ILLEGAL_INPUT; \
1065 break; \
1066 } \
1067 inptr += 4; \
1068 ++*irreversible; \
1069 continue; \
1070 } \
9ea2c194 1071 else \
755104ed 1072 { \
606135cf 1073 put16 (outptr, bswap_16 (val)); \
cdda3d7d 1074 outptr += sizeof (uint16_t); \
755104ed
UD
1075 inptr += 4; \
1076 } \
9b26f5c4 1077 }
55985355 1078#define LOOP_NEED_FLAGS
9b26f5c4
UD
1079#include <iconv/loop.c>
1080#include <iconv/skeleton.c>
This page took 0.699195 seconds and 6 git commands to generate.