]>
Commit | Line | Data |
---|---|---|
6973fc01 | 1 | /* Simple transformations functions. |
dff8da6b | 2 | Copyright (C) 1997-2024 Free Software Foundation, Inc. |
6973fc01 | 3 | This file is part of the GNU C Library. |
6973fc01 UD |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
6973fc01 UD |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 13 | Lesser General Public License for more details. |
6973fc01 | 14 | |
41bdb6e2 | 15 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 | 16 | License along with the GNU C Library; if not, see |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
6973fc01 | 18 | |
f1fa8b68 | 19 | #include <byteswap.h> |
55985355 | 20 | #include <dlfcn.h> |
f1fa8b68 | 21 | #include <endian.h> |
f4017d20 | 22 | #include <errno.h> |
6973fc01 | 23 | #include <gconv.h> |
d2374599 | 24 | #include <stdint.h> |
6973fc01 UD |
25 | #include <stdlib.h> |
26 | #include <string.h> | |
27 | #include <wchar.h> | |
28 | #include <sys/param.h> | |
f9ad060c | 29 | #include <gconv_int.h> |
6973fc01 | 30 | |
17427edd | 31 | #define BUILTIN_ALIAS(s1, s2) /* nothing */ |
f9ad060c UD |
32 | #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ |
33 | MinF, MaxF, MinT, MaxT) \ | |
17427edd | 34 | extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \ |
a784e502 | 35 | const unsigned char **, const unsigned char *, \ |
17427edd UD |
36 | unsigned char **, size_t *, int, int); |
37 | #include "gconv_builtin.h" | |
38 | ||
39 | ||
a904b5d9 UD |
40 | #ifndef EILSEQ |
41 | # define EILSEQ EINVAL | |
42 | #endif | |
43 | ||
44 | ||
f9ad060c UD |
45 | /* Specialized conversion function for a single byte to INTERNAL, recognizing |
46 | only ASCII characters. */ | |
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  /* Only bytes below 0x80 have a wide-character counterpart here; every
     other byte yields WEOF.  STEP is not consulted.  */
  return c < 0x80 ? (wint_t) c : WEOF;
}
55 | ||
56 | ||
f1fa8b68 UD |
57 | /* Transform from the internal, UCS4-like format, to UCS4. The |
58 | difference between the internal ucs4 format and the real UCS4 | |
f58a8c1c | 59 | format is, if any, the endianness. The Unicode/ISO 10646 says that |
f1fa8b68 UD |
60 | unless some higher protocol specifies it differently, the byte |
61 | order is big endian.*/ | |
8619129f UD |
62 | #define DEFINE_INIT 0 |
63 | #define DEFINE_FINI 0 | |
64 | #define MIN_NEEDED_FROM 4 | |
65 | #define MIN_NEEDED_TO 4 | |
66 | #define FROM_DIRECTION 1 | |
67 | #define FROM_LOOP internal_ucs4_loop | |
68 | #define TO_LOOP internal_ucs4_loop /* This is not used. */ | |
69 | #define FUNCTION_NAME __gconv_transform_internal_ucs4 | |
0cdddc25 | 70 | #define ONE_DIRECTION 0 |
8619129f UD |
71 | |
72 | ||
/* Bulk conversion of internal (host byte order) 32-bit units to big-endian
   UCS4.  Converts as many complete 4-byte units as fit in both the input
   range [*INPTRP, INEND) and the output range [*OUTPTRP, OUTEND), advances
   both pointers, and returns a __GCONV_* status describing why it stopped.
   STEP, STEP_DATA and IRREVERSIBLE are not used by this function.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, const unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole 4-byte units available in the smaller of the two
     buffers.  */
  size_t units = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* The host order differs from the big-endian output: swap each unit.  */
  while (units-- > 0)
    {
      put32 (dst, __builtin_bswap32 (get32 (src)));
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Both sides already agree on the byte order: a plain copy is enough.  */
  *inptrp = src + units * 4;
  *outptrp = __mempcpy (dst, src, units * 4);
#else
# error "This endianness is not supported."
#endif

  /* Report why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
c1db8b0d | 116 | |
fd1b5c0f UD |
117 | |
/* Finish one internal -> big-endian UCS4 unit whose first bytes were saved
   in the conversion state by an earlier call.  Bytes are appended from
   [*INPTRP, INEND) to the state buffer until four are present; if the input
   runs out first, the new count is recorded in the low three bits of
   __count and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the unit is
   written to *OUTPTRP in big-endian order, *OUTPTRP advances by four and the
   saved count is cleared.  STEP, OUTEND and IRREVERSIBLE are not used; no
   check against OUTEND is made here.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  unsigned char *dst;
  size_t i;

  /* Top up the pending unit from the new input.  */
  for (; have < 4 && *inptrp < inend; ++have)
    state->__value.__wchb[have] = *(*inptrp)++;

  if (__glibc_unlikely (have < 4))
    {
      /* Still short: remember how many bytes are buffered so far.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  dst = *outptrp;
  for (i = 0; i < 4; ++i)
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      /* Reverse the bytes to obtain big-endian output.  */
      dst[i] = state->__value.__wchb[3 - i];
#elif __BYTE_ORDER == __BIG_ENDIAN
      dst[i] = state->__value.__wchb[i];
#else
# error "This endianness is not supported."
#endif
    }
  *outptrp = dst + 4;

  /* The buffered unit has been emitted; forget it.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
165 | ||
8619129f | 166 | #include <iconv/skeleton.c> |
d2374599 | 167 | |
d2374599 | 168 | |
4a069c33 UD |
169 | /* Transform from UCS4 to the internal, UCS4-like format. Unlike |
170 | for the other direction we have to check for correct values here. */ | |
171 | #define DEFINE_INIT 0 | |
172 | #define DEFINE_FINI 0 | |
173 | #define MIN_NEEDED_FROM 4 | |
174 | #define MIN_NEEDED_TO 4 | |
175 | #define FROM_DIRECTION 1 | |
176 | #define FROM_LOOP ucs4_internal_loop | |
177 | #define TO_LOOP ucs4_internal_loop /* This is not used. */ | |
178 | #define FUNCTION_NAME __gconv_transform_ucs4_internal | |
0cdddc25 | 179 | #define ONE_DIRECTION 0 |
4a069c33 UD |
180 | |
181 | ||
/* Bulk conversion of big-endian UCS4 to the internal (host byte order)
   representation.  Each complete 4-byte unit is read, brought into host
   order and rejected if it exceeds 0x7fffffff.  For a rejected unit:
   if IRREVERSIBLE is NULL the function returns __GCONV_ILLEGAL_INPUT without
   updating the caller's pointers; if __GCONV_IGNORE_ERRORS is set in the
   step flags the unit is skipped and *IRREVERSIBLE is incremented; otherwise
   the pointers are stored back and __GCONV_ILLEGAL_INPUT is returned.
   STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, const unsigned char *outend,
                    size_t *irreversible)
{
  const int flags = step_data->__flags;
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;

  while (src + 4 <= inend && dst + 4 <= outend)
    {
      uint32_t wc = get32 (src);
#if __BYTE_ORDER == __LITTLE_ENDIAN
      wc = __builtin_bswap32 (wc);
#endif

      if (__glibc_likely (wc <= 0x7fffffff))
        {
          put32 (dst, wc);
          dst += sizeof (uint32_t);
        }
      else if (irreversible == NULL)
        {
          /* Called while transliterating: do not try to correct
             anything.  No transliteration is attempted for this error
             because the input itself is invalid UCS4.  */
          return __GCONV_ILLEGAL_INPUT;
        }
      else if ((flags & __GCONV_IGNORE_ERRORS) == 0)
        {
          /* Stop in front of the offending unit.  */
          *inptrp = src;
          *outptrp = dst;
          return __GCONV_ILLEGAL_INPUT;
        }
      else
        {
          /* Drop the offending unit and count it.  */
          ++*irreversible;
        }

      src += 4;
    }

  *inptrp = src;
  *outptrp = dst;

  /* Report why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
241 | ||
4a069c33 UD |
242 | |
/* Finish one big-endian UCS4 -> internal unit whose first bytes were saved
   in the conversion state by an earlier call.

   Bytes are appended from [*INPTRP, INEND) to the state buffer until four
   are present.  If the input runs out first, the new count is recorded in
   the low three bits of __count and __GCONV_INCOMPLETE_INPUT is returned.

   A complete unit is valid only if it does not exceed 0x7fffffff, i.e. if
   the most significant byte (the first one in big-endian order) is at most
   0x7f; this matches the `inval > 0x7fffffff' test in ucs4_internal_loop.
   An invalid unit yields __GCONV_ILLEGAL_INPUT with *INPTRP moved back to
   where it was on entry, unless __GCONV_IGNORE_ERRORS is set in the step
   flags, in which case the unit is silently dropped.

   A valid unit is written to *OUTPTRP in host byte order and *OUTPTRP
   advances by four.  STEP, OUTEND and IRREVERSIBLE are not used; no check
   against OUTEND is made here.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The top bit of the most significant byte must be clear.  The previous
     test `> 0x80' let a lead byte of exactly 0x80 through.  */
  if (__glibc_unlikely (((unsigned char *) state->__value.__wchb)[0] > 0x7f))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes consumed by this call; __count still holds
             the number that were buffered on entry.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
304 | ||
305 | #include <iconv/skeleton.c> | |
306 | ||
307 | ||
308 | /* Similarly for the little endian form. */ | |
8d617a71 UD |
309 | #define DEFINE_INIT 0 |
310 | #define DEFINE_FINI 0 | |
311 | #define MIN_NEEDED_FROM 4 | |
312 | #define MIN_NEEDED_TO 4 | |
313 | #define FROM_DIRECTION 1 | |
314 | #define FROM_LOOP internal_ucs4le_loop | |
315 | #define TO_LOOP internal_ucs4le_loop /* This is not used. */ | |
316 | #define FUNCTION_NAME __gconv_transform_internal_ucs4le | |
0cdddc25 | 317 | #define ONE_DIRECTION 0 |
8d617a71 UD |
318 | |
319 | ||
/* Bulk conversion of internal (host byte order) 32-bit units to
   little-endian UCS4.  Converts as many complete 4-byte units as fit in
   both the input range [*INPTRP, INEND) and the output range
   [*OUTPTRP, OUTEND), advances both pointers, and returns a __GCONV_*
   status describing why it stopped.  STEP, STEP_DATA and IRREVERSIBLE are
   not used by this function.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, const unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole 4-byte units available in the smaller of the two
     buffers.  */
  size_t units = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* The host order differs from the little-endian output: swap each
     unit.  */
  while (units-- > 0)
    {
      put32 (dst, __builtin_bswap32 (get32 (src)));
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Both sides already agree on the byte order: a plain copy is enough.  */
  *inptrp = src + units * 4;
  *outptrp = __mempcpy (dst, src, units * 4);
#else
# error "This endianness is not supported."
#endif

  /* Report why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
363 | ||
fd1b5c0f UD |
364 | |
/* Finish one internal -> little-endian UCS4 unit whose first bytes were
   saved in the conversion state by an earlier call.  Bytes are appended
   from [*INPTRP, INEND) to the state buffer until four are present; if the
   input runs out first, the new count is recorded in the low three bits of
   __count and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the unit is
   written to *OUTPTRP in little-endian order, *OUTPTRP advances by four and
   the saved count is cleared.  STEP, OUTEND and IRREVERSIBLE are not used;
   no check against OUTEND is made here.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  unsigned char *dst;
  size_t i;

  /* Top up the pending unit from the new input.  */
  for (; have < 4 && *inptrp < inend; ++have)
    state->__value.__wchb[have] = *(*inptrp)++;

  if (__glibc_unlikely (have < 4))
    {
      /* Still short: remember how many bytes are buffered so far.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  dst = *outptrp;
  for (i = 0; i < 4; ++i)
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      /* Reverse the bytes to obtain little-endian output.  */
      dst[i] = state->__value.__wchb[3 - i];
#else
      dst[i] = state->__value.__wchb[i];
#endif
    }
  *outptrp = dst + 4;

  /* The buffered unit has been emitted; forget it.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
412 | ||
8d617a71 UD |
413 | #include <iconv/skeleton.c> |
414 | ||
415 | ||
4a069c33 UD |
416 | /* And finally from UCS4-LE to the internal encoding. */ |
417 | #define DEFINE_INIT 0 | |
418 | #define DEFINE_FINI 0 | |
419 | #define MIN_NEEDED_FROM 4 | |
420 | #define MIN_NEEDED_TO 4 | |
421 | #define FROM_DIRECTION 1 | |
422 | #define FROM_LOOP ucs4le_internal_loop | |
423 | #define TO_LOOP ucs4le_internal_loop /* This is not used. */ | |
424 | #define FUNCTION_NAME __gconv_transform_ucs4le_internal | |
0cdddc25 | 425 | #define ONE_DIRECTION 0 |
4a069c33 UD |
426 | |
427 | ||
/* Bulk conversion of little-endian UCS4 to the internal (host byte order)
   representation.  Each complete 4-byte unit is read, brought into host
   order and rejected if it exceeds 0x7fffffff.  For a rejected unit:
   if IRREVERSIBLE is NULL the function returns __GCONV_ILLEGAL_INPUT without
   updating the caller's pointers; if __GCONV_IGNORE_ERRORS is set in the
   step flags the unit is skipped and *IRREVERSIBLE is incremented; otherwise
   the pointers are stored back and __GCONV_ILLEGAL_INPUT is returned.
   STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, const unsigned char *outend,
                      size_t *irreversible)
{
  const int flags = step_data->__flags;
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;

  while (src + 4 <= inend && dst + 4 <= outend)
    {
      uint32_t wc = get32 (src);
#if __BYTE_ORDER == __BIG_ENDIAN
      wc = __builtin_bswap32 (wc);
#endif

      if (__glibc_likely (wc <= 0x7fffffff))
        {
          put32 (dst, wc);
          dst += sizeof (uint32_t);
        }
      else if (irreversible == NULL)
        {
          /* Called while transliterating: do not try to correct
             anything.  No transliteration is attempted for this error
             because the input itself is invalid UCS4.  */
          return __GCONV_ILLEGAL_INPUT;
        }
      else if ((flags & __GCONV_IGNORE_ERRORS) == 0)
        {
          /* Stop in front of the offending unit.  */
          *inptrp = src;
          *outptrp = dst;
          return __GCONV_ILLEGAL_INPUT;
        }
      else
        {
          /* Drop the offending unit and count it.  */
          ++*irreversible;
        }

      src += 4;
    }

  *inptrp = src;
  *outptrp = dst;

  /* Report why the conversion stopped.  Fewer than four remaining input
     bytes takes precedence over a full output buffer here.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*inptrp + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  assert (*outptrp + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
490 | ||
4a069c33 UD |
491 | |
/* Finish one little-endian UCS4 -> internal unit whose first bytes were
   saved in the conversion state by an earlier call.

   Bytes are appended from [*INPTRP, INEND) to the state buffer until four
   are present.  If the input runs out first, the new count is recorded in
   the low three bits of __count and __GCONV_INCOMPLETE_INPUT is returned.

   A complete unit is valid only if it does not exceed 0x7fffffff, i.e. if
   the most significant byte (the last one in little-endian order) is at
   most 0x7f; this matches the `inval > 0x7fffffff' test in
   ucs4le_internal_loop.  An invalid unit yields __GCONV_ILLEGAL_INPUT with
   *INPTRP moved back to where it was on entry (as the big-endian sibling
   ucs4_internal_loop_single does), unless __GCONV_IGNORE_ERRORS is set in
   the step flags, in which case the unit is silently dropped.

   A valid unit is written to *OUTPTRP in host byte order and *OUTPTRP
   advances by four.  STEP, OUTEND and IRREVERSIBLE are not used; no check
   against OUTEND is made here.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The top bit of the most significant byte must be clear.  The previous
     test `> 0x80' let a high byte of exactly 0x80 through.  */
  if (__glibc_unlikely (((unsigned char *) state->__value.__wchb)[3] > 0x7f))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes consumed by this call; __count still holds
             the number that were buffered on entry.  Without this the
             consumed bytes would be lost while the saved state is left
             unchanged.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
550 | ||
551 | #include <iconv/skeleton.c> | |
552 | ||
553 | ||
8619129f UD |
554 | /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */ |
555 | #define DEFINE_INIT 0 | |
556 | #define DEFINE_FINI 0 | |
557 | #define MIN_NEEDED_FROM 1 | |
558 | #define MIN_NEEDED_TO 4 | |
559 | #define FROM_DIRECTION 1 | |
560 | #define FROM_LOOP ascii_internal_loop | |
561 | #define TO_LOOP ascii_internal_loop /* This is not used. */ | |
562 | #define FUNCTION_NAME __gconv_transform_ascii_internal | |
fd1b5c0f | 563 | #define ONE_DIRECTION 1 |
8619129f UD |
564 | |
565 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
566 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
567 | #define LOOPFCT FROM_LOOP | |
#define BODY \
  { \
    if (__glibc_likely (*inptr <= '\x7f')) \
      { \
        /* A 7-bit byte becomes one 32-bit unit with the same value.  */ \
        *((uint32_t *) outptr) = *inptr++; \
        outptr += sizeof (uint32_t); \
      } \
    else \
      { \
        /* The byte is outside the ASCII range.  No transliteration is \
           attempted: the problem is not a missing representation in the \
           target but invalid input, since ASCII has no such values.  */ \
        STANDARD_FROM_LOOP_ERR_HANDLER (1); \
      } \
  }
55985355 | 585 | #define LOOP_NEED_FLAGS |
8619129f UD |
586 | #include <iconv/loop.c> |
587 | #include <iconv/skeleton.c> | |
588 | ||
589 | ||
590 | /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */ | |
591 | #define DEFINE_INIT 0 | |
592 | #define DEFINE_FINI 0 | |
593 | #define MIN_NEEDED_FROM 4 | |
594 | #define MIN_NEEDED_TO 1 | |
595 | #define FROM_DIRECTION 1 | |
596 | #define FROM_LOOP internal_ascii_loop | |
597 | #define TO_LOOP internal_ascii_loop /* This is not used. */ | |
598 | #define FUNCTION_NAME __gconv_transform_internal_ascii | |
fd1b5c0f | 599 | #define ONE_DIRECTION 1 |
8619129f UD |
600 | |
601 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
602 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
603 | #define LOOPFCT FROM_LOOP | |
#define BODY \
  { \
    if (__glibc_likely (*((const uint32_t *) inptr) <= 0x7f)) \
      { \
        /* The value fits in seven bits: emit it as a single byte.  */ \
        *outptr++ = *((const uint32_t *) inptr); \
        inptr += sizeof (uint32_t); \
      } \
    else \
      { \
        /* Not representable in ASCII: first give the Unicode tag handler \
           a chance, then fall back to the standard error handling.  */ \
        UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
  }
55985355 | 618 | #define LOOP_NEED_FLAGS |
8619129f UD |
619 | #include <iconv/loop.c> |
620 | #include <iconv/skeleton.c> | |
621 | ||
622 | ||
623 | /* Convert from the internal (UCS4-like) format to UTF-8. */ | |
624 | #define DEFINE_INIT 0 | |
625 | #define DEFINE_FINI 0 | |
626 | #define MIN_NEEDED_FROM 4 | |
627 | #define MIN_NEEDED_TO 1 | |
628 | #define MAX_NEEDED_TO 6 | |
629 | #define FROM_DIRECTION 1 | |
630 | #define FROM_LOOP internal_utf8_loop | |
631 | #define TO_LOOP internal_utf8_loop /* This is not used. */ | |
632 | #define FUNCTION_NAME __gconv_transform_internal_utf8 | |
fd1b5c0f | 633 | #define ONE_DIRECTION 1 |
8619129f UD |
634 | |
635 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
636 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
5aa8ff62 | 637 | #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO |
8619129f UD |
638 | #define LOOPFCT FROM_LOOP |
#define BODY \
  { \
    uint32_t wc = *((const uint32_t *) inptr); \
    \
    if (__glibc_likely (wc < 0x80)) \
      /* Values below 0x80 are emitted unchanged as a single byte.  */ \
      *outptr++ = (unsigned char) wc; \
    else if (__glibc_likely (wc <= 0x7fffffff \
                             && (wc < 0xd800 || wc > 0xdfff))) \
      { \
        size_t nbytes = 2; \
        unsigned char *first; \
        \
        /* A sequence of N bytes carries 5 * N + 1 value bits.  Find the \
           smallest N in 2..6 whose capacity covers WC.  */ \
        while (nbytes < 6 \
               && (wc & (~(uint32_t)0 << (5 * nbytes + 1))) != 0) \
          ++nbytes; \
        \
        if (__glibc_unlikely (outptr + nbytes > outend)) \
          { \
            /* The sequence does not fit in the remaining output.  */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        \
        first = outptr; \
        /* Lead byte marker: NBYTES high bits set, followed by a zero.  */ \
        *outptr = (unsigned char) (~0xff >> nbytes); \
        outptr += nbytes; \
        /* Fill the trail bytes from the last one backwards, six value \
           bits at a time.  */ \
        do \
          { \
            first[--nbytes] = 0x80 | (wc & 0x3f); \
            wc >>= 6; \
          } \
        while (nbytes > 1); \
        /* Whatever is left of WC belongs in the lead byte.  */ \
        first[0] |= wc; \
      } \
    else \
      { \
        /* Values above 0x7fffffff and UTF-16 surrogates are errors.  */ \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    \
    inptr += 4; \
  }
db2d05f9 | 681 | #define LOOP_NEED_FLAGS |
8619129f UD |
682 | #include <iconv/loop.c> |
683 | #include <iconv/skeleton.c> | |
684 | ||
685 | ||
686 | /* Convert from UTF-8 to the internal (UCS4-like) format. */ | |
687 | #define DEFINE_INIT 0 | |
688 | #define DEFINE_FINI 0 | |
689 | #define MIN_NEEDED_FROM 1 | |
690 | #define MAX_NEEDED_FROM 6 | |
691 | #define MIN_NEEDED_TO 4 | |
692 | #define FROM_DIRECTION 1 | |
693 | #define FROM_LOOP utf8_internal_loop | |
694 | #define TO_LOOP utf8_internal_loop /* This is not used. */ | |
695 | #define FUNCTION_NAME __gconv_transform_utf8_internal | |
fd1b5c0f | 696 | #define ONE_DIRECTION 1 |
8619129f UD |
697 | |
698 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
5aa8ff62 | 699 | #define MAX_NEEDED_INPUT MAX_NEEDED_FROM |
8619129f UD |
700 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO |
701 | #define LOOPFCT FROM_LOOP | |
#define BODY \
  { \
    /* Start with the lead byte of the next sequence.  */ \
    uint32_t ch = *inptr; \
    \
    if (__glibc_likely (ch < 0x80)) \
      { \
        /* Plain single-byte character.  */ \
        ++inptr; \
      } \
    else \
      { \
        unsigned int cnt; \
        unsigned int i; \
        \
        if (ch >= 0xc2 && ch < 0xe0) \
          { \
            /* Two-byte sequence.  Lead bytes 0xc0 and 0xc1 are excluded \
               because the resulting value would have fit in a single \
               byte.  */ \
            cnt = 2; \
            ch &= 0x1f; \
          } \
        else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
          { \
            /* Three-byte sequence.  */ \
            cnt = 3; \
            ch &= 0x0f; \
          } \
        else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
          { \
            /* Four-byte sequence.  */ \
            cnt = 4; \
            ch &= 0x07; \
          } \
        else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
          { \
            /* Five-byte sequence.  */ \
            cnt = 5; \
            ch &= 0x03; \
          } \
        else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \
          { \
            /* Six-byte sequence.  */ \
            cnt = 6; \
            ch &= 0x01; \
          } \
        else \
          { \
            /* Invalid lead byte.  Skip it together with any following \
               continuation bytes (those with (x & 0xc0) == 0x80), \
               looking at no more than five bytes in total.  */ \
            i = 0; \
            do \
              ++i; \
            while (inptr + i < inend \
                   && (inptr[i] & 0xc0) == 0x80 \
                   && i < 5); \
            \
          errout: \
            /* Shared error exit; I is the number of bytes to skip.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (i); \
          } \
        \
        if (__glibc_unlikely (inptr + cnt > inend)) \
          { \
            /* The sequence is cut off by the end of the input.  Before \
               asking for more, make sure the bytes that are present are \
               all continuation bytes.  */ \
            for (i = 1; inptr + i < inend; ++i) \
              if ((inptr[i] & 0xc0) != 0x80) \
                break; \
            \
            if (__glibc_likely (inptr + i == inend)) \
              { \
                result = __GCONV_INCOMPLETE_INPUT; \
                break; \
              } \
            \
            goto errout; \
          } \
        \
        /* Fold the continuation bytes into CH, six bits each.  */ \
        for (i = 1; i < cnt; ++i) \
          { \
            uint32_t byte = inptr[i]; \
            \
            if ((byte & 0xc0) != 0x80) \
              /* Not a continuation byte: the encoding is invalid.  */ \
              break; \
            \
            ch <<= 6; \
            ch |= byte & 0x3f; \
          } \
        \
        /* Reject the sequence if a continuation byte was bad (I < CNT), \
           if for CNT > 2 the value is below 2^(5*CNT-4) and so could \
           have been encoded in fewer bytes, or if it is a UTF-16 \
           surrogate.  */ \
        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
            || (ch >= 0xd800 && ch <= 0xdfff)) \
          { \
            goto errout; \
          } \
        \
        inptr += cnt; \
      } \
    \
    /* Store the decoded value and advance the output.  */ \
    *((uint32_t *) outptr) = ch; \
    outptr += sizeof (uint32_t); \
  }
55985355 | 812 | #define LOOP_NEED_FLAGS |
fd1b5c0f UD |
813 | |
#define STORE_REST \
  { \
    /* Save a truncated sequence in the state, already partially decoded \
       into its UCS4 value.  The lead byte is taken to be valid and to \
       announce more bytes than the input currently holds.  */ \
    wint_t ch = **inptrp; \
    size_t seqlen, missing; \
    \
    /* Low bits of __count: number of bytes available right now.  */ \
    state->__count = inend - *inptrp; \
    \
    assert (ch != 0xc0 && ch != 0xc1); \
    if (ch >= 0xc2 && ch < 0xe0) \
      { \
        /* Two-byte sequence.  Lead bytes 0xc0 and 0xc1 cannot occur \
           since such a value would have fit in a single byte.  */ \
        seqlen = 2; \
        ch &= 0x1f; \
      } \
    else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
      { \
        /* Three-byte sequence.  */ \
        seqlen = 3; \
        ch &= 0x0f; \
      } \
    else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
      { \
        /* Four-byte sequence.  */ \
        seqlen = 4; \
        ch &= 0x07; \
      } \
    else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
      { \
        /* Five-byte sequence.  */ \
        seqlen = 5; \
        ch &= 0x03; \
      } \
    else \
      { \
        /* Six-byte sequence.  */ \
        seqlen = 6; \
        ch &= 0x01; \
      } \
    \
    /* The lead byte is accounted for; fold in the trail bytes that are \
       present and count the ones still outstanding.  */ \
    missing = seqlen - 1; \
    while (++(*inptrp) < inend) \
      { \
        ch <<= 6; \
        ch |= **inptrp & 0x3f; \
        --missing; \
      } \
    \
    /* Leave room for the bytes that have not arrived yet.  */ \
    ch <<= missing * 6; \
    \
    /* Record the full sequence length above the low eight bits.  */ \
    state->__count |= seqlen << 8; \
    \
    /* Keep the partially assembled value.  */ \
    state->__value.__wch = ch; \
  }
877 | ||
/* Rebuild the bytes of a partially read UTF-8 sequence from STATE.
   STATE->__count holds the number of bytes already seen in its low
   8 bits and the total sequence length above them; STATE->__value.__wch
   holds the value gathered so far, already shifted to its final
   position.  INLEN is set to the number of bytes seen and those bytes
   are written to BYTEBUF.  */
#define UNPACK_BYTES \
  {                                                                           \
    /* Lead byte markers for sequences of 2, 3, 4, 5 and 6 bytes.  */         \
    static const unsigned char lead_marker[5] =                               \
      { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };                                       \
    wint_t bits = state->__value.__wch;                                       \
    size_t pos = state->__count >> 8;                                         \
                                                                              \
    inlen = state->__count & 255;                                             \
                                                                              \
    bytebuf[0] = lead_marker[pos - 2];                                        \
                                                                              \
    /* Walk the trail byte positions from last to first.  Only the            \
       positions that were actually read are stored; six bits are             \
       dropped from the value for every position either way.  */              \
    do                                                                        \
      {                                                                       \
        --pos;                                                                \
        if (pos < inlen)                                                      \
          bytebuf[pos] = 0x80 | (bits & 0x3f);                                \
        bits >>= 6;                                                           \
      }                                                                       \
    while (pos > 1);                                                          \
                                                                              \
    /* What is left are the payload bits of the lead byte.  */                \
    bytebuf[0] |= bits;                                                       \
  }
898 | ||
41f112ad UD |
/* Forget any partially read sequence: zero both the seen-bytes count and
   the sequence length kept in STATE->__count.  */
#define CLEAR_STATE state->__count = 0
901 | ||
902 | ||
8619129f UD |
903 | #include <iconv/loop.c> |
904 | #include <iconv/skeleton.c> | |
905 | ||
906 | ||
/* Convert from UCS2 to the internal (UCS4-like) format.  Code units are
   fetched with get16 and used without byte swapping.  */

/* Names of the generated function and of its loop.  */
#define FUNCTION_NAME		__gconv_transform_ucs2_internal
#define FROM_LOOP		ucs2_internal_loop
#define TO_LOOP			ucs2_internal_loop /* This is not used.  */
#define LOOPFCT			FROM_LOOP
#define FROM_DIRECTION		1
#define ONE_DIRECTION		1
#define DEFINE_INIT		0
#define DEFINE_FINI		0

/* Every character takes two input bytes and four output bytes.  */
#define MIN_NEEDED_FROM		2
#define MIN_NEEDED_TO		4
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO

/* Convert one UCS2 code unit at INPTR into one 32-bit value at OUTPTR.  */
#define BODY \
  {                                                                           \
    const uint16_t unit = get16 (inptr);                                      \
                                                                              \
    /* 0xd800..0xdfff: surrogate characters in UCS-2 input are not            \
       valid.  Reject them.  (Catching this here is not security              \
       relevant.)  */                                                         \
    if (__glibc_unlikely ((unit & 0xf800) == 0xd800))                         \
      {                                                                       \
        STANDARD_FROM_LOOP_ERR_HANDLER (2);                                   \
      }                                                                       \
                                                                              \
    *((uint32_t *) outptr) = unit;                                            \
    inptr += 2;                                                               \
    outptr += sizeof (uint32_t);                                              \
  }
#define LOOP_NEED_FLAGS
8619129f UD |
937 | #include <iconv/loop.c> |
938 | #include <iconv/skeleton.c> | |
939 | ||
940 | ||
/* Convert from the internal (UCS4-like) format to UCS2.  Code units are
   stored with put16 without byte swapping.  */

/* Names of the generated function and of its loop.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs2
#define FROM_LOOP		internal_ucs2_loop
#define TO_LOOP			internal_ucs2_loop /* This is not used.  */
#define LOOPFCT			FROM_LOOP
#define FROM_DIRECTION		1
#define ONE_DIRECTION		1
#define DEFINE_INIT		0
#define DEFINE_FINI		0

/* Every character takes four input bytes and two output bytes.  */
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		2
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO

/* Convert one 32-bit value at INPTR into one UCS2 code unit at OUTPTR.  */
#define BODY \
  {                                                                           \
    uint32_t wc = *((const uint32_t *) inptr);                                \
                                                                              \
    if (__glibc_unlikely (wc >= 0xd800 && wc < 0xe000))                       \
      {                                                                       \
        /* Surrogate characters in UCS-4 input are not valid.                 \
           We must catch this, because the UCS-2 output might be              \
           interpreted as UTF-16 by other programs.  If we let                \
           surrogates pass through, attackers could make a security           \
           hole exploit by synthesizing any desired plane 1-16                \
           character.  */                                                     \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        if (! ignore_errors_p ())                                             \
          break;                                                              \
        /* Errors are ignored: skip the character but keep RESULT.  */        \
        inptr += 4;                                                           \
        ++*irreversible;                                                      \
        continue;                                                             \
      }                                                                       \
    else if (__glibc_unlikely (wc >= 0x10000))                                \
      {                                                                       \
        /* Does not fit into a single 16-bit code unit.  */                   \
        UNICODE_TAG_HANDLER (wc, 4);                                          \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        put16 (outptr, wc);                                                   \
        inptr += 4;                                                           \
        outptr += sizeof (uint16_t);                                          \
      }                                                                       \
  }
#define LOOP_NEED_FLAGS
8619129f UD |
987 | #include <iconv/loop.c> |
988 | #include <iconv/skeleton.c> | |
9b26f5c4 UD |
989 | |
990 | ||
/* Convert from UCS2 in other endianness to the internal (UCS4-like) format.
   Each code unit is fetched with get16 and byte-swapped with bswap_16
   before it is examined.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		2
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		ucs2reverse_internal_loop
#define TO_LOOP			ucs2reverse_internal_loop/* This is not used.*/
#define FUNCTION_NAME		__gconv_transform_ucs2reverse_internal
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Convert one byte-swapped UCS2 code unit at INPTR into one 32-bit value
   at OUTPTR.  */
#define BODY \
  {                                                                           \
    uint16_t u1 = bswap_16 (get16 (inptr));                                   \
                                                                              \
    if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000))                       \
      {                                                                       \
        /* Surrogate characters in UCS-2 input are not valid.  Reject         \
           them.  (Catching this here is not security relevant.)  */          \
        /* Set RESULT before asking whether errors are ignored, so that       \
           a skipped surrogate still leaves the illegal-input status          \
           behind, exactly as the non-swapping UCS2 conversions do.  */       \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        if (! ignore_errors_p ())                                             \
          break;                                                              \
        inptr += 2;                                                           \
        ++*irreversible;                                                      \
        continue;                                                             \
      }                                                                       \
                                                                              \
    *((uint32_t *) outptr) = u1;                                              \
    outptr += sizeof (uint32_t);                                              \
    inptr += 2;                                                               \
  }
#define LOOP_NEED_FLAGS
9b26f5c4 UD |
1028 | #include <iconv/loop.c> |
1029 | #include <iconv/skeleton.c> | |
1030 | ||
1031 | ||
/* Convert from the internal (UCS4-like) format to UCS2 in other endianness.
   Each code unit is byte-swapped with bswap_16 before it is stored with
   put16.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		2
#define FROM_DIRECTION		1
#define FROM_LOOP		internal_ucs2reverse_loop
#define TO_LOOP			internal_ucs2reverse_loop/* This is not used.*/
#define FUNCTION_NAME		__gconv_transform_internal_ucs2reverse
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Convert one 32-bit value at INPTR into one byte-swapped UCS2 code unit
   at OUTPTR.  */
#define BODY \
  {                                                                           \
    uint32_t val = *((const uint32_t *) inptr);                               \
    if (__glibc_unlikely (val >= 0x10000))                                    \
      {                                                                       \
        /* Does not fit into a single 16-bit code unit.  */                   \
        UNICODE_TAG_HANDLER (val, 4);                                         \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
    else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000))                \
      {                                                                       \
        /* Surrogate characters in UCS-4 input are not valid.                 \
           We must catch this, because the UCS-2 output might be              \
           interpreted as UTF-16 by other programs.  If we let                \
           surrogates pass through, attackers could make a security           \
           hole exploit by synthesizing any desired plane 1-16                \
           character.  */                                                     \
        /* Set RESULT before asking whether errors are ignored, so that       \
           a skipped surrogate still leaves the illegal-input status          \
           behind, exactly as the non-swapping conversion to UCS2 does.  */   \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        if (! ignore_errors_p ())                                             \
          break;                                                              \
        inptr += 4;                                                           \
        ++*irreversible;                                                      \
        continue;                                                             \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        put16 (outptr, bswap_16 (val));                                       \
        outptr += sizeof (uint16_t);                                          \
        inptr += 4;                                                           \
      }                                                                       \
  }
#define LOOP_NEED_FLAGS
9b26f5c4 UD |
1079 | #include <iconv/loop.c> |
1080 | #include <iconv/skeleton.c> |