]>
Commit | Line | Data |
---|---|---|
8619129f | 1 | /* Conversion loop framework. |
dff8da6b | 2 | Copyright (C) 1998-2024 Free Software Foundation, Inc. |
8619129f | 3 | This file is part of the GNU C Library. |
8619129f UD |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
8619129f UD |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 13 | Lesser General Public License for more details. |
8619129f | 14 | |
41bdb6e2 | 15 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 | 16 | License along with the GNU C Library; if not, see |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
8619129f UD |
18 | |
19 | /* This file provides a frame for the reader loop in all conversion modules. | |
20 | The actual code must (of course) be provided in the actual module source | |
21 | code but certain actions can be written down generically, with some | |
22 | customization options which are these: | |
23 | ||
24 | MIN_NEEDED_INPUT minimal number of input bytes needed for the next | |
25 | conversion. | |
26 | MIN_NEEDED_OUTPUT minimal number of bytes produced by the next round | |
27 | of conversion. | |
28 | ||
29 | MAX_NEEDED_INPUT you guessed it, this is the maximal number of input | |
30 | bytes needed. It defaults to MIN_NEEDED_INPUT. | |
31 | MAX_NEEDED_OUTPUT likewise for output bytes. | |
32 | ||
8619129f UD |
33 | LOOPFCT name of the function created. If not specified |
34 | the name is `loop' but this prevents the use | |
35 | of multiple functions in the same file. | |
36 | ||
8619129f UD |
37 | BODY this is supposed to expand to the body of the loop. |
38 | The user must provide this. | |
28f1c862 | 39 | |
382466e0 | 40 | EXTRA_LOOP_DECLS extra arguments passed from conversion loop call. |
66175fa8 UD |
41 | |
42 | INIT_PARAMS code to define and initialize variables from params. | |
43 | UPDATE_PARAMS code to store result in params. | |
f9ad060c UD |
44 | |
45 | ONEBYTE_BODY body of the specialized conversion function for a | |
46 | single byte from the current character set to INTERNAL. | |
8619129f UD |
47 | */ |
48 | ||
fd1b5c0f | 49 | #include <assert.h> |
b35e58e4 | 50 | #include <endian.h> |
7ac6fad9 | 51 | #include <iconv/gconv_int.h> |
b35e58e4 UD |
52 | #include <stdint.h> |
53 | #include <string.h> | |
d64b6ad0 | 54 | #include <wchar.h> |
8619129f UD |
55 | #include <sys/param.h> /* For MIN. */ |
56 | #define __need_size_t | |
57 | #include <stddef.h> | |
9090848d | 58 | #include <libc-diag.h> |
8619129f | 59 | |
b35e58e4 | 60 | #undef FCTNAME2 |
3e20ddad | 61 | #define FCTNAME(name) name /* Expands to NAME unchanged; used as FCTNAME (LOOPFCT) to name the loop function. */ |
b35e58e4 UD |
62 | |
63 | ||
8619129f UD |
64 | /* We need at least one byte for the next round. */ |
65 | #ifndef MIN_NEEDED_INPUT | |
5aa8ff62 | 66 | # error "MIN_NEEDED_INPUT definition missing" |
4a0de63b UD |
67 | #elif MIN_NEEDED_INPUT < 1 |
68 | # error "MIN_NEEDED_INPUT must be >= 1" | |
8619129f UD |
69 | #endif |
70 | ||
71 | /* Let's see how many bytes we consume at most; the default is the minimum. */ | |
72 | #ifndef MAX_NEEDED_INPUT | |
73 | # define MAX_NEEDED_INPUT MIN_NEEDED_INPUT | |
74 | #endif | |
75 | ||
76 | /* We produce at least one byte in the next round. */ | |
77 | #ifndef MIN_NEEDED_OUTPUT | |
5aa8ff62 | 78 | # error "MIN_NEEDED_OUTPUT definition missing" |
c0a0f9a3 UD |
79 | #elif MIN_NEEDED_OUTPUT < 1 |
80 | # error "MIN_NEEDED_OUTPUT must be >= 1" | |
8619129f UD |
81 | #endif |
82 | ||
83 | /* Let's see how many bytes we produce at most; the default is the minimum. */ | |
84 | #ifndef MAX_NEEDED_OUTPUT | |
85 | # define MAX_NEEDED_OUTPUT MIN_NEEDED_OUTPUT | |
86 | #endif | |
87 | ||
88 | /* Default name for the function. */ | |
89 | #ifndef LOOPFCT | |
90 | # define LOOPFCT loop | |
91 | #endif | |
92 | ||
93 | /* Make sure we have a loop body. */ | |
94 | #ifndef BODY | |
95 | # error "Definition of BODY missing for function" LOOPFCT | |
96 | #endif | |
97 | ||
8619129f | 98 | |
28f1c862 UD |
99 | /* If no arguments have to be passed to the loop function, define the macro |
100 | as empty. */ | |
101 | #ifndef EXTRA_LOOP_DECLS | |
102 | # define EXTRA_LOOP_DECLS | |
103 | #endif | |
104 | ||
4b1b449d UD |
105 | /* Allow using UPDATE_PARAMS in macros where an #ifdef UPDATE_PARAMS test |
106 | isn't possible. REINIT_PARAMS gets the same empty-statement default. */ | |
107 | #ifndef UPDATE_PARAMS | |
108 | # define UPDATE_PARAMS do { } while (0) | |
109 | #endif | |
110 | #ifndef REINIT_PARAMS | |
111 | # define REINIT_PARAMS do { } while (0) | |
112 | #endif | |
113 | ||
28f1c862 | 114 | |
85830c4c UD |
115 | /* To make it easier for the writers of the modules, we define a macro |
116 | to test whether we have to ignore errors. The expansion reads the variables `irreversible' and `flags' of the function it is used in; the loop functions in this file declare `flags' only if LOOP_NEED_FLAGS is defined. */ | |
b572c2da UD |
117 | #define ignore_errors_p() \ |
118 | (irreversible != NULL && (flags & __GCONV_IGNORE_ERRORS)) | |
85830c4c UD |
119 | |
120 | ||
e438a468 UD |
121 | /* Error handling for the FROM_LOOP direction, with ignoring of errors. |
122 | Note that we cannot use the do while (0) trick since `break' and | |
123 | `continue' must reach the enclosing conversion loop. INCR is the number of input bytes skipped when the invalid sequence is ignored. */ | |
124 | #define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \ | |
125 | { \ | |
126 | result = __GCONV_ILLEGAL_INPUT; \ | |
127 | \ | |
128 | if (! ignore_errors_p ()) \ | |
129 | break; \ | |
130 | \ | |
131 | /* We ignore the invalid input byte sequence and count it as an irreversible conversion. */ \ | |
132 | inptr += (Incr); \ | |
133 | ++*irreversible; \ | |
134 | /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \ | |
135 | that "iconv -c" must give the same exitcode as "iconv". */ \ | |
136 | continue; \ | |
137 | } | |
138 | ||
139 | /* Error handling for the TO_LOOP direction, with use of transliteration/ | |
140 | transcription functions and ignoring of errors. Note that we cannot use | |
141 | the do while (0) trick since `break' and `continue' must reach the | |
142 | enclosing conversion loop. INCR is the number of input bytes skipped when the character is ignored. */ | |
143 | #define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \ | |
d6204268 | 144 | { \ |
d6204268 | 145 | result = __GCONV_ILLEGAL_INPUT; \ |
b572c2da UD |
146 | \ |
147 | if (irreversible == NULL) \ | |
148 | /* This means we are in a call from __gconv_transliterate. In this \ | |
149 | case we are not doing any error recovery ourselves. */ \ | |
150 | break; \ | |
151 | \ | |
4b1b449d UD |
152 | /* If needed, flush any conversion state, so that __gconv_transliterate \ |
153 | starts with current shift state. */ \ | |
154 | UPDATE_PARAMS; \ | |
155 | \ | |
d6204268 | 156 | /* First try the transliteration methods; they are only consulted when __GCONV_TRANSLIT is set in the step data flags. */ \ |
ba7b4d29 FW |
157 | if ((step_data->__flags & __GCONV_TRANSLIT) != 0) \ |
158 | result = __gconv_transliterate \ | |
159 | (step, step_data, *inptrp, \ | |
160 | &inptr, inend, &outptr, irreversible); \ | |
4b1b449d UD |
161 | \ |
162 | REINIT_PARAMS; /* Counterpart of UPDATE_PARAMS above, run after the transliteration attempt. */ \ | |
163 | \ | |
7888313d | 164 | /* If any of them recognized the input, continue with the loop, unless the output buffer is full. */ \ |
d6204268 | 165 | if (result != __GCONV_ILLEGAL_INPUT) \ |
f2a8406a | 166 | { \ |
a1ffb40e | 167 | if (__glibc_unlikely (result == __GCONV_FULL_OUTPUT)) \ |
f2a8406a UD |
168 | break; \ |
169 | \ | |
170 | continue; \ | |
171 | } \ | |
d6204268 UD |
172 | \ |
173 | /* Next see whether we have to ignore the error. If not, stop. */ \ | |
174 | if (! ignore_errors_p ()) \ | |
175 | break; \ | |
b572c2da | 176 | \ |
d6204268 UD |
177 | /* When we come here it means we ignore the character. */ \ |
178 | ++*irreversible; \ | |
179 | inptr += Incr; \ | |
e438a468 UD |
180 | /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \ |
181 | that "iconv -c" must give the same exitcode as "iconv". */ \ | |
d6204268 UD |
182 | continue; \ |
183 | } | |
184 | ||
185 | ||
6900d2ca JM |
186 | /* With GCC 7 when compiling with -Os for 32-bit s390 the compiler |
187 | warns that the variable 'ch', in the definition of BODY in | |
188 | sysdeps/s390/multiarch/8bit-generic.c, may be used uninitialized in | |
189 | the call to UNICODE_TAG_HANDLER in that macro. This variable is | |
190 | actually always initialized before use, in the prior loop if INDEX | |
191 | is nonzero and in the following 'if' if INDEX is zero. That code | |
192 | has a comment referencing this diagnostic disabling; updates in one | |
193 | place may require updates in the other. */ | |
194 | DIAG_PUSH_NEEDS_COMMENT; | |
195 | DIAG_IGNORE_Os_NEEDS_COMMENT (7, "-Wmaybe-uninitialized"); | |
9a1f6754 UD |
196 | /* Handling of Unicode 3.1 TAG characters. Unicode recommends |
197 | "If language codes are not relevant to the particular processing | |
e438a468 UD |
198 | operation, then they should be ignored." This macro is usually |
199 | called right before STANDARD_TO_LOOP_ERR_HANDLER (Incr). If CHARACTER is a TAG character, INCR input bytes are skipped, nothing is written, and the enclosing loop continues with the next character; otherwise the macro does nothing. */ | |
9a1f6754 UD |
200 | #define UNICODE_TAG_HANDLER(Character, Incr) \ |
201 | { \ | |
202 | /* TAG characters are those in the range U+E0000..U+E007F; dropping the low seven bits leaves one value to compare against. */ \ | |
203 | if (((Character) >> 7) == (0xe0000 >> 7)) \ | |
204 | { \ | |
205 | inptr += Incr; \ | |
206 | continue; \ | |
207 | } \ | |
208 | } | |
6900d2ca | 209 | DIAG_POP_NEEDS_COMMENT; |
9a1f6754 UD |
210 | |
211 | ||
8619129f UD |
212 | /* The conversion loop: run BODY once per iteration on the input in [*INPTRP, INEND), writing to [*OUTPTRP, OUTEND), until the input is exhausted or a `break' leaves the loop. Afterwards the advanced positions are stored back through INPTRP and OUTPTRP and UPDATE_PARAMS is run. The function returns the status, as defined in gconv.h; it stays __GCONV_EMPTY_INPUT unless one of the checks below or BODY assigns `result'. */ |
213 | static inline int | |
dd9423a6 | 214 | __attribute ((always_inline)) |
55985355 UD |
215 | FCTNAME (LOOPFCT) (struct __gconv_step *step, |
216 | struct __gconv_step_data *step_data, | |
217 | const unsigned char **inptrp, const unsigned char *inend, | |
17427edd | 218 | unsigned char **outptrp, const unsigned char *outend, |
38677ace | 219 | size_t *irreversible EXTRA_LOOP_DECLS) |
8619129f | 220 | { |
55985355 UD |
221 | #ifdef LOOP_NEED_STATE |
222 | mbstate_t *state = step_data->__statep; | |
223 | #endif | |
224 | #ifdef LOOP_NEED_FLAGS | |
225 | int flags = step_data->__flags; | |
226 | #endif | |
227 | #ifdef LOOP_NEED_DATA | |
228 | void *data = step->__data; | |
229 | #endif | |
230 | int result = __GCONV_EMPTY_INPUT; | |
8619129f UD |
231 | const unsigned char *inptr = *inptrp; |
232 | unsigned char *outptr = *outptrp; | |
8619129f | 233 | |
66175fa8 UD |
234 | #ifdef INIT_PARAMS |
235 | INIT_PARAMS; | |
236 | #endif | |
237 | ||
55985355 | 238 | while (inptr != inend) |
8619129f | 239 | { |
55985355 | 240 | /* The `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 are there to help the |
ca3c0135 | 241 | compiler generate better code. They will be optimized away |
55985355 | 242 | since MIN_NEEDED_OUTPUT is always a constant. */ |
eb9dc2a2 UD |
243 | if (MIN_NEEDED_INPUT > 1 |
244 | && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0)) | |
245 | { | |
246 | /* We don't have enough input for another complete input | |
247 | character. */ | |
248 | result = __GCONV_INCOMPLETE_INPUT; | |
249 | break; | |
250 | } | |
55985355 UD |
251 | if ((MIN_NEEDED_OUTPUT != 1 |
252 | && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0)) | |
253 | || (MIN_NEEDED_OUTPUT == 1 | |
254 | && __builtin_expect (outptr >= outend, 0))) | |
255 | { | |
256 | /* Overflow in the output buffer. */ | |
257 | result = __GCONV_FULL_OUTPUT; | |
258 | break; | |
259 | } | |
55985355 UD |
260 | |
261 | /* Here comes the body the user provides. It can stop with | |
262 | RESULT set to GCONV_INCOMPLETE_INPUT (if the input | |
263 | characters vary in size), GCONV_ILLEGAL_INPUT, or | |
264 | GCONV_FULL_OUTPUT (if the output characters vary in size). */ | |
265 | BODY | |
8619129f UD |
266 | } |
267 | ||
8619129f UD |
268 | /* Update the pointers pointed to by the parameters. */ |
269 | *inptrp = inptr; | |
270 | *outptrp = outptr; | |
66175fa8 | 271 | UPDATE_PARAMS; |
8619129f UD |
272 | |
273 | return result; | |
274 | } | |
275 | ||
276 | ||
3e20ddad AZ |
277 | #if MAX_NEEDED_INPUT > 1 /* Define <LOOPFCT>_single: it joins the input bytes saved in the conversion state with new input in a local buffer and runs BODY exactly once on that buffer. It returns __GCONV_INCOMPLETE_INPUT or __GCONV_FULL_OUTPUT from the checks below, __GCONV_OK once a character was consumed, or otherwise whatever BODY left in `result'. */ |
278 | # define SINGLE(fct) SINGLE2 (fct) | |
279 | # define SINGLE2(fct) fct##_single | |
fd1b5c0f | 280 | static inline int |
dd9423a6 | 281 | __attribute ((always_inline)) |
55985355 UD |
282 | SINGLE(LOOPFCT) (struct __gconv_step *step, |
283 | struct __gconv_step_data *step_data, | |
284 | const unsigned char **inptrp, const unsigned char *inend, | |
fd1b5c0f | 285 | unsigned char **outptrp, unsigned char *outend, |
55985355 | 286 | size_t *irreversible EXTRA_LOOP_DECLS) |
fd1b5c0f | 287 | { |
55985355 | 288 | mbstate_t *state = step_data->__statep; |
3e20ddad | 289 | # ifdef LOOP_NEED_FLAGS |
55985355 | 290 | int flags = step_data->__flags; |
3e20ddad AZ |
291 | # endif |
292 | # ifdef LOOP_NEED_DATA | |
55985355 | 293 | void *data = step->__data; |
3e20ddad | 294 | # endif |
fd1b5c0f UD |
295 | int result = __GCONV_OK; |
296 | unsigned char bytebuf[MAX_NEEDED_INPUT]; | |
297 | const unsigned char *inptr = *inptrp; | |
298 | unsigned char *outptr = *outptrp; | |
299 | size_t inlen; | |
300 | ||
3e20ddad | 301 | # ifdef INIT_PARAMS |
fd1b5c0f | 302 | INIT_PARAMS; |
3e20ddad | 303 | # endif |
fd1b5c0f | 304 | |
3e20ddad | 305 | # ifdef UNPACK_BYTES |
fd1b5c0f | 306 | UNPACK_BYTES |
3e20ddad | 307 | # else |
fd1b5c0f | 308 | /* Add the bytes from the state to the input buffer; the low three bits of state->__count hold their number. */ |
5e0d0300 | 309 | assert ((state->__count & 7) <= sizeof (state->__value)); |
17427edd | 310 | for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen) |
fd1b5c0f | 311 | bytebuf[inlen] = state->__value.__wchb[inlen]; |
3e20ddad | 312 | # endif |
fd1b5c0f UD |
313 | |
314 | /* Are there enough bytes in the input buffer? */ | |
0656e90e UD |
315 | if (MIN_NEEDED_INPUT > 1 |
316 | && __builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0)) | |
fd1b5c0f | 317 | { |
fd1b5c0f | 318 | *inptrp = inend; |
3e20ddad | 319 | # ifdef STORE_REST |
5fe8e359 AK |
320 | |
321 | /* Building with -O3 GCC emits an `array subscript is above array | |
322 | bounds' warning. GCC BZ #64739 has been opened for this. */ | |
323 | DIAG_PUSH_NEEDS_COMMENT; | |
324 | DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Warray-bounds"); | |
1af4e298 UD |
325 | while (inptr < inend) |
326 | bytebuf[inlen++] = *inptr++; | |
5fe8e359 | 327 | DIAG_POP_NEEDS_COMMENT; |
1af4e298 | 328 | |
fd1b5c0f UD |
329 | inptr = bytebuf; |
330 | inptrp = &inptr; | |
331 | inend = &bytebuf[inlen]; | |
332 | ||
333 | STORE_REST | |
3e20ddad | 334 | # else |
fd1b5c0f UD |
335 | /* We don't have enough input for another complete input |
336 | character. Save the remaining input bytes in the state. NOTE(review): the byte count in state->__count is not updated on this path, unlike in the incomplete-input branch at the end of this function -- confirm that nothing relies on it. */ | |
08538f36 SL |
337 | size_t inlen_after = inlen + (inend - inptr); |
338 | assert (inlen_after <= sizeof (state->__value.__wchb)); | |
339 | for (; inlen < inlen_after; inlen++) | |
340 | state->__value.__wchb[inlen] = *inptr++; | |
3e20ddad | 341 | # endif |
fd1b5c0f UD |
342 | |
343 | return __GCONV_INCOMPLETE_INPUT; | |
344 | } | |
345 | ||
346 | /* Is there enough space in the output buffer? */ | |
347 | if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend) | |
348 | || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend)) | |
349 | /* Overflow in the output buffer. */ | |
350 | return __GCONV_FULL_OUTPUT; | |
351 | ||
352 | /* Now add characters from the normal input buffer. */ | |
876cdf51 | 353 | if (inlen >= MAX_NEEDED_INPUT || inptr >= inend) |
c8126360 JM |
354 | /* Avoid a -Wstringop-overflow= warning when this loop is |
355 | unrolled. The compiler cannot otherwise see that this is | |
356 | unreachable because it depends on (state->__count & 7) not | |
876cdf51 SL |
357 | being too large after a previous conversion step. |
358 | Starting with GCC 12, we also have to mark the inptr >= inend | |
359 | case as unreachable to avoid the warning. Note that this SINGLE | |
360 | function is only used to implement the mb*towc*() or wc*tomb*() | |
361 | functions. Those functions use inptr and inend pointing to a | |
362 | variable on stack, compute the inend pointer or explicitly check | |
363 | the arguments which always leads to inptr < inend. */ | |
c8126360 | 364 | __builtin_unreachable (); |
fd1b5c0f UD |
365 | do |
366 | bytebuf[inlen++] = *inptr++; | |
316518d6 | 367 | while (inlen < MAX_NEEDED_INPUT && inptr < inend); |
fd1b5c0f UD |
368 | |
369 | inptr = bytebuf; | |
316518d6 | 370 | inend = &bytebuf[inlen]; |
55985355 | 371 | |
fd1b5c0f UD |
372 | do |
373 | { | |
374 | BODY | |
375 | } | |
376 | while (0); | |
377 | ||
316518d6 UD |
378 | /* Now we either have produced an output character and consumed all the |
379 | bytes from the state and at least one more, or the character is still | |
380 | incomplete, or we have some other error (like illegal input character, | |
381 | no space in output buffer). */ | |
a1ffb40e | 382 | if (__glibc_likely (inptr != bytebuf)) |
fd1b5c0f | 383 | { |
316518d6 | 384 | /* We found a new character. Only the bytes beyond those taken from the state count as consumed from the caller's input. */ |
fd1b5c0f UD |
385 | assert (inptr - bytebuf > (state->__count & 7)); |
386 | ||
387 | *inptrp += inptr - bytebuf - (state->__count & 7); | |
388 | *outptrp = outptr; | |
389 | ||
316518d6 UD |
390 | result = __GCONV_OK; |
391 | ||
fd1b5c0f | 392 | /* Clear the state buffer. */ |
3e20ddad | 393 | # ifdef CLEAR_STATE |
41f112ad | 394 | CLEAR_STATE; |
3e20ddad | 395 | # else |
fd1b5c0f | 396 | state->__count &= ~7; |
3e20ddad | 397 | # endif |
fd1b5c0f | 398 | } |
316518d6 UD |
399 | else if (result == __GCONV_INCOMPLETE_INPUT) |
400 | { | |
401 | /* This can only happen if we have less than MAX_NEEDED_INPUT bytes | |
402 | available. */ | |
403 | assert (inend != &bytebuf[MAX_NEEDED_INPUT]); | |
404 | ||
405 | *inptrp += inend - bytebuf - (state->__count & 7); | |
3e20ddad | 406 | # ifdef STORE_REST |
316518d6 UD |
407 | inptrp = &inptr; |
408 | ||
409 | STORE_REST | |
3e20ddad | 410 | # else |
316518d6 UD |
411 | /* We don't have enough input for another complete input |
412 | character. Store all buffered bytes and their count in the state. */ | |
5512d89b | 413 | assert (inend - inptr > (state->__count & ~7)); /* NOTE(review): the mask here is ~7 while every other byte-count test in this function uses `& 7'; check whether `& 7' was intended. */ |
08538f36 | 414 | assert (inend - inptr <= sizeof (state->__value.__wchb)); |
5512d89b | 415 | state->__count = (state->__count & ~7) | (inend - inptr); |
08538f36 SL |
416 | for (inlen = 0; inlen < inend - inptr; inlen++) |
417 | state->__value.__wchb[inlen] = inptr[inlen]; | |
418 | inptr = inend; | |
3e20ddad | 419 | # endif |
316518d6 | 420 | } |
fd1b5c0f UD |
421 | |
422 | return result; | |
423 | } | |
3e20ddad AZ |
424 | # undef SINGLE |
425 | # undef SINGLE2 | |
fd1b5c0f UD |
426 | #endif /* MAX_NEEDED_INPUT > 1 */ |
427 | ||
32bead5b | 428 | #ifdef ONEBYTE_BODY |
f9ad060c UD |
429 | /* Define the shortcut function for btowc. ONEBYTE_BODY supplies the braced function body, which converts the single byte C from the current character set to INTERNAL. This block must not be nested inside the MAX_NEEDED_INPUT > 1 conditional above: a module whose characters are all one byte long can provide ONEBYTE_BODY too and would otherwise never get the function. FROM_ONEBYTE is defined, and not undefined at the end of this file, so that code after the #include can refer to the function. */ |
430 | static wint_t | |
431 | gconv_btowc (struct __gconv_step *step, unsigned char c) | |
432 | ONEBYTE_BODY | |
32bead5b WN |
433 | # define FROM_ONEBYTE gconv_btowc |
434 | #endif /* ONEBYTE_BODY */ | |
f9ad060c | 435 | |
32bead5b | 436 | |
f9ad060c | 437 | |
8619129f UD |
438 | /* We remove the macro definitions so that we can include this file again |
439 | for the definition of another function. STORE_REST and FROM_ONEBYTE are not undefined here. NOTE(review): presumably the including file still uses them after the #include -- confirm before adding them to this list. */ | |
440 | #undef MIN_NEEDED_INPUT | |
441 | #undef MAX_NEEDED_INPUT | |
442 | #undef MIN_NEEDED_OUTPUT | |
443 | #undef MAX_NEEDED_OUTPUT | |
444 | #undef LOOPFCT | |
8619129f UD |
445 | #undef BODY |
446 | #undef LOOPFCT /* Repeats the #undef two lines up; harmless. */ | |
28f1c862 | 447 | #undef EXTRA_LOOP_DECLS |
66175fa8 UD |
448 | #undef INIT_PARAMS |
449 | #undef UPDATE_PARAMS | |
4b1b449d | 450 | #undef REINIT_PARAMS |
f9ad060c | 451 | #undef ONEBYTE_BODY |
55985355 | 452 | #undef UNPACK_BYTES |
41f112ad | 453 | #undef CLEAR_STATE |
55985355 UD |
454 | #undef LOOP_NEED_STATE |
455 | #undef LOOP_NEED_FLAGS | |
456 | #undef LOOP_NEED_DATA |