[PATCH v2 4/7] iconv: Preserve iconv -c error exit on invalid inputs (bug 32046)
DJ Delorie
dj@redhat.com
Fri Sep 20 02:27:33 GMT 2024
Florian Weimer <fweimer@redhat.com> writes:
One suggestion to use support_quote_blob(), one question about a
possible future testcase, but otherwise OK.
Reviewed-by: DJ Delorie <dj@redhat.com>
> +tests-internal = \
> + tst-iconv-sticky-input-error \
> + # tests-internal
Ok.
> diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
>
> +/* Internal extensions for <gconv.h>. */
> +
> +/* Internal flags for __flags in struct __gconv_step_data. Overlaps
> + with flags for __gconv_open. */
> +enum
> + {
> + /* The conversion encountered an illegal input character at one
> + point. */
> + __GCONV_ENCOUNTERED_ILLEGAL_INPUT = 1U << 30,
> + };
Do we need a test that makes sure bits aren't re-used between internal
and external flags?
> +/* Mark *STEP_DATA as having seen illegal input, and return
> + __GCONV_ILLEGAL_INPUT. */
> +static inline int
> +__gconv_mark_illegal_input (struct __gconv_step_data *step_data)
> +{
> + step_data->__flags |= __GCONV_ENCOUNTERED_ILLEGAL_INPUT;
> + return __GCONV_ILLEGAL_INPUT;
> +}
Ok.
> +/* Returns true if any of the conversion steps encountered illegal input. */
> +static _Bool __attribute__ ((unused))
> +__gconv_has_illegal_input (__gconv_t cd)
> +{
> + for (size_t i = 0; i < cd->__nsteps; ++i)
> + if (cd->__data[i].__flags & __GCONV_ENCOUNTERED_ILLEGAL_INPUT)
> + return true;
> + return false;
> +}
Ok.
> diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
> if (irreversible == NULL)
> /* We are transliterating, don't try to correct anything. */
> - return __GCONV_ILLEGAL_INPUT;
> + return __gconv_mark_illegal_input (step_data);
Ok.
> - return __GCONV_ILLEGAL_INPUT;
> + return __gconv_mark_illegal_input (step_data);
Ok.
> - return __GCONV_ILLEGAL_INPUT;
> + return __gconv_mark_illegal_input (step_data);
Ok.
> - return __GCONV_ILLEGAL_INPUT;
> + return __gconv_mark_illegal_input (step_data);
Ok.
> - return __GCONV_ILLEGAL_INPUT;
> + return __gconv_mark_illegal_input (step_data);
Ok.
> - return __GCONV_ILLEGAL_INPUT;
> + return __gconv_mark_illegal_input (step_data);
Ok.
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c
> /* Haven't found a match. */
> - return __GCONV_ILLEGAL_INPUT;
> + return __gconv_mark_illegal_input (step_data);
Ok.
> diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
> while (++remaining < argc);
>
> + /* Ensure that iconv -c still exits with failure if iconv (the
> + function) has failed with E2BIG instead of EILSEQ. */
> + if (__gconv_has_illegal_input (cd))
> + status = EXIT_FAILURE;
Ok.
> diff --git a/iconv/loop.c b/iconv/loop.c
> `continue' must reach certain points. */
> #define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \
> { \
> - result = __GCONV_ILLEGAL_INPUT; \
> - \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> diff --git a/iconv/tst-iconv-sticky-input-error.c b/iconv/tst-iconv-sticky-input-error.c
> new file mode 100644
> index 0000000000..c93b5b5160
> --- /dev/null
> +++ b/iconv/tst-iconv-sticky-input-error.c
> @@ -0,0 +1,135 @@
> +/* Test __GCONV_ENCOUNTERED_ILLEGAL_INPUT, as used by iconv -c (bug 32046).
> + Copyright (C) 2024 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +
> +#include <array_length.h>
> +#include <errno.h>
> +#include <gconv_int.h>
> +#include <iconv.h>
> +#include <stdbool.h>
> +#include <support/check.h>
> +#include <support/support.h>
> +#include <support/test-driver.h>
> +#include <stdio.h>
Ok.
> +/* FROM is the input character set, TO the output character set. If
> + IGNORE is true, the iconv descriptor is set up in the same way as
> + iconv -c would. INPUT is the input string, EXPECTED_OUTPUT the
> + output. OUTPUT_LIMIT is a byte count, specifying how many input
> + bytes are passed to the iconv function on each invocation. */
> +static void
> +one_direction (const char *from, const char *to, bool ignore,
> + const char *input, const char *expected_output,
> + size_t output_limit)
Ok.
> +{
> + if (test_verbose)
> + {
> + printf ("info: testing from=\"%s\" to=\"%s\" ignore=%d input=\"",
> + from, to, (int) ignore);
> + for (const char *p = input; *p != '\0'; ++p)
> + if (*p >= ' ' && *p <= '~' && *p != '\\' && *p != '"')
> + putchar (*p);
> + else
> + printf ("\\x%02x", *p & 0xff);
Should use support_quote_blob() ?
> + printf (" \" expected_output=\"%s\" output_limit=%zu\n",
> + expected_output, output_limit);
> + }
Ok.
> + __gconv_t cd;
> + if (ignore)
> + {
> + struct gconv_spec conv_spec;
> + TEST_VERIFY_EXIT (__gconv_create_spec (&conv_spec, from, to)
> + == &conv_spec);
> + conv_spec.ignore = true;
> + cd = (iconv_t) -1;
> + TEST_COMPARE (__gconv_open (&conv_spec, &cd, 0), __GCONV_OK);
> + }
> + else
> + cd = iconv_open (to, from);
> + TEST_VERIFY_EXIT (cd != (iconv_t) -1);
Ok.
> + char *input_ptr = (char *) input;
> + size_t input_len = strlen (input);
> + char output_buf[20];
> + char *output_ptr = output_buf;
> + size_t output_len;
> + do
> + {
> + output_len = array_end (output_buf) - output_ptr;
> + if (output_len > output_limit)
> + /* Limit the buffer size as requested by the caller. */
> + output_len = output_limit;
> + TEST_VERIFY_EXIT (output_len > 0);
> + if (input_len == 0)
> + /* Trigger final flush. */
> + input_ptr = NULL;
Ok.
> + char *old_input_ptr = input_ptr;
> + size_t ret = iconv (cd, &input_ptr, &input_len,
> + &output_ptr, &output_len);
> + if (ret == (size_t) -1)
> + {
> + if (errno != EILSEQ)
> + TEST_COMPARE (errno, E2BIG);
> + }
> +
> + if (input_ptr == old_input_ptr)
> + /* Avoid endless loop if stuck on an invalid input character. */
> + break;
> + }
> + while (input_ptr != NULL);
Ok.
> + /* Test the sticky illegal input bit. */
> + TEST_VERIFY (__gconv_has_illegal_input (cd));
Ok.
> + TEST_COMPARE_BLOB (expected_output, strlen (expected_output),
> + output_buf, output_ptr - output_buf);
Ok.
> + TEST_COMPARE (iconv_close (cd), 0);
> +}
Ok.
> +static int
> +do_test (void)
> +{
> + static const char charsets[][14] =
> + {
> + "ASCII",
> + "ASCII//IGNORE",
> + "UTF-8",
> + "UTF-8//IGNORE",
> + };
> +
> + for (size_t from_idx = 0; from_idx < array_length (charsets); ++from_idx)
> + for (size_t to_idx = 0; to_idx < array_length (charsets); ++to_idx)
> + for (int do_ignore = 0; do_ignore < 2; ++do_ignore)
> + for (int limit = 1; limit < 5; ++limit)
> + for (int skip = 0; skip < 3; ++skip)
> + {
> + const char *expected_output;
> + if (do_ignore || strstr (charsets[to_idx], "//IGNORE") != NULL)
> + expected_output = "ABXY" + skip;
> + else
> + expected_output = "AB" + skip;
> + one_direction (charsets[from_idx], charsets[to_idx], do_ignore,
> + "AB\xffXY" + skip, expected_output, limit);
> + }
> +
> + return 0;
> +}
> +#include <support/test-driver.c>
Ok.
> diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh
> ! test -s "$tmp/err"
> expect_files abc def
>
> -# FIXME: This is not correct, -c should not change the exit status.
> cp "$tmp/out-template" "$tmp/out"
> -run_iconv -c -o "$tmp/out" \
> +expect_exit 1 run_iconv -c -o "$tmp/out" \
> "$tmp/abc" "$tmp/0xff-wrapped" "$tmp/def" 2>"$tmp/err"
> ! test -s "$tmp/err"
> expect_files abc xy zt def
Ok.
> diff --git a/iconvdata/cp932.c b/iconvdata/cp932.c
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> /* This is an illegal character. */ \
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> /* This is an illegal character. */ \
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> diff --git a/iconvdata/euc-jp-ms.c b/iconvdata/euc-jp-ms.c
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> /* This is an illegal character. */ \
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> /* This is an illegal character. */ \
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> diff --git a/iconvdata/gbbig5.c b/iconvdata/gbbig5.c
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> /* We do not have a mapping for this character. \
> If ignore errors, map it to 0xa1f5 - gb box character */ \
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> diff --git a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> diff --git a/iconvdata/iso646.c b/iconvdata/iso646.c
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> - failure = __GCONV_ILLEGAL_INPUT; \
> + failure = __gconv_mark_illegal_input (step_data); \
Ok.
> diff --git a/iconvdata/unicode.c b/iconvdata/unicode.c
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> diff --git a/iconvdata/utf-16.c b/iconvdata/utf-16.c
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
> diff --git a/iconvdata/utf-32.c b/iconvdata/utf-32.c
> - result = __GCONV_ILLEGAL_INPUT; \
> + result = __gconv_mark_illegal_input (step_data); \
Ok.
More information about the Libc-alpha
mailing list