[PATCH] gconv: Fix assertion failure in ISO-2022-JP-3 module (bug 27256)
Adhemerval Zanella
adhemerval.zanella@linaro.org
Wed Jan 27 12:47:41 GMT 2021
On 27/01/2021 09:39, Florian Weimer wrote:
> The conversion loop to the internal encoding does not follow
> the interface contract that __GCONV_FULL_OUTPUT is only returned
> after the internal wchar_t buffer has been filled completely. This
> is enforced by the first of the two asserts in iconv/skeleton.c:
>
> /* We must run out of output buffer space in this
> rerun. */
> assert (outbuf == outerr);
> assert (nstatus == __GCONV_FULL_OUTPUT);
>
> This commit solves this issue by queuing a second wide character
> which cannot be written immediately in the state variable, like
> other converters already do (e.g., BIG5-HKSCS or TSCII).
>
> Reported-by: Tavis Ormandy <taviso@gmail.com>
This is ok for 2.33, since we discussed it off-line and Andreas and
Bruno both acked this change.
>
> ---
> Thanks to Andreas Schwab and Bruno Haible for off-list review. We
> decided that no embargo was needed.
>
> iconvdata/Makefile | 4 +-
> iconvdata/bug-iconv14.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++
> iconvdata/iso-2022-jp-3.c | 67 +++++++++++++++++-------
> 3 files changed, 178 insertions(+), 20 deletions(-)
>
> diff --git a/iconvdata/Makefile b/iconvdata/Makefile
> index c8c532a3e4..55c527a5f7 100644
> --- a/iconvdata/Makefile
> +++ b/iconvdata/Makefile
> @@ -74,7 +74,7 @@ ifeq (yes,$(build-shared))
> tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
> tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
> bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \
> - bug-iconv13
> + bug-iconv13 bug-iconv14
> ifeq ($(have-thread-library),yes)
> tests += bug-iconv3
> endif
> @@ -322,6 +322,8 @@ $(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \
> $(addprefix $(objpfx),$(modules.so))
> $(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \
> $(addprefix $(objpfx),$(modules.so))
> +$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \
> + $(addprefix $(objpfx),$(modules.so))
>
> $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \
> $(addprefix $(objpfx),$(modules.so)) \
> diff --git a/iconvdata/bug-iconv14.c b/iconvdata/bug-iconv14.c
> new file mode 100644
> index 0000000000..902f140fa9
> --- /dev/null
> +++ b/iconvdata/bug-iconv14.c
> @@ -0,0 +1,127 @@
> +/* Assertion in ISO-2022-JP-3 due to two-character sequence (bug 27256).
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <iconv.h>
> +#include <string.h>
> +#include <errno.h>
> +#include <support/check.h>
> +
> +/* Use an escape sequence to return to the initial state. */
> +static void
> +with_escape_sequence (void)
> +{
> + iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
> + TEST_VERIFY_EXIT (c != (iconv_t) -1);
> +
> + char in[] = "\e$(O+D\e(B";
> + char *inbuf = in;
> + size_t inleft = strlen (in);
> + char out[3]; /* Space for one output character. */
> + char *outbuf;
> + size_t outleft;
> +
> + outbuf = out;
> + outleft = sizeof (out);
> + TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
> + TEST_COMPARE (errno, E2BIG);
> + TEST_COMPARE (inleft, 3);
> + TEST_COMPARE (inbuf - in, strlen (in) - 3);
> + TEST_COMPARE (outleft, sizeof (out) - 2);
> + TEST_COMPARE (outbuf - out, 2);
> + TEST_COMPARE (out[0] & 0xff, 0xc3);
> + TEST_COMPARE (out[1] & 0xff, 0xa6);
> +
> + /* Return to the initial shift state, producing the pending
> + character. */
> + outbuf = out;
> + outleft = sizeof (out);
> + TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), 0);
> + TEST_COMPARE (inleft, 0);
> + TEST_COMPARE (inbuf - in, strlen (in));
> + TEST_COMPARE (outleft, sizeof (out) - 2);
> + TEST_COMPARE (outbuf - out, 2);
> + TEST_COMPARE (out[0] & 0xff, 0xcc);
> + TEST_COMPARE (out[1] & 0xff, 0x80);
> +
> + /* Nothing should be flushed the second time. */
> + outbuf = out;
> + outleft = sizeof (out);
> + TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
> + TEST_COMPARE (outleft, sizeof (out));
> + TEST_COMPARE (outbuf - out, 0);
> + TEST_COMPARE (out[0] & 0xff, 0xcc);
> + TEST_COMPARE (out[1] & 0xff, 0x80);
> +
> + TEST_COMPARE (iconv_close (c), 0);
> +}
> +
> +/* Use an explicit flush to return to the initial state. */
> +static void
> +with_flush (void)
> +{
> + iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
> + TEST_VERIFY_EXIT (c != (iconv_t) -1);
> +
> + char in[] = "\e$(O+D";
> + char *inbuf = in;
> + size_t inleft = strlen (in);
> + char out[3]; /* Space for one output character. */
> + char *outbuf;
> + size_t outleft;
> +
> + outbuf = out;
> + outleft = sizeof (out);
> + TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
> + TEST_COMPARE (errno, E2BIG);
> + TEST_COMPARE (inleft, 0);
> + TEST_COMPARE (inbuf - in, strlen (in));
> + TEST_COMPARE (outleft, sizeof (out) - 2);
> + TEST_COMPARE (outbuf - out, 2);
> + TEST_COMPARE (out[0] & 0xff, 0xc3);
> + TEST_COMPARE (out[1] & 0xff, 0xa6);
> +
> + /* Flush the pending character. */
> + outbuf = out;
> + outleft = sizeof (out);
> + TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
> + TEST_COMPARE (outleft, sizeof (out) - 2);
> + TEST_COMPARE (outbuf - out, 2);
> + TEST_COMPARE (out[0] & 0xff, 0xcc);
> + TEST_COMPARE (out[1] & 0xff, 0x80);
> +
> + /* Nothing should be flushed the second time. */
> + outbuf = out;
> + outleft = sizeof (out);
> + TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
> + TEST_COMPARE (outleft, sizeof (out));
> + TEST_COMPARE (outbuf - out, 0);
> + TEST_COMPARE (out[0] & 0xff, 0xcc);
> + TEST_COMPARE (out[1] & 0xff, 0x80);
> +
> + TEST_COMPARE (iconv_close (c), 0);
> +}
> +
> +static int
> +do_test (void)
> +{
> + with_escape_sequence ();
> + with_flush ();
> + return 0;
> +}
> +
> +#include <support/test-driver.c>
> diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c
> index 3eaa847ad9..c8ba88cdc9 100644
> --- a/iconvdata/iso-2022-jp-3.c
> +++ b/iconvdata/iso-2022-jp-3.c
> @@ -67,23 +67,34 @@ enum
> CURRENT_SEL_MASK = 7 << 3
> };
>
> -/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state
> - also contains the last two bytes to be output, shifted by 6 bits, and a
> - one-bit indicator whether they must be preceded by the shift sequence,
> - in bit 22. */
> +/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the
> + state also contains the last two bytes to be output, shifted by 6
> + bits, and a one-bit indicator whether they must be preceded by the
> + shift sequence, in bit 22. During ISO-2022-JP-3 to UCS-4
> + conversion, COUNT may also contain a non-zero pending wide
> + character, shifted by six bits. This happens for certain inputs in
> + JISX0213_1_2004_set and JISX0213_2_set if the second wide character
> + in a combining sequence cannot be written because the buffer is
> + full. */
>
> /* Since this is a stateful encoding we have to provide code which resets
> the output state to the initial state. This has to be done during the
> flushing. */
> #define EMIT_SHIFT_TO_INIT \
> - if ((data->__statep->__count & ~7) != ASCII_set) \
> + if (data->__statep->__count != ASCII_set) \
> { \
> if (FROM_DIRECTION) \
> { \
> - /* It's easy, we don't have to emit anything, we just reset the \
> - state for the input. */ \
> - data->__statep->__count &= 7; \
> - data->__statep->__count |= ASCII_set; \
> + if (__glibc_likely (outbuf + 4 <= outend)) \
> + { \
> + /* Write out the last character. */ \
> + *((uint32_t *) outbuf) = data->__statep->__count >> 6; \
> + outbuf += sizeof (uint32_t); \
> + data->__statep->__count = ASCII_set; \
> + } \
> + else \
> + /* We don't have enough room in the output buffer. */ \
> + status = __GCONV_FULL_OUTPUT; \
> } \
> else \
> { \
> @@ -151,7 +162,21 @@ enum
> #define LOOPFCT FROM_LOOP
> #define BODY \
> { \
> - uint32_t ch = *inptr; \
> + uint32_t ch; \
> + \
> + /* Output any pending character. */ \
> + ch = set >> 6; \
> + if (__glibc_unlikely (ch != 0)) \
> + { \
> + put32 (outptr, ch); \
> + outptr += 4; \
> + /* Remove the pending character, but preserve state bits. */ \
> + set &= (1 << 6) - 1; \
> + continue; \
> + } \
> + \
> + /* Otherwise read the next input byte. */ \
> + ch = *inptr; \
> \
> /* Recognize escape sequences. */ \
> if (__glibc_unlikely (ch == ESC)) \
> @@ -297,21 +322,25 @@ enum
> uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
> uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
> \
> + inptr += 2; \
> + \
> + put32 (outptr, u1); \
> + outptr += 4; \
> + \
> /* See whether we have room for two characters. */ \
> - if (outptr + 8 <= outend) \
> + if (outptr + 4 <= outend) \
> { \
> - inptr += 2; \
> - put32 (outptr, u1); \
> - outptr += 4; \
> put32 (outptr, u2); \
> outptr += 4; \
> continue; \
> } \
> - else \
> - { \
> - result = __GCONV_FULL_OUTPUT; \
> - break; \
> - } \
> + \
> + /* Otherwise store only the first character now, and \
> + put the second one into the queue. */ \
> + set |= u2 << 6; \
> + /* Tell the caller why we terminate the loop. */ \
> + result = __GCONV_FULL_OUTPUT; \
> + break; \
> } \
> \
> inptr += 2; \
>
More information about the Libc-alpha mailing list