[PATCH v2] iconv: Fix matching of multi-character transliterations (bug 31859)
Andreas Schwab
schwab@suse.de
Mon Jun 10 13:33:35 GMT 2024
Only return __GCONV_INCOMPLETE_INPUT for a partial match when the end of
the input buffer is reached. Otherwise it is a non-match, and other
patterns should be tried.
---
iconv/Makefile | 13 ++++++++++
iconv/gconv_trans.c | 2 +-
iconv/tst-translit-locale | 10 ++++++++
iconv/tst-translit-mchar.c | 47 ++++++++++++++++++++++++++++++++++++
iconv/tst-translit-mchar.sh | 48 +++++++++++++++++++++++++++++++++++++
5 files changed, 119 insertions(+), 1 deletion(-)
create mode 100644 iconv/tst-translit-locale
create mode 100644 iconv/tst-translit-mchar.c
create mode 100644 iconv/tst-translit-mchar.sh
diff --git a/iconv/Makefile b/iconv/Makefile
index 63afc853ff..e93322da85 100644
--- a/iconv/Makefile
+++ b/iconv/Makefile
@@ -57,6 +57,10 @@ tests = \
tst-iconv-opt \
# tests
+test-srcs := \
+ tst-translit-mchar \
+ # test-srcs
+
others = iconv_prog iconvconfig
install-others-programs = $(inst_bindir)/iconv
install-sbin = iconvconfig
@@ -73,6 +77,7 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left))
ifeq ($(run-built-tests),yes)
xtests-special += $(objpfx)test-iconvconfig.out
tests-special += $(objpfx)tst-iconv_prog.out
+tests-special += $(objpfx)tst-translit-mchar.out
endif
# Make a copy of the file because gconv module names are constructed
@@ -126,3 +131,11 @@ $(objpfx)tst-iconv_prog.out: tst-iconv_prog.sh $(objpfx)iconv_prog
$(BASH) $< $(common-objdir) '$(test-wrapper-env)' \
'$(run-program-env)' > $@; \
$(evaluate-test)
+
+$(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \
+ $(objpfx)tst-translit-mchar \
+ tst-translit-locale
+ $(SHELL) $< $(common-objpfx) '$(run-program-prefix-before-env)' \
+ '$(run-program-env)' '$(run-program-prefix-after-env)' \
+ $< > $@; \
+ $(evaluate-test)
diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c
index 08b7a3f71d..44f0fd849a 100644
--- a/iconv/gconv_trans.c
+++ b/iconv/gconv_trans.c
@@ -150,7 +150,7 @@ __gconv_transliterate (struct __gconv_step *step,
/* Nothing found, continue searching. */
}
- else if (cnt > 0)
+ else if (cnt > 0 && winbuf + cnt == winbufend)
/* This means that the input buffer contents matches a prefix of
an entry. Since we cannot match it unless we get more input,
we will tell the caller about it. */
diff --git a/iconv/tst-translit-locale b/iconv/tst-translit-locale
new file mode 100644
index 0000000000..712b08628a
--- /dev/null
+++ b/iconv/tst-translit-locale
@@ -0,0 +1,10 @@
+# Test multi-character transliteration rule
+
+LC_CTYPE
+copy "POSIX"
+
+translit_start
+"ÄÄ" "AA"
+translit_end
+
+END LC_CTYPE
diff --git a/iconv/tst-translit-mchar.c b/iconv/tst-translit-mchar.c
new file mode 100644
index 0000000000..b3e54db6a2
--- /dev/null
+++ b/iconv/tst-translit-mchar.c
@@ -0,0 +1,47 @@
+/* Test multi-character transliterations.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <locale.h>
+#include <iconv.h>
+#include <support/support.h>
+#include <support/check.h>
+
+/* Convert input that only partially matches a multi-character rule.  */
+static int
+do_test (void)
+{
+  /* An input sequence with a common prefix with a transliteration rule.  */
+  char input[] = "\xc3\x84\xc3\x85";
+  char *inptr = input;
+  char outbuf[10];
+  char *outptr = outbuf;
+  size_t inlen = sizeof (input), outlen = sizeof (outbuf);
+
+  xsetlocale (LC_CTYPE, "tst-translit");
+
+  iconv_t cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
+  TEST_VERIFY_EXIT (cd != (iconv_t) -1);
+
+  /* This call used to loop infinitely.  */
+  size_t n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
+  iconv_close (cd);
+  /* Non-ASCII input cannot convert reversibly to ASCII, so 0 is a bug.  */
+  return n == 0;
+}
+
+#include <support/test-driver.c>
diff --git a/iconv/tst-translit-mchar.sh b/iconv/tst-translit-mchar.sh
new file mode 100644
index 0000000000..79efd6abc8
--- /dev/null
+++ b/iconv/tst-translit-mchar.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+# Testing of multi-character transliterations
+# Copyright (C) 2024 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+set -e
+
+common_objpfx=$1
+run_program_prefix_before_env=$2
+run_program_env=$3
+run_program_prefix_after_env=$4
+
+# Generate data files.  ret is only set when localedef exits non-zero.
+${run_program_prefix_before_env} \
+${run_program_env} \
+I18NPATH=../localedata \
+${run_program_prefix_after_env} ${common_objpfx}locale/localedef \
+--quiet -i tst-translit-locale -f UTF-8 ${common_objpfx}iconv/tst-translit || ret=$?
+if [ "${ret:-0}" -gt 1 ]; then
+  echo "FAIL: Locale compilation for tst-translit-locale failed (error $ret)."
+  exit 1
+fi
+
+set -x
+
+# Run the test.
+${run_program_prefix_before_env} \
+${run_program_env} \
+LOCPATH=${common_objpfx}iconv \
+${run_program_prefix_after_env} ${common_objpfx}iconv/tst-translit-mchar
+
+# Local Variables:
+# mode:shell-script
+# End:
--
2.45.2
--
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."
More information about the Libc-alpha
mailing list