[PATCH v2] iconv: Fix matching of multi-character transliterations (bug 31859)

Andreas Schwab schwab@suse.de
Mon Jun 10 13:33:35 GMT 2024


Only return __GCONV_INCOMPLETE_INPUT for a partial match when the end of
the input buffer is reached.  Otherwise it is a non-match, and other
patterns should be tried.
---
 iconv/Makefile              | 13 ++++++++++
 iconv/gconv_trans.c         |  2 +-
 iconv/tst-translit-locale   | 10 ++++++++
 iconv/tst-translit-mchar.c  | 47 ++++++++++++++++++++++++++++++++++++
 iconv/tst-translit-mchar.sh | 48 +++++++++++++++++++++++++++++++++++++
 5 files changed, 119 insertions(+), 1 deletion(-)
 create mode 100644 iconv/tst-translit-locale
 create mode 100644 iconv/tst-translit-mchar.c
 create mode 100644 iconv/tst-translit-mchar.sh

diff --git a/iconv/Makefile b/iconv/Makefile
index 63afc853ff..e93322da85 100644
--- a/iconv/Makefile
+++ b/iconv/Makefile
@@ -57,6 +57,10 @@ tests = \
 	tst-iconv-opt \
 	# tests
 
+test-srcs := \
+	tst-translit-mchar \
+	# test-srcs
+
 others		= iconv_prog iconvconfig
 install-others-programs	= $(inst_bindir)/iconv
 install-sbin	= iconvconfig
@@ -73,6 +77,7 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left))
 ifeq ($(run-built-tests),yes)
 xtests-special += $(objpfx)test-iconvconfig.out
 tests-special += $(objpfx)tst-iconv_prog.out
+tests-special += $(objpfx)tst-translit-mchar.out
 endif
 
 # Make a copy of the file because gconv module names are constructed
@@ -126,3 +131,11 @@ $(objpfx)tst-iconv_prog.out: tst-iconv_prog.sh $(objpfx)iconv_prog
 	$(BASH) $< $(common-objdir) '$(test-wrapper-env)' \
 		 '$(run-program-env)' > $@; \
 	$(evaluate-test)
+
+$(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \
+			$(objpfx)tst-translit-mchar \
+			tst-translit-locale
+	$(SHELL) $< $(common-objpfx) '$(run-program-prefix-before-env)' \
+		 '$(run-program-env)' '$(run-program-prefix-after-env)' \
+		 $< > $@; \
+	$(evaluate-test)
diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c
index 08b7a3f71d..44f0fd849a 100644
--- a/iconv/gconv_trans.c
+++ b/iconv/gconv_trans.c
@@ -150,7 +150,7 @@ __gconv_transliterate (struct __gconv_step *step,
 
 	  /* Nothing found, continue searching.  */
 	}
-      else if (cnt > 0)
+      else if (cnt > 0 && winbuf + cnt == winbufend)
 	/* This means that the input buffer contents matches a prefix of
 	   an entry.  Since we cannot match it unless we get more input,
 	   we will tell the caller about it.  */
diff --git a/iconv/tst-translit-locale b/iconv/tst-translit-locale
new file mode 100644
index 0000000000..712b08628a
--- /dev/null
+++ b/iconv/tst-translit-locale
@@ -0,0 +1,10 @@
+# Test multi-character transliteration rule
+
+LC_CTYPE
+copy "POSIX"
+
+translit_start
+"ÄÄ" "AA"
+translit_end
+
+END LC_CTYPE
diff --git a/iconv/tst-translit-mchar.c b/iconv/tst-translit-mchar.c
new file mode 100644
index 0000000000..b3e54db6a2
--- /dev/null
+++ b/iconv/tst-translit-mchar.c
@@ -0,0 +1,47 @@
+/* Test multi-character transliterations.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <locale.h>
+#include <iconv.h>
+#include <support/support.h>
+#include <support/check.h>
+
+static int
+do_test (void)
+{
+  iconv_t cd;
+  /* An input sequence with a common prefix with a transliteration rule.  */
+  char input[] = "\xc3\x84\xc3\x85";
+  char *inptr = input;
+  char outbuf[10];
+  char *outptr = outbuf;
+  size_t inlen = sizeof (input), outlen = sizeof (outbuf);
+  size_t n;
+
+  xsetlocale (LC_CTYPE, "tst-translit");
+
+  cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
+  TEST_VERIFY (cd != (iconv_t) -1);
+
+  /* This call used to loop infinitely.  */
+  n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
+  iconv_close (cd);
+  return n != 0;
+}
+
+#include <support/test-driver.c>
diff --git a/iconv/tst-translit-mchar.sh b/iconv/tst-translit-mchar.sh
new file mode 100644
index 0000000000..79efd6abc8
--- /dev/null
+++ b/iconv/tst-translit-mchar.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+# Testing of multi-character transliterations
+# Copyright (C) 2024 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+set -e
+
+common_objpfx=$1
+run_program_prefix_before_env=$2
+run_program_env=$3
+run_program_prefix_after_env=$4
+
+# Generate data files.
+${run_program_prefix_before_env} \
+${run_program_env} \
+I18NPATH=../localedata \
+${run_program_prefix_after_env} ${common_objpfx}locale/localedef \
+--quiet -i tst-translit-locale -f UTF-8 ${common_objpfx}iconv/tst-translit || ret=$?
+if [ $ret -ne 1 ]; then
+  echo "FAIL: Locale compilation for tst-translit-locale failed (error $ret)."
+  exit 1
+fi
+
+set -x
+
+# Run the test.
+${run_program_prefix_before_env} \
+${run_program_env} \
+LOCPATH=${common_objpfx}iconv \
+${run_program_prefix_after_env} ${common_objpfx}iconv/tst-translit-mchar
+
+# Local Variables:
+#  mode:shell-script
+# End:
-- 
2.45.2


-- 
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


More information about the Libc-alpha mailing list