[PING2][PATCH v2] setlocale: Fail if iconv module for charset is not present [BZ #27996]
Siddhesh Poyarekar
siddhesh@gotplt.org
Tue Aug 17 02:58:29 GMT 2021
Ping!
On 8/11/21 1:12 PM, Siddhesh Poyarekar via Libc-alpha wrote:
> Ping!
>
> On 7/20/21 8:06 AM, Siddhesh Poyarekar via Libc-alpha wrote:
>> setlocale currently succeeds even if the requested locale uses a
>> charset that does not have a converter module installed. Check for
>> the existence of a converter for the charset (either the one requested
>> through the input name or the one needed by the selected locale file)
>> and fail if there is none.
>>
>> The new test tst-invalid-charset verifies that loading the test5 and test6
>> locales fails because both locales have charsets without a converter,
>> viz. test5 and test6 respectively. Also, test6.c has been removed as
>> it was unused.
>> ---
>> Changes from v1:
>> - Find full transformation paths both ways instead of merely looking for
>> a FROM converter.
>>
>> locale/findlocale.c | 77 ++++++++++++-----
>> localedata/Makefile | 12 ++-
>> localedata/tests/test6.c | 137 -------------------------------
>> localedata/tst-invalid-charset.c | 31 +++++++
>> 4 files changed, 95 insertions(+), 162 deletions(-)
>> delete mode 100644 localedata/tests/test6.c
>> create mode 100644 localedata/tst-invalid-charset.c
>>
>> diff --git a/locale/findlocale.c b/locale/findlocale.c
>> index ab09122b0c..7ccc98cd8b 100644
>> --- a/locale/findlocale.c
>> +++ b/locale/findlocale.c
>> @@ -98,6 +98,30 @@ valid_locale_name (const char *name)
>> return 1;
>> }
>> +/* Return true if we have gconv modules to transform between the
>> INTERNAL
>> + encoding and CODESET. */
>> +static bool
>> +codeset_has_module (const char *codeset)
>> +{
>> + struct __gconv_step *steps;
>> + size_t nsteps;
>> +
>> + char *ccodeset = (char *) alloca (strlen (codeset) + 3);
>> + strip (ccodeset, codeset);
>> +
>> + if (__gconv_find_transform ("INTERNAL", ccodeset, &steps, &nsteps, 0)
>> + != __GCONV_OK)
>> + return false;
>> + __gconv_close_transform (steps, nsteps);
>> +
>> + if (__gconv_find_transform (ccodeset, "INTERNAL", &steps, &nsteps, 0)
>> + != __GCONV_OK)
>> + return false;
>> + __gconv_close_transform (steps, nsteps);
>> +
>> + return true;
>> +}
>> +
>> struct __locale_data *
>> _nl_find_locale (const char *locale_path, size_t locale_path_len,
>> int category, const char **name)
>> @@ -200,6 +224,10 @@ _nl_find_locale (const char *locale_path, size_t
>> locale_path_len,
>> /* Memory allocate problem. */
>> return NULL;
>> + /* The requested codeset does not have a converter, don't use it. */
>> + if (codeset != NULL && !codeset_has_module (codeset))
>> + return NULL;
>> +
>> /* If exactly this locale was already asked for we have an entry with
>> the complete name. */
>> locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category],
>> @@ -248,6 +276,33 @@ _nl_find_locale (const char *locale_path, size_t
>> locale_path_len,
>> return NULL;
>> }
>> + /* Get the codeset information from the locale file. */
>> + static const int codeset_idx[] =
>> + {
>> + [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
>> + [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
>> + [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
>> + [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
>> + [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
>> + [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
>> + [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
>> + [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
>> + [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
>> + [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
>> + [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
>> + [__LC_IDENTIFICATION] = _NL_ITEM_INDEX
>> (_NL_IDENTIFICATION_CODESET)
>> + };
>> + const struct __locale_data *data;
>> + const char *locale_codeset;
>> +
>> + data = (const struct __locale_data *) locale_file->data;
>> + locale_codeset = (const char *)
>> data->values[codeset_idx[category]].string;
>> + assert (locale_codeset != NULL);
>> +
>> + /* The locale codeset does not have a converter, don't use it. */
>> + if (locale_codeset[0] != '\0' && !codeset_has_module (locale_codeset))
>> + return NULL;
>> +
>> /* The LC_CTYPE category allows to check whether a locale is really
>> usable. If the locale name contains a charset name and the
>> charset name used in the locale (present in the LC_CTYPE data) is
>> @@ -256,31 +311,9 @@ _nl_find_locale (const char *locale_path, size_t
>> locale_path_len,
>> in the locale name. */
>> if (codeset != NULL)
>> {
>> - /* Get the codeset information from the locale file. */
>> - static const int codeset_idx[] =
>> - {
>> - [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
>> - [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
>> - [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
>> - [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
>> - [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
>> - [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
>> - [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
>> - [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
>> - [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
>> - [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
>> - [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
>> - [__LC_IDENTIFICATION] = _NL_ITEM_INDEX
>> (_NL_IDENTIFICATION_CODESET)
>> - };
>> - const struct __locale_data *data;
>> - const char *locale_codeset;
>> char *clocale_codeset;
>> char *ccodeset;
>> - data = (const struct __locale_data *) locale_file->data;
>> - locale_codeset =
>> - (const char *) data->values[codeset_idx[category]].string;
>> - assert (locale_codeset != NULL);
>> /* Note the length of the allocated memory: +3 for up to two
>> slashes
>> and the NUL byte. */
>> clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3);
>> diff --git a/localedata/Makefile b/localedata/Makefile
>> index 14e04cd3c5..2af399ec51 100644
>> --- a/localedata/Makefile
>> +++ b/localedata/Makefile
>> @@ -124,11 +124,13 @@ test-input := \
>> test-input-data = $(addsuffix .in, $(test-input))
>> test-output := $(foreach s, .out .xout, \
>> $(addsuffix $s, $(basename $(test-input))))
>> +# Note that tst-invalid-charset depends on test5 and test6 being
>> locales that
>> +# do not have valid charset converters.
>> ld-test-names := test1 test2 test3 test4 test5 test6 test7
>> ld-test-srcs := $(addprefix tests/,$(addsuffix .cm,$(ld-test-names)) \
>> $(addsuffix .def,$(ld-test-names)) \
>> $(addsuffix .ds,test5 test6) \
>> - test6.c trans.def)
>> + trans.def)
>> fmon-tests = n01y12 n02n40 n10y31 n11y41 n12y11 n20n32 n30y20 n41n00 \
>> y01y10 y02n22 y22n42 y30y21 y32n31 y40y00 y42n21
>> @@ -158,7 +160,7 @@ tests = $(locale_test_suite) tst-digits
>> tst-setlocale bug-iconv-trans \
>> tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4
>> tst-mbswcs5 \
>> tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
>> tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2
>> tst-setlocale3 \
>> - tst-wctype tst-iconv-math-trans
>> + tst-wctype tst-iconv-math-trans tst-invalid-charset
>> tests-static = bug-setlocale1-static
>> tests += $(tests-static)
>> ifeq (yes,$(build-shared))
>> @@ -401,7 +403,10 @@ $(objpfx)tst-langinfo-setlocale-static.out:
>> tst-langinfo.sh \
>> '$(run-program-env)' '$(test-program-cmd-after-env)' > $@; \
>> $(evaluate-test)
>> +# These tests depend on tst-locale because they use the locales
>> compiled by
>> +# that test.
>> $(objpfx)tst-digits.out: $(objpfx)tst-locale.out
>> +$(objpfx)tst-invalid-charset.out: $(objpfx)tst-locale.out
>> $(objpfx)tst-mbswcs6.out: $(addprefix $(objpfx),$(CTYPE_FILES))
>> endif
>> @@ -461,7 +466,8 @@ $(objpfx)mtrace-tst-leaks.out: $(objpfx)tst-leaks.out
>> $(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@; \
>> $(evaluate-test)
>> -bug-setlocale1-ENV-only = LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
>> +bug-setlocale1-ENV-only = GCONV_PATH=$(common-objpfx)iconvdata \
>> + LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
>> bug-setlocale1-static-ENV-only = $(bug-setlocale1-ENV-only)
>> $(objdir)/iconvdata/gconv-modules:
>> diff --git a/localedata/tests/test6.c b/localedata/tests/test6.c
>> deleted file mode 100644
>> index edb5fe4a5f..0000000000
>> --- a/localedata/tests/test6.c
>> +++ /dev/null
>> @@ -1,137 +0,0 @@
>> -/* Test program for character classes and mappings.
>> - Copyright (C) 1999-2021 Free Software Foundation, Inc.
>> - This file is part of the GNU C Library.
>> - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
>> -
>> - The GNU C Library is free software; you can redistribute it and/or
>> - modify it under the terms of the GNU Lesser General Public
>> - License as published by the Free Software Foundation; either
>> - version 2.1 of the License, or (at your option) any later version.
>> -
>> - The GNU C Library is distributed in the hope that it will be useful,
>> - but WITHOUT ANY WARRANTY; without even the implied warranty of
>> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> - Lesser General Public License for more details.
>> -
>> - You should have received a copy of the GNU Lesser General Public
>> - License along with the GNU C Library; if not, see
>> - <https://www.gnu.org/licenses/>. */
>> -
>> -#include <ctype.h>
>> -#include <locale.h>
>> -#include <wchar.h>
>> -
>> -
>> -int
>> -main (void)
>> -{
>> - const char lower[] = "abcdefghijklmnopqrstuvwxyz";
>> - const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
>> -#define LEN (sizeof (upper) - 1)
>> - const wchar_t wlower[] = L"abcdefghijklmnopqrstuvwxyz";
>> - const wchar_t wupper[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
>> - int i;
>> - int result = 0;
>> -
>> - setlocale (LC_ALL, "test6");
>> -
>> - for (i = 0; i < LEN; ++i)
>> - {
>> - /* Test basic table handling (basic == not more than 256
>> characters).
>> - The charmaps swaps the normal lower-upper case meaning of the
>> - ASCII characters used in the source code while the Unicode mapping
>> - in the repertoire map has the normal correspondents. This test
>> - shows the independence of the tables for `char' and `wchar_t'
>> - characters. */
>> -
>> - if (islower (lower[i]))
>> - {
>> - printf ("islower ('%c') false\n", lower[i]);
>> - result = 1;
>> - }
>> - if (! isupper (lower[i]))
>> - {
>> - printf ("isupper ('%c') false\n", lower[i]);
>> - result = 1;
>> - }
>> -
>> - if (! islower (upper[i]))
>> - {
>> - printf ("islower ('%c') false\n", upper[i]);
>> - result = 1;
>> - }
>> - if (isupper (upper[i]))
>> - {
>> - printf ("isupper ('%c') false\n", upper[i]);
>> - result = 1;
>> - }
>> -
>> - if (toupper (lower[i]) != lower[i])
>> - {
>> - printf ("toupper ('%c') false\n", lower[i]);
>> - result = 1;
>> - }
>> - if (tolower (lower[i]) != upper[i])
>> - {
>> - printf ("tolower ('%c') false\n", lower[i]);
>> - result = 1;
>> - }
>> -
>> - if (tolower (upper[i]) != upper[i])
>> - {
>> - printf ("tolower ('%c') false\n", upper[i]);
>> - result = 1;
>> - }
>> - if (toupper (upper[i]) != lower[i])
>> - {
>> - printf ("toupper ('%c') false\n", upper[i]);
>> - result = 1;
>> - }
>> -
>> - if (iswlower (wupper[i]))
>> - {
>> - printf ("iswlower (L'%c') false\n", upper[i]);
>> - result = 1;
>> - }
>> - if (! iswupper (wupper[i]))
>> - {
>> - printf ("iswupper (L'%c') false\n", upper[i]);
>> - result = 1;
>> - }
>> -
>> - if (iswupper (wlower[i]))
>> - {
>> - printf ("iswupper (L'%c') false\n", lower[i]);
>> - result = 1;
>> - }
>> - if (! iswlower (wlower[i]))
>> - {
>> - printf ("iswlower (L'%c') false\n", lower[i]);
>> - result = 1;
>> - }
>> -
>> - if (towupper (wlower[i]) != wupper[i])
>> - {
>> - printf ("towupper ('%c') false\n", lower[i]);
>> - result = 1;
>> - }
>> - if (towlower (wlower[i]) != wlower[i])
>> - {
>> - printf ("towlower ('%c') false\n", lower[i]);
>> - result = 1;
>> - }
>> -
>> - if (towlower (wupper[i]) != wlower[i])
>> - {
>> - printf ("towlower ('%c') false\n", upper[i]);
>> - result = 1;
>> - }
>> - if (towupper (wupper[i]) != wupper[i])
>> - {
>> - printf ("towupper ('%c') false\n", upper[i]);
>> - result = 1;
>> - }
>> - }
>> -
>> - return result;
>> -}
>> diff --git a/localedata/tst-invalid-charset.c
>> b/localedata/tst-invalid-charset.c
>> new file mode 100644
>> index 0000000000..46a5198c66
>> --- /dev/null
>> +++ b/localedata/tst-invalid-charset.c
>> @@ -0,0 +1,31 @@
>> +/* Test program to verify that setlocale fails for charsets that do
>> not have a
>> + converter.
>> + Copyright (C) 2021 Free Software Foundation, Inc.
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +#include <ctype.h>
>> +#include <locale.h>
>> +#include <wchar.h>
>> +
>> +
>> +int
>> +main (void)
>> +{
>> + /* Fail if setlocale succeeds for any of these locales. */
>> + return (setlocale (LC_ALL, "test5") != NULL
>> + || setlocale (LC_ALL, "test6") != NULL);
>> +}
>>
>
More information about the Libc-alpha
mailing list