This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] locale: Make _nl_C_locobj_ptr usable outside of libc


On 09/16/2016 01:53 PM, Florian Weimer wrote:
I need something like this to fix incorrect locale dependence in
libresolv (bug 19687).

I tested this with the fixes, including one for nscd, which uses
__libc_C_locobj@GLIBC_PRIVATE.  libresolv uses _nl_C_locobj@GLIBC_PRIVATE.

I'm not entirely sure if we should use the function approach everywhere
instead.  But as far as I can tell, this patch does not lead to the
allocation of space for a _nl_C_locobj copy in libresolv (which we would
not want because it is wasteful).

An alternative would be to add variants of functions such as those in
<ctype.h> and strcasecmp/strncasecmp which are always tied to the C
locale.  This would be more efficient because access to the locale
object is easier, or the variants can even be optimized inline (such as
isblank).

This patch implements the alternative approach. With it, it is no longer necessary to export the C locale object.

I added __isdigit_C and __isxdigit_C implementations, although maybe they are not required because isdigit and isxdigit can be changed.

Beyond the new test, I checked that the approach works by converting a few configuration file parsers, and fixing the domain name comparisons in libresolv.

Florian

Implement character classification and conversion tied to the C locale

2016-09-19  Florian Weimer  <fweimer@redhat.com>

	Character classification and conversion tied to the C locale.
	* ctype/C-locale.h: New file.
	* string/strcasecmp_C.c: Likewise.
	* string/strncasecmp_C.c: Likewise.
	* string/Makefile (routines): Add strcasecmp_C, strncasecmp_C.
	* string/Versions (GLIBC_PRIVATE): Add __strcasecmp_C,
	__strncasecmp_C.

2016-09-19  Florian Weimer  <fweimer@redhat.com>

	* tst-C-locale.c (get_classification_ctype): New test.
	* Makefile (tests): Add it.

diff --git a/ctype/C-locale.h b/ctype/C-locale.h
new file mode 100644
index 0000000..ba41a10
--- /dev/null
+++ b/ctype/C-locale.h
@@ -0,0 +1,111 @@
+/* Character classification and string comparison in the C locale.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _C_LOCALE_H
+#define _C_LOCALE_H 1
+
+#include <stddef.h>
+
+static inline int
+__isalnum_C (int ch)
+{
+  return ('0' <= ch && ch <= '9')
+    || ('A' <= ch && ch <= 'Z')
+    || ('a' <= ch && ch <= 'z');
+}
+
+static inline int
+__isalpha_C (int ch)
+{
+  return ('A' <= ch && ch <= 'Z')
+    || ('a' <= ch && ch <= 'z');
+}
+
+static inline int
+__isblank_C (int ch)
+{
+  return ch == ' ' || ch == '\t';
+}
+
+static inline int
+__isdigit_C (int ch)
+{
+  return '0' <= ch && ch <= '9';
+}
+
+static inline int
+__islower_C (int ch)
+{
+  return 'a' <= ch && ch <= 'z';
+}
+
+static inline int
+__isspace_C (int ch)
+{
+  return ch == ' '
+    || ch == '\f'
+    || ch == '\n'
+    || ch == '\r'
+    || ch == '\t'
+    || ch == '\v';
+}
+
+static inline int
+__isupper_C (int ch)
+{
+  return 'A' <= ch && ch <= 'Z';
+}
+
+static inline int
+__isxdigit_C (int ch)
+{
+  return ('0' <= ch && ch <= '9')
+    || ('A' <= ch && ch <= 'F')
+    || ('a' <= ch && ch <= 'f');
+}
+
+static inline int
+__tolower_C (int ch)
+{
+  if ('A' <= ch && ch <= 'Z')
+    return ch - 'A' + 'a';
+  /* Required by the standard. */
+  if (ch == -1)
+    return ch;
+  /* Required to match tolower behavior.  */
+  return ch & 0xff;
+}
+
+static inline int
+__toupper_C (int ch)
+{
+  if ('a' <= ch && ch <= 'z')
+    return ch - 'a' + 'A';
+  /* Required by the standard. */
+  if (ch == -1)
+    return ch;
+  /* Required to match toupper behavior.  */
+  return ch & 0xff;
+}
+
+int __strcasecmp_C (const char *, const char *);
+libc_hidden_proto (__strcasecmp_C)
+int __strncasecmp_C (const char *, const char *, size_t);
+libc_hidden_proto (__strncasecmp_C)
+
+#endif /* _C_LOCALE_H */
diff --git a/localedata/ChangeLog b/localedata/ChangeLog
index f5c58ae..a9354fe 100644
--- a/localedata/ChangeLog
+++ b/localedata/ChangeLog
@@ -1,3 +1,8 @@
+2016-09-19  Florian Weimer  <fweimer@redhat.com>
+
+	* tst-C-locale.c (get_classification_ctype): New test.
+	* Makefile (tests): Add it.
+
 2016-08-24  Ernestas Kulik  <ernestas.kulik@gmail.com>
 
 	[BZ #20497]
diff --git a/localedata/Makefile b/localedata/Makefile
index 4ecb192..21565d9 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -75,7 +75,7 @@ tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \
 	tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 tst-mbswcs5 \
 	tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
 	tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 tst-setlocale3 \
-	tst-wctype
+	tst-wctype tst-C-locale
 tests-static = bug-setlocale1-static
 tests += $(tests-static)
 ifeq (yes,$(build-shared))
diff --git a/localedata/tst-C-locale.c b/localedata/tst-C-locale.c
new file mode 100644
index 0000000..2408abb
--- /dev/null
+++ b/localedata/tst-C-locale.c
@@ -0,0 +1,208 @@
+/* Tests for character classification and string comparison in the C locale.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <ctype/C-locale.h>
+#include <ctype.h>
+#include <locale.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+struct classification
+{
+  bool alnum;
+  bool alpha;
+  bool blank;
+  bool digit;
+  bool lower;
+  bool space;
+  bool upper;
+  bool xdigit;
+  int as_lower;
+  int as_upper;
+};
+
+#define STORE_RESULT(name) result[ch + 128].name = MANGLE (name) (ch)
+#define GET_CLASSIFICATION             \
+  for (int ch = -128; ch < 256; ++ch)  \
+    {                                  \
+      STORE_RESULT (alnum);            \
+      STORE_RESULT (alpha);            \
+      STORE_RESULT (blank);            \
+      STORE_RESULT (digit);            \
+      STORE_RESULT (lower);            \
+      STORE_RESULT (space);            \
+      STORE_RESULT (upper);            \
+      STORE_RESULT (xdigit);           \
+    }
+
+static void
+get_classification_ctype (struct classification *result)
+{
+#define MANGLE(name) is##name
+  GET_CLASSIFICATION
+#undef MANGLE
+  for (int ch = -128; ch < 256; ++ch)
+    {
+      result[ch + 128].as_lower = tolower (ch);
+      result[ch + 128].as_upper = toupper (ch);
+    }
+}
+
+static void
+get_classification_C_locale (struct classification *result)
+{
+#define MANGLE(name) __is##name##_C
+  GET_CLASSIFICATION
+#undef MANGLE
+  for (int ch = -128; ch < 256; ++ch)
+    {
+      result[ch + 128].as_lower = __tolower_C (ch);
+      result[ch + 128].as_upper = __toupper_C (ch);
+    }
+}
+
+#undef GET_CLASSIFICATION
+#undef STORE_RESULT
+
+static bool errors;
+
+static void
+check_classification_1 (const char *context, int ch, const char *category,
+                        bool expected, bool actual)
+{
+  if (actual != expected)
+    {
+      printf ("error: %s: __is%s_C (%d): expected: %d actual: %d\n",
+              context, category, ch, expected, actual);
+      errors = true;
+    }
+}
+
+static void
+check_caseconv_1 (const char *context, int ch, const char *op,
+                  int expected, int actual)
+{
+  if (actual != expected)
+    {
+      printf ("error: %s: __%s_C (%d): expected: %d actual: %d\n",
+              context, op, ch, expected, actual);
+      errors = true;
+    }
+}
+
+static void
+check_classification (const char *context,
+                      struct classification *expected,
+                      struct classification *actual)
+{
+  for (int ch = -128; ch < 256; ++ch)
+    {
+#define CHECK(name)                                     \
+      check_classification_1 (context, ch, #name,       \
+                              expected[128 + ch].name,  \
+                              actual[128 + ch].name)
+      CHECK (alnum);
+      CHECK (alpha);
+      CHECK (blank);
+      CHECK (digit);
+      CHECK (lower);
+      CHECK (space);
+      CHECK (upper);
+      CHECK (xdigit);
+#undef CHECK
+      check_caseconv_1 (context, ch, "tolower",
+                        expected[128 + ch].as_lower,
+                        actual[128 + ch].as_lower);
+      check_caseconv_1 (context, ch, "toupper",
+                        expected[128 + ch].as_upper,
+                        actual[128 + ch].as_upper);
+    }
+}
+
+static int
+sign (int n)
+{
+  if (n < 0)
+    return -1;
+  else if (n > 0)
+    return 1;
+  return 0;
+}
+
+static void
+get_casecmp (signed char *result, int cmp (const char *, const char *))
+{
+  char left[2] = { 0, 0 };
+  char right[2] = { 0, 0 };
+  for (int l = 0; l < 256; ++l)
+    {
+      left[0] = l;
+      for (int r = 0; r < 256; ++r)
+        {
+          right[0] = r;
+          result[l * 256 + r] = sign (cmp (left, right));
+        }
+    }
+}
+
+static int
+do_test (void)
+{
+  /* Tests while running under the C locale.  */
+  struct classification class_expected[128 + 256];
+  get_classification_ctype (class_expected);
+  struct classification class_before[128 + 256];
+  get_classification_C_locale (class_before);
+  check_classification ("before setlocale", class_expected, class_before);
+
+  signed char cmp_expected[256 * 256];
+  get_casecmp (cmp_expected, strcasecmp);
+  signed char cmp_before[256 * 256];
+  get_casecmp (cmp_before, __strcasecmp_C);
+  if (memcmp (cmp_before, cmp_expected, sizeof (cmp_before)) != 0)
+    {
+      printf ("error: strcasecmp_C incorrect before setlocale\n");
+      return 1;
+    }
+
+  /* Switch locale.  */
+  if (setlocale (LC_ALL, "en_US.ISO-8859-1") == NULL)
+    {
+      printf ("error: setlocale: %m\n");
+      return 1;
+    }
+
+  /* Tests while running under a non-C locale.  */
+  struct classification class_after[128 + 256];
+  get_classification_C_locale (class_after);
+  check_classification ("after setlocale", class_expected, class_after);
+
+  signed char cmp_after[256 * 256];
+  get_casecmp (cmp_after, __strcasecmp_C);
+  if (memcmp (cmp_after, cmp_expected, sizeof (cmp_after)) != 0)
+    {
+      printf ("error: strcasecmp_C incorrect after setlocale\n");
+      return 1;
+    }
+
+  return errors;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/string/Makefile b/string/Makefile
index 69d3f80..d021805 100644
--- a/string/Makefile
+++ b/string/Makefile
@@ -41,7 +41,8 @@ routines	:= strcat strchr strcmp strcoll strcpy strcspn		\
 				     addsep replace)			\
 		   envz basename					\
 		   strcoll_l strxfrm_l string-inlines memrchr		\
-		   xpg-strerror strerror_l
+		   xpg-strerror strerror_l 				\
+		   strcasecmp_C strncasecmp_C
 
 strop-tests	:= memchr memcmp memcpy memmove mempcpy memset memccpy	\
 		   stpcpy stpncpy strcat strchr strcmp strcpy strcspn	\
diff --git a/string/Versions b/string/Versions
index 475c1fd..17d4007 100644
--- a/string/Versions
+++ b/string/Versions
@@ -82,4 +82,10 @@ libc {
   }
   GLIBC_2.24 {
   }
+
+  GLIBC_PRIVATE {
+    # Used from other libraries.
+    __strcasecmp_C;
+    __strncasecmp_C;
+  }
 }
diff --git a/string/strcasecmp_C.c b/string/strcasecmp_C.c
new file mode 100644
index 0000000..b99fb52
--- /dev/null
+++ b/string/strcasecmp_C.c
@@ -0,0 +1,30 @@
+/* strcasecmp tied to the C locale.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <ctype/C-locale.h>
+#include <locale.h>
+#include <string.h>
+
+/* Variant of strcasecmp which always uses the C locale.  */
+int
+__strcasecmp_C (const char *left, const char *right)
+{
+  return __strcasecmp_l (left, right, _nl_C_locobj_ptr);
+}
+
+libc_hidden_def (__strcasecmp_C)
diff --git a/string/strncasecmp_C.c b/string/strncasecmp_C.c
new file mode 100644
index 0000000..08c1516
--- /dev/null
+++ b/string/strncasecmp_C.c
@@ -0,0 +1,30 @@
+/* strncasecmp tied to the C locale.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <ctype/C-locale.h>
+#include <locale.h>
+#include <string.h>
+
+/* Variant of strncasecmp which always uses the C locale.  */
+int
+__strncasecmp_C (const char *left, const char *right, size_t length)
+{
+  return __strncasecmp_l (left, right, length, _nl_C_locobj_ptr);
+}
+
+libc_hidden_def (__strncasecmp_C)

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]