This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC] Faster strspn/strcspn implementation.


Hello, 

I now look on speeding up strspn/strcspn.

Now I wrote a generic dispatch routine. I will post x64 specialization
soon. 

There are several cases that can be optimized in platform specific way:

__strspn4( s,a0,a1,a2,a3) - when needle consist at most 4 characters.
__strspn_rng(s,from,to)   - when needle is consecutive interval.
__strspn16(s,a)           - when needle consist at most 16 characters.
__strspn_arf(s,a)         - anything else.

In header string/bits/string2.h I changed implementation to call
__strspn4 for small needles.  Currently I can not persudate gcc to
recognize intervals at compile time.


Now make check fails that libc_nonshared.a does not contain my
__strspn4. Where should add this dependence?

Ondra

---
 string/Makefile           |    2 +-
 string/bits/string2.h     |  179 +++------------------------------------------
 string/strcspn.c          |   52 +-------------
 string/strcspn_internal.c |    2 +
 string/strpbrk.c          |   12 +---
 string/strspn.c           |   63 ++++++++++------
 string/strspn_internal.c  |   76 +++++++++++++++++++
 7 files changed, 133 insertions(+), 253 deletions(-)
 create mode 100644 string/strcspn_internal.c
 create mode 100644 string/strspn_internal.c

diff --git a/string/Makefile b/string/Makefile
index 740006e..581a6f3 100644
--- a/string/Makefile
+++ b/string/Makefile
@@ -28,7 +28,7 @@ routines	:= strcat strchr strcmp strcoll strcpy strcspn		\
 		   strverscmp strdup strndup				\
 		   strerror _strerror strlen strnlen			\
 		   strncat strncmp strncpy				\
-		   strrchr strpbrk strsignal strspn strstr strtok	\
+		   strrchr strpbrk strsignal strspn strspn_internal strstr strtok	\
 		   strtok_r strxfrm memchr memcmp memmove memset	\
 		   mempcpy bcopy bzero ffs ffsll stpcpy stpncpy		\
 		   strcasecmp strncase strcasecmp_l strncase_l		\
diff --git a/string/bits/string2.h b/string/bits/string2.h
index bbf05a3..cc8d9d0 100644
--- a/string/bits/string2.h
+++ b/string/bits/string2.h
@@ -925,6 +925,9 @@ __stpcpy_small (char *__dest,
 		  ? strcmp (s1, s2) : strncmp (s1, s2, n)))
 #endif
 
+size_t __strspn4  (const char *a,unsigned char a0,unsigned char a1,unsigned char a2,unsigned char a3);
+size_t __strcspn4 (const char *a,unsigned char a0,unsigned char a1,unsigned char a2,unsigned char a3);
+
 
 /* Return the length of the initial segment of S which
    consists entirely of characters not in REJECT.  */
@@ -940,11 +943,11 @@ __stpcpy_small (char *__dest,
 	 : ((__r0 = ((const char *) (reject))[0], __r0 == '\0')		      \
 	    ? strlen (s)						      \
 	    : ((__r1 = ((const char *) (reject))[1], __r1 == '\0')	      \
-	       ? __strcspn_c1 (s, __r0)					      \
+	       ? strchrnul (s,__r0) - s					      \
 	       : ((__r2 = ((const char *) (reject))[2], __r2 == '\0')	      \
-		  ? __strcspn_c2 (s, __r0, __r1)			      \
+		  ? __strcspn4 (s, __r0, __r1, 0 , 0)			      \
 		  : (((const char *) (reject))[3] == '\0'		      \
-		     ? __strcspn_c3 (s, __r0, __r1, __r2)		      \
+		     ? __strcspn4 (s, __r0, __r1, __r2 , 0)		      \
 		     : __builtin_strcspn (s, reject))))))		      \
       : __builtin_strcspn (s, reject)); })
 #  else
@@ -964,182 +967,24 @@ __stpcpy_small (char *__dest,
       : strcspn (s, reject)); })
 #  endif
 # endif
-
-__STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject);
-__STRING_INLINE size_t
-__strcspn_c1 (const char *__s, int __reject)
-{
-  register size_t __result = 0;
-  while (__s[__result] != '\0' && __s[__result] != __reject)
-    ++__result;
-  return __result;
-}
-
-__STRING_INLINE size_t __strcspn_c2 (const char *__s, int __reject1,
-				     int __reject2);
-__STRING_INLINE size_t
-__strcspn_c2 (const char *__s, int __reject1, int __reject2)
-{
-  register size_t __result = 0;
-  while (__s[__result] != '\0' && __s[__result] != __reject1
-	 && __s[__result] != __reject2)
-    ++__result;
-  return __result;
-}
-
-__STRING_INLINE size_t __strcspn_c3 (const char *__s, int __reject1,
-				     int __reject2, int __reject3);
-__STRING_INLINE size_t
-__strcspn_c3 (const char *__s, int __reject1, int __reject2,
-	      int __reject3)
-{
-  register size_t __result = 0;
-  while (__s[__result] != '\0' && __s[__result] != __reject1
-	 && __s[__result] != __reject2 && __s[__result] != __reject3)
-    ++__result;
-  return __result;
-}
 #endif
 
 
 /* Return the length of the initial segment of S which
    consists entirely of characters in ACCEPT.  */
 #if !defined _HAVE_STRING_ARCH_strspn || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strspn
-#  if __GNUC_PREREQ (3, 2)
-#   define strspn(s, accept) \
-  __extension__								      \
-  ({ char __a0, __a1, __a2;						      \
-     (__builtin_constant_p (accept) && __string2_1bptr_p (accept)	      \
-      ? ((__builtin_constant_p (s) && __string2_1bptr_p (s))		      \
-	 ? __builtin_strspn (s, accept)					      \
-	 : ((__a0 = ((const char *) (accept))[0], __a0 == '\0')		      \
-	    ? ((void) (s), (size_t) 0)					      \
-	    : ((__a1 = ((const char *) (accept))[1], __a1 == '\0')	      \
-	       ? __strspn_c1 (s, __a0)					      \
-	       : ((__a2 = ((const char *) (accept))[2], __a2 == '\0')	      \
-		  ? __strspn_c2 (s, __a0, __a1)				      \
-		  : (((const char *) (accept))[3] == '\0'		      \
-		     ? __strspn_c3 (s, __a0, __a1, __a2)		      \
-		     : __builtin_strspn (s, accept))))))		      \
-      : __builtin_strspn (s, accept)); })
-#  else
-#   define strspn(s, accept) \
-  __extension__								      \
-  ({ char __a0, __a1, __a2;						      \
-     (__builtin_constant_p (accept) && __string2_1bptr_p (accept)	      \
-      ? ((__a0 = ((const char *) (accept))[0], __a0 == '\0')		      \
-	 ? ((void) (s), (size_t) 0)					      \
-	 : ((__a1 = ((const char *) (accept))[1], __a1 == '\0')		      \
-	    ? __strspn_c1 (s, __a0)					      \
-	    : ((__a2 = ((const char *) (accept))[2], __a2 == '\0')	      \
-	       ? __strspn_c2 (s, __a0, __a1)				      \
-	       : (((const char *) (accept))[3] == '\0'			      \
-		  ? __strspn_c3 (s, __a0, __a1, __a2)			      \
-		  : strspn (s, accept)))))				      \
-      : strspn (s, accept)); })
-#  endif
-# endif
-
-__STRING_INLINE size_t __strspn_c1 (const char *__s, int __accept);
-__STRING_INLINE size_t
-__strspn_c1 (const char *__s, int __accept)
-{
-  register size_t __result = 0;
-  /* Please note that __accept never can be '\0'.  */
-  while (__s[__result] == __accept)
-    ++__result;
-  return __result;
-}
-
-__STRING_INLINE size_t __strspn_c2 (const char *__s, int __accept1,
-				    int __accept2);
-__STRING_INLINE size_t
-__strspn_c2 (const char *__s, int __accept1, int __accept2)
-{
-  register size_t __result = 0;
-  /* Please note that __accept1 and __accept2 never can be '\0'.  */
-  while (__s[__result] == __accept1 || __s[__result] == __accept2)
-    ++__result;
-  return __result;
-}
-
-__STRING_INLINE size_t __strspn_c3 (const char *__s, int __accept1,
-				    int __accept2, int __accept3);
-__STRING_INLINE size_t
-__strspn_c3 (const char *__s, int __accept1, int __accept2, int __accept3)
-{
-  register size_t __result = 0;
-  /* Please note that __accept1 to __accept3 never can be '\0'.  */
-  while (__s[__result] == __accept1 || __s[__result] == __accept2
-	 || __s[__result] == __accept3)
-    ++__result;
-  return __result;
-}
 #endif
 
 
 /* Find the first occurrence in S of any character in ACCEPT.  */
-#if !defined _HAVE_STRING_ARCH_strpbrk || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strpbrk
-#  if __GNUC_PREREQ (3, 2)
-#   define strpbrk(s, accept) \
-  __extension__								      \
-  ({ char __a0, __a1, __a2;						      \
-     (__builtin_constant_p (accept) && __string2_1bptr_p (accept)	      \
-      ? ((__builtin_constant_p (s) && __string2_1bptr_p (s))		      \
-	 ? __builtin_strpbrk (s, accept)				      \
-	 : ((__a0 = ((const char  *) (accept))[0], __a0 == '\0')	      \
-	    ? ((void) (s), (char *) NULL)				      \
-	    : ((__a1 = ((const char *) (accept))[1], __a1 == '\0')	      \
-	       ? __builtin_strchr (s, __a0)				      \
-	       : ((__a2 = ((const char *) (accept))[2], __a2 == '\0')	      \
-		  ? __strpbrk_c2 (s, __a0, __a1)			      \
-		  : (((const char *) (accept))[3] == '\0'		      \
-		     ? __strpbrk_c3 (s, __a0, __a1, __a2)		      \
-		     : __builtin_strpbrk (s, accept))))))		      \
-      : __builtin_strpbrk (s, accept)); })
-#  else
-#   define strpbrk(s, accept) \
-  __extension__								      \
-  ({ char __a0, __a1, __a2;						      \
-     (__builtin_constant_p (accept) && __string2_1bptr_p (accept)	      \
-      ? ((__a0 = ((const char  *) (accept))[0], __a0 == '\0')		      \
-	 ? ((void) (s), (char *) NULL)					      \
-	 : ((__a1 = ((const char *) (accept))[1], __a1 == '\0')		      \
-	    ? strchr (s, __a0)						      \
-	    : ((__a2 = ((const char *) (accept))[2], __a2 == '\0')	      \
-	       ? __strpbrk_c2 (s, __a0, __a1)				      \
-	       : (((const char *) (accept))[3] == '\0'			      \
-		  ? __strpbrk_c3 (s, __a0, __a1, __a2)			      \
-		  : strpbrk (s, accept)))))				      \
-      : strpbrk (s, accept)); })
-#  endif
-# endif
-
-__STRING_INLINE char *__strpbrk_c2 (const char *__s, int __accept1,
-				    int __accept2);
-__STRING_INLINE char *
-__strpbrk_c2 (const char *__s, int __accept1, int __accept2)
-{
-  /* Please note that __accept1 and __accept2 never can be '\0'.  */
-  while (*__s != '\0' && *__s != __accept1 && *__s != __accept2)
-    ++__s;
-  return *__s == '\0' ? NULL : (char *) (size_t) __s;
-}
-
-__STRING_INLINE char *__strpbrk_c3 (const char *__s, int __accept1,
-				    int __accept2, int __accept3);
-__STRING_INLINE char *
-__strpbrk_c3 (const char *__s, int __accept1, int __accept2, int __accept3)
+extern inline char *
+strpbrk (s, accept)
+     const char *s;
+     const char *accept;
 {
-  /* Please note that __accept1 to __accept3 never can be '\0'.  */
-  while (*__s != '\0' && *__s != __accept1 && *__s != __accept2
-	 && *__s != __accept3)
-    ++__s;
-  return *__s == '\0' ? NULL : (char *) (size_t) __s;
+  size_t i = strcspn (s, accept);
+  return s[i] ? (char *) s+i : NULL;
 }
-#endif
 
 
 /* Find the first occurrence of NEEDLE in HAYSTACK.  Newer gcc versions
diff --git a/string/strcspn.c b/string/strcspn.c
index 6290429..cf71f93 100644
--- a/string/strcspn.c
+++ b/string/strcspn.c
@@ -1,50 +1,2 @@
-/* Copyright (C) 1991, 1994, 1996, 1997, 2003 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-#if defined _LIBC || HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-# ifndef strchr
-#  define strchr index
-# endif
-#endif
-
-#undef strcspn
-
-/* Return the length of the maximum initial segment of S
-   which contains no characters from REJECT.  */
-size_t
-strcspn (s, reject)
-     const char *s;
-     const char *reject;
-{
-  size_t count = 0;
-
-  while (*s != '\0')
-    if (strchr (reject, *s++) == NULL)
-      ++count;
-    else
-      return count;
-
-  return count;
-}
-libc_hidden_builtin_def (strcspn)
+#define AS_STRCSPN
+#include "strspn.c"
diff --git a/string/strcspn_internal.c b/string/strcspn_internal.c
new file mode 100644
index 0000000..1788997
--- /dev/null
+++ b/string/strcspn_internal.c
@@ -0,0 +1,2 @@
+#define AS_STRCSPN
+#include "strspn_internal.c"
diff --git a/string/strpbrk.c b/string/strpbrk.c
index 7f45fdf..4015ced 100644
--- a/string/strpbrk.c
+++ b/string/strpbrk.c
@@ -31,15 +31,7 @@ strpbrk (s, accept)
      const char *s;
      const char *accept;
 {
-  while (*s != '\0')
-    {
-      const char *a = accept;
-      while (*a != '\0')
-	if (*a++ == *s)
-	  return (char *) s;
-      ++s;
-    }
-
-  return NULL;
+  size_t i = strcspn (s, accept);
+  return s[i] ? (char *) s+i : NULL;
 }
 libc_hidden_builtin_def (strpbrk)
diff --git a/string/strspn.c b/string/strspn.c
index 48624aa..5237ac1 100644
--- a/string/strspn.c
+++ b/string/strspn.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -15,32 +15,45 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#ifdef AS_STRCSPN
+#define SPN_CSPN(x,y) y
+#else
+#define SPN_CSPN(x,y) x
+#endif
+#define SPN      SPN_CSPN (__strspn     ,__strcspn)
+#define SPN4     SPN_CSPN (__strspn4    ,__strcspn4)
+#define SPN16    SPN_CSPN (__strspn16   ,__strcspn16)
+#define SPN_ARY  SPN_CSPN (__strspn_ary ,__strcspn_ary)
+#define SPN_RNG  SPN_CSPN (__strspn_rng ,__strcspn_rng)
+
+#define _GNU_SOURCE
 #include <string.h>
 
-#undef strspn
 
-/* Return the length of the maximum initial segment
-   of S which contains only characters in ACCEPT.  */
-size_t
-strspn (s, accept)
-     const char *s;
-     const char *accept;
+size_t SPN (const char *_s,const char *_n)
 {
-  const char *p;
-  const char *a;
-  size_t count = 0;
-
-  for (p = s; *p != '\0'; ++p)
-    {
-      for (a = accept; *a != '\0'; ++a)
-	if (*p == *a)
-	  break;
-      if (*a == '\0')
-	return count;
-      else
-	++count;
-    }
-
-  return count;
+  size_t i;
+  unsigned char *s = (unsigned char*)_s, *n = (unsigned char*)_n;
+#ifdef AS_STRCSPN
+  if (!n[0]) return strlen(_s);
+  if (!n[1]) return strchrnul (_s,_n[0])-_s;
+  if (!n[2]) return SPN4 (s,n[0],n[1],0   ,0);
+  if (!n[3]) return SPN4 (s,n[0],n[1],n[2],0);
+#else
+  if (!n[0]) return 0;
+  if (!n[1]) return SPN4 (s,n[0],n[0],n[0],n[0]);
+  if (!n[2]) return SPN4 (s,n[0],n[1],n[0],n[0]);
+  if (!n[3]) return SPN4 (s,n[0],n[1],n[2],n[0]);
+  if (!n[4]) return SPN4 (s,n[0],n[1],n[2],n[3]);
+#endif
+  for (i = 0; n[i] == n[0]+i; i++)
+    ;
+  if  (!n[i])
+    return SPN_RNG (s,n[0],n[0]+i-1);
+  if (strlen (n)<16)
+    return SPN16 (s,n);
+  return SPN_ARY (s,n);
 }
-libc_hidden_builtin_def (strspn)
+
+libc_hidden_builtin_def (SPN)
+weak_alias(SPN, SPN_CSPN (strspn,strcspn))
diff --git a/string/strspn_internal.c b/string/strspn_internal.c
new file mode 100644
index 0000000..3962925
--- /dev/null
+++ b/string/strspn_internal.c
@@ -0,0 +1,76 @@
+/* Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+#ifdef AS_STRCSPN
+#define SPN_CSPN(x,y) y
+#else
+#define SPN_CSPN(x,y) x
+#endif
+#define SPN      SPN_CSPN(  strspn     ,  strcspn)
+#define SPN4     SPN_CSPN(__strspn4    ,__strcspn4)
+#define SPN16    SPN_CSPN(__strspn16   ,__strcspn16)
+#define SPN_ARY  SPN_CSPN(__strspn_ary ,__strcspn_ary)
+#define SPN_RNG  SPN_CSPN(__strspn_rng ,__strcspn_rng)
+
+#include <string.h>
+
+size_t SPN_ARY (unsigned char* s,unsigned char* n)
+{
+  size_t i;
+  unsigned char ac,chars[256];
+  memset (chars,0,256);
+  for (i = 0; n[i]; i++)
+    chars[n[i]] = 1;
+  ac = chars[0] = SPN_CSPN (0,1);
+  i = 0;
+  while (1)
+    {
+      if (chars[s[i++]]==ac) return i-1;
+      if (chars[s[i++]]==ac) return i-1;
+      if (chars[s[i++]]==ac) return i-1;
+      if (chars[s[i++]]==ac) return i-1;
+    }
+}
+
+size_t SPN16 (unsigned char *s,unsigned char *n)
+{
+  return SPN_ARY (s,n);
+}
+
+size_t SPN4 ( const char *_s, unsigned char n0,unsigned char n1,unsigned char n2,unsigned char n3)
+{
+  unsigned char *s = (unsigned char *) _s;
+  size_t i;
+  for (i = 0;; i++)
+    {
+      if SPN_CSPN (  (s[i] != n0 && s[i] != n0 && s[i] != n2 && s[i] != n3) ,
+                   (!(s[i] != n0 && s[i] != n0 && s[i] != n2 && s[i] != n3)))
+        return i;
+    }
+}
+
+size_t SPN_RNG (unsigned char *s,unsigned char from,unsigned char to)
+{
+  size_t i;
+  for (i = 0;; i++)
+    {
+      if (SPN_CSPN ( (s[i] <  from || s[i] >  to),
+                    ((s[i] >= from && s[i] <= to) || s[i] == 0)))
+        return i;
+    }
+}
-- 
1.7.4.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]