This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] Improve generic strcspn performance
- From: Wilco Dijkstra <Wilco dot Dijkstra at arm dot com>
- To: 'GNU C Library' <libc-alpha at sourceware dot org>
- Cc: nd <nd at arm dot com>
- Date: Fri, 8 Jan 2016 18:40:57 +0000
- Subject: [PATCH] Improve generic strcspn performance
- Authentication-results: sourceware.org; auth=none
- Nodisclaimer: True
- Spamdiagnosticmetadata: NSPM
- Spamdiagnosticoutput: 1:23
Improve strcspn performance using a much faster algorithm. It is kept simple
so it works well on most targets. It is generally at least 10 times faster
than the existing implementation on bench-strcspn on a few AArch64
implementations, and for some tests 100 times as fast (repeatedly calling
strchr on a small string is extremely slow...). In fact the string/bits/string2.h
inlines make no longer sense, as GCC already uses strlen if reject is an empty
string, strchrnul is 5 times as fast as __strcspn_c1, while __strcspn_c2 and
__strcspn_c3 are slower than the strcspn main loop for large strings (though reject
length 2-4 could be special cased in the future to gain even more performance).
Built and tested on AArch64. OK for GLIBC 2.23?
ChangeLog:
2016-01-08 Wilco Dijkstra <wdijkstr@arm.com>
* string/strcspn.c (strcspn): Rewrite function.
* string/bits/string2.h (strcspn): Use __builtin_strcspn.
(__strcspn_c1) Remove inline function.
(__strcspn_c2) Likewise.
(__strcspn_c3) Likewise.
diff --git a/string/bits/string2.h b/string/bits/string2.h
index e18c67530ec78338c9591015f3d95c785de54726..d0926f1e7898a13852081f45d54bff5145751695 100644
--- a/string/bits/string2.h
+++ b/string/bits/string2.h
@@ -199,62 +199,10 @@ extern void *__rawmemchr (const void *__s, int __c);
/* Return the length of the initial segment of S which
consists entirely of characters not in REJECT. */
-#if !defined _HAVE_STRING_ARCH_strcspn
-# ifndef _HAVE_STRING_ARCH_strcspn
-# if __GNUC_PREREQ (3, 2)
-# define strcspn(s, reject) \
- __extension__ \
- ({ char __r0, __r1, __r2; \
- (__builtin_constant_p (reject) && __string2_1bptr_p (reject) \
- ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \
- ? __builtin_strcspn (s, reject) \
- : ((__r0 = ((const char *) (reject))[0], __r0 == '\0') \
- ? strlen (s) \
- : ((__r1 = ((const char *) (reject))[1], __r1 == '\0') \
- ? __strcspn_c1 (s, __r0) \
- : ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \
- ? __strcspn_c2 (s, __r0, __r1) \
- : (((const char *) (reject))[3] == '\0' \
- ? __strcspn_c3 (s, __r0, __r1, __r2) \
- : __builtin_strcspn (s, reject)))))) \
- : __builtin_strcspn (s, reject)); })
-# endif
+#ifndef _HAVE_STRING_ARCH_strcspn
+# if __GNUC_PREREQ (3, 2)
+# define strcspn(s, reject) __builtin_strcspn (s, reject)
# endif
-
-__STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject);
-__STRING_INLINE size_t
-__strcspn_c1 (const char *__s, int __reject)
-{
- size_t __result = 0;
- while (__s[__result] != '\0' && __s[__result] != __reject)
- ++__result;
- return __result;
-}
-
-__STRING_INLINE size_t __strcspn_c2 (const char *__s, int __reject1,
- int __reject2);
-__STRING_INLINE size_t
-__strcspn_c2 (const char *__s, int __reject1, int __reject2)
-{
- size_t __result = 0;
- while (__s[__result] != '\0' && __s[__result] != __reject1
- && __s[__result] != __reject2)
- ++__result;
- return __result;
-}
-
-__STRING_INLINE size_t __strcspn_c3 (const char *__s, int __reject1,
- int __reject2, int __reject3);
-__STRING_INLINE size_t
-__strcspn_c3 (const char *__s, int __reject1, int __reject2,
- int __reject3)
-{
- size_t __result = 0;
- while (__s[__result] != '\0' && __s[__result] != __reject1
- && __s[__result] != __reject2 && __s[__result] != __reject3)
- ++__result;
- return __result;
-}
#endif
diff --git a/string/strcspn.c b/string/strcspn.c
index 2694d2ab0e807d0712d6b103dbdfd75ef164ebf1..cdb2df315c394889fe04b69980c63ea4ddbdb286 100644
--- a/string/strcspn.c
+++ b/string/strcspn.c
@@ -26,16 +26,48 @@
/* Return the length of the maximum initial segment of S
which contains no characters from REJECT. */
size_t
-STRCSPN (const char *s, const char *reject)
+STRCSPN (const char *str, const char *reject)
{
- size_t count = 0;
+ unsigned char table[256];
+ unsigned char *p, *s;
+ unsigned int c0, c1, c2, c3;
+ size_t count;
- while (*s != '\0')
- if (strchr (reject, *s++) == NULL)
- ++count;
- else
- return count;
+ if (reject[0] == '\0')
+ return strlen (str);
+ if (reject[1] == '\0')
+ return __strchrnul (str, reject [0]) - str;
- return count;
+ /* Use multiple small memsets to enable inlining on most targets. */
+ p = memset (table, 0, 64);
+ memset (p + 64, 0, 64);
+ memset (p + 128, 0, 64);
+ memset (p + 192, 0, 64);
+
+ s = (unsigned char*) reject;
+ do
+ p[c0 = *s++] = 1;
+ while (c0);
+
+ s = (unsigned char*) str;
+ if (p[s[0]]) return 0;
+ if (p[s[1]]) return 1;
+ if (p[s[2]]) return 2;
+ if (p[s[3]]) return 3;
+
+ s = (unsigned char *) ((size_t)s & ~3);
+
+ do
+ {
+ s += 4;
+ c0 = p[s[0]];
+ c1 = p[s[1]];
+ c2 = p[s[2]];
+ c3 = p[s[3]];
+ }
+ while ((c0 | c1 | c2 | c3) == 0);
+
+ count = s - (unsigned char *) str;
+ return (c0 | c1) != 0 ? count - c0 + 1 : count - c2 + 3;
}
libc_hidden_builtin_def (strcspn)