This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch master updated. glibc-2.23-145-g528ffb3
- From: azanella at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 1 Apr 2016 13:45:15 -0000
- Subject: GNU C Library master sources branch master updated. glibc-2.23-145-g528ffb3
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master has been updated
via 528ffb3a04966ce5dbc24e256c1926087876b6f7 (commit)
via 282b71f07eb5e24ddf1308f92c37cb42f7c7d86b (commit)
via 91f3b75f47c9eca3299098c3dcc2f5d9dad320b1 (commit)
via d3496c9f4f27d3009b71be87f6108b4fed7314bd (commit)
from d8a012c5c9e4bfc1b8db2bc6deacb85b44a2e1eb (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=528ffb3a04966ce5dbc24e256c1926087876b6f7
commit 528ffb3a04966ce5dbc24e256c1926087876b6f7
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Mon Mar 28 10:26:05 2016 -0300
Remove powerpc64 strspn, strcspn, and strpbrk implementation
This patch removes the powerpc64 optimized strspn, strcspn, and
strpbrk assembly implementation now that the default C one
implements the same strategy. On internal glibc benchtests
current implementations shows similar performance with -O2.
Tested on powerpc64le (POWER8).
* sysdeps/powerpc/powerpc64/strcspn.S: Remove file.
* sysdeps/powerpc/powerpc64/strpbrk.S: Remove file.
* sysdeps/powerpc/powerpc64/strspn.S: Remove file.
diff --git a/ChangeLog b/ChangeLog
index c6f9ac7..846a41d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
2016-04-01 Adhemerval Zanella <adhemerval.zanella@linaro.org>
+ * sysdeps/powerpc/powerpc64/strcspn.S: Remove file.
+ * sysdeps/powerpc/powerpc64/strpbrk.S: Remove file.
+ * sysdeps/powerpc/powerpc64/strspn.S: Remove file.
+
* string/strpbrk.c (strpbrk): Rewrite function.
* string/bits/string2.h (strpbrk): Use __builtin_strpbrk.
(__strpbrk_c2): Likewise.
diff --git a/sysdeps/powerpc/powerpc64/strcspn.S b/sysdeps/powerpc/powerpc64/strcspn.S
deleted file mode 100644
index 31e619d..0000000
--- a/sysdeps/powerpc/powerpc64/strcspn.S
+++ /dev/null
@@ -1,127 +0,0 @@
-/* Optimized strcspn implementation for PowerPC64.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* size_t [r3] strcspn (const char [r4] *s, const char [r5] *reject) */
-
-EALIGN (strcspn, 4, 0)
- CALL_MCOUNT 3
-
- /* The idea to speed up the algorithm is to create a lookup table
- for fast check if input character should be considered. For ASCII
- or ISO-8859-X character sets it has 256 positions. */
-
- /* PPC64 ELF ABI stack is aligned to 16 bytes. */
- addi r9,r1,-256
- /* Clear the table with 0 values */
- li r6, 0
- li r8, 4
- mtctr r8
- mr r10, r9
- .align 4
-L(zerohash):
- std r6, 0(r10)
- std r6, 8(r10)
- std r6, 16(r10)
- std r6, 24(r10)
- std r6, 32(r10)
- std r6, 40(r10)
- std r6, 48(r10)
- std r6, 56(r10)
- addi r10, r10, 64
- bdnz L(zerohash)
-
- lbz r10,0(r4)
- cmpdi cr7,r10,0 /* reject[0] == '\0' ? */
- li r8,1
- beq cr7,L(finish_table) /* If reject[0] == '\0' skip */
-
- /* Initialize the table as:
- for (i=0; reject[i]; i++
- table[reject[i]]] = 1 */
- .align 4
-L(init_table):
- stbx r8,r9,r10
- lbzu r10,1(r4)
- cmpdi cr7,r10,0 /* If reject[0] == '\0' finish */
- bne cr7,L(init_table)
-L(finish_table):
- /* set table[0] = 1 */
- li r10,1
- stb r10,0(r9)
- li r10,0
- b L(mainloop)
-
- /* Unrool the loop 4 times and check using the table as:
- i = 0;
- while (1)
- {
- if (table[input[i++]] == 1)
- return i - 1;
- if (table[input[i++]] == 1)
- return i - 1;
- if (table[input[i++]] == 1)
- return i - 1;
- if (table[input[i++]] == 1)
- return i - 1;
- } */
- .align 4
-L(unroll):
- lbz r8,1(r3)
- addi r10,r10,4
- lbzx r8,r9,r8
- cmpwi r7,r8,1
- beq cr7,L(end)
- lbz r8,2(r3)
- addi r3,r3,4
- lbzx r8,r9,r8
- cmpwi cr7,r8,1
- beq cr7,L(end2)
- lbz r8,3(r7)
- lbzx r8,r9,r8
- cmpwi cr7,r8,1
- beq cr7,L(end3)
-L(mainloop):
- lbz r8,0(r3)
- mr r7,r3
- addi r6,r10,1
- addi r4,r10,2
- addi r5,r10,3
- lbzx r8,r9,8
- cmpwi cr7,r8,1
- bne cr7,L(unroll)
- mr r3,r10
- blr
-
- .align 4
-L(end):
- mr r3,r6
- blr
-
- .align 4
-L(end2):
- mr r3,r4
- blr
-
- .align 4
-L(end3):
- mr r3,r5
- blr
-END (strcspn)
-libc_hidden_builtin_def (strcspn)
diff --git a/sysdeps/powerpc/powerpc64/strpbrk.S b/sysdeps/powerpc/powerpc64/strpbrk.S
deleted file mode 100644
index 5e9d1a6..0000000
--- a/sysdeps/powerpc/powerpc64/strpbrk.S
+++ /dev/null
@@ -1,135 +0,0 @@
-/* Optimized strpbrk implementation for PowerPC64.
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* char [r3] *strpbrk(const char [r4] *s, const char [r5] *accept) */
-
-EALIGN (strpbrk, 4, 0)
- CALL_MCOUNT 3
-
- lbz r10,0(r4)
- cmpdi cr7,r10,0 /* accept[0] == '\0' ? */
- beq cr7,L(nullfound)
-
- /* The idea to speed up the algorithm is to create a lookup table
- for fast check if input character should be considered. For ASCII
- or ISO-8859-X character sets it has 256 positions. */
-
- /* PPC64 ELF ABI stack is aligned to 16 bytes. */
- addi r9,r1,-256
- /* Clear the table with 0 values */
- li r6, 0
- li r7, 4
- mtctr r7
- mr r8, r9
- .align 4
-L(zerohash):
- std r6, 0(r8)
- std r6, 8(r8)
- std r6, 16(r8)
- std r6, 24(r8)
- std r6, 32(r8)
- std r6, 40(r8)
- std r6, 48(r8)
- std r6, 56(r8)
- addi r8, r8, 64
- bdnz L(zerohash)
-
- /* Initialize the table as:
- for (i=0; accept[i]; i++
- table[accept[i]]] = 1 */
- li r0,1
- .align 4
-L(init_table):
- stbx r0,r9,r10
- lbzu r10,1(r4)
- cmpdi r0,r10,0
- bne cr0,L(init_table)
-L(finish_table):
- /* set table[0] = 1 */
- li r4,1
- stb r4,0(r9)
- b L(mainloop)
-
- /* Unrool the loop 4 times and check using the table as:
- i = 0;
- while (1)
- {
- if (table[input[i++]] == 1)
- return (s[i -1] ? s + i - 1: NULL);
- if (table[input[i++]] == 1)
- return (s[i -1] ? s + i - 1: NULL);
- if (table[input[i++]] == 1)
- return (s[i -1] ? s + i - 1: NULL);
- if (table[input[i++]] == 1)
- return (s[i -1] ? s + i - 1: NULL);
- } */
- .align 4
-L(unroll):
- lbz r0,1(r3)
- lbzx r8,r9,r0
- cmpwi cr6,r8,1
- beq cr6,L(checkend2)
- lbz r10,2(r3)
- lbzx r4,r9,r10
- cmpwi cr7,r4,1
- beq cr7,L(checkend3)
- lbz r12,3(r3)
- addi r3,r3,4
- lbzx r11,r9,r12
- cmpwi cr0,r11,1
- beq cr0,L(checkend)
-L(mainloop):
- lbz r12,0(r3)
- addi r11,r3,1
- addi r5,r3,2
- addi r7,r3,3
- lbzx r6,r9,r12
- cmpwi cr1,r6,1
- bne cr1,L(unroll)
- cmpdi cr0,r12,0
- beq cr0,L(nullfound)
-L(end):
- blr
-
- .align 4
-L(checkend):
- cmpdi cr1,r12,0
- mr r3,r7
- bne cr1,L(end)
-L(nullfound):
- /* return NULL */
- li 3,0
- blr
-
- .align 4
-L(checkend2):
- cmpdi cr7,r0,0
- mr r3,r11
- beq cr7,L(nullfound)
- blr
-
- .align 4
-L(checkend3):
- cmpdi cr6,r10,0
- mr r3,r5
- beq cr6,L(nullfound)
- blr
-END (strpbrk)
-libc_hidden_builtin_def (strpbrk)
diff --git a/sysdeps/powerpc/powerpc64/strspn.S b/sysdeps/powerpc/powerpc64/strspn.S
deleted file mode 100644
index cf10da1..0000000
--- a/sysdeps/powerpc/powerpc64/strspn.S
+++ /dev/null
@@ -1,144 +0,0 @@
-/* Optimized strspn implementation for PowerPC64.
-
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-/* size_t [r3] strspn (const char *string [r3],
- const char *needleAccept [r4] */
-
-/* Performance gains are grabbed through following techniques:
-
- > hashing of needle.
- > hashing avoids scanning of duplicate entries in needle
- across the string.
- > unrolling when scanning for character in string
- across hash table. */
-
-/* Algorithm is as below:
- 1. A empty hash table/dictionary is created comprising of
- 256 ascii character set
- 2. When hash entry is found in needle , the hash index
- is initialized to 1
- 3. The string is scanned until end and for every character,
- its corresponding hash index is compared.
- 4. initial length of string (count) until first hit of
- accept needle to be found is set to 0
- 4. If hash index is set to 1 for the index of string,
- count is returned.
- 5. Otherwise count is incremented and scanning continues
- until end of string. */
-
-#include <sysdep.h>
-
-EALIGN(strspn, 4, 0)
- CALL_MCOUNT 3
-
- /* PPC64 ELF ABI stack is aligned to 16 bytes. */
- addi r9,r1,-256
- /* Clear the table with 0 values */
- li r6, 0
- li r8, 4
- mtctr r8
- mr r10, r9
- .align 4
-L(zerohash):
- std r6, 0(r10)
- std r6, 8(r10)
- std r6, 16(r10)
- std r6, 24(r10)
- std r6, 32(r10)
- std r6, 40(r10)
- std r6, 48(r10)
- std r6, 56(r10)
- addi r10, r10, 64
- bdnz L(zerohash)
-
- lbz r10,0(r4)
- li r8, 1 /* r8=1, marker into hash if found in
- needle */
- cmpdi cr7, r10, 0 /* accept needle is NULL */
- beq cr7, L(skipHashing) /* if needle is NULL, skip hashing */
-
- .align 4 /* align section to 16 byte boundary */
-L(hashing):
- stbx r8, r9, r10 /* update hash with marker for the pivot of
- the needle */
- lbzu r10, 1(r4) /* load needle into r10 and update to next */
- cmpdi cr7, r10, 0 /* if needle is has reached NULL, continue */
- bne cr7, L(hashing) /* loop to hash the needle */
-
-L(skipHashing):
- li r10, 0 /* load counter = 0 */
- b L(beginScan)
-
- .align 4 /* align section to 16 byte boundary */
-L(scanUnroll):
- lbzx r8, r9, r8 /* load r8 with hash value at index */
- cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found
- accept needle */
- beq cr7, L(ret1stIndex) /* we have hit accept needle, return the
- count */
-
- lbz r8, 1(r3) /* load string[1] into r8 */
- addi r10, r10, 4 /* increment counter */
- lbzx r8, r9, r8 /* load r8 with hash value at index */
- cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found
- accept needle */
- beq cr7, L(ret2ndIndex) /* we have hit accept needle, return the
- count */
-
- lbz r8, 2(r3) /* load string[2] into r8 */
- lbzx r8, r9, r8 /* load r8 with hash value at index */
- cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found
- accept needle */
- beq cr7, L(ret3rdIndex) /* we have hit accept needle, return the
- count */
-
- lbz r8, 3(r3) /* load string[3] into r8 */
- lbzx r8, r9, r8 /* load r8 with hash value at index */
- addi r3, r3, 4 /* unroll factor , increment string by 4 */
- cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found
- accept needle */
- beq cr7,L(ret4thIndex) /* we have hit accept needle, return the
- count */
-
-L(beginScan):
- lbz r8, 0(r3) /* load string[0] into r8 */
- addi r6, r10, 1 /* place holder for counter + 1 */
- addi r5, r10, 2 /* place holder for counter + 2 */
- addi r4, r10, 3 /* place holder for counter + 3 */
- cmpdi cr7, r8, 0 /* if we hit marker in hash, we have found
- accept needle */
- bne cr7, L(scanUnroll) /* continue scanning */
-
-L(ret1stIndex):
- mr r3, r10 /* update r3 for return */
- blr /* return */
-
-L(ret2ndIndex):
- mr r3, r6 /* update r3 for return */
- blr /* return */
-
-L(ret3rdIndex):
- mr r3, r5 /* update r3 for return */
- blr /* return */
-
-L(ret4thIndex):
- mr r3, r4 /* update r3 for return */
- blr /* done */
-END(strspn)
-libc_hidden_builtin_def (strspn)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=282b71f07eb5e24ddf1308f92c37cb42f7c7d86b
commit 282b71f07eb5e24ddf1308f92c37cb42f7c7d86b
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Sun Mar 27 13:33:56 2016 +0000
Improve generic strpbrk performance
With now a faster strcspn implementation, it is faster to just use
it with some return tests than reimplementing strpbrk itself.
As for strcspn optimization, it is generally at least 10 times faster
than the existing implementation on bench-strspn on a few AArch64
implementations.
Also the string/bits/string2.h inlines make no longer sense, as current
implementation will already implement most of the optimizations.
Tested on x86_64, i386, and aarch64.
* string/strpbrk.c (strpbrk): Rewrite function.
* string/bits/string2.h (strpbrk): Use __builtin_strpbrk.
(__strpbrk_c2): Likewise.
(__strpbrk_c3): Likewise.
* string/string-inlines.c
[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strpbrk_c2):
Likewise.
[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strpbrk_c3):
Likewise.
diff --git a/ChangeLog b/ChangeLog
index 79e2e23..c6f9ac7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
2016-04-01 Adhemerval Zanella <adhemerval.zanella@linaro.org>
+ * string/strpbrk.c (strpbrk): Rewrite function.
+ * string/bits/string2.h (strpbrk): Use __builtin_strpbrk.
+ (__strpbrk_c2): Likewise.
+ (__strpbrk_c3): Likewise.
+ * string/string-inlines.c
+ [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strpbrk_c2):
+ Likewise.
+ [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strpbrk_c3):
+ Likewise.
+
* string/strspn.c (strcspn): Rewrite function.
* string/bits/string2.h (strspn): Use __builtin_strcspn.
(__strspn_c1): Remove inline function.
diff --git a/string/bits/string2.h b/string/bits/string2.h
index 75a66a1..5a138dc 100644
--- a/string/bits/string2.h
+++ b/string/bits/string2.h
@@ -922,65 +922,10 @@ __stpcpy_small (char *__dest,
/* Find the first occurrence in S of any character in ACCEPT. */
-#if !defined _HAVE_STRING_ARCH_strpbrk || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strpbrk
-# if __GNUC_PREREQ (3, 2)
-# define strpbrk(s, accept) \
- __extension__ \
- ({ char __a0, __a1, __a2; \
- (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \
- ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \
- ? __builtin_strpbrk (s, accept) \
- : ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \
- ? ((void) (s), (char *) NULL) \
- : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \
- ? __builtin_strchr (s, __a0) \
- : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \
- ? __strpbrk_c2 (s, __a0, __a1) \
- : (((const char *) (accept))[3] == '\0' \
- ? __strpbrk_c3 (s, __a0, __a1, __a2) \
- : __builtin_strpbrk (s, accept)))))) \
- : __builtin_strpbrk (s, accept)); })
-# else
-# define strpbrk(s, accept) \
- __extension__ \
- ({ char __a0, __a1, __a2; \
- (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \
- ? ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \
- ? ((void) (s), (char *) NULL) \
- : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \
- ? strchr (s, __a0) \
- : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \
- ? __strpbrk_c2 (s, __a0, __a1) \
- : (((const char *) (accept))[3] == '\0' \
- ? __strpbrk_c3 (s, __a0, __a1, __a2) \
- : strpbrk (s, accept))))) \
- : strpbrk (s, accept)); })
-# endif
+#ifndef _HAVE_STRING_ARCH_strpbrk
+# if __GNUC_PREREQ (3, 2)
+# define strpbrk(s, accept) __builtin_strpbrk (s, accept)
# endif
-
-__STRING_INLINE char *__strpbrk_c2 (const char *__s, int __accept1,
- int __accept2);
-__STRING_INLINE char *
-__strpbrk_c2 (const char *__s, int __accept1, int __accept2)
-{
- /* Please note that __accept1 and __accept2 never can be '\0'. */
- while (*__s != '\0' && *__s != __accept1 && *__s != __accept2)
- ++__s;
- return *__s == '\0' ? NULL : (char *) (size_t) __s;
-}
-
-__STRING_INLINE char *__strpbrk_c3 (const char *__s, int __accept1,
- int __accept2, int __accept3);
-__STRING_INLINE char *
-__strpbrk_c3 (const char *__s, int __accept1, int __accept2, int __accept3)
-{
- /* Please note that __accept1 to __accept3 never can be '\0'. */
- while (*__s != '\0' && *__s != __accept1 && *__s != __accept2
- && *__s != __accept3)
- ++__s;
- return *__s == '\0' ? NULL : (char *) (size_t) __s;
-}
#endif
diff --git a/string/string-inlines.c b/string/string-inlines.c
index 754b315..0648376 100644
--- a/string/string-inlines.c
+++ b/string/string-inlines.c
@@ -107,4 +107,25 @@ __old_strspn_c3 (const char *__s, int __accept1, int __accept2,
}
compat_symbol (libc, __old_strspn_c3, __strspn_c3, GLIBC_2_1_1);
+char *
+__strpbrk_c2 (const char *__s, int __accept1, int __accept2)
+{
+ /* Please note that __accept1 and __accept2 never can be '\0'. */
+ while (*__s != '\0' && *__s != __accept1 && *__s != __accept2)
+ ++__s;
+ return *__s == '\0' ? NULL : (char *) (size_t) __s;
+}
+compat_symbol (libc, __old_strpbrk_c2, __strpbrk_c2, GLIBC_2_1_1);
+
+char *
+__strpbrk_c3 (const char *__s, int __accept1, int __accept2, int __accept3)
+{
+ /* Please note that __accept1 to __accept3 never can be '\0'. */
+ while (*__s != '\0' && *__s != __accept1 && *__s != __accept2
+ && *__s != __accept3)
+ ++__s;
+ return *__s == '\0' ? NULL : (char *) (size_t) __s;
+}
+compat_symbol (libc, __old_strpbrk_c3, __strpbrk_c3, GLIBC_2_1_1);
+
#endif
diff --git a/string/strpbrk.c b/string/strpbrk.c
index fddd473..1ede719 100644
--- a/string/strpbrk.c
+++ b/string/strpbrk.c
@@ -27,15 +27,7 @@
char *
STRPBRK (const char *s, const char *accept)
{
- while (*s != '\0')
- {
- const char *a = accept;
- while (*a != '\0')
- if (*a++ == *s)
- return (char *) s;
- ++s;
- }
-
- return NULL;
+ s += strcspn (s, accept);
+ return *s ? (char *)s : NULL;
}
libc_hidden_builtin_def (strpbrk)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=91f3b75f47c9eca3299098c3dcc2f5d9dad320b1
commit 91f3b75f47c9eca3299098c3dcc2f5d9dad320b1
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Sat Mar 26 19:09:07 2016 +0000
Improve generic strspn performance
As for strcspn, this patch improves strspn performance using a much
faster algorithm. It first constructs a 256-entry table based on
the accept string and then uses it as a lookup table for the
input string. As for strcspn optimization, it is generally at least
10 times faster than the existing implementation on bench-strspn
on a few AArch64 implementations.
Also the string/bits/string2.h inlines make no longer sense, as current
implementation will already implement most of the optimizations.
Tested on x86_64, i686, and aarch64.
* string/strspn.c (strcspn): Rewrite function.
* string/bits/string2.h (strspn): Use __builtin_strcspn.
(__strspn_c1): Remove inline function.
(__strspn_c2): Likewise.
(__strspn_c3): Likewise.
* string/string-inlines.c
[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c1): Add
compatibility symbol.
[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c2):
Likewise.
[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c3):
Likewise.
diff --git a/ChangeLog b/ChangeLog
index 0d522ad..79e2e23 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2016-04-01 Adhemerval Zanella <adhemerval.zanella@linaro.org>
+
+ * string/strspn.c (strcspn): Rewrite function.
+ * string/bits/string2.h (strspn): Use __builtin_strcspn.
+ (__strspn_c1): Remove inline function.
+ (__strspn_c2): Likewise.
+ (__strspn_c3): Likewise.
+ * string/string-inlines.c
+ [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c1): Add
+ compatibility symbol.
+ [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c2):
+ Likewise.
+ [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c3):
+ Likewise.
+ * string/string-inlines.c: Include generic version.
+
2016-04-01 Wilco Dijkstra <wdijkstr@arm.com>
Adhemerval Zanella <adhemerval.zanella@linaro.org>
diff --git a/string/bits/string2.h b/string/bits/string2.h
index a8df0db..75a66a1 100644
--- a/string/bits/string2.h
+++ b/string/bits/string2.h
@@ -914,78 +914,10 @@ __stpcpy_small (char *__dest,
/* Return the length of the initial segment of S which
consists entirely of characters in ACCEPT. */
-#if !defined _HAVE_STRING_ARCH_strspn || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strspn
-# if __GNUC_PREREQ (3, 2)
-# define strspn(s, accept) \
- __extension__ \
- ({ char __a0, __a1, __a2; \
- (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \
- ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \
- ? __builtin_strspn (s, accept) \
- : ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \
- ? ((void) (s), (size_t) 0) \
- : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \
- ? __strspn_c1 (s, __a0) \
- : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \
- ? __strspn_c2 (s, __a0, __a1) \
- : (((const char *) (accept))[3] == '\0' \
- ? __strspn_c3 (s, __a0, __a1, __a2) \
- : __builtin_strspn (s, accept)))))) \
- : __builtin_strspn (s, accept)); })
-# else
-# define strspn(s, accept) \
- __extension__ \
- ({ char __a0, __a1, __a2; \
- (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \
- ? ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \
- ? ((void) (s), (size_t) 0) \
- : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \
- ? __strspn_c1 (s, __a0) \
- : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \
- ? __strspn_c2 (s, __a0, __a1) \
- : (((const char *) (accept))[3] == '\0' \
- ? __strspn_c3 (s, __a0, __a1, __a2) \
- : strspn (s, accept))))) \
- : strspn (s, accept)); })
-# endif
+#ifndef _HAVE_STRING_ARCH_strspn
+# if __GNUC_PREREQ (3, 2)
+# define strspn(s, accept) __builtin_strspn (s, accept)
# endif
-
-__STRING_INLINE size_t __strspn_c1 (const char *__s, int __accept);
-__STRING_INLINE size_t
-__strspn_c1 (const char *__s, int __accept)
-{
- size_t __result = 0;
- /* Please note that __accept never can be '\0'. */
- while (__s[__result] == __accept)
- ++__result;
- return __result;
-}
-
-__STRING_INLINE size_t __strspn_c2 (const char *__s, int __accept1,
- int __accept2);
-__STRING_INLINE size_t
-__strspn_c2 (const char *__s, int __accept1, int __accept2)
-{
- size_t __result = 0;
- /* Please note that __accept1 and __accept2 never can be '\0'. */
- while (__s[__result] == __accept1 || __s[__result] == __accept2)
- ++__result;
- return __result;
-}
-
-__STRING_INLINE size_t __strspn_c3 (const char *__s, int __accept1,
- int __accept2, int __accept3);
-__STRING_INLINE size_t
-__strspn_c3 (const char *__s, int __accept1, int __accept2, int __accept3)
-{
- size_t __result = 0;
- /* Please note that __accept1 to __accept3 never can be '\0'. */
- while (__s[__result] == __accept1 || __s[__result] == __accept2
- || __s[__result] == __accept3)
- ++__result;
- return __result;
-}
#endif
diff --git a/string/string-inlines.c b/string/string-inlines.c
index 83bdd6c..754b315 100644
--- a/string/string-inlines.c
+++ b/string/string-inlines.c
@@ -71,4 +71,40 @@ __old_strcspn_c3 (const char *__s, int __reject1, int __reject2,
return __result;
}
compat_symbol (libc, __old_strcspn_c3, __strcspn_c3, GLIBC_2_1_1);
+
+size_t
+__old_strspn_c1 (const char *__s, int __accept)
+{
+ size_t __result = 0;
+ /* Please note that __accept never can be '\0'. */
+ while (__s[__result] == __accept)
+ ++__result;
+ return __result;
+}
+compat_symbol (libc, __old_strspn_c1, __strspn_c1, GLIBC_2_1_1);
+
+size_t
+__old_strspn_c2 (const char *__s, int __accept1, int __accept2)
+{
+ size_t __result = 0;
+ /* Please note that __accept1 and __accept2 never can be '\0'. */
+ while (__s[__result] == __accept1 || __s[__result] == __accept2)
+ ++__result;
+ return __result;
+}
+compat_symbol (libc, __old_strspn_c2, __strspn_c2, GLIBC_2_1_1);
+
+size_t
+__old_strspn_c3 (const char *__s, int __accept1, int __accept2,
+ int __accept3)
+{
+ size_t __result = 0;
+ /* Please note that __accept1 to __accept3 never can be '\0'. */
+ while (__s[__result] == __accept1 || __s[__result] == __accept2
+ || __s[__result] == __accept3)
+ ++__result;
+ return __result;
+}
+compat_symbol (libc, __old_strspn_c3, __strspn_c3, GLIBC_2_1_1);
+
#endif
diff --git a/string/strspn.c b/string/strspn.c
index f0635c1..f98340b 100644
--- a/string/strspn.c
+++ b/string/strspn.c
@@ -16,32 +16,59 @@
<http://www.gnu.org/licenses/>. */
#include <string.h>
+#include <stdint.h>
#undef strspn
#ifndef STRSPN
-#define STRSPN strspn
+# define STRSPN strspn
#endif
/* Return the length of the maximum initial segment
of S which contains only characters in ACCEPT. */
size_t
-STRSPN (const char *s, const char *accept)
+STRSPN (const char *str, const char *accept)
{
- const char *p;
- const char *a;
- size_t count = 0;
-
- for (p = s; *p != '\0'; ++p)
+ if (accept[0] == '\0')
+ return 0;
+ if (__glibc_unlikely (accept[1] == '\0'))
{
- for (a = accept; *a != '\0'; ++a)
- if (*p == *a)
- break;
- if (*a == '\0')
- return count;
- else
- ++count;
+ const char *a = str;
+ for (; *str == *accept; str++);
+ return str - a;
}
- return count;
+ /* Use multiple small memsets to enable inlining on most targets. */
+ unsigned char table[256];
+ unsigned char *p = memset (table, 0, 64);
+ memset (p + 64, 0, 64);
+ memset (p + 128, 0, 64);
+ memset (p + 192, 0, 64);
+
+ unsigned char *s = (unsigned char*) accept;
+ /* Different from strcspn it does not add the NULL on the table
+ so can avoid check if str[i] is NULL, since table['\0'] will
+ be 0 and thus stopping the loop check. */
+ do
+ p[*s++] = 1;
+ while (*s);
+
+ s = (unsigned char*) str;
+ if (!p[s[0]]) return 0;
+ if (!p[s[1]]) return 1;
+ if (!p[s[2]]) return 2;
+ if (!p[s[3]]) return 3;
+
+ s = (unsigned char *) ((uintptr_t)(s) & ~3);
+ unsigned int c0, c1, c2, c3;
+ do {
+ s += 4;
+ c0 = p[s[0]];
+ c1 = p[s[1]];
+ c2 = p[s[2]];
+ c3 = p[s[3]];
+ } while ((c0 & c1 & c2 & c3) != 0);
+
+ size_t count = s - (unsigned char *) str;
+ return (c0 & c1) == 0 ? count + c0 : count + c2 + 2;
}
libc_hidden_builtin_def (strspn)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=d3496c9f4f27d3009b71be87f6108b4fed7314bd
commit d3496c9f4f27d3009b71be87f6108b4fed7314bd
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date: Fri Mar 25 16:44:26 2016 -0300
Improve generic strcspn performance
Improve strcspn performance using a much faster algorithm. It is kept simple
so it works well on most targets. It is generally at least 10 times faster
than the existing implementation on bench-strcspn on a few AArch64
implementations, and for some tests 100 times as fast (repeatedly calling
strchr on a small string is extremely slow...).
In fact the string/bits/string2.h inlines make no longer sense, as GCC
already uses strlen if reject is an empty string, strchrnul is 5 times as
fast as __strcspn_c1, while __strcspn_c2 and __strcspn_c3 are slower than
the strcspn main loop for large strings (though reject length 2-4 could be
special cased in the future to gain even more performance).
Tested on x86_64, i686, and aarch64.
* string/Version (libc): Add GLIBC_2.24.
* string/strcspn.c (strcspn): Rewrite function.
* string/bits/string2.h (strcspn): Use __builtin_strcspn.
(__strcspn_c1): Remove inline function.
(__strcspn_c2): Likewise.
(__strcspn_c3): Likewise.
* string/string-inline.c
[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c1): Add
compatibility symbol.
[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c2):
Likewise.
[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c3):
Likewise.
* sysdeps/i386/string-inlines.c: Include generic string-inlines.c.
diff --git a/ChangeLog b/ChangeLog
index 084c3d8..0d522ad 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+2016-04-01 Wilco Dijkstra <wdijkstr@arm.com>
+ Adhemerval Zanella <adhemerval.zanella@linaro.org>
+
+ * string/Version (libc): Add GLIBC_2.24.
+ * string/strcspn.c (strcspn): Rewrite function.
+ * string/bits/string2.h (strcspn): Use __builtin_strcspn.
+ (__strcspn_c1): Remove inline function.
+ (__strcspn_c2): Likewise.
+ (__strcspn_c3): Likewise.
+ * string/string-inline.c
+ [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c1): Add
+ compatibility symbol.
+ [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c2):
+ Likewise.
+ [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c3):
+ Likewise.
+ * sysdeps/i386/string-inlines.c: Include generic string-inlines.c.
+
2016-04-01 Stefan Liebler <stli@linux.vnet.ibm.com>
* sysdeps/s390/s390-32/dl-trampoline.h (_dl_runtime_resolve):
diff --git a/string/Versions b/string/Versions
index 59bf35a..475c1fd 100644
--- a/string/Versions
+++ b/string/Versions
@@ -80,4 +80,6 @@ libc {
GLIBC_2.6 {
strerror_l;
}
+ GLIBC_2.24 {
+ }
}
diff --git a/string/bits/string2.h b/string/bits/string2.h
index 8200ef1..a8df0db 100644
--- a/string/bits/string2.h
+++ b/string/bits/string2.h
@@ -905,77 +905,10 @@ __stpcpy_small (char *__dest,
/* Return the length of the initial segment of S which
consists entirely of characters not in REJECT. */
-#if !defined _HAVE_STRING_ARCH_strcspn || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strcspn
-# if __GNUC_PREREQ (3, 2)
-# define strcspn(s, reject) \
- __extension__ \
- ({ char __r0, __r1, __r2; \
- (__builtin_constant_p (reject) && __string2_1bptr_p (reject) \
- ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \
- ? __builtin_strcspn (s, reject) \
- : ((__r0 = ((const char *) (reject))[0], __r0 == '\0') \
- ? strlen (s) \
- : ((__r1 = ((const char *) (reject))[1], __r1 == '\0') \
- ? __strcspn_c1 (s, __r0) \
- : ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \
- ? __strcspn_c2 (s, __r0, __r1) \
- : (((const char *) (reject))[3] == '\0' \
- ? __strcspn_c3 (s, __r0, __r1, __r2) \
- : __builtin_strcspn (s, reject)))))) \
- : __builtin_strcspn (s, reject)); })
-# else
-# define strcspn(s, reject) \
- __extension__ \
- ({ char __r0, __r1, __r2; \
- (__builtin_constant_p (reject) && __string2_1bptr_p (reject) \
- ? ((__r0 = ((const char *) (reject))[0], __r0 == '\0') \
- ? strlen (s) \
- : ((__r1 = ((const char *) (reject))[1], __r1 == '\0') \
- ? __strcspn_c1 (s, __r0) \
- : ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \
- ? __strcspn_c2 (s, __r0, __r1) \
- : (((const char *) (reject))[3] == '\0' \
- ? __strcspn_c3 (s, __r0, __r1, __r2) \
- : strcspn (s, reject))))) \
- : strcspn (s, reject)); })
-# endif
+#ifndef _HAVE_STRING_ARCH_strcspn
+# if __GNUC_PREREQ (3, 2)
+# define strcspn(s, reject) __builtin_strcspn (s, reject)
# endif
-
-__STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject);
-__STRING_INLINE size_t
-__strcspn_c1 (const char *__s, int __reject)
-{
- size_t __result = 0;
- while (__s[__result] != '\0' && __s[__result] != __reject)
- ++__result;
- return __result;
-}
-
-__STRING_INLINE size_t __strcspn_c2 (const char *__s, int __reject1,
- int __reject2);
-__STRING_INLINE size_t
-__strcspn_c2 (const char *__s, int __reject1, int __reject2)
-{
- size_t __result = 0;
- while (__s[__result] != '\0' && __s[__result] != __reject1
- && __s[__result] != __reject2)
- ++__result;
- return __result;
-}
-
-__STRING_INLINE size_t __strcspn_c3 (const char *__s, int __reject1,
- int __reject2, int __reject3);
-__STRING_INLINE size_t
-__strcspn_c3 (const char *__s, int __reject1, int __reject2,
- int __reject3)
-{
- size_t __result = 0;
- while (__s[__result] != '\0' && __s[__result] != __reject1
- && __s[__result] != __reject2 && __s[__result] != __reject3)
- ++__result;
- return __result;
-}
#endif
diff --git a/string/strcspn.c b/string/strcspn.c
index 8888919..c535dd1 100644
--- a/string/strcspn.c
+++ b/string/strcspn.c
@@ -16,6 +16,7 @@
<http://www.gnu.org/licenses/>. */
#include <string.h>
+#include <stdint.h>
#undef strcspn
@@ -26,16 +27,45 @@
/* Return the length of the maximum initial segment of S
which contains no characters from REJECT. */
size_t
-STRCSPN (const char *s, const char *reject)
+STRCSPN (const char *str, const char *reject)
{
- size_t count = 0;
+ if (__glibc_unlikely (reject[0] == '\0') ||
+ __glibc_unlikely (reject[1] == '\0'))
+ return __strchrnul (str, reject [0]) - str;
- while (*s != '\0')
- if (strchr (reject, *s++) == NULL)
- ++count;
- else
- return count;
+ /* Use multiple small memsets to enable inlining on most targets. */
+ unsigned char table[256];
+ unsigned char *p = memset (table, 0, 64);
+ memset (p + 64, 0, 64);
+ memset (p + 128, 0, 64);
+ memset (p + 192, 0, 64);
- return count;
+ unsigned char *s = (unsigned char*) reject;
+ unsigned char tmp;
+ do
+ p[tmp = *s++] = 1;
+ while (tmp);
+
+ s = (unsigned char*) str;
+ if (p[s[0]]) return 0;
+ if (p[s[1]]) return 1;
+ if (p[s[2]]) return 2;
+ if (p[s[3]]) return 3;
+
+ s = (unsigned char *) ((uintptr_t)(s) & ~3);
+
+ unsigned int c0, c1, c2, c3;
+ do
+ {
+ s += 4;
+ c0 = p[s[0]];
+ c1 = p[s[1]];
+ c2 = p[s[2]];
+ c3 = p[s[3]];
+ }
+ while ((c0 | c1 | c2 | c3) == 0);
+
+ size_t count = s - (unsigned char *) str;
+ return (c0 | c1) != 0 ? count - c0 + 1 : count - c2 + 3;
}
libc_hidden_builtin_def (strcspn)
diff --git a/string/string-inlines.c b/string/string-inlines.c
index 16db3ea..83bdd6c 100644
--- a/string/string-inlines.c
+++ b/string/string-inlines.c
@@ -32,3 +32,43 @@
#undef __NO_INLINE__
#include <bits/string.h>
#include <bits/string2.h>
+
+#include "shlib-compat.h"
+
+#if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_24)
+/* The inline functions are not used from GLIBC 2.24 and forward, however
+ they are required to provide the symbols through string-inlines.c
+ (if inlining is not possible for compatibility reasons). */
+size_t
+__old_strcspn_c1 (const char *__s, int __reject)
+{
+ size_t __result = 0;
+ while (__s[__result] != '\0' && __s[__result] != __reject)
+ ++__result;
+ return __result;
+}
+compat_symbol (libc, __old_strcspn_c1, __strcspn_c1, GLIBC_2_1_1);
+
+size_t
+__old_strcspn_c2 (const char *__s, int __reject1, int __reject2)
+{
+ size_t __result = 0;
+ while (__s[__result] != '\0' && __s[__result] != __reject1
+ && __s[__result] != __reject2)
+ ++__result;
+ return __result;
+}
+compat_symbol (libc, __old_strcspn_c2, __strcspn_c2, GLIBC_2_1_1);
+
+size_t
+__old_strcspn_c3 (const char *__s, int __reject1, int __reject2,
+ int __reject3)
+{
+ size_t __result = 0;
+ while (__s[__result] != '\0' && __s[__result] != __reject1
+ && __s[__result] != __reject2 && __s[__result] != __reject3)
+ ++__result;
+ return __result;
+}
+compat_symbol (libc, __old_strcspn_c3, __strcspn_c3, GLIBC_2_1_1);
+#endif
diff --git a/sysdeps/i386/string-inlines.c b/sysdeps/i386/string-inlines.c
index c7de270..64d80e8 100644
--- a/sysdeps/i386/string-inlines.c
+++ b/sysdeps/i386/string-inlines.c
@@ -15,27 +15,10 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-/* <bits/string.h> and <bits/string2.h> declare some extern inline
- functions. These functions are declared additionally here if
- inlining is not possible. */
-
-#undef __USE_STRING_INLINES
-#define __USE_STRING_INLINES
-#define _FORCE_INLINES
-#define __STRING_INLINE /* empty */
-#define __NO_INLINE__
-
/* This is to avoid PLT entries for the x86 version. */
#define __memcpy_g __memcpy_g_internal
#define __strchr_g __strchr_g_internal
-
-#include <string.h>
-#undef index
-#undef rindex
-
-#undef __NO_INLINE__
-#include <bits/string.h>
-#include <bits/string2.h>
+#include <string/string-inlines.c>
void *
(__memcpy_c) (void *d, const void *s, size_t n)
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 48 ++++++++
string/Versions | 2 +
string/bits/string2.h | 208 ++---------------------------------
string/strcspn.c | 46 +++++++--
string/string-inlines.c | 97 ++++++++++++++++
string/strpbrk.c | 12 +--
string/strspn.c | 57 +++++++---
sysdeps/i386/string-inlines.c | 19 +---
sysdeps/powerpc/powerpc64/strcspn.S | 127 ---------------------
sysdeps/powerpc/powerpc64/strpbrk.S | 135 -----------------------
sysdeps/powerpc/powerpc64/strspn.S | 144 ------------------------
11 files changed, 239 insertions(+), 656 deletions(-)
delete mode 100644 sysdeps/powerpc/powerpc64/strcspn.S
delete mode 100644 sysdeps/powerpc/powerpc64/strpbrk.S
delete mode 100644 sysdeps/powerpc/powerpc64/strspn.S
hooks/post-receive
--
GNU C Library master sources