This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH 2/3] powerpc: Add powerpc64 strcspn optimization
- From: Adhemerval Zanella <azanella at linux dot vnet dot ibm dot com>
- To: "GNU C. Library" <libc-alpha at sourceware dot org>
- Date: Mon, 24 Nov 2014 14:56:43 -0200
- Subject: [PATCH 2/3] powerpc: Add powerpc64 strcspn optimization
- Authentication-results: sourceware.org; auth=none
This patch makes the POWER7 optimized strcspn generic by using
default doubleword stores to zero the hash, instead of VSX
instructions. Performance on POWER7/POWER8 machines does not
change, and the routine is faster on older machines (POWER6).
Checked on powerpc64.
--
* sysdeps/powerpc/powerpc64/multiarch/Makefile [sysdep_routines]:
Remove strcspn objects.
* sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
(__libc_ifunc_impl_list): Remove strcspn implementation.
* sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S: Remove file.
* sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c: Remove file.
* sysdeps/powerpc/powerpc64/multiarch/strcspn.c: Remove file.
* sysdeps/powerpc/powerpc64/power7/strcspn.S: Remove file.
* sysdeps/powerpc/powerpc64/strcspn.S: New file.
---
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index d6de5a5..05dab25 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -15,7 +15,6 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
wordcopy-power7 wordcopy-power6 wordcopy-ppc64 \
strcpy-power7 strcpy-ppc64 stpcpy-power7 stpcpy-ppc64 \
strrchr-power7 strrchr-ppc64 strncat-power7 strncat-ppc64 \
- strcspn-power7 strcspn-ppc64 \
strpbrk-power7 strpbrk-ppc64 strncpy-power7 strncpy-ppc64 \
stpncpy-power7 stpncpy-ppc64 strcmp-power7 strcmp-ppc64 \
strcat-power7 strcat-ppc64 memmove-power7 memmove-ppc64 \
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index 4a9e523..1a2e38d 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -272,14 +272,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strncat, 1,
__strncat_ppc))
- /* Support sysdeps/powerpc/powerpc64/multiarch/strcspn.c. */
- IFUNC_IMPL (i, name, strcspn,
- IFUNC_IMPL_ADD (array, i, strcspn,
- hwcap & PPC_FEATURE_HAS_VSX,
- __strcspn_power7)
- IFUNC_IMPL_ADD (array, i, strcspn, 1,
- __strcspn_ppc))
-
/* Support sysdeps/powerpc/powerpc64/multiarch/strpbrk.c. */
IFUNC_IMPL (i, name, strpbrk,
IFUNC_IMPL_ADD (array, i, strpbrk,
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S
deleted file mode 100644
index 02ffcc8..0000000
--- a/sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S
+++ /dev/null
@@ -1,40 +0,0 @@
-/* Optimized strcspn implementation for POWER7.
- Copyright (C) 2014 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-#undef EALIGN
-#define EALIGN(name, alignt, words) \
- .section ".text"; \
- ENTRY_2(__strcspn_power7) \
- .align ALIGNARG(alignt); \
- EALIGN_W_##words; \
- BODY_LABEL(__strcspn_power7): \
- cfi_startproc; \
- LOCALENTRY(__strcspn_power7)
-
-#undef END
-#define END(name) \
- cfi_endproc; \
- TRACEBACK(__strcspn_power7) \
- END_2(__strcspn_power7)
-
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-
-#include <sysdeps/powerpc/powerpc64/power7/strcspn.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c
deleted file mode 100644
index 5f8b610..0000000
--- a/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright (C) 2014 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <string.h>
-
-#define STRCSPN __strcspn_ppc
-#ifdef SHARED
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name) \
- __hidden_ver1 (__strcspn_ppc, __GI_strcspn, __strcspn_ppc);
-#endif
-
-extern __typeof (strcspn) __strcspn_ppc attribute_hidden;
-
-#include <string/strcspn.c>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcspn.c b/sysdeps/powerpc/powerpc64/multiarch/strcspn.c
deleted file mode 100644
index 156fa6a..0000000
--- a/sysdeps/powerpc/powerpc64/multiarch/strcspn.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/* Multiple versions of strcspn. PowerPC64 version.
- Copyright (C) 2014 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-# include <string.h>
-# include <shlib-compat.h>
-# include "init-arch.h"
-
-extern __typeof (strcspn) __strcspn_ppc attribute_hidden;
-extern __typeof (strcspn) __strcspn_power7 attribute_hidden;
-
-libc_ifunc (strcspn,
- (hwcap & PPC_FEATURE_HAS_VSX)
- ? __strcspn_power7
- : __strcspn_ppc);
-#endif
diff --git a/sysdeps/powerpc/powerpc64/power7/strcspn.S b/sysdeps/powerpc/powerpc64/power7/strcspn.S
deleted file mode 100644
index 3f6aa0a..0000000
--- a/sysdeps/powerpc/powerpc64/power7/strcspn.S
+++ /dev/null
@@ -1,139 +0,0 @@
-/* Optimized strcspn implementation for PowerPC64.
- Copyright (C) 2014 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* size_t [r3] strcspn (const char [r4] *s, const char [r5] *reject) */
-
- .machine power7
-EALIGN (strcspn, 4, 0)
- CALL_MCOUNT 3
-
- /* The idea to speed up the algorithm is to create a lookup table
- for fast check if input character should be considered. For ASCII
- or ISO-8859-X character sets it has 256 positions. */
- lbz r10,0(r4)
-
- /* First the table should be cleared and to avoid unaligned accesses
- when using the VSX stores the table address is aligned to 16
- bytes. */
- xxlxor v0,v0,v0
-
- /* PPC64 ELF ABI stack is aligned to 16 bytes. */
- addi r9,r1,-256
-
- li r8,48
- li r5,16
- li r6,32
- cmpdi cr7,r10,0 /* reject[0] == '\0' ? */
- addi r12,r9,64
- /* Clear the table with 0 values */
- stxvw4x v0,r0,r9
- addi r11,r9,128
- addi r7,r9,192
- stxvw4x v0,r9,r5
- stxvw4x v0,r9,r6
- stxvw4x v0,r9,r8
- stxvw4x v0,r0,r12
- stxvw4x v0,r12,r5
- stxvw4x v0,r12,r6
- stxvw4x v0,r12,r8
- stxvw4x v0,r0,r11
- stxvw4x v0,r11,r5
- stxvw4x v0,r11,r6
- stxvw4x v0,r11,r8
- stxvw4x v0,r0,r7
- stxvw4x v0,r7,r5
- stxvw4x v0,r7,r6
- stxvw4x v0,r7,r8
- li r8,1
- beq cr7,L(finish_table) /* If reject[0] == '\0' skip */
-
- /* Initialize the table as:
- for (i=0; reject[i]; i++
- table[reject[i]]] = 1 */
- .p2align 4,,15
-L(init_table):
- stbx r8,r9,r10
- lbzu r10,1(r4)
- cmpdi cr7,r10,0 /* If reject[0] == '\0' finish */
- bne cr7,L(init_table)
-L(finish_table):
- /* set table[0] = 1 */
- li r10,1
- stb r10,0(r9)
- li r10,0
- b L(mainloop)
-
- /* Unrool the loop 4 times and check using the table as:
- i = 0;
- while (1)
- {
- if (table[input[i++]] == 1)
- return i - 1;
- if (table[input[i++]] == 1)
- return i - 1;
- if (table[input[i++]] == 1)
- return i - 1;
- if (table[input[i++]] == 1)
- return i - 1;
- } */
- .p2align 4,,15
-L(unroll):
- lbz r8,1(r3)
- addi r10,r10,4
- lbzx r8,r9,r8
- cmpwi r7,r8,1
- beq cr7,L(end)
- lbz r8,2(r3)
- addi r3,r3,4
- lbzx r8,r9,r8
- cmpwi cr7,r8,1
- beq cr7,L(end2)
- lbz r8,3(r7)
- lbzx r8,r9,r8
- cmpwi cr7,r8,1
- beq cr7,L(end3)
-L(mainloop):
- lbz r8,0(r3)
- mr r7,r3
- addi r6,r10,1
- addi r4,r10,2
- addi r5,r10,3
- lbzx r8,r9,8
- cmpwi cr7,r8,1
- bne cr7,L(unroll)
- mr r3,r10
- blr
-
- .p2align 4,,15
-L(end):
- mr r3,r6
- blr
-
- .p2align 4,,15
-L(end2):
- mr r3,r4
- blr
-
- .p2align 4,,15
-L(end3):
- mr r3,r5
- blr
-END (strcspn)
-libc_hidden_builtin_def (strcspn)
diff --git a/sysdeps/powerpc/powerpc64/strcspn.S b/sysdeps/powerpc/powerpc64/strcspn.S
new file mode 100644
index 0000000..783803d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/strcspn.S
@@ -0,0 +1,128 @@
+/* Optimized strcspn implementation for PowerPC64.
+ Copyright (C) 2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* size_t [r3] strcspn (const char *s [r3], const char *reject [r4]) */
+
+ .machine power7 /* NOTE(review): no POWER7-only instruction is visible below - confirm this is still wanted. */
+EALIGN (strcspn, 4, 0)
+ CALL_MCOUNT 3
+
+ /* To make the per-character test cheap, build a byte lookup table
+ indexed by character value; an entry of 1 marks a character that
+ ends the span. One entry per byte value gives 256 positions. */
+
+ /* PPC64 ELF ABI stack is aligned to 16 bytes. */
+ addi r9,r1,-256 /* r9 = table base, 256 bytes below r1 */
+ /* Clear the table with 0 values */
+ li r6, 0
+ li r8, 4 /* 4 iterations x 64 bytes = 256 bytes */
+ mtctr r8
+ mr r10, r9 /* r10 = store cursor, r9 keeps the base */
+ .align 4
+L(zerohash):
+ std r6, 0(r10)
+ std r6, 8(r10)
+ std r6, 16(r10)
+ std r6, 24(r10)
+ std r6, 32(r10)
+ std r6, 40(r10)
+ std r6, 48(r10)
+ std r6, 56(r10)
+ addi r10, r10, 64
+ bdnz L(zerohash)
+
+ lbz r10,0(r4) /* r10 = reject[0] */
+ cmpdi cr7,r10,0 /* reject[0] == '\0' ? */
+ li r8,1 /* value stored for every reject byte */
+ beq cr7,L(finish_table) /* If reject[0] == '\0' skip */
+
+ /* Initialize the table as:
+ for (i = 0; reject[i]; i++)
+ table[reject[i]] = 1; */
+ .align 4
+L(init_table):
+ stbx r8,r9,r10
+ lbzu r10,1(r4)
+ cmpdi cr7,r10,0 /* If reject[i] == '\0' finish */
+ bne cr7,L(init_table)
+L(finish_table):
+ /* set table[0] = 1 so the NUL terminator of s also stops the scan */
+ li r10,1
+ stb r10,0(r9)
+ li r10,0 /* r10 = i, the running count returned at the end */
+ b L(mainloop)
+
+ /* Unroll the loop 4 times and check using the table as:
+ i = 0;
+ while (1)
+ {
+ if (table[input[i++]] == 1)
+ return i - 1;
+ if (table[input[i++]] == 1)
+ return i - 1;
+ if (table[input[i++]] == 1)
+ return i - 1;
+ if (table[input[i++]] == 1)
+ return i - 1;
+ } */
+ .p2align 4,,15
+L(unroll):
+ lbz r8,1(r3)
+ addi r10,r10,4
+ lbzx r8,r9,r8
+ cmpwi r7,r8,1 /* NOTE(review): first operand is spelled r7 but the branch below tests cr7 - presumably cr7, confirm. */
+ beq cr7,L(end)
+ lbz r8,2(r3)
+ addi r3,r3,4
+ lbzx r8,r9,r8
+ cmpwi cr7,r8,1
+ beq cr7,L(end2)
+ lbz r8,3(r7) /* r7 still holds the pointer from before r3 was advanced */
+ lbzx r8,r9,r8
+ cmpwi cr7,r8,1
+ beq cr7,L(end3)
+L(mainloop):
+ lbz r8,0(r3)
+ mr r7,r3 /* save the current pointer; r3 is advanced in L(unroll) */
+ addi r6,r10,1 /* r6, r4, r5 = i+1, i+2, i+3: the results for L(end), L(end2), L(end3) */
+ addi r4,r10,2
+ addi r5,r10,3
+ lbzx r8,r9,8 /* NOTE(review): index operand is a bare 8 - presumably r8, confirm. */
+ cmpwi cr7,r8,1
+ bne cr7,L(unroll)
+ mr r3,r10 /* stop at offset i */
+ blr
+
+ .p2align 4,,15
+L(end):
+ mr r3,r6
+ blr
+
+ .p2align 4,,15
+L(end2):
+ mr r3,r4
+ blr
+
+ .p2align 4,,15
+L(end3):
+ mr r3,r5
+ blr
+END (strcspn)
+libc_hidden_builtin_def (strcspn)