This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch master updated. glibc-2.24-545-g5e628dd


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  5e628dd118807981bdd880731e6fdf019f64a245 (commit)
       via  9314d3545e6641063b490918e2e8716556ba20db (commit)
      from  ffcf0f1cb77dd0d902495fd066a96784f76f2c3a (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=5e628dd118807981bdd880731e6fdf019f64a245

commit 5e628dd118807981bdd880731e6fdf019f64a245
Author: Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
Date:   Wed Dec 28 11:38:56 2016 -0200

    powerpc: Fix powerpc32/power7 memchr for large input sizes
    
    The same error fixed in commit b224637928e9fc04e3cef3e10d02ccf042d01584
    happens in the 32-bit implementation of memchr for power7.
    
    This patch adopts the same solution, with a minimal change: it
    implements a saturated addition where an overflow sets the last
    acceptable address to UINTPTR_MAX.

diff --git a/ChangeLog b/ChangeLog
index d9b2c98..1ad6867 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-28  Tulio Magno Quites Machado Filho  <tuliom@linux.vnet.ibm.com>
+
+	* sysdeps/powerpc/powerpc32/power7/memchr.S (__memchr): Avoid an
+	overflow in pointer addition.
+
 2016-12-28  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>
 
 	* sysdeps/powerpc/powerpc64/multiarch/Makefile
diff --git a/sysdeps/powerpc/powerpc32/power7/memchr.S b/sysdeps/powerpc/powerpc32/power7/memchr.S
index 318168b..5ce2bb0 100644
--- a/sysdeps/powerpc/powerpc32/power7/memchr.S
+++ b/sysdeps/powerpc/powerpc32/power7/memchr.S
@@ -26,7 +26,16 @@ ENTRY (__memchr)
 	dcbt	0,r3
 	clrrwi  r8,r3,2
 	insrwi	r4,r4,8,16    /* Replicate byte to word.  */
-	add	r7,r3,r5      /* Calculate the last acceptable address.  */
+
+	/* Calculate the last acceptable address and check for possible
+	   addition overflow by using saturated math:
+	   r7 = r3 + r5
+	   r7 |= -(r7 < r3)  */
+	add     r7,r3,r5
+	subfc   r6,r3,r7
+	subfe   r9,r9,r9
+	or      r7,r7,r9
+
 	insrwi	r4,r4,16,0
 	cmplwi	r5,16
 	li	r9, -1

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=9314d3545e6641063b490918e2e8716556ba20db

commit 9314d3545e6641063b490918e2e8716556ba20db
Author: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Date:   Tue Dec 27 17:48:37 2016 -0200

    powerpc64: strchr/strchrnul optimization for power8
    
    The P7 code is used for strings of up to 32B, and vectorized loops are
    used for strings longer than 32B.  This shows an average improvement of
    25%, depending on the position of the search character.  The performance
    is the same for shorter strings.
    Tested on ppc64 and ppc64le.

diff --git a/ChangeLog b/ChangeLog
index b794cac..d9b2c98 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2016-12-28  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>
+
+	* sysdeps/powerpc/powerpc64/multiarch/Makefile
+	(sysdep_routines): Add strchr-power8 and strchrnul_power8.
+	* sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+	(strchr): Add __strchr_power8 to list of strchr functions.
+	(strchrnul): Add __strchrnul_power8 to list of strchrnul functions.
+	* sysdeps/powerpc/powerpc64/multiarch/strchr-power8.S: New file.
+	* sysdeps/powerpc/powerpc64/multiarch/strchrnul-power8.S: New file.
+	* sysdeps/powerpc/powerpc64/multiarch/strchr.c
+	(strchr): Add __strchr_power8 to ifunc list.
+	* sysdeps/powerpc/powerpc64/multiarch/strchrnul.c
+	(__strchrnul): Add __strchrnul_power8 to ifunc list.
+	* sysdeps/powerpc/powerpc64/power8/strchr.S: New file.
+	* sysdeps/powerpc/powerpc64/power8/strchrnul.S: New file.
+
 2016-12-28  Florian Weimer  <fweimer@redhat.com>
 
 	* support/Makefile (libsupport-routines): Add
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index 2997b9d..f5889a3 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -10,8 +10,8 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
 		   strncase-power7 strncase_l-power7 \
 		   strncmp-power9 strncmp-power8 strncmp-power7 \
 		   strncmp-power4 strncmp-ppc64 \
-		   strchr-power7 strchr-ppc64 \
-		   strchrnul-power7 strchrnul-ppc64 \
+		   strchr-power8 strchr-power7 strchr-ppc64 \
+		   strchrnul-power8 strchrnul-power7 strchrnul-ppc64 \
 		   strcpy-power8 strcpy-power7 strcpy-ppc64 stpcpy-power8 \
 		   stpcpy-power7 stpcpy-ppc64 \
 		   strrchr-power7 strrchr-ppc64 strncat-power7 strncat-ppc64 \
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index 2d085a2..703a49b 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -124,6 +124,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/powerpc/powerpc64/multiarch/strchr.c.  */
   IFUNC_IMPL (i, name, strchr,
 	      IFUNC_IMPL_ADD (array, i, strchr,
+			      hwcap2 & PPC_FEATURE2_ARCH_2_07,
+			      __strchr_power8)
+	      IFUNC_IMPL_ADD (array, i, strchr,
 			      hwcap & PPC_FEATURE_HAS_VSX,
 			      __strchr_power7)
 	      IFUNC_IMPL_ADD (array, i, strchr, 1,
@@ -132,6 +135,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/powerpc/powerpc64/multiarch/strchrnul.c.  */
   IFUNC_IMPL (i, name, strchrnul,
 	      IFUNC_IMPL_ADD (array, i, strchrnul,
+			      hwcap2 & PPC_FEATURE2_ARCH_2_07,
+			      __strchrnul_power8)
+	      IFUNC_IMPL_ADD (array, i, strchrnul,
 			      hwcap & PPC_FEATURE_HAS_VSX,
 			      __strchrnul_power7)
 	      IFUNC_IMPL_ADD (array, i, strchrnul, 1,
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c b/sysdeps/powerpc/powerpc64/multiarch/strchr-power8.S
similarity index 54%
copy from sysdeps/powerpc/powerpc64/multiarch/strchrnul.c
copy to sysdeps/powerpc/powerpc64/multiarch/strchr-power8.S
index 682aa0f..dd0b7f5 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strchr-power8.S
@@ -1,5 +1,5 @@
-/* Multiple versions of strchrnul.
-   Copyright (C) 2013-2016 Free Software Foundation, Inc.
+/* Optimized strchr implementation for POWER8.
+   Copyright (C) 2016 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,22 +16,24 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#if IS_IN (libc)
-# include <string.h>
-# include <shlib-compat.h>
-# include "init-arch.h"
-
-extern __typeof (__strchrnul) __strchrnul_ppc attribute_hidden;
-extern __typeof (__strchrnul) __strchrnul_power7 attribute_hidden;
-
-/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
-   ifunc symbol properly.  */
-libc_ifunc (__strchrnul,
-	    (hwcap & PPC_FEATURE_HAS_VSX)
-            ? __strchrnul_power7
-            : __strchrnul_ppc);
-
-weak_alias (__strchrnul, strchrnul)
-#else
-#include <string/strchrnul.c>
-#endif
+#include <sysdep.h>
+
+#undef ENTRY
+#define ENTRY(name)						\
+  .section ".text";						\
+  ENTRY_2(__strchr_power8)					\
+  .align ALIGNARG(2);						\
+  BODY_LABEL(__strchr_power8):					\
+  cfi_startproc;						\
+  LOCALENTRY(__strchr_power8)
+
+#undef END
+#define END(name)						\
+  cfi_endproc;							\
+  TRACEBACK(__strchr_power8)					\
+  END_2(__strchr_power8)
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/powerpc/powerpc64/power8/strchr.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strchr.c b/sysdeps/powerpc/powerpc64/multiarch/strchr.c
index e24d6b3..2ffb1f6 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strchr.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strchr.c
@@ -27,11 +27,14 @@
 
 extern __typeof (strchr) __strchr_ppc attribute_hidden;
 extern __typeof (strchr) __strchr_power7 attribute_hidden;
+extern __typeof (strchr) __strchr_power8 attribute_hidden;
 # undef strchr
 
 /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
    ifunc symbol properly.  */
 libc_ifunc_redirected (__redirect_strchr, strchr,
+		       (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+		       ? __strchr_power8 :
 		       (hwcap & PPC_FEATURE_HAS_VSX)
 		       ? __strchr_power7
 		       : __strchr_ppc);
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c b/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power8.S
similarity index 54%
copy from sysdeps/powerpc/powerpc64/multiarch/strchrnul.c
copy to sysdeps/powerpc/powerpc64/multiarch/strchrnul-power8.S
index 682aa0f..d0bfeda 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power8.S
@@ -1,5 +1,5 @@
-/* Multiple versions of strchrnul.
-   Copyright (C) 2013-2016 Free Software Foundation, Inc.
+/* Optimized strchrnul implementation for POWER8.
+   Copyright (C) 2016 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,22 +16,24 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#if IS_IN (libc)
-# include <string.h>
-# include <shlib-compat.h>
-# include "init-arch.h"
-
-extern __typeof (__strchrnul) __strchrnul_ppc attribute_hidden;
-extern __typeof (__strchrnul) __strchrnul_power7 attribute_hidden;
-
-/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
-   ifunc symbol properly.  */
-libc_ifunc (__strchrnul,
-	    (hwcap & PPC_FEATURE_HAS_VSX)
-            ? __strchrnul_power7
-            : __strchrnul_ppc);
-
-weak_alias (__strchrnul, strchrnul)
-#else
-#include <string/strchrnul.c>
-#endif
+#include <sysdep.h>
+
+#undef ENTRY
+#define ENTRY(name)						\
+  .section ".text";						\
+  ENTRY_2(__strchrnul_power8)					\
+  .align ALIGNARG(2);						\
+  BODY_LABEL(__strchrnul_power8):				\
+  cfi_startproc;						\
+  LOCALENTRY(__strchrnul_power8)
+
+#undef END
+#define END(name)						\
+  cfi_endproc;							\
+  TRACEBACK(__strchrnul_power8)					\
+  END_2(__strchrnul_power8)
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/powerpc/powerpc64/power8/strchrnul.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c b/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c
index 682aa0f..63df401 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c
@@ -23,10 +23,13 @@
 
 extern __typeof (__strchrnul) __strchrnul_ppc attribute_hidden;
 extern __typeof (__strchrnul) __strchrnul_power7 attribute_hidden;
+extern __typeof (__strchrnul) __strchrnul_power8 attribute_hidden;
 
 /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
    ifunc symbol properly.  */
 libc_ifunc (__strchrnul,
+	    (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+	    ? __strchrnul_power8 :
 	    (hwcap & PPC_FEATURE_HAS_VSX)
             ? __strchrnul_power7
             : __strchrnul_ppc);
diff --git a/sysdeps/powerpc/powerpc64/power8/strchr.S b/sysdeps/powerpc/powerpc64/power8/strchr.S
new file mode 100644
index 0000000..331d0a6
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power8/strchr.S
@@ -0,0 +1,368 @@
+/* Optimized strchr implementation for PowerPC64/POWER8.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#ifdef USE_AS_STRCHRNUL
+# define FUNC_NAME __strchrnul
+#else
+# define FUNC_NAME strchr
+#endif
+/* char * [r3] strchr (char *s [r3], int c [r4])  */
+/* TODO: change these to the actual instructions when the minimum required
+   binutils allows it.  */
+#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
+#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
+#define VBPERMQ(t,a,b)  .long (0x1000054c \
+			| ((t)<<(32-11)) \
+			| ((a)<<(32-16)) \
+			| ((b)<<(32-21)) )
+/* TODO: change this to .machine power8 when the minimum required binutils
+   allows it.  */
+	.machine  power7
+ENTRY (FUNC_NAME)
+	CALL_MCOUNT 2
+	dcbt	0,r3
+	clrrdi	r8,r3,3	      /* Align the address to doubleword boundary.  */
+	cmpdi	cr7,r4,0
+	ld	r12,0(r8)     /* Load doubleword from memory.  */
+	li	r0,0	      /* Doubleword with null chars to use
+				 with cmpb.  */
+
+	rlwinm	r6,r3,3,26,28 /* Calculate padding.  */
+
+	beq	cr7,L(null_match)
+
+	/* Replicate byte to doubleword.  */
+	insrdi	r4,r4,8,48
+	insrdi	r4,r4,16,32
+	insrdi  r4,r4,32,0
+
+	/* Now r4 has a doubleword of c bytes and r0 has
+	   a doubleword of null bytes.  */
+
+	cmpb	r10,r12,r4     /* Compare each byte against c byte.  */
+	cmpb	r11,r12,r0     /* Compare each byte against null byte.  */
+
+	/* Move the doublewords left and right to discard the bits that are
+	   not part of the string and bring them back as zeros.  */
+#ifdef __LITTLE_ENDIAN__
+	srd	r10,r10,r6
+	srd	r11,r11,r6
+	sld	r10,r10,r6
+	sld	r11,r11,r6
+#else
+	sld	r10,r10,r6
+	sld	r11,r11,r6
+	srd	r10,r10,r6
+	srd	r11,r11,r6
+#endif
+	or	r5,r10,r11    /* OR the results to speed things up.  */
+	cmpdi	cr7,r5,0      /* If r5 == 0, no c or null bytes
+				 have been found.  */
+	bne	cr7,L(done)
+
+	mtcrf   0x01,r8
+
+	/* Are we now aligned to a quadword boundary?  If so, skip to
+	   the main loop.  Otherwise, go through the alignment code.  */
+
+	bt	28,L(loop)
+
+	/* Handle WORD2 of pair.  */
+	ldu	r12,8(r8)
+	cmpb    r10,r12,r4
+	cmpb	r11,r12,r0
+	or	r5,r10,r11
+	cmpdi	cr7,r5,0
+	bne	cr7,L(done)
+	b	L(loop)	      /* We branch here (rather than falling through)
+				 to skip the nops due to heavy alignment
+				 of the loop below.  */
+
+	.p2align  5
+L(loop):
+	/* Load two doublewords, compare and merge in a
+	   single register for speed.  This is an attempt
+	   to speed up the null-checking process for bigger strings.  */
+	ld	r12,8(r8)
+	ldu	r9,16(r8)
+	cmpb	r10,r12,r4
+	cmpb	r11,r12,r0
+	cmpb	r6,r9,r4
+	cmpb	r7,r9,r0
+	or	r5,r10,r11
+	or	r9,r6,r7
+	or	r12,r5,r9
+	cmpdi	cr7,r12,0
+	beq	cr7,L(vector)
+	/* OK, one (or both) of the doublewords contains a c/null byte.  Check
+	   the first doubleword and decrement the address in case the first
+	   doubleword really contains a c/null byte.  */
+
+	cmpdi	cr6,r5,0
+	addi	r8,r8,-8
+	bne	cr6,L(done)
+
+	/* The c/null byte must be in the second doubleword.  Adjust the
+	   address again and move the result of cmpb to r10 so we can calculate
+	   the pointer.  */
+
+	mr	r10,r6
+	mr	r11,r7
+	addi	r8,r8,8
+#ifdef USE_AS_STRCHRNUL
+	mr	r5, r9
+#endif
+	/* r10/r11 have the output of the cmpb instructions, that is,
+	   0xff in the same position as the c/null byte in the original
+	   doubleword from the string.  Use that to calculate the pointer.  */
+L(done):
+#ifdef USE_AS_STRCHRNUL
+	mr	r10, r5
+#endif
+#ifdef __LITTLE_ENDIAN__
+	addi    r3,r10,-1
+	andc    r3,r3,r10
+	popcntd	r0,r3
+# ifndef USE_AS_STRCHRNUL
+	addi    r4,r11,-1
+	andc    r4,r4,r11
+	cmpld	cr7,r3,r4
+	bgt	cr7,L(no_match)
+# endif
+#else
+	cntlzd	r0,r10	      /* Count leading zeros before c matches.  */
+# ifndef USE_AS_STRCHRNUL
+	cmpld	cr7,r11,r10
+	bgt	cr7,L(no_match)
+# endif
+#endif
+	srdi	r0,r0,3	      /* Convert leading zeros to bytes.  */
+	add	r3,r8,r0      /* Return address of the matching c byte
+				 or null in case c was not found.  */
+	blr
+
+	/* Check the first 32B in GPR's and move to vectorized loop.  */
+	.p2align  5
+L(vector):
+	addi	r3, r8, 8
+	andi.	r10, r3, 31
+	bne	cr0, L(loop)
+	vspltisb	v0, 0
+	/* Precompute vbpermq constant.  */
+	vspltisb	v10, 3
+	lvsl	v11, r0, r0
+	vslb	v10, v11, v10
+	MTVRD(v1,r4)
+	li	r5, 16
+	vspltb	v1, v1, 7
+	/* Compare 32 bytes in each loop.  */
+L(continue):
+	lvx	v4, 0, r3
+	lvx	v5, r3, r5
+	vcmpequb	v2, v0, v4
+	vcmpequb	v3, v0, v5
+	vcmpequb	v6, v1, v4
+	vcmpequb	v7, v1, v5
+	vor	v8, v2, v3
+	vor	v9, v6, v7
+	vor	v11, v8, v9
+	vcmpequb.	v11, v0, v11
+	addi	r3, r3, 32
+	blt	cr6, L(continue)
+	/* One (or both) of the quadwords contains a c/null byte.  */
+	addi	r3, r3, -32
+#ifndef USE_AS_STRCHRNUL
+	vcmpequb.	v11, v0, v9
+	blt	cr6, L(no_match)
+#endif
+	/* Permute the first bit of each byte into bits 48-63.  */
+	VBPERMQ(v2, v2, v10)
+	VBPERMQ(v3, v3, v10)
+	VBPERMQ(v6, v6, v10)
+	VBPERMQ(v7, v7, v10)
+	/* Shift each component into its correct position for merging.  */
+#ifdef __LITTLE_ENDIAN__
+	vsldoi	v3, v3, v3, 2
+	vsldoi	v7, v7, v7, 2
+#else
+	vsldoi	v2, v2, v2, 6
+	vsldoi	v3, v3, v3, 4
+	vsldoi	v6, v6, v6, 6
+	vsldoi	v7, v7, v7, 4
+#endif
+
+        /* Merge the results and move to a GPR.  */
+        vor     v1, v3, v2
+        vor     v2, v6, v7
+        vor     v4, v1, v2
+	MFVRD(r5, v4)
+#ifdef __LITTLE_ENDIAN__
+	addi	r6, r5, -1
+	andc	r6, r6, r5
+	popcntd	r6, r6
+#else
+	cntlzd	r6, r5	/* Count leading zeros before the match.  */
+#endif
+	add	r3, r3, r6	/* Compute the address of the c/null byte.  */
+	/* Return NULL if null found before c.  */
+#ifndef USE_AS_STRCHRNUL
+	lbz	r4, 0(r3)
+	cmpdi	cr7, r4, 0
+	beq	cr7, L(no_match)
+#endif
+	blr
+
+#ifndef USE_AS_STRCHRNUL
+	.align	4
+L(no_match):
+	li	r3,0
+	blr
+#endif
+
+/* We are here because strchr was called with a null byte.  */
+	.align	4
+L(null_match):
+	/* r0 has a doubleword of null bytes.  */
+
+	cmpb	r5,r12,r0     /* Compare each byte against null bytes.  */
+
+	/* Move the doublewords left and right to discard the bits that are
+	   not part of the string and bring them back as zeros.  */
+#ifdef __LITTLE_ENDIAN__
+	srd	r5,r5,r6
+	sld	r5,r5,r6
+#else
+	sld	r5,r5,r6
+	srd	r5,r5,r6
+#endif
+	cmpdi	cr7,r5,0      /* If r5 == 0, no null bytes
+				 have been found.  */
+	bne	cr7,L(done_null)
+
+	mtcrf   0x01,r8
+
+	/* Are we now aligned to a quadword boundary?  If so, skip to
+	   the main loop.  Otherwise, go through the alignment code.  */
+
+	bt	28,L(loop_null)
+
+	/* Handle WORD2 of pair.  */
+	ldu	r12,8(r8)
+	cmpb    r5,r12,r0
+	cmpdi	cr7,r5,0
+	bne	cr7,L(done_null)
+	b	L(loop_null)  /* We branch here (rather than falling through)
+				 to skip the nops due to heavy alignment
+				 of the loop below.  */
+
+	/* Main loop to look for the end of the string.  Since it's a
+	   small loop (< 8 instructions), align it to 32-bytes.  */
+	.p2align  5
+L(loop_null):
+	/* Load two doublewords, compare and merge in a
+	   single register for speed.  This is an attempt
+	   to speed up the null-checking process for bigger strings.  */
+	ld	r12,8(r8)
+	ldu     r11,16(r8)
+	cmpb	r5,r12,r0
+	cmpb	r10,r11,r0
+	or	r6,r5,r10
+	cmpdi	cr7,r6,0
+	beq	cr7,L(vector1)
+
+	/* OK, one (or both) of the doublewords contains a null byte.  Check
+	   the first doubleword and decrement the address in case the first
+	   doubleword really contains a null byte.  */
+
+	cmpdi	cr6,r5,0
+	addi	r8,r8,-8
+	bne	cr6,L(done_null)
+
+	/* The null byte must be in the second doubleword.  Adjust the address
+	   again and move the result of cmpb to r5 so we can calculate the
+	   pointer.  */
+
+	mr	r5,r10
+	addi	r8,r8,8
+
+	/* r5 has the output of the cmpb instruction, that is, it contains
+	   0xff in the same position as the null byte in the original
+	   doubleword from the string.  Use that to calculate the pointer.  */
+L(done_null):
+#ifdef __LITTLE_ENDIAN__
+	addi    r0,r5,-1
+	andc    r0,r0,r5
+	popcntd	r0,r0
+#else
+	cntlzd	r0,r5	      /* Count leading zeros before the match.  */
+#endif
+	srdi	r0,r0,3	      /* Convert leading zeros to bytes.  */
+	add	r3,r8,r0      /* Return address of the matching null byte.  */
+	blr
+	.p2align  5
+L(vector1):
+	addi    r3, r8, 8
+	andi.	r10, r3, 31
+	bne	cr0, L(loop_null)
+	vspltisb	v8, -1
+	vspltisb	v0, 0
+	vspltisb	v10, 3
+	lvsl	v11, r0, r0
+	vslb	v10, v11, v10
+	li	r5, 16
+L(continue1):
+	lvx	v4, 0, r3
+	lvx	v5, r3, r5
+	vcmpequb	v2, v0, v4
+	vcmpequb	v3, v0, v5
+	vor	v8, v2, v3
+	vcmpequb.	v11, v0, v8
+	addi	r3, r3, 32
+	blt	cr6, L(continue1)
+	addi	r3, r3, -32
+L(end1):
+	VBPERMQ(v2, v2, v10)
+	VBPERMQ(v3, v3, v10)
+	/* Shift each component into its correct position for merging.  */
+#ifdef __LITTLE_ENDIAN__
+	vsldoi	v3, v3, v3, 2
+#else
+	vsldoi	v2, v2, v2, 6
+	vsldoi	v3, v3, v3, 4
+#endif
+
+        /* Merge the results and move to a GPR.  */
+        vor     v4, v3, v2
+	MFVRD(r5, v4)
+#ifdef __LITTLE_ENDIAN__
+	addi	r6, r5, -1
+	andc	r6, r6, r5
+	popcntd	r6, r6
+#else
+	cntlzd	r6, r5	/* Count leading zeros before the match.  */
+#endif
+	add	r3, r3, r6	/* Compute the address of the null byte.  */
+	blr
+END (FUNC_NAME)
+
+#ifndef USE_AS_STRCHRNUL
+weak_alias (strchr, index)
+libc_hidden_builtin_def (strchr)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c b/sysdeps/powerpc/powerpc64/power8/strchrnul.S
similarity index 54%
copy from sysdeps/powerpc/powerpc64/multiarch/strchrnul.c
copy to sysdeps/powerpc/powerpc64/power8/strchrnul.S
index 682aa0f..0229496 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c
+++ b/sysdeps/powerpc/powerpc64/power8/strchrnul.S
@@ -1,5 +1,5 @@
-/* Multiple versions of strchrnul.
-   Copyright (C) 2013-2016 Free Software Foundation, Inc.
+/* Optimized strchrnul implementation for PowerPC64/POWER8.
+   Copyright (C) 2016 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,22 +16,8 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#if IS_IN (libc)
-# include <string.h>
-# include <shlib-compat.h>
-# include "init-arch.h"
+#define USE_AS_STRCHRNUL 1
+#include <sysdeps/powerpc/powerpc64/power8/strchr.S>
 
-extern __typeof (__strchrnul) __strchrnul_ppc attribute_hidden;
-extern __typeof (__strchrnul) __strchrnul_power7 attribute_hidden;
-
-/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
-   ifunc symbol properly.  */
-libc_ifunc (__strchrnul,
-	    (hwcap & PPC_FEATURE_HAS_VSX)
-            ? __strchrnul_power7
-            : __strchrnul_ppc);
-
-weak_alias (__strchrnul, strchrnul)
-#else
-#include <string/strchrnul.c>
-#endif
+weak_alias (__strchrnul,strchrnul)
+libc_hidden_builtin_def (__strchrnul)

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                          |   21 ++
 sysdeps/powerpc/powerpc32/power7/memchr.S          |   11 +-
 sysdeps/powerpc/powerpc64/multiarch/Makefile       |    4 +-
 .../powerpc/powerpc64/multiarch/ifunc-impl-list.c  |    6 +
 .../powerpc/powerpc64/multiarch/strchr-power8.S    |   39 ++
 sysdeps/powerpc/powerpc64/multiarch/strchr.c       |    3 +
 .../powerpc/powerpc64/multiarch/strchrnul-power8.S |   39 ++
 sysdeps/powerpc/powerpc64/multiarch/strchrnul.c    |    3 +
 sysdeps/powerpc/powerpc64/power8/strchr.S          |  368 ++++++++++++++++++++
 sysdeps/powerpc/powerpc64/power8/strchrnul.S       |   23 ++
 10 files changed, 514 insertions(+), 3 deletions(-)
 create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strchr-power8.S
 create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strchrnul-power8.S
 create mode 100644 sysdeps/powerpc/powerpc64/power8/strchr.S
 create mode 100644 sysdeps/powerpc/powerpc64/power8/strchrnul.S


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]