This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 3/3] powerpc: Use default st{r,p}cpy optimization for POWER7


Following the discussion with Ondrej and the recent changes to the default
st{r,p}cpy algorithm, this patch uses it for both powerpc64 and
powerpc64/power7 instead of the optimized ones (which will be removed).
This is faster for all but a few inputs (mostly those with very short sizes)
in the benchtests.

It removes the default powerpc64 st{r,p}cpy and uses the same
optimization, since the powerpc64 optimized algorithm only provides a
slightly optimized path when both source and destination are
doubleword aligned, and resorts to byte-per-byte access for unaligned inputs.

Checked on powerpc64le and compared bench output in attachments.

--

	* string/stpcpy.c (__stpcpy): Use STPCPY to redefine symbol name and
	cleanup macro usage.
	* string/strcpy.c (strcpy): Use STRCPY to redefine symbol name.
	* sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.S: Remove file.
	* sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.S: Likewise.
	* sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.S: Likewise.
	* sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.S: Likewise.
	* sysdeps/powerpc/powerpc64/power7/stpcpy.S: Likewise.
	* sysdeps/powerpc/powerpc64/power7/strcpy.S: Likewise.
	* sysdeps/powerpc/powerpc64/power7/strcpy.c: Likewise.
	* sysdeps/powerpc/powerpc64/stpcpy.S: Likewise.
	* sysdeps/powerpc/powerpc64/strcpy.S: Likewise.
	* sysdeps/powerpc/powerpc64/multiarch/strcpy.c
	[SHARED && IS_IN (libc)]: Include <string/strcpy.c>.
	* sysdeps/powerpc/powerpc64/multiarch/stpcpy.c
	[SHARED && IS_IN (libc)]: Include <string/stpcpy.c>.
	* sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.c: New file.
	* sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.c: Likewise.
	* sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.c: Likewise.
	* sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.c: Likewise.
	* sysdeps/powerpc/powerpc64/power7/strcpy.c: Likewise.

--

diff --git a/string/stpcpy.c b/string/stpcpy.c
index ecfb8d9..c783fbd 100644
--- a/string/stpcpy.c
+++ b/string/stpcpy.c
@@ -25,25 +25,17 @@
 #undef __stpcpy
 #undef stpcpy
 
-#ifndef weak_alias
-# define __stpcpy stpcpy
+#ifndef STPCPY
+# define STPCPY __stpcpy
 #endif
 
 /* Copy SRC to DEST, returning the address of the terminating '\0' in DEST.  */
 char *
-__stpcpy (dest, src)
-     char *dest;
-     const char *src;
+STPCPY (char *dest, const char *src)
 {
   size_t len = strlen (src);
   return memcpy (dest, src, len + 1) + len;
 }
-#ifdef libc_hidden_def
-libc_hidden_def (__stpcpy)
-#endif
-#ifdef weak_alias
 weak_alias (__stpcpy, stpcpy)
-#endif
-#ifdef libc_hidden_builtin_def
+libc_hidden_def (__stpcpy)
 libc_hidden_builtin_def (stpcpy)
-#endif
diff --git a/string/strcpy.c b/string/strcpy.c
index 3ebb51b..c227462 100644
--- a/string/strcpy.c
+++ b/string/strcpy.c
@@ -20,9 +20,13 @@
 
 #undef strcpy
 
+#ifndef STRCPY
+# define STRCPY strcpy
+#endif
+
 /* Copy SRC to DEST.  */
 char *
-strcpy (char *dest, const char *src)
+STRCPY (char *dest, const char *src)
 {
   return memcpy (dest, src, strlen (src) + 1);
 }
diff --git a/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.S b/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.S
deleted file mode 100644
index 0943611..0000000
--- a/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.S
+++ /dev/null
@@ -1,40 +0,0 @@
-/* Optimized stpcpy implementation for POWER7.
-   Copyright (C) 2013-2015 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#undef EALIGN
-#define EALIGN(name, alignt, words)				\
-  .section ".text";						\
-  ENTRY_2(__stpcpy_power7)					\
-  .align ALIGNARG(alignt);					\
-  EALIGN_W_##words;						\
-  BODY_LABEL(__stpcpy_power7):					\
-  cfi_startproc;						\
-  LOCALENTRY(__stpcpy_power7)
-
-#undef END
-#define END(name)						\
-  cfi_endproc;							\
-  TRACEBACK(__stpcpy_power7)					\
-  END_2(__stpcpy_power7)
-
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-
-#include <sysdeps/powerpc/powerpc64/power7/stpcpy.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.c b/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.c
new file mode 100644
index 0000000..6362066
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.c
@@ -0,0 +1,36 @@
+/* Multiarch stpcpy for POWER7/PPC64.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+
+extern __typeof (memcpy) __memcpy_power7 attribute_hidden;
+extern __typeof (strlen) __strlen_power7 attribute_hidden;
+extern __typeof (stpcpy) __stpcpy_power7 attribute_hidden;
+
+#define STPCPY __stpcpy_power7
+#define memcpy __memcpy_power7
+#define strlen __strlen_power7
+
+#undef libc_hidden_def
+#define libc_hidden_def(name)
+#undef weak_alias
+#define weak_alias(name, alias)
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <string/stpcpy.c>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.S b/sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.S
deleted file mode 100644
index 858f885..0000000
--- a/sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.S
+++ /dev/null
@@ -1,48 +0,0 @@
-/* Default stpcpy implementation for PowerPC64.
-   Copyright (C) 2013-2015 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#if defined SHARED && IS_IN (libc)
-# undef EALIGN
-# define EALIGN(name, alignt, words)				\
-  .section ".text";						\
-  ENTRY_2(__stpcpy_ppc)						\
-  .align ALIGNARG(alignt);					\
-  EALIGN_W_##words;						\
-  BODY_LABEL(__stpcpy_ppc):					\
-  cfi_startproc;						\
-  LOCALENTRY(__stpcpy_ppc)
-
-# undef END
-# define END(name)						\
-  cfi_endproc;							\
-  TRACEBACK(__stpcpy_ppc)					\
-  END_2(__stpcpy_ppc)
-
-# undef weak_alias
-# define weak_alias(name, alias)
-# undef libc_hidden_def
-# define libc_hidden_def(name)
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name)				\
-    .globl __GI___stpcpy; __GI___stpcpy = __stpcpy_ppc
-#endif
-
-#include <sysdeps/powerpc/powerpc64/stpcpy.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.c
new file mode 100644
index 0000000..6e437fb
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.c
@@ -0,0 +1,39 @@
+/* Multiarch stpcpy for PPC64.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+
+extern __typeof (memcpy) __memcpy_ppc attribute_hidden;
+extern __typeof (strlen) __strlen_ppc attribute_hidden;
+extern __typeof (stpcpy) __stpcpy_ppc attribute_hidden;
+
+#define STPCPY __stpcpy_ppc
+#define memcpy __memcpy_ppc
+#define strlen __strlen_ppc
+
+#undef weak_alias
+#define weak_alias(name, aliasname) \
+  extern __typeof (__stpcpy_ppc) aliasname \
+    __attribute__ ((weak, alias ("__stpcpy_ppc")));
+
+#undef libc_hidden_def
+#define libc_hidden_def(name)
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <string/stpcpy.c>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c b/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c
index a5e1c03..c809f99 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c
@@ -32,4 +32,6 @@ libc_ifunc (__stpcpy,
 
 weak_alias (__stpcpy, stpcpy)
 libc_hidden_def (stpcpy)
+#else
+# include <string/stpcpy.c>
 #endif
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.S
deleted file mode 100644
index 69851bb..0000000
--- a/sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.S
+++ /dev/null
@@ -1,40 +0,0 @@
-/* Optimized strcpy implementation for POWER7.
-   Copyright (C) 2013-2015 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#undef EALIGN
-#define EALIGN(name, alignt, words)				\
-  .section ".text";						\
-  ENTRY_2(__strcpy_power7)					\
-  .align ALIGNARG(alignt);					\
-  EALIGN_W_##words;						\
-  BODY_LABEL(__strcpy_power7):					\
-  cfi_startproc;						\
-  LOCALENTRY(__strcpy_power7)
-
-#undef END
-#define END(name)						\
-  cfi_endproc;							\
-  TRACEBACK(__strcpy_power7)					\
-  END_2(__strcpy_power7)
-
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-
-#include <sysdeps/powerpc/powerpc64/power7/strcpy.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.c b/sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.c
new file mode 100644
index 0000000..9f091d2
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.c
@@ -0,0 +1,32 @@
+/* Multiarch strcpy for POWER7/PPC64.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+
+extern __typeof (memcpy) __memcpy_power7 attribute_hidden;
+extern __typeof (strlen) __strlen_power7 attribute_hidden;
+extern __typeof (strcpy) __strcpy_power7 attribute_hidden;
+
+#define STRCPY __strcpy_power7
+#define memcpy __memcpy_power7
+#define strlen __strlen_power7
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <string/strcpy.c>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.S b/sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.S
deleted file mode 100644
index f937555..0000000
--- a/sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.S
+++ /dev/null
@@ -1,43 +0,0 @@
-/* Default strcpy implementation for PowerPC64.
-   Copyright (C) 2013-2015 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#if defined SHARED && IS_IN (libc)
-# undef EALIGN
-# define EALIGN(name, alignt, words)				\
-  .section ".text";						\
-  ENTRY_2(__strcpy_ppc)						\
-  .align ALIGNARG(alignt);					\
-  EALIGN_W_##words;						\
-  BODY_LABEL(__strcpy_ppc):					\
-  cfi_startproc;						\
-  LOCALENTRY(__strcpy_ppc)
-
-# undef END
-# define END(name)						\
-  cfi_endproc;							\
-  TRACEBACK(__strcpy_ppc)					\
-  END_2(__strcpy_ppc)
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name)				\
-    .globl __GI_strcpy; __GI_strcpy = __strcpy_ppc
-#endif
-
-#include <sysdeps/powerpc/powerpc64/strcpy.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.c
new file mode 100644
index 0000000..b722030
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.c
@@ -0,0 +1,35 @@
+/* Multiarch strcpy for PPC64.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+
+#if defined SHARED && IS_IN (libc)
+extern __typeof (memcpy) __memcpy_ppc attribute_hidden;
+extern __typeof (strlen) __strlen_ppc attribute_hidden;
+extern __typeof (strcpy) __strcpy_ppc attribute_hidden;
+
+# define STRCPY __strcpy_ppc
+# define memcpy __memcpy_ppc
+# define strlen __strlen_ppc
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) \
+  __hidden_ver1 (__strcpy_ppc, __GI_strcpy, __strcpy_ppc);
+#endif
+
+#include <string/strcpy.c>
diff --git a/sysdeps/powerpc/powerpc64/power7/stpcpy.S b/sysdeps/powerpc/powerpc64/power7/stpcpy.S
deleted file mode 100644
index ef90142..0000000
--- a/sysdeps/powerpc/powerpc64/power7/stpcpy.S
+++ /dev/null
@@ -1,24 +0,0 @@
-/* Optimized stpcpy implementation for PowerPC64/POWER7.
-   Copyright (C) 2013-2015 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define USE_AS_STPCPY
-#include <sysdeps/powerpc/powerpc64/power7/strcpy.S>
-
-weak_alias (__stpcpy, stpcpy)
-libc_hidden_def (__stpcpy)
-libc_hidden_builtin_def (stpcpy)
diff --git a/sysdeps/powerpc/powerpc64/power7/strcpy.S b/sysdeps/powerpc/powerpc64/power7/strcpy.S
deleted file mode 100644
index 70f2987..0000000
--- a/sysdeps/powerpc/powerpc64/power7/strcpy.S
+++ /dev/null
@@ -1,437 +0,0 @@
-/* Optimized strcpy/stpcpy implementation for PowerPC64/POWER7.
-   Copyright (C) 2013-2015 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* Implements the function
-
-   char * [r3] strcpy (char *dest [r3], const char *src [r4])
-
-   or
-
-   char * [r3] strcpy (char *dest [r3], const char *src [r4])
-
-   if USE_AS_STPCPY is defined. It tries to use aligned memory accesses
-   when possible using the following algorithm:
-
-   if (((((uintptr_t)dst & 0x7UL) == 0) && ((uintptr_t)src & 0x7UL) == 0))
-     goto aligned_doubleword_copy;
-   if (((uintptr_t)dst & 0x7UL) == ((uintptr_t)src & 0x7UL))
-     goto same_alignment;
-   goto unaligned;
-
-   The aligned comparison are made using cmpb instructions.  */
-
-#ifdef USE_AS_STPCPY
-# define FUNC_NAME __stpcpy
-#else
-# define FUNC_NAME strcpy
-#endif
-
-	.machine  power7
-EALIGN (FUNC_NAME, 4, 0)
-	CALL_MCOUNT 2
-
-#define rTMP	r0
-#ifdef USE_AS_STPCPY
-#define rRTN	r3	/* pointer to previous word/doubleword in dest */
-#else
-#define rRTN	r12	/* pointer to previous word/doubleword in dest */
-#endif
-#define rSRC	r4	/* pointer to previous word/doubleword in src */
-#define rMASK	r5	/* mask 0xffffffff | 0xffffffffffffffff */
-#define rWORD	r6	/* current word from src */
-#define rALT	r7	/* alternate word from src */
-#define rRTNAL	r8	/* alignment of return pointer */
-#define rSRCAL	r9	/* alignment of source pointer */
-#define rALCNT	r10	/* bytes to read to reach 8 bytes alignment */
-#define rSUBAL	r11	/* doubleword minus unaligned displacement */
-
-#ifndef USE_AS_STPCPY
-/* Save the dst pointer to use as return value.  */
-	mr	rRTN, r3
-#endif
-	or	rTMP, rSRC, rRTN
-	clrldi.	rTMP, rTMP, 61
-	bne	L(check_alignment)
-	b	L(aligned_doubleword_copy)
-
-	.align 4
-L(check_alignment):
-	rldicl	rRTNAL, rRTN, 0, 61
-	rldicl	rSRCAL, rSRC, 0, 61
-	cmpld	cr7, rSRCAL, rRTNAL
-	beq	cr7, L(same_alignment)
-	b	L(unaligned)
-
-	.align 4
-L(same_alignment):
-/* Src and dst with same alignment: align both to doubleword.  */
-	mr	rALCNT, rRTN
-	lbz	rWORD, 0(rSRC)
-	subfic	rSUBAL, rRTNAL, 8
-	addi	rRTN, rRTN, 1
-	addi	rSRC, rSRC, 1
-	cmpdi	cr7, rWORD, 0
-	stb	rWORD, 0(rALCNT)
-	beq	cr7, L(s2)
-
-	add	rALCNT, rALCNT, rSUBAL
-	subf	rALCNT, rRTN, rALCNT
-	addi	rALCNT, rALCNT, 1
-	mtctr	rALCNT
-	b	L(s1)
-
-	.align 4
-L(s0):
-	addi	rSRC, rSRC, 1
-	lbz	rWORD, -1(rSRC)
-	cmpdi	cr7, rWORD, 0
-	stb	rWORD, -1(rALCNT)
-	beqlr	cr7
-	mr	rRTN, rALCNT
-L(s1):
-	addi	rALCNT, rRTN,1
-	bdnz	L(s0)
-	b L(aligned_doubleword_copy)
-	.align 4
-L(s2):
-	mr	rRTN, rALCNT
-	blr
-
-/* For doubleword aligned memory, operate using doubleword load and stores.  */
-	.align 4
-L(aligned_doubleword_copy):
-	li	rMASK, 0
-	addi	rRTN, rRTN, -8
-	ld	rWORD, 0(rSRC)
-	b	L(g2)
-
-	.align 4
-L(g0):	ldu	rALT, 8(rSRC)
-	stdu	rWORD, 8(rRTN)
-	cmpb	rTMP, rALT, rMASK
-	cmpdi	rTMP, 0
-	bne	L(g1)
-	ldu	rWORD, 8(rSRC)
-	stdu	rALT, 8(rRTN)
-L(g2):	cmpb	rTMP, rWORD, rMASK
-	cmpdi	rTMP, 0		/* If rTMP is 0, no null's have been found.  */
-	beq	L(g0)
-
-	mr	rALT, rWORD
-/* We've hit the end of the string.  Do the rest byte-by-byte.  */
-L(g1):
-#ifdef __LITTLE_ENDIAN__
-	extrdi.	rTMP, rALT, 8, 56
-	stbu	rALT, 8(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 48
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 40
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 32
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 24
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 16
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 8
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi	rTMP, rALT, 8, 0
-	stbu	rTMP, 1(rRTN)
-#else
-	extrdi.	rTMP, rALT, 8, 0
-	stbu	rTMP, 8(rRTN)
-	beqlr
-	extrdi.	rTMP, rALT, 8, 8
-	stbu	rTMP, 1(rRTN)
-	beqlr
-	extrdi.	rTMP, rALT, 8, 16
-	stbu	rTMP, 1(rRTN)
-	beqlr
-	extrdi.	rTMP, rALT, 8, 24
-	stbu	rTMP, 1(rRTN)
-	beqlr
-	extrdi.	rTMP, rALT, 8, 32
-	stbu	rTMP, 1(rRTN)
-	beqlr
-	extrdi.	rTMP, rALT, 8, 40
-	stbu	rTMP, 1(rRTN)
-	beqlr
-	extrdi.	rTMP, rALT, 8, 48
-	stbu	rTMP, 1(rRTN)
-	beqlr
-	stbu	rALT, 1(rRTN)
-#endif
-	blr
-
-	.align	4
-L(unaligned):
-	cmpdi	rSRCAL, 0		/* Check src alignment */
-	beq	L(srcaligndstunalign)
-	/* src is unaligned */
-	rlwinm	r10, rSRC, 3,26,28	/* Calculate padding.  */
-	clrrdi	rSRC, rSRC, 3		/* Align the addr to dw boundary */
-	ld	rWORD, 0(rSRC)		/* Load doubleword from memory.  */
-	li	rTMP, 0
-	/* Discard bits not part of the string */
-#ifdef __LITTLE_ENDIAN__
-	srd	rALT, rWORD, r10
-#else
-	sld	rALT, rWORD, r10
-#endif
-	cmpb	rTMP, rALT, rTMP	/* Compare each byte against null */
-	/* Discard bits not part of the string */
-#ifdef __LITTLE_ENDIAN__
-	sld	rTMP, rTMP, r10
-#else
-	srd	rTMP, rTMP, r10
-#endif
-	cmpdi	rTMP, 0
-	bne	L(bytebybyte)		/* if it has null, copy byte by byte */
-	subfic	r8, r9, 8
-	rlwinm	r5, rRTN, 3,26,28	/* Calculate padding in bits.  */
-	rldicl	r9, rRTN, 0, 61		/* Calculate padding in bytes. */
-	addi	rRTN, rRTN, -1
-
-	cmpdi	r5, 0			/* check dest alignment */
-	beq	L(srcunaligndstalign)
-
-	/* both src and dst unaligned */
-#ifdef __LITTLE_ENDIAN__
-	sld	rWORD, rALT, r10
-	mr 	r11, r10
-	addi	r11, r11, -8		/* Adjust byte pointer on loaded dw */
-#else
-	srd	rWORD, rALT, r10
-	subfic	r11, r10, 64
-#endif
-	/* dst alignment is greater then src alignment? */
-	cmpd	cr7, r5, r10
-	blt	cr7, L(dst_align_small)
-	/* src alignment is less than dst */
-
-	/* Calculate the dst alignment differnce */
-	subfic	rALT, r9, 8
-	mtctr	rALT
-
-	/* Write till dst is aligned */
-	cmpdi	rTMP, rALT, 4
-	blt	L(storebyte1)		/* less than 4, store byte by byte */
-	beq	L(equal1)		/* if its 4, store word */
-	addi	rTMP, rALT, -4		/* greater than 4, so stb and stw */
-	mtctr	rTMP
-L(storebyte1):
-#ifdef __LITTLE_ENDIAN__
-	addi	r11, r11, 8		/* Adjust byte pointer on loaded dw */
-#else
-	addi	r11, r11, -8
-#endif
-	srd	rALT, rWORD, r11
-	stbu	rALT, 1(rRTN)
-	bdnz	L(storebyte1)
-
-	subfic	rALT, r9, 8		/* Check the remaining bytes */
-	cmpdi	rTMP, rALT, 4
-	blt	L(proceed)
-
-	.align 4
-L(equal1):
-#ifdef __LITTLE_ENDIAN__
-	addi	r11, r11, 8		/* Adjust byte pointer on loaded dw */
-	srd	rALT, rWORD, r11
-#else
-	subfic	r11, r11, 64
-	sld	rALT, rWORD, r11
-	srdi	rALT, rALT, 32
-#endif
-	stw	rALT, 1(rRTN)
-	addi	rRTN, rRTN, 4
-
-L(proceed):
-	mr	rALT, rWORD
-	/* calculate the Left over bytes to be written */
-	subfic	r11, r10, 64
-	subfic	r5, r5, 64
-	subf	r5, r5, r11		/* remaining bytes on second dw */
-        subfic	r10, r5, 64		/* remaining bytes on first dw */
-	subfic	r9, r9, 8
-	subf	r8, r9, r8		/* recalculate padding */
-L(srcunaligndstalign):
-	addi	rRTN, rRTN, 1
-	subfic	r5, r10, 64		/* remaining bytes on second dw */
-	addi	rSRC, rSRC, 8
-	li	rTMP,0
-	b	L(storedouble)
-
-	.align 4
-L(dst_align_small):
-	mtctr	r8
-	/* Write till src is aligned */
-L(storebyte2):
-#ifdef __LITTLE_ENDIAN__
-	addi	r11, r11, 8		/* Adjust byte pointer on dw */
-#else
-	addi	r11, r11, -8
-#endif
-	srd	rALT, rWORD, r11
-	stbu	rALT, 1(rRTN)
-	bdnz	L(storebyte2)
-
-	addi	rSRC, rSRC, 8		/* Increment src pointer */
-	addi	rRTN, rRTN, 1		/* Increment dst pointer */
-	rldicl	r8, rRTN, 0, 61		/* Recalculate padding */
-
-	/* src is aligned */
-L(srcaligndstunalign):
-	ld	rWORD, 0(rSRC)
-	mr	rALT, rWORD
-	li	rTMP, 0			/* Check null */
-	cmpb	rTMP, rWORD, rTMP
-	cmpdi	rTMP, 0
-	bne	L(bytebybyte)		/* Do byte by byte if there is NULL */
-	rlwinm	r5, rRTN, 3,26,28	/* Calculate padding */
-	addi	rRTN, rRTN, -1
-	subfic	r10, r8, 8
-	/* write byte by byte till aligned */
-#ifdef __LITTLE_ENDIAN__
-	li	r11, -8
-#else
-	li	r11, 64
-#endif
-	mtctr	r10
-	cmpdi	rTMP, r10, 4
-	blt	L(storebyte)
-	beq	L(equal)
-	addi	rTMP, r10, -4
-	mtctr	rTMP
-L(storebyte):
-#ifdef __LITTLE_ENDIAN__
-	addi	r11, r11, 8		/* Adjust byte pointer on  dw */
-#else
-	addi	r11, r11, -8
-#endif
-	srd	rALT, rWORD, r11
-	stbu	rALT, 1(rRTN)
-	bdnz	L(storebyte)
-
-	cmpdi	rTMP, r10, 4
-	blt	L(align)
-
-	.align 4
-L(equal):
-#ifdef __LITTLE_ENDIAN__
-	addi	r11, r11, 8
-	srd	rALT, rWORD, r11
-#else
-	subfic	r11, r11, 64
-	sld	rALT, rWORD, r11
-	srdi	rALT, rALT, 32
-#endif
-	stw	rALT, 1(rRTN)
-	addi	rRTN, rRTN, 4
-L(align):
-	addi	rRTN, rRTN, 1
-	addi	rSRC, rSRC, 8		/* Increment src pointer */
-	subfic	r10, r5, 64
-	li	rTMP, 0
-	/* dst addr aligned to 8 */
-L(storedouble):
-	ld	rALT, 0(rSRC)		/* load next dw */
-	cmpb	rTMP, rALT, rTMP
-	cmpdi	rTMP, 0			/* check for null on each new dw */
-	bne	L(null)
-#ifdef __LITTLE_ENDIAN__
-	srd	r9, rWORD, r10		/* bytes from first dw */
-	sld	r11, rALT, r5		/* bytes from second dw */
-#else
-	sld	r9, rWORD, r10
-	srd	r11, rALT, r5
-#endif
-	or	r11, r9, r11		/* make as a single dw */
-	std	r11, 0(rRTN)		/* store as std on aligned addr */
-	mr	rWORD, rALT		/* still few bytes left to be written */
-	addi	rRTN, rRTN, 8		/* increment dst addr */
-	addi	rSRC, rSRC, 8		/* increment src addr */
-	b	L(storedouble)		/* Loop till NULL */
-
-	.align 4
-
-/* We've hit the end of the string.  Do the rest byte-by-byte.  */
-L(null):
-	addi	rRTN, rRTN, -1
-	mr	r10, r5
-	mtctr	r8
-#ifdef __LITTLE_ENDIAN__
-	subfic	r10, r10, 64
-	addi	r10, r10, -8
-#endif
-	cmpdi	rTMP, r8, 4
-	blt	L(loop)
-
-	/* we can still use stw if leftover >= 4*/
-#ifdef __LITTLE_ENDIAN__
-	addi	r10, r10, 8
-	srd	r11, rWORD, r10
-#else
-	subfic	r10, r10, 64
-	sld	r11, rWORD, r10
-	srdi	r11, r11, 32
-#endif
-	stw	r11, 1(rRTN)
-	addi	rRTN, rRTN, 4
-
-	beq	L(bytebybyte1)
-	addi	r10, r10, 32
-#ifdef __LITTLE_ENDIAN__
-	addi	r10, r10, -8
-#else
-	subfic	r10, r10, 64
-#endif
-	addi	rTMP, r8, -4
-	mtctr	rTMP
-	/* remaining byte by byte part of first dw */
-L(loop):
-#ifdef __LITTLE_ENDIAN__
-	addi	r10, r10, 8
-#else
-	addi	r10, r10, -8
-#endif
-	srd	rTMP, rWORD, r10
-	stbu	rTMP, 1(rRTN)
-	bdnz	L(loop)
-
-L(bytebybyte1):
-	addi	rRTN, rRTN, 1
-	/* remaining byte by byte part of second dw */
-L(bytebybyte):
-	addi	rRTN, rRTN, -8
-	b	L(g1)
-
-END (FUNC_NAME)
-
-#ifndef USE_AS_STPCPY
-libc_hidden_builtin_def (strcpy)
-#endif
diff --git a/sysdeps/powerpc/powerpc64/stpcpy.S b/sysdeps/powerpc/powerpc64/stpcpy.S
deleted file mode 100644
index 2ed7830..0000000
--- a/sysdeps/powerpc/powerpc64/stpcpy.S
+++ /dev/null
@@ -1,24 +0,0 @@
-/* Optimized stpcpy implementation for PowerPC64.
-   Copyright (C) 1997-2015 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define USE_AS_STPCPY
-#include <sysdeps/powerpc/powerpc64/strcpy.S>
-
-weak_alias (__stpcpy, stpcpy)
-libc_hidden_def (__stpcpy)
-libc_hidden_builtin_def (stpcpy)
diff --git a/sysdeps/powerpc/powerpc64/strcpy.S b/sysdeps/powerpc/powerpc64/strcpy.S
deleted file mode 100644
index c48b21e..0000000
--- a/sysdeps/powerpc/powerpc64/strcpy.S
+++ /dev/null
@@ -1,216 +0,0 @@
-/* Optimized strcpy implementation for PowerPC64.
-   Copyright (C) 1997-2015 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* See strlen.s for comments on how the end-of-string testing works.  */
-
-/* char * [r3] strcpy (char *dest [r3], const char *src [r4])  */
-
-#ifdef USE_AS_STPCPY
-# define FUNC_NAME __stpcpy
-#else
-# define FUNC_NAME strcpy
-#endif
-
-EALIGN (FUNC_NAME, 4, 0)
-	CALL_MCOUNT 2
-
-#define rTMP	r0
-#ifdef USE_AS_STPCPY
-#define rRTN    r3      /* pointer to previous word/doubleword in dest */
-#else
-#define rRTN    r12     /* pointer to previous word/doubleword in dest */
-#endif
-#define rSRC	r4	/* pointer to previous word/doubleword in src */
-#define rWORD	r6	/* current word from src */
-#define rFEFE	r7	/* constant 0xfefefeff | 0xfefefefefefefeff */
-#define r7F7F	r8	/* constant 0x7f7f7f7f | 0x7f7f7f7f7f7f7f7f */
-#define rNEG	r9	/* ~(word in s1 | r7F7F) */
-#define rALT	r10	/* alternate word from src */
-
-#ifndef USE_AS_STPCPY
-/* Save the dst pointer to use as return value.  */
-	mr      rRTN, r3
-#endif
-	or	rTMP, rSRC, rRTN
-	clrldi.	rTMP, rTMP, 61
-	bne	L(check_word_alignment)
-
-/* For doubleword aligned memory, operate using doubleword load and stores.  */
-	addi	rRTN, rRTN, -8
-
-	lis	rFEFE, -0x101
-	lis	r7F7F, 0x7f7f
-	ld	rWORD, 0(rSRC)
-	addi	rFEFE, rFEFE, -0x101
-	addi	r7F7F, r7F7F, 0x7f7f
-	sldi	rTMP, rFEFE, 32
-	insrdi	r7F7F, r7F7F, 32, 0
-	add	rFEFE, rFEFE, rTMP
-	b	L(g2)
-
-L(g0):	ldu	rALT, 8(rSRC)
-	stdu	rWORD, 8(rRTN)
-	add	rTMP, rFEFE, rALT
-	nor	rNEG, r7F7F, rALT
-	and.	rTMP, rTMP, rNEG
-	bne-	L(g1)
-	ldu	rWORD, 8(rSRC)
-	stdu	rALT, 8(rRTN)
-L(g2):	add	rTMP, rFEFE, rWORD
-	nor	rNEG, r7F7F, rWORD
-	and.	rTMP, rTMP, rNEG
-	beq+	L(g0)
-
-	mr	rALT, rWORD
-/* We've hit the end of the string.  Do the rest byte-by-byte.  */
-L(g1):
-#ifdef __LITTLE_ENDIAN__
-	extrdi.	rTMP, rALT, 8, 56
-	stbu	rALT, 8(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 48
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 40
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 32
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 24
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 16
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 8
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi	rTMP, rALT, 8, 0
-	stbu	rTMP, 1(rRTN)
-#else
-	extrdi.	rTMP, rALT, 8, 0
-	stbu	rTMP, 8(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 8
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 16
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 24
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 32
-	stbu	rTMP, 1(rRTN)
-	beqlr
-	extrdi.	rTMP, rALT, 8, 40
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	extrdi.	rTMP, rALT, 8, 48
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	stbu	rALT, 1(rRTN)
-#endif
-	blr
-
-L(check_word_alignment):
-	clrldi. rTMP, rTMP, 62
-	bne     L(unaligned)
-
-/* For word aligned memory, operate using word load and stores.  */
-	addi	rRTN, rRTN, -4
-
-	lis	rFEFE, -0x101
-	lis	r7F7F, 0x7f7f
-	lwz	rWORD, 0(rSRC)
-	addi	rFEFE, rFEFE, -0x101
-	addi	r7F7F, r7F7F, 0x7f7f
-	b	L(g5)
-
-L(g3):	lwzu	rALT, 4(rSRC)
-	stwu	rWORD, 4(rRTN)
-	add	rTMP, rFEFE, rALT
-	nor	rNEG, r7F7F, rALT
-	and.	rTMP, rTMP, rNEG
-	bne-	L(g4)
-	lwzu	rWORD, 4(rSRC)
-	stwu	rALT, 4(rRTN)
-L(g5):	add	rTMP, rFEFE, rWORD
-	nor	rNEG, r7F7F, rWORD
-	and.	rTMP, rTMP, rNEG
-	beq+	L(g3)
-
-	mr	rALT, rWORD
-/* We've hit the end of the string.  Do the rest byte-by-byte.  */
-L(g4):
-#ifdef __LITTLE_ENDIAN__
-	rlwinm.	rTMP, rALT, 0, 24, 31
-	stbu	rALT, 4(rRTN)
-	beqlr-
-	rlwinm.	rTMP, rALT, 24, 24, 31
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	rlwinm.	rTMP, rALT, 16, 24, 31
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	rlwinm	rTMP, rALT, 8, 24, 31
-	stbu	rTMP, 1(rRTN)
-#else
-	rlwinm.	rTMP, rALT, 8, 24, 31
-	stbu	rTMP, 4(rRTN)
-	beqlr-
-	rlwinm.	rTMP, rALT, 16, 24, 31
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	rlwinm.	rTMP, rALT, 24, 24, 31
-	stbu	rTMP, 1(rRTN)
-	beqlr-
-	stbu	rALT, 1(rRTN)
-#endif
-	blr
-
-/* Oh well.  In this case, we just do a byte-by-byte copy.  */
-	.align 4
-	nop
-L(unaligned):
-	lbz	rWORD, 0(rSRC)
-	addi	rRTN, rRTN, -1
-	cmpwi	rWORD, 0
-	beq-	L(u2)
-
-L(u0):	lbzu	rALT, 1(rSRC)
-	stbu	rWORD, 1(rRTN)
-	cmpwi	rALT, 0
-	beq-	L(u1)
-	nop		/* Let 601 load start of loop.  */
-	lbzu	rWORD, 1(rSRC)
-	stbu	rALT, 1(rRTN)
-	cmpwi	rWORD, 0
-	bne+	L(u0)
-L(u2):	stbu	rWORD, 1(rRTN)
-	blr
-L(u1):	stbu	rALT, 1(rRTN)
-	blr
-END (FUNC_NAME)
-
-#ifndef USE_AS_STPCPY
-libc_hidden_builtin_def (strcpy)
-#endif

Attachment: bench-stpcpy.out
Description: Text document

Attachment: bench-stpcpy-patch.out
Description: Text document

Attachment: bench-strcpy.out
Description: Text document

Attachment: bench-strcpy-patch.out
Description: Text document


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]