[PATCH] powerpc: Optimized strcat for POWER8/PPC64


Building on the new optimized strcpy for POWER8 [1], this patch adds an
optimized strcat that combines it (together with the POWER7 strlen) with
the default C implementation in sysdeps/powerpc/strcat.c.

I see good improvements over the POWER7 version on a POWER8 machine,
especially for unaligned cases (which the new strcpy aims to optimize).
Benchtest results are attached.

Tested on powerpc64 and powerpc64le.



 	* sysdeps/powerpc/powerpc64/multiarch/Makefile [sysdep_routines]: Add
	strcat-power8 object.
	* sysdeps/powerpc/powerpc64/multiarch/strcat.c (strcat): Add
	__strcat_power8 implementation.
	* sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
	(__libc_ifunc_impl_list): Add __strcat_power8 implementation.
	* sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c: New file:
	optimized strcat for power8.


diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index f170551..74b2daa 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -18,8 +18,8 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
 		   strrchr-power7 strrchr-ppc64 strncat-power7 strncat-ppc64 \
 		   strncpy-power7 strncpy-ppc64 \
 		   stpncpy-power7 stpncpy-ppc64 strcmp-power7 strcmp-ppc64 \
-		   strcat-power7 strcat-ppc64 memmove-power7 memmove-ppc64 \
-		   bcopy-ppc64
+		   strcat-power8 strcat-power7 strcat-ppc64 memmove-power7 \
+		   memmove-ppc64 bcopy-ppc64
 CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops
 CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index 2a7e7f5..d5b2184 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -303,6 +303,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/powerpc/powerpc64/multiarch/strcat.c.  */
   IFUNC_IMPL (i, name, strcat,
 	      IFUNC_IMPL_ADD (array, i, strcat,
+			      hwcap2 & PPC_FEATURE2_ARCH_2_07,
+			      __strcat_power8)
+	      IFUNC_IMPL_ADD (array, i, strcat,
 			      hwcap & PPC_FEATURE_HAS_VSX,
 	      IFUNC_IMPL_ADD (array, i, strcat, 1,
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c b/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c
new file mode 100644
index 0000000..3dc0ef6
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c
@@ -0,0 +1,30 @@
+/* Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+#include <string.h>
+#define STRCAT __strcat_power8
+#undef libc_hidden_def
+#define libc_hidden_def(name)
+extern typeof (strcpy) __strcpy_power8;
+extern typeof (strlen) __strlen_power7;
+#define strcpy __strcpy_power8
+#define strlen __strlen_power7
+#include <sysdeps/powerpc/strcat.c>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcat.c b/sysdeps/powerpc/powerpc64/multiarch/strcat.c
index ec21062..b6f58a3 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strcat.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcat.c
@@ -23,9 +23,12 @@
 extern __typeof (strcat) __strcat_ppc attribute_hidden;
 extern __typeof (strcat) __strcat_power7 attribute_hidden;
+extern __typeof (strcat) __strcat_power8 attribute_hidden;
 libc_ifunc (strcat,
-            (hwcap & PPC_FEATURE_HAS_VSX)
-            ? __strcat_power7
+            (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+            ? __strcat_power8 :
+              (hwcap & PPC_FEATURE_HAS_VSX)
+              ? __strcat_power7
             : __strcat_ppc);

