This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] powerpc: strcasestr optimization



This patch optimizes the strcasestr function for POWER7 and later systems.
It uses the optimized strlen and strnlen to calculate string lengths;
the average improvement from this optimization is ~40%.
The patch was tested on powerpc64 and powerpc64le.
Benchmark results with the new patch are attached.

2015-05-29  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>

	* sysdeps/powerpc/powerpc64/multiarch/Makefile: Add strcasestr().
	* sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c: Likewise.
	* sysdeps/powerpc/powerpc64/power7/strcasestr.S: New File.
	* sysdeps/powerpc/powerpc64/multiarch/strcasestr-power7.S: New File.
	* sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c: New File.
	* sysdeps/powerpc/powerpc64/multiarch/strcasestr.c: New File.
---
  sysdeps/powerpc/powerpc64/multiarch/Makefile       |   2 +-
  .../powerpc/powerpc64/multiarch/ifunc-impl-list.c  |   7 +
  .../powerpc64/multiarch/strcasestr-power7.S        |  43 +++++
  .../powerpc/powerpc64/multiarch/strcasestr-ppc64.c |  34 ++++
  sysdeps/powerpc/powerpc64/multiarch/strcasestr.c   |  37 +++++
  sysdeps/powerpc/powerpc64/power7/strcasestr.S      | 177
+++++++++++++++++++++
  6 files changed, 299 insertions(+), 1 deletion(-)
create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasestr-power7.S
  create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c
  create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasestr.c
  create mode 100644 sysdeps/powerpc/powerpc64/power7/strcasestr.S

diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile
b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index 17265bd..06b2c67 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -19,7 +19,7 @@ sysdep_routines += memcpy-power7 memcpy-a2
memcpy-power6 memcpy-cell \
  		   strcmp-power8 strcmp-power7 strcmp-ppc64 \
  		   strcat-power8 strcat-power7 strcat-ppc64 \
  		   memmove-power7 memmove-ppc64 wordcopy-ppc64 bcopy-ppc64 \
-		   strncpy-power8
+		   strncpy-power8 strcasestr-power7 strcasestr-ppc64

  CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops
  CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index f5fdea5..0fd2bd2 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -322,5 +322,12 @@ __libc_ifunc_impl_list (const char *name, struct
libc_ifunc_impl *array,
  	      IFUNC_IMPL_ADD (array, i, strcat, 1,
  			     __strcat_ppc))

+  /* Support sysdeps/powerpc/powerpc64/multiarch/strcasestr.c.  */
+  IFUNC_IMPL (i, name, strcasestr,
+	      IFUNC_IMPL_ADD (array, i, strcasestr,
+			      hwcap & PPC_FEATURE_HAS_VSX,
+			      __strcasestr_power7)
+	      IFUNC_IMPL_ADD (array, i, strcasestr, 1,
+			      __strcasestr_ppc))
    return i;
  }
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasestr-power7.S
b/sysdeps/powerpc/powerpc64/multiarch/strcasestr-power7.S
new file mode 100644
index 0000000..e13f575
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasestr-power7.S
@@ -0,0 +1,43 @@
+/* Optimized strcasestr implementation for POWER7.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#undef EALIGN	/* Re-target the included entry point to __strcasestr_power7.  */
+#define EALIGN(name, alignt, words)				\
+  .section ".text";						\
+  ENTRY_2(__strcasestr_power7)					\
+  .align ALIGNARG(alignt);					\
+  EALIGN_W_##words;						\
+  BODY_LABEL(__strcasestr_power7):				\
+  cfi_startproc;						\
+  LOCALENTRY(__strcasestr_power7)
+
+#undef END	/* Close the function under the same fixed name.  */
+#define END(name)						\
+  cfi_endproc;							\
+  TRACEBACK(__strcasestr_power7)				\
+  END_2(__strcasestr_power7)
+
+#undef libc_hidden_builtin_def	/* Expands to nothing for this variant.  */
+#define libc_hidden_builtin_def(name)
+
+#define STRLEN __strlen_power7	/* Length routines the included code calls.  */
+#define STRNLEN __strnlen_power7
+
+#include <sysdeps/powerpc/powerpc64/power7/strcasestr.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c
b/sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c
new file mode 100644
index 0000000..614c7bf
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c
@@ -0,0 +1,34 @@
+/* PowerPC64 default implementation of strcasestr.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+
+#define STRCASESTR  __strcasestr_ppc
+#if IS_IN (libc) && defined(SHARED)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) \
+  __hidden_ver1(__strcasestr_ppc, __GI_strcasestr, __strcasestr_ppc);
+#endif
+
+/* Drop weak aliases here; multiarch/strcasestr.c defines the public one.  */
+#undef weak_alias
+#define weak_alias(a, b)
+
+extern __typeof (strcasestr) __strcasestr_ppc attribute_hidden;
+
+#include <string/strcasestr.c>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasestr.c
b/sysdeps/powerpc/powerpc64/multiarch/strcasestr.c
new file mode 100644
index 0000000..6564314
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcasestr.c
@@ -0,0 +1,37 @@
+/* Multiple versions of strcasestr.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# include <string.h>
+# include <shlib-compat.h>
+# include "init-arch.h"
+
+extern __typeof (__strcasestr) __strcasestr_ppc attribute_hidden;
+extern __typeof (__strcasestr) __strcasestr_power7 attribute_hidden;
+
+/* Resolve __strcasestr to __strcasestr_power7 when hwcap has
+   PPC_FEATURE_HAS_VSX set, otherwise to __strcasestr_ppc.  */
+libc_ifunc (__strcasestr,
+	    (hwcap & PPC_FEATURE_HAS_VSX)
+            ? __strcasestr_power7
+            : __strcasestr_ppc);
+
+weak_alias (__strcasestr, strcasestr)
+#else /* !IS_IN (libc): no ifunc, use the generic C file directly.  */
+#include <string/strcasestr.c>
+#endif
diff --git a/sysdeps/powerpc/powerpc64/power7/strcasestr.S
b/sysdeps/powerpc/powerpc64/power7/strcasestr.S
new file mode 100644
index 0000000..521eadb
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/strcasestr.S
@@ -0,0 +1,177 @@
+/* Optimized strcasestr implementation for PowerPC64.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <locale-defines.h>
+
+/* Length helpers: unless the including file pre-defined them, use the
+   internal GLIBC symbols in shared builds and the public names otherwise.  */
+#if !defined STRLEN
+# if defined SHARED
+#  define STRLEN	__GI_strlen
+# else
+#  define STRLEN	strlen
+# endif
+#endif
+
+#if !defined STRNLEN
+# if defined SHARED
+#  define STRNLEN	__GI_strnlen
+# else
+#  define STRNLEN	strnlen
+# endif
+#endif
+
+/* Drop any macro versions of the names used below.  */
+#undef __strcasestr
+#undef strcasestr
+
+/* Entry-point name; an including file may pre-define it.  */
+#if !defined STRCASESTR
+# define STRCASESTR	__strcasestr
+#endif
+
+/* char * [r3] strcasestr (char *s [r3], char * pat[r4])
+
+   Case-insensitive substring search.  The pattern length comes from
+   STRLEN; patterns longer than 2048 bytes are handed to __strcasestr_ppc.
+   Otherwise each position of s is tried in turn, comparing bytes after
+   mapping both sides through the locale's LOCALE_CTYPE_TOLOWER table.
+
+   Register use after setup: r29 = s, r30 = pat, r31 = strlen (pat), r8 =
+   candidate start in s, r11 = tolower table, r12 = tolower (pat[0]).  */
+#define	FRAMESIZE	(FRAME_MIN_SIZE+32)
+	.machine	power7
+EALIGN (STRCASESTR, 4, 0)
+	CALL_MCOUNT 2
+	mflr	r0			/* Load link register LR to r0.  */
+	std	r31, -8(r1)		/* Save callers register r31.  */
+	cfi_offset(r31, -8)
+	std	r30, -16(r1)		/* Save callers register r30.  */
+	cfi_offset(r30, -16)
+	std	r29, -24(r1)		/* Save callers register r29.  */
+	cfi_offset(r29, -24)
+	std	r0, 16(r1)		/* Store the link register.  */
+	cfi_offset(lr, 16)
+	stdu	r1, -FRAMESIZE(r1)	/* Create the stack frame.  */
+	cfi_adjust_cfa_offset(FRAMESIZE)
+
+	dcbt	0, r3			/* Prefetch the start of s.  */
+	dcbt	0, r4			/* Prefetch the start of pat.  */
+
+	cmpdi	cr7, r3, 0		/* Input validation: NULL s or pat  */
+	beq	cr7, L(retnull)		/* returns NULL.  Test each pointer  */
+	cmpdi	cr7, r4, 0		/* on its own; s & pat can be zero  */
+	beq	cr7, L(retnull)		/* for two valid pointers.  */
+
+	mr	r29, r3			/* r29 = s, kept across the calls.  */
+	mr	r30, r4			/* r30 = pat, kept across the calls.  */
+	mr	r3, r4
+	bl	STRLEN			/* r3 = strlen (pat).  */
+	nop
+	/* Call __strcasestr_ppc if needle len > 2048.  NOTE(review): that symbol
+	   comes from the multiarch build; confirm non-multiarch builds link.  */
+	cmpdi	cr7, r3, 2048
+	bgt	cr7, L(default)
+
+	cmpdi	cr7, r3, 0		/* Empty pat: the match is s itself.  */
+	beq	cr7, L(ret_r3)
+	mr	r31, r3			/* r31 = strlen (pat).  */
+	mr	r4, r3
+	mr	r3, r29
+	bl	STRNLEN			/* r3 = strnlen (s, strlen (pat)).  */
+	nop
+
+	cmpd	cr7, r3, r31		/* s shorter than pat: no match.  */
+	blt	cr7, L(retnull)
+
+	mr	r3, r29
+	addi	r8, r29, -1		/* r8 = s - 1; L(loop1) steps it first.  */
+	ld	r10, __libc_tsd_LOCALE@got@tprel(r2)
+	add	r11, r10, __libc_tsd_LOCALE@tls	/* r11 = &__libc_tsd_LOCALE.  */
+	ld	r11, 0(r11)		/* r11 = locale pointer.  */
+	ld	r11, LOCALE_CTYPE_TOLOWER(r11)	/* r11 = tolower table.  */
+
+	mr	r4, r30
+	lbz	r6, 0(r4)		/* Load first byte of pat.  */
+	cmpdi	cr7, r6, 0		/* Is it null?  */
+	beq	cr7, L(updater3)
+	sldi	r7, r6, 2		/* Scale to index 4-byte entries.  */
+	lwzx	r7, r11, r7		/* r7 = tolower (pat[0]).  */
+	mr	r12, r7			/* Save it for next loop.  */
+L(loop1):
+	addi	r8, r8, 1		/* Advance to the next candidate.  */
+	mr	r3, r8			/* r3 = cursor in s.  */
+	mr	r4, r30			/* r4 = cursor in pat, from its start.  */
+	mr	r7, r12			/* r7 = tolower (pat[0]).  */
+L(loop):
+	lbz	r5, 0(r3)		/* Load byte from r3.  */
+	cmpdi	cr7, r5, 0		/* Is it null?  */
+	beq	cr7, L(retnull)		/* End of s before a full match.  */
+	sldi	r10, r5, 2		/* Scale to index 4-byte entries.  */
+	lwzx	r10, r11, r10		/* r10 = tolower (byte of s).  */
+	cmpw	cr7, r7, r10		/* Compare with byte from r4.  */
+	bne	cr7, L(loop1)		/* Mismatch: try the next start.  */
+	addi	r3, r3, 1		/* Increment r3.  */
+	addi	r4, r4, 1		/* Increment r4.  */
+	lbz	r6, 0(r4)		/* Load next byte from r4.  */
+	cmpdi	cr7, r6, 0		/* Is it null?  */
+	beq	cr7, L(updater3)	/* Whole pat matched.  */
+	sldi	r7, r6, 2		/* Scale to index 4-byte entries.  */
+	lwzx	r7, r11, r7		/* r7 = tolower (byte of pat).  */
+	b	L(loop)
+
+	/* Handling return values.  */
+	.align	4
+L(updater3):
+	subf	r3, r31, r3	/* r3 is past the match; back up strlen (pat).  */
+	b	L(end)
+
+	.align	4
+L(ret_r3):
+	mr	r3, r29		/* Return s.  */
+	b	L(end)
+
+	.align	4
+L(retnull):
+	li	r3, 0		/* Return NULL.  */
+	b	L(end)
+
+	.align	4
+L(default):
+	mr	r3, r29		/* Reload the original arguments.  */
+	mr	r4, r30
+	bl	__strcasestr_ppc
+	nop
+
+	.align	4
+L(end):
+	addi	r1, r1, FRAMESIZE	/* Restore stack pointer.  */
+	cfi_adjust_cfa_offset(-FRAMESIZE)
+	ld	r0, 16(r1)	/* Restore the saved link register.  */
+	ld	r29, -24(r1)	/* Restore callers save register r29.  */
+	ld	r30, -16(r1)	/* Restore callers save register r30.  */
+	ld	r31, -8(r1)	/* Restore callers save register r31.  */
+	mtlr	r0		/* Put the return address back in LR.  */
+	blr			/* Return to the caller.  */
+END (STRCASESTR)
+#ifndef NO_ALIAS
+weak_alias (__strcasestr, strcasestr)
+#endif
+
+libc_hidden_builtin_def (strcasestr)
--
2.1.0






Attachment: strcasestr-benchresults
Description: Text document


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]