This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Faster strn?cat


---
 string/strcat.c                    |   23 +---
 string/strncat.c                   |   55 +-------
 sysdeps/x86_64/multiarch/strcat.S  |   84 ------------
 sysdeps/x86_64/multiarch/strncat.S |    3 -
 sysdeps/x86_64/strcat.S            |  259 ------------------------------------
 5 files changed, 5 insertions(+), 419 deletions(-)
 delete mode 100644 sysdeps/x86_64/multiarch/strcat.S
 delete mode 100644 sysdeps/x86_64/multiarch/strncat.S
 delete mode 100644 sysdeps/x86_64/strcat.S

diff --git a/string/strcat.c b/string/strcat.c
index f9e4bc6..dc8f3a4 100644
--- a/string/strcat.c
+++ b/string/strcat.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 1991, 1997, 2003, 2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -26,26 +26,7 @@ strcat (dest, src)
      char *dest;
      const char *src;
 {
-  char *s1 = dest;
-  const char *s2 = src;
-  char c;
-
-  /* Find the end of the string.  */
-  do
-    c = *s1++;
-  while (c != '\0');
-
-  /* Make S1 point before the next character, so we can increment
-     it while memory is read (wins on pipelined cpus).  */
-  s1 -= 2;
-
-  do
-    {
-      c = *s2++;
-      *++s1 = c;
-    }
-  while (c != '\0');
-
+	strcpy(dest+strlen(dest),src);
   return dest;
 }
 libc_hidden_builtin_def (strcat)
diff --git a/string/strncat.c b/string/strncat.c
index dcfb04d..4faab94 100644
--- a/string/strncat.c
+++ b/string/strncat.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,1997,2011 Free Software Foundation, Inc.
+/* Copyright (C) 1991,1997,2011,2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -27,56 +27,7 @@
 #endif
 
 char *
-STRNCAT (char *s1, const char *s2, size_t n)
+STRNCAT (char *d, const char *s, size_t n)
 {
-  char c;
-  char *s = s1;
-
-  /* Find the end of S1.  */
-  do
-    c = *s1++;
-  while (c != '\0');
-
-  /* Make S1 point before next character, so we can increment
-     it while memory is read (wins on pipelined cpus).  */
-  s1 -= 2;
-
-  if (n >= 4)
-    {
-      size_t n4 = n >> 2;
-      do
-	{
-	  c = *s2++;
-	  *++s1 = c;
-	  if (c == '\0')
-	    return s;
-	  c = *s2++;
-	  *++s1 = c;
-	  if (c == '\0')
-	    return s;
-	  c = *s2++;
-	  *++s1 = c;
-	  if (c == '\0')
-	    return s;
-	  c = *s2++;
-	  *++s1 = c;
-	  if (c == '\0')
-	    return s;
-	} while (--n4 > 0);
-      n &= 3;
-    }
-
-  while (n > 0)
-    {
-      c = *s2++;
-      *++s1 = c;
-      if (c == '\0')
-	return s;
-      n--;
-    }
-
-  if (c != '\0')
-    *++s1 = '\0';
-
-  return s;
+	return 	strncpy(d+strlen(d),s,n);
 }
diff --git a/sysdeps/x86_64/multiarch/strcat.S b/sysdeps/x86_64/multiarch/strcat.S
deleted file mode 100644
index 0c256de..0000000
--- a/sysdeps/x86_64/multiarch/strcat.S
+++ /dev/null
@@ -1,84 +0,0 @@
-/* Multiple versions of strcat
-   Copyright (C) 2009, 2011 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifndef USE_AS_STRNCAT
-# ifndef STRCAT
-#  define STRCAT strcat
-# endif
-#endif
-
-#ifdef USE_AS_STRNCAT
-# define STRCAT_SSSE3	         	__strncat_ssse3
-# define STRCAT_SSE2	            	__strncat_sse2
-# define STRCAT_SSE2_UNALIGNED    	__strncat_sse2_unaligned
-# define __GI_STRCAT	            	__GI_strncat
-# define __GI___STRCAT              __GI___strncat
-#else
-# define STRCAT_SSSE3	         	__strcat_ssse3
-# define STRCAT_SSE2	            	__strcat_sse2
-# define STRCAT_SSE2_UNALIGNED    	__strcat_sse2_unaligned
-# define __GI_STRCAT	            	__GI_strcat
-# define __GI___STRCAT              __GI___strcat
-#endif
-
-
-/* Define multiple versions only for the definition in libc.  */
-#ifndef NOT_IN_libc
-	.text
-ENTRY(STRCAT)
-	.type	STRCAT, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
-	jne	1f
-	call	__init_cpu_features
-1:	leaq	STRCAT_SSE2_UNALIGNED(%rip), %rax
-	testl	$bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
-	jnz	2f
-	leaq	STRCAT_SSE2(%rip), %rax
-	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
-	jz	2f
-	leaq	STRCAT_SSSE3(%rip), %rax
-2:	ret
-END(STRCAT)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type STRCAT_SSE2, @function; \
-	.align 16; \
-	STRCAT_SSE2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size STRCAT_SSE2, .-STRCAT_SSE2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcat calls through a PLT.
-   The speedup we get from using SSSE3 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_STRCAT; __GI_STRCAT = STRCAT_SSE2
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
-	.globl __GI___STRCAT; __GI___STRCAT = STRCAT_SSE2
-#endif
-
-#ifndef USE_AS_STRNCAT
-# include "../strcat.S"
-#endif
diff --git a/sysdeps/x86_64/multiarch/strncat.S b/sysdeps/x86_64/multiarch/strncat.S
deleted file mode 100644
index fd569c2..0000000
--- a/sysdeps/x86_64/multiarch/strncat.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define STRCAT strncat
-#define USE_AS_STRNCAT
-#include "strcat.S"
diff --git a/sysdeps/x86_64/strcat.S b/sysdeps/x86_64/strcat.S
deleted file mode 100644
index 535a18d..0000000
--- a/sysdeps/x86_64/strcat.S
+++ /dev/null
@@ -1,259 +0,0 @@
-/* strcat(dest, src) -- Append SRC on the end of DEST.
-   Optimized for x86-64.
-   Copyright (C) 2002 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Andreas Jaeger <aj@suse.de>, 2002.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-#include "bp-sym.h"
-#include "bp-asm.h"
-
-
-	.text
-ENTRY (BP_SYM (strcat))
-	movq %rdi, %rcx		/* Dest. register. */
-	andl $7, %ecx		/* mask alignment bits */
-	movq %rdi, %rax		/* Duplicate destination pointer.  */
-	movq $0xfefefefefefefeff,%r8
-
-	/* First step: Find end of destination.  */
-	jz 4f			/* aligned => start loop */
-
-	neg %ecx		/* We need to align to 8 bytes.  */
-	addl $8,%ecx
-	/* Search the first bytes directly.  */
-0:	cmpb $0x0,(%rax)	/* is byte NUL? */
-	je 2f			/* yes => start copy */
-	incq %rax		/* increment pointer */
-	decl %ecx
-	jnz 0b
-
-
-
-	/* Now the source is aligned.  Scan for NUL byte.  */
-	.p2align 4
-4:
-	/* First unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz 3f			/* found NUL => return pointer */
-
-	/* Second unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz 3f			/* found NUL => return pointer */
-
-	/* Third unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz 3f			/* found NUL => return pointer */
-
-	/* Fourth unroll.  */
-	movq (%rax), %rcx	/* get double word (= 8 bytes) in question */
-	addq $8,%rax		/* adjust pointer for next word */
-	movq %r8, %rdx		/* magic value */
-	addq %rcx, %rdx		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc 3f			/* highest byte is NUL => return pointer */
-	xorq %rcx, %rdx		/* (word+magic)^word */
-	orq %r8, %rdx		/* set all non-carry bits */
-	incq %rdx		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jz 4b			/* no NUL found => continue loop */
-
-	.p2align 4		/* Align, it's a jump target.  */
-3:	subq $8,%rax		/* correct pointer increment.  */
-
-	testb %cl, %cl		/* is first byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testb %ch, %ch		/* is second byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testl $0x00ff0000, %ecx /* is third byte NUL? */
-	jz 2f			/* yes => return pointer */
-	incq %rax		/* increment pointer */
-
-	testl $0xff000000, %ecx /* is fourth byte NUL? */
-	jz 2f			/* yes => return pointer */
-	incq %rax		/* increment pointer */
-
-	shrq $32, %rcx		/* look at other half.  */
-
-	testb %cl, %cl		/* is first byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testb %ch, %ch		/* is second byte NUL? */
-	jz 2f			/* yes => return */
-	incq %rax		/* increment pointer */
-
-	testl $0xff0000, %ecx	/* is third byte NUL? */
-	jz 2f			/* yes => return pointer */
-	incq %rax		/* increment pointer */
-
-2:
-	/* Second step: Copy source to destination.  */
-
-	movq	%rsi, %rcx	/* duplicate  */
-	andl	$7,%ecx		/* mask alignment bits */
-	movq	%rax, %rdx	/* move around */
-	jz	22f		/* aligned => start loop */
-
-	neg	%ecx		/* align to 8 bytes.  */
-	addl	$8, %ecx
-	/* Align the source pointer.  */
-21:
-	movb	(%rsi), %al	/* Fetch a byte */
-	testb	%al, %al	/* Is it NUL? */
-	movb	%al, (%rdx)	/* Store it */
-	jz	24f		/* If it was NUL, done! */
-	incq	%rsi
-	incq	%rdx
-	decl	%ecx
-	jnz	21b
-
-	/* Now the sources is aligned.  Unfortunatly we cannot force
-	   to have both source and destination aligned, so ignore the
-	   alignment of the destination.  */
-	.p2align 4
-22:
-	/* 1st unroll.  */
-	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
-	addq	$8, %rsi	/* Adjust pointer for next word.  */
-	movq	%rax, %r9	/* Save a copy for NUL finding.  */
-	addq	%r8, %r9	/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc	23f		/* highest byte is NUL => return pointer */
-	xorq	%rax, %r9	/* (word+magic)^word */
-	orq	%r8, %r9	/* set all non-carry bits */
-	incq	%r9		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-
-	jnz	23f		/* found NUL => return pointer */
-
-	movq	%rax, (%rdx)	/* Write value to destination.  */
-	addq	$8, %rdx	/* Adjust pointer.  */
-
-	/* 2nd unroll.  */
-	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
-	addq	$8, %rsi	/* Adjust pointer for next word.  */
-	movq	%rax, %r9	/* Save a copy for NUL finding.  */
-	addq	%r8, %r9	/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc	23f		/* highest byte is NUL => return pointer */
-	xorq	%rax, %r9	/* (word+magic)^word */
-	orq	%r8, %r9	/* set all non-carry bits */
-	incq	%r9		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-
-	jnz	23f		/* found NUL => return pointer */
-
-	movq	%rax, (%rdx)	/* Write value to destination.  */
-	addq	$8, %rdx	/* Adjust pointer.  */
-
-	/* 3rd unroll.  */
-	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
-	addq	$8, %rsi	/* Adjust pointer for next word.  */
-	movq	%rax, %r9	/* Save a copy for NUL finding.  */
-	addq	%r8, %r9	/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc	23f		/* highest byte is NUL => return pointer */
-	xorq	%rax, %r9	/* (word+magic)^word */
-	orq	%r8, %r9	/* set all non-carry bits */
-	incq	%r9		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-
-	jnz	23f		/* found NUL => return pointer */
-
-	movq	%rax, (%rdx)	/* Write value to destination.  */
-	addq	$8, %rdx	/* Adjust pointer.  */
-
-	/* 4th unroll.  */
-	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
-	addq	$8, %rsi	/* Adjust pointer for next word.  */
-	movq	%rax, %r9	/* Save a copy for NUL finding.  */
-	addq	%r8, %r9	/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc	23f		/* highest byte is NUL => return pointer */
-	xorq	%rax, %r9	/* (word+magic)^word */
-	orq	%r8, %r9	/* set all non-carry bits */
-	incq	%r9		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-
-	jnz	23f		/* found NUL => return pointer */
-
-	movq	%rax, (%rdx)	/* Write value to destination.  */
-	addq	$8, %rdx	/* Adjust pointer.  */
-	jmp	22b		/* Next iteration.  */
-
-	/* Do the last few bytes. %rax contains the value to write.
-	   The loop is unrolled twice.  */
-	.p2align 4
-23:
-	movb	%al, (%rdx)	/* 1st byte.  */
-	testb	%al, %al	/* Is it NUL.  */
-	jz	24f		/* yes, finish.  */
-	incq	%rdx		/* Increment destination.  */
-	movb	%ah, (%rdx)	/* 2nd byte.  */
-	testb	%ah, %ah	/* Is it NUL?.  */
-	jz	24f		/* yes, finish.  */
-	incq	%rdx		/* Increment destination.  */
-	shrq	$16, %rax	/* Shift...  */
-	jmp	23b		/* and look at next two bytes in %rax.  */
-
-
-24:
-	movq	%rdi, %rax	/* Source is return value.  */
-	retq
-END (BP_SYM (strcat))
-libc_hidden_builtin_def (strcat)
-- 
1.7.7.6


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]