This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PING][PATCH] Fix strcpy_chk and stpcpy_chk performance.


LGTM with some nits below:

On 19-08-2015 03:15, OndÅej BÃlka wrote:
> ping
> On Wed, Aug 12, 2015 at 08:42:13AM +0200, OndÅej BÃlka wrote:
>> Hi, as I wrote in previous patches a performance of checked strcpy and
>> stpcpy is terrible as these don't use sse2 and are around four times
>> slower that strcpy and stpcpy now.
>>
>> As this bug shows that these functions are not performance sensitive I
>> decided just to improve generic implementation instead for easier
>> maintainance.
>>
>> I could improve performance more if we allow to write past bound and
>> then fail as we could just call stpcpy and check if it exceed bounds or
>> not.
>>
>> Other arches could also have bad performance so sobebody should finally
>> read benchtests there to see problems.
>>
>> 	* debug/strcpy_chk.c: Improve performance.
>> 	* debug/stpcpy_chk.c: Likewise.

I think you should add the (__stpcpy_chk) to indicate which function
you are refering.

>> 	* sysdeps/x86_64/strcpy_chk.S: Remove.
>> 	* sysdeps/x86_64/stpcpy_chk.S: Remove.
>>
>> diff --git a/debug/stpcpy_chk.c b/debug/stpcpy_chk.c
>> index 91c5031..69476a2 100644
>> --- a/debug/stpcpy_chk.c
>> +++ b/debug/stpcpy_chk.c
>> @@ -29,16 +29,9 @@ __stpcpy_chk (dest, src, destlen)
>>       const char *src;
>>       size_t destlen;
>>  {

Could you also get rid of the k&r function prototype (same for strcpy_chk) ?

>> -  char *d = dest;
>> -  const char *s = src;
>> -
>> -  do
>> -    {
>> -      if (__glibc_unlikely (destlen-- == 0))
>> -	__chk_fail ();
>> -      *d++ = *s;
>> -    }
>> -  while (*s++ != '\0');
>> -
>> -  return d - 1;
>> +  size_t len = strlen (src);
>> +  if (len > destlen)
>> +    __chk_fail ();
>> +    
>> +  return memcpy (dest, src, len + 1) + len;
>>  }
>> diff --git a/debug/strcpy_chk.c b/debug/strcpy_chk.c
>> index 91bf0dd..92eb69d2 100644
>> --- a/debug/strcpy_chk.c
>> +++ b/debug/strcpy_chk.c
>> @@ -28,40 +28,9 @@ __strcpy_chk (dest, src, destlen)
>>       const char *src;
>>       size_t destlen;
>>  {
>> -  char c;
>> -  char *s = (char *) src;
>> -  const ptrdiff_t off = dest - s;
>> -
>> -  while (__builtin_expect (destlen >= 4, 0))
>> -    {
>> -      c = s[0];
>> -      s[off] = c;
>> -      if (c == '\0')
>> -        return dest;
>> -      c = s[1];
>> -      s[off + 1] = c;
>> -      if (c == '\0')
>> -        return dest;
>> -      c = s[2];
>> -      s[off + 2] = c;
>> -      if (c == '\0')
>> -        return dest;
>> -      c = s[3];
>> -      s[off + 3] = c;
>> -      if (c == '\0')
>> -        return dest;
>> -      destlen -= 4;
>> -      s += 4;
>> -    }
>> -
>> -  do
>> -    {
>> -      if (__glibc_unlikely (destlen-- == 0))
>> -        __chk_fail ();
>> -      c = *s;
>> -      *(s++ + off) = c;
>> -    }
>> -  while (c != '\0');
>> -
>> -  return dest;
>> +  size_t len = strlen (src);
>> +  if (len > destlen)
>> +    __chk_fail ();
>> +    
>> +  return memcpy (dest, src, len + 1);
>>  }
>> diff --git a/sysdeps/x86_64/stpcpy_chk.S b/sysdeps/x86_64/stpcpy_chk.S
>> deleted file mode 100644
>> index 905e8d7..0000000
>> --- a/sysdeps/x86_64/stpcpy_chk.S
>> +++ /dev/null
>> @@ -1,3 +0,0 @@
>> -#define USE_AS_STPCPY_CHK
>> -#define STRCPY_CHK __stpcpy_chk
>> -#include <sysdeps/x86_64/strcpy_chk.S>
>> diff --git a/sysdeps/x86_64/strcpy_chk.S b/sysdeps/x86_64/strcpy_chk.S
>> deleted file mode 100644
>> index 24e51c6..0000000
>> --- a/sysdeps/x86_64/strcpy_chk.S
>> +++ /dev/null
>> @@ -1,208 +0,0 @@
>> -/* strcpy/stpcpy checking implementation for x86-64.
>> -   Copyright (C) 2002-2015 Free Software Foundation, Inc.
>> -   This file is part of the GNU C Library.
>> -   Contributed by Andreas Jaeger <aj@suse.de>, 2002.
>> -   Adopted into checking version by Jakub Jelinek <jakub@redhat.com>.
>> -
>> -   The GNU C Library is free software; you can redistribute it and/or
>> -   modify it under the terms of the GNU Lesser General Public
>> -   License as published by the Free Software Foundation; either
>> -   version 2.1 of the License, or (at your option) any later version.
>> -
>> -   The GNU C Library is distributed in the hope that it will be useful,
>> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> -   Lesser General Public License for more details.
>> -
>> -   You should have received a copy of the GNU Lesser General Public
>> -   License along with the GNU C Library; if not, see
>> -   <http://www.gnu.org/licenses/>.  */
>> -
>> -#include <sysdep.h>
>> -#include "asm-syntax.h"
>> -
>> -#ifndef USE_AS_STPCPY_CHK
>> -# define STRCPY_CHK __strcpy_chk
>> -#endif
>> -
>> -	.text
>> -ENTRY (STRCPY_CHK)
>> -	movq	%rsi, %rcx	/* Source register. */
>> -	andl	$7, %ecx	/* mask alignment bits */
>> -#ifndef USE_AS_STPCPY_CHK
>> -	movq	%rdi, %r10	/* Duplicate destination pointer.  */
>> -#endif
>> -	jz 5f			/* aligned => start loop */
>> -
>> -	cmpq	$8, %rdx	/* Check if only few bytes left in
>> -				   destination.  */
>> -	jb	50f
>> -
>> -	subq	$8, %rcx	/* We need to align to 8 bytes.  */
>> -	addq	%rcx, %rdx	/* Subtract count of stored bytes
>> -				   in the cycle below from destlen.  */
>> -
>> -	/* Search the first bytes directly.  */
>> -0:
>> -	movb	(%rsi), %al	/* Fetch a byte */
>> -	testb	%al, %al	/* Is it NUL? */
>> -	movb	%al, (%rdi)	/* Store it */
>> -	jz	4f		/* If it was NUL, done! */
>> -	incq	%rsi
>> -	incq	%rdi
>> -	incl	%ecx
>> -	jnz	0b
>> -
>> -5:
>> -	movq $0xfefefefefefefeff,%r8
>> -	cmpq	$32, %rdx	/* Are there enough bytes in destination
>> -				   for the next unrolled round?  */
>> -	jb	60f		/* If not, avoid the unrolled loop.  */
>> -
>> -	/* Now the sources is aligned.  Unfortunatly we cannot force
>> -	   to have both source and destination aligned, so ignore the
>> -	   alignment of the destination.  */
>> -	.p2align 4
>> -1:
>> -	/* 1st unroll.  */
>> -	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
>> -	addq	$8, %rsi	/* Adjust pointer for next word.  */
>> -	movq	%rax, %r9	/* Save a copy for NUL finding.  */
>> -	addq	%r8, %r9	/* add the magic value to the word.  We get
>> -				   carry bits reported for each byte which
>> -				   is *not* 0 */
>> -	jnc	3f		/* highest byte is NUL => return pointer */
>> -	xorq	%rax, %r9	/* (word+magic)^word */
>> -	orq	%r8, %r9	/* set all non-carry bits */
>> -	incq	%r9		/* add 1: if one carry bit was *not* set
>> -				   the addition will not result in 0.  */
>> -
>> -	jnz	3f		/* found NUL => return pointer */
>> -
>> -	movq	%rax, (%rdi)	/* Write value to destination.  */
>> -	addq	$8, %rdi	/* Adjust pointer.  */
>> -
>> -	/* 2nd unroll.  */
>> -	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
>> -	addq	$8, %rsi	/* Adjust pointer for next word.  */
>> -	movq	%rax, %r9	/* Save a copy for NUL finding.  */
>> -	addq	%r8, %r9	/* add the magic value to the word.  We get
>> -				   carry bits reported for each byte which
>> -				   is *not* 0 */
>> -	jnc	3f		/* highest byte is NUL => return pointer */
>> -	xorq	%rax, %r9	/* (word+magic)^word */
>> -	orq	%r8, %r9	/* set all non-carry bits */
>> -	incq	%r9		/* add 1: if one carry bit was *not* set
>> -				   the addition will not result in 0.  */
>> -
>> -	jnz	3f		/* found NUL => return pointer */
>> -
>> -	movq	%rax, (%rdi)	/* Write value to destination.  */
>> -	addq	$8, %rdi	/* Adjust pointer.  */
>> -
>> -	/* 3rd unroll.  */
>> -	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
>> -	addq	$8, %rsi	/* Adjust pointer for next word.  */
>> -	movq	%rax, %r9	/* Save a copy for NUL finding.  */
>> -	addq	%r8, %r9	/* add the magic value to the word.  We get
>> -				   carry bits reported for each byte which
>> -				   is *not* 0 */
>> -	jnc	3f		/* highest byte is NUL => return pointer */
>> -	xorq	%rax, %r9	/* (word+magic)^word */
>> -	orq	%r8, %r9	/* set all non-carry bits */
>> -	incq	%r9		/* add 1: if one carry bit was *not* set
>> -				   the addition will not result in 0.  */
>> -
>> -	jnz	3f		/* found NUL => return pointer */
>> -
>> -	movq	%rax, (%rdi)	/* Write value to destination.  */
>> -	addq	$8, %rdi	/* Adjust pointer.  */
>> -
>> -	/* 4th unroll.  */
>> -	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
>> -	addq	$8, %rsi	/* Adjust pointer for next word.  */
>> -	movq	%rax, %r9	/* Save a copy for NUL finding.  */
>> -	addq	%r8, %r9	/* add the magic value to the word.  We get
>> -				   carry bits reported for each byte which
>> -				   is *not* 0 */
>> -	jnc	3f		/* highest byte is NUL => return pointer */
>> -	xorq	%rax, %r9	/* (word+magic)^word */
>> -	orq	%r8, %r9	/* set all non-carry bits */
>> -	incq	%r9		/* add 1: if one carry bit was *not* set
>> -				   the addition will not result in 0.  */
>> -
>> -	jnz	3f		/* found NUL => return pointer */
>> -
>> -	subq	$32, %rdx	/* Adjust destlen.  */
>> -	movq	%rax, (%rdi)	/* Write value to destination.  */
>> -	addq	$8, %rdi	/* Adjust pointer.  */
>> -	cmpq	$32, %rdx	/* Are there enough bytes in destination
>> -				   for the next unrolled round?  */
>> -	jae	1b		/* Next iteration.  */
>> -
>> -60:
>> -	cmpq	$8, %rdx	/* Are there enough bytes in destination
>> -				   for the next unrolled round?  */
>> -	jb	50f		/* Now, copy and check byte by byte.  */
>> -
>> -	movq	(%rsi), %rax	/* Read double word (8 bytes).  */
>> -	addq	$8, %rsi	/* Adjust pointer for next word.  */
>> -	movq	%rax, %r9	/* Save a copy for NUL finding.  */
>> -	addq	%r8, %r9	/* add the magic value to the word.  We get
>> -				   carry bits reported for each byte which
>> -				   is *not* 0 */
>> -	jnc	3f		/* highest byte is NUL => return pointer */
>> -	xorq	%rax, %r9	/* (word+magic)^word */
>> -	orq	%r8, %r9	/* set all non-carry bits */
>> -	incq	%r9		/* add 1: if one carry bit was *not* set
>> -				   the addition will not result in 0.  */
>> -
>> -	jnz	3f		/* found NUL => return pointer */
>> -
>> -	subq	$8, %rdx	/* Adjust destlen.  */
>> -	movq	%rax, (%rdi)	/* Write value to destination.  */
>> -	addq	$8, %rdi	/* Adjust pointer.  */
>> -	jmp	60b		/* Next iteration.  */
>> -
>> -	/* Do the last few bytes. %rax contains the value to write.
>> -	   The loop is unrolled twice.  */
>> -	.p2align 4
>> -3:
>> -	/* Note that stpcpy needs to return with the value of the NUL
>> -	   byte.  */
>> -	movb	%al, (%rdi)	/* 1st byte.  */
>> -	testb	%al, %al	/* Is it NUL.  */
>> -	jz	4f		/* yes, finish.  */
>> -	incq	%rdi		/* Increment destination.  */
>> -	movb	%ah, (%rdi)	/* 2nd byte.  */
>> -	testb	%ah, %ah	/* Is it NUL?.  */
>> -	jz	4f		/* yes, finish.  */
>> -	incq	%rdi		/* Increment destination.  */
>> -	shrq	$16, %rax	/* Shift...  */
>> -	jmp	3b		/* and look at next two bytes in %rax.  */
>> -
>> -51:
>> -	/* Search the bytes directly, checking for overflows.  */
>> -	incq	%rsi
>> -	incq	%rdi
>> -	decq	%rdx
>> -	jz	HIDDEN_JUMPTARGET (__chk_fail)
>> -52:
>> -	movb	(%rsi), %al	/* Fetch a byte */
>> -	testb	%al, %al	/* Is it NUL? */
>> -	movb	%al, (%rdi)	/* Store it */
>> -	jnz	51b		/* If it was NUL, done! */
>> -4:
>> -#ifdef USE_AS_STPCPY_CHK
>> -	movq	%rdi, %rax	/* Destination is return value.  */
>> -#else
>> -	movq	%r10, %rax	/* Source is return value.  */
>> -#endif
>> -	retq
>> -
>> -50:
>> -	testq	%rdx, %rdx
>> -	jnz	52b
>> -	jmp	HIDDEN_JUMPTARGET (__chk_fail)
>> -
>> -END (STRCPY_CHK)
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]