This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PING][PATCH] Fix strcpy_chk and stpcpy_chk performance.
- From: Adhemerval Zanella <adhemerval dot zanella at linaro dot org>
- To: libc-alpha at sourceware dot org
- Date: Wed, 19 Aug 2015 10:10:53 -0300
- Subject: Re: [PING][PATCH] Fix strcpy_chk and stpcpy_chk performance.
- Authentication-results: sourceware.org; auth=none
- References: <20150812064213 dot GA6256 at domone> <20150819061522 dot GA19761 at domone>
LGTM with some nits below:
On 19-08-2015 03:15, OndÅej BÃlka wrote:
> ping
> On Wed, Aug 12, 2015 at 08:42:13AM +0200, OndÅej BÃlka wrote:
>> Hi, as I wrote in previous patches a performance of checked strcpy and
>> stpcpy is terrible as these don't use sse2 and are around four times
>> slower that strcpy and stpcpy now.
>>
>> As this bug shows that these functions are not performance sensitive I
>> decided just to improve generic implementation instead for easier
>> maintainance.
>>
>> I could improve performance more if we allow to write past bound and
>> then fail as we could just call stpcpy and check if it exceed bounds or
>> not.
>>
>> Other arches could also have bad performance so sobebody should finally
>> read benchtests there to see problems.
>>
>> * debug/strcpy_chk.c: Improve performance.
>> * debug/stpcpy_chk.c: Likewise.
I think you should add the (__stpcpy_chk) to indicate which function
you are refering.
>> * sysdeps/x86_64/strcpy_chk.S: Remove.
>> * sysdeps/x86_64/stpcpy_chk.S: Remove.
>>
>> diff --git a/debug/stpcpy_chk.c b/debug/stpcpy_chk.c
>> index 91c5031..69476a2 100644
>> --- a/debug/stpcpy_chk.c
>> +++ b/debug/stpcpy_chk.c
>> @@ -29,16 +29,9 @@ __stpcpy_chk (dest, src, destlen)
>> const char *src;
>> size_t destlen;
>> {
Could you also get rid of the k&r function prototype (same for strcpy_chk) ?
>> - char *d = dest;
>> - const char *s = src;
>> -
>> - do
>> - {
>> - if (__glibc_unlikely (destlen-- == 0))
>> - __chk_fail ();
>> - *d++ = *s;
>> - }
>> - while (*s++ != '\0');
>> -
>> - return d - 1;
>> + size_t len = strlen (src);
>> + if (len > destlen)
>> + __chk_fail ();
>> +
>> + return memcpy (dest, src, len + 1) + len;
>> }
>> diff --git a/debug/strcpy_chk.c b/debug/strcpy_chk.c
>> index 91bf0dd..92eb69d2 100644
>> --- a/debug/strcpy_chk.c
>> +++ b/debug/strcpy_chk.c
>> @@ -28,40 +28,9 @@ __strcpy_chk (dest, src, destlen)
>> const char *src;
>> size_t destlen;
>> {
>> - char c;
>> - char *s = (char *) src;
>> - const ptrdiff_t off = dest - s;
>> -
>> - while (__builtin_expect (destlen >= 4, 0))
>> - {
>> - c = s[0];
>> - s[off] = c;
>> - if (c == '\0')
>> - return dest;
>> - c = s[1];
>> - s[off + 1] = c;
>> - if (c == '\0')
>> - return dest;
>> - c = s[2];
>> - s[off + 2] = c;
>> - if (c == '\0')
>> - return dest;
>> - c = s[3];
>> - s[off + 3] = c;
>> - if (c == '\0')
>> - return dest;
>> - destlen -= 4;
>> - s += 4;
>> - }
>> -
>> - do
>> - {
>> - if (__glibc_unlikely (destlen-- == 0))
>> - __chk_fail ();
>> - c = *s;
>> - *(s++ + off) = c;
>> - }
>> - while (c != '\0');
>> -
>> - return dest;
>> + size_t len = strlen (src);
>> + if (len > destlen)
>> + __chk_fail ();
>> +
>> + return memcpy (dest, src, len + 1);
>> }
>> diff --git a/sysdeps/x86_64/stpcpy_chk.S b/sysdeps/x86_64/stpcpy_chk.S
>> deleted file mode 100644
>> index 905e8d7..0000000
>> --- a/sysdeps/x86_64/stpcpy_chk.S
>> +++ /dev/null
>> @@ -1,3 +0,0 @@
>> -#define USE_AS_STPCPY_CHK
>> -#define STRCPY_CHK __stpcpy_chk
>> -#include <sysdeps/x86_64/strcpy_chk.S>
>> diff --git a/sysdeps/x86_64/strcpy_chk.S b/sysdeps/x86_64/strcpy_chk.S
>> deleted file mode 100644
>> index 24e51c6..0000000
>> --- a/sysdeps/x86_64/strcpy_chk.S
>> +++ /dev/null
>> @@ -1,208 +0,0 @@
>> -/* strcpy/stpcpy checking implementation for x86-64.
>> - Copyright (C) 2002-2015 Free Software Foundation, Inc.
>> - This file is part of the GNU C Library.
>> - Contributed by Andreas Jaeger <aj@suse.de>, 2002.
>> - Adopted into checking version by Jakub Jelinek <jakub@redhat.com>.
>> -
>> - The GNU C Library is free software; you can redistribute it and/or
>> - modify it under the terms of the GNU Lesser General Public
>> - License as published by the Free Software Foundation; either
>> - version 2.1 of the License, or (at your option) any later version.
>> -
>> - The GNU C Library is distributed in the hope that it will be useful,
>> - but WITHOUT ANY WARRANTY; without even the implied warranty of
>> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> - Lesser General Public License for more details.
>> -
>> - You should have received a copy of the GNU Lesser General Public
>> - License along with the GNU C Library; if not, see
>> - <http://www.gnu.org/licenses/>. */
>> -
>> -#include <sysdep.h>
>> -#include "asm-syntax.h"
>> -
>> -#ifndef USE_AS_STPCPY_CHK
>> -# define STRCPY_CHK __strcpy_chk
>> -#endif
>> -
>> - .text
>> -ENTRY (STRCPY_CHK)
>> - movq %rsi, %rcx /* Source register. */
>> - andl $7, %ecx /* mask alignment bits */
>> -#ifndef USE_AS_STPCPY_CHK
>> - movq %rdi, %r10 /* Duplicate destination pointer. */
>> -#endif
>> - jz 5f /* aligned => start loop */
>> -
>> - cmpq $8, %rdx /* Check if only few bytes left in
>> - destination. */
>> - jb 50f
>> -
>> - subq $8, %rcx /* We need to align to 8 bytes. */
>> - addq %rcx, %rdx /* Subtract count of stored bytes
>> - in the cycle below from destlen. */
>> -
>> - /* Search the first bytes directly. */
>> -0:
>> - movb (%rsi), %al /* Fetch a byte */
>> - testb %al, %al /* Is it NUL? */
>> - movb %al, (%rdi) /* Store it */
>> - jz 4f /* If it was NUL, done! */
>> - incq %rsi
>> - incq %rdi
>> - incl %ecx
>> - jnz 0b
>> -
>> -5:
>> - movq $0xfefefefefefefeff,%r8
>> - cmpq $32, %rdx /* Are there enough bytes in destination
>> - for the next unrolled round? */
>> - jb 60f /* If not, avoid the unrolled loop. */
>> -
>> - /* Now the sources is aligned. Unfortunatly we cannot force
>> - to have both source and destination aligned, so ignore the
>> - alignment of the destination. */
>> - .p2align 4
>> -1:
>> - /* 1st unroll. */
>> - movq (%rsi), %rax /* Read double word (8 bytes). */
>> - addq $8, %rsi /* Adjust pointer for next word. */
>> - movq %rax, %r9 /* Save a copy for NUL finding. */
>> - addq %r8, %r9 /* add the magic value to the word. We get
>> - carry bits reported for each byte which
>> - is *not* 0 */
>> - jnc 3f /* highest byte is NUL => return pointer */
>> - xorq %rax, %r9 /* (word+magic)^word */
>> - orq %r8, %r9 /* set all non-carry bits */
>> - incq %r9 /* add 1: if one carry bit was *not* set
>> - the addition will not result in 0. */
>> -
>> - jnz 3f /* found NUL => return pointer */
>> -
>> - movq %rax, (%rdi) /* Write value to destination. */
>> - addq $8, %rdi /* Adjust pointer. */
>> -
>> - /* 2nd unroll. */
>> - movq (%rsi), %rax /* Read double word (8 bytes). */
>> - addq $8, %rsi /* Adjust pointer for next word. */
>> - movq %rax, %r9 /* Save a copy for NUL finding. */
>> - addq %r8, %r9 /* add the magic value to the word. We get
>> - carry bits reported for each byte which
>> - is *not* 0 */
>> - jnc 3f /* highest byte is NUL => return pointer */
>> - xorq %rax, %r9 /* (word+magic)^word */
>> - orq %r8, %r9 /* set all non-carry bits */
>> - incq %r9 /* add 1: if one carry bit was *not* set
>> - the addition will not result in 0. */
>> -
>> - jnz 3f /* found NUL => return pointer */
>> -
>> - movq %rax, (%rdi) /* Write value to destination. */
>> - addq $8, %rdi /* Adjust pointer. */
>> -
>> - /* 3rd unroll. */
>> - movq (%rsi), %rax /* Read double word (8 bytes). */
>> - addq $8, %rsi /* Adjust pointer for next word. */
>> - movq %rax, %r9 /* Save a copy for NUL finding. */
>> - addq %r8, %r9 /* add the magic value to the word. We get
>> - carry bits reported for each byte which
>> - is *not* 0 */
>> - jnc 3f /* highest byte is NUL => return pointer */
>> - xorq %rax, %r9 /* (word+magic)^word */
>> - orq %r8, %r9 /* set all non-carry bits */
>> - incq %r9 /* add 1: if one carry bit was *not* set
>> - the addition will not result in 0. */
>> -
>> - jnz 3f /* found NUL => return pointer */
>> -
>> - movq %rax, (%rdi) /* Write value to destination. */
>> - addq $8, %rdi /* Adjust pointer. */
>> -
>> - /* 4th unroll. */
>> - movq (%rsi), %rax /* Read double word (8 bytes). */
>> - addq $8, %rsi /* Adjust pointer for next word. */
>> - movq %rax, %r9 /* Save a copy for NUL finding. */
>> - addq %r8, %r9 /* add the magic value to the word. We get
>> - carry bits reported for each byte which
>> - is *not* 0 */
>> - jnc 3f /* highest byte is NUL => return pointer */
>> - xorq %rax, %r9 /* (word+magic)^word */
>> - orq %r8, %r9 /* set all non-carry bits */
>> - incq %r9 /* add 1: if one carry bit was *not* set
>> - the addition will not result in 0. */
>> -
>> - jnz 3f /* found NUL => return pointer */
>> -
>> - subq $32, %rdx /* Adjust destlen. */
>> - movq %rax, (%rdi) /* Write value to destination. */
>> - addq $8, %rdi /* Adjust pointer. */
>> - cmpq $32, %rdx /* Are there enough bytes in destination
>> - for the next unrolled round? */
>> - jae 1b /* Next iteration. */
>> -
>> -60:
>> - cmpq $8, %rdx /* Are there enough bytes in destination
>> - for the next unrolled round? */
>> - jb 50f /* Now, copy and check byte by byte. */
>> -
>> - movq (%rsi), %rax /* Read double word (8 bytes). */
>> - addq $8, %rsi /* Adjust pointer for next word. */
>> - movq %rax, %r9 /* Save a copy for NUL finding. */
>> - addq %r8, %r9 /* add the magic value to the word. We get
>> - carry bits reported for each byte which
>> - is *not* 0 */
>> - jnc 3f /* highest byte is NUL => return pointer */
>> - xorq %rax, %r9 /* (word+magic)^word */
>> - orq %r8, %r9 /* set all non-carry bits */
>> - incq %r9 /* add 1: if one carry bit was *not* set
>> - the addition will not result in 0. */
>> -
>> - jnz 3f /* found NUL => return pointer */
>> -
>> - subq $8, %rdx /* Adjust destlen. */
>> - movq %rax, (%rdi) /* Write value to destination. */
>> - addq $8, %rdi /* Adjust pointer. */
>> - jmp 60b /* Next iteration. */
>> -
>> - /* Do the last few bytes. %rax contains the value to write.
>> - The loop is unrolled twice. */
>> - .p2align 4
>> -3:
>> - /* Note that stpcpy needs to return with the value of the NUL
>> - byte. */
>> - movb %al, (%rdi) /* 1st byte. */
>> - testb %al, %al /* Is it NUL. */
>> - jz 4f /* yes, finish. */
>> - incq %rdi /* Increment destination. */
>> - movb %ah, (%rdi) /* 2nd byte. */
>> - testb %ah, %ah /* Is it NUL?. */
>> - jz 4f /* yes, finish. */
>> - incq %rdi /* Increment destination. */
>> - shrq $16, %rax /* Shift... */
>> - jmp 3b /* and look at next two bytes in %rax. */
>> -
>> -51:
>> - /* Search the bytes directly, checking for overflows. */
>> - incq %rsi
>> - incq %rdi
>> - decq %rdx
>> - jz HIDDEN_JUMPTARGET (__chk_fail)
>> -52:
>> - movb (%rsi), %al /* Fetch a byte */
>> - testb %al, %al /* Is it NUL? */
>> - movb %al, (%rdi) /* Store it */
>> - jnz 51b /* If it was NUL, done! */
>> -4:
>> -#ifdef USE_AS_STPCPY_CHK
>> - movq %rdi, %rax /* Destination is return value. */
>> -#else
>> - movq %r10, %rax /* Source is return value. */
>> -#endif
>> - retq
>> -
>> -50:
>> - testq %rdx, %rdx
>> - jnz 52b
>> - jmp HIDDEN_JUMPTARGET (__chk_fail)
>> -
>> -END (STRCPY_CHK)
>