This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] [BZ #18858] Implement x86-64 multiarch mempcpy in memcpy
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: GNU C Library <libc-alpha at sourceware dot org>
- Date: Mon, 28 Mar 2016 10:31:16 -0700
- Subject: Re: [PATCH] [BZ #18858] Implement x86-64 multiarch mempcpy in memcpy
- Authentication-results: sourceware.org; auth=none
- References: <20160325134152 dot GA23806 at intel dot com>
On Fri, Mar 25, 2016 at 6:41 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> Implement x86-64 multiarch mempcpy in memcpy to share most of the code.
> It will reduce the code size of libc.so.
>
> Tested on x86-64. Comments? Feedback?
Changes in bench-mempcpy output are just noise. I will check it
in this week.
> H.J.
> ---
> [BZ #18858]
> * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Remove
> mempcpy-ssse3, mempcpy-ssse3-back, mempcpy-avx-unaligned
> and mempcpy-avx512-no-vzeroupper.
> * sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S (MEMPCPY_CHK):
> New.
> (MEMPCPY): Likewise.
> * sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
> (MEMPCPY_CHK): New.
> (MEMPCPY): Likewise.
> * sysdeps/x86_64/multiarch/memcpy-ssse3-back.S (MEMPCPY_CHK): New.
> (MEMPCPY): Likewise.
> * sysdeps/x86_64/multiarch/memcpy-ssse3.S (MEMPCPY_CHK): New.
> (MEMPCPY): Likewise.
> * sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S: Removed.
> * sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S:
> Likewise.
> * sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S: Likewise.
> * sysdeps/x86_64/multiarch/mempcpy-ssse3.S: Likewise.
> ---
> sysdeps/x86_64/multiarch/Makefile | 8 ++++----
> sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S | 18 +++++++++++++++++-
> .../x86_64/multiarch/memcpy-avx512-no-vzeroupper.S | 16 ++++++++++++++++
> sysdeps/x86_64/multiarch/memcpy-ssse3-back.S | 16 ++++++++++++++++
> sysdeps/x86_64/multiarch/memcpy-ssse3.S | 16 ++++++++++++++++
> sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S | 22 ----------------------
> .../multiarch/mempcpy-avx512-no-vzeroupper.S | 22 ----------------------
> sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S | 4 ----
> sysdeps/x86_64/multiarch/mempcpy-ssse3.S | 4 ----
> 9 files changed, 69 insertions(+), 57 deletions(-)
> delete mode 100644 sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S
> delete mode 100644 sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S
> delete mode 100644 sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
> delete mode 100644 sysdeps/x86_64/multiarch/mempcpy-ssse3.S
>
> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> index d234f4a..39c0905 100644
> --- a/sysdeps/x86_64/multiarch/Makefile
> +++ b/sysdeps/x86_64/multiarch/Makefile
> @@ -8,10 +8,10 @@ ifeq ($(subdir),string)
> sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
> strcmp-sse2-unaligned strncmp-ssse3 \
> memcmp-sse4 memcpy-ssse3 memcpy-sse2-unaligned \
> - memcpy-avx512-no-vzeroupper mempcpy-ssse3 memmove-ssse3 \
> - memcpy-ssse3-back mempcpy-ssse3-back memmove-avx-unaligned \
> - memcpy-avx-unaligned mempcpy-avx-unaligned \
> - mempcpy-avx512-no-vzeroupper memmove-ssse3-back \
> + memcpy-avx512-no-vzeroupper memmove-ssse3 \
> + memcpy-ssse3-back memmove-avx-unaligned \
> + memcpy-avx-unaligned \
> + memmove-ssse3-back \
> memmove-avx512-no-vzeroupper strcasecmp_l-ssse3 \
> strncase_l-ssse3 strcat-ssse3 strncat-ssse3\
> strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
> diff --git a/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S
> index b615d06..dd4187f 100644
> --- a/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S
> +++ b/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S
> @@ -25,11 +25,26 @@
>
> #include "asm-syntax.h"
> #ifndef MEMCPY
> -# define MEMCPY __memcpy_avx_unaligned
> +# define MEMCPY __memcpy_avx_unaligned
> # define MEMCPY_CHK __memcpy_chk_avx_unaligned
> +# define MEMPCPY __mempcpy_avx_unaligned
> +# define MEMPCPY_CHK __mempcpy_chk_avx_unaligned
> #endif
>
> .section .text.avx,"ax",@progbits
> +#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
> +ENTRY (MEMPCPY_CHK)
> + cmpq %rdx, %rcx
> + jb HIDDEN_JUMPTARGET (__chk_fail)
> +END (MEMPCPY_CHK)
> +
> +ENTRY (MEMPCPY)
> + movq %rdi, %rax
> + addq %rdx, %rax
> + jmp L(start)
> +END (MEMPCPY)
> +#endif
> +
> #if !defined USE_AS_BCOPY
> ENTRY (MEMCPY_CHK)
> cmpq %rdx, %rcx
> @@ -42,6 +57,7 @@ ENTRY (MEMCPY)
> #ifdef USE_AS_MEMPCPY
> add %rdx, %rax
> #endif
> +L(start):
> cmp $256, %rdx
> jae L(256bytesormore)
> cmp $16, %dl
> diff --git a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
> index 3d567fc..285bb83 100644
> --- a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
> +++ b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
> @@ -27,9 +27,24 @@
> #ifndef MEMCPY
> # define MEMCPY __memcpy_avx512_no_vzeroupper
> # define MEMCPY_CHK __memcpy_chk_avx512_no_vzeroupper
> +# define MEMPCPY __mempcpy_avx512_no_vzeroupper
> +# define MEMPCPY_CHK __mempcpy_chk_avx512_no_vzeroupper
> #endif
>
> .section .text.avx512,"ax",@progbits
> +#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
> +ENTRY (MEMPCPY_CHK)
> + cmpq %rdx, %rcx
> + jb HIDDEN_JUMPTARGET (__chk_fail)
> +END (MEMPCPY_CHK)
> +
> +ENTRY (MEMPCPY)
> + movq %rdi, %rax
> + addq %rdx, %rax
> + jmp L(start)
> +END (MEMPCPY)
> +#endif
> +
> #if !defined USE_AS_BCOPY
> ENTRY (MEMCPY_CHK)
> cmpq %rdx, %rcx
> @@ -42,6 +57,7 @@ ENTRY (MEMCPY)
> #ifdef USE_AS_MEMPCPY
> add %rdx, %rax
> #endif
> +L(start):
> lea (%rsi, %rdx), %rcx
> lea (%rdi, %rdx), %r9
> cmp $512, %rdx
> diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
> index 08b41e9..b4890f4 100644
> --- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
> +++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
> @@ -29,6 +29,8 @@
> #ifndef MEMCPY
> # define MEMCPY __memcpy_ssse3_back
> # define MEMCPY_CHK __memcpy_chk_ssse3_back
> +# define MEMPCPY __mempcpy_ssse3_back
> +# define MEMPCPY_CHK __mempcpy_chk_ssse3_back
> #endif
>
> #define JMPTBL(I, B) I - B
> @@ -44,6 +46,19 @@
> ud2
>
> .section .text.ssse3,"ax",@progbits
> +#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
> +ENTRY (MEMPCPY_CHK)
> + cmpq %rdx, %rcx
> + jb HIDDEN_JUMPTARGET (__chk_fail)
> +END (MEMPCPY_CHK)
> +
> +ENTRY (MEMPCPY)
> + movq %rdi, %rax
> + addq %rdx, %rax
> + jmp L(start)
> +END (MEMPCPY)
> +#endif
> +
> #if !defined USE_AS_BCOPY
> ENTRY (MEMCPY_CHK)
> cmpq %rdx, %rcx
> @@ -66,6 +81,7 @@ ENTRY (MEMCPY)
> BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
> L(copy_forward):
> #endif
> +L(start):
> cmp $144, %rdx
> jae L(144bytesormore)
>
> diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
> index 95de969..1ca88c0 100644
> --- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
> +++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
> @@ -29,6 +29,8 @@
> #ifndef MEMCPY
> # define MEMCPY __memcpy_ssse3
> # define MEMCPY_CHK __memcpy_chk_ssse3
> +# define MEMPCPY __mempcpy_ssse3
> +# define MEMPCPY_CHK __mempcpy_chk_ssse3
> #endif
>
> #define JMPTBL(I, B) I - B
> @@ -44,6 +46,19 @@
> ud2
>
> .section .text.ssse3,"ax",@progbits
> +#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
> +ENTRY (MEMPCPY_CHK)
> + cmpq %rdx, %rcx
> + jb HIDDEN_JUMPTARGET (__chk_fail)
> +END (MEMPCPY_CHK)
> +
> +ENTRY (MEMPCPY)
> + movq %rdi, %rax
> + addq %rdx, %rax
> + jmp L(start)
> +END (MEMPCPY)
> +#endif
> +
> #if !defined USE_AS_BCOPY
> ENTRY (MEMCPY_CHK)
> cmpq %rdx, %rcx
> @@ -66,6 +81,7 @@ ENTRY (MEMCPY)
> jmp L(copy_backward)
> L(copy_forward):
> #endif
> +L(start):
> cmp $79, %rdx
> lea L(table_less_80bytes)(%rip), %r11
> ja L(80bytesormore)
> diff --git a/sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S b/sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S
> deleted file mode 100644
> index 241378e..0000000
> --- a/sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S
> +++ /dev/null
> @@ -1,22 +0,0 @@
> -/* mempcpy with AVX
> - Copyright (C) 2014-2016 Free Software Foundation, Inc.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -#define USE_AS_MEMPCPY
> -#define MEMCPY __mempcpy_avx_unaligned
> -#define MEMCPY_CHK __mempcpy_chk_avx_unaligned
> -#include "memcpy-avx-unaligned.S"
> diff --git a/sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S
> deleted file mode 100644
> index fcc0945..0000000
> --- a/sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S
> +++ /dev/null
> @@ -1,22 +0,0 @@
> -/* mempcpy optimized with AVX512 for KNL hardware.
> - Copyright (C) 2016 Free Software Foundation, Inc.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -#define USE_AS_MEMPCPY
> -#define MEMCPY __mempcpy_avx512_no_vzeroupper
> -#define MEMCPY_CHK __mempcpy_chk_avx512_no_vzeroupper
> -#include "memcpy-avx512-no-vzeroupper.S"
> diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
> deleted file mode 100644
> index 82ffacb..0000000
> --- a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
> +++ /dev/null
> @@ -1,4 +0,0 @@
> -#define USE_AS_MEMPCPY
> -#define MEMCPY __mempcpy_ssse3_back
> -#define MEMCPY_CHK __mempcpy_chk_ssse3_back
> -#include "memcpy-ssse3-back.S"
> diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3.S
> deleted file mode 100644
> index 822d98e..0000000
> --- a/sysdeps/x86_64/multiarch/mempcpy-ssse3.S
> +++ /dev/null
> @@ -1,4 +0,0 @@
> -#define USE_AS_MEMPCPY
> -#define MEMCPY __mempcpy_ssse3
> -#define MEMCPY_CHK __mempcpy_chk_ssse3
> -#include "memcpy-ssse3.S"
> --
> 2.5.5
>
--
H.J.