This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [Patch] aarch64: Thunderx specific memcpy and memmove
- From: Siddhesh Poyarekar <siddhesh at sourceware dot org>
- To: Steve Ellcey <sellcey at caviumnetworks dot com>, libc-alpha <libc-alpha at sourceware dot org>
- Cc: Adhemerval Zanella <adhemerval dot zanella at linaro dot org>
- Date: Tue, 2 May 2017 09:21:20 +0530
- Subject: Re: [Patch] aarch64: Thunderx specific memcpy and memmove
- Authentication-results: sourceware.org; auth=none
- References: <1490397926.19074.73.camel@caviumnetworks.com>
- Reply-to: siddhesh at sourceware dot org
On Saturday 25 March 2017 04:55 AM, Steve Ellcey wrote:
> If people think we should use the ThunderX version of memcpy for all
> aarch64 systems I am happy to drop this patch and create one that just
> changes memcpy.S to do the ThunderX style prefetches for all aarch64
> systems.
That could be done as an add-on if we find out that it is the case.
The patch looks good to me with the formatting fixups I have specified
inline.
Siddhesh
> 2017-03-24 Steve Ellcey <sellcey@caviumnetworks.com>
>
> * sysdeps/aarch64/memcpy.S (MEMMOVE, MEMCPY): New macros.
> (memmove): Use MEMMOVE for name.
> (memcpy): Use MEMCPY for name. Add loop with prefetching
> under USE_THUNDERX macro.
> * sysdeps/aarch64/multiarch/Makefile: New file.
> * sysdeps/aarch64/multiarch/ifunc-impl-list.c: Likewise.
> * sysdeps/aarch64/multiarch/init-arch.h: Likewise.
> * sysdeps/aarch64/multiarch/memcpy.c: Likewise.
> * sysdeps/aarch64/multiarch/memcpy_generic.S: Likewise.
> * sysdeps/aarch64/multiarch/memcpy_thunderx.S: Likewise.
> * sysdeps/aarch64/multiarch/memmove.c: Likewise.
>
>
> ifunc.patch
>
>
> diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S
> index 29af8b1..74444b4 100644
> --- a/sysdeps/aarch64/memcpy.S
> +++ b/sysdeps/aarch64/memcpy.S
> @@ -59,7 +59,14 @@
> Overlapping large forward memmoves use a loop that copies backwards.
> */
>
> -ENTRY_ALIGN (memmove, 6)
> +#ifndef MEMMOVE
> +# define MEMMOVE memmove
Single char indent.
> +#endif
> +#ifndef MEMCPY
> +# define MEMCPY memcpy
Likewise.
> +#endif
> +
> +ENTRY_ALIGN (MEMMOVE, 6)
>
> DELOUSE (0)
> DELOUSE (1)
> @@ -71,9 +78,9 @@ ENTRY_ALIGN (memmove, 6)
> b.lo L(move_long)
>
> /* Common case falls through into memcpy. */
> -END (memmove)
> -libc_hidden_builtin_def (memmove)
> -ENTRY (memcpy)
> +END (MEMMOVE)
> +libc_hidden_builtin_def (MEMMOVE)
> +ENTRY (MEMCPY)
>
> DELOUSE (0)
> DELOUSE (1)
> @@ -158,10 +165,22 @@ L(copy96):
>
> .p2align 4
> L(copy_long):
> +
> +#ifdef USE_THUNDERX
> +
> + /* On thunderx, large memcpys are helped by software prefetching.
> + This loop is identical to the one below it but with prefetching
> + instructions included. For copies of less than 32768 bytes,
> + the prefetching does not help and slows the code down, so we only
> + use the prefetching loop for the largest memcpys. */
> +
> + cmp count, #32768
> + b.lo L(copy_long_without_prefetch)
> and tmp1, dstin, 15
> bic dst, dstin, 15
> ldp D_l, D_h, [src]
> sub src, src, tmp1
> + prfm pldl1strm, [src, 384]
> add count, count, tmp1 /* Count is now 16 too large. */
> ldp A_l, A_h, [src, 16]
> stp D_l, D_h, [dstin]
> @@ -169,7 +188,10 @@ L(copy_long):
> ldp C_l, C_h, [src, 48]
> ldp D_l, D_h, [src, 64]!
> subs count, count, 128 + 16 /* Test and readjust count. */
> - b.ls 2f
> +
> +L(prefetch_loop64):
> + tbz src, #6, 1f
> + prfm pldl1strm, [src, 512]
> 1:
> stp A_l, A_h, [dst, 16]
> ldp A_l, A_h, [src, 16]
> @@ -180,12 +202,40 @@ L(copy_long):
> stp D_l, D_h, [dst, 64]!
> ldp D_l, D_h, [src, 64]!
> subs count, count, 64
> - b.hi 1b
> + b.hi L(prefetch_loop64)
> + b L(last64)
> +
> +L(copy_long_without_prefetch):
> +#endif
> +
> + and tmp1, dstin, 15
> + bic dst, dstin, 15
> + ldp D_l, D_h, [src]
> + sub src, src, tmp1
> + add count, count, tmp1 /* Count is now 16 too large. */
> + ldp A_l, A_h, [src, 16]
> + stp D_l, D_h, [dstin]
> + ldp B_l, B_h, [src, 32]
> + ldp C_l, C_h, [src, 48]
> + ldp D_l, D_h, [src, 64]!
> + subs count, count, 128 + 16 /* Test and readjust count. */
> + b.ls L(last64)
> +L(loop64):
> + stp A_l, A_h, [dst, 16]
> + ldp A_l, A_h, [src, 16]
> + stp B_l, B_h, [dst, 32]
> + ldp B_l, B_h, [src, 32]
> + stp C_l, C_h, [dst, 48]
> + ldp C_l, C_h, [src, 48]
> + stp D_l, D_h, [dst, 64]!
> + ldp D_l, D_h, [src, 64]!
> + subs count, count, 64
> + b.hi L(loop64)
>
> /* Write the last full set of 64 bytes. The remainder is at most 64
> bytes, so it is safe to always copy 64 bytes from the end even if
> there is just 1 byte left. */
> -2:
> +L(last64):
> ldp E_l, E_h, [srcend, -64]
> stp A_l, A_h, [dst, 16]
> ldp A_l, A_h, [srcend, -48]
> @@ -256,5 +306,5 @@ L(move_long):
> stp C_l, C_h, [dstin]
> 3: ret
>
> -END (memcpy)
> -libc_hidden_builtin_def (memcpy)
> +END (MEMCPY)
> +libc_hidden_builtin_def (MEMCPY)
> diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
> index e69de29..78d52c7 100644
> --- a/sysdeps/aarch64/multiarch/Makefile
> +++ b/sysdeps/aarch64/multiarch/Makefile
> @@ -0,0 +1,3 @@
> +ifeq ($(subdir),string)
> +sysdep_routines += memcpy_generic memcpy_thunderx
> +endif
> diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
> index e69de29..c4f23df 100644
> --- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
> @@ -0,0 +1,51 @@
> +/* Enumerate available IFUNC implementations of a function. AARCH64 version.
> + Copyright (C) 2017 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#include <assert.h>
> +#include <string.h>
> +#include <wchar.h>
> +#include <ldsodefs.h>
> +#include <ifunc-impl-list.h>
> +#include <init-arch.h>
> +#include <stdio.h>
> +
> +/* Maximum number of IFUNC implementations. */
> +#define MAX_IFUNC 2
> +
> +size_t
> +__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> + size_t max)
> +{
> + assert (max >= MAX_IFUNC);
> +
> + size_t i = 0;
> +
> + INIT_ARCH ();
> +
> + /* Support sysdeps/aarch64/multiarch/memcpy.c and memmove.c. */
> + IFUNC_IMPL (i, name, memcpy,
> + IFUNC_IMPL_ADD (array, i, memcpy, IS_THUNDERX (midr),
> + __memcpy_thunderx)
> + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
> + IFUNC_IMPL (i, name, memmove,
> + IFUNC_IMPL_ADD (array, i, memmove, IS_THUNDERX (midr),
> + __memmove_thunderx)
> + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
> +
> + return i;
> +}
> diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
> index e69de29..e690e00 100644
> --- a/sysdeps/aarch64/multiarch/init-arch.h
> +++ b/sysdeps/aarch64/multiarch/init-arch.h
> @@ -0,0 +1,22 @@
> +/* This file is part of the GNU C Library.
The first line of this comment should be a one-line description of the file.
> + Copyright (C) 2017 Free Software Foundation, Inc.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#include <ldsodefs.h>
> +
> +#define INIT_ARCH() \
> + uint64_t __attribute__((unused)) midr = \
> + GLRO(dl_aarch64_cpu_features).midr_el1;
> diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
> index e69de29..4e3f251 100644
> --- a/sysdeps/aarch64/multiarch/memcpy.c
> +++ b/sysdeps/aarch64/multiarch/memcpy.c
> @@ -0,0 +1,39 @@
> +/* Multiple versions of memcpy. AARCH64 version.
> + Copyright (C) 2017 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +/* Define multiple versions only for the definition in libc. */
> +
> +#if IS_IN (libc)
> +/* Redefine memcpy so that the compiler won't complain about the type
> + mismatch with the IFUNC selector in strong_alias, below. */
> +# undef memcpy
> +# define memcpy __redirect_memcpy
> +# include <string.h>
> +# include <init-arch.h>
> +
> +extern __typeof (__redirect_memcpy) __libc_memcpy;
> +
> +extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
> +extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden;
> +
> +libc_ifunc (__libc_memcpy,
> + IS_THUNDERX (midr) ? __memcpy_thunderx : __memcpy_generic);
> +
> +#undef memcpy
Single char indent (i.e. "# undef memcpy"), since this is nested inside the "#if IS_IN (libc)" block.
> +strong_alias (__libc_memcpy, memcpy);
> +#endif
> diff --git a/sysdeps/aarch64/multiarch/memcpy_generic.S b/sysdeps/aarch64/multiarch/memcpy_generic.S
> index e69de29..50e1a1c 100644
> --- a/sysdeps/aarch64/multiarch/memcpy_generic.S
> +++ b/sysdeps/aarch64/multiarch/memcpy_generic.S
> @@ -0,0 +1,42 @@
> +/* A Generic Optimized memcpy implementation for AARCH64.
> + Copyright (C) 2017 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +/* The actual memcpy and memmove code is in ../memcpy.S. If we are
> + building libc this file defines __memcpy_generic and __memmove_generic.
> + Otherwise the include of ../memcpy.S will define the normal __memcpy
> + and __memmove entry points. */
> +
> +#include <sysdep.h>
> +
> +#if IS_IN (libc)
> +
> +#define MEMCPY __memcpy_generic
> +#define MEMMOVE __memmove_generic
> +
> +/* Do not hide the generic versions of memcpy and memmove, we use them
> + internally. */
> +#undef libc_hidden_builtin_def
> +#define libc_hidden_builtin_def(name)
> +
> +/* It doesn't make sense to send libc-internal memcpy calls through a PLT. */
> + .globl __GI_memcpy; __GI_memcpy = __memcpy_generic
> + .globl __GI_memmove; __GI_memmove = __memmove_generic
Single char indent for all macro defs.
> +
> +#endif
> +
> +#include "../memcpy.S"
> diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
> index e69de29..ee971c8 100644
> --- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S
> +++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
> @@ -0,0 +1,32 @@
> +/* A Thunderx Optimized memcpy implementation for AARCH64.
> + Copyright (C) 2017 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +/* The actual thunderx optimized code is in ../memcpy.S under the USE_THUNDERX
> + ifdef. If we are not building libc then we do not build anything when
> + compiling this file and __memcpy is defined by memcpy_generic.S. */
> +
> +#include <sysdep.h>
> +
> +#if IS_IN (libc)
> +
> +#define MEMCPY __memcpy_thunderx
> +#define MEMMOVE __memmove_thunderx
> +#define USE_THUNDERX
> +#include "../memcpy.S"
Single char indent for all macro defs.
> +
> +#endif
> diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
> index e69de29..8d7a146 100644
> --- a/sysdeps/aarch64/multiarch/memmove.c
> +++ b/sysdeps/aarch64/multiarch/memmove.c
> @@ -0,0 +1,39 @@
> +/* Multiple versions of memmove. AARCH64 version.
> + Copyright (C) 2017 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +/* Define multiple versions only for the definition in libc. */
> +
> +#if IS_IN (libc)
> +/* Redefine memmove so that the compiler won't complain about the type
> + mismatch with the IFUNC selector in strong_alias, below. */
> +# undef memmove
> +# define memmove __redirect_memmove
> +# include <string.h>
> +# include <init-arch.h>
> +
> +extern __typeof (__redirect_memmove) __libc_memmove;
> +
> +extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden;
> +extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden;
> +
> +libc_ifunc (__libc_memmove,
> + IS_THUNDERX (midr) ? __memmove_thunderx : __memmove_generic);
> +
> +#undef memmove
Single char indent (i.e. "# undef memmove"), since this is nested inside the "#if IS_IN (libc)" block.
> +strong_alias (__libc_memmove, memmove);
> +#endif
>