This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH 3/4] [Powerpc] tune/optimize memmove/wordcopy. Populatepower7 wordcopy.c files.
- From: Will Schmidt <will_schmidt at vnet dot ibm dot com>
- To: libc-alpha at sourceware dot org
- Cc: willschm at us dot ibm dot com
- Date: Mon, 19 Mar 2012 16:02:00 -0500
- Subject: [PATCH 3/4] [Powerpc] tune/optimize memmove/wordcopy. Populatepower7 wordcopy.c files.
- References: <20120319210108.24160.63510.stgit@brimstone>
[Powerpc] tune/optimize memmove/wordcopy. Populate power7 wordcopy.c files.
Populate the power7 sysdep directories with versions of the wordcopy.c
files.
2012-03-19 Will Schmidt <will_schmidt@vnet.ibm.com>
* sysdeps/powerpc/powerpc64/power7/wordcopy.c: New file. (Copied from
sysdeps/powerpc/powerpc64/power6/wordcopy.c)
* sysdeps/powerpc/powerpc32/power7/wordcopy.c: New file. (Copied from
sysdeps/powerpc/powerpc32/power6/wordcopy.c)
* sysdeps/powerpc/memmove.c: New file. (Copied from string/memmove.c).
---
sysdeps/powerpc/memmove.c | 116 ++++++++++++++
sysdeps/powerpc/powerpc32/power7/wordcopy.c | 218 +++++++++++++++++++++++++++
sysdeps/powerpc/powerpc64/power7/wordcopy.c | 218 +++++++++++++++++++++++++++
3 files changed, 552 insertions(+), 0 deletions(-)
create mode 100644 sysdeps/powerpc/memmove.c
create mode 100644 sysdeps/powerpc/powerpc32/power7/wordcopy.c
create mode 100644 sysdeps/powerpc/powerpc64/power7/wordcopy.c
diff --git a/sysdeps/powerpc/memmove.c b/sysdeps/powerpc/memmove.c
new file mode 100644
index 0000000..0799a08
--- /dev/null
+++ b/sysdeps/powerpc/memmove.c
@@ -0,0 +1,116 @@
+/* Copy memory to memory until the specified number of bytes
+ has been copied. Overlap is handled correctly.
+ Copyright (C) 1991, 1995, 1996, 1997, 2003, 2012
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <string.h>
+#include <memcopy.h>
+#include <pagecopy.h>
+
+/* All this is so that bcopy.c can #include
+ this file after defining some things. */
+#ifndef a1
+#define a1 dest /* First arg is DEST. */
+#define a1const
+#define a2 src /* Second arg is SRC. */
+#define a2const const
+#undef memmove
+#endif
+#if !defined(RETURN) || !defined(rettype)
+#define RETURN(s) return (s) /* Return DEST. */
+#define rettype void *
+#endif
+
+#ifndef MEMMOVE
+#define MEMMOVE memmove
+#endif
+
+rettype
+MEMMOVE (a1, a2, len)
+ a1const void *a1;
+ a2const void *a2;
+ size_t len;
+{
+ unsigned long int dstp = (long int) dest;
+ unsigned long int srcp = (long int) src;
+
+ /* This test makes the forward copying code be used whenever possible.
+ Reduces the working set. */
+ if (dstp - srcp >= len) /* *Unsigned* compare! */
+ {
+ /* Copy from the beginning to the end. */
+
+ /* If there not too few bytes to copy, use word copy. */
+ if (len >= OP_T_THRES)
+ {
+ /* Copy just a few bytes to make DSTP aligned. */
+ len -= (-dstp) % OPSIZ;
+ BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ);
+
+ /* Copy whole pages from SRCP to DSTP by virtual address
+ manipulation, as much as possible. */
+
+ PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len);
+
+ /* Copy from SRCP to DSTP taking advantage of the known
+ alignment of DSTP. Number of bytes remaining is put
+ in the third argument, i.e. in LEN. This number may
+ vary from machine to machine. */
+
+ WORD_COPY_FWD (dstp, srcp, len, len);
+
+ /* Fall out and copy the tail. */
+ }
+
+ /* There are just a few bytes to copy. Use byte memory operations. */
+ BYTE_COPY_FWD (dstp, srcp, len);
+ }
+ else
+ {
+ /* Copy from the end to the beginning. */
+ srcp += len;
+ dstp += len;
+
+ /* If there not too few bytes to copy, use word copy. */
+ if (len >= OP_T_THRES)
+ {
+ /* Copy just a few bytes to make DSTP aligned. */
+ len -= dstp % OPSIZ;
+ BYTE_COPY_BWD (dstp, srcp, dstp % OPSIZ);
+
+ /* Copy from SRCP to DSTP taking advantage of the known
+ alignment of DSTP. Number of bytes remaining is put
+ in the third argument, i.e. in LEN. This number may
+ vary from machine to machine. */
+
+ WORD_COPY_BWD (dstp, srcp, len, len);
+
+ /* Fall out and copy the tail. */
+ }
+
+ /* There are just a few bytes to copy. Use byte memory operations. */
+ BYTE_COPY_BWD (dstp, srcp, len);
+ }
+
+ RETURN (dest);
+}
+#ifndef memmove
+libc_hidden_builtin_def (memmove)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power7/wordcopy.c b/sysdeps/powerpc/powerpc32/power7/wordcopy.c
new file mode 100644
index 0000000..af35e98
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/wordcopy.c
@@ -0,0 +1,218 @@
+/* _memcopy.c -- subroutines for memory copy functions.
+ Copyright (C) 1991, 1996, 2006, 2012 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+ Updated for POWER6 by Steven Munroe (sjmunroe@us.ibm.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
+
+#include <stddef.h>
+#include <memcopy.h>
+
+/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
+
+void
+_wordcopy_fwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ do
+ {
+ a0 = ((op_t *) srcp)[0];
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = a0;
+ ((op_t *) dstp)[1] = a1;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ DSTP should be aligned for memory operations on `op_t's, but SRCP must
+ *not* be aligned. */
+
+#define fwd_align_merge(align) \
+ do \
+ { \
+ a1 = ((op_t *) srcp)[1]; \
+ a2 = ((op_t *) srcp)[2]; \
+ ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8)); \
+ ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8)); \
+ a0 = a2; \
+ srcp += 2 * OPSIZ; \
+ dstp += 2 * OPSIZ; \
+ len -= 2; \
+ } \
+ while (len != 0);
+
+void
+_wordcopy_fwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+ int align;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ align = srcp % OPSIZ;
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a0 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2);
+
+ if (len == 1)
+ return;
+
+ a0 = a1;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ fwd_align_merge(align)
+
+}
+
+/* _wordcopy_bwd_aligned -- Copy block finishing right before
+ SRCP to block finishing right before DSTP with LEN `op_t' words
+ (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
+ operations on `op_t's. */
+
+void
+_wordcopy_bwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ len -= 1;
+ }
+
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = a1;
+ ((op_t *) dstp)[0] = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+#define bwd_align_merge(align) \
+ do \
+ { \
+ srcp -= 2 * OPSIZ; \
+ dstp -= 2 * OPSIZ; \
+ a1 = ((op_t *) srcp)[1]; \
+ a0 = ((op_t *) srcp)[0]; \
+ ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8)); \
+ ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8)); \
+ a2 = a0; \
+ len -= 2; \
+ } \
+ while (len != 0);
+
+/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
+ before SRCP to block finishing right before DSTP with LEN `op_t'
+ words (not LEN bytes!). DSTP should be aligned for memory
+ operations on `op_t', but SRCP must *not* be aligned. */
+
+void
+_wordcopy_bwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+ int align;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ align = srcp % OPSIZ;
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make srcp aligned by rounding it down to the beginning of the op_t
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a2 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
+
+ if (len == 1)
+ return;
+
+ a2 = a1;
+ len -= 1;
+ }
+
+ bwd_align_merge(align)
+}
diff --git a/sysdeps/powerpc/powerpc64/power7/wordcopy.c b/sysdeps/powerpc/powerpc64/power7/wordcopy.c
new file mode 100644
index 0000000..f4b80dd
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/wordcopy.c
@@ -0,0 +1,218 @@
+/* _memcopy.c -- subroutines for memory copy functions.
+ Copyright (C) 1991, 1996, 2012 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
+
+#include <stddef.h>
+#include <memcopy.h>
+
+/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
+
+void
+_wordcopy_fwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ do
+ {
+ a0 = ((op_t *) srcp)[0];
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = a0;
+ ((op_t *) dstp)[1] = a1;
+
+ srcp += 2 * OPSIZ;
+ dstp += 2 * OPSIZ;
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+#define fwd_align_merge(align) \
+ do \
+ { \
+ a1 = ((op_t *) srcp)[1]; \
+ a2 = ((op_t *) srcp)[2]; \
+ ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \
+ ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \
+ a0 = a2; \
+ srcp += 2 * OPSIZ; \
+ dstp += 2 * OPSIZ; \
+ len -= 2; \
+ } \
+ while (len != 0);
+
+
+/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ DSTP should be aligned for memory operations on `op_t's, but SRCP must
+ *not* be aligned. */
+
+void
+_wordcopy_fwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+ int align;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ align = srcp % OPSIZ;
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a0 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2);
+
+ if (len == 1)
+ return;
+
+ a0 = a1;
+ srcp += OPSIZ;
+ dstp += OPSIZ;
+ len -= 1;
+ }
+
+ fwd_align_merge(align)
+
+}
+
+/* _wordcopy_bwd_aligned -- Copy block finishing right before
+ SRCP to block finishing right before DSTP with LEN `op_t' words
+ (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
+ operations on `op_t's. */
+
+void
+_wordcopy_bwd_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1;
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ ((op_t *) dstp)[0] = ((op_t *) srcp)[0];
+
+ if (len == 1)
+ return;
+ len -= 1;
+ }
+
+ do
+ {
+ srcp -= 2 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+
+ a1 = ((op_t *) srcp)[1];
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[1] = a1;
+ ((op_t *) dstp)[0] = a0;
+
+ len -= 2;
+ }
+ while (len != 0);
+}
+
+#define bwd_align_merge(align) \
+ do \
+ { \
+ srcp -= 2 * OPSIZ; \
+ dstp -= 2 * OPSIZ; \
+ a1 = ((op_t *) srcp)[1]; \
+ a0 = ((op_t *) srcp)[0]; \
+ ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \
+ ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \
+ a2 = a0; \
+ len -= 2; \
+ } \
+ while (len != 0);
+
+/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
+ before SRCP to block finishing right before DSTP with LEN `op_t'
+ words (not LEN bytes!). DSTP should be aligned for memory
+ operations on `op_t', but SRCP must *not* be aligned. */
+
+void
+_wordcopy_bwd_dest_aligned (dstp, srcp, len)
+ long int dstp;
+ long int srcp;
+ size_t len;
+{
+ op_t a0, a1, a2;
+ int sh_1, sh_2;
+ int align;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ align = srcp % OPSIZ;
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make srcp aligned by rounding it down to the beginning of the op_t
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+ a2 = ((op_t *) srcp)[0];
+
+ if (len & 1)
+ {
+ srcp -= OPSIZ;
+ dstp -= OPSIZ;
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
+
+ if (len == 1)
+ return;
+
+ a2 = a1;
+ len -= 1;
+ }
+
+ bwd_align_merge(align)
+}