This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH 5/5] [Powerpc] tune/optimize memmove/wordcopy. preload srcp values
- From: Will Schmidt <will_schmidt at vnet dot ibm dot com>
- To: libc-alpha at sourceware dot org
- Cc: willschm at us dot ibm dot com
- Date: Mon, 12 Mar 2012 16:38:46 -0500
- Subject: [PATCH 5/5] [Powerpc] tune/optimize memmove/wordcopy. preload srcp values
- References: <20120312213742.28917.97709.stgit@brimstone>
[Powerpc] tune/optimize memmove/wordcopy. preload srcp values
Rework the while loops to (pre-)load the srcp values ahead of time while
it is safe to do so (length != 0).
2012-03-12 Will Schmidt <will_schmidt@vnet.ibm.com>
* sysdeps/powerpc/powerpc64/power7/wordcopy.c: Load srcp values
ahead of time if safe.
* sysdeps/powerpc/powerpc32/power7/wordcopy.c: Likewise.
---
sysdeps/powerpc/powerpc32/power7/wordcopy.c | 40 ++++++++++++-----
sysdeps/powerpc/powerpc64/power7/wordcopy.c | 65 +++++++++++++++++----------
2 files changed, 68 insertions(+), 37 deletions(-)
diff --git a/sysdeps/powerpc/powerpc32/power7/wordcopy.c b/sysdeps/powerpc/powerpc32/power7/wordcopy.c
index 00bd444..7040b5c 100644
--- a/sysdeps/powerpc/powerpc32/power7/wordcopy.c
+++ b/sysdeps/powerpc/powerpc32/power7/wordcopy.c
@@ -67,18 +67,25 @@ _wordcopy_fwd_aligned (dstp, srcp, len)
*not* be aligned. */
#define fwd_align_merge(align) \
+ { \
+ a1 = ((op_t *) srcp)[1]; \
+ a2 = ((op_t *) srcp)[2]; \
do \
{ \
- a1 = ((op_t *) srcp)[1]; \
- a2 = ((op_t *) srcp)[2]; \
((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8)); \
((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8)); \
- a0 = a2; \
- srcp += 2 * OPSIZ; \
- dstp += 2 * OPSIZ; \
len -= 2; \
+ if (len) \
+ { \
+ srcp += 2 * OPSIZ; \
+ dstp += 2 * OPSIZ; \
+ a0 = a2; \
+ a1 = ((op_t *) srcp)[1]; \
+ a2 = ((op_t *) srcp)[2]; \
+ } \
} \
- while (len != 0);
+ while (len != 0); \
+ }
void
_wordcopy_fwd_dest_aligned (dstp, srcp, len)
@@ -168,18 +175,27 @@ _wordcopy_bwd_aligned (dstp, srcp, len)
}
#define bwd_align_merge(align) \
+ { \
+ srcp -= 2 * OPSIZ; \
+ dstp -= 2 * OPSIZ; \
+ a1 = ((op_t *) srcp)[1]; \
+ a0 = ((op_t *) srcp)[0]; \
do \
{ \
- srcp -= 2 * OPSIZ; \
- dstp -= 2 * OPSIZ; \
- a1 = ((op_t *) srcp)[1]; \
- a0 = ((op_t *) srcp)[0]; \
((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8)); \
((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8)); \
- a2 = a0; \
len -= 2; \
+ if (len) \
+ { \
+ srcp -= 2 * OPSIZ; \
+ dstp -= 2 * OPSIZ; \
+ a2 = a0; \
+ a1 = ((op_t *) srcp)[1]; \
+ a0 = ((op_t *) srcp)[0]; \
+ } \
} \
- while (len != 0);
+ while (len != 0); \
+ }
/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
before SRCP to block finishing right before DSTP with LEN `op_t'
diff --git a/sysdeps/powerpc/powerpc64/power7/wordcopy.c b/sysdeps/powerpc/powerpc64/power7/wordcopy.c
index 1c7b99a..6c5bdc7 100644
--- a/sysdeps/powerpc/powerpc64/power7/wordcopy.c
+++ b/sysdeps/powerpc/powerpc64/power7/wordcopy.c
@@ -61,19 +61,25 @@ _wordcopy_fwd_aligned (dstp, srcp, len)
}
#define fwd_align_merge(align) \
- do \
- { \
- a1 = ((op_t *) srcp)[1]; \
- a2 = ((op_t *) srcp)[2]; \
- ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \
- ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \
- a0 = a2; \
- srcp += 2 * OPSIZ; \
- dstp += 2 * OPSIZ; \
- len -= 2; \
- } \
- while (len != 0);
-
+ { \
+ a1 = ((op_t *) srcp)[1]; \
+ a2 = ((op_t *) srcp)[2]; \
+ do \
+ { \
+ ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \
+ ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \
+ len -= 2; \
+ if (len) \
+ { \
+ srcp += 2 * OPSIZ; \
+ dstp += 2 * OPSIZ; \
+ a0 = a2; \
+ a1 = ((op_t *) srcp)[1]; \
+ a2 = ((op_t *) srcp)[2]; \
+ } \
+ } \
+ while (len != 0); \
+ }
/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
@@ -188,18 +194,27 @@ _wordcopy_bwd_aligned (dstp, srcp, len)
}
#define bwd_align_merge(align) \
- do \
- { \
- srcp -= 2 * OPSIZ; \
- dstp -= 2 * OPSIZ; \
- a1 = ((op_t *) srcp)[1]; \
- a0 = ((op_t *) srcp)[0]; \
- ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \
- ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \
- a2 = a0; \
- len -= 2; \
- } \
- while (len != 0);
+ { \
+ srcp -= 2 * OPSIZ; \
+ dstp -= 2 * OPSIZ; \
+ a1 = ((op_t *) srcp)[1]; \
+ a0 = ((op_t *) srcp)[0]; \
+ do \
+ { \
+ ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \
+ ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \
+ len -= 2; \
+ if (len) \
+ { \
+ srcp -= 2 * OPSIZ; \
+ dstp -= 2 * OPSIZ; \
+ a2 = a0; \
+ a1 = ((op_t *) srcp)[1]; \
+ a0 = ((op_t *) srcp)[0]; \
+ } \
+ } \
+ while (len != 0); \
+ }
/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
before SRCP to block finishing right before DSTP with LEN `op_t'