This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH 3/5] [Powerpc] tune/optimize memmove/wordcopy. Add helpermacro to call MERGE
- From: Will Schmidt <will_schmidt at vnet dot ibm dot com>
- To: libc-alpha at sourceware dot org
- Cc: willschm at us dot ibm dot com
- Date: Mon, 12 Mar 2012 16:38:26 -0500
- Subject: [PATCH 3/5] [Powerpc] tune/optimize memmove/wordcopy. Add helpermacro to call MERGE
- References: <20120312213742.28917.97709.stgit@brimstone>
[Powerpc] tune/optimize memmove/wordcopy. Add helper macro to call MERGE
Add helper macros to handle calling the MERGE function with the proper
alignments. This significantly shortens the code.
2012-03-12 Will Schmidt <will_schmidt@vnet.ibm.com>
* sysdeps/powerpc/powerpc64/power7/wordcopy.c: Add fwd_align_merge and
bwd_align_merge macros.
* sysdeps/powerpc/powerpc32/power7/wordcopy.c: Likewise.
---
sysdeps/powerpc/powerpc32/power7/wordcopy.c | 115 ++++---------
sysdeps/powerpc/powerpc64/power7/wordcopy.c | 232 +++++----------------------
2 files changed, 77 insertions(+), 270 deletions(-)
diff --git a/sysdeps/powerpc/powerpc32/power7/wordcopy.c b/sysdeps/powerpc/powerpc32/power7/wordcopy.c
index fbe1cba..9748268 100644
--- a/sysdeps/powerpc/powerpc32/power7/wordcopy.c
+++ b/sysdeps/powerpc/powerpc32/power7/wordcopy.c
@@ -66,6 +66,20 @@ _wordcopy_fwd_aligned (dstp, srcp, len)
DSTP should be aligned for memory operations on `op_t's, but SRCP must
*not* be aligned. */
+#define fwd_align_merge(align) \
+ do \
+ { \
+ a1 = ((op_t *) srcp)[1]; \
+ a2 = ((op_t *) srcp)[2]; \
+ ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8)); \
+ ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8)); \
+ a0 = a2; \
+ srcp += 2 * OPSIZ; \
+ dstp += 2 * OPSIZ; \
+ len -= 2; \
+ } \
+ while (len != 0);
+
void
_wordcopy_fwd_dest_aligned (dstp, srcp, len)
long int dstp;
@@ -105,49 +119,13 @@ _wordcopy_fwd_dest_aligned (dstp, srcp, len)
switch (align)
{
case 1:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8));
- ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge(1)
break;
case 2:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16));
- ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge(2)
break;
case 3:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24));
- ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge(3)
break;
}
@@ -192,6 +170,20 @@ _wordcopy_bwd_aligned (dstp, srcp, len)
while (len != 0);
}
+#define bwd_align_merge(align) \
+ do \
+ { \
+ srcp -= 2 * OPSIZ; \
+ dstp -= 2 * OPSIZ; \
+ a1 = ((op_t *) srcp)[1]; \
+ a0 = ((op_t *) srcp)[0]; \
+ ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8)); \
+ ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8)); \
+ a2 = a0; \
+ len -= 2; \
+ } \
+ while (len != 0);
+
/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
before SRCP to block finishing right before DSTP with LEN `op_t'
words (not LEN bytes!). DSTP should be aligned for memory
@@ -236,52 +228,13 @@ _wordcopy_bwd_dest_aligned (dstp, srcp, len)
switch (align)
{
case 1:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8));
- ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge(1)
break;
case 2:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16));
- ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge(2)
break;
case 3:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24));
- ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge(3)
break;
}
}
diff --git a/sysdeps/powerpc/powerpc64/power7/wordcopy.c b/sysdeps/powerpc/powerpc64/power7/wordcopy.c
index afdccab..1ff9c92 100644
--- a/sysdeps/powerpc/powerpc64/power7/wordcopy.c
+++ b/sysdeps/powerpc/powerpc64/power7/wordcopy.c
@@ -60,6 +60,21 @@ _wordcopy_fwd_aligned (dstp, srcp, len)
while (len != 0);
}
+#define fwd_align_merge(align) \
+ do \
+ { \
+ a1 = ((op_t *) srcp)[1]; \
+ a2 = ((op_t *) srcp)[2]; \
+ ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \
+ ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \
+ a0 = a2; \
+ srcp += 2 * OPSIZ; \
+ dstp += 2 * OPSIZ; \
+ len -= 2; \
+ } \
+ while (len != 0);
+
+
/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
DSTP should be aligned for memory operations on `op_t's, but SRCP must
@@ -104,109 +119,25 @@ _wordcopy_fwd_dest_aligned (dstp, srcp, len)
switch (align)
{
case 1:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8));
- ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge(1)
break;
case 2:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16));
- ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge(2)
break;
case 3:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24));
- ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge(3)
break;
case 4:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32));
- ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge(4)
break;
case 5:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40));
- ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge(5)
break;
case 6:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48));
- ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge(6)
break;
case 7:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56));
- ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge(7)
break;
}
@@ -251,6 +182,20 @@ _wordcopy_bwd_aligned (dstp, srcp, len)
while (len != 0);
}
+#define bwd_align_merge(align) \
+ do \
+ { \
+ srcp -= 2 * OPSIZ; \
+ dstp -= 2 * OPSIZ; \
+ a1 = ((op_t *) srcp)[1]; \
+ a0 = ((op_t *) srcp)[0]; \
+ ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \
+ ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \
+ a2 = a0; \
+ len -= 2; \
+ } \
+ while (len != 0);
+
/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
before SRCP to block finishing right before DSTP with LEN `op_t'
words (not LEN bytes!). DSTP should be aligned for memory
@@ -295,116 +240,25 @@ _wordcopy_bwd_dest_aligned (dstp, srcp, len)
switch (align)
{
case 1:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8));
- ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge(1)
break;
case 2:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16));
- ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge(2)
break;
case 3:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24));
- ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge(3)
break;
case 4:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32));
- ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge(4)
break;
case 5:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40));
- ((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge(5)
break;
case 6:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48));
- ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge(6)
break;
case 7:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56));
- ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge(7)
break;
}
}