This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] powerpc64: strcpy optimization for unaligned string



On 12/19/2014 02:43 AM, Ondřej Bílka wrote:
On Wed, Dec 17, 2014 at 09:34:53PM +0530, Rajalakshmi Srinivasaraghavan wrote:

This patch optimizes strcpy for ppc64 for unaligned source or
destination address. The source or destination address is aligned
to doubleword and data is shifted based on the alignment and
added with the previous loaded data to be written as a doubleword.
For each load, cmpb instruction is used for faster null check.

More combinations of unaligned inputs are also added in benchtest
to measure the improvement. The new optimization shows a 2 to 80%
performance improvement for longer strings, though it does not show
a big difference for string sizes less than 16 due to additional checks.

This patch is tested on powerpc64 BE and LE and I have also attached
the benchtest result.

As I wrote, the benchtests are suspect. First retest what happens if you
do not always call strcpy with the same input and output buffer. What
difference does that make in the benchmark?

I applied this patch with and without my optimization and I
could not see any decrease in performance. Attached the results.
diff --git a/benchtests/bench-strcpy.c b/benchtests/bench-strcpy.c
index c3ab4cf..0329f60 100644
--- a/benchtests/bench-strcpy.c
+++ b/benchtests/bench-strcpy.c
@@ -71,25 +71,25 @@ SIMPLE_STRCPY (CHAR *dst, const CHAR *src)
  typedef CHAR *(*proto_t) (CHAR *, const CHAR *);

  static void
-do_one_test (impl_t *impl, CHAR *dst, const CHAR *src,
+do_one_test (impl_t *impl, CHAR **dst, CHAR **src,
  	     size_t len __attribute__((unused)))
  {
    size_t i, iters = INNER_LOOP_ITERS;
    timing_t start, stop, cur;

-  if (CALL (impl, dst, src) != STRCPY_RESULT (dst, len))
+  if (CALL (impl, dst[0], src[0]) != STRCPY_RESULT (dst[0], len[0]))
Modified it as

 if (CALL (impl, dst[0], src[0]) != STRCPY_RESULT (dst[0], len))

      {
        error (0, 0, "Wrong result in function %s %p %p", impl->name,
-	     CALL (impl, dst, src), STRCPY_RESULT (dst, len));
+	     CALL (impl, dst[0], src[0]), STRCPY_RESULT (dst[0], len));
        ret = 1;
        return;
      }

-  if (STRCMP (dst, src) != 0)
+  if (STRCMP (dst[0], src[0]) != 0)
      {
        error (0, 0,
  	     "Wrong result in function %s dst \"%" sfmt "\" src \"%" sfmt "\"",
-	     impl->name, dst, src);
+	     impl->name, dst[0], src[0]);
        ret = 1;
        return;
      }
@@ -97,7 +97,7 @@ do_one_test (impl_t *impl, CHAR *dst, const CHAR *src,
    TIMING_NOW (start);
    for (i = 0; i < iters; ++i)
      {
-	  CALL (impl, dst, src);
+	  CALL (impl, dst[i % 16], src[i % 16]);
      }
    TIMING_NOW (stop);

@@ -109,8 +109,8 @@ do_one_test (impl_t *impl, CHAR *dst, const CHAR *src,
  static void
  do_test (size_t align1, size_t align2, size_t len, int max_char)
  {
-  size_t i;
-  CHAR *s1, *s2;
+  size_t i, j;
+  CHAR **s1, **s2;
  /* For wcscpy: align1 and align2 here mean alignment not in bytes,
     but in wchar_ts, in bytes it will equal to align * (sizeof (wchar_t))
     len for wcschr here isn't in bytes but it's number of wchar_t symbols.  */
@@ -122,12 +122,17 @@ do_test (size_t align1, size_t align2, size_t len, int max_char)
    if ((align2 + len) * sizeof(CHAR) >= page_size)
      return;

-  s1 = (CHAR *) (buf1) + align1;
-  s2 = (CHAR *) (buf2) + align2;
+  s1 = calloc (sizeof (char *), 16);
+  s2 = calloc (sizeof (char *), 16);
+  for (j = 0; j < 16; j++)
+    {
+      s1[j] = ((CHAR *) calloc (align1 + len + 1, sizeof (CHAR))) + align1;
+      s2[j] = ((CHAR *) calloc (align2 + len + 1, sizeof (CHAR))) + align2;

-  for (i = 0; i < len; i++)
-    s1[i] = 32 + 23 * i % (max_char - 32);
-  s1[len] = 0;
+      for (i = 0; i < len; i++)
+        s1[j][i] = 32 + 23 * i % (max_char - 32);
+      s1[j][len] = 0;
+    }

    printf ("Length %4zd, alignments in bytes %2zd/%2zd:", len, align1 * sizeof(CHAR), align2 * sizeof(CHAR));




--
Thanks
Rajalakshmi S

Attachment: strcpy_with_optimization.out
Description: Text document

Attachment: strcpy_without_optimization.out
Description: Text document


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]