This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 6/7] stdlib: Optimization qsort{_r} swap implementation


This patchs adds a optimized swap operation on qsort based in previous
msort one.  Instead of byte operation, three variants are provided:

  1. Using uint32_t loads and stores.
  2. Using uint64_t loads and stores.
  3. Generic one with a temporary buffer and memcpy/mempcpy.

The 1. and 2. option are selected only either if architecture defines
_STRING_ARCH_unaligned or if base pointer is aligned to required type.
This is due based on data for bench-qsort, usually programs calls
qsort with array with multiple of machine word as element size.

Benchmarking shows an increase performance:

Results for member size 4
  Sorted
  nmemb   |      base |   patched | diff
        32|      1401 |      1958 | 39.76
      4096|    351333 |    368533 | 4.90
     32768|   3369386 |   3131712 | -7.05
    524288|  63192972 |  59807494 | -5.36

  MostlySorted
  nmemb   |      base |   patched | diff
        32|      2391 |      2061 | -13.80
      4096|   1124074 |    961816 | -14.43
     32768|  11196607 |   9410438 | -15.95
    524288| 215908169 | 185586732 | -14.04

  Unsorted
  nmemb   |      base |   patched | diff
        32|      4993 |      2021 | -59.52
      4096|   1113860 |    963126 | -13.53
     32768|  11251293 |   9518795 | -15.40
    524288| 217252237 | 185072278 | -14.81

Results for member size 8
  Sorted
  nmemb   |      base |   patched | diff
        32|      1296 |      1267 | -2.24
      4096|    359418 |    334852 | -6.83
     32768|   3535229 |   3345157 | -5.38
    524288|  69847251 |  67029358 | -4.03

  MostlySorted
  nmemb   |      base |   patched | diff
        32|      2745 |      2340 | -14.75
      4096|   1222082 |   1014314 | -17.00
     32768|  12244800 |   9924706 | -18.95
    524288| 241557971 | 196898760 | -18.49

  Unsorted
  nmemb   |      base |   patched | diff
        32|      2972 |      2389 | -19.62
      4096|   1314861 |   1024052 | -22.12
     32768|  12397909 |  10120848 | -18.37
    524288| 241789262 | 193414824 | -20.01

Results for member size 32
  Sorted
  nmemb   |      base |   patched | diff
        32|      1305 |      1287 | -1.38
      4096|    346332 |    347979 | 0.48
     32768|   3458244 |   3408058 | -1.45
    524288|  72793445 |  69973719 | -3.87

  MostlySorted
  nmemb   |      base |   patched | diff
        32|      5435 |      4890 | -10.03
      4096|   2032260 |   1688556 | -16.91
     32768|  19909035 |  16419992 | -17.52
    524288| 390339319 | 325921585 | -16.50

  Unsorted
  nmemb   |      base |   patched | diff
        32|      5833 |      5351 | -8.26
      4096|   2022531 |   1724961 | -14.71
     32768|  19842888 |  16588545 | -16.40
    524288| 388838382 | 324102703 | -16.65

Checked on x86_64-linux-gnu.

	[BZ #19305].
	* stdlib/qsort.c (SWAP): Remove.
	(check_alignment, swap_u32, swap_u64, swap_generic,
	select_swap_func): New functions.
	(__qsort_r):
---
 stdlib/qsort.c | 77 ++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 59 insertions(+), 18 deletions(-)

diff --git a/stdlib/qsort.c b/stdlib/qsort.c
index b3a5102..2194003 100644
--- a/stdlib/qsort.c
+++ b/stdlib/qsort.c
@@ -23,20 +23,59 @@
 #include <limits.h>
 #include <stdlib.h>
 #include <string.h>
+#include <stdbool.h>
 
-/* Byte-wise swap two items of size SIZE. */
-#define SWAP(a, b, size)						      \
-  do									      \
-    {									      \
-      size_t __size = (size);						      \
-      char *__a = (a), *__b = (b);					      \
-      do								      \
-	{								      \
-	  char __tmp = *__a;						      \
-	  *__a++ = *__b;						      \
-	  *__b++ = __tmp;						      \
-	} while (--__size > 0);						      \
-    } while (0)
+/* Swap SIZE bytes between addresses A and B.  Helper to generic types
+   are provided as an optimization.  */
+
+typedef void (*swap_t)(void *, void *, size_t);
+
+static inline bool
+check_alignment (const void *base, size_t align)
+{
+  return _STRING_ARCH_unaligned || ((uintptr_t)base % (align - 1)) == 0;
+}
+
+static void
+swap_u32 (void *a, void *b, size_t size)
+{
+  uint32_t tmp = *(uint32_t*) a;
+  *(uint32_t*) a = *(uint32_t*) b;
+  *(uint32_t*) b = tmp;
+}
+
+static void
+swap_u64 (void *a, void *b, size_t size)
+{
+  uint64_t tmp = *(uint64_t*) a;
+  *(uint64_t*) a = *(uint64_t*) b;
+  *(uint64_t*) b = tmp;
+}
+
+static inline void
+swap_generic (void *a, void *b, size_t size)
+{
+  unsigned char tmp[128];
+  do
+    {
+      size_t s = size > sizeof (tmp) ? sizeof (tmp) : size;
+      memcpy (tmp, a, s);
+      a = __mempcpy (a, b, s);
+      b = __mempcpy (b, tmp, s);
+      size -= s;
+    }
+  while (size > 0);
+}
+
+static inline swap_t
+select_swap_func (const void *base, size_t size)
+{
+  if (size == 4 && check_alignment (base, 4))
+    return swap_u32;
+  else if (size == 8 && check_alignment (base, 8))
+    return swap_u64;
+  return swap_generic;
+}
 
 /* Discontinue quicksort algorithm when partition gets below this size.
    This particular magic number was chosen to work best on a Sun 4/260. */
@@ -96,6 +135,8 @@ __qsort_r (void *const pbase, size_t total_elems, size_t size,
     /* Avoid lossage with unsigned arithmetic below.  */
     return;
 
+  swap_t swap = select_swap_func (pbase, size);
+
   if (total_elems > MAX_THRESH)
     {
       char *lo = base_ptr;
@@ -119,13 +160,13 @@ __qsort_r (void *const pbase, size_t total_elems, size_t size,
 	  char *mid = lo + size * ((hi - lo) / size >> 1);
 
 	  if ((*cmp) ((void *) mid, (void *) lo, arg) < 0)
-	    SWAP (mid, lo, size);
+	    swap (mid, lo, size);
 	  if ((*cmp) ((void *) hi, (void *) mid, arg) < 0)
-	    SWAP (mid, hi, size);
+	    swap (mid, hi, size);
 	  else
 	    goto jump_over;
 	  if ((*cmp) ((void *) mid, (void *) lo, arg) < 0)
-	    SWAP (mid, lo, size);
+	    swap (mid, lo, size);
 	jump_over:;
 
 	  left_ptr  = lo + size;
@@ -144,7 +185,7 @@ __qsort_r (void *const pbase, size_t total_elems, size_t size,
 
 	      if (left_ptr < right_ptr)
 		{
-		  SWAP (left_ptr, right_ptr, size);
+		  swap (left_ptr, right_ptr, size);
 		  if (mid == left_ptr)
 		    mid = right_ptr;
 		  else if (mid == right_ptr)
@@ -216,7 +257,7 @@ __qsort_r (void *const pbase, size_t total_elems, size_t size,
         tmp_ptr = run_ptr;
 
     if (tmp_ptr != base_ptr)
-      SWAP (tmp_ptr, base_ptr, size);
+      swap (tmp_ptr, base_ptr, size);
 
     /* Insertion sort, running from left-hand-side up to right-hand-side.  */
 
-- 
2.7.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]