[PATCH] Implement introsort for qsort
Kuan-Wei Chiu
visitorckw@gmail.com
Sat Dec 30 19:59:40 GMT 2023
Enhances the qsort implementation by introducing introsort to address
the worst-case time complexity of O(n^2) associated with quicksort.
Introsort is utilized to switch to heapsort when quicksort recursion
depth becomes excessive, ensuring a worst-case time complexity of
O(n log n).
The heapsort implementation adopts a bottom-up approach, significantly
reducing the number of required comparisons and enhancing overall
efficiency.
Refs:
Introspective Sorting and Selection Algorithms
David R. Musser
Software—Practice & Experience, 27(8); Pages 983–993, Aug 1997
https://dl.acm.org/doi/10.5555/261387.261395
A killer adversary for quicksort
M. D. McIlroy
Software—Practice & Experience, 29(4); Pages 341–344, 10 April 1999
https://dl.acm.org/doi/10.5555/311868.311871
BOTTOM-UP-HEAPSORT, a new variant of HEAPSORT beating, on an average,
QUICKSORT (if n is not very small)
Ingo Wegener
Theoretical Computer Science, 118(1); Pages 81-98, 13 September 1993
https://dl.acm.org/doi/10.5555/162625.162643
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
---
To assess the performance of the new introsort and the old quicksort in
worst-case scenarios, we examined the number of comparisons required
for sorting based on the paper "A killer adversary for quicksort."
Before the patch:
n = 1000, cmp_count = 100853
n = 2000, cmp_count = 395991
n = 3000, cmp_count = 885617
n = 4000, cmp_count = 1569692
n = 5000, cmp_count = 2448183
n = 6000, cmp_count = 3521140
n = 7000, cmp_count = 4788493
n = 8000, cmp_count = 6250342
n = 9000, cmp_count = 7906610
n = 10000, cmp_count = 9757353
After the patch:
n = 1000, cmp_count = 27094
n = 2000, cmp_count = 61500
n = 3000, cmp_count = 100529
n = 4000, cmp_count = 136538
n = 5000, cmp_count = 182698
n = 6000, cmp_count = 221120
n = 7000, cmp_count = 259933
n = 8000, cmp_count = 299058
n = 9000, cmp_count = 356405
n = 10000, cmp_count = 397723
newlib/libc/search/qsort.c | 153 +++++++++++++++++++++++++++++++++++--
1 file changed, 147 insertions(+), 6 deletions(-)
diff --git a/newlib/libc/search/qsort.c b/newlib/libc/search/qsort.c
index b53400aa8..61db73746 100644
--- a/newlib/libc/search/qsort.c
+++ b/newlib/libc/search/qsort.c
@@ -65,6 +65,7 @@ PORTABILITY
#include <_ansi.h>
#include <sys/cdefs.h>
#include <stdlib.h>
+#include <limits.h>
#ifndef __GNUC__
#define inline
@@ -145,6 +146,130 @@ __unused
:(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c ));
}
+/*
+ * Sifts the element at index 'i' down into the heap.
+ *
+ * This variant of siftdown follows a bottom-up approach, reducing the overall
+ * number of comparisons. The algorithm identifies the path for the element to
+ * sift down, reaching the leaves with only one comparison per level.
+ * Subsequently, it backtracks to determine the appropriate position to insert
+ * the target element.
+ *
+ * As elements tend to sift down closer to the leaves, this approach minimizes
+ * the number of comparisons compared to the traditional two per level descent.
+ * On average, it uses slightly more than half as many comparisons, with a
+ * worst-case scenario of 3/4th the comparisons.
+ */
+#if defined(I_AM_QSORT_R)
+static inline void
+__bsd_siftdown_r (void *a,
+ size_t i,
+ size_t n,
+ size_t es,
+ int swaptype,
+ void *thunk,
+ cmp_t *cmp)
+#elif defined(I_AM_GNU_QSORT_R)
+static inline void
+siftdown_r (void *a,
+ size_t i,
+ size_t n,
+ size_t es,
+ int swaptype,
+ cmp_t *cmp,
+ void *thunk)
+#else
+#define thunk NULL
+static inline void
+siftdown (void *a,
+ size_t i,
+ size_t n,
+ size_t es,
+ int swaptype,
+ cmp_t *cmp)
+#endif
+{
+ size_t j, k;
+
+ /* Find the sift-down path all the way to the leaves. */
+ for (j = i; k = 2 * j + 1, k + 1 < n;)
+ j = CMP(thunk, (char *) a + k * es, (char *) a + (k + 1) * es) > 0 ? k : (k + 1);
+
+ /* Special case for the last leaf with no sibling. */
+ if (k + 1 == n)
+ j = k;
+
+ /* Backtrack to the correct location. */
+ while (i != j && CMP(thunk, (char *) a + i * es, (char *) a + j * es) > 0)
+ j = (j - 1) / 2;
+
+ /* Shift the element into its correct place. */
+ k = j;
+ while (i != j) {
+ j = (j - 1) / 2;
+ swap((char *) a + j * es, (char *) a + k * es);
+ }
+}
+
+/*
+ * Heapsort is employed to achieve O(n log n) worst-case time complexity and
+ * O(1) additional space complexity for introsort implementation. When
+ * quicksort recursion becomes too deep, switching to heapsort helps maintain
+ * efficiency.
+ */
+#if defined(I_AM_QSORT_R)
+static void
+__bsd_heapsort_r (void *a,
+ size_t n,
+ size_t es,
+ int swaptype,
+ void *thunk,
+ cmp_t *cmp)
+#elif defined(I_AM_GNU_QSORT_R)
+static void
+heapsort_r (void *a,
+ size_t n,
+ size_t es,
+ int swaptype,
+ cmp_t *cmp,
+ void *thunk)
+#else
+#define thunk NULL
+static void
+heapsort (void *a,
+ size_t n,
+ size_t es,
+ int swaptype,
+ cmp_t *cmp)
+#endif
+{
+ size_t i = n / 2;
+
+ /* Build a max heap. */
+ while(i) {
+ --i;
+#if defined(I_AM_QSORT_R)
+ __bsd_siftdown_r(a, i, n, es, swaptype, thunk, cmp);
+#elif defined(I_AM_GNU_QSORT_R)
+ siftdown_r(a, i, n, es, swaptype, cmp, thunk);
+#else
+ siftdown(a, i, n, es, swaptype, cmp);
+#endif
+ }
+
+ /* Extract elements from the heap one by one. */
+ for(i = n - 1; i; i--) {
+ swap((char *) a, (char *) a + i * es);
+#if defined(I_AM_QSORT_R)
+ __bsd_siftdown_r(a, 0, i, es, swaptype, thunk, cmp);
+#elif defined(I_AM_GNU_QSORT_R)
+ siftdown_r(a, 0, i, es, swaptype, cmp, thunk);
+#else
+ siftdown(a, 0, i, es, swaptype, cmp);
+#endif
+ }
+}
+
/*
* Classical function call recursion wastes a lot of stack space. Each
* recursion level requires a full stack frame comprising all local variables
@@ -189,7 +314,9 @@ qsort (void *a,
int cmp_result;
int swaptype, swap_cnt;
size_t recursion_level = 0;
- struct { void *a; size_t n; } parameter_stack[PARAMETER_STACK_LEVELS];
+ size_t current_level = 0;
+ size_t max_level = 2 * (sizeof(size_t) * CHAR_BIT - 1 - __builtin_clzl(n));
+ struct { void *a; size_t n; size_t level; } parameter_stack[PARAMETER_STACK_LEVELS];
SWAPINIT(a, es);
loop: swap_cnt = 0;
@@ -202,6 +329,18 @@ loop: swap_cnt = 0;
goto pop;
}
+ /* Switch to heapsort when the current level exceeds the maximum level. */
+ if(++current_level > max_level) {
+#if defined(I_AM_QSORT_R)
+ __bsd_heapsort_r(a, n, es, swaptype, thunk, cmp);
+#elif defined(I_AM_GNU_QSORT_R)
+ heapsort_r(a, n, es, swaptype, thunk, cmp);
+#else
+ heapsort(a, n, es, swaptype, cmp);
+#endif
+ goto pop;
+ }
+
/* Select a pivot element, move it to the left. */
pm = (char *) a + (n / 2) * es;
if (n > 7) {
@@ -310,6 +449,7 @@ loop: swap_cnt = 0;
*/
parameter_stack[recursion_level].a = a;
parameter_stack[recursion_level].n = n / es;
+ parameter_stack[recursion_level].level = current_level;
recursion_level++;
a = pa;
n = r / es;
@@ -318,15 +458,15 @@ loop: swap_cnt = 0;
else {
/*
* The parameter_stack array is full. The smaller part
- * is sorted using function call recursion. The larger
- * part will be sorted after the function call returns.
+ * is sorted using heapsort. The larger part will be
+ * sorted after the function call returns.
*/
#if defined(I_AM_QSORT_R)
- __bsd_qsort_r(pa, r / es, es, thunk, cmp);
+ __bsd_heapsort_r(pa, r / es, es, swaptype, thunk, cmp);
#elif defined(I_AM_GNU_QSORT_R)
- qsort_r(pa, r / es, es, cmp, thunk);
+ heapsort_r(pa, r / es, es, swaptype, cmp, thunk);
#else
- qsort(pa, r / es, es, cmp);
+ heapsort(pa, r / es, es, swaptype, cmp);
#endif
}
}
@@ -340,6 +480,7 @@ pop:
recursion_level--;
a = parameter_stack[recursion_level].a;
n = parameter_stack[recursion_level].n;
+ current_level = parameter_stack[recursion_level].level;
goto loop;
}
}
--
2.25.1
More information about the Newlib
mailing list