[PATCH 1/2] Ensure qsort recursion depth is bounded

Corinna Vinschen vinschen@redhat.com
Tue Mar 13 12:53:00 GMT 2018


On Mar 12 15:44, Håkan Lindqvist wrote:
> From 9d2f1fc59b8338f54c13dba4dac01d8751ceb8df Mon Sep 17 00:00:00 2001
> From: Hakan Lindqvist <hakan.lindqvist@enea.com>
> Date: Mon, 12 Mar 2018 13:51:07 +0100
> Subject: [PATCH 1/2] Ensure qsort recursion depth is bounded
> 
> The qsort algorithm splits the input array in three parts. The
> left and right parts may need further sorting. One of them is
> sorted by recursion, the other by iteration. This update ensures
> that it is the smaller part that is chosen for recursion.
> 
> By choosing the smaller part, each recursion level will handle
> less than half the array of the previous recursion level. Hence
> the recursion depth is bounded to be less than log2(n) i.e. 1
> level per significant bit in the array size n.
> 
> The update also includes code comments explaining the algorithm.

The patch looks good at first glance.  Would you mind to outline how
you tested it?


Thanks,
Corinna

> ---
>  newlib/libc/search/qsort.c | 68 ++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 56 insertions(+), 12 deletions(-)
> 
> diff --git a/newlib/libc/search/qsort.c b/newlib/libc/search/qsort.c
> index db3f589..4dc61be 100644
> --- a/newlib/libc/search/qsort.c
> +++ b/newlib/libc/search/qsort.c
> @@ -173,15 +173,18 @@ qsort (void *a,
>  	int cmp_result;
>  	int swaptype, swap_cnt;
>  
> -loop:	SWAPINIT(a, es);
> -	swap_cnt = 0;
> +	SWAPINIT(a, es);
> +loop:	swap_cnt = 0;
>  	if (n < 7) {
> +		/* Short arrays are insertion sorted. */
>  		for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
>  			for (pl = pm; pl > (char *) a && CMP(thunk, pl - es, pl) > 0;
>  			     pl -= es)
>  				swap(pl, pl - es);
>  		return;
>  	}
> +
> +	/* Select a pivot element, move it to the left. */
>  	pm = (char *) a + (n / 2) * es;
>  	if (n > 7) {
>  		pl = a;
> @@ -195,11 +198,17 @@ loop:	SWAPINIT(a, es);
>  		pm = med3(pl, pm, pn, cmp, thunk);
>  	}
>  	swap(a, pm);
> -	pa = pb = (char *) a + es;
>  
> +	/*
> +	 * Sort the array relative the pivot in four ranges as follows:
> +	 * { elems == pivot, elems < pivot, elems > pivot, elems == pivot }
> +	 */
> +	pa = pb = (char *) a + es;
>  	pc = pd = (char *) a + (n - 1) * es;
>  	for (;;) {
> +		/* Scan left to right stopping at first element > pivot. */
>  		while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
> +			/* Move elements == pivot to the left (to pa) */
>  			if (cmp_result == 0) {
>  				swap_cnt = 1;
>  				swap(pa, pb);
> @@ -207,7 +216,9 @@ loop:	SWAPINIT(a, es);
>  			}
>  			pb += es;
>  		}
> +		/* Scan right to left stopping at first element < pivot. */
>  		while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
> +			/* Move elements == pivot to the right (to pd) */
>  			if (cmp_result == 0) {
>  				swap_cnt = 1;
>  				swap(pc, pd);
> @@ -217,6 +228,7 @@ loop:	SWAPINIT(a, es);
>  		}
>  		if (pb > pc)
>  			break;
> +		/* The scan has found two elements to swap with each other. */
>  		swap(pb, pc);
>  		swap_cnt = 1;
>  		pb += es;
> @@ -230,24 +242,56 @@ loop:	SWAPINIT(a, es);
>  		return;
>  	}
>  
> +	/*
> +	 * Rearrange the array in three parts sorted like this:
> +	 * { elements < pivot, elements == pivot, elements > pivot }
> +	 */
>  	pn = (char *) a + n * es;
>  	r = min(pa - (char *)a, pb - pa);
>  	vecswap(a, pb - r, r);
>  	r = min(pd - pc, pn - pd - es);
>  	vecswap(pb, pn - r, r);
> -	if ((r = pb - pa) > es)
> +	d = pb - pa; /* d = Size of left part. */
> +	r = pd - pc; /* r = Size of right part. */
> +	pn -= r;     /* pn = Base of right part. */
> +
> +	/*
> +	 * Check which of the left and right parts are larger.
> +	 * Set (a, n)  to (base, size) of the larger part.
> +	 * Set (pa, r) to (base, size) of the smaller part.
> +	 */
> +	if (r > d) { /* Right part is the larger part */
> +		pa = a;
> +		a = pn;
> +		n = r;
> +		r = d;
> +	}
> +	else { /* Left part is the larger part, or both are equal. */
> +		pa = pn;
> +		n = d;
> +	}
> +
> +	/*
> +	 * The left and right parts each need further sorting if they
> +	 * contain two elements or more. If both need sorting we use
> +	 * recursion to sort the smaller part and save the larger part
> +	 * to be sorted by iteration after the recursion.
> +	 * Using recursion only for the smaller part guarantees a
> +	 * recursion depth that is bounded to be less than (log2(n)).
> +	 */
> +	if (r > es) {  /* Smaller part > 1 element. Both parts need sorting. */
> +		/* Sort smaller part using recursion. */
>  #if defined(I_AM_QSORT_R)
> -		__bsd_qsort_r(a, r / es, es, thunk, cmp);
> +		__bsd_qsort_r(pa, r / es, es, thunk, cmp);
>  #elif defined(I_AM_GNU_QSORT_R)
> -		qsort_r(a, r / es, es, cmp, thunk);
> +		qsort_r(pa, r / es, es, cmp, thunk);
>  #else
> -		qsort(a, r / es, es, cmp);
> +		qsort(pa, r / es, es, cmp);
>  #endif
> -	if ((r = pd - pc) > es) {
> -		/* Iterate rather than recurse to save stack space */
> -		a = pn - r;
> -		n = r / es;
> +	}
> +	if (n > es) {  /* The larger part needs sorting. Iterate to sort.  */
> +		n = n / es;
>  		goto loop;
>  	}
> -/*		qsort(pn - r, r / es, es, cmp);*/
> +	/* Both left and right parts are one element or less - level done. */
>  }
> -- 
> 1.8.5

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://sourceware.org/pipermail/newlib/attachments/20180313/9f55b92a/attachment.sig>


More information about the Newlib mailing list