[RFC PATCH] Optimize bsearch() implementation for performance
Kuan-Wei Chiu
visitorckw@gmail.com
Sun Sep 1 01:57:08 GMT 2024
On Sun, Sep 01, 2024 at 09:54:56AM +0800, Kuan-Wei Chiu wrote:
> Optimize the bsearch() function to improve binary search performance.
> This modification reduces the execution time by approximately 8% on an
> x86 machine with a 100,000-element array under 1e8 searches. The
> optimization slightly increases the code size by 8 bytes.
>
> code size:
> * old:
> text data bss dec hex filename
> 250 0 0 250 fa ./stdlib/bsearch.o
> * new:
> text data bss dec hex filename
> 258 0 0 258 102 ./stdlib/bsearch.o
>
> benchmark:
> Old bsearch elapsed time: 4536833
> New bsearch elapsed time: 4137556
> Improve efficiency by 8 %
>
> Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> ---
> Since I only have x86 machines available for testing, I am not entirely
> certain whether this patch will bring performance improvements across
> different architectures. Therefore, I am sending this as an RFC patch
> first.
>
> bits/stdlib-bsearch.h | 20 +++++++++-----------
> 1 file changed, 9 insertions(+), 11 deletions(-)
>
FWIW, here is the code I used for testing:
/* tst-bsearch1.c */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define TEST_OLD
int arr[100000];
#define narr (sizeof (arr) / sizeof (arr[0]))
static int
comp (const void *p1, const void *p2)
{
int x1 = *(int *) p1;
int x2 = *(int *) p2;
if (x1 < x2)
return -1;
if (x1 > x2)
return 1;
return 0;
}
__attribute__((noinline)) void *
newbsearch (const void *__key, const void *__base, size_t __nmemb, size_t __size,
__compar_fn_t __compar)
{
const void *__p;
int __comparison;
while (__nmemb)
{
__p = (const void *) (((const char *) __base) + ((__nmemb >> 1) * __size));
__comparison = (*__compar) (__key, __p);
if (__comparison == 0)
{
#if __GNUC_PREREQ(4, 6)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wcast-qual"
#endif
return (void *) __p;
#if __GNUC_PREREQ(4, 6)
# pragma GCC diagnostic pop
#endif
}
if (__comparison > 0)
{
__base = ((const char *) __p) + __size;
--__nmemb;
}
__nmemb >>= 1;
}
return NULL;
}
__attribute__((noinline)) void *
oldbsearch (const void *__key, const void *__base, size_t __nmemb, size_t __size,
__compar_fn_t __compar)
{
size_t __l, __u, __idx;
const void *__p;
int __comparison;
__l = 0;
__u = __nmemb;
while (__l < __u)
{
__idx = (__l + __u) / 2;
__p = (const void *) (((const char *) __base) + (__idx * __size));
__comparison = (*__compar) (__key, __p);
if (__comparison < 0)
__u = __idx;
else if (__comparison > 0)
__l = __idx + 1;
else
{
#if __GNUC_PREREQ(4, 6)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wcast-qual"
#endif
return (void *) __p;
#if __GNUC_PREREQ(4, 6)
# pragma GCC diagnostic pop
#endif
}
}
return NULL;
}
static int
do_test (void)
{
volatile size_t i;
volatile int *res;
volatile clock_t begin, end;
int key;
long long int old_time, new_time;
for (i = 0; i < narr; ++i)
arr[i] = i;
asm volatile ("" : : : "memory");
begin = clock ();
asm volatile ("" : : : "memory");
for (i = 0; i < 1e8; ++i)
{
key = i % narr;
res = (int *) oldbsearch (&key, arr, narr, sizeof (arr[0]), comp);
if (res != arr + key)
{
printf ("entry %zd got wrong answer\n", i);
return 1;
}
}
asm volatile ("" : : : "memory");
end = clock ();
asm volatile ("" : : : "memory");
old_time = end - begin;
printf ("Old bsearch elapsed time: %lld\n", old_time);
asm volatile ("" : : : "memory");
begin = clock ();
asm volatile ("" : : : "memory");
for (i = 0; i < 1e8; ++i)
{
key = i % narr;
res = (int *) newbsearch (&key, arr, narr, sizeof (arr[0]), comp);
if (res != arr + key)
{
printf ("entry %zd got wrong answer\n", i);
return 1;
}
}
asm volatile ("" : : : "memory");
end = clock ();
asm volatile ("" : : : "memory");
new_time = end - begin;
printf ("New bsearch elapsed time: %lld\n", new_time);
printf ("Improve efficiency by %lld %%\n", (old_time - new_time) * 100 / old_time);
return 0;
}
#define TEST_FUNCTION do_test ()
#include "../test-skeleton.c"
More information about the Libc-alpha
mailing list