[RFC PATCH] Optimize bsearch() implementation for performance

Kuan-Wei Chiu visitorckw@gmail.com
Sun Sep 1 01:57:08 GMT 2024


On Sun, Sep 01, 2024 at 09:54:56AM +0800, Kuan-Wei Chiu wrote:
> Optimize the bsearch() function to improve binary search performance.
> This modification reduces the execution time by approximately 8% on an
> x86 machine with a 100,000-element array under 1e8 searches. The
> optimization slightly increases the code size by 8 bytes.
> 
> code size:
> * old:
>    text    data     bss     dec     hex filename
>     250       0       0     250      fa ./stdlib/bsearch.o
> * new:
>    text    data     bss     dec     hex filename
>     258       0       0     258     102 ./stdlib/bsearch.o
> 
> benchmark:
> Old bsearch elapsed time: 4536833
> New bsearch elapsed time: 4137556
> Improve efficiency by 8 %
> 
> Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> ---
> Since I only have x86 machines available for testing, I am not entirely
> certain whether this patch will bring performance improvements across
> different architectures. Therefore, I am sending this as an RFC patch
> first.
> 
>  bits/stdlib-bsearch.h | 20 +++++++++-----------
>  1 file changed, 9 insertions(+), 11 deletions(-)
> 
FWIW, here is the code I used for testing:

/* tst-bsearch1.c */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define TEST_OLD

int arr[100000];
#define narr (sizeof (arr) / sizeof (arr[0]))

static int
comp (const void *p1, const void *p2)
{
  int x1 = *(int *) p1;
  int x2 = *(int *) p2;

  if (x1 < x2)
    return -1;
  if (x1 > x2)
    return 1;
  return 0;
}

__attribute__((noinline)) void *
newbsearch (const void *__key, const void *__base, size_t __nmemb, size_t __size,
	 __compar_fn_t __compar)
{
  const void *__p;
  int __comparison;

  while (__nmemb)
    {
      __p = (const void *) (((const char *) __base) + ((__nmemb >> 1) * __size));
      __comparison = (*__compar) (__key, __p);
      if (__comparison == 0)
	{
#if __GNUC_PREREQ(4, 6)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wcast-qual"
#endif
	  return (void *) __p;
#if __GNUC_PREREQ(4, 6)
# pragma GCC diagnostic pop
#endif
	}
      if (__comparison > 0)
	{
	  __base = ((const char *) __p) + __size;
	  --__nmemb;
	}
      __nmemb >>= 1;
    }

  return NULL;
}

__attribute__((noinline)) void *
oldbsearch (const void *__key, const void *__base, size_t __nmemb, size_t __size,
	 __compar_fn_t __compar)
{
  size_t __l, __u, __idx;
  const void *__p;
  int __comparison;

  __l = 0;
  __u = __nmemb;
  while (__l < __u)
    {
      __idx = (__l + __u) / 2;
      __p = (const void *) (((const char *) __base) + (__idx * __size));
      __comparison = (*__compar) (__key, __p);
      if (__comparison < 0)
	__u = __idx;
      else if (__comparison > 0)
	__l = __idx + 1;
      else
	{
#if __GNUC_PREREQ(4, 6)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wcast-qual"
#endif
	  return (void *) __p;
#if __GNUC_PREREQ(4, 6)
# pragma GCC diagnostic pop
#endif
	}
    }

  return NULL;
}

static int
do_test (void)
{
  volatile size_t i;
  volatile int *res;
  volatile clock_t begin, end;
  int key;
  long long int old_time, new_time;

  for (i = 0; i < narr; ++i)
    arr[i] = i;

  asm volatile ("" : : : "memory");
  begin = clock ();
  asm volatile ("" : : : "memory");
  for (i = 0; i < 1e8; ++i)
    {
      key = i % narr;
      res = (int *) oldbsearch (&key, arr, narr, sizeof (arr[0]), comp);
      if (res != arr + key)
	{
	  printf ("entry %zd got wrong answer\n", i);
	  return 1;
	}
    }
  asm volatile ("" : : : "memory");
  end = clock ();
  asm volatile ("" : : : "memory");
  old_time = end - begin;
  printf ("Old bsearch elapsed time: %lld\n", old_time);

  asm volatile ("" : : : "memory");
  begin = clock ();
  asm volatile ("" : : : "memory");
  for (i = 0; i < 1e8; ++i)
    {
      key = i % narr;
      res = (int *) newbsearch (&key, arr, narr, sizeof (arr[0]), comp);
      if (res != arr + key)
	{
	  printf ("entry %zd got wrong answer\n", i);
	  return 1;
	}
    }
  asm volatile ("" : : : "memory");
  end = clock ();
  asm volatile ("" : : : "memory");
  new_time = end - begin;
  printf ("New bsearch elapsed time: %lld\n", new_time);

  printf ("Improve efficiency by %lld %%\n", (old_time - new_time) * 100 / old_time);

  return 0;
}

#define TEST_FUNCTION do_test ()
#include "../test-skeleton.c"



More information about the Libc-alpha mailing list