[PATCH] benchtests: Add bench-calloc-thread

H.J. Lu hjl.tools@gmail.com
Fri Nov 29 00:48:22 GMT 2024


On Wed, Nov 27, 2024 at 3:12 PM Wangyang Guo <wangyang.guo@intel.com> wrote:
>
> ---
>  benchtests/Makefile                   |   5 +-
>  benchtests/bench-calloc-thread.c      |  23 ++
>  benchtests/bench-malloc-thread-base.c | 297 ++++++++++++++++++++++++++
>  benchtests/bench-malloc-thread.c      | 280 +-----------------------
>  4 files changed, 327 insertions(+), 278 deletions(-)
>  create mode 100644 benchtests/bench-calloc-thread.c
>  create mode 100644 benchtests/bench-malloc-thread-base.c
>
> diff --git a/benchtests/Makefile b/benchtests/Makefile
> index 23db840666..dfb1ae309d 100644
> --- a/benchtests/Makefile
> +++ b/benchtests/Makefile
> @@ -308,11 +308,13 @@ CFLAGS-bench-isfinite.c += $(config-cflags-signaling-nans)
>
>  ifeq (${BENCHSET},)
>  bench-malloc := \
> +  calloc-thread \
>    malloc-simple \
>    malloc-thread \
>    # bench-malloc
>  else
>  bench-malloc := $(filter malloc-%,${BENCHSET})
> +bench-malloc += $(filter calloc-%,${BENCHSET})
>  endif
>
>  ifeq (${STATIC-BENCHTESTS},yes)
> @@ -429,6 +431,7 @@ VALIDBENCHSETNAMES := \
>    bench-math \
>    bench-pthread \
>    bench-string \
> +  calloc-thread \
>    hash-benchset \
>    malloc-simple \
>    malloc-thread \
> @@ -469,7 +472,7 @@ bench-set: $(binaries-benchset)
>  bench-malloc: $(binaries-bench-malloc)
>         for run in $^; do \
>           echo "$${run}"; \
> -         if [ `basename $${run}` = "bench-malloc-thread" ]; then \
> +         if [[ `basename $${run}` =~ bench-[cm]alloc-thread ]]; then \
>                 for thr in 1 8 16 32; do \
>                         echo "Running $${run} $${thr}"; \
>                         $(run-bench) $${thr} > $${run}-$${thr}.out; \
> diff --git a/benchtests/bench-calloc-thread.c b/benchtests/bench-calloc-thread.c
> new file mode 100644
> index 0000000000..eec4058fe2
> --- /dev/null
> +++ b/benchtests/bench-calloc-thread.c
> @@ -0,0 +1,23 @@
> +/* Benchmark calloc and free functions.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#define MALLOC(size) calloc (1, size)
> +
> +#define TEST_NAME "calloc"

Please simply add

#ifndef TEST_FUNC
# define TEST_FUNC(size) malloc(size)
# define TEST_NAME "malloc"
#endif

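in bench-malloc-thread.c directly (using TEST_FUNC in place of the MALLOC
macro), and have bench-calloc-thread.c define TEST_FUNC and TEST_NAME before
including bench-malloc-thread.c.  That way the separate
bench-malloc-thread-base.c file is not needed.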

> +
> +#include "bench-malloc-thread-base.c"
> diff --git a/benchtests/bench-malloc-thread-base.c b/benchtests/bench-malloc-thread-base.c
> new file mode 100644
> index 0000000000..b4b271f2b0
> --- /dev/null
> +++ b/benchtests/bench-malloc-thread-base.c
> @@ -0,0 +1,297 @@
> +/* Benchmark memory allocation and free functions.
> +   Copyright (C) 2013-2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <errno.h>
> +#include <math.h>
> +#include <pthread.h>
> +#include <signal.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/time.h>
> +#include <sys/resource.h>
> +#include <unistd.h>
> +
> +#include "bench-timing.h"
> +#include "json-lib.h"
> +
> +/* Benchmark duration in seconds.  */
> +#define BENCHMARK_DURATION     10
> +#define RAND_SEED              88
> +
> +#ifndef NUM_THREADS
> +# define NUM_THREADS 1
> +#endif
> +
> +/* Maximum memory that can be allocated at any one time is:
> +
> +   NUM_THREADS * WORKING_SET_SIZE * MAX_ALLOCATION_SIZE
> +
> +   However due to the distribution of the random block sizes
> +   the typical amount allocated will be much smaller.  */
> +#define WORKING_SET_SIZE       1024
> +
> +#define MIN_ALLOCATION_SIZE    4
> +#define MAX_ALLOCATION_SIZE    32768
> +
> +/* Get a random block size with an inverse square distribution.  */
> +static unsigned int
> +get_block_size (unsigned int rand_data)
> +{
> +  /* Inverse square.  */
> +  const float exponent = -2;
> +  /* Minimum value of distribution.  */
> +  const float dist_min = MIN_ALLOCATION_SIZE;
> +  /* Maximum value of distribution.  */
> +  const float dist_max = MAX_ALLOCATION_SIZE;
> +
> +  float min_pow = powf (dist_min, exponent + 1);
> +  float max_pow = powf (dist_max, exponent + 1);
> +
> +  float r = (float) rand_data / RAND_MAX;
> +
> +  return (unsigned int) powf ((max_pow - min_pow) * r + min_pow,
> +                             1 / (exponent + 1));
> +}
> +
> +#define NUM_BLOCK_SIZES        8000
> +#define NUM_OFFSETS    ((WORKING_SET_SIZE) * 4)
> +
> +static unsigned int random_block_sizes[NUM_BLOCK_SIZES];
> +static unsigned int random_offsets[NUM_OFFSETS];
> +
> +static void
> +init_random_values (void)
> +{
> +  for (size_t i = 0; i < NUM_BLOCK_SIZES; i++)
> +    random_block_sizes[i] = get_block_size (rand ());
> +
> +  for (size_t i = 0; i < NUM_OFFSETS; i++)
> +    random_offsets[i] = rand () % WORKING_SET_SIZE;
> +}
> +
> +static unsigned int
> +get_random_block_size (unsigned int *state)
> +{
> +  unsigned int idx = *state;
> +
> +  if (idx >= NUM_BLOCK_SIZES - 1)
> +    idx = 0;
> +  else
> +    idx++;
> +
> +  *state = idx;
> +
> +  return random_block_sizes[idx];
> +}
> +
> +static unsigned int
> +get_random_offset (unsigned int *state)
> +{
> +  unsigned int idx = *state;
> +
> +  if (idx >= NUM_OFFSETS - 1)
> +    idx = 0;
> +  else
> +    idx++;
> +
> +  *state = idx;
> +
> +  return random_offsets[idx];
> +}
> +
> +static volatile bool timeout;
> +
> +static void
> +alarm_handler (int signum)
> +{
> +  timeout = true;
> +}
> +
> +/* Allocate and free blocks in a random order.  */
> +static size_t
> +malloc_benchmark_loop (void **ptr_arr)
> +{
> +  unsigned int offset_state = 0, block_state = 0;
> +  size_t iters = 0;
> +
> +  while (!timeout)
> +    {
> +      unsigned int next_idx = get_random_offset (&offset_state);
> +      unsigned int next_block = get_random_block_size (&block_state);
> +
> +      free (ptr_arr[next_idx]);
> +
> +      ptr_arr[next_idx] = MALLOC (next_block);
> +
> +      iters++;
> +    }
> +
> +  return iters;
> +}
> +
> +struct thread_args
> +{
> +  size_t iters;
> +  void **working_set;
> +  timing_t elapsed;
> +};
> +
> +static void *
> +benchmark_thread (void *arg)
> +{
> +  struct thread_args *args = (struct thread_args *) arg;
> +  size_t iters;
> +  void *thread_set = args->working_set;
> +  timing_t start, stop;
> +
> +  TIMING_NOW (start);
> +  iters = malloc_benchmark_loop (thread_set);
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (args->elapsed, start, stop);
> +  args->iters = iters;
> +
> +  return NULL;
> +}
> +
> +static timing_t
> +do_benchmark (size_t num_threads, size_t *iters)
> +{
> +  timing_t elapsed = 0;
> +
> +  if (num_threads == 1)
> +    {
> +      timing_t start, stop;
> +      void *working_set[WORKING_SET_SIZE];
> +
> +      memset (working_set, 0, sizeof (working_set));
> +
> +      TIMING_NOW (start);
> +      *iters = malloc_benchmark_loop (working_set);
> +      TIMING_NOW (stop);
> +
> +      TIMING_DIFF (elapsed, start, stop);
> +    }
> +  else
> +    {
> +      struct thread_args args[num_threads];
> +      void *working_set[num_threads][WORKING_SET_SIZE];
> +      pthread_t threads[num_threads];
> +
> +      memset (working_set, 0, sizeof (working_set));
> +
> +      *iters = 0;
> +
> +      for (size_t i = 0; i < num_threads; i++)
> +       {
> +         args[i].working_set = working_set[i];
> +         pthread_create(&threads[i], NULL, benchmark_thread, &args[i]);
> +       }
> +
> +      for (size_t i = 0; i < num_threads; i++)
> +       {
> +         pthread_join(threads[i], NULL);
> +         TIMING_ACCUM (elapsed, args[i].elapsed);
> +         *iters += args[i].iters;
> +       }
> +    }
> +  return elapsed;
> +}
> +
> +static void usage(const char *name)
> +{
> +  fprintf (stderr, "%s: <num_threads>\n", name);
> +  exit (1);
> +}
> +
> +int
> +main (int argc, char **argv)
> +{
> +  timing_t cur;
> +  size_t iters = 0, num_threads = 1;
> +  json_ctx_t json_ctx;
> +  double d_total_s, d_total_i;
> +  struct sigaction act;
> +
> +  if (argc == 1)
> +    num_threads = 1;
> +  else if (argc == 2)
> +    {
> +      long ret;
> +
> +      errno = 0;
> +      ret = strtol(argv[1], NULL, 10);
> +
> +      if (errno || ret == 0)
> +       usage(argv[0]);
> +
> +      num_threads = ret;
> +    }
> +  else
> +    usage(argv[0]);
> +
> +  init_random_values ();
> +
> +  json_init (&json_ctx, 0, stdout);
> +
> +  json_document_begin (&json_ctx);
> +
> +  json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
> +
> +  json_attr_object_begin (&json_ctx, "functions");
> +
> +  json_attr_object_begin (&json_ctx, TEST_NAME);
> +
> +  json_attr_object_begin (&json_ctx, "");
> +
> +  memset (&act, 0, sizeof (act));
> +  act.sa_handler = &alarm_handler;
> +
> +  sigaction (SIGALRM, &act, NULL);
> +
> +  alarm (BENCHMARK_DURATION);
> +
> +  cur = do_benchmark (num_threads, &iters);
> +
> +  struct rusage usage;
> +  getrusage(RUSAGE_SELF, &usage);
> +
> +  d_total_s = cur;
> +  d_total_i = iters;
> +
> +  json_attr_double (&json_ctx, "duration", d_total_s);
> +  json_attr_double (&json_ctx, "iterations", d_total_i);
> +  json_attr_double (&json_ctx, "time_per_iteration", d_total_s / d_total_i);
> +  json_attr_double (&json_ctx, "max_rss", usage.ru_maxrss);
> +
> +  json_attr_double (&json_ctx, "threads", num_threads);
> +  json_attr_double (&json_ctx, "min_size", MIN_ALLOCATION_SIZE);
> +  json_attr_double (&json_ctx, "max_size", MAX_ALLOCATION_SIZE);
> +  json_attr_double (&json_ctx, "random_seed", RAND_SEED);
> +
> +  json_attr_object_end (&json_ctx);
> +
> +  json_attr_object_end (&json_ctx);
> +
> +  json_attr_object_end (&json_ctx);
> +
> +  json_document_end (&json_ctx);
> +
> +  return 0;
> +}
> diff --git a/benchtests/bench-malloc-thread.c b/benchtests/bench-malloc-thread.c
> index 46fdabd30c..d16eb494df 100644
> --- a/benchtests/bench-malloc-thread.c
> +++ b/benchtests/bench-malloc-thread.c
> @@ -16,282 +16,8 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <errno.h>
> -#include <math.h>
> -#include <pthread.h>
> -#include <signal.h>
> -#include <stdio.h>
> -#include <stdlib.h>
> -#include <string.h>
> -#include <sys/time.h>
> -#include <sys/resource.h>
> -#include <unistd.h>
> +#define MALLOC(size) malloc (size)
>
> -#include "bench-timing.h"
> -#include "json-lib.h"
> +#define TEST_NAME "malloc"
>
> -/* Benchmark duration in seconds.  */
> -#define BENCHMARK_DURATION     10
> -#define RAND_SEED              88
> -
> -#ifndef NUM_THREADS
> -# define NUM_THREADS 1
> -#endif
> -
> -/* Maximum memory that can be allocated at any one time is:
> -
> -   NUM_THREADS * WORKING_SET_SIZE * MAX_ALLOCATION_SIZE
> -
> -   However due to the distribution of the random block sizes
> -   the typical amount allocated will be much smaller.  */
> -#define WORKING_SET_SIZE       1024
> -
> -#define MIN_ALLOCATION_SIZE    4
> -#define MAX_ALLOCATION_SIZE    32768
> -
> -/* Get a random block size with an inverse square distribution.  */
> -static unsigned int
> -get_block_size (unsigned int rand_data)
> -{
> -  /* Inverse square.  */
> -  const float exponent = -2;
> -  /* Minimum value of distribution.  */
> -  const float dist_min = MIN_ALLOCATION_SIZE;
> -  /* Maximum value of distribution.  */
> -  const float dist_max = MAX_ALLOCATION_SIZE;
> -
> -  float min_pow = powf (dist_min, exponent + 1);
> -  float max_pow = powf (dist_max, exponent + 1);
> -
> -  float r = (float) rand_data / RAND_MAX;
> -
> -  return (unsigned int) powf ((max_pow - min_pow) * r + min_pow,
> -                             1 / (exponent + 1));
> -}
> -
> -#define NUM_BLOCK_SIZES        8000
> -#define NUM_OFFSETS    ((WORKING_SET_SIZE) * 4)
> -
> -static unsigned int random_block_sizes[NUM_BLOCK_SIZES];
> -static unsigned int random_offsets[NUM_OFFSETS];
> -
> -static void
> -init_random_values (void)
> -{
> -  for (size_t i = 0; i < NUM_BLOCK_SIZES; i++)
> -    random_block_sizes[i] = get_block_size (rand ());
> -
> -  for (size_t i = 0; i < NUM_OFFSETS; i++)
> -    random_offsets[i] = rand () % WORKING_SET_SIZE;
> -}
> -
> -static unsigned int
> -get_random_block_size (unsigned int *state)
> -{
> -  unsigned int idx = *state;
> -
> -  if (idx >= NUM_BLOCK_SIZES - 1)
> -    idx = 0;
> -  else
> -    idx++;
> -
> -  *state = idx;
> -
> -  return random_block_sizes[idx];
> -}
> -
> -static unsigned int
> -get_random_offset (unsigned int *state)
> -{
> -  unsigned int idx = *state;
> -
> -  if (idx >= NUM_OFFSETS - 1)
> -    idx = 0;
> -  else
> -    idx++;
> -
> -  *state = idx;
> -
> -  return random_offsets[idx];
> -}
> -
> -static volatile bool timeout;
> -
> -static void
> -alarm_handler (int signum)
> -{
> -  timeout = true;
> -}
> -
> -/* Allocate and free blocks in a random order.  */
> -static size_t
> -malloc_benchmark_loop (void **ptr_arr)
> -{
> -  unsigned int offset_state = 0, block_state = 0;
> -  size_t iters = 0;
> -
> -  while (!timeout)
> -    {
> -      unsigned int next_idx = get_random_offset (&offset_state);
> -      unsigned int next_block = get_random_block_size (&block_state);
> -
> -      free (ptr_arr[next_idx]);
> -
> -      ptr_arr[next_idx] = malloc (next_block);
> -
> -      iters++;
> -    }
> -
> -  return iters;
> -}
> -
> -struct thread_args
> -{
> -  size_t iters;
> -  void **working_set;
> -  timing_t elapsed;
> -};
> -
> -static void *
> -benchmark_thread (void *arg)
> -{
> -  struct thread_args *args = (struct thread_args *) arg;
> -  size_t iters;
> -  void *thread_set = args->working_set;
> -  timing_t start, stop;
> -
> -  TIMING_NOW (start);
> -  iters = malloc_benchmark_loop (thread_set);
> -  TIMING_NOW (stop);
> -
> -  TIMING_DIFF (args->elapsed, start, stop);
> -  args->iters = iters;
> -
> -  return NULL;
> -}
> -
> -static timing_t
> -do_benchmark (size_t num_threads, size_t *iters)
> -{
> -  timing_t elapsed = 0;
> -
> -  if (num_threads == 1)
> -    {
> -      timing_t start, stop;
> -      void *working_set[WORKING_SET_SIZE];
> -
> -      memset (working_set, 0, sizeof (working_set));
> -
> -      TIMING_NOW (start);
> -      *iters = malloc_benchmark_loop (working_set);
> -      TIMING_NOW (stop);
> -
> -      TIMING_DIFF (elapsed, start, stop);
> -    }
> -  else
> -    {
> -      struct thread_args args[num_threads];
> -      void *working_set[num_threads][WORKING_SET_SIZE];
> -      pthread_t threads[num_threads];
> -
> -      memset (working_set, 0, sizeof (working_set));
> -
> -      *iters = 0;
> -
> -      for (size_t i = 0; i < num_threads; i++)
> -       {
> -         args[i].working_set = working_set[i];
> -         pthread_create(&threads[i], NULL, benchmark_thread, &args[i]);
> -       }
> -
> -      for (size_t i = 0; i < num_threads; i++)
> -       {
> -         pthread_join(threads[i], NULL);
> -         TIMING_ACCUM (elapsed, args[i].elapsed);
> -         *iters += args[i].iters;
> -       }
> -    }
> -  return elapsed;
> -}
> -
> -static void usage(const char *name)
> -{
> -  fprintf (stderr, "%s: <num_threads>\n", name);
> -  exit (1);
> -}
> -
> -int
> -main (int argc, char **argv)
> -{
> -  timing_t cur;
> -  size_t iters = 0, num_threads = 1;
> -  json_ctx_t json_ctx;
> -  double d_total_s, d_total_i;
> -  struct sigaction act;
> -
> -  if (argc == 1)
> -    num_threads = 1;
> -  else if (argc == 2)
> -    {
> -      long ret;
> -
> -      errno = 0;
> -      ret = strtol(argv[1], NULL, 10);
> -
> -      if (errno || ret == 0)
> -       usage(argv[0]);
> -
> -      num_threads = ret;
> -    }
> -  else
> -    usage(argv[0]);
> -
> -  init_random_values ();
> -
> -  json_init (&json_ctx, 0, stdout);
> -
> -  json_document_begin (&json_ctx);
> -
> -  json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
> -
> -  json_attr_object_begin (&json_ctx, "functions");
> -
> -  json_attr_object_begin (&json_ctx, "malloc");
> -
> -  json_attr_object_begin (&json_ctx, "");
> -
> -  memset (&act, 0, sizeof (act));
> -  act.sa_handler = &alarm_handler;
> -
> -  sigaction (SIGALRM, &act, NULL);
> -
> -  alarm (BENCHMARK_DURATION);
> -
> -  cur = do_benchmark (num_threads, &iters);
> -
> -  struct rusage usage;
> -  getrusage(RUSAGE_SELF, &usage);
> -
> -  d_total_s = cur;
> -  d_total_i = iters;
> -
> -  json_attr_double (&json_ctx, "duration", d_total_s);
> -  json_attr_double (&json_ctx, "iterations", d_total_i);
> -  json_attr_double (&json_ctx, "time_per_iteration", d_total_s / d_total_i);
> -  json_attr_double (&json_ctx, "max_rss", usage.ru_maxrss);
> -
> -  json_attr_double (&json_ctx, "threads", num_threads);
> -  json_attr_double (&json_ctx, "min_size", MIN_ALLOCATION_SIZE);
> -  json_attr_double (&json_ctx, "max_size", MAX_ALLOCATION_SIZE);
> -  json_attr_double (&json_ctx, "random_seed", RAND_SEED);
> -
> -  json_attr_object_end (&json_ctx);
> -
> -  json_attr_object_end (&json_ctx);
> -
> -  json_attr_object_end (&json_ctx);
> -
> -  json_document_end (&json_ctx);
> -
> -  return 0;
> -}
> +#include "bench-malloc-thread-base.c"
> --
> 2.43.5
>


-- 
H.J.


More information about the Libc-alpha mailing list