[PATCH] Linux: Avoid calling malloc indirectly from __get_nprocs

Adhemerval Zanella adhemerval.zanella@linaro.org
Wed Jun 30 17:20:46 GMT 2021



On 30/06/2021 11:12, Florian Weimer wrote:
> malloc initialization depends on __get_nprocs, so using
> scratch buffers in __get_nprocs may result in infinite recursion.
> 
> Tested on i686-linux-gnu, x86_64-linux-gnu.  build-many-glibcs.py is
> still running, but looks good so far.
> 
> (Exposure of this bug may have been a side effect of the malloc
> initialization rework, not sure.)

Some comments below.

> 
> Thanks,
> Florian
> ---
>  sysdeps/unix/sysv/linux/getsysstats.c | 83 ++++++++++++++++++++++++-----------
>  1 file changed, 57 insertions(+), 26 deletions(-)
> 
> diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
> index 2e15f0039e..1391e360b8 100644
> --- a/sysdeps/unix/sysv/linux/getsysstats.c
> +++ b/sysdeps/unix/sysv/linux/getsysstats.c
> @@ -17,42 +17,73 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>  
> +#include <array_length.h>
>  #include <dirent.h>
> +#include <errno.h>
> +#include <ldsodefs.h>
> +#include <limits.h>
>  #include <not-cancel.h>
> -#include <scratch_buffer.h>
>  #include <stdio.h>
>  #include <stdio_ext.h>
> +#include <sys/mman.h>
>  #include <sys/sysinfo.h>
>  #include <sysdep.h>
>  
> +/* Compute the population count of the entire array.  */
> +static int
> +__get_nprocs_count (const unsigned long int *array, size_t length)
> +{
> +  int count = 0;
> +  for (size_t i = 0; i < length; ++i)
> +    if (__builtin_add_overflow (count,  __builtin_popcountl (array[i]),
> +				&count))
> +      return INT_MAX;
> +  return count;
> +}

Could we avoid replicate the same logic over different files? We have a
countbits() on posix/sched_cpucount.c, so I think it would better to move
it on a sched.h and use it instead (there is no need to really handle
overflow here, it would require a *very* large buffer...).

> +
> +/* __get_nprocs with a large buffer.  */
> +static int
> +__get_nprocs_large (void)
> +{
> +  /* This code cannot use scratch_buffer because it is used during
> +     malloc initialization.  */
> +  size_t pagesize = GLRO (dl_pagesize);
> +  unsigned long int *page = __mmap (0, pagesize, PROT_READ | PROT_WRITE,
> +				    MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +  if (page == MAP_FAILED)
> +    return 2;
> +  int r = INTERNAL_SYSCALL_CALL (sched_getaffinity, 0, pagesize, page);
> +  int count;
> +  if (r > 0)
> +    count = __get_nprocs_count (page, pagesize / sizeof (unsigned long int));
> +  else if (r == -EINVAL)
> +    /* One page is still not enough to store the bits.  A more-or-less
> +       arbitrary value.  This assumes t hat such large systems never
> +       happen in practice.  */
> +    count = GLRO (dl_pagesize) * CHAR_BIT;
> +  else
> +    count = 2;
> +  __munmap (page, GLRO (dl_pagesize));

Maybe use pagesize here since you are defining it.

> +  return count;
> +}
> +
>  int
>  __get_nprocs (void)
>  {
> -  struct scratch_buffer set;
> -  scratch_buffer_init (&set);
> -
> -  int r;
> -  while (true)
> -    {
> -      /* The possible error are EFAULT for an invalid buffer or ESRCH for
> -	 invalid pid, none could happen.  */
> -      r = INTERNAL_SYSCALL_CALL (sched_getaffinity, 0, set.length,
> -                                 set.data);
> -      if (r > 0)
> -        break;
> -
> -      if (!scratch_buffer_grow (&set))
> -        /* Default to an SMP system in case we cannot obtain an accurate
> -           number.  */
> -        return 2;
> -    }
> -
> -  /* The scratch_buffer is aligned to max_align_t.  */
> -  r = __sched_cpucount (r, (const cpu_set_t *) set.data);
> -
> -  scratch_buffer_free (&set);
> -
> -  return r;
> +  /* Fast path for most systems.  The kernel expects a buffer size
> +     that is a multiple of 8.  */
> +  unsigned long int small_buffer[1024 / CHAR_BIT / sizeof (unsigned long int)];

Use __cpu_mask instead of 'unsigned long int'.  

> +  int r = INTERNAL_SYSCALL_CALL (sched_getaffinity, 0,
> +				 sizeof (small_buffer), small_buffer);
> +  if (r > 0)
> +    return __get_nprocs_count (small_buffer, r / sizeof (unsigned long int));
> +  else if (r == -EINVAL)
> +    /* The kernel requests a larger buffer to store the data.  */
> +    return __get_nprocs_large ();
> +  else
> +    /* Some other error.  2 is conservative (not a uniprocessor
> +       system, so atomics are needed). */
> +    return 2;
>  }
>  libc_hidden_def (__get_nprocs)
>  weak_alias (__get_nprocs, get_nprocs)
> 
I would prefer that since now we don't iterate increasing the buffer size for
sched_getaffinity we go for a simplified version and use a large buffer instead.

Linux currently supports at maximum of 4096 cpus for most architectures:

$ find -iname Kconfig | xargs git grep -A10 -w NR_CPUS | grep -w range
arch/alpha/Kconfig-	range 2 32
arch/arc/Kconfig-	range 2 4096
arch/arm/Kconfig-	range 2 16 if DEBUG_KMAP_LOCAL
arch/arm/Kconfig-	range 2 32 if !DEBUG_KMAP_LOCAL
arch/arm64/Kconfig-	range 2 4096
arch/csky/Kconfig-	range 2 32
arch/hexagon/Kconfig-	range 2 6 if SMP
arch/ia64/Kconfig-	range 2 4096
arch/mips/Kconfig-	range 2 256
arch/openrisc/Kconfig-	range 2 32
arch/parisc/Kconfig-	range 2 32
arch/riscv/Kconfig-	range 2 32
arch/s390/Kconfig-	range 2 512
arch/sh/Kconfig-	range 2 32
arch/sparc/Kconfig-	range 2 32 if SPARC32
arch/sparc/Kconfig-	range 2 4096 if SPARC64
arch/um/Kconfig-	range 1 1
arch/x86/Kconfig-# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
arch/x86/Kconfig-	range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
arch/xtensa/Kconfig-	range 2 32

With x86 supporting 8192:

arch/x86/Kconfig
 976 config NR_CPUS_RANGE_END
 977         int
 978         depends on X86_64
 979         default 8192 if  SMP && CPUMASK_OFFSTACK
 980         default  512 if  SMP && !CPUMASK_OFFSTACK
 981         default    1 if !SMP

So using a buffer for 16384 entries seems more than enough.

  enum { max_cpus = 16384 };
  __cpu_mask buffer[max_cpus / CHAR_BIT / sizeof (__cpu_mask)];
  int r = INTERNAL_SYSCALL_CALL (sched_getaffinity, 0, sizeof buffer, buffer);
  if (r < 0)
    return 2;
  else if (r == -EINVAL)
    return max_cpus;

  int count = 0;
  for (size_t i = 0; i < max_cpus; ++i)
    count += countbits (buffer[i]);
  return count;


More information about the Libc-alpha mailing list