[PATCH] Linux: Avoid calling malloc indirectly from __get_nprocs

Carlos O'Donell carlos@redhat.com
Wed Jun 30 15:20:15 GMT 2021


On 6/30/21 10:12 AM, Florian Weimer via Libc-alpha wrote:
> malloc initialization depends on __get_nprocs, so using
> scratch buffers in __get_nprocs may result in infinite recursion.

LGTM.

Reviewed-by: Carlos O'Donell <carlos@redhat.com>

In our implementation we have:

Configured processors:
_SC_NPROCESSORS_CONF -> __get_nprocs_conf -> /sys/devices/system/cpu

Online processors:
_SC_NPROCESSORS_ONLN -> __get_nprocs -> sched_getaffinity.

Note: the JVM tries sched_getaffinity first, and if that syscall fails
it call sysconf(_SC_NPROCESSORS_ONLN).

So in this case we end up with 2 being returned because rather than EINVAL
we see ENOSYS or EPERM depending on syscall seccomp blocking.

I think 2 is sufficiently conservative to trigger the right behaviour.

The upperbound of page size * bits-per-char seems acceptable for this case
to give some parallelism and a good-enough answer to the sysconf query
or the malloc code. I wish we had another method, but such a method involves
looking into sysfs to find something we can use with reliable semantics.

> Tested on i686-linux-gnu, x86_64-linux-gnu.  build-many-glibcs.py is
> still running, but looks good so far.
> 
> (Exposure of this bug may have been a side effect of the malloc
> initialization rework, not sure.)
> 
> Thanks,
> Florian
> ---
>  sysdeps/unix/sysv/linux/getsysstats.c | 83 ++++++++++++++++++++++++-----------
>  1 file changed, 57 insertions(+), 26 deletions(-)
> 
> diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
> index 2e15f0039e..1391e360b8 100644
> --- a/sysdeps/unix/sysv/linux/getsysstats.c
> +++ b/sysdeps/unix/sysv/linux/getsysstats.c
> @@ -17,42 +17,73 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>  
> +#include <array_length.h>
>  #include <dirent.h>
> +#include <errno.h>
> +#include <ldsodefs.h>
> +#include <limits.h>
>  #include <not-cancel.h>
> -#include <scratch_buffer.h>
>  #include <stdio.h>
>  #include <stdio_ext.h>
> +#include <sys/mman.h>
>  #include <sys/sysinfo.h>
>  #include <sysdep.h>
>  
> +/* Compute the population count of the entire array.  */
> +static int
> +__get_nprocs_count (const unsigned long int *array, size_t length)
> +{
> +  int count = 0;
> +  for (size_t i = 0; i < length; ++i)
> +    if (__builtin_add_overflow (count,  __builtin_popcountl (array[i]),
> +				&count))
> +      return INT_MAX;
> +  return count;
> +}

OK.

> +
> +/* __get_nprocs with a large buffer.  */
> +static int
> +__get_nprocs_large (void)
> +{
> +  /* This code cannot use scratch_buffer because it is used during
> +     malloc initialization.  */
> +  size_t pagesize = GLRO (dl_pagesize);
> +  unsigned long int *page = __mmap (0, pagesize, PROT_READ | PROT_WRITE,
> +				    MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +  if (page == MAP_FAILED)
> +    return 2;
> +  int r = INTERNAL_SYSCALL_CALL (sched_getaffinity, 0, pagesize, page);
> +  int count;
> +  if (r > 0)
> +    count = __get_nprocs_count (page, pagesize / sizeof (unsigned long int));
> +  else if (r == -EINVAL)
> +    /* One page is still not enough to store the bits.  A more-or-less
> +       arbitrary value.  This assumes t hat such large systems never
> +       happen in practice.  */
> +    count = GLRO (dl_pagesize) * CHAR_BIT;

OK. Conservative on -EINVAL (not EPERM/ENOSYS).

> +  else
> +    count = 2;

OK. Conservative on EPERM/ENOSYS. Note: JVM's conservative is also 2 (os::processor_count()).

> +  __munmap (page, GLRO (dl_pagesize));
> +  return count;
> +}
> +
>  int
>  __get_nprocs (void)
>  {
> -  struct scratch_buffer set;
> -  scratch_buffer_init (&set);
> -
> -  int r;
> -  while (true)
> -    {
> -      /* The possible error are EFAULT for an invalid buffer or ESRCH for
> -	 invalid pid, none could happen.  */
> -      r = INTERNAL_SYSCALL_CALL (sched_getaffinity, 0, set.length,
> -                                 set.data);
> -      if (r > 0)
> -        break;
> -
> -      if (!scratch_buffer_grow (&set))
> -        /* Default to an SMP system in case we cannot obtain an accurate
> -           number.  */
> -        return 2;
> -    }
> -
> -  /* The scratch_buffer is aligned to max_align_t.  */
> -  r = __sched_cpucount (r, (const cpu_set_t *) set.data);
> -
> -  scratch_buffer_free (&set);
> -
> -  return r;
> +  /* Fast path for most systems.  The kernel expects a buffer size
> +     that is a multiple of 8.  */
> +  unsigned long int small_buffer[1024 / CHAR_BIT / sizeof (unsigned long int)];
> +  int r = INTERNAL_SYSCALL_CALL (sched_getaffinity, 0,
> +				 sizeof (small_buffer), small_buffer);
> +  if (r > 0)
> +    return __get_nprocs_count (small_buffer, r / sizeof (unsigned long int));
> +  else if (r == -EINVAL)
> +    /* The kernel requests a larger buffer to store the data.  */
> +    return __get_nprocs_large ();

OK. Retry once for failure due to sizing.

> +  else
> +    /* Some other error.  2 is conservative (not a uniprocessor
> +       system, so atomics are needed). */
> +    return 2;

OK. Acceptable for EPERM/ENOSYS.

>  }
>  libc_hidden_def (__get_nprocs)
>  weak_alias (__get_nprocs, get_nprocs)
> 


-- 
Cheers,
Carlos.



More information about the Libc-alpha mailing list