[PATCH] Linux: Avoid calling malloc indirectly from __get_nprocs
Adhemerval Zanella
adhemerval.zanella@linaro.org
Wed Jun 30 17:20:46 GMT 2021
On 30/06/2021 11:12, Florian Weimer wrote:
> malloc initialization depends on __get_nprocs, so using
> scratch buffers in __get_nprocs may result in infinite recursion.
>
> Tested on i686-linux-gnu, x86_64-linux-gnu. build-many-glibcs.py is
> still running, but looks good so far.
>
> (Exposure of this bug may have been a side effect of the malloc
> initialization rework, not sure.)
Some comments below.
>
> Thanks,
> Florian
> ---
> sysdeps/unix/sysv/linux/getsysstats.c | 83 ++++++++++++++++++++++++-----------
> 1 file changed, 57 insertions(+), 26 deletions(-)
>
> diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
> index 2e15f0039e..1391e360b8 100644
> --- a/sysdeps/unix/sysv/linux/getsysstats.c
> +++ b/sysdeps/unix/sysv/linux/getsysstats.c
> @@ -17,42 +17,73 @@
> License along with the GNU C Library; if not, see
> <https://www.gnu.org/licenses/>. */
>
> +#include <array_length.h>
> #include <dirent.h>
> +#include <errno.h>
> +#include <ldsodefs.h>
> +#include <limits.h>
> #include <not-cancel.h>
> -#include <scratch_buffer.h>
> #include <stdio.h>
> #include <stdio_ext.h>
> +#include <sys/mman.h>
> #include <sys/sysinfo.h>
> #include <sysdep.h>
>
> +/* Compute the population count of the entire array. */
> +static int
> +__get_nprocs_count (const unsigned long int *array, size_t length)
> +{
> + int count = 0;
> + for (size_t i = 0; i < length; ++i)
> + if (__builtin_add_overflow (count, __builtin_popcountl (array[i]),
> + &count))
> + return INT_MAX;
> + return count;
> +}
Could we avoid replicate the same logic over different files? We have a
countbits() on posix/sched_cpucount.c, so I think it would better to move
it on a sched.h and use it instead (there is no need to really handle
overflow here, it would require a *very* large buffer...).
> +
> +/* __get_nprocs with a large buffer. */
> +static int
> +__get_nprocs_large (void)
> +{
> + /* This code cannot use scratch_buffer because it is used during
> + malloc initialization. */
> + size_t pagesize = GLRO (dl_pagesize);
> + unsigned long int *page = __mmap (0, pagesize, PROT_READ | PROT_WRITE,
> + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> + if (page == MAP_FAILED)
> + return 2;
> + int r = INTERNAL_SYSCALL_CALL (sched_getaffinity, 0, pagesize, page);
> + int count;
> + if (r > 0)
> + count = __get_nprocs_count (page, pagesize / sizeof (unsigned long int));
> + else if (r == -EINVAL)
> + /* One page is still not enough to store the bits. A more-or-less
> + arbitrary value. This assumes t hat such large systems never
> + happen in practice. */
> + count = GLRO (dl_pagesize) * CHAR_BIT;
> + else
> + count = 2;
> + __munmap (page, GLRO (dl_pagesize));
Maybe use pagesize here since you are defining it.
> + return count;
> +}
> +
> int
> __get_nprocs (void)
> {
> - struct scratch_buffer set;
> - scratch_buffer_init (&set);
> -
> - int r;
> - while (true)
> - {
> - /* The possible error are EFAULT for an invalid buffer or ESRCH for
> - invalid pid, none could happen. */
> - r = INTERNAL_SYSCALL_CALL (sched_getaffinity, 0, set.length,
> - set.data);
> - if (r > 0)
> - break;
> -
> - if (!scratch_buffer_grow (&set))
> - /* Default to an SMP system in case we cannot obtain an accurate
> - number. */
> - return 2;
> - }
> -
> - /* The scratch_buffer is aligned to max_align_t. */
> - r = __sched_cpucount (r, (const cpu_set_t *) set.data);
> -
> - scratch_buffer_free (&set);
> -
> - return r;
> + /* Fast path for most systems. The kernel expects a buffer size
> + that is a multiple of 8. */
> + unsigned long int small_buffer[1024 / CHAR_BIT / sizeof (unsigned long int)];
Use __cpu_mask instead of 'unsigned long int'.
> + int r = INTERNAL_SYSCALL_CALL (sched_getaffinity, 0,
> + sizeof (small_buffer), small_buffer);
> + if (r > 0)
> + return __get_nprocs_count (small_buffer, r / sizeof (unsigned long int));
> + else if (r == -EINVAL)
> + /* The kernel requests a larger buffer to store the data. */
> + return __get_nprocs_large ();
> + else
> + /* Some other error. 2 is conservative (not a uniprocessor
> + system, so atomics are needed). */
> + return 2;
> }
> libc_hidden_def (__get_nprocs)
> weak_alias (__get_nprocs, get_nprocs)
>
I would prefer that since now we don't iterate increasing the buffer size for
sched_getaffinity we go for a simplified version and use a large buffer instead.
Linux currently supports at maximum of 4096 cpus for most architectures:
$ find -iname Kconfig | xargs git grep -A10 -w NR_CPUS | grep -w range
arch/alpha/Kconfig- range 2 32
arch/arc/Kconfig- range 2 4096
arch/arm/Kconfig- range 2 16 if DEBUG_KMAP_LOCAL
arch/arm/Kconfig- range 2 32 if !DEBUG_KMAP_LOCAL
arch/arm64/Kconfig- range 2 4096
arch/csky/Kconfig- range 2 32
arch/hexagon/Kconfig- range 2 6 if SMP
arch/ia64/Kconfig- range 2 4096
arch/mips/Kconfig- range 2 256
arch/openrisc/Kconfig- range 2 32
arch/parisc/Kconfig- range 2 32
arch/riscv/Kconfig- range 2 32
arch/s390/Kconfig- range 2 512
arch/sh/Kconfig- range 2 32
arch/sparc/Kconfig- range 2 32 if SPARC32
arch/sparc/Kconfig- range 2 4096 if SPARC64
arch/um/Kconfig- range 1 1
arch/x86/Kconfig-# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
arch/x86/Kconfig- range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
arch/xtensa/Kconfig- range 2 32
With x86 supporting 8192:
arch/x86/Kconfig
976 config NR_CPUS_RANGE_END
977 int
978 depends on X86_64
979 default 8192 if SMP && CPUMASK_OFFSTACK
980 default 512 if SMP && !CPUMASK_OFFSTACK
981 default 1 if !SMP
So using a buffer for 16384 entries seems more than enough.
enum { max_cpus = 16384 };
__cpu_mask buffer[max_cpus / CHAR_BIT / sizeof (__cpu_mask)];
int r = INTERNAL_SYSCALL_CALL (sched_getaffinity, 0, sizeof buffer, buffer);
if (r < 0)
return 2;
else if (r == -EINVAL)
return max_cpus;
int count = 0;
for (size_t i = 0; i < max_cpus; ++i)
count += countbits (buffer[i]);
return count;
More information about the Libc-alpha
mailing list