[PATCH 1/3] Linux: Add close_range

Florian Weimer fweimer@redhat.com
Tue Dec 22 09:05:20 GMT 2020


* Adhemerval Zanella via Libc-alpha:

> diff --git a/include/bits/unistd_ext.h b/include/bits/unistd_ext.h
> new file mode 100644
> index 0000000000..277be05746
> --- /dev/null
> +++ b/include/bits/unistd_ext.h
> @@ -0,0 +1,6 @@
> +#include_next <bits/unistd_ext.h>
> +
> +#ifndef _ISOMAC
> +extern int __close_range (unsigned int lowfd, unsigned int highfd, int flags);
> +libc_hidden_proto (__close_range);
> +#endif
> diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile

Missing __THROW?  (But see below.)

> diff --git a/sysdeps/unix/sysv/linux/bits/unistd_ext.h b/sysdeps/unix/sysv/linux/bits/unistd_ext.h
> index c315cc5cb8..799c59512c 100644
> --- a/sysdeps/unix/sysv/linux/bits/unistd_ext.h
> +++ b/sysdeps/unix/sysv/linux/bits/unistd_ext.h
> @@ -33,4 +33,15 @@
>     not detached and has not been joined.  */
>  extern __pid_t gettid (void) __THROW;
>  
> +/* Unshare the file descriptor table before closing file descriptors.  */
> +#define CLOSE_RANGE_UNSHARE     (1U << 1)
> +
> +/* Close all file descriptors in the range FD up to MAX_FD.  The flag FLAGS
> +   are define by the CLOSE_RANGE prefix.  This function behaves like close
> +   on the range, but in a fail-safe where it will either fail and not close
> +   any file descriptor or close all of them.  Returns 0 on successor or -1
> +   for failure (and sets errno accordingly).  */
> +extern int close_range (unsigned int __fd, unsigned int __max_fd,
> +			int __flags) __THROW;
> +
>  #endif

I think we generally use int for file descriptors, following POSIX.

CLOSE_RANGE_CLOEXEC is coming (but it's not in the released 5.10).  I
think it makes sense to adjust the comment to reflect that.

__THROW is incompatible with close_range as a cancellation point.  I'm
not sure if it is useful for this function to be a cancellation point,
given that it's mostly used for cleanup.  It's true that close can block
for an indefinite time for certain descriptors, but I don't know if
close_range inherits that behavior.  It certainly does not apply to
CLOSE_RANGE_CLOEXEC.

> diff --git a/sysdeps/unix/sysv/linux/close_range.c b/sysdeps/unix/sysv/linux/close_range.c
> new file mode 100644
> index 0000000000..a664e158ee
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/close_range.c

> +int
> +__close_range (unsigned int lowfd, unsigned int highfd, int flags)
> +{
> +  return SYSCALL_CANCEL (close_range, lowfd, highfd, flags);
> +}
> +libc_hidden_def (__close_range)
> +weak_alias (__close_range, close_range)

See above, this should probably be INLINE_SYSCALL_CALL.  And you could
use the syscall lists file to define the system call either way.

> diff --git a/sysdeps/unix/sysv/linux/tst-clone.h b/sysdeps/unix/sysv/linux/tst-clone.h
> new file mode 100644
> index 0000000000..2344ac5101
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/tst-clone.h
> @@ -0,0 +1,48 @@
> +/* Auxiliary wrapper to issue the clone symbol.

It's not clear to me what this means.

> +   Copyright (C) 2020 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _TST_CLONE_H
> +#define _TST_CLONE_H
> +
> +#include <sched.h>
> +#include <stackinfo.h>  /* For _STACK_GROWS_{UP,DOWN}.  */
> +
> +#define DEFINE_STACK(name, size) \
> +  char name[size] __attribute__ ((aligned))
> +
> +static inline pid_t
> +clone_call (int (*fn) (void *arg), void *arg, void *stack, size_t stack_size,
> +	    int flags)
> +{
> +#ifdef __ia64__
> +  extern int __clone2 (int (*fn) (void *arg), void *stack, size_t stack_size,
> +		       int flags, void *arg, ...);
> +  return __clone2 (f, stack, stack_size, flags, arg, /* ptid */ NULL,
> +		   /* tls */ NULL, /* ctid  */ ctid);
> +#else
> +# if _STACK_GROWS_DOWN
> +  return clone (fn, stack + stack_size, flags, arg, /* ptid */ NULL,
> +	        /* tls */ NULL, /* ctid */  NULL);
> +# elif _STACK_GROWS_UP
> +  return clone (fn, stack, flags, arg, /* ptid */ NULL, /* tls */ NULL,
> +	        &ctid);
> +# endif
> +#endif
> +}
> +
> +#endif

Should this go into support/?

> diff --git a/sysdeps/unix/sysv/linux/tst-close_range.c b/sysdeps/unix/sysv/linux/tst-close_range.c
> new file mode 100644
> index 0000000000..a685e7d359
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/tst-close_range.c

> +static unsigned int
> +first_invalid_flag (void)
> +{
> +  static const unsigned int flags[] = {
> +    CLOSE_RANGE_UNSHARE,
> +  };
> +
> +  unsigned int r = 1;
> +  for (int i = 0; i < array_length (flags); i++)
> +    if ((1U << r) & flags[i])
> +      r++;
> +  return 1U << r;
> +}
> +
> +enum
> +{
> +  maximum_fd = 100,
> +  half_fd = maximum_fd / 2,
> +  gap_1 = maximum_fd - 8,
> +  gap_0 = (maximum_fd * 3) / 4
> +};
> +
> +static void
> +close_range_test_common (int *fds, unsigned int flags)
> +{
> +  /* Basic test for invalid flags, bail out if unsupported.  */
> +  TEST_COMPARE (close_range (fds[0], fds[half_fd], first_invalid_flag()), -1);

This test is not future-proof at all.  I don't think it's valuable.

> +  if (errno == ENOSYS)
> +    FAIL_UNSUPPORTED ("close_range not supported");
> +  TEST_COMPARE (errno, EINVAL);
> +
> +  /* Close half of the descriptors and check result.  */
> +  TEST_COMPARE (close_range (fds[0], fds[half_fd], flags), 0);
> +  for (int i = 0; i <= half_fd; i++)
> +    {
> +      TEST_COMPARE (fcntl (fds[i], F_GETFL), -1);
> +      TEST_COMPARE (errno, EBADF);
> +    }

I have some difficulty figuring out whether you expect the descriptors
to be in a continuous range or not.

I think it would make sense to create one specific layout of file
descriptors and test with that.

> +static int
> +close_range_unshare_test_fn (void *arg)
> +{
> +  close_range_test_common (arg, CLOSE_RANGE_UNSHARE);
> +  exit (EXIT_SUCCESS);
> +}
> +
> +/* Check if a close_range with CLOSE_RANGE_UNSHARE issued from a subprocess
> +   created with CLONE_FILES does not close the parent file descriptor list.  */
> +static void
> +close_range_unshare_test (void)
> +{

I think there could also be a test without CLOSE_RANGE_UNSHARE that
verifies that the subprocess operators on the shared file descriptor
table.

Thanks,
Florian
-- 
Red Hat GmbH, https://de.redhat.com/ , Registered seat: Grasbrunn,
Commercial register: Amtsgericht Muenchen, HRB 153243,
Managing Directors: Charles Cachera, Brian Klemm, Laurie Krebs, Michael O'Neill



More information about the Libc-alpha mailing list