[PATCH v2 10/13] posix: Improve thread safety of functions using environ with execveat

Adhemerval Zanella Netto adhemerval.zanella@linaro.org
Wed Nov 27 20:01:05 GMT 2024



On 28/07/24 16:03, Florian Weimer wrote:
> The environment snapshot is stored on the stack, for easy
> memory management if vfork is involved.
> ---
>  posix/execveat.c                             |  27 ++-
>  stdlib/Makefile                              |  18 +-
>  stdlib/environ_single_threaded_no_snapshot.c |  40 ++++
>  stdlib/setenv.h                              |  24 +-
>  stdlib/tst-environ-execl.c                   |  35 +++
>  stdlib/tst-environ-snapshot-skeleton.c       | 218 +++++++++++++++++++
>  6 files changed, 354 insertions(+), 8 deletions(-)
>  create mode 100644 stdlib/environ_single_threaded_no_snapshot.c
>  create mode 100644 stdlib/tst-environ-execl.c
>  create mode 100644 stdlib/tst-environ-snapshot-skeleton.c
> 
> diff --git a/posix/execveat.c b/posix/execveat.c
> index 110177ea54..2cd275d02a 100644
> --- a/posix/execveat.c
> +++ b/posix/execveat.c
> @@ -21,15 +21,38 @@
>  #include <unistd.h>
>  
>  #include <arch-execveat.h>
> +#include <stdlib/setenv.h>
>  
>  /* Replace the current process, executing PATH relative to difrd with
>     arguments argv and environment envp.
>     argv and envp are terminated by NULL pointers.  */
>  int
> -__execveat (int dirfd, const char *path, char *const argv[], char *const envp[],
> +__execveat (int dirfd, const char *path, char *const *argv, char *const *envp,
>              int flags)
>  {
> -  return __arch_execveat (dirfd, path, argv, envp, flags);
> +  if (envp == NULL || envp != __environ

Wouldn't 'envp != __environ' already handle the 'envp == NULL', assuming
that __environ is always non-NULL?

> +      || !__environ_is_from_array_list ((char **) envp)
> +      || __environ_single_threaded_no_snapshot (envp))
> +    return __arch_execveat (dirfd, path, argv, envp, flags);
> +
> +  size_t env_size = ENVIRON_STACK_SNAPSHOT_SIZE;

The size here is only the initial size, so maybe name it hinting it?

> +
> +  while (true)
> +    {
> +      /* Using an on-stack buffer is not ideal, but it is the most
> +         practical way to avoid memory leaks with vfork: New mappings
> +         created before the execveat call will be preserved in the
> +         original process before the vfork call even if the execve at
> +         call succeeds and replaces the current process.  */

Can't we use MADV_DONTFORK here? There still the small windows between
mmap return and madvise call, but I am not very confident that adding a possible
unbounded dynamic stack allocation is best way forward here. 

> +      ++env_size;               /* Make room for the null terminator.  */
> +      char *env_copy[env_size];
> +      size_t env_count = __getenvarray (env_copy, env_size);
> +      if (env_count < env_size)
> +        return __arch_execveat (dirfd, path, argv, env_copy, flags);
> +
> +      /* Retry with larger buffer.  */
> +      env_size = env_count;
> +    }
>  }
>  
>  weak_alias (__execveat, execveat)
> diff --git a/stdlib/Makefile b/stdlib/Makefile
> index e1f1848439..498ec15e91 100644
> --- a/stdlib/Makefile
> +++ b/stdlib/Makefile
> @@ -72,6 +72,7 @@ routines := \
>    drand48 \
>    drand48-iter \
>    drand48_r \
> +  environ_single_threaded_no_snapshot \
>    erand48 \
>    erand48_r \
>    exit \
> @@ -374,11 +375,20 @@ tests-container := \
>    tst-system \
>    #tests-container
>  
> +# These tests re-exec themselves, so they need to be linked
> +# statically, or use harcoded paths.
> +tests-with-reexec := \
> +  tst-environ-execl \
> +  # tests-with-reexec
> +tests += $(tests-with-reexec)
> +
>  ifeq ($(build-hardcoded-path-in-tests),yes)
>  tests += \
>    tst-empty-env \
>    # tests
> -endif
> +else # !$(build-hardcoded-path-in-tests)
> +tests-static += $(tests-with-reexec)
> +endif # !$(build-hardcoded-path-in-tests)
>  
>  LDLIBS-test-atexit-race = $(shared-thread-library)
>  LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
> @@ -631,3 +641,9 @@ $(objpfx)tst-qsort5: $(libm)
>  $(objpfx)tst-getenv-signal: $(shared-thread-library)
>  $(objpfx)tst-getenv-thread: $(shared-thread-library)
>  $(objpfx)tst-getenv-unsetenv: $(shared-thread-library)
> +# See above for $(tests-with-reexec).
> +ifeq ($(build-hardcoded-path-in-tests),yes)
> +$(objpfx)tst-environ-execl: $(shared-thread-library)
> +else
> +$(objpfx)tst-environ-execl: $(static-thread-library)
> +endif
> diff --git a/stdlib/environ_single_threaded_no_snapshot.c b/stdlib/environ_single_threaded_no_snapshot.c
> new file mode 100644
> index 0000000000..6dc1d22ea2
> --- /dev/null
> +++ b/stdlib/environ_single_threaded_no_snapshot.c
> @@ -0,0 +1,40 @@
> +/* Check for environ snapshots in single-threaded case.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <setenv.h>
> +#include <single-thread.h>
> +#include <tls.h>
> +
> +bool
> +__environ_single_threaded_no_snapshot (char *const *envp)
> +{
> +  if (!SINGLE_THREAD_P)
> +    return false;
> +
> +  const char *previous = NULL;
> +  for (char *const *ep = envp; *ep != NULL; ++ep)
> +    {
> +      if (*ep == previous)
> +        /* Must perform a snapshot to remove the duplicate.  */
> +        return false;
> +      previous = *ep;
> +    }
> +  /* No duplicates, no snapshot needed because no interference
> +     from other threads is possible.  */
> +  return true;
> +}
> diff --git a/stdlib/setenv.h b/stdlib/setenv.h
> index 69fa9367ed..3e530f0bc0 100644
> --- a/stdlib/setenv.h
> +++ b/stdlib/setenv.h
> @@ -19,8 +19,10 @@
>  #ifndef _SETENV_H
>  #define _SETENV_H
>  
> -#include <atomic.h>
> -#include <stdbool.h>
> +/* Not actually usable in rtld, but it is required for symbol discovery.  */
> +#if IS_IN (libc) || IS_IN (rtld)
> +# include <atomic.h>
> +# include <stdbool.h>
>  
>  /* We use an exponential sizing policy for environment arrays.  The
>     arrays are not deallocating during the lifetime of the process.
> @@ -59,11 +61,11 @@ size_t __getenvarray (char **result, size_t length)
>     but given that counter wrapround is probably impossible to hit
>     (2**32 operations in unsetenv concurrently with getenv), using
>     <atomic_wide_counter.h> seems unnecessary.  */
> -#if __HAVE_64B_ATOMICS
> +# if __HAVE_64B_ATOMICS
>  typedef uint64_t environ_counter;
> -#else
> +# else
>  typedef uint32_t environ_counter;
> -#endif
> +# endif
>  
>  /* Updated by unsetenv to detect multiple overwrites in getenv.  */
>  extern environ_counter __environ_counter attribute_hidden;
> @@ -77,4 +79,16 @@ extern environ_counter __environ_counter attribute_hidden;
>  int __add_to_environ (const char *name, const char *value,
>                        const char *combines, int replace) attribute_hidden;
>  
> +/* Returns true if the environ pointer ENVP (which should be equal to
> +   environ) does not need snapshotting because execution is
> +   single-threaded and there are no duplicates in the array.  */
> +bool __environ_single_threaded_no_snapshot (char *const *envp)
> +  attribute_hidden;
> +
> +#endif /* IS_IN (libc) */
> +
> +/* This seems to cover the majority of situations on the first
> +   pass and requires only a moderate amount of stack size.  */
> +enum { ENVIRON_STACK_SNAPSHOT_SIZE = 120 };
> +
>  #endif /* _SETENV_H */
> diff --git a/stdlib/tst-environ-execl.c b/stdlib/tst-environ-execl.c
> new file mode 100644
> index 0000000000..b028a94c7d
> --- /dev/null
> +++ b/stdlib/tst-environ-execl.c
> @@ -0,0 +1,35 @@
> +/* Test environment snapshots with execl.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#define PROGRAM "tst-environ-execl"
> +#include "tst-environ-snapshot-skeleton.c"
> +
> +static pid_t
> +create_process (void)
> +{
> +  pid_t pid = vfork ();
> +  if (pid < 0)
> +    FAIL_EXIT1 ("vfork: %m");
> +  if (pid == 0)
> +    {
> +      execl (self_path, self_path, "verify", NULL);
> +      printf ("error: execl failed: %m\n");
> +      _exit (17);
> +    }
> +  return pid;
> +}
> diff --git a/stdlib/tst-environ-snapshot-skeleton.c b/stdlib/tst-environ-snapshot-skeleton.c
> new file mode 100644
> index 0000000000..8548e0a9e0
> --- /dev/null
> +++ b/stdlib/tst-environ-snapshot-skeleton.c
> @@ -0,0 +1,218 @@
> +/* Test skeleton for environment snapshots.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +/* The actual test including this file needs to define the macro
> +   PROGRAM as a string literal, the name of the test program, before
> +   including this file.
> +
> +   It laso needs to define a function

s/laso/also

> +
> +   static pid_t create_process (void);
> +
> +   after including this file, describing how the subprocess with the
> +   environment snapshot is created.  */
> +
> +#include <array_length.h>
> +#include <setenv.h>
> +#include <stddef.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <support/check.h>
> +#include <support/support.h>
> +#include <support/xthread.h>
> +#include <support/xunistd.h>
> +#include <unistd.h>
> +
> +static pid_t create_process (void);
> +
> +/* Absolute path to the executable.  */
> +static char *self_path;
> +
> +/* Used to synchronize the start of each test iteration.  */
> +static pthread_barrier_t barrier;
> +
> +/* These are the variables set by the main thread.  This needs to
> +   exceed the stack reservation for the snapshot, to exercise that
> +   code path.  Variables before start_variable are not removed in the
> +   unsetenv loop.  */
> +enum
> +  {
> +    on_stack_count = ENVIRON_STACK_SNAPSHOT_SIZE,
> +    start_variable = 50,
> +  };
> +static char *variables[start_variable + on_stack_count];
> +
> +/* Number of iterations.  */
> +enum { iterations = 5000 };
> +
> +/* Check that even with concurrent unsetenv, a variable that is known
> +   to be there is found.  */
> +static void *
> +test_thread (void *ignored)
> +{
> +  for (int i = 0; i < iterations; ++i)
> +    {
> +      xpthread_barrier_wait (&barrier);
> +      pid_t pid = create_process ();
> +      int status;
> +      xwaitpid (pid, &status, 0);
> +      TEST_COMPARE (status, 0);
> +      xpthread_barrier_wait (&barrier);
> +    }
> +  return NULL;
> +}
> +
> +/* Called to verify the contents of the environment.  */
> +static void
> +verify_environ (void)
> +{
> +  TEST_COMPARE_STRING (getenv ("variable"), "value");
> +  char *changing = getenv ("changing");
> +  if (strcmp (changing, "value-1") != 0)
> +    TEST_COMPARE_STRING (changing, "value-2");
> +
> +  bool seen[array_length (variables)] = { 0, };
> +  bool seen_changing = false;
> +  bool seen_variable = false;
> +  char **ep = environ;
> +  for (int i = 0; i < start_variable; ++i)
> +    {
> +      char expected[20];
> +      snprintf (expected, sizeof (expected), "V%d=%d", i, i);
> +      TEST_COMPARE_STRING (ep[i], expected);
> +      if (ep[i] == NULL)
> +        break;
> +    }
> +
> +  if (*ep == NULL)
> +    return;
> +
> +  for (ep += start_variable; *ep != NULL; ++ep)
> +    {
> +      if (**ep == 'V')
> +        {
> +          char *eq = strchr (*ep, '=');
> +          TEST_VERIFY (eq != NULL);
> +          if (eq != NULL)
> +            {
> +              int idx = atoi (eq + 1);
> +              TEST_VERIFY (idx >= 0);
> +              TEST_VERIFY (idx < array_length (variables));
> +              if (idx >= 0 && idx < array_length (variables))
> +                {
> +                  char expected[20];
> +                  snprintf (expected, sizeof (expected), "V%d=%d", idx, idx);
> +                  TEST_COMPARE_STRING (*ep, expected);
> +                  if (seen[idx])
> +                    {
> +                      printf ("error: duplicate: %s\n", *ep);
> +                      support_record_failure ();
> +                    }
> +                  seen[idx] = true;
> +                }
> +            }
> +        }
> +      else if (strcmp (*ep, "changing=value-1") == 0
> +               || strcmp (*ep, "changing=value-2") == 0)
> +        {
> +          if (seen_changing)
> +            {
> +              printf ("error: duplicate: %s\n", *ep);
> +              support_record_failure ();
> +            }
> +          seen_changing = true;
> +        }
> +      else
> +        {
> +          TEST_COMPARE_STRING (*ep, "variable=value");
> +          if (seen_variable)
> +            {
> +              printf ("error: duplicate: %s\n", *ep);
> +              support_record_failure ();
> +            }
> +          seen_variable = true;
> +        }
> +    }
> +}
> +
> +/* In the prepare function, verify the environment contents after a
> +   self-exec.  */
> +#define PREPARE do_prepare
> +static void
> +do_prepare (int argc, char *argv[])
> +{
> +  if (argc == 2 && strcmp (argv[1], "verify") == 0)
> +    {
> +      verify_environ ();
> +
> +      /* Pass errors through to the other process.  */
> +      _exit (support_record_failure_is_failed () ? 1 : 0);
> +    }
> +}
> +
> +static int
> +do_test (void)
> +{
> +  self_path = xasprintf ("%s/stdlib/%s", support_objdir_root, PROGRAM);
> +  xpthread_barrier_init (&barrier, NULL, 2);
> +  pthread_t thr = xpthread_create (NULL, test_thread, NULL);
> +
> +  for (int i = 0; i < array_length (variables); ++i)
> +    variables[i] = xasprintf ("V%d", i);
> +
> +  for (int i = 0; i < iterations; ++i)
> +    {
> +      int variable_count = array_length (variables);
> +      if ((iterations % 2) == 0)
> +        /* Use the on-stack code path in __environ_snapshot_get.  */
> +        variable_count = on_stack_count - 1;
> +
> +      clearenv ();
> +      for (int j = 0; j < variable_count; ++j)
> +        {
> +          /* Add the changing variable somewhere in the middle.  */
> +          if (j == start_variable + 5)
> +            TEST_COMPARE (setenv ("changing", "value-2", 1), 0);
> +          TEST_COMPARE (setenv (variables[j], variables[j] + 1, 1), 0);
> +        }
> +      TEST_COMPARE (setenv ("variable", "value", 1), 0);
> +      xpthread_barrier_wait (&barrier);
> +
> +      /* Leave a certain amount of variables in place.  */
> +      for (int j = start_variable; j < variable_count; ++j)
> +        {
> +          /* Every few iterations, change the variable.  */
> +          if ((j % 4) == 0)
> +            TEST_COMPARE (setenv ("changing",
> +                                  ((j / 4) % 2) == 0 ? "value-1" : "value-2",
> +                                  1), 0);
> +          TEST_COMPARE (unsetenv (variables[j]), 0);
> +        }
> +
> +      xpthread_barrier_wait (&barrier);
> +    }
> +  xpthread_join (thr);
> +  xpthread_barrier_destroy (&barrier);
> +  for (int i = 0; i < array_length (variables); ++i)
> +    free (variables[i]);
> +  free (self_path);
> +  return 0;
> +}
> +
> +#include <support/test-driver.c>



More information about the Libc-alpha mailing list