[PATCH v2 10/13] posix: Improve thread safety of functions using environ with execveat
Adhemerval Zanella Netto
adhemerval.zanella@linaro.org
Wed Nov 27 20:01:05 GMT 2024
On 28/07/24 16:03, Florian Weimer wrote:
> The environment snapshot is stored on the stack, for easy
> memory management if vfork is involved.
> ---
> posix/execveat.c | 27 ++-
> stdlib/Makefile | 18 +-
> stdlib/environ_single_threaded_no_snapshot.c | 40 ++++
> stdlib/setenv.h | 24 +-
> stdlib/tst-environ-execl.c | 35 +++
> stdlib/tst-environ-snapshot-skeleton.c | 218 +++++++++++++++++++
> 6 files changed, 354 insertions(+), 8 deletions(-)
> create mode 100644 stdlib/environ_single_threaded_no_snapshot.c
> create mode 100644 stdlib/tst-environ-execl.c
> create mode 100644 stdlib/tst-environ-snapshot-skeleton.c
>
> diff --git a/posix/execveat.c b/posix/execveat.c
> index 110177ea54..2cd275d02a 100644
> --- a/posix/execveat.c
> +++ b/posix/execveat.c
> @@ -21,15 +21,38 @@
> #include <unistd.h>
>
> #include <arch-execveat.h>
> +#include <stdlib/setenv.h>
>
> /* Replace the current process, executing PATH relative to difrd with
> arguments argv and environment envp.
> argv and envp are terminated by NULL pointers. */
> int
> -__execveat (int dirfd, const char *path, char *const argv[], char *const envp[],
> +__execveat (int dirfd, const char *path, char *const *argv, char *const *envp,
> int flags)
> {
> - return __arch_execveat (dirfd, path, argv, envp, flags);
> + if (envp == NULL || envp != __environ
Wouldn't 'envp != __environ' already handle the 'envp == NULL', assuming
that __environ is always non-NULL?
> + || !__environ_is_from_array_list ((char **) envp)
> + || __environ_single_threaded_no_snapshot (envp))
> + return __arch_execveat (dirfd, path, argv, envp, flags);
> +
> + size_t env_size = ENVIRON_STACK_SNAPSHOT_SIZE;
The size here is only the initial size, so maybe name it hinting it?
> +
> + while (true)
> + {
> + /* Using an on-stack buffer is not ideal, but it is the most
> + practical way to avoid memory leaks with vfork: New mappings
> + created before the execveat call will be preserved in the
> + original process before the vfork call even if the execve at
> + call succeeds and replaces the current process. */
Can't we use MADV_DONTFORK here? There still the small windows between
mmap return and madvise call, but I am not very confident that adding a possible
unbounded dynamic stack allocation is best way forward here.
> + ++env_size; /* Make room for the null terminator. */
> + char *env_copy[env_size];
> + size_t env_count = __getenvarray (env_copy, env_size);
> + if (env_count < env_size)
> + return __arch_execveat (dirfd, path, argv, env_copy, flags);
> +
> + /* Retry with larger buffer. */
> + env_size = env_count;
> + }
> }
>
> weak_alias (__execveat, execveat)
> diff --git a/stdlib/Makefile b/stdlib/Makefile
> index e1f1848439..498ec15e91 100644
> --- a/stdlib/Makefile
> +++ b/stdlib/Makefile
> @@ -72,6 +72,7 @@ routines := \
> drand48 \
> drand48-iter \
> drand48_r \
> + environ_single_threaded_no_snapshot \
> erand48 \
> erand48_r \
> exit \
> @@ -374,11 +375,20 @@ tests-container := \
> tst-system \
> #tests-container
>
> +# These tests re-exec themselves, so they need to be linked
> +# statically, or use harcoded paths.
> +tests-with-reexec := \
> + tst-environ-execl \
> + # tests-with-reexec
> +tests += $(tests-with-reexec)
> +
> ifeq ($(build-hardcoded-path-in-tests),yes)
> tests += \
> tst-empty-env \
> # tests
> -endif
> +else # !$(build-hardcoded-path-in-tests)
> +tests-static += $(tests-with-reexec)
> +endif # !$(build-hardcoded-path-in-tests)
>
> LDLIBS-test-atexit-race = $(shared-thread-library)
> LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
> @@ -631,3 +641,9 @@ $(objpfx)tst-qsort5: $(libm)
> $(objpfx)tst-getenv-signal: $(shared-thread-library)
> $(objpfx)tst-getenv-thread: $(shared-thread-library)
> $(objpfx)tst-getenv-unsetenv: $(shared-thread-library)
> +# See above for $(tests-with-reexec).
> +ifeq ($(build-hardcoded-path-in-tests),yes)
> +$(objpfx)tst-environ-execl: $(shared-thread-library)
> +else
> +$(objpfx)tst-environ-execl: $(static-thread-library)
> +endif
> diff --git a/stdlib/environ_single_threaded_no_snapshot.c b/stdlib/environ_single_threaded_no_snapshot.c
> new file mode 100644
> index 0000000000..6dc1d22ea2
> --- /dev/null
> +++ b/stdlib/environ_single_threaded_no_snapshot.c
> @@ -0,0 +1,40 @@
> +/* Check for environ snapshots in single-threaded case.
> + Copyright (C) 2024 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <setenv.h>
> +#include <single-thread.h>
> +#include <tls.h>
> +
> +bool
> +__environ_single_threaded_no_snapshot (char *const *envp)
> +{
> + if (!SINGLE_THREAD_P)
> + return false;
> +
> + const char *previous = NULL;
> + for (char *const *ep = envp; *ep != NULL; ++ep)
> + {
> + if (*ep == previous)
> + /* Must perform a snapshot to remove the duplicate. */
> + return false;
> + previous = *ep;
> + }
> + /* No duplicates, no snapshot needed because no interference
> + from other threads is possible. */
> + return true;
> +}
> diff --git a/stdlib/setenv.h b/stdlib/setenv.h
> index 69fa9367ed..3e530f0bc0 100644
> --- a/stdlib/setenv.h
> +++ b/stdlib/setenv.h
> @@ -19,8 +19,10 @@
> #ifndef _SETENV_H
> #define _SETENV_H
>
> -#include <atomic.h>
> -#include <stdbool.h>
> +/* Not actually usable in rtld, but it is required for symbol discovery. */
> +#if IS_IN (libc) || IS_IN (rtld)
> +# include <atomic.h>
> +# include <stdbool.h>
>
> /* We use an exponential sizing policy for environment arrays. The
> arrays are not deallocating during the lifetime of the process.
> @@ -59,11 +61,11 @@ size_t __getenvarray (char **result, size_t length)
> but given that counter wrapround is probably impossible to hit
> (2**32 operations in unsetenv concurrently with getenv), using
> <atomic_wide_counter.h> seems unnecessary. */
> -#if __HAVE_64B_ATOMICS
> +# if __HAVE_64B_ATOMICS
> typedef uint64_t environ_counter;
> -#else
> +# else
> typedef uint32_t environ_counter;
> -#endif
> +# endif
>
> /* Updated by unsetenv to detect multiple overwrites in getenv. */
> extern environ_counter __environ_counter attribute_hidden;
> @@ -77,4 +79,16 @@ extern environ_counter __environ_counter attribute_hidden;
> int __add_to_environ (const char *name, const char *value,
> const char *combines, int replace) attribute_hidden;
>
> +/* Returns true if the environ pointer ENVP (which should be equal to
> + environ) does not need snapshotting because execution is
> + single-threaded and there are no duplicates in the array. */
> +bool __environ_single_threaded_no_snapshot (char *const *envp)
> + attribute_hidden;
> +
> +#endif /* IS_IN (libc) */
> +
> +/* This seems to cover the majority of situations on the first
> + pass and requires only a moderate amount of stack size. */
> +enum { ENVIRON_STACK_SNAPSHOT_SIZE = 120 };
> +
> #endif /* _SETENV_H */
> diff --git a/stdlib/tst-environ-execl.c b/stdlib/tst-environ-execl.c
> new file mode 100644
> index 0000000000..b028a94c7d
> --- /dev/null
> +++ b/stdlib/tst-environ-execl.c
> @@ -0,0 +1,35 @@
> +/* Test environment snapshots with execl.
> + Copyright (C) 2024 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define PROGRAM "tst-environ-execl"
> +#include "tst-environ-snapshot-skeleton.c"
> +
> +static pid_t
> +create_process (void)
> +{
> + pid_t pid = vfork ();
> + if (pid < 0)
> + FAIL_EXIT1 ("vfork: %m");
> + if (pid == 0)
> + {
> + execl (self_path, self_path, "verify", NULL);
> + printf ("error: execl failed: %m\n");
> + _exit (17);
> + }
> + return pid;
> +}
> diff --git a/stdlib/tst-environ-snapshot-skeleton.c b/stdlib/tst-environ-snapshot-skeleton.c
> new file mode 100644
> index 0000000000..8548e0a9e0
> --- /dev/null
> +++ b/stdlib/tst-environ-snapshot-skeleton.c
> @@ -0,0 +1,218 @@
> +/* Test skeleton for environment snapshots.
> + Copyright (C) 2024 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +/* The actual test including this file needs to define the macro
> + PROGRAM as a string literal, the name of the test program, before
> + including this file.
> +
> + It laso needs to define a function
s/laso/also
> +
> + static pid_t create_process (void);
> +
> + after including this file, describing how the subprocess with the
> + environment snapshot is created. */
> +
> +#include <array_length.h>
> +#include <setenv.h>
> +#include <stddef.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <support/check.h>
> +#include <support/support.h>
> +#include <support/xthread.h>
> +#include <support/xunistd.h>
> +#include <unistd.h>
> +
> +static pid_t create_process (void);
> +
> +/* Absolute path to the executable. */
> +static char *self_path;
> +
> +/* Used to synchronize the start of each test iteration. */
> +static pthread_barrier_t barrier;
> +
> +/* These are the variables set by the main thread. This needs to
> + exceed the stack reservation for the snapshot, to exercise that
> + code path. Variables before start_variable are not removed in the
> + unsetenv loop. */
> +enum
> + {
> + on_stack_count = ENVIRON_STACK_SNAPSHOT_SIZE,
> + start_variable = 50,
> + };
> +static char *variables[start_variable + on_stack_count];
> +
> +/* Number of iterations. */
> +enum { iterations = 5000 };
> +
> +/* Check that even with concurrent unsetenv, a variable that is known
> + to be there is found. */
> +static void *
> +test_thread (void *ignored)
> +{
> + for (int i = 0; i < iterations; ++i)
> + {
> + xpthread_barrier_wait (&barrier);
> + pid_t pid = create_process ();
> + int status;
> + xwaitpid (pid, &status, 0);
> + TEST_COMPARE (status, 0);
> + xpthread_barrier_wait (&barrier);
> + }
> + return NULL;
> +}
> +
> +/* Called to verify the contents of the environment. */
> +static void
> +verify_environ (void)
> +{
> + TEST_COMPARE_STRING (getenv ("variable"), "value");
> + char *changing = getenv ("changing");
> + if (strcmp (changing, "value-1") != 0)
> + TEST_COMPARE_STRING (changing, "value-2");
> +
> + bool seen[array_length (variables)] = { 0, };
> + bool seen_changing = false;
> + bool seen_variable = false;
> + char **ep = environ;
> + for (int i = 0; i < start_variable; ++i)
> + {
> + char expected[20];
> + snprintf (expected, sizeof (expected), "V%d=%d", i, i);
> + TEST_COMPARE_STRING (ep[i], expected);
> + if (ep[i] == NULL)
> + break;
> + }
> +
> + if (*ep == NULL)
> + return;
> +
> + for (ep += start_variable; *ep != NULL; ++ep)
> + {
> + if (**ep == 'V')
> + {
> + char *eq = strchr (*ep, '=');
> + TEST_VERIFY (eq != NULL);
> + if (eq != NULL)
> + {
> + int idx = atoi (eq + 1);
> + TEST_VERIFY (idx >= 0);
> + TEST_VERIFY (idx < array_length (variables));
> + if (idx >= 0 && idx < array_length (variables))
> + {
> + char expected[20];
> + snprintf (expected, sizeof (expected), "V%d=%d", idx, idx);
> + TEST_COMPARE_STRING (*ep, expected);
> + if (seen[idx])
> + {
> + printf ("error: duplicate: %s\n", *ep);
> + support_record_failure ();
> + }
> + seen[idx] = true;
> + }
> + }
> + }
> + else if (strcmp (*ep, "changing=value-1") == 0
> + || strcmp (*ep, "changing=value-2") == 0)
> + {
> + if (seen_changing)
> + {
> + printf ("error: duplicate: %s\n", *ep);
> + support_record_failure ();
> + }
> + seen_changing = true;
> + }
> + else
> + {
> + TEST_COMPARE_STRING (*ep, "variable=value");
> + if (seen_variable)
> + {
> + printf ("error: duplicate: %s\n", *ep);
> + support_record_failure ();
> + }
> + seen_variable = true;
> + }
> + }
> +}
> +
> +/* In the prepare function, verify the environment contents after a
> + self-exec. */
> +#define PREPARE do_prepare
> +static void
> +do_prepare (int argc, char *argv[])
> +{
> + if (argc == 2 && strcmp (argv[1], "verify") == 0)
> + {
> + verify_environ ();
> +
> + /* Pass errors through to the other process. */
> + _exit (support_record_failure_is_failed () ? 1 : 0);
> + }
> +}
> +
> +static int
> +do_test (void)
> +{
> + self_path = xasprintf ("%s/stdlib/%s", support_objdir_root, PROGRAM);
> + xpthread_barrier_init (&barrier, NULL, 2);
> + pthread_t thr = xpthread_create (NULL, test_thread, NULL);
> +
> + for (int i = 0; i < array_length (variables); ++i)
> + variables[i] = xasprintf ("V%d", i);
> +
> + for (int i = 0; i < iterations; ++i)
> + {
> + int variable_count = array_length (variables);
> + if ((iterations % 2) == 0)
> + /* Use the on-stack code path in __environ_snapshot_get. */
> + variable_count = on_stack_count - 1;
> +
> + clearenv ();
> + for (int j = 0; j < variable_count; ++j)
> + {
> + /* Add the changing variable somewhere in the middle. */
> + if (j == start_variable + 5)
> + TEST_COMPARE (setenv ("changing", "value-2", 1), 0);
> + TEST_COMPARE (setenv (variables[j], variables[j] + 1, 1), 0);
> + }
> + TEST_COMPARE (setenv ("variable", "value", 1), 0);
> + xpthread_barrier_wait (&barrier);
> +
> + /* Leave a certain amount of variables in place. */
> + for (int j = start_variable; j < variable_count; ++j)
> + {
> + /* Every few iterations, change the variable. */
> + if ((j % 4) == 0)
> + TEST_COMPARE (setenv ("changing",
> + ((j / 4) % 2) == 0 ? "value-1" : "value-2",
> + 1), 0);
> + TEST_COMPARE (unsetenv (variables[j]), 0);
> + }
> +
> + xpthread_barrier_wait (&barrier);
> + }
> + xpthread_join (thr);
> + xpthread_barrier_destroy (&barrier);
> + for (int i = 0; i < array_length (variables); ++i)
> + free (variables[i]);
> + free (self_path);
> + return 0;
> +}
> +
> +#include <support/test-driver.c>
More information about the Libc-alpha
mailing list