[PATCH] Make abort() AS-safe (Bug 26275).

Carlos O'Donell carlos@redhat.com
Sun Sep 27 14:19:52 GMT 2020


As noted in bug 26275 the abort() function is not AS-safe and the
standard says it should be.

Upstream Rust complained about this:
https://github.com/rust-lang/rust/issues/73894#issuecomment-673478761

The fix is to take the abort lock in fork and then release it in the
child and parent. This assures that you have a consistent state for
the locks.

The test case is probabilistic and will only catch the defect when
run continuously for 100-600s.  The test only runs for 20s, and so
has a chance to detect the bug if it ever comes back again.  The
fork and abort interactions are tested for livelock in the test.

The manual is updated to indicate that abort is officially considered
to be AS-safe.

The supprot/timespec.* routines are updated to help provide a pattern
that supports running given operations for a certain amount of time
e.g. TIMESPEC_BEFORE().
---
 include/stdlib.h                 |   6 +-
 manual/startup.texi              |   5 +-
 stdlib/Makefile                  |   4 +-
 stdlib/abort.c                   |  11 ++
 stdlib/tst-threaded-fork-abort.c | 168 +++++++++++++++++++++++++++++++
 support/timespec.c               |  23 ++---
 support/timespec.h               |   5 +
 sysdeps/nptl/fork.c              |  11 ++
 8 files changed, 215 insertions(+), 18 deletions(-)
 create mode 100644 stdlib/tst-threaded-fork-abort.c

diff --git a/include/stdlib.h b/include/stdlib.h
index ffcefd7b85..a2431f9139 100644
--- a/include/stdlib.h
+++ b/include/stdlib.h
@@ -315,6 +315,10 @@ extern __typeof (unsetenv) unsetenv attribute_hidden;
 extern __typeof (__strtoul_internal) __strtoul_internal attribute_hidden;
 # endif
 
-#endif
+/* Coordinate locks with fork to make abort AS-safe.  */
+void __abort_fork_lock (void) attribute_hidden;
+void __abort_fork_unlock (void) attribute_hidden;
+
+#endif /* !defined _ISOMAC */
 
 #endif  /* include/stdlib.h */
diff --git a/manual/startup.texi b/manual/startup.texi
index 21c48cd037..e38cdc0e28 100644
--- a/manual/startup.texi
+++ b/manual/startup.texi
@@ -992,10 +992,7 @@ for this function is in @file{stdlib.h}.
 
 @deftypefun void abort (void)
 @standards{ISO, stdlib.h}
-@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}}
-@c The implementation takes a recursive lock and attempts to support
-@c calls from signal handlers, but if we're in the middle of flushing or
-@c using streams, we may encounter them in inconsistent states.
+@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@aculock{} @acucorrupt{}}}
 The @code{abort} function causes abnormal program termination.  This
 does not execute cleanup functions registered with @code{atexit} or
 @code{on_exit}.
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 4615f6dfe7..cd470e53ac 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -87,7 +87,7 @@ tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \
 		   tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
 		   tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
 		   tst-setcontext6 tst-setcontext7 tst-setcontext8 \
-		   tst-setcontext9 tst-bz20544
+		   tst-setcontext9 tst-bz20544 tst-threaded-fork-abort \
 
 tests-internal	:= tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
 		   tst-tls-atexit tst-tls-atexit-nodelete
@@ -243,3 +243,5 @@ $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3
 	$(evaluate-test)
 
 $(objpfx)tst-makecontext: $(libdl)
+
+LDLIBS-tst-threaded-fork-abort = $(shared-thread-library)
diff --git a/stdlib/abort.c b/stdlib/abort.c
index df98782dd7..e7f74319d6 100644
--- a/stdlib/abort.c
+++ b/stdlib/abort.c
@@ -42,6 +42,17 @@ static int stage;
 /* We should be prepared for multiple threads trying to run abort.  */
 __libc_lock_define_initialized_recursive (static, lock);
 
+void
+__abort_fork_lock (void)
+{
+  __libc_lock_lock_recursive (lock);
+}
+
+void
+__abort_fork_unlock (void)
+{
+  __libc_lock_unlock_recursive (lock);
+}
 
 /* Cause an abnormal program termination with core-dump.  */
 void
diff --git a/stdlib/tst-threaded-fork-abort.c b/stdlib/tst-threaded-fork-abort.c
new file mode 100644
index 0000000000..67f0b09086
--- /dev/null
+++ b/stdlib/tst-threaded-fork-abort.c
@@ -0,0 +1,168 @@
+/* Verify abort is AS-safe (Bug 26275).
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <support/xthread.h>
+#include <support/xsignal.h>
+#include <support/xunistd.h>
+#include <support/support.h>
+#include <support/test-driver.h>
+#include <support/timespec.h>
+#include <support/capture_subprocess.h>
+
+static void
+handle_abort (int sig)
+{
+  int wstatus;
+  pid_t ret;
+  /* The parent has called abort, and now we wait on the child to call
+     abort.  If the child never calls abort because the abort is live
+     locked on the abort lock then the test times out with an error.
+     If the child calls abort we see that, capture it here, and exit
+     the test successfully.  We loop because we might have called abort
+     before the child was created and so we have to try again.  */
+  do
+    {
+      ret = wait (&wstatus);
+    }
+  while (ret == -1);
+
+  /* The child must have exited with SIGABRT or we fail the test.  */
+  if (WIFSIGNALED (wstatus))
+    if (WTERMSIG (wstatus) == SIGABRT)
+      _exit (0);
+
+  /* The child exit condition is not what we expected.  */
+  _exit (1);
+}
+
+static void *
+thread_abort (void *closure)
+{
+  pthread_barrier_t *barrier = (pthread_barrier_t *) closure;
+  xpthread_barrier_wait (barrier);
+  /* Attempt to abort.  */
+  abort ();
+  return NULL;
+}
+
+static void *
+thread_fork (void *closure)
+{
+  pthread_barrier_t *barrier = (pthread_barrier_t *) closure;
+  pid_t child;
+
+  /* Bug 26275: In the child the abort hangs.  This allows the parent
+     to wait for the child termination in its own SIGABRT handler.
+     We want to fork just after the aborting thread has taken the
+     abort handler lock, but we can't always time it right.  */
+  xpthread_barrier_wait (barrier);
+  child = xfork ();
+
+  if (child == 0)
+    {
+      /* Reset default disposition for SIGABRT so our own abort will
+	 terminate the child.  */
+      signal (SIGABRT, SIG_DFL);
+      /* Without the fix this hangs because the forked child observes
+	 the abort lock as held.  */
+      abort ();
+    }
+
+  return NULL;
+}
+
+/* The goal of this test is to call fork from thread B while thread A is
+   in the process of aborting.  Bug 26275 shows that if thread A is
+   holding the abort lock while thread B forks then thread B will be
+   unable to abort (livelock on the abort lock).  The fix is for
+   thread B to take the abort lock during fork setup, and then release
+   it after the fork (in both child and parent).  This test is
+   probabilistic and may not trigger the bug, but we run for long enough
+   that any issues during this abort/fork sequencing should show up in
+   long-term testing as this test failing every once in a while.
+
+   On a x86_64 VM (4 vCPU @ 3.5GHz) the test fails once every 100-600
+   seconds when run continuosly in a loop.  We want this test to fail
+   with a probability of say 10% so develpers see it at least once in
+   a one or two week development period, or at least once during the
+   release window. To achieve that we run for at least 20 seconds,
+   and set the timeout at 30 seconds (gives time for a last iteration
+   to complete).  */
+static void
+test_fork_abort (void *closure __attribute__ ((unused)))
+{
+  struct sigaction sact;
+  struct sigaction oact;
+  pthread_barrier_t barrier;
+
+  /* Wait for both threads to get in the right spot.  */
+  xpthread_barrier_init (&barrier, NULL, 2);
+
+  /* Handle the abort.  */
+  sact.sa_handler = handle_abort;
+  xsigaction (SIGABRT, &sact, &oact);
+
+  /* Start the abort thread.  */
+  xpthread_create (NULL, thread_abort, &barrier);
+
+  /* Start the fork thread.  */
+  xpthread_create (NULL, thread_fork, &barrier);
+
+  /* Wait indefinately.  We exit via the abort handler.  */
+  while (1)
+    pause ();
+}
+
+static int
+do_test (void)
+{
+  struct support_capture_subprocess result;
+  struct timespec before, after, running, end;
+
+  /* Total runtime is 20 seconds.  */
+  end = make_timespec (20, 0);
+
+  /* Measure start time.  */
+  xclock_gettime (CLOCK_MONOTONIC, &before);
+
+  /* Run test for 20 seconds.  */
+  do
+    {
+      result = support_capture_subprocess (test_fork_abort, NULL);
+      support_capture_subprocess_check (&result, "test_fork_abort", 0,
+					sc_allow_none);
+      xclock_gettime (CLOCK_MONOTONIC, &after);
+      running = timespec_sub (support_timespec_normalize (after),
+			      support_timespec_normalize (before));
+    }
+  while (TIMESPEC_BEFORE (running, end));
+
+  /* Success.  */
+  return 0;
+}
+
+#define TIMEOUT 30
+#define TEST_FUNCTION do_test
+#include <support/test-driver.c>
diff --git a/support/timespec.c b/support/timespec.c
index edbdb165ec..3d2cdfca5b 100644
--- a/support/timespec.c
+++ b/support/timespec.c
@@ -27,18 +27,17 @@ test_timespec_before_impl (const char *file, int line,
 			   const struct timespec left,
 			   const struct timespec right)
 {
-  if (left.tv_sec > right.tv_sec
-      || (left.tv_sec == right.tv_sec
-	  && left.tv_nsec > right.tv_nsec)) {
-    support_record_failure ();
-    const struct timespec diff = timespec_sub (left, right);
-    printf ("%s:%d: %jd.%09jds not before %jd.%09jds "
-	    "(difference %jd.%09jds)\n",
-	    file, line,
-	    (intmax_t) left.tv_sec, (intmax_t) left.tv_nsec,
-	    (intmax_t) right.tv_sec, (intmax_t) right.tv_nsec,
-	    (intmax_t) diff.tv_sec, (intmax_t) diff.tv_nsec);
-  }
+  if (!TIMESPEC_BEFORE(left, right))
+    {
+      support_record_failure ();
+      const struct timespec diff = timespec_sub (left, right);
+      printf ("%s:%d: %jd.%09jds not before %jd.%09jds "
+	      "(difference %jd.%09jds)\n",
+	      file, line,
+	      (intmax_t) left.tv_sec, (intmax_t) left.tv_nsec,
+	      (intmax_t) right.tv_sec, (intmax_t) right.tv_nsec,
+	      (intmax_t) diff.tv_sec, (intmax_t) diff.tv_nsec);
+    }
 }
 
 void
diff --git a/support/timespec.h b/support/timespec.h
index 1a6775a882..280844f2c0 100644
--- a/support/timespec.h
+++ b/support/timespec.h
@@ -55,6 +55,11 @@ struct timespec support_timespec_normalize (struct timespec time);
 int support_timespec_check_in_range (struct timespec expected, struct timespec observed,
 				  double lower_bound, double upper_bound);
 
+/* True if left is before right, false otherwise.  */
+#define TIMESPEC_BEFORE(left, right) \
+  ((left.tv_sec < right.tv_sec)					\
+   || (left.tv_sec == right.tv_sec				\
+       && left.tv_nsec < right.tv_nsec))
 
 /* Check that the timespec on the left represents a time before the
    time on the right. */
diff --git a/sysdeps/nptl/fork.c b/sysdeps/nptl/fork.c
index 5091a000e3..8fdc6589e5 100644
--- a/sysdeps/nptl/fork.c
+++ b/sysdeps/nptl/fork.c
@@ -66,6 +66,11 @@ __libc_fork (void)
     {
       _IO_list_lock ();
 
+      /* Acquire the abort lock.  We need to prevent other threads
+	 from aborting while we fork.  We must keep abort AS-safe
+	 and to do that we need to own the lock in the child.  */
+      __abort_fork_lock ();
+
       /* Acquire malloc locks.  This needs to come last because fork
 	 handlers may use malloc, and the libio list lock has an
 	 indirect malloc dependency as well (via the getdelim
@@ -113,6 +118,9 @@ __libc_fork (void)
 	  /* Release malloc locks.  */
 	  call_function_static_weak (__malloc_fork_unlock_child);
 
+	  /* Release the abort lock.  */
+	  __abort_fork_unlock ();
+
 	  /* Reset the file list.  These are recursive mutexes.  */
 	  fresetlockfiles ();
 
@@ -134,6 +142,9 @@ __libc_fork (void)
 	  /* Release malloc locks, parent process variant.  */
 	  call_function_static_weak (__malloc_fork_unlock_parent);
 
+	  /* Release the abort lock.  */
+	  __abort_fork_unlock ();
+
 	  /* We execute this even if the 'fork' call failed.  */
 	  _IO_list_unlock ();
 	}
-- 
2.26.2



More information about the Libc-alpha mailing list