PR17232 variant #2: in runtime, let STP_EXIT nest within STP_START

author Frank Ch. Eigler <fche@redhat.com>

Sat, 9 Aug 2014 00:31:01 +0000 (20:31 -0400)

committer Frank Ch. Eigler <fche@redhat.com>

Sun, 10 Aug 2014 23:18:17 +0000 (19:18 -0400)
author Frank Ch. Eigler <fche@redhat.com>
Sat, 9 Aug 2014 00:31:01 +0000 (20:31 -0400)
committer Frank Ch. Eigler <fche@redhat.com>
Sun, 10 Aug 2014 23:18:17 +0000 (19:18 -0400)
diff --git a/runtime/transport/control.c b/runtime/transport/control.c

index 4151a93cd7b36a6ace1a11afd3d289bb95267bc8..5428d9d6952a01a54d9574dfc363adbc6612b6c3 100644 (file)
--- a/runtime/transport/control.c
+++ b/runtime/transport/control.c
@@ -30,7 +30,9 @@ static void _stp_handle_remote_id (struct _stp_msg_remote_id* rem);
  static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
  {
         u32 type;
-       static int started = 0;
+        // these might be accessed from reentrant calls
+       static atomic_t started = ATOMIC_INIT(0);
+       static atomic_t stopped_while_starting = ATOMIC_INIT(0);
  
  #ifdef STAPCONF_TASK_UID
         uid_t euid = current->euid;
@@ -59,18 +61,32 @@ static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, siz
  
         switch (type) {
         case STP_START:
-               if (started == 0) {
+                if (atomic_read(& started) == 0)
+                {
                         struct _stp_msg_start st;
                         if (count < sizeof(st))
                                 return 0;
                         if (copy_from_user(&st, buf, sizeof(st)))
                                 return -EFAULT;
                         _stp_handle_start(&st);
-                       started = 1;
+                        // PR17232: NB: an STP_EXIT might come in while we're still busy here.
+                        // That's OK, we'll catch it on the rebound.
+                       atomic_set (& started, 1);
+                        if (atomic_read (&stopped_while_starting)) {
+                                dbug_trans(1, "handling deferred STP_EXIT received while starting!\n");
+                                _stp_cleanup_and_exit(1);
+                        }
                 }
                 break;
         case STP_EXIT:
-               _stp_cleanup_and_exit(1);
+                // PR17232: this message can theoretically arrive
+                // while we're still handling an STP_START.
+                if (atomic_read(& started))
+                        _stp_cleanup_and_exit(1);
+                else {
+                        dbug_trans(1, "STP_EXIT received while starting!\n");
+                        atomic_set(& stopped_while_starting, 1);
+                }
                 break;
         case STP_BULK:
  #ifdef STP_BULKMODE
@@ -146,6 +162,11 @@ static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, siz
                 return -EINVAL;
         }
  
+#if defined(DEBUG_TRANS) && (DEBUG_TRANS >= 2)
+       if (type < STP_MAX_CMD)
+               dbug_trans2("Completed %s\n", _stp_command_name[type]);
+#endif
+
         return count + sizeof(u32); /* Pretend that we absorbed the entire message. */
  }
  
diff --git a/runtime/transport/transport.c b/runtime/transport/transport.c

index 7d5c9e06cb4dc9b1d0e9504e9fdfeadfda34ebeb..25cf5bfde52ed0dcdce206e31d4a7c89de1ed053 100644 (file)
--- a/runtime/transport/transport.c
+++ b/runtime/transport/transport.c
@@ -36,7 +36,8 @@ static pid_t _stp_target = 0;
  static int _stp_probes_started = 0;
  
  /* _stp_transport_mutext guards _stp_start_called and _stp_exit_called.
-   We only want to do the startup and exit sequences once.  */
+   We only want to do the startup and exit sequences once.  Note that
+   these indicate the respective process starting, not their conclusion. */
  static int _stp_start_called = 0;
  static int _stp_exit_called = 0;
  static DEFINE_MUTEX(_stp_transport_mutex);
@@ -178,18 +179,21 @@ static void _stp_cleanup_and_exit(int send_exit)
         _stp_exit_called = 1;
         mutex_unlock(&_stp_transport_mutex);
  
-       /* Note, we can be sure that the startup sequence has finished
-           if handle_exit is true because it depends on _stp_start_called
-          being set to true. _stp_start_called can only be set to true
-          in _stp_handle_start() in response to a _STP_START message on
-          the control channel. Only one writer can have the control
-          channel open at a time, so the whole startup sequence in
-          _stp_handle_start() has to be completed before another message
-          can be send.  _stp_cleanup_and_exit() can only be called through
-          either a _STP_EXIT message, which cannot arrive while _STP_START
-          is still being handled, or when the module is unloaded. The
-          module can only be unloaded when there are no more users that
-          keep the control channel open.  */
+       /* Note, before PR17232, we thought we could be sure that the
+           startup sequence has finished if handle_exit is true
+           because it depends on _stp_start_called being set to
+           true. _stp_start_called can only be set to true in
+           _stp_handle_start() in response to a _STP_START message on
+           the control channel. Only one writer can have the control
+           channel open at a time, so the whole startup sequence in
+           _stp_handle_start() has to be completed before another
+           message can be sent ... except for multiple threads /
+           signal handlers in staprun (PR17232)!
+           _stp_cleanup_and_exit() can only be called through either a
+           _STP_EXIT message, which cannot arrive while _STP_START is
+           still being handled, or when the module is unloaded. The
+           module can only be unloaded when there are no more users
+           that keep the control channel open.  */
         if (handle_exit) {
                 int failures;
  
@@ -238,7 +242,7 @@ static void _stp_request_exit(void)
  {
         static int called = 0;
         if (!called) {
-               /* we only want to do this once */
+               /* we only want to do this once; XXX: why? what's the harm? */
                 called = 1;
                 dbug_trans(1, "ctl_send STP_REQUEST_EXIT\n");
                 /* Called from the timer when _stp_exit_flag has been
author	Frank Ch. Eigler <fche@redhat.com>
	Sat, 9 Aug 2014 00:31:01 +0000 (20:31 -0400)
committer	Frank Ch. Eigler <fche@redhat.com>
	Sun, 10 Aug 2014 23:18:17 +0000 (19:18 -0400)
runtime/transport/control.c		patch \| blob \| blame \| history
runtime/transport/transport.c		patch \| blob \| blame \| history