Synchronization problem with posix_spawn

Corinna Vinschen corinna-cygwin@cygwin.com
Sun Aug 2 14:50:54 GMT 2020


On Aug  2 13:55, Corinna Vinschen wrote:
> [Moving this discussion to cygwin-developers]
> On Jul 31 10:10, Corinna Vinschen wrote:
> > On Jul 30 19:04, Ken Brown via Cygwin wrote:
> > > On 7/30/2020 1:17 PM, Corinna Vinschen wrote:
> > > > On Jul 30 13:59, Corinna Vinschen wrote:
> > > > > On Jul 29 19:12, Ken Brown via Cygwin wrote:
> > > > > > On 7/29/2020 4:17 PM, Ken Brown via Cygwin wrote:
> > > > > > > posix_spawn(p) returns before the spawned process is fully up and
> > > > > > > running.  [...]
> > > > > > I just took a look at the source, and I see that posix_spawn
> > > > > > was taken from FreeBSD.
> > > > > > [...]
> > > > > 
> > > > > Actually, this is a Cygwin problem.
> > > > > [...]
> > > > > IOW, we need a Cygwin-specific do_posix_spawn() using fork(2)
> > > > > in conjunction with some synchronization the BSD function
> > > > > gets "for free" by using its specific vfork(2).
> > > > 
> > > > Below is a POC implementation for a Cygwin-specific do_posix_spawn().
> > > > [...]
> > > > Can you give it a try?
> > > 
> > > It looks like something further is needed: 'wait' doesn't seem to recognize
> > > the spawned process.
> > 
> > Oh well.
> > [...]
> 
> I attached another patch.  This one is designed from the ground up and
> I *think* it works as desired.  I added lots of comments so the idea
> behind this patch should be clear enough.
> 
> Please give it a try.

Version 2 of the patch attached.  It occured to me belatedly, that
parent and child have to be synchronized prior to calling WFMO in
the parent.  Otherwise OpenProcess in __posix_spawn_sem_wait_and_close
may end up opening the exec'ed process rather than the forked child.


Corinna
-------------- next part --------------
>From 43450a38365d5f8516565c59722c9798dbce464e Mon Sep 17 00:00:00 2001
From: Corinna Vinschen <corinna@vinschen.de>
Date: Sat, 1 Aug 2020 21:35:58 +0200
Subject: [PATCH v2] Cygwin: posix_spawn: add Cygwin-specific code fixing process
 synchronisation

Newlib's posix_spawn has been taken from FreeBSD.  The code relies on
BSD-specific behaviour of vfork, namely the fact that vfork blocks
the parent until the child exits or calls execve as well as the fact
that the child shares parent memory in non-COW mode.

This behaviour can't be emulated by Cygwin.  Cygwin's vfork is
equivalent to fork.  This is POSIX-compliant, but it's lacking BSD's
vfork ingrained synchronization of the parent to wait for the child
calling execve, or the chance to just write a variable and the parent
will see the result.

So this requires a Cygwin-specific solution.  The core function of
posix_spawn, called do_posix_spawn is now implemented twice, once using
the BSD method, and once for Cygwin using Windows synchronization under
the hood waiting for the child to call execve and signalling errors
upstream.  The Windows specifics are hidden inside Cygwin, so newlib
only calls internal Cygwin functions.

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
---
 newlib/libc/posix/posix_spawn.c |  63 +++++++++++++
 winsup/cygwin/child_info.h      |  12 ++-
 winsup/cygwin/spawn.cc          | 152 ++++++++++++++++++++++++++++++++
 3 files changed, 224 insertions(+), 3 deletions(-)

diff --git a/newlib/libc/posix/posix_spawn.c b/newlib/libc/posix/posix_spawn.c
index 19c5cd0fe986..350fed6164fd 100644
--- a/newlib/libc/posix/posix_spawn.c
+++ b/newlib/libc/posix/posix_spawn.c
@@ -254,6 +254,68 @@ process_file_actions(const posix_spawn_file_actions_t fa)
 	return (0);
 }
 
+#ifdef __CYGWIN__
+/* Cygwin's vfork does not follow BSD vfork semantics.  Rather it's equivalent
+   to fork.  While that's POSIX compliant, the below FreeBSD implementation
+   relying on BSD vfork semantics doesn't work as expected on Cygwin.  The
+   following Cygwin-specific code handles the synchronization FreeBSD gets
+   for free by using vfork. */
+
+extern void *__posix_spawn_sem_create (void **semp);
+extern void __posix_spawn_sem_release (void *sem, int error);
+extern int __posix_spawn_sem_wait_and_close (void *sem, pid_t child_pid);
+extern int __posix_spawn_execvpe (const char *path, char * const *argv,
+				  char *const *envp, void *sem,
+				  int use_env_path);
+
+
+static int
+do_posix_spawn(pid_t *pid, const char *path,
+	const posix_spawn_file_actions_t *fa,
+	const posix_spawnattr_t *sa,
+	char * const argv[], char * const envp[], int use_env_path)
+{
+	int error;
+	void *sem;
+	pid_t p;
+
+	error = __posix_spawn_sem_create (&sem);
+	if (error)
+		return error;
+
+	p = fork();
+	switch (p) {
+	case -1:
+		return (errno);
+	case 0:
+		if (sa != NULL) {
+			error = process_spawnattr(*sa);
+			if (error) {
+				__posix_spawn_sem_release (sem, error);
+				_exit(127);
+			}
+		}
+		if (fa != NULL) {
+			error = process_file_actions(*fa);
+			if (error) {
+				__posix_spawn_sem_release (sem, error);
+				_exit(127);
+			}
+		}
+		__posix_spawn_execvpe (path, argv,
+				       envp != NULL ? envp : *p_environ,
+				       sem, use_env_path);
+		_exit(127);
+	default:
+		error = __posix_spawn_sem_wait_and_close (sem, p);
+		if (error != 0)
+			waitpid(p, NULL, WNOHANG);
+		else if (pid != NULL)
+			*pid = p;
+		return (error);
+	}
+}
+#else
 static int
 do_posix_spawn(pid_t *pid, const char *path,
 	const posix_spawn_file_actions_t *fa,
@@ -292,6 +354,7 @@ do_posix_spawn(pid_t *pid, const char *path,
 		return (error);
 	}
 }
+#endif
 
 int
 posix_spawn (pid_t *pid,
diff --git a/winsup/cygwin/child_info.h b/winsup/cygwin/child_info.h
index 2fa71ba6983f..e5aa28144772 100644
--- a/winsup/cygwin/child_info.h
+++ b/winsup/cygwin/child_info.h
@@ -37,7 +37,7 @@ enum child_status
 #define EXEC_MAGIC_SIZE sizeof(child_info)
 
 /* Change this value if you get a message indicating that it is out-of-sync. */
-#define CURR_CHILD_INFO_MAGIC 0xf4531879U
+#define CURR_CHILD_INFO_MAGIC 0xecc930b9U
 
 #define NPROCS	256
 
@@ -144,6 +144,7 @@ class child_info_spawn: public child_info
 {
   HANDLE hExeced;
   HANDLE ev;
+  HANDLE sem;
   pid_t cygpid;
 public:
   cygheap_exec_info *moreinfo;
@@ -159,6 +160,11 @@ public:
   void *operator new (size_t, void *p) __attribute__ ((nothrow)) {return p;}
   void set (child_info_types ci, bool b) { new (this) child_info_spawn (ci, b);}
   void __reg1 handle_spawn ();
+  void set_sem (HANDLE _sem)
+  {
+    /* Don't leak semaphore handle into exec'ed process. */
+    SetHandleInformation (sem = _sem, HANDLE_FLAG_INHERIT, 0);
+  }
   bool set_saw_ctrl_c ()
   {
     if (!has_execed ())
@@ -188,8 +194,8 @@ public:
   bool get_parent_handle ();
   bool has_execed_cygwin () const { return iscygwin () && has_execed (); }
   operator HANDLE& () {return hExeced;}
-  int __reg3 worker (const char *, const char *const *, const char *const [], int,
-	      int = -1, int = -1);;
+  int __reg3 worker (const char *, const char *const *, const char *const [],
+		     int, int = -1, int = -1);
 };
 
 extern child_info_spawn ch_spawn;
diff --git a/winsup/cygwin/spawn.cc b/winsup/cygwin/spawn.cc
index 3e8c8367ae9b..926cd18185e4 100644
--- a/winsup/cygwin/spawn.cc
+++ b/winsup/cygwin/spawn.cc
@@ -252,6 +252,8 @@ struct system_call_handle
 
 child_info_spawn NO_COPY ch_spawn;
 
+extern "C" void __posix_spawn_sem_release (void *sem, int error);
+
 int
 child_info_spawn::worker (const char *prog_arg, const char *const *argv,
 			  const char *const envp[], int mode,
@@ -897,6 +899,8 @@ child_info_spawn::worker (const char *prog_arg, const char *const *argv,
 		  && WaitForSingleObject (pi.hProcess, 0) == WAIT_TIMEOUT)
 		wait_for_myself ();
 	    }
+	  if (sem)
+	    __posix_spawn_sem_release (sem, 0);
 	  myself.exit (EXITCODE_NOSET);
 	  break;
 	case _P_WAIT:
@@ -1295,3 +1299,151 @@ err:
   __seterrno ();
   return -1;
 }
+
+/* The following __posix_spawn_* functions are called from newlib's posix_spawn
+   implementation.  The original code in newlib has been taken from FreeBSD,
+   and the core code relies on specific, non-portable behaviour of vfork(2).
+   Our replacement implementation uses a semaphore to synchronize parent and
+   child process. */
+
+/* Create an inheritable semaphore.  Set it to 0 (== non-signalled), so the
+   parent can wait on the semaphore immediately. */
+extern "C" int
+__posix_spawn_sem_create (void **semp)
+{
+  HANDLE sem;
+  OBJECT_ATTRIBUTES attr;
+  NTSTATUS status;
+
+  if (!semp)
+    return EINVAL;
+  InitializeObjectAttributes (&attr, NULL, OBJ_INHERIT, NULL, NULL);
+  status = NtCreateSemaphore (&sem, SEMAPHORE_ALL_ACCESS, &attr, 0, INT_MAX);
+  if (!NT_SUCCESS (status))
+    return geterrno_from_nt_status (status);
+  *semp = sem;
+  return 0;
+}
+
+/* Signal the semaphore.  "error" should be 0 if all went fine and the
+   exec'd child process is up and running, a useful POSIX error code otherwise.
+   After releasing the semaphore, the value of the semaphore reflects
+   the error code + 1.  Thus, after WFMO in__posix_spawn_sem_wait_and_close,
+   querying the value of the semaphore returns either 0 if all went well,
+   or a value > 0 equivalent to the POSIX error code. */
+extern "C" void
+__posix_spawn_sem_release (void *sem, int error)
+{
+  ReleaseSemaphore (sem, error + 1, NULL);
+}
+
+/* Helper to check the semaphore value. */
+static inline int
+__posix_spawn_sem_query (void *sem)
+{
+  SEMAPHORE_BASIC_INFORMATION sbi;
+
+  NtQuerySemaphore (sem, SemaphoreBasicInformation, &sbi, sizeof sbi, NULL);
+  return sbi.CurrentCount;
+}
+
+/* Called from parent to wait for fork/exec completion.  We're waiting for
+   the semaphore as well as the child's process handle, so even if the
+   child crashes without signalling the semaphore, we won't wait infinitely. */
+extern "C" int
+__posix_spawn_sem_wait_and_close (void *sem, pid_t child_pid)
+{
+  int ret = 0;
+  HANDLE hProcess;
+  HANDLE w4[2];
+
+  pinfo p (child_pid);
+  if (!p || p->pid != child_pid)
+    {
+      ret = ENOENT;
+      goto out;
+    }
+
+  hProcess = OpenProcess (SYNCHRONIZE, FALSE, p->dwProcessId);
+  if (hProcess == NULL)
+    {
+      ret = geterrno_from_win_error ();
+      goto out;
+    }
+
+  /* Synchronize with the forked child.  The child calls WFSO to wait for
+     the parent being ready to call WFMO.  The above process info must be
+     the one from the forked child, not the one from the exec'ed child.
+     Consequentially we have to sync parent and child before the child exec's.
+
+     The parent signals the semaphore once here to unblock the child.
+     The child's WFSO call will unsignal the semaphore again.  The parent
+     has to wait for that, as well as check if the process still exists. */
+  __posix_spawn_sem_release (sem, 0);
+  while (__posix_spawn_sem_query (sem) > 0)
+    {
+      if (WaitForSingleObject (hProcess, 0) == WAIT_OBJECT_0)
+	{
+	  ret = geterrno_from_win_error ();
+	  goto out;
+	}
+      Sleep (1);
+    }
+
+  w4[0] = sem;
+  w4[1] = hProcess;
+  switch (WaitForMultipleObjects (2, w4, FALSE, INFINITE))
+    {
+    case WAIT_OBJECT_0:
+      ret = __posix_spawn_sem_query (sem);
+      break;
+    case WAIT_OBJECT_0 + 1:
+      /* If we return here due to the child process dying, the semaphore is
+	 very likely not signalled.  Check this here and return a valid error
+	 code. */
+      ret = __posix_spawn_sem_query (sem);
+      if (ret == 0)
+	ret = ECHILD;
+      break;
+    default:
+      ret = geterrno_from_win_error ();
+      break;
+    }
+  CloseHandle (hProcess);
+
+out:
+  CloseHandle (sem);
+  return ret;
+}
+
+/* Replacement for execve/execvpe, called from forked child in newlib's
+   posix_spawn.  The relevant difference is the additional semaphore
+   so the worker method (which is not supposed to return on success)
+   can signal the semaphore after sync'ing with the exec'd child. */
+extern "C" int
+__posix_spawn_execvpe (const char *path, char * const *argv, char *const *envp,
+		       HANDLE sem, int use_env_path)
+{
+  path_conv buf;
+
+  static char *const empty_env[] = { NULL };
+  if (!envp)
+    envp = empty_env;
+  ch_spawn.set_sem (sem);
+
+  /* Sync with parent to make sure parent gets valid process data as outlined
+     in __posix_spawn_sem_wait_and_close above.
+
+     FIXME?  We don't wait infinitely, otherwise a crashed parent would block
+     the exec'ing child infinitely.  For all practical purposes this should
+     be an extrem border case.  If this still fails for some reason, we have
+     to fetch a parent process handle and WFMO. */
+  WaitForSingleObject (sem, 5000);
+
+  ch_spawn.worker (use_env_path ? (find_exec (path, buf, "PATH", FE_NNF) ?: "")
+				: path,
+		   argv, envp,
+		   _P_OVERLAY | (use_env_path ? _P_PATH_TYPE_EXEC : 0));
+  __posix_spawn_sem_release (sem, errno);
+  return -1;
+}
-- 
2.26.2



More information about the Cygwin-developers mailing list