Implement correct RLIMIT_STACK handling

author Corinna Vinschen <corinna@vinschen.de>

Sun, 5 Jul 2015 13:51:37 +0000 (15:51 +0200)

committer Corinna Vinschen <corinna@vinschen.de>

Sun, 5 Jul 2015 13:51:37 +0000 (15:51 +0200)
author Corinna Vinschen <corinna@vinschen.de>
Sun, 5 Jul 2015 13:51:37 +0000 (15:51 +0200)
committer Corinna Vinschen <corinna@vinschen.de>
Sun, 5 Jul 2015 13:51:37 +0000 (15:51 +0200)
diff --git a/winsup/cygwin/ChangeLog b/winsup/cygwin/ChangeLog

index a1f49571d8f8551bfb2e5a84893377ae42b20d42..82a8a58b3850420118f1a3b07c141a9cc7a441a4 100644 (file)
--- a/winsup/cygwin/ChangeLog
+++ b/winsup/cygwin/ChangeLog
@@ -1,3 +1,38 @@
+2015-07-05  Corinna Vinschen  <corinna@vinschen.de>
+
+       * miscfuncs.cc (struct pthread_wrapper_arg): Add member guardsize.
+       (pthread_wrapper): Set thread stack guarantee according to guardsize.
+       Tweak assembler code so that $rax/$eax is not required by GCC to
+       prepare the wrapper_arg value.
+       (CygwinCreateThread): Fix deadzone handling.  Drop setting a "POSIX"
+       guardpage (aka page w/ PAGE_NOACCESS).  Always use Windows guard
+       pages instead.  On post-XP systems (providing SetThreadStackGuarantee)
+       always set up stack Windows-like with reserved/committed areas and
+       movable guard pages.  Only on XP set up stack fully committed if the
+       guardpage size is not the default system guardpage size.
+       Fill out pthread_wrapper_arg::guardsize.  Improve comments.
+       * resource.cc: Implement RLIMIT_STACK Linux-like.
+       (DEFAULT_STACKSIZE): New macro.
+       (DEFAULT_STACKGUARD): Ditto.
+       (rlimit_stack_guard): New muto.
+       (rlimit_stack): New global variable holding current RLIMIT_STACK values.
+       (__set_rlimit_stack): Set rlimit_stack under lock.
+       (__get_rlimit_stack): Initialize rlimit_stack from executable header
+       and return rlimit_stack values under lock.
+       (get_rlimit_stack): Filtering function to return useful default
+       stacksize from rlimit_stack.rlim_cur value.
+       (getrlimit): Call __get_rlimit_stack in RLIMIT_STACK case.
+       (setrlimit): Call __set_rlimit_stack in RLIMIT_STACK case.
+       * thread.cc (pthread::create): Fetch default stacksize calling
+       get_rlimit_stack.
+       (pthread_attr::pthread_attr): Fetch default guardsize calling
+       wincap.def_guard_page_size.
+       (pthread_attr_getstacksize): Fetch default stacksize calling
+       get_rlimit_stack.
+       * thread.h (PTHREAD_DEFAULT_STACKSIZE): Remove.
+       (PTHREAD_DEFAULT_GUARDSIZE): Remove.
+       (get_rlimit_stack): Declare.
+
  2015-07-05  Corinna Vinschen  <corinna@vinschen.de>
  
         * fhandler_process.cc (heap_info::heap_info): Disable fetching heap info
diff --git a/winsup/cygwin/miscfuncs.cc b/winsup/cygwin/miscfuncs.cc

index 7f324b94601f5ac556c8f6a987bc5fad8d63352e..4a7a1b858a756f5b0f017fbae7b88702008d6708 100644 (file)
--- a/winsup/cygwin/miscfuncs.cc
+++ b/winsup/cygwin/miscfuncs.cc
@@ -560,6 +560,7 @@ struct pthread_wrapper_arg
    PBYTE stackaddr;
    PBYTE stackbase;
    PBYTE stacklimit;
+  ULONG guardsize;
  };
  
  DWORD WINAPI
@@ -592,7 +593,14 @@ pthread_wrapper (PVOID arg)
       The below assembler code will release the OS stack after switching to our
       new stack. */
    wrapper_arg.stackaddr = dealloc_addr;
-
+  /* On post-XP systems, set thread stack guarantee matching the guardsize.
+     Note that the guardsize is one page bigger than the guarantee. */
+  if (wincap.has_set_thread_stack_guarantee ()
+      && wrapper_arg.guardsize > wincap.def_guard_page_size ())
+    {
+      wrapper_arg.guardsize -= wincap.page_size ();
+      SetThreadStackGuarantee (&wrapper_arg.guardsize);
+    }
    /* Initialize new _cygtls. */
    _my_tls.init_thread (wrapper_arg.stackbase - CYGTLS_PADSIZE,
                        (DWORD (*)(void*, void*)) wrapper_arg.func);
@@ -632,7 +640,7 @@ pthread_wrapper (PVOID arg)
  #endif
  #ifdef __x86_64__
    __asm__ ("\n\
-          movq  %[WRAPPER_ARG], %%rbx  # Load &wrapper_arg into rbx    \n\
+          leaq  %[WRAPPER_ARG], %%rbx  # Load &wrapper_arg into rbx    \n\
            movq  (%%rbx), %%r12         # Load thread func into r12     \n\
            movq  8(%%rbx), %%r13        # Load thread arg into r13      \n\
            movq  16(%%rbx), %%rcx       # Load stackaddr into rcx       \n\
@@ -652,11 +660,11 @@ pthread_wrapper (PVOID arg)
            # register r13 and then just call the function.              \n\
            movq  %%r13, %%rcx           # Move thread arg to 1st arg reg\n\
            call  *%%r12                 # Call thread func              \n"
-          : : [WRAPPER_ARG] "r" (&wrapper_arg),
+          : : [WRAPPER_ARG] "o" (wrapper_arg),
                [CYGTLS] "i" (CYGTLS_PADSIZE));
  #else
    __asm__ ("\n\
-          movl  %[WRAPPER_ARG], %%ebx  # Load &wrapper_arg into ebx    \n\
+          leal  %[WRAPPER_ARG], %%ebx  # Load &wrapper_arg into ebx    \n\
            movl  (%%ebx), %%eax         # Load thread func into eax     \n\
            movl  4(%%ebx), %%ecx        # Load thread arg into ecx      \n\
            movl  8(%%ebx), %%edx        # Load stackaddr into edx       \n\
@@ -683,7 +691,7 @@ pthread_wrapper (PVOID arg)
            # stack in the expected spot.                                \n\
            popl  %%eax                  # Pop thread_func address       \n\
            call  *%%eax                 # Call thread func              \n"
-          : : [WRAPPER_ARG] "r" (&wrapper_arg),
+          : : [WRAPPER_ARG] "o" (wrapper_arg),
                [CYGTLS] "i" (CYGTLS_PADSIZE));
  #endif
    /* pthread::thread_init_wrapper calls pthread::exit, which
@@ -777,7 +785,8 @@ CygwinCreateThread (LPTHREAD_START_ROUTINE thread_func, PVOID thread_arg,
  
    if (stackaddr)
      {
-      /* If the application provided the stack, just use it. */
+      /* If the application provided the stack, just use it.  There won't
+        be any stack overflow handling! */
        wrapper_arg->stackaddr = (PBYTE) stackaddr;
        wrapper_arg->stackbase = (PBYTE) stackaddr + stacksize;
      }
@@ -790,10 +799,8 @@ CygwinCreateThread (LPTHREAD_START_ROUTINE thread_func, PVOID thread_arg,
        real_guardsize = roundup2 (guardsize, wincap.page_size ());
        /* Add the guardsize to the stacksize */
        real_stacksize += real_guardsize;
-      /* If we use the default Windows guardpage method, we have to take
-        the 2 pages dead zone into account. */
-      if (real_guardsize == wincap.page_size ())
-         real_stacksize += 2 * wincap.page_size ();
+      /* Take dead zone page into account, which always stays uncommitted. */
+      real_stacksize += wincap.page_size ();
        /* Now roundup the result to the next allocation boundary. */
        real_stacksize = roundup2 (real_stacksize,
                                  wincap.allocation_granularity ());
@@ -811,46 +818,63 @@ CygwinCreateThread (LPTHREAD_START_ROUTINE thread_func, PVOID thread_arg,
  #endif
        if (!real_stackaddr)
         return NULL;
-      /* Set up committed region.  Two cases: */
-      if (real_guardsize != wincap.page_size ())
+      /* Set up committed region.  We have two cases: */
+      if (!wincap.has_set_thread_stack_guarantee ()
+         && real_guardsize != wincap.def_guard_page_size ())
         {
-         /* If guardsize is set to something other than the page size, we
-            commit the entire stack and, if guardsize is > 0, we set up a
-            POSIX guardpage.  We don't set up a Windows guardpage. */
-         if (!VirtualAlloc (real_stackaddr, real_guardsize, MEM_COMMIT,
-                            PAGE_NOACCESS))
+         /* If guardsize is set to something other than the default guard page
+            size, and if we're running on Windows XP 32 bit, we commit the
+            entire stack, and, if guardsize is > 0, set up a guard page. */
+         real_stacklimit = (PBYTE) real_stackaddr + wincap.page_size ();
+         if (real_guardsize
+             && !VirtualAlloc (real_stacklimit, real_guardsize, MEM_COMMIT,
+                               PAGE_READWRITE | PAGE_GUARD))
             goto err;
-         real_stacklimit = (PBYTE) real_stackaddr + real_guardsize;
-         if (!VirtualAlloc (real_stacklimit, real_stacksize - real_guardsize,
+         real_stacklimit += real_guardsize;
+         if (!VirtualAlloc (real_stacklimit, real_stacksize - real_guardsize
+                                             - wincap.page_size (),
                              MEM_COMMIT, PAGE_READWRITE))
             goto err;
         }
        else
         {
-         /* If guardsize is exactly the page_size, we can assume that the
-            application will behave Windows conformant in terms of stack usage.
-            We can especially assume that it never allocates more than one
-            page at a time (alloca/_chkstk).  Therefore, this is the default
-            case which allows a Windows compatible stack setup with a
-            reserved region, a guard page, and a commited region.  We don't
-            need to set up a POSIX guardpage since Windows already handles
-            stack overflow: Trying to extend the stack into the last three
-            pages of the stack results in a SEGV.
-            We always commit 64K here, starting with the guardpage. */
+         /* Otherwise we set up the stack like the OS does, with a reserved
+            region, the guard pages, and a committed region.  We commit the
+            stack commit size from the executable header, but at least
+            PTHREAD_STACK_MIN (64K). */
+         static ULONG exe_commitsize;
+
+         if (!exe_commitsize)
+           {
+             PIMAGE_DOS_HEADER dosheader;
+             PIMAGE_NT_HEADERS ntheader;
+
+             dosheader = (PIMAGE_DOS_HEADER) GetModuleHandle (NULL);
+             ntheader = (PIMAGE_NT_HEADERS)
+                        ((PBYTE) dosheader + dosheader->e_lfanew);
+             exe_commitsize = ntheader->OptionalHeader.SizeOfStackCommit;
+             exe_commitsize = roundup2 (exe_commitsize, wincap.page_size ());
+           }
+         ULONG commitsize = exe_commitsize;
+         if (commitsize > real_stacksize - real_guardsize
+                          - wincap.page_size ())
+           commitsize = real_stacksize - real_guardsize - wincap.page_size ();
+         else if (commitsize < PTHREAD_STACK_MIN)
+           commitsize = PTHREAD_STACK_MIN;
           real_stacklimit = (PBYTE) real_stackaddr + real_stacksize
-                               - wincap.allocation_granularity ();
-         if (!VirtualAlloc (real_stacklimit, wincap.page_size (), MEM_COMMIT,
-                            PAGE_READWRITE | PAGE_GUARD))
+                           - commitsize - real_guardsize;
+         if (!VirtualAlloc (real_stacklimit, real_guardsize,
+                            MEM_COMMIT, PAGE_READWRITE | PAGE_GUARD))
             goto err;
-         real_stacklimit += wincap.page_size ();
-         if (!VirtualAlloc (real_stacklimit, wincap.allocation_granularity ()
-                                        - wincap.page_size (), MEM_COMMIT,
+         real_stacklimit += real_guardsize;
+         if (!VirtualAlloc (real_stacklimit, commitsize, MEM_COMMIT,
                              PAGE_READWRITE))
             goto err;
         }
        wrapper_arg->stackaddr = (PBYTE) real_stackaddr;
        wrapper_arg->stackbase = (PBYTE) real_stackaddr + real_stacksize;
        wrapper_arg->stacklimit = real_stacklimit;
+      wrapper_arg->guardsize = real_guardsize;
      }
    /* Use the STACK_SIZE_PARAM_IS_A_RESERVATION parameter so only the
       minimum size for a thread stack is reserved by the OS.  Note that we
diff --git a/winsup/cygwin/release/2.1.0 b/winsup/cygwin/release/2.1.0

index da484f5f444a39523cc221ed97a4f5424a75e854..aca69cdf3c028dc2a90908fabf57f22b342eeabe 100644 (file)
--- a/winsup/cygwin/release/2.1.0
+++ b/winsup/cygwin/release/2.1.0
@@ -1,6 +1,12 @@
  What's new:
  -----------
  
+- Handle pthread stacksizes as in GLibc:  Default to RLIMIT_STACK resource.
+  Allow setting RLIMIT_STACK via setrlimit.  Default RLIMIT_STACK to the value
+  from executable header as described on
+  https://msdn.microsoft.com/en-us/library/windows/desktop/ms686774.aspx
+  Default stacksize to 2 Megs in case RLIMIT_STACK is set to RLIM_INFINITY.
+
  - First cut of an implementation to allow signal handlers running on an
    alternate signal stack.
    
diff --git a/winsup/cygwin/resource.cc b/winsup/cygwin/resource.cc

index 895ba7f33c118187f51038bf4ec05b7d2ec9e941..a5a23a4d85d420955355e445cba56fac6c2f0bf1 100644 (file)
--- a/winsup/cygwin/resource.cc
+++ b/winsup/cygwin/resource.cc
@@ -111,6 +111,61 @@ getrusage (int intwho, struct rusage *rusage_in)
    return res;
  }
  
+/* If RLIMIT_STACK is RLIM_INFINITY, the default stacksize is 2 Megs with a
+   system-dependent number of guard pages.  The pthread stacksize does not
+   include the guardpage size, so we have to subtract the default guardpage
+   size.  Additionally, the Windows stack handling does not allow committing
+   the last page, so we subtract it, too. */
+#define DEFAULT_STACKSIZE (2 * 1024 * 1024)
+#define DEFAULT_STACKGUARD (wincap.def_guard_page_size() + wincap.page_size ())
+
+muto NO_COPY rlimit_stack_guard;
+static struct rlimit rlimit_stack = { 0, RLIM_INFINITY };
+
+static void
+__set_rlimit_stack (const struct rlimit *rlp)
+{
+  rlimit_stack_guard.init ("rlimit_stack_guard")->acquire ();
+  rlimit_stack = *rlp;
+  rlimit_stack_guard.release ();
+}
+
+static void
+__get_rlimit_stack (struct rlimit *rlp)
+{
+  rlimit_stack_guard.init ("rlimit_stack_guard")->acquire ();
+  if (!rlimit_stack.rlim_cur)
+    {
+      /* Fetch the default stacksize from the executable header... */
+      PIMAGE_DOS_HEADER dosheader;
+      PIMAGE_NT_HEADERS ntheader;
+
+      dosheader = (PIMAGE_DOS_HEADER) GetModuleHandle (NULL);
+      ntheader = (PIMAGE_NT_HEADERS) ((PBYTE) dosheader + dosheader->e_lfanew);
+      rlimit_stack.rlim_cur = ntheader->OptionalHeader.SizeOfStackReserve;
+      /* ...and subtract the guardpages. */
+      rlimit_stack.rlim_cur -= DEFAULT_STACKGUARD;
+    }
+  *rlp = rlimit_stack;
+  rlimit_stack_guard.release ();
+}
+
+size_t
+get_rlimit_stack (void)
+{
+  struct rlimit rl;
+
+  __get_rlimit_stack (&rl);
+  /* RLIM_INFINITY doesn't make much sense.  As in glibc, use an
+     "architecture-specific default". */
+  if (rl.rlim_cur == RLIM_INFINITY)
+    rl.rlim_cur = DEFAULT_STACKSIZE - DEFAULT_STACKGUARD;
+  /* Always return at least minimum stacksize. */
+  else if (rl.rlim_cur < PTHREAD_STACK_MIN)
+    rl.rlim_cur = PTHREAD_STACK_MIN;
+  return (size_t) rl.rlim_cur;
+}
+
  extern "C" int
  getrlimit (int resource, struct rlimit *rlp)
  {
@@ -127,32 +182,7 @@ getrlimit (int resource, struct rlimit *rlp)
         case RLIMIT_AS:
           break;
         case RLIMIT_STACK:
-         PTEB teb;
-         /* 2015-06-26: Originally rlim_cur returned the size of the still
-            available stack area on the current stack, rlim_max the total size
-            of the current stack.  Two problems:
-
-            - Per POSIX, RLIMIT_STACK returns "the maximum size of the initial
-              thread's stack, in bytes. The implementation does not
-              automatically grow the stack beyond this limit".
-
-            - With the implementation of sigaltstack, the current stack is not
-              necessarily the "initial thread's stack" anymore.  Rather, when
-              called from a signal handler running on the alternate stack,
-              RLIMIT_STACK should return the size of the original stack.
-
-            rlim_cur is now the size of the stack.  For system-provided stacks
-            it's the size between DeallocationStack and StackBase.  For
-            application-provided stacks (via pthread_attr_setstack),
-            DeallocationStack is NULL, but StackLimit points to the bottom
-            of the stack.
-
-            rlim_max is set to RLIM_INFINITY since there's no hard limit
-            for stack sizes on Windows. */
-         teb = NtCurrentTeb ();
-         rlp->rlim_cur = (rlim_t) teb->Tib.StackBase
-                         - (rlim_t) (teb->DeallocationStack
-                                     ?: teb->Tib.StackLimit);
+         __get_rlimit_stack (rlp);
           break;
         case RLIMIT_NOFILE:
           rlp->rlim_cur = getdtablesize ();
@@ -206,6 +236,9 @@ setrlimit (int resource, const struct rlimit *rlp)
           if (rlp->rlim_cur != RLIM_INFINITY)
             return setdtablesize (rlp->rlim_cur);
           break;
+       case RLIMIT_STACK:
+         __set_rlimit_stack (rlp);
+         break;
         default:
           set_errno (EINVAL);
           __leave;
diff --git a/winsup/cygwin/thread.cc b/winsup/cygwin/thread.cc

index 9320868f5ff53ec4edf6d9085c037b84cf18455e..b92a806204390fdc2a2e4ca2c82f7ca6e455bb87 100644 (file)
--- a/winsup/cygwin/thread.cc
+++ b/winsup/cygwin/thread.cc
@@ -475,7 +475,7 @@ pthread::create (void *(*func) (void *), pthread_attr *newattr,
    mutex.lock ();
    /* stackaddr holds the uppermost stack address.  See the comments in
       pthread_attr_setstack and pthread_attr_setstackaddr for a description. */
-  ULONG stacksize = attr.stacksize ?: PTHREAD_DEFAULT_STACKSIZE;
+  ULONG stacksize = attr.stacksize ?: get_rlimit_stack ();
    PVOID stackaddr = attr.stackaddr ? ((caddr_t) attr.stackaddr - stacksize)
                                    : NULL;
    win32_obj_id = CygwinCreateThread (thread_init_wrapper, this, stackaddr,
@@ -1093,7 +1093,7 @@ pthread::resume ()
  pthread_attr::pthread_attr ():verifyable_object (PTHREAD_ATTR_MAGIC),
  joinable (PTHREAD_CREATE_JOINABLE), contentionscope (PTHREAD_SCOPE_PROCESS),
  inheritsched (PTHREAD_INHERIT_SCHED), stackaddr (NULL), stacksize (0),
-guardsize (PTHREAD_DEFAULT_GUARDSIZE)
+guardsize (wincap.def_guard_page_size ())
  {
    schedparam.sched_priority = 0;
  }
@@ -2330,7 +2330,7 @@ pthread_attr_getstacksize (const pthread_attr_t *attr, size_t *size)
    /* If the stacksize has not been set by the application, return the
       default stacksize.  Note that this is different from what
       pthread_attr_getstack returns. */
-  *size = (*attr)->stacksize ?: PTHREAD_DEFAULT_STACKSIZE;
+  *size = (*attr)->stacksize ?: get_rlimit_stack ();
    return 0;
  }
  
diff --git a/winsup/cygwin/thread.h b/winsup/cygwin/thread.h

index 3650e9509ef4016235b0dbe7deb0f714898cb3f6..a6c735885cb604aab6becbcbbe9ff3611a43a952 100644 (file)
--- a/winsup/cygwin/thread.h
+++ b/winsup/cygwin/thread.h
@@ -16,13 +16,8 @@ details. */
  #define WRITE_LOCK 1
  #define READ_LOCK  2
  
-/* Default is a 1 Megs stack with a 4K guardpage.  The pthread stacksize
-   does not include the guardpage size, so we subtract the default guardpage
-   size. Additionally, the Windows stack handling disallows to use the last
-   two pages as guard page  (tested on XP and W7).  That results in a zone of
-   three pages which have to be subtract to get the actual stack size. */
-#define PTHREAD_DEFAULT_STACKSIZE (1024 * 1024 - 3 * wincap.page_size ())
-#define PTHREAD_DEFAULT_GUARDSIZE (wincap.page_size ())
+/* resource.cc */
+extern size_t get_rlimit_stack (void);
  
  #include <pthread.h>
  #include <limits.h>
diff --git a/winsup/doc/new-features.xml b/winsup/doc/new-features.xml

index be3e3892fe3b2cd6c82a833159201ae6d6b3c185..c52574ce4d3719ca49696fe4444ae3bdaed00072 100644 (file)
--- a/winsup/doc/new-features.xml
+++ b/winsup/doc/new-features.xml
@@ -8,6 +8,14 @@
  
  <itemizedlist mark="bullet">
  
+<listitem><para>
+Handle pthread stacksizes as in GLibc:  Default to RLIMIT_STACK resource.
+Allow setting RLIMIT_STACK via setrlimit.  Default RLIMIT_STACK to the value
+from the executable header as described on the MSDN website
+<ulink url="https://msdn.microsoft.com/en-us/library/windows/desktop/ms686774.aspx">Thread Stack Size</ulink>.
+Default stacksize to 2 Megs in case RLIMIT_STACK is set to RLIM_INFINITY.
+</para></listitem>
+
  <listitem><para>
  First cut of an implementation to allow signal handlers running on an
  alternate signal stack.
author	Corinna Vinschen <corinna@vinschen.de>
	Sun, 5 Jul 2015 13:51:37 +0000 (15:51 +0200)
committer	Corinna Vinschen <corinna@vinschen.de>
	Sun, 5 Jul 2015 13:51:37 +0000 (15:51 +0200)
winsup/cygwin/ChangeLog		patch \| blob \| blame \| history
winsup/cygwin/miscfuncs.cc		patch \| blob \| blame \| history
winsup/cygwin/release/2.1.0		patch \| blob \| blame \| history
winsup/cygwin/resource.cc		patch \| blob \| blame \| history
winsup/cygwin/thread.cc		patch \| blob \| blame \| history
winsup/cygwin/thread.h		patch \| blob \| blame \| history
winsup/doc/new-features.xml		patch \| blob \| blame \| history