+2015-07-05 Corinna Vinschen <corinna@vinschen.de>
+
+ * miscfuncs.cc (struct pthread_wrapper_arg): Add member guardsize.
+ (pthread_wrapper): Set thread stack guarantee according to guardsize.
+ Tweak assembler code so that $rax/$eax is not required by GCC to
+ prepare the wrapper_arg value.
+ (CygwinCreateThread): Fix deadzone handling. Drop setting a "POSIX"
+ guardpage (aka page w/ PAGE_NOACCESS). Always use Windows guard
+ pages instead. On post-XP systems (providing SetThreadStackGuarantee)
+ always set up stack Windows-like with reserved/committed areas and
+ movable guard pages. Only on XP set up stack fully committed if the
+ guardpage size is not the default system guardpage size.
+ Fill out pthread_wrapper_arg::guardsize. Improve comments.
+ * resource.cc: Implement RLIMIT_STACK Linux-like.
+ (DEFAULT_STACKSIZE): New macro.
+ (DEFAULT_STACKGUARD): Ditto.
+ (rlimit_stack_guard): New muto.
+ (rlimit_stack): New global variable holding current RLIMIT_STACK values.
+ (__set_rlimit_stack): Set rlimit_stack under lock.
+ (__get_rlimit_stack): Initialize rlimit_stack from executable header
+ and return rlimit_stack values under lock.
+ (get_rlimit_stack): Filtering function to return useful default
+ stacksize from rlimit_stack.rlim_cur value.
+ (getrlimit): Call __get_rlimit_stack in RLIMIT_STACK case.
+ (setrlimit): Call __set_rlimit_stack in RLIMIT_STACK case.
+ * thread.cc (pthread::create): Fetch default stacksize calling
+ get_rlimit_stack.
+ (pthread_attr::pthread_attr): Fetch default guardsize calling
+ wincap.def_guard_page_size.
+ (pthread_attr_getstacksize): Fetch default stacksize calling
+ get_rlimit_stack.
+ * thread.h (PTHREAD_DEFAULT_STACKSIZE): Remove.
+ (PTHREAD_DEFAULT_GUARDSIZE): Remove.
+ (get_rlimit_stack): Declare.
+
2015-07-05 Corinna Vinschen <corinna@vinschen.de>
* fhandler_process.cc (heap_info::heap_info): Disable fetching heap info
PBYTE stackaddr;
PBYTE stackbase;
PBYTE stacklimit;
+ ULONG guardsize;
};
DWORD WINAPI
The below assembler code will release the OS stack after switching to our
new stack. */
wrapper_arg.stackaddr = dealloc_addr;
-
+ /* On post-XP systems, set thread stack guarantee matching the guardsize.
+ Note that the guardsize is one page bigger than the guarantee. */
+ if (wincap.has_set_thread_stack_guarantee ()
+ && wrapper_arg.guardsize > wincap.def_guard_page_size ())
+ {
+ wrapper_arg.guardsize -= wincap.page_size ();
+ SetThreadStackGuarantee (&wrapper_arg.guardsize);
+ }
/* Initialize new _cygtls. */
_my_tls.init_thread (wrapper_arg.stackbase - CYGTLS_PADSIZE,
(DWORD (*)(void*, void*)) wrapper_arg.func);
#endif
#ifdef __x86_64__
__asm__ ("\n\
- movq %[WRAPPER_ARG], %%rbx # Load &wrapper_arg into rbx \n\
+ leaq %[WRAPPER_ARG], %%rbx # Load &wrapper_arg into rbx \n\
movq (%%rbx), %%r12 # Load thread func into r12 \n\
movq 8(%%rbx), %%r13 # Load thread arg into r13 \n\
movq 16(%%rbx), %%rcx # Load stackaddr into rcx \n\
# register r13 and then just call the function. \n\
movq %%r13, %%rcx # Move thread arg to 1st arg reg\n\
call *%%r12 # Call thread func \n"
- : : [WRAPPER_ARG] "r" (&wrapper_arg),
+ : : [WRAPPER_ARG] "o" (wrapper_arg),
[CYGTLS] "i" (CYGTLS_PADSIZE));
#else
__asm__ ("\n\
- movl %[WRAPPER_ARG], %%ebx # Load &wrapper_arg into ebx \n\
+ leal %[WRAPPER_ARG], %%ebx # Load &wrapper_arg into ebx \n\
movl (%%ebx), %%eax # Load thread func into eax \n\
movl 4(%%ebx), %%ecx # Load thread arg into ecx \n\
movl 8(%%ebx), %%edx # Load stackaddr into edx \n\
# stack in the expected spot. \n\
popl %%eax # Pop thread_func address \n\
call *%%eax # Call thread func \n"
- : : [WRAPPER_ARG] "r" (&wrapper_arg),
+ : : [WRAPPER_ARG] "o" (wrapper_arg),
[CYGTLS] "i" (CYGTLS_PADSIZE));
#endif
/* pthread::thread_init_wrapper calls pthread::exit, which
if (stackaddr)
{
- /* If the application provided the stack, just use it. */
+ /* If the application provided the stack, just use it. There won't
+ be any stack overflow handling! */
wrapper_arg->stackaddr = (PBYTE) stackaddr;
wrapper_arg->stackbase = (PBYTE) stackaddr + stacksize;
}
real_guardsize = roundup2 (guardsize, wincap.page_size ());
/* Add the guardsize to the stacksize */
real_stacksize += real_guardsize;
- /* If we use the default Windows guardpage method, we have to take
- the 2 pages dead zone into account. */
- if (real_guardsize == wincap.page_size ())
- real_stacksize += 2 * wincap.page_size ();
+ /* Take dead zone page into account, which always stays uncommitted. */
+ real_stacksize += wincap.page_size ();
/* Now roundup the result to the next allocation boundary. */
real_stacksize = roundup2 (real_stacksize,
wincap.allocation_granularity ());
#endif
if (!real_stackaddr)
return NULL;
- /* Set up committed region. Two cases: */
- if (real_guardsize != wincap.page_size ())
+ /* Set up committed region. We have two cases: */
+ if (!wincap.has_set_thread_stack_guarantee ()
+ && real_guardsize != wincap.def_guard_page_size ())
{
- /* If guardsize is set to something other than the page size, we
- commit the entire stack and, if guardsize is > 0, we set up a
- POSIX guardpage. We don't set up a Windows guardpage. */
- if (!VirtualAlloc (real_stackaddr, real_guardsize, MEM_COMMIT,
- PAGE_NOACCESS))
+ /* If guardsize is set to something other than the default guard page
+ size, and if we're running on Windows XP 32 bit, we commit the
+ entire stack, and, if guardsize is > 0, set up a guard page. */
+ real_stacklimit = (PBYTE) real_stackaddr + wincap.page_size ();
+ if (real_guardsize
+ && !VirtualAlloc (real_stacklimit, real_guardsize, MEM_COMMIT,
+ PAGE_READWRITE | PAGE_GUARD))
goto err;
- real_stacklimit = (PBYTE) real_stackaddr + real_guardsize;
- if (!VirtualAlloc (real_stacklimit, real_stacksize - real_guardsize,
+ real_stacklimit += real_guardsize;
+ if (!VirtualAlloc (real_stacklimit, real_stacksize - real_guardsize
+ - wincap.page_size (),
MEM_COMMIT, PAGE_READWRITE))
goto err;
}
else
{
- /* If guardsize is exactly the page_size, we can assume that the
- application will behave Windows conformant in terms of stack usage.
- We can especially assume that it never allocates more than one
- page at a time (alloca/_chkstk). Therefore, this is the default
- case which allows a Windows compatible stack setup with a
- reserved region, a guard page, and a commited region. We don't
- need to set up a POSIX guardpage since Windows already handles
- stack overflow: Trying to extend the stack into the last three
- pages of the stack results in a SEGV.
- We always commit 64K here, starting with the guardpage. */
+ /* Otherwise we set up the stack like the OS does, with a reserved
+ region, the guard pages, and a committed region. We commit the
+ stack commit size from the executable header, but at least
+ PTHREAD_STACK_MIN (64K). */
+ static ULONG exe_commitsize;
+
+ if (!exe_commitsize)
+ {
+ PIMAGE_DOS_HEADER dosheader;
+ PIMAGE_NT_HEADERS ntheader;
+
+ dosheader = (PIMAGE_DOS_HEADER) GetModuleHandle (NULL);
+ ntheader = (PIMAGE_NT_HEADERS)
+ ((PBYTE) dosheader + dosheader->e_lfanew);
+ exe_commitsize = ntheader->OptionalHeader.SizeOfStackCommit;
+ exe_commitsize = roundup2 (exe_commitsize, wincap.page_size ());
+ }
+ ULONG commitsize = exe_commitsize;
+ if (commitsize > real_stacksize - real_guardsize
+ - wincap.page_size ())
+ commitsize = real_stacksize - real_guardsize - wincap.page_size ();
+ else if (commitsize < PTHREAD_STACK_MIN)
+ commitsize = PTHREAD_STACK_MIN;
real_stacklimit = (PBYTE) real_stackaddr + real_stacksize
- - wincap.allocation_granularity ();
- if (!VirtualAlloc (real_stacklimit, wincap.page_size (), MEM_COMMIT,
- PAGE_READWRITE | PAGE_GUARD))
+ - commitsize - real_guardsize;
+ if (!VirtualAlloc (real_stacklimit, real_guardsize,
+ MEM_COMMIT, PAGE_READWRITE | PAGE_GUARD))
goto err;
- real_stacklimit += wincap.page_size ();
- if (!VirtualAlloc (real_stacklimit, wincap.allocation_granularity ()
- - wincap.page_size (), MEM_COMMIT,
+ real_stacklimit += real_guardsize;
+ if (!VirtualAlloc (real_stacklimit, commitsize, MEM_COMMIT,
PAGE_READWRITE))
goto err;
}
wrapper_arg->stackaddr = (PBYTE) real_stackaddr;
wrapper_arg->stackbase = (PBYTE) real_stackaddr + real_stacksize;
wrapper_arg->stacklimit = real_stacklimit;
+ wrapper_arg->guardsize = real_guardsize;
}
/* Use the STACK_SIZE_PARAM_IS_A_RESERVATION parameter so only the
minimum size for a thread stack is reserved by the OS. Note that we
return res;
}
+/* Default stacksize in case RLIMIT_STACK is RLIM_INFINITY is 2 Megs with
+ system-dependent number of guard pages. The pthread stacksize does not
+ include the guardpage size, so we have to subtract the default guardpage
+ size. Additionally the Windows stack handling disallows to commit the
+ last page, so we subtract it, too. */
+#define DEFAULT_STACKSIZE (2 * 1024 * 1024)
+#define DEFAULT_STACKGUARD (wincap.def_guard_page_size() + wincap.page_size ())
+
+muto NO_COPY rlimit_stack_guard;
+static struct rlimit rlimit_stack = { 0, RLIM_INFINITY };
+
+static void
+__set_rlimit_stack (const struct rlimit *rlp)
+{
+ rlimit_stack_guard.init ("rlimit_stack_guard")->acquire ();
+ rlimit_stack = *rlp;
+ rlimit_stack_guard.release ();
+}
+
+static void
+__get_rlimit_stack (struct rlimit *rlp)
+{
+ rlimit_stack_guard.init ("rlimit_stack_guard")->acquire ();
+ if (!rlimit_stack.rlim_cur)
+ {
+ /* Fetch the default stacksize from the executable header... */
+ PIMAGE_DOS_HEADER dosheader;
+ PIMAGE_NT_HEADERS ntheader;
+
+ dosheader = (PIMAGE_DOS_HEADER) GetModuleHandle (NULL);
+ ntheader = (PIMAGE_NT_HEADERS) ((PBYTE) dosheader + dosheader->e_lfanew);
+ rlimit_stack.rlim_cur = ntheader->OptionalHeader.SizeOfStackReserve;
+ /* ...and subtract the guardpages. */
+ rlimit_stack.rlim_cur -= DEFAULT_STACKGUARD;
+ }
+ *rlp = rlimit_stack;
+ rlimit_stack_guard.release ();
+}
+
+size_t
+get_rlimit_stack (void)
+{
+ struct rlimit rl;
+
+ __get_rlimit_stack (&rl);
+ /* RLIM_INFINITY doesn't make much sense. As in glibc, use an
+ "architecture-specific default". */
+ if (rl.rlim_cur == RLIM_INFINITY)
+ rl.rlim_cur = DEFAULT_STACKSIZE - DEFAULT_STACKGUARD;
+ /* Always return at least minimum stacksize. */
+ else if (rl.rlim_cur < PTHREAD_STACK_MIN)
+ rl.rlim_cur = PTHREAD_STACK_MIN;
+ return (size_t) rl.rlim_cur;
+}
+
extern "C" int
getrlimit (int resource, struct rlimit *rlp)
{
case RLIMIT_AS:
break;
case RLIMIT_STACK:
- PTEB teb;
- /* 2015-06-26: Originally rlim_cur returned the size of the still
- available stack area on the current stack, rlim_max the total size
- of the current stack. Two problems:
-
- - Per POSIX, RLIMIT_STACK returns "the maximum size of the initial
- thread's stack, in bytes. The implementation does not
- automatically grow the stack beyond this limit".
-
- - With the implementation of sigaltstack, the current stack is not
- necessarily the "initial thread's stack" anymore. Rather, when
- called from a signal handler running on the alternate stack,
- RLIMIT_STACK should return the size of the original stack.
-
- rlim_cur is now the size of the stack. For system-provided stacks
- it's the size between DeallocationStack and StackBase. For
- application-provided stacks (via pthread_attr_setstack),
- DeallocationStack is NULL, but StackLimit points to the bottom
- of the stack.
-
- rlim_max is set to RLIM_INFINITY since there's no hard limit
- for stack sizes on Windows. */
- teb = NtCurrentTeb ();
- rlp->rlim_cur = (rlim_t) teb->Tib.StackBase
- - (rlim_t) (teb->DeallocationStack
- ?: teb->Tib.StackLimit);
+ __get_rlimit_stack (rlp);
break;
case RLIMIT_NOFILE:
rlp->rlim_cur = getdtablesize ();
if (rlp->rlim_cur != RLIM_INFINITY)
return setdtablesize (rlp->rlim_cur);
break;
+ case RLIMIT_STACK:
+ __set_rlimit_stack (rlp);
+ break;
default:
set_errno (EINVAL);
__leave;