This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.
Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Hi!

Examples/ex11.c seems to get stuck on alphaev6, alpha and i686 (all of them 4-CPU boxes) from time to time. E.g. if I cut'n'paste the ex11 invocation command from make check into a shell and run it 100 times, it locks up on i686 after some 40 invocations; on alpha it gets stuck every ~5 invocations or so. Can anybody else reproduce it?

E.g. on alpha I see:

* 3 Thread 7176 (LWP 31955) 0x20000010154 in __pthread_rwlock_timedwrlock (rwlock=0x120100fa0, abstime=0x3ffff1ff970) at restart.h:34
  2 Thread 2049 (LWP 31948) 0x20000337c4c in __poll (fds=0x20001126d00, nfds=1, timeout=2000) at ../sysdeps/unix/sysv/linux/poll.c:63
  1 Thread 1024 (LWP 31947) __sigsuspend () at ../sysdeps/unix/sysv/linux/alpha/sigsuspend.S:42

and from the output of the program it is clear that thread 3 (writer thread 6) has succeeded just once, then tried again once and never succeeded again. Yet

(gdb) bt
#0 __sigsuspend () at ../sysdeps/unix/sysv/linux/alpha/sigsuspend.S:42
#1 0x2000000bc50 in __pthread_wait_for_restart_signal (self=0x3ffff1ffa60) at pthread.c:898
#2 0x20000010154 in __pthread_rwlock_timedwrlock (rwlock=0x120100fa0, abstime=0x3ffff1ff970) at restart.h:34
#3 0x120000c74 in reader_thread (nr=0x6) at Examples/ex11.c:112
#4 0x20000009b88 in pthread_start_thread (arg=0x0) at manager.c:274
(gdb) p *rwlock
$6 = {__rw_lock = {__status = 0, __spinlock = 736}, __rw_readers = 0, __rw_writer = 0x0, __rw_read_waiting = 0x0, __rw_write_waiting = 0x0, __rw_kind = 2, __rw_pshared = 0}

I think it is in

  int was_on_queue;
  __pthread_lock (&rwlock->__rw_lock, self);
  was_on_queue = remove_from_queue (&rwlock->__rw_read_waiting, self);
  __pthread_unlock (&rwlock->__rw_lock);
  if (was_on_queue) {
    __pthread_set_own_extricate_if (self, 0);
    return ETIMEDOUT;
  }
  /* Eat the outstanding restart() from the signaller */
  suspend (self);
  ^^^ here

and for some reason was_on_queue was 0. All other reader/writer threads have finished successfully at that point; the other threads above are the manager thread and main().
On i686, something different happens: There are all but one thread sleeping, one thread is running at 100% of one CPUs time spinning forever in __pthread_unlock (gdb) bt #0 0x4000b5b3 in __pthread_unlock (lock=0x8049bc0) at spinlock.c:179 #1 0x4000cea3 in __pthread_rwlock_timedwrlock (rwlock=0x8049bc0, abstime=0xbe3ffb0c) at rwlock.c:468 #2 0x804890e in writer_thread (nr=0xa) at Examples/ex11.c:64 #3 0x4000782d in pthread_start_thread (arg=0xbe3ffc00) at manager.c:274 (gdb) p/x *lock $89 = {__status = 0xbd5ffc01, __spinlock = 0xca0} (gdb) p ((pthread_descr)0xbd5ffc00)->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock $81 = 0xbd3ffc00 (gdb) p ((pthread_descr)0xbd5ffc00)->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock $82 = 0xbdfffc00 (gdb) p ((pthread_descr)0xbd5ffc00)->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock $83 = 0xbe5ffc00 (gdb) p ((pthread_descr)0xbd5ffc00)->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock $84 = 0xbdfffc00 because there is for some reason a loop in the p_nextlock chain 
(0xbdfffc00 -> 0xbe5ffc00 and 0xbe5ffc00 -> 0xbdfffc00). The relevant thread backtraces are:

(gdb) thread 10
[Switching to thread 10 (Thread 10251 (LWP 16945))]
#0 0x40055e05 in __sigsuspend (set=0xbe5ff9ec) at ../sysdeps/unix/sysv/linux/sigsuspend.c:45
45 return INLINE_SYSCALL (rt_sigsuspend, 2, CHECK_SIGSET (set), _NSIG / 8);
(gdb) bt
#0 0x40055e05 in __sigsuspend (set=0xbe5ff9ec) at ../sysdeps/unix/sysv/linux/sigsuspend.c:45
#1 0x40009799 in __pthread_wait_for_restart_signal (self=0xbe5ffc00) at pthread.c:898
#2 0x4000b49c in __pthread_lock (lock=0x8049bc0, self=0xbe5ffc00) at spinlock.c:126
#3 0x4000cec0 in __pthread_rwlock_timedwrlock (rwlock=0x8049bc0, abstime=0xbe5ffb0c) at rwlock.c:474
#4 0x804890e in writer_thread (nr=0x9) at Examples/ex11.c:64
#5 0x4000782d in pthread_start_thread (arg=0xbe5ffc00) at manager.c:274
(gdb) thread 13
[Switching to thread 13 (Thread 13326 (LWP 16948))]
#0 0x40055e05 in __sigsuspend (set=0xbdfff9ec) at ../sysdeps/unix/sysv/linux/sigsuspend.c:45
45 return INLINE_SYSCALL (rt_sigsuspend, 2, CHECK_SIGSET (set), _NSIG / 8);
(gdb) bt
#0 0x40055e05 in __sigsuspend (set=0xbdfff9ec) at ../sysdeps/unix/sysv/linux/sigsuspend.c:45
#1 0x40009799 in __pthread_wait_for_restart_signal (self=0xbdfffc00) at pthread.c:898
#2 0x4000b49c in __pthread_lock (lock=0x8049bc0, self=0xbdfffc00) at spinlock.c:126
#3 0x4000cec0 in __pthread_rwlock_timedwrlock (rwlock=0x8049bc0, abstime=0xbdfffb0c) at rwlock.c:474
#4 0x804890e in writer_thread (nr=0xc) at Examples/ex11.c:64
#5 0x4000782d in pthread_start_thread (arg=0xbdfffc00) at manager.c:274

Jakub
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |