This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.

Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

linuxthreads/ex11 issues


Hi!

Examples/ex11.c seems to get stuck on alphaev6, alpha and i686 (all of them
4-CPU boxes) from time to time.
E.g. if I cut'n'paste the ex11 invocation command from make check into a shell
and run it 100 times, it locks up on i686 after some 40 invocations; on
alpha it gets stuck every ~5 invocations or so.

Can anybody else reproduce it?

E.g. on alpha I see:
* 3 Thread 7176 (LWP 31955)  0x20000010154 in __pthread_rwlock_timedwrlock (rwlock=0x120100fa0, abstime=0x3ffff1ff970)
    at restart.h:34
  2 Thread 2049 (LWP 31948)  0x20000337c4c in __poll (fds=0x20001126d00, nfds=1, timeout=2000)
    at ../sysdeps/unix/sysv/linux/poll.c:63
  1 Thread 1024 (LWP 31947)  __sigsuspend () at ../sysdeps/unix/sysv/linux/alpha/sigsuspend.S:42

and from the output of the program it is clear that thread 3 (reader
thread 6, see frame #3 below) succeeded just once, then tried again once and
never succeeded again. Yet the rwlock itself appears to be completely free:
(gdb) bt
#0  __sigsuspend () at ../sysdeps/unix/sysv/linux/alpha/sigsuspend.S:42
#1  0x2000000bc50 in __pthread_wait_for_restart_signal (self=0x3ffff1ffa60) at pthread.c:898
#2  0x20000010154 in __pthread_rwlock_timedwrlock (rwlock=0x120100fa0, abstime=0x3ffff1ff970) at restart.h:34
#3  0x120000c74 in reader_thread (nr=0x6) at Examples/ex11.c:112
#4  0x20000009b88 in pthread_start_thread (arg=0x0) at manager.c:274
(gdb) p *rwlock
$6 = {__rw_lock = {__status = 0, __spinlock = 736}, __rw_readers = 0, __rw_writer = 0x0, __rw_read_waiting = 0x0,
  __rw_write_waiting = 0x0, __rw_kind = 2, __rw_pshared = 0}

I think it is stuck in the following code:
          int was_on_queue;

          __pthread_lock (&rwlock->__rw_lock, self);
          was_on_queue = remove_from_queue (&rwlock->__rw_read_waiting, self);
          __pthread_unlock (&rwlock->__rw_lock);

          if (was_on_queue)
            {
              __pthread_set_own_extricate_if (self, 0);
              return ETIMEDOUT;
            }

          /* Eat the outstanding restart() from the signaller */
          suspend (self);
	  ^^^ here

and for some reason was_on_queue was 0.
All other reader/writer threads have finished successfully at that point;
the other threads listed above are the manager thread and main().

On i686, something different happens:
All but one of the threads are sleeping; the remaining thread is using 100%
of one CPU's time, spinning forever in __pthread_unlock:

(gdb) bt
#0  0x4000b5b3 in __pthread_unlock (lock=0x8049bc0) at spinlock.c:179
#1  0x4000cea3 in __pthread_rwlock_timedwrlock (rwlock=0x8049bc0, abstime=0xbe3ffb0c) at rwlock.c:468
#2  0x804890e in writer_thread (nr=0xa) at Examples/ex11.c:64
#3  0x4000782d in pthread_start_thread (arg=0xbe3ffc00) at manager.c:274
(gdb) p/x *lock
$89 = {__status = 0xbd5ffc01, __spinlock = 0xca0}
(gdb) p
((pthread_descr)0xbd5ffc00)->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock
$81 = 0xbd3ffc00
(gdb) p
((pthread_descr)0xbd5ffc00)->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock
$82 = 0xbdfffc00
(gdb) p
((pthread_descr)0xbd5ffc00)->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock
$83 = 0xbe5ffc00
(gdb) p
((pthread_descr)0xbd5ffc00)->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock->p_nextlock
$84 = 0xbdfffc00

It spins forever because, for some reason, there is a loop in the p_nextlock
chain (0xbdfffc00 -> 0xbe5ffc00 and 0xbe5ffc00 -> 0xbdfffc00).

The relevant thread backtraces are:
(gdb) thread 10
[Switching to thread 10 (Thread 10251 (LWP 16945))]
#0  0x40055e05 in __sigsuspend (set=0xbe5ff9ec) at ../sysdeps/unix/sysv/linux/sigsuspend.c:45
45        return INLINE_SYSCALL (rt_sigsuspend, 2, CHECK_SIGSET (set), _NSIG / 8);
(gdb) bt
#0  0x40055e05 in __sigsuspend (set=0xbe5ff9ec) at ../sysdeps/unix/sysv/linux/sigsuspend.c:45
#1  0x40009799 in __pthread_wait_for_restart_signal (self=0xbe5ffc00) at pthread.c:898
#2  0x4000b49c in __pthread_lock (lock=0x8049bc0, self=0xbe5ffc00) at spinlock.c:126
#3  0x4000cec0 in __pthread_rwlock_timedwrlock (rwlock=0x8049bc0, abstime=0xbe5ffb0c) at rwlock.c:474
#4  0x804890e in writer_thread (nr=0x9) at Examples/ex11.c:64
#5  0x4000782d in pthread_start_thread (arg=0xbe5ffc00) at manager.c:274
(gdb) thread 13
[Switching to thread 13 (Thread 13326 (LWP 16948))]
#0  0x40055e05 in __sigsuspend (set=0xbdfff9ec) at ../sysdeps/unix/sysv/linux/sigsuspend.c:45
45        return INLINE_SYSCALL (rt_sigsuspend, 2, CHECK_SIGSET (set), _NSIG / 8);
(gdb) bt
#0  0x40055e05 in __sigsuspend (set=0xbdfff9ec) at ../sysdeps/unix/sysv/linux/sigsuspend.c:45
#1  0x40009799 in __pthread_wait_for_restart_signal (self=0xbdfffc00) at pthread.c:898
#2  0x4000b49c in __pthread_lock (lock=0x8049bc0, self=0xbdfffc00) at spinlock.c:126
#3  0x4000cec0 in __pthread_rwlock_timedwrlock (rwlock=0x8049bc0, abstime=0xbdfffb0c) at rwlock.c:474
#4  0x804890e in writer_thread (nr=0xc) at Examples/ex11.c:64
#5  0x4000782d in pthread_start_thread (arg=0xbdfffc00) at manager.c:274

	Jakub

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]