pthread_cancel/pthread_join deadlock
Martin Koegler
martin.koegler@chello.at
Thu Dec 19 21:44:00 GMT 2013
The following test program very likely produces a lockup if it is started via GDB:
=============
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include <poll.h>
/*
 * Worker thread body for the reproducer.
 *
 * Calls poll() 100 times on a single pollfd (fd 0, events POLLPRI) with a
 * timeout argument of 10, then returns NULL. The arg parameter is unused.
 * Neither poll()'s return value nor f.revents is examined; the call is here
 * only so the thread spends its time inside poll().
 */
void* thread(void* arg)
{
int i = 0;
do{
struct pollfd f;
f.fd = 0;
f.events = POLLPRI;
/* In the hung state captured by this report, the cancelled thread's
   backtrace is inside this poll() call. */
poll(&f, 1, 10);
i++;
} while(i < 100);
return NULL;
}
#define CNT 5
/*
 * Reproducer driver. Loops forever; the final return is never reached.
 *
 * Each pass of the outer loop:
 *   1. prints "New" and starts CNT threads running thread(), storing their
 *      ids in t[];
 *   2. prints "Stop", then for every slot cancels the thread, joins it, and
 *      immediately starts a replacement thread in the same slot.
 *
 * The replacement threads from step 2 are never cancelled or joined: step 1
 * of the next pass overwrites their ids in t[]. Return values of the
 * pthread_* calls are not checked, and res is written by pthread_join()
 * but never read.
 */
int main()
{
void*res;
int i;
pthread_t t[CNT];
do{
printf("New\n");
for(i = 0; i < CNT; i++)
pthread_create(&t[i], NULL, thread, 0);
printf("Stop\n");
for(i = 0; i < CNT; i++)
{
pthread_cancel(t[i]);
/* The main-thread backtrace in this report is blocked inside this join. */
pthread_join(t[i], &res);
pthread_create(&t[i], NULL, thread, 0);
}
}while(1);
return 0;
}
===========
Program received signal SIGINT, Interrupt.
0x00007ffff7bc8e75 in pthread_join (threadid=140736042649344, thread_return=0x7fffffffe140) at pthread_join.c:89
89 in pthread_join.c
(gdb) info threads
Id Target Id Frame
312 Thread 0x7fffa9d44700 (LWP 28096) "t" __libc_disable_asynccancel () at ../nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S:116
* 1 Thread 0x7ffff7fb6700 (LWP 27783) "t" 0x00007ffff7bc8e75 in pthread_join (threadid=140736042649344, thread_return=0x7fffffffe140) at pthread_join.c:89
(gdb) bt
#0 0x00007ffff7bc8e75 in pthread_join (threadid=140736042649344, thread_return=0x7fffffffe140) at pthread_join.c:89
#1 0x000000000040077e in main () at t.c:34
(gdb) info registers
rax 0xfffffffffffffe00 -512
rbx 0x7fffa9d44700 140736042649344
rcx 0xffffffffffffffff -1
rdx 0x6dc0 28096
rsi 0x0 0
rdi 0x7fffa9d449d0 140736042650064
rbp 0x7fffffffe140 0x7fffffffe140
rsp 0x7fffffffe0b0 0x7fffffffe0b0
r8 0x0 0
r9 0x7ffff7fb6700 140737353836288
r10 0x0 0
r11 0x246 582
r12 0x7fffffffe0c0 140737488347328
r13 0x7fffa9d44d28 140736042650920
r14 0x7ffff7fb6700 140737353836288
r15 0x0 0
rip 0x7ffff7bc8e75 0x7ffff7bc8e75 <pthread_join+277>
eflags 0x246 [ PF ZF IF ]
cs 0x33 51
ss 0x2b 43
ds 0x0 0
es 0x0 0
fs 0x0 0
gs 0x0 0
(gdb) disassemble
Dump of assembler code for function pthread_join:
0x00007ffff7bc8d60 <+0>: mov %rbx,-0x28(%rsp)
0x00007ffff7bc8d65 <+5>: mov %rbp,-0x20(%rsp)
0x00007ffff7bc8d6a <+10>: mov %rdi,%rbx
0x00007ffff7bc8d6d <+13>: mov %r12,-0x18(%rsp)
0x00007ffff7bc8d72 <+18>: mov %r13,-0x10(%rsp)
0x00007ffff7bc8d77 <+23>: mov %rsi,%rbp
0x00007ffff7bc8d7a <+26>: mov %r14,-0x8(%rsp)
0x00007ffff7bc8d7f <+31>: sub $0x58,%rsp
0x00007ffff7bc8d83 <+35>: test %rdi,%rdi
0x00007ffff7bc8d86 <+38>: je 0x7ffff7bc8ecb <pthread_join+363>
0x00007ffff7bc8d8c <+44>: mov 0x2d0(%rdi),%eax
0x00007ffff7bc8d92 <+50>: test %eax,%eax
0x00007ffff7bc8d94 <+52>: js 0x7ffff7bc8ecb <pthread_join+363>
0x00007ffff7bc8d9a <+58>: cmp %rdi,0x628(%rdi)
0x00007ffff7bc8da1 <+65>: mov $0x16,%eax
0x00007ffff7bc8da6 <+70>: je 0x7ffff7bc8e3f <pthread_join+223>
0x00007ffff7bc8dac <+76>: lea 0x628(%rdi),%r13
0x00007ffff7bc8db3 <+83>: lea 0x10(%rsp),%r12
0x00007ffff7bc8db8 <+88>: lea -0x7f(%rip),%rsi # 0x7ffff7bc8d40 <cleanup>
0x00007ffff7bc8dbf <+95>: mov %fs:0x10,%r14
0x00007ffff7bc8dc8 <+104>: mov %r13,%rdx
0x00007ffff7bc8dcb <+107>: mov %r12,%rdi
0x00007ffff7bc8dce <+110>: callq 0x7ffff7bce800 <_pthread_cleanup_push>
0x00007ffff7bc8dd3 <+115>: callq 0x7ffff7bcebe0 <__pthread_enable_asynccancel>
0x00007ffff7bc8dd8 <+120>: cmp %r14,%rbx
0x00007ffff7bc8ddb <+123>: mov %eax,%r8d
0x00007ffff7bc8dde <+126>: je 0x7ffff7bc8e8d <pthread_join+301>
0x00007ffff7bc8de4 <+132>: cmp %rbx,0x628(%r14)
0x00007ffff7bc8deb <+139>: je 0x7ffff7bc8e80 <pthread_join+288>
0x00007ffff7bc8df1 <+145>: xor %eax,%eax
0x00007ffff7bc8df3 <+147>: lock cmpxchg %r14,0x0(%r13)
0x00007ffff7bc8df9 <+153>: jne 0x7ffff7bc8ec4 <pthread_join+356>
0x00007ffff7bc8dff <+159>: mov 0x2d0(%rbx),%edx
0x00007ffff7bc8e05 <+165>: test %edx,%edx
0x00007ffff7bc8e07 <+167>: jne 0x7ffff7bc8e60 <pthread_join+256>
0x00007ffff7bc8e09 <+169>: mov %r8d,%edi
0x00007ffff7bc8e0c <+172>: callq 0x7ffff7bcec40 <__pthread_disable_asynccancel>
0x00007ffff7bc8e11 <+177>: xor %esi,%esi
0x00007ffff7bc8e13 <+179>: mov %r12,%rdi
0x00007ffff7bc8e16 <+182>: callq 0x7ffff7bce820 <_pthread_cleanup_pop>
0x00007ffff7bc8e1b <+187>: test %rbp,%rbp
0x00007ffff7bc8e1e <+190>: movl $0xffffffff,0x2d0(%rbx)
0x00007ffff7bc8e28 <+200>: je 0x7ffff7bc8e35 <pthread_join+213>
0x00007ffff7bc8e2a <+202>: mov 0x630(%rbx),%rax
0x00007ffff7bc8e31 <+209>: mov %rax,0x0(%rbp)
0x00007ffff7bc8e35 <+213>: mov %rbx,%rdi
0x00007ffff7bc8e38 <+216>: callq 0x7ffff7bc7a40 <__free_tcb>
0x00007ffff7bc8e3d <+221>: xor %eax,%eax
0x00007ffff7bc8e3f <+223>: mov 0x30(%rsp),%rbx
0x00007ffff7bc8e44 <+228>: mov 0x38(%rsp),%rbp
0x00007ffff7bc8e49 <+233>: mov 0x40(%rsp),%r12
0x00007ffff7bc8e4e <+238>: mov 0x48(%rsp),%r13
0x00007ffff7bc8e53 <+243>: mov 0x50(%rsp),%r14
0x00007ffff7bc8e58 <+248>: add $0x58,%rsp
0x00007ffff7bc8e5c <+252>: retq
0x00007ffff7bc8e5d <+253>: nopl (%rax)
0x00007ffff7bc8e60 <+256>: lea 0x2d0(%rbx),%rdi
0x00007ffff7bc8e67 <+263>: xor %esi,%esi
0x00007ffff7bc8e69 <+265>: xor %r10,%r10
0x00007ffff7bc8e6c <+268>: mov $0xca,%rax
0x00007ffff7bc8e73 <+275>: syscall
=> 0x00007ffff7bc8e75 <+277>: cmpl $0x0,(%rdi)
0x00007ffff7bc8e78 <+280>: jne 0x7ffff7bc8e6c <pthread_join+268>
0x00007ffff7bc8e7a <+282>: jmp 0x7ffff7bc8e09 <pthread_join+169>
0x00007ffff7bc8e7c <+284>: nopl 0x0(%rax)
0x00007ffff7bc8e80 <+288>: testb $0x3c,0x308(%rbx)
0x00007ffff7bc8e87 <+295>: jne 0x7ffff7bc8df1 <pthread_join+145>
0x00007ffff7bc8e8d <+301>: mov 0x308(%r14),%edx
0x00007ffff7bc8e94 <+308>: mov $0x23,%eax
---Type <return> to continue, or q <return> to quit---
0x00007ffff7bc8e99 <+313>: and $0xffffffb9,%edx
0x00007ffff7bc8e9c <+316>: cmp $0x8,%edx
0x00007ffff7bc8e9f <+319>: je 0x7ffff7bc8df1 <pthread_join+145>
0x00007ffff7bc8ea5 <+325>: mov %r8d,%edi
0x00007ffff7bc8ea8 <+328>: mov %eax,0x8(%rsp)
0x00007ffff7bc8eac <+332>: callq 0x7ffff7bcec40 <__pthread_disable_asynccancel>
0x00007ffff7bc8eb1 <+337>: xor %esi,%esi
0x00007ffff7bc8eb3 <+339>: mov %r12,%rdi
0x00007ffff7bc8eb6 <+342>: callq 0x7ffff7bce820 <_pthread_cleanup_pop>
0x00007ffff7bc8ebb <+347>: mov 0x8(%rsp),%eax
0x00007ffff7bc8ebf <+351>: jmpq 0x7ffff7bc8e3f <pthread_join+223>
0x00007ffff7bc8ec4 <+356>: mov $0x16,%eax
0x00007ffff7bc8ec9 <+361>: jmp 0x7ffff7bc8ea5 <pthread_join+325>
0x00007ffff7bc8ecb <+363>: mov $0x3,%eax
0x00007ffff7bc8ed0 <+368>: jmpq 0x7ffff7bc8e3f <pthread_join+223>
End of assembler dump.
(gdb) thread 312
[Switching to thread 312 (Thread 0x7fffa9d44700 (LWP 28096))]
#0 __libc_disable_asynccancel () at ../nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S:116
116 ../nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S: No such file or directory.
(gdb) bt
#0 __libc_disable_asynccancel () at ../nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S:116
#1 0x00007ffff7906e4b in *__GI___poll (fds=<optimized out>, nfds=<optimized out>, timeout=4) at ../sysdeps/unix/sysv/linux/poll.c:89
#2 0x00000000004006e2 in thread (arg=0x0) at t.c:13
#3 0x00007ffff7bc7b50 in start_thread (arg=<optimized out>) at pthread_create.c:304
#4 0x00007ffff7911a7d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#5 0x0000000000000000 in ?? ()
(gdb) info registers
rax 0xfffffffffffffe00 -512
rbx 0x7fffa9d44700 140736042649344
rcx 0xffffffffffffffff -1
rdx 0x4 4
rsi 0x80 128
rdi 0x7fffa9d44a08 140736042650120
rbp 0x7fffa9d43ee0 0x7fffa9d43ee0
rsp 0x7fffa9d43e88 0x7fffa9d43e88
r8 0x0 0
r9 0x7fffa9d44700 140736042649344
r10 0x0 0
r11 0x202 514
r12 0x7ffff7bd21a0 140737349755296
r13 0x7fffa9d449c0 140736042650048
r14 0x7ffff7ffd040 140737354125376
r15 0x3 3
rip 0x7ffff791e5af 0x7ffff791e5af <__libc_disable_asynccancel+79>
eflags 0x202 [ IF ]
cs 0x33 51
ss 0x2b 43
ds 0x0 0
es 0x0 0
fs 0x0 0
gs 0x0 0
(gdb) disassemble
Dump of assembler code for function __libc_disable_asynccancel:
0x00007ffff791e560 <+0>: test $0x2,%edi
0x00007ffff791e566 <+6>: jne 0x7ffff791e58f <__libc_disable_asynccancel+47>
0x00007ffff791e568 <+8>: mov %fs:0x308,%eax
0x00007ffff791e570 <+16>: mov %eax,%r11d
0x00007ffff791e573 <+19>: and $0xfffffffd,%r11d
0x00007ffff791e577 <+23>: lock cmpxchg %r11d,%fs:0x308
0x00007ffff791e582 <+34>: jne 0x7ffff791e570 <__libc_disable_asynccancel+16>
0x00007ffff791e584 <+36>: mov %r11d,%eax
0x00007ffff791e587 <+39>: and $0xc,%eax
0x00007ffff791e58a <+42>: cmp $0x4,%eax
0x00007ffff791e58d <+45>: je 0x7ffff791e590 <__libc_disable_asynccancel+48>
0x00007ffff791e58f <+47>: retq
0x00007ffff791e590 <+48>: mov %fs:0x0,%rdi
0x00007ffff791e599 <+57>: mov $0xca,%eax
0x00007ffff791e59e <+62>: xor %r10,%r10
0x00007ffff791e5a1 <+65>: add $0x308,%rdi
0x00007ffff791e5a8 <+72>: mov $0x80,%esi
0x00007ffff791e5ad <+77>: syscall
=> 0x00007ffff791e5af <+79>: mov %fs:0x308,%eax
0x00007ffff791e5b7 <+87>: jmp 0x7ffff791e587 <__libc_disable_asynccancel+39>
End of assembler dump.
[OS: Debian Stable]
pthread_join wants to wait for a canceled thread (via a futex), but the canceled thread is blocked in __libc_disable_asynccancel, also waiting on a futex.
Most of the time, starting it via GDB leads to a lockup in less than a minute.
Regards,
Martin
PS: Please CC me on replies.
More information about the Libc-help
mailing list