Issue is following. 1. Create x timers, where x>1, calling timer_create, with appropriate arguments, and SIGEV_THREAD as notification method on expire. 2. Delete last created timer with timer_delete before it's expire time. 3. Wait for another timer to expire. On attempt to call notification function glibc hang with SIGSEGV in timer_helper_thread(). Tested on glibc 2.4, 2.7, 2.8, 2.9 and latest cvs head with ubuntu 8.10 on amd64. Here is test code [code] /* file timer_t.c */ #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <signal.h> #include <sys/time.h> #include <time.h> #include <errno.h> #include <pthread.h> #include <string.h> void timer_expire_func(union sigval arg) { void *obj = arg.sival_ptr; printf("%s(): called: %p\n", __FUNCTION__, obj); } pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER; int q = 0; void *thr_func(void *arg) { int qf = 0; while(1) { pthread_mutex_lock(&a); qf = q; pthread_mutex_unlock(&a); if (qf) break; sleep(2); } return NULL; } timer_t t1, t2; int main(void) { struct sigevent sigev; struct timeval expire; struct itimerval exp; sigev.sigev_notify = SIGEV_THREAD; sigev.sigev_notify_function = timer_expire_func; sigev.sigev_notify_attributes = 0; sigev.sigev_value.sival_ptr = (void *) &t1; expire.tv_sec = 10; expire.tv_usec = 0; if (timer_create(CLOCK_REALTIME, &sigev, &t1) ) { perror("timer_create"); exit(-1); } exp.it_value.tv_sec = expire.tv_sec; exp.it_value.tv_usec = expire.tv_usec; exp.it_interval.tv_sec = 0; exp.it_interval.tv_usec = 0; printf("add timer: %p, exp(s,u) = (%lu, %lu)\n", t1, expire.tv_sec, expire.tv_usec); timer_settime(t1, 0, (const struct itimerspec*)&exp, NULL); memset(&sigev, 0, sizeof(sigev)); sigev.sigev_notify = SIGEV_THREAD; sigev.sigev_notify_function = timer_expire_func; sigev.sigev_notify_attributes = 0; sigev.sigev_value.sival_ptr = (void *) &t2; expire.tv_sec = 10; expire.tv_usec = 0; if (timer_create(CLOCK_REALTIME, &sigev, &t2) ) { perror("timer_create"); exit(-1); } exp.it_value.tv_sec = expire.tv_sec; exp.it_value.tv_usec = expire.tv_usec; exp.it_interval.tv_sec = 0; exp.it_interval.tv_usec = 0; printf("add timer: %p, exp(s,u) = (%lu, %lu)\n", t2, expire.tv_sec, expire.tv_usec); timer_settime(t2, 0, (const struct itimerspec*)&exp, NULL); pthread_t ptid; pthread_create(&ptid, NULL, thr_func, NULL); timer_delete(t2); int rr; pthread_join(ptid, (void**)&rr); exit(0); [/code] # gcc -Wall -g -o timer_t timer_t.c -lrt # gdb ./timer_t (gdb) r Starting program: /tmp/timer_t [Thread debugging using libthread_db enabled] [New Thread 0x7fd8ae7e26e0 (LWP 25091)] [New Thread 0x41006950 (LWP 25092)] add timer: 0xd4c130, exp(s,u) = (10, 0) add timer: 0xd4c190, exp(s,u) = (10, 0) [New Thread 0x42260950 (LWP 25093)] Program received signal SIGSEGV, Segmentation fault. [Switching to Thread 0x41006950 (LWP 25092)] timer_helper_thread (arg=<value optimized out>) at ../nptl/sysdeps/unix/sysv/linux/timer_routines.c:114 114 runp = runp->next; (gdb) p runp $1 = (struct timer *) 0x60 (gdb) ====== So I look up into the code and found out, that SIGEV_THREAD timers, on creation, stored in one-way linked list, last created timer is in the head. Then, when delete timer, glibc found that timer in linked list and delete it from there. nptl/sysdeps/unix/sysv/linux/timer_delete.c:42 timer_delete (timerid) timer_t timerid; { # undef timer_delete # ifndef __ASSUME_POSIX_TIMERS if (__no_posix_timers >= 0) # endif { struct timer *kt = (struct timer *) timerid; /* Delete the kernel timer object. */ int res = INLINE_SYSCALL (timer_delete, 1, kt->ktimerid); if (res == 0) { if (kt->sigev_notify == SIGEV_THREAD) { /* Remove the timer from the list. */ skip skip skip } # ifndef __ASSUME_POSIX_TIMERS /* We know the syscall support is available. */ __no_posix_timers = 1; # endif /* Free the memory. */ (void) free (kt); return 0; } But when look into nptl/sysdeps/unix/sysv/linux/timer_create.c:52 timer_create (clock_id, evp, timerid) clockid_t clock_id; struct sigevent *evp; timer_t *timerid; { /* skip non SIGEV_THREAD code */ line 162 struct timer *newp; newp = (struct timer *) malloc (sizeof (struct timer)); if (newp == NULL) return -1; /* Copy the thread parameters the user provided. */ newp->sival = evp->sigev_value; newp->thrfunc = evp->sigev_notify_function; skipped line 207 if (! INTERNAL_SYSCALL_ERROR_P (res, err)) { /* Add to the queue of active timers with thread delivery. */ pthread_mutex_lock (&__active_timer_sigev_thread_lock); newp->next = __active_timer_sigev_thread; __active_timer_sigev_thread = newp; pthread_mutex_unlock (&__active_timer_sigev_thread_lock); *timerid = (timer_t) newp; return 0; } } Nobody set newp->sigev_notify = SIGEV_THREAD. So, when call timer_delete(), kt->sigev_notify value not specified, thus lead to simple free kt pointer, w/o deleting it from linked list prior. So, when deleting last created timer, linked list simply become lost, cause head was free'd, and next expired timer will hang glibc.
Created attachment 3681 [details] Patch to fix the bug I agree with the analysis in this bug report (at least for current glibc - any version after the fix for bug 5220 went into CVS - not necessarily for all the versions mentioned in the report). I attach the obvious patch to fix it implied by the analysis in the bug report.
I concur with the patch. An easy way to verify the fix is by running the app under valgrind. Here is the output before the patch: ==4907== Memcheck, a memory error detector. ==4907== Copyright (C) 2002-2008, and GNU GPL'd, by Julian Seward et al. ==4907== Using LibVEX rev 1878, a library for dynamic binary translation. ==4907== Copyright (C) 2004-2008, and GNU GPL'd, by OpenWorks LLP. ==4907== Using valgrind-3.4.0, a dynamic binary instrumentation framework. ==4907== Copyright (C) 2000-2008, and GNU GPL'd, by Julian Seward et al. ==4907== For more details, rerun with: -v ==4907== add timer: 0x402c0e0, exp(s,u) = (10, 0) add timer: 0x402c148, exp(s,u) = (10, 0) ==4907== Conditional jump or move depends on uninitialised value(s) ==4907== at 0x94CC94: timer_delete (in /lib/librt-2.9.so) ==4907== by 0x804891C: main (in /var/tmp/a.out) ==4907== ==4907== Thread 2: ==4907== Invalid read of size 4 ==4907== at 0x94DE20: timer_helper_thread (in /lib/librt-2.9.so) ==4907== by 0x81951E: start_thread (in /lib/libpthread-2.9.so) ==4907== by 0x74F04D: clone (in /lib/libc-2.9.so) ==4907== Address 0x402c17c is 52 bytes inside a block of size 56 free'd ==4907== at 0x4005B6A: free (in /opt/share/buildtools-f10-i386/lib/valgrind/x86-linux/vgpreload_memcheck.so) ==4907== by 0x94CC9D: timer_delete (in /lib/librt-2.9.so) ==4907== by 0x804891C: main (in /var/tmp/a.out) timer_expire_func(): called: 0x8049c3c ^C==4907== ==4907== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 15 from 1) ==4907== malloc/free: in use at exit: 328 bytes in 3 blocks. ==4907== malloc/free: 6 allocs, 3 frees, 528 bytes allocated. ==4907== For counts of detected errors, rerun with: -v ==4907== Use --track-origins=yes to see where uninitialised values come from ==4907== searching for pointers to 3 not-freed blocks. ==4907== checked 10,568,324 bytes. ==4907== ==4907== LEAK SUMMARY: ==4907== definitely lost: 0 bytes in 0 blocks. ==4907== possibly lost: 272 bytes in 2 blocks. ==4907== still reachable: 56 bytes in 1 blocks. ==4907== suppressed: 0 bytes in 0 blocks. ==4907== Rerun with --leak-check=full to see details of leaked memory. And here is the output after the patch: ==25782== Memcheck, a memory error detector. ==25782== Copyright (C) 2002-2008, and GNU GPL'd, by Julian Seward et al. ==25782== Using LibVEX rev 1878, a library for dynamic binary translation. ==25782== Copyright (C) 2004-2008, and GNU GPL'd, by OpenWorks LLP. ==25782== Using valgrind-3.4.0, a dynamic binary instrumentation framework. ==25782== Copyright (C) 2000-2008, and GNU GPL'd, by Julian Seward et al. ==25782== For more details, rerun with: -v ==25782== add timer: 0x41e50e0, exp(s,u) = (10, 0) add timer: 0x41e5148, exp(s,u) = (10, 0) timer_expire_func(): called: 0x8049c3c ^C==25782== ==25782== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 18 from 1) ==25782== malloc/free: in use at exit: 328 bytes in 3 blocks. ==25782== malloc/free: 6 allocs, 3 frees, 528 bytes allocated. ==25782== For counts of detected errors, rerun with: -v ==25782== searching for pointers to 3 not-freed blocks. ==25782== checked 10,567,240 bytes. ==25782== ==25782== LEAK SUMMARY: ==25782== definitely lost: 0 bytes in 0 blocks. ==25782== possibly lost: 272 bytes in 2 blocks. ==25782== still reachable: 56 bytes in 1 blocks. ==25782== suppressed: 0 bytes in 0 blocks. ==25782== Rerun with --leak-check=full to see details of leaked memory.
Fixed in current git.
Verified.