Bug 7094 - Bug in creating/deleting posix per process timers with SIGEV_THREAD notification method
Summary: Bug in creating/deleting posix per process timers with SIGEV_THREAD notificat...
Status: VERIFIED FIXED
Alias: None
Product: glibc
Classification: Unclassified
Component: nptl (show other bugs)
Version: unspecified
: P2 normal
Target Milestone: ---
Assignee: Ulrich Drepper
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2008-12-11 08:41 UTC by Pavel Smolenskiy
Modified: 2009-09-03 05:18 UTC (History)
1 user (show)

See Also:
Host: All
Target: all
Build: all
Last reconfirmed:


Attachments
Patch to fix the bug (440 bytes, patch)
2009-01-23 02:29 UTC, Joseph Myers
Details | Diff

Note You need to log in before you can comment on or make changes to this bug.
Description Pavel Smolenskiy 2008-12-11 08:41:03 UTC
Issue is following.
1. Create x timers, where x>1, calling timer_create, with appropriate arguments,
and SIGEV_THREAD as notification method on expire.
2. Delete last created timer with timer_delete before it's expire time.
3. Wait for another timer to expire. On attempt to call notification function
glibc hang with SIGSEGV in timer_helper_thread().

Tested on glibc 2.4, 2.7, 2.8, 2.9 and latest cvs head with ubuntu 8.10 on amd64.

Here is test code
[code]
/* file timer_t.c */

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/time.h>
#include <time.h>
#include <errno.h>
#include <pthread.h>
#include <string.h>

void timer_expire_func(union sigval arg)
{
        void *obj = arg.sival_ptr;
        printf("%s(): called: %p\n", __FUNCTION__, obj);

}

pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
int q = 0;

void *thr_func(void *arg)
{
        int qf = 0;
        while(1) {
                pthread_mutex_lock(&a);
                qf = q;
                pthread_mutex_unlock(&a);
                if (qf)
                        break;
                sleep(2);
        }
        return NULL;
}

timer_t t1, t2;

int main(void)
{
        struct sigevent sigev;
        struct timeval expire;
        struct itimerval exp;

        sigev.sigev_notify = SIGEV_THREAD;
        sigev.sigev_notify_function = timer_expire_func;
        sigev.sigev_notify_attributes = 0;
        sigev.sigev_value.sival_ptr = (void *) &t1;

        expire.tv_sec = 10;
        expire.tv_usec = 0;
        if (timer_create(CLOCK_REALTIME, &sigev, &t1) ) {
                perror("timer_create");
                exit(-1);
        }
        exp.it_value.tv_sec = expire.tv_sec;
        exp.it_value.tv_usec = expire.tv_usec;
        exp.it_interval.tv_sec = 0;
        exp.it_interval.tv_usec = 0;

        printf("add timer: %p, exp(s,u) = (%lu, %lu)\n", t1,
                           expire.tv_sec, expire.tv_usec);
        timer_settime(t1, 0, (const struct itimerspec*)&exp, NULL);

        memset(&sigev, 0, sizeof(sigev));

        sigev.sigev_notify = SIGEV_THREAD;
        sigev.sigev_notify_function = timer_expire_func;
        sigev.sigev_notify_attributes = 0;
        sigev.sigev_value.sival_ptr = (void *) &t2;

        expire.tv_sec = 10;
        expire.tv_usec = 0;
        if (timer_create(CLOCK_REALTIME, &sigev, &t2) ) {
                perror("timer_create");
                exit(-1);
        }
        exp.it_value.tv_sec = expire.tv_sec;
        exp.it_value.tv_usec = expire.tv_usec;
        exp.it_interval.tv_sec = 0;
        exp.it_interval.tv_usec = 0;

        printf("add timer: %p, exp(s,u) = (%lu, %lu)\n", t2,
                           expire.tv_sec, expire.tv_usec);
        timer_settime(t2, 0, (const struct itimerspec*)&exp, NULL);

        pthread_t ptid;

        pthread_create(&ptid, NULL, thr_func, NULL);
        timer_delete(t2);
        int rr;
        pthread_join(ptid, (void**)&rr);
        exit(0);

[/code]
# gcc -Wall -g -o timer_t timer_t.c -lrt
# gdb ./timer_t
(gdb) r
Starting program: /tmp/timer_t 
[Thread debugging using libthread_db enabled]
[New Thread 0x7fd8ae7e26e0 (LWP 25091)]
[New Thread 0x41006950 (LWP 25092)]
add timer: 0xd4c130, exp(s,u) = (10, 0)
add timer: 0xd4c190, exp(s,u) = (10, 0)
[New Thread 0x42260950 (LWP 25093)]

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x41006950 (LWP 25092)]
timer_helper_thread (arg=<value optimized out>) at
../nptl/sysdeps/unix/sysv/linux/timer_routines.c:114
114			  runp = runp->next;
(gdb) p runp
$1 = (struct timer *) 0x60
(gdb) 

======
So I look up into the code and found out, that SIGEV_THREAD timers, on creation,
stored in one-way linked list, last created timer is in the head.
Then, when delete timer, glibc found that timer in linked list and delete it
from there.

nptl/sysdeps/unix/sysv/linux/timer_delete.c:42
timer_delete (timerid)
     timer_t timerid;
{
# undef timer_delete
# ifndef __ASSUME_POSIX_TIMERS
  if (__no_posix_timers >= 0)
# endif
    {
      struct timer *kt = (struct timer *) timerid;

      /* Delete the kernel timer object.  */
      int res = INLINE_SYSCALL (timer_delete, 1, kt->ktimerid);

      if (res == 0)
	{
	  if (kt->sigev_notify == SIGEV_THREAD)
	    {
	      /* Remove the timer from the list.  */
               skip
               skip
               skip
	    }

# ifndef __ASSUME_POSIX_TIMERS
	  /* We know the syscall support is available.  */
	  __no_posix_timers = 1;
# endif

	  /* Free the memory.  */
	  (void) free (kt);

	  return 0;
	}

But when look into
nptl/sysdeps/unix/sysv/linux/timer_create.c:52
timer_create (clock_id, evp, timerid)
     clockid_t clock_id;
     struct sigevent *evp;
     timer_t *timerid;
{
 /* skip non SIGEV_THREAD code */
line 162
	      struct timer *newp;
	      newp = (struct timer *) malloc (sizeof (struct timer));
	      if (newp == NULL)
		return -1;

	      /* Copy the thread parameters the user provided.  */
	      newp->sival = evp->sigev_value;
	      newp->thrfunc = evp->sigev_notify_function;
skipped
line 207
	      if (! INTERNAL_SYSCALL_ERROR_P (res, err))
		{
		  /* Add to the queue of active timers with thread
		     delivery.  */
		  pthread_mutex_lock (&__active_timer_sigev_thread_lock);
		  newp->next = __active_timer_sigev_thread;
		  __active_timer_sigev_thread = newp;
		  pthread_mutex_unlock (&__active_timer_sigev_thread_lock);

		  *timerid = (timer_t) newp;
		  return 0;
		}
}

Nobody set newp->sigev_notify = SIGEV_THREAD.

So, when call timer_delete(), kt->sigev_notify value not specified, thus lead to
simple free kt pointer, w/o deleting it from linked list prior.
So, when deleting last created timer, linked list simply become lost, cause head
was free'd, and next expired timer will hang glibc.
Comment 1 Joseph Myers 2009-01-23 02:29:38 UTC
Created attachment 3681 [details]
Patch to fix the bug

I agree with the analysis in this bug report (at least for current glibc -
any version after the fix for bug 5220 went into CVS - not necessarily for
all the versions mentioned in the report).  I attach the obvious patch to
fix it implied by the analysis in the bug report.
Comment 2 Brian Buesker 2009-01-23 15:19:17 UTC
I concur with the patch. An easy way to verify the fix is by running the app
under valgrind. Here is the output before the patch:

==4907== Memcheck, a memory error detector.
==4907== Copyright (C) 2002-2008, and GNU GPL'd, by Julian Seward et al.
==4907== Using LibVEX rev 1878, a library for dynamic binary translation.
==4907== Copyright (C) 2004-2008, and GNU GPL'd, by OpenWorks LLP.
==4907== Using valgrind-3.4.0, a dynamic binary instrumentation framework.
==4907== Copyright (C) 2000-2008, and GNU GPL'd, by Julian Seward et al.
==4907== For more details, rerun with: -v
==4907==
add timer: 0x402c0e0, exp(s,u) = (10, 0)
add timer: 0x402c148, exp(s,u) = (10, 0)
==4907== Conditional jump or move depends on uninitialised value(s)
==4907==    at 0x94CC94: timer_delete (in /lib/librt-2.9.so)
==4907==    by 0x804891C: main (in /var/tmp/a.out)
==4907==
==4907== Thread 2:
==4907== Invalid read of size 4
==4907==    at 0x94DE20: timer_helper_thread (in /lib/librt-2.9.so)
==4907==    by 0x81951E: start_thread (in /lib/libpthread-2.9.so)
==4907==    by 0x74F04D: clone (in /lib/libc-2.9.so)
==4907==  Address 0x402c17c is 52 bytes inside a block of size 56 free'd
==4907==    at 0x4005B6A: free (in
/opt/share/buildtools-f10-i386/lib/valgrind/x86-linux/vgpreload_memcheck.so)
==4907==    by 0x94CC9D: timer_delete (in /lib/librt-2.9.so)
==4907==    by 0x804891C: main (in /var/tmp/a.out)
timer_expire_func(): called: 0x8049c3c
^C==4907==
==4907== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 15 from 1)
==4907== malloc/free: in use at exit: 328 bytes in 3 blocks.
==4907== malloc/free: 6 allocs, 3 frees, 528 bytes allocated.
==4907== For counts of detected errors, rerun with: -v
==4907== Use --track-origins=yes to see where uninitialised values come from
==4907== searching for pointers to 3 not-freed blocks.
==4907== checked 10,568,324 bytes.
==4907==
==4907== LEAK SUMMARY:
==4907==    definitely lost: 0 bytes in 0 blocks.
==4907==      possibly lost: 272 bytes in 2 blocks.
==4907==    still reachable: 56 bytes in 1 blocks.
==4907==         suppressed: 0 bytes in 0 blocks.
==4907== Rerun with --leak-check=full to see details of leaked memory.

And here is the output after the patch:

==25782== Memcheck, a memory error detector.
==25782== Copyright (C) 2002-2008, and GNU GPL'd, by Julian Seward et al.
==25782== Using LibVEX rev 1878, a library for dynamic binary translation.
==25782== Copyright (C) 2004-2008, and GNU GPL'd, by OpenWorks LLP.
==25782== Using valgrind-3.4.0, a dynamic binary instrumentation framework.
==25782== Copyright (C) 2000-2008, and GNU GPL'd, by Julian Seward et al.
==25782== For more details, rerun with: -v
==25782==
add timer: 0x41e50e0, exp(s,u) = (10, 0)
add timer: 0x41e5148, exp(s,u) = (10, 0)
timer_expire_func(): called: 0x8049c3c
^C==25782==
==25782== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 18 from 1)
==25782== malloc/free: in use at exit: 328 bytes in 3 blocks.
==25782== malloc/free: 6 allocs, 3 frees, 528 bytes allocated.
==25782== For counts of detected errors, rerun with: -v
==25782== searching for pointers to 3 not-freed blocks.
==25782== checked 10,567,240 bytes.
==25782==
==25782== LEAK SUMMARY:
==25782==    definitely lost: 0 bytes in 0 blocks.
==25782==      possibly lost: 272 bytes in 2 blocks.
==25782==    still reachable: 56 bytes in 1 blocks.
==25782==         suppressed: 0 bytes in 0 blocks.
==25782== Rerun with --leak-check=full to see details of leaked memory.
Comment 3 Ulrich Drepper 2009-09-03 03:00:20 UTC
Fixed in current git.
Comment 4 Pavel Smolenskiy 2009-09-03 05:18:14 UTC
Verified.