This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Adding systemtap probe points in pthread library (was: Re: revamp sdt.h)


With the new sdt.h and translator in the systemtap git tree, I added the
probes in assembly code in lowlevellock.S to only trace mutex lock calls
that are contended (i.e. those that end up calling futex(2)).

Additions & modifications in this revision:
1) nptl/DESIGN-systemtap-probes.txt - brief docs about the probes
2) nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S - added 2 probes
in the assembly routines right before they perform the SYS_futex
syscall.

(Please see the patch at the end of this message)

The micro-benchmark results are much better, with a simple program that
does nothing much but pthread_mutex_lock() & pthread_mutex_unlock():


#include <stdio.h>
#include <pthread.h>

pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Worker routine run by every thread (including the initial one):
 * lock and immediately unlock the shared mutex 100000000 times.
 *
 * It has the void *(*)(void *) signature that pthread_create() requires
 * for a start routine.  The argument is unused and the result is always
 * NULL.
 */
void *
thread(void *arg)
{
 int i;

 (void) arg;

 for (i = 0; i < 100000000; i++)
 {
   pthread_mutex_lock(&lock);
   pthread_mutex_unlock(&lock);
 }

 return NULL;
}

/*
 * Start four worker threads, run the same loop in the initial thread
 * (five contending threads in total), then wait for the workers.
 *
 * Returns 0 on success, 1 if a worker thread could not be created.
 */
int
main(void)
{
 pthread_t tids[4];
 int created = 0;
 int status = 0;
 int i;

 for (i = 0; i < 4; i++)
 {
   /* pthread_create() returns an error number instead of setting errno. */
   int err = pthread_create(&tids[i], NULL, thread, NULL);

   if (err != 0)
   {
     fprintf(stderr, "pthread_create failed: error %d\n", err);
     status = 1;
     break;
   }
   created++;
 }

 /* Only take part in the benchmark when all workers are running. */
 if (status == 0)
   thread(NULL);

 /* Join only the threads that were actually created. */
 for (i = 0; i < created; i++)
   pthread_join(tids[i], NULL);

 return status;
}

With 5 threads (the version above), there were only 408613 calls to
futex(2), so the new probe fires 1223.6 times less often than
mutex_acquire() did! I repeated the same test with a smaller number (2-4)
of threads, and in all cases, the number of times futex(2) is entered to
get the lock is low -- which is similar to the behavior of well-written
threaded code (using DTrace on OpenSolaris, MySQL was not extremely
contended.)

So instead of a slow-down of several times when I benchmarked this
micro-benchmark with the older probes, we are only several % slower than
the same code without being instrumented by systemtap. (I benchmarked
the code on my laptop, and I will repeat the benchmark again on a much
quieter machine, esp. with X running and release the final results.)

Rayson




diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
index 43ca44c..2f1a4c1 100644
--- a/nptl/pthreadP.h
+++ b/nptl/pthreadP.h
@@ -33,6 +33,7 @@
 #include <atomic.h>
 #include <kernel-features.h>
 
+#include "pthread_probe.h"
 
 /* Atomic operations on TLS memory.  */
 #ifndef THREAD_ATOMIC_CMPXCHG_VAL
diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
index 34d83f9..66f44cb 100644
--- a/nptl/pthread_create.c
+++ b/nptl/pthread_create.c
@@ -296,6 +296,11 @@ start_thread (void *arg)
 	  CANCEL_RESET (oldtype);
 	}
 
+      /* SystemTap probe
+         All of the normal thread creation work would
+         be done after this point */
+      PTHREAD_PROBE_START(pd->arg);
+
       /* Run the code the user provided.  */
 #ifdef CALL_THREAD_FCT
       THREAD_SETMEM (pd, result, CALL_THREAD_FCT (pd));
@@ -552,6 +557,9 @@ __pthread_create_2_1 (newthread, attr,
start_routine, arg)
   /* Pass the descriptor to the caller.  */
   *newthread = (pthread_t) pd;
 
+  /* Systemtap probe */
+  PTHREAD_PROBE_CREATE(newthread, start_routine, arg);
+
   /* Start the thread.  */
   return create_thread (pd, iattr, STACK_VARIABLES_ARGS);
 }
diff --git a/nptl/pthread_join.c b/nptl/pthread_join.c
index 6a87a8b..58171a3 100644
--- a/nptl/pthread_join.c
+++ b/nptl/pthread_join.c
@@ -55,6 +55,8 @@ pthread_join (threadid, thread_return)
   struct pthread *self = THREAD_SELF;
   int result = 0;
 
+  PTHREAD_PROBE_JOIN(threadid);
+
   /* During the wait we change to asynchronous cancellation.  If we
      are canceled the thread we are waiting for must be marked as
      un-wait-ed for again.  */
@@ -110,5 +112,7 @@ pthread_join (threadid, thread_return)
       __free_tcb (pd);
     }
 
+  PTHREAD_PROBE_JOIN_RET(threadid, result);
+
   return result;
 }
diff --git a/nptl/pthread_mutex_destroy.c b/nptl/pthread_mutex_destroy.c
index e2c9f8a..2217f58 100644
--- a/nptl/pthread_mutex_destroy.c
+++ b/nptl/pthread_mutex_destroy.c
@@ -29,6 +29,8 @@ __pthread_mutex_destroy (mutex)
       && mutex->__data.__nusers != 0)
     return EBUSY;
 
+  PTHREAD_PROBE_MUTEX_DESTROY(mutex);
+
   /* Set to an invalid value.  */
   mutex->__data.__kind = -1;
 
diff --git a/nptl/pthread_mutex_init.c b/nptl/pthread_mutex_init.c
index d9b1ef0..bf395dd 100644
--- a/nptl/pthread_mutex_init.c
+++ b/nptl/pthread_mutex_init.c
@@ -45,6 +45,8 @@ __pthread_mutex_init (mutex, mutexattr)
 
   assert (sizeof (pthread_mutex_t) <= __SIZEOF_PTHREAD_MUTEX_T);
 
+  PTHREAD_PROBE_MUTEX_INIT(mutex);
+
   imutexattr = (const struct pthread_mutexattr *) mutexattr ?:
&default_attr;
 
   /* Sanity checks.  */
diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
index 50dc188..a4ccefe 100644
--- a/nptl/pthread_mutex_lock.c
+++ b/nptl/pthread_mutex_lock.c
@@ -48,6 +48,10 @@ __pthread_mutex_lock (mutex)
   assert (sizeof (mutex->__size) >= sizeof (mutex->__data));
 
   unsigned int type = PTHREAD_MUTEX_TYPE (mutex);
+
+  /* systemtap marker */
+  PTHREAD_PROBE_MUTEX_ENTRY(mutex);
+
   if (__builtin_expect (type & ~PTHREAD_MUTEX_KIND_MASK_NP, 0))
     return __pthread_mutex_lock_full (mutex);
 
@@ -60,6 +64,8 @@ __pthread_mutex_lock (mutex)
       /* Normal mutex.  */
       LLL_MUTEX_LOCK (mutex);
       assert (mutex->__data.__owner == 0);
+
+      PTHREAD_PROBE_MUTEX_BLOCK(mutex);
     }
   else if (__builtin_expect (type == PTHREAD_MUTEX_RECURSIVE_NP, 1))
     {
@@ -75,6 +81,11 @@ __pthread_mutex_lock (mutex)
 
 	  ++mutex->__data.__count;
 
+          /* currently, the systemtap pthread probe does not have a */
+          /* probe point here because the thread already owns this */
+          /* recursive lock before the call to this function. */
+          /* this might change in the future */
+
 	  return 0;
 	}
 
@@ -83,6 +94,8 @@ __pthread_mutex_lock (mutex)
 
       assert (mutex->__data.__owner == 0);
       mutex->__data.__count = 1;
+
+      PTHREAD_PROBE_MUTEX_BLOCK(mutex);
     }
   else if (__builtin_expect (type == PTHREAD_MUTEX_ADAPTIVE_NP, 1))
     {
@@ -94,6 +107,7 @@ __pthread_mutex_lock (mutex)
 	  int cnt = 0;
 	  int max_cnt = MIN (MAX_ADAPTIVE_COUNT,
 			     mutex->__data.__spins * 2 + 10);
+
 	  do
 	    {
 	      if (cnt++ >= max_cnt)
@@ -108,6 +122,8 @@ __pthread_mutex_lock (mutex)
 	    }
 	  while (LLL_MUTEX_TRYLOCK (mutex) != 0);
 
+          PTHREAD_PROBE_MUTEX_BLOCK(mutex);
+
 	  mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8;
 	}
       assert (mutex->__data.__owner == 0);
@@ -127,6 +143,8 @@ __pthread_mutex_lock (mutex)
   ++mutex->__data.__nusers;
 #endif
 
+  PTHREAD_PROBE_MUTEX_ACQUIRE(mutex);
+
   return 0;
 }
 
@@ -277,6 +295,10 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
 
 		++mutex->__data.__count;
 
+                /* currently, the systemtap pthread probe does not have
a */
+                /* probe point here because the thread already owns
this */
+                /* recursive lock before the call to this function. */
+                /* this might change in the future */
 		return 0;
 	      }
 	  }
@@ -393,6 +415,11 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
 		  /* Overflow of the counter.  */
 		  return EAGAIN;
 
+                 /* currently, the systemtap pthread probe does not
have a */
+                 /* probe point here because the thread already owns
this */
+                 /* recursive lock before the call to this function. */
+                 /* this might change in the future */
+
 		++mutex->__data.__count;
 
 		return 0;
@@ -442,8 +469,10 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
 		  break;
 
 		if (oldval != ceilval)
+                {
 		  lll_futex_wait (&mutex->__data.__lock, ceilval | 2,
 				  PTHREAD_MUTEX_PSHARED (mutex));
+                }
 	      }
 	    while (atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
 							ceilval | 2, ceilval)
@@ -451,6 +480,8 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
 	  }
 	while ((oldval & PTHREAD_MUTEX_PRIO_CEILING_MASK) != ceilval);
 
+        PTHREAD_PROBE_MUTEX_BLOCK(mutex);
+
 	assert (mutex->__data.__owner == 0);
 	mutex->__data.__count = 1;
       }
@@ -467,6 +498,8 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
   ++mutex->__data.__nusers;
 #endif
 
+  PTHREAD_PROBE_MUTEX_ACQUIRE(mutex);
+
   return 0;
 }
 #ifndef __pthread_mutex_lock
diff --git a/nptl/pthread_mutex_unlock.c b/nptl/pthread_mutex_unlock.c
index f9fe10b..50cbc5c 100644
--- a/nptl/pthread_mutex_unlock.c
+++ b/nptl/pthread_mutex_unlock.c
@@ -50,6 +50,7 @@ __pthread_mutex_unlock_usercnt (mutex, decr)
 
       /* Unlock.  */
       lll_unlock (mutex->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex));
+      PTHREAD_PROBE_MUTEX_RELEASE(mutex);
       return 0;
     }
   else if (__builtin_expect (type == PTHREAD_MUTEX_RECURSIVE_NP, 1))
@@ -60,6 +61,10 @@ __pthread_mutex_unlock_usercnt (mutex, decr)
 
       if (--mutex->__data.__count != 0)
 	/* We still hold the mutex.  */
+        
+        /* currently, the systemtap pthread probe does not have */
+        /* probe point here because the thread still owns the lock */
+        /* this might change in the future */
 	return 0;
       goto normal;
     }
@@ -104,6 +109,10 @@ __pthread_mutex_unlock_full (pthread_mutex_t
*mutex, int decr)
 
       if (--mutex->__data.__count != 0)
 	/* We still hold the mutex.  */
+
+        /* currently, the systemtap pthread probe does not have */
+        /* probe point here because the thread still owns the lock */
+        /* this might change in the future */
 	return 0;
 
       goto robust;
@@ -149,6 +158,10 @@ __pthread_mutex_unlock_full (pthread_mutex_t
*mutex, int decr)
 
       if (--mutex->__data.__count != 0)
 	/* We still hold the mutex.  */
+
+        /* currently, the systemtap pthread probe does not have */
+        /* probe point here because the thread still owns the lock */
+        /* this might change in the future */
 	return 0;
       goto continue_pi_non_robust;
 
@@ -171,6 +184,10 @@ __pthread_mutex_unlock_full (pthread_mutex_t
*mutex, int decr)
 
       if (--mutex->__data.__count != 0)
 	/* We still hold the mutex.  */
+
+        /* currently, the systemtap pthread probe does not have */
+        /* probe point here because the thread still owns the lock */
+        /* this might change in the future */
 	return 0;
 
       goto continue_pi_robust;
@@ -237,6 +254,10 @@ __pthread_mutex_unlock_full (pthread_mutex_t
*mutex, int decr)
 
       if (--mutex->__data.__count != 0)
 	/* We still hold the mutex.  */
+
+        /* currently, the systemtap pthread probe does not have */
+        /* probe point here because the thread still owns the lock */
+        /* this might change in the future */
 	return 0;
       goto pp;
 
@@ -272,6 +293,9 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex,
int decr)
 			PTHREAD_MUTEX_PSHARED (mutex));
 
       int oldprio = newval >> PTHREAD_MUTEX_PRIO_CEILING_SHIFT;
+
+      PTHREAD_PROBE_MUTEX_RELEASE(mutex);
+
       return __pthread_tpp_change_priority (oldprio, -1);
 
     default:
@@ -279,6 +303,7 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex,
int decr)
       return EINVAL;
     }
 
+  PTHREAD_PROBE_MUTEX_RELEASE(mutex);
   return 0;
 }
 
diff --git a/nptl/pthread_rwlock_destroy.c
b/nptl/pthread_rwlock_destroy.c
index 28fd24b..b4cd7ab 100644
--- a/nptl/pthread_rwlock_destroy.c
+++ b/nptl/pthread_rwlock_destroy.c
@@ -24,6 +24,8 @@ int
 __pthread_rwlock_destroy (rwlock)
      pthread_rwlock_t *rwlock;
 {
+  PTHREAD_PROBE_RWLOCK_DESTROY(rwlock);
+
   /* Nothing to be done.  For now.  */
   return 0;
 }
diff --git a/nptl/pthread_rwlock_rdlock.c b/nptl/pthread_rwlock_rdlock.c
index 31eb508..954b414 100644
--- a/nptl/pthread_rwlock_rdlock.c
+++ b/nptl/pthread_rwlock_rdlock.c
@@ -31,6 +31,8 @@ __pthread_rwlock_rdlock (rwlock)
 {
   int result = 0;
 
+  PTHREAD_PROBE_RLOCK_ENTRY(rwlock);
+
   /* Make sure we are along.  */
   lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
 
@@ -49,6 +51,12 @@ __pthread_rwlock_rdlock (rwlock)
 	      --rwlock->__data.__nr_readers;
 	      result = EAGAIN;
 	    }
+          else
+            {
+              /* systemtap pthread probe - this is the only place where
*/
+              /* we get this read-write lock */
+              PTHREAD_PROBE_RWLOCK_ACQUIRE_READ(rwlock);
+            }
 
 	  break;
 	}
diff --git a/nptl/pthread_rwlock_unlock.c b/nptl/pthread_rwlock_unlock.c
index a7ef71a..e7d1568 100644
--- a/nptl/pthread_rwlock_unlock.c
+++ b/nptl/pthread_rwlock_unlock.c
@@ -27,6 +27,8 @@
 int
 __pthread_rwlock_unlock (pthread_rwlock_t *rwlock)
 {
+  PTHREAD_PROBE_RWLOCK_UNLOCK(rwlock);
+
   lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
   if (rwlock->__data.__writer)
     rwlock->__data.__writer = 0;
diff --git a/nptl/pthread_rwlock_wrlock.c b/nptl/pthread_rwlock_wrlock.c
index 64fe970..abf3083 100644
--- a/nptl/pthread_rwlock_wrlock.c
+++ b/nptl/pthread_rwlock_wrlock.c
@@ -31,6 +31,8 @@ __pthread_rwlock_wrlock (rwlock)
 {
   int result = 0;
 
+  PTHREAD_PROBE_WLOCK_ENTRY(rwlock);
+
   /* Make sure we are along.  */
   lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
 
@@ -41,6 +43,11 @@ __pthread_rwlock_wrlock (rwlock)
 	{
 	  /* Mark self as writer.  */
 	  rwlock->__data.__writer = THREAD_GETMEM (THREAD_SELF, tid);
+
+          /* systemtap pthread probe - this is the only place where we
can */
+          /* get this read-write lock. */
+          PTHREAD_PROBE_RWLOCK_ACQUIRE_WRITE(rwlock);
+
 	  break;
 	}
diff --git a/nptl/pthread_probe.h b/nptl/pthread_probe.h
index e69de29..8178ac9 100644
--- a/nptl/pthread_probe.h
+++ b/nptl/pthread_probe.h
@@ -0,0 +1,33 @@
+#include <sys/sdt.h>
+
+/* #include "new-sdt3.h" */
+
+#define PTHREAD_PROBE_CREATE(arg1,arg2,arg3)
STAP_PROBE3(provider,create,arg1,arg2,arg3)
+#define PTHREAD_PROBE_JOIN(arg1) STAP_PROBE1(provider,join,arg1)
+#define PTHREAD_PROBE_JOIN_RET(arg1,arg2)
STAP_PROBE2(provider,join_ret,arg1,arg2)
+#define PTHREAD_PROBE_START(arg1) STAP_PROBE1(provider,start,arg1)
+#define PTHREAD_PROBE_END(arg1) STAP_PROBE1(provider,end,arg1)
+#define PTHREAD_PROBE_MUTEX_INIT(arg1)
STAP_PROBE1(provider,mutex_init,arg1)
+#define PTHREAD_PROBE_MUTEX_DESTROY(arg1)
STAP_PROBE1(provider,mutex_destroy,arg1)
+#define PTHREAD_PROBE_MUTEX_ACQUIRE(arg1)
STAP_PROBE1(provider,mutex_acquire,arg1)
+#define PTHREAD_PROBE_MUTEX_RELEASE(arg1)
STAP_PROBE1(provider,mutex_release,arg1)
+#define PTHREAD_PROBE_MUTEX_BLOCK(arg1)
STAP_PROBE1(provider,mutex_block,arg1)
+#define PTHREAD_PROBE_COND_INIT(arg1)
STAP_PROBE1(provider,cond_init,arg1)
+#define PTHREAD_PROBE_COND_DESTROY(arg1)
STAP_PROBE1(provider,cond_destroy,arg1)
+#define PTHREAD_PROBE_COND_WAIT(arg1,arg2)
STAP_PROBE2(provider,cond_wait,arg1,arg2)
+#define PTHREAD_PROBE_COND_WAKE(arg1,arg2)
STAP_PROBE2(provider,cond_wake,arg1,arg2)
+#define PTHREAD_PROBE_COND_SIGNAL(arg1)
STAP_PROBE1(provider,cond_signal,arg1)
+#define PTHREAD_PROBE_COND_BROADCAST(arg1)
STAP_PROBE1(provider,cond_broadcast,arg1)
+#define PTHREAD_PROBE_RWLOCK_ACQUIRE_WRITE(arg1)
STAP_PROBE1(provider,rwlock_acquire_write,arg1)
+#define PTHREAD_PROBE_RWLOCK_ACQUIRE_READ(arg1)
STAP_PROBE1(provider,rwlock_acquire_read,arg1)
+#define PTHREAD_PROBE_RWLOCK_DESTROY(arg1)
STAP_PROBE1(provider,rwlock_destroy,arg1)
+#define PTHREAD_PROBE_RWLOCK_UNLOCK(arg1)
STAP_PROBE1(provider,rwlock_unlock,arg1)
+#define PTHREAD_PROBE_MUTEX_ENTRY(arg1)
STAP_PROBE1(provider,mutex_entry,arg1)
+#define PTHREAD_PROBE_RLOCK_ENTRY(arg1)
STAP_PROBE1(provider,rlock_entry,arg1)
+#define PTHREAD_PROBE_RLOCK_BLOCK(arg1)
STAP_PROBE1(provider,rlock_block,arg1)
+#define PTHREAD_PROBE_WLOCK_ENTRY(arg1)
STAP_PROBE1(provider,wlock_entry,arg1)
+#define PTHREAD_PROBE_WLOCK_BLOCK(arg1)
STAP_PROBE1(provider,wlock_block,arg1)
+
+/* the following probe points are in low-level assembly/inline assembly
code */
+#define PTHREAD_PROBE_LL_LOCKWAIT_PRIVATE(arg1)
STAP_PROBE1(provider,lock_wait_private,arg1)
+#define PTHREAD_PROBE_LL_LOCKWAIT(arg1)
STAP_PROBE1(provider,lock_wait,arg1)

diff --git a/nptl/DESIGN-systemtap-probes.txt
b/nptl/DESIGN-systemtap-probes.txt
index e69de29..d8bbbd7 100644
--- a/nptl/DESIGN-systemtap-probes.txt
+++ b/nptl/DESIGN-systemtap-probes.txt
@@ -0,0 +1,34 @@
+Systemtap is a dynamic tracing/instrumenting tool available on Linux.
Probes that are not fired at run time have extremely close to zero
overhead.
+
+The following probes are available for NPTL:
+
+Thread creation & Join Probes
+=============================
+create   - probe for pthread_create(3) - arg1 = thread ID, arg2 =
start_routine, arg3 = arguments
+start    - probe for actual thread creation, arg1 = struct pthread
(members include thread ID, process ID)
+join     - probe for pthread_join(3)   - arg1 = thread ID
+join_ret - probe for pthread_join(3) return - arg1 = thread ID, arg2 =
return value
+
+Lock-related Probes
+===================
+mutex_init    - probe for pthread_mutex_init(3) - arg1 = address of
mutex lock
+mutex_acquire - probe for pthread_mutex_lock(3) - arg1 = address of
mutex lock
+mutex_block   - probe for resume from _possible_ mutex block event -
arg1 = address of mutex lock
+mutex_entry   - probe for entry to the pthread_mutex_lock(3) function,
- arg1 = address of mutex lock
+mutex_release - probe for pthread_mutex_unlock(3) after the successful
release of a mutex lock - arg1 = address of mutex lock
+mutex_destroy - probe for pthread_mutex_destroy(3) - arg1 = address of
mutex lock
+rwlock_destroy- probe for pthread_rwlock_destroy(3) - arg1 = address of
rw lock
+rwlock_acquire_write-probe for pthread_rwlock_wrlock(3) - arg1 =
address of rw lock
+rwlock_unlock - probe for pthread_rwlock_unlock(3) - arg1 = address of
rw lock
+
+lock_wait         - probe in low-level lock code, only fired when futex
is called (i.e. when trying to acquire a contended lock)
+lock_wait_private - probe in low-level lock code, only fired when futex
is called (i.e. when trying to acquire a contended lock)
+
+Condition variable Probes
+=========================
+cond_init - probe for pthread_cond_init(3) - arg1 = condition, arg2 =
attr
+cond_destroy- probe for pthread_condattr_destroy(3) - arg1 = attr
+cond_wait - probe for pthread_cond_wait(3) - arg1 = condition, arg2 =
mutex lock
+cond_signal - probe for pthread_cond_signal(3) - arg1 = condition
+cond_broadcast - probe for pthread_cond_broadcast(3) - arg1 = condition
+
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
index 8de9cf4..b6d9847 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
@@ -21,6 +21,7 @@
 #include <pthread-errnos.h>
 #include <kernel-features.h>
 #include <lowlevellock.h>
+#include <pthread_probe.h>
 
        .text
 
@@ -130,7 +131,8 @@ __lll_lock_wait:
        cmpl    %edx, %eax      /* NB:   %edx == 2 */
        jne     2f
 
-1:     movl    $SYS_futex, %eax
+1:     PTHREAD_PROBE_LL_LOCKWAIT(%rdi)
+        movl   $SYS_futex, %eax
        syscall
 
 2:     movl    %edx, %eax
@@ -180,7 +182,8 @@ __lll_timedlock_wait:
        cmpl    %edx, %eax
        jne     2f
 
-1:     movl    $SYS_futex, %eax
+1:     PTHREAD_PROBE_LL_LOCKWAIT_PRIVATE(%rdi)
+        movl   $SYS_futex, %eax
        movl    $2, %edx
        syscall
 

On Wed, 2010-08-11 at 09:10 -0400, Rayson Ho wrote:
> Thanks Roland,
> 
> I've ported my pthread probe changes against your systemtap glibc tree.
> I think the interface of the new sdt.h and that of the LIBC_PROBE()
> macro are almost identical, but I am not sure if we are going to have
> macros like PROBE2() in the new-sdt -- not an issue at all as it is just
> a handy macro.
> 
> I've inline-attached the diff, and the biggest change is that
> "pthread_probe.h" used with the old sdt.h is now gone.
> 
> I have not attached the diffs for the probes in the inline assembly code
> yet, I will do that soon.
> 
> Rayson
> 
> 
> 
> diff --git a/nptl/pthread_join.c b/nptl/pthread_join.c
> index 6a87a8b..9f67a58 100644
> --- a/nptl/pthread_join.c
> +++ b/nptl/pthread_join.c
> @@ -23,6 +23,8 @@
>  #include <atomic.h>
>  #include "pthreadP.h"
>  
> +#include <stap-probe.h>
> +
>  
>  static void
>  cleanup (void *arg)
> @@ -55,6 +57,8 @@ pthread_join (threadid, thread_return)
>    struct pthread *self = THREAD_SELF;
>    int result = 0;
>  
> +  LIBC_PROBE(pthread_join, 1, threadid);
> +
>    /* During the wait we change to asynchronous cancellation.  If we
>       are canceled the thread we are waiting for must be marked as
>       un-wait-ed for again.  */
> @@ -110,5 +114,7 @@ pthread_join (threadid, thread_return)
>        __free_tcb (pd);
>      }
>  
> +  LIBC_PROBE(pthread_join_ret, 2, threadid, result);  
> +
>    return result;
>  }
> diff --git a/nptl/pthread_mutex_destroy.c b/nptl/pthread_mutex_destroy.c
> index e2c9f8a..dd690cd 100644
> --- a/nptl/pthread_mutex_destroy.c
> +++ b/nptl/pthread_mutex_destroy.c
> @@ -20,6 +20,7 @@
>  #include <errno.h>
>  #include "pthreadP.h"
>  
> +#include <stap-probe.h>
>  
>  int
>  __pthread_mutex_destroy (mutex)
> @@ -32,6 +33,8 @@ __pthread_mutex_destroy (mutex)
>    /* Set to an invalid value.  */
>    mutex->__data.__kind = -1;
>  
> +  LIBC_PROBE(pthread_mutex_destroy, 1, mutex);
> +
>    return 0;
>  }
>  strong_alias (__pthread_mutex_destroy, pthread_mutex_destroy)
> diff --git a/nptl/pthread_mutex_init.c b/nptl/pthread_mutex_init.c
> index d9b1ef0..e4fcae7 100644
> --- a/nptl/pthread_mutex_init.c
> +++ b/nptl/pthread_mutex_init.c
> @@ -24,6 +24,8 @@
>  #include <kernel-features.h>
>  #include "pthreadP.h"
>  
> +#include <stap-probe.h>
> +
>  static const struct pthread_mutexattr default_attr =
>    {
>      /* Default is a normal mutex, not shared between processes.  */
> @@ -47,6 +49,8 @@ __pthread_mutex_init (mutex, mutexattr)
>  
>    imutexattr = (const struct pthread_mutexattr *) mutexattr ?:
> &default_attr;
>  
> +  LIBC_PROBE(pthread_mutex_init, 2, mutex, mutexattr);
> +
>    /* Sanity checks.  */
>    switch (__builtin_expect (imutexattr->mutexkind
>  			    & PTHREAD_MUTEXATTR_PROTOCOL_MASK,
> diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
> index 50dc188..b754372 100644
> --- a/nptl/pthread_mutex_lock.c
> +++ b/nptl/pthread_mutex_lock.c
> @@ -25,6 +25,7 @@
>  #include "pthreadP.h"
>  #include <lowlevellock.h>
>  
> +#include <stap-probe.h>
>  
>  #ifndef LLL_MUTEX_LOCK
>  # define LLL_MUTEX_LOCK(mutex) \
> @@ -48,6 +49,10 @@ __pthread_mutex_lock (mutex)
>    assert (sizeof (mutex->__size) >= sizeof (mutex->__data));
>  
>    unsigned int type = PTHREAD_MUTEX_TYPE (mutex);
> +
> +  /* systemtap marker */
> +  LIBC_PROBE(pthread_mutex_lock, 1, mutex);
> +
>    if (__builtin_expect (type & ~PTHREAD_MUTEX_KIND_MASK_NP, 0))
>      return __pthread_mutex_lock_full (mutex);
>  
> @@ -60,6 +65,8 @@ __pthread_mutex_lock (mutex)
>        /* Normal mutex.  */
>        LLL_MUTEX_LOCK (mutex);
>        assert (mutex->__data.__owner == 0);
> +
> +      LIBC_PROBE(pthread_mutex_lock_block, 1, mutex);
>      }
>    else if (__builtin_expect (type == PTHREAD_MUTEX_RECURSIVE_NP, 1))
>      {
> @@ -75,6 +82,11 @@ __pthread_mutex_lock (mutex)
>  
>  	  ++mutex->__data.__count;
>  
> +          /* currently, the systemtap pthread probe does not have a */
> +          /* probe point here because the thread already owns this */
> +          /* recursive lock before the call to this function. */
> +          /* this might change in the future */
> +
>  	  return 0;
>  	}
>  
> @@ -83,6 +95,8 @@ __pthread_mutex_lock (mutex)
>  
>        assert (mutex->__data.__owner == 0);
>        mutex->__data.__count = 1;
> +
> +      LIBC_PROBE(pthread_mutex_lock_block, 1, mutex);
>      }
>    else if (__builtin_expect (type == PTHREAD_MUTEX_ADAPTIVE_NP, 1))
>      {
> @@ -94,6 +108,7 @@ __pthread_mutex_lock (mutex)
>  	  int cnt = 0;
>  	  int max_cnt = MIN (MAX_ADAPTIVE_COUNT,
>  			     mutex->__data.__spins * 2 + 10);
> +
>  	  do
>  	    {
>  	      if (cnt++ >= max_cnt)
> @@ -108,6 +123,8 @@ __pthread_mutex_lock (mutex)
>  	    }
>  	  while (LLL_MUTEX_TRYLOCK (mutex) != 0);
>  
> +          LIBC_PROBE(pthread_mutex_lock_block, 1, mutex);
> +
>  	  mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8;
>  	}
>        assert (mutex->__data.__owner == 0);
> @@ -127,6 +144,8 @@ __pthread_mutex_lock (mutex)
>    ++mutex->__data.__nusers;
>  #endif
>  
> +  LIBC_PROBE(pthread_mutex_lock_acquire, 1, mutex);
> +
>    return 0;
>  }
>  
> @@ -277,6 +296,10 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
>  
>  		++mutex->__data.__count;
>  
> +                /* currently, the systemtap pthread probe does not have
> a */
> +                /* probe point here because the thread already owns
> this */
> +                /* recursive lock before the call to this function. */
> +                /* this might change in the future */
>  		return 0;
>  	      }
>  	  }
> @@ -393,6 +416,11 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
>  		  /* Overflow of the counter.  */
>  		  return EAGAIN;
>  
> +                 /* currently, the systemtap pthread probe does not
> have a */
> +                 /* probe point here because the thread already owns
> this */
> +                 /* recursive lock before the call to this function. */
> +                 /* this might change in the future */
> +
>  		++mutex->__data.__count;
>  
>  		return 0;
> @@ -451,6 +479,8 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
>  	  }
>  	while ((oldval & PTHREAD_MUTEX_PRIO_CEILING_MASK) != ceilval);
>  
> +        LIBC_PROBE(pthread_mutex_lock_full_block, 1, mutex);
> +
>  	assert (mutex->__data.__owner == 0);
>  	mutex->__data.__count = 1;
>        }
> @@ -467,6 +497,8 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
>    ++mutex->__data.__nusers;
>  #endif
>  
> +  LIBC_PROBE(pthread_mutex_lock_full_acquire, 1, mutex);
> +
>    return 0;
>  }
>  #ifndef __pthread_mutex_lock
> diff --git a/nptl/pthread_mutex_unlock.c b/nptl/pthread_mutex_unlock.c
> index f9fe10b..e0f305e 100644
> --- a/nptl/pthread_mutex_unlock.c
> +++ b/nptl/pthread_mutex_unlock.c
> @@ -23,6 +23,8 @@
>  #include "pthreadP.h"
>  #include <lowlevellock.h>
>  
> +#include <stap-probe.h>
> +
>  static int
>  internal_function
>  __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
> @@ -50,6 +52,7 @@ __pthread_mutex_unlock_usercnt (mutex, decr)
>  
>        /* Unlock.  */
>        lll_unlock (mutex->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex));
> +      LIBC_PROBE(pthread_mutex_release, 1, mutex);
>        return 0;
>      }
>    else if (__builtin_expect (type == PTHREAD_MUTEX_RECURSIVE_NP, 1))
> @@ -60,6 +63,10 @@ __pthread_mutex_unlock_usercnt (mutex, decr)
>  
>        if (--mutex->__data.__count != 0)
>  	/* We still hold the mutex.  */
> +        
> +        /* currently, the systemtap pthread probe does not have */
> +        /* probe point here because the thread still owns the lock */
> +        /* this might change in the future */
>  	return 0;
>        goto normal;
>      }
> @@ -104,6 +111,10 @@ __pthread_mutex_unlock_full (pthread_mutex_t
> *mutex, int decr)
>  
>        if (--mutex->__data.__count != 0)
>  	/* We still hold the mutex.  */
> +
> +        /* currently, the systemtap pthread probe does not have */
> +        /* probe point here because the thread still owns the lock */
> +        /* this might change in the future */
>  	return 0;
>  
>        goto robust;
> @@ -149,6 +160,10 @@ __pthread_mutex_unlock_full (pthread_mutex_t
> *mutex, int decr)
>  
>        if (--mutex->__data.__count != 0)
>  	/* We still hold the mutex.  */
> +
> +        /* currently, the systemtap pthread probe does not have */
> +        /* probe point here because the thread still owns the lock */
> +        /* this might change in the future */
>  	return 0;
>        goto continue_pi_non_robust;
>  
> @@ -171,6 +186,10 @@ __pthread_mutex_unlock_full (pthread_mutex_t
> *mutex, int decr)
>  
>        if (--mutex->__data.__count != 0)
>  	/* We still hold the mutex.  */
> +
> +        /* currently, the systemtap pthread probe does not have */
> +        /* probe point here because the thread still owns the lock */
> +        /* this might change in the future */
>  	return 0;
>  
>        goto continue_pi_robust;
> @@ -237,6 +256,10 @@ __pthread_mutex_unlock_full (pthread_mutex_t
> *mutex, int decr)
>  
>        if (--mutex->__data.__count != 0)
>  	/* We still hold the mutex.  */
> +
> +        /* currently, the systemtap pthread probe does not have */
> +        /* probe point here because the thread still owns the lock */
> +        /* this might change in the future */
>  	return 0;
>        goto pp;
>  
> @@ -272,6 +295,9 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex,
> int decr)
>  			PTHREAD_MUTEX_PSHARED (mutex));
>  
>        int oldprio = newval >> PTHREAD_MUTEX_PRIO_CEILING_SHIFT;
> +
> +      LIBC_PROBE(pthread_mutex_release, 1, mutex);
> +
>        return __pthread_tpp_change_priority (oldprio, -1);
>  
>      default:
> @@ -279,6 +305,8 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex,
> int decr)
>        return EINVAL;
>      }
>  
> +  LIBC_PROBE(pthread_mutex_release, 1, mutex);
> +
>    return 0;
>  }
>  
> diff --git a/nptl/pthread_rwlock_destroy.c
> b/nptl/pthread_rwlock_destroy.c
> index 28fd24b..b14de8f 100644
> --- a/nptl/pthread_rwlock_destroy.c
> +++ b/nptl/pthread_rwlock_destroy.c
> @@ -19,12 +19,14 @@
>  
>  #include "pthreadP.h"
>  
> +#include <stap-probe.h>
>  
>  int
>  __pthread_rwlock_destroy (rwlock)
>       pthread_rwlock_t *rwlock;
>  {
>    /* Nothing to be done.  For now.  */
> +  LIBC_PROBE(pthread_rwlock_destroy, 1, rwlock);
>    return 0;
>  }
>  strong_alias (__pthread_rwlock_destroy, pthread_rwlock_destroy)
> diff --git a/nptl/pthread_rwlock_rdlock.c b/nptl/pthread_rwlock_rdlock.c
> index 31eb508..7b4d8f0 100644
> --- a/nptl/pthread_rwlock_rdlock.c
> +++ b/nptl/pthread_rwlock_rdlock.c
> @@ -23,6 +23,8 @@
>  #include <pthread.h>
>  #include <pthreadP.h>
>  
> +#include <stap-probe.h>
> +
>  
>  /* Acquire read lock for RWLOCK.  */
>  int
> @@ -31,6 +33,8 @@ __pthread_rwlock_rdlock (rwlock)
>  {
>    int result = 0;
>  
> +  LIBC_PROBE(pthread_rwlock_rdlock, 1, rwlock);
> +
>    /* Make sure we are along.  */
>    lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
>  
> @@ -49,6 +53,13 @@ __pthread_rwlock_rdlock (rwlock)
>  	      --rwlock->__data.__nr_readers;
>  	      result = EAGAIN;
>  	    }
> +          else
> +            {
> +              /* systemtap pthread probe - this is the only place where
> */
> +              /* we get this read-write lock */
> +              LIBC_PROBE(pthread_rwlock_rdlock, 1, rwlock);
> +            }
> +
>  
>  	  break;
>  	}
> diff --git a/nptl/pthread_rwlock_unlock.c b/nptl/pthread_rwlock_unlock.c
> index a7ef71a..ba9620b 100644
> --- a/nptl/pthread_rwlock_unlock.c
> +++ b/nptl/pthread_rwlock_unlock.c
> @@ -23,10 +23,14 @@
>  #include <pthread.h>
>  #include <pthreadP.h>
>  
> +#include <stap-probe.h>
> +
>  /* Unlock RWLOCK.  */
>  int
>  __pthread_rwlock_unlock (pthread_rwlock_t *rwlock)
>  {
> +  LIBC_PROBE(pthread_rwlock_unlock, 1, rwlock);
> +
>    lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
>    if (rwlock->__data.__writer)
>      rwlock->__data.__writer = 0;
> diff --git a/nptl/pthread_rwlock_wrlock.c b/nptl/pthread_rwlock_wrlock.c
> index 64fe970..09b9454 100644
> --- a/nptl/pthread_rwlock_wrlock.c
> +++ b/nptl/pthread_rwlock_wrlock.c
> @@ -23,6 +23,7 @@
>  #include <pthread.h>
>  #include <pthreadP.h>
>  
> +#include <stap-probe.h>
>  
>  /* Acquire write lock for RWLOCK.  */
>  int
> @@ -31,6 +32,8 @@ __pthread_rwlock_wrlock (rwlock)
>  {
>    int result = 0;
>  
> +  LIBC_PROBE(pthread_rwlock_wrlock, 1, rwlock);
> +
>    /* Make sure we are along.  */
>    lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
>  
> @@ -41,6 +44,11 @@ __pthread_rwlock_wrlock (rwlock)
>  	{
>  	  /* Mark self as writer.  */
>  	  rwlock->__data.__writer = THREAD_GETMEM (THREAD_SELF, tid);
> +
> +          /* systemtap pthread probe - this is the only place where we
> can */
> +          /* get this read-write lock. */
> +          LIBC_PROBE(pthread_rwlock_wrlock_acquire, 1, rwlock);
> +
>  	  break;
>  	}
>  
> 
> 
> On Mon, 2010-08-09 at 09:37 -0700, Roland McGrath wrote:
> > > I have modified your sdt.h slightly and used it for the pthread
> > > probes. Is there a special branch of systemtap or utrace that I need
> > > to use in order to test/benchmark the overhead of the existence of the
> > > new sdt probes in libpthread?
> > 
> > The translator work has yet to be done.  So at the moment all you could
> > test is having the probes in there statically (i.e. addition of nop
> > instructions) and not using them.
> > 
> > 
> > Thanks,
> > Roland
> 



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]