--- /dev/null
+/* -*- linux-c -*-
+ * time-estimation with minimal dependency on xtime
+ * Copyright (C) 2006 Intel Corporation.
+ *
+ * This file is part of systemtap, and is free software. You can
+ * redistribute it and/or modify it under the terms of the GNU General
+ * Public License (GPL); either version 2, or (at your option) any
+ * later version.
+ */
+
+#include <linux/cpufreq.h>
+
+/* Per-CPU timekeeping state: a (wall-clock, cycle-counter) reference pair
+ * plus the CPU frequency needed to interpolate between resyncs. */
+typedef struct __stp_time_t {
+ /*
+ * A write lock is taken by __stp_time_timer_callback() and
+ * __stp_time_cpufreq_callback(). The timer callback is called from a
+ * softIRQ, and cpufreq callback guarantees that it is not called within
+ * an interrupt context. Thus there should be no opportunity for a
+ * deadlock between writers.
+ *
+ * A read lock is taken by _stp_gettimeofday_us(). There is the potential
+ * for this to occur at any time, so there is a slim chance that this will
+ * happen while the write lock is held, and it will be impossible to get a
+ * read lock. However, we can limit how long we try to get the lock to
+ * avoid a deadlock.
+ *
+ * Note that seqlock is safer than rwlock because some kernels
+ * don't have read_trylock.
+ */
+ seqlock_t lock;
+
+ /* These provide a reference time to correlate cycles to real time:
+ * base_cycles is the cycle counter sampled at the moment base_time
+ * was read. */
+ struct timeval base_time;
+ cycles_t base_cycles;
+
+ /* The frequency in MHz of this CPU, for interpolating
+ * cycle counts from the base time. MHz == cycles per microsecond. */
+ unsigned int cpufreq;
+
+ /* Callback used to schedule updates of the base_time */
+ struct timer_list timer;
+} stp_time_t;
+
+/* One stp_time_t instance per CPU; each CPU maintains its own reference. */
+DEFINE_PER_CPU(stp_time_t, stp_time);
+
+/* Try to estimate the number of CPU cycles in a microsecond - i.e. MHz. This
+ * relies heavily on the accuracy of udelay. By calling udelay twice, we
+ * attempt to account for overhead in the call.
+ */
+static unsigned int
+__stp_estimate_cpufreq(void)
+{
+ cycles_t beg, mid, end;
+ beg = get_cycles(); barrier();
+ udelay(2); barrier();
+ mid = get_cycles(); barrier();
+ udelay(10); barrier();
+ end = get_cycles(); barrier();
+ /* (end - mid) - (mid - beg) is the cycle count for (10us - 2us) = 8us
+ * of delay, with the fixed per-call udelay overhead cancelled out.
+ * beg - 2*mid + end is that same quantity rearranged; dividing by 8
+ * gives cycles per microsecond, i.e. the frequency in MHz. */
+ return (beg - 2*mid + end)/8;
+}
+
+/* Per-jiffy per-CPU timer callback: re-samples the wall-clock time and the
+ * cycle counter together and publishes the new reference pair, keeping the
+ * interpolation error in _stp_gettimeofday_us() small. 'val' is the timer
+ * data cookie, unused here. Runs in softIRQ context (see the locking
+ * comment on struct __stp_time_t). */
+static void
+__stp_time_timer_callback(unsigned long val)
+{
+ unsigned long flags;
+ stp_time_t *time;
+ struct timeval tv;
+ cycles_t cycles;
+
+ /* Sample the wall clock and the cycle counter back-to-back so the
+ * pair stays mutually consistent. */
+ do_gettimeofday(&tv);
+ cycles = get_cycles();
+
+ time = &__get_cpu_var(stp_time);
+ /* Publish the new reference under the write side of the seqlock. */
+ write_seqlock_irqsave(&time->lock, flags);
+ time->base_time = tv;
+ time->base_cycles = cycles;
+ write_sequnlock_irqrestore(&time->lock, flags);
+
+ /* Re-arm ourselves for the next jiffy. */
+ mod_timer(&time->timer, jiffies + 1);
+}
+
+/* Per-CPU initialization, invoked on every CPU via on_each_cpu() from
+ * _stp_init_time(): initializes the seqlock, takes the initial
+ * time/cycle reference sample, determines the CPU frequency, and starts
+ * the per-jiffy resync timer. 'info' is the on_each_cpu() argument,
+ * unused here. */
+static void
+__stp_init_time(void *info)
+{
+ stp_time_t *time = &__get_cpu_var(stp_time);
+
+ seqlock_init(&time->lock);
+ do_gettimeofday(&time->base_time);
+ time->base_cycles = get_cycles();
+
+ /* cpufreq_get() reports kHz, so divide by 1000 for MHz. It yields 0
+ * when cpufreq has no data for this CPU; fall back to measuring the
+ * frequency ourselves in that case. */
+ time->cpufreq = cpufreq_get(smp_processor_id()) / 1000;
+ if (!time->cpufreq) {
+ time->cpufreq = __stp_estimate_cpufreq();
+ }
+
+ /* Kick off the periodic base-time refresh, first firing next jiffy. */
+ init_timer(&time->timer);
+ time->timer.expires = jiffies + 1;
+ time->timer.function = __stp_time_timer_callback;
+ add_timer(&time->timer);
+}
+
+/* cpufreq transition notifier: when a CPU's frequency has changed
+ * (POSTCHANGE) or been restored after resume (RESUMECHANGE), record the
+ * new frequency, converted from kHz to MHz, in that CPU's per-CPU state so
+ * cycle-count interpolation keeps using the right scale. Per the comment
+ * on struct __stp_time_t, the cpufreq core does not call this from
+ * interrupt context. Always returns NOTIFY_OK. */
+static int
+__stp_time_cpufreq_callback(struct notifier_block *self,
+ unsigned long state, void *vfreqs)
+{
+ unsigned long flags;
+ struct cpufreq_freqs *freqs;
+ unsigned int freq_mhz;
+ stp_time_t *time;
+
+ switch (state) {
+ case CPUFREQ_POSTCHANGE:
+ case CPUFREQ_RESUMECHANGE:
+ freqs = (struct cpufreq_freqs *)vfreqs;
+ /* freqs->new is in kHz; we track MHz (cycles per microsecond). */
+ freq_mhz = freqs->new / 1000;
+
+ time = &per_cpu(stp_time, freqs->cpu);
+ write_seqlock_irqsave(&time->lock, flags);
+ time->cpufreq = freq_mhz;
+ write_sequnlock_irqrestore(&time->lock, flags);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+/* Notifier block registered with the cpufreq core by _stp_init_time() and
+ * unregistered by _stp_kill_time(), to track frequency transitions. */
+struct notifier_block __stp_time_notifier = {
+ .notifier_call = __stp_time_cpufreq_callback,
+};
+
+/* Teardown: stop each online CPU's resync timer and unregister the
+ * cpufreq transition notifier.
+ * NOTE(review): the timer callback re-arms itself via mod_timer();
+ * del_timer_sync() is relied on here to stop a self-rearming timer —
+ * confirm that holds on all target kernels. Also, only currently-online
+ * CPUs are visited; verify CPU hotplug between init and teardown is not
+ * a concern. */
+void
+_stp_kill_time(void)
+{
+ int cpu;
+ for_each_online_cpu(cpu) {
+ stp_time_t *time = &per_cpu(stp_time, cpu);
+ del_timer_sync(&time->timer);
+ }
+ cpufreq_unregister_notifier(&__stp_time_notifier, CPUFREQ_TRANSITION_NOTIFIER);
+}
+
+/* Module-level init: run __stp_init_time() on every CPU (the final '1'
+ * argument waits for completion), then register for cpufreq transition
+ * notifications. Returns 0 on success, or the nonzero error from
+ * on_each_cpu() / cpufreq_register_notifier(). */
+int
+_stp_init_time(void)
+{
+ int ret = 0;
+
+ if ((ret = on_each_cpu(__stp_init_time, NULL, 0, 1)))
+ return ret;
+
+ return cpufreq_register_notifier(&__stp_time_notifier, CPUFREQ_TRANSITION_NOTIFIER);
+}
+
+/* Return the current wall-clock time in microseconds since the epoch
+ * without calling do_gettimeofday() (and thus without touching
+ * xtime_lock): interpolate from this CPU's latest (base_time, base_cycles)
+ * sample using the current cycle count and the CPU frequency in MHz.
+ * Returns 0 if a consistent snapshot cannot be read after MAXTRYLOCK
+ * bounded retries (see the deadlock discussion on struct __stp_time_t).
+ * NOTE(review): __get_cpu_var() is used with no explicit preempt disable;
+ * presumably callers run in a context where migration cannot occur —
+ * confirm against the probe-context callers. */
+int64_t
+_stp_gettimeofday_us(void)
+{
+ struct timeval base;
+ cycles_t last, delta;
+ unsigned int freq;
+ unsigned int seq;
+ int i = 0;
+
+ stp_time_t *time = &__get_cpu_var(stp_time);
+
+ /* Seqlock read side: snapshot the reference triple, and retry (with a
+ * short delay, up to MAXTRYLOCK times) if a writer raced with us. */
+ seq = read_seqbegin(&time->lock);
+ base = time->base_time;
+ last = time->base_cycles;
+ freq = time->cpufreq;
+ while (unlikely(read_seqretry(&time->lock, seq))) {
+ if (unlikely(++i >= MAXTRYLOCK))
+ return 0;
+ ndelay(TRYLOCKDELAY);
+ seq = read_seqbegin(&time->lock);
+ base = time->base_time;
+ last = time->base_cycles;
+ freq = time->cpufreq;
+ }
+
+ /* Cycles elapsed since the reference sample; do_div() divides delta
+ * in place, converting cycles to microseconds (freq is in MHz). */
+ delta = get_cycles() - last;
+ do_div(delta, freq);
+
+ return (USEC_PER_SEC * (int64_t)base.tv_sec) + base.tv_usec + delta;
+}
+
*
* Copyright (C) IBM Corporation, 2005
* Copyright (C) Red Hat Inc, 2005, 2006
+ * Copyright (C) Intel Corporation, 2006
*
* This file is part of systemtap, and is free software. You can
* redistribute it and/or modify it under the terms of the GNU General
#include <linux/delay.h>
#include "transport.h"
+#include "time.c"
#ifdef STP_RELAYFS
#include "relayfs.c"
*/
void _stp_handle_start (struct transport_start *st)
{
+ int ret;
kbug ("stp_handle_start pid=%d\n", st->pid);
+ ret = _stp_init_time();
+
/* note: st->pid is actually the return code for the reply packet */
- st->pid = probe_start();
+ st->pid = unlikely(ret) ? ret : probe_start();
atomic_set(&_stp_start_finished,1);
/* if probe_start() failed, suppress calling probe_exit() */
_stp_transport_send(STP_EXIT, &dont_rmmod, sizeof(int));
kbug("done with transport_send STP_EXIT\n");
}
+ _stp_kill_time();
}
/*
// timestamp tapset
// Copyright (C) 2005-2006 Red Hat Inc.
+// Copyright (C) 2006 Intel Corporation.
//
// This file is part of systemtap, and is free software. You can
// redistribute it and/or modify it under the terms of the GNU General
// return in microseconds since epoch
function gettimeofday_us:long () %{ /* pure */
- struct timeval tm;
- do_gettimeofday (& tm);
- THIS->__retvalue = (tm.tv_sec * 1000000ULL) + (tm.tv_usec);
+ /* NOTE: we can't use do_gettimeofday because we could be called from a
+ * context where xtime_lock is already held. See bug #2525. */
+ THIS->__retvalue = _stp_gettimeofday_us();
%}
// return in milliseconds since epoch
-function gettimeofday_ms:long () %{ /* pure */
- struct timeval tm;
- do_gettimeofday (& tm);
- THIS->__retvalue = (tm.tv_sec * 1000ULL) + (tm.tv_usec / 1000);
-%}
+function gettimeofday_ms:long () {
+ return gettimeofday_us() / 1000;
+}
// return in seconds since epoch
-function gettimeofday_s:long () %{ /* pure */
- struct timeval tm;
- do_gettimeofday (& tm);
- THIS->__retvalue = tm.tv_sec;
-%}
+function gettimeofday_s:long () {
+ return gettimeofday_us() / 1000000;
+}
// likewise jiffies, monotonic_clock ...