#define VALN s
#define VALSTOR char value[MAP_STRING_LENGTH]
#define MAP_GET_VAL(node) ((node)->value)
-#define MAP_SET_VAL(map,node,val,add) _new_map_set_str(map,MAP_GET_VAL(node),val,add)
-#define MAP_COPY_VAL(map,node,val,add) MAP_SET_VAL(map,node,val,add)
+#define MAP_SET_VAL(map,node,val,add,s1,s2,s3,s4,s5) _new_map_set_str(map,MAP_GET_VAL(node),val,add)
+#define MAP_COPY_VAL(map,node,val,add) MAP_SET_VAL(map,node,val,add,0,0,0,0,0)
#define NULLRET ""
#elif VALUE_TYPE == INT64
#define VALTYPE int64_t
#define VALN i
#define VALSTOR int64_t value
#define MAP_GET_VAL(node) ((node)->value)
-#define MAP_SET_VAL(map,node,val,add) _new_map_set_int64(map,&MAP_GET_VAL(node),val,add)
-#define MAP_COPY_VAL(map,node,val,add) MAP_SET_VAL(map,node,val,add)
+#define MAP_SET_VAL(map,node,val,add,s1,s2,s3,s4,s5) _new_map_set_int64(map,&MAP_GET_VAL(node),val,add)
+#define MAP_COPY_VAL(map,node,val,add) MAP_SET_VAL(map,node,val,add,0,0,0,0,0)
#define NULLRET (int64_t)0
#elif VALUE_TYPE == STAT
#define VALTYPE stat_data*
#define VALN x
#define VALSTOR stat_data value
#define MAP_GET_VAL(node) (&(node)->value)
-#define MAP_SET_VAL(map,node,val,add) _new_map_set_stat(map,MAP_GET_VAL(node),val,add)
+#define MAP_SET_VAL(map,node,val,add,s1,s2,s3,s4,s5) _new_map_set_stat(map,MAP_GET_VAL(node),val,add,s1,s2,s3,s4,s5)
#define MAP_COPY_VAL(map,node,val,add) _new_map_copy_stat(map,MAP_GET_VAL(node),val,add)
#define NULLRET (stat_data*)0
#else
#endif /* VALUE_TYPE */
-static int KEYSYM(__stp_map_set) (MAP map, ALLKEYSD(key), VSTYPE val, int add)
+static inline int KEYSYM(__stp_map_set) (MAP map, ALLKEYSD(key), VSTYPE val, int add, int s1, int s2, int s3, int s4, int s5)
{
unsigned int hv;
struct mhlist_head *head;
mhlist_for_each_entry(n, e, head, node.hnode) {
if (KEY_EQ_P(n)) {
- return MAP_SET_VAL(map, n, val, add);
+ return MAP_SET_VAL(map, n, val, add, s1, s2, s3, s4, s5);
}
}
/* key not found */
if (n == NULL)
return -1;
KEYCPY(n);
- return MAP_SET_VAL(map, n, val, 0);
+ return MAP_SET_VAL(map, n, val, 0, s1, s2, s3, s4, s5);
}
static int KEYSYM(_stp_map_set) (MAP map, ALLKEYSD(key), VSTYPE val)
{
- return KEYSYM(__stp_map_set) (map, ALLKEYS(key), val, 0);
+ return KEYSYM(__stp_map_set) (map, ALLKEYS(key), val, 0, 1, 1, 1, 1, 1);
}
static int KEYSYM(_stp_map_add) (MAP map, ALLKEYSD(key), VSTYPE val)
{
- return KEYSYM(__stp_map_set) (map, ALLKEYS(key), val, 1);
+ return KEYSYM(__stp_map_set) (map, ALLKEYS(key), val, 1, 1, 1, 1, 1, 1);
}
return 0;
}
-static int _new_map_set_stat (MAP map, struct stat_data *sd, int64_t val, int add)
+static int _new_map_set_stat (MAP map, struct stat_data *sd, int64_t val, int add, int s1, int s2, int s3, int s4, int s5)
{
if (!add) {
Hist st = &map->hist;
}
(&map->hist)->bit_shift = map->bit_shift;
(&map->hist)->stat_ops = map->stat_ops;
- __stp_stat_add (&map->hist, sd, val);
+ __stp_stat_add (&map->hist, sd, val, s1, s2, s3, s4, s5);
return 0;
}
static MAP _stp_pmap_agg (PMAP pmap, map_update_fn update, map_cmp_fn cmp);
static struct map_node *_stp_new_agg(MAP agg, struct mhlist_head *ahead,
struct map_node *ptr, map_update_fn update);
-static int _new_map_set_stat (MAP map, struct stat_data *dst, int64_t val, int add);
+static int _new_map_set_stat (MAP map, struct stat_data *dst, int64_t val, int add, int s1, int s2, int s3, int s4, int s5);
static int _new_map_copy_stat (MAP map, struct stat_data *dst, struct stat_data *src, int add);
static void _stp_map_sort (MAP map, int keynum, int dir, map_get_key_fn get_key);
static void _stp_map_sortn(MAP map, int n, int keynum, int dir, map_get_key_fn get_key);
{
int res;
MAP m = _stp_pmap_get_map (pmap, MAP_GET_CPU());
- res = KEYSYM(__stp_map_set) (m, ALLKEYS(key), val, 0);
+ res = KEYSYM(__stp_map_set) (m, ALLKEYS(key), val, 0, 1, 1, 1, 1, 1);
MAP_PUT_CPU();
return res;
}
-static int KEYSYM(_stp_pmap_add) (PMAP pmap, ALLKEYSD(key), VSTYPE val)
+static inline int KEYSYM(_stp_pmap_add) (PMAP pmap, ALLKEYSD(key), VSTYPE val, int s1, int s2, int s3, int s4, int s5)
{
int res;
MAP m = _stp_pmap_get_map (pmap, MAP_GET_CPU());
m->bit_shift = pmap->bit_shift;
m->stat_ops = pmap->stat_ops;
- res = KEYSYM(__stp_map_set) (m, ALLKEYS(key), val, 1);
+ res = KEYSYM(__stp_map_set) (m, ALLKEYS(key), val, 1, s1, s2, s3, s4, s5);
MAP_PUT_CPU();
return res;
}
_stp_print_flush();
}
-static void __stp_stat_add(Hist st, stat_data *sd, int64_t val)
+static inline void __stp_stat_add(Hist st, stat_data *sd, int64_t val,
+ int stat_op_count, int stat_op_sum, int stat_op_min,
+ int stat_op_max, int stat_op_variance)
{
int n;
int delta = 0;
- /*
- * Below, we use Welford's online algorithm for computing variance.
- * https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
- */
sd->shift = st->bit_shift;
sd->stat_ops = st->stat_ops;
if (sd->count == 0) {
sd->avg_s = val << sd->shift;
sd->_M2 = 0;
} else {
-		sd->count++;
-		sd->sum += val;
-		if (val > sd->max)
+			if(stat_op_count)
+				sd->count++;
+			if(stat_op_sum)
+				sd->sum += val;
+			/* Guard each extreme with its own operator flag:
+			 * stat_op_max controls sd->max, stat_op_min controls sd->min. */
+			if (stat_op_max && (val > sd->max))
 			sd->max = val;
-		if (val < sd->min)
+			if (stat_op_min && (val < sd->min))
 			sd->min = val;
/*
- * Following is an optimization that improves performance
- * in case @variance() isn't used with given global.
- *
- * Note that this doesn't affect computing of @avg(), which
- * happens within the per-CPU aggregation functions.
+ * Below, we use Welford's online algorithm for computing variance.
+ * https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
*/
- if (sd->stat_ops & STAT_OP_VARIANCE) {
+ if (stat_op_variance) {
delta = (val << sd->shift) - sd->avg_s;
sd->avg_s += _stp_div64(NULL, delta, sd->count);
sd->_M2 += delta * ((val << sd->shift) - sd->avg_s);
}
/** Add to a Stat.
- * Add an int64 to a Stat.
+ * Add an int64 to a Stat, and for optimization purposes specify which
+ * statistical operators are bound to given Stat. Set all of stat_op*
+ * to 1 if unsure. Note that @avg() is being evaluated separately based
+ * on @sum and @count within the code directly generated by the translator.
*
* @param st Stat
* @param val Value to add
+ * @param stat_op_count int
+ * @param stat_op_sum int
+ * @param stat_op_min int
+ * @param stat_op_max int
+ * @param stat_op_variance int
+ *
*/
-static void _stp_stat_add (Stat st, int64_t val)
+static inline void _stp_stat_add (Stat st, int64_t val, int stat_op_count,
+ int stat_op_sum, int stat_op_min,
+ int stat_op_max, int stat_op_variance)
{
stat_data *sd = _stp_stat_per_cpu_ptr (st, STAT_GET_CPU());
STAT_LOCK(sd);
- __stp_stat_add (&st->hist, sd, val);
+ __stp_stat_add (&st->hist, sd, val, stat_op_count, stat_op_sum,
+ stat_op_min, stat_op_max, stat_op_variance);
STAT_UNLOCK(sd);
STAT_PUT_CPU();
}
-
static void _stp_stat_clear_data (Stat st, stat_data *sd)
{
int j;
}
s.op->newline() << "#ifdef STP_TIMING";
- s.op->newline() << "if (likely (stat)) _stp_stat_add(stat, cycles_elapsed);";
+ // STP_TIMING requires min, max, avg (and thus count and sum), but not variance.
+ s.op->newline() << "if (likely (stat)) _stp_stat_add(stat, cycles_elapsed, 1, 1, 1, 1, 0);";
s.op->newline() << "#endif";
if (overload_processing && !s.runtime_usermode_p())
--- /dev/null
+# This is a test for stat run time optimizations.
+# See corresponding .stp file for details.
+
+set test "optim_stats"
+
+if {![installtest_p]} {
+ untested $test
+ return
+}
+
+for {set i 1} {$i <= 2} {incr i} {
+ foreach runtime [get_runtime_list] {
+ if {$runtime != ""} {
+ spawn stap --runtime=$runtime -g --suppress-time-limits $srcdir/$subdir/$test$i.stp
+ } else {
+ spawn stap -g --suppress-time-limits $srcdir/$subdir/$test$i.stp
+ }
+
+ expect {
+ -timeout 300
+ -re {^IGNORE[^\r\n]+\r\n} { exp_continue }
+ -re {^PASS test1[^\r\n]+\r\n} { pass "$test$i.stp subtest1 $runtime"; exp_continue }
+ -re {^PASS test2[^\r\n]+\r\n} { pass "$test$i.stp subtest2 $runtime"; exp_continue }
+ -re {^FAIL test1[^\r\n]+\r\n} { fail "$test$i.stp subtest1 $runtime"; exp_continue }
+ -re {^FAIL test2[^\r\n]+\r\n} { fail "$test$i.stp subtest2 $runtime"; exp_continue }
+ timeout {fail "$test: unexpected timeout"}
+ eof { }
+ }
+ catch {close}; catch {wait}
+ }
+}
--- /dev/null
+/*
+ * This is a test for stat run time optimizations. Each stat has a list of
+ * requested statistical operators. For instance, if a script uses stat x,
+ * and only refers to @avg(x), then the list of requested statistical operators
+ * for given stat x is @count, @sum, and @avg. The @min(x) and @max(x) are
+ * not in the list, and thus do not need to be evaluated at the _stp_stat_add()
+ * time (iow, at the x<<<val time). Optimization based on this makes the
+ * systemtap runtime run faster. The goal of this test is to verify that this
+ * sort of optimizations actually works in a measurable way.
+ *
+ * At the moment, the available stat operators are @count, @sum, @min, @max,
+ * @avg, and @variance. The most computationally expensive is @variance.
+ * Detecting the variance optimization is quite simple. Other operators are
+ * computationally cheap and thus detecting their respective optimizations is
+ * somewhat tricky on a multiuser/multitasking system, where so many irrelevant
+ * bearings are affecting our fragile measurement. In this case we must set
+ * the threshold distinguishing between the PASS and FAIL pretty carefully. Just
+ * slightly above the "noise". This testcase is sentenced to be fragile by its
+ * nature though.
+ *
+ * One of the basic assumptions for this sort of test is that if we compare stats
+ * having identical list of requested statistical operators, we should get very
+ * similar results. It turns out, that to achieve this, we can't simply feed the
+ * values into measured stats in straightforward order. Instead, we need to baffle
+ * the optimizations under the hood by complicating the "feed" order slightly.
+ * After verifying this assumption, we can start comparing different stats.
+ *
+ * Since verifying the @variance optimization is much easier and doesn't require
+ * so many time consuming iterations to get reasonable results, this test is
+ * divided into two parts, TEST 1, and TEST 2, where in TEST 1 we focus on the
+ * optimization for @count, @sum, @min, and @max, and then, in TEST 2, we test the
+ * @variance optimization separately. This makes the test itself run faster.
+ *
+ */
+
+@define RANDCNT %( 200000 %)
+@define RANDMAX %( 1000 %)
+@define ITERS %( 1500 %)
+
+@define feed(agg, tagg)
+%(
+ t = time()
+ foreach(k in randvals)
+ @agg <<< k
+ @tagg += time() - t
+%)
+
+global x, tx = 0, y, ty = 0
+global a, ta = 0, b, tb = 0
+global randvals[@RANDCNT]
+
+function time() { return gettimeofday_us() }
+
+probe begin
+{
+ /* TEST 1: test optimizations for @count, @sum, @min, and @max. */
+
+ for (i=0; i<@ITERS; i++)
+ {
+
+ for (j=0; j<@RANDCNT; j++)
+ randvals[j] = randint(@RANDMAX)
+
+ /* The "ordering dance" described above happens here */
+ if(i%2)
+ {
+ @feed(x, tx)
+ @feed(y, ty)
+ }
+ else
+ {
+ @feed(y, ty)
+ @feed(x, tx)
+ }
+ }
+
+ /*
+ * We need to print the stats out to avoid compiler elision.
+ * The list of stats mentioned below makes the actual difference
+ * between stats under test and is the gist of this test. The test
+ * should show no measurable shrinkage, if the below list doesn't
+ * differ for measured stats.
+ */
+ printdln(" ", "IGNORE", @count(x))
+ printdln(" ", "IGNORE", @count(y), @sum(y), @min(y), @max(y))
+
+ /* Measured shrinkage [%] */
+ shrinkage = (ty-tx)*100/ty
+
+ /*
+	 * Threshold [%] (just slightly above the "noise") The usual values were
+ * around 8% at the time of writing this test using gcc-6.2.1-1.fc26.x86_64.
+ * But deeper testing shows, that on other arches, namely on power and arm,
+ * gcc is not so good optimizing the runtime code, so here we only check
+ * for regressions.
+ */
+ treshold = 0
+
+ printf("%s test1 (%d)\n", ((shrinkage >= treshold) ? "PASS" : "FAIL"), shrinkage)
+
+
+ /* TEST 2: test optimizations for @variance. */
+
+ for (i=0; i<(@ITERS / 4); i++)
+ {
+
+ for (j=0; j<@RANDCNT; j++)
+ randvals[j] = randint(@RANDMAX)
+
+ if(i%2)
+ {
+ @feed(a, ta)
+ @feed(b, tb)
+ }
+ else
+ {
+ @feed(b, tb)
+ @feed(a, ta)
+ }
+ }
+
+ printdln(" ", "IGNORE", @count(a))
+ printdln(" ", "IGNORE", @variance(b))
+
+ shrinkage = (tb-ta)*100/tb
+
+ /*
+	 * Threshold [%], for this test the usual value is around 68% at the time
+ * of writing this test.
+ */
+ treshold = 20
+
+ printf("%s test2 (%d)\n", ((shrinkage >= treshold) ? "PASS" : "FAIL"), shrinkage)
+
+ exit()
+}
--- /dev/null
+/*
+ * Analogy to optim_stats1.stp, but for pmaps. See optim_stats1.stp for comments.
+ */
+
+@define RANDCNT %( 200000 %)
+@define RANDMAX %( 1000 %)
+@define ITERS %( 1500 %)
+
+@define feed(agg, tagg)
+%(
+ t = time()
+ foreach(k in randvals)
+ @agg <<< k
+ @tagg += time() - t
+%)
+
+global x, tx = 0, y, ty = 0
+global a, ta = 0, b, tb = 0
+global randvals[@RANDCNT]
+
+function time() { return gettimeofday_us() }
+
+probe begin
+{
+ /* TEST 1 */
+
+ for (i=0; i<@ITERS; i++)
+ {
+
+ for (j=0; j<@RANDCNT; j++)
+ randvals[j] = randint(@RANDMAX)
+
+ if(i%2)
+ {
+ @feed(x[1], tx)
+ @feed(y[1], ty)
+ }
+ else
+ {
+ @feed(y[1], ty)
+ @feed(x[1], tx)
+ }
+ }
+
+ printdln(" ", "IGNORE", @count(x[1]))
+ printdln(" ", "IGNORE", @count(y[1]), @sum(y[1]), @min(y[1]), @max(y[1]))
+
+ shrinkage = (ty-tx)*100/ty
+
+ treshold = 0
+
+ printf("%s test1 (%d)\n", ((shrinkage >= treshold) ? "PASS" : "FAIL"), shrinkage)
+
+
+ /* TEST 2 */
+
+ for (i=0; i<(@ITERS / 4); i++)
+ {
+
+ for (j=0; j<@RANDCNT; j++)
+ randvals[j] = randint(@RANDMAX)
+
+ if(i%2)
+ {
+ @feed(a[1], ta)
+ @feed(b[1], tb)
+ }
+ else
+ {
+ @feed(b[1], tb)
+ @feed(a[1], ta)
+ }
+ }
+
+ printdln(" ", "IGNORE", @count(a[1]))
+ printdln(" ", "IGNORE", @variance(b[1]))
+
+ shrinkage = (tb-ta)*100/tb
+
+ treshold = 20
+
+ printf("%s test2 (%d)\n", ((shrinkage >= treshold) ? "PASS" : "FAIL"), shrinkage)
+
+ exit()
+}
return result;
}
+ string stat_op_parms() const
+ {
+ string result = "";
+ result += (sd.stat_ops & (STAT_OP_COUNT|STAT_OP_AVG|STAT_OP_VARIANCE)) ? "1, " : "0, ";
+ result += (sd.stat_ops & (STAT_OP_SUM|STAT_OP_AVG|STAT_OP_VARIANCE)) ? "1, " : "0, ";
+ result += (sd.stat_ops & STAT_OP_MIN) ? "1, " : "0, ";
+ result += (sd.stat_ops & STAT_OP_MAX) ? "1, " : "0, ";
+ result += (sd.stat_ops & STAT_OP_VARIANCE) ? "1" : "0";
+ return result;
+ }
+
string calculate_aggregate() const
{
if (!is_parallel())
// impedance matching: empty strings -> NULL
if (type() == pe_stats)
- res += (call_prefix("add", indices) + ", " + val.value() + ")");
+ res += (call_prefix("add", indices) + ", " + val.value() + ", " + stat_op_parms() + ")");
else
throw SEMANTIC_ERROR(_("adding a value of an unsupported map type"));
o->newline(1) << "? ((int32_t)cycles_atend - (int32_t)cycles_atstart)";
o->newline() << ": (~(int32_t)0) - (int32_t)cycles_atstart + (int32_t)cycles_atend + 1;";
o->indent(-1);
- o->newline() << "_stp_stat_add(g_refresh_timing, cycles_elapsed);";
+ // STP_TIMING requires min, max, avg (and thus count and sum), but not variance.
+ o->newline() << "_stp_stat_add(g_refresh_timing, cycles_elapsed, 1, 1, 1, 1, 0);";
o->newline(-1) << "}";
o->newline() << "#endif";
}
}
else if (op == "<<<")
{
+ int stat_op_count = lval.sdecl().stat_ops & (STAT_OP_COUNT|STAT_OP_AVG|STAT_OP_VARIANCE);
+ int stat_op_sum = lval.sdecl().stat_ops & (STAT_OP_SUM|STAT_OP_AVG|STAT_OP_VARIANCE);
+ int stat_op_min = lval.sdecl().stat_ops & STAT_OP_MIN;
+ int stat_op_max = lval.sdecl().stat_ops & STAT_OP_MAX;
+ int stat_op_variance = lval.sdecl().stat_ops & STAT_OP_VARIANCE;
+
assert(lval.type() == pe_stats);
assert(rval.type() == pe_long);
assert(res.type() == pe_long);
- o->newline() << "_stp_stat_add (" << lval << ", " << rval << ");";
+
+ o->newline() << "_stp_stat_add (" << lval << ", " << rval << ", " <<
+ stat_op_count << ", " << stat_op_sum << ", " <<
+ stat_op_min << ", " << stat_op_max << ", " <<
+ stat_op_variance << ");";
res = rval;
}
else if (res.type() == pe_long)