This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: proposed perf.counter enhancement


# Statistics aggregate: receives one @perf("z") reading each time the
# process("./bench.x").function("main") probe below fires.
global A

# Declares the per-thread perf counter "z" using the proposed .thread("z")
# suffix.  The handler body is intentionally empty.
# NOTE(review): type(0).config(0) presumably selects the hardware CPU-cycles
# event -- confirm against linux/perf_event.h.
probe perf.type(0).config(0).thread("z") {
  # this probe, even if it is never hit, sets up the counter "z"
}

# Fires at function "main" of ./bench.x: prints the probe point, then records
# the current value of perf counter "z" in the aggregate A.
probe process("./bench.x").function("main")
{
  printf("In %s\n",pp())
  # <<< adds one sample to the statistics aggregate; @perf("z") is the
  # proposed builtin that reads the counter declared by .thread("z").
  A <<< @perf("z")
}
# At session end, print a logarithmic histogram of the samples collected in A.
probe end
{
  print (@hist_log(A))
}


Add @perf support -elaborate.cxx -elaborate.h -parse.cxx -staptree.cxx -staptree.h

translate time list of perfs in a probe
-elaborate.h perf_counter_idx

Associate a perf with a task
-_stp_perf_init add task parm
-_stp_perf_read_init
-perf_counter_idx in uprobes-common.h
-perf_counters in uprobes-inode.c (runtime perf list)

Initialize a perf for a task at runtime
-stapiu_target_reg

List of perf counters encountered
-session.h

Add thread suffix to perf probe
-tapset-perfmon.cxx

Emit perf reading code (_stp_perf_read above)
-tapsets.cxx::visit_perf_op

map of perfs for a probe
-dwarf_derived_probe

runtime list of perfs in a probe
-perf_counter_idx in uprobes-common.h




~/work/src ~/work/bld ~/work/stap/perf
diff --git a/elaborate.cxx b/elaborate.cxx
index 633e0c9..1579577 100644
--- a/elaborate.cxx
+++ b/elaborate.cxx
@@ -4472,6 +4472,14 @@ typeresolution_info::visit_entry_op (entry_op* e)
 
 
 void
+typeresolution_info::visit_perf_op (perf_op* e)
+{
+  e->type = pe_long;
+  //  throw semantic_error(_("unexpected @perf"), e->tok);
+}
+
+
+void
 typeresolution_info::visit_cast_op (cast_op* e)
 {
   // Like target_symbol, a cast_op shouldn't survive this far
diff --git a/elaborate.h b/elaborate.h
index 403f507..087d5d1 100644
--- a/elaborate.h
+++ b/elaborate.h
@@ -121,6 +121,7 @@ struct typeresolution_info: public visitor
   void visit_cast_op (cast_op* e);
   void visit_defined_op (defined_op* e);
   void visit_entry_op (entry_op* e);
+  void visit_perf_op (perf_op* e);
 };
 
 
@@ -189,6 +190,10 @@ public:
   // Location of semaphores to activate sdt probes
   Dwarf_Addr sdt_semaphore_addr;
 
+  // Index of desired perf counter, -1 if none
+  std::vector<long> perf_counter_idx;
+  // int perf_counter_idx;
+
   // index into session.probes[], set and used during translation
   unsigned session_index;
 };
diff --git a/parse.cxx b/parse.cxx
index a36f32d..a56e3a4 100644
--- a/parse.cxx
+++ b/parse.cxx
@@ -180,6 +180,7 @@ private: // nonterminals
   target_symbol *parse_target_symbol (const token* t);
   expression* parse_entry_op (const token* t);
   expression* parse_defined_op (const token* t);
+  expression* parse_perf_op (const token* t);
   expression* parse_expression ();
   expression* parse_assignment ();
   expression* parse_ternary ();
@@ -3325,6 +3326,9 @@ expression* parser::parse_symbol ()
       if (name == "@entry")
         return parse_entry_op (t);
 
+      if (name == "@perf")
+        return parse_perf_op (t);
+
       if (name.size() > 0 && name[0] == '@')
 	{
 	  stat_op *sop = new stat_op;
@@ -3618,6 +3622,18 @@ expression* parser::parse_entry_op (const token* t)
 }
 
 
+// Parse a @perf().  Given head token has already been consumed.
+expression* parser::parse_perf_op (const token* t)
+{
+  perf_op* pop = new perf_op;
+  pop->tok = t;
+  expect_op("(");
+  pop->operand = parse_expression ();
+  expect_op(")");
+  return pop;
+}
+
+
 
 void
 parser::parse_target_symbol_components (target_symbol* e)
diff --git a/runtime/linux/perf.c b/runtime/linux/perf.c
index 77aa75b..f5c6903 100644
--- a/runtime/linux/perf.c
+++ b/runtime/linux/perf.c
@@ -26,29 +26,49 @@
  *
  * @param stp Handle for the event to be registered.
  */
-static long _stp_perf_init (struct stap_perf_probe *stp)
+static long _stp_perf_init (struct stap_perf_probe *stp, struct task_struct* task)
 {
 	int cpu;
+	struct task_struct * perf_task;
+	
+	if (stp->attr.sample_period == 0 && task == 0)
+	  return 0;
+	if (stp->per_thread && task != 0) {
+	  if (stp->events != 0) /* already setup */
+	    return 0;
+	  else {
+	    stp->events = _stp_kmalloc(sizeof(struct perf_event*));
+	    stp->events[0] = perf_event_create_kernel_counter(&stp->attr,
+							      -1,
+							      (struct task_struct *)task,
+							      stp->callback
+#ifdef STAPCONF_PERF_COUNTER_CONTEXT
+							      , NULL
+#endif
+							      );
 
-	/* allocate space for the event descriptor for each cpu */
-	stp->events = _stp_alloc_percpu (sizeof(struct perf_event*));
-	if (stp->events == NULL) {
-		return -ENOMEM;
+	  }
 	}
+	else {
+	  /* allocate space for the event descriptor for each cpu */
+	  stp->events = _stp_alloc_percpu (sizeof(struct perf_event*));
+	  if (stp->events == NULL) {
+	    return -ENOMEM;
+	  }
 
-	/* initialize event on each processor */
-	for_each_possible_cpu(cpu) {
-		struct perf_event **event = per_cpu_ptr (stp->events, cpu);
-		if (cpu_is_offline(cpu)) {
-			*event = NULL;
-			continue;
-		}
+	  /* initialize event on each processor */
+	  for_each_possible_cpu(cpu) {
+	    struct perf_event **event = per_cpu_ptr (stp->events, cpu);
+	    if (cpu_is_offline(cpu)) {
+	      *event = NULL;
+	      continue;
+	    }
 		*event = perf_event_create_kernel_counter(&stp->attr,
 							  cpu,
 #if defined(STAPCONF_PERF_STRUCTPID) || defined (STAPCONF_PERF_COUNTER_CONTEXT)
-                                                          NULL,
+							  NULL,
 #else
-                                                          -1,
+							  -1,
 #endif
 							  stp->callback
 #ifdef STAPCONF_PERF_COUNTER_CONTEXT
@@ -57,12 +77,13 @@ static long _stp_perf_init (struct stap_perf_probe *stp)
 							  );
 
 		if (IS_ERR(*event)) {
-			long rc = PTR_ERR(*event);
-			*event = NULL;
-			_stp_perf_del(stp);
-			return rc;
+		  long rc = PTR_ERR(*event);
+		  *event = NULL;
+		  _stp_perf_del(stp);
+		  return rc;
 		}
-	}
+	  }
+	} /* if (task != 0) else */
 	return 0;
 }
 
@@ -73,18 +94,82 @@ static long _stp_perf_init (struct stap_perf_probe *stp)
  */
 static void _stp_perf_del (struct stap_perf_probe *stp)
 {
-	if (stp && stp->events) {
 		int cpu;
-		/* shut down performance event sampling */
-		for_each_possible_cpu(cpu) {
-			struct perf_event **event = per_cpu_ptr (stp->events, cpu);
-			if (*event) {
-				perf_event_release_kernel(*event);
-			}
-		}
-		_stp_free_percpu (stp->events);
-		stp->events = NULL;
+	if (! stp || !stp->events)
+	  return;
+
+	/* shut down performance event sampling */
+	if (stp->per_thread) {
+	  struct perf_event *event = stp->events[0];
+	  if (event) {
+	    perf_event_release_kernel(event);
+	  }
+	  _stp_kfree (stp->events);
+	  stp->events = NULL;
 	}
+	else {
+	  for_each_possible_cpu(cpu) {
+	    struct perf_event **event = per_cpu_ptr (stp->events, cpu);
+	    if (*event) {
+	      perf_event_release_kernel(*event);
+	    }
+	  }
+	  _stp_free_percpu (stp->events);
+	  stp->events = NULL;
+	}
+}
+
+
+/*
+The first call to _stp_perf_init, via systemtap_module_init at runtime, sets
+up the aggregate (per-cpu) counters.  Per-thread counters can only be set up
+once the thread is known, so _stp_perf_init is called again later with that
+task.  A per-thread perf counter is declared by a .thread("var") suffix on
+the perf probe; perf_builder handles that suffix.  The counter is read on
+demand via the @perf("var") builtin, which is treated as a right-hand-side
+expression and reads the perf counter previously declared under that name.
+@perf is expanded by dwarf_var_expanding_visitor.
+*/
+
+static int _stp_perf_read_init (unsigned i, void* task)
+{
+  /* Choose the stap_perf_probes entry */
+  struct stap_perf_probe* stp = & stap_perf_probes[i];
+
+  _stp_perf_init (stp, (struct task_struct*)task);
+  return 0;
 }
 
+
+long _stp_perf_read (int ncpu, unsigned i)
+{
+  /* Choose the stap_perf_probes entry */
+  struct stap_perf_probe* stp = & stap_perf_probes[i];
+  u64 enabled, running;
+  struct perf_event *event;
+
+  if (stp == NULL)
+    {
+      _stp_printf ("perf_event_read_value: stp is null\n");
+      return 0;
+    }
+
+  if (stp->events == NULL)
+    {
+      _stp_printf ("perf_event_read_value: stp->events is null\n");
+      return 0;
+    }
+  else
+    event = stp->events[0];
+
+  if (event == NULL)
+    {
+      _stp_printf ("perf_event_read_value: event is null\n");
+      return 0;
+    }
+
+    return perf_event_read_value (event, &enabled, &running);
+}
+
+
 #endif /* _PERF_C_ */
diff --git a/runtime/linux/perf.h b/runtime/linux/perf.h
index 2d7266c..4a27e06 100644
--- a/runtime/linux/perf.h
+++ b/runtime/linux/perf.h
@@ -21,12 +21,18 @@ struct stap_perf_probe {
 	perf_overflow_handler_t callback;
 	struct stap_probe * const probe;
 
-	/* per-cpu data. allocated with _stp_alloc_percpu() */
+	/* ! per thread: per-cpu data. allocated with _stp_alloc_percpu() */
+        /*   per thread: one event allocated with kmalloc */
+        int per_thread;
 	struct perf_event **events;
 };
 
-static long _stp_perf_init (struct stap_perf_probe *stp);
+static long _stp_perf_init (struct stap_perf_probe *stp, struct task_struct* task);
 
 static void _stp_perf_del (struct stap_perf_probe *stp);
 
+// moved to runtime_defines.h
+// static int _stp_perf_read_init (unsigned i);
+// static long _stp_perf_read (int ncpu, unsigned i);
+
 #endif /* _PERF_H_ */
diff --git a/runtime/linux/uprobes-common.h b/runtime/linux/uprobes-common.h
index 8ec1856..6377dd4 100644
--- a/runtime/linux/uprobes-common.h
+++ b/runtime/linux/uprobes-common.h
@@ -26,6 +26,7 @@ struct stap_uprobe_spec {
   unsigned return_p:1;
   unsigned long address;
   unsigned long sdt_sem_offset;
+  unsigned long perf_counter_idx;
   struct stap_probe * const probe;
  };
 
diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
index 9d4d867..5de2ae0 100644
--- a/runtime/linux/uprobes-inode.c
+++ b/runtime/linux/uprobes-inode.c
@@ -86,6 +86,8 @@ struct stapiu_consumer {
 
 	loff_t offset; /* the probe offset within the inode */
 	loff_t sdt_sem_offset; /* the semaphore offset from process->base */
+        long perf_counters_dim;
+        long (*perf_counters) [];
 
 	struct stap_probe * const probe;
 };
@@ -249,13 +251,18 @@ stapiu_target_unreg(struct stapiu_target *target)
 
 /* Register all uprobe consumers of a target.  */
 static int
-stapiu_target_reg(struct stapiu_target *target)
+stapiu_target_reg(struct stapiu_target *target, struct task_struct* task)
 {
 	int ret = 0;
 	struct stapiu_consumer *c;
 
 	list_for_each_entry(c, &target->consumers, target_consumer) {
 		if (! c->registered) {
+		  int i;
+		  for (i=0; i < c->perf_counters_dim; i++) {
+			  if ((*(c->perf_counters))[i] > -1)
+			    _stp_perf_read_init ((*(c->perf_counters))[i], task);
+		        }
 			ret = uprobe_register(target->inode, c->offset, &c->consumer);
 			if (ret) {
 				c->registered = 0;
@@ -388,7 +395,7 @@ stapiu_change_plus(struct stapiu_target* target, struct task_struct *task,
 
 		/* OK, we've checked the target's buildid. Now
 		 * register all its consumers. */
-		rc = stapiu_target_reg(target);
+		rc = stapiu_target_reg(target, task);
 		if (rc) {
 			/* Be sure to release the inode on failure. */
 			iput(target->inode);
diff --git a/runtime/runtime_defines.h b/runtime/runtime_defines.h
index 676cf5b..c93cb3b 100644
--- a/runtime/runtime_defines.h
+++ b/runtime/runtime_defines.h
@@ -111,3 +111,9 @@ enum stp_probe_type {
 /* netfilter probe, triggered on network trafic */
 	stp_probe_type_netfilter,
 };
+
+
+// not the right place for this, but dcl is needed before use by probe_*
+static long _stp_perf_read (int ncpu, unsigned i);
+
+static int _stp_perf_read_init (unsigned i, void* pid);
diff --git a/session.h b/session.h
index 4ba7e5c..f6c18d0 100644
--- a/session.h
+++ b/session.h
@@ -282,6 +282,7 @@ public:
   std::vector<stapfile*> files;
   std::vector<vardecl*> globals;
   std::map<std::string,functiondecl*> functions;
+  std::map<std::string,long> perf_counters;
   std::vector<derived_probe*> probes; // see also *_probes groups below
   std::vector<embeddedcode*> embeds;
   std::map<std::string, statistic_decl> stat_decls;
diff --git a/staptree.cxx b/staptree.cxx
index e8a4298..5f4e402 100644
--- a/staptree.cxx
+++ b/staptree.cxx
@@ -434,6 +434,12 @@ void entry_op::print (ostream& o) const
 }
 
 
+void perf_op::print (ostream& o) const
+{
+  o << "@perf(" << *operand << ")";
+}
+
+
 void vardecl::print (ostream& o) const
 {
   o << name;
@@ -1470,6 +1476,13 @@ entry_op::visit (visitor* u)
 
 
 void
+perf_op::visit (visitor* u)
+{
+  u->visit_perf_op(this);
+}
+
+
+void
 arrayindex::visit (visitor* u)
 {
   u->visit_arrayindex (this);
@@ -1877,6 +1890,13 @@ traversing_visitor::visit_entry_op (entry_op* e)
 
 
 void
+traversing_visitor::visit_perf_op (perf_op* e)
+{
+  e->operand->visit (this);
+}
+
+
+void
 traversing_visitor::visit_arrayindex (arrayindex* e)
 {
   for (unsigned i=0; i<e->indexes.size(); i++)
@@ -2075,6 +2095,13 @@ varuse_collecting_visitor::visit_entry_op (entry_op *e)
 
 
 void
+varuse_collecting_visitor::visit_perf_op (perf_op *e)
+{
+  functioncall_traversing_visitor::visit_perf_op (e);
+}
+
+
+void
 varuse_collecting_visitor::visit_print_format (print_format* e)
 {
   // NB: Instead of being top-level statements, "print" and "printf"
@@ -2514,6 +2541,13 @@ throwing_visitor::visit_entry_op (entry_op* e)
 
 
 void
+throwing_visitor::visit_perf_op (perf_op* e)
+{
+  throwone (e->tok);
+}
+
+
+void
 throwing_visitor::visit_arrayindex (arrayindex* e)
 {
   throwone (e->tok);
@@ -2786,6 +2820,13 @@ update_visitor::visit_entry_op (entry_op* e)
 }
 
 void
+update_visitor::visit_perf_op (perf_op* e)
+{
+  replace (e->operand);
+  provide (e);
+}
+
+void
 update_visitor::visit_arrayindex (arrayindex* e)
 {
   replace (e->base);
@@ -3047,6 +3088,12 @@ deep_copy_visitor::visit_entry_op (entry_op* e)
 }
 
 void
+deep_copy_visitor::visit_perf_op (perf_op* e)
+{
+  update_visitor::visit_perf_op(new perf_op(*e));
+}
+
+void
 deep_copy_visitor::visit_arrayindex (arrayindex* e)
 {
   update_visitor::visit_arrayindex(new arrayindex(*e));
diff --git a/staptree.h b/staptree.h
index c2d201a..085231f 100644
--- a/staptree.h
+++ b/staptree.h
@@ -307,6 +307,14 @@ struct entry_op: public expression
 };
 
 
+struct perf_op: public expression
+{
+  expression *operand;
+  void print (std::ostream& o) const;
+  void visit (visitor* u);
+};
+
+
 struct arrayindex: public expression
 {
   std::vector<expression*> indexes;
@@ -501,6 +509,7 @@ struct vardecl: public symboldecl
   literal *init; // for global scalars only
   bool synthetic; // for probe locals only, don't init on entry
   bool wrap;
+  bool perf;	// Implicit variable for perf counter use
 };
 
 
@@ -508,7 +517,6 @@ struct vardecl_builtin: public vardecl
 {
 };
 
-
 struct statement;
 struct functiondecl: public symboldecl
 {
@@ -786,6 +794,7 @@ struct visitor
   virtual void visit_cast_op (cast_op* e) = 0;
   virtual void visit_defined_op (defined_op* e) = 0;
   virtual void visit_entry_op (entry_op* e) = 0;
+  virtual void visit_perf_op (perf_op* e) = 0;
 };
 
 
@@ -831,8 +840,9 @@ struct traversing_visitor: public visitor
   void visit_cast_op (cast_op* e);
   void visit_defined_op (defined_op* e);
   void visit_entry_op (entry_op* e);
+  void visit_perf_op (perf_op* e);
 };
-
+  
 
 // A kind of traversing visitor, which also follows function calls.
 // It uses an internal set object to prevent infinite recursion.
@@ -880,7 +890,7 @@ struct varuse_collecting_visitor: public functioncall_traversing_visitor
   void visit_cast_op (cast_op* e);
   void visit_defined_op (defined_op* e);
   void visit_entry_op (entry_op* e);
-
+  void visit_perf_op (perf_op* e);
   bool side_effect_free ();
   bool side_effect_free_wrt (const std::set<vardecl*>& vars);
 };
@@ -934,6 +944,7 @@ struct throwing_visitor: public visitor
   void visit_cast_op (cast_op* e);
   void visit_defined_op (defined_op* e);
   void visit_entry_op (entry_op* e);
+  void visit_perf_op (perf_op* e);
 };
 
 // A visitor similar to a traversing_visitor, but with the ability to rewrite
@@ -1004,6 +1015,7 @@ struct update_visitor: public visitor
   virtual void visit_cast_op (cast_op* e);
   virtual void visit_defined_op (defined_op* e);
   virtual void visit_entry_op (entry_op* e);
+  virtual void visit_perf_op (perf_op* e);
 
 private:
   std::stack<void *> targets;
@@ -1063,6 +1075,7 @@ struct deep_copy_visitor: public update_visitor
   virtual void visit_cast_op (cast_op* e);
   virtual void visit_defined_op (defined_op* e);
   virtual void visit_entry_op (entry_op* e);
+  virtual void visit_perf_op (perf_op* e);
 };
 
 #endif // STAPTREE_H
diff --git a/tapset-perfmon.cxx b/tapset-perfmon.cxx
index 53f74dc..4f9b2a7 100644
--- a/tapset-perfmon.cxx
+++ b/tapset-perfmon.cxx
@@ -28,6 +28,7 @@ static const string TOK_PERF("perf");
 static const string TOK_TYPE("type");
 static const string TOK_CONFIG("config");
 static const string TOK_SAMPLE("sample");
+static const string TOK_THREAD("thread");
 
 
 // ------------------------------------------------------------------------
@@ -41,7 +42,8 @@ struct perf_derived_probe: public derived_probe
   int64_t event_type;
   int64_t event_config;
   int64_t interval;
-  perf_derived_probe (probe* p, probe_point* l, int64_t type, int64_t config, int64_t i);
+  string counter_var;
+  perf_derived_probe (probe* p, probe_point* l, int64_t type, int64_t config, int64_t i, string v);
   virtual void join_group (systemtap_session& s);
 };
 
@@ -57,9 +59,10 @@ struct perf_derived_probe_group: public generic_dpg<perf_derived_probe>
 perf_derived_probe::perf_derived_probe (probe* p, probe_point* l,
                                         int64_t type,
                                         int64_t config,
-                                        int64_t i):
+                                        int64_t i,
+					string v):
   derived_probe (p, l, true /* .components soon rewritten */),
-  event_type (type), event_config (config), interval (i)
+  event_type (type), event_config (config), interval (i), counter_var (v)
 {
   vector<probe_point::component*>& comps = this->sole_location()->components;
   comps.clear();
@@ -67,6 +70,7 @@ perf_derived_probe::perf_derived_probe (probe* p, probe_point* l,
   comps.push_back (new probe_point::component (TOK_TYPE, new literal_number(type)));
   comps.push_back (new probe_point::component (TOK_CONFIG, new literal_number (config)));
   comps.push_back (new probe_point::component (TOK_SAMPLE, new literal_number (interval)));
+  comps.push_back (new probe_point::component (TOK_THREAD, new literal_string (counter_var)));
 }
 
 
@@ -85,7 +89,8 @@ perf_derived_probe_group::emit_module_decls (systemtap_session& s)
   if (probes.empty()) return;
 
   s.op->newline() << "/* ---- perf probes ---- */";
-  s.op->newline() << "#include \"linux/perf.c\"";
+  s.op->newline() << "#include <linux/perf_event.h>";
+  s.op->newline() << "#include \"linux/perf.h\"";
   s.op->newline();
 
   /* declarations */
@@ -119,6 +124,10 @@ perf_derived_probe_group::emit_module_decls (systemtap_session& s)
                        << "{ .sample_period=" << probes[i]->interval << "ULL }},";
       s.op->newline() << ".callback=enter_perf_probe_" << i << ", ";
       s.op->newline() << ".probe=" << common_probe_init (probes[i]) << ", ";
+      if (probes[i]->counter_var.length() == 0)
+	s.op->newline() << ".per_thread=" << "0, ";
+      else
+	s.op->newline() << ".per_thread=" << "1, ";
       s.op->newline(-1) << "},";
     }
   s.op->newline(-1) << "};";
@@ -159,6 +168,9 @@ perf_derived_probe_group::emit_module_decls (systemtap_session& s)
   s.op->newline() << "(*stp->probe->ph) (c);";
   common_probe_entryfn_epilogue (s, true);
   s.op->newline(-1) << "}";
+  s.op->newline();
+  s.op->newline() << "#include \"linux/perf.c\"";
+  s.op->newline();
 }
 
 
@@ -169,7 +181,8 @@ perf_derived_probe_group::emit_module_init (systemtap_session& s)
 
   s.op->newline() << "for (i=0; i<" << probes.size() << "; i++) {";
   s.op->newline(1) << "struct stap_perf_probe* stp = & stap_perf_probes [i];";
-  s.op->newline() << "rc = _stp_perf_init(stp);";
+  s.op->newline() << "rc = _stp_perf_init(stp, 0);";
+//  s.op->newline() << "_stp_perf_read_init(stp);";
   s.op->newline() << "if (rc) {";
   s.op->newline(1) << "probe_point = stp->probe->pp;";
   s.op->newline() << "for (j=0; j<i; j++) {";
@@ -233,11 +246,38 @@ perf_builder::build(systemtap_session & sess,
     throw semantic_error(_("invalid perf sample period ") + lex_cast(period),
                          parameters.find(TOK_SAMPLE)->second->tok);
 
+  string var;
+  get_param(parameters, TOK_THREAD, var);
+  if (var.length() > 0)
+    {
+      period = 0;		// perf_event_attr.sample_freq should be 0
+      int perf_n = 0;
+      for (unsigned i=0; i<sess.globals.size(); i++)
+	if (sess.globals[i]->name == var)
+	  throw parse_error (_("duplicate global name"));
+        else if (sess.globals[i]->perf)
+	  perf_n += 1;
+	  
+      vardecl* d = new vardecl;
+      token* tok = new token();
+      tok->type = tok_identifier;
+      tok->content = var;
+      tok->location = base->tok->location;
+      d->name = var;
+      d->tok = tok;
+      d->type = pe_long;
+      d->set_arity(0, tok);
+      d->perf = true;
+      // stap_perf_probes index
+      d->init = new literal_number (perf_n);
+      sess.globals.push_back (d);
+    }
+
   if (sess.verbose > 1)
     clog << _F("perf probe type=%" PRId64 " config=%" PRId64 " period=%" PRId64, type, config, period) << endl;
 
   finished_results.push_back
-    (new perf_derived_probe(base, location, type, config, period));
+    (new perf_derived_probe(base, location, type, config, period, var));
 }
 
 
@@ -252,6 +292,7 @@ register_tapset_perf(systemtap_session& s)
   match_node* event = perf->bind_num(TOK_TYPE)->bind_num(TOK_CONFIG);
   event->bind(builder);
   event->bind_num(TOK_SAMPLE)->bind(builder);
+  event->bind_str(TOK_THREAD)->bind(builder);
 }
 
 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
diff --git a/tapsets.cxx b/tapsets.cxx
index c697749..b3d7249 100644
--- a/tapsets.cxx
+++ b/tapsets.cxx
@@ -2237,6 +2237,7 @@ struct dwarf_var_expanding_visitor: public var_expanding_visitor
   void visit_target_symbol (target_symbol* e);
   void visit_cast_op (cast_op* e);
   void visit_entry_op (entry_op* e);
+  void visit_perf_op (perf_op* e);
 private:
   vector<Dwarf_Die>& getcuscope(target_symbol *e);
   vector<Dwarf_Die>& getscopes(target_symbol *e);
@@ -2522,6 +2523,7 @@ dwarf_pretty_print::expand ()
 
   // function pretty_print_X([pointer], [arg1, arg2, ...]) {
   //   try {
+
   //     return sprintf("{.foo=...}", (ts)->foo, ...)
   //   } catch {
   //     return "ERROR"
@@ -3793,6 +3795,67 @@ dwarf_var_expanding_visitor::visit_entry_op (entry_op *e)
   provide (repl);
 }
 
+void
+dwarf_var_expanding_visitor::visit_perf_op (perf_op *e)
+{
+  expression *repl = e;
+  printf ("XXX");
+  repl->print(cout);
+  printf ("\n");
+  token* t = new token;
+  t->location = e->tok->location;
+  t->type = tok_identifier;
+  t->content = ((literal_string*)e->operand)->value;
+
+  add_block = new block;
+
+  vardecl* c = NULL;
+  vector<vardecl*>::iterator it;
+  systemtap_session &s = this->q.sess;
+
+  // find perf_var: perf.type(N).config(N).thread("perf_var")
+  for ( it=s.globals.begin() ; it < s.globals.end(); it++ )
+    if ((*it)->name == ((literal_string*)e->operand)->value)
+      c = *it;
+  if (c == NULL)
+    throw semantic_error (_("perf.thread not defined"), t);
+
+  s.perf_counters[c->name] = ((literal_number*)c->init)->value;
+  embedded_expr *spri = new embedded_expr;
+  spri->tok = t;
+  spri->code = string("_stp_perf_read_init(" + lex_cast(((literal_number*)c->init)->value) + ")");
+
+  expr_statement* spri_s = new expr_statement;
+  spri_s->value = spri;
+  spri_s->tok = t;
+  //  ((struct block*)add_block)->statements.push_back(spri_s);
+
+  // perf_var = _stp_perf_read (which_cpu, stap_perf_probes_idx)
+  symbol* c_s = new symbol;
+  c_s->name = c->name;
+  c_s->tok = t;
+  c_s->referent = c;
+
+  embedded_expr *spr = new embedded_expr;
+  spr->tok = t;
+  spr->code = string("_stp_perf_read(smp_processor_id()," + lex_cast(((literal_number*)c->init)->value) + ")");
+
+  assignment* ceqspr = new assignment;
+  ceqspr->left = c_s;
+  ceqspr->op = "=";
+  ceqspr->right = spr;
+  ceqspr->tok = t;
+
+  expr_statement* ceqspr_s = new expr_statement;
+  ceqspr_s->value = ceqspr;
+  ceqspr_s->tok = t;
+
+  add_block->print(cout);
+  ((struct block*)add_block)->statements.push_back(ceqspr_s);
+  add_block->print(cout);
+  provide (c_s);
+}
+
 vector<Dwarf_Die>&
 dwarf_var_expanding_visitor::getcuscope(target_symbol *e)
 {
@@ -4292,6 +4355,15 @@ dwarf_derived_probe::dwarf_derived_probe(const string& funcname,
       // XXX: user-space deref's for q.has_process!
       dwarf_var_expanding_visitor v (q, scope_die, dwfl_addr);
       v.replace (this->body);
+      std::map<std::string,long>::iterator pci;
+      for (pci = q.dw.sess.perf_counters.begin();
+	   pci != q.dw.sess.perf_counters.end(); pci++)
+	{
+	  this->perf_counter_idx.push_back((*pci).second);
+	  // this->perf_counter_idx = (*pci).second;
+	  q.dw.sess.perf_counters.erase(pci);
+	}
+      // this->perf_counter_idx = ((struct derived_probe*)(q.base_probe))->perf_counter_idx; // need a way to percolate this forward
       if (!q.has_process)
         access_vars = v.visited;
 
@@ -7405,6 +7477,10 @@ uprobe_derived_probe_group::emit_module_utrace_decls (systemtap_session& s)
         s.op->line() << " .sdt_sem_offset=(unsigned long)0x"
                      << hex << p->sdt_semaphore_addr << dec << "ULL,";
 
+      s.op->line() << " .perf_counter_idx[]={"
+	           << "(unsigned long)"
+		   << p->perf_counter_idx[0]
+		   << "},";
       if (p->has_return)
         s.op->line() << " .return_p=1,";
       s.op->line() << " },";
@@ -7664,6 +7740,22 @@ uprobe_derived_probe_group::emit_module_inode_decls (systemtap_session& s)
   s.op->assert_0_indent();
 
   // Declare the actual probes.
+  s.op->newline() << "long perf_counters[] = {";
+  unsigned pci;
+  for (pci=0; pci<probes.size(); pci++)
+    {
+      // this->perf_counter_idx.push_back((*pci).second);
+      uprobe_derived_probe *p = probes[pci];
+      std::vector<long>::iterator pcii;
+      for (pcii = p->perf_counter_idx.begin();
+	   pcii != p->perf_counter_idx.end(); pcii++)
+	{
+	  if (*pcii >= 0)
+	    s.op->line() << *pcii << ", ";
+	}
+    }
+  s.op->newline() << "};";
+
   s.op->newline() << "static struct stapiu_consumer "
                   << "stap_inode_uprobe_consumers[] = {";
   s.op->indent(1);
@@ -7678,6 +7770,8 @@ uprobe_derived_probe_group::emit_module_inode_decls (systemtap_session& s)
       if (p->sdt_semaphore_addr)
         s.op->line() << " .sdt_sem_offset=(loff_t)0x"
                      << hex << p->sdt_semaphore_addr << dec << "ULL,";
+      s.op->line() << " .perf_counters_dim=" << pci << ",";
+      s.op->line() << " .perf_counters=&perf_counters,";
       s.op->line() << " .probe=" << common_probe_init (p) << ",";
       s.op->line() << " },";
     }
diff --git a/testsuite/buildok/systemtap_privilege.stp b/testsuite/buildok/systemtap_privilege.stp
old mode 100755
new mode 100644
diff --git a/testsuite/semok/entry04.stp b/testsuite/semok/entry04.stp
old mode 100755
new mode 100644
diff --git a/testsuite/semok/pretty-uprobes.stp b/testsuite/semok/pretty-uprobes.stp
old mode 100755
new mode 100644
diff --git a/testsuite/semok/thirtysix-utrace.stp b/testsuite/semok/thirtysix-utrace.stp
old mode 100755
new mode 100644
diff --git a/translate.cxx b/translate.cxx
index 0bf50ed..47bdf50 100644
--- a/translate.cxx
+++ b/translate.cxx
@@ -196,6 +196,7 @@ struct c_unparser: public unparser, public visitor
   void visit_cast_op (cast_op* e);
   void visit_defined_op (defined_op* e);
   void visit_entry_op (entry_op* e);
+  void visit_perf_op (perf_op* e);
 };
 
 // A shadow visitor, meant to generate temporary variable declarations
@@ -4327,6 +4328,13 @@ c_unparser::visit_entry_op (entry_op* e)
 
 
 void
+c_unparser::visit_perf_op (perf_op* e)
+{
+  //  throw semantic_error(_("cannot translate general @perf expression"), e->tok);
+}
+
+
+void
 c_tmpcounter::load_map_indices(arrayindex *e)
 {
   symbol *array;

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]