This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: BZ#2421 - removing duplicate probe handlers


On Thu, 2006-08-03 at 09:56 -0500, David Smith wrote:
> On Wed, 2006-08-02 at 21:54 -0400, Frank Ch. Eigler wrote:
> > Hi -
> > 
> > > I tried to think through this a bit, and I believe I was thinking
> > > too hard about this.  [...]  In addition, I realized that there are
> > > cases where we don't want to merge probes even if they are identical
> > > (but we could use your idea above of reusing the code bodies).
> > > [...]
> > 
> > As Josh said, merging should be a script-invisible phenomenon, and so
> > must preserve semantics.
> 
> See my reply to Josh for the long answer to this, but the short version
> is semantics should be preserved - I was wrong in my begin probe
> example.
> 
> > Merging at the probe / derived_probe level
> > *could* involve moving probe_location values from place to place.  But
> > now that I think about it more, it seems like a good place for both
> > function and probe merging is in translate.cxx, strictly within the
> > emit_{probe,function} routines.
> 
> I'll look into it.

I looked into it, and if function merging is done that late, then probes
can't be merged, since they are calling different functions.

Probe merging can be done in translate.cxx.  I've attached a patch that
demonstrates it.

Note I'm just doing string compares to compare probe bodies.  That can
be improved later if you like the basic approach.

Note that using the same test case I used before (which I've included
again) shows less space savings than doing it all in elaborate.cxx,
since the C probe auxiliary functions don't get merged using this
approach.

original stap:
  generated C file: 1612k
  generated module: 1944k

removing duplicate functions (in elaborate.cxx) only:
  generated C file: 1420k
  generated module: 1880k

removing duplicate functions and probes (both in elaborate.cxx) - the
last approach:
  generated C file:   48k
  generated module:  920k

removing duplicate functions (in elaborate.cxx) and probes (in
translate.cxx) - the current approach:
  generated C file:  976k
  generated module: 1644k

-- 
David Smith
dsmith@redhat.com
Red Hat, Inc.
http://www.redhat.com
256.217.0141 (direct)
256.837.0057 (fax)

Index: elaborate.cxx
===================================================================
RCS file: /cvs/systemtap/src/elaborate.cxx,v
retrieving revision 1.62
diff -u -p -r1.62 elaborate.cxx
--- elaborate.cxx	5 Jun 2006 19:45:56 -0000	1.62
+++ elaborate.cxx	8 Aug 2006 19:15:17 -0000
@@ -24,7 +24,7 @@ extern "C" {
 #include <vector>
 #include <algorithm>
 #include <iterator>
-
+#include <ext/hash_map>
 
 using namespace std;
 
@@ -1550,6 +1550,95 @@ void semantic_pass_opt4 (systemtap_sessi
     }
 }
 
+struct duplicate_function_remover: public functioncall_traversing_visitor
+{
+  systemtap_session& s;
+  bool& relaxed_p;
+  map<functiondecl*, functiondecl*>& duplicate_function_map;
+
+  duplicate_function_remover(systemtap_session& sess, bool& r,
+			     map<functiondecl*, functiondecl*>&dfm):
+    s(sess), relaxed_p(r), duplicate_function_map(dfm) {};
+
+  void visit_functioncall (functioncall* e);
+};
+
+void
+duplicate_function_remover::visit_functioncall (functioncall *e)
+{
+  functioncall_traversing_visitor::visit_functioncall (e);
+
+  // If the current function call reference points to a function that
+  // is a duplicate, replace it.
+  if (duplicate_function_map.count(e->referent) != 0)
+    {
+      if (s.verbose>2)
+	  clog << "Changing " << e->referent->name
+	       << " reference to "
+	       << duplicate_function_map[e->referent]->name
+	       << " reference\n";
+      e->tok = duplicate_function_map[e->referent]->tok;
+      e->function = duplicate_function_map[e->referent]->name;
+      e->referent = duplicate_function_map[e->referent];
+
+      relaxed_p = false;
+    }
+}
+
+static size_t
+hash_functiondecl (functiondecl* f)
+{
+  ostringstream s;
+  __gnu_cxx::hash<const char*> hash_func;
+
+  // Get the "name:args body" of the function in s.  We have to
+  // include the args since the function 'x1(a, b)' is different than
+  // the function 'x2(b, a)' even if the bodies of the two functions
+  // are exactly the same.
+  f->printsig(s);
+  f->body->print(s);
+
+  // printsig puts f->name + ':' on the front.  Remove this
+  // (otherwise, functions would never compare equal).
+  string str = s.str().erase(0, f->name.size() + 1);
+
+  // Actually generate the hash.
+  return hash_func(str.c_str());
+}
+
+void semantic_pass_opt5 (systemtap_session& s, bool& relaxed_p)
+{
+  // Walk through all the functions, looking for duplicates.
+  map<size_t, functiondecl*> function_hash_map;
+  map<functiondecl*, functiondecl*> duplicate_function_map;
+  for (unsigned i=0; i < s.functions.size(); i++)
+    {
+      size_t function_hash = hash_functiondecl(s.functions[i]);
+
+      if (function_hash_map.count(function_hash) == 0)
+	// This function is unique.  Remember it.
+	function_hash_map[function_hash] = s.functions[i];
+      else
+	// This function is a duplicate.
+	duplicate_function_map[s.functions[i]]
+	    = function_hash_map[function_hash];
+    }
+
+  // If we have duplicate functions, traverse down the tree, replacing
+  // the appropriate function calls.
+  // duplicate_function_remover::visit_functioncall() handles the
+  // details of replacing the function calls.  Note that we don't
+  // delete the duplicate functiondecl itself, we'll let pass 1 do
+  // that.
+  if (duplicate_function_map.size() != 0)
+    {
+      duplicate_function_remover dfr (s, relaxed_p, duplicate_function_map);
+
+      for (unsigned i=0; i < s.probes.size(); i++)
+	s.probes[i]->body->visit(&dfr);
+    }
+}
+
 
 static int
 semantic_pass_optimize (systemtap_session& s)
@@ -1571,6 +1660,7 @@ semantic_pass_optimize (systemtap_sessio
       semantic_pass_opt2 (s, relaxed_p);
       semantic_pass_opt3 (s, relaxed_p);
       semantic_pass_opt4 (s, relaxed_p);
+      semantic_pass_opt5 (s, relaxed_p);
     }
 
   if (s.probes.size() == 0)
Index: translate.cxx
===================================================================
RCS file: /cvs/systemtap/src/translate.cxx,v
retrieving revision 1.127
diff -u -p -r1.127 translate.cxx
--- translate.cxx	9 Jun 2006 09:20:03 -0000	1.127
+++ translate.cxx	8 Aug 2006 19:15:17 -0000
@@ -68,6 +68,8 @@ struct c_unparser: public unparser, publ
   unsigned tmpvar_counter;
   unsigned label_counter;
 
+  map<string, string> probe_contents;
+
   c_unparser (systemtap_session* ss):
     session (ss), o (ss->op), current_probe(0), current_function (0),
   tmpvar_counter (0), label_counter (0) {}
@@ -1270,40 +1272,67 @@ c_unparser::emit_probe (derived_probe* v
   o->newline() << "static void " << v->name << " (struct context * __restrict__ c) {";
   o->indent(1);
 
-  // initialize frame pointer
-  o->newline() << "struct " << v->name << "_locals * __restrict__ l =";
-  o->newline(1) << "& c->locals[0]." << v->name << ";";
-  o->newline(-1) << "(void) l;"; // make sure "l" is marked used
-
-  // emit all read/write locks for global variables
-  varuse_collecting_visitor vut;
-  v->body->visit (& vut);
-  emit_locks (vut);
+  // If we about to emit a probe that is exactly the same as another
+  // probe previously emitted, make the second probe just call the
+  // first one.
+  //
+  // Notice we're using the probe body itself instead of the emitted C
+  // probe body to compare probes.  We need to do this because the
+  // emitted C probe body has stuff in it like:
+  //
+  //   c->last_stmt = "identifier 'printf' at foo.stp:<line>:<column>";
+  //
+  // which would make comparisons impossible.
+  ostringstream oss;
+  v->body->print(oss);
 
-  // initialize locals
-  for (unsigned j=0; j<v->locals.size(); j++)
+  // If an identical probe has already been emitted, just call that
+  // one.
+  if (probe_contents.count(oss.str()) != 0)
     {
-      if (v->locals[j]->index_types.size() > 0) // array?
-	throw semantic_error ("array locals not supported", v->tok);
-      else if (v->locals[j]->type == pe_long)
-	o->newline() << "l->" << c_varname (v->locals[j]->name)
-		     << " = 0;";
-      else if (v->locals[j]->type == pe_string)
-	o->newline() << "l->" << c_varname (v->locals[j]->name)
-		     << "[0] = '\\0';";
-      else
-	throw semantic_error ("unsupported local variable type",
-			      v->locals[j]->tok);
+	o->newline() << probe_contents[oss.str()] << " (c);";
     }
+  // This probe is unique.  Remember it and output it.
+  else
+    {
+      probe_contents[oss.str()] = v->name;
+
+      // initialize frame pointer
+      o->newline() << "struct " << v->name << "_locals * __restrict__ l =";
+      o->newline(1) << "& c->locals[0]." << v->name << ";";
+      o->newline(-1) << "(void) l;"; // make sure "l" is marked used
+
+      // emit all read/write locks for global variables
+      varuse_collecting_visitor vut;
+      v->body->visit (& vut);
+      emit_locks (vut);
+
+      // initialize locals
+      for (unsigned j=0; j<v->locals.size(); j++)
+        {
+	  if (v->locals[j]->index_types.size() > 0) // array?
+	    throw semantic_error ("array locals not supported", v->tok);
+	  else if (v->locals[j]->type == pe_long)
+	    o->newline() << "l->" << c_varname (v->locals[j]->name)
+			 << " = 0;";
+	  else if (v->locals[j]->type == pe_string)
+	    o->newline() << "l->" << c_varname (v->locals[j]->name)
+			 << "[0] = '\\0';";
+	  else
+	    throw semantic_error ("unsupported local variable type",
+				  v->locals[j]->tok);
+        }
   
-  v->body->visit (this);
+      v->body->visit (this);
 
-  o->newline(-1) << "out:";
-  // NB: no need to uninitialize locals, except if arrays can somedays be local
-  // XXX: do this flush only if the body included a print/printf/etc. routine!
-  o->newline(1) << "_stp_print_flush();";
+      o->newline(-1) << "out:";
+      // NB: no need to uninitialize locals, except if arrays can
+      // somedays be local XXX: do this flush only if the body
+      // included a print/printf/etc. routine!
+      o->newline(1) << "_stp_print_flush();";
 
-  emit_unlocks (vut);
+      emit_unlocks (vut);
+    }
 
   o->newline(-1) << "}\n";
   

Attachment: dup_function.stp
Description: Text document


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]