This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Optimize varuse_collecting_visitor::visit_embeddedcode by bool flags.


The varuse_collecting_visitor::visit_embeddedcode() method currently
spends a lot of CPU time on the string comparison operations for the
memo_tagged_p() function. It is very common for the same embedded code
to be used for many times in the same stap script.

Now we cache the computation results via boolean flags in the
embedded_expr and embeddedcode objects. And the memo_tagged_p() function
overhead has reduced dramatically.

The stap analyzing time (phase 2) for one of our largest stap scripts
(lgcpath) now reduces by 93% (from 23s to 1.6s). Yay!

Just for the record, we sampled the *inversed* versions of the on-CPU
flame graphs against the stap process both before and after this
optimization:

Before:

http://openresty.org/misc/flamegraph/old-inv-stap-varuse-embeddedcode.svg

After:

http://openresty.org/misc/flamegraph/new-inv-stap-varuse-embeddedcode.svg

The flame graphs were sampled by stap as well. That's fun :)
---
 staptree.cxx      | 49 ++++++++++++++++++++++++++++++++++++++++++++---
 staptree.h        |  7 +++++++
 tapset-python.cxx | 12 ++++++++++++
 3 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/staptree.cxx b/staptree.cxx
index b53fa7eac..0de30616f 100644
--- a/staptree.cxx
+++ b/staptree.cxx
@@ -365,6 +365,11 @@ bool memo_tagged_p (const interned_string& haystack, const string& needle)
 
 
 
+embedded_expr::embedded_expr(): varuse_collected(false), is_pure(false)
+{
+}
+
+
 bool
 embedded_expr::tagged_p (const char *tag) const
 {
@@ -386,6 +391,12 @@ embedded_expr::tagged_p (const interned_string& tag) const
 }
 
 
+embeddedcode::embeddedcode(): varuse_collected(false), is_pure(false),
+  is_unmangled(false)
+{
+}
+
+
 bool
 embeddedcode::tagged_p (const char *tag) const
 {
@@ -2576,6 +2587,19 @@ varuse_collecting_visitor::visit_embeddedcode (embeddedcode *s)
 {
   assert (current_function); // only they get embedded code
 
+  if (s->varuse_collected)
+    {
+      if (s->is_unmangled)
+        current_function->mangle_oldstyle = true;
+
+      if (! s->is_pure)
+        embedded_seen = true;
+
+      return;
+    }
+
+  s->varuse_collected = true;
+
   /* We need to lock globals that are accessed through embedded C code */
   for (unsigned i = 0; i < session.globals.size(); i++)
     {
@@ -2603,7 +2627,10 @@ varuse_collecting_visitor::visit_embeddedcode (embeddedcode *s)
 
   // PR14524: Support old-style THIS->local syntax on per-function basis.
   if (s->tagged_p ("/* unmangled */"))
-    current_function->mangle_oldstyle = true;
+    {
+      s->is_unmangled = true;
+      current_function->mangle_oldstyle = true;
+    }
 
   // We want to elide embedded-C functions when possible.  For
   // example, each $target variable access is expanded to an
@@ -2616,7 +2643,10 @@ varuse_collecting_visitor::visit_embeddedcode (embeddedcode *s)
   // Also, explicit side-effect-free tapset functions will have this.
 
   if (s->tagged_p ("/* pure */"))
-    return;
+    {
+      s->is_pure = true;
+      return;
+    }
 
   embedded_seen = true;
 }
@@ -2626,6 +2656,16 @@ varuse_collecting_visitor::visit_embeddedcode (embeddedcode *s)
 void
 varuse_collecting_visitor::visit_embedded_expr (embedded_expr *e)
 {
+  if (e->varuse_collected)
+    {
+      if (! e->is_pure)
+        embedded_seen = true;
+
+      return;
+    }
+
+  e->varuse_collected = true;
+
   /* We need to lock globals that are accessed through embedded C code */
   for (unsigned i = 0; i < session.globals.size(); i++)
     {
@@ -2675,7 +2715,10 @@ varuse_collecting_visitor::visit_embedded_expr (embedded_expr *e)
   // Also, explicit side-effect-free tapset functions will have this.
 
   if (e->tagged_p ("/* pure */"))
-    return;
+    {
+      e->is_pure = true;
+      return;
+    }
 
   embedded_seen = true;
 }
diff --git a/staptree.h b/staptree.h
index fce3de512..81356d4e9 100644
--- a/staptree.h
+++ b/staptree.h
@@ -184,6 +184,9 @@ struct literal_number: public literal
 struct embedded_expr: public expression
 {
   interned_string code;
+  bool varuse_collected;
+  bool is_pure;
+  embedded_expr ();
   bool tagged_p (const char *tag) const;
   bool tagged_p (const std::string &tag) const;
   bool tagged_p (const interned_string& tag) const;
@@ -706,6 +709,10 @@ std::ostream& operator << (std::ostream& o, const statement& k);
 struct embeddedcode: public statement
 {
   interned_string code;
+  bool varuse_collected;
+  bool is_pure;
+  bool is_unmangled;
+  embeddedcode ();
   bool tagged_p (const char *tag) const;
   bool tagged_p (const std::string& tag) const;
   bool tagged_p (const interned_string& tag) const;
diff --git a/tapset-python.cxx b/tapset-python.cxx
index 56a0f286f..2f67e7627 100644
--- a/tapset-python.cxx
+++ b/tapset-python.cxx
@@ -189,6 +189,12 @@ python_derived_probe_group::enroll (python_derived_probe* p)
 	  python2_embedded = new embeddedcode;
 	  s.embeds.push_back(python2_embedded);
 	}
+      else
+        {
+          python2_embedded->varuse_collected = false;
+          python2_embedded->is_pure = false;
+          python2_embedded->is_unmangled = false;
+        }
 
       unsigned index = 0;
       stringstream data;
@@ -213,6 +219,12 @@ python_derived_probe_group::enroll (python_derived_probe* p)
 	  python3_embedded = new embeddedcode;
 	  s.embeds.push_back(python3_embedded);
 	}
+      else
+        {
+          python3_embedded->varuse_collected = false;
+          python3_embedded->is_pure = false;
+          python3_embedded->is_unmangled = false;
+        }
 
       unsigned index = 0;
       stringstream data;
-- 
2.17.2


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]