sourceware.org Git - systemtap.git/commitdiff
translator: fix derived-chain elaboration procedure for debuginfod.* probes etc.
author: Frank Ch. Eigler <fche@redhat.com>
Wed, 25 Oct 2023 19:12:52 +0000 (15:12 -0400)
committer: Frank Ch. Eigler <fche@redhat.com>
Wed, 25 Oct 2023 19:30:08 +0000 (15:30 -0400)
Prior to this patch, stap -L / -p2 mode output for debuginfod.*
probes showed relatively uninformative process("buildid").* probe
points, losing all track of which archive and especially which program
name those probes were derived from.  Now we more carefully construct
the derivation chain for debuginfod.* probes.  For probes that contain
wildcards in the .archive() or .process() names, an extra intermediate
probe is inserted into the chain, with the wildcards expanded.

stap -vv -L now lists the entire probe /* derivation chain */.

The heuristic for which canonical name to use relies on a probe-level
flag called "well_formed".  This is intended to bypass those probes
that have wildcards in them, so that the end-user would see the
expanded forms.  However, this flag cannot be computed both locally and
correctly, because wildcards can pop up at different levels of probe
resolution.

Consider: debuginfod.archive("*").process("foo").function("*").  When
the debuginfod builder resolves the first wildcard, the probe point is
"well formed" with respect to its wildcards, but another one still
remains further down, in .function("*").  So it's not "well formed"
overall, just locally.  What we need is something like a
probe-point-component level "well-formed"ness, which aggregates via
"and" over the whole probe point.  Anyway, that's for later.

elaborate.cxx
main.cxx
session.cxx
staptree.cxx
tapset-debuginfod.cxx

index b6fbbc016decca102aec2adf00b810caa8f668f1..8bf9e6c0619629efe18d471ca65bc3c0c0db8b90 100644 (file)
@@ -92,30 +92,18 @@ derived_probe::printsig (ostream& o) const
   printsig_nested (o);
 }
 
+
 void
 derived_probe::printsig_nested (ostream& o) const
 {
-  // We'd like to enclose the probe derivation chain in a /* */
-  // comment delimiter.  But just printing /* base->printsig() */ is
-  // not enough, since base might itself be a derived_probe.  So we,
-  // er, "cleverly" encode our nesting state as a formatting flag for
-  // the ostream.
-  ios::fmtflags f = o.flags (ios::internal);
-  if (f & ios::internal)
-    {
-      // already nested
-      o << " <- ";
-      base->printsig (o);
-    }
-  else
-    {
-      // outermost nesting
-      o << " /* <- ";
-      base->printsig (o);
-      o << " */";
-    }
-  // restore flags
-  (void) o.flags (f);
+  vector<probe*> probes_list;
+  base->collect_derivation_chain(probes_list); // excluding this probe's own pp
+  o << " /* ";
+  for (auto i : probes_list) {
+    o << " <- ";
+    i->printsig (o);
+  }
+  o << " */ ";  
 }
 
 
index 23711cd5c5bc085810118477db738fe2acc21512..50c7e3f1e24a93532fe53e34fc60aed1abf71cb0 100644 (file)
--- a/main.cxx
+++ b/main.cxx
@@ -183,6 +183,8 @@ printscript(systemtap_session& s, ostream& o)
                       // We want to print the probe point signature (without the nested components).
                       std::ostringstream sig;
                       p->printsig_nonest(sig);
+                      if (s.verbose > 1)
+                        p->printsig_nested(sig);                        
 
                       if (s.dump_mode == systemtap_session::dump_matched_probes_vars && isatty(STDOUT_FILENO))
                         o << s.colorize(sig.str(), "source");
index 21314e2d273ed055425dbe60f4d2bc4ac66b9ce3..14f9cae5f031e6adf25d983eca4c8f38613e86bc 100644 (file)
@@ -566,6 +566,12 @@ systemtap_session::version ()
 #endif
 #ifdef HAVE_JSON_C
        << " JSON_C"
+#endif
+#ifdef HAVE_LIBDEBUGINFOD
+       << " LIBDEBUGINFOD" 
+#endif
+#ifdef METADATA_QUERY_ENABLED
+       << " METADATA"
 #endif
        << endl;
 }
index 154fd8d303e93f433722731ead36b26292b55ac2..0d4659fc333731943100259e3d56bf41643df82c 100644 (file)
@@ -96,7 +96,7 @@ probe_point::probe_point (std::vector<component*> const & comps):
 {
 }
 
-// NB: shallow-copy of compoonents & condition!
+// NB: shallow-copy of components & condition!
 probe_point::probe_point (const probe_point& pp):
   components(pp.components), optional (pp.optional), sufficient (pp.sufficient),
   well_formed (pp.well_formed), condition (pp.condition), auto_path (pp.auto_path)
@@ -4067,6 +4067,12 @@ debug_print(const statement* x)
   x->print(cout);
   cout << endl;
 }
+void
+debug_print(const probe_point* x)
+{
+  x->print(cout);
+  cout << endl;
+}
 
 
 ostream& operator << (ostream& o, const exp_type_details& d)
index f5128f273f7fa3c0423ce1c5d08a5f5a2bd4d725..d16f2d49f2feb5e1b56921ed23991d9c16407b25 100644 (file)
@@ -12,7 +12,7 @@
 #include "util.h"
 #include "fnmatch.h"
 
-#if defined(HAVE_LIBDEBUGINFOD) && defined(HAVE_JSON_C)
+#if defined(HAVE_LIBDEBUGINFOD) && defined(HAVE_JSON_C) && defined(METADATA_QUERY_ENABLED)
 
 #include <elfutils/debuginfod.h>
 #include <json-c/json.h>
@@ -25,26 +25,26 @@ static const string TOK_DEBUGINFOD("debuginfod");
 static const string TOK_ARCHIVE("archive");
 static const string TOK_PROCESS("process");
 
-void
-get_buildids(bool has_archive, string archive, string process_path, set<interned_string>& buildids){
+
+struct metadata_result { string archive; string file; };
+
+map<string,metadata_result>
+get_buildids(bool has_archive, string archive, string process_path) {
   static unique_ptr <debuginfod_client, void (*)(debuginfod_client*)>
     client (debuginfod_begin(), &debuginfod_end);
+
+  map<string,metadata_result> buildids;
   
   int metadata_fd;
-  #ifdef METADATA_QUERY_ENABLED
-    if((metadata_fd = debuginfod_find_metadata(client.get(), "glob", (char*)process_path.c_str(), NULL)) < 0)
-      throw SEMANTIC_ERROR(_("can't retrieve buildids from debuginfod"));
-  #else
-    throw SEMANTIC_ERROR(_F("can't retrieve buildids for %s from debuginfod. Metadata query is not supported", process_path.c_str()));
-  #endif
-
-  #ifdef HAVE_JSON_C
+  if((metadata_fd = debuginfod_find_metadata(client.get(), "glob", (char*)process_path.c_str(), NULL)) < 0)
+    throw SEMANTIC_ERROR(_("can't retrieve buildids from debuginfod"));
+
   vector<pid_t> debuginfod_workers;
   json_object *metadata = json_object_from_fd(metadata_fd);
   json_object *metadata_array;
+  close(metadata_fd);
   if(!metadata || !json_object_object_get_ex(metadata, "results", &metadata_array))
     throw SEMANTIC_ERROR(_("retrieved invalid buildids from debuginfod"));
-  close(metadata_fd);
   size_t n_bid = json_object_array_length(metadata_array);
 
   if(0 == n_bid)
@@ -56,43 +56,65 @@ get_buildids(bool has_archive, string archive, string process_path, set<interned
   for(size_t i = 0; i < n_bid; i++)
   {
     file_metadata = json_object_array_get_idx(metadata_array, i);
-    if(json_object_object_get_ex(file_metadata, "buildid", &json_field)) buildid = json_object_get_string(json_field);
-
-    // Query debuginfod for executable/debuginfo for executables which have yet to be seen (as recorded by buildid)
-    // Then the files will be cached for when they are needed later, when accessed via buildid
-    interned_string buildid_is = interned_string(buildid);
-    if(is_build_id(buildid) && buildids.find(buildid_is) == buildids.end() &&
-        json_object_object_get_ex(file_metadata, "type", &json_field) && 0 == strcmp(json_object_get_string(json_field), "executable"))
-    {
-      // Skip the buildid if the archive file name does not contain the requested archive string
-      if(has_archive)
-        {
-        json_object_object_get_ex(file_metadata, "archive", &json_field);
-        string path = json_object_get_string(json_field);
-        string base_filename = path.substr(path.find_last_of("/\\") + 1);
-        if(json_object_object_get_ex(file_metadata, "archive", &json_field) &&
-          0 != fnmatch(archive.c_str(), base_filename.c_str(), 0))
-          continue;
-        }
-      
-      buildids.insert(buildid_is);
 
-      debuginfod_workers.push_back(stap_spawn_piped(0, {"debuginfod-find", "executable", buildid}, NULL, &d_out, &d_err));
-      // NB: we don't really have to preload the debuginfo - a probe point may not call for dwarf
-      // NB: but if it does, we'd suffer latency by downloading one at a time, sequentially
-      debuginfod_workers.push_back(stap_spawn_piped(0, {"debuginfod-find", "debuginfo" , buildid}, NULL, &d_out, &d_err));
+    // Reject missing buildid
+    if(json_object_object_get_ex(file_metadata, "buildid", &json_field))
+      buildid = json_object_get_string(json_field);
+    else
+      continue;
+
+    // Reject sus buildids
+    if(! is_build_id(buildid))
+      continue;
+
+    // Reject duplicate buildid - they are equivalent as far as we're concerned
+    if (buildids.find(buildid) != buildids.end())
+      continue;
+    
+    // Reject non-executables
+    if (! (json_object_object_get_ex(file_metadata, "type", &json_field)
+           && 0 == strcmp(json_object_get_string(json_field), "executable")))
+      continue;
+
+    // Extract archive name (if any - might be empty).
+    string archive_fullname;
+    if (json_object_object_get_ex(file_metadata, "archive", &json_field)) {
+      archive_fullname = json_object_get_string(json_field);
+    }
+    string archive_basename = archive_fullname.substr(archive_fullname.find_last_of("/") + 1);
+
+    // Reject mismatching archive, if user specified
+    if (has_archive) {
+      // documented as basename comparison, so no FNM_PATHNAME needed        
+      if(0 != fnmatch(archive.c_str(), archive_basename.c_str(), 0))
+        continue;
     }
+
+    // Extract file name (mandatory)
+    string filename;
+    if (json_object_object_get_ex(file_metadata, "file", &json_field))
+      filename = json_object_get_string(json_field);
+    else
+      continue;
+    
+    metadata_result af; af.archive = archive_basename; af.file = filename;
+    buildids.insert(make_pair(buildid, af));
+
+    debuginfod_workers.push_back(stap_spawn_piped(0, {"debuginfod-find", "executable", buildid}, NULL, &d_out, &d_err));
+    // NB: we don't really have to preload the debuginfo - a probe point may not call for dwarf
+    // NB: but if it does, we'd suffer latency by downloading one at a time, sequentially
+    debuginfod_workers.push_back(stap_spawn_piped(0, {"debuginfod-find", "debuginfo" , buildid}, NULL, &d_out, &d_err));
   }
   json_object_put(metadata);
 
   // Make sure all the executables/debuginfo are found before continuing
   for(auto worker_pid = debuginfod_workers.begin(); worker_pid != debuginfod_workers.end(); ++worker_pid)
     if(*worker_pid > 0) stap_waitpid(0, *worker_pid);
-  #else
-    throw SEMANTIC_ERROR(_F("can't retrieve buildids for %s from debuginfod. json-c not installed", process_path.c_str()));
-  #endif
+
+  return buildids;
 }
 
+
 // ------------------------------------------------------------------------
 // debuginfod_builder derived probes
 // ------------------------------------------------------------------------
@@ -113,6 +135,7 @@ public:
   virtual string name() { return "debuginfod builder"; }
 };
 
+
 void
 debuginfod_builder::build(systemtap_session & sess, probe * base,
   probe_point * location,
@@ -129,42 +152,85 @@ debuginfod_builder::build(systemtap_session & sess, probe * base,
     throw SEMANTIC_ERROR(_("the probe must be of the form debuginfod.[.archive(\"foobar\")]process(\"foo/bar\").**{...}"));
 
   // The matching buildids from the archives/debuginfod
-  set<interned_string> buildids;
-  get_buildids(has_archive, archive, process_path, buildids);
-
-  probe *base_p = new probe(base, location);
-  probe_point *base_pp = base_p->locations[0];
-  base_p->locations.clear();  // The new probe points are created below as derivatives of base_pp
-  base_pp->components.erase(base_pp->components.begin()); // Remove the 'debuginfod'
-  if (has_archive)
-    base_pp->components.erase(base_pp->components.begin()); // Remove the 'archive' too
-
-  for(auto it = buildids.begin(); it != buildids.end(); ++it){
-    interned_string buildid = *it;
-
-    // Create a new probe point location.
-    probe_point *pp = new probe_point(*base_pp);
-
-    // The new probe point location might not have all wildcards
-    // expanded, so the new location isn't well-formed.
-    pp->well_formed = false;
-
-    // Create a new 'process' component.
-    probe_point::component* ppc
-    = new probe_point::component (TOK_PROCESS,
-          new literal_string(buildid),
-          false);
-    ppc->tok = base_pp->components[0]->tok;
-    pp->components[0] = ppc;
-    pp->optional = true; // handle it similarly to a glob
-
-    base_p->locations.push_back(pp);
-  }
+  map<string,metadata_result> buildids = get_buildids(has_archive, archive, process_path);
+
+  for(auto it = buildids.begin(); it != buildids.end(); ++it) {
+    interned_string buildid = it->first;
+    interned_string m_archive = it->second.archive;
+    interned_string m_file = it->second.file;
+
+    probe *subbase = base;
+    probe_point *subbase_pp = location;
+
+    if (sess.verbose > 3) {
+      clog << "from: ";
+      subbase->printsig(clog);
+    }
+    
+    // If the probe archive/process names were globs, interject an intermediate
+    // probe into the chain that resolves the globs.
+    if (process_path.find_first_of("*?[{") != string::npos ||
+        (has_archive && archive.find_first_of("*?[{") != string::npos)) // was some globbing performed?
+      {
+        subbase_pp = new probe_point(*location);
+        if (has_archive) {
+          assert (subbase_pp->components[1]->functor == TOK_ARCHIVE);
+          subbase_pp->components[1] = new probe_point::component(TOK_ARCHIVE,
+                                                                 new literal_string(m_archive),
+                                                                 false);
+          assert (subbase_pp->components[2]->functor == TOK_PROCESS);
+          subbase_pp->components[2] = new probe_point::component(TOK_PROCESS,
+                                                                 new literal_string(m_file),
+                                                                 false);
+        } else {
+          assert (subbase_pp->components[1]->functor == TOK_PROCESS);
+          subbase_pp->components[1] = new probe_point::component(TOK_PROCESS,
+                                                                 new literal_string(m_file),
+                                                                 false);          
+        }
+        // subbase_pp->well_formed = true; // XXX: true, unless the FUNCTION etc. stuff has *
+        subbase = new probe(base, subbase_pp);
 
-  vector<derived_probe *> results;
-  derive_probes(sess, base_p, results, false, true);
+        if (sess.verbose > 3) {
+          clog << " through: ";
+          subbase->printsig(clog);
+        }
+      }
+
+    // OK time for the new process("buildid").foo probe
+    probe_point *derived_pp = new probe_point(*subbase_pp);
+    derived_pp->components.erase(derived_pp->components.begin()); // Remove the 'debuginfod'
+    if (has_archive)
+      derived_pp->components.erase(derived_pp->components.begin()); // Remove the 'archive' too
+    assert (derived_pp->components[0]->functor == TOK_PROCESS);
+    // NB: since probe_points are shallow-copied, we must not modify a
+    // preexisting component, but create new
+    derived_pp->components[0] = new probe_point::component(TOK_PROCESS,
+                                                           new literal_string(buildid),
+                                                           false);
+    derived_pp->optional = true; // even if not from a glob, to accept wrong-arch buildids
+    // derived_pp->well_formed = true; // XXX: true, unless the FUNCTION etc. stuff has *
+    probe *derived_p = new probe(subbase, derived_pp);
+    
+    if (sess.verbose > 3) {
+      clog << " to: ";
+      derived_p->printsig(clog);
+      clog << endl;
+    }
 
-  finished_results.insert(finished_results.end(), results.begin(), results.end());
+    if (sess.verbose > 2)
+      clog << _F("resolved debuginfod archive %s file %s -> buildid %s archive %s file %s",
+                 has_archive ? archive.to_string().c_str() : "?",
+                 process_path.to_string().c_str(),
+                 buildid.to_string().c_str(),
+                 m_archive.to_string().c_str(),
+                 m_file.to_string().c_str())
+           << endl;
+    
+    vector<derived_probe *> results;
+    derive_probes(sess, derived_p, results, false, true);
+    finished_results.insert(finished_results.end(), results.begin(), results.end());
+  }
 }
 
 void
This page took 0.046164 seconds and 5 git commands to generate.