elaborate.cxx

   1 // elaboration functions
   2 // Copyright (C) 2005-2014 Red Hat Inc.
   3 // Copyright (C) 2008 Intel Corporation
   4 //
   5 // This file is part of systemtap, and is free software.  You can
   6 // redistribute it and/or modify it under the terms of the GNU General
   7 // Public License (GPL); either version 2, or (at your option) any
   8 // later version.
   9
  10 #include "config.h"
  11 #include "elaborate.h"
  12 #include "translate.h"
  13 #include "parse.h"
  14 #include "tapsets.h"
  15 #include "session.h"
  16 #include "util.h"
  17 #include "task_finder.h"
  18 #include "stapregex.h"
  19
  20 extern "C" {
  21 #include <sys/utsname.h>
  22 #include <fnmatch.h>
  23 #define __STDC_FORMAT_MACROS
  24 #include <inttypes.h>
  25 }
  26
  27 #include <algorithm>
  28 #include <fstream>
  29 #include <map>
  30 #include <cassert>
  31 #include <set>
  32 #include <vector>
  33 #include <algorithm>
  34 #include <iterator>
  35 #include <climits>
  36
  37
  38 using namespace std;
  39
  40
  41 // ------------------------------------------------------------------------
  42
  43 // Used in probe_point condition construction.  Either argument may be
  44 // NULL; if both, return NULL too.  Resulting expression is a deep
  45 // copy for symbol resolution purposes.
  46 expression* add_condition (expression* a, expression* b)
  47 {
  48   if (!a && !b) return 0;
  49   if (! a) return deep_copy_visitor::deep_copy(b);
  50   if (! b) return deep_copy_visitor::deep_copy(a);
  51   logical_and_expr la;
  52   la.op = "&&";
  53   la.left = a;
  54   la.right = b;
  55   la.tok = a->tok; // or could be b->tok
  56   return deep_copy_visitor::deep_copy(& la);
  57 }
  58
  59 // ------------------------------------------------------------------------
  60
  61
  62
  63 derived_probe::derived_probe (probe *p, probe_point *l, bool rewrite_loc):
  64   base (p), base_pp(l), group(NULL), sdt_semaphore_addr(0),
  65   session_index((unsigned)-1)
  66 {
  67   assert (p);
  68   this->tok = p->tok;
  69   this->privileged = p->privileged;
  70   this->body = deep_copy_visitor::deep_copy(p->body);
  71
  72   assert (l);
  73   // make a copy for subclasses which want to rewrite the location
  74   if (rewrite_loc)
  75     l = new probe_point(*l);
  76   this->locations.push_back (l);
  77 }
  78
  79
  80 void
  81 derived_probe::printsig (ostream& o) const
  82 {
  83   probe::printsig (o);
  84   printsig_nested (o);
  85 }
  86
  87 void
  88 derived_probe::printsig_nested (ostream& o) const
  89 {
  90   // We'd like to enclose the probe derivation chain in a /* */
  91   // comment delimiter.  But just printing /* base->printsig() */ is
  92   // not enough, since base might itself be a derived_probe.  So we,
  93   // er, "cleverly" encode our nesting state as a formatting flag for
  94   // the ostream.
  95   ios::fmtflags f = o.flags (ios::internal);
  96   if (f & ios::internal)
  97     {
  98       // already nested
  99       o << " <- ";
 100       base->printsig (o);
 101     }
 102   else
 103     {
 104       // outermost nesting
 105       o << " /* <- ";
 106       base->printsig (o);
 107       o << " */";
 108     }
 109   // restore flags
 110   (void) o.flags (f);
 111 }
 112
 113
 114 void
 115 derived_probe::collect_derivation_chain (std::vector<probe*> &probes_list) const
 116 {
 117   probes_list.push_back(const_cast<derived_probe*>(this));
 118   base->collect_derivation_chain(probes_list);
 119 }
 120
 121
 122 void
 123 derived_probe::collect_derivation_pp_chain (std::vector<probe_point*> &pp_list) const
 124 {
 125   pp_list.push_back(const_cast<probe_point*>(this->sole_location()));
 126   base->collect_derivation_pp_chain(pp_list);
 127 }
 128
 129
 130 string
 131 derived_probe::derived_locations ()
 132 {
 133   ostringstream o;
 134   vector<probe_point*> reference_point;
 135   collect_derivation_pp_chain(reference_point);
 136   if (reference_point.size() > 0)
 137     for(unsigned i=1; i<reference_point.size(); ++i)
 138       o << " from: " << reference_point[i]->str(false); // no ?,!,etc
 139   return o.str();
 140 }
 141
 142
 143 probe_point*
 144 derived_probe::sole_location () const
 145 {
 146   if (locations.size() == 0 || locations.size() > 1)
 147     throw SEMANTIC_ERROR (_N("derived_probe with no locations",
 148                              "derived_probe with too many locations",
 149                              locations.size()), this->tok);
 150   else
 151     return locations[0];
 152 }
 153
 154
 155 probe_point*
 156 derived_probe::script_location () const
 157 {
 158   // This feeds function::pn() in the tapset, which is documented as the
 159   // script-level probe point expression, *after wildcard expansion*.
 160   vector<probe_point*> chain;
 161   collect_derivation_pp_chain (chain);
 162
 163   // Go backwards until we hit the first well-formed probe point
 164   for (int i=chain.size()-1; i>=0; i--)
 165     if (chain[i]->well_formed)
 166       return chain[i];
 167
 168   // If that didn't work, just fallback to -something-.
 169   return sole_location();
 170 }
 171
 172
 173 void
 174 derived_probe::emit_privilege_assertion (translator_output* o)
 175 {
 176   // Emit code which will cause compilation to fail if it is compiled in
 177   // unprivileged mode.
 178   o->newline() << "#if ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPDEV) && \\";
 179   o->newline() << "    ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPSYS)";
 180   o->newline() << "#error Internal Error: Probe ";
 181   probe::printsig (o->line());
 182   o->line()    << " generated in --unprivileged mode";
 183   o->newline() << "#endif";
 184 }
 185
 186
 187 void
 188 derived_probe::emit_process_owner_assertion (translator_output* o)
 189 {
 190   // Emit code which will abort should the current target not belong to the
 191   // user in unprivileged mode.
 192   o->newline() << "#if ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPDEV) && \\";
 193   o->newline() << "    ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPSYS)";
 194   o->newline(1)  << "if (! is_myproc ()) {";
 195   o->newline(1)  << "snprintf(c->error_buffer, sizeof(c->error_buffer),";
 196   o->newline()   << "         \"Internal Error: Process %d does not belong to user %d in probe %s in --unprivileged mode\",";
 197   o->newline()   << "         current->tgid, _stp_uid, c->probe_point);";
 198   o->newline()   << "c->last_error = c->error_buffer;";
 199   // NB: since this check occurs before probe locking, its exit should
 200   // not be a "goto out", which would attempt unlocking.
 201   o->newline()   << "return;";
 202   o->newline(-1) << "}";
 203   o->newline(-1) << "#endif";
 204 }
 205
 206 void
 207 derived_probe::print_dupe_stamp_unprivileged(ostream& o)
 208 {
 209   o << _("unprivileged users: authorized") << endl;
 210 }
 211
 212 void
 213 derived_probe::print_dupe_stamp_unprivileged_process_owner(ostream& o)
 214 {
 215   o << _("unprivileged users: authorized for process owner") << endl;
 216 }
 217
 218 // ------------------------------------------------------------------------
 219 // Members of derived_probe_builder
 220
 221 void
 222 derived_probe_builder::build_with_suffix(systemtap_session & sess,
 223                                          probe * use,
 224                                          probe_point * location,
 225                                          std::map<std::string, literal *>
 226                                            const & parameters,
 227                                          std::vector<derived_probe *>
 228                                            & finished_results,
 229                                          std::vector<probe_point::component *>
 230                                            const & suffix) {
 231   // XXX perhaps build the probe if suffix is empty?
 232   // if (suffix.empty()) {
 233   //   build (sess, use, location, parameters, finished_results);
 234   //   return;
 235   // }
 236   throw SEMANTIC_ERROR (_("invalid suffix for probe"));
 237 }
 238
 239 bool
 240 derived_probe_builder::get_param (std::map<std::string, literal*> const & params,
 241                                   const std::string& key,
 242                                   std::string& value)
 243 {
 244   map<string, literal *>::const_iterator i = params.find (key);
 245   if (i == params.end())
 246     return false;
 247   literal_string * ls = dynamic_cast<literal_string *>(i->second);
 248   if (!ls)
 249     return false;
 250   value = ls->value;
 251   return true;
 252 }
 253
 254
 255 bool
 256 derived_probe_builder::get_param (std::map<std::string, literal*> const & params,
 257                                   const std::string& key,
 258                                   int64_t& value)
 259 {
 260   map<string, literal *>::const_iterator i = params.find (key);
 261   if (i == params.end())
 262     return false;
 263   if (i->second == NULL)
 264     return false;
 265   literal_number * ln = dynamic_cast<literal_number *>(i->second);
 266   if (!ln)
 267     return false;
 268   value = ln->value;
 269   return true;
 270 }
 271
 272
 273 bool
 274 derived_probe_builder::has_null_param (std::map<std::string, literal*> const & params,
 275                                        const std::string& key)
 276 {
 277   map<string, literal *>::const_iterator i = params.find(key);
 278   return (i != params.end() && i->second == NULL);
 279 }
 280
 281 bool
 282 derived_probe_builder::has_param (std::map<std::string, literal*> const & params,
 283                                        const std::string& key)
 284 {
 285   return (params.find(key) != params.end());
 286 }
 287
 288 // ------------------------------------------------------------------------
 289 // Members of match_key.
 290
 291 match_key::match_key(string const & n)
 292   : name(n),
 293     have_parameter(false),
 294     parameter_type(pe_unknown)
 295 {
 296 }
 297
 298 match_key::match_key(probe_point::component const & c)
 299   : name(c.functor),
 300     have_parameter(c.arg != NULL),
 301     parameter_type(c.arg ? c.arg->type : pe_unknown)
 302 {
 303 }
 304
 305 match_key &
 306 match_key::with_number()
 307 {
 308   have_parameter = true;
 309   parameter_type = pe_long;
 310   return *this;
 311 }
 312
 313 match_key &
 314 match_key::with_string()
 315 {
 316   have_parameter = true;
 317   parameter_type = pe_string;
 318   return *this;
 319 }
 320
 321 string
 322 match_key::str() const
 323 {
 324   if (have_parameter)
 325     switch (parameter_type)
 326       {
 327       case pe_string: return name + "(string)";
 328       case pe_long: return name + "(number)";
 329       default: return name + "(...)";
 330       }
 331   return name;
 332 }
 333
 334 bool
 335 match_key::operator<(match_key const & other) const
 336 {
 337   return ((name < other.name)
 338
 339           || (name == other.name
 340               && have_parameter < other.have_parameter)
 341
 342           || (name == other.name
 343               && have_parameter == other.have_parameter
 344               && parameter_type < other.parameter_type));
 345 }
 346
 347
 348 // NB: these are only used in the probe point name components, where
 349 // only "*" is permitted.
 350 //
 351 // Within module("bar"), function("foo"), process("baz") strings, real
 352 // wildcards are permitted too. See also util.h:contains_glob_chars
 353
 354 static bool
 355 isglob(string const & str)
 356 {
 357   return(str.find('*') != str.npos);
 358 }
 359
 360 static bool
 361 isdoubleglob(string const & str)
 362 {
 363   return(str.find("**") != str.npos);
 364 }
 365
 366 bool
 367 match_key::globmatch(match_key const & other) const
 368 {
 369   const char *other_str = other.name.c_str();
 370   const char *name_str = name.c_str();
 371
 372   return ((fnmatch(name_str, other_str, FNM_NOESCAPE) == 0)
 373           && have_parameter == other.have_parameter
 374           && parameter_type == other.parameter_type);
 375 }
 376
 377 // ------------------------------------------------------------------------
 378 // Members of match_node
 379 // ------------------------------------------------------------------------
 380
 381 match_node::match_node() :
 382   privilege(privilege_t (pr_stapdev | pr_stapsys))
 383 {
 384 }
 385
 386 match_node *
 387 match_node::bind(match_key const & k)
 388 {
 389   if (k.name == "*")
 390     throw SEMANTIC_ERROR(_("invalid use of wildcard probe point component"));
 391
 392   map<match_key, match_node *>::const_iterator i = sub.find(k);
 393   if (i != sub.end())
 394     return i->second;
 395   match_node * n = new match_node();
 396   sub.insert(make_pair(k, n));
 397   return n;
 398 }
 399
 400 void
 401 match_node::bind(derived_probe_builder * e)
 402 {
 403   ends.push_back (e);
 404 }
 405
 406 match_node *
 407 match_node::bind(string const & k)
 408 {
 409   return bind(match_key(k));
 410 }
 411
 412 match_node *
 413 match_node::bind_str(string const & k)
 414 {
 415   return bind(match_key(k).with_string());
 416 }
 417
 418 match_node *
 419 match_node::bind_num(string const & k)
 420 {
 421   return bind(match_key(k).with_number());
 422 }
 423
 424 match_node *
 425 match_node::bind_privilege(privilege_t p)
 426 {
 427   privilege = p;
 428   return this;
 429 }
 430
 431 void
 432 match_node::find_and_build (systemtap_session& s,
 433                             probe* p, probe_point *loc, unsigned pos,
 434                             vector<derived_probe *>& results)
 435 {
 436   assert (pos <= loc->components.size());
 437   if (pos == loc->components.size()) // matched all probe point components so far
 438     {
 439       if (ends.empty())
 440         {
 441           string alternatives;
 442           for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 443             alternatives += string(" ") + i->first.str();
 444
 445           throw SEMANTIC_ERROR (_F("probe point truncated (follow: %s)",
 446                                    alternatives.c_str()),
 447                                    loc->components.back()->tok);
 448         }
 449
 450       if (! pr_contains (privilege, s.privilege))
 451         {
 452           throw SEMANTIC_ERROR (_F("probe point is not allowed for --privilege=%s",
 453                                    pr_name (s.privilege)),
 454                                 loc->components.back()->tok);
 455         }
 456
 457       map<string, literal *> param_map;
 458       for (unsigned i=0; i<pos; i++)
 459         param_map[loc->components[i]->functor] = loc->components[i]->arg;
 460       // maybe 0
 461
 462       // Iterate over all bound builders
 463       for (unsigned k=0; k<ends.size(); k++)
 464         {
 465           derived_probe_builder *b = ends[k];
 466           b->build (s, p, loc, param_map, results);
 467         }
 468     }
 469   else if (isdoubleglob(loc->components[pos]->functor)) // ** wildcard?
 470     {
 471       unsigned int num_results = results.size();
 472
 473       // When faced with "foo**bar", we try "foo*bar" and "foo*.**bar"
 474
 475       const probe_point::component *comp = loc->components[pos];
 476       const string &functor = comp->functor;
 477       size_t glob_start = functor.find("**");
 478       size_t glob_end = functor.find_first_not_of('*', glob_start);
 479       const string prefix = functor.substr(0, glob_start);
 480       const string suffix = ((glob_end != string::npos) ?
 481                              functor.substr(glob_end) : "");
 482
 483       // Synthesize "foo*bar"
 484       probe_point *simple_pp = new probe_point(*loc);
 485       simple_pp->from_glob = true;
 486       probe_point::component *simple_comp = new probe_point::component(*comp);
 487       simple_comp->functor = prefix + "*" + suffix;
 488       simple_pp->components[pos] = simple_comp;
 489       try
 490         {
 491           find_and_build (s, p, simple_pp, pos, results);
 492         }
 493       catch (const semantic_error& e)
 494         {
 495           // Ignore semantic_errors.
 496         }
 497
 498       // Cleanup if we didn't find anything
 499       if (results.size() == num_results)
 500         {
 501           delete simple_pp;
 502           delete simple_comp;
 503         }
 504
 505       num_results = results.size();
 506
 507       // Synthesize "foo*.**bar"
 508       // NB: any component arg should attach to the latter part only
 509       probe_point *expanded_pp = new probe_point(*loc);
 510       expanded_pp->from_glob = true;
 511       probe_point::component *expanded_comp_pre = new probe_point::component(*comp);
 512       expanded_comp_pre->functor = prefix + "*";
 513       expanded_comp_pre->arg = NULL;
 514       probe_point::component *expanded_comp_post = new probe_point::component(*comp);
 515       expanded_comp_post->functor = "**" + suffix;
 516       expanded_pp->components[pos] = expanded_comp_pre;
 517       expanded_pp->components.insert(expanded_pp->components.begin() + pos + 1,
 518                                      expanded_comp_post);
 519       try
 520         {
 521           find_and_build (s, p, expanded_pp, pos, results);
 522         }
 523       catch (const semantic_error& e)
 524         {
 525           // Ignore semantic_errors.
 526         }
 527
 528       // Cleanup if we didn't find anything
 529       if (results.size() == num_results)
 530         {
 531           delete expanded_pp;
 532           delete expanded_comp_pre;
 533           delete expanded_comp_post;
 534         }
 535
 536       // Try suffix expansion only if no matches found:
 537       if (num_results == results.size())
 538         this->try_suffix_expansion (s, p, loc, pos, results);
 539
 540       if (! loc->optional && num_results == results.size())
 541         {
 542           // We didn't find any wildcard matches (since the size of
 543           // the result vector didn't change).  Throw an error.
 544           string sugs = suggest_functors(functor);
 545           throw SEMANTIC_ERROR (_F("probe point mismatch: didn't find any wildcard matches%s",
 546                                    sugs.empty() ? "" : (" (similar: " + sugs + ")").c_str()),
 547                                 comp->tok);
 548         }
 549     }
 550   else if (isglob(loc->components[pos]->functor)) // wildcard?
 551     {
 552       match_key match (* loc->components[pos]);
 553
 554       // Call find_and_build for each possible match.  Ignore errors -
 555       // unless we don't find any match.
 556       unsigned int num_results = results.size();
 557       for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 558         {
 559           const match_key& subkey = i->first;
 560           match_node* subnode = i->second;
 561
 562           assert_no_interrupts();
 563
 564           if (match.globmatch(subkey))
 565             {
 566               if (s.verbose > 2)
 567                 clog << _F("wildcard '%s' matched '%s'",
 568                            loc->components[pos]->functor.c_str(),
 569                            subkey.name.c_str()) << endl;
 570
 571               // When we have a wildcard, we need to create a copy of
 572               // the probe point.  Then we'll create a copy of the
 573               // wildcard component, and substitute the non-wildcard
 574               // functor.
 575               probe_point *non_wildcard_pp = new probe_point(*loc);
 576               non_wildcard_pp->from_glob = true;
 577               probe_point::component *non_wildcard_component
 578                 = new probe_point::component(*loc->components[pos]);
 579               non_wildcard_component->functor = subkey.name;
 580               non_wildcard_pp->components[pos] = non_wildcard_component;
 581
 582               // NB: probe conditions are not attached at the wildcard
 583               // (component/functor) level, but at the overall
 584               // probe_point level.
 585
 586               unsigned int inner_results = results.size();
 587
 588               // recurse (with the non-wildcard probe point)
 589               try
 590                 {
 591                   subnode->find_and_build (s, p, non_wildcard_pp, pos+1,
 592                                            results);
 593                 }
 594               catch (const semantic_error& e)
 595                 {
 596                   // Ignore semantic_errors while expanding wildcards.
 597                   // If we get done and nothing was expanded, the code
 598                   // following the loop will complain.
 599                 }
 600
 601               if (results.size() == inner_results)
 602                 {
 603                   // If this wildcard didn't match, cleanup.
 604                   delete non_wildcard_pp;
 605                   delete non_wildcard_component;
 606                 }
 607             }
 608         }
 609
 610       // Try suffix expansion only if no matches found:
 611       if (num_results == results.size())
 612         this->try_suffix_expansion (s, p, loc, pos, results);
 613
 614       if (! loc->optional && num_results == results.size())
 615         {
 616           // We didn't find any wildcard matches (since the size of
 617           // the result vector didn't change).  Throw an error.
 618           string sugs = suggest_functors(loc->components[pos]->functor);
 619           throw SEMANTIC_ERROR (_F("probe point mismatch: didn't find any wildcard matches%s",
 620                                    sugs.empty() ? "" : (" (similar: " + sugs + ")").c_str()),
 621                                 loc->components[pos]->tok);
 622         }
 623     }
 624   else
 625     {
 626       match_key match (* loc->components[pos]);
 627       sub_map_iterator_t i = sub.find (match);
 628
 629       if (i != sub.end()) // match found
 630         {
 631           match_node* subnode = i->second;
 632           // recurse
 633           subnode->find_and_build (s, p, loc, pos+1, results);
 634           return;
 635         }
 636
 637       unsigned int num_results = results.size();
 638       this->try_suffix_expansion (s, p, loc, pos, results);
 639
 640       // XXX: how to correctly report alternatives + position numbers
 641       // for alias suffixes?  file a separate PR to address the issue
 642       if (! loc->optional && num_results == results.size())
 643         {
 644           // We didn't find any alias suffixes (since the size of the
 645           // result vector didn't change).  Throw an error.
 646           string sugs = suggest_functors(loc->components[pos]->functor);
 647           throw SEMANTIC_ERROR (_F("probe point mismatch%s",
 648                                    sugs.empty() ? "" : (" (similar: " + sugs + ")").c_str()),
 649                                 loc->components[pos]->tok);
 650         }
 651     }
 652 }
 653
 654 string
 655 match_node::suggest_functors(string functor)
 656 {
 657   // only use prefix if globby (and prefix is non-empty)
 658   size_t glob = functor.find('*');
 659   if (glob != string::npos && glob != 0)
 660     functor.erase(glob);
 661   if (functor.empty())
 662     return "";
 663
 664   set<string> functors;
 665   for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 666     {
 667       string ftor = i->first.str();
 668       if (ftor.find('(') != string::npos)  // trim any parameter
 669         ftor.erase(ftor.find('('));
 670       functors.insert(ftor);
 671     }
 672   return levenshtein_suggest(functor, functors, 5); // print top 5
 673 }
 674
 675 void
 676 match_node::try_suffix_expansion (systemtap_session& s,
 677                                   probe *p, probe_point *loc, unsigned pos,
 678                                   vector<derived_probe *>& results)
 679 {
 680   // PR12210: match alias suffixes. If the components thus far
 681   // have been matched, but there is an additional unknown
 682   // suffix, we have a potential alias suffix on our hands. We
 683   // need to expand the preceding components as probe aliases,
 684   // reattach the suffix, and re-run derive_probes() on the
 685   // resulting expansion. This is done by the routine
 686   // build_with_suffix().
 687
 688   if (strverscmp(s.compatible.c_str(), "2.0") >= 0)
 689     {
 690       // XXX: technically, param_map isn't used here.  So don't
 691       // bother actually assembling it unless some
 692       // derived_probe_builder appears that actually takes
 693       // suffixes *and* consults parameters (currently no such
 694       // builders exist).
 695       map<string, literal *> param_map;
 696       // for (unsigned i=0; i<pos; i++)
 697       //   param_map[loc->components[i]->functor] = loc->components[i]->arg;
 698       // maybe 0
 699
 700       vector<probe_point::component *> suffix (loc->components.begin()+pos,
 701                                                loc->components.end());
 702
 703       // Multiple derived_probe_builders may be bound at a
 704       // match_node due to the possibility of multiply defined
 705       // aliases.
 706       for (unsigned k=0; k < ends.size(); k++)
 707         {
 708           derived_probe_builder *b = ends[k];
 709           try
 710             {
 711               b->build_with_suffix (s, p, loc, param_map, results, suffix);
 712             }
 713           catch (const recursive_expansion_error &e)
 714             {
 715               // Re-throw:
 716               throw semantic_error(e);
 717             }
 718           catch (const semantic_error &e)
 719             {
 720               // Adjust source coordinate and re-throw:
 721               if (! loc->optional)
 722                 throw semantic_error(e.errsrc, e.what(), loc->components[pos]->tok);
 723             }
 724         }
 725     }
 726 }
 727
 728
 729 void
 730 match_node::build_no_more (systemtap_session& s)
 731 {
 732   for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 733     i->second->build_no_more (s);
 734   for (unsigned k=0; k<ends.size(); k++)
 735     {
 736       derived_probe_builder *b = ends[k];
 737       b->build_no_more (s);
 738     }
 739 }
 740
 741 void
 742 match_node::dump (systemtap_session &s, const string &name)
 743 {
 744   // Dump this node, if it is complete.
 745   for (unsigned k=0; k<ends.size(); k++)
 746     {
 747       // Don't print aliases at all (for now) until we can figure out how to determine whether
 748       // the probes they resolve to are ok in unprivileged mode.
 749       if (ends[k]->is_alias ())
 750         continue;
 751
 752       // In unprivileged mode, don't show the probes which are not allowed for unprivileged
 753       // users.
 754       if (pr_contains (privilege, s.privilege))
 755         {
 756           cout << name << endl;
 757           break; // we need only print one instance.
 758         }
 759     }
 760
 761   // Recursively dump the children of this node
 762   string dot;
 763   if (! name.empty ())
 764     dot = ".";
 765   for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 766     {
 767       i->second->dump (s, name + dot + i->first.str());
 768     }
 769 }
 770
 771
 772 // ------------------------------------------------------------------------
 773 // Alias probes
 774 // ------------------------------------------------------------------------
 775
 776 struct alias_derived_probe: public derived_probe
 777 {
 778   alias_derived_probe (probe* base, probe_point *l, const probe_alias *a,
 779                        const vector<probe_point::component *> *suffix = 0);
 780   ~alias_derived_probe();
 781
 782   void upchuck () { throw SEMANTIC_ERROR (_("inappropriate"), this->tok); }
 783
 784   // Alias probes are immediately expanded to other derived_probe
 785   // types, and are not themselves emitted or listed in
 786   // systemtap_session.probes
 787
 788   void join_group (systemtap_session&) { upchuck (); }
 789
 790   virtual const probe_alias *get_alias () const { return alias; }
 791   virtual probe_point *get_alias_loc () const { return alias_loc; }
 792   virtual probe_point *sole_location () const;
 793
 794 private:
 795   const probe_alias *alias; // Used to check for recursion
 796   probe_point *alias_loc; // Hack to recover full probe name
 797 };
 798
 799
 800 alias_derived_probe::alias_derived_probe(probe *base, probe_point *l,
 801                                          const probe_alias *a,
 802                                          const vector<probe_point::component *>
 803                                            *suffix):
 804   derived_probe (base, l), alias(a)
 805 {
 806   // XXX pretty nasty -- this was cribbed from printscript() in main.cxx
 807   assert (alias->alias_names.size() >= 1);
 808   alias_loc = new probe_point(*alias->alias_names[0]); // XXX: [0] is arbitrary; it would make just as much sense to collect all of the names
 809   alias_loc->well_formed = true;
 810   vector<probe_point::component*>::const_iterator it;
 811   for (it = suffix->begin(); it != suffix->end(); ++it)
 812     {
 813       alias_loc->components.push_back(*it);
 814       if (isglob((*it)->functor))
 815         alias_loc->well_formed = false; // needs further derivation
 816     }
 817 }
 818
 819 alias_derived_probe::~alias_derived_probe ()
 820 {
 821   delete alias_loc;
 822 }
 823
 824
 825 probe_point*
 826 alias_derived_probe::sole_location () const
 827 {
 828   return const_cast<probe_point*>(alias_loc);
 829 }
 830
 831
 832 void
 833 alias_expansion_builder::build(systemtap_session & sess,
 834                                probe * use,
 835                                probe_point * location,
 836                                std::map<std::string, literal *>
 837                                  const & parameters,
 838                                vector<derived_probe *> & finished_results)
 839 {
 840   vector<probe_point::component *> empty_suffix;
 841   build_with_suffix (sess, use, location, parameters,
 842                      finished_results, empty_suffix);
 843 }
 844
 845 void
 846 alias_expansion_builder::build_with_suffix(systemtap_session & sess,
 847                                            probe * use,
 848                                            probe_point * location,
 849                                            std::map<std::string, literal *>
 850                                              const &,
 851                                            vector<derived_probe *>
 852                                              & finished_results,
 853                                            vector<probe_point::component *>
 854                                              const & suffix)
 855 {
 856   // Don't build the alias expansion if infinite recursion is detected.
 857   if (checkForRecursiveExpansion (use)) {
 858     stringstream msg;
 859     msg << _F("recursive loop in alias expansion of %s at %s",
 860               lex_cast(*location).c_str(), lex_cast(location->components.front()->tok->location).c_str());
 861     // semantic_errors thrown here might be ignored, so we need a special class:
 862     throw recursive_expansion_error (msg.str());
 863     // XXX The point of throwing this custom error is to suppress a
 864     // cascade of "probe mismatch" messages that appear in addition to
 865     // the error. The current approach suppresses most of the error
 866     // cascade, but leaves one spurious error; in any case, the way
 867     // this particular error is reported could be improved.
 868   }
 869
 870   // We're going to build a new probe and wrap it up in an
 871   // alias_expansion_probe so that the expansion loop recognizes it as
 872   // such and re-expands its expansion.
 873
 874   alias_derived_probe * n = new alias_derived_probe (use, location /* soon overwritten */, this->alias, &suffix);
 875   n->body = new block();
 876
 877   // The new probe gets a deep copy of the location list of the alias
 878   // (with incoming condition joined) plus the suffix (if any),
 879   n->locations.clear();
 880   for (unsigned i=0; i<alias->locations.size(); i++)
 881     {
 882       probe_point *pp = new probe_point(*alias->locations[i]);
 883       // if the original pp that gave rise to the alias we're building was from
 884       // a globby probe, then inherit globbiness
 885       pp->from_glob = location->from_glob;
 886       pp->components.insert(pp->components.end(), suffix.begin(), suffix.end());
 887       pp->condition = add_condition (pp->condition, location->condition);
 888       n->locations.push_back(pp);
 889     }
 890
 891   // the token location of the alias,
 892   n->tok = location->components.front()->tok;
 893
 894   // and statements representing the concatenation of the alias'
 895   // body with the use's.
 896   //
 897   // NB: locals are *not* copied forward, from either alias or
 898   // use. The expansion should have its locals re-inferred since
 899   // there's concatenated code here and we only want one vardecl per
 900   // resulting variable.
 901
 902   if (alias->epilogue_style)
 903     n->body = new block (use->body, alias->body);
 904   else
 905     n->body = new block (alias->body, use->body);
 906
 907   unsigned old_num_results = finished_results.size();
 908   // If expanding for an alias suffix, be sure to pass on any errors
 909   // to the caller instead of printing them in derive_probes():
 910   derive_probes (sess, n, finished_results, location->optional, !suffix.empty());
 911
 912   // Check whether we resolved something. If so, put the
 913   // whole library into the queue if not already there.
 914   if (finished_results.size() > old_num_results)
 915     {
 916       stapfile *f = alias->tok->location.file;
 917       if (find (sess.files.begin(), sess.files.end(), f)
 918           == sess.files.end())
 919         sess.files.push_back (f);
 920     }
 921 }
 922
 923 bool
 924 alias_expansion_builder::checkForRecursiveExpansion (probe *use)
 925 {
 926   // Collect the derivation chain of this probe.
 927   vector<probe*>derivations;
 928   use->collect_derivation_chain (derivations);
 929
 930   // Check all probe points in the alias expansion against the currently-being-expanded probe point
 931   // of each of the probes in the derivation chain, looking for a match. This
 932   // indicates infinite recursion.
 933   // The first element of the derivation chain will be the derived_probe representing 'use', so
 934   // start the search with the second element.
 935   assert (derivations.size() > 0);
 936   assert (derivations[0] == use);
 937   for (unsigned d = 1; d < derivations.size(); ++d) {
 938     if (use->get_alias() == derivations[d]->get_alias())
 939       return true; // recursion detected
 940   }
 941   return false;
 942 }
 943
 944
 945 // ------------------------------------------------------------------------
 946 // Pattern matching
 947 // ------------------------------------------------------------------------
 948
 949 static unsigned max_recursion = 100;
 950
 951 struct
 952 recursion_guard
 953 {
 954   unsigned & i;
 955   recursion_guard(unsigned & i) : i(i)
 956     {
 957       if (i > max_recursion)
 958         throw SEMANTIC_ERROR(_("recursion limit reached"));
 959       ++i;
 960     }
 961   ~recursion_guard()
 962     {
 963       --i;
 964     }
 965 };
 966
 967 // The match-and-expand loop.
 968 void
 969 derive_probes (systemtap_session& s,
 970                probe *p, vector<derived_probe*>& dps,
 971                bool optional,
 972                bool rethrow_errors)
 973 {
 974   // We need a static to track whether the current probe is optional so that
 975   // even if we recurse into derive_probes with optional = false, errors will
 976   // still be ignored. The undo_parent_optional bool ensures we reset the
 977   // static at the same level we had it set.
 978   static bool parent_optional = false;
 979   bool undo_parent_optional = false;
 980
 981   if (optional && !parent_optional)
 982     {
 983       parent_optional = true;
 984       undo_parent_optional = true;
 985     }
 986
 987   vector <semantic_error> optional_errs;
 988
 989   for (unsigned i = 0; i < p->locations.size(); ++i)
 990     {
 991       assert_no_interrupts();
 992
 993       probe_point *loc = p->locations[i];
 994
 995       if (s.verbose > 4)
 996         clog << "derive-probes " << *loc << endl;
 997
 998       try
 999         {
1000           unsigned num_atbegin = dps.size();
1001
1002           try
1003             {
1004               s.pattern_root->find_and_build (s, p, loc, 0, dps); // <-- actual derivation!
1005             }
1006           catch (const semantic_error& e)
1007             {
1008               if (!loc->optional && !parent_optional)
1009                 throw semantic_error(e);
1010               else /* tolerate failure for optional probe */
1011                 {
1012                   // remember err, we will print it (in catch block) if any
1013                   // non-optional loc fails to resolve
1014                   semantic_error err(ERR_SRC, _("while resolving probe point"),
1015                                      loc->components[0]->tok, NULL, &e);
1016                   optional_errs.push_back(err);
1017                   continue;
1018                 }
1019             }
1020
1021           unsigned num_atend = dps.size();
1022
1023           if (! (loc->optional||parent_optional) && // something required, but
1024               num_atbegin == num_atend) // nothing new derived!
1025             throw SEMANTIC_ERROR (_("no match"));
1026
1027           if (loc->sufficient && (num_atend > num_atbegin))
1028             {
1029               if (s.verbose > 1)
1030                 {
1031                   clog << "Probe point ";
1032                   p->locations[i]->print(clog);
1033                   clog << " sufficient, skipped";
1034                   for (unsigned j = i+1; j < p->locations.size(); ++j)
1035                     {
1036                       clog << " ";
1037                       p->locations[j]->print(clog);
1038                     }
1039                   clog << endl;
1040                 }
1041               break; // we need not try to derive for any other locations
1042             }
1043         }
1044       catch (const semantic_error& e)
1045         {
1046           // The rethrow_errors parameter lets the caller decide an
1047           // alternative to printing the error. This is necessary when
1048           // calling derive_probes() recursively during expansion of
1049           // an alias with suffix -- any message printed here would
1050           // point to the alias declaration and not the invalid suffix
1051           // usage, so the caller needs to catch the error themselves
1052           // and print a more appropriate message.
1053           if (rethrow_errors)
1054             {
1055               throw semantic_error(e);
1056             }
1057           // Only output in dump mode if -vv is supplied:
1058           else if (!s.dump_mode || (s.verbose > 1))
1059             {
1060               // print this one manually first because it's more important than
1061               // the optional errs
1062               semantic_error err(ERR_SRC, _("while resolving probe point"),
1063                                  loc->components[0]->tok, NULL, &e);
1064               s.print_error(err);
1065
1066               // print optional errs accumulated while visiting other probe points
1067               for (vector<semantic_error>::const_iterator it = optional_errs.begin();
1068                    it != optional_errs.end(); ++it)
1069                 {
1070                   s.print_error(*it);
1071                 }
1072             }
1073         }
1074     }
1075
1076   if (undo_parent_optional)
1077     parent_optional = false;
1078 }
1079
1080
1081
1082 // ------------------------------------------------------------------------
1083 //
1084 // Indexable usage checks
1085 //
1086
1087 struct symbol_fetcher
1088   : public throwing_visitor
1089 {
1090   symbol *&sym;
1091
1092   symbol_fetcher (symbol *&sym): sym(sym)
1093   {}
1094
1095   void visit_symbol (symbol* e)
1096   {
1097     sym = e;
1098   }
1099
1100   void visit_arrayindex (arrayindex* e)
1101   {
1102     e->base->visit (this);
1103   }
1104
1105   void throwone (const token* t)
1106   {
1107     throw SEMANTIC_ERROR (_("Expecting symbol or array index expression"), t);
1108   }
1109 };
1110
1111 symbol *
1112 get_symbol_within_expression (expression *e)
1113 {
1114   symbol *sym = NULL;
1115   symbol_fetcher fetcher(sym);
1116   e->visit (&fetcher);
1117   return sym; // NB: may be null!
1118 }
1119
1120 static symbol *
1121 get_symbol_within_indexable (indexable *ix)
1122 {
1123   symbol *array = NULL;
1124   hist_op *hist = NULL;
1125   classify_indexable(ix, array, hist);
1126   if (array)
1127     return array;
1128   else
1129     return get_symbol_within_expression (hist->stat);
1130 }
1131
1132 struct mutated_var_collector
1133   : public traversing_visitor
1134 {
1135   set<vardecl *> * mutated_vars;
1136
1137   mutated_var_collector (set<vardecl *> * mm)
1138     : mutated_vars (mm)
1139   {}
1140
1141   void visit_assignment(assignment* e)
1142   {
1143     if (e->type == pe_stats && e->op == "<<<")
1144       {
1145         vardecl *vd = get_symbol_within_expression (e->left)->referent;
1146         if (vd)
1147           mutated_vars->insert (vd);
1148       }
1149     traversing_visitor::visit_assignment(e);
1150   }
1151
1152   void visit_arrayindex (arrayindex *e)
1153   {
1154     if (is_active_lvalue (e))
1155       {
1156         symbol *sym;
1157         if (e->base->is_symbol (sym))
1158           mutated_vars->insert (sym->referent);
1159         else
1160           throw SEMANTIC_ERROR(_("Assignment to read-only histogram bucket"), e->tok);
1161       }
1162     traversing_visitor::visit_arrayindex (e);
1163   }
1164 };
1165
1166
1167 struct no_var_mutation_during_iteration_check
1168   : public traversing_visitor
1169 {
1170   systemtap_session & session;
1171   map<functiondecl *,set<vardecl *> *> & function_mutates_vars;
1172   vector<vardecl *> vars_being_iterated;
1173
1174   no_var_mutation_during_iteration_check
1175   (systemtap_session & sess,
1176    map<functiondecl *,set<vardecl *> *> & fmv)
1177     : session(sess), function_mutates_vars (fmv)
1178   {}
1179
1180   void visit_arrayindex (arrayindex *e)
1181   {
1182     if (is_active_lvalue(e))
1183       {
1184         vardecl *vd = get_symbol_within_indexable (e->base)->referent;
1185         if (vd)
1186           {
1187             for (unsigned i = 0; i < vars_being_iterated.size(); ++i)
1188               {
1189                 vardecl *v = vars_being_iterated[i];
1190                 if (v == vd)
1191                   {
1192                     string err = _F("variable '%s' modified during 'foreach' iteration",
1193                                     v->name.c_str());
1194                     session.print_error (SEMANTIC_ERROR (err, e->tok));
1195                   }
1196               }
1197           }
1198       }
1199     traversing_visitor::visit_arrayindex (e);
1200   }
1201
1202   void visit_functioncall (functioncall* e)
1203   {
1204     map<functiondecl *,set<vardecl *> *>::const_iterator i
1205       = function_mutates_vars.find (e->referent);
1206
1207     if (i != function_mutates_vars.end())
1208       {
1209         for (unsigned j = 0; j < vars_being_iterated.size(); ++j)
1210           {
1211             vardecl *m = vars_being_iterated[j];
1212             if (i->second->find (m) != i->second->end())
1213               {
1214                 string err = _F("function call modifies var '%s' during 'foreach' iteration",
1215                                 m->name.c_str());
1216                 session.print_error (SEMANTIC_ERROR (err, e->tok));
1217               }
1218           }
1219       }
1220
1221     traversing_visitor::visit_functioncall (e);
1222   }
1223
1224   void visit_foreach_loop(foreach_loop* s)
1225   {
1226     vardecl *vd = get_symbol_within_indexable (s->base)->referent;
1227
1228     if (vd)
1229       vars_being_iterated.push_back (vd);
1230
1231     traversing_visitor::visit_foreach_loop (s);
1232
1233     if (vd)
1234       vars_being_iterated.pop_back();
1235   }
1236 };
1237
1238
1239 // ------------------------------------------------------------------------
1240
1241 struct stat_decl_collector
1242   : public traversing_visitor
1243 {
1244   systemtap_session & session;
1245
1246   stat_decl_collector(systemtap_session & sess)
1247     : session(sess)
1248   {}
1249
1250   void visit_stat_op (stat_op* e)
1251   {
1252     symbol *sym = get_symbol_within_expression (e->stat);
1253     if (session.stat_decls.find(sym->name) == session.stat_decls.end())
1254       session.stat_decls[sym->name] = statistic_decl();
1255   }
1256
1257   void visit_assignment (assignment* e)
1258   {
1259     if (e->op == "<<<")
1260       {
1261         symbol *sym = get_symbol_within_expression (e->left);
1262         if (session.stat_decls.find(sym->name) == session.stat_decls.end())
1263           session.stat_decls[sym->name] = statistic_decl();
1264       }
1265     else
1266       traversing_visitor::visit_assignment(e);
1267   }
1268
1269   void visit_hist_op (hist_op* e)
1270   {
1271     symbol *sym = get_symbol_within_expression (e->stat);
1272     statistic_decl new_stat;
1273
1274     if (e->htype == hist_linear)
1275       {
1276         new_stat.type = statistic_decl::linear;
1277         assert (e->params.size() == 3);
1278         new_stat.linear_low = e->params[0];
1279         new_stat.linear_high = e->params[1];
1280         new_stat.linear_step = e->params[2];
1281       }
1282     else
1283       {
1284         assert (e->htype == hist_log);
1285         new_stat.type = statistic_decl::logarithmic;
1286         assert (e->params.size() == 0);
1287       }
1288
1289     map<string, statistic_decl>::iterator i = session.stat_decls.find(sym->name);
1290     if (i == session.stat_decls.end())
1291       session.stat_decls[sym->name] = new_stat;
1292     else
1293       {
1294         statistic_decl & old_stat = i->second;
1295         if (!(old_stat == new_stat))
1296           {
1297             if (old_stat.type == statistic_decl::none)
1298               i->second = new_stat;
1299             else
1300               {
1301                 // FIXME: Support multiple co-declared histogram types
1302                 semantic_error se(ERR_SRC, _F("multiple histogram types declared on '%s'", sym->name.c_str()), e->tok);
1303                 session.print_error (se);
1304               }
1305           }
1306       }
1307   }
1308
1309 };
1310
1311 static int
1312 semantic_pass_stats (systemtap_session & sess)
1313 {
1314   stat_decl_collector sdc(sess);
1315
1316   for (map<string,functiondecl*>::iterator it = sess.functions.begin(); it != sess.functions.end(); it++)
1317     it->second->body->visit (&sdc);
1318
1319   for (unsigned i = 0; i < sess.probes.size(); ++i)
1320     sess.probes[i]->body->visit (&sdc);
1321
1322   for (unsigned i = 0; i < sess.globals.size(); ++i)
1323     {
1324       vardecl *v = sess.globals[i];
1325       if (v->type == pe_stats)
1326         {
1327
1328           if (sess.stat_decls.find(v->name) == sess.stat_decls.end())
1329             {
1330               semantic_error se(ERR_SRC, _F("unable to infer statistic parameters for global '%s'", v->name.c_str()));
1331               sess.print_error (se);
1332             }
1333         }
1334     }
1335
1336   return sess.num_errors();
1337 }
1338
1339 // ------------------------------------------------------------------------
1340
1341 // Enforce variable-related invariants: no modification of
1342 // a foreach()-iterated array.
1343 static int
1344 semantic_pass_vars (systemtap_session & sess)
1345 {
1346
1347   map<functiondecl *, set<vardecl *> *> fmv;
1348   no_var_mutation_during_iteration_check chk(sess, fmv);
1349
1350   for (map<string,functiondecl*>::iterator it = sess.functions.begin(); it != sess.functions.end(); it++)
1351     {
1352       functiondecl * fn = it->second;
1353       if (fn->body)
1354         {
1355           set<vardecl *> * m = new set<vardecl *>();
1356           mutated_var_collector mc (m);
1357           fn->body->visit (&mc);
1358           fmv[fn] = m;
1359         }
1360     }
1361
1362   for (map<string,functiondecl*>::iterator it = sess.functions.begin(); it != sess.functions.end(); it++)
1363     {
1364       functiondecl * fn = it->second;
1365       if (fn->body) fn->body->visit (&chk);
1366     }
1367
1368   for (unsigned i = 0; i < sess.probes.size(); ++i)
1369     {
1370       if (sess.probes[i]->body)
1371         sess.probes[i]->body->visit (&chk);
1372     }
1373
1374   return sess.num_errors();
1375 }
1376
1377
1378 // ------------------------------------------------------------------------
1379
1380 // Rewrite probe condition expressions into probe bodies.  Tricky and
1381 // exciting business, this.  This:
1382 //
1383 // probe foo if (g1 || g2) { ... }
1384 // probe bar { ... g1 ++ ... }
1385 //
1386 // becomes:
1387 //
1388 // probe foo { if (! (g1 || g2)) next; ... }
1389 // probe bar { ... g1 ++ ...;
1390 //             if (g1 || g2) %{ enable_probe_foo %} else %{ disable_probe_foo %}
1391 //           }
1392 //
1393 // In other words, we perform two transformations:
1394 //    (1) Inline probe condition into its body.
1395 //    (2) For each probe that modifies a global var in use in any probe's
1396 //        condition, re-evaluate those probes' condition at the end of that
1397 //        probe's body.
1398 //
1399 // Here, we do all of (1), and half of (2): we simply collect the dependency
1400 // info between probes, which the translator will use to emit the affected
1401 // probes' condition re-evaluation. The translator will also ensure that the
1402 // conditions are evaluated using the globals' starting values prior to any
1403 // probes starting.
1404
1405 // Adds the condition expression to the front of the probe's body
1406 static void
1407 derived_probe_condition_inline (derived_probe *p)
1408 {
1409   expression* e = p->sole_location()->condition;
1410   assert(e);
1411
1412   if_statement *ifs = new if_statement ();
1413   ifs->tok = e->tok;
1414   ifs->thenblock = new next_statement ();
1415   ifs->thenblock->tok = e->tok;
1416   ifs->elseblock = NULL;
1417   unary_expression *notex = new unary_expression ();
1418   notex->op = "!";
1419   notex->tok = e->tok;
1420   notex->operand = e;
1421   ifs->condition = notex;
1422   p->body = new block (ifs, p->body);
1423 }
1424
1425 static int
1426 semantic_pass_conditions (systemtap_session & sess)
1427 {
1428   map<derived_probe*, set<vardecl*> > vars_read_in_cond;
1429   map<derived_probe*, set<vardecl*> > vars_written_in_body;
1430
1431   // do a first pass through the probes to ensure safety, inline any condition,
1432   // and collect var usage
1433   for (unsigned i = 0; i < sess.probes.size(); ++i)
1434     {
1435       derived_probe* p = sess.probes[i];
1436       expression* e = p->sole_location()->condition;
1437
1438       if (e)
1439         {
1440           varuse_collecting_visitor vcv_cond(sess);
1441           e->visit (& vcv_cond);
1442
1443           if (!vcv_cond.written.empty())
1444             sess.print_error (SEMANTIC_ERROR (_("probe condition must not "
1445                                                 "modify any variables"),
1446                                               e->tok));
1447           else if (vcv_cond.embedded_seen)
1448             sess.print_error (SEMANTIC_ERROR (_("probe condition must not "
1449                                                 "include impure embedded-C"),
1450                                               e->tok));
1451
1452           derived_probe_condition_inline(p);
1453
1454           vars_read_in_cond[p].insert(vcv_cond.read.begin(),
1455                                       vcv_cond.read.end());
1456         }
1457
1458       varuse_collecting_visitor vcv_body(sess);
1459       p->body->visit (& vcv_body);
1460
1461       vars_written_in_body[p].insert(vcv_body.written.begin(),
1462                                      vcv_body.written.end());
1463     }
1464
1465   // do a second pass to collect affected probes
1466   for (unsigned i = 0; i < sess.probes.size(); ++i)
1467     {
1468       derived_probe *p = sess.probes[i];
1469
1470       // for each variable this probe modifies...
1471       set<vardecl*>::const_iterator var;
1472       for (var  = vars_written_in_body[p].begin();
1473            var != vars_written_in_body[p].end(); ++var)
1474         {
1475           // collect probes which could be affected
1476           for (unsigned j = 0; j < sess.probes.size(); ++j)
1477             {
1478               if (vars_read_in_cond[sess.probes[j]].count(*var))
1479                 {
1480                   if (!p->probes_with_affected_conditions.count(sess.probes[j]))
1481                     {
1482                       p->probes_with_affected_conditions.insert(sess.probes[j]);
1483                       if (sess.verbose > 2)
1484                         clog << "probe " << i << " can affect condition of "
1485                                 "probe " << j << endl;
1486                     }
1487                 }
1488             }
1489         }
1490     }
1491
1492   return sess.num_errors();
1493 }
1494
1495 // ------------------------------------------------------------------------
1496
1497
1498 // Simple visitor that just goes through all embedded code blocks that
// are available at the end of all the optimizations to register any
1500 // relevant pragmas or other indicators found, so that session flags can
1501 // be set that can be inspected at translation time to trigger any
1502 // necessary initialization of code needed by the embedded code functions.
1503
1504 // This is only for pragmas that don't have any other side-effect than
1505 // needing some initialization at module init time. Currently handles
// /* pragma:vma */ /* pragma:unwind */ /* pragma:symbols */
1507
1508 // /* pragma:uprobes */ is handled during the typeresolution_info pass.
// /* pure */, /* unprivileged */, /* myproc-unprivileged */ and /* guru */
1510 // are handled by the varuse_collecting_visitor.
1511
1512 struct embeddedcode_info: public functioncall_traversing_visitor
1513 {
1514 protected:
1515   systemtap_session& session;
1516
1517 public:
1518   embeddedcode_info (systemtap_session& s): session(s) { }
1519
1520   void visit_embeddedcode (embeddedcode* c)
1521   {
1522     if (! vma_tracker_enabled(session)
1523         && c->code.find("/* pragma:vma */") != string::npos)
1524       {
1525         if (session.verbose > 2)
1526           clog << _F("Turning on task_finder vma_tracker, pragma:vma found in %s",
1527                      current_function->name.c_str()) << endl;
1528
1529         // PR15052: stapdyn doesn't have VMA-tracking yet.
1530         if (session.runtime_usermode_p())
1531           throw SEMANTIC_ERROR(_("VMA-tracking is only supported by the kernel runtime (PR15052)"), c->tok);
1532
1533         enable_vma_tracker(session);
1534       }
1535
1536     if (! session.need_unwind
1537         && c->code.find("/* pragma:unwind */") != string::npos)
1538       {
1539         if (session.verbose > 2)
1540           clog << _F("Turning on unwind support, pragma:unwind found in %s",
1541                     current_function->name.c_str()) << endl;
1542         session.need_unwind = true;
1543       }
1544
1545     if (! session.need_symbols
1546         && c->code.find("/* pragma:symbols */") != string::npos)
1547       {
1548         if (session.verbose > 2)
1549           clog << _F("Turning on symbol data collecting, pragma:symbols found in %s",
1550                     current_function->name.c_str()) << endl;
1551         session.need_symbols = true;
1552       }
1553   }
1554 };
1555
1556 void embeddedcode_info_pass (systemtap_session& s)
1557 {
1558   embeddedcode_info eci (s);
1559   for (unsigned i=0; i<s.probes.size(); i++)
1560     s.probes[i]->body->visit (& eci);
1561 }
1562
1563 // ------------------------------------------------------------------------
1564
1565
1566 // Simple visitor that collects all the regular expressions in the
1567 // file and adds them to the session DFA table.
1568
1569 struct regex_collecting_visitor: public functioncall_traversing_visitor
1570 {
1571 protected:
1572   systemtap_session& session;
1573
1574 public:
1575   regex_collecting_visitor (systemtap_session& s): session(s) { }
1576
1577   void visit_regex_query (regex_query *q) {
1578     functioncall_traversing_visitor::visit_regex_query (q);
1579
1580     string re = q->right->value;
1581     regex_to_stapdfa (&session, re, q->right->tok);
1582   }
1583 };
1584
1585 // Go through the regex match invocations and generate corresponding DFAs.
1586 int gen_dfa_table (systemtap_session& s)
1587 {
1588   regex_collecting_visitor rcv(s);
1589
1590   for (unsigned i=0; i<s.probes.size(); i++)
1591     {
1592       try
1593         {
1594           s.probes[i]->body->visit (& rcv);
1595
1596           if (s.probes[i]->sole_location()->condition)
1597             s.probes[i]->sole_location()->condition->visit (& rcv);
1598         }
1599       catch (const semantic_error& e)
1600         {
1601           s.print_error (e);
1602         }
1603     }
1604
1605   return s.num_errors();
1606 }
1607
1608 // ------------------------------------------------------------------------
1609
1610
// Forward declarations of the file-local semantic passes.  Each takes the
// session and returns an int.  semantic_pass_vars, semantic_pass_stats and
// semantic_pass_conditions are already defined above; semantic_pass_symbols
// is defined below.
static int semantic_pass_symbols (systemtap_session&);
static int semantic_pass_optimize1 (systemtap_session&);
static int semantic_pass_optimize2 (systemtap_session&);
static int semantic_pass_types (systemtap_session&);
static int semantic_pass_vars (systemtap_session&);
static int semantic_pass_stats (systemtap_session&);
static int semantic_pass_conditions (systemtap_session&);
1618
1619
1620 struct expression_build_no_more_visitor : public expression_visitor
1621 {
1622   // Clear extra details from every expression, like DWARF type info, so that
1623   // builders can safely release them in build_no_more.  From here on out,
1624   // we're back to basic types only.
1625   void visit_expression(expression *e)
1626     {
1627       e->type_details.reset();
1628     }
1629 };
1630
1631 static void
1632 build_no_more (systemtap_session& s)
1633 {
1634   expression_build_no_more_visitor v;
1635
1636   for (unsigned i=0; i<s.probes.size(); i++)
1637     s.probes[i]->body->visit(&v);
1638
1639   for (map<string,functiondecl*>::iterator it = s.functions.begin();
1640        it != s.functions.end(); it++)
1641     it->second->body->visit(&v);
1642
1643   // Inform all derived_probe builders that we're done with
1644   // all resolution, so it's time to release caches.
1645   s.pattern_root->build_no_more (s);
1646 }
1647
1648
1649
1650 // Link up symbols to their declarations.  Set the session's
1651 // files/probes/functions/globals vectors from the transitively
1652 // reached set of stapfiles in s.library_files, starting from
1653 // s.user_file.  Perform automatic tapset inclusion and probe
1654 // alias expansion.
1655 static int
1656 semantic_pass_symbols (systemtap_session& s)
1657 {
1658   symresolution_info sym (s);
1659
1660   // If we're listing functions, then we need to include all the files. Probe
1661   // aliases won't be visited/derived so all we gain are the functions, global
1662   // variables, and any real probes (e.g. begin probes). NB: type resolution for
1663   // a specific function arg may fail if it could only be determined from a
1664   // function call in one of the skipped aliases.
1665   if (s.dump_mode == systemtap_session::dump_functions)
1666     {
1667       s.files.insert(s.files.end(), s.library_files.begin(),
1668                                     s.library_files.end());
1669     }
1670   else if (!s.user_files.empty())
1671     {
1672       // Normal run: seed s.files with user_files and let it grow through the
1673       // find_* functions. NB: s.files can grow during this iteration, so
1674       // size() can return gradually increasing numbers.
1675       s.files.insert (s.files.end(), s.user_files.begin(), s.user_files.end());
1676     }
1677
1678   for (unsigned i = 0; i < s.files.size(); i++)
1679     {
1680       assert_no_interrupts();
1681       stapfile* dome = s.files[i];
1682
1683       // Pass 1: add globals and functions to systemtap-session master list,
1684       //         so the find_* functions find them
1685       //
1686       // NB: tapset global/function definitions may duplicate or conflict
1687       // with those already in s.globals/functions.  We need to deconflict
1688       // here.
1689
1690       for (unsigned i=0; i<dome->globals.size(); i++)
1691         {
1692           vardecl* g = dome->globals[i];
1693           for (unsigned j=0; j<s.globals.size(); j++)
1694             {
1695               vardecl* g2 = s.globals[j];
1696               if (g->name == g2->name)
1697                 {
1698                   s.print_error (SEMANTIC_ERROR (_("conflicting global variables"),
1699                                                  g->tok, g2->tok));
1700                 }
1701             }
1702           s.globals.push_back (g);
1703         }
1704
1705       for (unsigned i=0; i<dome->functions.size(); i++)
1706         {
1707           functiondecl* f = dome->functions[i];
1708           functiondecl* f2 = s.functions[f->name];
1709           if (f2 && f != f2)
1710             {
1711               s.print_error (SEMANTIC_ERROR (_("conflicting functions"),
1712                                              f->tok, f2->tok));
1713             }
1714           s.functions[f->name] = f;
1715         }
1716
1717       // NB: embeds don't conflict with each other
1718       for (unsigned i=0; i<dome->embeds.size(); i++)
1719         s.embeds.push_back (dome->embeds[i]);
1720
1721       // Pass 2: derive probes and resolve any further symbols in the
1722       // derived results.
1723
1724       for (unsigned i=0; i<dome->probes.size(); i++)
1725         {
1726           assert_no_interrupts();
1727           probe* p = dome->probes [i];
1728           vector<derived_probe*> dps;
1729
1730           // much magic happens here: probe alias expansion, wildcard
1731           // matching, low-level derived_probe construction.
1732           derive_probes (s, p, dps);
1733
1734           for (unsigned j=0; j<dps.size(); j++)
1735             {
1736               assert_no_interrupts();
1737               derived_probe* dp = dps[j];
1738               s.probes.push_back (dp);
1739               dp->join_group (s);
1740
1741               try
1742                 {
1743                   for (unsigned k=0; k<s.code_filters.size(); k++)
1744                     s.code_filters[k]->replace (dp->body);
1745
1746                   sym.current_function = 0;
1747                   sym.current_probe = dp;
1748                   dp->body->visit (& sym);
1749
1750                   // Process the probe-point condition expression.
1751                   sym.current_function = 0;
1752                   sym.current_probe = 0;
1753                   if (dp->sole_location()->condition)
1754                     dp->sole_location()->condition->visit (& sym);
1755                 }
1756               catch (const semantic_error& e)
1757                 {
1758                   s.print_error (e);
1759                 }
1760             }
1761         }
1762
1763       // Pass 3: process functions
1764
1765       for (unsigned i=0; i<dome->functions.size(); i++)
1766         {
1767           assert_no_interrupts();
1768           functiondecl* fd = dome->functions[i];
1769
1770           try
1771             {
1772               for (unsigned j=0; j<s.code_filters.size(); j++)
1773                 s.code_filters[j]->replace (fd->body);
1774
1775               sym.current_function = fd;
1776               sym.current_probe = 0;
1777               fd->body->visit (& sym);
1778             }
1779           catch (const semantic_error& e)
1780             {
1781               s.print_error (e);
1782             }
1783         }
1784     }
1785
1786   if(s.systemtap_v_check){
1787     for(unsigned i=0;i<s.globals.size();i++){
1788       if(s.globals[i]->systemtap_v_conditional)
1789         s.print_warning(_("This global uses tapset constructs that are dependent on systemtap version"), s.globals[i]->tok);
1790     }
1791
1792     for(map<string, functiondecl*>::const_iterator i=s.functions.begin();i != s.functions.end();++i){
1793       if(i->second->systemtap_v_conditional)
1794         s.print_warning(_("This function uses tapset constructs that are dependent on systemtap version"), i->second->tok);
1795     }
1796
1797     for(unsigned i=0;i<s.probes.size();i++){
1798       vector<probe*> sysvc;
1799       s.probes[i]->collect_derivation_chain(sysvc);
1800       for(unsigned j=0;j<sysvc.size();j++){
1801         if(sysvc[j]->systemtap_v_conditional)
1802           s.print_warning(_("This probe uses tapset constructs that are dependent on systemtap version"), sysvc[j]->tok);
1803         if(sysvc[j]->get_alias() && sysvc[j]->get_alias()->systemtap_v_conditional)
1804           s.print_warning(_("This alias uses tapset constructs that are dependent on systemtap version"), sysvc[j]->get_alias()->tok);
1805       }
1806     }
1807   }
1808
1809   return s.num_errors(); // all those print_error calls
1810 }
1811
1812
1813 // Keep unread global variables for probe end value display.
1814 void add_global_var_display (systemtap_session& s)
1815 {
1816   // Don't generate synthetic end probes when in listing mode; it would clutter
1817   // up the list of probe points with "end ...". In fact, don't bother in any
1818   // dump mode at all, since it'll never be used.
1819   if (s.dump_mode) return;
1820
1821   varuse_collecting_visitor vut(s);
1822
1823   for (unsigned i=0; i<s.probes.size(); i++)
1824     {
1825       s.probes[i]->body->visit (& vut);
1826
1827       if (s.probes[i]->sole_location()->condition)
1828         s.probes[i]->sole_location()->condition->visit (& vut);
1829     }
1830
1831   for (unsigned g=0; g < s.globals.size(); g++)
1832     {
1833       vardecl* l = s.globals[g];
1834       if ((vut.read.find (l) != vut.read.end()
1835            && vut.used.find (l) != vut.used.end())
1836           || vut.written.find (l) == vut.written.end())
1837         continue;
1838
1839       // Don't generate synthetic end probes for unread globals
1840       // declared only within tapsets. (RHBZ 468139), but rather
1841       // only within the end-user script.
1842
1843       bool tapset_global = false;
1844       for (size_t m=0; m < s.library_files.size(); m++)
1845         {
1846           for (size_t n=0; n < s.library_files[m]->globals.size(); n++)
1847             {
1848               if (l->name == s.library_files[m]->globals[n]->name)
1849                 {tapset_global = true; break;}
1850             }
1851         }
1852       if (tapset_global)
1853         continue;
1854
1855       stringstream code;
1856       code << "probe end {" << endl;
1857
1858       string format = l->name;
1859
1860       string indexes;
1861       string foreach_value;
1862       if (!l->index_types.empty())
1863         {
1864           // Add index values to the printf format, and prepare
1865           // a simple list of indexes for passing around elsewhere
1866           format += "[";
1867           for (size_t i = 0; i < l->index_types.size(); ++i)
1868             {
1869               if (i > 0)
1870                 {
1871                   indexes += ",";
1872                   format += ",";
1873                 }
1874               indexes += "__idx" + lex_cast(i);
1875               if (l->index_types[i] == pe_string)
1876                 format += "\\\"%#s\\\"";
1877               else
1878                 format += "%#d";
1879             }
1880           format += "]";
1881
1882           // Iterate over all indexes in the array, sorted by decreasing value
1883           code << "foreach (";
1884           if (l->type != pe_stats)
1885             {
1886               foreach_value = "__val";
1887               code << foreach_value << " = ";
1888             }
1889           code << "[" << indexes << "] in " << l->name << "-)" << endl;
1890         }
1891       else if (l->type == pe_stats)
1892         {
1893           // PR7053: Check scalar globals for empty aggregate
1894           code << "if (@count(" << l->name << ") == 0)" << endl;
1895           code << "printf(\"" << l->name << " @count=0x0\\n\")" << endl;
1896           code << "else" << endl;
1897         }
1898
1899       static const string stats[] = { "@count", "@min", "@max", "@sum", "@avg" };
1900       const string stats_format =
1901         (strverscmp(s.compatible.c_str(), "1.4") >= 0) ? "%#d" : "%#x";
1902
1903       // Fill in the printf format for values
1904       if (l->type == pe_stats)
1905         for (size_t i = 0; i < sizeof(stats)/sizeof(stats[0]); ++i)
1906           format += " " + stats[i] + "=" + stats_format;
1907       else if (l->type == pe_string)
1908         format += "=\\\"%#s\\\"";
1909       else
1910         format += "=%#x";
1911       format += "\\n";
1912
1913       // Output the actual printf
1914       code << "printf (\"" << format << "\"";
1915
1916       // Feed indexes to the printf, and include them in the value
1917       string value = !foreach_value.empty() ? foreach_value : l->name;
1918       if (!l->index_types.empty())
1919         {
1920           code << "," << indexes;
1921           if (foreach_value.empty())
1922             value += "[" + indexes + "]";
1923         }
1924
1925       // Feed the actual values to the printf
1926       if (l->type == pe_stats)
1927         for (size_t i = 0; i < sizeof(stats)/sizeof(stats[0]); ++i)
1928           code << "," << stats[i] << "(" << value << ")";
1929       else
1930         code << "," << value;
1931       code << ")" << endl;
1932
1933       // End of probe
1934       code << "}" << endl;
1935
1936       probe *p = parse_synthetic_probe (s, code, l->tok);
1937       if (!p)
1938         throw SEMANTIC_ERROR (_("can't create global var display"), l->tok);
1939
1940       vector<derived_probe*> dps;
1941       derive_probes (s, p, dps);
1942       for (unsigned i = 0; i < dps.size(); i++)
1943         {
1944           derived_probe* dp = dps[i];
1945           s.probes.push_back (dp);
1946           dp->join_group (s);
1947
1948           // Repopulate symbol and type info
1949           symresolution_info sym (s);
1950           sym.current_function = 0;
1951           sym.current_probe = dp;
1952           dp->body->visit (& sym);
1953         }
1954
1955       semantic_pass_types(s);
1956       // Mark that variable is read
1957       vut.read.insert (l);
1958     }
1959 }
1960
1961 int
1962 semantic_pass (systemtap_session& s)
1963 {
1964   int rc = 0;
1965
1966   try
1967     {
1968       s.register_library_aliases();
1969       register_standard_tapsets(s);
1970
1971       if (rc == 0) rc = semantic_pass_symbols (s);
1972       if (rc == 0) rc = semantic_pass_conditions (s);
1973       if (rc == 0) rc = semantic_pass_optimize1 (s);
1974       if (rc == 0) rc = semantic_pass_types (s);
1975       if (rc == 0) rc = gen_dfa_table(s);
1976       if (rc == 0) add_global_var_display (s);
1977       if (rc == 0) rc = semantic_pass_optimize2 (s);
1978       if (rc == 0) rc = semantic_pass_vars (s);
1979       if (rc == 0) rc = semantic_pass_stats (s);
1980       if (rc == 0) embeddedcode_info_pass (s);
1981     }
1982   catch (const semantic_error& e)
1983     {
1984       s.print_error (e);
1985       rc ++;
1986     }
1987
1988   bool no_primary_probes = true;
1989   for (unsigned i = 0; i < s.probes.size(); i++)
1990     if (s.is_primary_probe(s.probes[i]))
1991       no_primary_probes = false;
1992
1993   if (s.num_errors() == 0 && no_primary_probes && !s.dump_mode)
1994     {
1995       s.print_error(SEMANTIC_ERROR(_("no probes found")));
1996       rc ++;
1997     }
1998
1999   build_no_more (s);
2000
2001   // PR11443
2002   // NB: listing mode only cares whether we have any probes,
2003   // so all previous error conditions are disregarded.
2004   if (s.dump_mode == systemtap_session::dump_matched_probes ||
2005       s.dump_mode == systemtap_session::dump_matched_probes_vars)
2006     rc = no_primary_probes;
2007
2008   // If we're dumping functions, only error out if no functions were found
2009   if (s.dump_mode == systemtap_session::dump_functions)
2010     rc = s.functions.empty();
2011
2012   return rc;
2013 }
2014
2015
2016 // ------------------------------------------------------------------------
2017 // semantic processing: symbol resolution
2018
2019
2020 symresolution_info::symresolution_info (systemtap_session& s):
2021   session (s), current_function (0), current_probe (0)
2022 {
2023 }
2024
2025
2026 void
2027 symresolution_info::visit_block (block* e)
2028 {
2029   for (unsigned i=0; i<e->statements.size(); i++)
2030     {
2031       try
2032         {
2033           e->statements[i]->visit (this);
2034         }
2035       catch (const semantic_error& e)
2036         {
2037           session.print_error (e);
2038         }
2039     }
2040 }
2041
2042
2043 void
2044 symresolution_info::visit_foreach_loop (foreach_loop* e)
2045 {
2046   for (unsigned i=0; i<e->indexes.size(); i++)
2047     e->indexes[i]->visit (this);
2048   for (unsigned i=0; i<e->array_slice.size(); i++)
2049     if (e->array_slice[i])
2050       e->array_slice[i]->visit(this);
2051
2052   symbol *array = NULL;
2053   hist_op *hist = NULL;
2054   classify_indexable (e->base, array, hist);
2055
2056   if (array)
2057     {
2058       if (!array->referent)
2059         {
2060           vardecl* d = find_var (array->name, e->indexes.size (), array->tok);
2061           if (d)
2062             array->referent = d;
2063           else
2064             {
2065               stringstream msg;
2066               msg << _F("unresolved arity-%zu global array %s, missing global declaration?",
2067                         e->indexes.size(), array->name.c_str());
2068               throw SEMANTIC_ERROR (msg.str(), array->tok);
2069             }
2070         }
2071
2072       if (!e->array_slice.empty() && e->array_slice.size() != e->indexes.size())
2073         {
2074           stringstream msg;
2075           msg << _F("unresolved arity-%zu global array %s, missing global declaration?",
2076                     e->array_slice.size(), array->name.c_str());
2077           throw SEMANTIC_ERROR (msg.str(), array->tok);
2078         }
2079     }
2080   else
2081     {
2082       assert (hist);
2083       hist->visit (this);
2084     }
2085
2086   if (e->value)
2087     e->value->visit (this);
2088
2089   if (e->limit)
2090     e->limit->visit (this);
2091
2092   e->block->visit (this);
2093 }
2094
2095
2096 struct
2097 delete_statement_symresolution_info:
2098   public traversing_visitor
2099 {
2100   symresolution_info *parent;
2101
2102   delete_statement_symresolution_info (symresolution_info *p):
2103     parent(p)
2104   {}
2105
2106   void visit_arrayindex (arrayindex* e)
2107   {
2108     parent->visit_arrayindex(e, true);
2109   }
2110
2111   void visit_functioncall (functioncall* e)
2112   {
2113     parent->visit_functioncall (e);
2114   }
2115
2116   void visit_symbol (symbol* e)
2117   {
2118     if (e->referent)
2119       return;
2120
2121     vardecl* d = parent->find_var (e->name, -1, e->tok);
2122     if (d)
2123       e->referent = d;
2124     else
2125       throw SEMANTIC_ERROR (_("unresolved array in delete statement"), e->tok);
2126   }
2127 };
2128
2129 void
2130 symresolution_info::visit_delete_statement (delete_statement* s)
2131 {
2132   delete_statement_symresolution_info di (this);
2133   s->value->visit (&di);
2134 }
2135
2136
2137 void
2138 symresolution_info::visit_symbol (symbol* e)
2139 {
2140   if (e->referent)
2141     return;
2142
2143   vardecl* d = find_var (e->name, 0, e->tok);
2144   if (d)
2145     e->referent = d;
2146   else
2147     {
2148       // new local
2149       vardecl* v = new vardecl;
2150       v->name = e->name;
2151       v->tok = e->tok;
2152       v->set_arity(0, e->tok);
2153       if (current_function)
2154         current_function->locals.push_back (v);
2155       else if (current_probe)
2156         current_probe->locals.push_back (v);
2157       else
2158         // must be probe-condition expression
2159         throw SEMANTIC_ERROR (_("probe condition must not reference undeclared global"), e->tok);
2160       e->referent = v;
2161     }
2162 }
2163
2164
2165 void
2166 symresolution_info::visit_arrayindex (arrayindex* e)
2167 {
2168   visit_arrayindex(e, false);
2169 }
2170
2171 void
2172 symresolution_info::visit_arrayindex (arrayindex* e, bool wildcard_ok)
2173 {
2174   for (unsigned i=0; i<e->indexes.size(); i++)
2175     {
2176       // assuming that if NULL, it was originally a wildcard (*)
2177       if (e->indexes[i] == NULL)
2178         {
2179           if (!wildcard_ok)
2180             throw SEMANTIC_ERROR(_("wildcard not allowed in array index"), e->tok);
2181         }
2182       else
2183         e->indexes[i]->visit (this);
2184     }
2185
2186   symbol *array = NULL;
2187   hist_op *hist = NULL;
2188   classify_indexable(e->base, array, hist);
2189
2190   if (array)
2191     {
2192       if (array->referent)
2193         return;
2194
2195       vardecl* d = find_var (array->name, e->indexes.size (), array->tok);
2196       if (d)
2197         array->referent = d;
2198       else
2199         {
2200           stringstream msg;
2201           msg << _F("unresolved arity-%zu global array %s, missing global declaration?",
2202                     e->indexes.size(), array->name.c_str());
2203           throw SEMANTIC_ERROR (msg.str(), e->tok);
2204         }
2205     }
2206   else
2207     {
2208       assert (hist);
2209       hist->visit (this);
2210     }
2211 }
2212
2213
2214 void
2215 symresolution_info::visit_array_in (array_in* e)
2216 {
2217   visit_arrayindex(e->operand, true);
2218 }
2219
2220
2221 void
2222 symresolution_info::visit_functioncall (functioncall* e)
2223 {
2224   // XXX: we could relax this, if we're going to examine the
2225   // vartracking data recursively.  See testsuite/semko/fortytwo.stp.
2226   if (! (current_function || current_probe))
2227     {
2228       // must be probe-condition expression
2229       throw SEMANTIC_ERROR (_("probe condition must not reference function"), e->tok);
2230     }
2231
2232   for (unsigned i=0; i<e->args.size(); i++)
2233     e->args[i]->visit (this);
2234
2235   if (e->referent)
2236     return;
2237
2238   functiondecl* d = find_function (e->function, e->args.size (), e->tok);
2239   if (d)
2240     e->referent = d;
2241   else
2242     {
2243       string sugs = levenshtein_suggest(e->function, collect_functions(), 5); // print 5 funcs
2244       throw SEMANTIC_ERROR(_F("unresolved function%s",
2245                               sugs.empty() ? "" : (_(" (similar: ") + sugs + ")").c_str()),
2246                            e->tok);
2247     }
2248 }
2249
2250 /*find_var will return an argument other than zero if the name matches the var
2251  * name ie, if the current local name matches the name passed to find_var*/
2252 vardecl*
2253 symresolution_info::find_var (const string& name, int arity, const token* tok)
2254 {
2255   if (current_function || current_probe)
2256     {
2257       // search locals
2258       vector<vardecl*>& locals = (current_function ?
2259                                   current_function->locals :
2260                                   current_probe->locals);
2261
2262
2263       for (unsigned i=0; i<locals.size(); i++)
2264         if (locals[i]->name == name)
2265           {
2266             locals[i]->set_arity (arity, tok);
2267             return locals[i];
2268           }
2269     }
2270
2271   // search function formal parameters (for scalars)
2272   if (arity == 0 && current_function)
2273     for (unsigned i=0; i<current_function->formal_args.size(); i++)
2274       if (current_function->formal_args[i]->name == name)
2275         {
2276           // NB: no need to check arity here: formal args always scalar
2277           current_function->formal_args[i]->set_arity (0, tok);
2278           return current_function->formal_args[i];
2279         }
2280
2281   // search processed globals
2282   for (unsigned i=0; i<session.globals.size(); i++)
2283     if (session.globals[i]->name == name)
2284       {
2285         if (! session.suppress_warnings)
2286           {
2287             vardecl* v = session.globals[i];
2288             stapfile* f = tok->location.file;
2289             // clog << "resolved " << *tok << " to global " << *v->tok << endl;
2290             if (v->tok->location.file != f && !f->synthetic)
2291               {
2292                 session.print_warning (_F("cross-file global variable reference to %s from",
2293                                           lex_cast(*v->tok).c_str()), tok);
2294               }
2295           }
2296         session.globals[i]->set_arity (arity, tok);
2297         return session.globals[i];
2298       }
2299
2300   // search library globals
2301   for (unsigned i=0; i<session.library_files.size(); i++)
2302     {
2303       stapfile* f = session.library_files[i];
2304       for (unsigned j=0; j<f->globals.size(); j++)
2305         {
2306           vardecl* g = f->globals[j];
2307           if (g->name == name)
2308             {
2309               g->set_arity (arity, tok);
2310
2311               // put library into the queue if not already there
2312               if (find (session.files.begin(), session.files.end(), f)
2313                   == session.files.end())
2314                 session.files.push_back (f);
2315
2316               return g;
2317             }
2318         }
2319     }
2320
2321   return 0;
2322 }
2323
2324
2325 functiondecl*
2326 symresolution_info::find_function (const string& name, unsigned arity, const token *tok)
2327 {
2328   // the common path
2329   if (session.functions.find(name) != session.functions.end())
2330     {
2331       functiondecl* fd = session.functions[name];
2332       assert (fd->name == name);
2333       if (fd->formal_args.size() == arity)
2334         return fd;
2335
2336       throw SEMANTIC_ERROR(_F("arity mismatch found (function '%s' takes %zu args)",
2337                               name.c_str(), fd->formal_args.size()), tok, fd->tok);
2338     }
2339
2340   // search library functions
2341   for (unsigned i=0; i<session.library_files.size(); i++)
2342     {
2343       stapfile* f = session.library_files[i];
2344       for (unsigned j=0; j<f->functions.size(); j++)
2345         if (f->functions[j]->name == name)
2346           {
2347             if (f->functions[j]->formal_args.size() == arity)
2348               {
2349                 // put library into the queue if not already there
2350                 if (0) // session.verbose_resolution
2351                   cerr << _F("      function %s is defined from %s",
2352                              name.c_str(), f->name.c_str()) << endl;
2353
2354                 if (find (session.files.begin(), session.files.end(), f)
2355                     == session.files.end())
2356                   session.files.push_back (f);
2357                 // else .. print different message?
2358
2359                 return f->functions[j];
2360               }
2361
2362             throw SEMANTIC_ERROR(_F("arity mismatch found (function '%s' takes %zu args)",
2363                                     name.c_str(), f->functions[j]->formal_args.size()),
2364                                     tok, f->functions[j]->tok);
2365           }
2366     }
2367
2368   return 0;
2369 }
2370
2371 set<string>
2372 symresolution_info::collect_functions(void)
2373 {
2374   set<string> funcs;
2375
2376   for (map<string,functiondecl*>::const_iterator it = session.functions.begin();
2377        it != session.functions.end(); ++it)
2378     funcs.insert(it->first);
2379
2380   // search library functions
2381   for (unsigned i=0; i<session.library_files.size(); i++)
2382     {
2383       stapfile* f = session.library_files[i];
2384       for (unsigned j=0; j<f->functions.size(); j++)
2385         funcs.insert(f->functions[j]->name);
2386     }
2387
2388   return funcs;
2389 }
2390
2391 // ------------------------------------------------------------------------
2392 // optimization
2393
2394
2395 // Do away with functiondecls that are never (transitively) called
2396 // from probes.
2397 void semantic_pass_opt1 (systemtap_session& s, bool& relaxed_p)
2398 {
2399   functioncall_traversing_visitor ftv;
2400   for (unsigned i=0; i<s.probes.size(); i++)
2401     {
2402       s.probes[i]->body->visit (& ftv);
2403       if (s.probes[i]->sole_location()->condition)
2404         s.probes[i]->sole_location()->condition->visit (& ftv);
2405     }
2406   vector<functiondecl*> new_unused_functions;
2407   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
2408     {
2409       functiondecl* fd = it->second;
2410       if (ftv.seen.find(fd) == ftv.seen.end())
2411         {
2412           if (! fd->synthetic && s.is_user_file(fd->tok->location.file->name))
2413             s.print_warning (_F("Eliding unused function '%s'", fd->name.c_str()), fd->tok);
2414           // s.functions.erase (it); // NB: can't, since we're already iterating upon it
2415           new_unused_functions.push_back (fd);
2416           relaxed_p = false;
2417         }
2418     }
2419   for (unsigned i=0; i<new_unused_functions.size(); i++)
2420     {
2421       map<string,functiondecl*>::iterator where = s.functions.find (new_unused_functions[i]->name);
2422       assert (where != s.functions.end());
2423       s.functions.erase (where);
2424       if (s.tapset_compile_coverage)
2425         s.unused_functions.push_back (new_unused_functions[i]);
2426     }
2427 }
2428
2429
2430 // ------------------------------------------------------------------------
2431
2432 // Do away with local & global variables that are never
2433 // written nor read.
2434 void semantic_pass_opt2 (systemtap_session& s, bool& relaxed_p, unsigned iterations)
2435 {
2436   varuse_collecting_visitor vut(s);
2437
2438   for (unsigned i=0; i<s.probes.size(); i++)
2439     {
2440       s.probes[i]->body->visit (& vut);
2441
2442       if (s.probes[i]->sole_location()->condition)
2443         s.probes[i]->sole_location()->condition->visit (& vut);
2444     }
2445
2446   // NB: Since varuse_collecting_visitor also traverses down
2447   // actually called functions, we don't need to explicitly
2448   // iterate over them.  Uncalled ones should have been pruned
2449   // in _opt1 above.
2450   //
2451   // for (unsigned i=0; i<s.functions.size(); i++)
2452   //   s.functions[i]->body->visit (& vut);
2453
2454   // Now in vut.read/written, we have a mixture of all locals, globals
2455
2456   for (unsigned i=0; i<s.probes.size(); i++)
2457     for (unsigned j=0; j<s.probes[i]->locals.size(); /* see below */)
2458       {
2459         vardecl* l = s.probes[i]->locals[j];
2460
2461         // skip over "special" locals
2462         if (l->synthetic) { j++; continue; }
2463
2464         if (vut.read.find (l) == vut.read.end() &&
2465             vut.written.find (l) == vut.written.end())
2466           {
2467             if (s.is_user_file(l->tok->location.file->name))
2468               s.print_warning (_F("Eliding unused variable '%s'", l->name.c_str()), l->tok);
2469             if (s.tapset_compile_coverage) {
2470               s.probes[i]->unused_locals.push_back
2471                       (s.probes[i]->locals[j]);
2472             }
2473             s.probes[i]->locals.erase(s.probes[i]->locals.begin() + j);
2474             relaxed_p = false;
2475             // don't increment j
2476           }
2477         else
2478           {
2479             if (vut.written.find (l) == vut.written.end())
2480               if (iterations == 0 && ! s.suppress_warnings)
2481                 {
2482                   set<string> vars;
2483                   vector<vardecl*>::iterator it;
2484                   for (it = s.probes[i]->locals.begin(); it != s.probes[i]->locals.end(); it++)
2485                     vars.insert((*it)->name);
2486                   for (it = s.globals.begin(); it != s.globals.end(); it++)
2487                     vars.insert((*it)->name);
2488
2489                   vars.erase(l->name);
2490                   string sugs = levenshtein_suggest(l->name, vars, 5); // suggest top 5 vars
2491                   s.print_warning (_F("never-assigned local variable '%s'%s",
2492                                       l->name.c_str(), (sugs.empty() ? "" :
2493                                       (_(" (similar: ") + sugs + ")")).c_str()), l->tok);
2494                 }
2495             j++;
2496           }
2497       }
2498
2499   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
2500     {
2501       functiondecl *fd = it->second;
2502       for (unsigned j=0; j<fd->locals.size(); /* see below */)
2503         {
2504           vardecl* l = fd->locals[j];
2505           if (vut.read.find (l) == vut.read.end() &&
2506               vut.written.find (l) == vut.written.end())
2507             {
2508               if (s.is_user_file(l->tok->location.file->name))
2509                 s.print_warning (_F("Eliding unused variable '%s'", l->name.c_str()), l->tok);
2510               if (s.tapset_compile_coverage) {
2511                 fd->unused_locals.push_back (fd->locals[j]);
2512               }
2513               fd->locals.erase(fd->locals.begin() + j);
2514               relaxed_p = false;
2515               // don't increment j
2516             }
2517           else
2518             {
2519               if (vut.written.find (l) == vut.written.end())
2520                 if (iterations == 0 && ! s.suppress_warnings)
2521                   {
2522                     set<string> vars;
2523                     vector<vardecl*>::iterator it;
2524                     for (it = fd->formal_args.begin() ;
2525                          it != fd->formal_args.end(); it++)
2526                         vars.insert((*it)->name);
2527                     for (it = fd->locals.begin(); it != fd->locals.end(); it++)
2528                         vars.insert((*it)->name);
2529                     for (it = s.globals.begin(); it != s.globals.end(); it++)
2530                         vars.insert((*it)->name);
2531
2532                     vars.erase(l->name);
2533                     string sugs = levenshtein_suggest(l->name, vars, 5); // suggest top 5 vars
2534                     s.print_warning (_F("never-assigned local variable '%s'%s",
2535                                         l->name.c_str(), (sugs.empty() ? "" :
2536                                         (_(" (similar: ") + sugs + ")")).c_str()), l->tok);
2537                   }
2538
2539               j++;
2540             }
2541         }
2542     }
2543   for (unsigned i=0; i<s.globals.size(); /* see below */)
2544     {
2545       vardecl* l = s.globals[i];
2546       if (vut.read.find (l) == vut.read.end() &&
2547           vut.written.find (l) == vut.written.end())
2548         {
2549           if (s.is_user_file(l->tok->location.file->name))
2550             s.print_warning (_F("Eliding unused variable '%s'", l->name.c_str()), l->tok);
2551           if (s.tapset_compile_coverage) {
2552             s.unused_globals.push_back(s.globals[i]);
2553           }
2554           s.globals.erase(s.globals.begin() + i);
2555           relaxed_p = false;
2556           // don't increment i
2557         }
2558       else
2559         {
2560           if (vut.written.find (l) == vut.written.end() && ! l->init) // no initializer
2561             if (iterations == 0 && ! s.suppress_warnings)
2562               {
2563                 set<string> vars;
2564                 vector<vardecl*>::iterator it;
2565                 for (it = s.globals.begin(); it != s.globals.end(); it++)
2566                   if (l->name != (*it)->name)
2567                     vars.insert((*it)->name);
2568
2569                 string sugs = levenshtein_suggest(l->name, vars, 5); // suggest top 5 vars
2570                 s.print_warning (_F("never-assigned global variable '%s'%s",
2571                                     l->name.c_str(), (sugs.empty() ? "" :
2572                                     (_(" (similar: ") + sugs + ")")).c_str()), l->tok);
2573               }
2574
2575           i++;
2576         }
2577     }
2578 }
2579
2580
2581 // ------------------------------------------------------------------------
2582
2583 struct dead_assignment_remover: public update_visitor
2584 {
2585   systemtap_session& session;
2586   bool& relaxed_p;
2587   const varuse_collecting_visitor& vut;
2588
2589   dead_assignment_remover(systemtap_session& s, bool& r,
2590                           const varuse_collecting_visitor& v):
2591     session(s), relaxed_p(r), vut(v) {}
2592
2593   void visit_assignment (assignment* e);
2594   void visit_try_block (try_block *s);
2595 };
2596
2597
2598 // symbol_fetcher augmented to allow target-symbol types, but NULLed.
2599 struct assignment_symbol_fetcher
2600   : public symbol_fetcher
2601 {
2602   assignment_symbol_fetcher (symbol *&sym): symbol_fetcher(sym)
2603   {}
2604
2605   void visit_target_symbol (target_symbol* e)
2606   {
2607     sym = NULL;
2608   }
2609
2610   void visit_atvar_op (atvar_op *e)
2611   {
2612     sym = NULL;
2613   }
2614
2615   void visit_cast_op (cast_op* e)
2616   {
2617     sym = NULL;
2618   }
2619
2620   void visit_autocast_op (autocast_op* e)
2621   {
2622     sym = NULL;
2623   }
2624
2625   void throwone (const token* t)
2626   {
2627     if (t->type == tok_operator && t->content == ".")
2628       // guess someone misused . in $foo->bar.baz expression
2629       // XXX why are we only checking this in lvalues?
2630       throw SEMANTIC_ERROR (_("Expecting lvalue expression, try -> instead"), t);
2631     else
2632       throw SEMANTIC_ERROR (_("Expecting lvalue expression"), t);
2633   }
2634 };
2635
2636 symbol *
2637 get_assignment_symbol_within_expression (expression *e)
2638 {
2639   symbol *sym = NULL;
2640   assignment_symbol_fetcher fetcher(sym);
2641   e->visit (&fetcher);
2642   return sym; // NB: may be null!
2643 }
2644
2645
2646 void
2647 dead_assignment_remover::visit_assignment (assignment* e)
2648 {
2649   replace (e->left);
2650   replace (e->right);
2651
2652   symbol* left = get_assignment_symbol_within_expression (e->left);
2653   if (left) // not unresolved $target, so intended sideeffect may be elided
2654     {
2655       vardecl* leftvar = left->referent;
2656       if (vut.read.find(leftvar) == vut.read.end()) // var never read?
2657         {
2658           // NB: Not so fast!  The left side could be an array whose
2659           // index expressions may have side-effects.  This would be
2660           // OK if we could replace the array assignment with a
2661           // statement-expression containing all the index expressions
2662           // and the rvalue... but we can't.
2663           // Another possibility is that we have an unread global variable
2664           // which are kept for probe end value display.
2665
2666           bool is_global = false;
2667           vector<vardecl*>::iterator it;
2668           for (it = session.globals.begin(); it != session.globals.end(); it++)
2669             if (leftvar->name == (*it)->name)
2670               {
2671                 is_global = true;
2672                 break;
2673               }
2674
2675           varuse_collecting_visitor lvut(session);
2676           e->left->visit (& lvut);
2677           if (lvut.side_effect_free () && !is_global // XXX: use _wrt() once we track focal_vars
2678               && !leftvar->synthetic) // don't elide assignment to synthetic $context variables
2679             {
2680               /* PR 1119: NB: This is not necessary here.  A write-only
2681                  variable will also be elided soon at the next _opt2 iteration.
2682               if (e->left->tok->location.file->name == session.user_file->name) // !tapset
2683                 session.print_warning("eliding write-only ", *e->left->tok);
2684               else
2685               */
2686               if (session.is_user_file(e->left->tok->location.file->name))
2687                 session.print_warning(_F("Eliding assignment to '%s'", leftvar->name.c_str()), e->tok);
2688               provide (e->right); // goodbye assignment*
2689               relaxed_p = false;
2690               return;
2691             }
2692         }
2693     }
2694   provide (e);
2695 }
2696
2697
2698 void
2699 dead_assignment_remover::visit_try_block (try_block *s)
2700 {
2701   replace (s->try_block);
2702   if (s->catch_error_var)
2703     {
2704       vardecl* errvar = s->catch_error_var->referent;
2705       if (vut.read.find(errvar) == vut.read.end()) // never read?
2706         {
2707           if (session.verbose>2)
2708             clog << _F("Eliding unused error string catcher %s at %s",
2709                       errvar->name.c_str(), lex_cast(*s->tok).c_str()) << endl;
2710           s->catch_error_var = 0;
2711         }
2712     }
2713   replace (s->catch_block);
2714   provide (s);
2715 }
2716
2717
2718 // Let's remove assignments to variables that are never read.  We
2719 // rewrite "(foo = expr)" as "(expr)".  This makes foo a candidate to
2720 // be optimized away as an unused variable, and expr a candidate to be
2721 // removed as a side-effect-free statement expression.  Wahoo!
2722 void semantic_pass_opt3 (systemtap_session& s, bool& relaxed_p)
2723 {
2724   // Recompute the varuse data, which will probably match the opt2
2725   // copy of the computation, except for those totally unused
2726   // variables that opt2 removed.
2727   varuse_collecting_visitor vut(s);
2728   for (unsigned i=0; i<s.probes.size(); i++)
2729     s.probes[i]->body->visit (& vut); // includes reachable functions too
2730
2731   dead_assignment_remover dar (s, relaxed_p, vut);
2732   // This instance may be reused for multiple probe/function body trims.
2733
2734   for (unsigned i=0; i<s.probes.size(); i++)
2735     dar.replace (s.probes[i]->body);
2736   for (map<string,functiondecl*>::iterator it = s.functions.begin();
2737        it != s.functions.end(); it++)
2738     dar.replace (it->second->body);
2739   // The rewrite operation is performed within the visitor.
2740
2741   // XXX: we could also zap write-only globals here
2742 }
2743
2744
2745 // ------------------------------------------------------------------------
2746
2747 struct dead_stmtexpr_remover: public update_visitor
2748 {
2749   systemtap_session& session;
2750   bool& relaxed_p;
2751   set<vardecl*> focal_vars; // vars considered subject to side-effects
2752
2753   dead_stmtexpr_remover(systemtap_session& s, bool& r):
2754     session(s), relaxed_p(r) {}
2755
2756   void visit_block (block *s);
2757   void visit_try_block (try_block *s);
2758   void visit_null_statement (null_statement *s);
2759   void visit_if_statement (if_statement* s);
2760   void visit_foreach_loop (foreach_loop *s);
2761   void visit_for_loop (for_loop *s);
2762   // XXX: and other places where stmt_expr's might be nested
2763
2764   void visit_expr_statement (expr_statement *s);
2765 };
2766
2767
2768 void
2769 dead_stmtexpr_remover::visit_null_statement (null_statement *s)
2770 {
2771   // easy!
2772   if (session.verbose>2)
2773     clog << _("Eliding side-effect-free null statement ") << *s->tok << endl;
2774   s = 0;
2775   provide (s);
2776 }
2777
2778
2779 void
2780 dead_stmtexpr_remover::visit_block (block *s)
2781 {
2782   vector<statement*> new_stmts;
2783   for (unsigned i=0; i<s->statements.size(); i++ )
2784     {
2785       statement* new_stmt = require (s->statements[i], true);
2786       if (new_stmt != 0)
2787         {
2788           // flatten nested blocks into this one
2789           block *b = dynamic_cast<block *>(new_stmt);
2790           if (b)
2791             {
2792               if (session.verbose>2)
2793                 clog << _("Flattening nested block ") << *b->tok << endl;
2794               new_stmts.insert(new_stmts.end(),
2795                   b->statements.begin(), b->statements.end());
2796               relaxed_p = false;
2797             }
2798           else
2799             new_stmts.push_back (new_stmt);
2800         }
2801     }
2802   if (new_stmts.size() == 0)
2803     {
2804       if (session.verbose>2)
2805         clog << _("Eliding side-effect-free empty block ") << *s->tok << endl;
2806       s = 0;
2807     }
2808   else if (new_stmts.size() == 1)
2809     {
2810       if (session.verbose>2)
2811         clog << _("Eliding side-effect-free singleton block ") << *s->tok << endl;
2812       provide (new_stmts[0]);
2813       return;
2814     }
2815   else
2816     s->statements = new_stmts;
2817   provide (s);
2818 }
2819
2820
2821 void
2822 dead_stmtexpr_remover::visit_try_block (try_block *s)
2823 {
2824   replace (s->try_block, true);
2825   replace (s->catch_block, true); // null catch{} is ok and useful
2826   if (s->try_block == 0)
2827     {
2828       if (session.verbose>2)
2829         clog << _("Eliding empty try {} block ") << *s->tok << endl;
2830       s = 0;
2831     }
2832   provide (s);
2833 }
2834
2835
2836 void
2837 dead_stmtexpr_remover::visit_if_statement (if_statement *s)
2838 {
2839   replace (s->thenblock, true);
2840   replace (s->elseblock, true);
2841
2842   if (s->thenblock == 0)
2843     {
2844       if (s->elseblock == 0)
2845         {
2846           // We may be able to elide this statement, if the condition
2847           // expression is side-effect-free.
2848           varuse_collecting_visitor vct(session);
2849           s->condition->visit(& vct);
2850           if (vct.side_effect_free ())
2851             {
2852               if (session.verbose>2)
2853                 clog << _("Eliding side-effect-free if statement ")
2854                      << *s->tok << endl;
2855               s = 0; // yeah, baby
2856             }
2857           else
2858             {
2859               // We can still turn it into a simple expr_statement though...
2860               if (session.verbose>2)
2861                 clog << _("Creating simple evaluation from if statement ")
2862                      << *s->tok << endl;
2863               expr_statement *es = new expr_statement;
2864               es->value = s->condition;
2865               es->tok = es->value->tok;
2866               provide (es);
2867               return;
2868             }
2869         }
2870       else
2871         {
2872           // For an else without a then, we can invert the condition logic to
2873           // avoid having a null statement in the thenblock
2874           if (session.verbose>2)
2875             clog << _("Inverting the condition of if statement ")
2876                  << *s->tok << endl;
2877           unary_expression *ue = new unary_expression;
2878           ue->operand = s->condition;
2879           ue->tok = ue->operand->tok;
2880           ue->op = "!";
2881           s->condition = ue;
2882           s->thenblock = s->elseblock;
2883           s->elseblock = 0;
2884         }
2885     }
2886   provide (s);
2887 }
2888
2889 void
2890 dead_stmtexpr_remover::visit_foreach_loop (foreach_loop *s)
2891 {
2892   replace (s->block, true);
2893
2894   if (s->block == 0)
2895     {
2896       // XXX what if s->limit has side effects?
2897       // XXX what about s->indexes or s->value used outside the loop?
2898       if(session.verbose > 2)
2899         clog << _("Eliding side-effect-free foreach statement ") << *s->tok << endl;
2900       s = 0; // yeah, baby
2901     }
2902   provide (s);
2903 }
2904
2905 void
2906 dead_stmtexpr_remover::visit_for_loop (for_loop *s)
2907 {
2908   replace (s->block, true);
2909
2910   if (s->block == 0)
2911     {
2912       // We may be able to elide this statement, if the condition
2913       // expression is side-effect-free.
2914       varuse_collecting_visitor vct(session);
2915       if (s->init) s->init->visit(& vct);
2916       s->cond->visit(& vct);
2917       if (s->incr) s->incr->visit(& vct);
2918       if (vct.side_effect_free ())
2919         {
2920           if (session.verbose>2)
2921             clog << _("Eliding side-effect-free for statement ") << *s->tok << endl;
2922           s = 0; // yeah, baby
2923         }
2924       else
2925         {
2926           // Can't elide this whole statement; put a null in there.
2927           s->block = new null_statement(s->tok);
2928         }
2929     }
2930   provide (s);
2931 }
2932
2933
2934
2935 void
2936 dead_stmtexpr_remover::visit_expr_statement (expr_statement *s)
2937 {
2938   // Run a varuse query against the operand expression.  If it has no
2939   // side-effects, replace the entire statement expression by a null
2940   // statement with the provide() call.
2941   //
2942   // Unlike many other visitors, we do *not* traverse this outermost
2943   // one into the expression subtrees.  There is no need - no
2944   // expr_statement nodes will be found there.  (Function bodies
2945   // need to be visited explicitly by our caller.)
2946   //
2947   // NB.  While we don't share nodes in the parse tree, let's not
2948   // deallocate *s anyway, just in case...
2949
2950   varuse_collecting_visitor vut(session);
2951   s->value->visit (& vut);
2952
2953   if (vut.side_effect_free_wrt (focal_vars))
2954     {
2955       /* PR 1119: NB: this message is not a good idea here.  It can
2956          name some arbitrary RHS expression of an assignment.
2957       if (s->value->tok->location.file->name == session.user_file->name) // not tapset
2958         session.print_warning("eliding never-assigned ", *s->value->tok);
2959       else
2960       */
2961       if (session.is_user_file(s->value->tok->location.file->name))
2962         session.print_warning("Eliding side-effect-free expression ", s->tok);
2963
2964       // NB: this 0 pointer is invalid to leave around for any length of
2965       // time, but the parent parse tree objects above handle it.
2966       s = 0;
2967       relaxed_p = false;
2968     }
2969   provide (s);
2970 }
2971
2972
2973 void semantic_pass_opt4 (systemtap_session& s, bool& relaxed_p)
2974 {
2975   // Finally, let's remove some statement-expressions that have no
2976   // side-effect.  These should be exactly those whose private varuse
2977   // visitors come back with an empty "written" and "embedded" lists.
2978
2979   dead_stmtexpr_remover duv (s, relaxed_p);
2980   // This instance may be reused for multiple probe/function body trims.
2981
2982   for (unsigned i=0; i<s.probes.size(); i++)
2983     {
2984       assert_no_interrupts();
2985
2986       derived_probe* p = s.probes[i];
2987
2988       duv.focal_vars.clear ();
2989       duv.focal_vars.insert (s.globals.begin(),
2990                              s.globals.end());
2991       duv.focal_vars.insert (p->locals.begin(),
2992                              p->locals.end());
2993
2994       duv.replace (p->body, true);
2995       if (p->body == 0)
2996         {
2997           if (! s.timing) // PR10070
2998             s.print_warning (_F("side-effect-free probe '%s'", p->name.c_str()), p->tok);
2999
3000           p->body = new null_statement(p->tok);
3001
3002           // XXX: possible duplicate warnings; see below
3003         }
3004     }
3005   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
3006     {
3007       assert_no_interrupts();
3008
3009       functiondecl* fn = it->second;
3010       duv.focal_vars.clear ();
3011       duv.focal_vars.insert (fn->locals.begin(),
3012                              fn->locals.end());
3013       duv.focal_vars.insert (fn->formal_args.begin(),
3014                              fn->formal_args.end());
3015       duv.focal_vars.insert (s.globals.begin(),
3016                              s.globals.end());
3017
3018       duv.replace (fn->body, true);
3019       if (fn->body == 0)
3020         {
3021           s.print_warning (_F("side-effect-free function '%s'", fn->name.c_str()), fn->tok);
3022
3023           fn->body = new null_statement(fn->tok);
3024
3025           // XXX: the next iteration of the outer optimization loop may
3026           // take this new null_statement away again, and thus give us a
3027           // fresh warning.  It would be better if this fixup was performed
3028           // only after the relaxation iterations.
3029           // XXX: or else see bug #6469.
3030         }
3031     }
3032 }
3033
3034
3035 // ------------------------------------------------------------------------
3036
3037 // The goal of this visitor is to reduce top-level expressions in void context
3038 // into separate statements that evaluate each subcomponent of the expression.
3039 // The dead-statement-remover can later remove some parts if they have no side
3040 // effects.
3041 //
3042 // All expressions must be overridden here so we never visit their subexpressions
3043 // accidentally.  Thus, the only visited expressions should be value of an
3044 // expr_statement.
3045 //
3046 // For an expression to replace its expr_statement with something else, it will
3047 // let the new statement provide(), and then provide(0) for itself.  The
3048 // expr_statement will take this as a sign that it's been replaced.
3049 struct void_statement_reducer: public update_visitor
3050 {
3051   systemtap_session& session;
3052   bool& relaxed_p;
3053   set<vardecl*> focal_vars; // vars considered subject to side-effects
3054
3055   void_statement_reducer(systemtap_session& s, bool& r):
3056     session(s), relaxed_p(r) {}
3057
3058   void visit_expr_statement (expr_statement* s);
3059
3060   // expressions in conditional / loop controls are definitely a side effect,
3061   // but still recurse into the child statements
3062   void visit_if_statement (if_statement* s);
3063   void visit_for_loop (for_loop* s);
3064   void visit_foreach_loop (foreach_loop* s);
3065
3066   // these expressions get rewritten into their statement equivalents
3067   void visit_logical_or_expr (logical_or_expr* e);
3068   void visit_logical_and_expr (logical_and_expr* e);
3069   void visit_ternary_expression (ternary_expression* e);
3070
3071   // all of these can (usually) be reduced into simpler statements
3072   void visit_binary_expression (binary_expression* e);
3073   void visit_unary_expression (unary_expression* e);
3074   void visit_regex_query (regex_query* e); // XXX depends on subexpr extraction
3075   void visit_comparison (comparison* e);
3076   void visit_concatenation (concatenation* e);
3077   void visit_functioncall (functioncall* e);
3078   void visit_print_format (print_format* e);
3079   void visit_target_symbol (target_symbol* e);
3080   void visit_atvar_op (atvar_op* e);
3081   void visit_cast_op (cast_op* e);
3082   void visit_autocast_op (autocast_op* e);
3083   void visit_defined_op (defined_op* e);
3084
3085   // these are a bit hairy to grok due to the intricacies of indexables and
3086   // stats, so I'm chickening out and skipping them...
3087   void visit_array_in (array_in* e) { provide (e); }
3088   void visit_arrayindex (arrayindex* e) { provide (e); }
3089   void visit_stat_op (stat_op* e) { provide (e); }
3090   void visit_hist_op (hist_op* e) { provide (e); }
3091
3092   // these can't be reduced because they always have an effect
3093   void visit_return_statement (return_statement* s) { provide (s); }
3094   void visit_delete_statement (delete_statement* s) { provide (s); }
3095   void visit_pre_crement (pre_crement* e) { provide (e); }
3096   void visit_post_crement (post_crement* e) { provide (e); }
3097   void visit_assignment (assignment* e) { provide (e); }
3098
3099 private:
3100   void reduce_target_symbol (target_symbol* e, expression* operand=NULL);
3101 };
3102
3103
3104 void
3105 void_statement_reducer::visit_expr_statement (expr_statement* s)
3106 {
3107   replace (s->value, true);
3108
3109   // if the expression provides 0, that's our signal that a new
3110   // statement has been provided, so we shouldn't provide this one.
3111   if (s->value != 0)
3112     provide(s);
3113 }
3114
3115 void
3116 void_statement_reducer::visit_if_statement (if_statement* s)
3117 {
3118   // s->condition is never void
3119   replace (s->thenblock);
3120   replace (s->elseblock);
3121   provide (s);
3122 }
3123
3124 void
3125 void_statement_reducer::visit_for_loop (for_loop* s)
3126 {
3127   // s->init/cond/incr are never void
3128   replace (s->block);
3129   provide (s);
3130 }
3131
3132 void
3133 void_statement_reducer::visit_foreach_loop (foreach_loop* s)
3134 {
3135   // s->indexes/base/value/limit are never void
3136   replace (s->block);
3137   provide (s);
3138 }
3139
3140 void
3141 void_statement_reducer::visit_logical_or_expr (logical_or_expr* e)
3142 {
3143   // In void context, the evaluation of "a || b" is exactly like
3144   // "if (!a) b", so let's do that instead.
3145
3146   if (session.verbose>2)
3147     clog << _("Creating if statement from unused logical-or ")
3148          << *e->tok << endl;
3149
3150   if_statement *is = new if_statement;
3151   is->tok = e->tok;
3152   is->elseblock = 0;
3153
3154   unary_expression *ue = new unary_expression;
3155   ue->operand = e->left;
3156   ue->tok = e->tok;
3157   ue->op = "!";
3158   is->condition = ue;
3159
3160   expr_statement *es = new expr_statement;
3161   es->value = e->right;
3162   es->tok = es->value->tok;
3163   is->thenblock = es;
3164
3165   is->visit(this);
3166   relaxed_p = false;
3167   e = 0;
3168   provide (e);
3169 }
3170
3171 void
3172 void_statement_reducer::visit_logical_and_expr (logical_and_expr* e)
3173 {
3174   // In void context, the evaluation of "a && b" is exactly like
3175   // "if (a) b", so let's do that instead.
3176
3177   if (session.verbose>2)
3178     clog << _("Creating if statement from unused logical-and ")
3179          << *e->tok << endl;
3180
3181   if_statement *is = new if_statement;
3182   is->tok = e->tok;
3183   is->elseblock = 0;
3184   is->condition = e->left;
3185
3186   expr_statement *es = new expr_statement;
3187   es->value = e->right;
3188   es->tok = es->value->tok;
3189   is->thenblock = es;
3190
3191   is->visit(this);
3192   relaxed_p = false;
3193   e = 0;
3194   provide (e);
3195 }
3196
3197 void
3198 void_statement_reducer::visit_ternary_expression (ternary_expression* e)
3199 {
3200   // In void context, the evaluation of "a ? b : c" is exactly like
3201   // "if (a) b else c", so let's do that instead.
3202
3203   if (session.verbose>2)
3204     clog << _("Creating if statement from unused ternary expression ")
3205          << *e->tok << endl;
3206
3207   if_statement *is = new if_statement;
3208   is->tok = e->tok;
3209   is->condition = e->cond;
3210
3211   expr_statement *es = new expr_statement;
3212   es->value = e->truevalue;
3213   es->tok = es->value->tok;
3214   is->thenblock = es;
3215
3216   es = new expr_statement;
3217   es->value = e->falsevalue;
3218   es->tok = es->value->tok;
3219   is->elseblock = es;
3220
3221   is->visit(this);
3222   relaxed_p = false;
3223   e = 0;
3224   provide (e);
3225 }
3226
3227 void
3228 void_statement_reducer::visit_binary_expression (binary_expression* e)
3229 {
3230   // When the result of a binary operation isn't needed, it's just as good to
3231   // evaluate the operands as sequential statements in a block.
3232
3233   if (session.verbose>2)
3234     clog << _("Eliding unused binary ") << *e->tok << endl;
3235
3236   block *b = new block;
3237   b->tok = e->tok;
3238
3239   expr_statement *es = new expr_statement;
3240   es->value = e->left;
3241   es->tok = es->value->tok;
3242   b->statements.push_back(es);
3243
3244   es = new expr_statement;
3245   es->value = e->right;
3246   es->tok = es->value->tok;
3247   b->statements.push_back(es);
3248
3249   b->visit(this);
3250   relaxed_p = false;
3251   e = 0;
3252   provide (e);
3253 }
3254
3255 void
3256 void_statement_reducer::visit_unary_expression (unary_expression* e)
3257 {
3258   // When the result of a unary operation isn't needed, it's just as good to
3259   // evaluate the operand directly
3260
3261   if (session.verbose>2)
3262     clog << _("Eliding unused unary ") << *e->tok << endl;
3263
3264   relaxed_p = false;
3265   e->operand->visit(this);
3266 }
3267
3268 void
3269 void_statement_reducer::visit_regex_query (regex_query* e)
3270 {
3271   // TODOXXX After subexpression extraction is implemented,
3272   // regular expression matches *may* have side-effects in
3273   // terms of producing matched subexpressions, e.g.:
3274   //
3275   //   str =~ "pat"; println(matched(0));
3276   //
3277   // It's debatable if we want to actually allow this, though.
3278
3279   // Treat e as a unary expression on the left operand -- since the
3280   // right hand side must be a literal (as verified by the parser),
3281   // evaluating it never has side effects.
3282
3283   if (session.verbose>2)
3284     clog << _("Eliding regex query ") << *e->tok << endl;
3285
3286   relaxed_p = false;
3287   e->left->visit(this);
3288 }
3289
3290 void
3291 void_statement_reducer::visit_comparison (comparison* e)
3292 {
3293   visit_binary_expression(e);
3294 }
3295
3296 void
3297 void_statement_reducer::visit_concatenation (concatenation* e)
3298 {
3299   visit_binary_expression(e);
3300 }
3301
3302 void
3303 void_statement_reducer::visit_functioncall (functioncall* e)
3304 {
3305   // If a function call is pure and its result ignored, we can elide the call
3306   // and just evaluate the arguments in sequence
3307
3308   if (!e->args.size())
3309     {
3310       provide (e);
3311       return;
3312     }
3313
3314   varuse_collecting_visitor vut(session);
3315   vut.seen.insert (e->referent);
3316   vut.current_function = e->referent;
3317   e->referent->body->visit (& vut);
3318   if (!vut.side_effect_free_wrt (focal_vars))
3319     {
3320       provide (e);
3321       return;
3322     }
3323
3324   if (session.verbose>2)
3325     clog << _("Eliding side-effect-free function call ") << *e->tok << endl;
3326
3327   block *b = new block;
3328   b->tok = e->tok;
3329
3330   for (unsigned i=0; i<e->args.size(); i++ )
3331     {
3332       expr_statement *es = new expr_statement;
3333       es->value = e->args[i];
3334       es->tok = es->value->tok;
3335       b->statements.push_back(es);
3336     }
3337
3338   b->visit(this);
3339   relaxed_p = false;
3340   e = 0;
3341   provide (e);
3342 }
3343
3344 void
3345 void_statement_reducer::visit_print_format (print_format* e)
3346 {
3347   // When an sprint's return value is ignored, we can simply evaluate the
3348   // arguments in sequence
3349
3350   if (e->print_to_stream || !e->args.size())
3351     {
3352       provide (e);
3353       return;
3354     }
3355
3356   if (session.verbose>2)
3357     clog << _("Eliding unused print ") << *e->tok << endl;
3358
3359   block *b = new block;
3360   b->tok = e->tok;
3361
3362   for (unsigned i=0; i<e->args.size(); i++ )
3363     {
3364       expr_statement *es = new expr_statement;
3365       es->value = e->args[i];
3366       es->tok = es->value->tok;
3367       b->statements.push_back(es);
3368     }
3369
3370   b->visit(this);
3371   relaxed_p = false;
3372   e = 0;
3373   provide (e);
3374 }
3375
3376 void
3377 void_statement_reducer::reduce_target_symbol (target_symbol* e,
3378                                               expression* operand)
3379 {
3380   // When the result of any target_symbol isn't needed, it's just as good to
3381   // evaluate the operand and any array indexes directly
3382
3383   block *b = new block;
3384   b->tok = e->tok;
3385
3386   if (operand)
3387     {
3388       expr_statement *es = new expr_statement;
3389       es->value = operand;
3390       es->tok = es->value->tok;
3391       b->statements.push_back(es);
3392     }
3393
3394   for (unsigned i=0; i<e->components.size(); i++ )
3395     {
3396       if (e->components[i].type != target_symbol::comp_expression_array_index)
3397         continue;
3398
3399       expr_statement *es = new expr_statement;
3400       es->value = e->components[i].expr_index;
3401       es->tok = es->value->tok;
3402       b->statements.push_back(es);
3403     }
3404
3405   b->visit(this);
3406   relaxed_p = false;
3407   e = 0;
3408   provide (e);
3409 }
3410
3411 void
3412 void_statement_reducer::visit_atvar_op (atvar_op* e)
3413 {
3414   if (session.verbose>2)
3415     clog << _("Eliding unused target symbol ") << *e->tok << endl;
3416   reduce_target_symbol (e);
3417 }
3418
3419 void
3420 void_statement_reducer::visit_target_symbol (target_symbol* e)
3421 {
3422   if (session.verbose>2)
3423     clog << _("Eliding unused target symbol ") << *e->tok << endl;
3424   reduce_target_symbol (e);
3425 }
3426
3427 void
3428 void_statement_reducer::visit_cast_op (cast_op* e)
3429 {
3430   if (session.verbose>2)
3431     clog << _("Eliding unused typecast ") << *e->tok << endl;
3432   reduce_target_symbol (e, e->operand);
3433 }
3434
3435 void
3436 void_statement_reducer::visit_autocast_op (autocast_op* e)
3437 {
3438   if (session.verbose>2)
3439     clog << _("Eliding unused autocast ") << *e->tok << endl;
3440   reduce_target_symbol (e, e->operand);
3441 }
3442
3443
3444 void
3445 void_statement_reducer::visit_defined_op (defined_op* e)
3446 {
3447   // When the result of a @defined operation isn't needed, just elide
3448   // it entirely.  Its operand $expression must already be
3449   // side-effect-free.
3450
3451   if (session.verbose>2)
3452     clog << _("Eliding unused check ") << *e->tok << endl;
3453
3454   relaxed_p = false;
3455   e = 0;
3456   provide (e);
3457 }
3458
3459
3460
3461 void semantic_pass_opt5 (systemtap_session& s, bool& relaxed_p)
3462 {
3463   // Let's simplify statements with unused computed values.
3464
3465   void_statement_reducer vuv (s, relaxed_p);
3466   // This instance may be reused for multiple probe/function body trims.
3467
3468   vuv.focal_vars.insert (s.globals.begin(), s.globals.end());
3469
3470   for (unsigned i=0; i<s.probes.size(); i++)
3471     vuv.replace (s.probes[i]->body);
3472   for (map<string,functiondecl*>::iterator it = s.functions.begin();
3473        it != s.functions.end(); it++)
3474     vuv.replace (it->second->body);
3475 }
3476
3477
3478 struct const_folder: public update_visitor
3479 {
3480   systemtap_session& session;
3481   bool& relaxed_p;
3482
3483   const_folder(systemtap_session& s, bool& r):
3484     session(s), relaxed_p(r), last_number(0), last_string(0) {}
3485
3486   literal_number* last_number;
3487   literal_number* get_number(expression*& e);
3488   void visit_literal_number (literal_number* e);
3489
3490   literal_string* last_string;
3491   literal_string* get_string(expression*& e);
3492   void visit_literal_string (literal_string* e);
3493
3494   void get_literal(expression*& e, literal_number*& n, literal_string*& s);
3495
3496   void visit_if_statement (if_statement* s);
3497   void visit_for_loop (for_loop* s);
3498   void visit_foreach_loop (foreach_loop* s);
3499   void visit_binary_expression (binary_expression* e);
3500   void visit_unary_expression (unary_expression* e);
3501   void visit_logical_or_expr (logical_or_expr* e);
3502   void visit_logical_and_expr (logical_and_expr* e);
3503   // void visit_regex_query (regex_query* e); // XXX: would require executing dfa at compile-time
3504   void visit_comparison (comparison* e);
3505   void visit_concatenation (concatenation* e);
3506   void visit_ternary_expression (ternary_expression* e);
3507   void visit_defined_op (defined_op* e);
3508   void visit_target_symbol (target_symbol* e);
3509 };
3510
3511 void
3512 const_folder::get_literal(expression*& e,
3513                           literal_number*& n,
3514                           literal_string*& s)
3515 {
3516   replace (e);
3517   n = (e == last_number) ? last_number : NULL;
3518   s = (e == last_string) ? last_string : NULL;
3519 }
3520
3521 literal_number*
3522 const_folder::get_number(expression*& e)
3523 {
3524   replace (e);
3525   return (e == last_number) ? last_number : NULL;
3526 }
3527
3528 void
3529 const_folder::visit_literal_number (literal_number* e)
3530 {
3531   last_number = e;
3532   provide (e);
3533 }
3534
3535 literal_string*
3536 const_folder::get_string(expression*& e)
3537 {
3538   replace (e);
3539   return (e == last_string) ? last_string : NULL;
3540 }
3541
3542 void
3543 const_folder::visit_literal_string (literal_string* e)
3544 {
3545   last_string = e;
3546   provide (e);
3547 }
3548
3549 void
3550 const_folder::visit_if_statement (if_statement* s)
3551 {
3552   literal_number* cond = get_number (s->condition);
3553   if (!cond)
3554     {
3555       replace (s->thenblock);
3556       replace (s->elseblock);
3557       provide (s);
3558     }
3559   else
3560     {
3561       if (session.verbose>2)
3562         clog << _F("Collapsing constant-%" PRIi64 " if-statement %s",
3563                    cond->value, lex_cast(*s->tok).c_str()) << endl;
3564       relaxed_p = false;
3565
3566       statement* n = cond->value ? s->thenblock : s->elseblock;
3567       if (n)
3568         n->visit (this);
3569       else
3570         provide (new null_statement (s->tok));
3571     }
3572 }
3573
3574 void
3575 const_folder::visit_for_loop (for_loop* s)
3576 {
3577   literal_number* cond = get_number (s->cond);
3578   if (!cond || cond->value)
3579     {
3580       replace (s->init);
3581       replace (s->incr);
3582       replace (s->block);
3583       provide (s);
3584     }
3585   else
3586     {
3587       if (session.verbose>2)
3588         clog << _("Collapsing constantly-false for-loop ") << *s->tok << endl;
3589       relaxed_p = false;
3590
3591       if (s->init)
3592         s->init->visit (this);
3593       else
3594         provide (new null_statement (s->tok));
3595     }
3596 }
3597
3598 void
3599 const_folder::visit_foreach_loop (foreach_loop* s)
3600 {
3601   literal_number* limit = get_number (s->limit);
3602   if (!limit || limit->value > 0)
3603     {
3604       for (unsigned i = 0; i < s->indexes.size(); ++i)
3605         replace (s->indexes[i]);
3606       replace (s->base);
3607       replace (s->value);
3608       replace (s->block);
3609       provide (s);
3610     }
3611   else
3612     {
3613       if (session.verbose>2)
3614         clog << _("Collapsing constantly-limited foreach-loop ") << *s->tok << endl;
3615       relaxed_p = false;
3616
3617       provide (new null_statement (s->tok));
3618     }
3619 }
3620
3621 void
3622 const_folder::visit_binary_expression (binary_expression* e)
3623 {
3624   int64_t value;
3625   literal_number* left = get_number (e->left);
3626   literal_number* right = get_number (e->right);
3627
3628   if (right && !right->value && (e->op == "/" || e->op == "%"))
3629     {
3630       // Give divide-by-zero a chance to be optimized out elsewhere,
3631       // and if not it will be a runtime error anyway...
3632       provide (e);
3633       return;
3634     }
3635
3636   if (left && right)
3637     {
3638       if (e->op == "+")
3639         value = left->value + right->value;
3640       else if (e->op == "-")
3641         value = left->value - right->value;
3642       else if (e->op == "*")
3643         value = left->value * right->value;
3644       else if (e->op == "&")
3645         value = left->value & right->value;
3646       else if (e->op == "|")
3647         value = left->value | right->value;
3648       else if (e->op == "^")
3649         value = left->value ^ right->value;
3650       else if (e->op == ">>")
3651         value = left->value >> max(min(right->value, (int64_t)64), (int64_t)0);
3652       else if (e->op == "<<")
3653         value = left->value << max(min(right->value, (int64_t)64), (int64_t)0);
3654       else if (e->op == "/")
3655         value = (left->value == LLONG_MIN && right->value == -1) ? LLONG_MIN :
3656                 left->value / right->value;
3657       else if (e->op == "%")
3658         value = (left->value == LLONG_MIN && right->value == -1) ? 0 :
3659                 left->value % right->value;
3660       else
3661         throw SEMANTIC_ERROR (_("unsupported binary operator ") + e->op);
3662     }
3663
3664   else if ((left && ((left->value == 0 && (e->op == "*" || e->op == "&" ||
3665                                            e->op == ">>" || e->op == "<<" )) ||
3666                      (left->value ==-1 && (e->op == "|" || e->op == ">>"))))
3667            ||
3668            (right && ((right->value == 0 && (e->op == "*" || e->op == "&")) ||
3669                       (right->value == 1 && (e->op == "%")) ||
3670                       (right->value ==-1 && (e->op == "%" || e->op == "|")))))
3671     {
3672       expression* other = left ? e->right : e->left;
3673       varuse_collecting_visitor vu(session);
3674       other->visit(&vu);
3675       if (!vu.side_effect_free())
3676         {
3677           provide (e);
3678           return;
3679         }
3680
3681       if (left)
3682         value = left->value;
3683       else if (e->op == "%")
3684         value = 0;
3685       else
3686         value = right->value;
3687     }
3688
3689   else if ((left && ((left->value == 0 && (e->op == "+" || e->op == "|" ||
3690                                            e->op == "^")) ||
3691                      (left->value == 1 && (e->op == "*")) ||
3692                      (left->value ==-1 && (e->op == "&"))))
3693            ||
3694            (right && ((right->value == 0 && (e->op == "+" || e->op == "-" ||
3695                                              e->op == "|" || e->op == "^")) ||
3696                       (right->value == 1 && (e->op == "*" || e->op == "/")) ||
3697                       (right->value ==-1 && (e->op == "&")) ||
3698                       (right->value <= 0 && (e->op == ">>" || e->op == "<<")))))
3699     {
3700       if (session.verbose>2)
3701         clog << _("Collapsing constant-identity binary operator ") << *e->tok << endl;
3702       relaxed_p = false;
3703
3704       provide (left ? e->right : e->left);
3705       return;
3706     }
3707
3708   else
3709     {
3710       provide (e);
3711       return;
3712     }
3713
3714   if (session.verbose>2)
3715     clog << _F("Collapsing constant-%" PRIi64 " binary operator %s",
3716                value, lex_cast(*e->tok).c_str()) << endl;
3717   relaxed_p = false;
3718
3719   literal_number* n = new literal_number(value);
3720   n->tok = e->tok;
3721   n->visit (this);
3722 }
3723
3724 void
3725 const_folder::visit_unary_expression (unary_expression* e)
3726 {
3727   literal_number* operand = get_number (e->operand);
3728   if (!operand)
3729     provide (e);
3730   else
3731     {
3732       if (session.verbose>2)
3733         clog << _("Collapsing constant unary ") << *e->tok << endl;
3734       relaxed_p = false;
3735
3736       literal_number* n = new literal_number (*operand);
3737       n->tok = e->tok;
3738       if (e->op == "+")
3739         ; // nothing to do
3740       else if (e->op == "-")
3741         n->value = -n->value;
3742       else if (e->op == "!")
3743         n->value = !n->value;
3744       else if (e->op == "~")
3745         n->value = ~n->value;
3746       else
3747         throw SEMANTIC_ERROR (_("unsupported unary operator ") + e->op);
3748       n->visit (this);
3749     }
3750 }
3751
3752 void
3753 const_folder::visit_logical_or_expr (logical_or_expr* e)
3754 {
3755   int64_t value;
3756   literal_number* left = get_number (e->left);
3757   literal_number* right = get_number (e->right);
3758
3759   if (left && right)
3760     value = left->value || right->value;
3761
3762   else if ((left && left->value) || (right && right->value))
3763     {
3764       // If the const is on the left, we get to short-circuit the right
3765       // immediately.  Otherwise, we can only eliminate the LHS if it's pure.
3766       if (right)
3767         {
3768           varuse_collecting_visitor vu(session);
3769           e->left->visit(&vu);
3770           if (!vu.side_effect_free())
3771             {
3772               provide (e);
3773               return;
3774             }
3775         }
3776
3777       value = 1;
3778     }
3779
3780   // We might also get rid of useless "0||x" and "x||0", except it does
3781   // normalize x to 0 or 1.  We could change it to "!!x", but it's not clear
3782   // that this would gain us much.
3783
3784   else
3785     {
3786       provide (e);
3787       return;
3788     }
3789
3790   if (session.verbose>2)
3791     clog << _("Collapsing constant logical-OR ") << *e->tok << endl;
3792   relaxed_p = false;
3793
3794   literal_number* n = new literal_number(value);
3795   n->tok = e->tok;
3796   n->visit (this);
3797 }
3798
3799 void
3800 const_folder::visit_logical_and_expr (logical_and_expr* e)
3801 {
3802   int64_t value;
3803   literal_number* left = get_number (e->left);
3804   literal_number* right = get_number (e->right);
3805
3806   if (left && right)
3807     value = left->value && right->value;
3808
3809   else if ((left && !left->value) || (right && !right->value))
3810     {
3811       // If the const is on the left, we get to short-circuit the right
3812       // immediately.  Otherwise, we can only eliminate the LHS if it's pure.
3813       if (right)
3814         {
3815           varuse_collecting_visitor vu(session);
3816           e->left->visit(&vu);
3817           if (!vu.side_effect_free())
3818             {
3819               provide (e);
3820               return;
3821             }
3822         }
3823
3824       value = 0;
3825     }
3826
3827   // We might also get rid of useless "1&&x" and "x&&1", except it does
3828   // normalize x to 0 or 1.  We could change it to "!!x", but it's not clear
3829   // that this would gain us much.
3830
3831   else
3832     {
3833       provide (e);
3834       return;
3835     }
3836
3837   if (session.verbose>2)
3838     clog << _("Collapsing constant logical-AND ") << *e->tok << endl;
3839   relaxed_p = false;
3840
3841   literal_number* n = new literal_number(value);
3842   n->tok = e->tok;
3843   n->visit (this);
3844 }
3845
3846 void
3847 const_folder::visit_comparison (comparison* e)
3848 {
3849   int comp;
3850
3851   literal_number *left_num, *right_num;
3852   literal_string *left_str, *right_str;
3853   get_literal(e->left, left_num, left_str);
3854   get_literal(e->right, right_num, right_str);
3855
3856   if (left_str && right_str)
3857     comp = left_str->value.compare(right_str->value);
3858
3859   else if (left_num && right_num)
3860     comp = left_num->value < right_num->value ? -1 :
3861            left_num->value > right_num->value ? 1 : 0;
3862
3863   else if ((left_num && ((left_num->value == LLONG_MIN &&
3864                           (e->op == "<=" || e->op == ">")) ||
3865                          (left_num->value == LLONG_MAX &&
3866                           (e->op == ">=" || e->op == "<"))))
3867            ||
3868            (right_num && ((right_num->value == LLONG_MIN &&
3869                             (e->op == ">=" || e->op == "<")) ||
3870                            (right_num->value == LLONG_MAX &&
3871                             (e->op == "<=" || e->op == ">")))))
3872     {
3873       expression* other = left_num ? e->right : e->left;
3874       varuse_collecting_visitor vu(session);
3875       other->visit(&vu);
3876       if (!vu.side_effect_free())
3877         provide (e);
3878       else
3879         {
3880           if (session.verbose>2)
3881             clog << _("Collapsing constant-boundary comparison ") << *e->tok << endl;
3882           relaxed_p = false;
3883
3884           // ops <= and >= are true, < and > are false
3885           literal_number* n = new literal_number( e->op.length() == 2 );
3886           n->tok = e->tok;
3887           n->visit (this);
3888         }
3889       return;
3890     }
3891
3892   else
3893     {
3894       provide (e);
3895       return;
3896     }
3897
3898   if (session.verbose>2)
3899     clog << _("Collapsing constant comparison ") << *e->tok << endl;
3900   relaxed_p = false;
3901
3902   int64_t value;
3903   if (e->op == "==")
3904     value = comp == 0;
3905   else if (e->op == "!=")
3906     value = comp != 0;
3907   else if (e->op == "<")
3908     value = comp < 0;
3909   else if (e->op == ">")
3910     value = comp > 0;
3911   else if (e->op == "<=")
3912     value = comp <= 0;
3913   else if (e->op == ">=")
3914     value = comp >= 0;
3915   else
3916     throw SEMANTIC_ERROR (_("unsupported comparison operator ") + e->op);
3917
3918   literal_number* n = new literal_number(value);
3919   n->tok = e->tok;
3920   n->visit (this);
3921 }
3922
3923 void
3924 const_folder::visit_concatenation (concatenation* e)
3925 {
3926   literal_string* left = get_string (e->left);
3927   literal_string* right = get_string (e->right);
3928
3929   if (left && right)
3930     {
3931       if (session.verbose>2)
3932         clog << _("Collapsing constant concatenation ") << *e->tok << endl;
3933       relaxed_p = false;
3934
3935       literal_string* n = new literal_string (*left);
3936       n->tok = e->tok;
3937       n->value.append(right->value);
3938       n->visit (this);
3939     }
3940   else if ((left && left->value.empty()) ||
3941            (right && right->value.empty()))
3942     {
3943       if (session.verbose>2)
3944         clog << _("Collapsing identity concatenation ") << *e->tok << endl;
3945       relaxed_p = false;
3946       provide(left ? e->right : e->left);
3947     }
3948   else
3949     provide (e);
3950 }
3951
3952 void
3953 const_folder::visit_ternary_expression (ternary_expression* e)
3954 {
3955   literal_number* cond = get_number (e->cond);
3956   if (!cond)
3957     {
3958       replace (e->truevalue);
3959       replace (e->falsevalue);
3960       provide (e);
3961     }
3962   else
3963     {
3964       if (session.verbose>2)
3965         clog << _F("Collapsing constant-%" PRIi64 " ternary %s",
3966                    cond->value, lex_cast(*e->tok).c_str()) << endl;
3967       relaxed_p = false;
3968
3969       expression* n = cond->value ? e->truevalue : e->falsevalue;
3970       n->visit (this);
3971     }
3972 }
3973
3974 void
3975 const_folder::visit_defined_op (defined_op* e)
3976 {
3977   // If a @defined makes it this far, then it is, de facto, undefined.
3978
3979   if (session.verbose>2)
3980     clog << _("Collapsing untouched @defined check ") << *e->tok << endl;
3981   relaxed_p = false;
3982
3983   literal_number* n = new literal_number (0);
3984   n->tok = e->tok;
3985   n->visit (this);
3986 }
3987
3988 void
3989 const_folder::visit_target_symbol (target_symbol* e)
3990 {
3991   if (session.skip_badvars)
3992     {
3993       // Upon user request for ignoring context, the symbol is replaced
3994       // with a literal 0 and a warning message displayed
3995       // XXX this ignores possible side-effects, e.g. in array indexes
3996       literal_number* ln_zero = new literal_number (0);
3997       ln_zero->tok = e->tok;
3998       provide (ln_zero);
3999       session.print_warning (_("Bad $context variable being substituted with literal 0"),
4000                                e->tok);
4001       relaxed_p = false;
4002     }
4003   else
4004     update_visitor::visit_target_symbol (e);
4005 }
4006
4007 static void semantic_pass_const_fold (systemtap_session& s, bool& relaxed_p)
4008 {
4009   // Let's simplify statements with constant values.
4010
4011   const_folder cf (s, relaxed_p);
4012   // This instance may be reused for multiple probe/function body trims.
4013
4014   for (unsigned i=0; i<s.probes.size(); i++)
4015     cf.replace (s.probes[i]->body);
4016   for (map<string,functiondecl*>::iterator it = s.functions.begin();
4017        it != s.functions.end(); it++)
4018     cf.replace (it->second->body);
4019 }
4020
4021
4022 struct dead_control_remover: public traversing_visitor
4023 {
4024   systemtap_session& session;
4025   bool& relaxed_p;
4026   statement* control;
4027
4028   dead_control_remover(systemtap_session& s, bool& r):
4029     session(s), relaxed_p(r), control(NULL) {}
4030
4031   void visit_block (block *b);
4032
4033   // When a block contains any of these, the following statements are dead.
4034   void visit_return_statement (return_statement* s) { control = s; }
4035   void visit_next_statement (next_statement* s) { control = s; }
4036   void visit_break_statement (break_statement* s) { control = s; }
4037   void visit_continue_statement (continue_statement* s) { control = s; }
4038 };
4039
4040
4041 void dead_control_remover::visit_block (block* b)
4042 {
4043   vector<statement*>& vs = b->statements;
4044   if (vs.size() == 0) /* else (size_t) size()-1 => very big */
4045     return;
4046   for (size_t i = 0; i < vs.size() - 1; ++i)
4047     {
4048       vs[i]->visit (this);
4049       if (vs[i] == control)
4050         {
4051           session.print_warning(_("statement will never be reached"),
4052                                 vs[i + 1]->tok);
4053           vs.erase(vs.begin() + i + 1, vs.end());
4054           relaxed_p = false;
4055           break;
4056         }
4057     }
4058 }
4059
4060
4061 static void semantic_pass_dead_control (systemtap_session& s, bool& relaxed_p)
4062 {
4063   // Let's remove code that follow unconditional control statements
4064
4065   dead_control_remover dc (s, relaxed_p);
4066
4067   for (unsigned i=0; i<s.probes.size(); i++)
4068     s.probes[i]->body->visit(&dc);
4069
4070   for (map<string,functiondecl*>::iterator it = s.functions.begin();
4071        it != s.functions.end(); it++)
4072     it->second->body->visit(&dc);
4073 }
4074
4075
4076 struct duplicate_function_remover: public functioncall_traversing_visitor
4077 {
4078   systemtap_session& s;
4079   map<functiondecl*, functiondecl*>& duplicate_function_map;
4080
4081   duplicate_function_remover(systemtap_session& sess,
4082                              map<functiondecl*, functiondecl*>&dfm):
4083     s(sess), duplicate_function_map(dfm) {};
4084
4085   void visit_functioncall (functioncall* e);
4086 };
4087
4088 void
4089 duplicate_function_remover::visit_functioncall (functioncall *e)
4090 {
4091   functioncall_traversing_visitor::visit_functioncall (e);
4092
4093   // If the current function call reference points to a function that
4094   // is a duplicate, replace it.
4095   if (duplicate_function_map.count(e->referent) != 0)
4096     {
4097       if (s.verbose>2)
4098           clog << _F("Changing %s reference to %s reference\n",
4099                      e->referent->name.c_str(), duplicate_function_map[e->referent]->name.c_str());
4100       e->tok = duplicate_function_map[e->referent]->tok;
4101       e->function = duplicate_function_map[e->referent]->name;
4102       e->referent = duplicate_function_map[e->referent];
4103     }
4104 }
4105
4106 static string
4107 get_functionsig (functiondecl* f)
4108 {
4109   ostringstream s;
4110
4111   // Get the "name:args body" of the function in s.  We have to
4112   // include the args since the function 'x1(a, b)' is different than
4113   // the function 'x2(b, a)' even if the bodies of the two functions
4114   // are exactly the same.
4115   f->printsig(s);
4116   f->body->print(s);
4117
4118   // printsig puts f->name + ':' on the front.  Remove this
4119   // (otherwise, functions would never compare equal).
4120   string str = s.str().erase(0, f->name.size() + 1);
4121
4122   // Return the function signature.
4123   return str;
4124 }
4125
4126 void semantic_pass_opt6 (systemtap_session& s, bool& relaxed_p)
4127 {
4128   // Walk through all the functions, looking for duplicates.
4129   map<string, functiondecl*> functionsig_map;
4130   map<functiondecl*, functiondecl*> duplicate_function_map;
4131
4132
4133   vector<functiondecl*> newly_zapped_functions;
4134   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
4135     {
4136       functiondecl *fd = it->second;
4137       string functionsig = get_functionsig(fd);
4138
4139       if (functionsig_map.count(functionsig) == 0)
4140         {
4141           // This function is unique.  Remember it.
4142           functionsig_map[functionsig] = fd;
4143         }
4144       else
4145         {
4146           // This function is a duplicate.
4147           duplicate_function_map[fd] = functionsig_map[functionsig];
4148           newly_zapped_functions.push_back (fd);
4149           relaxed_p = false;
4150         }
4151     }
4152   for (unsigned i=0; i<newly_zapped_functions.size(); i++)
4153     {
4154       map<string,functiondecl*>::iterator where = s.functions.find (newly_zapped_functions[i]->name);
4155       assert (where != s.functions.end());
4156       s.functions.erase (where);
4157     }
4158
4159
4160   // If we have duplicate functions, traverse down the tree, replacing
4161   // the appropriate function calls.
4162   // duplicate_function_remover::visit_functioncall() handles the
4163   // details of replacing the function calls.
4164   if (duplicate_function_map.size() != 0)
4165     {
4166       duplicate_function_remover dfr (s, duplicate_function_map);
4167
4168       for (unsigned i=0; i < s.probes.size(); i++)
4169         s.probes[i]->body->visit(&dfr);
4170     }
4171 }
4172
4173
4174 static int
4175 semantic_pass_optimize1 (systemtap_session& s)
4176 {
4177   // In this pass, we attempt to rewrite probe/function bodies to
4178   // eliminate some blatantly unnecessary code.  This is run before
4179   // type inference, but after symbol resolution and derived_probe
4180   // creation.  We run an outer "relaxation" loop that repeats the
4181   // optimizations until none of them find anything to remove.
4182
4183   int rc = 0;
4184
4185   // Save the old value of suppress_warnings, as we will be changing
4186   // it below.
4187   save_and_restore<bool> suppress_warnings(& s.suppress_warnings);
4188
4189   bool relaxed_p = false;
4190   unsigned iterations = 0;
4191   while (! relaxed_p)
4192     {
4193       assert_no_interrupts();
4194
4195       relaxed_p = true; // until proven otherwise
4196
4197       // If the verbosity is high enough, always print warnings (overrides -w),
4198       // or if not, always suppress warnings for every itteration after the first.
4199       if(s.verbose > 2)
4200         s.suppress_warnings = false;
4201       else if (iterations > 0)
4202         s.suppress_warnings = true;
4203
4204       if (!s.unoptimized)
4205         {
4206           semantic_pass_opt1 (s, relaxed_p);
4207           semantic_pass_opt2 (s, relaxed_p, iterations); // produce some warnings only on iteration=0
4208           semantic_pass_opt3 (s, relaxed_p);
4209           semantic_pass_opt4 (s, relaxed_p);
4210           semantic_pass_opt5 (s, relaxed_p);
4211         }
4212
4213       // For listing mode, we need const-folding regardless of optimization so
4214       // that @defined expressions can be properly resolved.  PR11360
4215       // We also want it in case variables are used in if/case expressions,
4216       // so enable always.  PR11366
4217       semantic_pass_const_fold (s, relaxed_p);
4218
4219       if (!s.unoptimized)
4220         semantic_pass_dead_control (s, relaxed_p);
4221
4222       iterations ++;
4223     }
4224
4225   return rc;
4226 }
4227
4228
4229 static int
4230 semantic_pass_optimize2 (systemtap_session& s)
4231 {
4232   // This is run after type inference.  We run an outer "relaxation"
4233   // loop that repeats the optimizations until none of them find
4234   // anything to remove.
4235
4236   int rc = 0;
4237
4238   // Save the old value of suppress_warnings, as we will be changing
4239   // it below.
4240   save_and_restore<bool> suppress_warnings(& s.suppress_warnings);
4241
4242   bool relaxed_p = false;
4243   unsigned iterations = 0;
4244   while (! relaxed_p)
4245     {
4246       assert_no_interrupts();
4247       relaxed_p = true; // until proven otherwise
4248
4249       // If the verbosity is high enough, always print warnings (overrides -w),
4250       // or if not, always suppress warnings for every itteration after the first.
4251       if(s.verbose > 2)
4252         s.suppress_warnings = false;
4253       else if (iterations > 0)
4254         s.suppress_warnings = true;
4255
4256       if (!s.unoptimized)
4257         semantic_pass_opt6 (s, relaxed_p);
4258
4259       iterations++;
4260     }
4261
4262   return rc;
4263 }
4264
4265
4266
4267 // ------------------------------------------------------------------------
4268 // type resolution
4269
4270 struct autocast_expanding_visitor: public var_expanding_visitor
4271 {
4272   typeresolution_info& ti;
4273   autocast_expanding_visitor (typeresolution_info& ti): ti(ti) {}
4274
4275   void resolve_functioncall (functioncall* fc)
4276     {
4277       // This is a very limited version of semantic_pass_symbols, but we're
4278       // late in the game at this point.  We won't get a chance to optimize,
4279       // but for now the only functions we expect are kernel/user_string from
4280       // pretty-printing, which don't need optimization.
4281
4282       systemtap_session& s = ti.session;
4283       size_t nfiles = s.files.size();
4284
4285       symresolution_info sym (s);
4286       sym.current_function = ti.current_function;
4287       sym.current_probe = ti.current_probe;
4288       fc->visit (&sym);
4289
4290       // NB: synthetic functions get tacked onto the origin file, so we won't
4291       // see them growing s.files[].  Traverse it directly.
4292       if (fc->referent)
4293         {
4294           functiondecl* fd = fc->referent;
4295           sym.current_function = fd;
4296           sym.current_probe = 0;
4297           fd->body->visit (&sym);
4298         }
4299
4300       while (nfiles < s.files.size())
4301         {
4302           stapfile* dome = s.files[nfiles++];
4303           for (size_t i = 0; i < dome->functions.size(); ++i)
4304             {
4305               functiondecl* fd = dome->functions[i];
4306               sym.current_function = fd;
4307               sym.current_probe = 0;
4308               fd->body->visit (&sym);
4309               // NB: not adding to s.functions just yet...
4310             }
4311         }
4312
4313       // Add only the direct functions we need.
4314       functioncall_traversing_visitor ftv;
4315       fc->visit (&ftv);
4316       for (set<functiondecl*>::iterator it = ftv.seen.begin();
4317            it != ftv.seen.end(); ++it)
4318         {
4319           functiondecl* fd = *it;
4320           pair<map<string,functiondecl*>::iterator,bool> inserted =
4321             s.functions.insert (make_pair (fd->name, fd));
4322           if (!inserted.second && inserted.first->second != fd)
4323             throw SEMANTIC_ERROR
4324               (_F("resolved function '%s' conflicts with an existing function",
4325                   fd->name.c_str()), fc->tok);
4326         }
4327     }
4328
4329   void visit_autocast_op (autocast_op* e)
4330     {
4331       const bool lvalue = is_active_lvalue (e);
4332       const exp_type_ptr& details = e->operand->type_details;
4333       if (details && !e->saved_conversion_error)
4334         {
4335           functioncall* fc = details->expand (e, lvalue);
4336           if (fc)
4337             {
4338               ti.num_newly_resolved++;
4339
4340               resolve_functioncall (fc);
4341
4342               if (lvalue)
4343                 provide_lvalue_call (fc);
4344
4345               fc->visit (this);
4346               return;
4347             }
4348         }
4349       var_expanding_visitor::visit_autocast_op (e);
4350     }
4351 };
4352
// Type-inference driver.  Repeatedly runs a typeresolution_info
// visitor over every function body, probe body, probe condition and
// global until a whole round resolves nothing new.  If unresolved
// items remain at that point, one more round is run with
// assert_resolvability set (the "last pass, with error msgs"), and a
// further non-progressing round after that counts as failure.
//
// Returns the local failure count plus s.num_errors().
// Throws a semantic error if a scalar global is marked for wrapping.
static int
semantic_pass_types (systemtap_session& s)
{
  int rc = 0;

  // next pass: type inference
  unsigned iterations = 0; // NB: only ever incremented below, never read
  typeresolution_info ti (s);

  // Globals never have detailed types.
  // If we null them now, then all remaining vardecls can be detailed.
  for (unsigned j=0; j<s.globals.size(); j++)
    {
      vardecl* gd = s.globals[j];
      if (!gd->type_details)
        gd->type_details = ti.null_type;
    }

  ti.assert_resolvability = false;
  // XXX: maybe convert to exception-based error signalling
  while (1)
    {
      assert_no_interrupts();

      // Start each round with fresh progress counters; the convergence
      // test at the bottom of the loop depends on them.
      iterations ++;
      ti.num_newly_resolved = 0;
      ti.num_still_unresolved = 0;
      ti.num_available_autocasts = 0;

      // Round part 1: function bodies and their locals.
      for (map<string,functiondecl*>::iterator it = s.functions.begin();
                                               it != s.functions.end(); it++)
        {
          assert_no_interrupts();

          functiondecl* fd = it->second;
          ti.current_probe = 0;
          ti.current_function = fd;
          ti.t = pe_unknown;
          fd->body->visit (& ti);
          // NB: we don't have to assert a known type for
          // functions here, to permit a "void" function.
          // The translator phase will omit the "retvalue".
          //
          // if (fd->type == pe_unknown)
          //   ti.unresolved (fd->tok);
          for (unsigned i=0; i < fd->locals.size(); ++i)
            ti.check_local (fd->locals[i]);

          // Check and run the autocast expanding visitor.
          // The counter is cleared afterwards so that it only reflects
          // the body visited next.
          if (ti.num_available_autocasts > 0)
            {
              autocast_expanding_visitor aev (ti);
              aev.replace (fd->body);
              ti.num_available_autocasts = 0;
            }
        }

      // Round part 2: probe bodies, their locals, and probe conditions.
      for (unsigned j=0; j<s.probes.size(); j++)
        {
          assert_no_interrupts();

          derived_probe* pn = s.probes[j];
          ti.current_function = 0;
          ti.current_probe = pn;
          ti.t = pe_unknown;
          pn->body->visit (& ti);
          for (unsigned i=0; i < pn->locals.size(); ++i)
            ti.check_local (pn->locals[i]);

          // Check and run the autocast expanding visitor.
          if (ti.num_available_autocasts > 0)
            {
              autocast_expanding_visitor aev (ti);
              aev.replace (pn->body);
              ti.num_available_autocasts = 0;
            }

          // The probe point's condition, if any, is visited with
          // neither a current function nor a current probe set.
          probe_point* pp = pn->sole_location();
          if (pp->condition)
            {
              ti.current_function = 0;
              ti.current_probe = 0;
              ti.t = pe_long; // NB: expected type
              pp->condition->visit (& ti);
            }
        }

      // Round part 3: globals.  Any still of unknown type are reported
      // to the resolver as unresolved.
      for (unsigned j=0; j<s.globals.size(); j++)
        {
          vardecl* gd = s.globals[j];
          if (gd->type == pe_unknown)
            ti.unresolved (gd->tok);
          if(gd->arity == 0 && gd->wrap == true)
            {
              throw SEMANTIC_ERROR (_("wrapping not supported for scalars"), gd->tok);
            }
        }

      if (ti.num_newly_resolved == 0) // converged
        {
          if (ti.num_still_unresolved == 0)
            break; // successfully
          else if (! ti.assert_resolvability)
            {
              ti.assert_resolvability = true; // last pass, with error msgs
              if (s.verbose > 0)
                ti.mismatch_complexity = 0; // print every kind of mismatch
            }
          else
            { // unsuccessful conclusion
              rc ++;
              break;
            }
        }
      else
        ti.mismatch_complexity = 0; // reset for next pass
    }

  return rc + s.num_errors();
}
4473
4474
4475 struct exp_type_null : public exp_type_details
4476 {
4477   uintptr_t id () const { return 0; }
4478   bool expandable() const { return false; }
4479   functioncall *expand(autocast_op*, bool) { return NULL; }
4480 };
4481
4482 typeresolution_info::typeresolution_info (systemtap_session& s):
4483   session(s), num_newly_resolved(0), num_still_unresolved(0),
4484   assert_resolvability(false), mismatch_complexity(0),
4485   current_function(0), current_probe(0), t(pe_unknown),
4486   null_type(new exp_type_null())
4487 {
4488 }
4489
4490
4491 void
4492 typeresolution_info::visit_literal_number (literal_number* e)
4493 {
4494   assert (e->type == pe_long);
4495   if ((t == e->type) || (t == pe_unknown))
4496     return;
4497
4498   mismatch (e->tok, t, e->type);
4499 }
4500
4501
4502 void
4503 typeresolution_info::visit_literal_string (literal_string* e)
4504 {
4505   assert (e->type == pe_string);
4506   if ((t == e->type) || (t == pe_unknown))
4507     return;
4508
4509   mismatch (e->tok, t, e->type);
4510 }
4511
4512
4513 void
4514 typeresolution_info::visit_logical_or_expr (logical_or_expr *e)
4515 {
4516   visit_binary_expression (e);
4517 }
4518
4519
4520 void
4521 typeresolution_info::visit_logical_and_expr (logical_and_expr *e)
4522 {
4523   visit_binary_expression (e);
4524 }
4525
4526 void
4527 typeresolution_info::visit_regex_query (regex_query *e)
4528 {
4529   // NB: result of regex query is an integer!
4530   if (t == pe_stats || t == pe_string)
4531     invalid (e->tok, t);
4532
4533   t = pe_string;
4534   e->left->visit (this);
4535   t = pe_string;
4536   e->right->visit (this); // parser ensures this is a literal known at compile time
4537
4538   if (e->type == pe_unknown)
4539     {
4540       e->type = pe_long;
4541       resolved (e->tok, e->type);
4542     }
4543 }
4544
4545
4546 void
4547 typeresolution_info::visit_comparison (comparison *e)
4548 {
4549   // NB: result of any comparison is an integer!
4550   if (t == pe_stats || t == pe_string)
4551     invalid (e->tok, t);
4552
4553   t = (e->right->type != pe_unknown) ? e->right->type : pe_unknown;
4554   e->left->visit (this);
4555   t = (e->left->type != pe_unknown) ? e->left->type : pe_unknown;
4556   e->right->visit (this);
4557
4558   if (e->left->type != pe_unknown &&
4559       e->right->type != pe_unknown &&
4560       e->left->type != e->right->type)
4561     mismatch (e);
4562
4563   if (e->type == pe_unknown)
4564     {
4565       e->type = pe_long;
4566       resolved (e->tok, e->type);
4567     }
4568 }
4569
4570
4571 void
4572 typeresolution_info::visit_concatenation (concatenation *e)
4573 {
4574   if (t != pe_unknown && t != pe_string)
4575     invalid (e->tok, t);
4576
4577   t = pe_string;
4578   e->left->visit (this);
4579   t = pe_string;
4580   e->right->visit (this);
4581
4582   if (e->type == pe_unknown)
4583     {
4584       e->type = pe_string;
4585       resolved (e->tok, e->type);
4586     }
4587 }
4588
4589
4590 void
4591 typeresolution_info::visit_assignment (assignment *e)
4592 {
4593   if (t == pe_stats)
4594     invalid (e->tok, t);
4595
4596   if (e->op == "<<<") // stats aggregation
4597     {
4598       if (t == pe_string)
4599         invalid (e->tok, t);
4600
4601       t = pe_stats;
4602       e->left->visit (this);
4603       t = pe_long;
4604       e->right->visit (this);
4605       if (e->type == pe_unknown ||
4606           e->type == pe_stats)
4607         {
4608           e->type = pe_long;
4609           resolved (e->tok, e->type);
4610         }
4611     }
4612
4613   else if (e->left->type == pe_stats)
4614     invalid (e->left->tok, e->left->type);
4615
4616   else if (e->right->type == pe_stats)
4617     invalid (e->right->tok, e->right->type);
4618
4619   else if (e->op == "+=" || // numeric only
4620            e->op == "-=" ||
4621            e->op == "*=" ||
4622            e->op == "/=" ||
4623            e->op == "%=" ||
4624            e->op == "&=" ||
4625            e->op == "^=" ||
4626            e->op == "|=" ||
4627            e->op == "<<=" ||
4628            e->op == ">>=" ||
4629            false)
4630     {
4631       visit_binary_expression (e);
4632     }
4633   else if (e->op == ".=" || // string only
4634            false)
4635     {
4636       if (t == pe_long || t == pe_stats)
4637         invalid (e->tok, t);
4638
4639       t = pe_string;
4640       e->left->visit (this);
4641       t = pe_string;
4642       e->right->visit (this);
4643       if (e->type == pe_unknown)
4644         {
4645           e->type = pe_string;
4646           resolved (e->tok, e->type);
4647         }
4648     }
4649   else if (e->op == "=") // overloaded = for string & numeric operands
4650     {
4651       // logic similar to ternary_expression
4652       exp_type sub_type = t;
4653
4654       // Infer types across the l/r values
4655       if (sub_type == pe_unknown && e->type != pe_unknown)
4656         sub_type = e->type;
4657
4658       t = (sub_type != pe_unknown) ? sub_type :
4659         (e->right->type != pe_unknown) ? e->right->type :
4660         pe_unknown;
4661       e->left->visit (this);
4662       t = (sub_type != pe_unknown) ? sub_type :
4663         (e->left->type != pe_unknown) ? e->left->type :
4664         pe_unknown;
4665       e->right->visit (this);
4666
4667       if ((sub_type != pe_unknown) && (e->type == pe_unknown))
4668         {
4669           e->type = sub_type;
4670           resolved (e->tok, e->type);
4671         }
4672       if ((sub_type == pe_unknown) && (e->left->type != pe_unknown))
4673         {
4674           e->type = e->left->type;
4675           resolved (e->tok, e->type);
4676         }
4677
4678       if (e->left->type != pe_unknown &&
4679           e->right->type != pe_unknown &&
4680           e->left->type != e->right->type)
4681         mismatch (e);
4682
4683       // Propagate type details from the RHS to the assignment
4684       if (e->type == e->right->type &&
4685           e->right->type_details && !e->type_details)
4686         resolved_details(e->right->type_details, e->type_details);
4687
4688       // Propagate type details from the assignment to the LHS
4689       if (e->type == e->left->type && e->type_details)
4690         {
4691           if (e->left->type_details &&
4692               *e->left->type_details != *e->type_details &&
4693               *e->left->type_details != *null_type)
4694             resolved_details(null_type, e->left->type_details);
4695           else if (!e->left->type_details)
4696             resolved_details(e->type_details, e->left->type_details);
4697         }
4698     }
4699   else
4700     throw SEMANTIC_ERROR (_("unsupported assignment operator ") + e->op);
4701 }
4702
4703
4704 void
4705 typeresolution_info::visit_embedded_expr (embedded_expr *e)
4706 {
4707   if (e->type == pe_unknown)
4708     {
4709       if (e->code.find ("/* string */") != string::npos)
4710         e->type = pe_string;
4711       else // if (e->code.find ("/* long */") != string::npos)
4712         e->type = pe_long;
4713
4714       resolved (e->tok, e->type);
4715     }
4716 }
4717
4718
4719 void
4720 typeresolution_info::visit_binary_expression (binary_expression* e)
4721 {
4722   if (t == pe_stats || t == pe_string)
4723     invalid (e->tok, t);
4724
4725   t = pe_long;
4726   e->left->visit (this);
4727   t = pe_long;
4728   e->right->visit (this);
4729
4730   if (e->left->type != pe_unknown &&
4731       e->right->type != pe_unknown &&
4732       e->left->type != e->right->type)
4733     mismatch (e);
4734
4735   if (e->type == pe_unknown)
4736     {
4737       e->type = pe_long;
4738       resolved (e->tok, e->type);
4739     }
4740 }
4741
4742
4743 void
4744 typeresolution_info::visit_pre_crement (pre_crement *e)
4745 {
4746   visit_unary_expression (e);
4747 }
4748
4749
4750 void
4751 typeresolution_info::visit_post_crement (post_crement *e)
4752 {
4753   visit_unary_expression (e);
4754 }
4755
4756
4757 void
4758 typeresolution_info::visit_unary_expression (unary_expression* e)
4759 {
4760   if (t == pe_stats || t == pe_string)
4761     invalid (e->tok, t);
4762
4763   t = pe_long;
4764   e->operand->visit (this);
4765
4766   if (e->type == pe_unknown)
4767     {
4768       e->type = pe_long;
4769       resolved (e->tok, e->type);
4770     }
4771 }
4772
4773
4774 void
4775 typeresolution_info::visit_ternary_expression (ternary_expression* e)
4776 {
4777   exp_type sub_type = t;
4778
4779   t = pe_long;
4780   e->cond->visit (this);
4781
4782   // Infer types across the true/false arms of the ternary expression.
4783
4784   if (sub_type == pe_unknown && e->type != pe_unknown)
4785     sub_type = e->type;
4786   t = sub_type;
4787   e->truevalue->visit (this);
4788   t = sub_type;
4789   e->falsevalue->visit (this);
4790
4791   if ((sub_type == pe_unknown) && (e->type != pe_unknown))
4792     ; // already resolved
4793   else if ((sub_type != pe_unknown) && (e->type == pe_unknown))
4794     {
4795       e->type = sub_type;
4796       resolved (e->tok, e->type);
4797     }
4798   else if ((sub_type == pe_unknown) && (e->truevalue->type != pe_unknown))
4799     {
4800       e->type = e->truevalue->type;
4801       resolved (e->tok, e->type);
4802     }
4803   else if ((sub_type == pe_unknown) && (e->falsevalue->type != pe_unknown))
4804     {
4805       e->type = e->falsevalue->type;
4806       resolved (e->tok, e->type);
4807     }
4808   else if (e->type != sub_type)
4809     mismatch (e->tok, sub_type, e->type);
4810
4811   // Propagate type details from both true/false branches
4812   if (!e->type_details &&
4813       e->type == e->truevalue->type && e->type == e->falsevalue->type &&
4814       e->truevalue->type_details && e->falsevalue->type_details &&
4815       *e->truevalue->type_details == *e->falsevalue->type_details)
4816     resolved_details(e->truevalue->type_details, e->type_details);
4817 }
4818
4819
4820 template <class Referrer, class Referent>
4821 void resolve_2types (Referrer* referrer, Referent* referent,
4822                     typeresolution_info* r, exp_type t, bool accept_unknown = false)
4823 {
4824   exp_type& re_type = referrer->type;
4825   const token* re_tok = referrer->tok;
4826   exp_type& te_type = referent->type;
4827
4828   if (t != pe_unknown && re_type == t && re_type == te_type)
4829     ; // do nothing: all three e->types in agreement
4830   else if (t == pe_unknown && re_type != pe_unknown && re_type == te_type)
4831     ; // do nothing: two known e->types in agreement
4832   else if (re_type != pe_unknown && te_type != pe_unknown && re_type != te_type)
4833     r->mismatch (re_tok, re_type, referent); // referrer-referent
4834   else if (re_type != pe_unknown && t != pe_unknown && re_type != t)
4835     r->mismatch (re_tok, t, referent); // referrer-t
4836   else if (te_type != pe_unknown && t != pe_unknown && te_type != t)
4837     r->mismatch (re_tok, t, referent); // referent-t
4838   else if (re_type == pe_unknown && t != pe_unknown)
4839     {
4840       // propagate from upstream
4841       re_type = t;
4842       r->resolved (re_tok, re_type);
4843       // catch re_type/te_type mismatch later
4844     }
4845   else if (re_type == pe_unknown && te_type != pe_unknown)
4846     {
4847       // propagate from referent
4848       re_type = te_type;
4849       r->resolved (re_tok, re_type);
4850       // catch re_type/t mismatch later
4851     }
4852   else if (re_type != pe_unknown && te_type == pe_unknown)
4853     {
4854       // propagate to referent
4855       te_type = re_type;
4856       r->resolved (re_tok, re_type, referent);
4857       // catch re_type/t mismatch later
4858     }
4859   else if (! accept_unknown)
4860     r->unresolved (re_tok);
4861 }
4862
4863
4864 void
4865 typeresolution_info::visit_symbol (symbol* e)
4866 {
4867   if (e->referent == 0)
4868     throw SEMANTIC_ERROR (_F("internal error: unresolved symbol '%s'",
4869                              e->name.c_str()), e->tok);
4870
4871   resolve_2types (e, e->referent, this, t);
4872
4873   if (e->type == e->referent->type)
4874     {
4875       // If both have type details, then they either must agree;
4876       // otherwise force them both to null.
4877       if (e->type_details && e->referent->type_details &&
4878           *e->type_details != *e->referent->type_details)
4879         {
4880           resolved_details(null_type, e->type_details);
4881           resolved_details(null_type, e->referent->type_details);
4882         }
4883       else if (e->type_details && !e->referent->type_details)
4884         resolved_details(e->type_details, e->referent->type_details);
4885       else if (!e->type_details && e->referent->type_details)
4886         resolved_details(e->referent->type_details, e->type_details);
4887     }
4888 }
4889
4890
4891 void
4892 typeresolution_info::visit_target_symbol (target_symbol* e)
4893 {
4894   // This occurs only if a target symbol was not resolved over in
4895   // tapset.cxx land, that error was properly suppressed, and the
4896   // later unused-expression-elimination pass didn't get rid of it
4897   // either.  So we have a target symbol that is believed to be of
4898   // genuine use, yet unresolved by the provider.
4899
4900   if (session.verbose > 2)
4901     {
4902       clog << _("Resolution problem with ");
4903       if (current_function)
4904         {
4905           clog << "function " << current_function->name << endl;
4906           current_function->body->print (clog);
4907           clog << endl;
4908         }
4909       else if (current_probe)
4910         {
4911           clog << "probe " << *current_probe->sole_location() << endl;
4912           current_probe->body->print (clog);
4913           clog << endl;
4914         }
4915       else
4916         //TRANSLATORS: simply saying not an issue with a probe or function
4917         clog << _("other") << endl;
4918     }
4919
4920   if (e->saved_conversion_error)
4921     throw (* (e->saved_conversion_error));
4922   else
4923     throw SEMANTIC_ERROR(_("unresolved target-symbol expression"), e->tok);
4924 }
4925
4926
4927 void
4928 typeresolution_info::visit_atvar_op (atvar_op* e)
4929 {
4930   // This occurs only if an @var() was not resolved over in
4931   // tapset.cxx land, that error was properly suppressed, and the
4932   // later unused-expression-elimination pass didn't get rid of it
4933   // either.  So we have an @var() that is believed to be of
4934   // genuine use, yet unresolved by the provider.
4935
4936   if (session.verbose > 2)
4937     {
4938       clog << _("Resolution problem with ");
4939       if (current_function)
4940         {
4941           clog << "function " << current_function->name << endl;
4942           current_function->body->print (clog);
4943           clog << endl;
4944         }
4945       else if (current_probe)
4946         {
4947           clog << "probe " << *current_probe->sole_location() << endl;
4948           current_probe->body->print (clog);
4949           clog << endl;
4950         }
4951       else
4952         //TRANSLATORS: simply saying not an issue with a probe or function
4953         clog << _("other") << endl;
4954     }
4955
4956   if (e->saved_conversion_error)
4957     throw (* (e->saved_conversion_error));
4958   else
4959     throw SEMANTIC_ERROR(_("unresolved @var() expression"), e->tok);
4960 }
4961
4962
4963 void
4964 typeresolution_info::visit_defined_op (defined_op* e)
4965 {
4966   throw SEMANTIC_ERROR(_("unexpected @defined"), e->tok);
4967 }
4968
4969
4970 void
4971 typeresolution_info::visit_entry_op (entry_op* e)
4972 {
4973   throw SEMANTIC_ERROR(_("@entry is only valid in .return probes"), e->tok);
4974 }
4975
4976
4977 void
4978 typeresolution_info::visit_cast_op (cast_op* e)
4979 {
4980   // Like target_symbol, a cast_op shouldn't survive this far
4981   // unless it was not resolved and its value is really needed.
4982   if (e->saved_conversion_error)
4983     throw (* (e->saved_conversion_error));
4984   else
4985     throw SEMANTIC_ERROR(_F("type definition '%s' not found in '%s'",
4986                             e->type_name.c_str(), e->module.c_str()), e->tok);
4987 }
4988
4989
4990 void
4991 typeresolution_info::visit_autocast_op (autocast_op* e)
4992 {
4993   // Like cast_op, a implicit autocast_op shouldn't survive this far
4994   // unless it was not resolved and its value is really needed.
4995   if (assert_resolvability && e->saved_conversion_error)
4996     throw (* (e->saved_conversion_error));
4997   else if (assert_resolvability)
4998     throw SEMANTIC_ERROR(_("unknown type in dereference"), e->tok);
4999
5000   t = pe_long;
5001   e->operand->visit (this);
5002
5003   num_still_unresolved++;
5004   if (e->operand->type_details &&
5005       e->operand->type_details->expandable())
5006     num_available_autocasts++;
5007 }
5008
5009
5010 void
5011 typeresolution_info::visit_perf_op (perf_op* e)
5012 {
5013   // A perf_op should already be resolved
5014   if (t == pe_stats || t == pe_string)
5015     invalid (e->tok, t);
5016
5017   e->type = pe_long;
5018
5019   // (There is no real need to visit our operand - by parser
5020   // construction, it's always a string literal, with its type already
5021   // set.)
5022   t = pe_string;
5023   e->operand->visit (this);
5024 }
5025
5026
5027 void
5028 typeresolution_info::visit_arrayindex (arrayindex* e)
5029 {
5030
5031   symbol *array = NULL;
5032   hist_op *hist = NULL;
5033   classify_indexable(e->base, array, hist);
5034
5035   // Every hist_op has type [int]:int, that is to say, every hist_op
5036   // is a pseudo-one-dimensional integer array type indexed by
5037   // integers (bucket numbers).
5038
5039   if (hist)
5040     {
5041       if (e->indexes.size() != 1)
5042         unresolved (e->tok);
5043       t = pe_long;
5044       e->indexes[0]->visit (this);
5045       if (e->indexes[0]->type != pe_long)
5046         unresolved (e->tok);
5047       hist->visit (this);
5048       if (e->type != pe_long)
5049         {
5050           e->type = pe_long;
5051           resolved (e->tok, e->type);
5052         }
5053       return;
5054     }
5055
5056   // Now we are left with "normal" map inference and index checking.
5057
5058   assert (array);
5059   assert (array->referent != 0);
5060   resolve_2types (e, array->referent, this, t);
5061
5062   // now resolve the array indexes
5063
5064   // if (e->referent->index_types.size() == 0)
5065   //   // redesignate referent as array
5066   //   e->referent->set_arity (e->indexes.size ());
5067
5068   if (e->indexes.size() != array->referent->index_types.size())
5069     unresolved (e->tok); // symbol resolution should prevent this
5070   else for (unsigned i=0; i<e->indexes.size(); i++)
5071     {
5072       if (e->indexes[i])
5073         {
5074           expression* ee = e->indexes[i];
5075           exp_type& ft = array->referent->index_types [i];
5076           t = ft;
5077           ee->visit (this);
5078           exp_type at = ee->type;
5079
5080           if ((at == pe_string || at == pe_long) && ft == pe_unknown)
5081             {
5082               // propagate to formal type
5083               ft = at;
5084               resolved (ee->tok, ft, array->referent, i);
5085             }
5086           if (at == pe_stats)
5087             invalid (ee->tok, at);
5088           if (ft == pe_stats)
5089             invalid (ee->tok, ft);
5090           if (at != pe_unknown && ft != pe_unknown && ft != at)
5091             mismatch (ee->tok, ee->type, array->referent, i);
5092           if (at == pe_unknown)
5093               unresolved (ee->tok);
5094         }
5095     }
5096 }
5097
5098
5099 void
5100 typeresolution_info::visit_functioncall (functioncall* e)
5101 {
5102   if (e->referent == 0)
5103     throw SEMANTIC_ERROR (_F("internal error: unresolved function call to '%s'",
5104                              e->function.c_str()), e->tok);
5105
5106   resolve_2types (e, e->referent, this, t, true); // accept unknown type
5107
5108   if (e->type == pe_stats)
5109     invalid (e->tok, e->type);
5110
5111   const exp_type_ptr& func_type = e->referent->type_details;
5112   if (func_type && e->referent->type == e->type
5113       && (!e->type_details || *func_type != *e->type_details))
5114     resolved_details(e->referent->type_details, e->type_details);
5115
5116   // now resolve the function parameters
5117   if (e->args.size() != e->referent->formal_args.size())
5118     unresolved (e->tok); // symbol resolution should prevent this
5119   else for (unsigned i=0; i<e->args.size(); i++)
5120     {
5121       expression* ee = e->args[i];
5122       exp_type& ft = e->referent->formal_args[i]->type;
5123       const token* fe_tok = e->referent->formal_args[i]->tok;
5124       t = ft;
5125       ee->visit (this);
5126       exp_type at = ee->type;
5127
5128       if (((at == pe_string) || (at == pe_long)) && ft == pe_unknown)
5129         {
5130           // propagate to formal arg
5131           ft = at;
5132           resolved (ee->tok, ft, e->referent->formal_args[i], i);
5133         }
5134       if (at == pe_stats)
5135         invalid (ee->tok, at);
5136       if (ft == pe_stats)
5137         invalid (fe_tok, ft);
5138       if (at != pe_unknown && ft != pe_unknown && ft != at)
5139         mismatch (ee->tok, ee->type, e->referent->formal_args[i], i);
5140       if (at == pe_unknown)
5141         unresolved (ee->tok);
5142     }
5143 }
5144
5145
5146 void
5147 typeresolution_info::visit_block (block* e)
5148 {
5149   for (unsigned i=0; i<e->statements.size(); i++)
5150     {
5151       t = pe_unknown;
5152       e->statements[i]->visit (this);
5153     }
5154 }
5155
5156
5157 void
5158 typeresolution_info::visit_try_block (try_block* e)
5159 {
5160   if (e->try_block)
5161     e->try_block->visit (this);
5162   if (e->catch_error_var)
5163     {
5164       t = pe_string;
5165       e->catch_error_var->visit (this);
5166     }
5167   if (e->catch_block)
5168     e->catch_block->visit (this);
5169 }
5170
5171
5172 void
5173 typeresolution_info::visit_embeddedcode (embeddedcode* s)
5174 {
5175   // PR11573.  If we have survived thus far with a piece of embedded
5176   // code that requires uprobes, we need to track this.
5177   //
5178   // This is an odd place for this check, as opposed
5179   // to a separate 'optimization' pass, or c_unparser::visit_embeddedcode
5180   // over yonder in pass 3.  However, we want to do it during pass 2 so
5181   // that cached sessions also get the uprobes treatment.
5182   if (! session.need_uprobes
5183       && s->code.find("/* pragma:uprobes */") != string::npos)
5184     {
5185       if (session.verbose > 2)
5186         clog << _("Activating uprobes support because /* pragma:uprobes */ seen.") << endl;
5187       session.need_uprobes = true;
5188     }
5189
5190   // PR15065. Likewise, we need to detect /* pragma:tagged_dfa */
5191   // before the gen_dfa_table pass. Again, the typechecking part of
5192   // pass 2 is a good place for this.
5193   if (! session.need_tagged_dfa
5194       && s->code.find("/* pragma:tagged_dfa */") != string::npos)
5195     {
5196       // if (session.verbose > 2)
5197       //   clog << _F("Turning on DFA subexpressions, pragma:tagged_dfa found in %s",
5198       // current_function->name.c_str()) << endl;
5199       // session.need_tagged_dfa = true;
5200       throw SEMANTIC_ERROR (_("Tagged DFA support is not yet available"), s->tok);
5201     }
5202 }
5203
5204
5205 void
5206 typeresolution_info::visit_if_statement (if_statement* e)
5207 {
5208   t = pe_long;
5209   e->condition->visit (this);
5210
5211   t = pe_unknown;
5212   e->thenblock->visit (this);
5213
5214   if (e->elseblock)
5215     {
5216       t = pe_unknown;
5217       e->elseblock->visit (this);
5218     }
5219 }
5220
5221
5222 void
5223 typeresolution_info::visit_for_loop (for_loop* e)
5224 {
5225   t = pe_unknown;
5226   if (e->init) e->init->visit (this);
5227   t = pe_long;
5228   e->cond->visit (this);
5229   t = pe_unknown;
5230   if (e->incr) e->incr->visit (this);
5231   t = pe_unknown;
5232   e->block->visit (this);
5233 }
5234
5235
5236 void
5237 typeresolution_info::visit_foreach_loop (foreach_loop* e)
5238 {
5239   // See also visit_arrayindex.
5240   // This is different in that, being a statement, we can't assign
5241   // a type to the outer array, only propagate to/from the indexes
5242
5243   // if (e->referent->index_types.size() == 0)
5244   //   // redesignate referent as array
5245   //   e->referent->set_arity (e->indexes.size ());
5246
5247   exp_type wanted_value = pe_unknown;
5248   symbol *array = NULL;
5249   hist_op *hist = NULL;
5250   classify_indexable(e->base, array, hist);
5251
5252   if (hist)
5253     {
5254       if (e->indexes.size() != 1)
5255         unresolved (e->tok);
5256       t = pe_long;
5257       e->indexes[0]->visit (this);
5258       if (e->indexes[0]->type != pe_long)
5259         unresolved (e->tok);
5260       hist->visit (this);
5261       wanted_value = pe_long;
5262     }
5263   else
5264     {
5265       assert (array);
5266       if (e->indexes.size() != array->referent->index_types.size())
5267         unresolved (e->tok); // symbol resolution should prevent this
5268       else
5269         {
5270           for (unsigned i=0; i<e->indexes.size(); i++)
5271             {
5272               expression* ee = e->indexes[i];
5273               exp_type& ft = array->referent->index_types [i];
5274               t = ft;
5275               ee->visit (this);
5276               exp_type at = ee->type;
5277
5278               if ((at == pe_string || at == pe_long) && ft == pe_unknown)
5279                 {
5280                   // propagate to formal type
5281                   ft = at;
5282                   resolved (ee->tok, ee->type, array->referent, i);
5283                 }
5284               if (at == pe_stats)
5285                 invalid (ee->tok, at);
5286               if (ft == pe_stats)
5287                 invalid (ee->tok, ft);
5288               if (at != pe_unknown && ft != pe_unknown && ft != at)
5289                 mismatch (ee->tok, ee->type, array->referent, i);
5290               if (at == pe_unknown)
5291                 unresolved (ee->tok);
5292             }
5293           for (unsigned i=0; i<e->array_slice.size(); i++)
5294             if (e->array_slice[i])
5295               {
5296                 expression* ee = e->array_slice[i];
5297                 exp_type& ft = array->referent->index_types [i];
5298                 t = ft;
5299                 ee->visit (this);
5300                 exp_type at = ee->type;
5301
5302                 if ((at == pe_string || at == pe_long) && ft == pe_unknown)
5303                   {
5304                     // propagate to formal type
5305                     ft = at;
5306                     resolved (ee->tok, ee->type, array->referent, i);
5307                   }
5308                 if (at == pe_stats)
5309                   invalid (ee->tok, at);
5310                 if (ft == pe_stats)
5311                   invalid (ee->tok, ft);
5312                 if (at != pe_unknown && ft != pe_unknown && ft != at)
5313                   mismatch (ee->tok, ee->type, array->referent, i);
5314                 if (at == pe_unknown)
5315                   unresolved (ee->tok);
5316               }
5317         }
5318       t = pe_unknown;
5319       array->visit (this);
5320       wanted_value = array->type;
5321     }
5322
5323   if (e->value)
5324     {
5325       if (wanted_value == pe_stats)
5326         invalid(e->value->tok, wanted_value);
5327       else if (wanted_value != pe_unknown)
5328         check_arg_type(wanted_value, e->value);
5329       else
5330         {
5331           t = pe_unknown;
5332           e->value->visit (this);
5333         }
5334     }
5335
5336   /* Prevent @sum etc. aggregate sorting on non-statistics arrays. */
5337   if (wanted_value != pe_unknown)
5338     if (e->sort_aggr != sc_none && wanted_value != pe_stats)
5339       invalid (array->tok, wanted_value);
5340
5341   if (e->limit)
5342     {
5343       t = pe_long;
5344       e->limit->visit (this);
5345     }
5346
5347   t = pe_unknown;
5348   e->block->visit (this);
5349 }
5350
5351
5352 void
5353 typeresolution_info::visit_null_statement (null_statement*)
5354 {
5355 }
5356
5357
5358 void
5359 typeresolution_info::visit_expr_statement (expr_statement* e)
5360 {
5361   t = pe_unknown;
5362   e->value->visit (this);
5363 }
5364
5365
5366 struct delete_statement_typeresolution_info:
5367   public throwing_visitor
5368 {
5369   typeresolution_info *parent;
5370   delete_statement_typeresolution_info (typeresolution_info *p):
5371     throwing_visitor (_("invalid operand of delete expression")),
5372     parent (p)
5373   {}
5374
5375   void visit_arrayindex (arrayindex* e)
5376   {
5377     parent->visit_arrayindex (e);
5378   }
5379
5380   void visit_symbol (symbol* e)
5381   {
5382     exp_type ignored = pe_unknown;
5383     assert (e->referent != 0);
5384     resolve_2types (e, e->referent, parent, ignored);
5385   }
5386 };
5387
5388
5389 void
5390 typeresolution_info::visit_delete_statement (delete_statement* e)
5391 {
5392   delete_statement_typeresolution_info di (this);
5393   t = pe_unknown;
5394   e->value->visit (&di);
5395 }
5396
5397
5398 void
5399 typeresolution_info::visit_next_statement (next_statement*)
5400 {
5401 }
5402
5403
5404 void
5405 typeresolution_info::visit_break_statement (break_statement*)
5406 {
5407 }
5408
5409
5410 void
5411 typeresolution_info::visit_continue_statement (continue_statement*)
5412 {
5413 }
5414
5415
5416 void
5417 typeresolution_info::visit_array_in (array_in* e)
5418 {
5419   // all unary operators only work on numerics
5420   exp_type t1 = t;
5421   t = pe_unknown; // array value can be anything
5422   e->operand->visit (this);
5423
5424   if (t1 == pe_unknown && e->type != pe_unknown)
5425     ; // already resolved
5426   else if (t1 == pe_string || t1 == pe_stats)
5427     mismatch (e->tok, t1, pe_long);
5428   else if (e->type == pe_unknown)
5429     {
5430       e->type = pe_long;
5431       resolved (e->tok, e->type);
5432     }
5433 }
5434
5435
5436 void
5437 typeresolution_info::visit_return_statement (return_statement* e)
5438 {
5439   // This is like symbol, where the referent is
5440   // the return value of the function.
5441
5442   // translation pass will print error
5443   if (current_function == 0)
5444     return;
5445
5446   exp_type& e_type = current_function->type;
5447   t = current_function->type;
5448   e->value->visit (this);
5449
5450   if (e_type != pe_unknown && e->value->type != pe_unknown
5451       && e_type != e->value->type)
5452     mismatch (e->value->tok, e->value->type, current_function);
5453   if (e_type == pe_unknown &&
5454       (e->value->type == pe_long || e->value->type == pe_string))
5455     {
5456       // propagate non-statistics from value
5457       e_type = e->value->type;
5458       resolved (e->value->tok, e_type, current_function);
5459     }
5460   if (e->value->type == pe_stats)
5461     invalid (e->value->tok, e->value->type);
5462
5463   const exp_type_ptr& value_type = e->value->type_details;
5464   if (value_type && current_function->type == e->value->type)
5465     {
5466       exp_type_ptr& func_type = current_function->type_details;
5467       if (!func_type)
5468         // The function can take on the type details of the return value.
5469         resolved_details(value_type, func_type);
5470       else if (*func_type != *value_type && *func_type != *null_type)
5471         // Conflicting return types?  NO TYPE FOR YOU!
5472         resolved_details(null_type, func_type);
5473     }
5474 }
5475
5476 void
5477 typeresolution_info::visit_print_format (print_format* e)
5478 {
5479   size_t unresolved_args = 0;
5480
5481   if (e->hist)
5482     {
5483       e->hist->visit(this);
5484     }
5485
5486   else if (e->print_with_format)
5487     {
5488       // If there's a format string, we can do both inference *and*
5489       // checking.
5490
5491       // First we extract the subsequence of formatting components
5492       // which are conversions (not just literal string components)
5493
5494       unsigned expected_num_args = 0;
5495       std::vector<print_format::format_component> components;
5496       for (size_t i = 0; i < e->components.size(); ++i)
5497         {
5498           if (e->components[i].type == print_format::conv_unspecified)
5499             throw SEMANTIC_ERROR (_("Unspecified conversion in print operator format string"),
5500                                   e->tok);
5501           else if (e->components[i].type == print_format::conv_literal)
5502             continue;
5503           components.push_back(e->components[i]);
5504           ++expected_num_args;
5505           if (e->components[i].widthtype == print_format::width_dynamic)
5506             ++expected_num_args;
5507           if (e->components[i].prectype == print_format::prec_dynamic)
5508             ++expected_num_args;
5509         }
5510
5511       // Then we check that the number of conversions and the number
5512       // of args agree.
5513
5514       if (expected_num_args != e->args.size())
5515         throw SEMANTIC_ERROR (_("Wrong number of args to formatted print operator"),
5516                               e->tok);
5517
5518       // Then we check that the types of the conversions match the types
5519       // of the args.
5520       unsigned argno = 0;
5521       for (size_t i = 0; i < components.size(); ++i)
5522         {
5523           // Check the dynamic width, if specified
5524           if (components[i].widthtype == print_format::width_dynamic)
5525             {
5526               check_arg_type (pe_long, e->args[argno]);
5527               ++argno;
5528             }
5529
5530           // Check the dynamic precision, if specified
5531           if (components[i].prectype == print_format::prec_dynamic)
5532             {
5533               check_arg_type (pe_long, e->args[argno]);
5534               ++argno;
5535             }
5536
5537           exp_type wanted = pe_unknown;
5538
5539           switch (components[i].type)
5540             {
5541             case print_format::conv_unspecified:
5542             case print_format::conv_literal:
5543               assert (false);
5544               break;
5545
5546             case print_format::conv_pointer:
5547             case print_format::conv_number:
5548             case print_format::conv_binary:
5549             case print_format::conv_char:
5550             case print_format::conv_memory:
5551             case print_format::conv_memory_hex:
5552               wanted = pe_long;
5553               break;
5554
5555             case print_format::conv_string:
5556               wanted = pe_string;
5557               break;
5558             }
5559
5560           assert (wanted != pe_unknown);
5561           check_arg_type (wanted, e->args[argno]);
5562           ++argno;
5563         }
5564     }
5565   else
5566     {
5567       // Without a format string, the best we can do is require that
5568       // each argument resolve to a concrete type.
5569       for (size_t i = 0; i < e->args.size(); ++i)
5570         {
5571           t = pe_unknown;
5572           e->args[i]->visit (this);
5573           if (e->args[i]->type == pe_unknown)
5574             {
5575               unresolved (e->args[i]->tok);
5576               ++unresolved_args;
5577             }
5578         }
5579     }
5580
5581   if (unresolved_args == 0)
5582     {
5583       if (e->type == pe_unknown)
5584         {
5585           if (e->print_to_stream)
5586             e->type = pe_long;
5587           else
5588             e->type = pe_string;
5589           resolved (e->tok, e->type);
5590         }
5591     }
5592   else
5593     {
5594       e->type = pe_unknown;
5595       unresolved (e->tok);
5596     }
5597 }
5598
5599
5600 void
5601 typeresolution_info::visit_stat_op (stat_op* e)
5602 {
5603   t = pe_stats;
5604   e->stat->visit (this);
5605   if (e->type == pe_unknown)
5606     {
5607       e->type = pe_long;
5608       resolved (e->tok, e->type);
5609     }
5610   else if (e->type != pe_long)
5611     mismatch (e->tok, pe_long, e->type);
5612 }
5613
5614 void
5615 typeresolution_info::visit_hist_op (hist_op* e)
5616 {
5617   t = pe_stats;
5618   e->stat->visit (this);
5619 }
5620
5621
5622 void
5623 typeresolution_info::check_arg_type (exp_type wanted, expression* arg)
5624 {
5625   t = wanted;
5626   arg->visit (this);
5627
5628   if (arg->type == pe_unknown)
5629     {
5630       arg->type = wanted;
5631       resolved (arg->tok, arg->type);
5632     }
5633   else if (arg->type != wanted)
5634     {
5635       mismatch (arg->tok, wanted, arg->type);
5636     }
5637 }
5638
5639
5640 void
5641 typeresolution_info::check_local (vardecl* v)
5642 {
5643   if (v->arity != 0)
5644     {
5645       num_still_unresolved ++;
5646       if (assert_resolvability)
5647         session.print_error
5648           (SEMANTIC_ERROR (_("array locals not supported, missing global declaration? "), v->tok));
5649     }
5650
5651   if (v->type == pe_unknown)
5652     unresolved (v->tok);
5653   else if (v->type == pe_stats)
5654     {
5655       num_still_unresolved ++;
5656       if (assert_resolvability)
5657         session.print_error
5658           (SEMANTIC_ERROR (_("stat locals not supported, missing global declaration? "), v->tok));
5659     }
5660   else if (!(v->type == pe_long || v->type == pe_string))
5661     invalid (v->tok, v->type);
5662 }
5663
5664
5665 void
5666 typeresolution_info::unresolved (const token* tok)
5667 {
5668   num_still_unresolved ++;
5669
5670   if (assert_resolvability && mismatch_complexity <= 0)
5671     {
5672       stringstream msg;
5673       msg << _("unresolved type ");
5674       session.print_error (SEMANTIC_ERROR (msg.str(), tok));
5675     }
5676 }
5677
5678
5679 void
5680 typeresolution_info::invalid (const token* tok, exp_type pe)
5681 {
5682   num_still_unresolved ++;
5683
5684   if (assert_resolvability)
5685     {
5686       stringstream msg;
5687       if (tok && tok->type == tok_operator)
5688         msg << _("invalid operator");
5689       else
5690         msg << _("invalid type ") << pe;
5691       session.print_error (SEMANTIC_ERROR (msg.str(), tok));
5692     }
5693 }
5694
5695 void
5696 typeresolution_info::mismatch (const binary_expression* e)
5697 {
5698   num_still_unresolved ++;
5699
5700   if (assert_resolvability && mismatch_complexity <= 1)
5701     {
5702       stringstream msg;
5703       msg << _F("type mismatch: left and right sides don't agree (%s vs %s)",
5704                 lex_cast(e->left->type).c_str(), lex_cast(e->right->type).c_str());
5705       session.print_error (SEMANTIC_ERROR (msg.str(), e->tok));
5706     }
5707   else if (!assert_resolvability)
5708     mismatch_complexity = max(1, mismatch_complexity);
5709 }
5710
5711 /* tok   token where mismatch occurred
5712  * t1    type we expected (the 'good' type)
5713  * t2    type we received (the 'bad' type)
5714  * */
5715 void
5716 typeresolution_info::mismatch (const token* tok, exp_type t1, exp_type t2)
5717 {
5718   num_still_unresolved ++;
5719
5720   if (assert_resolvability && mismatch_complexity <= 2)
5721     {
5722       stringstream msg;
5723       msg << _F("type mismatch: expected %s", lex_cast(t1).c_str());
5724       if (t2 != pe_unknown)
5725         msg << _F(" but found %s", lex_cast(t2).c_str());
5726       session.print_error (SEMANTIC_ERROR (msg.str(), tok));
5727     }
5728   else if (!assert_resolvability)
5729     mismatch_complexity = max(2, mismatch_complexity);
5730 }
5731
5732 /* tok   token where the mismatch happened
5733  * type  type we received (the 'bad' type)
5734  * decl  declaration of mismatched symbol
5735  * index if index-based (array index or function arg)
5736  * */
5737 void
5738 typeresolution_info::mismatch (const token *tok, exp_type type,
5739                                const symboldecl* decl, int index)
5740 {
5741   num_still_unresolved ++;
5742
5743   if (assert_resolvability && mismatch_complexity <= 3)
5744     {
5745       assert(decl != NULL);
5746
5747       // If mismatch is against a function parameter from within the function
5748       // itself (rather than a function call), then the index will be -1. We
5749       // check here if the decl corresponds to one of the params and if so,
5750       // adjust the index.
5751       if (current_function != NULL && index == -1)
5752         {
5753           vector<vardecl*>& args = current_function->formal_args;
5754           for (unsigned i = 0; i < args.size() && index < 0; i++)
5755             if (args[i] == decl)
5756               index = i;
5757         }
5758
5759       // get the declaration's original type and token
5760       const resolved_type *original = NULL;
5761       for (vector<resolved_type>::const_iterator it = resolved_types.begin();
5762            it != resolved_types.end() && original == NULL; ++it)
5763         {
5764           if (it->decl == decl && it->index == index)
5765             original = &(*it);
5766         }
5767
5768       // print basic mismatch msg if we couldn't find the decl (this can happen
5769       // for explicitly typed decls e.g. myvar:long or for fabricated (already
5770       // resolved) decls e.g. __perf_read_*)
5771       if (original == NULL)
5772         {
5773           session.print_error (SEMANTIC_ERROR (
5774             _F("type mismatch: expected %s but found %s",
5775                lex_cast(type).c_str(),
5776                lex_cast(decl->type).c_str()),
5777             tok));
5778           return;
5779         }
5780
5781       // print where mismatch happened and chain with origin of decl type
5782       // resolution
5783       stringstream msg;
5784
5785       if (index >= 0)
5786         msg << _F("index %d ", index);
5787       msg << _F("type mismatch (%s)", lex_cast(type).c_str());
5788       semantic_error err(ERR_SRC, msg.str(), tok);
5789
5790       stringstream chain_msg;
5791       chain_msg << _("type");
5792       if (index >= 0)
5793         chain_msg << _F(" of index %d", index);
5794       chain_msg << _F(" was first inferred here (%s)",
5795                       lex_cast(decl->type).c_str());
5796       semantic_error chain(ERR_SRC, chain_msg.str(), original->tok);
5797
5798       err.set_chain(chain);
5799       session.print_error (err);
5800     }
5801   else if (!assert_resolvability)
5802     mismatch_complexity = max(3, mismatch_complexity);
5803 }
5804
5805
5806 /* tok   token where resolution occurred
5807  * type  type to which we resolved
5808  * decl  declaration of resolved symbol
5809  * index if index-based (array index or function arg)
5810  * */
5811 void
5812 typeresolution_info::resolved (const token *tok, exp_type type,
5813                                const symboldecl* decl, int index)
5814 {
5815   num_newly_resolved ++;
5816
5817   // We only use the resolved_types vector to give better mismatch messages
5818   // involving symbols. So don't bother adding it if we're not given a decl
5819   if (decl != NULL)
5820     {
5821       // As a fail-safe, if the decl & index is already in the vector, then
5822       // modify it instead of adding another one to ensure uniqueness. This
5823       // should never happen since we only call resolved once for each decl &
5824       // index, but better safe than sorry. (IE. if it does happen, better have
5825       // the latest resolution info for better mismatch reporting later).
5826       for (unsigned i = 0; i < resolved_types.size(); i++)
5827         {
5828           if (resolved_types[i].decl == decl
5829               && resolved_types[i].index == index)
5830             {
5831               resolved_types[i].tok = tok;
5832               return;
5833             }
5834         }
5835       resolved_type res(tok, decl, index);
5836       resolved_types.push_back(res);
5837     }
5838 }
5839
5840 void
5841 typeresolution_info::resolved_details (const exp_type_ptr& src,
5842                                        exp_type_ptr& dest)
5843 {
5844   num_newly_resolved ++;
5845   dest = src;
5846 }
5847
5848 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */