elaborate.cxx

   1 // elaboration functions
   2 // Copyright (C) 2005-2015 Red Hat Inc.
   3 // Copyright (C) 2008 Intel Corporation
   4 //
   5 // This file is part of systemtap, and is free software.  You can
   6 // redistribute it and/or modify it under the terms of the GNU General
   7 // Public License (GPL); either version 2, or (at your option) any
   8 // later version.
   9
  10 #include "config.h"
  11 #include "elaborate.h"
  12 #include "translate.h"
  13 #include "parse.h"
  14 #include "tapsets.h"
  15 #include "session.h"
  16 #include "util.h"
  17 #include "task_finder.h"
  18 #include "stapregex.h"
  19 #include "stringtable.h"
  20
  21 extern "C" {
  22 #include <sys/utsname.h>
  23 #include <fnmatch.h>
  24 #define __STDC_FORMAT_MACROS
  25 #include <inttypes.h>
  26 }
  27
  28 #include <algorithm>
  29 #include <fstream>
  30 #include <map>
  31 #include <cassert>
  32 #include <set>
  33 #include <vector>
  34 #include <algorithm>
  35 #include <iterator>
  36 #include <climits>
  37
  38
  39 using namespace std;
  40
  41
  42 // ------------------------------------------------------------------------
  43
  44 // Used in probe_point condition construction.  Either argument may be
  45 // NULL; if both, return NULL too.  Resulting expression is a deep
  46 // copy for symbol resolution purposes.
  47 expression* add_condition (expression* a, expression* b)
  48 {
  49   if (!a && !b) return 0;
  50   if (! a) return deep_copy_visitor::deep_copy(b);
  51   if (! b) return deep_copy_visitor::deep_copy(a);
  52   logical_and_expr la;
  53   la.op = "&&";
  54   la.left = a;
  55   la.right = b;
  56   la.tok = a->tok; // or could be b->tok
  57   return deep_copy_visitor::deep_copy(& la);
  58 }
  59
  60 // ------------------------------------------------------------------------
  61
  62
  63
  64 derived_probe::derived_probe (probe *p, probe_point *l, bool rewrite_loc):
  65   base (p), base_pp(l), group(NULL), sdt_semaphore_addr(0),
  66   session_index((unsigned)-1)
  67 {
  68   assert (p);
  69   this->tok = p->tok;
  70   this->privileged = p->privileged;
  71   this->body = deep_copy_visitor::deep_copy(p->body);
  72
  73   assert (l);
  74   // make a copy for subclasses which want to rewrite the location
  75   if (rewrite_loc)
  76     l = new probe_point(*l);
  77   this->locations.push_back (l);
  78 }
  79
  80
  81 void
  82 derived_probe::printsig (ostream& o) const
  83 {
  84   probe::printsig (o);
  85   printsig_nested (o);
  86 }
  87
  88 void
  89 derived_probe::printsig_nested (ostream& o) const
  90 {
  91   // We'd like to enclose the probe derivation chain in a /* */
  92   // comment delimiter.  But just printing /* base->printsig() */ is
  93   // not enough, since base might itself be a derived_probe.  So we,
  94   // er, "cleverly" encode our nesting state as a formatting flag for
  95   // the ostream.
  96   ios::fmtflags f = o.flags (ios::internal);
  97   if (f & ios::internal)
  98     {
  99       // already nested
 100       o << " <- ";
 101       base->printsig (o);
 102     }
 103   else
 104     {
 105       // outermost nesting
 106       o << " /* <- ";
 107       base->printsig (o);
 108       o << " */";
 109     }
 110   // restore flags
 111   (void) o.flags (f);
 112 }
 113
 114
 115 void
 116 derived_probe::collect_derivation_chain (std::vector<probe*> &probes_list) const
 117 {
 118   probes_list.push_back(const_cast<derived_probe*>(this));
 119   base->collect_derivation_chain(probes_list);
 120 }
 121
 122
 123 void
 124 derived_probe::collect_derivation_pp_chain (std::vector<probe_point*> &pp_list) const
 125 {
 126   pp_list.push_back(const_cast<probe_point*>(this->sole_location()));
 127   base->collect_derivation_pp_chain(pp_list);
 128 }
 129
 130
 131 string
 132 derived_probe::derived_locations (bool firstFrom)
 133 {
 134   ostringstream o;
 135   vector<probe_point*> reference_point;
 136   collect_derivation_pp_chain(reference_point);
 137   if (reference_point.size() > 0)
 138     for(unsigned i=1; i<reference_point.size(); ++i)
 139       {
 140         if (firstFrom || i>1)
 141           o << " from: ";
 142         o << reference_point[i]->str(false); // no ?,!,etc
 143       }
 144   return o.str();
 145 }
 146
 147
 148 probe_point*
 149 derived_probe::sole_location () const
 150 {
 151   if (locations.size() == 0 || locations.size() > 1)
 152     throw SEMANTIC_ERROR (_N("derived_probe with no locations",
 153                              "derived_probe with too many locations",
 154                              locations.size()), this->tok);
 155   else
 156     return locations[0];
 157 }
 158
 159
 160 probe_point*
 161 derived_probe::script_location () const
 162 {
 163   // This feeds function::pn() in the tapset, which is documented as the
 164   // script-level probe point expression, *after wildcard expansion*.
 165   vector<probe_point*> chain;
 166   collect_derivation_pp_chain (chain);
 167
 168   // Go backwards until we hit the first well-formed probe point
 169   for (int i=chain.size()-1; i>=0; i--)
 170     if (chain[i]->well_formed)
 171       return chain[i];
 172
 173   // If that didn't work, just fallback to -something-.
 174   return sole_location();
 175 }
 176
 177
 178 void
 179 derived_probe::emit_privilege_assertion (translator_output* o)
 180 {
 181   // Emit code which will cause compilation to fail if it is compiled in
 182   // unprivileged mode.
 183   o->newline() << "#if ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPDEV) && \\";
 184   o->newline() << "    ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPSYS)";
 185   o->newline() << "#error Internal Error: Probe ";
 186   probe::printsig (o->line());
 187   o->line()    << " generated in --unprivileged mode";
 188   o->newline() << "#endif";
 189 }
 190
 191
 192 void
 193 derived_probe::emit_process_owner_assertion (translator_output* o)
 194 {
 195   // Emit code which will abort should the current target not belong to the
 196   // user in unprivileged mode.
 197   o->newline() << "#if ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPDEV) && \\";
 198   o->newline() << "    ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPSYS)";
 199   o->newline(1)  << "if (! is_myproc ()) {";
 200   o->newline(1)  << "snprintf(c->error_buffer, sizeof(c->error_buffer),";
 201   o->newline()   << "         \"Internal Error: Process %d does not belong to user %d in probe %s in --unprivileged mode\",";
 202   o->newline()   << "         current->tgid, _stp_uid, c->probe_point);";
 203   o->newline()   << "c->last_error = c->error_buffer;";
 204   // NB: since this check occurs before probe locking, its exit should
 205   // not be a "goto out", which would attempt unlocking.
 206   o->newline()   << "return;";
 207   o->newline(-1) << "}";
 208   o->newline(-1) << "#endif";
 209 }
 210
 211 void
 212 derived_probe::print_dupe_stamp_unprivileged(ostream& o)
 213 {
 214   o << _("unprivileged users: authorized") << endl;
 215 }
 216
 217 void
 218 derived_probe::print_dupe_stamp_unprivileged_process_owner(ostream& o)
 219 {
 220   o << _("unprivileged users: authorized for process owner") << endl;
 221 }
 222
 223 // ------------------------------------------------------------------------
 224 // Members of derived_probe_builder
 225
 226 void
 227 derived_probe_builder::build_with_suffix(systemtap_session & sess,
 228                                          probe * use,
 229                                          probe_point * location,
 230                                          literal_map_t const & parameters,
 231                                          std::vector<derived_probe *>
 232                                            & finished_results,
 233                                          std::vector<probe_point::component *>
 234                                            const & suffix) {
 235   // XXX perhaps build the probe if suffix is empty?
 236   // if (suffix.empty()) {
 237   //   build (sess, use, location, parameters, finished_results);
 238   //   return;
 239   // }
 240   throw SEMANTIC_ERROR (_("invalid suffix for probe"));
 241 }
 242
 243 bool
 244 derived_probe_builder::get_param (literal_map_t const & params,
 245                                   interned_string key,
 246                                   interned_string& value)
 247 {
 248   literal_map_t::const_iterator i = params.find (key);
 249   if (i == params.end())
 250     return false;
 251   literal_string * ls = dynamic_cast<literal_string *>(i->second);
 252   if (!ls)
 253     return false;
 254   value = ls->value;
 255   return true;
 256 }
 257
 258
 259 bool
 260 derived_probe_builder::get_param (literal_map_t const & params,
 261                                   interned_string key,
 262                                   int64_t& value)
 263 {
 264   literal_map_t::const_iterator i = params.find (key);
 265   if (i == params.end())
 266     return false;
 267   if (i->second == NULL)
 268     return false;
 269   literal_number * ln = dynamic_cast<literal_number *>(i->second);
 270   if (!ln)
 271     return false;
 272   value = ln->value;
 273   return true;
 274 }
 275
 276
 277 bool
 278 derived_probe_builder::has_null_param (literal_map_t const & params,
 279                                        interned_string key)
 280 {
 281   literal_map_t::const_iterator i = params.find(key);
 282   return (i != params.end() && i->second == NULL);
 283 }
 284
 285 bool
 286 derived_probe_builder::has_param (literal_map_t const & params,
 287                                   interned_string key)
 288 {
 289   return (params.find(key) != params.end());
 290 }
 291
 292 // ------------------------------------------------------------------------
 293 // Members of match_key.
 294
 295 match_key::match_key(interned_string n)
 296   : name(n),
 297     have_parameter(false),
 298     parameter_type(pe_unknown)
 299 {
 300 }
 301
 302 match_key::match_key(probe_point::component const & c)
 303   : name(c.functor),
 304     have_parameter(c.arg != NULL),
 305     parameter_type(c.arg ? c.arg->type : pe_unknown)
 306 {
 307 }
 308
 309 match_key &
 310 match_key::with_number()
 311 {
 312   have_parameter = true;
 313   parameter_type = pe_long;
 314   return *this;
 315 }
 316
 317 match_key &
 318 match_key::with_string()
 319 {
 320   have_parameter = true;
 321   parameter_type = pe_string;
 322   return *this;
 323 }
 324
 325 string
 326 match_key::str() const
 327 {
 328   string n = name;
 329   if (have_parameter)
 330     switch (parameter_type)
 331       {
 332       case pe_string: return n + "(string)";
 333       case pe_long: return n + "(number)";
 334       default: return n + "(...)";
 335       }
 336   return n;
 337 }
 338
 339 bool
 340 match_key::operator<(match_key const & other) const
 341 {
 342   return ((name < other.name)
 343
 344           || (name == other.name
 345               && have_parameter < other.have_parameter)
 346
 347           || (name == other.name
 348               && have_parameter == other.have_parameter
 349               && parameter_type < other.parameter_type));
 350 }
 351
 352
 353 // NB: these are only used in the probe point name components, where
 354 // only "*" is permitted.
 355 //
 356 // Within module("bar"), function("foo"), process("baz") strings, real
 357 // wildcards are permitted too. See also util.h:contains_glob_chars
 358
 359 static bool
 360 isglob(interned_string str)
 361 {
 362   return(str.find('*') != str.npos);
 363 }
 364
 365 static bool
 366 isdoubleglob(interned_string str)
 367 {
 368   return(str.find("**") != str.npos);
 369 }
 370
 371 bool
 372 match_key::globmatch(match_key const & other) const
 373 {
 374   const string & name_str = name;
 375   const string & other_str = other.name;
 376
 377   return ((fnmatch(name_str.c_str(), other_str.c_str(), FNM_NOESCAPE) == 0)
 378           && have_parameter == other.have_parameter
 379           && parameter_type == other.parameter_type);
 380 }
 381
 382 // ------------------------------------------------------------------------
 383 // Members of match_node
 384 // ------------------------------------------------------------------------
 385
 386 match_node::match_node() :
 387   privilege(privilege_t (pr_stapdev | pr_stapsys))
 388 {
 389 }
 390
 391 match_node *
 392 match_node::bind(match_key const & k)
 393 {
 394   if (k.name == "*")
 395     throw SEMANTIC_ERROR(_("invalid use of wildcard probe point component"));
 396
 397   map<match_key, match_node *>::const_iterator i = sub.find(k);
 398   if (i != sub.end())
 399     return i->second;
 400   match_node * n = new match_node();
 401   sub.insert(make_pair(k, n));
 402   return n;
 403 }
 404
 405 void
 406 match_node::bind(derived_probe_builder * e)
 407 {
 408   ends.push_back (e);
 409 }
 410
 411 match_node *
 412 match_node::bind(interned_string k)
 413 {
 414   return bind(match_key(k));
 415 }
 416
 417 match_node *
 418 match_node::bind_str(string const & k)
 419 {
 420   return bind(match_key(k).with_string());
 421 }
 422
 423 match_node *
 424 match_node::bind_num(string const & k)
 425 {
 426   return bind(match_key(k).with_number());
 427 }
 428
 429 match_node *
 430 match_node::bind_privilege(privilege_t p)
 431 {
 432   privilege = p;
 433   return this;
 434 }
 435
 436 void
 437 match_node::find_and_build (systemtap_session& s,
 438                             probe* p, probe_point *loc, unsigned pos,
 439                             vector<derived_probe *>& results)
 440 {
 441   assert (pos <= loc->components.size());
 442   if (pos == loc->components.size()) // matched all probe point components so far
 443     {
 444       if (ends.empty())
 445         {
 446           string alternatives;
 447           for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 448             alternatives += string(" ") + i->first.str();
 449
 450           throw SEMANTIC_ERROR (_F("probe point truncated (follow: %s)",
 451                                    alternatives.c_str()),
 452                                    loc->components.back()->tok);
 453         }
 454
 455       if (! pr_contains (privilege, s.privilege))
 456         {
 457           throw SEMANTIC_ERROR (_F("probe point is not allowed for --privilege=%s",
 458                                    pr_name (s.privilege)),
 459                                 loc->components.back()->tok);
 460         }
 461
 462       literal_map_t param_map;
 463       for (unsigned i=0; i<pos; i++)
 464         param_map[loc->components[i]->functor] = loc->components[i]->arg;
 465       // maybe 0
 466
 467       // Iterate over all bound builders
 468       for (unsigned k=0; k<ends.size(); k++)
 469         {
 470           derived_probe_builder *b = ends[k];
 471           b->build (s, p, loc, param_map, results);
 472         }
 473     }
 474   else if (isdoubleglob(loc->components[pos]->functor)) // ** wildcard?
 475     {
 476       unsigned int num_results = results.size();
 477
 478       // When faced with "foo**bar", we try "foo*bar" and "foo*.**bar"
 479
 480       const probe_point::component *comp = loc->components[pos];
 481       string functor = comp->functor;
 482       size_t glob_start = functor.find("**");
 483       size_t glob_end = functor.find_first_not_of('*', glob_start);
 484       string prefix = functor.substr(0, glob_start);
 485       string suffix = ((glob_end != string::npos) ?
 486                            functor.substr(glob_end) : "");
 487
 488       // Synthesize "foo*bar"
 489       probe_point *simple_pp = new probe_point(*loc);
 490       probe_point::component *simple_comp = new probe_point::component(*comp);
 491       simple_comp->functor = prefix + "*" + suffix;
 492       simple_comp->from_glob = true;
 493       simple_pp->components[pos] = simple_comp;
 494       try
 495         {
 496           find_and_build (s, p, simple_pp, pos, results);
 497         }
 498       catch (const semantic_error& e)
 499         {
 500           // Ignore semantic_errors.
 501         }
 502
 503       // Cleanup if we didn't find anything
 504       if (results.size() == num_results)
 505         {
 506           delete simple_pp;
 507           delete simple_comp;
 508         }
 509
 510       num_results = results.size();
 511
 512       // Synthesize "foo*.**bar"
 513       // NB: any component arg should attach to the latter part only
 514       probe_point *expanded_pp = new probe_point(*loc);
 515       probe_point::component *expanded_comp_pre = new probe_point::component(*comp);
 516       expanded_comp_pre->functor = prefix + "*";
 517       expanded_comp_pre->from_glob = true;
 518       expanded_comp_pre->arg = NULL;
 519       probe_point::component *expanded_comp_post = new probe_point::component(*comp);
 520       expanded_comp_post->functor = string("**") + suffix;
 521       expanded_pp->components[pos] = expanded_comp_pre;
 522       expanded_pp->components.insert(expanded_pp->components.begin() + pos + 1,
 523                                      expanded_comp_post);
 524       try
 525         {
 526           find_and_build (s, p, expanded_pp, pos, results);
 527         }
 528       catch (const semantic_error& e)
 529         {
 530           // Ignore semantic_errors.
 531         }
 532
 533       // Cleanup if we didn't find anything
 534       if (results.size() == num_results)
 535         {
 536           delete expanded_pp;
 537           delete expanded_comp_pre;
 538           delete expanded_comp_post;
 539         }
 540
 541       // Try suffix expansion only if no matches found:
 542       if (num_results == results.size())
 543         this->try_suffix_expansion (s, p, loc, pos, results);
 544
 545       if (! loc->optional && num_results == results.size())
 546         {
 547           // We didn't find any wildcard matches (since the size of
 548           // the result vector didn't change).  Throw an error.
 549           string sugs = suggest_functors(s, functor);
 550           throw SEMANTIC_ERROR (_F("probe point mismatch: didn't find any wildcard matches%s",
 551                                    sugs.empty() ? "" : (" (similar: " + sugs + ")").c_str()),
 552                                 comp->tok);
 553         }
 554     }
 555   else if (isglob(loc->components[pos]->functor)) // wildcard?
 556     {
 557       match_key match (* loc->components[pos]);
 558
 559       // Call find_and_build for each possible match.  Ignore errors -
 560       // unless we don't find any match.
 561       unsigned int num_results = results.size();
 562       for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 563         {
 564           const match_key& subkey = i->first;
 565           match_node* subnode = i->second;
 566
 567           assert_no_interrupts();
 568
 569           if (match.globmatch(subkey))
 570             {
 571               if (s.verbose > 2)
 572                 clog << _F("wildcard '%s' matched '%s'",
 573                            loc->components[pos]->functor.to_string().c_str(),
 574                            subkey.name.to_string().c_str()) << endl;
 575
 576               // When we have a wildcard, we need to create a copy of
 577               // the probe point.  Then we'll create a copy of the
 578               // wildcard component, and substitute the non-wildcard
 579               // functor.
 580               probe_point *non_wildcard_pp = new probe_point(*loc);
 581               probe_point::component *non_wildcard_component
 582                 = new probe_point::component(*loc->components[pos]);
 583               non_wildcard_component->functor = subkey.name;
 584               non_wildcard_component->from_glob = true;
 585               non_wildcard_pp->components[pos] = non_wildcard_component;
 586
 587               // NB: probe conditions are not attached at the wildcard
 588               // (component/functor) level, but at the overall
 589               // probe_point level.
 590
 591               unsigned int inner_results = results.size();
 592
 593               // recurse (with the non-wildcard probe point)
 594               try
 595                 {
 596                   subnode->find_and_build (s, p, non_wildcard_pp, pos+1,
 597                                            results);
 598                 }
 599               catch (const semantic_error& e)
 600                 {
 601                   // Ignore semantic_errors while expanding wildcards.
 602                   // If we get done and nothing was expanded, the code
 603                   // following the loop will complain.
 604                 }
 605
 606               if (results.size() == inner_results)
 607                 {
 608                   // If this wildcard didn't match, cleanup.
 609                   delete non_wildcard_pp;
 610                   delete non_wildcard_component;
 611                 }
 612             }
 613         }
 614
 615       // Try suffix expansion only if no matches found:
 616       if (num_results == results.size())
 617         this->try_suffix_expansion (s, p, loc, pos, results);
 618
 619       if (! loc->optional && num_results == results.size())
 620         {
 621           // We didn't find any wildcard matches (since the size of
 622           // the result vector didn't change).  Throw an error.
 623           string sugs = suggest_functors(s, loc->components[pos]->functor);
 624           throw SEMANTIC_ERROR (_F("probe point mismatch: didn't find any wildcard matches%s",
 625                                    sugs.empty() ? "" : (" (similar: " + sugs + ")").c_str()),
 626                                 loc->components[pos]->tok);
 627         }
 628     }
 629   else
 630     {
 631       match_key match (* loc->components[pos]);
 632       sub_map_iterator_t i = sub.find (match);
 633
 634       if (i != sub.end()) // match found
 635         {
 636           match_node* subnode = i->second;
 637           // recurse
 638           subnode->find_and_build (s, p, loc, pos+1, results);
 639           return;
 640         }
 641
 642       unsigned int num_results = results.size();
 643       this->try_suffix_expansion (s, p, loc, pos, results);
 644
 645       // XXX: how to correctly report alternatives + position numbers
 646       // for alias suffixes?  file a separate PR to address the issue
 647       if (! loc->optional && num_results == results.size())
 648         {
 649           // We didn't find any alias suffixes (since the size of the
 650           // result vector didn't change).  Throw an error.
 651           string sugs = suggest_functors(s, loc->components[pos]->functor);
 652           throw SEMANTIC_ERROR (_F("probe point mismatch%s",
 653                                    sugs.empty() ? "" : (" (similar: " + sugs + ")").c_str()),
 654                                 loc->components[pos]->tok);
 655         }
 656     }
 657 }
 658
 659 string
 660 match_node::suggest_functors(systemtap_session& s, string functor)
 661 {
 662   // only use prefix if globby (and prefix is non-empty)
 663   size_t glob = functor.find('*');
 664   if (glob != string::npos && glob != 0)
 665     functor.erase(glob);
 666   if (functor.empty())
 667     return "";
 668
 669   // PR18577: There isn't any point in generating a suggestion list if
 670   // we're not going to display it.
 671   if ((s.dump_mode == systemtap_session::dump_matched_probes
 672        || s.dump_mode == systemtap_session::dump_matched_probes_vars)
 673       && s.verbose < 2)
 674     return "";
 675
 676   set<string> functors;
 677   for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 678     {
 679       string ftor = i->first.str();
 680       if (ftor.find('(') != string::npos)  // trim any parameter
 681         ftor.erase(ftor.find('('));
 682       functors.insert(ftor);
 683     }
 684   return levenshtein_suggest(functor, functors, 5); // print top 5
 685 }
 686
 687 void
 688 match_node::try_suffix_expansion (systemtap_session& s,
 689                                   probe *p, probe_point *loc, unsigned pos,
 690                                   vector<derived_probe *>& results)
 691 {
 692   // PR12210: match alias suffixes. If the components thus far
 693   // have been matched, but there is an additional unknown
 694   // suffix, we have a potential alias suffix on our hands. We
 695   // need to expand the preceding components as probe aliases,
 696   // reattach the suffix, and re-run derive_probes() on the
 697   // resulting expansion. This is done by the routine
 698   // build_with_suffix().
 699
 700   if (strverscmp(s.compatible.c_str(), "2.0") >= 0)
 701     {
 702       // XXX: technically, param_map isn't used here.  So don't
 703       // bother actually assembling it unless some
 704       // derived_probe_builder appears that actually takes
 705       // suffixes *and* consults parameters (currently no such
 706       // builders exist).
 707       literal_map_t param_map;
 708       // for (unsigned i=0; i<pos; i++)
 709       //   param_map[loc->components[i]->functor] = loc->components[i]->arg;
 710       // maybe 0
 711
 712       vector<probe_point::component *> suffix (loc->components.begin()+pos,
 713                                                loc->components.end());
 714
 715       // Multiple derived_probe_builders may be bound at a
 716       // match_node due to the possibility of multiply defined
 717       // aliases.
 718       for (unsigned k=0; k < ends.size(); k++)
 719         {
 720           derived_probe_builder *b = ends[k];
 721           try
 722             {
 723               b->build_with_suffix (s, p, loc, param_map, results, suffix);
 724             }
 725           catch (const recursive_expansion_error &e)
 726             {
 727               // Re-throw:
 728               throw semantic_error(e);
 729             }
 730           catch (const semantic_error &e)
 731             {
 732               // Adjust source coordinate and re-throw:
 733               if (! loc->optional)
 734                 throw semantic_error(e.errsrc, e.what(), loc->components[pos]->tok);
 735             }
 736         }
 737     }
 738 }
 739
 740
 741 void
 742 match_node::build_no_more (systemtap_session& s)
 743 {
 744   for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 745     i->second->build_no_more (s);
 746   for (unsigned k=0; k<ends.size(); k++)
 747     {
 748       derived_probe_builder *b = ends[k];
 749       b->build_no_more (s);
 750     }
 751 }
 752
 753 void
 754 match_node::dump (systemtap_session &s, const string &name)
 755 {
 756   // Dump this node, if it is complete.
 757   for (unsigned k=0; k<ends.size(); k++)
 758     {
 759       // Don't print aliases at all (for now) until we can figure out how to determine whether
 760       // the probes they resolve to are ok in unprivileged mode.
 761       if (ends[k]->is_alias ())
 762         continue;
 763
 764       // In unprivileged mode, don't show the probes which are not allowed for unprivileged
 765       // users.
 766       if (pr_contains (privilege, s.privilege))
 767         {
 768           cout << name << endl;
 769           break; // we need only print one instance.
 770         }
 771     }
 772
 773   // Recursively dump the children of this node
 774   string dot;
 775   if (! name.empty ())
 776     dot = ".";
 777   for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 778     {
 779       i->second->dump (s, name + dot + i->first.str());
 780     }
 781 }
 782
 783
 784 // ------------------------------------------------------------------------
 785 // Alias probes
 786 // ------------------------------------------------------------------------
 787
 788 struct alias_derived_probe: public derived_probe
 789 {
 790   alias_derived_probe (probe* base, probe_point *l, const probe_alias *a,
 791                        const vector<probe_point::component *> *suffix = 0);
 792   ~alias_derived_probe();
 793
 794   void upchuck () { throw SEMANTIC_ERROR (_("inappropriate"), this->tok); }
 795
 796   // Alias probes are immediately expanded to other derived_probe
 797   // types, and are not themselves emitted or listed in
 798   // systemtap_session.probes
 799
 800   void join_group (systemtap_session&) { upchuck (); }
 801
 802   virtual const probe_alias *get_alias () const { return alias; }
 803   virtual probe_point *get_alias_loc () const { return alias_loc; }
 804   virtual probe_point *sole_location () const;
 805
 806 private:
 807   const probe_alias *alias; // Used to check for recursion
 808   probe_point *alias_loc; // Hack to recover full probe name
 809 };
 810
 811
 812 alias_derived_probe::alias_derived_probe(probe *base, probe_point *l,
 813                                          const probe_alias *a,
 814                                          const vector<probe_point::component *>
 815                                            *suffix):
 816   derived_probe (base, l), alias(a)
 817 {
 818   // XXX pretty nasty -- this was cribbed from printscript() in main.cxx
 819   assert (alias->alias_names.size() >= 1);
 820   alias_loc = new probe_point(*alias->alias_names[0]); // XXX: [0] is arbitrary; it would make just as much sense to collect all of the names
 821   alias_loc->well_formed = true;
 822   vector<probe_point::component*>::const_iterator it;
 823   for (it = suffix->begin(); it != suffix->end(); ++it)
 824     {
 825       alias_loc->components.push_back(*it);
 826       if (isglob((*it)->functor))
 827         alias_loc->well_formed = false; // needs further derivation
 828     }
 829 }
 830
 831 alias_derived_probe::~alias_derived_probe ()
 832 {
 833   delete alias_loc;
 834 }
 835
 836
 837 probe_point*
 838 alias_derived_probe::sole_location () const
 839 {
 840   return const_cast<probe_point*>(alias_loc);
 841 }
 842
 843
 844 void
 845 alias_expansion_builder::build(systemtap_session & sess,
 846                                probe * use,
 847                                probe_point * location,
 848                                literal_map_t const & parameters,
 849                                vector<derived_probe *> & finished_results)
 850 {
 851   vector<probe_point::component *> empty_suffix;
 852   build_with_suffix (sess, use, location, parameters,
 853                      finished_results, empty_suffix);
 854 }
 855
 856 void
 857 alias_expansion_builder::build_with_suffix(systemtap_session & sess,
 858                                            probe * use,
 859                                            probe_point * location,
 860                                            literal_map_t const &,
 861                                            vector<derived_probe *>
 862                                              & finished_results,
 863                                            vector<probe_point::component *>
 864                                              const & suffix)
 865 {
 866   // Don't build the alias expansion if infinite recursion is detected.
 867   if (checkForRecursiveExpansion (use)) {
 868     stringstream msg;
 869     msg << _F("recursive loop in alias expansion of %s at %s",
 870               lex_cast(*location).c_str(), lex_cast(location->components.front()->tok->location).c_str());
 871     // semantic_errors thrown here might be ignored, so we need a special class:
 872     throw recursive_expansion_error (msg.str());
 873     // XXX The point of throwing this custom error is to suppress a
 874     // cascade of "probe mismatch" messages that appear in addition to
 875     // the error. The current approach suppresses most of the error
 876     // cascade, but leaves one spurious error; in any case, the way
 877     // this particular error is reported could be improved.
 878   }
 879
 880   // We're going to build a new probe and wrap it up in an
 881   // alias_expansion_probe so that the expansion loop recognizes it as
 882   // such and re-expands its expansion.
 883
 884   alias_derived_probe * n = new alias_derived_probe (use, location /* soon overwritten */, this->alias, &suffix);
 885   n->body = new block();
 886
 887   // The new probe gets a deep copy of the location list of the alias
 888   // (with incoming condition joined) plus the suffix (if any),
 889   n->locations.clear();
 890   for (unsigned i=0; i<alias->locations.size(); i++)
 891     {
 892       probe_point *pp = new probe_point(*alias->locations[i]);
 893       pp->components.insert(pp->components.end(), suffix.begin(), suffix.end());
 894       pp->condition = add_condition (pp->condition, location->condition);
 895       n->locations.push_back(pp);
 896     }
 897
 898   // the token location of the alias,
 899   n->tok = location->components.front()->tok;
 900
 901   // and statements representing the concatenation of the alias'
 902   // body with the use's.
 903   //
 904   // NB: locals are *not* copied forward, from either alias or
 905   // use. The expansion should have its locals re-inferred since
 906   // there's concatenated code here and we only want one vardecl per
 907   // resulting variable.
 908
 909   if (alias->epilogue_style)
 910     n->body = new block (use->body, alias->body);
 911   else
 912     n->body = new block (alias->body, use->body);
 913
 914   unsigned old_num_results = finished_results.size();
 915   // If expanding for an alias suffix, be sure to pass on any errors
 916   // to the caller instead of printing them in derive_probes():
 917   derive_probes (sess, n, finished_results, location->optional, !suffix.empty());
 918
 919   // Check whether we resolved something. If so, put the
 920   // whole library into the queue if not already there.
 921   if (finished_results.size() > old_num_results)
 922     {
 923       stapfile *f = alias->tok->location.file;
 924       if (find (sess.files.begin(), sess.files.end(), f)
 925           == sess.files.end())
 926         sess.files.push_back (f);
 927     }
 928 }
 929
 930 bool
 931 alias_expansion_builder::checkForRecursiveExpansion (probe *use)
 932 {
 933   // Collect the derivation chain of this probe.
 934   vector<probe*>derivations;
 935   use->collect_derivation_chain (derivations);
 936
 937   // Check all probe points in the alias expansion against the currently-being-expanded probe point
 938   // of each of the probes in the derivation chain, looking for a match. This
 939   // indicates infinite recursion.
 940   // The first element of the derivation chain will be the derived_probe representing 'use', so
 941   // start the search with the second element.
 942   assert (derivations.size() > 0);
 943   assert (derivations[0] == use);
 944   for (unsigned d = 1; d < derivations.size(); ++d) {
 945     if (use->get_alias() == derivations[d]->get_alias())
 946       return true; // recursion detected
 947   }
 948   return false;
 949 }
 950
 951
 952 // ------------------------------------------------------------------------
 953 // Pattern matching
 954 // ------------------------------------------------------------------------
 955
 956 static unsigned max_recursion = 100;
 957
 958 struct
 959 recursion_guard
 960 {
 961   unsigned & i;
 962   recursion_guard(unsigned & i) : i(i)
 963     {
 964       if (i > max_recursion)
 965         throw SEMANTIC_ERROR(_("recursion limit reached"));
 966       ++i;
 967     }
 968   ~recursion_guard()
 969     {
 970       --i;
 971     }
 972 };
 973
 974 // The match-and-expand loop.
 975 void
 976 derive_probes (systemtap_session& s,
 977                probe *p, vector<derived_probe*>& dps,
 978                bool optional,
 979                bool rethrow_errors)
 980 {
 981   // We need a static to track whether the current probe is optional so that
 982   // even if we recurse into derive_probes with optional = false, errors will
 983   // still be ignored. The undo_parent_optional bool ensures we reset the
 984   // static at the same level we had it set.
 985   static bool parent_optional = false;
 986   bool undo_parent_optional = false;
 987
 988   if (optional && !parent_optional)
 989     {
 990       parent_optional = true;
 991       undo_parent_optional = true;
 992     }
 993
 994   vector <semantic_error> optional_errs;
 995
 996   for (unsigned i = 0; i < p->locations.size(); ++i)
 997     {
 998       assert_no_interrupts();
 999
1000       probe_point *loc = p->locations[i];
1001
1002       if (s.verbose > 4)
1003         clog << "derive-probes " << *loc << endl;
1004
1005       try
1006         {
1007           unsigned num_atbegin = dps.size();
1008
1009           try
1010             {
1011               s.pattern_root->find_and_build (s, p, loc, 0, dps); // <-- actual derivation!
1012             }
1013           catch (const semantic_error& e)
1014             {
1015               if (!loc->optional && !parent_optional)
1016                 throw semantic_error(e);
1017               else /* tolerate failure for optional probe */
1018                 {
1019                   // remember err, we will print it (in catch block) if any
1020                   // non-optional loc fails to resolve
1021                   semantic_error err(ERR_SRC, _("while resolving probe point"),
1022                                      loc->components[0]->tok, NULL, &e);
1023                   optional_errs.push_back(err);
1024                   continue;
1025                 }
1026             }
1027
1028           unsigned num_atend = dps.size();
1029
1030           if (! (loc->optional||parent_optional) && // something required, but
1031               num_atbegin == num_atend) // nothing new derived!
1032             throw SEMANTIC_ERROR (_("no match"));
1033
1034           if (loc->sufficient && (num_atend > num_atbegin))
1035             {
1036               if (s.verbose > 1)
1037                 {
1038                   clog << "Probe point ";
1039                   p->locations[i]->print(clog);
1040                   clog << " sufficient, skipped";
1041                   for (unsigned j = i+1; j < p->locations.size(); ++j)
1042                     {
1043                       clog << " ";
1044                       p->locations[j]->print(clog);
1045                     }
1046                   clog << endl;
1047                 }
1048               break; // we need not try to derive for any other locations
1049             }
1050         }
1051       catch (const semantic_error& e)
1052         {
1053           // The rethrow_errors parameter lets the caller decide an
1054           // alternative to printing the error. This is necessary when
1055           // calling derive_probes() recursively during expansion of
1056           // an alias with suffix -- any message printed here would
1057           // point to the alias declaration and not the invalid suffix
1058           // usage, so the caller needs to catch the error themselves
1059           // and print a more appropriate message.
1060           if (rethrow_errors)
1061             {
1062               throw semantic_error(e);
1063             }
1064           // Only output in dump mode if -vv is supplied:
1065           else if (!s.dump_mode || (s.verbose > 1))
1066             {
1067               // print this one manually first because it's more important than
1068               // the optional errs
1069               semantic_error err(ERR_SRC, _("while resolving probe point"),
1070                                  loc->components[0]->tok, NULL, &e);
1071               s.print_error(err);
1072
1073               // print optional errs accumulated while visiting other probe points
1074               for (vector<semantic_error>::const_iterator it = optional_errs.begin();
1075                    it != optional_errs.end(); ++it)
1076                 {
1077                   s.print_error(*it);
1078                 }
1079             }
1080         }
1081     }
1082
1083   if (undo_parent_optional)
1084     parent_optional = false;
1085 }
1086
1087
1088
1089 // ------------------------------------------------------------------------
1090 //
1091 // Indexable usage checks
1092 //
1093
1094 struct symbol_fetcher
1095   : public throwing_visitor
1096 {
1097   symbol *&sym;
1098
1099   symbol_fetcher (symbol *&sym): sym(sym)
1100   {}
1101
1102   void visit_symbol (symbol* e)
1103   {
1104     sym = e;
1105   }
1106
1107   void visit_arrayindex (arrayindex* e)
1108   {
1109     e->base->visit (this);
1110   }
1111
1112   void throwone (const token* t)
1113   {
1114     throw SEMANTIC_ERROR (_("Expecting symbol or array index expression"), t);
1115   }
1116 };
1117
1118 symbol *
1119 get_symbol_within_expression (expression *e)
1120 {
1121   symbol *sym = NULL;
1122   symbol_fetcher fetcher(sym);
1123   e->visit (&fetcher);
1124   return sym; // NB: may be null!
1125 }
1126
1127 static symbol *
1128 get_symbol_within_indexable (indexable *ix)
1129 {
1130   symbol *array = NULL;
1131   hist_op *hist = NULL;
1132   classify_indexable(ix, array, hist);
1133   if (array)
1134     return array;
1135   else
1136     return get_symbol_within_expression (hist->stat);
1137 }
1138
1139 struct mutated_var_collector
1140   : public traversing_visitor
1141 {
1142   set<vardecl *> * mutated_vars;
1143
1144   mutated_var_collector (set<vardecl *> * mm)
1145     : mutated_vars (mm)
1146   {}
1147
1148   void visit_assignment(assignment* e)
1149   {
1150     if (e->type == pe_stats && e->op == "<<<")
1151       {
1152         vardecl *vd = get_symbol_within_expression (e->left)->referent;
1153         if (vd)
1154           mutated_vars->insert (vd);
1155       }
1156     traversing_visitor::visit_assignment(e);
1157   }
1158
1159   void visit_arrayindex (arrayindex *e)
1160   {
1161     if (is_active_lvalue (e))
1162       {
1163         symbol *sym;
1164         if (e->base->is_symbol (sym))
1165           mutated_vars->insert (sym->referent);
1166         else
1167           throw SEMANTIC_ERROR(_("Assignment to read-only histogram bucket"), e->tok);
1168       }
1169     traversing_visitor::visit_arrayindex (e);
1170   }
1171 };
1172
1173
1174 struct no_var_mutation_during_iteration_check
1175   : public traversing_visitor
1176 {
1177   systemtap_session & session;
1178   map<functiondecl *,set<vardecl *> *> & function_mutates_vars;
1179   vector<vardecl *> vars_being_iterated;
1180
1181   no_var_mutation_during_iteration_check
1182   (systemtap_session & sess,
1183    map<functiondecl *,set<vardecl *> *> & fmv)
1184     : session(sess), function_mutates_vars (fmv)
1185   {}
1186
1187   void visit_arrayindex (arrayindex *e)
1188   {
1189     if (is_active_lvalue(e))
1190       {
1191         vardecl *vd = get_symbol_within_indexable (e->base)->referent;
1192         if (vd)
1193           {
1194             for (unsigned i = 0; i < vars_being_iterated.size(); ++i)
1195               {
1196                 vardecl *v = vars_being_iterated[i];
1197                 if (v == vd)
1198                   {
1199                     string err = _F("variable '%s' modified during 'foreach' iteration",
1200                                     v->name.to_string().c_str());
1201                     session.print_error (SEMANTIC_ERROR (err, e->tok));
1202                   }
1203               }
1204           }
1205       }
1206     traversing_visitor::visit_arrayindex (e);
1207   }
1208
1209   void visit_functioncall (functioncall* e)
1210   {
1211     map<functiondecl *,set<vardecl *> *>::const_iterator i
1212       = function_mutates_vars.find (e->referent);
1213
1214     if (i != function_mutates_vars.end())
1215       {
1216         for (unsigned j = 0; j < vars_being_iterated.size(); ++j)
1217           {
1218             vardecl *m = vars_being_iterated[j];
1219             if (i->second->find (m) != i->second->end())
1220               {
1221                 string err = _F("function call modifies var '%s' during 'foreach' iteration",
1222                                 m->name.to_string().c_str());
1223                 session.print_error (SEMANTIC_ERROR (err, e->tok));
1224               }
1225           }
1226       }
1227
1228     traversing_visitor::visit_functioncall (e);
1229   }
1230
1231   void visit_foreach_loop(foreach_loop* s)
1232   {
1233     vardecl *vd = get_symbol_within_indexable (s->base)->referent;
1234
1235     if (vd)
1236       vars_being_iterated.push_back (vd);
1237
1238     traversing_visitor::visit_foreach_loop (s);
1239
1240     if (vd)
1241       vars_being_iterated.pop_back();
1242   }
1243 };
1244
1245
1246 // ------------------------------------------------------------------------
1247
1248 struct stat_decl_collector
1249   : public traversing_visitor
1250 {
1251   systemtap_session & session;
1252
1253   stat_decl_collector(systemtap_session & sess)
1254     : session(sess)
1255   {}
1256
1257   void visit_stat_op (stat_op* e)
1258   {
1259     symbol *sym = get_symbol_within_expression (e->stat);
1260     if (session.stat_decls.find(sym->name) == session.stat_decls.end())
1261       session.stat_decls[sym->name] = statistic_decl();
1262   }
1263
1264   void visit_assignment (assignment* e)
1265   {
1266     if (e->op == "<<<")
1267       {
1268         symbol *sym = get_symbol_within_expression (e->left);
1269         if (session.stat_decls.find(sym->name) == session.stat_decls.end())
1270           session.stat_decls[sym->name] = statistic_decl();
1271       }
1272     else
1273       traversing_visitor::visit_assignment(e);
1274   }
1275
1276   void visit_hist_op (hist_op* e)
1277   {
1278     symbol *sym = get_symbol_within_expression (e->stat);
1279     statistic_decl new_stat;
1280
1281     if (e->htype == hist_linear)
1282       {
1283         new_stat.type = statistic_decl::linear;
1284         assert (e->params.size() == 3);
1285         new_stat.linear_low = e->params[0];
1286         new_stat.linear_high = e->params[1];
1287         new_stat.linear_step = e->params[2];
1288       }
1289     else
1290       {
1291         assert (e->htype == hist_log);
1292         new_stat.type = statistic_decl::logarithmic;
1293         assert (e->params.size() == 0);
1294       }
1295
1296     map<interned_string, statistic_decl>::iterator i = session.stat_decls.find(sym->name);
1297     if (i == session.stat_decls.end())
1298       session.stat_decls[sym->name] = new_stat;
1299     else
1300       {
1301         statistic_decl & old_stat = i->second;
1302         if (!(old_stat == new_stat))
1303           {
1304             if (old_stat.type == statistic_decl::none)
1305               i->second = new_stat;
1306             else
1307               {
1308                 // FIXME: Support multiple co-declared histogram types
1309                 semantic_error se(ERR_SRC, _F("multiple histogram types declared on '%s'",
1310                                               sym->name.to_string().c_str()), e->tok);
1311                 session.print_error (se);
1312               }
1313           }
1314       }
1315   }
1316
1317 };
1318
1319 static int
1320 semantic_pass_stats (systemtap_session & sess)
1321 {
1322   stat_decl_collector sdc(sess);
1323
1324   for (map<string,functiondecl*>::iterator it = sess.functions.begin(); it != sess.functions.end(); it++)
1325     it->second->body->visit (&sdc);
1326
1327   for (unsigned i = 0; i < sess.probes.size(); ++i)
1328     sess.probes[i]->body->visit (&sdc);
1329
1330   for (unsigned i = 0; i < sess.globals.size(); ++i)
1331     {
1332       vardecl *v = sess.globals[i];
1333       if (v->type == pe_stats)
1334         {
1335
1336           if (sess.stat_decls.find(v->name) == sess.stat_decls.end())
1337             {
1338               semantic_error se(ERR_SRC, _F("unable to infer statistic parameters for global '%s'",
1339                                             v->name.to_string().c_str()));
1340               sess.print_error (se);
1341             }
1342         }
1343     }
1344
1345   return sess.num_errors();
1346 }
1347
1348 // ------------------------------------------------------------------------
1349
1350 // Enforce variable-related invariants: no modification of
1351 // a foreach()-iterated array.
1352 static int
1353 semantic_pass_vars (systemtap_session & sess)
1354 {
1355
1356   map<functiondecl *, set<vardecl *> *> fmv;
1357   no_var_mutation_during_iteration_check chk(sess, fmv);
1358
1359   for (map<string,functiondecl*>::iterator it = sess.functions.begin(); it != sess.functions.end(); it++)
1360     {
1361       functiondecl * fn = it->second;
1362       if (fn->body)
1363         {
1364           set<vardecl *> * m = new set<vardecl *>();
1365           mutated_var_collector mc (m);
1366           fn->body->visit (&mc);
1367           fmv[fn] = m;
1368         }
1369     }
1370
1371   for (map<string,functiondecl*>::iterator it = sess.functions.begin(); it != sess.functions.end(); it++)
1372     {
1373       functiondecl * fn = it->second;
1374       if (fn->body) fn->body->visit (&chk);
1375     }
1376
1377   for (unsigned i = 0; i < sess.probes.size(); ++i)
1378     {
1379       if (sess.probes[i]->body)
1380         sess.probes[i]->body->visit (&chk);
1381     }
1382
1383   return sess.num_errors();
1384 }
1385
1386
1387 // ------------------------------------------------------------------------
1388
1389 // Rewrite probe condition expressions into probe bodies.  Tricky and
1390 // exciting business, this.  This:
1391 //
1392 // probe foo if (g1 || g2) { ... }
1393 // probe bar { ... g1 ++ ... }
1394 //
1395 // becomes:
1396 //
1397 // probe foo { if (! (g1 || g2)) next; ... }
1398 // probe bar { ... g1 ++ ...;
1399 //             if (g1 || g2) %{ enable_probe_foo %} else %{ disable_probe_foo %}
1400 //           }
1401 //
1402 // In other words, we perform two transformations:
1403 //    (1) Inline probe condition into its body.
1404 //    (2) For each probe that modifies a global var in use in any probe's
1405 //        condition, re-evaluate those probes' condition at the end of that
1406 //        probe's body.
1407 //
1408 // Here, we do all of (1), and half of (2): we simply collect the dependency
1409 // info between probes, which the translator will use to emit the affected
1410 // probes' condition re-evaluation. The translator will also ensure that the
1411 // conditions are evaluated using the globals' starting values prior to any
1412 // probes starting.
1413
1414 // Adds the condition expression to the front of the probe's body
1415 static void
1416 derived_probe_condition_inline (derived_probe *p)
1417 {
1418   expression* e = p->sole_location()->condition;
1419   assert(e);
1420
1421   if_statement *ifs = new if_statement ();
1422   ifs->tok = e->tok;
1423   ifs->thenblock = new next_statement ();
1424   ifs->thenblock->tok = e->tok;
1425   ifs->elseblock = NULL;
1426   unary_expression *notex = new unary_expression ();
1427   notex->op = "!";
1428   notex->tok = e->tok;
1429   notex->operand = e;
1430   ifs->condition = notex;
1431   p->body = new block (ifs, p->body);
1432 }
1433
1434 static int
1435 semantic_pass_conditions (systemtap_session & sess)
1436 {
1437   map<derived_probe*, set<vardecl*> > vars_read_in_cond;
1438   map<derived_probe*, set<vardecl*> > vars_written_in_body;
1439
1440   // do a first pass through the probes to ensure safety, inline any condition,
1441   // and collect var usage
1442   for (unsigned i = 0; i < sess.probes.size(); ++i)
1443     {
1444       derived_probe* p = sess.probes[i];
1445       expression* e = p->sole_location()->condition;
1446
1447       if (e)
1448         {
1449           varuse_collecting_visitor vcv_cond(sess);
1450           e->visit (& vcv_cond);
1451
1452           if (!vcv_cond.written.empty())
1453             sess.print_error (SEMANTIC_ERROR (_("probe condition must not "
1454                                                 "modify any variables"),
1455                                               e->tok));
1456           else if (vcv_cond.embedded_seen)
1457             sess.print_error (SEMANTIC_ERROR (_("probe condition must not "
1458                                                 "include impure embedded-C"),
1459                                               e->tok));
1460
1461           derived_probe_condition_inline(p);
1462
1463           vars_read_in_cond[p].insert(vcv_cond.read.begin(),
1464                                       vcv_cond.read.end());
1465         }
1466
1467       varuse_collecting_visitor vcv_body(sess);
1468       p->body->visit (& vcv_body);
1469
1470       vars_written_in_body[p].insert(vcv_body.written.begin(),
1471                                      vcv_body.written.end());
1472     }
1473
1474   // do a second pass to collect affected probes
1475   for (unsigned i = 0; i < sess.probes.size(); ++i)
1476     {
1477       derived_probe *p = sess.probes[i];
1478
1479       // for each variable this probe modifies...
1480       set<vardecl*>::const_iterator var;
1481       for (var  = vars_written_in_body[p].begin();
1482            var != vars_written_in_body[p].end(); ++var)
1483         {
1484           // collect probes which could be affected
1485           for (unsigned j = 0; j < sess.probes.size(); ++j)
1486             {
1487               if (vars_read_in_cond[sess.probes[j]].count(*var))
1488                 {
1489                   if (!p->probes_with_affected_conditions.count(sess.probes[j]))
1490                     {
1491                       p->probes_with_affected_conditions.insert(sess.probes[j]);
1492                       if (sess.verbose > 2)
1493                         clog << "probe " << i << " can affect condition of "
1494                                 "probe " << j << endl;
1495                     }
1496                 }
1497             }
1498         }
1499     }
1500
1501   // PR18115: We create a begin probe which is artificially registered as
1502   // affecting every other probe. This will serve as the initializer so that
1503   // other probe types with false conditions can be skipped (or registered as
1504   // disabled) during module initialization.
1505
1506   set<derived_probe*> targets;
1507   for (unsigned i = 0; i < sess.probes.size(); ++i)
1508     if (!vars_read_in_cond[sess.probes[i]].empty())
1509       targets.insert(sess.probes[i]);
1510
1511   if (!targets.empty())
1512     {
1513       stringstream ss("probe begin {}");
1514
1515       // no good token to choose here... let's just use the condition expression
1516       // of one of the probes as the token
1517       const token *tok = (*targets.begin())->sole_location()->condition->tok;
1518
1519       probe *p = parse_synthetic_probe(sess, ss, tok);
1520       if (!p)
1521         throw SEMANTIC_ERROR (_("can't create cond initializer probe"), tok);
1522
1523       vector<derived_probe*> dps;
1524       derive_probes(sess, p, dps);
1525
1526       // there should only be one
1527       assert(dps.size() == 1);
1528
1529       derived_probe* dp = dps[0];
1530       dp->probes_with_affected_conditions.insert(targets.begin(),
1531                                                  targets.end());
1532       sess.probes.push_back (dp);
1533       dp->join_group (sess);
1534
1535       // no need to manually do symresolution since body is empty
1536     }
1537
1538   return sess.num_errors();
1539 }
1540
1541 // ------------------------------------------------------------------------
1542
1543
// Simple visitor that just goes through all embedded code blocks that
// are available at the end of all the optimizations to register any
// relevant pragmas or other indicators found, so that session flags can
// be set that can be inspected at translation time to trigger any
// necessary initialization of code needed by the embedded code functions.
1549
// This is only for pragmas that don't have any other side-effect than
// needing some initialization at module init time. Currently handles
// /* pragma:vma */, /* pragma:unwind */, /* pragma:symbols */ and
// /* pragma:lines */.
1553
// /* pragma:uprobes */ is handled during the typeresolution_info pass.
// /* pure */, /* unprivileged */, /* myproc-unprivileged */ and /* guru */
// are handled by the varuse_collecting_visitor.
1557
1558 struct embeddedcode_info: public functioncall_traversing_visitor
1559 {
1560 protected:
1561   systemtap_session& session;
1562
1563 public:
1564   embeddedcode_info (systemtap_session& s): session(s) { }
1565
1566   void visit_embeddedcode (embeddedcode* c)
1567   {
1568     if (! vma_tracker_enabled(session)
1569         && c->code.find("/* pragma:vma */") != string::npos)
1570       {
1571         if (session.verbose > 2)
1572           clog << _F("Turning on task_finder vma_tracker, pragma:vma found in %s",
1573                      current_function->name.to_string().c_str()) << endl;
1574
1575         // PR15052: stapdyn doesn't have VMA-tracking yet.
1576         if (session.runtime_usermode_p())
1577           throw SEMANTIC_ERROR(_("VMA-tracking is only supported by the kernel runtime (PR15052)"), c->tok);
1578
1579         enable_vma_tracker(session);
1580       }
1581
1582     if (! session.need_unwind
1583         && c->code.find("/* pragma:unwind */") != string::npos)
1584       {
1585         if (session.verbose > 2)
1586           clog << _F("Turning on unwind support, pragma:unwind found in %s",
1587                     current_function->name.to_string().c_str()) << endl;
1588         session.need_unwind = true;
1589       }
1590
1591     if (! session.need_symbols
1592         && c->code.find("/* pragma:symbols */") != string::npos)
1593       {
1594         if (session.verbose > 2)
1595           clog << _F("Turning on symbol data collecting, pragma:symbols found in %s",
1596                     current_function->name.to_string().c_str()) << endl;
1597         session.need_symbols = true;
1598       }
1599
1600     if (! session.need_lines
1601         && c->code.find("/* pragma:lines */") != string::npos)
1602       {
1603         if (session.verbose > 2)
1604           clog << _F("Turning on debug line data collecting, pragma:lines found in %s",
1605                     current_function->name.to_string().c_str()) << endl;
1606         session.need_lines = true;
1607       }
1608   }
1609 };
1610
1611 void embeddedcode_info_pass (systemtap_session& s)
1612 {
1613   embeddedcode_info eci (s);
1614   for (unsigned i=0; i<s.probes.size(); i++)
1615     s.probes[i]->body->visit (& eci);
1616 }
1617
1618 // ------------------------------------------------------------------------
1619
1620
1621 // Simple visitor that collects all the regular expressions in the
1622 // file and adds them to the session DFA table.
1623
1624 struct regex_collecting_visitor: public functioncall_traversing_visitor
1625 {
1626 protected:
1627   systemtap_session& session;
1628
1629 public:
1630   regex_collecting_visitor (systemtap_session& s): session(s) { }
1631
1632   void visit_regex_query (regex_query *q) {
1633     functioncall_traversing_visitor::visit_regex_query (q);
1634
1635     string re = q->right->value;
1636     regex_to_stapdfa (&session, re, q->right->tok);
1637   }
1638 };
1639
1640 // Go through the regex match invocations and generate corresponding DFAs.
1641 int gen_dfa_table (systemtap_session& s)
1642 {
1643   regex_collecting_visitor rcv(s);
1644
1645   for (unsigned i=0; i<s.probes.size(); i++)
1646     {
1647       try
1648         {
1649           s.probes[i]->body->visit (& rcv);
1650
1651           if (s.probes[i]->sole_location()->condition)
1652             s.probes[i]->sole_location()->condition->visit (& rcv);
1653         }
1654       catch (const semantic_error& e)
1655         {
1656           s.print_error (e);
1657         }
1658     }
1659
1660   return s.num_errors();
1661 }
1662
1663 // ------------------------------------------------------------------------
1664
1665
1666 static int semantic_pass_symbols (systemtap_session&);
1667 static int semantic_pass_optimize1 (systemtap_session&);
1668 static int semantic_pass_optimize2 (systemtap_session&);
1669 static int semantic_pass_types (systemtap_session&);
1670 static int semantic_pass_vars (systemtap_session&);
1671 static int semantic_pass_stats (systemtap_session&);
1672 static int semantic_pass_conditions (systemtap_session&);
1673
1674
1675 struct expression_build_no_more_visitor : public expression_visitor
1676 {
1677   // Clear extra details from every expression, like DWARF type info, so that
1678   // builders can safely release them in build_no_more.  From here on out,
1679   // we're back to basic types only.
1680   void visit_expression(expression *e)
1681     {
1682       e->type_details.reset();
1683     }
1684 };
1685
1686 static void
1687 build_no_more (systemtap_session& s)
1688 {
1689   expression_build_no_more_visitor v;
1690
1691   for (unsigned i=0; i<s.probes.size(); i++)
1692     s.probes[i]->body->visit(&v);
1693
1694   for (map<string,functiondecl*>::iterator it = s.functions.begin();
1695        it != s.functions.end(); it++)
1696     it->second->body->visit(&v);
1697
1698   // Inform all derived_probe builders that we're done with
1699   // all resolution, so it's time to release caches.
1700   s.pattern_root->build_no_more (s);
1701 }
1702
1703
1704
1705 // Link up symbols to their declarations.  Set the session's
1706 // files/probes/functions/globals vectors from the transitively
1707 // reached set of stapfiles in s.library_files, starting from
1708 // s.user_file.  Perform automatic tapset inclusion and probe
1709 // alias expansion.
1710 static int
1711 semantic_pass_symbols (systemtap_session& s)
1712 {
1713   symresolution_info sym (s);
1714
1715   // If we're listing functions, then we need to include all the files. Probe
1716   // aliases won't be visited/derived so all we gain are the functions, global
1717   // variables, and any real probes (e.g. begin probes). NB: type resolution for
1718   // a specific function arg may fail if it could only be determined from a
1719   // function call in one of the skipped aliases.
1720   if (s.dump_mode == systemtap_session::dump_functions)
1721     {
1722       s.files.insert(s.files.end(), s.library_files.begin(),
1723                                     s.library_files.end());
1724     }
1725   else if (!s.user_files.empty())
1726     {
1727       // Normal run: seed s.files with user_files and let it grow through the
1728       // find_* functions. NB: s.files can grow during this iteration, so
1729       // size() can return gradually increasing numbers.
1730       s.files.insert (s.files.end(), s.user_files.begin(), s.user_files.end());
1731     }
1732
1733   for (unsigned i = 0; i < s.files.size(); i++)
1734     {
1735       assert_no_interrupts();
1736       stapfile* dome = s.files[i];
1737
1738       // Pass 1: add globals and functions to systemtap-session master list,
1739       //         so the find_* functions find them
1740       //
1741       // NB: tapset global/function definitions may duplicate or conflict
1742       // with those already in s.globals/functions.  We need to deconflict
1743       // here.
1744
1745       for (unsigned i=0; i<dome->globals.size(); i++)
1746         {
1747           vardecl* g = dome->globals[i];
1748           for (unsigned j=0; j<s.globals.size(); j++)
1749             {
1750               vardecl* g2 = s.globals[j];
1751               if (g->name == g2->name)
1752                 {
1753                   s.print_error (SEMANTIC_ERROR (_("conflicting global variables"),
1754                                                  g->tok, g2->tok));
1755                 }
1756             }
1757           s.globals.push_back (g);
1758         }
1759
1760       for (unsigned i=0; i<dome->functions.size(); i++)
1761         {
1762           functiondecl* f = dome->functions[i];
1763           functiondecl* f2 = s.functions[f->name];
1764           if (f2 && f != f2)
1765             {
1766               s.print_error (SEMANTIC_ERROR (_("conflicting functions"),
1767                                              f->tok, f2->tok));
1768             }
1769           s.functions[f->name] = f;
1770         }
1771
1772       // NB: embeds don't conflict with each other
1773       for (unsigned i=0; i<dome->embeds.size(); i++)
1774         s.embeds.push_back (dome->embeds[i]);
1775
1776       // Pass 2: derive probes and resolve any further symbols in the
1777       // derived results.
1778
1779       for (unsigned i=0; i<dome->probes.size(); i++)
1780         {
1781           assert_no_interrupts();
1782           probe* p = dome->probes [i];
1783           vector<derived_probe*> dps;
1784
1785           // much magic happens here: probe alias expansion, wildcard
1786           // matching, low-level derived_probe construction.
1787           derive_probes (s, p, dps);
1788
1789           for (unsigned j=0; j<dps.size(); j++)
1790             {
1791               assert_no_interrupts();
1792               derived_probe* dp = dps[j];
1793               s.probes.push_back (dp);
1794               dp->join_group (s);
1795
1796               try
1797                 {
1798                   for (unsigned k=0; k<s.code_filters.size(); k++)
1799                     s.code_filters[k]->replace (dp->body);
1800
1801                   sym.current_function = 0;
1802                   sym.current_probe = dp;
1803                   dp->body->visit (& sym);
1804
1805                   // Process the probe-point condition expression.
1806                   sym.current_function = 0;
1807                   sym.current_probe = 0;
1808                   if (dp->sole_location()->condition)
1809                     dp->sole_location()->condition->visit (& sym);
1810                 }
1811               catch (const semantic_error& e)
1812                 {
1813                   s.print_error (e);
1814                 }
1815             }
1816         }
1817
1818       // Pass 3: process functions
1819
1820       for (unsigned i=0; i<dome->functions.size(); i++)
1821         {
1822           assert_no_interrupts();
1823           functiondecl* fd = dome->functions[i];
1824
1825           try
1826             {
1827               for (unsigned j=0; j<s.code_filters.size(); j++)
1828                 s.code_filters[j]->replace (fd->body);
1829
1830               sym.current_function = fd;
1831               sym.current_probe = 0;
1832               fd->body->visit (& sym);
1833             }
1834           catch (const semantic_error& e)
1835             {
1836               s.print_error (e);
1837             }
1838         }
1839     }
1840
1841   if(s.systemtap_v_check){
1842     for(unsigned i=0;i<s.globals.size();i++){
1843       if(s.globals[i]->systemtap_v_conditional)
1844         s.print_warning(_("This global uses tapset constructs that are dependent on systemtap version"), s.globals[i]->tok);
1845     }
1846
1847     for(map<string, functiondecl*>::const_iterator i=s.functions.begin();i != s.functions.end();++i){
1848       if(i->second->systemtap_v_conditional)
1849         s.print_warning(_("This function uses tapset constructs that are dependent on systemtap version"), i->second->tok);
1850     }
1851
1852     for(unsigned i=0;i<s.probes.size();i++){
1853       vector<probe*> sysvc;
1854       s.probes[i]->collect_derivation_chain(sysvc);
1855       for(unsigned j=0;j<sysvc.size();j++){
1856         if(sysvc[j]->systemtap_v_conditional)
1857           s.print_warning(_("This probe uses tapset constructs that are dependent on systemtap version"), sysvc[j]->tok);
1858         if(sysvc[j]->get_alias() && sysvc[j]->get_alias()->systemtap_v_conditional)
1859           s.print_warning(_("This alias uses tapset constructs that are dependent on systemtap version"), sysvc[j]->get_alias()->tok);
1860       }
1861     }
1862   }
1863
1864   return s.num_errors(); // all those print_error calls
1865 }
1866
1867
// Keep unread global variables for probe end value display.
//
// For every global in s.globals that the usage visitor recorded as
// written, but not as both read and used, and whose name is not declared
// by any library (tapset) file, this synthesizes a small script of the
// form "probe end { printf (...) }" that prints the variable, parses and
// derives it, and appends the derived probes to s.probes.  Nothing is
// done in any dump mode.
//
// Throws a semantic_error if the synthetic probe text cannot be parsed.
void add_global_var_display (systemtap_session& s)
{
  // Don't generate synthetic end probes when in listing mode; it would clutter
  // up the list of probe points with "end ...". In fact, don't bother in any
  // dump mode at all, since it'll never be used.
  if (s.dump_mode) return;

  // Gather variable usage over every probe body and probe-point condition.
  varuse_collecting_visitor vut(s);

  for (unsigned i=0; i<s.probes.size(); i++)
    {
      s.probes[i]->body->visit (& vut);

      if (s.probes[i]->sole_location()->condition)
        s.probes[i]->sole_location()->condition->visit (& vut);
    }

  for (unsigned g=0; g < s.globals.size(); g++)
    {
      vardecl* l = s.globals[g];
      // Nothing to display if the variable is already both read and used,
      // or if it is never written at all.
      if ((vut.read.find (l) != vut.read.end()
           && vut.used.find (l) != vut.used.end())
          || vut.written.find (l) == vut.written.end())
        continue;

      // Don't generate synthetic end probes for unread globals
      // declared only within tapsets. (RHBZ 468139), but rather
      // only within the end-user script.

      bool tapset_global = false;
      for (size_t m=0; m < s.library_files.size(); m++)
        {
          for (size_t n=0; n < s.library_files[m]->globals.size(); n++)
            {
              // NB: this break leaves only the inner loop; the remaining
              // library files are still scanned, which is harmless.
              if (l->name == s.library_files[m]->globals[n]->name)
                {tapset_global = true; break;}
            }
        }
      if (tapset_global)
        continue;

      // Text of the synthetic probe, assembled piece by piece below.
      stringstream code;
      code << "probe end {" << endl;

      // The printf format string starts with the declaration token's
      // content (presumably the variable name as written in the script).
      string format = l->tok->content;

      string indexes;        // comma-separated "__idxN" names, for arrays
      string foreach_value;  // name of the foreach value variable, if any
      if (!l->index_types.empty())
        {
          // Add index values to the printf format, and prepare
          // a simple list of indexes for passing around elsewhere
          format += "[";
          for (size_t i = 0; i < l->index_types.size(); ++i)
            {
              if (i > 0)
                {
                  indexes += ",";
                  format += ",";
                }
              indexes += "__idx" + lex_cast(i);
              if (l->index_types[i] == pe_string)
                format += "\\\"%#s\\\"";
              else
                format += "%#d";
            }
          format += "]";

          // Iterate over all indexes in the array, sorted by decreasing value
          code << "foreach (";
          // Non-statistics arrays bind the element to "__val" in the
          // foreach; statistics arrays are indexed explicitly later on.
          if (l->type != pe_stats)
            {
              foreach_value = "__val";
              code << foreach_value << " = ";
            }
          code << "[" << indexes << "] in " << l->tok->content << "-)" << endl;
        }
      else if (l->type == pe_stats)
        {
          // PR7053: Check scalar globals for empty aggregate
          code << "if (@count(" << l->tok->content << ") == 0)" << endl;
          code << "printf(\"" << l->tok->content << " @count=0x0\\n\")" << endl;
          code << "else" << endl;
        }

      // Statistics operators printed for pe_stats variables, in this order.
      static const string stats[] = { "@count", "@min", "@max", "@sum", "@avg" };
      // Statistics values print as "%#d" when s.compatible compares >= "1.4"
      // under strverscmp, and as "%#x" otherwise.
      const string stats_format =
        (strverscmp(s.compatible.c_str(), "1.4") >= 0) ? "%#d" : "%#x";

      // Fill in the printf format for values
      if (l->type == pe_stats)
        for (size_t i = 0; i < sizeof(stats)/sizeof(stats[0]); ++i)
          format += " " + stats[i] + "=" + stats_format;
      else if (l->type == pe_string)
        format += "=\\\"%#s\\\"";
      else
        format += "=%#x";
      format += "\\n";

      // Output the actual printf
      code << "printf (\"" << format << "\"";

      // Feed indexes to the printf, and include them in the value
      string value = !foreach_value.empty() ? foreach_value : string(l->tok->content);
      if (!l->index_types.empty())
        {
          code << "," << indexes;
          if (foreach_value.empty())
            value += "[" + indexes + "]";
        }

      // Feed the actual values to the printf
      if (l->type == pe_stats)
        for (size_t i = 0; i < sizeof(stats)/sizeof(stats[0]); ++i)
          code << "," << stats[i] << "(" << value << ")";
      else
        code << "," << value;
      code << ")" << endl;

      // End of probe
      code << "}" << endl;

      probe *p = parse_synthetic_probe (s, code, l->tok);
      if (!p)
        throw SEMANTIC_ERROR (_("can't create global var display"), l->tok);

      // Derive the synthetic probe and register every resulting derived
      // probe with the session, as is done for ordinary probes.
      vector<derived_probe*> dps;
      derive_probes (s, p, dps);
      for (unsigned i = 0; i < dps.size(); i++)
        {
          derived_probe* dp = dps[i];
          s.probes.push_back (dp);
          dp->join_group (s);

          // Repopulate symbol and type info
          symresolution_info sym (s);
          sym.current_function = 0;
          sym.current_probe = dp;
          dp->body->visit (& sym);
        }

      // Re-run type resolution so the new probe is typed.
      // NB: the return value is not checked here.
      semantic_pass_types(s);
      // Mark that variable is read
      vut.read.insert (l);
    }
}
2015
2016 int
2017 semantic_pass (systemtap_session& s)
2018 {
2019   int rc = 0;
2020
2021   try
2022     {
2023       s.register_library_aliases();
2024       register_standard_tapsets(s);
2025
2026       if (rc == 0) rc = semantic_pass_symbols (s);
2027       if (rc == 0) rc = semantic_pass_conditions (s);
2028       if (rc == 0) rc = semantic_pass_optimize1 (s);
2029       if (rc == 0) rc = semantic_pass_types (s);
2030       if (rc == 0) rc = gen_dfa_table(s);
2031       if (rc == 0) add_global_var_display (s);
2032       if (rc == 0) rc = semantic_pass_optimize2 (s);
2033       if (rc == 0) rc = semantic_pass_vars (s);
2034       if (rc == 0) rc = semantic_pass_stats (s);
2035       if (rc == 0) embeddedcode_info_pass (s);
2036     }
2037   catch (const semantic_error& e)
2038     {
2039       s.print_error (e);
2040       rc ++;
2041     }
2042
2043   bool no_primary_probes = true;
2044   for (unsigned i = 0; i < s.probes.size(); i++)
2045     if (s.is_primary_probe(s.probes[i]))
2046       no_primary_probes = false;
2047
2048   if (s.num_errors() == 0 && no_primary_probes && !s.dump_mode)
2049     {
2050       s.print_error(SEMANTIC_ERROR(_("no probes found")));
2051       rc ++;
2052     }
2053
2054   build_no_more (s);
2055
2056   // PR11443
2057   // NB: listing mode only cares whether we have any probes,
2058   // so all previous error conditions are disregarded.
2059   if (s.dump_mode == systemtap_session::dump_matched_probes ||
2060       s.dump_mode == systemtap_session::dump_matched_probes_vars)
2061     rc = no_primary_probes;
2062
2063   // If we're dumping functions, only error out if no functions were found
2064   if (s.dump_mode == systemtap_session::dump_functions)
2065     rc = s.functions.empty();
2066
2067   return rc;
2068 }
2069
2070
2071 // ------------------------------------------------------------------------
2072 // semantic processing: symbol resolution
2073
2074
2075 symresolution_info::symresolution_info (systemtap_session& s):
2076   session (s), current_function (0), current_probe (0)
2077 {
2078 }
2079
2080
2081 void
2082 symresolution_info::visit_block (block* e)
2083 {
2084   for (unsigned i=0; i<e->statements.size(); i++)
2085     {
2086       try
2087         {
2088           e->statements[i]->visit (this);
2089         }
2090       catch (const semantic_error& e)
2091         {
2092           session.print_error (e);
2093         }
2094     }
2095 }
2096
2097
2098 void
2099 symresolution_info::visit_foreach_loop (foreach_loop* e)
2100 {
2101   for (unsigned i=0; i<e->indexes.size(); i++)
2102     e->indexes[i]->visit (this);
2103   for (unsigned i=0; i<e->array_slice.size(); i++)
2104     if (e->array_slice[i])
2105       e->array_slice[i]->visit(this);
2106
2107   symbol *array = NULL;
2108   hist_op *hist = NULL;
2109   classify_indexable (e->base, array, hist);
2110
2111   if (array)
2112     {
2113       if (!array->referent)
2114         {
2115           vardecl* d = find_var (array->name, e->indexes.size (), array->tok);
2116           if (d)
2117           {
2118             array->referent = d;
2119             array->name = d->name;
2120           }
2121           else
2122             {
2123               stringstream msg;
2124               msg << _F("unresolved arity-%zu global array %s, missing global declaration?",
2125                         e->indexes.size(), array->name.to_string().c_str());
2126               throw SEMANTIC_ERROR (msg.str(), array->tok);
2127             }
2128         }
2129
2130       if (!e->array_slice.empty() && e->array_slice.size() != e->indexes.size())
2131         {
2132           stringstream msg;
2133           msg << _F("unresolved arity-%zu global array %s, missing global declaration?",
2134                     e->array_slice.size(), array->name.to_string().c_str());
2135           throw SEMANTIC_ERROR (msg.str(), array->tok);
2136         }
2137     }
2138   else
2139     {
2140       assert (hist);
2141       hist->visit (this);
2142     }
2143
2144   if (e->value)
2145     e->value->visit (this);
2146
2147   if (e->limit)
2148     e->limit->visit (this);
2149
2150   e->block->visit (this);
2151 }
2152
2153
2154 struct
2155 delete_statement_symresolution_info:
2156   public traversing_visitor
2157 {
2158   symresolution_info *parent;
2159
2160   delete_statement_symresolution_info (symresolution_info *p):
2161     parent(p)
2162   {}
2163
2164   void visit_arrayindex (arrayindex* e)
2165   {
2166     parent->visit_arrayindex(e, true);
2167   }
2168
2169   void visit_functioncall (functioncall* e)
2170   {
2171     parent->visit_functioncall (e);
2172   }
2173
2174   void visit_symbol (symbol* e)
2175   {
2176     if (e->referent)
2177       return;
2178
2179     vardecl* d = parent->find_var (e->name, -1, e->tok);
2180     if (d)
2181       e->referent = d;
2182     else
2183       throw SEMANTIC_ERROR (_("unresolved array in delete statement"), e->tok);
2184   }
2185 };
2186
2187 void
2188 symresolution_info::visit_delete_statement (delete_statement* s)
2189 {
2190   delete_statement_symresolution_info di (this);
2191   s->value->visit (&di);
2192 }
2193
2194
2195 void
2196 symresolution_info::visit_symbol (symbol* e)
2197 {
2198   if (e->referent)
2199     return;
2200
2201   vardecl* d = find_var (e->name, 0, e->tok);
2202   if (d)
2203   {
2204     e->referent = d;
2205     e->name = d->name;
2206   }
2207   else
2208     {
2209       // new local
2210       vardecl* v = new vardecl;
2211       v->name = e->name;
2212       v->tok = e->tok;
2213       v->set_arity(0, e->tok);
2214       if (current_function)
2215         current_function->locals.push_back (v);
2216       else if (current_probe)
2217         current_probe->locals.push_back (v);
2218       else
2219         // must be probe-condition expression
2220         throw SEMANTIC_ERROR (_("probe condition must not reference undeclared global"), e->tok);
2221       e->referent = v;
2222     }
2223 }
2224
2225
2226 void
2227 symresolution_info::visit_arrayindex (arrayindex* e)
2228 {
2229   visit_arrayindex(e, false);
2230 }
2231
2232 void
2233 symresolution_info::visit_arrayindex (arrayindex* e, bool wildcard_ok)
2234 {
2235   for (unsigned i=0; i<e->indexes.size(); i++)
2236     {
2237       // assuming that if NULL, it was originally a wildcard (*)
2238       if (e->indexes[i] == NULL)
2239         {
2240           if (!wildcard_ok)
2241             throw SEMANTIC_ERROR(_("wildcard not allowed in array index"), e->tok);
2242         }
2243       else
2244         e->indexes[i]->visit (this);
2245     }
2246
2247   symbol *array = NULL;
2248   hist_op *hist = NULL;
2249   classify_indexable(e->base, array, hist);
2250
2251   if (array)
2252     {
2253       if (array->referent)
2254         return;
2255
2256       vardecl* d = find_var (array->name, e->indexes.size (), array->tok);
2257       if (d)
2258       {
2259         array->referent = d;
2260         array->name = d->name;
2261       }
2262       else
2263         {
2264           stringstream msg;
2265           msg << _F("unresolved arity-%zu global array %s, missing global declaration?",
2266                     e->indexes.size(), array->name.to_string().c_str());
2267           throw SEMANTIC_ERROR (msg.str(), e->tok);
2268         }
2269     }
2270   else
2271     {
2272       assert (hist);
2273       hist->visit (this);
2274     }
2275 }
2276
2277
2278 void
2279 symresolution_info::visit_array_in (array_in* e)
2280 {
2281   visit_arrayindex(e->operand, true);
2282 }
2283
2284
2285 void
2286 symresolution_info::visit_functioncall (functioncall* e)
2287 {
2288   // XXX: we could relax this, if we're going to examine the
2289   // vartracking data recursively.  See testsuite/semko/fortytwo.stp.
2290   if (! (current_function || current_probe))
2291     {
2292       // must be probe-condition expression
2293       throw SEMANTIC_ERROR (_("probe condition must not reference function"), e->tok);
2294     }
2295
2296   for (unsigned i=0; i<e->args.size(); i++)
2297     e->args[i]->visit (this);
2298
2299   if (e->referent)
2300     return;
2301
2302   functiondecl* d = find_function (e->function, e->args.size (), e->tok);
2303   if (d)
2304   {
2305     e->referent = d;
2306     e->function = d->name;
2307   }
2308   else
2309     {
2310       string sugs = levenshtein_suggest(e->function, collect_functions(), 5); // print 5 funcs
2311       throw SEMANTIC_ERROR(_F("unresolved function%s",
2312                               sugs.empty() ? "" : (_(" (similar: ") + sugs + ")").c_str()),
2313                            e->tok);
2314     }
2315 }
2316
2317 /*find_var will return an argument other than zero if the name matches the var
2318  * name ie, if the current local name matches the name passed to find_var*/
2319 vardecl*
2320 symresolution_info::find_var (interned_string name, int arity, const token* tok)
2321 {
2322   if (current_function || current_probe)
2323     {
2324       // search locals
2325       vector<vardecl*>& locals = (current_function ?
2326                                   current_function->locals :
2327                                   current_probe->locals);
2328
2329
2330       for (unsigned i=0; i<locals.size(); i++)
2331         if (locals[i]->name == name)
2332           {
2333             locals[i]->set_arity (arity, tok);
2334             return locals[i];
2335           }
2336     }
2337
2338   // search function formal parameters (for scalars)
2339   if (arity == 0 && current_function)
2340     for (unsigned i=0; i<current_function->formal_args.size(); i++)
2341       if (current_function->formal_args[i]->name == name)
2342         {
2343           // NB: no need to check arity here: formal args always scalar
2344           current_function->formal_args[i]->set_arity (0, tok);
2345           return current_function->formal_args[i];
2346         }
2347
2348   // search processed globals
2349   string gname = "__global_" + string(name);
2350   string pname = "__private_" + detox_path(tok->location.file->name) + string(name);
2351   for (unsigned i=0; i<session.globals.size(); i++)
2352   {
2353     if ((session.globals[i]->name == name && startswith(name, "__global_")) ||
2354         (session.globals[i]->name == gname) ||
2355         (session.globals[i]->name == pname))
2356       {
2357         if (! session.suppress_warnings)
2358           {
2359             vardecl* v = session.globals[i];
2360             stapfile* f = tok->location.file;
2361             // clog << "resolved " << *tok << " to global " << *v->tok << endl;
2362             if (v->tok->location.file != f && !f->synthetic)
2363               {
2364                 session.print_warning (_F("cross-file global variable reference to %s from",
2365                                           lex_cast(*v->tok).c_str()), tok);
2366               }
2367           }
2368         session.globals[i]->set_arity (arity, tok);
2369         return session.globals[i];
2370       }
2371   }
2372
2373   // search library globals
2374   for (unsigned i=0; i<session.library_files.size(); i++)
2375     {
2376       stapfile* f = session.library_files[i];
2377       for (unsigned j=0; j<f->globals.size(); j++)
2378         {
2379           vardecl* g = f->globals[j];
2380           if (g->name == gname)
2381             {
2382               g->set_arity (arity, tok);
2383
2384               // put library into the queue if not already there
2385               if (find (session.files.begin(), session.files.end(), f)
2386                   == session.files.end())
2387                 session.files.push_back (f);
2388
2389               return g;
2390             }
2391         }
2392     }
2393
2394   return 0;
2395 }
2396
2397
2398 functiondecl*
2399 symresolution_info::find_function (const string& name, unsigned arity, const token *tok)
2400 {
2401   string gname = "__global_" + string(name);
2402   string pname = "__private_" + detox_path(tok->location.file->name) + string(name);
2403
2404   // the common path
2405
2406   // internal global functions bypassing the parser, such as __global_dwarf_tvar_[gs]et
2407   if ((session.functions.find(name) != session.functions.end()) && startswith(name, "__private_"))
2408     {
2409       functiondecl* fd = session.functions[name];
2410       assert (fd->name == name);
2411       if (fd->formal_args.size() == arity)
2412         return fd;
2413
2414       throw SEMANTIC_ERROR(_F("arity mismatch found (function '%s' takes %zu args)",
2415                               name.c_str(), fd->formal_args.size()), tok, fd->tok);
2416     }
2417
2418   // tapset or user script global functions coming from the parser
2419   if (session.functions.find(gname) != session.functions.end())
2420     {
2421       functiondecl* fd = session.functions[gname];
2422       assert (fd->name == gname);
2423       if (fd->formal_args.size() == arity)
2424         return fd;
2425
2426       throw SEMANTIC_ERROR(_F("arity mismatch found (function '%s' takes %zu args)",
2427                               name.c_str(), fd->formal_args.size()), tok, fd->tok);
2428     }
2429
2430   // tapset or user script private functions coming from the parser
2431   if (session.functions.find(pname) != session.functions.end())
2432     {
2433       functiondecl* fd = session.functions[pname];
2434       assert (fd->name == pname);
2435       if (fd->formal_args.size() == arity)
2436         return fd;
2437
2438       throw SEMANTIC_ERROR(_F("arity mismatch found (function '%s' takes %zu args)",
2439                               name.c_str(), fd->formal_args.size()), tok, fd->tok);
2440     }
2441
2442   // search library functions
2443   for (unsigned i=0; i<session.library_files.size(); i++)
2444     {
2445       stapfile* f = session.library_files[i];
2446       for (unsigned j=0; j<f->functions.size(); j++)
2447       {
2448         if ((f->functions[j]->name == gname) ||
2449             (f->functions[j]->name == pname))
2450           {
2451             if (f->functions[j]->formal_args.size() == arity)
2452               {
2453                 // put library into the queue if not already there
2454                 if (0) // session.verbose_resolution
2455                   cerr << _F("      function %s is defined from %s",
2456                              name.c_str(), f->name.c_str()) << endl;
2457
2458                 if (find (session.files.begin(), session.files.end(), f)
2459                     == session.files.end())
2460                   session.files.push_back (f);
2461                 // else .. print different message?
2462
2463                 return f->functions[j];
2464               }
2465
2466             throw SEMANTIC_ERROR(_F("arity mismatch found (function '%s' takes %zu args)",
2467                                     name.c_str(), f->functions[j]->formal_args.size()),
2468                                     tok, f->functions[j]->tok);
2469           }
2470       }
2471     }
2472
2473   return 0;
2474 }
2475
2476 set<string>
2477 symresolution_info::collect_functions(void)
2478 {
2479   set<string> funcs;
2480
2481   for (map<string,functiondecl*>::const_iterator it = session.functions.begin();
2482        it != session.functions.end(); ++it)
2483     funcs.insert(it->first);
2484
2485   // search library functions
2486   for (unsigned i=0; i<session.library_files.size(); i++)
2487     {
2488       stapfile* f = session.library_files[i];
2489       for (unsigned j=0; j<f->functions.size(); j++)
2490         funcs.insert(f->functions[j]->name);
2491     }
2492
2493   return funcs;
2494 }
2495
2496 // ------------------------------------------------------------------------
2497 // optimization
2498
2499
2500 // Do away with functiondecls that are never (transitively) called
2501 // from probes.
2502 void semantic_pass_opt1 (systemtap_session& s, bool& relaxed_p)
2503 {
2504   functioncall_traversing_visitor ftv;
2505   for (unsigned i=0; i<s.probes.size(); i++)
2506     {
2507       s.probes[i]->body->visit (& ftv);
2508       if (s.probes[i]->sole_location()->condition)
2509         s.probes[i]->sole_location()->condition->visit (& ftv);
2510     }
2511   vector<functiondecl*> new_unused_functions;
2512   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
2513     {
2514       functiondecl* fd = it->second;
2515       if (ftv.seen.find(fd) == ftv.seen.end())
2516         {
2517           if (! fd->synthetic && s.is_user_file(fd->tok->location.file->name))
2518             s.print_warning (_F("Eliding unused function '%s'",
2519                                 fd->name.to_string().c_str()), fd->tok);
2520           // s.functions.erase (it); // NB: can't, since we're already iterating upon it
2521           new_unused_functions.push_back (fd);
2522           relaxed_p = false;
2523         }
2524     }
2525   for (unsigned i=0; i<new_unused_functions.size(); i++)
2526     {
2527       map<string,functiondecl*>::iterator where = s.functions.find (new_unused_functions[i]->name);
2528       assert (where != s.functions.end());
2529       s.functions.erase (where);
2530       if (s.tapset_compile_coverage)
2531         s.unused_functions.push_back (new_unused_functions[i]);
2532     }
2533 }
2534
2535
2536 // ------------------------------------------------------------------------
2537
// Do away with local & global variables that are never
// written nor read.
//
// Variable usage is collected from every probe body and probe
// condition.  Then probe locals, function locals and session globals
// that appear in neither the read nor the written set are erased from
// their containers.
//
//   s          - the session whose probes, functions and globals are
//                examined and pruned.
//   relaxed_p  - set to false whenever at least one variable is elided.
//   iterations - the "never-assigned" warnings are only emitted when
//                this is 0 (presumably the first optimization round;
//                confirm against the caller).
void semantic_pass_opt2 (systemtap_session& s, bool& relaxed_p, unsigned iterations)
{
  varuse_collecting_visitor vut(s);

  for (unsigned i=0; i<s.probes.size(); i++)
    {
      s.probes[i]->body->visit (& vut);

      if (s.probes[i]->sole_location()->condition)
        s.probes[i]->sole_location()->condition->visit (& vut);
    }

  // NB: Since varuse_collecting_visitor also traverses down
  // actually called functions, we don't need to explicitly
  // iterate over them.  Uncalled ones should have been pruned
  // in _opt1 above.
  //
  // for (unsigned i=0; i<s.functions.size(); i++)
  //   s.functions[i]->body->visit (& vut);

  // Now in vut.read/written, we have a mixture of all locals, globals

  // Part 1: locals of each probe.  The index j only advances when the
  // current element is kept, because erase() shifts the rest down.
  for (unsigned i=0; i<s.probes.size(); i++)
    for (unsigned j=0; j<s.probes[i]->locals.size(); /* see below */)
      {
        vardecl* l = s.probes[i]->locals[j];

        // skip over "special" locals
        if (l->synthetic) { j++; continue; }

        if (vut.read.find (l) == vut.read.end() &&
            vut.written.find (l) == vut.written.end())
          {
            // Only warn about variables declared in the user's own files.
            if (s.is_user_file(l->tok->location.file->name))
              s.print_warning (_F("Eliding unused variable '%s'",
                                  l->name.to_string().c_str()), l->tok);
            if (s.tapset_compile_coverage) {
              s.probes[i]->unused_locals.push_back
                      (s.probes[i]->locals[j]);
            }
            s.probes[i]->locals.erase(s.probes[i]->locals.begin() + j);
            relaxed_p = false;
            // don't increment j
          }
        else
          {
            // Used but never written: warn and suggest similarly named
            // variables from this probe's locals and the globals.
            if (vut.written.find (l) == vut.written.end())
              if (iterations == 0 && ! s.suppress_warnings)
                {
                  set<string> vars;
                  vector<vardecl*>::iterator it;
                  for (it = s.probes[i]->locals.begin(); it != s.probes[i]->locals.end(); it++)
                    vars.insert((*it)->name);
                  for (it = s.globals.begin(); it != s.globals.end(); it++)
                    vars.insert((*it)->name);

                  // Never suggest the variable's own name.
                  vars.erase(l->name);
                  string sugs = levenshtein_suggest(l->name, vars, 5); // suggest top 5 vars
                  s.print_warning (_F("never-assigned local variable '%s'%s",
                                      l->name.to_string().c_str(), (sugs.empty() ? "" :
                                      (_(" (similar: ") + sugs + ")")).c_str()), l->tok);
                }
            j++;
          }
      }

  // Part 2: locals of each remaining function.  Same scheme as above,
  // except that no synthetic locals are skipped and the function's
  // formal arguments are also offered as suggestions.
  for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
    {
      functiondecl *fd = it->second;
      for (unsigned j=0; j<fd->locals.size(); /* see below */)
        {
          vardecl* l = fd->locals[j];
          if (vut.read.find (l) == vut.read.end() &&
              vut.written.find (l) == vut.written.end())
            {
              if (s.is_user_file(l->tok->location.file->name))
                s.print_warning (_F("Eliding unused variable '%s'",
                                    l->name.to_string().c_str()), l->tok);
              if (s.tapset_compile_coverage) {
                fd->unused_locals.push_back (fd->locals[j]);
              }
              fd->locals.erase(fd->locals.begin() + j);
              relaxed_p = false;
              // don't increment j
            }
          else
            {
              if (vut.written.find (l) == vut.written.end())
                if (iterations == 0 && ! s.suppress_warnings)
                  {
                    set<string> vars;
                    // NB: this "it" shadows the outer map iterator within
                    // this scope only.
                    vector<vardecl*>::iterator it;
                    for (it = fd->formal_args.begin() ;
                         it != fd->formal_args.end(); it++)
                        vars.insert((*it)->name);
                    for (it = fd->locals.begin(); it != fd->locals.end(); it++)
                        vars.insert((*it)->name);
                    for (it = s.globals.begin(); it != s.globals.end(); it++)
                        vars.insert((*it)->name);

                    vars.erase(l->name);
                    string sugs = levenshtein_suggest(l->name, vars, 5); // suggest top 5 vars
                    s.print_warning (_F("never-assigned local variable '%s'%s",
                                        l->name.to_string().c_str(), (sugs.empty() ? "" :
                                        (_(" (similar: ") + sugs + ")")).c_str()), l->tok);
                  }

              j++;
            }
        }
    }
  // Part 3: session globals.  The index i only advances when the
  // current element is kept.
  for (unsigned i=0; i<s.globals.size(); /* see below */)
    {
      vardecl* l = s.globals[i];
      if (vut.read.find (l) == vut.read.end() &&
          vut.written.find (l) == vut.written.end())
        {
          if (s.is_user_file(l->tok->location.file->name))
            s.print_warning (_F("Eliding unused variable '%s'",
                                l->name.to_string().c_str()), l->tok);
          if (s.tapset_compile_coverage) {
            s.unused_globals.push_back(s.globals[i]);
          }
          s.globals.erase(s.globals.begin() + i);
          relaxed_p = false;
          // don't increment i
        }
      else
        {
          // A global with an initializer counts as assigned.
          if (vut.written.find (l) == vut.written.end() && ! l->init) // no initializer
            if (iterations == 0 && ! s.suppress_warnings)
              {
                set<string> vars;
                vector<vardecl*>::iterator it;
                for (it = s.globals.begin(); it != s.globals.end(); it++)
                  if (l->name != (*it)->name)
                    vars.insert((*it)->name);

                string sugs = levenshtein_suggest(l->name, vars, 5); // suggest top 5 vars
                s.print_warning (_F("never-assigned global variable '%s'%s",
                                    l->name.to_string().c_str(), (sugs.empty() ? "" :
                                    (_(" (similar: ") + sugs + ")")).c_str()), l->tok);
              }

          i++;
        }
    }
}
2688
2689
2690 // ------------------------------------------------------------------------
2691
2692 struct dead_assignment_remover: public update_visitor
2693 {
2694   systemtap_session& session;
2695   bool& relaxed_p;
2696   const varuse_collecting_visitor& vut;
2697
2698   dead_assignment_remover(systemtap_session& s, bool& r,
2699                           const varuse_collecting_visitor& v):
2700     session(s), relaxed_p(r), vut(v) {}
2701
2702   void visit_assignment (assignment* e);
2703   void visit_try_block (try_block *s);
2704 };
2705
2706
2707 // symbol_fetcher augmented to allow target-symbol types, but NULLed.
2708 struct assignment_symbol_fetcher
2709   : public symbol_fetcher
2710 {
2711   assignment_symbol_fetcher (symbol *&sym): symbol_fetcher(sym)
2712   {}
2713
2714   void visit_target_symbol (target_symbol* e)
2715   {
2716     sym = NULL;
2717   }
2718
2719   void visit_atvar_op (atvar_op *e)
2720   {
2721     sym = NULL;
2722   }
2723
2724   void visit_cast_op (cast_op* e)
2725   {
2726     sym = NULL;
2727   }
2728
2729   void visit_autocast_op (autocast_op* e)
2730   {
2731     sym = NULL;
2732   }
2733
2734   void throwone (const token* t)
2735   {
2736     if (t->type == tok_operator && t->content == ".")
2737       // guess someone misused . in $foo->bar.baz expression
2738       // XXX why are we only checking this in lvalues?
2739       throw SEMANTIC_ERROR (_("Expecting lvalue expression, try -> instead"), t);
2740     else
2741       throw SEMANTIC_ERROR (_("Expecting lvalue expression"), t);
2742   }
2743 };
2744
2745 symbol *
2746 get_assignment_symbol_within_expression (expression *e)
2747 {
2748   symbol *sym = NULL;
2749   assignment_symbol_fetcher fetcher(sym);
2750   e->visit (&fetcher);
2751   return sym; // NB: may be null!
2752 }
2753
2754
2755 void
2756 dead_assignment_remover::visit_assignment (assignment* e)
2757 {
2758   replace (e->left);
2759   replace (e->right);
2760
2761   symbol* left = get_assignment_symbol_within_expression (e->left);
2762   if (left) // not unresolved $target, so intended sideeffect may be elided
2763     {
2764       vardecl* leftvar = left->referent;
2765       if (vut.read.find(leftvar) == vut.read.end()) // var never read?
2766         {
2767           // NB: Not so fast!  The left side could be an array whose
2768           // index expressions may have side-effects.  This would be
2769           // OK if we could replace the array assignment with a
2770           // statement-expression containing all the index expressions
2771           // and the rvalue... but we can't.
2772           // Another possibility is that we have an unread global variable
2773           // which are kept for probe end value display.
2774
2775           bool is_global = false;
2776           vector<vardecl*>::iterator it;
2777           for (it = session.globals.begin(); it != session.globals.end(); it++)
2778             if (leftvar->name == (*it)->name)
2779               {
2780                 is_global = true;
2781                 break;
2782               }
2783
2784           varuse_collecting_visitor lvut(session);
2785           e->left->visit (& lvut);
2786           if (lvut.side_effect_free () && !is_global // XXX: use _wrt() once we track focal_vars
2787               && !leftvar->synthetic) // don't elide assignment to synthetic $context variables
2788             {
2789               /* PR 1119: NB: This is not necessary here.  A write-only
2790                  variable will also be elided soon at the next _opt2 iteration.
2791               if (e->left->tok->location.file->name == session.user_file->name) // !tapset
2792                 session.print_warning("eliding write-only ", *e->left->tok);
2793               else
2794               */
2795               if (session.is_user_file(e->left->tok->location.file->name))
2796                 session.print_warning(_F("Eliding assignment to '%s'",
2797                                          leftvar->name.to_string().c_str()), e->tok);
2798               provide (e->right); // goodbye assignment*
2799               relaxed_p = false;
2800               return;
2801             }
2802         }
2803     }
2804   provide (e);
2805 }
2806
2807
2808 void
2809 dead_assignment_remover::visit_try_block (try_block *s)
2810 {
2811   replace (s->try_block);
2812   if (s->catch_error_var)
2813     {
2814       vardecl* errvar = s->catch_error_var->referent;
2815       if (vut.read.find(errvar) == vut.read.end()) // never read?
2816         {
2817           if (session.verbose>2)
2818             clog << _F("Eliding unused error string catcher %s at %s",
2819                       errvar->name.to_string().c_str(), lex_cast(*s->tok).c_str()) << endl;
2820           s->catch_error_var = 0;
2821         }
2822     }
2823   replace (s->catch_block);
2824   provide (s);
2825 }
2826
2827
2828 // Let's remove assignments to variables that are never read.  We
2829 // rewrite "(foo = expr)" as "(expr)".  This makes foo a candidate to
2830 // be optimized away as an unused variable, and expr a candidate to be
2831 // removed as a side-effect-free statement expression.  Wahoo!
2832 void semantic_pass_opt3 (systemtap_session& s, bool& relaxed_p)
2833 {
2834   // Recompute the varuse data, which will probably match the opt2
2835   // copy of the computation, except for those totally unused
2836   // variables that opt2 removed.
2837   varuse_collecting_visitor vut(s);
2838   for (unsigned i=0; i<s.probes.size(); i++)
2839     s.probes[i]->body->visit (& vut); // includes reachable functions too
2840
2841   dead_assignment_remover dar (s, relaxed_p, vut);
2842   // This instance may be reused for multiple probe/function body trims.
2843
2844   for (unsigned i=0; i<s.probes.size(); i++)
2845     dar.replace (s.probes[i]->body);
2846   for (map<string,functiondecl*>::iterator it = s.functions.begin();
2847        it != s.functions.end(); it++)
2848     dar.replace (it->second->body);
2849   // The rewrite operation is performed within the visitor.
2850
2851   // XXX: we could also zap write-only globals here
2852 }
2853
2854
2855 // ------------------------------------------------------------------------
2856
2857 struct dead_stmtexpr_remover: public update_visitor
2858 {
2859   systemtap_session& session;
2860   bool& relaxed_p;
2861   set<vardecl*> focal_vars; // vars considered subject to side-effects
2862
2863   dead_stmtexpr_remover(systemtap_session& s, bool& r):
2864     session(s), relaxed_p(r) {}
2865
2866   void visit_block (block *s);
2867   void visit_try_block (try_block *s);
2868   void visit_null_statement (null_statement *s);
2869   void visit_if_statement (if_statement* s);
2870   void visit_foreach_loop (foreach_loop *s);
2871   void visit_for_loop (for_loop *s);
2872   // XXX: and other places where stmt_expr's might be nested
2873
2874   void visit_expr_statement (expr_statement *s);
2875 };
2876
2877
2878 void
2879 dead_stmtexpr_remover::visit_null_statement (null_statement *s)
2880 {
2881   // easy!
2882   if (session.verbose>2)
2883     clog << _("Eliding side-effect-free null statement ") << *s->tok << endl;
2884   s = 0;
2885   provide (s);
2886 }
2887
2888
2889 void
2890 dead_stmtexpr_remover::visit_block (block *s)
2891 {
2892   vector<statement*> new_stmts;
2893   for (unsigned i=0; i<s->statements.size(); i++ )
2894     {
2895       statement* new_stmt = require (s->statements[i], true);
2896       if (new_stmt != 0)
2897         {
2898           // flatten nested blocks into this one
2899           block *b = dynamic_cast<block *>(new_stmt);
2900           if (b)
2901             {
2902               if (session.verbose>2)
2903                 clog << _("Flattening nested block ") << *b->tok << endl;
2904               new_stmts.insert(new_stmts.end(),
2905                   b->statements.begin(), b->statements.end());
2906               relaxed_p = false;
2907             }
2908           else
2909             new_stmts.push_back (new_stmt);
2910         }
2911     }
2912   if (new_stmts.size() == 0)
2913     {
2914       if (session.verbose>2)
2915         clog << _("Eliding side-effect-free empty block ") << *s->tok << endl;
2916       s = 0;
2917     }
2918   else if (new_stmts.size() == 1)
2919     {
2920       if (session.verbose>2)
2921         clog << _("Eliding side-effect-free singleton block ") << *s->tok << endl;
2922       provide (new_stmts[0]);
2923       return;
2924     }
2925   else
2926     s->statements = new_stmts;
2927   provide (s);
2928 }
2929
2930
2931 void
2932 dead_stmtexpr_remover::visit_try_block (try_block *s)
2933 {
2934   replace (s->try_block, true);
2935   replace (s->catch_block, true); // null catch{} is ok and useful
2936   if (s->try_block == 0)
2937     {
2938       if (session.verbose>2)
2939         clog << _("Eliding empty try {} block ") << *s->tok << endl;
2940       s = 0;
2941     }
2942   provide (s);
2943 }
2944
2945
2946 void
2947 dead_stmtexpr_remover::visit_if_statement (if_statement *s)
2948 {
2949   replace (s->thenblock, true);
2950   replace (s->elseblock, true);
2951
2952   if (s->thenblock == 0)
2953     {
2954       if (s->elseblock == 0)
2955         {
2956           // We may be able to elide this statement, if the condition
2957           // expression is side-effect-free.
2958           varuse_collecting_visitor vct(session);
2959           s->condition->visit(& vct);
2960           if (vct.side_effect_free ())
2961             {
2962               if (session.verbose>2)
2963                 clog << _("Eliding side-effect-free if statement ")
2964                      << *s->tok << endl;
2965               s = 0; // yeah, baby
2966             }
2967           else
2968             {
2969               // We can still turn it into a simple expr_statement though...
2970               if (session.verbose>2)
2971                 clog << _("Creating simple evaluation from if statement ")
2972                      << *s->tok << endl;
2973               expr_statement *es = new expr_statement;
2974               es->value = s->condition;
2975               es->tok = es->value->tok;
2976               provide (es);
2977               return;
2978             }
2979         }
2980       else
2981         {
2982           // For an else without a then, we can invert the condition logic to
2983           // avoid having a null statement in the thenblock
2984           if (session.verbose>2)
2985             clog << _("Inverting the condition of if statement ")
2986                  << *s->tok << endl;
2987           unary_expression *ue = new unary_expression;
2988           ue->operand = s->condition;
2989           ue->tok = ue->operand->tok;
2990           ue->op = "!";
2991           s->condition = ue;
2992           s->thenblock = s->elseblock;
2993           s->elseblock = 0;
2994         }
2995     }
2996   provide (s);
2997 }
2998
2999 void
3000 dead_stmtexpr_remover::visit_foreach_loop (foreach_loop *s)
3001 {
3002   replace (s->block, true);
3003
3004   if (s->block == 0)
3005     {
3006       // XXX what if s->limit has side effects?
3007       // XXX what about s->indexes or s->value used outside the loop?
3008       if(session.verbose > 2)
3009         clog << _("Eliding side-effect-free foreach statement ") << *s->tok << endl;
3010       s = 0; // yeah, baby
3011     }
3012   provide (s);
3013 }
3014
3015 void
3016 dead_stmtexpr_remover::visit_for_loop (for_loop *s)
3017 {
3018   replace (s->block, true);
3019
3020   if (s->block == 0)
3021     {
3022       // We may be able to elide this statement, if the condition
3023       // expression is side-effect-free.
3024       varuse_collecting_visitor vct(session);
3025       if (s->init) s->init->visit(& vct);
3026       s->cond->visit(& vct);
3027       if (s->incr) s->incr->visit(& vct);
3028       if (vct.side_effect_free ())
3029         {
3030           if (session.verbose>2)
3031             clog << _("Eliding side-effect-free for statement ") << *s->tok << endl;
3032           s = 0; // yeah, baby
3033         }
3034       else
3035         {
3036           // Can't elide this whole statement; put a null in there.
3037           s->block = new null_statement(s->tok);
3038         }
3039     }
3040   provide (s);
3041 }
3042
3043
3044
3045 void
3046 dead_stmtexpr_remover::visit_expr_statement (expr_statement *s)
3047 {
3048   // Run a varuse query against the operand expression.  If it has no
3049   // side-effects, replace the entire statement expression by a null
3050   // statement with the provide() call.
3051   //
3052   // Unlike many other visitors, we do *not* traverse this outermost
3053   // one into the expression subtrees.  There is no need - no
3054   // expr_statement nodes will be found there.  (Function bodies
3055   // need to be visited explicitly by our caller.)
3056   //
3057   // NB.  While we don't share nodes in the parse tree, let's not
3058   // deallocate *s anyway, just in case...
3059
3060   varuse_collecting_visitor vut(session);
3061   s->value->visit (& vut);
3062
3063   if (vut.side_effect_free_wrt (focal_vars))
3064     {
3065       /* PR 1119: NB: this message is not a good idea here.  It can
3066          name some arbitrary RHS expression of an assignment.
3067       if (s->value->tok->location.file->name == session.user_file->name) // not tapset
3068         session.print_warning("eliding never-assigned ", *s->value->tok);
3069       else
3070       */
3071       if (session.is_user_file(s->value->tok->location.file->name))
3072         session.print_warning("Eliding side-effect-free expression ", s->tok);
3073
3074       // NB: this 0 pointer is invalid to leave around for any length of
3075       // time, but the parent parse tree objects above handle it.
3076       s = 0;
3077       relaxed_p = false;
3078     }
3079   provide (s);
3080 }
3081
3082
3083 void semantic_pass_opt4 (systemtap_session& s, bool& relaxed_p)
3084 {
3085   // Finally, let's remove some statement-expressions that have no
3086   // side-effect.  These should be exactly those whose private varuse
3087   // visitors come back with an empty "written" and "embedded" lists.
3088
3089   dead_stmtexpr_remover duv (s, relaxed_p);
3090   // This instance may be reused for multiple probe/function body trims.
3091
3092   for (unsigned i=0; i<s.probes.size(); i++)
3093     {
3094       assert_no_interrupts();
3095
3096       derived_probe* p = s.probes[i];
3097
3098       duv.focal_vars.clear ();
3099       duv.focal_vars.insert (s.globals.begin(),
3100                              s.globals.end());
3101       duv.focal_vars.insert (p->locals.begin(),
3102                              p->locals.end());
3103
3104       duv.replace (p->body, true);
3105       if (p->body == 0)
3106         {
3107           if (! s.timing && // PR10070
3108               !(p->base->tok->location.file->synthetic)) // don't warn for synthetic probes
3109             s.print_warning (_F("side-effect-free probe '%s'",
3110                                 p->name.to_string().c_str()), p->tok);
3111
3112           p->body = new null_statement(p->tok);
3113
3114           // XXX: possible duplicate warnings; see below
3115         }
3116     }
3117   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
3118     {
3119       assert_no_interrupts();
3120
3121       functiondecl* fn = it->second;
3122       duv.focal_vars.clear ();
3123       duv.focal_vars.insert (fn->locals.begin(),
3124                              fn->locals.end());
3125       duv.focal_vars.insert (fn->formal_args.begin(),
3126                              fn->formal_args.end());
3127       duv.focal_vars.insert (s.globals.begin(),
3128                              s.globals.end());
3129
3130       duv.replace (fn->body, true);
3131       if (fn->body == 0)
3132         {
3133           s.print_warning (_F("side-effect-free function '%s'",
3134                               fn->name.to_string().c_str()), fn->tok);
3135
3136           fn->body = new null_statement(fn->tok);
3137
3138           // XXX: the next iteration of the outer optimization loop may
3139           // take this new null_statement away again, and thus give us a
3140           // fresh warning.  It would be better if this fixup was performed
3141           // only after the relaxation iterations.
3142           // XXX: or else see bug #6469.
3143         }
3144     }
3145 }
3146
3147
3148 // ------------------------------------------------------------------------
3149
3150 // The goal of this visitor is to reduce top-level expressions in void context
3151 // into separate statements that evaluate each subcomponent of the expression.
3152 // The dead-statement-remover can later remove some parts if they have no side
3153 // effects.
3154 //
3155 // All expressions must be overridden here so we never visit their subexpressions
3156 // accidentally.  Thus, the only visited expressions should be value of an
3157 // expr_statement.
3158 //
3159 // For an expression to replace its expr_statement with something else, it will
3160 // let the new statement provide(), and then provide(0) for itself.  The
3161 // expr_statement will take this as a sign that it's been replaced.
3162 struct void_statement_reducer: public update_visitor
3163 {
3164   systemtap_session& session;
3165   bool& relaxed_p;
3166   set<vardecl*> focal_vars; // vars considered subject to side-effects
3167
3168   void_statement_reducer(systemtap_session& s, bool& r):
3169     session(s), relaxed_p(r) {}
3170
3171   void visit_expr_statement (expr_statement* s);
3172
3173   // expressions in conditional / loop controls are definitely a side effect,
3174   // but still recurse into the child statements
3175   void visit_if_statement (if_statement* s);
3176   void visit_for_loop (for_loop* s);
3177   void visit_foreach_loop (foreach_loop* s);
3178
3179   // these expressions get rewritten into their statement equivalents
3180   void visit_logical_or_expr (logical_or_expr* e);
3181   void visit_logical_and_expr (logical_and_expr* e);
3182   void visit_ternary_expression (ternary_expression* e);
3183
3184   // all of these can (usually) be reduced into simpler statements
3185   void visit_binary_expression (binary_expression* e);
3186   void visit_unary_expression (unary_expression* e);
3187   void visit_regex_query (regex_query* e); // XXX depends on subexpr extraction
3188   void visit_comparison (comparison* e);
3189   void visit_concatenation (concatenation* e);
3190   void visit_functioncall (functioncall* e);
3191   void visit_print_format (print_format* e);
3192   void visit_target_symbol (target_symbol* e);
3193   void visit_atvar_op (atvar_op* e);
3194   void visit_cast_op (cast_op* e);
3195   void visit_autocast_op (autocast_op* e);
3196   void visit_defined_op (defined_op* e);
3197
3198   // these are a bit hairy to grok due to the intricacies of indexables and
3199   // stats, so I'm chickening out and skipping them...
3200   void visit_array_in (array_in* e) { provide (e); }
3201   void visit_arrayindex (arrayindex* e) { provide (e); }
3202   void visit_stat_op (stat_op* e) { provide (e); }
3203   void visit_hist_op (hist_op* e) { provide (e); }
3204
3205   // these can't be reduced because they always have an effect
3206   void visit_return_statement (return_statement* s) { provide (s); }
3207   void visit_delete_statement (delete_statement* s) { provide (s); }
3208   void visit_pre_crement (pre_crement* e) { provide (e); }
3209   void visit_post_crement (post_crement* e) { provide (e); }
3210   void visit_assignment (assignment* e) { provide (e); }
3211
3212 private:
3213   void reduce_target_symbol (target_symbol* e, expression* operand=NULL);
3214 };
3215
3216
3217 void
3218 void_statement_reducer::visit_expr_statement (expr_statement* s)
3219 {
3220   replace (s->value, true);
3221
3222   // if the expression provides 0, that's our signal that a new
3223   // statement has been provided, so we shouldn't provide this one.
3224   if (s->value != 0)
3225     provide(s);
3226 }
3227
3228 void
3229 void_statement_reducer::visit_if_statement (if_statement* s)
3230 {
3231   // s->condition is never void
3232   replace (s->thenblock);
3233   replace (s->elseblock);
3234   provide (s);
3235 }
3236
3237 void
3238 void_statement_reducer::visit_for_loop (for_loop* s)
3239 {
3240   // s->init/cond/incr are never void
3241   replace (s->block);
3242   provide (s);
3243 }
3244
3245 void
3246 void_statement_reducer::visit_foreach_loop (foreach_loop* s)
3247 {
3248   // s->indexes/base/value/limit are never void
3249   replace (s->block);
3250   provide (s);
3251 }
3252
3253 void
3254 void_statement_reducer::visit_logical_or_expr (logical_or_expr* e)
3255 {
3256   // In void context, the evaluation of "a || b" is exactly like
3257   // "if (!a) b", so let's do that instead.
3258
3259   if (session.verbose>2)
3260     clog << _("Creating if statement from unused logical-or ")
3261          << *e->tok << endl;
3262
3263   if_statement *is = new if_statement;
3264   is->tok = e->tok;
3265   is->elseblock = 0;
3266
3267   unary_expression *ue = new unary_expression;
3268   ue->operand = e->left;
3269   ue->tok = e->tok;
3270   ue->op = "!";
3271   is->condition = ue;
3272
3273   expr_statement *es = new expr_statement;
3274   es->value = e->right;
3275   es->tok = es->value->tok;
3276   is->thenblock = es;
3277
3278   is->visit(this);
3279   relaxed_p = false;
3280   e = 0;
3281   provide (e);
3282 }
3283
3284 void
3285 void_statement_reducer::visit_logical_and_expr (logical_and_expr* e)
3286 {
3287   // In void context, the evaluation of "a && b" is exactly like
3288   // "if (a) b", so let's do that instead.
3289
3290   if (session.verbose>2)
3291     clog << _("Creating if statement from unused logical-and ")
3292          << *e->tok << endl;
3293
3294   if_statement *is = new if_statement;
3295   is->tok = e->tok;
3296   is->elseblock = 0;
3297   is->condition = e->left;
3298
3299   expr_statement *es = new expr_statement;
3300   es->value = e->right;
3301   es->tok = es->value->tok;
3302   is->thenblock = es;
3303
3304   is->visit(this);
3305   relaxed_p = false;
3306   e = 0;
3307   provide (e);
3308 }
3309
3310 void
3311 void_statement_reducer::visit_ternary_expression (ternary_expression* e)
3312 {
3313   // In void context, the evaluation of "a ? b : c" is exactly like
3314   // "if (a) b else c", so let's do that instead.
3315
3316   if (session.verbose>2)
3317     clog << _("Creating if statement from unused ternary expression ")
3318          << *e->tok << endl;
3319
3320   if_statement *is = new if_statement;
3321   is->tok = e->tok;
3322   is->condition = e->cond;
3323
3324   expr_statement *es = new expr_statement;
3325   es->value = e->truevalue;
3326   es->tok = es->value->tok;
3327   is->thenblock = es;
3328
3329   es = new expr_statement;
3330   es->value = e->falsevalue;
3331   es->tok = es->value->tok;
3332   is->elseblock = es;
3333
3334   is->visit(this);
3335   relaxed_p = false;
3336   e = 0;
3337   provide (e);
3338 }
3339
3340 void
3341 void_statement_reducer::visit_binary_expression (binary_expression* e)
3342 {
3343   // When the result of a binary operation isn't needed, it's just as good to
3344   // evaluate the operands as sequential statements in a block.
3345
3346   if (session.verbose>2)
3347     clog << _("Eliding unused binary ") << *e->tok << endl;
3348
3349   block *b = new block;
3350   b->tok = e->tok;
3351
3352   expr_statement *es = new expr_statement;
3353   es->value = e->left;
3354   es->tok = es->value->tok;
3355   b->statements.push_back(es);
3356
3357   es = new expr_statement;
3358   es->value = e->right;
3359   es->tok = es->value->tok;
3360   b->statements.push_back(es);
3361
3362   b->visit(this);
3363   relaxed_p = false;
3364   e = 0;
3365   provide (e);
3366 }
3367
3368 void
3369 void_statement_reducer::visit_unary_expression (unary_expression* e)
3370 {
3371   // When the result of a unary operation isn't needed, it's just as good to
3372   // evaluate the operand directly
3373
3374   if (session.verbose>2)
3375     clog << _("Eliding unused unary ") << *e->tok << endl;
3376
3377   relaxed_p = false;
3378   e->operand->visit(this);
3379 }
3380
3381 void
3382 void_statement_reducer::visit_regex_query (regex_query* e)
3383 {
3384   // TODOXXX After subexpression extraction is implemented,
3385   // regular expression matches *may* have side-effects in
3386   // terms of producing matched subexpressions, e.g.:
3387   //
3388   //   str =~ "pat"; println(matched(0));
3389   //
3390   // It's debatable if we want to actually allow this, though.
3391
3392   // Treat e as a unary expression on the left operand -- since the
3393   // right hand side must be a literal (as verified by the parser),
3394   // evaluating it never has side effects.
3395
3396   if (session.verbose>2)
3397     clog << _("Eliding regex query ") << *e->tok << endl;
3398
3399   relaxed_p = false;
3400   e->left->visit(this);
3401 }
3402
3403 void
3404 void_statement_reducer::visit_comparison (comparison* e)
3405 {
3406   visit_binary_expression(e);
3407 }
3408
3409 void
3410 void_statement_reducer::visit_concatenation (concatenation* e)
3411 {
3412   visit_binary_expression(e);
3413 }
3414
3415 void
3416 void_statement_reducer::visit_functioncall (functioncall* e)
3417 {
3418   // If a function call is pure and its result ignored, we can elide the call
3419   // and just evaluate the arguments in sequence
3420
3421   if (!e->args.size())
3422     {
3423       provide (e);
3424       return;
3425     }
3426
3427   varuse_collecting_visitor vut(session);
3428   vut.seen.insert (e->referent);
3429   vut.current_function = e->referent;
3430   e->referent->body->visit (& vut);
3431   if (!vut.side_effect_free_wrt (focal_vars))
3432     {
3433       provide (e);
3434       return;
3435     }
3436
3437   if (session.verbose>2)
3438     clog << _("Eliding side-effect-free function call ") << *e->tok << endl;
3439
3440   block *b = new block;
3441   b->tok = e->tok;
3442
3443   for (unsigned i=0; i<e->args.size(); i++ )
3444     {
3445       expr_statement *es = new expr_statement;
3446       es->value = e->args[i];
3447       es->tok = es->value->tok;
3448       b->statements.push_back(es);
3449     }
3450
3451   b->visit(this);
3452   relaxed_p = false;
3453   e = 0;
3454   provide (e);
3455 }
3456
3457 void
3458 void_statement_reducer::visit_print_format (print_format* e)
3459 {
3460   // When an sprint's return value is ignored, we can simply evaluate the
3461   // arguments in sequence
3462
3463   if (e->print_to_stream || !e->args.size())
3464     {
3465       provide (e);
3466       return;
3467     }
3468
3469   if (session.verbose>2)
3470     clog << _("Eliding unused print ") << *e->tok << endl;
3471
3472   block *b = new block;
3473   b->tok = e->tok;
3474
3475   for (unsigned i=0; i<e->args.size(); i++ )
3476     {
3477       expr_statement *es = new expr_statement;
3478       es->value = e->args[i];
3479       es->tok = es->value->tok;
3480       b->statements.push_back(es);
3481     }
3482
3483   b->visit(this);
3484   relaxed_p = false;
3485   e = 0;
3486   provide (e);
3487 }
3488
3489 void
3490 void_statement_reducer::reduce_target_symbol (target_symbol* e,
3491                                               expression* operand)
3492 {
3493   // When the result of any target_symbol isn't needed, it's just as good to
3494   // evaluate the operand and any array indexes directly
3495
3496   block *b = new block;
3497   b->tok = e->tok;
3498
3499   if (operand)
3500     {
3501       expr_statement *es = new expr_statement;
3502       es->value = operand;
3503       es->tok = es->value->tok;
3504       b->statements.push_back(es);
3505     }
3506
3507   for (unsigned i=0; i<e->components.size(); i++ )
3508     {
3509       if (e->components[i].type != target_symbol::comp_expression_array_index)
3510         continue;
3511
3512       expr_statement *es = new expr_statement;
3513       es->value = e->components[i].expr_index;
3514       es->tok = es->value->tok;
3515       b->statements.push_back(es);
3516     }
3517
3518   b->visit(this);
3519   relaxed_p = false;
3520   e = 0;
3521   provide (e);
3522 }
3523
3524 void
3525 void_statement_reducer::visit_atvar_op (atvar_op* e)
3526 {
3527   if (session.verbose>2)
3528     clog << _("Eliding unused target symbol ") << *e->tok << endl;
3529   reduce_target_symbol (e);
3530 }
3531
3532 void
3533 void_statement_reducer::visit_target_symbol (target_symbol* e)
3534 {
3535   if (session.verbose>2)
3536     clog << _("Eliding unused target symbol ") << *e->tok << endl;
3537   reduce_target_symbol (e);
3538 }
3539
3540 void
3541 void_statement_reducer::visit_cast_op (cast_op* e)
3542 {
3543   if (session.verbose>2)
3544     clog << _("Eliding unused typecast ") << *e->tok << endl;
3545   reduce_target_symbol (e, e->operand);
3546 }
3547
3548 void
3549 void_statement_reducer::visit_autocast_op (autocast_op* e)
3550 {
3551   if (session.verbose>2)
3552     clog << _("Eliding unused autocast ") << *e->tok << endl;
3553   reduce_target_symbol (e, e->operand);
3554 }
3555
3556
3557 void
3558 void_statement_reducer::visit_defined_op (defined_op* e)
3559 {
3560   // When the result of a @defined operation isn't needed, just elide
3561   // it entirely.  Its operand $expression must already be
3562   // side-effect-free.
3563
3564   if (session.verbose>2)
3565     clog << _("Eliding unused check ") << *e->tok << endl;
3566
3567   relaxed_p = false;
3568   e = 0;
3569   provide (e);
3570 }
3571
3572
3573
3574 void semantic_pass_opt5 (systemtap_session& s, bool& relaxed_p)
3575 {
3576   // Let's simplify statements with unused computed values.
3577
3578   void_statement_reducer vuv (s, relaxed_p);
3579   // This instance may be reused for multiple probe/function body trims.
3580
3581   vuv.focal_vars.insert (s.globals.begin(), s.globals.end());
3582
3583   for (unsigned i=0; i<s.probes.size(); i++)
3584     vuv.replace (s.probes[i]->body);
3585   for (map<string,functiondecl*>::iterator it = s.functions.begin();
3586        it != s.functions.end(); it++)
3587     vuv.replace (it->second->body);
3588 }
3589
3590
3591 struct const_folder: public update_visitor
3592 {
3593   systemtap_session& session;
3594   bool& relaxed_p;
3595
3596   const_folder(systemtap_session& s, bool& r):
3597     session(s), relaxed_p(r), last_number(0), last_string(0) {}
3598
3599   literal_number* last_number;
3600   literal_number* get_number(expression*& e);
3601   void visit_literal_number (literal_number* e);
3602
3603   literal_string* last_string;
3604   literal_string* get_string(expression*& e);
3605   void visit_literal_string (literal_string* e);
3606
3607   void get_literal(expression*& e, literal_number*& n, literal_string*& s);
3608
3609   void visit_if_statement (if_statement* s);
3610   void visit_for_loop (for_loop* s);
3611   void visit_foreach_loop (foreach_loop* s);
3612   void visit_binary_expression (binary_expression* e);
3613   void visit_unary_expression (unary_expression* e);
3614   void visit_logical_or_expr (logical_or_expr* e);
3615   void visit_logical_and_expr (logical_and_expr* e);
3616   // void visit_regex_query (regex_query* e); // XXX: would require executing dfa at compile-time
3617   void visit_comparison (comparison* e);
3618   void visit_concatenation (concatenation* e);
3619   void visit_ternary_expression (ternary_expression* e);
3620   void visit_defined_op (defined_op* e);
3621   void visit_target_symbol (target_symbol* e);
3622 };
3623
3624 void
3625 const_folder::get_literal(expression*& e,
3626                           literal_number*& n,
3627                           literal_string*& s)
3628 {
3629   replace (e);
3630   n = (e == last_number) ? last_number : NULL;
3631   s = (e == last_string) ? last_string : NULL;
3632 }
3633
3634 literal_number*
3635 const_folder::get_number(expression*& e)
3636 {
3637   replace (e);
3638   return (e == last_number) ? last_number : NULL;
3639 }
3640
3641 void
3642 const_folder::visit_literal_number (literal_number* e)
3643 {
3644   last_number = e;
3645   provide (e);
3646 }
3647
3648 literal_string*
3649 const_folder::get_string(expression*& e)
3650 {
3651   replace (e);
3652   return (e == last_string) ? last_string : NULL;
3653 }
3654
3655 void
3656 const_folder::visit_literal_string (literal_string* e)
3657 {
3658   last_string = e;
3659   provide (e);
3660 }
3661
3662 void
3663 const_folder::visit_if_statement (if_statement* s)
3664 {
3665   literal_number* cond = get_number (s->condition);
3666   if (!cond)
3667     {
3668       replace (s->thenblock);
3669       replace (s->elseblock);
3670       provide (s);
3671     }
3672   else
3673     {
3674       if (session.verbose>2)
3675         clog << _F("Collapsing constant-%" PRIi64 " if-statement %s",
3676                    cond->value, lex_cast(*s->tok).c_str()) << endl;
3677       relaxed_p = false;
3678
3679       statement* n = cond->value ? s->thenblock : s->elseblock;
3680       if (n)
3681         n->visit (this);
3682       else
3683         provide (new null_statement (s->tok));
3684     }
3685 }
3686
3687 void
3688 const_folder::visit_for_loop (for_loop* s)
3689 {
3690   literal_number* cond = get_number (s->cond);
3691   if (!cond || cond->value)
3692     {
3693       replace (s->init);
3694       replace (s->incr);
3695       replace (s->block);
3696       provide (s);
3697     }
3698   else
3699     {
3700       if (session.verbose>2)
3701         clog << _("Collapsing constantly-false for-loop ") << *s->tok << endl;
3702       relaxed_p = false;
3703
3704       if (s->init)
3705         s->init->visit (this);
3706       else
3707         provide (new null_statement (s->tok));
3708     }
3709 }
3710
3711 void
3712 const_folder::visit_foreach_loop (foreach_loop* s)
3713 {
3714   literal_number* limit = get_number (s->limit);
3715   if (!limit || limit->value > 0)
3716     {
3717       for (unsigned i = 0; i < s->indexes.size(); ++i)
3718         replace (s->indexes[i]);
3719       replace (s->base);
3720       replace (s->value);
3721       replace (s->block);
3722       provide (s);
3723     }
3724   else
3725     {
3726       if (session.verbose>2)
3727         clog << _("Collapsing constantly-limited foreach-loop ") << *s->tok << endl;
3728       relaxed_p = false;
3729
3730       provide (new null_statement (s->tok));
3731     }
3732 }
3733
3734 void
3735 const_folder::visit_binary_expression (binary_expression* e)
3736 {
3737   int64_t value;
3738   literal_number* left = get_number (e->left);
3739   literal_number* right = get_number (e->right);
3740
3741   if (right && !right->value && (e->op == "/" || e->op == "%"))
3742     {
3743       // Give divide-by-zero a chance to be optimized out elsewhere,
3744       // and if not it will be a runtime error anyway...
3745       provide (e);
3746       return;
3747     }
3748
3749   if (left && right)
3750     {
3751       if (e->op == "+")
3752         value = left->value + right->value;
3753       else if (e->op == "-")
3754         value = left->value - right->value;
3755       else if (e->op == "*")
3756         value = left->value * right->value;
3757       else if (e->op == "&")
3758         value = left->value & right->value;
3759       else if (e->op == "|")
3760         value = left->value | right->value;
3761       else if (e->op == "^")
3762         value = left->value ^ right->value;
3763       else if (e->op == ">>")
3764         value = left->value >> max(min(right->value, (int64_t)64), (int64_t)0);
3765       else if (e->op == "<<")
3766         value = left->value << max(min(right->value, (int64_t)64), (int64_t)0);
3767       else if (e->op == "/")
3768         value = (left->value == LLONG_MIN && right->value == -1) ? LLONG_MIN :
3769                 left->value / right->value;
3770       else if (e->op == "%")
3771         value = (left->value == LLONG_MIN && right->value == -1) ? 0 :
3772                 left->value % right->value;
3773       else
3774         throw SEMANTIC_ERROR (_("unsupported binary operator ") + (string)e->op);
3775     }
3776
3777   else if ((left && ((left->value == 0 && (e->op == "*" || e->op == "&" ||
3778                                            e->op == ">>" || e->op == "<<" )) ||
3779                      (left->value ==-1 && (e->op == "|" || e->op == ">>"))))
3780            ||
3781            (right && ((right->value == 0 && (e->op == "*" || e->op == "&")) ||
3782                       (right->value == 1 && (e->op == "%")) ||
3783                       (right->value ==-1 && (e->op == "%" || e->op == "|")))))
3784     {
3785       expression* other = left ? e->right : e->left;
3786       varuse_collecting_visitor vu(session);
3787       other->visit(&vu);
3788       if (!vu.side_effect_free())
3789         {
3790           provide (e);
3791           return;
3792         }
3793
3794       // we'll pass on type=pe_long inference to the expression
3795       if (other->type == pe_unknown)
3796         other->type = pe_long;
3797       else if (other->type != pe_long)
3798         {
3799           // this mismatch was not caught in the initial type resolution pass,
3800           // generate a mismatch (left doesn't match right) error
3801           typeresolution_info ti(session);
3802           ti.assert_resolvability = true; // need this to get it throw errors
3803           ti.mismatch_complexity = 1; // also needed to throw errors
3804           ti.mismatch(e);
3805         }
3806
3807       if (left)
3808         value = left->value;
3809       else if (e->op == "%")
3810         value = 0;
3811       else
3812         value = right->value;
3813     }
3814
3815   else if ((left && ((left->value == 0 && (e->op == "+" || e->op == "|" ||
3816                                            e->op == "^")) ||
3817                      (left->value == 1 && (e->op == "*")) ||
3818                      (left->value ==-1 && (e->op == "&"))))
3819            ||
3820            (right && ((right->value == 0 && (e->op == "+" || e->op == "-" ||
3821                                              e->op == "|" || e->op == "^")) ||
3822                       (right->value == 1 && (e->op == "*" || e->op == "/")) ||
3823                       (right->value ==-1 && (e->op == "&")) ||
3824                       (right->value <= 0 && (e->op == ">>" || e->op == "<<")))))
3825     {
3826       if (session.verbose>2)
3827         clog << _("Collapsing constant-identity binary operator ") << *e->tok << endl;
3828       relaxed_p = false;
3829
3830       // we'll pass on type=pe_long inference to the expression
3831       expression* other = left ? e->right : e->left;
3832       if (other->type == pe_unknown)
3833         other->type = pe_long;
3834       else if (other->type != pe_long)
3835         {
3836           // this mismatch was not caught in the initial type resolution pass,
3837           // generate a mismatch (left doesn't match right) error
3838           typeresolution_info ti(session);
3839           ti.assert_resolvability = true; // need this to get it throw errors
3840           ti.mismatch_complexity = 1; // also needed to throw errors
3841           ti.mismatch(e);
3842         }
3843
3844       provide (other);
3845       return;
3846     }
3847
3848   else
3849     {
3850       provide (e);
3851       return;
3852     }
3853
3854   if (session.verbose>2)
3855     clog << _F("Collapsing constant-%" PRIi64 " binary operator %s",
3856                value, lex_cast(*e->tok).c_str()) << endl;
3857   relaxed_p = false;
3858
3859   literal_number* n = new literal_number(value);
3860   n->tok = e->tok;
3861   n->visit (this);
3862 }
3863
3864 void
3865 const_folder::visit_unary_expression (unary_expression* e)
3866 {
3867   literal_number* operand = get_number (e->operand);
3868   if (!operand)
3869     provide (e);
3870   else
3871     {
3872       if (session.verbose>2)
3873         clog << _("Collapsing constant unary ") << *e->tok << endl;
3874       relaxed_p = false;
3875
3876       literal_number* n = new literal_number (*operand);
3877       n->tok = e->tok;
3878       if (e->op == "+")
3879         ; // nothing to do
3880       else if (e->op == "-")
3881         n->value = -n->value;
3882       else if (e->op == "!")
3883         n->value = !n->value;
3884       else if (e->op == "~")
3885         n->value = ~n->value;
3886       else
3887         throw SEMANTIC_ERROR (_("unsupported unary operator ") + (string)e->op);
3888       n->visit (this);
3889     }
3890 }
3891
3892 void
3893 const_folder::visit_logical_or_expr (logical_or_expr* e)
3894 {
3895   int64_t value;
3896   literal_number* left = get_number (e->left);
3897   literal_number* right = get_number (e->right);
3898
3899   if (left && right)
3900     value = left->value || right->value;
3901
3902   else if ((left && left->value) || (right && right->value))
3903     {
3904       // If the const is on the left, we get to short-circuit the right
3905       // immediately.  Otherwise, we can only eliminate the LHS if it's pure.
3906       if (right)
3907         {
3908           varuse_collecting_visitor vu(session);
3909           e->left->visit(&vu);
3910           if (!vu.side_effect_free())
3911             {
3912               provide (e);
3913               return;
3914             }
3915         }
3916
3917       value = 1;
3918     }
3919
3920   // We might also get rid of useless "0||x" and "x||0", except it does
3921   // normalize x to 0 or 1.  We could change it to "!!x", but it's not clear
3922   // that this would gain us much.
3923
3924   else
3925     {
3926       provide (e);
3927       return;
3928     }
3929
3930   if (session.verbose>2)
3931     clog << _("Collapsing constant logical-OR ") << *e->tok << endl;
3932   relaxed_p = false;
3933
3934   literal_number* n = new literal_number(value);
3935   n->tok = e->tok;
3936   n->visit (this);
3937 }
3938
3939 void
3940 const_folder::visit_logical_and_expr (logical_and_expr* e)
3941 {
3942   int64_t value;
3943   literal_number* left = get_number (e->left);
3944   literal_number* right = get_number (e->right);
3945
3946   if (left && right)
3947     value = left->value && right->value;
3948
3949   else if ((left && !left->value) || (right && !right->value))
3950     {
3951       // If the const is on the left, we get to short-circuit the right
3952       // immediately.  Otherwise, we can only eliminate the LHS if it's pure.
3953       if (right)
3954         {
3955           varuse_collecting_visitor vu(session);
3956           e->left->visit(&vu);
3957           if (!vu.side_effect_free())
3958             {
3959               provide (e);
3960               return;
3961             }
3962         }
3963
3964       value = 0;
3965     }
3966
3967   // We might also get rid of useless "1&&x" and "x&&1", except it does
3968   // normalize x to 0 or 1.  We could change it to "!!x", but it's not clear
3969   // that this would gain us much.
3970
3971   else
3972     {
3973       provide (e);
3974       return;
3975     }
3976
3977   if (session.verbose>2)
3978     clog << _("Collapsing constant logical-AND ") << *e->tok << endl;
3979   relaxed_p = false;
3980
3981   literal_number* n = new literal_number(value);
3982   n->tok = e->tok;
3983   n->visit (this);
3984 }
3985
3986 void
3987 const_folder::visit_comparison (comparison* e)
3988 {
3989   int comp;
3990
3991   literal_number *left_num, *right_num;
3992   literal_string *left_str, *right_str;
3993   get_literal(e->left, left_num, left_str);
3994   get_literal(e->right, right_num, right_str);
3995
3996   if (left_str && right_str)
3997     comp = left_str->value.compare(right_str->value);
3998
3999   else if (left_num && right_num)
4000     comp = left_num->value < right_num->value ? -1 :
4001            left_num->value > right_num->value ? 1 : 0;
4002
4003   else if ((left_num && ((left_num->value == LLONG_MIN &&
4004                           (e->op == "<=" || e->op == ">")) ||
4005                          (left_num->value == LLONG_MAX &&
4006                           (e->op == ">=" || e->op == "<"))))
4007            ||
4008            (right_num && ((right_num->value == LLONG_MIN &&
4009                             (e->op == ">=" || e->op == "<")) ||
4010                            (right_num->value == LLONG_MAX &&
4011                             (e->op == "<=" || e->op == ">")))))
4012     {
4013       expression* other = left_num ? e->right : e->left;
4014       varuse_collecting_visitor vu(session);
4015       other->visit(&vu);
4016       if (!vu.side_effect_free())
4017         provide (e);
4018       else
4019         {
4020           if (session.verbose>2)
4021             clog << _("Collapsing constant-boundary comparison ") << *e->tok << endl;
4022           relaxed_p = false;
4023
4024           // ops <= and >= are true, < and > are false
4025           literal_number* n = new literal_number( e->op.length() == 2 );
4026           n->tok = e->tok;
4027           n->visit (this);
4028         }
4029       return;
4030     }
4031
4032   else
4033     {
4034       provide (e);
4035       return;
4036     }
4037
4038   if (session.verbose>2)
4039     clog << _("Collapsing constant comparison ") << *e->tok << endl;
4040   relaxed_p = false;
4041
4042   int64_t value;
4043   if (e->op == "==")
4044     value = comp == 0;
4045   else if (e->op == "!=")
4046     value = comp != 0;
4047   else if (e->op == "<")
4048     value = comp < 0;
4049   else if (e->op == ">")
4050     value = comp > 0;
4051   else if (e->op == "<=")
4052     value = comp <= 0;
4053   else if (e->op == ">=")
4054     value = comp >= 0;
4055   else
4056     throw SEMANTIC_ERROR (_("unsupported comparison operator ") + (string)e->op);
4057
4058   literal_number* n = new literal_number(value);
4059   n->tok = e->tok;
4060   n->visit (this);
4061 }
4062
4063 void
4064 const_folder::visit_concatenation (concatenation* e)
4065 {
4066   literal_string* left = get_string (e->left);
4067   literal_string* right = get_string (e->right);
4068
4069   if (left && right)
4070     {
4071       if (session.verbose>2)
4072         clog << _("Collapsing constant concatenation ") << *e->tok << endl;
4073       relaxed_p = false;
4074
4075       literal_string* n = new literal_string (*left);
4076       n->tok = e->tok;
4077       n->value = (string)n->value + (string)right->value;
4078       n->visit (this);
4079     }
4080   else if ((left && left->value.empty()) ||
4081            (right && right->value.empty()))
4082     {
4083       if (session.verbose>2)
4084         clog << _("Collapsing identity concatenation ") << *e->tok << endl;
4085       relaxed_p = false;
4086       provide(left ? e->right : e->left);
4087     }
4088   else
4089     provide (e);
4090 }
4091
4092 void
4093 const_folder::visit_ternary_expression (ternary_expression* e)
4094 {
4095   literal_number* cond = get_number (e->cond);
4096   if (!cond)
4097     {
4098       replace (e->truevalue);
4099       replace (e->falsevalue);
4100       provide (e);
4101     }
4102   else
4103     {
4104       if (session.verbose>2)
4105         clog << _F("Collapsing constant-%" PRIi64 " ternary %s",
4106                    cond->value, lex_cast(*e->tok).c_str()) << endl;
4107       relaxed_p = false;
4108
4109       expression* n = cond->value ? e->truevalue : e->falsevalue;
4110       n->visit (this);
4111     }
4112 }
4113
4114 void
4115 const_folder::visit_defined_op (defined_op* e)
4116 {
4117   // If a @defined makes it this far, then it is, de facto, undefined.
4118
4119   if (session.verbose>2)
4120     clog << _("Collapsing untouched @defined check ") << *e->tok << endl;
4121   relaxed_p = false;
4122
4123   literal_number* n = new literal_number (0);
4124   n->tok = e->tok;
4125   n->visit (this);
4126 }
4127
4128 void
4129 const_folder::visit_target_symbol (target_symbol* e)
4130 {
4131   if (session.skip_badvars)
4132     {
4133       // Upon user request for ignoring context, the symbol is replaced
4134       // with a literal 0 and a warning message displayed
4135       // XXX this ignores possible side-effects, e.g. in array indexes
4136       literal_number* ln_zero = new literal_number (0);
4137       ln_zero->tok = e->tok;
4138       provide (ln_zero);
4139       session.print_warning (_("Bad $context variable being substituted with literal 0"),
4140                                e->tok);
4141       relaxed_p = false;
4142     }
4143   else
4144     update_visitor::visit_target_symbol (e);
4145 }
4146
4147 static int initial_typeres_pass(systemtap_session& s);
4148 static int semantic_pass_const_fold (systemtap_session& s, bool& relaxed_p)
4149 {
4150   // attempt an initial type resolution pass to see if there are any type
4151   // mismatches before we starting whisking away vars that get switched out
4152   // with a const.
4153
4154   // return if the initial type resolution pass reported errors (type mismatches)
4155   int rc = initial_typeres_pass(s);
4156   if (rc)
4157     {
4158       relaxed_p = true;
4159       return rc;
4160     }
4161
4162   // Let's simplify statements with constant values.
4163   const_folder cf (s, relaxed_p);
4164   // This instance may be reused for multiple probe/function body trims.
4165
4166   for (unsigned i=0; i<s.probes.size(); i++)
4167     cf.replace (s.probes[i]->body);
4168   for (map<string,functiondecl*>::iterator it = s.functions.begin();
4169        it != s.functions.end(); it++)
4170     cf.replace (it->second->body);
4171   return 0;
4172 }
4173
4174
4175 struct dead_control_remover: public traversing_visitor
4176 {
4177   systemtap_session& session;
4178   bool& relaxed_p;
4179   statement* control;
4180
4181   dead_control_remover(systemtap_session& s, bool& r):
4182     session(s), relaxed_p(r), control(NULL) {}
4183
4184   void visit_block (block *b);
4185
4186   // When a block contains any of these, the following statements are dead.
4187   void visit_return_statement (return_statement* s) { control = s; }
4188   void visit_next_statement (next_statement* s) { control = s; }
4189   void visit_break_statement (break_statement* s) { control = s; }
4190   void visit_continue_statement (continue_statement* s) { control = s; }
4191 };
4192
4193
4194 void dead_control_remover::visit_block (block* b)
4195 {
4196   vector<statement*>& vs = b->statements;
4197   if (vs.size() == 0) /* else (size_t) size()-1 => very big */
4198     return;
4199   for (size_t i = 0; i < vs.size() - 1; ++i)
4200     {
4201       vs[i]->visit (this);
4202       if (vs[i] == control)
4203         {
4204           session.print_warning(_("statement will never be reached"),
4205                                 vs[i + 1]->tok);
4206           vs.erase(vs.begin() + i + 1, vs.end());
4207           relaxed_p = false;
4208           break;
4209         }
4210     }
4211 }
4212
4213
4214 static void semantic_pass_dead_control (systemtap_session& s, bool& relaxed_p)
4215 {
4216   // Let's remove code that follow unconditional control statements
4217
4218   dead_control_remover dc (s, relaxed_p);
4219
4220   for (unsigned i=0; i<s.probes.size(); i++)
4221     s.probes[i]->body->visit(&dc);
4222
4223   for (map<string,functiondecl*>::iterator it = s.functions.begin();
4224        it != s.functions.end(); it++)
4225     it->second->body->visit(&dc);
4226 }
4227
4228
4229 struct duplicate_function_remover: public functioncall_traversing_visitor
4230 {
4231   systemtap_session& s;
4232   map<functiondecl*, functiondecl*>& duplicate_function_map;
4233
4234   duplicate_function_remover(systemtap_session& sess,
4235                              map<functiondecl*, functiondecl*>&dfm):
4236     s(sess), duplicate_function_map(dfm) {};
4237
4238   void visit_functioncall (functioncall* e);
4239 };
4240
4241 void
4242 duplicate_function_remover::visit_functioncall (functioncall *e)
4243 {
4244   functioncall_traversing_visitor::visit_functioncall (e);
4245
4246   // If the current function call reference points to a function that
4247   // is a duplicate, replace it.
4248   if (duplicate_function_map.count(e->referent) != 0)
4249     {
4250       if (s.verbose>2)
4251           clog << _F("Changing %s reference to %s reference\n",
4252                      e->referent->name.to_string().c_str(),
4253                      duplicate_function_map[e->referent]->name.to_string().c_str());
4254       e->tok = duplicate_function_map[e->referent]->tok;
4255       e->function = duplicate_function_map[e->referent]->name;
4256       e->referent = duplicate_function_map[e->referent];
4257     }
4258 }
4259
4260 static string
4261 get_functionsig (functiondecl* f)
4262 {
4263   ostringstream s;
4264
4265   // Get the "name:args body" of the function in s.  We have to
4266   // include the args since the function 'x1(a, b)' is different than
4267   // the function 'x2(b, a)' even if the bodies of the two functions
4268   // are exactly the same.
4269   f->printsig(s);
4270   f->body->print(s);
4271
4272   // printsig puts f->name + ':' on the front.  Remove this
4273   // (otherwise, functions would never compare equal).
4274   string str = s.str().erase(0, f->name.size() + 1);
4275
4276   // Return the function signature.
4277   return str;
4278 }
4279
4280 void semantic_pass_opt6 (systemtap_session& s, bool& relaxed_p)
4281 {
4282   // Walk through all the functions, looking for duplicates.
4283   map<string, functiondecl*> functionsig_map;
4284   map<functiondecl*, functiondecl*> duplicate_function_map;
4285
4286
4287   vector<functiondecl*> newly_zapped_functions;
4288   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
4289     {
4290       functiondecl *fd = it->second;
4291       string functionsig = get_functionsig(fd);
4292
4293       if (functionsig_map.count(functionsig) == 0)
4294         {
4295           // This function is unique.  Remember it.
4296           functionsig_map[functionsig] = fd;
4297         }
4298       else
4299         {
4300           // This function is a duplicate.
4301           duplicate_function_map[fd] = functionsig_map[functionsig];
4302           newly_zapped_functions.push_back (fd);
4303           relaxed_p = false;
4304         }
4305     }
4306   for (unsigned i=0; i<newly_zapped_functions.size(); i++)
4307     {
4308       map<string,functiondecl*>::iterator where = s.functions.find (newly_zapped_functions[i]->name);
4309       assert (where != s.functions.end());
4310       s.functions.erase (where);
4311     }
4312
4313
4314   // If we have duplicate functions, traverse down the tree, replacing
4315   // the appropriate function calls.
4316   // duplicate_function_remover::visit_functioncall() handles the
4317   // details of replacing the function calls.
4318   if (duplicate_function_map.size() != 0)
4319     {
4320       duplicate_function_remover dfr (s, duplicate_function_map);
4321
4322       for (unsigned i=0; i < s.probes.size(); i++)
4323         s.probes[i]->body->visit(&dfr);
4324     }
4325 }
4326
4327 struct stable_analysis: public embedded_tags_visitor
4328 {
4329   bool stable;
4330   stable_analysis(): embedded_tags_visitor(true), stable(false) {};
4331
4332   void visit_embeddedcode (embeddedcode* s);
4333   void visit_functioncall (functioncall* e);
4334 };
4335
4336 void stable_analysis::visit_embeddedcode (embeddedcode* s)
4337 {
4338   embedded_tags_visitor::visit_embeddedcode(s);
4339   if (tagged_p("/* stable */"))
4340     stable = true;
4341   if (stable && !tagged_p("/* pure */"))
4342     throw SEMANTIC_ERROR(_("stable function must also be /* pure */"),
4343         s->tok);
4344 }
4345
4346 void stable_analysis::visit_functioncall (functioncall* e)
4347 {
4348 }
4349
4350 // Examines entire subtree for any stable functioncalls.
4351 struct stable_finder: public traversing_visitor
4352 {
4353   bool stable;
4354   set<string>& stable_fcs;
4355   stable_finder(set<string>&s): stable(false), stable_fcs(s) {};
4356   void visit_functioncall (functioncall* e);
4357 };
4358
4359 void stable_finder::visit_functioncall (functioncall* e)
4360 {
4361   if (stable_fcs.find(e->function) != stable_fcs.end())
4362     stable = true;
4363   traversing_visitor::visit_functioncall(e);
4364 }
4365
4366 // Examines current level of block for stable functioncalls.
4367 // Does not descend into sublevels.
4368 struct level_check: public traversing_visitor
4369 {
4370   bool stable;
4371   set<string>& stable_fcs;
4372   level_check(set<string>& s): stable(false), stable_fcs(s) {};
4373
4374   void visit_block (block* s);
4375   void visit_try_block (try_block *s);
4376   void visit_if_statement (if_statement* s);
4377   void visit_for_loop (for_loop* s);
4378   void visit_foreach_loop (foreach_loop* s);
4379   void visit_functioncall (functioncall* s);
4380 };
4381
4382 void level_check::visit_block (block* s)
4383 {
4384 }
4385
4386 void level_check::visit_try_block (try_block* s)
4387 {
4388   if (s->catch_error_var)
4389     s->catch_error_var->visit(this);
4390 }
4391
4392 void level_check::visit_if_statement (if_statement* s)
4393 {
4394   s->condition->visit(this);
4395 }
4396
4397 void level_check::visit_for_loop (for_loop* s)
4398 {
4399   if (s->init) s->init->visit(this);
4400   s->cond->visit(this);
4401   if (s->incr) s->incr->visit(this);
4402 }
4403
4404 void level_check::visit_foreach_loop (foreach_loop* s)
4405 {
4406   s->base->visit(this);
4407
4408   for (unsigned i=0; i<s->indexes.size(); i++)
4409     s->indexes[i]->visit(this);
4410
4411   if (s->value)
4412     s->value->visit(this);
4413
4414   if (s->limit)
4415     s->limit->visit(this);
4416 }
4417
4418 void level_check::visit_functioncall (functioncall* e)
4419 {
4420   if (stable_fcs.find(e->function) != stable_fcs.end())
4421     stable = true;
4422   traversing_visitor::visit_functioncall(e);
4423 }
4424
4425 struct stable_functioncall_visitor: public update_visitor
4426 {
4427   systemtap_session& session;
4428   functiondecl* current_function;
4429   derived_probe* current_probe;
4430   set<string>& stable_fcs;
4431   set<string> scope_vars;
4432   map<string,vardecl*> new_vars;
4433   vector<pair<expr_statement*,block*> > new_stmts;
4434   unsigned loop_depth;
4435   block* top_scope;
4436   block* curr_scope;
4437   stable_functioncall_visitor(systemtap_session& s, set<string>& sfc):
4438     session(s), current_function(0), current_probe(0), stable_fcs(sfc),
4439     loop_depth(0), top_scope(0), curr_scope(0) {};
4440
4441   statement* convert_stmt(statement* s);
4442   void visit_block (block* s);
4443   void visit_try_block (try_block* s);
4444   void visit_if_statement (if_statement* s);
4445   void visit_for_loop (for_loop* s);
4446   void visit_foreach_loop (foreach_loop* s);
4447   void visit_functioncall (functioncall* e);
4448 };
4449
4450 statement* stable_functioncall_visitor::convert_stmt (statement* s)
4451 {
4452   if (top_scope == 0 &&
4453      (dynamic_cast<for_loop*>(s) || dynamic_cast<foreach_loop*>(s)))
4454     {
4455       stable_finder sf(stable_fcs);
4456       s->visit(&sf);
4457       if (sf.stable)
4458         {
4459           block* b = new block;
4460           b->tok = s->tok;
4461           b->statements.push_back(s);
4462           return b;
4463         }
4464     }
4465   else if (top_scope == 0 && !dynamic_cast<block*>(s))
4466     {
4467       level_check lc(stable_fcs);
4468       s->visit(&lc);
4469       if (lc.stable)
4470         {
4471           block* b = new block;
4472           b->tok = s->tok;
4473           b->statements.push_back(s);
4474           return b;
4475         }
4476     }
4477
4478   return s;
4479 }
4480
4481 void stable_functioncall_visitor::visit_block (block* s)
4482 {
4483   block* prev_top_scope = top_scope;
4484   block* prev_scope = curr_scope;
4485   if (loop_depth == 0)
4486     top_scope = s;
4487   curr_scope = s;
4488   set<string> current_vars = scope_vars;
4489
4490   update_visitor::visit_block(s);
4491
4492   if (loop_depth == 0)
4493     top_scope = prev_top_scope;
4494   curr_scope = prev_scope;
4495   scope_vars = current_vars;
4496 }
4497
4498 void stable_functioncall_visitor::visit_try_block (try_block* s)
4499 {
4500   if (s->try_block)
4501     s->try_block = convert_stmt(s->try_block);
4502   replace(s->try_block);
4503   replace(s->catch_error_var);
4504   if (s->catch_block)
4505     s->catch_block = convert_stmt(s->catch_block);
4506   replace(s->catch_block);
4507   provide(s);
4508 }
4509
4510 void stable_functioncall_visitor::visit_if_statement (if_statement* s)
4511 {
4512   block* prev_top_scope = top_scope;
4513
4514   if (loop_depth == 0)
4515     top_scope = 0;
4516   replace(s->condition);
4517   s->thenblock = convert_stmt(s->thenblock);
4518   replace(s->thenblock);
4519   if (loop_depth == 0)
4520     top_scope = 0;
4521   if (s->elseblock)
4522     s->elseblock = convert_stmt(s->elseblock);
4523   replace(s->elseblock);
4524   provide(s);
4525
4526   top_scope = prev_top_scope;
4527 }
4528
4529 void stable_functioncall_visitor::visit_for_loop (for_loop* s)
4530 {
4531   replace(s->init);
4532   replace(s->cond);
4533   replace(s->incr);
4534   loop_depth++;
4535   s->block = convert_stmt(s->block);
4536   replace(s->block);
4537   loop_depth--;
4538   provide(s);
4539 }
4540
4541 void stable_functioncall_visitor::visit_foreach_loop (foreach_loop* s)
4542 {
4543   for (unsigned i = 0; i < s->indexes.size(); ++i)
4544     replace(s->indexes[i]);
4545   replace(s->base);
4546   replace(s->value);
4547   replace(s->limit);
4548   loop_depth++;
4549   s->block = convert_stmt(s->block);
4550   replace(s->block);
4551   loop_depth--;
4552   provide(s);
4553 }
4554
4555 void stable_functioncall_visitor::visit_functioncall (functioncall* e)
4556 {
4557   for (unsigned i = 0; i < e->args.size(); ++i)
4558     replace (e->args[i]);
4559
4560   if (stable_fcs.find(e->function) != stable_fcs.end())
4561     {
4562       string name("__stable_");
4563       name.append(e->function).append("_value");
4564
4565       // Variable potentially not in scope since it is in a sibling block
4566       if (scope_vars.find(e->function) == scope_vars.end())
4567         {
4568           if (new_vars.find(e->function) == new_vars.end())
4569             {
4570               // New variable declaration to store result of function call
4571               vardecl* v = new vardecl;
4572               v->name = name;
4573               v->tok = e->tok;
4574               v->set_arity(0, e->tok);
4575               v->type = e->type;
4576               if (current_function)
4577                 current_function->locals.push_back(v);
4578               else
4579                 current_probe->locals.push_back(v);
4580               new_vars[e->function] = v;
4581             }
4582
4583           symbol* sym = new symbol;
4584           sym->name = name;
4585           sym->tok = e->tok;
4586           sym->referent = new_vars[e->function];
4587           sym->type = e->type;
4588
4589           functioncall* fc = new functioncall;
4590           fc->tok = e->tok;
4591           fc->function = e->function;
4592           fc->referent = e->referent;
4593           fc->type = e->type;
4594
4595           assignment* a = new assignment;
4596           a->tok = e->tok;
4597           a->op = "=";
4598           a->left = sym;
4599           a->right = fc;
4600           a->type = e->type;
4601
4602           expr_statement* es = new expr_statement;
4603           es->tok = e->tok;
4604           es->value = a;
4605
4606           // Store location of the block to put new declaration.
4607           if (loop_depth != 0)
4608             {
4609               assert(top_scope);
4610               new_stmts.push_back(make_pair(es,top_scope));
4611             }
4612           else
4613             {
4614               assert(curr_scope);
4615               new_stmts.push_back(make_pair(es,curr_scope));
4616             }
4617
4618           scope_vars.insert(e->function);
4619
4620           provide(sym);
4621         }
4622       else
4623         {
4624           symbol* sym = new symbol;
4625           sym->name = name;
4626           sym->tok = e->tok;
4627           sym->referent = new_vars[e->function];
4628           sym->type = e->type;
4629           provide(sym);
4630         }
4631       return;
4632     }
4633
4634   provide(e);
4635 }
4636
4637 // Cache stable embedded-c functioncall results and replace
4638 // all calls with same name using that value to reduce duplicate
4639 // functioncall overhead. Functioncalls are pulled out of any
4640 // top-level loops and put into if/try blocks.
4641 void semantic_pass_opt7(systemtap_session& s)
4642 {
4643   set<string> stable_fcs;
4644   for (map<string,functiondecl*>::iterator it = s.functions.begin();
4645        it != s.functions.end(); ++it)
4646     {
4647       functiondecl* fn = (*it).second;
4648       stable_analysis sa;
4649       fn->body->visit(&sa);
4650       if (sa.stable && fn->formal_args.size() == 0)
4651         stable_fcs.insert(fn->name);
4652     }
4653
4654   for (vector<derived_probe*>::iterator it = s.probes.begin();
4655        it != s.probes.end(); ++it)
4656     {
4657       stable_functioncall_visitor t(s, stable_fcs);
4658       t.current_probe = *it;
4659       (*it)->body = t.convert_stmt((*it)->body);
4660       t.replace((*it)->body);
4661
4662       for (vector<pair<expr_statement*,block*> >::iterator st = t.new_stmts.begin();
4663            st != t.new_stmts.end(); ++st)
4664         st->second->statements.insert(st->second->statements.begin(), st->first);
4665     }
4666
4667   for (map<string,functiondecl*>::iterator it = s.functions.begin();
4668        it != s.functions.end(); ++it)
4669     {
4670       functiondecl* fn = (*it).second;
4671       stable_functioncall_visitor t(s, stable_fcs);
4672       t.current_function = fn;
4673       fn->body = t.convert_stmt(fn->body);
4674       t.replace(fn->body);
4675
4676       for (vector<pair<expr_statement*,block*> >::iterator st = t.new_stmts.begin();
4677            st != t.new_stmts.end(); ++st)
4678         st->second->statements.insert(st->second->statements.begin(), st->first);
4679     }
4680 }
4681
4682 static int
4683 semantic_pass_optimize1 (systemtap_session& s)
4684 {
4685   // In this pass, we attempt to rewrite probe/function bodies to
4686   // eliminate some blatantly unnecessary code.  This is run before
4687   // type inference, but after symbol resolution and derived_probe
4688   // creation.  We run an outer "relaxation" loop that repeats the
4689   // optimizations until none of them find anything to remove.
4690
4691   int rc = 0;
4692
4693   // Save the old value of suppress_warnings, as we will be changing
4694   // it below.
4695   save_and_restore<bool> suppress_warnings(& s.suppress_warnings);
4696
4697   bool relaxed_p = false;
4698   unsigned iterations = 0;
4699   while (! relaxed_p)
4700     {
4701       assert_no_interrupts();
4702
4703       relaxed_p = true; // until proven otherwise
4704
4705       // If the verbosity is high enough, always print warnings (overrides -w),
4706       // or if not, always suppress warnings for every itteration after the first.
4707       if(s.verbose > 2)
4708         s.suppress_warnings = false;
4709       else if (iterations > 0)
4710         s.suppress_warnings = true;
4711
4712       if (!s.unoptimized)
4713         {
4714           semantic_pass_opt1 (s, relaxed_p);
4715           semantic_pass_opt2 (s, relaxed_p, iterations); // produce some warnings only on iteration=0
4716           semantic_pass_opt3 (s, relaxed_p);
4717           semantic_pass_opt4 (s, relaxed_p);
4718           semantic_pass_opt5 (s, relaxed_p);
4719         }
4720
4721       // For listing mode, we need const-folding regardless of optimization so
4722       // that @defined expressions can be properly resolved.  PR11360
4723       // We also want it in case variables are used in if/case expressions,
4724       // so enable always.  PR11366
4725       // rc is incremented if there is an error that got reported.
4726       rc += semantic_pass_const_fold (s, relaxed_p);
4727
4728       if (!s.unoptimized)
4729         semantic_pass_dead_control (s, relaxed_p);
4730
4731       iterations ++;
4732     }
4733
4734   return rc;
4735 }
4736
4737
4738 static int
4739 semantic_pass_optimize2 (systemtap_session& s)
4740 {
4741   // This is run after type inference.  We run an outer "relaxation"
4742   // loop that repeats the optimizations until none of them find
4743   // anything to remove.
4744
4745   int rc = 0;
4746
4747   // Save the old value of suppress_warnings, as we will be changing
4748   // it below.
4749   save_and_restore<bool> suppress_warnings(& s.suppress_warnings);
4750
4751   bool relaxed_p = false;
4752   unsigned iterations = 0;
4753   while (! relaxed_p)
4754     {
4755       assert_no_interrupts();
4756       relaxed_p = true; // until proven otherwise
4757
4758       // If the verbosity is high enough, always print warnings (overrides -w),
4759       // or if not, always suppress warnings for every itteration after the first.
4760       if(s.verbose > 2)
4761         s.suppress_warnings = false;
4762       else if (iterations > 0)
4763         s.suppress_warnings = true;
4764
4765       if (!s.unoptimized)
4766         semantic_pass_opt6 (s, relaxed_p);
4767
4768       iterations++;
4769     }
4770
4771   if (!s.unoptimized)
4772     semantic_pass_opt7(s);
4773
4774   return rc;
4775 }
4776
4777
4778
4779 // ------------------------------------------------------------------------
4780 // type resolution
4781
4782 struct autocast_expanding_visitor: public var_expanding_visitor
4783 {
4784   typeresolution_info& ti;
4785   autocast_expanding_visitor (typeresolution_info& ti): ti(ti) {}
4786
4787   void resolve_functioncall (functioncall* fc)
4788     {
4789       // This is a very limited version of semantic_pass_symbols, but we're
4790       // late in the game at this point.  We won't get a chance to optimize,
4791       // but for now the only functions we expect are kernel/user_string from
4792       // pretty-printing, which don't need optimization.
4793
4794       systemtap_session& s = ti.session;
4795       size_t nfiles = s.files.size();
4796
4797       symresolution_info sym (s);
4798       sym.current_function = ti.current_function;
4799       sym.current_probe = ti.current_probe;
4800       fc->visit (&sym);
4801
4802       // NB: synthetic functions get tacked onto the origin file, so we won't
4803       // see them growing s.files[].  Traverse it directly.
4804       if (fc->referent)
4805         {
4806           functiondecl* fd = fc->referent;
4807           sym.current_function = fd;
4808           sym.current_probe = 0;
4809           fd->body->visit (&sym);
4810         }
4811
4812       while (nfiles < s.files.size())
4813         {
4814           stapfile* dome = s.files[nfiles++];
4815           for (size_t i = 0; i < dome->functions.size(); ++i)
4816             {
4817               functiondecl* fd = dome->functions[i];
4818               sym.current_function = fd;
4819               sym.current_probe = 0;
4820               fd->body->visit (&sym);
4821               // NB: not adding to s.functions just yet...
4822             }
4823         }
4824
4825       // Add only the direct functions we need.
4826       functioncall_traversing_visitor ftv;
4827       fc->visit (&ftv);
4828       for (set<functiondecl*>::iterator it = ftv.seen.begin();
4829            it != ftv.seen.end(); ++it)
4830         {
4831           functiondecl* fd = *it;
4832           pair<map<string,functiondecl*>::iterator,bool> inserted =
4833             s.functions.insert (make_pair (fd->name, fd));
4834           if (!inserted.second && inserted.first->second != fd)
4835             throw SEMANTIC_ERROR
4836               (_F("resolved function '%s' conflicts with an existing function",
4837                   fd->name.to_string().c_str()), fc->tok);
4838         }
4839     }
4840
4841   void visit_autocast_op (autocast_op* e)
4842     {
4843       const bool lvalue = is_active_lvalue (e);
4844       const exp_type_ptr& details = e->operand->type_details;
4845       if (details && !e->saved_conversion_error)
4846         {
4847           functioncall* fc = details->expand (e, lvalue);
4848           if (fc)
4849             {
4850               ti.num_newly_resolved++;
4851
4852               resolve_functioncall (fc);
4853
4854               if (lvalue)
4855                 provide_lvalue_call (fc);
4856
4857               fc->visit (this);
4858               return;
4859             }
4860         }
4861       var_expanding_visitor::visit_autocast_op (e);
4862     }
4863 };
4864
4865
4866 struct initial_typeresolution_info : public typeresolution_info
4867 {
4868   initial_typeresolution_info (systemtap_session& s): typeresolution_info(s)
4869   {}
4870
4871   // these expressions are not supposed to make its way to the typeresolution
4872   // pass. they probably get substituted/replaced, but since this is an initial pass
4873   // and not all substitutions are done, replace the functions that throw errors.
4874   void visit_target_symbol (target_symbol* e) {}
4875   void visit_atvar_op (atvar_op* e) {}
4876   void visit_defined_op (defined_op* e) {}
4877   void visit_entry_op (entry_op* e) {}
4878   void visit_cast_op (cast_op* e) {}
4879 };
4880
4881 static int initial_typeres_pass(systemtap_session& s)
4882 {
4883   // minimal type resolution based off of semantic_pass_types(), without
4884   // checking for complete type resolutions or autocast expanding
4885   initial_typeresolution_info ti(s);
4886
4887   // Globals never have detailed types.
4888   // If we null them now, then all remaining vardecls can be detailed.
4889   for (unsigned j=0; j<s.globals.size(); j++)
4890     {
4891       vardecl* gd = s.globals[j];
4892       if (!gd->type_details)
4893         gd->type_details = ti.null_type;
4894     }
4895
4896   ti.assert_resolvability = false;
4897   while (1)
4898     {
4899       assert_no_interrupts();
4900
4901       ti.num_newly_resolved = 0;
4902       ti.num_still_unresolved = 0;
4903       ti.num_available_autocasts = 0;
4904
4905       for (map<string,functiondecl*>::iterator it = s.functions.begin();
4906                                                it != s.functions.end(); it++)
4907         {
4908           assert_no_interrupts();
4909
4910           functiondecl* fd = it->second;
4911           ti.current_probe = 0;
4912           ti.current_function = fd;
4913           ti.t = pe_unknown;
4914           fd->body->visit (& ti);
4915         }
4916
4917       for (unsigned j=0; j<s.probes.size(); j++)
4918         {
4919           assert_no_interrupts();
4920
4921           derived_probe* pn = s.probes[j];
4922           ti.current_function = 0;
4923           ti.current_probe = pn;
4924           ti.t = pe_unknown;
4925           pn->body->visit (& ti);
4926
4927           probe_point* pp = pn->sole_location();
4928           if (pp->condition)
4929             {
4930               ti.current_function = 0;
4931               ti.current_probe = 0;
4932               ti.t = pe_long; // NB: expected type
4933               pp->condition->visit (& ti);
4934             }
4935         }
4936       if (ti.num_newly_resolved == 0) // converged
4937         {
4938           // take into account that if there are mismatches, we'd want to know
4939           // about them incase they get whisked away, later in this process
4940           if (!ti.assert_resolvability && ti.mismatch_complexity > 0) // found a mismatch!!
4941             {
4942               ti.assert_resolvability = true; // report errors
4943               if (s.verbose > 0)
4944                 ti.mismatch_complexity = 1; // print out mismatched but not unresolved type mismatches
4945             }
4946           else
4947             break;
4948         }
4949       else
4950         ti.mismatch_complexity = 0;
4951     }
4952
4953   return s.num_errors();
4954 }
4955
4956 static int
4957 semantic_pass_types (systemtap_session& s)
4958 {
4959   int rc = 0;
4960
4961   // next pass: type inference
4962   unsigned iterations = 0;
4963   typeresolution_info ti (s);
4964
4965   // Globals never have detailed types.
4966   // If we null them now, then all remaining vardecls can be detailed.
4967   for (unsigned j=0; j<s.globals.size(); j++)
4968     {
4969       vardecl* gd = s.globals[j];
4970       if (!gd->type_details)
4971         gd->type_details = ti.null_type;
4972     }
4973
4974   ti.assert_resolvability = false;
4975   while (1)
4976     {
4977       assert_no_interrupts();
4978
4979       iterations ++;
4980       ti.num_newly_resolved = 0;
4981       ti.num_still_unresolved = 0;
4982       ti.num_available_autocasts = 0;
4983
4984       for (map<string,functiondecl*>::iterator it = s.functions.begin();
4985                                                it != s.functions.end(); it++)
4986         try
4987           {
4988             assert_no_interrupts();
4989
4990             functiondecl* fd = it->second;
4991             ti.current_probe = 0;
4992             ti.current_function = fd;
4993             ti.t = pe_unknown;
4994             fd->body->visit (& ti);
4995             // NB: we don't have to assert a known type for
4996             // functions here, to permit a "void" function.
4997             // The translator phase will omit the "retvalue".
4998             //
4999             // if (fd->type == pe_unknown)
5000             //   ti.unresolved (fd->tok);
5001             for (unsigned i=0; i < fd->locals.size(); ++i)
5002               ti.check_local (fd->locals[i]);
5003
5004             // Check and run the autocast expanding visitor.
5005             if (ti.num_available_autocasts > 0)
5006               {
5007                 autocast_expanding_visitor aev (ti);
5008                 aev.replace (fd->body);
5009                 ti.num_available_autocasts = 0;
5010               }
5011           }
5012         catch (const semantic_error& e)
5013           {
5014             throw SEMANTIC_ERROR(_F("while processing function %s",
5015                                     it->second->name.to_string().c_str())).set_chain(e);
5016           }
5017
5018       for (unsigned j=0; j<s.probes.size(); j++)
5019         try
5020           {
5021             assert_no_interrupts();
5022
5023             derived_probe* pn = s.probes[j];
5024             ti.current_function = 0;
5025             ti.current_probe = pn;
5026             ti.t = pe_unknown;
5027             pn->body->visit (& ti);
5028             for (unsigned i=0; i < pn->locals.size(); ++i)
5029               ti.check_local (pn->locals[i]);
5030
5031             // Check and run the autocast expanding visitor.
5032             if (ti.num_available_autocasts > 0)
5033               {
5034                 autocast_expanding_visitor aev (ti);
5035                 aev.replace (pn->body);
5036                 ti.num_available_autocasts = 0;
5037               }
5038
5039             probe_point* pp = pn->sole_location();
5040             if (pp->condition)
5041               {
5042                 ti.current_function = 0;
5043                 ti.current_probe = 0;
5044                 ti.t = pe_long; // NB: expected type
5045                 pp->condition->visit (& ti);
5046               }
5047           }
5048         catch (const semantic_error& e)
5049           {
5050             throw SEMANTIC_ERROR(_F("while processing probe %s",
5051                                     s.probes[j]->derived_locations(false).c_str())).set_chain(e);
5052           }
5053
5054       for (unsigned j=0; j<s.globals.size(); j++)
5055         {
5056           vardecl* gd = s.globals[j];
5057           if (gd->type == pe_unknown)
5058             ti.unresolved (gd->tok);
5059           if(gd->arity == 0 && gd->wrap == true)
5060             {
5061               throw SEMANTIC_ERROR(_("wrapping not supported for scalars"), gd->tok);
5062             }
5063         }
5064
5065       if (ti.num_newly_resolved == 0) // converged
5066         {
5067           if (ti.num_still_unresolved == 0)
5068             break; // successfully
5069           else if (! ti.assert_resolvability)
5070             {
5071               ti.assert_resolvability = true; // last pass, with error msgs
5072               if (s.verbose > 0)
5073                 ti.mismatch_complexity = 0; // print every kind of mismatch
5074             }
5075           else
5076             { // unsuccessful conclusion
5077               rc ++;
5078               break;
5079             }
5080         }
5081       else
5082         ti.mismatch_complexity = 0; // reset for next pass
5083     }
5084
5085   return rc + s.num_errors();
5086 }
5087
5088
5089 struct exp_type_null : public exp_type_details
5090 {
5091   uintptr_t id () const { return 0; }
5092   bool expandable() const { return false; }
5093   functioncall *expand(autocast_op*, bool) { return NULL; }
5094 };
5095
5096 typeresolution_info::typeresolution_info (systemtap_session& s):
5097   session(s), num_newly_resolved(0), num_still_unresolved(0),
5098   num_available_autocasts(0),
5099   assert_resolvability(false), mismatch_complexity(0),
5100   current_function(0), current_probe(0), t(pe_unknown),
5101   null_type(new exp_type_null())
5102 {
5103 }
5104
5105
5106 void
5107 typeresolution_info::visit_literal_number (literal_number* e)
5108 {
5109   assert (e->type == pe_long);
5110   if ((t == e->type) || (t == pe_unknown))
5111     return;
5112
5113   mismatch (e->tok, t, e->type);
5114 }
5115
5116
5117 void
5118 typeresolution_info::visit_literal_string (literal_string* e)
5119 {
5120   assert (e->type == pe_string);
5121   if ((t == e->type) || (t == pe_unknown))
5122     return;
5123
5124   mismatch (e->tok, t, e->type);
5125 }
5126
5127
// Logical-or expressions are typed by the shared binary-expression
// handler.
void
typeresolution_info::visit_logical_or_expr (logical_or_expr *e)
{
  visit_binary_expression (e);
}
5133
5134
// Logical-and expressions are typed by the shared binary-expression
// handler.
void
typeresolution_info::visit_logical_and_expr (logical_and_expr *e)
{
  visit_binary_expression (e);
}
5140
5141 void
5142 typeresolution_info::visit_regex_query (regex_query *e)
5143 {
5144   // NB: result of regex query is an integer!
5145   if (t == pe_stats || t == pe_string)
5146     invalid (e->tok, t);
5147
5148   t = pe_string;
5149   e->left->visit (this);
5150   t = pe_string;
5151   e->right->visit (this); // parser ensures this is a literal known at compile time
5152
5153   if (e->type == pe_unknown)
5154     {
5155       e->type = pe_long;
5156       resolved (e->tok, e->type);
5157     }
5158 }
5159
5160
5161 void
5162 typeresolution_info::visit_comparison (comparison *e)
5163 {
5164   // NB: result of any comparison is an integer!
5165   if (t == pe_stats || t == pe_string)
5166     invalid (e->tok, t);
5167
5168   t = (e->right->type != pe_unknown) ? e->right->type : pe_unknown;
5169   e->left->visit (this);
5170   t = (e->left->type != pe_unknown) ? e->left->type : pe_unknown;
5171   e->right->visit (this);
5172
5173   if (e->left->type != pe_unknown &&
5174       e->right->type != pe_unknown &&
5175       e->left->type != e->right->type)
5176     mismatch (e);
5177
5178   if (e->type == pe_unknown)
5179     {
5180       e->type = pe_long;
5181       resolved (e->tok, e->type);
5182     }
5183 }
5184
5185
5186 void
5187 typeresolution_info::visit_concatenation (concatenation *e)
5188 {
5189   if (t != pe_unknown && t != pe_string)
5190     invalid (e->tok, t);
5191
5192   t = pe_string;
5193   e->left->visit (this);
5194   t = pe_string;
5195   e->right->visit (this);
5196
5197   if (e->type == pe_unknown)
5198     {
5199       e->type = pe_string;
5200       resolved (e->tok, e->type);
5201     }
5202 }
5203
5204
5205 void
5206 typeresolution_info::visit_assignment (assignment *e)
5207 {
5208   if (t == pe_stats)
5209     invalid (e->tok, t);
5210
5211   if (e->op == "<<<") // stats aggregation
5212     {
5213       if (t == pe_string)
5214         invalid (e->tok, t);
5215
5216       t = pe_stats;
5217       e->left->visit (this);
5218       t = pe_long;
5219       e->right->visit (this);
5220       if (e->type == pe_unknown ||
5221           e->type == pe_stats)
5222         {
5223           e->type = pe_long;
5224           resolved (e->tok, e->type);
5225         }
5226     }
5227
5228   else if (e->left->type == pe_stats)
5229     invalid (e->left->tok, e->left->type);
5230
5231   else if (e->right->type == pe_stats)
5232     invalid (e->right->tok, e->right->type);
5233
5234   else if (e->op == "+=" || // numeric only
5235            e->op == "-=" ||
5236            e->op == "*=" ||
5237            e->op == "/=" ||
5238            e->op == "%=" ||
5239            e->op == "&=" ||
5240            e->op == "^=" ||
5241            e->op == "|=" ||
5242            e->op == "<<=" ||
5243            e->op == ">>=" ||
5244            false)
5245     {
5246       visit_binary_expression (e);
5247     }
5248   else if (e->op == ".=" || // string only
5249            false)
5250     {
5251       if (t == pe_long || t == pe_stats)
5252         invalid (e->tok, t);
5253
5254       t = pe_string;
5255       e->left->visit (this);
5256       t = pe_string;
5257       e->right->visit (this);
5258       if (e->type == pe_unknown)
5259         {
5260           e->type = pe_string;
5261           resolved (e->tok, e->type);
5262         }
5263     }
5264   else if (e->op == "=") // overloaded = for string & numeric operands
5265     {
5266       // logic similar to ternary_expression
5267       exp_type sub_type = t;
5268
5269       // Infer types across the l/r values
5270       if (sub_type == pe_unknown && e->type != pe_unknown)
5271         sub_type = e->type;
5272
5273       t = (sub_type != pe_unknown) ? sub_type :
5274         (e->right->type != pe_unknown) ? e->right->type :
5275         pe_unknown;
5276       e->left->visit (this);
5277       t = (sub_type != pe_unknown) ? sub_type :
5278         (e->left->type != pe_unknown) ? e->left->type :
5279         pe_unknown;
5280       e->right->visit (this);
5281
5282       if ((sub_type != pe_unknown) && (e->type == pe_unknown))
5283         {
5284           e->type = sub_type;
5285           resolved (e->tok, e->type);
5286         }
5287       if ((sub_type == pe_unknown) && (e->left->type != pe_unknown))
5288         {
5289           e->type = e->left->type;
5290           resolved (e->tok, e->type);
5291         }
5292
5293       if (e->left->type != pe_unknown &&
5294           e->right->type != pe_unknown &&
5295           e->left->type != e->right->type)
5296         mismatch (e);
5297
5298       // Propagate type details from the RHS to the assignment
5299       if (e->type == e->right->type &&
5300           e->right->type_details && !e->type_details)
5301         resolved_details(e->right->type_details, e->type_details);
5302
5303       // Propagate type details from the assignment to the LHS
5304       if (e->type == e->left->type && e->type_details)
5305         {
5306           if (e->left->type_details &&
5307               *e->left->type_details != *e->type_details &&
5308               *e->left->type_details != *null_type)
5309             resolved_details(null_type, e->left->type_details);
5310           else if (!e->left->type_details)
5311             resolved_details(e->type_details, e->left->type_details);
5312         }
5313     }
5314   else
5315     throw SEMANTIC_ERROR (_("unsupported assignment operator ") + (string)e->op);
5316 }
5317
5318
5319 void
5320 typeresolution_info::visit_embedded_expr (embedded_expr *e)
5321 {
5322   if (e->type == pe_unknown)
5323     {
5324       if (e->code.find ("/* string */") != string::npos)
5325         e->type = pe_string;
5326       else // if (e->code.find ("/* long */") != string::npos)
5327         e->type = pe_long;
5328
5329       resolved (e->tok, e->type);
5330     }
5331 }
5332
5333
5334 void
5335 typeresolution_info::visit_binary_expression (binary_expression* e)
5336 {
5337   if (t == pe_stats || t == pe_string)
5338     invalid (e->tok, t);
5339
5340   t = pe_long;
5341   e->left->visit (this);
5342   t = pe_long;
5343   e->right->visit (this);
5344
5345   if (e->left->type != pe_unknown &&
5346       e->right->type != pe_unknown &&
5347       e->left->type != e->right->type)
5348     mismatch (e);
5349
5350   if (e->type == pe_unknown)
5351     {
5352       e->type = pe_long;
5353       resolved (e->tok, e->type);
5354     }
5355 }
5356
5357
// Pre-increment/decrement expressions are typed by the shared
// unary-expression handler.
void
typeresolution_info::visit_pre_crement (pre_crement *e)
{
  visit_unary_expression (e);
}
5363
5364
// Post-increment/decrement expressions are typed by the shared
// unary-expression handler.
void
typeresolution_info::visit_post_crement (post_crement *e)
{
  visit_unary_expression (e);
}
5370
5371
5372 void
5373 typeresolution_info::visit_unary_expression (unary_expression* e)
5374 {
5375   if (t == pe_stats || t == pe_string)
5376     invalid (e->tok, t);
5377
5378   t = pe_long;
5379   e->operand->visit (this);
5380
5381   if (e->type == pe_unknown)
5382     {
5383       e->type = pe_long;
5384       resolved (e->tok, e->type);
5385     }
5386 }
5387
5388
5389 void
5390 typeresolution_info::visit_ternary_expression (ternary_expression* e)
5391 {
5392   exp_type sub_type = t;
5393
5394   t = pe_long;
5395   e->cond->visit (this);
5396
5397   // Infer types across the true/false arms of the ternary expression.
5398
5399   if (sub_type == pe_unknown && e->type != pe_unknown)
5400     sub_type = e->type;
5401   t = sub_type;
5402   e->truevalue->visit (this);
5403   t = sub_type;
5404   e->falsevalue->visit (this);
5405
5406   if ((sub_type == pe_unknown) && (e->type != pe_unknown))
5407     ; // already resolved
5408   else if ((sub_type != pe_unknown) && (e->type == pe_unknown))
5409     {
5410       e->type = sub_type;
5411       resolved (e->tok, e->type);
5412     }
5413   else if ((sub_type == pe_unknown) && (e->truevalue->type != pe_unknown))
5414     {
5415       e->type = e->truevalue->type;
5416       resolved (e->tok, e->type);
5417     }
5418   else if ((sub_type == pe_unknown) && (e->falsevalue->type != pe_unknown))
5419     {
5420       e->type = e->falsevalue->type;
5421       resolved (e->tok, e->type);
5422     }
5423   else if (e->type != sub_type)
5424     mismatch (e->tok, sub_type, e->type);
5425
5426   // Propagate type details from both true/false branches
5427   if (!e->type_details &&
5428       e->type == e->truevalue->type && e->type == e->falsevalue->type &&
5429       e->truevalue->type_details && e->falsevalue->type_details &&
5430       *e->truevalue->type_details == *e->falsevalue->type_details)
5431     resolved_details(e->truevalue->type_details, e->type_details);
5432 }
5433
5434
5435 template <class Referrer, class Referent>
5436 void resolve_2types (Referrer* referrer, Referent* referent,
5437                     typeresolution_info* r, exp_type t, bool accept_unknown = false)
5438 {
5439   exp_type& re_type = referrer->type;
5440   const token* re_tok = referrer->tok;
5441   exp_type& te_type = referent->type;
5442
5443   if (t != pe_unknown && re_type == t && re_type == te_type)
5444     ; // do nothing: all three e->types in agreement
5445   else if (t == pe_unknown && re_type != pe_unknown && re_type == te_type)
5446     ; // do nothing: two known e->types in agreement
5447   else if (re_type != pe_unknown && te_type != pe_unknown && re_type != te_type)
5448     r->mismatch (re_tok, re_type, referent); // referrer-referent
5449   else if (re_type != pe_unknown && t != pe_unknown && re_type != t)
5450     r->mismatch (re_tok, t, referent); // referrer-t
5451   else if (te_type != pe_unknown && t != pe_unknown && te_type != t)
5452     r->mismatch (re_tok, t, referent); // referent-t
5453   else if (re_type == pe_unknown && t != pe_unknown)
5454     {
5455       // propagate from upstream
5456       re_type = t;
5457       r->resolved (re_tok, re_type);
5458       // catch re_type/te_type mismatch later
5459     }
5460   else if (re_type == pe_unknown && te_type != pe_unknown)
5461     {
5462       // propagate from referent
5463       re_type = te_type;
5464       r->resolved (re_tok, re_type);
5465       // catch re_type/t mismatch later
5466     }
5467   else if (re_type != pe_unknown && te_type == pe_unknown)
5468     {
5469       // propagate to referent
5470       te_type = re_type;
5471       r->resolved (re_tok, re_type, referent);
5472       // catch re_type/t mismatch later
5473     }
5474   else if (! accept_unknown)
5475     r->unresolved (re_tok);
5476 }
5477
5478
5479 void
5480 typeresolution_info::visit_symbol (symbol* e)
5481 {
5482   if (e->referent == 0)
5483     throw SEMANTIC_ERROR (_F("internal error: unresolved symbol '%s'",
5484                              e->name.to_string().c_str()), e->tok);
5485
5486   resolve_2types (e, e->referent, this, t);
5487
5488   if (e->type == e->referent->type)
5489     {
5490       // If both have type details, then they either must agree;
5491       // otherwise force them both to null.
5492       if (e->type_details && e->referent->type_details &&
5493           *e->type_details != *e->referent->type_details)
5494         {
5495           resolved_details(null_type, e->type_details);
5496           resolved_details(null_type, e->referent->type_details);
5497         }
5498       else if (e->type_details && !e->referent->type_details)
5499         resolved_details(e->type_details, e->referent->type_details);
5500       else if (!e->type_details && e->referent->type_details)
5501         resolved_details(e->referent->type_details, e->type_details);
5502     }
5503 }
5504
5505
5506 void
5507 typeresolution_info::visit_target_symbol (target_symbol* e)
5508 {
5509   // This occurs only if a target symbol was not resolved over in
5510   // tapset.cxx land, that error was properly suppressed, and the
5511   // later unused-expression-elimination pass didn't get rid of it
5512   // either.  So we have a target symbol that is believed to be of
5513   // genuine use, yet unresolved by the provider.
5514
5515   if (session.verbose > 2)
5516     {
5517       clog << _("Resolution problem with ");
5518       if (current_function)
5519         {
5520           clog << "function " << current_function->name << endl;
5521           current_function->body->print (clog);
5522           clog << endl;
5523         }
5524       else if (current_probe)
5525         {
5526           clog << "probe " << *current_probe->sole_location() << endl;
5527           current_probe->body->print (clog);
5528           clog << endl;
5529         }
5530       else
5531         //TRANSLATORS: simply saying not an issue with a probe or function
5532         clog << _("other") << endl;
5533     }
5534
5535   if (e->saved_conversion_error)
5536     throw (* (e->saved_conversion_error));
5537   else
5538     throw SEMANTIC_ERROR(_("unresolved target-symbol expression"), e->tok);
5539 }
5540
5541
5542 void
5543 typeresolution_info::visit_atvar_op (atvar_op* e)
5544 {
5545   // This occurs only if an @var() was not resolved over in
5546   // tapset.cxx land, that error was properly suppressed, and the
5547   // later unused-expression-elimination pass didn't get rid of it
5548   // either.  So we have an @var() that is believed to be of
5549   // genuine use, yet unresolved by the provider.
5550
5551   if (session.verbose > 2)
5552     {
5553       clog << _("Resolution problem with ");
5554       if (current_function)
5555         {
5556           clog << "function " << current_function->name << endl;
5557           current_function->body->print (clog);
5558           clog << endl;
5559         }
5560       else if (current_probe)
5561         {
5562           clog << "probe " << *current_probe->sole_location() << endl;
5563           current_probe->body->print (clog);
5564           clog << endl;
5565         }
5566       else
5567         //TRANSLATORS: simply saying not an issue with a probe or function
5568         clog << _("other") << endl;
5569     }
5570
5571   if (e->saved_conversion_error)
5572     throw (* (e->saved_conversion_error));
5573   else
5574     throw SEMANTIC_ERROR(_("unresolved @var() expression"), e->tok);
5575 }
5576
5577
5578 void
5579 typeresolution_info::visit_defined_op (defined_op* e)
5580 {
5581   throw SEMANTIC_ERROR(_("unexpected @defined"), e->tok);
5582 }
5583
5584
5585 void
5586 typeresolution_info::visit_entry_op (entry_op* e)
5587 {
5588   throw SEMANTIC_ERROR(_("@entry is only valid in .return probes"), e->tok);
5589 }
5590
5591
5592 void
5593 typeresolution_info::visit_cast_op (cast_op* e)
5594 {
5595   // Like target_symbol, a cast_op shouldn't survive this far
5596   // unless it was not resolved and its value is really needed.
5597   if (e->saved_conversion_error)
5598     throw (* (e->saved_conversion_error));
5599   else
5600     throw SEMANTIC_ERROR(_F("type definition '%s' not found in '%s'",
5601                             e->type_name.to_string().c_str(),
5602                             e->module.to_string().c_str()), e->tok);
5603 }
5604
5605
5606 void
5607 typeresolution_info::visit_autocast_op (autocast_op* e)
5608 {
5609   // Like cast_op, a implicit autocast_op shouldn't survive this far
5610   // unless it was not resolved and its value is really needed.
5611   if (assert_resolvability && e->saved_conversion_error)
5612     throw (* (e->saved_conversion_error));
5613   else if (assert_resolvability)
5614     throw SEMANTIC_ERROR(_("unknown type in dereference"), e->tok);
5615
5616   t = pe_long;
5617   e->operand->visit (this);
5618
5619   num_still_unresolved++;
5620   if (e->operand->type_details &&
5621       e->operand->type_details->expandable())
5622     num_available_autocasts++;
5623 }
5624
5625
5626 void
5627 typeresolution_info::visit_perf_op (perf_op* e)
5628 {
5629   // A perf_op should already be resolved
5630   if (t == pe_stats || t == pe_string)
5631     invalid (e->tok, t);
5632
5633   e->type = pe_long;
5634
5635   // (There is no real need to visit our operand - by parser
5636   // construction, it's always a string literal, with its type already
5637   // set.)
5638   t = pe_string;
5639   e->operand->visit (this);
5640 }
5641
5642
5643 void
5644 typeresolution_info::visit_arrayindex (arrayindex* e)
5645 {
5646
5647   symbol *array = NULL;
5648   hist_op *hist = NULL;
5649   classify_indexable(e->base, array, hist);
5650
5651   // Every hist_op has type [int]:int, that is to say, every hist_op
5652   // is a pseudo-one-dimensional integer array type indexed by
5653   // integers (bucket numbers).
5654
5655   if (hist)
5656     {
5657       if (e->indexes.size() != 1)
5658         unresolved (e->tok);
5659       t = pe_long;
5660       e->indexes[0]->visit (this);
5661       if (e->indexes[0]->type != pe_long)
5662         unresolved (e->tok);
5663       hist->visit (this);
5664       if (e->type != pe_long)
5665         {
5666           e->type = pe_long;
5667           resolved (e->tok, e->type);
5668         }
5669       return;
5670     }
5671
5672   // Now we are left with "normal" map inference and index checking.
5673
5674   assert (array);
5675   assert (array->referent != 0);
5676   resolve_2types (e, array->referent, this, t);
5677
5678   // now resolve the array indexes
5679
5680   // if (e->referent->index_types.size() == 0)
5681   //   // redesignate referent as array
5682   //   e->referent->set_arity (e->indexes.size ());
5683
5684   if (e->indexes.size() != array->referent->index_types.size())
5685     unresolved (e->tok); // symbol resolution should prevent this
5686   else for (unsigned i=0; i<e->indexes.size(); i++)
5687     {
5688       if (e->indexes[i])
5689         {
5690           expression* ee = e->indexes[i];
5691           exp_type& ft = array->referent->index_types [i];
5692           t = ft;
5693           ee->visit (this);
5694           exp_type at = ee->type;
5695
5696           if ((at == pe_string || at == pe_long) && ft == pe_unknown)
5697             {
5698               // propagate to formal type
5699               ft = at;
5700               resolved (ee->tok, ft, array->referent, i);
5701             }
5702           if (at == pe_stats)
5703             invalid (ee->tok, at);
5704           if (ft == pe_stats)
5705             invalid (ee->tok, ft);
5706           if (at != pe_unknown && ft != pe_unknown && ft != at)
5707             mismatch (ee->tok, ee->type, array->referent, i);
5708           if (at == pe_unknown)
5709               unresolved (ee->tok);
5710         }
5711     }
5712 }
5713
5714
5715 void
5716 typeresolution_info::visit_functioncall (functioncall* e)
5717 {
5718   if (e->referent == 0)
5719     throw SEMANTIC_ERROR (_F("internal error: unresolved function call to '%s'",
5720                              e->function.to_string().c_str()), e->tok);
5721
5722   resolve_2types (e, e->referent, this, t, true); // accept unknown type
5723
5724   if (e->type == pe_stats)
5725     invalid (e->tok, e->type);
5726
5727   const exp_type_ptr& func_type = e->referent->type_details;
5728   if (func_type && e->referent->type == e->type
5729       && (!e->type_details || *func_type != *e->type_details))
5730     resolved_details(e->referent->type_details, e->type_details);
5731
5732   // now resolve the function parameters
5733   if (e->args.size() != e->referent->formal_args.size())
5734     unresolved (e->tok); // symbol resolution should prevent this
5735   else for (unsigned i=0; i<e->args.size(); i++)
5736     {
5737       expression* ee = e->args[i];
5738       exp_type& ft = e->referent->formal_args[i]->type;
5739       const token* fe_tok = e->referent->formal_args[i]->tok;
5740       t = ft;
5741       ee->visit (this);
5742       exp_type at = ee->type;
5743
5744       if (((at == pe_string) || (at == pe_long)) && ft == pe_unknown)
5745         {
5746           // propagate to formal arg
5747           ft = at;
5748           resolved (ee->tok, ft, e->referent->formal_args[i], i);
5749         }
5750       if (at == pe_stats)
5751         invalid (ee->tok, at);
5752       if (ft == pe_stats)
5753         invalid (fe_tok, ft);
5754       if (at != pe_unknown && ft != pe_unknown && ft != at)
5755         mismatch (ee->tok, ee->type, e->referent->formal_args[i], i);
5756       if (at == pe_unknown)
5757         unresolved (ee->tok);
5758     }
5759 }
5760
5761
5762 void
5763 typeresolution_info::visit_block (block* e)
5764 {
5765   for (unsigned i=0; i<e->statements.size(); i++)
5766     {
5767       t = pe_unknown;
5768       e->statements[i]->visit (this);
5769     }
5770 }
5771
5772
5773 void
5774 typeresolution_info::visit_try_block (try_block* e)
5775 {
5776   if (e->try_block)
5777     e->try_block->visit (this);
5778   if (e->catch_error_var)
5779     {
5780       t = pe_string;
5781       e->catch_error_var->visit (this);
5782     }
5783   if (e->catch_block)
5784     e->catch_block->visit (this);
5785 }
5786
5787
5788 void
5789 typeresolution_info::visit_embeddedcode (embeddedcode* s)
5790 {
5791   // PR11573.  If we have survived thus far with a piece of embedded
5792   // code that requires uprobes, we need to track this.
5793   //
5794   // This is an odd place for this check, as opposed
5795   // to a separate 'optimization' pass, or c_unparser::visit_embeddedcode
5796   // over yonder in pass 3.  However, we want to do it during pass 2 so
5797   // that cached sessions also get the uprobes treatment.
5798   if (! session.need_uprobes
5799       && s->code.find("/* pragma:uprobes */") != string::npos)
5800     {
5801       if (session.verbose > 2)
5802         clog << _("Activating uprobes support because /* pragma:uprobes */ seen.") << endl;
5803       session.need_uprobes = true;
5804     }
5805
5806   // PR15065. Likewise, we need to detect /* pragma:tagged_dfa */
5807   // before the gen_dfa_table pass. Again, the typechecking part of
5808   // pass 2 is a good place for this.
5809   if (! session.need_tagged_dfa
5810       && s->code.find("/* pragma:tagged_dfa */") != string::npos)
5811     {
5812       // if (session.verbose > 2)
5813       //   clog << _F("Turning on DFA subexpressions, pragma:tagged_dfa found in %s",
5814       // current_function->name.c_str()) << endl;
5815       // session.need_tagged_dfa = true;
5816       throw SEMANTIC_ERROR (_("Tagged DFA support is not yet available"), s->tok);
5817     }
5818 }
5819
5820
5821 void
5822 typeresolution_info::visit_if_statement (if_statement* e)
5823 {
5824   t = pe_long;
5825   e->condition->visit (this);
5826
5827   t = pe_unknown;
5828   e->thenblock->visit (this);
5829
5830   if (e->elseblock)
5831     {
5832       t = pe_unknown;
5833       e->elseblock->visit (this);
5834     }
5835 }
5836
5837
5838 void
5839 typeresolution_info::visit_for_loop (for_loop* e)
5840 {
5841   t = pe_unknown;
5842   if (e->init) e->init->visit (this);
5843   t = pe_long;
5844   e->cond->visit (this);
5845   t = pe_unknown;
5846   if (e->incr) e->incr->visit (this);
5847   t = pe_unknown;
5848   e->block->visit (this);
5849 }
5850
5851
5852 void
5853 typeresolution_info::visit_foreach_loop (foreach_loop* e)
5854 {
5855   // See also visit_arrayindex.
5856   // This is different in that, being a statement, we can't assign
5857   // a type to the outer array, only propagate to/from the indexes
5858
5859   // if (e->referent->index_types.size() == 0)
5860   //   // redesignate referent as array
5861   //   e->referent->set_arity (e->indexes.size ());
5862
5863   exp_type wanted_value = pe_unknown;
5864   symbol *array = NULL;
5865   hist_op *hist = NULL;
5866   classify_indexable(e->base, array, hist);
5867
5868   if (hist)
5869     {
5870       if (e->indexes.size() != 1)
5871         unresolved (e->tok);
5872       t = pe_long;
5873       e->indexes[0]->visit (this);
5874       if (e->indexes[0]->type != pe_long)
5875         unresolved (e->tok);
5876       hist->visit (this);
5877       wanted_value = pe_long;
5878     }
5879   else
5880     {
5881       assert (array);
5882       if (e->indexes.size() != array->referent->index_types.size())
5883         unresolved (e->tok); // symbol resolution should prevent this
5884       else
5885         {
5886           for (unsigned i=0; i<e->indexes.size(); i++)
5887             {
5888               expression* ee = e->indexes[i];
5889               exp_type& ft = array->referent->index_types [i];
5890               t = ft;
5891               ee->visit (this);
5892               exp_type at = ee->type;
5893
5894               if ((at == pe_string || at == pe_long) && ft == pe_unknown)
5895                 {
5896                   // propagate to formal type
5897                   ft = at;
5898                   resolved (ee->tok, ee->type, array->referent, i);
5899                 }
5900               if (at == pe_stats)
5901                 invalid (ee->tok, at);
5902               if (ft == pe_stats)
5903                 invalid (ee->tok, ft);
5904               if (at != pe_unknown && ft != pe_unknown && ft != at)
5905                 mismatch (ee->tok, ee->type, array->referent, i);
5906               if (at == pe_unknown)
5907                 unresolved (ee->tok);
5908             }
5909           for (unsigned i=0; i<e->array_slice.size(); i++)
5910             if (e->array_slice[i])
5911               {
5912                 expression* ee = e->array_slice[i];
5913                 exp_type& ft = array->referent->index_types [i];
5914                 t = ft;
5915                 ee->visit (this);
5916                 exp_type at = ee->type;
5917
5918                 if ((at == pe_string || at == pe_long) && ft == pe_unknown)
5919                   {
5920                     // propagate to formal type
5921                     ft = at;
5922                     resolved (ee->tok, ee->type, array->referent, i);
5923                   }
5924                 if (at == pe_stats)
5925                   invalid (ee->tok, at);
5926                 if (ft == pe_stats)
5927                   invalid (ee->tok, ft);
5928                 if (at != pe_unknown && ft != pe_unknown && ft != at)
5929                   mismatch (ee->tok, ee->type, array->referent, i);
5930                 if (at == pe_unknown)
5931                   unresolved (ee->tok);
5932               }
5933         }
5934       t = pe_unknown;
5935       array->visit (this);
5936       wanted_value = array->type;
5937     }
5938
5939   if (e->value)
5940     {
5941       if (wanted_value == pe_stats)
5942         invalid(e->value->tok, wanted_value);
5943       else if (wanted_value != pe_unknown)
5944         check_arg_type(wanted_value, e->value);
5945       else
5946         {
5947           t = pe_unknown;
5948           e->value->visit (this);
5949         }
5950     }
5951
5952   /* Prevent @sum etc. aggregate sorting on non-statistics arrays. */
5953   if (wanted_value != pe_unknown)
5954     if (e->sort_aggr != sc_none && wanted_value != pe_stats)
5955       invalid (array->tok, wanted_value);
5956
5957   if (e->limit)
5958     {
5959       t = pe_long;
5960       e->limit->visit (this);
5961     }
5962
5963   t = pe_unknown;
5964   e->block->visit (this);
5965 }
5966
5967
5968 void
5969 typeresolution_info::visit_null_statement (null_statement*)
5970 {
5971 }
5972
5973
5974 void
5975 typeresolution_info::visit_expr_statement (expr_statement* e)
5976 {
5977   t = pe_unknown;
5978   e->value->visit (this);
5979 }
5980
5981
5982 struct delete_statement_typeresolution_info:
5983   public throwing_visitor
5984 {
5985   typeresolution_info *parent;
5986   delete_statement_typeresolution_info (typeresolution_info *p):
5987     throwing_visitor (_("invalid operand of delete expression")),
5988     parent (p)
5989   {}
5990
5991   void visit_arrayindex (arrayindex* e)
5992   {
5993     parent->visit_arrayindex (e);
5994   }
5995
5996   void visit_symbol (symbol* e)
5997   {
5998     exp_type ignored = pe_unknown;
5999     assert (e->referent != 0);
6000     resolve_2types (e, e->referent, parent, ignored);
6001   }
6002 };
6003
6004
6005 void
6006 typeresolution_info::visit_delete_statement (delete_statement* e)
6007 {
6008   delete_statement_typeresolution_info di (this);
6009   t = pe_unknown;
6010   e->value->visit (&di);
6011 }
6012
6013
6014 void
6015 typeresolution_info::visit_next_statement (next_statement*)
6016 {
6017 }
6018
6019
6020 void
6021 typeresolution_info::visit_break_statement (break_statement*)
6022 {
6023 }
6024
6025
6026 void
6027 typeresolution_info::visit_continue_statement (continue_statement*)
6028 {
6029 }
6030
6031
6032 void
6033 typeresolution_info::visit_array_in (array_in* e)
6034 {
6035   // all unary operators only work on numerics
6036   exp_type t1 = t;
6037   t = pe_unknown; // array value can be anything
6038   e->operand->visit (this);
6039
6040   if (t1 == pe_unknown && e->type != pe_unknown)
6041     ; // already resolved
6042   else if (t1 == pe_string || t1 == pe_stats)
6043     mismatch (e->tok, t1, pe_long);
6044   else if (e->type == pe_unknown)
6045     {
6046       e->type = pe_long;
6047       resolved (e->tok, e->type);
6048     }
6049 }
6050
6051
6052 void
6053 typeresolution_info::visit_return_statement (return_statement* e)
6054 {
6055   // This is like symbol, where the referent is
6056   // the return value of the function.
6057
6058   // translation pass will print error
6059   if (current_function == 0)
6060     return;
6061
6062   exp_type& e_type = current_function->type;
6063   t = current_function->type;
6064   e->value->visit (this);
6065
6066   if (e_type != pe_unknown && e->value->type != pe_unknown
6067       && e_type != e->value->type)
6068     mismatch (e->value->tok, e->value->type, current_function);
6069   if (e_type == pe_unknown &&
6070       (e->value->type == pe_long || e->value->type == pe_string))
6071     {
6072       // propagate non-statistics from value
6073       e_type = e->value->type;
6074       resolved (e->value->tok, e_type, current_function);
6075     }
6076   if (e->value->type == pe_stats)
6077     invalid (e->value->tok, e->value->type);
6078
6079   const exp_type_ptr& value_type = e->value->type_details;
6080   if (value_type && current_function->type == e->value->type)
6081     {
6082       exp_type_ptr& func_type = current_function->type_details;
6083       if (!func_type)
6084         // The function can take on the type details of the return value.
6085         resolved_details(value_type, func_type);
6086       else if (*func_type != *value_type && *func_type != *null_type)
6087         // Conflicting return types?  NO TYPE FOR YOU!
6088         resolved_details(null_type, func_type);
6089     }
6090 }
6091
6092 void
6093 typeresolution_info::visit_print_format (print_format* e)
6094 {
6095   size_t unresolved_args = 0;
6096
6097   if (e->hist)
6098     {
6099       e->hist->visit(this);
6100     }
6101
6102   else if (e->print_with_format)
6103     {
6104       // If there's a format string, we can do both inference *and*
6105       // checking.
6106
6107       // First we extract the subsequence of formatting components
6108       // which are conversions (not just literal string components)
6109
6110       unsigned expected_num_args = 0;
6111       std::vector<print_format::format_component> components;
6112       for (size_t i = 0; i < e->components.size(); ++i)
6113         {
6114           if (e->components[i].type == print_format::conv_unspecified)
6115             throw SEMANTIC_ERROR (_("Unspecified conversion in print operator format string"),
6116                                   e->tok);
6117           else if (e->components[i].type == print_format::conv_literal)
6118             continue;
6119           components.push_back(e->components[i]);
6120           ++expected_num_args;
6121           if (e->components[i].widthtype == print_format::width_dynamic)
6122             ++expected_num_args;
6123           if (e->components[i].prectype == print_format::prec_dynamic)
6124             ++expected_num_args;
6125         }
6126
6127       // Then we check that the number of conversions and the number
6128       // of args agree.
6129
6130       if (expected_num_args != e->args.size())
6131         throw SEMANTIC_ERROR (_("Wrong number of args to formatted print operator"),
6132                               e->tok);
6133
6134       // Then we check that the types of the conversions match the types
6135       // of the args.
6136       unsigned argno = 0;
6137       for (size_t i = 0; i < components.size(); ++i)
6138         {
6139           // Check the dynamic width, if specified
6140           if (components[i].widthtype == print_format::width_dynamic)
6141             {
6142               check_arg_type (pe_long, e->args[argno]);
6143               ++argno;
6144             }
6145
6146           // Check the dynamic precision, if specified
6147           if (components[i].prectype == print_format::prec_dynamic)
6148             {
6149               check_arg_type (pe_long, e->args[argno]);
6150               ++argno;
6151             }
6152
6153           exp_type wanted = pe_unknown;
6154
6155           switch (components[i].type)
6156             {
6157             case print_format::conv_unspecified:
6158             case print_format::conv_literal:
6159               assert (false);
6160               break;
6161
6162             case print_format::conv_pointer:
6163             case print_format::conv_number:
6164             case print_format::conv_binary:
6165             case print_format::conv_char:
6166             case print_format::conv_memory:
6167             case print_format::conv_memory_hex:
6168               wanted = pe_long;
6169               break;
6170
6171             case print_format::conv_string:
6172               wanted = pe_string;
6173               break;
6174             }
6175
6176           assert (wanted != pe_unknown);
6177           check_arg_type (wanted, e->args[argno]);
6178           ++argno;
6179         }
6180     }
6181   else
6182     {
6183       // Without a format string, the best we can do is require that
6184       // each argument resolve to a concrete type.
6185       for (size_t i = 0; i < e->args.size(); ++i)
6186         {
6187           t = pe_unknown;
6188           e->args[i]->visit (this);
6189           if (e->args[i]->type == pe_unknown)
6190             {
6191               unresolved (e->args[i]->tok);
6192               ++unresolved_args;
6193             }
6194         }
6195     }
6196
6197   if (unresolved_args == 0)
6198     {
6199       if (e->type == pe_unknown)
6200         {
6201           if (e->print_to_stream)
6202             e->type = pe_long;
6203           else
6204             e->type = pe_string;
6205           resolved (e->tok, e->type);
6206         }
6207     }
6208   else
6209     {
6210       e->type = pe_unknown;
6211       unresolved (e->tok);
6212     }
6213 }
6214
6215
6216 void
6217 typeresolution_info::visit_stat_op (stat_op* e)
6218 {
6219   t = pe_stats;
6220   e->stat->visit (this);
6221   if (e->type == pe_unknown)
6222     {
6223       e->type = pe_long;
6224       resolved (e->tok, e->type);
6225     }
6226   else if (e->type != pe_long)
6227     mismatch (e->tok, pe_long, e->type);
6228 }
6229
6230 void
6231 typeresolution_info::visit_hist_op (hist_op* e)
6232 {
6233   t = pe_stats;
6234   e->stat->visit (this);
6235 }
6236
6237
6238 void
6239 typeresolution_info::check_arg_type (exp_type wanted, expression* arg)
6240 {
6241   t = wanted;
6242   arg->visit (this);
6243
6244   if (arg->type == pe_unknown)
6245     {
6246       arg->type = wanted;
6247       resolved (arg->tok, arg->type);
6248     }
6249   else if (arg->type != wanted)
6250     {
6251       mismatch (arg->tok, wanted, arg->type);
6252     }
6253 }
6254
6255
6256 void
6257 typeresolution_info::check_local (vardecl* v)
6258 {
6259   if (v->arity != 0)
6260     {
6261       num_still_unresolved ++;
6262       if (assert_resolvability)
6263         session.print_error
6264           (SEMANTIC_ERROR (_("array locals not supported, missing global declaration? "), v->tok));
6265     }
6266
6267   if (v->type == pe_unknown)
6268     unresolved (v->tok);
6269   else if (v->type == pe_stats)
6270     {
6271       num_still_unresolved ++;
6272       if (assert_resolvability)
6273         session.print_error
6274           (SEMANTIC_ERROR (_("stat locals not supported, missing global declaration? "), v->tok));
6275     }
6276   else if (!(v->type == pe_long || v->type == pe_string))
6277     invalid (v->tok, v->type);
6278 }
6279
6280
6281 void
6282 typeresolution_info::unresolved (const token* tok)
6283 {
6284   num_still_unresolved ++;
6285
6286   if (assert_resolvability && mismatch_complexity <= 0)
6287     {
6288       stringstream msg;
6289       msg << _("unresolved type ");
6290       session.print_error (SEMANTIC_ERROR (msg.str(), tok));
6291     }
6292 }
6293
6294
6295 void
6296 typeresolution_info::invalid (const token* tok, exp_type pe)
6297 {
6298   num_still_unresolved ++;
6299
6300   if (assert_resolvability)
6301     {
6302       stringstream msg;
6303       if (tok && tok->type == tok_operator)
6304         msg << _("invalid operator");
6305       else
6306         msg << _("invalid type ") << pe;
6307       session.print_error (SEMANTIC_ERROR (msg.str(), tok));
6308     }
6309 }
6310
6311 void
6312 typeresolution_info::mismatch (const binary_expression* e)
6313 {
6314   num_still_unresolved ++;
6315
6316   if (assert_resolvability && mismatch_complexity <= 1)
6317     {
6318       stringstream msg;
6319       msg << _F("type mismatch: left and right sides don't agree (%s vs %s)",
6320                 lex_cast(e->left->type).c_str(), lex_cast(e->right->type).c_str());
6321       session.print_error (SEMANTIC_ERROR (msg.str(), e->tok));
6322     }
6323   else if (!assert_resolvability)
6324     mismatch_complexity = max(1, mismatch_complexity);
6325 }
6326
6327 /* tok   token where mismatch occurred
6328  * t1    type we expected (the 'good' type)
6329  * t2    type we received (the 'bad' type)
6330  * */
6331 void
6332 typeresolution_info::mismatch (const token* tok, exp_type t1, exp_type t2)
6333 {
6334   num_still_unresolved ++;
6335
6336   if (assert_resolvability && mismatch_complexity <= 2)
6337     {
6338       stringstream msg;
6339       msg << _F("type mismatch: expected %s", lex_cast(t1).c_str());
6340       if (t2 != pe_unknown)
6341         msg << _F(" but found %s", lex_cast(t2).c_str());
6342       session.print_error (SEMANTIC_ERROR (msg.str(), tok));
6343     }
6344   else if (!assert_resolvability)
6345     mismatch_complexity = max(2, mismatch_complexity);
6346 }
6347
6348 /* tok   token where the mismatch happened
6349  * type  type we received (the 'bad' type)
6350  * decl  declaration of mismatched symbol
6351  * index if index-based (array index or function arg)
6352  * */
6353 void
6354 typeresolution_info::mismatch (const token *tok, exp_type type,
6355                                const symboldecl* decl, int index)
6356 {
6357   num_still_unresolved ++;
6358
6359   if (assert_resolvability && mismatch_complexity <= 3)
6360     {
6361       assert(decl != NULL);
6362
6363       // If mismatch is against a function parameter from within the function
6364       // itself (rather than a function call), then the index will be -1. We
6365       // check here if the decl corresponds to one of the params and if so,
6366       // adjust the index.
6367       if (current_function != NULL && index == -1)
6368         {
6369           vector<vardecl*>& args = current_function->formal_args;
6370           for (unsigned i = 0; i < args.size() && index < 0; i++)
6371             if (args[i] == decl)
6372               index = i;
6373         }
6374
6375       // get the declaration's original type and token
6376       const resolved_type *original = NULL;
6377       for (vector<resolved_type>::const_iterator it = resolved_types.begin();
6378            it != resolved_types.end() && original == NULL; ++it)
6379         {
6380           if (it->decl == decl && it->index == index)
6381             original = &(*it);
6382         }
6383
6384       // print basic mismatch msg if we couldn't find the decl (this can happen
6385       // for explicitly typed decls e.g. myvar:long or for fabricated (already
6386       // resolved) decls e.g. __perf_read_*)
6387       if (original == NULL)
6388         {
6389           session.print_error (SEMANTIC_ERROR (
6390             _F("type mismatch: expected %s but found %s",
6391                lex_cast(type).c_str(),
6392                lex_cast(decl->type).c_str()),
6393             tok));
6394           return;
6395         }
6396
6397       // print where mismatch happened and chain with origin of decl type
6398       // resolution
6399       stringstream msg;
6400
6401       if (index >= 0)
6402         msg << _F("index %d ", index);
6403       msg << _F("type mismatch (%s)", lex_cast(type).c_str());
6404       semantic_error err(ERR_SRC, msg.str(), tok);
6405
6406       stringstream chain_msg;
6407       chain_msg << _("type");
6408       if (index >= 0)
6409         chain_msg << _F(" of index %d", index);
6410       chain_msg << _F(" was first inferred here (%s)",
6411                       lex_cast(decl->type).c_str());
6412       semantic_error chain(ERR_SRC, chain_msg.str(), original->tok);
6413
6414       err.set_chain(chain);
6415       session.print_error (err);
6416     }
6417   else if (!assert_resolvability)
6418     mismatch_complexity = max(3, mismatch_complexity);
6419 }
6420
6421
6422 /* tok   token where resolution occurred
6423  * type  type to which we resolved
6424  * decl  declaration of resolved symbol
6425  * index if index-based (array index or function arg)
6426  * */
6427 void
6428 typeresolution_info::resolved (const token *tok, exp_type type,
6429                                const symboldecl* decl, int index)
6430 {
6431   num_newly_resolved ++;
6432
6433   // We only use the resolved_types vector to give better mismatch messages
6434   // involving symbols. So don't bother adding it if we're not given a decl
6435   if (decl != NULL)
6436     {
6437       // As a fail-safe, if the decl & index is already in the vector, then
6438       // modify it instead of adding another one to ensure uniqueness. This
6439       // should never happen since we only call resolved once for each decl &
6440       // index, but better safe than sorry. (IE. if it does happen, better have
6441       // the latest resolution info for better mismatch reporting later).
6442       for (unsigned i = 0; i < resolved_types.size(); i++)
6443         {
6444           if (resolved_types[i].decl == decl
6445               && resolved_types[i].index == index)
6446             {
6447               resolved_types[i].tok = tok;
6448               return;
6449             }
6450         }
6451       resolved_type res(tok, decl, index);
6452       resolved_types.push_back(res);
6453     }
6454 }
6455
6456 void
6457 typeresolution_info::resolved_details (const exp_type_ptr& src,
6458                                        exp_type_ptr& dest)
6459 {
6460   num_newly_resolved ++;
6461   dest = src;
6462 }
6463
6464 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */