elaborate.cxx

   1 // elaboration functions
   2 // Copyright (C) 2005-2009 Red Hat Inc.
   3 // Copyright (C) 2008 Intel Corporation
   4 //
   5 // This file is part of systemtap, and is free software.  You can
   6 // redistribute it and/or modify it under the terms of the GNU General
   7 // Public License (GPL); either version 2, or (at your option) any
   8 // later version.
   9
  10 #include "config.h"
  11 #include "elaborate.h"
  12 #include "translate.h"
  13 #include "parse.h"
  14 #include "tapsets.h"
  15 #include "session.h"
  16 #include "util.h"
  17
  18 extern "C" {
  19 #include <sys/utsname.h>
  20 #include <fnmatch.h>
  21 }
  22
  23 #include <algorithm>
  24 #include <fstream>
  25 #include <map>
  26 #include <cassert>
  27 #include <set>
  28 #include <vector>
  29 #include <algorithm>
  30 #include <iterator>
  31
  32
  33 using namespace std;
  34
  35
  36 // ------------------------------------------------------------------------
  37
  38 // Used in probe_point condition construction.  Either argument may be
  39 // NULL; if both, return NULL too.  Resulting expression is a deep
  40 // copy for symbol resolution purposes.
  41 expression* add_condition (expression* a, expression* b)
  42 {
  43   if (!a && !b) return 0;
  44   if (! a) return deep_copy_visitor::deep_copy(b);
  45   if (! b) return deep_copy_visitor::deep_copy(a);
  46   logical_and_expr la;
  47   la.op = "&&";
  48   la.left = a;
  49   la.right = b;
  50   la.tok = a->tok; // or could be b->tok
  51   return deep_copy_visitor::deep_copy(& la);
  52 }
  53
  54 // ------------------------------------------------------------------------
  55
  56
  57
  58 derived_probe::derived_probe (probe *p):
  59   base (p), sdt_semaphore_addr(0)
  60 {
  61   assert (p);
  62   this->locations = p->locations;
  63   this->tok = p->tok;
  64   this->privileged = p->privileged;
  65   this->body = deep_copy_visitor::deep_copy(p->body);
  66 }
  67
  68
  69 derived_probe::derived_probe (probe *p, probe_point *l):
  70   base (p), sdt_semaphore_addr(0)
  71 {
  72   assert (p);
  73   this->tok = p->tok;
  74   this->privileged = p->privileged;
  75   this->body = deep_copy_visitor::deep_copy(p->body);
  76
  77   assert (l);
  78   this->locations.push_back (l);
  79 }
  80
  81
  82 void
  83 derived_probe::printsig (ostream& o) const
  84 {
  85   probe::printsig (o);
  86   printsig_nested (o);
  87 }
  88
  89 void
  90 derived_probe::printsig_nested (ostream& o) const
  91 {
  92   // We'd like to enclose the probe derivation chain in a /* */
  93   // comment delimiter.  But just printing /* base->printsig() */ is
  94   // not enough, since base might itself be a derived_probe.  So we,
  95   // er, "cleverly" encode our nesting state as a formatting flag for
  96   // the ostream.
  97   ios::fmtflags f = o.flags (ios::internal);
  98   if (f & ios::internal)
  99     {
 100       // already nested
 101       o << " <- ";
 102       base->printsig (o);
 103     }
 104   else
 105     {
 106       // outermost nesting
 107       o << " /* <- ";
 108       base->printsig (o);
 109       o << " */";
 110     }
 111   // restore flags
 112   (void) o.flags (f);
 113 }
 114
 115
 116 void
 117 derived_probe::collect_derivation_chain (std::vector<probe*> &probes_list)
 118 {
 119   probes_list.push_back(this);
 120   base->collect_derivation_chain(probes_list);
 121 }
 122
 123
 124 probe_point*
 125 derived_probe::sole_location () const
 126 {
 127   if (locations.size() == 0)
 128     throw semantic_error ("derived_probe with no locations", this->tok);
 129   else if (locations.size() > 1)
 130     throw semantic_error ("derived_probe with too many locations", this->tok);
 131   else
 132     return locations[0];
 133 }
 134
 135
 136 void
 137 derived_probe::emit_unprivileged_assertion (translator_output* o)
 138 {
 139   // Emit code which will cause compilation to fail if it is compiled in
 140   // unprivileged mode.
 141   o->newline() << "#ifndef STP_PRIVILEGED";
 142   o->newline() << "#error Internal Error: Probe ";
 143   probe::printsig (o->line());
 144   o->line()    << " generated in --unprivileged mode";
 145   o->newline() << "#endif";
 146 }
 147
 148
 149 void
 150 derived_probe::emit_process_owner_assertion (translator_output* o)
 151 {
 152   // Emit code which will abort should the current target not belong to the
 153   // user in unprivileged mode.
 154   o->newline()   << "#ifndef STP_PRIVILEGED";
 155   o->newline(1)  << "if (! is_myproc ()) {";
 156   o->newline(1)  << "snprintf(c->error_buffer, sizeof(c->error_buffer),";
 157   o->newline()   << "         \"Internal Error: Process %d does not belong to user %d in probe %s in --unprivileged mode\",";
 158   o->newline()   << "         current->tgid, _stp_uid, c->probe_point);";
 159   o->newline()   << "c->last_error = c->error_buffer;";
 160   // NB: since this check occurs before probe locking, its exit should
 161   // not be a "goto out", which would attempt unlocking.
 162   o->newline()   << "return;";
 163   o->newline(-1) << "}";
 164   o->newline(-1) << "#endif";
 165 }
 166
 167 void
 168 derived_probe::print_dupe_stamp_unprivileged(ostream& o)
 169 {
 170   o << "unprivileged users: authorized" << endl;
 171 }
 172
 173 void
 174 derived_probe::print_dupe_stamp_unprivileged_process_owner(ostream& o)
 175 {
 176   o << "unprivileged users: authorized for process owner" << endl;
 177 }
 178
 179 // ------------------------------------------------------------------------
 180 // Members of derived_probe_builder
 181
 182 void
 183 derived_probe_builder::check_unprivileged (const systemtap_session & sess,
 184                                            const literal_map_t & parameters)
 185 {
 186       // By default, probes are not allowed for unprivileged users.
 187       if (sess.unprivileged)
 188         {
 189           throw semantic_error (string("probe point is not allowed for unprivileged users"));
 190         }
 191 }
 192
 193 bool
 194 derived_probe_builder::get_param (std::map<std::string, literal*> const & params,
 195                                   const std::string& key,
 196                                   std::string& value)
 197 {
 198   map<string, literal *>::const_iterator i = params.find (key);
 199   if (i == params.end())
 200     return false;
 201   literal_string * ls = dynamic_cast<literal_string *>(i->second);
 202   if (!ls)
 203     return false;
 204   value = ls->value;
 205   return true;
 206 }
 207
 208
 209 bool
 210 derived_probe_builder::get_param (std::map<std::string, literal*> const & params,
 211                                   const std::string& key,
 212                                   int64_t& value)
 213 {
 214   map<string, literal *>::const_iterator i = params.find (key);
 215   if (i == params.end())
 216     return false;
 217   if (i->second == NULL)
 218     return false;
 219   literal_number * ln = dynamic_cast<literal_number *>(i->second);
 220   if (!ln)
 221     return false;
 222   value = ln->value;
 223   return true;
 224 }
 225
 226
 227 bool
 228 derived_probe_builder::has_null_param (std::map<std::string, literal*> const & params,
 229                                        const std::string& key)
 230 {
 231   map<string, literal *>::const_iterator i = params.find(key);
 232   return (i != params.end() && i->second == NULL);
 233 }
 234
 235
 236
 237 // ------------------------------------------------------------------------
 238 // Members of match_key.
 239
 240 match_key::match_key(string const & n)
 241   : name(n),
 242     have_parameter(false),
 243     parameter_type(pe_unknown)
 244 {
 245 }
 246
 247 match_key::match_key(probe_point::component const & c)
 248   : name(c.functor),
 249     have_parameter(c.arg != NULL),
 250     parameter_type(c.arg ? c.arg->type : pe_unknown)
 251 {
 252 }
 253
 254 match_key &
 255 match_key::with_number()
 256 {
 257   have_parameter = true;
 258   parameter_type = pe_long;
 259   return *this;
 260 }
 261
 262 match_key &
 263 match_key::with_string()
 264 {
 265   have_parameter = true;
 266   parameter_type = pe_string;
 267   return *this;
 268 }
 269
 270 string
 271 match_key::str() const
 272 {
 273   if (have_parameter)
 274     switch (parameter_type)
 275       {
 276       case pe_string: return name + "(string)";
 277       case pe_long: return name + "(number)";
 278       default: return name + "(...)";
 279       }
 280   return name;
 281 }
 282
 283 bool
 284 match_key::operator<(match_key const & other) const
 285 {
 286   return ((name < other.name)
 287
 288           || (name == other.name
 289               && have_parameter < other.have_parameter)
 290
 291           || (name == other.name
 292               && have_parameter == other.have_parameter
 293               && parameter_type < other.parameter_type));
 294 }
 295
 296 static bool
 297 isglob(string const & str)
 298 {
 299   return(str.find('*') != str.npos);
 300 }
 301
 302 bool
 303 match_key::globmatch(match_key const & other) const
 304 {
 305   const char *other_str = other.name.c_str();
 306   const char *name_str = name.c_str();
 307
 308   return ((fnmatch(name_str, other_str, FNM_NOESCAPE) == 0)
 309           && have_parameter == other.have_parameter
 310           && parameter_type == other.parameter_type);
 311 }
 312
 313 // ------------------------------------------------------------------------
 314 // Members of match_node
 315 // ------------------------------------------------------------------------
 316
 317 match_node::match_node()
 318 {
 319 }
 320
 321 match_node *
 322 match_node::bind(match_key const & k)
 323 {
 324   if (k.name == "*")
 325     throw semantic_error("invalid use of wildcard probe point component");
 326
 327   map<match_key, match_node *>::const_iterator i = sub.find(k);
 328   if (i != sub.end())
 329     return i->second;
 330   match_node * n = new match_node();
 331   sub.insert(make_pair(k, n));
 332   return n;
 333 }
 334
 335 void
 336 match_node::bind(derived_probe_builder * e)
 337 {
 338   ends.push_back (e);
 339 }
 340
 341 match_node *
 342 match_node::bind(string const & k)
 343 {
 344   return bind(match_key(k));
 345 }
 346
 347 match_node *
 348 match_node::bind_str(string const & k)
 349 {
 350   return bind(match_key(k).with_string());
 351 }
 352
 353 match_node *
 354 match_node::bind_num(string const & k)
 355 {
 356   return bind(match_key(k).with_number());
 357 }
 358
 359 void
 360 match_node::find_and_build (systemtap_session& s,
 361                             probe* p, probe_point *loc, unsigned pos,
 362                             vector<derived_probe *>& results)
 363 {
 364   assert (pos <= loc->components.size());
 365   if (pos == loc->components.size()) // matched all probe point components so far
 366     {
 367       if (ends.empty())
 368         {
 369           string alternatives;
 370           for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 371             alternatives += string(" ") + i->first.str();
 372
 373           throw semantic_error (string("probe point truncated at position ") +
 374                                 lex_cast (pos) +
 375                                 " (follow:" + alternatives + ")", loc->components.back()->tok);
 376         }
 377
 378       map<string, literal *> param_map;
 379       for (unsigned i=0; i<pos; i++)
 380         param_map[loc->components[i]->functor] = loc->components[i]->arg;
 381       // maybe 0
 382
 383       // Iterate over all bound builders
 384       for (unsigned k=0; k<ends.size(); k++)
 385         {
 386           derived_probe_builder *b = ends[k];
 387           b->check_unprivileged (s, param_map);
 388           b->build (s, p, loc, param_map, results);
 389         }
 390     }
 391   else if (isglob(loc->components[pos]->functor)) // wildcard?
 392     {
 393       match_key match (* loc->components[pos]);
 394
 395       // Call find_and_build for each possible match.  Ignore errors -
 396       // unless we don't find any match.
 397       unsigned int num_results = results.size();
 398       for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 399         {
 400           const match_key& subkey = i->first;
 401           match_node* subnode = i->second;
 402
 403           if (pending_interrupts) break;
 404
 405           if (match.globmatch(subkey))
 406             {
 407               if (s.verbose > 2)
 408                 clog << "wildcard '" << loc->components[pos]->functor
 409                      << "' matched '" << subkey.name << "'" << endl;
 410
 411               // When we have a wildcard, we need to create a copy of
 412               // the probe point.  Then we'll create a copy of the
 413               // wildcard component, and substitute the non-wildcard
 414               // functor.
 415               probe_point *non_wildcard_pp = new probe_point(*loc);
 416               probe_point::component *non_wildcard_component
 417                 = new probe_point::component(*loc->components[pos]);
 418               non_wildcard_component->functor = subkey.name;
 419               non_wildcard_pp->components[pos] = non_wildcard_component;
 420
 421               // NB: probe conditions are not attached at the wildcard
 422               // (component/functor) level, but at the overall
 423               // probe_point level.
 424
 425               // recurse (with the non-wildcard probe point)
 426               try
 427                 {
 428                   subnode->find_and_build (s, p, non_wildcard_pp, pos+1,
 429                                            results);
 430                 }
 431               catch (const semantic_error& e)
 432                 {
 433                   // Ignore semantic_errors while expanding wildcards.
 434                   // If we get done and nothing was expanded, the code
 435                   // following the loop will complain.
 436
 437                   // If this wildcard didn't match, cleanup.
 438                   delete non_wildcard_pp;
 439                   delete non_wildcard_component;
 440                 }
 441             }
 442         }
 443       if (! loc->optional && num_results == results.size())
 444         {
 445           // We didn't find any wildcard matches (since the size of
 446           // the result vector didn't change).  Throw an error.
 447           string alternatives;
 448           for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 449             alternatives += string(" ") + i->first.str();
 450
 451           throw semantic_error(string("probe point mismatch at position ") +
 452                                lex_cast (pos) +
 453                                " (alternatives:" + alternatives + ")" +
 454                                " didn't find any wildcard matches",
 455                                loc->components[pos]->tok);
 456         }
 457     }
 458   else
 459     {
 460       match_key match (* loc->components[pos]);
 461       sub_map_iterator_t i = sub.find (match);
 462       if (i == sub.end()) // no match
 463         {
 464           string alternatives;
 465           for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 466             alternatives += string(" ") + i->first.str();
 467
 468
 469           throw semantic_error (string("probe point mismatch at position ") +
 470                                 lex_cast (pos) +
 471                                 " (alternatives:" + alternatives + ")",
 472                                 loc->components[pos]->tok);
 473         }
 474
 475       match_node* subnode = i->second;
 476       // recurse
 477       subnode->find_and_build (s, p, loc, pos+1, results);
 478     }
 479 }
 480
 481
 482 void
 483 match_node::build_no_more (systemtap_session& s)
 484 {
 485   for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 486     i->second->build_no_more (s);
 487   for (unsigned k=0; k<ends.size(); k++)
 488     {
 489       derived_probe_builder *b = ends[k];
 490       b->build_no_more (s);
 491     }
 492 }
 493
 494
 495 // ------------------------------------------------------------------------
 496 // Alias probes
 497 // ------------------------------------------------------------------------
 498
 499 struct alias_derived_probe: public derived_probe
 500 {
 501   alias_derived_probe (probe* base, probe_point *l, const probe_alias *a):
 502     derived_probe (base, l), alias(a) {}
 503
 504   void upchuck () { throw semantic_error ("inappropriate", this->tok); }
 505
 506   // Alias probes are immediately expanded to other derived_probe
 507   // types, and are not themselves emitted or listed in
 508   // systemtap_session.probes
 509
 510   void join_group (systemtap_session&) { upchuck (); }
 511
 512   virtual const probe_alias *get_alias () const { return alias; }
 513
 514 private:
 515   const probe_alias *alias; // Used to check for recursion
 516 };
 517
 518
 519 struct
 520 alias_expansion_builder
 521   : public derived_probe_builder
 522 {
 523   probe_alias * alias;
 524
 525   alias_expansion_builder(probe_alias * a)
 526     : alias(a)
 527   {}
 528
 529   virtual void build(systemtap_session & sess,
 530                      probe * use,
 531                      probe_point * location,
 532                      std::map<std::string, literal *> const &,
 533                      vector<derived_probe *> & finished_results)
 534   {
 535     // Don't build the alias expansion if infinite recursion is detected.
 536     if (checkForRecursiveExpansion (use)) {
 537       stringstream msg;
 538       msg << "Recursive loop in alias expansion of " << *location  << " at " << location->components.front()->tok->location;
 539       // semantic_errors thrown here are ignored.
 540       sess.print_error (semantic_error (msg.str()));
 541       return;
 542     }
 543
 544     // We're going to build a new probe and wrap it up in an
 545     // alias_expansion_probe so that the expansion loop recognizes it as
 546     // such and re-expands its expansion.
 547
 548     alias_derived_probe * n = new alias_derived_probe (use, location /* soon overwritten */, this->alias);
 549     n->body = new block();
 550
 551     // The new probe gets a deep copy of the location list of
 552     // the alias (with incoming condition joined)
 553     n->locations.clear();
 554     for (unsigned i=0; i<alias->locations.size(); i++)
 555       {
 556         probe_point *pp = new probe_point(*alias->locations[i]);
 557         pp->condition = add_condition (pp->condition, location->condition);
 558         n->locations.push_back(pp);
 559       }
 560
 561     // the token location of the alias,
 562     n->tok = location->components.front()->tok;
 563
 564     // and statements representing the concatenation of the alias'
 565     // body with the use's.
 566     //
 567     // NB: locals are *not* copied forward, from either alias or
 568     // use. The expansion should have its locals re-inferred since
 569     // there's concatenated code here and we only want one vardecl per
 570     // resulting variable.
 571
 572     if (alias->epilogue_style)
 573       n->body = new block (use->body, alias->body);
 574     else
 575       n->body = new block (alias->body, use->body);
 576
 577     unsigned old_num_results = finished_results.size();
 578     derive_probes (sess, n, finished_results, location->optional);
 579
 580     // Check whether we resolved something. If so, put the
 581     // whole library into the queue if not already there.
 582     if (finished_results.size() > old_num_results)
 583       {
 584         stapfile *f = alias->tok->location.file;
 585         if (find (sess.files.begin(), sess.files.end(), f)
 586             == sess.files.end())
 587           sess.files.push_back (f);
 588       }
 589   }
 590
 591   bool checkForRecursiveExpansion (probe *use)
 592   {
 593     // Collect the derivation chain of this probe.
 594     vector<probe*>derivations;
 595     use->collect_derivation_chain (derivations);
 596
 597     // Check all probe points in the alias expansion against the currently-being-expanded probe point
 598     // of each of the probes in the derivation chain, looking for a match. This
 599     // indicates infinite recursion.
 600     // The first element of the derivation chain will be the derived_probe representing 'use', so
 601     // start the search with the second element.
 602     assert (derivations.size() > 0);
 603     assert (derivations[0] == use);
 604     for (unsigned d = 1; d < derivations.size(); ++d) {
 605       if (use->get_alias() == derivations[d]->get_alias())
 606         return true; // recursion detected
 607     }
 608     return false;
 609   }
 610
 611   // No action required. The actual probes will be checked when they are
 612   // built.
 613   virtual void check_unprivileged (const systemtap_session & sess,
 614                                    const literal_map_t & parameters) {}
 615 };
 616
 617
 618 // ------------------------------------------------------------------------
 619 // Pattern matching
 620 // ------------------------------------------------------------------------
 621
 622
 623 // Register all the aliases we've seen in library files, and the user
 624 // file, as patterns.
 625
 626 void
 627 systemtap_session::register_library_aliases()
 628 {
 629   vector<stapfile*> files(library_files);
 630   files.push_back(user_file);
 631
 632   for (unsigned f = 0; f < files.size(); ++f)
 633     {
 634       stapfile * file = files[f];
 635       for (unsigned a = 0; a < file->aliases.size(); ++a)
 636         {
 637           probe_alias * alias = file->aliases[a];
 638           try
 639             {
 640               for (unsigned n = 0; n < alias->alias_names.size(); ++n)
 641                 {
 642                   probe_point * name = alias->alias_names[n];
 643                   match_node * n = pattern_root;
 644                   for (unsigned c = 0; c < name->components.size(); ++c)
 645                     {
 646                       probe_point::component * comp = name->components[c];
 647                       // XXX: alias parameters
 648                       if (comp->arg)
 649                         throw semantic_error("alias component "
 650                                              + comp->functor
 651                                              + " contains illegal parameter");
 652                       n = n->bind(comp->functor);
 653                     }
 654                   n->bind(new alias_expansion_builder(alias));
 655                 }
 656             }
 657           catch (const semantic_error& e)
 658             {
 659               semantic_error* er = new semantic_error (e); // copy it
 660               stringstream msg;
 661               msg << e.msg2;
 662               msg << " while registering probe alias ";
 663               alias->printsig(msg);
 664               er->msg2 = msg.str();
 665               print_error (* er);
 666               delete er;
 667             }
 668         }
 669     }
 670 }
 671
 672
 673 static unsigned max_recursion = 100;
 674
 675 struct
 676 recursion_guard
 677 {
 678   unsigned & i;
 679   recursion_guard(unsigned & i) : i(i)
 680     {
 681       if (i > max_recursion)
 682         throw semantic_error("recursion limit reached");
 683       ++i;
 684     }
 685   ~recursion_guard()
 686     {
 687       --i;
 688     }
 689 };
 690
 691 // The match-and-expand loop.
 692 void
 693 derive_probes (systemtap_session& s,
 694                probe *p, vector<derived_probe*>& dps,
 695                bool optional)
 696 {
 697   for (unsigned i = 0; i < p->locations.size(); ++i)
 698     {
 699       if (pending_interrupts) break;
 700
 701       probe_point *loc = p->locations[i];
 702
 703       try
 704         {
 705           unsigned num_atbegin = dps.size();
 706
 707           // Pass down optional flag from e.g. alias reference to each
 708           // probe_point instance.  We do this by temporarily overriding
 709           // the probe_point optional flag.  We could instead deep-copy
 710           // and set a flag on the copy permanently.
 711           bool old_loc_opt = loc->optional;
 712           loc->optional = loc->optional || optional;
 713           try
 714             {
 715               s.pattern_root->find_and_build (s, p, loc, 0, dps); // <-- actual derivation!
 716             }
 717           catch (const semantic_error& e)
 718             {
 719               if (!loc->optional)
 720                 throw semantic_error(e);
 721               else /* tolerate failure for optional probe */
 722                 continue;
 723             }
 724
 725           loc->optional = old_loc_opt;
 726           unsigned num_atend = dps.size();
 727
 728           if (! (loc->optional||optional) && // something required, but
 729               num_atbegin == num_atend) // nothing new derived!
 730             throw semantic_error ("no match");
 731
 732           if (loc->sufficient && (num_atend > num_atbegin))
 733             {
 734               if (s.verbose > 1)
 735                 {
 736                   clog << "Probe point ";
 737                   p->locations[i]->print(clog);
 738                   clog << " sufficient, skipped";
 739                   for (unsigned j = i+1; j < p->locations.size(); ++j)
 740                     {
 741                       clog << " ";
 742                       p->locations[j]->print(clog);
 743                     }
 744                   clog << endl;
 745                 }
 746               break; // we need not try to derive for any other locations
 747             }
 748         }
 749       catch (const semantic_error& e)
 750         {
 751           // XXX: prefer not to print_error at every nest/unroll level
 752
 753           semantic_error* er = new semantic_error (e); // copy it
 754           stringstream msg;
 755           msg << e.msg2;
 756           msg << " while resolving probe point " << *loc;
 757           er->msg2 = msg.str();
 758           s.print_error (* er);
 759           delete er;
 760         }
 761
 762     }
 763 }
 764
 765
 766
 767 // ------------------------------------------------------------------------
 768 //
 769 // Indexable usage checks
 770 //
 771
 772 struct symbol_fetcher
 773   : public throwing_visitor
 774 {
 775   symbol *&sym;
 776
 777   symbol_fetcher (symbol *&sym): sym(sym)
 778   {}
 779
 780   void visit_symbol (symbol* e)
 781   {
 782     sym = e;
 783   }
 784
 785   void visit_target_symbol (target_symbol* e)
 786   {
 787     sym = e;
 788   }
 789
 790   void visit_arrayindex (arrayindex* e)
 791   {
 792     e->base->visit_indexable (this);
 793   }
 794
 795   void visit_cast_op (cast_op* e)
 796   {
 797     sym = e;
 798   }
 799
 800   void throwone (const token* t)
 801   {
 802     throw semantic_error ("Expecting symbol or array index expression", t);
 803   }
 804 };
 805
 806 symbol *
 807 get_symbol_within_expression (expression *e)
 808 {
 809   symbol *sym = NULL;
 810   symbol_fetcher fetcher(sym);
 811   e->visit (&fetcher);
 812   return sym; // NB: may be null!
 813 }
 814
 815 static symbol *
 816 get_symbol_within_indexable (indexable *ix)
 817 {
 818   symbol *array = NULL;
 819   hist_op *hist = NULL;
 820   classify_indexable(ix, array, hist);
 821   if (array)
 822     return array;
 823   else
 824     return get_symbol_within_expression (hist->stat);
 825 }
 826
 827 struct mutated_var_collector
 828   : public traversing_visitor
 829 {
 830   set<vardecl *> * mutated_vars;
 831
 832   mutated_var_collector (set<vardecl *> * mm)
 833     : mutated_vars (mm)
 834   {}
 835
 836   void visit_assignment(assignment* e)
 837   {
 838     if (e->type == pe_stats && e->op == "<<<")
 839       {
 840         vardecl *vd = get_symbol_within_expression (e->left)->referent;
 841         if (vd)
 842           mutated_vars->insert (vd);
 843       }
 844     traversing_visitor::visit_assignment(e);
 845   }
 846
 847   void visit_arrayindex (arrayindex *e)
 848   {
 849     if (is_active_lvalue (e))
 850       {
 851         symbol *sym;
 852         if (e->base->is_symbol (sym))
 853           mutated_vars->insert (sym->referent);
 854         else
 855           throw semantic_error("Assignment to read-only histogram bucket", e->tok);
 856       }
 857     traversing_visitor::visit_arrayindex (e);
 858   }
 859 };
 860
 861
 862 struct no_var_mutation_during_iteration_check
 863   : public traversing_visitor
 864 {
 865   systemtap_session & session;
 866   map<functiondecl *,set<vardecl *> *> & function_mutates_vars;
 867   vector<vardecl *> vars_being_iterated;
 868
 869   no_var_mutation_during_iteration_check
 870   (systemtap_session & sess,
 871    map<functiondecl *,set<vardecl *> *> & fmv)
 872     : session(sess), function_mutates_vars (fmv)
 873   {}
 874
 875   void visit_arrayindex (arrayindex *e)
 876   {
 877     if (is_active_lvalue(e))
 878       {
 879         vardecl *vd = get_symbol_within_indexable (e->base)->referent;
 880         if (vd)
 881           {
 882             for (unsigned i = 0; i < vars_being_iterated.size(); ++i)
 883               {
 884                 vardecl *v = vars_being_iterated[i];
 885                 if (v == vd)
 886                   {
 887                     string err = ("variable '" + v->name +
 888                                   "' modified during 'foreach' iteration");
 889                     session.print_error (semantic_error (err, e->tok));
 890                   }
 891               }
 892           }
 893       }
 894     traversing_visitor::visit_arrayindex (e);
 895   }
 896
 897   void visit_functioncall (functioncall* e)
 898   {
 899     map<functiondecl *,set<vardecl *> *>::const_iterator i
 900       = function_mutates_vars.find (e->referent);
 901
 902     if (i != function_mutates_vars.end())
 903       {
 904         for (unsigned j = 0; j < vars_being_iterated.size(); ++j)
 905           {
 906             vardecl *m = vars_being_iterated[j];
 907             if (i->second->find (m) != i->second->end())
 908               {
 909                 string err = ("function call modifies var '" + m->name +
 910                               "' during 'foreach' iteration");
 911                 session.print_error (semantic_error (err, e->tok));
 912               }
 913           }
 914       }
 915
 916     traversing_visitor::visit_functioncall (e);
 917   }
 918
 919   void visit_foreach_loop(foreach_loop* s)
 920   {
 921     vardecl *vd = get_symbol_within_indexable (s->base)->referent;
 922
 923     if (vd)
 924       vars_being_iterated.push_back (vd);
 925
 926     traversing_visitor::visit_foreach_loop (s);
 927
 928     if (vd)
 929       vars_being_iterated.pop_back();
 930   }
 931 };
 932
 933
 934 // ------------------------------------------------------------------------
 935
 936 struct stat_decl_collector
 937   : public traversing_visitor
 938 {
 939   systemtap_session & session;
 940
 941   stat_decl_collector(systemtap_session & sess)
 942     : session(sess)
 943   {}
 944
 945   void visit_stat_op (stat_op* e)
 946   {
 947     symbol *sym = get_symbol_within_expression (e->stat);
 948     if (session.stat_decls.find(sym->name) == session.stat_decls.end())
 949       session.stat_decls[sym->name] = statistic_decl();
 950   }
 951
 952   void visit_assignment (assignment* e)
 953   {
 954     if (e->op == "<<<")
 955       {
 956         symbol *sym = get_symbol_within_expression (e->left);
 957         if (session.stat_decls.find(sym->name) == session.stat_decls.end())
 958           session.stat_decls[sym->name] = statistic_decl();
 959       }
 960     else
 961       traversing_visitor::visit_assignment(e);
 962   }
 963
 964   void visit_hist_op (hist_op* e)
 965   {
 966     symbol *sym = get_symbol_within_expression (e->stat);
 967     statistic_decl new_stat;
 968
 969     if (e->htype == hist_linear)
 970       {
 971         new_stat.type = statistic_decl::linear;
 972         assert (e->params.size() == 3);
 973         new_stat.linear_low = e->params[0];
 974         new_stat.linear_high = e->params[1];
 975         new_stat.linear_step = e->params[2];
 976       }
 977     else
 978       {
 979         assert (e->htype == hist_log);
 980         new_stat.type = statistic_decl::logarithmic;
 981         assert (e->params.size() == 0);
 982       }
 983
 984     map<string, statistic_decl>::iterator i = session.stat_decls.find(sym->name);
 985     if (i == session.stat_decls.end())
 986       session.stat_decls[sym->name] = new_stat;
 987     else
 988       {
 989         statistic_decl & old_stat = i->second;
 990         if (!(old_stat == new_stat))
 991           {
 992             if (old_stat.type == statistic_decl::none)
 993               i->second = new_stat;
 994             else
 995               {
 996                 // FIXME: Support multiple co-declared histogram types
 997                 semantic_error se("multiple histogram types declared on '" + sym->name + "'",
 998                                   e->tok);
 999                 session.print_error (se);
1000               }
1001           }
1002       }
1003   }
1004
1005 };
1006
1007 static int
1008 semantic_pass_stats (systemtap_session & sess)
1009 {
1010   stat_decl_collector sdc(sess);
1011
1012   for (map<string,functiondecl*>::iterator it = sess.functions.begin(); it != sess.functions.end(); it++)
1013     it->second->body->visit (&sdc);
1014
1015   for (unsigned i = 0; i < sess.probes.size(); ++i)
1016     sess.probes[i]->body->visit (&sdc);
1017
1018   for (unsigned i = 0; i < sess.globals.size(); ++i)
1019     {
1020       vardecl *v = sess.globals[i];
1021       if (v->type == pe_stats)
1022         {
1023
1024           if (sess.stat_decls.find(v->name) == sess.stat_decls.end())
1025             {
1026               semantic_error se("unable to infer statistic parameters for global '" + v->name + "'");
1027               sess.print_error (se);
1028             }
1029         }
1030     }
1031
1032   return sess.num_errors();
1033 }
1034
1035 // ------------------------------------------------------------------------
1036
1037 // Enforce variable-related invariants: no modification of
1038 // a foreach()-iterated array.
1039 static int
1040 semantic_pass_vars (systemtap_session & sess)
1041 {
1042
1043   map<functiondecl *, set<vardecl *> *> fmv;
1044   no_var_mutation_during_iteration_check chk(sess, fmv);
1045
1046   for (map<string,functiondecl*>::iterator it = sess.functions.begin(); it != sess.functions.end(); it++)
1047     {
1048       functiondecl * fn = it->second;
1049       if (fn->body)
1050         {
1051           set<vardecl *> * m = new set<vardecl *>();
1052           mutated_var_collector mc (m);
1053           fn->body->visit (&mc);
1054           fmv[fn] = m;
1055         }
1056     }
1057
1058   for (map<string,functiondecl*>::iterator it = sess.functions.begin(); it != sess.functions.end(); it++)
1059     {
1060       functiondecl * fn = it->second;
1061       if (fn->body) fn->body->visit (&chk);
1062     }
1063
1064   for (unsigned i = 0; i < sess.probes.size(); ++i)
1065     {
1066       if (sess.probes[i]->body)
1067         sess.probes[i]->body->visit (&chk);
1068     }
1069
1070   return sess.num_errors();
1071 }
1072
1073
1074 // ------------------------------------------------------------------------
1075
1076 // Rewrite probe condition expressions into probe bodies.  Tricky and
1077 // exciting business, this.  This:
1078 //
1079 // probe foo if (g1 || g2) { ... }
1080 // probe bar { ... g1 ++ ... }
1081 //
1082 // becomes:
1083 //
1084 // probe begin(MAX) { if (! (g1 || g2)) %{ disable_probe_foo %} }
1085 // probe foo { if (! (g1 || g2)) next; ... }
1086 // probe bar { ... g1 ++ ...;
1087 //             if (g1 || g2) %{ enable_probe_foo %} else %{ disable_probe_foo %}
1088 //           }
1089 //
1090 // XXX: As a first cut, do only the "inline probe condition" part of the
1091 // transform.
1092
1093 static int
1094 semantic_pass_conditions (systemtap_session & sess)
1095 {
1096   for (unsigned i = 0; i < sess.probes.size(); ++i)
1097     {
1098       derived_probe* p = sess.probes[i];
1099       expression* e = p->sole_location()->condition;
1100       if (e)
1101         {
1102           varuse_collecting_visitor vut(sess);
1103           e->visit (& vut);
1104
1105           if (! vut.written.empty())
1106             {
1107               string err = ("probe condition must not modify any variables");
1108               sess.print_error (semantic_error (err, e->tok));
1109             }
1110           else if (vut.embedded_seen)
1111             {
1112               sess.print_error (semantic_error ("probe condition must not include impure embedded-C", e->tok));
1113             }
1114
1115           // Add the condition expression to the front of the
1116           // derived_probe body.
1117           if_statement *ifs = new if_statement ();
1118           ifs->tok = e->tok;
1119           ifs->thenblock = new next_statement ();
1120           ifs->thenblock->tok = e->tok;
1121           ifs->elseblock = NULL;
1122           unary_expression *notex = new unary_expression ();
1123           notex->op = "!";
1124           notex->tok = e->tok;
1125           notex->operand = e;
1126           ifs->condition = notex;
1127           p->body = new block (ifs, p->body);
1128         }
1129     }
1130
1131   return sess.num_errors();
1132 }
1133
1134
1135 // ------------------------------------------------------------------------
1136
1137
// Forward declarations of the file-local passes that semantic_pass()
// runs.  semantic_pass_vars, semantic_pass_stats and
// semantic_pass_conditions are already defined above, so their
// declarations here are redundant but harmless; the remaining passes
// are defined further down in this file.
static int semantic_pass_symbols (systemtap_session&);
static int semantic_pass_optimize1 (systemtap_session&);
static int semantic_pass_optimize2 (systemtap_session&);
static int semantic_pass_types (systemtap_session&);
static int semantic_pass_vars (systemtap_session&);
static int semantic_pass_stats (systemtap_session&);
static int semantic_pass_conditions (systemtap_session&);
1145
1146
1147 // Link up symbols to their declarations.  Set the session's
1148 // files/probes/functions/globals vectors from the transitively
1149 // reached set of stapfiles in s.library_files, starting from
1150 // s.user_file.  Perform automatic tapset inclusion and probe
1151 // alias expansion.
1152 static int
1153 semantic_pass_symbols (systemtap_session& s)
1154 {
1155   symresolution_info sym (s);
1156
1157   // NB: s.files can grow during this iteration, so size() can
1158   // return gradually increasing numbers.
1159   s.files.push_back (s.user_file);
1160   for (unsigned i = 0; i < s.files.size(); i++)
1161     {
1162       if (pending_interrupts) break;
1163       stapfile* dome = s.files[i];
1164
1165       // Pass 1: add globals and functions to systemtap-session master list,
1166       //         so the find_* functions find them
1167
1168       for (unsigned i=0; i<dome->globals.size(); i++)
1169         s.globals.push_back (dome->globals[i]);
1170
1171       for (unsigned i=0; i<dome->functions.size(); i++)
1172         s.functions[dome->functions[i]->name] = dome->functions[i];
1173
1174       for (unsigned i=0; i<dome->embeds.size(); i++)
1175         s.embeds.push_back (dome->embeds[i]);
1176
1177       // Pass 2: process functions
1178
1179       for (unsigned i=0; i<dome->functions.size(); i++)
1180         {
1181           if (pending_interrupts) break;
1182           functiondecl* fd = dome->functions[i];
1183
1184           try
1185             {
1186               for (unsigned j=0; j<s.code_filters.size(); j++)
1187                 s.code_filters[j]->replace (fd->body);
1188
1189               sym.current_function = fd;
1190               sym.current_probe = 0;
1191               fd->body->visit (& sym);
1192             }
1193           catch (const semantic_error& e)
1194             {
1195               s.print_error (e);
1196             }
1197         }
1198
1199       // Pass 3: derive probes and resolve any further symbols in the
1200       // derived results.
1201
1202       for (unsigned i=0; i<dome->probes.size(); i++)
1203         {
1204           if (pending_interrupts) break;
1205           probe* p = dome->probes [i];
1206           vector<derived_probe*> dps;
1207
1208           // much magic happens here: probe alias expansion, wildcard
1209           // matching, low-level derived_probe construction.
1210           derive_probes (s, p, dps);
1211
1212           for (unsigned j=0; j<dps.size(); j++)
1213             {
1214               if (pending_interrupts) break;
1215               derived_probe* dp = dps[j];
1216               s.probes.push_back (dp);
1217               dp->join_group (s);
1218
1219               try
1220                 {
1221                   for (unsigned k=0; k<s.code_filters.size(); k++)
1222                     s.code_filters[k]->replace (dp->body);
1223
1224                   sym.current_function = 0;
1225                   sym.current_probe = dp;
1226                   dp->body->visit (& sym);
1227
1228                   // Process the probe-point condition expression.
1229                   sym.current_function = 0;
1230                   sym.current_probe = 0;
1231                   if (dp->sole_location()->condition)
1232                     dp->sole_location()->condition->visit (& sym);
1233                 }
1234               catch (const semantic_error& e)
1235                 {
1236                   s.print_error (e);
1237                 }
1238             }
1239         }
1240     }
1241
1242   // Inform all derived_probe builders that we're done with
1243   // all resolution, so it's time to release caches.
1244   s.pattern_root->build_no_more (s);
1245
1246   return s.num_errors(); // all those print_error calls
1247 }
1248
1249
1250 // Keep unread global variables for probe end value display.
1251 void add_global_var_display (systemtap_session& s)
1252 {
1253   // Don't generate synthetic end probes when in listings mode;
1254   // it would clutter up the list of probe points with "end ...".
1255   if (s.listing_mode) return;
1256
1257   varuse_collecting_visitor vut(s);
1258   for (unsigned i=0; i<s.probes.size(); i++)
1259     {
1260       s.probes[i]->body->visit (& vut);
1261
1262       if (s.probes[i]->sole_location()->condition)
1263         s.probes[i]->sole_location()->condition->visit (& vut);
1264     }
1265
1266   for (unsigned g=0; g < s.globals.size(); g++)
1267     {
1268       vardecl* l = s.globals[g];
1269       if (vut.read.find (l) != vut.read.end()
1270           || vut.written.find (l) == vut.written.end())
1271         continue;
1272
1273       // Don't generate synthetic end probes for unread globals
1274       // declared only within tapsets. (RHBZ 468139), but rather
1275       // only within the end-user script.
1276
1277       bool tapset_global = false;
1278       for (size_t m=0; m < s.library_files.size(); m++)
1279         {
1280           for (size_t n=0; n < s.library_files[m]->globals.size(); n++)
1281             {
1282               if (l->name == s.library_files[m]->globals[n]->name)
1283                 {tapset_global = true; break;}
1284             }
1285         }
1286       if (tapset_global)
1287         continue;
1288
1289       probe_point::component* c = new probe_point::component("end");
1290       probe_point* pl = new probe_point;
1291       pl->components.push_back (c);
1292
1293       vector<derived_probe*> dps;
1294       block *b = new block;
1295
1296       probe* p = new probe;
1297       p->tok = l->tok;
1298       p->locations.push_back (pl);
1299
1300       // Create a symbol
1301       symbol* g_sym = new symbol;
1302       g_sym->name = l->name;
1303       g_sym->tok = l->tok;
1304       g_sym->type = l->type;
1305       g_sym->referent = l;
1306
1307       token* print_tok = new token;
1308       print_tok->type = tok_identifier;
1309       print_tok->content = "printf";
1310
1311       print_format* pf = print_format::create(print_tok);
1312       pf->raw_components += l->name;
1313
1314       if (l->index_types.size() == 0) // Scalar
1315         {
1316           if (l->type == pe_stats)
1317             pf->raw_components += " @count=%#x @min=%#x @max=%#x @sum=%#x @avg=%#x\\n";
1318           else if (l->type == pe_string)
1319             pf->raw_components += "=\"%#s\"\\n";
1320           else
1321             pf->raw_components += "=%#x\\n";
1322           pf->components = print_format::string_to_components(pf->raw_components);
1323           expr_statement* feb = new expr_statement;
1324           feb->value = pf;
1325           feb->tok = print_tok;
1326           if (l->type == pe_stats)
1327             {
1328               struct stat_op* so [5];
1329               const stat_component_type stypes[] = {sc_count, sc_min, sc_max, sc_sum, sc_average};
1330
1331               for (unsigned si = 0;
1332                    si < (sizeof(so)/sizeof(struct stat_op*));
1333                    si++)
1334                 {
1335                   so[si]= new stat_op;
1336                   so[si]->ctype = stypes[si];
1337                   so[si]->type = pe_long;
1338                   so[si]->stat = g_sym;
1339                   so[si]->tok = l->tok;
1340                   pf->args.push_back(so[si]);
1341                 }
1342             }
1343           else
1344             pf->args.push_back(g_sym);
1345
1346           /* PR7053: Checking empty aggregate for global variable */
1347           if (l->type == pe_stats) {
1348               stat_op *so= new stat_op;
1349               so->ctype = sc_count;
1350               so->type = pe_long;
1351               so->stat = g_sym;
1352               so->tok = l->tok;
1353               comparison *be = new comparison;
1354               be->op = ">";
1355               be->tok = l->tok;
1356               be->left = so;
1357               be->right = new literal_number(0);
1358
1359               /* Create printf @count=0x0 in else block */
1360               print_format* pf_0 = print_format::create(print_tok);
1361               pf_0->raw_components += l->name;
1362               pf_0->raw_components += " @count=0x0\\n";
1363               pf_0->components = print_format::string_to_components(pf_0->raw_components);
1364               expr_statement* feb_else = new expr_statement;
1365               feb_else->value = pf_0;
1366               feb_else->tok = print_tok;
1367               if_statement *ifs = new if_statement;
1368               ifs->tok = l->tok;
1369               ifs->condition = be;
1370               ifs->thenblock = feb ;
1371               ifs->elseblock = feb_else;
1372               b->statements.push_back(ifs);
1373             }
1374           else /* other non-stat cases */
1375             b->statements.push_back(feb);
1376         }
1377       else                      // Array
1378         {
1379           int idx_count = l->index_types.size();
1380           symbol* idx_sym[idx_count];
1381           vardecl* idx_v[idx_count];
1382           // Create a foreach loop
1383           foreach_loop* fe = new foreach_loop;
1384           fe->sort_direction = -1; // imply decreasing sort on value
1385           fe->sort_column = 0;     // as in   foreach ([a,b,c] in array-) { }
1386           fe->limit = NULL;
1387
1388           // Create indices for the foreach loop
1389           for (int i=0; i < idx_count; i++)
1390             {
1391               char *idx_name;
1392               if (asprintf (&idx_name, "idx%d", i) < 0)
1393                 return;
1394               idx_sym[i] = new symbol;
1395               idx_sym[i]->name = idx_name;
1396               idx_sym[i]->tok = l->tok;
1397               idx_v[i] = new vardecl;
1398               idx_v[i]->name = idx_name;
1399               idx_v[i]->type = l->index_types[i];
1400               idx_v[i]->tok = l->tok;
1401               idx_sym[i]->referent = idx_v[i];
1402               fe->indexes.push_back (idx_sym[i]);
1403             }
1404
1405           // Create a printf for the foreach loop
1406           pf->raw_components += "[";
1407           for (int i=0; i < idx_count; i++)
1408             {
1409               if (i > 0)
1410                 pf->raw_components += ",";
1411               if (l->index_types[i] == pe_string)
1412                 pf->raw_components += "\"%#s\"";
1413               else
1414                 pf->raw_components += "%#d";
1415             }
1416           pf->raw_components += "]";
1417           if (l->type == pe_stats)
1418             pf->raw_components += " @count=%#x @min=%#x @max=%#x @sum=%#x @avg=%#x\\n";
1419           else if (l->type == pe_string)
1420             pf->raw_components += "=\"%#s\"\\n";
1421           else
1422             pf->raw_components += "=%#x\\n";
1423
1424           // Create an index for the array
1425           struct arrayindex* ai = new arrayindex;
1426           ai->tok = l->tok;
1427           ai->base = g_sym;
1428
1429           for (int i=0; i < idx_count; i++)
1430             {
1431               ai->indexes.push_back (idx_sym[i]);
1432               pf->args.push_back(idx_sym[i]);
1433             }
1434           if (l->type == pe_stats)
1435             {
1436               struct stat_op* so [5];
1437               const stat_component_type stypes[] = {sc_count, sc_min, sc_max, sc_sum, sc_average};
1438
1439               ai->type = pe_stats;
1440               for (unsigned si = 0;
1441                    si < (sizeof(so)/sizeof(struct stat_op*));
1442                    si++)
1443                 {
1444                   so[si]= new stat_op;
1445                   so[si]->ctype = stypes[si];
1446                   so[si]->type = pe_long;
1447                   so[si]->stat = ai;
1448                   so[si]->tok = l->tok;
1449                   pf->args.push_back(so[si]);
1450                 }
1451             }
1452           else
1453             pf->args.push_back(ai);
1454
1455           pf->components = print_format::string_to_components(pf->raw_components);
1456           expr_statement* feb = new expr_statement;
1457           feb->value = pf;
1458           fe->base = g_sym;
1459           fe->block = (statement*)feb;
1460           b->statements.push_back(fe);
1461         }
1462
1463       // Add created probe
1464       p->body = b;
1465       derive_probes (s, p, dps);
1466       for (unsigned i = 0; i < dps.size(); i++)
1467         {
1468           derived_probe* dp = dps[i];
1469           s.probes.push_back (dp);
1470           dp->join_group (s);
1471         }
1472       // Repopulate symbol and type info
1473       symresolution_info sym (s);
1474       sym.current_function = 0;
1475       sym.current_probe = dps[0];
1476       dps[0]->body->visit (& sym);
1477
1478       semantic_pass_types(s);
1479       // Mark that variable is read
1480       vut.read.insert (l);
1481     }
1482 }
1483
1484 int
1485 semantic_pass (systemtap_session& s)
1486 {
1487   int rc = 0;
1488
1489   try
1490     {
1491       s.register_library_aliases();
1492       register_standard_tapsets(s);
1493
1494       if (rc == 0) rc = semantic_pass_symbols (s);
1495       if (rc == 0) rc = semantic_pass_conditions (s);
1496       if (rc == 0 && ! s.unoptimized) rc = semantic_pass_optimize1 (s);
1497       if (rc == 0) rc = semantic_pass_types (s);
1498       if (rc == 0) add_global_var_display (s);
1499       if (rc == 0 && ! s.unoptimized) rc = semantic_pass_optimize2 (s);
1500       if (rc == 0) rc = semantic_pass_vars (s);
1501       if (rc == 0) rc = semantic_pass_stats (s);
1502
1503       if (s.num_errors() == 0 && s.probes.size() == 0 && !s.listing_mode)
1504         throw semantic_error ("no probes found");
1505     }
1506   catch (const semantic_error& e)
1507     {
1508       s.print_error (e);
1509       rc ++;
1510     }
1511
1512   return rc;
1513 }
1514
1515
1516 // ------------------------------------------------------------------------
1517
1518
// Default constructor: pattern_root gets a freshly allocated match_node;
// every other member listed below is initialized to 0.
systemtap_session::systemtap_session ():
  // NB: pointer members must be manually initialized!
  pattern_root(new match_node),
  user_file (0),
  // per-kind derived-probe group members; all start out as 0
  be_derived_probes(0),
  dwarf_derived_probes(0),
  kprobe_derived_probes(0),
  hwbkpt_derived_probes(0),
  uprobe_derived_probes(0),
  utrace_derived_probes(0),
  itrace_derived_probes(0),
  task_finder_derived_probes(0),
  timer_derived_probes(0),
  profile_derived_probes(0),
  mark_derived_probes(0),
  tracepoint_derived_probes(0),
  hrtimer_derived_probes(0),
  perfmon_derived_probes(0),
  procfs_derived_probes(0),
  op (0), up (0),
  sym_kprobes_text_start (0),
  sym_kprobes_text_end (0),
  sym_stext (0),
  module_cache (0),
  // no token has been printed yet (see print_token)
  last_token (0)
{
}
1546
1547
1548 // Print this given token, but abbreviate it if the last one had the
1549 // same file name.
1550 void
1551 systemtap_session::print_token (ostream& o, const token* tok)
1552 {
1553   assert (tok);
1554
1555   if (last_token && last_token->location.file == tok->location.file)
1556     {
1557       stringstream tmpo;
1558       tmpo << *tok;
1559       string ts = tmpo.str();
1560       // search & replace the file name with nothing
1561       size_t idx = ts.find (tok->location.file->name);
1562       if (idx != string::npos)
1563           ts.replace (idx, tok->location.file->name.size(), "");
1564
1565       o << ts;
1566     }
1567   else
1568     o << *tok;
1569
1570   last_token = tok;
1571 }
1572
1573
1574
1575 void
1576 systemtap_session::print_error (const semantic_error& e)
1577 {
1578   string message_str[2];
1579   string align_semantic_error ("        ");
1580
1581   // We generate two messages.  The second one ([1]) is printed
1582   // without token compression, for purposes of duplicate elimination.
1583   // This way, the same message that may be generated once with a
1584   // compressed and once with an uncompressed token still only gets
1585   // printed once.
1586   for (int i=0; i<2; i++)
1587     {
1588       stringstream message;
1589
1590       message << "semantic error: " << e.what ();
1591       if (e.tok1 || e.tok2)
1592         message << ": ";
1593       if (e.tok1)
1594         {
1595           if (i == 0) print_token (message, e.tok1);
1596           else message << *e.tok1;
1597         }
1598       message << e.msg2;
1599       if (e.tok2)
1600         {
1601           if (i == 0) print_token (message, e.tok2);
1602           else message << *e.tok2;
1603         }
1604       message << endl;
1605       message_str[i] = message.str();
1606     }
1607
1608   // Duplicate elimination
1609   if (seen_errors.find (message_str[1]) == seen_errors.end())
1610     {
1611       seen_errors.insert (message_str[1]);
1612       cerr << message_str[0];
1613
1614       if (e.tok1)
1615         print_error_source (cerr, align_semantic_error, e.tok1);
1616
1617       if (e.tok2)
1618         print_error_source (cerr, align_semantic_error, e.tok2);
1619     }
1620
1621   if (e.chain)
1622     print_error (* e.chain);
1623 }
1624
1625 void
1626 systemtap_session::print_error_source (std::ostream& message,
1627                                        std::string& align, const token* tok)
1628 {
1629   unsigned i = 0;
1630
1631   assert (tok);
1632   if (!tok->location.file)
1633     //No source to print, silently exit
1634     return;
1635
1636   unsigned line = tok->location.line;
1637   unsigned col = tok->location.column;
1638   const string &file_contents = tok->location.file->file_contents;
1639
1640   size_t start_pos = 0, end_pos = 0;
1641   //Navigate to the appropriate line
1642   while (i != line && end_pos != std::string::npos)
1643     {
1644       start_pos = end_pos;
1645       end_pos = file_contents.find ('\n', start_pos) + 1;
1646       i++;
1647     }
1648   message << align << "source: " << file_contents.substr (start_pos, end_pos-start_pos-1) << endl;
1649   message << align << "        ";
1650   //Navigate to the appropriate column
1651   for (i=start_pos; i<start_pos+col-1; i++)
1652     {
1653       if(isspace(file_contents[i]))
1654         message << file_contents[i];
1655       else
1656         message << ' ';
1657     }
1658   message << "^" << endl;
1659 }
1660
1661 void
1662 systemtap_session::print_warning (const string& message_str, const token* tok)
1663 {
1664   // Duplicate elimination
1665   string align_warning (" ");
1666   if (seen_warnings.find (message_str) == seen_warnings.end())
1667     {
1668       seen_warnings.insert (message_str);
1669       clog << "WARNING: " << message_str;
1670       if (tok) { clog << ": "; print_token (clog, tok); }
1671       clog << endl;
1672       if (tok) { print_error_source (clog, align_warning, tok); }
1673     }
1674 }
1675
1676
1677 // ------------------------------------------------------------------------
1678 // semantic processing: symbol resolution
1679
1680
// Bind the resolver to session S.  Neither a function nor a probe is
// current initially; callers set current_function / current_probe
// before visiting a body.
symresolution_info::symresolution_info (systemtap_session& s):
  session (s), current_function (0), current_probe (0)
{
}
1685
1686
1687 void
1688 symresolution_info::visit_block (block* e)
1689 {
1690   for (unsigned i=0; i<e->statements.size(); i++)
1691     {
1692       try
1693         {
1694           e->statements[i]->visit (this);
1695         }
1696       catch (const semantic_error& e)
1697         {
1698           session.print_error (e);
1699         }
1700     }
1701 }
1702
1703
1704 void
1705 symresolution_info::visit_foreach_loop (foreach_loop* e)
1706 {
1707   for (unsigned i=0; i<e->indexes.size(); i++)
1708     e->indexes[i]->visit (this);
1709
1710   symbol *array = NULL;
1711   hist_op *hist = NULL;
1712   classify_indexable (e->base, array, hist);
1713
1714   if (array)
1715     {
1716       if (!array->referent)
1717         {
1718           vardecl* d = find_var (array->name, e->indexes.size (), array->tok);
1719           if (d)
1720             array->referent = d;
1721           else
1722             {
1723               stringstream msg;
1724               msg << "unresolved arity-" << e->indexes.size()
1725                   << " global array " << array->name;
1726               throw semantic_error (msg.str(), e->tok);
1727             }
1728         }
1729     }
1730   else
1731     {
1732       assert (hist);
1733       hist->visit (this);
1734     }
1735
1736   if (e->limit)
1737     e->limit->visit (this);
1738
1739   e->block->visit (this);
1740 }
1741
1742
1743 struct
1744 delete_statement_symresolution_info:
1745   public traversing_visitor
1746 {
1747   symresolution_info *parent;
1748
1749   delete_statement_symresolution_info (symresolution_info *p):
1750     parent(p)
1751   {}
1752
1753   void visit_arrayindex (arrayindex* e)
1754   {
1755     parent->visit_arrayindex (e);
1756   }
1757   void visit_functioncall (functioncall* e)
1758   {
1759     parent->visit_functioncall (e);
1760   }
1761
1762   void visit_symbol (symbol* e)
1763   {
1764     if (e->referent)
1765       return;
1766
1767     vardecl* d = parent->find_var (e->name, -1, e->tok);
1768     if (d)
1769       e->referent = d;
1770     else
1771       throw semantic_error ("unresolved array in delete statement", e->tok);
1772   }
1773 };
1774
1775 void
1776 symresolution_info::visit_delete_statement (delete_statement* s)
1777 {
1778   delete_statement_symresolution_info di (this);
1779   s->value->visit (&di);
1780 }
1781
1782
1783 void
1784 symresolution_info::visit_symbol (symbol* e)
1785 {
1786   if (e->referent)
1787     return;
1788
1789   vardecl* d = find_var (e->name, 0, e->tok);
1790   if (d)
1791     e->referent = d;
1792   else
1793     {
1794       // new local
1795       vardecl* v = new vardecl;
1796       v->name = e->name;
1797       v->tok = e->tok;
1798       if (current_function)
1799         current_function->locals.push_back (v);
1800       else if (current_probe)
1801         current_probe->locals.push_back (v);
1802       else
1803         // must be probe-condition expression
1804         throw semantic_error ("probe condition must not reference undeclared global", e->tok);
1805       e->referent = v;
1806     }
1807 }
1808
1809
1810 void
1811 symresolution_info::visit_arrayindex (arrayindex* e)
1812 {
1813   for (unsigned i=0; i<e->indexes.size(); i++)
1814     e->indexes[i]->visit (this);
1815
1816   symbol *array = NULL;
1817   hist_op *hist = NULL;
1818   classify_indexable(e->base, array, hist);
1819
1820   if (array)
1821     {
1822       if (array->referent)
1823         return;
1824
1825       vardecl* d = find_var (array->name, e->indexes.size (), array->tok);
1826       if (d)
1827         array->referent = d;
1828       else
1829         {
1830           // new local
1831           vardecl* v = new vardecl;
1832           v->set_arity(e->indexes.size());
1833           v->name = array->name;
1834           v->tok = array->tok;
1835           if (current_function)
1836             current_function->locals.push_back (v);
1837           else if (current_probe)
1838             current_probe->locals.push_back (v);
1839           else
1840             // must not happen
1841             throw semantic_error ("no current probe/function", e->tok);
1842           array->referent = v;
1843         }
1844     }
1845   else
1846     {
1847       assert (hist);
1848       hist->visit (this);
1849     }
1850 }
1851
1852
1853 void
1854 symresolution_info::visit_functioncall (functioncall* e)
1855 {
1856   // XXX: we could relax this, if we're going to examine the
1857   // vartracking data recursively.  See testsuite/semko/fortytwo.stp.
1858   if (! (current_function || current_probe))
1859     {
1860       // must be probe-condition expression
1861       throw semantic_error ("probe condition must not reference function", e->tok);
1862     }
1863
1864   for (unsigned i=0; i<e->args.size(); i++)
1865     e->args[i]->visit (this);
1866
1867   if (e->referent)
1868     return;
1869
1870   functiondecl* d = find_function (e->function, e->args.size ());
1871   if (d)
1872     e->referent = d;
1873   else
1874     {
1875       stringstream msg;
1876       msg << "unresolved arity-" << e->args.size()
1877           << " function";
1878       throw semantic_error (msg.str(), e->tok);
1879     }
1880 }
1881
1882
1883 vardecl*
1884 symresolution_info::find_var (const string& name, int arity, const token* tok)
1885 {
1886   if (current_function || current_probe)
1887     {
1888       // search locals
1889       vector<vardecl*>& locals = (current_function ?
1890                                   current_function->locals :
1891                                   current_probe->locals);
1892
1893
1894       for (unsigned i=0; i<locals.size(); i++)
1895         if (locals[i]->name == name
1896             && locals[i]->compatible_arity(arity))
1897           {
1898             locals[i]->set_arity (arity);
1899             return locals[i];
1900           }
1901     }
1902
1903   // search function formal parameters (for scalars)
1904   if (arity == 0 && current_function)
1905     for (unsigned i=0; i<current_function->formal_args.size(); i++)
1906       if (current_function->formal_args[i]->name == name)
1907         {
1908           // NB: no need to check arity here: formal args always scalar
1909           current_function->formal_args[i]->set_arity (0);
1910           return current_function->formal_args[i];
1911         }
1912
1913   // search processed globals
1914   for (unsigned i=0; i<session.globals.size(); i++)
1915     if (session.globals[i]->name == name
1916         && session.globals[i]->compatible_arity(arity))
1917       {
1918         session.globals[i]->set_arity (arity);
1919         if (! session.suppress_warnings)
1920           {
1921             vardecl* v = session.globals[i];
1922             // clog << "resolved " << *tok << " to global " << *v->tok << endl;
1923             if (v->tok->location.file != tok->location.file)
1924               {
1925                 session.print_warning ("cross-file global variable reference to " + lex_cast (*v->tok) + " from",
1926                                        tok);
1927               }
1928           }
1929         return session.globals[i];
1930       }
1931
1932   // search library globals
1933   for (unsigned i=0; i<session.library_files.size(); i++)
1934     {
1935       stapfile* f = session.library_files[i];
1936       for (unsigned j=0; j<f->globals.size(); j++)
1937         {
1938           vardecl* g = f->globals[j];
1939           if (g->name == name && g->compatible_arity (arity))
1940             {
1941               g->set_arity (arity);
1942
1943               // put library into the queue if not already there
1944               if (find (session.files.begin(), session.files.end(), f)
1945                   == session.files.end())
1946                 session.files.push_back (f);
1947
1948               return g;
1949             }
1950         }
1951     }
1952
1953   return 0;
1954 }
1955
1956
1957 functiondecl*
1958 symresolution_info::find_function (const string& name, unsigned arity)
1959 {
1960   // the common path
1961   if (session.functions.find(name) != session.functions.end())
1962     {
1963       functiondecl* fd = session.functions[name];
1964       assert (fd->name == name);
1965       if (fd->formal_args.size() == arity)
1966         return fd;
1967     }
1968
1969   // search library globals
1970   for (unsigned i=0; i<session.library_files.size(); i++)
1971     {
1972       stapfile* f = session.library_files[i];
1973       for (unsigned j=0; j<f->functions.size(); j++)
1974         if (f->functions[j]->name == name &&
1975             f->functions[j]->formal_args.size() == arity)
1976           {
1977             // put library into the queue if not already there
1978             if (0) // session.verbose_resolution
1979               cerr << "      function " << name << " "
1980                    << "is defined from " << f->name << endl;
1981
1982             if (find (session.files.begin(), session.files.end(), f)
1983                 == session.files.end())
1984               session.files.push_back (f);
1985             // else .. print different message?
1986
1987             return f->functions[j];
1988           }
1989     }
1990
1991   return 0;
1992 }
1993
1994
1995
1996 // ------------------------------------------------------------------------
1997 // optimization
1998
1999
2000 // Do away with functiondecls that are never (transitively) called
2001 // from probes.
2002 void semantic_pass_opt1 (systemtap_session& s, bool& relaxed_p)
2003 {
2004   functioncall_traversing_visitor ftv;
2005   for (unsigned i=0; i<s.probes.size(); i++)
2006     {
2007       s.probes[i]->body->visit (& ftv);
2008       if (s.probes[i]->sole_location()->condition)
2009         s.probes[i]->sole_location()->condition->visit (& ftv);
2010     }
2011   vector<functiondecl*> new_unused_functions;
2012   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
2013     {
2014       functiondecl* fd = it->second;
2015       if (ftv.traversed.find(fd) == ftv.traversed.end())
2016         {
2017           if (fd->tok->location.file->name == s.user_file->name && // !tapset
2018               ! s.suppress_warnings)
2019             s.print_warning ("eliding unused function '" + fd->name + "'", fd->tok);
2020           else if (s.verbose>2)
2021             clog << "Eliding unused function " << fd->name
2022                  << endl;
2023           // s.functions.erase (it); // NB: can't, since we're already iterating upon it
2024           new_unused_functions.push_back (fd);
2025           relaxed_p = false;
2026         }
2027     }
2028   for (unsigned i=0; i<new_unused_functions.size(); i++)
2029     {
2030       map<string,functiondecl*>::iterator where = s.functions.find (new_unused_functions[i]->name);
2031       assert (where != s.functions.end());
2032       s.functions.erase (where);
2033       if (s.tapset_compile_coverage)
2034         s.unused_functions.push_back (new_unused_functions[i]);
2035     }
2036 }
2037
2038
2039 // ------------------------------------------------------------------------
2040
2041 // Do away with local & global variables that are never
2042 // written nor read.
2043 void semantic_pass_opt2 (systemtap_session& s, bool& relaxed_p, unsigned iterations)
2044 {
2045   varuse_collecting_visitor vut(s);
2046
2047   for (unsigned i=0; i<s.probes.size(); i++)
2048     {
2049       s.probes[i]->body->visit (& vut);
2050
2051       if (s.probes[i]->sole_location()->condition)
2052         s.probes[i]->sole_location()->condition->visit (& vut);
2053     }
2054
2055   // NB: Since varuse_collecting_visitor also traverses down
2056   // actually called functions, we don't need to explicitly
2057   // iterate over them.  Uncalled ones should have been pruned
2058   // in _opt1 above.
2059   //
2060   // for (unsigned i=0; i<s.functions.size(); i++)
2061   //   s.functions[i]->body->visit (& vut);
2062
2063   // Now in vut.read/written, we have a mixture of all locals, globals
2064
2065   for (unsigned i=0; i<s.probes.size(); i++)
2066     for (unsigned j=0; j<s.probes[i]->locals.size(); /* see below */)
2067       {
2068         vardecl* l = s.probes[i]->locals[j];
2069
2070         if (vut.read.find (l) == vut.read.end() &&
2071             vut.written.find (l) == vut.written.end())
2072           {
2073             if (l->tok->location.file->name == s.user_file->name && // !tapset
2074                 ! s.suppress_warnings)
2075               s.print_warning ("eliding unused variable '" + l->name + "'", l->tok);
2076             else if (s.verbose>2)
2077               clog << "Eliding unused local variable "
2078                    << l->name << " in " << s.probes[i]->name << endl;
2079             if (s.tapset_compile_coverage) {
2080               s.probes[i]->unused_locals.push_back
2081                       (s.probes[i]->locals[j]);
2082             }
2083             s.probes[i]->locals.erase(s.probes[i]->locals.begin() + j);
2084             relaxed_p = false;
2085             // don't increment j
2086           }
2087         else
2088           {
2089             if (vut.written.find (l) == vut.written.end())
2090               if (iterations == 0 && ! s.suppress_warnings)
2091                   {
2092                     stringstream o;
2093                     vector<vardecl*>::iterator it;
2094                     for (it = s.probes[i]->locals.begin(); it != s.probes[i]->locals.end(); it++)
2095                       if (l->name != (*it)->name)
2096                         o << " " <<  (*it)->name;
2097                     for (it = s.globals.begin(); it != s.globals.end(); it++)
2098                       if (l->name != (*it)->name)
2099                         o << " " <<  (*it)->name;
2100
2101                     s.print_warning ("never-assigned local variable '" + l->name + "' " +
2102                                      (o.str() == "" ? "" : ("(alternatives:" + o.str() + ")")), l->tok);
2103                   }
2104             j++;
2105           }
2106       }
2107
2108   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
2109     {
2110       functiondecl *fd = it->second;
2111       for (unsigned j=0; j<fd->locals.size(); /* see below */)
2112         {
2113           vardecl* l = fd->locals[j];
2114           if (vut.read.find (l) == vut.read.end() &&
2115               vut.written.find (l) == vut.written.end())
2116             {
2117               if (l->tok->location.file->name == s.user_file->name && // !tapset
2118                   ! s.suppress_warnings)
2119                 s.print_warning ("eliding unused variable '" + l->name + "'", l->tok);
2120               else if (s.verbose>2)
2121                 clog << "Eliding unused local variable "
2122                      << l->name << " in function " << fd->name
2123                      << endl;
2124               if (s.tapset_compile_coverage) {
2125                 fd->unused_locals.push_back (fd->locals[j]);
2126               }
2127               fd->locals.erase(fd->locals.begin() + j);
2128               relaxed_p = false;
2129               // don't increment j
2130             }
2131           else
2132             {
2133               if (vut.written.find (l) == vut.written.end())
2134                 if (iterations == 0 && ! s.suppress_warnings)
2135                   {
2136                     stringstream o;
2137                     vector<vardecl*>::iterator it;
2138                     for (it = fd->formal_args.begin() ;
2139                          it != fd->formal_args.end(); it++)
2140                       if (l->name != (*it)->name)
2141                         o << " " << (*it)->name;
2142                     for (it = fd->locals.begin(); it != fd->locals.end(); it++)
2143                       if (l->name != (*it)->name)
2144                         o << " " << (*it)->name;
2145                     for (it = s.globals.begin(); it != s.globals.end(); it++)
2146                       if (l->name != (*it)->name)
2147                         o << " " << (*it)->name;
2148
2149                     s.print_warning ("never-assigned local variable '" + l->name + "' " +
2150                                      (o.str() == "" ? "" : ("(alternatives:" + o.str() + ")")), l->tok);
2151                   }
2152
2153               j++;
2154             }
2155         }
2156     }
2157   for (unsigned i=0; i<s.globals.size(); /* see below */)
2158     {
2159       vardecl* l = s.globals[i];
2160       if (vut.read.find (l) == vut.read.end() &&
2161           vut.written.find (l) == vut.written.end())
2162         {
2163           if (l->tok->location.file->name == s.user_file->name && // !tapset
2164               ! s.suppress_warnings)
2165             s.print_warning ("eliding unused variable '" + l->name + "'", l->tok);
2166           else if (s.verbose>2)
2167             clog << "Eliding unused global variable "
2168                  << l->name << endl;
2169           if (s.tapset_compile_coverage) {
2170             s.unused_globals.push_back(s.globals[i]);
2171           }
2172           s.globals.erase(s.globals.begin() + i);
2173           relaxed_p = false;
2174           // don't increment i
2175         }
2176       else
2177         {
2178           if (vut.written.find (l) == vut.written.end() && ! l->init) // no initializer
2179             if (iterations == 0 && ! s.suppress_warnings)
2180               {
2181                 stringstream o;
2182                 vector<vardecl*>::iterator it;
2183                 for (it = s.globals.begin(); it != s.globals.end(); it++)
2184                   if (l->name != (*it)->name)
2185                     o << " " << (*it)->name;
2186
2187                 s.print_warning ("never-assigned global variable '" + l->name + "' " +
2188                                  (o.str() == "" ? "" : ("(alternatives:" + o.str() + ")")), l->tok);
2189               }
2190
2191           i++;
2192         }
2193     }
2194 }
2195
2196
2197 // ------------------------------------------------------------------------
2198
2199 struct dead_assignment_remover: public update_visitor
2200 {
2201   systemtap_session& session;
2202   bool& relaxed_p;
2203   const varuse_collecting_visitor& vut;
2204
2205   dead_assignment_remover(systemtap_session& s, bool& r,
2206                           const varuse_collecting_visitor& v):
2207     session(s), relaxed_p(r), vut(v) {}
2208
2209   void visit_assignment (assignment* e);
2210 };
2211
2212
2213 void
2214 dead_assignment_remover::visit_assignment (assignment* e)
2215 {
2216   replace (e->left);
2217   replace (e->right);
2218
2219   symbol* left = get_symbol_within_expression (e->left);
2220   vardecl* leftvar = left->referent; // NB: may be 0 for unresolved $target
2221   if (leftvar) // not unresolved $target, so intended sideeffect may be elided
2222     {
2223       if (vut.read.find(leftvar) == vut.read.end()) // var never read?
2224         {
2225           // NB: Not so fast!  The left side could be an array whose
2226           // index expressions may have side-effects.  This would be
2227           // OK if we could replace the array assignment with a
2228           // statement-expression containing all the index expressions
2229           // and the rvalue... but we can't.
2230           // Another possibility is that we have an unread global variable
2231           // which are kept for probe end value display.
2232
2233           bool is_global = false;
2234           vector<vardecl*>::iterator it;
2235           for (it = session.globals.begin(); it != session.globals.end(); it++)
2236             if (leftvar->name == (*it)->name)
2237               {
2238                 is_global = true;
2239                 break;
2240               }
2241
2242           varuse_collecting_visitor lvut(session);
2243           e->left->visit (& lvut);
2244           if (lvut.side_effect_free () && !is_global) // XXX: use _wrt() once we track focal_vars
2245             {
2246               /* PR 1119: NB: This is not necessary here.  A write-only
2247                  variable will also be elided soon at the next _opt2 iteration.
2248               if (e->left->tok->location.file == session.user_file->name && // !tapset
2249                   ! session.suppress_warnings)
2250                 clog << "WARNING: eliding write-only " << *e->left->tok << endl;
2251               else
2252               */
2253               if (session.verbose>2)
2254                 clog << "Eliding assignment to " << leftvar->name
2255                      << " at " << *e->tok << endl;
2256
2257               provide (e->right); // goodbye assignment*
2258               relaxed_p = false;
2259               return;
2260             }
2261         }
2262     }
2263   provide (e);
2264 }
2265
2266 // Let's remove assignments to variables that are never read.  We
2267 // rewrite "(foo = expr)" as "(expr)".  This makes foo a candidate to
2268 // be optimized away as an unused variable, and expr a candidate to be
2269 // removed as a side-effect-free statement expression.  Wahoo!
2270 void semantic_pass_opt3 (systemtap_session& s, bool& relaxed_p)
2271 {
2272   // Recompute the varuse data, which will probably match the opt2
2273   // copy of the computation, except for those totally unused
2274   // variables that opt2 removed.
2275   varuse_collecting_visitor vut(s);
2276   for (unsigned i=0; i<s.probes.size(); i++)
2277     s.probes[i]->body->visit (& vut); // includes reachable functions too
2278
2279   dead_assignment_remover dar (s, relaxed_p, vut);
2280   // This instance may be reused for multiple probe/function body trims.
2281
2282   for (unsigned i=0; i<s.probes.size(); i++)
2283     dar.replace (s.probes[i]->body);
2284   for (map<string,functiondecl*>::iterator it = s.functions.begin();
2285        it != s.functions.end(); it++)
2286     dar.replace (it->second->body);
2287   // The rewrite operation is performed within the visitor.
2288
2289   // XXX: we could also zap write-only globals here
2290 }
2291
2292
2293 // ------------------------------------------------------------------------
2294
2295 struct dead_stmtexpr_remover: public update_visitor
2296 {
2297   systemtap_session& session;
2298   bool& relaxed_p;
2299   set<vardecl*> focal_vars; // vars considered subject to side-effects
2300
2301   dead_stmtexpr_remover(systemtap_session& s, bool& r):
2302     session(s), relaxed_p(r) {}
2303
2304   void visit_block (block *s);
2305   void visit_null_statement (null_statement *s);
2306   void visit_if_statement (if_statement* s);
2307   void visit_foreach_loop (foreach_loop *s);
2308   void visit_for_loop (for_loop *s);
2309   // XXX: and other places where stmt_expr's might be nested
2310
2311   void visit_expr_statement (expr_statement *s);
2312 };
2313
2314
2315 void
2316 dead_stmtexpr_remover::visit_null_statement (null_statement *s)
2317 {
2318   // easy!
2319   if (session.verbose>2)
2320     clog << "Eliding side-effect-free null statement " << *s->tok << endl;
2321   s = 0;
2322   provide (s);
2323 }
2324
2325
2326 void
2327 dead_stmtexpr_remover::visit_block (block *s)
2328 {
2329   vector<statement*> new_stmts;
2330   for (unsigned i=0; i<s->statements.size(); i++ )
2331     {
2332       statement* new_stmt = require (s->statements[i], true);
2333       if (new_stmt != 0)
2334         {
2335           // flatten nested blocks into this one
2336           block *b = dynamic_cast<block *>(new_stmt);
2337           if (b)
2338             {
2339               if (session.verbose>2)
2340                 clog << "Flattening nested block " << *b->tok << endl;
2341               new_stmts.insert(new_stmts.end(),
2342                   b->statements.begin(), b->statements.end());
2343               relaxed_p = false;
2344             }
2345           else
2346             new_stmts.push_back (new_stmt);
2347         }
2348     }
2349   if (new_stmts.size() == 0)
2350     {
2351       if (session.verbose>2)
2352         clog << "Eliding side-effect-free empty block " << *s->tok << endl;
2353       s = 0;
2354     }
2355   else if (new_stmts.size() == 1)
2356     {
2357       if (session.verbose>2)
2358         clog << "Eliding side-effect-free singleton block " << *s->tok << endl;
2359       provide (new_stmts[0]);
2360       return;
2361     }
2362   else
2363     s->statements = new_stmts;
2364   provide (s);
2365 }
2366
2367 void
2368 dead_stmtexpr_remover::visit_if_statement (if_statement *s)
2369 {
2370   replace (s->thenblock, true);
2371   replace (s->elseblock, true);
2372
2373   if (s->thenblock == 0)
2374     {
2375       if (s->elseblock == 0)
2376         {
2377           // We may be able to elide this statement, if the condition
2378           // expression is side-effect-free.
2379           varuse_collecting_visitor vct(session);
2380           s->condition->visit(& vct);
2381           if (vct.side_effect_free ())
2382             {
2383               if (session.verbose>2)
2384                 clog << "Eliding side-effect-free if statement "
2385                      << *s->tok << endl;
2386               s = 0; // yeah, baby
2387             }
2388           else
2389             {
2390               // We can still turn it into a simple expr_statement though...
2391               if (session.verbose>2)
2392                 clog << "Creating simple evaluation from if statement "
2393                      << *s->tok << endl;
2394               expr_statement *es = new expr_statement;
2395               es->value = s->condition;
2396               es->tok = es->value->tok;
2397               provide (es);
2398               return;
2399             }
2400         }
2401       else
2402         {
2403           // For an else without a then, we can invert the condition logic to
2404           // avoid having a null statement in the thenblock
2405           if (session.verbose>2)
2406             clog << "Inverting the condition of if statement "
2407                  << *s->tok << endl;
2408           unary_expression *ue = new unary_expression;
2409           ue->operand = s->condition;
2410           ue->tok = ue->operand->tok;
2411           ue->op = "!";
2412           s->condition = ue;
2413           s->thenblock = s->elseblock;
2414           s->elseblock = 0;
2415         }
2416     }
2417   provide (s);
2418 }
2419
2420 void
2421 dead_stmtexpr_remover::visit_foreach_loop (foreach_loop *s)
2422 {
2423   replace (s->block, true);
2424
2425   if (s->block == 0)
2426     {
2427       if (session.verbose>2)
2428         clog << "Eliding side-effect-free foreach statement " << *s->tok << endl;
2429       s = 0; // yeah, baby
2430     }
2431   provide (s);
2432 }
2433
2434 void
2435 dead_stmtexpr_remover::visit_for_loop (for_loop *s)
2436 {
2437   replace (s->block, true);
2438
2439   if (s->block == 0)
2440     {
2441       // We may be able to elide this statement, if the condition
2442       // expression is side-effect-free.
2443       varuse_collecting_visitor vct(session);
2444       if (s->init) s->init->visit(& vct);
2445       s->cond->visit(& vct);
2446       if (s->incr) s->incr->visit(& vct);
2447       if (vct.side_effect_free ())
2448         {
2449           if (session.verbose>2)
2450             clog << "Eliding side-effect-free for statement " << *s->tok << endl;
2451           s = 0; // yeah, baby
2452         }
2453       else
2454         {
2455           // Can't elide this whole statement; put a null in there.
2456           s->block = new null_statement();
2457           s->block->tok = s->tok;
2458         }
2459     }
2460   provide (s);
2461 }
2462
2463
2464
2465 void
2466 dead_stmtexpr_remover::visit_expr_statement (expr_statement *s)
2467 {
2468   // Run a varuse query against the operand expression.  If it has no
2469   // side-effects, replace the entire statement expression by a null
2470   // statement with the provide() call.
2471   //
2472   // Unlike many other visitors, we do *not* traverse this outermost
2473   // one into the expression subtrees.  There is no need - no
2474   // expr_statement nodes will be found there.  (Function bodies
2475   // need to be visited explicitly by our caller.)
2476   //
2477   // NB.  While we don't share nodes in the parse tree, let's not
2478   // deallocate *s anyway, just in case...
2479
2480   varuse_collecting_visitor vut(session);
2481   s->value->visit (& vut);
2482
2483   if (vut.side_effect_free_wrt (focal_vars))
2484     {
2485       /* PR 1119: NB: this message is not a good idea here.  It can
2486          name some arbitrary RHS expression of an assignment.
2487       if (s->value->tok->location.file == session.user_file->name && // not tapset
2488           ! session.suppress_warnings)
2489         clog << "WARNING: eliding never-assigned " << *s->value->tok << endl;
2490       else
2491       */
2492       if (session.verbose>2)
2493         clog << "Eliding side-effect-free expression "
2494              << *s->tok << endl;
2495
2496       // NB: this 0 pointer is invalid to leave around for any length of
2497       // time, but the parent parse tree objects above handle it.
2498       s = 0;
2499       relaxed_p = false;
2500     }
2501   provide (s);
2502 }
2503
2504
2505 void semantic_pass_opt4 (systemtap_session& s, bool& relaxed_p)
2506 {
2507   // Finally, let's remove some statement-expressions that have no
2508   // side-effect.  These should be exactly those whose private varuse
2509   // visitors come back with an empty "written" and "embedded" lists.
2510
2511   dead_stmtexpr_remover duv (s, relaxed_p);
2512   // This instance may be reused for multiple probe/function body trims.
2513
2514   for (unsigned i=0; i<s.probes.size(); i++)
2515     {
2516       if (pending_interrupts) break;
2517
2518       derived_probe* p = s.probes[i];
2519
2520       duv.focal_vars.clear ();
2521       duv.focal_vars.insert (s.globals.begin(),
2522                              s.globals.end());
2523       duv.focal_vars.insert (p->locals.begin(),
2524                              p->locals.end());
2525
2526       duv.replace (p->body, true);
2527       if (p->body == 0)
2528         {
2529           if (! s.suppress_warnings
2530               && ! s.timing) // PR10070
2531             s.print_warning ("side-effect-free probe '" + p->name + "'", p->tok);
2532
2533           p->body = new null_statement();
2534           p->body->tok = p->tok;
2535
2536           // XXX: possible duplicate warnings; see below
2537         }
2538     }
2539   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
2540     {
2541       if (pending_interrupts) break;
2542
2543       functiondecl* fn = it->second;
2544       duv.focal_vars.clear ();
2545       duv.focal_vars.insert (fn->locals.begin(),
2546                              fn->locals.end());
2547       duv.focal_vars.insert (fn->formal_args.begin(),
2548                              fn->formal_args.end());
2549       duv.focal_vars.insert (s.globals.begin(),
2550                              s.globals.end());
2551
2552       duv.replace (fn->body, true);
2553       if (fn->body == 0)
2554         {
2555           if (! s.suppress_warnings)
2556             s.print_warning ("side-effect-free function '" + fn->name + "'", fn->tok);
2557
2558           fn->body = new null_statement();
2559           fn->body->tok = fn->tok;
2560
2561           // XXX: the next iteration of the outer optimization loop may
2562           // take this new null_statement away again, and thus give us a
2563           // fresh warning.  It would be better if this fixup was performed
2564           // only after the relaxation iterations.
2565           // XXX: or else see bug #6469.
2566         }
2567     }
2568 }
2569
2570
2571 // ------------------------------------------------------------------------
2572
2573 // The goal of this visitor is to reduce top-level expressions in void context
2574 // into separate statements that evaluate each subcomponent of the expression.
2575 // The dead-statement-remover can later remove some parts if they have no side
2576 // effects.
2577 //
2578 // All expressions must be overridden here so we never visit their subexpressions
2579 // accidentally.  Thus, the only visited expressions should be value of an
2580 // expr_statement.
2581 //
2582 // For an expression to replace its expr_statement with something else, it will
2583 // let the new statement provide(), and then provide(0) for itself.  The
2584 // expr_statement will take this as a sign that it's been replaced.
2585 struct void_statement_reducer: public update_visitor
2586 {
2587   systemtap_session& session;
2588   bool& relaxed_p;
2589   set<vardecl*> focal_vars; // vars considered subject to side-effects
2590
2591   void_statement_reducer(systemtap_session& s, bool& r):
2592     session(s), relaxed_p(r) {}
2593
2594   void visit_expr_statement (expr_statement* s);
2595
2596   // expressions in conditional / loop controls are definitely a side effect,
2597   // but still recurse into the child statements
2598   void visit_if_statement (if_statement* s);
2599   void visit_for_loop (for_loop* s);
2600   void visit_foreach_loop (foreach_loop* s);
2601
2602   // these expressions get rewritten into their statement equivalents
2603   void visit_logical_or_expr (logical_or_expr* e);
2604   void visit_logical_and_expr (logical_and_expr* e);
2605   void visit_ternary_expression (ternary_expression* e);
2606
2607   // all of these can be reduced into simpler statements
2608   void visit_binary_expression (binary_expression* e);
2609   void visit_unary_expression (unary_expression* e);
2610   void visit_comparison (comparison* e);
2611   void visit_concatenation (concatenation* e);
2612   void visit_functioncall (functioncall* e);
2613   void visit_print_format (print_format* e);
2614   void visit_target_symbol (target_symbol* e);
2615   void visit_cast_op (cast_op* e);
2616
2617   // these are a bit hairy to grok due to the intricacies of indexables and
2618   // stats, so I'm chickening out and skipping them...
2619   void visit_array_in (array_in* e) { provide (e); }
2620   void visit_arrayindex (arrayindex* e) { provide (e); }
2621   void visit_stat_op (stat_op* e) { provide (e); }
2622   void visit_hist_op (hist_op* e) { provide (e); }
2623
2624   // these can't be reduced because they always have an effect
2625   void visit_return_statement (return_statement* s) { provide (s); }
2626   void visit_delete_statement (delete_statement* s) { provide (s); }
2627   void visit_pre_crement (pre_crement* e) { provide (e); }
2628   void visit_post_crement (post_crement* e) { provide (e); }
2629   void visit_assignment (assignment* e) { provide (e); }
2630 };
2631
2632
2633 void
2634 void_statement_reducer::visit_expr_statement (expr_statement* s)
2635 {
2636   replace (s->value, true);
2637
2638   // if the expression provides 0, that's our signal that a new
2639   // statement has been provided, so we shouldn't provide this one.
2640   if (s->value != 0)
2641     provide(s);
2642 }
2643
2644 void
2645 void_statement_reducer::visit_if_statement (if_statement* s)
2646 {
2647   // s->condition is never void
2648   replace (s->thenblock);
2649   replace (s->elseblock);
2650   provide (s);
2651 }
2652
2653 void
2654 void_statement_reducer::visit_for_loop (for_loop* s)
2655 {
2656   // s->init/cond/incr are never void
2657   replace (s->block);
2658   provide (s);
2659 }
2660
2661 void
2662 void_statement_reducer::visit_foreach_loop (foreach_loop* s)
2663 {
2664   // s->indexes/base/limit are never void
2665   replace (s->block);
2666   provide (s);
2667 }
2668
2669 void
2670 void_statement_reducer::visit_logical_or_expr (logical_or_expr* e)
2671 {
2672   // In void context, the evaluation of "a || b" is exactly like
2673   // "if (!a) b", so let's do that instead.
2674
2675   if (session.verbose>2)
2676     clog << "Creating if statement from unused logical-or "
2677          << *e->tok << endl;
2678
2679   if_statement *is = new if_statement;
2680   is->tok = e->tok;
2681   is->elseblock = 0;
2682
2683   unary_expression *ue = new unary_expression;
2684   ue->operand = e->left;
2685   ue->tok = e->tok;
2686   ue->op = "!";
2687   is->condition = ue;
2688
2689   expr_statement *es = new expr_statement;
2690   es->value = e->right;
2691   es->tok = es->value->tok;
2692   is->thenblock = es;
2693
2694   is->visit(this);
2695   relaxed_p = false;
2696   e = 0;
2697   provide (e);
2698 }
2699
2700 void
2701 void_statement_reducer::visit_logical_and_expr (logical_and_expr* e)
2702 {
2703   // In void context, the evaluation of "a && b" is exactly like
2704   // "if (a) b", so let's do that instead.
2705
2706   if (session.verbose>2)
2707     clog << "Creating if statement from unused logical-and "
2708          << *e->tok << endl;
2709
2710   if_statement *is = new if_statement;
2711   is->tok = e->tok;
2712   is->elseblock = 0;
2713   is->condition = e->left;
2714
2715   expr_statement *es = new expr_statement;
2716   es->value = e->right;
2717   es->tok = es->value->tok;
2718   is->thenblock = es;
2719
2720   is->visit(this);
2721   relaxed_p = false;
2722   e = 0;
2723   provide (e);
2724 }
2725
2726 void
2727 void_statement_reducer::visit_ternary_expression (ternary_expression* e)
2728 {
2729   // In void context, the evaluation of "a ? b : c" is exactly like
2730   // "if (a) b else c", so let's do that instead.
2731
2732   if (session.verbose>2)
2733     clog << "Creating if statement from unused ternary expression "
2734          << *e->tok << endl;
2735
2736   if_statement *is = new if_statement;
2737   is->tok = e->tok;
2738   is->condition = e->cond;
2739
2740   expr_statement *es = new expr_statement;
2741   es->value = e->truevalue;
2742   es->tok = es->value->tok;
2743   is->thenblock = es;
2744
2745   es = new expr_statement;
2746   es->value = e->falsevalue;
2747   es->tok = es->value->tok;
2748   is->elseblock = es;
2749
2750   is->visit(this);
2751   relaxed_p = false;
2752   e = 0;
2753   provide (e);
2754 }
2755
2756 void
2757 void_statement_reducer::visit_binary_expression (binary_expression* e)
2758 {
2759   // When the result of a binary operation isn't needed, it's just as good to
2760   // evaluate the operands as sequential statements in a block.
2761
2762   if (session.verbose>2)
2763     clog << "Eliding unused binary " << *e->tok << endl;
2764
2765   block *b = new block;
2766   b->tok = e->tok;
2767
2768   expr_statement *es = new expr_statement;
2769   es->value = e->left;
2770   es->tok = es->value->tok;
2771   b->statements.push_back(es);
2772
2773   es = new expr_statement;
2774   es->value = e->right;
2775   es->tok = es->value->tok;
2776   b->statements.push_back(es);
2777
2778   b->visit(this);
2779   relaxed_p = false;
2780   e = 0;
2781   provide (e);
2782 }
2783
2784 void
2785 void_statement_reducer::visit_unary_expression (unary_expression* e)
2786 {
2787   // When the result of a unary operation isn't needed, it's just as good to
2788   // evaluate the operand directly
2789
2790   if (session.verbose>2)
2791     clog << "Eliding unused unary " << *e->tok << endl;
2792
2793   relaxed_p = false;
2794   e->operand->visit(this);
2795 }
2796
2797 void
2798 void_statement_reducer::visit_comparison (comparison* e)
2799 {
2800   visit_binary_expression(e);
2801 }
2802
2803 void
2804 void_statement_reducer::visit_concatenation (concatenation* e)
2805 {
2806   visit_binary_expression(e);
2807 }
2808
2809 void
2810 void_statement_reducer::visit_functioncall (functioncall* e)
2811 {
2812   // If a function call is pure and its result ignored, we can elide the call
2813   // and just evaluate the arguments in sequence
2814
2815   if (!e->args.size())
2816     {
2817       provide (e);
2818       return;
2819     }
2820
2821   varuse_collecting_visitor vut(session);
2822   vut.traversed.insert (e->referent);
2823   vut.current_function = e->referent;
2824   e->referent->body->visit (& vut);
2825   if (!vut.side_effect_free_wrt (focal_vars))
2826     {
2827       provide (e);
2828       return;
2829     }
2830
2831   if (session.verbose>2)
2832     clog << "Eliding side-effect-free function call " << *e->tok << endl;
2833
2834   block *b = new block;
2835   b->tok = e->tok;
2836
2837   for (unsigned i=0; i<e->args.size(); i++ )
2838     {
2839       expr_statement *es = new expr_statement;
2840       es->value = e->args[i];
2841       es->tok = es->value->tok;
2842       b->statements.push_back(es);
2843     }
2844
2845   b->visit(this);
2846   relaxed_p = false;
2847   e = 0;
2848   provide (e);
2849 }
2850
2851 void
2852 void_statement_reducer::visit_print_format (print_format* e)
2853 {
2854   // When an sprint's return value is ignored, we can simply evaluate the
2855   // arguments in sequence
2856
2857   if (e->print_to_stream || !e->args.size())
2858     {
2859       provide (e);
2860       return;
2861     }
2862
2863   if (session.verbose>2)
2864     clog << "Eliding unused print " << *e->tok << endl;
2865
2866   block *b = new block;
2867   b->tok = e->tok;
2868
2869   for (unsigned i=0; i<e->args.size(); i++ )
2870     {
2871       expr_statement *es = new expr_statement;
2872       es->value = e->args[i];
2873       es->tok = es->value->tok;
2874       b->statements.push_back(es);
2875     }
2876
2877   b->visit(this);
2878   relaxed_p = false;
2879   e = 0;
2880   provide (e);
2881 }
2882
2883 void
2884 void_statement_reducer::visit_target_symbol (target_symbol* e)
2885 {
2886   // When target_symbol isn't needed, it's just as good to
2887   // evaluate any array indexes directly
2888
2889   block *b = new block;
2890   b->tok = e->tok;
2891
2892   for (unsigned i=0; i<e->components.size(); i++ )
2893     {
2894       if (e->components[i].type != target_symbol::comp_expression_array_index)
2895         continue;
2896
2897       expr_statement *es = new expr_statement;
2898       es->value = e->components[i].expr_index;
2899       es->tok = es->value->tok;
2900       b->statements.push_back(es);
2901     }
2902
2903   if (b->statements.empty())
2904     {
2905       delete b;
2906       provide (e);
2907       return;
2908     }
2909
2910   if (session.verbose>2)
2911     clog << "Eliding unused target symbol " << *e->tok << endl;
2912
2913   b->visit(this);
2914   relaxed_p = false;
2915   e = 0;
2916   provide (e);
2917 }
2918
2919 void
2920 void_statement_reducer::visit_cast_op (cast_op* e)
2921 {
2922   // When the result of a cast operation isn't needed, it's just as good to
2923   // evaluate the operand and any array indexes directly
2924
2925   block *b = new block;
2926   b->tok = e->tok;
2927
2928   expr_statement *es = new expr_statement;
2929   es->value = e->operand;
2930   es->tok = es->value->tok;
2931   b->statements.push_back(es);
2932
2933   for (unsigned i=0; i<e->components.size(); i++ )
2934     {
2935       if (e->components[i].type != target_symbol::comp_expression_array_index)
2936         continue;
2937
2938       es = new expr_statement;
2939       es->value = e->components[i].expr_index;
2940       es->tok = es->value->tok;
2941       b->statements.push_back(es);
2942     }
2943
2944   if (session.verbose>2)
2945     clog << "Eliding unused typecast " << *e->tok << endl;
2946
2947   b->visit(this);
2948   relaxed_p = false;
2949   e = 0;
2950   provide (e);
2951 }
2952
2953
2954 void semantic_pass_opt5 (systemtap_session& s, bool& relaxed_p)
2955 {
2956   // Let's simplify statements with unused computed values.
2957
2958   void_statement_reducer vuv (s, relaxed_p);
2959   // This instance may be reused for multiple probe/function body trims.
2960
2961   vuv.focal_vars.insert (s.globals.begin(), s.globals.end());
2962
2963   for (unsigned i=0; i<s.probes.size(); i++)
2964     vuv.replace (s.probes[i]->body);
2965   for (map<string,functiondecl*>::iterator it = s.functions.begin();
2966        it != s.functions.end(); it++)
2967     vuv.replace (it->second->body);
2968 }
2969
2970
2971 struct duplicate_function_remover: public functioncall_traversing_visitor
2972 {
2973   systemtap_session& s;
2974   map<functiondecl*, functiondecl*>& duplicate_function_map;
2975
2976   duplicate_function_remover(systemtap_session& sess,
2977                              map<functiondecl*, functiondecl*>&dfm):
2978     s(sess), duplicate_function_map(dfm) {};
2979
2980   void visit_functioncall (functioncall* e);
2981 };
2982
2983 void
2984 duplicate_function_remover::visit_functioncall (functioncall *e)
2985 {
2986   functioncall_traversing_visitor::visit_functioncall (e);
2987
2988   // If the current function call reference points to a function that
2989   // is a duplicate, replace it.
2990   if (duplicate_function_map.count(e->referent) != 0)
2991     {
2992       if (s.verbose>2)
2993           clog << "Changing " << e->referent->name
2994                << " reference to "
2995                << duplicate_function_map[e->referent]->name
2996                << " reference\n";
2997       e->tok = duplicate_function_map[e->referent]->tok;
2998       e->function = duplicate_function_map[e->referent]->name;
2999       e->referent = duplicate_function_map[e->referent];
3000     }
3001 }
3002
3003 static string
3004 get_functionsig (functiondecl* f)
3005 {
3006   ostringstream s;
3007
3008   // Get the "name:args body" of the function in s.  We have to
3009   // include the args since the function 'x1(a, b)' is different than
3010   // the function 'x2(b, a)' even if the bodies of the two functions
3011   // are exactly the same.
3012   f->printsig(s);
3013   f->body->print(s);
3014
3015   // printsig puts f->name + ':' on the front.  Remove this
3016   // (otherwise, functions would never compare equal).
3017   string str = s.str().erase(0, f->name.size() + 1);
3018
3019   // Return the function signature.
3020   return str;
3021 }
3022
3023 void semantic_pass_opt6 (systemtap_session& s, bool& relaxed_p)
3024 {
3025   // Walk through all the functions, looking for duplicates.
3026   map<string, functiondecl*> functionsig_map;
3027   map<functiondecl*, functiondecl*> duplicate_function_map;
3028
3029
3030   vector<functiondecl*> newly_zapped_functions;
3031   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
3032     {
3033       functiondecl *fd = it->second;
3034       string functionsig = get_functionsig(fd);
3035
3036       if (functionsig_map.count(functionsig) == 0)
3037         {
3038           // This function is unique.  Remember it.
3039           functionsig_map[functionsig] = fd;
3040         }
3041       else
3042         {
3043           // This function is a duplicate.
3044           duplicate_function_map[fd] = functionsig_map[functionsig];
3045           newly_zapped_functions.push_back (fd);
3046           relaxed_p = false;
3047         }
3048     }
3049   for (unsigned i=0; i<newly_zapped_functions.size(); i++)
3050     {
3051       map<string,functiondecl*>::iterator where = s.functions.find (newly_zapped_functions[i]->name);
3052       assert (where != s.functions.end());
3053       s.functions.erase (where);
3054     }
3055
3056
3057   // If we have duplicate functions, traverse down the tree, replacing
3058   // the appropriate function calls.
3059   // duplicate_function_remover::visit_functioncall() handles the
3060   // details of replacing the function calls.
3061   if (duplicate_function_map.size() != 0)
3062     {
3063       duplicate_function_remover dfr (s, duplicate_function_map);
3064
3065       for (unsigned i=0; i < s.probes.size(); i++)
3066         s.probes[i]->body->visit(&dfr);
3067     }
3068 }
3069
3070
3071 static int
3072 semantic_pass_optimize1 (systemtap_session& s)
3073 {
3074   // In this pass, we attempt to rewrite probe/function bodies to
3075   // eliminate some blatantly unnecessary code.  This is run before
3076   // type inference, but after symbol resolution and derived_probe
3077   // creation.  We run an outer "relaxation" loop that repeats the
3078   // optimizations until none of them find anything to remove.
3079
3080   int rc = 0;
3081
3082   bool relaxed_p = false;
3083   unsigned iterations = 0;
3084   while (! relaxed_p)
3085     {
3086       if (pending_interrupts) break;
3087
3088       relaxed_p = true; // until proven otherwise
3089
3090       semantic_pass_opt1 (s, relaxed_p);
3091       semantic_pass_opt2 (s, relaxed_p, iterations); // produce some warnings only on iteration=0
3092       semantic_pass_opt3 (s, relaxed_p);
3093       semantic_pass_opt4 (s, relaxed_p);
3094       semantic_pass_opt5 (s, relaxed_p);
3095
3096       iterations ++;
3097     }
3098
3099   return rc;
3100 }
3101
3102
3103 static int
3104 semantic_pass_optimize2 (systemtap_session& s)
3105 {
3106   // This is run after type inference.  We run an outer "relaxation"
3107   // loop that repeats the optimizations until none of them find
3108   // anything to remove.
3109
3110   int rc = 0;
3111
3112   bool relaxed_p = false;
3113   while (! relaxed_p)
3114     {
3115       if (pending_interrupts) break;
3116       relaxed_p = true; // until proven otherwise
3117
3118       semantic_pass_opt6 (s, relaxed_p);
3119     }
3120
3121   return rc;
3122 }
3123
3124
3125
3126 // ------------------------------------------------------------------------
3127 // type resolution
3128
3129
3130 static int
3131 semantic_pass_types (systemtap_session& s)
3132 {
3133   int rc = 0;
3134
3135   // next pass: type inference
3136   unsigned iterations = 0;
3137   typeresolution_info ti (s);
3138
3139   ti.assert_resolvability = false;
3140   // XXX: maybe convert to exception-based error signalling
3141   while (1)
3142     {
3143       if (pending_interrupts) break;
3144
3145       iterations ++;
3146       ti.num_newly_resolved = 0;
3147       ti.num_still_unresolved = 0;
3148
3149   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
3150         {
3151           if (pending_interrupts) break;
3152
3153           functiondecl* fd = it->second;
3154           ti.current_probe = 0;
3155           ti.current_function = fd;
3156           ti.t = pe_unknown;
3157           fd->body->visit (& ti);
3158           // NB: we don't have to assert a known type for
3159           // functions here, to permit a "void" function.
3160           // The translator phase will omit the "retvalue".
3161           //
3162           // if (fd->type == pe_unknown)
3163           //   ti.unresolved (fd->tok);
3164         }
3165
3166       for (unsigned j=0; j<s.probes.size(); j++)
3167         {
3168           if (pending_interrupts) break;
3169
3170           derived_probe* pn = s.probes[j];
3171           ti.current_function = 0;
3172           ti.current_probe = pn;
3173           ti.t = pe_unknown;
3174           pn->body->visit (& ti);
3175
3176           probe_point* pp = pn->sole_location();
3177           if (pp->condition)
3178             {
3179               ti.current_function = 0;
3180               ti.current_probe = 0;
3181               ti.t = pe_long; // NB: expected type
3182               pp->condition->visit (& ti);
3183             }
3184         }
3185
3186       for (unsigned j=0; j<s.globals.size(); j++)
3187         {
3188           vardecl* gd = s.globals[j];
3189           if (gd->type == pe_unknown)
3190             ti.unresolved (gd->tok);
3191         }
3192
3193       if (ti.num_newly_resolved == 0) // converged
3194         {
3195           if (ti.num_still_unresolved == 0)
3196             break; // successfully
3197           else if (! ti.assert_resolvability)
3198             ti.assert_resolvability = true; // last pass, with error msgs
3199           else
3200             { // unsuccessful conclusion
3201               rc ++;
3202               break;
3203             }
3204         }
3205     }
3206
3207   return rc + s.num_errors();
3208 }
3209
3210
3211
3212 typeresolution_info::typeresolution_info (systemtap_session& s):
3213   session(s), current_function(0), current_probe(0)
3214 {
3215 }
3216
3217
3218 void
3219 typeresolution_info::visit_literal_number (literal_number* e)
3220 {
3221   assert (e->type == pe_long);
3222   if ((t == e->type) || (t == pe_unknown))
3223     return;
3224
3225   mismatch (e->tok, e->type, t);
3226 }
3227
3228
3229 void
3230 typeresolution_info::visit_literal_string (literal_string* e)
3231 {
3232   assert (e->type == pe_string);
3233   if ((t == e->type) || (t == pe_unknown))
3234     return;
3235
3236   mismatch (e->tok, e->type, t);
3237 }
3238
3239
3240 void
3241 typeresolution_info::visit_logical_or_expr (logical_or_expr *e)
3242 {
3243   visit_binary_expression (e);
3244 }
3245
3246
3247 void
3248 typeresolution_info::visit_logical_and_expr (logical_and_expr *e)
3249 {
3250   visit_binary_expression (e);
3251 }
3252
3253
3254 void
3255 typeresolution_info::visit_comparison (comparison *e)
3256 {
3257   // NB: result of any comparison is an integer!
3258   if (t == pe_stats || t == pe_string)
3259     invalid (e->tok, t);
3260
3261   t = (e->right->type != pe_unknown) ? e->right->type : pe_unknown;
3262   e->left->visit (this);
3263   t = (e->left->type != pe_unknown) ? e->left->type : pe_unknown;
3264   e->right->visit (this);
3265
3266   if (e->left->type != pe_unknown &&
3267       e->right->type != pe_unknown &&
3268       e->left->type != e->right->type)
3269     mismatch (e->tok, e->left->type, e->right->type);
3270
3271   if (e->type == pe_unknown)
3272     {
3273       e->type = pe_long;
3274       resolved (e->tok, e->type);
3275     }
3276 }
3277
3278
3279 void
3280 typeresolution_info::visit_concatenation (concatenation *e)
3281 {
3282   if (t != pe_unknown && t != pe_string)
3283     invalid (e->tok, t);
3284
3285   t = pe_string;
3286   e->left->visit (this);
3287   t = pe_string;
3288   e->right->visit (this);
3289
3290   if (e->type == pe_unknown)
3291     {
3292       e->type = pe_string;
3293       resolved (e->tok, e->type);
3294     }
3295 }
3296
3297
3298 void
3299 typeresolution_info::visit_assignment (assignment *e)
3300 {
3301   if (t == pe_stats)
3302     invalid (e->tok, t);
3303
3304   if (e->op == "<<<") // stats aggregation
3305     {
3306       if (t == pe_string)
3307         invalid (e->tok, t);
3308
3309       t = pe_stats;
3310       e->left->visit (this);
3311       t = pe_long;
3312       e->right->visit (this);
3313       if (e->type == pe_unknown ||
3314           e->type == pe_stats)
3315         {
3316           e->type = pe_long;
3317           resolved (e->tok, e->type);
3318         }
3319     }
3320
3321   else if (e->left->type == pe_stats)
3322     invalid (e->left->tok, e->left->type);
3323
3324   else if (e->right->type == pe_stats)
3325     invalid (e->right->tok, e->right->type);
3326
3327   else if (e->op == "+=" || // numeric only
3328            e->op == "-=" ||
3329            e->op == "*=" ||
3330            e->op == "/=" ||
3331            e->op == "%=" ||
3332            e->op == "&=" ||
3333            e->op == "^=" ||
3334            e->op == "|=" ||
3335            e->op == "<<=" ||
3336            e->op == ">>=" ||
3337            false)
3338     {
3339       visit_binary_expression (e);
3340     }
3341   else if (e->op == ".=" || // string only
3342            false)
3343     {
3344       if (t == pe_long || t == pe_stats)
3345         invalid (e->tok, t);
3346
3347       t = pe_string;
3348       e->left->visit (this);
3349       t = pe_string;
3350       e->right->visit (this);
3351       if (e->type == pe_unknown)
3352         {
3353           e->type = pe_string;
3354           resolved (e->tok, e->type);
3355         }
3356     }
3357   else if (e->op == "=") // overloaded = for string & numeric operands
3358     {
3359       // logic similar to ternary_expression
3360       exp_type sub_type = t;
3361
3362       // Infer types across the l/r values
3363       if (sub_type == pe_unknown && e->type != pe_unknown)
3364         sub_type = e->type;
3365
3366       t = (sub_type != pe_unknown) ? sub_type :
3367         (e->right->type != pe_unknown) ? e->right->type :
3368         pe_unknown;
3369       e->left->visit (this);
3370       t = (sub_type != pe_unknown) ? sub_type :
3371         (e->left->type != pe_unknown) ? e->left->type :
3372         pe_unknown;
3373       e->right->visit (this);
3374
3375       if ((sub_type != pe_unknown) && (e->type == pe_unknown))
3376         {
3377           e->type = sub_type;
3378           resolved (e->tok, e->type);
3379         }
3380       if ((sub_type == pe_unknown) && (e->left->type != pe_unknown))
3381         {
3382           e->type = e->left->type;
3383           resolved (e->tok, e->type);
3384         }
3385
3386       if (e->left->type != pe_unknown &&
3387           e->right->type != pe_unknown &&
3388           e->left->type != e->right->type)
3389         mismatch (e->tok, e->left->type, e->right->type);
3390
3391     }
3392   else
3393     throw semantic_error ("unsupported assignment operator " + e->op);
3394 }
3395
3396
3397 void
3398 typeresolution_info::visit_binary_expression (binary_expression* e)
3399 {
3400   if (t == pe_stats || t == pe_string)
3401     invalid (e->tok, t);
3402
3403   t = pe_long;
3404   e->left->visit (this);
3405   t = pe_long;
3406   e->right->visit (this);
3407
3408   if (e->left->type != pe_unknown &&
3409       e->right->type != pe_unknown &&
3410       e->left->type != e->right->type)
3411     mismatch (e->tok, e->left->type, e->right->type);
3412
3413   if (e->type == pe_unknown)
3414     {
3415       e->type = pe_long;
3416       resolved (e->tok, e->type);
3417     }
3418 }
3419
3420
3421 void
3422 typeresolution_info::visit_pre_crement (pre_crement *e)
3423 {
3424   visit_unary_expression (e);
3425 }
3426
3427
3428 void
3429 typeresolution_info::visit_post_crement (post_crement *e)
3430 {
3431   visit_unary_expression (e);
3432 }
3433
3434
3435 void
3436 typeresolution_info::visit_unary_expression (unary_expression* e)
3437 {
3438   if (t == pe_stats || t == pe_string)
3439     invalid (e->tok, t);
3440
3441   t = pe_long;
3442   e->operand->visit (this);
3443
3444   if (e->type == pe_unknown)
3445     {
3446       e->type = pe_long;
3447       resolved (e->tok, e->type);
3448     }
3449 }
3450
3451
3452 void
3453 typeresolution_info::visit_ternary_expression (ternary_expression* e)
3454 {
3455   exp_type sub_type = t;
3456
3457   t = pe_long;
3458   e->cond->visit (this);
3459
3460   // Infer types across the true/false arms of the ternary expression.
3461
3462   if (sub_type == pe_unknown && e->type != pe_unknown)
3463     sub_type = e->type;
3464   t = sub_type;
3465   e->truevalue->visit (this);
3466   t = sub_type;
3467   e->falsevalue->visit (this);
3468
3469   if ((sub_type == pe_unknown) && (e->type != pe_unknown))
3470     ; // already resolved
3471   else if ((sub_type != pe_unknown) && (e->type == pe_unknown))
3472     {
3473       e->type = sub_type;
3474       resolved (e->tok, e->type);
3475     }
3476   else if ((sub_type == pe_unknown) && (e->truevalue->type != pe_unknown))
3477     {
3478       e->type = e->truevalue->type;
3479       resolved (e->tok, e->type);
3480     }
3481   else if ((sub_type == pe_unknown) && (e->falsevalue->type != pe_unknown))
3482     {
3483       e->type = e->falsevalue->type;
3484       resolved (e->tok, e->type);
3485     }
3486   else if (e->type != sub_type)
3487     mismatch (e->tok, sub_type, e->type);
3488 }
3489
3490
3491 template <class Referrer, class Referent>
3492 void resolve_2types (Referrer* referrer, Referent* referent,
3493                     typeresolution_info* r, exp_type t, bool accept_unknown = false)
3494 {
3495   exp_type& re_type = referrer->type;
3496   const token* re_tok = referrer->tok;
3497   exp_type& te_type = referent->type;
3498   const token* te_tok = referent->tok;
3499
3500   if (t != pe_unknown && re_type == t && re_type == te_type)
3501     ; // do nothing: all three e->types in agreement
3502   else if (t == pe_unknown && re_type != pe_unknown && re_type == te_type)
3503     ; // do nothing: two known e->types in agreement
3504   else if (re_type != pe_unknown && te_type != pe_unknown && re_type != te_type)
3505     r->mismatch (re_tok, re_type, te_type);
3506   else if (re_type != pe_unknown && t != pe_unknown && re_type != t)
3507     r->mismatch (re_tok, re_type, t);
3508   else if (te_type != pe_unknown && t != pe_unknown && te_type != t)
3509     r->mismatch (te_tok, te_type, t);
3510   else if (re_type == pe_unknown && t != pe_unknown)
3511     {
3512       // propagate from upstream
3513       re_type = t;
3514       r->resolved (re_tok, re_type);
3515       // catch re_type/te_type mismatch later
3516     }
3517   else if (re_type == pe_unknown && te_type != pe_unknown)
3518     {
3519       // propagate from referent
3520       re_type = te_type;
3521       r->resolved (re_tok, re_type);
3522       // catch re_type/t mismatch later
3523     }
3524   else if (re_type != pe_unknown && te_type == pe_unknown)
3525     {
3526       // propagate to referent
3527       te_type = re_type;
3528       r->resolved (te_tok, te_type);
3529       // catch re_type/t mismatch later
3530     }
3531   else if (! accept_unknown)
3532     r->unresolved (re_tok);
3533 }
3534
3535
3536 void
3537 typeresolution_info::visit_symbol (symbol* e)
3538 {
3539   assert (e->referent != 0);
3540   resolve_2types (e, e->referent, this, t);
3541 }
3542
3543
3544 void
3545 typeresolution_info::visit_target_symbol (target_symbol* e)
3546 {
3547   if (!e->probe_context_var.empty())
3548     return;
3549
3550   // This occurs only if a target symbol was not resolved over in
3551   // tapset.cxx land, that error was properly suppressed, and the
3552   // later unused-expression-elimination pass didn't get rid of it
3553   // either.  So we have a target symbol that is believed to be of
3554   // genuine use, yet unresolved by the provider.
3555
3556   if (session.verbose > 2)
3557     {
3558       clog << "Resolution problem with ";
3559       if (current_function)
3560         {
3561           clog << "function " << current_function->name << endl;
3562           current_function->body->print (clog);
3563           clog << endl;
3564         }
3565       else if (current_probe)
3566         {
3567           clog << "probe " << current_probe->name << endl;
3568           current_probe->body->print (clog);
3569           clog << endl;
3570         }
3571       else
3572         clog << "other" << endl;
3573     }
3574
3575   if (e->saved_conversion_error)
3576     throw (* (e->saved_conversion_error));
3577   else
3578     throw semantic_error("unresolved target-symbol expression", e->tok);
3579 }
3580
3581
3582 void
3583 typeresolution_info::visit_cast_op (cast_op* e)
3584 {
3585   // Like target_symbol, a cast_op shouldn't survive this far
3586   // unless it was not resolved and its value is really needed.
3587   if (e->saved_conversion_error)
3588     throw (* (e->saved_conversion_error));
3589   else
3590     throw semantic_error("type definition '" + e->type + "' not found", e->tok);
3591 }
3592
3593
3594 void
3595 typeresolution_info::visit_arrayindex (arrayindex* e)
3596 {
3597
3598   symbol *array = NULL;
3599   hist_op *hist = NULL;
3600   classify_indexable(e->base, array, hist);
3601
3602   // Every hist_op has type [int]:int, that is to say, every hist_op
3603   // is a pseudo-one-dimensional integer array type indexed by
3604   // integers (bucket numbers).
3605
3606   if (hist)
3607     {
3608       if (e->indexes.size() != 1)
3609         unresolved (e->tok);
3610       t = pe_long;
3611       e->indexes[0]->visit (this);
3612       if (e->indexes[0]->type != pe_long)
3613         unresolved (e->tok);
3614       hist->visit (this);
3615       if (e->type != pe_long)
3616         {
3617           e->type = pe_long;
3618           resolved (e->tok, pe_long);
3619         }
3620       return;
3621     }
3622
3623   // Now we are left with "normal" map inference and index checking.
3624
3625   assert (array);
3626   assert (array->referent != 0);
3627   resolve_2types (e, array->referent, this, t);
3628
3629   // now resolve the array indexes
3630
3631   // if (e->referent->index_types.size() == 0)
3632   //   // redesignate referent as array
3633   //   e->referent->set_arity (e->indexes.size ());
3634
3635   if (e->indexes.size() != array->referent->index_types.size())
3636     unresolved (e->tok); // symbol resolution should prevent this
3637   else for (unsigned i=0; i<e->indexes.size(); i++)
3638     {
3639       expression* ee = e->indexes[i];
3640       exp_type& ft = array->referent->index_types [i];
3641       t = ft;
3642       ee->visit (this);
3643       exp_type at = ee->type;
3644
3645       if ((at == pe_string || at == pe_long) && ft == pe_unknown)
3646         {
3647           // propagate to formal type
3648           ft = at;
3649           resolved (array->referent->tok, ft);
3650           // uses array decl as there is no token for "formal type"
3651         }
3652       if (at == pe_stats)
3653         invalid (ee->tok, at);
3654       if (ft == pe_stats)
3655         invalid (ee->tok, ft);
3656       if (at != pe_unknown && ft != pe_unknown && ft != at)
3657         mismatch (e->tok, at, ft);
3658       if (at == pe_unknown)
3659           unresolved (ee->tok);
3660     }
3661 }
3662
3663
3664 void
3665 typeresolution_info::visit_functioncall (functioncall* e)
3666 {
3667   assert (e->referent != 0);
3668
3669   resolve_2types (e, e->referent, this, t, true); // accept unknown type
3670
3671   if (e->type == pe_stats)
3672     invalid (e->tok, e->type);
3673
3674   // now resolve the function parameters
3675   if (e->args.size() != e->referent->formal_args.size())
3676     unresolved (e->tok); // symbol resolution should prevent this
3677   else for (unsigned i=0; i<e->args.size(); i++)
3678     {
3679       expression* ee = e->args[i];
3680       exp_type& ft = e->referent->formal_args[i]->type;
3681       const token* fe_tok = e->referent->formal_args[i]->tok;
3682       t = ft;
3683       ee->visit (this);
3684       exp_type at = ee->type;
3685
3686       if (((at == pe_string) || (at == pe_long)) && ft == pe_unknown)
3687         {
3688           // propagate to formal arg
3689           ft = at;
3690           resolved (e->referent->formal_args[i]->tok, ft);
3691         }
3692       if (at == pe_stats)
3693         invalid (e->tok, at);
3694       if (ft == pe_stats)
3695         invalid (fe_tok, ft);
3696       if (at != pe_unknown && ft != pe_unknown && ft != at)
3697         mismatch (e->tok, at, ft);
3698       if (at == pe_unknown)
3699         unresolved (e->tok);
3700     }
3701 }
3702
3703
3704 void
3705 typeresolution_info::visit_block (block* e)
3706 {
3707   for (unsigned i=0; i<e->statements.size(); i++)
3708     {
3709       try
3710         {
3711           t = pe_unknown;
3712           e->statements[i]->visit (this);
3713         }
3714       catch (const semantic_error& e)
3715         {
3716           session.print_error (e);
3717         }
3718     }
3719 }
3720
3721
3722 void
3723 typeresolution_info::visit_embeddedcode (embeddedcode*)
3724 {
3725 }
3726
3727
3728 void
3729 typeresolution_info::visit_if_statement (if_statement* e)
3730 {
3731   t = pe_long;
3732   e->condition->visit (this);
3733
3734   t = pe_unknown;
3735   e->thenblock->visit (this);
3736
3737   if (e->elseblock)
3738     {
3739       t = pe_unknown;
3740       e->elseblock->visit (this);
3741     }
3742 }
3743
3744
3745 void
3746 typeresolution_info::visit_for_loop (for_loop* e)
3747 {
3748   t = pe_unknown;
3749   if (e->init) e->init->visit (this);
3750   t = pe_long;
3751   e->cond->visit (this);
3752   t = pe_unknown;
3753   if (e->incr) e->incr->visit (this);
3754   t = pe_unknown;
3755   e->block->visit (this);
3756 }
3757
3758
3759 void
3760 typeresolution_info::visit_foreach_loop (foreach_loop* e)
3761 {
3762   // See also visit_arrayindex.
3763   // This is different in that, being a statement, we can't assign
3764   // a type to the outer array, only propagate to/from the indexes
3765
3766   // if (e->referent->index_types.size() == 0)
3767   //   // redesignate referent as array
3768   //   e->referent->set_arity (e->indexes.size ());
3769
3770   symbol *array = NULL;
3771   hist_op *hist = NULL;
3772   classify_indexable(e->base, array, hist);
3773
3774   if (hist)
3775     {
3776       if (e->indexes.size() != 1)
3777         unresolved (e->tok);
3778       t = pe_long;
3779       e->indexes[0]->visit (this);
3780       if (e->indexes[0]->type != pe_long)
3781         unresolved (e->tok);
3782       hist->visit (this);
3783     }
3784   else
3785     {
3786       assert (array);
3787       if (e->indexes.size() != array->referent->index_types.size())
3788         unresolved (e->tok); // symbol resolution should prevent this
3789       else for (unsigned i=0; i<e->indexes.size(); i++)
3790         {
3791           expression* ee = e->indexes[i];
3792           exp_type& ft = array->referent->index_types [i];
3793           t = ft;
3794           ee->visit (this);
3795           exp_type at = ee->type;
3796
3797           if ((at == pe_string || at == pe_long) && ft == pe_unknown)
3798             {
3799               // propagate to formal type
3800               ft = at;
3801               resolved (array->referent->tok, ft);
3802               // uses array decl as there is no token for "formal type"
3803             }
3804           if (at == pe_stats)
3805             invalid (ee->tok, at);
3806           if (ft == pe_stats)
3807             invalid (ee->tok, ft);
3808           if (at != pe_unknown && ft != pe_unknown && ft != at)
3809             mismatch (e->tok, at, ft);
3810           if (at == pe_unknown)
3811             unresolved (ee->tok);
3812         }
3813     }
3814
3815   if (e->limit)
3816     {
3817       t = pe_long;
3818       e->limit->visit (this);
3819     }
3820
3821   t = pe_unknown;
3822   e->block->visit (this);
3823 }
3824
3825
3826 void
3827 typeresolution_info::visit_null_statement (null_statement*)
3828 {
3829 }
3830
3831
3832 void
3833 typeresolution_info::visit_expr_statement (expr_statement* e)
3834 {
3835   t = pe_unknown;
3836   e->value->visit (this);
3837 }
3838
3839
3840 struct delete_statement_typeresolution_info:
3841   public throwing_visitor
3842 {
3843   typeresolution_info *parent;
3844   delete_statement_typeresolution_info (typeresolution_info *p):
3845     throwing_visitor ("invalid operand of delete expression"),
3846     parent (p)
3847   {}
3848
3849   void visit_arrayindex (arrayindex* e)
3850   {
3851     parent->visit_arrayindex (e);
3852   }
3853
3854   void visit_symbol (symbol* e)
3855   {
3856     exp_type ignored = pe_unknown;
3857     assert (e->referent != 0);
3858     resolve_2types (e, e->referent, parent, ignored);
3859   }
3860 };
3861
3862
3863 void
3864 typeresolution_info::visit_delete_statement (delete_statement* e)
3865 {
3866   delete_statement_typeresolution_info di (this);
3867   t = pe_unknown;
3868   e->value->visit (&di);
3869 }
3870
3871
3872 void
3873 typeresolution_info::visit_next_statement (next_statement*)
3874 {
3875 }
3876
3877
3878 void
3879 typeresolution_info::visit_break_statement (break_statement*)
3880 {
3881 }
3882
3883
3884 void
3885 typeresolution_info::visit_continue_statement (continue_statement*)
3886 {
3887 }
3888
3889
3890 void
3891 typeresolution_info::visit_array_in (array_in* e)
3892 {
3893   // all unary operators only work on numerics
3894   exp_type t1 = t;
3895   t = pe_unknown; // array value can be anything
3896   e->operand->visit (this);
3897
3898   if (t1 == pe_unknown && e->type != pe_unknown)
3899     ; // already resolved
3900   else if (t1 == pe_string || t1 == pe_stats)
3901     mismatch (e->tok, t1, pe_long);
3902   else if (e->type == pe_unknown)
3903     {
3904       e->type = pe_long;
3905       resolved (e->tok, e->type);
3906     }
3907 }
3908
3909
3910 void
3911 typeresolution_info::visit_return_statement (return_statement* e)
3912 {
3913   // This is like symbol, where the referent is
3914   // the return value of the function.
3915
3916   // translation pass will print error
3917   if (current_function == 0)
3918     return;
3919
3920   exp_type& e_type = current_function->type;
3921   t = current_function->type;
3922   e->value->visit (this);
3923
3924   if (e_type != pe_unknown && e->value->type != pe_unknown
3925       && e_type != e->value->type)
3926     mismatch (current_function->tok, e_type, e->value->type);
3927   if (e_type == pe_unknown &&
3928       (e->value->type == pe_long || e->value->type == pe_string))
3929     {
3930       // propagate non-statistics from value
3931       e_type = e->value->type;
3932       resolved (current_function->tok, e->value->type);
3933     }
3934   if (e->value->type == pe_stats)
3935     invalid (e->value->tok, e->value->type);
3936 }
3937
3938 void
3939 typeresolution_info::visit_print_format (print_format* e)
3940 {
3941   size_t unresolved_args = 0;
3942
3943   if (e->hist)
3944     {
3945       e->hist->visit(this);
3946     }
3947
3948   else if (e->print_with_format)
3949     {
3950       // If there's a format string, we can do both inference *and*
3951       // checking.
3952
3953       // First we extract the subsequence of formatting components
3954       // which are conversions (not just literal string components)
3955
3956       unsigned expected_num_args = 0;
3957       std::vector<print_format::format_component> components;
3958       for (size_t i = 0; i < e->components.size(); ++i)
3959         {
3960           if (e->components[i].type == print_format::conv_unspecified)
3961             throw semantic_error ("Unspecified conversion in print operator format string",
3962                                   e->tok);
3963           else if (e->components[i].type == print_format::conv_literal)
3964             continue;
3965           components.push_back(e->components[i]);
3966           ++expected_num_args;
3967           if (e->components[i].widthtype == print_format::width_dynamic)
3968             ++expected_num_args;
3969           if (e->components[i].prectype == print_format::prec_dynamic)
3970             ++expected_num_args;
3971         }
3972
3973       // Then we check that the number of conversions and the number
3974       // of args agree.
3975
3976       if (expected_num_args != e->args.size())
3977         throw semantic_error ("Wrong number of args to formatted print operator",
3978                               e->tok);
3979
3980       // Then we check that the types of the conversions match the types
3981       // of the args.
3982       unsigned argno = 0;
3983       for (size_t i = 0; i < components.size(); ++i)
3984         {
3985           // Check the dynamic width, if specified
3986           if (components[i].widthtype == print_format::width_dynamic)
3987             {
3988               check_arg_type (pe_long, e->args[argno]);
3989               ++argno;
3990             }
3991
3992           // Check the dynamic precision, if specified
3993           if (components[i].prectype == print_format::prec_dynamic)
3994             {
3995               check_arg_type (pe_long, e->args[argno]);
3996               ++argno;
3997             }
3998
3999           exp_type wanted = pe_unknown;
4000
4001           switch (components[i].type)
4002             {
4003             case print_format::conv_unspecified:
4004             case print_format::conv_literal:
4005               assert (false);
4006               break;
4007
4008             case print_format::conv_signed_decimal:
4009             case print_format::conv_unsigned_decimal:
4010             case print_format::conv_unsigned_octal:
4011             case print_format::conv_unsigned_ptr:
4012             case print_format::conv_unsigned_uppercase_hex:
4013             case print_format::conv_unsigned_lowercase_hex:
4014             case print_format::conv_binary:
4015             case print_format::conv_char:
4016             case print_format::conv_memory:
4017             case print_format::conv_memory_hex:
4018               wanted = pe_long;
4019               break;
4020
4021             case print_format::conv_string:
4022               wanted = pe_string;
4023               break;
4024             }
4025
4026           assert (wanted != pe_unknown);
4027           check_arg_type (wanted, e->args[argno]);
4028           ++argno;
4029         }
4030     }
4031   else
4032     {
4033       // Without a format string, the best we can do is require that
4034       // each argument resolve to a concrete type.
4035       for (size_t i = 0; i < e->args.size(); ++i)
4036         {
4037           t = pe_unknown;
4038           e->args[i]->visit (this);
4039           if (e->args[i]->type == pe_unknown)
4040             {
4041               unresolved (e->args[i]->tok);
4042               ++unresolved_args;
4043             }
4044         }
4045     }
4046
4047   if (unresolved_args == 0)
4048     {
4049       if (e->type == pe_unknown)
4050         {
4051           if (e->print_to_stream)
4052             e->type = pe_long;
4053           else
4054             e->type = pe_string;
4055           resolved (e->tok, e->type);
4056         }
4057     }
4058   else
4059     {
4060       e->type = pe_unknown;
4061       unresolved (e->tok);
4062     }
4063 }
4064
4065
4066 void
4067 typeresolution_info::visit_stat_op (stat_op* e)
4068 {
4069   t = pe_stats;
4070   e->stat->visit (this);
4071   if (e->type == pe_unknown)
4072     {
4073       e->type = pe_long;
4074       resolved (e->tok, e->type);
4075     }
4076   else if (e->type != pe_long)
4077     mismatch (e->tok, e->type, pe_long);
4078 }
4079
4080 void
4081 typeresolution_info::visit_hist_op (hist_op* e)
4082 {
4083   t = pe_stats;
4084   e->stat->visit (this);
4085 }
4086
4087
4088 void
4089 typeresolution_info::check_arg_type (exp_type wanted, expression* arg)
4090 {
4091   t = wanted;
4092   arg->visit (this);
4093
4094   if (arg->type == pe_unknown)
4095     {
4096       arg->type = wanted;
4097       resolved (arg->tok, wanted);
4098     }
4099   else if (arg->type != wanted)
4100     {
4101       mismatch (arg->tok, arg->type, wanted);
4102     }
4103 }
4104
4105
4106 void
4107 typeresolution_info::unresolved (const token* tok)
4108 {
4109   num_still_unresolved ++;
4110
4111   if (assert_resolvability)
4112     {
4113       stringstream msg;
4114       string nm = (current_function ? current_function->name :
4115                    current_probe ? current_probe->name :
4116                    "probe condition");
4117       msg << nm + " with unresolved type";
4118       session.print_error (semantic_error (msg.str(), tok));
4119     }
4120 }
4121
4122
4123 void
4124 typeresolution_info::invalid (const token* tok, exp_type pe)
4125 {
4126   num_still_unresolved ++;
4127
4128   if (assert_resolvability)
4129     {
4130       stringstream msg;
4131       string nm = (current_function ? current_function->name :
4132                    current_probe ? current_probe->name :
4133                    "probe condition");
4134       if (tok && tok->type == tok_operator)
4135         msg << nm + " uses invalid operator";
4136       else
4137         msg << nm + " with invalid type " << pe;
4138       session.print_error (semantic_error (msg.str(), tok));
4139     }
4140 }
4141
4142
4143 void
4144 typeresolution_info::mismatch (const token* tok, exp_type t1, exp_type t2)
4145 {
4146   bool tok_resolved = false;
4147   size_t i;
4148   semantic_error* err1 = 0;
4149   num_still_unresolved ++;
4150
4151   //BZ 9719: for improving type mismatch messages, a semantic error is
4152   //generated with the token where type was first resolved. All such
4153   //resolved tokens, stored in a vector, are matched against their
4154   //content. If an error for the matching token hasn't been printed out
4155   //already, it is and the token pushed in another printed_toks vector
4156
4157   if (assert_resolvability)
4158     {
4159       stringstream msg;
4160       for (i=0; i<resolved_toks.size(); i++)
4161         {
4162           if (resolved_toks[i]->content == tok->content)
4163             {
4164               tok_resolved = true;
4165               break;
4166             }
4167         }
4168       if (!tok_resolved)
4169         {
4170           string nm = (current_function ? current_function->name :
4171                        current_probe ? current_probe->name :
4172                        "probe condition");
4173           msg << nm + " with type mismatch (" << t1 << " vs. " << t2 << ")";
4174         }
4175       else
4176         {
4177           bool tok_printed = false;
4178           for (size_t j=0; j<printed_toks.size(); j++)
4179             {
4180               if (printed_toks[j] == resolved_toks[i])
4181                 {
4182                   tok_printed = true;
4183                   break;
4184                 }
4185             }
4186           string nm = (current_function ? current_function->name :
4187                        current_probe ? current_probe->name :
4188                        "probe condition");
4189           msg << nm + " with type mismatch (" << t1 << " vs. " << t2 << ")";
4190           if (!tok_printed)
4191             {
4192               //error for possible mismatch in the earlier resolved token
4193               printed_toks.push_back (resolved_toks[i]);
4194               stringstream type_msg;
4195               type_msg << nm + " type first inferred here (" << t2 << ")";
4196               err1 = new semantic_error (type_msg.str(), resolved_toks[i]);
4197             }
4198         }
4199       semantic_error err (msg.str(), tok);
4200       err.chain = err1;
4201       session.print_error (err);
4202     }
4203 }
4204
4205
4206 void
4207 typeresolution_info::resolved (const token* tok, exp_type)
4208 {
4209   resolved_toks.push_back (tok);
4210   num_newly_resolved ++;
4211 }
4212
4213 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */