elaborate.cxx

   1 // elaboration functions
   2 // Copyright (C) 2005-2009 Red Hat Inc.
   3 // Copyright (C) 2008 Intel Corporation
   4 //
   5 // This file is part of systemtap, and is free software.  You can
   6 // redistribute it and/or modify it under the terms of the GNU General
   7 // Public License (GPL); either version 2, or (at your option) any
   8 // later version.
   9
  10 #include "config.h"
  11 #include "elaborate.h"
  12 #include "parse.h"
  13 #include "tapsets.h"
  14 #include "session.h"
  15 #include "util.h"
  16
  17 extern "C" {
  18 #include <sys/utsname.h>
  19 #include <fnmatch.h>
  20 }
  21
  22 #include <algorithm>
  23 #include <fstream>
  24 #include <map>
  25 #include <cassert>
  26 #include <set>
  27 #include <vector>
  28 #include <algorithm>
  29 #include <iterator>
  30
  31
  32 using namespace std;
  33
  34
  35 // ------------------------------------------------------------------------
  36
  37 // Used in probe_point condition construction.  Either argument may be
  38 // NULL; if both, return NULL too.  Resulting expression is a deep
  39 // copy for symbol resolution purposes.
  40 expression* add_condition (expression* a, expression* b)
  41 {
  42   if (!a && !b) return 0;
  43   if (! a) return deep_copy_visitor::deep_copy(b);
  44   if (! b) return deep_copy_visitor::deep_copy(a);
  45   logical_and_expr la;
  46   la.op = "&&";
  47   la.left = a;
  48   la.right = b;
  49   la.tok = a->tok; // or could be b->tok
  50   return deep_copy_visitor::deep_copy(& la);
  51 }
  52
  53 // ------------------------------------------------------------------------
  54
  55
  56
  57 derived_probe::derived_probe (probe *p):
  58   base (p)
  59 {
  60   assert (p);
  61   this->locations = p->locations;
  62   this->tok = p->tok;
  63   this->privileged = p->privileged;
  64   this->body = deep_copy_visitor::deep_copy(p->body);
  65 }
  66
  67
  68 derived_probe::derived_probe (probe *p, probe_point *l):
  69   base (p)
  70 {
  71   assert (p);
  72   this->tok = p->tok;
  73   this->privileged = p->privileged;
  74   this->body = deep_copy_visitor::deep_copy(p->body);
  75
  76   assert (l);
  77   this->locations.push_back (l);
  78 }
  79
  80
  81 void
  82 derived_probe::printsig (ostream& o) const
  83 {
  84   probe::printsig (o);
  85   printsig_nested (o);
  86 }
  87
  88 void
  89 derived_probe::printsig_nested (ostream& o) const
  90 {
  91   // We'd like to enclose the probe derivation chain in a /* */
  92   // comment delimiter.  But just printing /* base->printsig() */ is
  93   // not enough, since base might itself be a derived_probe.  So we,
  94   // er, "cleverly" encode our nesting state as a formatting flag for
  95   // the ostream.
  96   ios::fmtflags f = o.flags (ios::internal);
  97   if (f & ios::internal)
  98     {
  99       // already nested
 100       o << " <- ";
 101       base->printsig (o);
 102     }
 103   else
 104     {
 105       // outermost nesting
 106       o << " /* <- ";
 107       base->printsig (o);
 108       o << " */";
 109     }
 110   // restore flags
 111   (void) o.flags (f);
 112 }
 113
 114
 115 void
 116 derived_probe::collect_derivation_chain (std::vector<probe*> &probes_list)
 117 {
 118   probes_list.push_back(this);
 119   base->collect_derivation_chain(probes_list);
 120 }
 121
 122
 123 probe_point*
 124 derived_probe::sole_location () const
 125 {
 126   if (locations.size() == 0)
 127     throw semantic_error ("derived_probe with no locations", this->tok);
 128   else if (locations.size() > 1)
 129     throw semantic_error ("derived_probe with too many locations", this->tok);
 130   else
 131     return locations[0];
 132 }
 133
 134
 135
 136 // ------------------------------------------------------------------------
 137 // Members of derived_probe_builder
 138
 139 bool
 140 derived_probe_builder::get_param (std::map<std::string, literal*> const & params,
 141                                   const std::string& key,
 142                                   std::string& value)
 143 {
 144   map<string, literal *>::const_iterator i = params.find (key);
 145   if (i == params.end())
 146     return false;
 147   literal_string * ls = dynamic_cast<literal_string *>(i->second);
 148   if (!ls)
 149     return false;
 150   value = ls->value;
 151   return true;
 152 }
 153
 154
 155 bool
 156 derived_probe_builder::get_param (std::map<std::string, literal*> const & params,
 157                                   const std::string& key,
 158                                   int64_t& value)
 159 {
 160   map<string, literal *>::const_iterator i = params.find (key);
 161   if (i == params.end())
 162     return false;
 163   if (i->second == NULL)
 164     return false;
 165   literal_number * ln = dynamic_cast<literal_number *>(i->second);
 166   if (!ln)
 167     return false;
 168   value = ln->value;
 169   return true;
 170 }
 171
 172
 173 bool
 174 derived_probe_builder::has_null_param (std::map<std::string, literal*> const & params,
 175                                        const std::string& key)
 176 {
 177   map<string, literal *>::const_iterator i = params.find(key);
 178   return (i != params.end() && i->second == NULL);
 179 }
 180
 181
 182
 183 // ------------------------------------------------------------------------
 184 // Members of match_key.
 185
 186 match_key::match_key(string const & n)
 187   : name(n),
 188     have_parameter(false),
 189     parameter_type(pe_unknown)
 190 {
 191 }
 192
 193 match_key::match_key(probe_point::component const & c)
 194   : name(c.functor),
 195     have_parameter(c.arg != NULL),
 196     parameter_type(c.arg ? c.arg->type : pe_unknown)
 197 {
 198 }
 199
 200 match_key &
 201 match_key::with_number()
 202 {
 203   have_parameter = true;
 204   parameter_type = pe_long;
 205   return *this;
 206 }
 207
 208 match_key &
 209 match_key::with_string()
 210 {
 211   have_parameter = true;
 212   parameter_type = pe_string;
 213   return *this;
 214 }
 215
 216 string
 217 match_key::str() const
 218 {
 219   if (have_parameter)
 220     switch (parameter_type)
 221       {
 222       case pe_string: return name + "(string)";
 223       case pe_long: return name + "(number)";
 224       default: return name + "(...)";
 225       }
 226   return name;
 227 }
 228
 229 bool
 230 match_key::operator<(match_key const & other) const
 231 {
 232   return ((name < other.name)
 233
 234           || (name == other.name
 235               && have_parameter < other.have_parameter)
 236
 237           || (name == other.name
 238               && have_parameter == other.have_parameter
 239               && parameter_type < other.parameter_type));
 240 }
 241
 242 static bool
 243 isglob(string const & str)
 244 {
 245   return(str.find('*') != str.npos);
 246 }
 247
 248 bool
 249 match_key::globmatch(match_key const & other) const
 250 {
 251   const char *other_str = other.name.c_str();
 252   const char *name_str = name.c_str();
 253
 254   return ((fnmatch(name_str, other_str, FNM_NOESCAPE) == 0)
 255           && have_parameter == other.have_parameter
 256           && parameter_type == other.parameter_type);
 257 }
 258
 259 // ------------------------------------------------------------------------
 260 // Members of match_node
 261 // ------------------------------------------------------------------------
 262
 263 match_node::match_node()
 264   : end(NULL), unprivileged_ok (false)
 265 {
 266 }
 267
 268 match_node *
 269 match_node::bind(match_key const & k)
 270 {
 271   if (k.name == "*")
 272     throw semantic_error("invalid use of wildcard probe point component");
 273
 274   map<match_key, match_node *>::const_iterator i = sub.find(k);
 275   if (i != sub.end())
 276     return i->second;
 277   match_node * n = new match_node();
 278   sub.insert(make_pair(k, n));
 279   return n;
 280 }
 281
 282 void
 283 match_node::bind(derived_probe_builder * e)
 284 {
 285   if (end)
 286     throw semantic_error("duplicate probe point pattern");
 287   end = e;
 288 }
 289
 290 match_node *
 291 match_node::bind(string const & k)
 292 {
 293   return bind(match_key(k));
 294 }
 295
 296 match_node *
 297 match_node::bind_str(string const & k)
 298 {
 299   return bind(match_key(k).with_string());
 300 }
 301
 302 match_node *
 303 match_node::bind_num(string const & k)
 304 {
 305   return bind(match_key(k).with_number());
 306 }
 307
 308 match_node*
 309 match_node::allow_unprivileged (bool b)
 310 {
 311   unprivileged_ok = b;
 312   return this;
 313 }
 314
 315 bool
 316 match_node::unprivileged_allowed () const
 317 {
 318   return unprivileged_ok;
 319 }
 320
 321 void
 322 match_node::find_and_build (systemtap_session& s,
 323                             probe* p, probe_point *loc, unsigned pos,
 324                             vector<derived_probe *>& results)
 325 {
 326   assert (pos <= loc->components.size());
 327   if (pos == loc->components.size()) // matched all probe point components so far
 328     {
 329       derived_probe_builder *b = end; // may be 0 if only nested names are bound
 330
 331       if (! b)
 332         {
 333           string alternatives;
 334           for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 335             alternatives += string(" ") + i->first.str();
 336
 337           throw semantic_error (string("probe point truncated at position ") +
 338                                 lex_cast<string> (pos) +
 339                                 " (follow:" + alternatives + ")", loc->tok);
 340         }
 341
 342       map<string, literal *> param_map;
 343       for (unsigned i=0; i<pos; i++)
 344         param_map[loc->components[i]->functor] = loc->components[i]->arg;
 345       // maybe 0
 346
 347       // Are we compiling for unprivileged users?  */
 348       if (s.unprivileged)
 349         {
 350           // Is this probe point ok for unprivileged users?
 351           if (! unprivileged_allowed ())
 352             throw semantic_error (string("probe point is not allowed for unprivileged users"));
 353         }
 354
 355       b->build (s, p, loc, param_map, results);
 356     }
 357   else if (isglob(loc->components[pos]->functor)) // wildcard?
 358     {
 359       match_key match (* loc->components[pos]);
 360
 361       // Call find_and_build for each possible match.  Ignore errors -
 362       // unless we don't find any match.
 363       unsigned int num_results = results.size();
 364       for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 365         {
 366           const match_key& subkey = i->first;
 367           match_node* subnode = i->second;
 368
 369           if (pending_interrupts) break;
 370
 371           if (match.globmatch(subkey))
 372             {
 373               if (s.verbose > 2)
 374                 clog << "wildcard '" << loc->components[pos]->functor
 375                      << "' matched '" << subkey.name << "'" << endl;
 376
 377               // When we have a wildcard, we need to create a copy of
 378               // the probe point.  Then we'll create a copy of the
 379               // wildcard component, and substitute the non-wildcard
 380               // functor.
 381               probe_point *non_wildcard_pp = new probe_point(*loc);
 382               probe_point::component *non_wildcard_component
 383                 = new probe_point::component(*loc->components[pos]);
 384               non_wildcard_component->functor = subkey.name;
 385               non_wildcard_pp->components[pos] = non_wildcard_component;
 386
 387               // NB: probe conditions are not attached at the wildcard
 388               // (component/functor) level, but at the overall
 389               // probe_point level.
 390
 391               // recurse (with the non-wildcard probe point)
 392               try
 393                 {
 394                   subnode->find_and_build (s, p, non_wildcard_pp, pos+1,
 395                                            results);
 396                 }
 397               catch (const semantic_error& e)
 398                 {
 399                   // Ignore semantic_errors while expanding wildcards.
 400                   // If we get done and nothing was expanded, the code
 401                   // following the loop will complain.
 402
 403                   // If this wildcard didn't match, cleanup.
 404                   delete non_wildcard_pp;
 405                   delete non_wildcard_component;
 406                 }
 407             }
 408         }
 409       if (! loc->optional && num_results == results.size())
 410         {
 411           // We didn't find any wildcard matches (since the size of
 412           // the result vector didn't change).  Throw an error.
 413           string alternatives;
 414           for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 415             alternatives += string(" ") + i->first.str();
 416
 417           throw semantic_error(string("probe point mismatch at position ") +
 418                                lex_cast<string> (pos) +
 419                                " (alternatives:" + alternatives + ")" +
 420                                " didn't find any wildcard matches",
 421                                loc->tok);
 422         }
 423     }
 424   else
 425     {
 426       match_key match (* loc->components[pos]);
 427       sub_map_iterator_t i = sub.find (match);
 428       if (i == sub.end()) // no match
 429         {
 430           string alternatives;
 431           for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 432             alternatives += string(" ") + i->first.str();
 433
 434           throw semantic_error (string("probe point mismatch at position ") +
 435                                 lex_cast<string> (pos) +
 436                                 " (alternatives:" + alternatives + ")",
 437                                 loc->tok);
 438         }
 439
 440       match_node* subnode = i->second;
 441       // recurse
 442       subnode->find_and_build (s, p, loc, pos+1, results);
 443     }
 444 }
 445
 446
 447 void
 448 match_node::build_no_more (systemtap_session& s)
 449 {
 450   for (sub_map_iterator_t i = sub.begin(); i != sub.end(); i++)
 451     i->second->build_no_more (s);
 452   if (end) end->build_no_more (s);
 453 }
 454
 455
 456 // ------------------------------------------------------------------------
 457 // Alias probes
 458 // ------------------------------------------------------------------------
 459
 460 struct alias_derived_probe: public derived_probe
 461 {
 462   alias_derived_probe (probe* base, probe_point *l, const probe_alias *a):
 463     derived_probe (base, l), alias(a) {}
 464
 465   void upchuck () { throw semantic_error ("inappropriate", this->tok); }
 466
 467   // Alias probes are immediately expanded to other derived_probe
 468   // types, and are not themselves emitted or listed in
 469   // systemtap_session.probes
 470
 471   void join_group (systemtap_session&) { upchuck (); }
 472
 473   virtual const probe_alias *get_alias () const { return alias; }
 474
 475 private:
 476   const probe_alias *alias; // Used to check for recursion
 477 };
 478
 479
 480 struct
 481 alias_expansion_builder
 482   : public derived_probe_builder
 483 {
 484   probe_alias * alias;
 485
 486   alias_expansion_builder(probe_alias * a)
 487     : alias(a)
 488   {}
 489
 490   virtual void build(systemtap_session & sess,
 491                      probe * use,
 492                      probe_point * location,
 493                      std::map<std::string, literal *> const &,
 494                      vector<derived_probe *> & finished_results)
 495   {
 496     // Don't build the alias expansion if infinite recursion is detected.
 497     if (checkForRecursiveExpansion (use)) {
 498       stringstream msg;
 499       msg << "Recursive loop in alias expansion of " << *location  << " at " << location->tok->location;
 500       // semantic_errors thrown here are ignored.
 501       sess.print_error (semantic_error (msg.str()));
 502       return;
 503     }
 504
 505     // We're going to build a new probe and wrap it up in an
 506     // alias_expansion_probe so that the expansion loop recognizes it as
 507     // such and re-expands its expansion.
 508
 509     alias_derived_probe * n = new alias_derived_probe (use, location /* soon overwritten */, this->alias);
 510     n->body = new block();
 511
 512     // The new probe gets a deep copy of the location list of
 513     // the alias (with incoming condition joined)
 514     n->locations.clear();
 515     for (unsigned i=0; i<alias->locations.size(); i++)
 516       {
 517         probe_point *pp = new probe_point(*alias->locations[i]);
 518         pp->condition = add_condition (pp->condition, location->condition);
 519         n->locations.push_back(pp);
 520       }
 521
 522     // the token location of the alias,
 523     n->tok = location->tok;
 524
 525     // and statements representing the concatenation of the alias'
 526     // body with the use's.
 527     //
 528     // NB: locals are *not* copied forward, from either alias or
 529     // use. The expansion should have its locals re-inferred since
 530     // there's concatenated code here and we only want one vardecl per
 531     // resulting variable.
 532
 533     if (alias->epilogue_style)
 534       n->body = new block (use->body, alias->body);
 535     else
 536       n->body = new block (alias->body, use->body);
 537
 538     derive_probes (sess, n, finished_results, location->optional);
 539   }
 540
 541   bool checkForRecursiveExpansion (probe *use)
 542   {
 543     // Collect the derivation chain of this probe.
 544     vector<probe*>derivations;
 545     use->collect_derivation_chain (derivations);
 546
 547     // Check all probe points in the alias expansion against the currently-being-expanded probe point
 548     // of each of the probes in the derivation chain, looking for a match. This
 549     // indicates infinite recursion.
 550     // The first element of the derivation chain will be the derived_probe representing 'use', so
 551     // start the search with the second element.
 552     assert (derivations.size() > 0);
 553     assert (derivations[0] == use);
 554     for (unsigned d = 1; d < derivations.size(); ++d) {
 555       if (use->get_alias() == derivations[d]->get_alias())
 556         return true; // recursion detected
 557     }
 558     return false;
 559   }
 560 };
 561
 562
 563 // ------------------------------------------------------------------------
 564 // Pattern matching
 565 // ------------------------------------------------------------------------
 566
 567
 568 // Register all the aliases we've seen in library files, and the user
 569 // file, as patterns.
 570
 571 void
 572 systemtap_session::register_library_aliases()
 573 {
 574   vector<stapfile*> files(library_files);
 575   files.push_back(user_file);
 576
 577   for (unsigned f = 0; f < files.size(); ++f)
 578     {
 579       stapfile * file = files[f];
 580       for (unsigned a = 0; a < file->aliases.size(); ++a)
 581         {
 582           probe_alias * alias = file->aliases[a];
 583           try
 584             {
 585               for (unsigned n = 0; n < alias->alias_names.size(); ++n)
 586                 {
 587                   probe_point * name = alias->alias_names[n];
 588                   match_node * n = pattern_root;
 589                   for (unsigned c = 0; c < name->components.size(); ++c)
 590                     {
 591                       probe_point::component * comp = name->components[c];
 592                       // XXX: alias parameters
 593                       if (comp->arg)
 594                         throw semantic_error("alias component "
 595                                              + comp->functor
 596                                              + " contains illegal parameter");
 597                       n = n->bind(comp->functor);
 598                     }
 599                   n->bind(new alias_expansion_builder(alias));
 600                 }
 601             }
 602           catch (const semantic_error& e)
 603             {
 604               semantic_error* er = new semantic_error (e); // copy it
 605               stringstream msg;
 606               msg << e.msg2;
 607               msg << " while registering probe alias ";
 608               alias->printsig(msg);
 609               er->msg2 = msg.str();
 610               print_error (* er);
 611               delete er;
 612             }
 613         }
 614     }
 615 }
 616
 617
 618 static unsigned max_recursion = 100;
 619
 620 struct
 621 recursion_guard
 622 {
 623   unsigned & i;
 624   recursion_guard(unsigned & i) : i(i)
 625     {
 626       if (i > max_recursion)
 627         throw semantic_error("recursion limit reached");
 628       ++i;
 629     }
 630   ~recursion_guard()
 631     {
 632       --i;
 633     }
 634 };
 635
 636 // The match-and-expand loop.
 637 void
 638 derive_probes (systemtap_session& s,
 639                probe *p, vector<derived_probe*>& dps,
 640                bool optional)
 641 {
 642   for (unsigned i = 0; i < p->locations.size(); ++i)
 643     {
 644       if (pending_interrupts) break;
 645
 646       probe_point *loc = p->locations[i];
 647
 648       try
 649         {
 650           unsigned num_atbegin = dps.size();
 651
 652           // Pass down optional flag from e.g. alias reference to each
 653           // probe_point instance.  We do this by temporarily overriding
 654           // the probe_point optional flag.  We could instead deep-copy
 655           // and set a flag on the copy permanently.
 656           bool old_loc_opt = loc->optional;
 657           loc->optional = loc->optional || optional;
 658           try
 659             {
 660               s.pattern_root->find_and_build (s, p, loc, 0, dps); // <-- actual derivation!
 661             }
 662           catch (const semantic_error& e)
 663             {
 664               if (!loc->optional)
 665                 throw semantic_error(e);
 666               else /* tolerate failure for optional probe */
 667                 continue;
 668             }
 669
 670           loc->optional = old_loc_opt;
 671           unsigned num_atend = dps.size();
 672
 673           if (! (loc->optional||optional) && // something required, but
 674               num_atbegin == num_atend) // nothing new derived!
 675             throw semantic_error ("no match");
 676
 677           if (loc->sufficient && (num_atend > num_atbegin))
 678             {
 679               if (s.verbose > 1)
 680                 {
 681                   clog << "Probe point ";
 682                   p->locations[i]->print(clog);
 683                   clog << " sufficient, skipped";
 684                   for (unsigned j = i+1; j < p->locations.size(); ++j)
 685                     {
 686                       clog << " ";
 687                       p->locations[j]->print(clog);
 688                     }
 689                   clog << endl;
 690                 }
 691               break; // we need not try to derive for any other locations
 692             }
 693         }
 694       catch (const semantic_error& e)
 695         {
 696           // XXX: prefer not to print_error at every nest/unroll level
 697
 698           semantic_error* er = new semantic_error (e); // copy it
 699           stringstream msg;
 700           msg << e.msg2;
 701           msg << " while resolving probe point " << *loc;
 702           er->msg2 = msg.str();
 703           s.print_error (* er);
 704           delete er;
 705         }
 706
 707     }
 708 }
 709
 710
 711
 712 // ------------------------------------------------------------------------
 713 //
 714 // Indexable usage checks
 715 //
 716
 717 struct symbol_fetcher
 718   : public throwing_visitor
 719 {
 720   symbol *&sym;
 721
 722   symbol_fetcher (symbol *&sym): sym(sym)
 723   {}
 724
 725   void visit_symbol (symbol* e)
 726   {
 727     sym = e;
 728   }
 729
 730   void visit_target_symbol (target_symbol* e)
 731   {
 732     sym = e;
 733   }
 734
 735   void visit_arrayindex (arrayindex* e)
 736   {
 737     e->base->visit_indexable (this);
 738   }
 739
 740   void visit_cast_op (cast_op* e)
 741   {
 742     sym = e;
 743   }
 744
 745   void throwone (const token* t)
 746   {
 747     throw semantic_error ("Expecting symbol or array index expression", t);
 748   }
 749 };
 750
 751 symbol *
 752 get_symbol_within_expression (expression *e)
 753 {
 754   symbol *sym = NULL;
 755   symbol_fetcher fetcher(sym);
 756   e->visit (&fetcher);
 757   return sym; // NB: may be null!
 758 }
 759
 760 static symbol *
 761 get_symbol_within_indexable (indexable *ix)
 762 {
 763   symbol *array = NULL;
 764   hist_op *hist = NULL;
 765   classify_indexable(ix, array, hist);
 766   if (array)
 767     return array;
 768   else
 769     return get_symbol_within_expression (hist->stat);
 770 }
 771
 772 struct mutated_var_collector
 773   : public traversing_visitor
 774 {
 775   set<vardecl *> * mutated_vars;
 776
 777   mutated_var_collector (set<vardecl *> * mm)
 778     : mutated_vars (mm)
 779   {}
 780
 781   void visit_assignment(assignment* e)
 782   {
 783     if (e->type == pe_stats && e->op == "<<<")
 784       {
 785         vardecl *vd = get_symbol_within_expression (e->left)->referent;
 786         if (vd)
 787           mutated_vars->insert (vd);
 788       }
 789     traversing_visitor::visit_assignment(e);
 790   }
 791
 792   void visit_arrayindex (arrayindex *e)
 793   {
 794     if (is_active_lvalue (e))
 795       {
 796         symbol *sym;
 797         if (e->base->is_symbol (sym))
 798           mutated_vars->insert (sym->referent);
 799         else
 800           throw semantic_error("Assignment to read-only histogram bucket", e->tok);
 801       }
 802     traversing_visitor::visit_arrayindex (e);
 803   }
 804 };
 805
 806
 807 struct no_var_mutation_during_iteration_check
 808   : public traversing_visitor
 809 {
 810   systemtap_session & session;
 811   map<functiondecl *,set<vardecl *> *> & function_mutates_vars;
 812   vector<vardecl *> vars_being_iterated;
 813
 814   no_var_mutation_during_iteration_check
 815   (systemtap_session & sess,
 816    map<functiondecl *,set<vardecl *> *> & fmv)
 817     : session(sess), function_mutates_vars (fmv)
 818   {}
 819
 820   void visit_arrayindex (arrayindex *e)
 821   {
 822     if (is_active_lvalue(e))
 823       {
 824         vardecl *vd = get_symbol_within_indexable (e->base)->referent;
 825         if (vd)
 826           {
 827             for (unsigned i = 0; i < vars_being_iterated.size(); ++i)
 828               {
 829                 vardecl *v = vars_being_iterated[i];
 830                 if (v == vd)
 831                   {
 832                     string err = ("variable '" + v->name +
 833                                   "' modified during 'foreach' iteration");
 834                     session.print_error (semantic_error (err, e->tok));
 835                   }
 836               }
 837           }
 838       }
 839     traversing_visitor::visit_arrayindex (e);
 840   }
 841
 842   void visit_functioncall (functioncall* e)
 843   {
 844     map<functiondecl *,set<vardecl *> *>::const_iterator i
 845       = function_mutates_vars.find (e->referent);
 846
 847     if (i != function_mutates_vars.end())
 848       {
 849         for (unsigned j = 0; j < vars_being_iterated.size(); ++j)
 850           {
 851             vardecl *m = vars_being_iterated[j];
 852             if (i->second->find (m) != i->second->end())
 853               {
 854                 string err = ("function call modifies var '" + m->name +
 855                               "' during 'foreach' iteration");
 856                 session.print_error (semantic_error (err, e->tok));
 857               }
 858           }
 859       }
 860
 861     traversing_visitor::visit_functioncall (e);
 862   }
 863
 864   void visit_foreach_loop(foreach_loop* s)
 865   {
 866     vardecl *vd = get_symbol_within_indexable (s->base)->referent;
 867
 868     if (vd)
 869       vars_being_iterated.push_back (vd);
 870
 871     traversing_visitor::visit_foreach_loop (s);
 872
 873     if (vd)
 874       vars_being_iterated.pop_back();
 875   }
 876 };
 877
 878
 879 // ------------------------------------------------------------------------
 880
 881 struct stat_decl_collector
 882   : public traversing_visitor
 883 {
 884   systemtap_session & session;
 885
 886   stat_decl_collector(systemtap_session & sess)
 887     : session(sess)
 888   {}
 889
 890   void visit_stat_op (stat_op* e)
 891   {
 892     symbol *sym = get_symbol_within_expression (e->stat);
 893     if (session.stat_decls.find(sym->name) == session.stat_decls.end())
 894       session.stat_decls[sym->name] = statistic_decl();
 895   }
 896
 897   void visit_assignment (assignment* e)
 898   {
 899     if (e->op == "<<<")
 900       {
 901         symbol *sym = get_symbol_within_expression (e->left);
 902         if (session.stat_decls.find(sym->name) == session.stat_decls.end())
 903           session.stat_decls[sym->name] = statistic_decl();
 904       }
 905     else
 906       traversing_visitor::visit_assignment(e);
 907   }
 908
 909   void visit_hist_op (hist_op* e)
 910   {
 911     symbol *sym = get_symbol_within_expression (e->stat);
 912     statistic_decl new_stat;
 913
 914     if (e->htype == hist_linear)
 915       {
 916         new_stat.type = statistic_decl::linear;
 917         assert (e->params.size() == 3);
 918         new_stat.linear_low = e->params[0];
 919         new_stat.linear_high = e->params[1];
 920         new_stat.linear_step = e->params[2];
 921       }
 922     else
 923       {
 924         assert (e->htype == hist_log);
 925         new_stat.type = statistic_decl::logarithmic;
 926         assert (e->params.size() == 0);
 927       }
 928
 929     map<string, statistic_decl>::iterator i = session.stat_decls.find(sym->name);
 930     if (i == session.stat_decls.end())
 931       session.stat_decls[sym->name] = new_stat;
 932     else
 933       {
 934         statistic_decl & old_stat = i->second;
 935         if (!(old_stat == new_stat))
 936           {
 937             if (old_stat.type == statistic_decl::none)
 938               i->second = new_stat;
 939             else
 940               {
 941                 // FIXME: Support multiple co-declared histogram types
 942                 semantic_error se("multiple histogram types declared on '" + sym->name + "'",
 943                                   e->tok);
 944                 session.print_error (se);
 945               }
 946           }
 947       }
 948   }
 949
 950 };
 951
 952 static int
 953 semantic_pass_stats (systemtap_session & sess)
 954 {
 955   stat_decl_collector sdc(sess);
 956
 957   for (map<string,functiondecl*>::iterator it = sess.functions.begin(); it != sess.functions.end(); it++)
 958     it->second->body->visit (&sdc);
 959
 960   for (unsigned i = 0; i < sess.probes.size(); ++i)
 961     sess.probes[i]->body->visit (&sdc);
 962
 963   for (unsigned i = 0; i < sess.globals.size(); ++i)
 964     {
 965       vardecl *v = sess.globals[i];
 966       if (v->type == pe_stats)
 967         {
 968
 969           if (sess.stat_decls.find(v->name) == sess.stat_decls.end())
 970             {
 971               semantic_error se("unable to infer statistic parameters for global '" + v->name + "'");
 972               sess.print_error (se);
 973             }
 974         }
 975     }
 976
 977   return sess.num_errors();
 978 }
 979
 980 // ------------------------------------------------------------------------
 981
 982 // Enforce variable-related invariants: no modification of
 983 // a foreach()-iterated array.
 984 static int
 985 semantic_pass_vars (systemtap_session & sess)
 986 {
 987
 988   map<functiondecl *, set<vardecl *> *> fmv;
 989   no_var_mutation_during_iteration_check chk(sess, fmv);
 990
 991   for (map<string,functiondecl*>::iterator it = sess.functions.begin(); it != sess.functions.end(); it++)
 992     {
 993       functiondecl * fn = it->second;
 994       if (fn->body)
 995         {
 996           set<vardecl *> * m = new set<vardecl *>();
 997           mutated_var_collector mc (m);
 998           fn->body->visit (&mc);
 999           fmv[fn] = m;
1000         }
1001     }
1002
1003   for (map<string,functiondecl*>::iterator it = sess.functions.begin(); it != sess.functions.end(); it++)
1004     {
1005       functiondecl * fn = it->second;
1006       if (fn->body) fn->body->visit (&chk);
1007     }
1008
1009   for (unsigned i = 0; i < sess.probes.size(); ++i)
1010     {
1011       if (sess.probes[i]->body)
1012         sess.probes[i]->body->visit (&chk);
1013     }
1014
1015   return sess.num_errors();
1016 }
1017
1018
1019 // ------------------------------------------------------------------------
1020
1021 // Rewrite probe condition expressions into probe bodies.  Tricky and
1022 // exciting business, this.  This:
1023 //
1024 // probe foo if (g1 || g2) { ... }
1025 // probe bar { ... g1 ++ ... }
1026 //
1027 // becomes:
1028 //
1029 // probe begin(MAX) { if (! (g1 || g2)) %{ disable_probe_foo %} }
1030 // probe foo { if (! (g1 || g2)) next; ... }
1031 // probe bar { ... g1 ++ ...;
1032 //             if (g1 || g2) %{ enable_probe_foo %} else %{ disable_probe_foo %}
1033 //           }
1034 //
1035 // XXX: As a first cut, do only the "inline probe condition" part of the
1036 // transform.
1037
1038 static int
1039 semantic_pass_conditions (systemtap_session & sess)
1040 {
1041   for (unsigned i = 0; i < sess.probes.size(); ++i)
1042     {
1043       derived_probe* p = sess.probes[i];
1044       expression* e = p->sole_location()->condition;
1045       if (e)
1046         {
1047           varuse_collecting_visitor vut(sess);
1048           e->visit (& vut);
1049
1050           if (! vut.written.empty())
1051             {
1052               string err = ("probe condition must not modify any variables");
1053               sess.print_error (semantic_error (err, e->tok));
1054             }
1055           else if (vut.embedded_seen)
1056             {
1057               sess.print_error (semantic_error ("probe condition must not include impure embedded-C", e->tok));
1058             }
1059
1060           // Add the condition expression to the front of the
1061           // derived_probe body.
1062           if_statement *ifs = new if_statement ();
1063           ifs->tok = e->tok;
1064           ifs->thenblock = new next_statement ();
1065           ifs->thenblock->tok = e->tok;
1066           ifs->elseblock = NULL;
1067           unary_expression *notex = new unary_expression ();
1068           notex->op = "!";
1069           notex->tok = e->tok;
1070           notex->operand = e;
1071           ifs->condition = notex;
1072           p->body = new block (ifs, p->body);
1073         }
1074     }
1075
1076   return sess.num_errors();
1077 }
1078
1079
1080 // ------------------------------------------------------------------------
1081
1082
1083 static int semantic_pass_symbols (systemtap_session&);
1084 static int semantic_pass_optimize1 (systemtap_session&);
1085 static int semantic_pass_optimize2 (systemtap_session&);
1086 static int semantic_pass_types (systemtap_session&);
1087 static int semantic_pass_vars (systemtap_session&);
1088 static int semantic_pass_stats (systemtap_session&);
1089 static int semantic_pass_conditions (systemtap_session&);
1090
1091
1092 // Link up symbols to their declarations.  Set the session's
1093 // files/probes/functions/globals vectors from the transitively
1094 // reached set of stapfiles in s.library_files, starting from
1095 // s.user_file.  Perform automatic tapset inclusion and probe
1096 // alias expansion.
1097 static int
1098 semantic_pass_symbols (systemtap_session& s)
1099 {
1100   symresolution_info sym (s);
1101
1102   // NB: s.files can grow during this iteration, so size() can
1103   // return gradually increasing numbers.
1104   s.files.push_back (s.user_file);
1105   for (unsigned i = 0; i < s.files.size(); i++)
1106     {
1107       if (pending_interrupts) break;
1108       stapfile* dome = s.files[i];
1109
1110       // Pass 1: add globals and functions to systemtap-session master list,
1111       //         so the find_* functions find them
1112
1113       for (unsigned i=0; i<dome->globals.size(); i++)
1114         s.globals.push_back (dome->globals[i]);
1115
1116       for (unsigned i=0; i<dome->functions.size(); i++)
1117         s.functions[dome->functions[i]->name] = dome->functions[i];
1118
1119       for (unsigned i=0; i<dome->embeds.size(); i++)
1120         s.embeds.push_back (dome->embeds[i]);
1121
1122       // Pass 2: process functions
1123
1124       for (unsigned i=0; i<dome->functions.size(); i++)
1125         {
1126           if (pending_interrupts) break;
1127           functiondecl* fd = dome->functions[i];
1128
1129           try
1130             {
1131               for (unsigned j=0; j<s.code_filters.size(); j++)
1132                 s.code_filters[j]->replace (fd->body);
1133
1134               sym.current_function = fd;
1135               sym.current_probe = 0;
1136               fd->body->visit (& sym);
1137             }
1138           catch (const semantic_error& e)
1139             {
1140               s.print_error (e);
1141             }
1142         }
1143
1144       // Pass 3: derive probes and resolve any further symbols in the
1145       // derived results.
1146
1147       for (unsigned i=0; i<dome->probes.size(); i++)
1148         {
1149           if (pending_interrupts) break;
1150           probe* p = dome->probes [i];
1151           vector<derived_probe*> dps;
1152
1153           // much magic happens here: probe alias expansion, wildcard
1154           // matching, low-level derived_probe construction.
1155           derive_probes (s, p, dps);
1156
1157           for (unsigned j=0; j<dps.size(); j++)
1158             {
1159               if (pending_interrupts) break;
1160               derived_probe* dp = dps[j];
1161               s.probes.push_back (dp);
1162               dp->join_group (s);
1163
1164               try
1165                 {
1166                   for (unsigned k=0; k<s.code_filters.size(); k++)
1167                     s.code_filters[k]->replace (dp->body);
1168
1169                   sym.current_function = 0;
1170                   sym.current_probe = dp;
1171                   dp->body->visit (& sym);
1172
1173                   // Process the probe-point condition expression.
1174                   sym.current_function = 0;
1175                   sym.current_probe = 0;
1176                   if (dp->sole_location()->condition)
1177                     dp->sole_location()->condition->visit (& sym);
1178                 }
1179               catch (const semantic_error& e)
1180                 {
1181                   s.print_error (e);
1182                 }
1183             }
1184         }
1185     }
1186
1187   // Inform all derived_probe builders that we're done with
1188   // all resolution, so it's time to release caches.
1189   s.pattern_root->build_no_more (s);
1190
1191   return s.num_errors(); // all those print_error calls
1192 }
1193
1194
1195 // Keep unread global variables for probe end value display.
1196 void add_global_var_display (systemtap_session& s)
1197 {
1198   // Don't generate synthetic end probes when in listings mode;
1199   // it would clutter up the list of probe points with "end ...".
1200   if (s.listing_mode) return;
1201
1202   varuse_collecting_visitor vut(s);
1203   for (unsigned i=0; i<s.probes.size(); i++)
1204     {
1205       s.probes[i]->body->visit (& vut);
1206
1207       if (s.probes[i]->sole_location()->condition)
1208         s.probes[i]->sole_location()->condition->visit (& vut);
1209     }
1210
1211   for (unsigned g=0; g < s.globals.size(); g++)
1212     {
1213       vardecl* l = s.globals[g];
1214       if (vut.read.find (l) != vut.read.end()
1215           || vut.written.find (l) == vut.written.end())
1216         continue;
1217
1218       // Don't generate synthetic end probes for unread globals
1219       // declared only within tapsets. (RHBZ 468139), but rather
1220       // only within the end-user script.
1221
1222       bool tapset_global = false;
1223       for (size_t m=0; m < s.library_files.size(); m++)
1224         {
1225           for (size_t n=0; n < s.library_files[m]->globals.size(); n++)
1226             {
1227               if (l->name == s.library_files[m]->globals[n]->name)
1228                 {tapset_global = true; break;}
1229             }
1230         }
1231       if (tapset_global)
1232         continue;
1233
1234       print_format* pf = new print_format;
1235       probe* p = new probe;
1236       probe_point* pl = new probe_point;
1237       probe_point::component* c = new probe_point::component("end");
1238       token* print_tok = new token;
1239       vector<derived_probe*> dps;
1240       block *b = new block;
1241
1242       pl->components.push_back (c);
1243       p->tok = l->tok;
1244       p->locations.push_back (pl);
1245       print_tok->type = tok_identifier;
1246       print_tok->content = "printf";
1247
1248       // Create a symbol
1249       symbol* g_sym = new symbol;
1250       g_sym->name = l->name;
1251       g_sym->tok = l->tok;
1252       g_sym->type = l->type;
1253       g_sym->referent = l;
1254
1255       pf->print_to_stream = true;
1256       pf->print_with_format = true;
1257       pf->print_with_delim = false;
1258       pf->print_with_newline = false;
1259       pf->print_char = false;
1260       pf->raw_components += l->name;
1261       pf->tok = print_tok;
1262
1263       if (l->index_types.size() == 0) // Scalar
1264         {
1265           if (l->type == pe_stats)
1266             pf->raw_components += " @count=%#x @min=%#x @max=%#x @sum=%#x @avg=%#x\\n";
1267           else if (l->type == pe_string)
1268             pf->raw_components += "=\"%#s\"\\n";
1269           else
1270             pf->raw_components += "=%#x\\n";
1271           pf->components = print_format::string_to_components(pf->raw_components);
1272           expr_statement* feb = new expr_statement;
1273           feb->value = pf;
1274           feb->tok = print_tok;
1275           if (l->type == pe_stats)
1276             {
1277               struct stat_op* so [5];
1278               const stat_component_type stypes[] = {sc_count, sc_min, sc_max, sc_sum, sc_average};
1279
1280               for (unsigned si = 0;
1281                    si < (sizeof(so)/sizeof(struct stat_op*));
1282                    si++)
1283                 {
1284                   so[si]= new stat_op;
1285                   so[si]->ctype = stypes[si];
1286                   so[si]->type = pe_long;
1287                   so[si]->stat = g_sym;
1288                   so[si]->tok = l->tok;
1289                   pf->args.push_back(so[si]);
1290                 }
1291             }
1292           else
1293             pf->args.push_back(g_sym);
1294
1295           /* PR7053: Checking empty aggregate for global variable */
1296           if (l->type == pe_stats) {
1297               stat_op *so= new stat_op;
1298               so->ctype = sc_count;
1299               so->type = pe_long;
1300               so->stat = g_sym;
1301               so->tok = l->tok;
1302               comparison *be = new comparison;
1303               be->op = ">";
1304               be->tok = l->tok;
1305               be->left = so;
1306               be->right = new literal_number(0);
1307
1308               /* Create printf @count=0x0 in else block */
1309               print_format* pf_0 = new print_format;
1310               pf_0->print_to_stream = true;
1311               pf_0->print_with_format = true;
1312               pf_0->print_with_delim = false;
1313               pf_0->print_with_newline = false;
1314               pf_0->print_char = false;
1315               pf_0->raw_components += l->name;
1316               pf_0->raw_components += " @count=0x0\\n";
1317               pf_0->tok = print_tok;
1318               pf_0->components = print_format::string_to_components(pf_0->raw_components);
1319               expr_statement* feb_else = new expr_statement;
1320               feb_else->value = pf_0;
1321               feb_else->tok = print_tok;
1322               if_statement *ifs = new if_statement;
1323               ifs->tok = l->tok;
1324               ifs->condition = be;
1325               ifs->thenblock = feb ;
1326               ifs->elseblock = feb_else;
1327               b->statements.push_back(ifs);
1328             }
1329           else /* other non-stat cases */
1330             b->statements.push_back(feb);
1331         }
1332       else                      // Array
1333         {
1334           int idx_count = l->index_types.size();
1335           symbol* idx_sym[idx_count];
1336           vardecl* idx_v[idx_count];
1337           // Create a foreach loop
1338           foreach_loop* fe = new foreach_loop;
1339           fe->sort_direction = -1; // imply decreasing sort on value
1340           fe->sort_column = 0;     // as in   foreach ([a,b,c] in array-) { }
1341           fe->limit = NULL;
1342
1343           // Create indices for the foreach loop
1344           for (int i=0; i < idx_count; i++)
1345             {
1346               char *idx_name;
1347               if (asprintf (&idx_name, "idx%d", i) < 0)
1348                 return;
1349               idx_sym[i] = new symbol;
1350               idx_sym[i]->name = idx_name;
1351               idx_sym[i]->tok = l->tok;
1352               idx_v[i] = new vardecl;
1353               idx_v[i]->name = idx_name;
1354               idx_v[i]->type = l->index_types[i];
1355               idx_v[i]->tok = l->tok;
1356               idx_sym[i]->referent = idx_v[i];
1357               fe->indexes.push_back (idx_sym[i]);
1358             }
1359
1360           // Create a printf for the foreach loop
1361           pf->raw_components += "[";
1362           for (int i=0; i < idx_count; i++)
1363             {
1364               if (i > 0)
1365                 pf->raw_components += ",";
1366               if (l->index_types[i] == pe_string)
1367                 pf->raw_components += "\"%#s\"";
1368               else
1369                 pf->raw_components += "%#d";
1370             }
1371           pf->raw_components += "]";
1372           if (l->type == pe_stats)
1373             pf->raw_components += " @count=%#x @min=%#x @max=%#x @sum=%#x @avg=%#x\\n";
1374           else if (l->type == pe_string)
1375             pf->raw_components += "=\"%#s\"\\n";
1376           else
1377             pf->raw_components += "=%#x\\n";
1378
1379           // Create an index for the array
1380           struct arrayindex* ai = new arrayindex;
1381           ai->tok = l->tok;
1382           ai->base = g_sym;
1383
1384           for (int i=0; i < idx_count; i++)
1385             {
1386               ai->indexes.push_back (idx_sym[i]);
1387               pf->args.push_back(idx_sym[i]);
1388             }
1389           if (l->type == pe_stats)
1390             {
1391               struct stat_op* so [5];
1392               const stat_component_type stypes[] = {sc_count, sc_min, sc_max, sc_sum, sc_average};
1393
1394               ai->type = pe_stats;
1395               for (unsigned si = 0;
1396                    si < (sizeof(so)/sizeof(struct stat_op*));
1397                    si++)
1398                 {
1399                   so[si]= new stat_op;
1400                   so[si]->ctype = stypes[si];
1401                   so[si]->type = pe_long;
1402                   so[si]->stat = ai;
1403                   so[si]->tok = l->tok;
1404                   pf->args.push_back(so[si]);
1405                 }
1406             }
1407           else
1408             pf->args.push_back(ai);
1409
1410           pf->components = print_format::string_to_components(pf->raw_components);
1411           expr_statement* feb = new expr_statement;
1412           feb->value = pf;
1413           fe->base = g_sym;
1414           fe->block = (statement*)feb;
1415           b->statements.push_back(fe);
1416         }
1417
1418       // Add created probe
1419       p->body = b;
1420       derive_probes (s, p, dps);
1421       for (unsigned i = 0; i < dps.size(); i++)
1422         {
1423           derived_probe* dp = dps[i];
1424           s.probes.push_back (dp);
1425           dp->join_group (s);
1426         }
1427       // Repopulate symbol and type info
1428       symresolution_info sym (s);
1429       sym.current_function = 0;
1430       sym.current_probe = dps[0];
1431       dps[0]->body->visit (& sym);
1432
1433       semantic_pass_types(s);
1434       // Mark that variable is read
1435       vut.read.insert (l);
1436     }
1437 }
1438
1439 int
1440 semantic_pass (systemtap_session& s)
1441 {
1442   int rc = 0;
1443
1444   try
1445     {
1446       s.register_library_aliases();
1447       register_standard_tapsets(s);
1448
1449       if (rc == 0) rc = semantic_pass_symbols (s);
1450       if (rc == 0) rc = semantic_pass_conditions (s);
1451       if (rc == 0 && ! s.unoptimized) rc = semantic_pass_optimize1 (s);
1452       if (rc == 0) rc = semantic_pass_types (s);
1453       if (rc == 0) add_global_var_display (s);
1454       if (rc == 0 && ! s.unoptimized) rc = semantic_pass_optimize2 (s);
1455       if (rc == 0) rc = semantic_pass_vars (s);
1456       if (rc == 0) rc = semantic_pass_stats (s);
1457
1458       if (s.probes.size() == 0 && !s.listing_mode)
1459         throw semantic_error ("no probes found");
1460     }
1461   catch (const semantic_error& e)
1462     {
1463       s.print_error (e);
1464       rc ++;
1465     }
1466
1467   return rc;
1468 }
1469
1470
1471 // ------------------------------------------------------------------------
1472
1473
1474 systemtap_session::systemtap_session ():
1475   // NB: pointer members must be manually initialized!
1476   pattern_root(new match_node),
1477   user_file (0),
1478   be_derived_probes(0),
1479   dwarf_derived_probes(0),
1480   kprobe_derived_probes(0),
1481   uprobe_derived_probes(0),
1482   utrace_derived_probes(0),
1483   itrace_derived_probes(0),
1484   task_finder_derived_probes(0),
1485   timer_derived_probes(0),
1486   profile_derived_probes(0),
1487   mark_derived_probes(0),
1488   tracepoint_derived_probes(0),
1489   hrtimer_derived_probes(0),
1490   perfmon_derived_probes(0),
1491   procfs_derived_probes(0),
1492   op (0), up (0),
1493   sym_kprobes_text_start (0),
1494   sym_kprobes_text_end (0),
1495   sym_stext (0),
1496   module_cache (0),
1497   last_token (0)
1498 {
1499 }
1500
1501
1502 // Print this given token, but abbreviate it if the last one had the
1503 // same file name.
1504 void
1505 systemtap_session::print_token (ostream& o, const token* tok)
1506 {
1507   assert (tok);
1508
1509   if (last_token && last_token->location.file == tok->location.file)
1510     {
1511       stringstream tmpo;
1512       tmpo << *tok;
1513       string ts = tmpo.str();
1514       // search & replace the file name with nothing
1515       size_t idx = ts.find (tok->location.file->name);
1516       if (idx != string::npos)
1517           ts.replace (idx, tok->location.file->name.size(), "");
1518
1519       o << ts;
1520     }
1521   else
1522     o << *tok;
1523
1524   last_token = tok;
1525 }
1526
1527
1528
1529 void
1530 systemtap_session::print_error (const semantic_error& e)
1531 {
1532   string message_str[2];
1533   string align_semantic_error ("        ");
1534
1535   // We generate two messages.  The second one ([1]) is printed
1536   // without token compression, for purposes of duplicate elimination.
1537   // This way, the same message that may be generated once with a
1538   // compressed and once with an uncompressed token still only gets
1539   // printed once.
1540   for (int i=0; i<2; i++)
1541     {
1542       stringstream message;
1543
1544       message << "semantic error: " << e.what ();
1545       if (e.tok1 || e.tok2)
1546         message << ": ";
1547       if (e.tok1)
1548         {
1549           if (i == 0) print_token (message, e.tok1);
1550           else message << *e.tok1;
1551         }
1552       message << e.msg2;
1553       if (e.tok2)
1554         {
1555           if (i == 0) print_token (message, e.tok2);
1556           else message << *e.tok2;
1557         }
1558       message << endl;
1559       message_str[i] = message.str();
1560     }
1561
1562   // Duplicate elimination
1563   if (seen_errors.find (message_str[1]) == seen_errors.end())
1564     {
1565       seen_errors.insert (message_str[1]);
1566       cerr << message_str[0];
1567
1568       if (e.tok1)
1569         print_error_source (cerr, align_semantic_error, e.tok1);
1570
1571       if (e.tok2)
1572         print_error_source (cerr, align_semantic_error, e.tok2);
1573     }
1574
1575   if (e.chain)
1576     print_error (* e.chain);
1577 }
1578
1579 void
1580 systemtap_session::print_error_source (std::ostream& message,
1581                                        std::string& align, const token* tok)
1582 {
1583   unsigned i = 0;
1584
1585   assert (tok);
1586   if (!tok->location.file)
1587     //No source to print, silently exit
1588     return;
1589
1590   unsigned line = tok->location.line;
1591   unsigned col = tok->location.column;
1592   const string &file_contents = tok->location.file->file_contents;
1593
1594   size_t start_pos = 0, end_pos = 0;
1595   //Navigate to the appropriate line
1596   while (i != line && end_pos != std::string::npos)
1597     {
1598       start_pos = end_pos;
1599       end_pos = file_contents.find ('\n', start_pos) + 1;
1600       i++;
1601     }
1602   message << align << "source: " << file_contents.substr (start_pos, end_pos-start_pos-1) << endl;
1603   message << align << "        ";
1604   //Navigate to the appropriate column
1605   for (i=start_pos; i<start_pos+col-1; i++)
1606     {
1607       if(isspace(file_contents[i]))
1608         message << file_contents[i];
1609       else
1610         message << ' ';
1611     }
1612   message << "^" << endl;
1613 }
1614
1615 void
1616 systemtap_session::print_warning (const string& message_str, const token* tok)
1617 {
1618   // Duplicate elimination
1619   string align_warning (" ");
1620   if (seen_warnings.find (message_str) == seen_warnings.end())
1621     {
1622       seen_warnings.insert (message_str);
1623       clog << "WARNING: " << message_str;
1624       if (tok) { clog << ": "; print_token (clog, tok); }
1625       clog << endl;
1626       if (tok) { print_error_source (clog, align_warning, tok); }
1627     }
1628 }
1629
1630
1631 // ------------------------------------------------------------------------
1632 // semantic processing: symbol resolution
1633
1634
1635 symresolution_info::symresolution_info (systemtap_session& s):
1636   session (s), current_function (0), current_probe (0)
1637 {
1638 }
1639
1640
1641 void
1642 symresolution_info::visit_block (block* e)
1643 {
1644   for (unsigned i=0; i<e->statements.size(); i++)
1645     {
1646       try
1647         {
1648           e->statements[i]->visit (this);
1649         }
1650       catch (const semantic_error& e)
1651         {
1652           session.print_error (e);
1653         }
1654     }
1655 }
1656
1657
1658 void
1659 symresolution_info::visit_foreach_loop (foreach_loop* e)
1660 {
1661   for (unsigned i=0; i<e->indexes.size(); i++)
1662     e->indexes[i]->visit (this);
1663
1664   symbol *array = NULL;
1665   hist_op *hist = NULL;
1666   classify_indexable (e->base, array, hist);
1667
1668   if (array)
1669     {
1670       if (!array->referent)
1671         {
1672           vardecl* d = find_var (array->name, e->indexes.size ());
1673           if (d)
1674             array->referent = d;
1675           else
1676             {
1677               stringstream msg;
1678               msg << "unresolved arity-" << e->indexes.size()
1679                   << " global array " << array->name;
1680               throw semantic_error (msg.str(), e->tok);
1681             }
1682         }
1683     }
1684   else
1685     {
1686       assert (hist);
1687       hist->visit (this);
1688     }
1689
1690   if (e->limit)
1691     e->limit->visit (this);
1692
1693   e->block->visit (this);
1694 }
1695
1696
1697 struct
1698 delete_statement_symresolution_info:
1699   public traversing_visitor
1700 {
1701   symresolution_info *parent;
1702
1703   delete_statement_symresolution_info (symresolution_info *p):
1704     parent(p)
1705   {}
1706
1707   void visit_arrayindex (arrayindex* e)
1708   {
1709     parent->visit_arrayindex (e);
1710   }
1711   void visit_functioncall (functioncall* e)
1712   {
1713     parent->visit_functioncall (e);
1714   }
1715
1716   void visit_symbol (symbol* e)
1717   {
1718     if (e->referent)
1719       return;
1720
1721     vardecl* d = parent->find_var (e->name, -1);
1722     if (d)
1723       e->referent = d;
1724     else
1725       throw semantic_error ("unresolved array in delete statement", e->tok);
1726   }
1727 };
1728
1729 void
1730 symresolution_info::visit_delete_statement (delete_statement* s)
1731 {
1732   delete_statement_symresolution_info di (this);
1733   s->value->visit (&di);
1734 }
1735
1736
1737 void
1738 symresolution_info::visit_symbol (symbol* e)
1739 {
1740   if (e->referent)
1741     return;
1742
1743   vardecl* d = find_var (e->name, 0);
1744   if (d)
1745     e->referent = d;
1746   else
1747     {
1748       // new local
1749       vardecl* v = new vardecl;
1750       v->name = e->name;
1751       v->tok = e->tok;
1752       if (current_function)
1753         current_function->locals.push_back (v);
1754       else if (current_probe)
1755         current_probe->locals.push_back (v);
1756       else
1757         // must be probe-condition expression
1758         throw semantic_error ("probe condition must not reference undeclared global", e->tok);
1759       e->referent = v;
1760     }
1761 }
1762
1763
1764 void
1765 symresolution_info::visit_arrayindex (arrayindex* e)
1766 {
1767   for (unsigned i=0; i<e->indexes.size(); i++)
1768     e->indexes[i]->visit (this);
1769
1770   symbol *array = NULL;
1771   hist_op *hist = NULL;
1772   classify_indexable(e->base, array, hist);
1773
1774   if (array)
1775     {
1776       if (array->referent)
1777         return;
1778
1779       vardecl* d = find_var (array->name, e->indexes.size ());
1780       if (d)
1781         array->referent = d;
1782       else
1783         {
1784           // new local
1785           vardecl* v = new vardecl;
1786           v->set_arity(e->indexes.size());
1787           v->name = array->name;
1788           v->tok = array->tok;
1789           if (current_function)
1790             current_function->locals.push_back (v);
1791           else if (current_probe)
1792             current_probe->locals.push_back (v);
1793           else
1794             // must not happen
1795             throw semantic_error ("no current probe/function", e->tok);
1796           array->referent = v;
1797         }
1798     }
1799   else
1800     {
1801       assert (hist);
1802       hist->visit (this);
1803     }
1804 }
1805
1806
1807 void
1808 symresolution_info::visit_functioncall (functioncall* e)
1809 {
1810   // XXX: we could relax this, if we're going to examine the
1811   // vartracking data recursively.  See testsuite/semko/fortytwo.stp.
1812   if (! (current_function || current_probe))
1813     {
1814       // must be probe-condition expression
1815       throw semantic_error ("probe condition must not reference function", e->tok);
1816     }
1817
1818   for (unsigned i=0; i<e->args.size(); i++)
1819     e->args[i]->visit (this);
1820
1821   if (e->referent)
1822     return;
1823
1824   functiondecl* d = find_function (e->function, e->args.size ());
1825   if (d)
1826     e->referent = d;
1827   else
1828     {
1829       stringstream msg;
1830       msg << "unresolved arity-" << e->args.size()
1831           << " function";
1832       throw semantic_error (msg.str(), e->tok);
1833     }
1834 }
1835
1836
1837 vardecl*
1838 symresolution_info::find_var (const string& name, int arity)
1839 {
1840   if (current_function || current_probe)
1841     {
1842       // search locals
1843       vector<vardecl*>& locals = (current_function ?
1844                                   current_function->locals :
1845                                   current_probe->locals);
1846
1847
1848       for (unsigned i=0; i<locals.size(); i++)
1849         if (locals[i]->name == name
1850             && locals[i]->compatible_arity(arity))
1851           {
1852             locals[i]->set_arity (arity);
1853             return locals[i];
1854           }
1855     }
1856
1857   // search function formal parameters (for scalars)
1858   if (arity == 0 && current_function)
1859     for (unsigned i=0; i<current_function->formal_args.size(); i++)
1860       if (current_function->formal_args[i]->name == name)
1861         {
1862           // NB: no need to check arity here: formal args always scalar
1863           current_function->formal_args[i]->set_arity (0);
1864           return current_function->formal_args[i];
1865         }
1866
1867   // search processed globals
1868   for (unsigned i=0; i<session.globals.size(); i++)
1869     if (session.globals[i]->name == name
1870         && session.globals[i]->compatible_arity(arity))
1871       {
1872         session.globals[i]->set_arity (arity);
1873         return session.globals[i];
1874       }
1875
1876   // search library globals
1877   for (unsigned i=0; i<session.library_files.size(); i++)
1878     {
1879       stapfile* f = session.library_files[i];
1880       for (unsigned j=0; j<f->globals.size(); j++)
1881         {
1882           vardecl* g = f->globals[j];
1883           if (g->name == name && g->compatible_arity (arity))
1884             {
1885               g->set_arity (arity);
1886
1887               // put library into the queue if not already there
1888               if (find (session.files.begin(), session.files.end(), f)
1889                   == session.files.end())
1890                 session.files.push_back (f);
1891
1892               return g;
1893             }
1894         }
1895     }
1896
1897   return 0;
1898 }
1899
1900
1901 functiondecl*
1902 symresolution_info::find_function (const string& name, unsigned arity)
1903 {
1904   // the common path
1905   if (session.functions.find(name) != session.functions.end())
1906     {
1907       functiondecl* fd = session.functions[name];
1908       assert (fd->name == name);
1909       if (fd->formal_args.size() == arity)
1910         return fd;
1911     }
1912
1913   // search library globals
1914   for (unsigned i=0; i<session.library_files.size(); i++)
1915     {
1916       stapfile* f = session.library_files[i];
1917       for (unsigned j=0; j<f->functions.size(); j++)
1918         if (f->functions[j]->name == name &&
1919             f->functions[j]->formal_args.size() == arity)
1920           {
1921             // put library into the queue if not already there
1922             if (0) // session.verbose_resolution
1923               cerr << "      function " << name << " "
1924                    << "is defined from " << f->name << endl;
1925
1926             if (find (session.files.begin(), session.files.end(), f)
1927                 == session.files.end())
1928               session.files.push_back (f);
1929             // else .. print different message?
1930
1931             return f->functions[j];
1932           }
1933     }
1934
1935   return 0;
1936 }
1937
1938
1939
1940 // ------------------------------------------------------------------------
1941 // optimization
1942
1943
1944 // Do away with functiondecls that are never (transitively) called
1945 // from probes.
1946 void semantic_pass_opt1 (systemtap_session& s, bool& relaxed_p)
1947 {
1948   functioncall_traversing_visitor ftv;
1949   for (unsigned i=0; i<s.probes.size(); i++)
1950     {
1951       s.probes[i]->body->visit (& ftv);
1952       if (s.probes[i]->sole_location()->condition)
1953         s.probes[i]->sole_location()->condition->visit (& ftv);
1954     }
1955   vector<functiondecl*> new_unused_functions;
1956   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
1957     {
1958       functiondecl* fd = it->second;
1959       if (ftv.traversed.find(fd) == ftv.traversed.end())
1960         {
1961           if (fd->tok->location.file->name == s.user_file->name && // !tapset
1962               ! s.suppress_warnings)
1963             s.print_warning ("eliding unused function '" + fd->name + "'", fd->tok);
1964           else if (s.verbose>2)
1965             clog << "Eliding unused function " << fd->name
1966                  << endl;
1967           // s.functions.erase (it); // NB: can't, since we're already iterating upon it
1968           new_unused_functions.push_back (fd);
1969           relaxed_p = false;
1970         }
1971     }
1972   for (unsigned i=0; i<new_unused_functions.size(); i++)
1973     {
1974       map<string,functiondecl*>::iterator where = s.functions.find (new_unused_functions[i]->name);
1975       assert (where != s.functions.end());
1976       s.functions.erase (where);
1977       if (s.tapset_compile_coverage)
1978         s.unused_functions.push_back (new_unused_functions[i]);
1979     }
1980 }
1981
1982
1983 // ------------------------------------------------------------------------
1984
1985 // Do away with local & global variables that are never
1986 // written nor read.
1987 void semantic_pass_opt2 (systemtap_session& s, bool& relaxed_p, unsigned iterations)
1988 {
1989   varuse_collecting_visitor vut(s);
1990
1991   for (unsigned i=0; i<s.probes.size(); i++)
1992     {
1993       s.probes[i]->body->visit (& vut);
1994
1995       if (s.probes[i]->sole_location()->condition)
1996         s.probes[i]->sole_location()->condition->visit (& vut);
1997     }
1998
1999   // NB: Since varuse_collecting_visitor also traverses down
2000   // actually called functions, we don't need to explicitly
2001   // iterate over them.  Uncalled ones should have been pruned
2002   // in _opt1 above.
2003   //
2004   // for (unsigned i=0; i<s.functions.size(); i++)
2005   //   s.functions[i]->body->visit (& vut);
2006
2007   // Now in vut.read/written, we have a mixture of all locals, globals
2008
2009   for (unsigned i=0; i<s.probes.size(); i++)
2010     for (unsigned j=0; j<s.probes[i]->locals.size(); /* see below */)
2011       {
2012         vardecl* l = s.probes[i]->locals[j];
2013
2014         if (vut.read.find (l) == vut.read.end() &&
2015             vut.written.find (l) == vut.written.end())
2016           {
2017             if (l->tok->location.file->name == s.user_file->name && // !tapset
2018                 ! s.suppress_warnings)
2019               s.print_warning ("eliding unused variable '" + l->name + "'", l->tok);
2020             else if (s.verbose>2)
2021               clog << "Eliding unused local variable "
2022                    << l->name << " in " << s.probes[i]->name << endl;
2023             if (s.tapset_compile_coverage) {
2024               s.probes[i]->unused_locals.push_back
2025                       (s.probes[i]->locals[j]);
2026             }
2027             s.probes[i]->locals.erase(s.probes[i]->locals.begin() + j);
2028             relaxed_p = false;
2029             // don't increment j
2030           }
2031         else
2032           {
2033             if (vut.written.find (l) == vut.written.end())
2034               if (iterations == 0 && ! s.suppress_warnings)
2035                   {
2036                     stringstream o;
2037                     vector<vardecl*>::iterator it;
2038                     for (it = s.probes[i]->locals.begin(); it != s.probes[i]->locals.end(); it++)
2039                       if (l->name != (*it)->name)
2040                         o << " " <<  (*it)->name;
2041                     for (it = s.globals.begin(); it != s.globals.end(); it++)
2042                       if (l->name != (*it)->name)
2043                         o << " " <<  (*it)->name;
2044
2045                     s.print_warning ("read-only local variable '" + l->name + "' " +
2046                                      (o.str() == "" ? "" : ("(alternatives:" + o.str() + ")")), l->tok);
2047                   }
2048             j++;
2049           }
2050       }
2051
2052   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
2053     {
2054       functiondecl *fd = it->second;
2055       for (unsigned j=0; j<fd->locals.size(); /* see below */)
2056         {
2057           vardecl* l = fd->locals[j];
2058           if (vut.read.find (l) == vut.read.end() &&
2059               vut.written.find (l) == vut.written.end())
2060             {
2061               if (l->tok->location.file->name == s.user_file->name && // !tapset
2062                   ! s.suppress_warnings)
2063                 s.print_warning ("eliding unused variable '" + l->name + "'", l->tok);
2064               else if (s.verbose>2)
2065                 clog << "Eliding unused local variable "
2066                      << l->name << " in function " << fd->name
2067                      << endl;
2068               if (s.tapset_compile_coverage) {
2069                 fd->unused_locals.push_back (fd->locals[j]);
2070               }
2071               fd->locals.erase(fd->locals.begin() + j);
2072               relaxed_p = false;
2073               // don't increment j
2074             }
2075           else
2076             {
2077               if (vut.written.find (l) == vut.written.end())
2078                 if (iterations == 0 && ! s.suppress_warnings)
2079                   {
2080                     stringstream o;
2081                     vector<vardecl*>::iterator it;
2082                     for (it = fd->formal_args.begin() ;
2083                          it != fd->formal_args.end(); it++)
2084                       if (l->name != (*it)->name)
2085                         o << " " << (*it)->name;
2086                     for (it = fd->locals.begin(); it != fd->locals.end(); it++)
2087                       if (l->name != (*it)->name)
2088                         o << " " << (*it)->name;
2089                     for (it = s.globals.begin(); it != s.globals.end(); it++)
2090                       if (l->name != (*it)->name)
2091                         o << " " << (*it)->name;
2092
2093                     s.print_warning ("read-only local variable '" + l->name + "' " +
2094                                      (o.str() == "" ? "" : ("(alternatives:" + o.str() + ")")), l->tok);
2095                   }
2096
2097               j++;
2098             }
2099         }
2100     }
2101   for (unsigned i=0; i<s.globals.size(); /* see below */)
2102     {
2103       vardecl* l = s.globals[i];
2104       if (vut.read.find (l) == vut.read.end() &&
2105           vut.written.find (l) == vut.written.end())
2106         {
2107           if (l->tok->location.file->name == s.user_file->name && // !tapset
2108               ! s.suppress_warnings)
2109             s.print_warning ("eliding unused variable '" + l->name + "'", l->tok);
2110           else if (s.verbose>2)
2111             clog << "Eliding unused global variable "
2112                  << l->name << endl;
2113           if (s.tapset_compile_coverage) {
2114             s.unused_globals.push_back(s.globals[i]);
2115           }
2116           s.globals.erase(s.globals.begin() + i);
2117           relaxed_p = false;
2118           // don't increment i
2119         }
2120       else
2121         {
2122           if (vut.written.find (l) == vut.written.end() && ! l->init) // no initializer
2123             if (iterations == 0 && ! s.suppress_warnings)
2124               {
2125                 stringstream o;
2126                 vector<vardecl*>::iterator it;
2127                 for (it = s.globals.begin(); it != s.globals.end(); it++)
2128                   if (l->name != (*it)->name)
2129                     o << " " << (*it)->name;
2130
2131                 s.print_warning ("read-only global variable '" + l->name + "' " +
2132                                  (o.str() == "" ? "" : ("(alternatives:" + o.str() + ")")), l->tok);
2133               }
2134
2135           i++;
2136         }
2137     }
2138 }
2139
2140
2141 // ------------------------------------------------------------------------
2142
2143 struct dead_assignment_remover: public update_visitor
2144 {
2145   systemtap_session& session;
2146   bool& relaxed_p;
2147   const varuse_collecting_visitor& vut;
2148
2149   dead_assignment_remover(systemtap_session& s, bool& r,
2150                           const varuse_collecting_visitor& v):
2151     session(s), relaxed_p(r), vut(v) {}
2152
2153   void visit_assignment (assignment* e);
2154 };
2155
2156
2157 void
2158 dead_assignment_remover::visit_assignment (assignment* e)
2159 {
2160   replace (e->left);
2161   replace (e->right);
2162
2163   symbol* left = get_symbol_within_expression (e->left);
2164   vardecl* leftvar = left->referent; // NB: may be 0 for unresolved $target
2165   if (leftvar) // not unresolved $target, so intended sideeffect may be elided
2166     {
2167       if (vut.read.find(leftvar) == vut.read.end()) // var never read?
2168         {
2169           // NB: Not so fast!  The left side could be an array whose
2170           // index expressions may have side-effects.  This would be
2171           // OK if we could replace the array assignment with a
2172           // statement-expression containing all the index expressions
2173           // and the rvalue... but we can't.
2174           // Another possibility is that we have an unread global variable
2175           // which are kept for probe end value display.
2176
2177           bool is_global = false;
2178           vector<vardecl*>::iterator it;
2179           for (it = session.globals.begin(); it != session.globals.end(); it++)
2180             if (leftvar->name == (*it)->name)
2181               {
2182                 is_global = true;
2183                 break;
2184               }
2185
2186           varuse_collecting_visitor lvut(session);
2187           e->left->visit (& lvut);
2188           if (lvut.side_effect_free () && !is_global) // XXX: use _wrt() once we track focal_vars
2189             {
2190               /* PR 1119: NB: This is not necessary here.  A write-only
2191                  variable will also be elided soon at the next _opt2 iteration.
2192               if (e->left->tok->location.file == session.user_file->name && // !tapset
2193                   ! session.suppress_warnings)
2194                 clog << "WARNING: eliding write-only " << *e->left->tok << endl;
2195               else
2196               */
2197               if (session.verbose>2)
2198                 clog << "Eliding assignment to " << leftvar->name
2199                      << " at " << *e->tok << endl;
2200
2201               provide (e->right); // goodbye assignment*
2202               relaxed_p = false;
2203               return;
2204             }
2205         }
2206     }
2207   provide (e);
2208 }
2209
2210 // Let's remove assignments to variables that are never read.  We
2211 // rewrite "(foo = expr)" as "(expr)".  This makes foo a candidate to
2212 // be optimized away as an unused variable, and expr a candidate to be
2213 // removed as a side-effect-free statement expression.  Wahoo!
2214 void semantic_pass_opt3 (systemtap_session& s, bool& relaxed_p)
2215 {
2216   // Recompute the varuse data, which will probably match the opt2
2217   // copy of the computation, except for those totally unused
2218   // variables that opt2 removed.
2219   varuse_collecting_visitor vut(s);
2220   for (unsigned i=0; i<s.probes.size(); i++)
2221     s.probes[i]->body->visit (& vut); // includes reachable functions too
2222
2223   dead_assignment_remover dar (s, relaxed_p, vut);
2224   // This instance may be reused for multiple probe/function body trims.
2225
2226   for (unsigned i=0; i<s.probes.size(); i++)
2227     dar.replace (s.probes[i]->body);
2228   for (map<string,functiondecl*>::iterator it = s.functions.begin();
2229        it != s.functions.end(); it++)
2230     dar.replace (it->second->body);
2231   // The rewrite operation is performed within the visitor.
2232
2233   // XXX: we could also zap write-only globals here
2234 }
2235
2236
2237 // ------------------------------------------------------------------------
2238
2239 struct dead_stmtexpr_remover: public update_visitor
2240 {
2241   systemtap_session& session;
2242   bool& relaxed_p;
2243   set<vardecl*> focal_vars; // vars considered subject to side-effects
2244
2245   dead_stmtexpr_remover(systemtap_session& s, bool& r):
2246     session(s), relaxed_p(r) {}
2247
2248   void visit_block (block *s);
2249   void visit_null_statement (null_statement *s);
2250   void visit_if_statement (if_statement* s);
2251   void visit_foreach_loop (foreach_loop *s);
2252   void visit_for_loop (for_loop *s);
2253   // XXX: and other places where stmt_expr's might be nested
2254
2255   void visit_expr_statement (expr_statement *s);
2256 };
2257
2258
2259 void
2260 dead_stmtexpr_remover::visit_null_statement (null_statement *s)
2261 {
2262   // easy!
2263   if (session.verbose>2)
2264     clog << "Eliding side-effect-free null statement " << *s->tok << endl;
2265   s = 0;
2266   provide (s);
2267 }
2268
2269
2270 void
2271 dead_stmtexpr_remover::visit_block (block *s)
2272 {
2273   vector<statement*> new_stmts;
2274   for (unsigned i=0; i<s->statements.size(); i++ )
2275     {
2276       statement* new_stmt = require (s->statements[i], true);
2277       if (new_stmt != 0)
2278         {
2279           // flatten nested blocks into this one
2280           block *b = dynamic_cast<block *>(new_stmt);
2281           if (b)
2282             {
2283               if (session.verbose>2)
2284                 clog << "Flattening nested block " << *b->tok << endl;
2285               new_stmts.insert(new_stmts.end(),
2286                   b->statements.begin(), b->statements.end());
2287               relaxed_p = false;
2288             }
2289           else
2290             new_stmts.push_back (new_stmt);
2291         }
2292     }
2293   if (new_stmts.size() == 0)
2294     {
2295       if (session.verbose>2)
2296         clog << "Eliding side-effect-free empty block " << *s->tok << endl;
2297       s = 0;
2298     }
2299   else if (new_stmts.size() == 1)
2300     {
2301       if (session.verbose>2)
2302         clog << "Eliding side-effect-free singleton block " << *s->tok << endl;
2303       provide (new_stmts[0]);
2304       return;
2305     }
2306   else
2307     s->statements = new_stmts;
2308   provide (s);
2309 }
2310
2311 void
2312 dead_stmtexpr_remover::visit_if_statement (if_statement *s)
2313 {
2314   replace (s->thenblock, true);
2315   replace (s->elseblock, true);
2316
2317   if (s->thenblock == 0)
2318     {
2319       if (s->elseblock == 0)
2320         {
2321           // We may be able to elide this statement, if the condition
2322           // expression is side-effect-free.
2323           varuse_collecting_visitor vct(session);
2324           s->condition->visit(& vct);
2325           if (vct.side_effect_free ())
2326             {
2327               if (session.verbose>2)
2328                 clog << "Eliding side-effect-free if statement "
2329                      << *s->tok << endl;
2330               s = 0; // yeah, baby
2331             }
2332           else
2333             {
2334               // We can still turn it into a simple expr_statement though...
2335               if (session.verbose>2)
2336                 clog << "Creating simple evaluation from if statement "
2337                      << *s->tok << endl;
2338               expr_statement *es = new expr_statement;
2339               es->value = s->condition;
2340               es->tok = es->value->tok;
2341               provide (es);
2342               return;
2343             }
2344         }
2345       else
2346         {
2347           // For an else without a then, we can invert the condition logic to
2348           // avoid having a null statement in the thenblock
2349           if (session.verbose>2)
2350             clog << "Inverting the condition of if statement "
2351                  << *s->tok << endl;
2352           unary_expression *ue = new unary_expression;
2353           ue->operand = s->condition;
2354           ue->tok = ue->operand->tok;
2355           ue->op = "!";
2356           s->condition = ue;
2357           s->thenblock = s->elseblock;
2358           s->elseblock = 0;
2359         }
2360     }
2361   provide (s);
2362 }
2363
2364 void
2365 dead_stmtexpr_remover::visit_foreach_loop (foreach_loop *s)
2366 {
2367   replace (s->block, true);
2368
2369   if (s->block == 0)
2370     {
2371       if (session.verbose>2)
2372         clog << "Eliding side-effect-free foreach statement " << *s->tok << endl;
2373       s = 0; // yeah, baby
2374     }
2375   provide (s);
2376 }
2377
2378 void
2379 dead_stmtexpr_remover::visit_for_loop (for_loop *s)
2380 {
2381   replace (s->block, true);
2382
2383   if (s->block == 0)
2384     {
2385       // We may be able to elide this statement, if the condition
2386       // expression is side-effect-free.
2387       varuse_collecting_visitor vct(session);
2388       if (s->init) s->init->visit(& vct);
2389       s->cond->visit(& vct);
2390       if (s->incr) s->incr->visit(& vct);
2391       if (vct.side_effect_free ())
2392         {
2393           if (session.verbose>2)
2394             clog << "Eliding side-effect-free for statement " << *s->tok << endl;
2395           s = 0; // yeah, baby
2396         }
2397       else
2398         {
2399           // Can't elide this whole statement; put a null in there.
2400           s->block = new null_statement();
2401           s->block->tok = s->tok;
2402         }
2403     }
2404   provide (s);
2405 }
2406
2407
2408
2409 void
2410 dead_stmtexpr_remover::visit_expr_statement (expr_statement *s)
2411 {
2412   // Run a varuse query against the operand expression.  If it has no
2413   // side-effects, replace the entire statement expression by a null
2414   // statement with the provide() call.
2415   //
2416   // Unlike many other visitors, we do *not* traverse this outermost
2417   // one into the expression subtrees.  There is no need - no
2418   // expr_statement nodes will be found there.  (Function bodies
2419   // need to be visited explicitly by our caller.)
2420   //
2421   // NB.  While we don't share nodes in the parse tree, let's not
2422   // deallocate *s anyway, just in case...
2423
2424   varuse_collecting_visitor vut(session);
2425   s->value->visit (& vut);
2426
2427   if (vut.side_effect_free_wrt (focal_vars))
2428     {
2429       /* PR 1119: NB: this message is not a good idea here.  It can
2430          name some arbitrary RHS expression of an assignment.
2431       if (s->value->tok->location.file == session.user_file->name && // not tapset
2432           ! session.suppress_warnings)
2433         clog << "WARNING: eliding read-only " << *s->value->tok << endl;
2434       else
2435       */
2436       if (session.verbose>2)
2437         clog << "Eliding side-effect-free expression "
2438              << *s->tok << endl;
2439
2440       // NB: this 0 pointer is invalid to leave around for any length of
2441       // time, but the parent parse tree objects above handle it.
2442       s = 0;
2443       relaxed_p = false;
2444     }
2445   provide (s);
2446 }
2447
2448
2449 void semantic_pass_opt4 (systemtap_session& s, bool& relaxed_p)
2450 {
2451   // Finally, let's remove some statement-expressions that have no
2452   // side-effect.  These should be exactly those whose private varuse
2453   // visitors come back with an empty "written" and "embedded" lists.
2454
2455   dead_stmtexpr_remover duv (s, relaxed_p);
2456   // This instance may be reused for multiple probe/function body trims.
2457
2458   for (unsigned i=0; i<s.probes.size(); i++)
2459     {
2460       if (pending_interrupts) break;
2461
2462       derived_probe* p = s.probes[i];
2463
2464       duv.focal_vars.clear ();
2465       duv.focal_vars.insert (s.globals.begin(),
2466                              s.globals.end());
2467       duv.focal_vars.insert (p->locals.begin(),
2468                              p->locals.end());
2469
2470       duv.replace (p->body, true);
2471       if (p->body == 0)
2472         {
2473           if (! s.suppress_warnings
2474               && ! s.timing) // PR10070
2475             s.print_warning ("side-effect-free probe '" + p->name + "'", p->tok);
2476
2477           p->body = new null_statement();
2478           p->body->tok = p->tok;
2479
2480           // XXX: possible duplicate warnings; see below
2481         }
2482     }
2483   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
2484     {
2485       if (pending_interrupts) break;
2486
2487       functiondecl* fn = it->second;
2488       duv.focal_vars.clear ();
2489       duv.focal_vars.insert (fn->locals.begin(),
2490                              fn->locals.end());
2491       duv.focal_vars.insert (fn->formal_args.begin(),
2492                              fn->formal_args.end());
2493       duv.focal_vars.insert (s.globals.begin(),
2494                              s.globals.end());
2495
2496       duv.replace (fn->body, true);
2497       if (fn->body == 0)
2498         {
2499           if (! s.suppress_warnings)
2500             s.print_warning ("side-effect-free function '" + fn->name + "'", fn->tok);
2501
2502           fn->body = new null_statement();
2503           fn->body->tok = fn->tok;
2504
2505           // XXX: the next iteration of the outer optimization loop may
2506           // take this new null_statement away again, and thus give us a
2507           // fresh warning.  It would be better if this fixup was performed
2508           // only after the relaxation iterations.
2509           // XXX: or else see bug #6469.
2510         }
2511     }
2512 }
2513
2514
2515 // ------------------------------------------------------------------------
2516
2517 // The goal of this visitor is to reduce top-level expressions in void context
2518 // into separate statements that evaluate each subcomponent of the expression.
2519 // The dead-statement-remover can later remove some parts if they have no side
2520 // effects.
2521 //
2522 // All expressions must be overridden here so we never visit their subexpressions
2523 // accidentally.  Thus, the only visited expressions should be value of an
2524 // expr_statement.
2525 //
2526 // For an expression to replace its expr_statement with something else, it will
2527 // let the new statement provide(), and then provide(0) for itself.  The
2528 // expr_statement will take this as a sign that it's been replaced.
2529 struct void_statement_reducer: public update_visitor
2530 {
2531   systemtap_session& session;
2532   bool& relaxed_p;
2533   set<vardecl*> focal_vars; // vars considered subject to side-effects
2534
2535   void_statement_reducer(systemtap_session& s, bool& r):
2536     session(s), relaxed_p(r) {}
2537
2538   void visit_expr_statement (expr_statement* s);
2539
2540   // expressions in conditional / loop controls are definitely a side effect,
2541   // but still recurse into the child statements
2542   void visit_if_statement (if_statement* s);
2543   void visit_for_loop (for_loop* s);
2544   void visit_foreach_loop (foreach_loop* s);
2545
2546   // these expressions get rewritten into their statement equivalents
2547   void visit_logical_or_expr (logical_or_expr* e);
2548   void visit_logical_and_expr (logical_and_expr* e);
2549   void visit_ternary_expression (ternary_expression* e);
2550
2551   // all of these can be reduced into simpler statements
2552   void visit_binary_expression (binary_expression* e);
2553   void visit_unary_expression (unary_expression* e);
2554   void visit_comparison (comparison* e);
2555   void visit_concatenation (concatenation* e);
2556   void visit_functioncall (functioncall* e);
2557   void visit_print_format (print_format* e);
2558   void visit_target_symbol (target_symbol* e);
2559   void visit_cast_op (cast_op* e);
2560
2561   // these are a bit hairy to grok due to the intricacies of indexables and
2562   // stats, so I'm chickening out and skipping them...
2563   void visit_array_in (array_in* e) { provide (e); }
2564   void visit_arrayindex (arrayindex* e) { provide (e); }
2565   void visit_stat_op (stat_op* e) { provide (e); }
2566   void visit_hist_op (hist_op* e) { provide (e); }
2567
2568   // these can't be reduced because they always have an effect
2569   void visit_return_statement (return_statement* s) { provide (s); }
2570   void visit_delete_statement (delete_statement* s) { provide (s); }
2571   void visit_pre_crement (pre_crement* e) { provide (e); }
2572   void visit_post_crement (post_crement* e) { provide (e); }
2573   void visit_assignment (assignment* e) { provide (e); }
2574 };
2575
2576
2577 void
2578 void_statement_reducer::visit_expr_statement (expr_statement* s)
2579 {
2580   replace (s->value, true);
2581
2582   // if the expression provides 0, that's our signal that a new
2583   // statement has been provided, so we shouldn't provide this one.
2584   if (s->value != 0)
2585     provide(s);
2586 }
2587
2588 void
2589 void_statement_reducer::visit_if_statement (if_statement* s)
2590 {
2591   // s->condition is never void
2592   replace (s->thenblock);
2593   replace (s->elseblock);
2594   provide (s);
2595 }
2596
2597 void
2598 void_statement_reducer::visit_for_loop (for_loop* s)
2599 {
2600   // s->init/cond/incr are never void
2601   replace (s->block);
2602   provide (s);
2603 }
2604
2605 void
2606 void_statement_reducer::visit_foreach_loop (foreach_loop* s)
2607 {
2608   // s->indexes/base/limit are never void
2609   replace (s->block);
2610   provide (s);
2611 }
2612
2613 void
2614 void_statement_reducer::visit_logical_or_expr (logical_or_expr* e)
2615 {
2616   // In void context, the evaluation of "a || b" is exactly like
2617   // "if (!a) b", so let's do that instead.
2618
2619   if (session.verbose>2)
2620     clog << "Creating if statement from unused logical-or "
2621          << *e->tok << endl;
2622
2623   if_statement *is = new if_statement;
2624   is->tok = e->tok;
2625   is->elseblock = 0;
2626
2627   unary_expression *ue = new unary_expression;
2628   ue->operand = e->left;
2629   ue->tok = e->tok;
2630   ue->op = "!";
2631   is->condition = ue;
2632
2633   expr_statement *es = new expr_statement;
2634   es->value = e->right;
2635   es->tok = es->value->tok;
2636   is->thenblock = es;
2637
2638   is->visit(this);
2639   relaxed_p = false;
2640   e = 0;
2641   provide (e);
2642 }
2643
2644 void
2645 void_statement_reducer::visit_logical_and_expr (logical_and_expr* e)
2646 {
2647   // In void context, the evaluation of "a && b" is exactly like
2648   // "if (a) b", so let's do that instead.
2649
2650   if (session.verbose>2)
2651     clog << "Creating if statement from unused logical-and "
2652          << *e->tok << endl;
2653
2654   if_statement *is = new if_statement;
2655   is->tok = e->tok;
2656   is->elseblock = 0;
2657   is->condition = e->left;
2658
2659   expr_statement *es = new expr_statement;
2660   es->value = e->right;
2661   es->tok = es->value->tok;
2662   is->thenblock = es;
2663
2664   is->visit(this);
2665   relaxed_p = false;
2666   e = 0;
2667   provide (e);
2668 }
2669
2670 void
2671 void_statement_reducer::visit_ternary_expression (ternary_expression* e)
2672 {
2673   // In void context, the evaluation of "a ? b : c" is exactly like
2674   // "if (a) b else c", so let's do that instead.
2675
2676   if (session.verbose>2)
2677     clog << "Creating if statement from unused ternary expression "
2678          << *e->tok << endl;
2679
2680   if_statement *is = new if_statement;
2681   is->tok = e->tok;
2682   is->condition = e->cond;
2683
2684   expr_statement *es = new expr_statement;
2685   es->value = e->truevalue;
2686   es->tok = es->value->tok;
2687   is->thenblock = es;
2688
2689   es = new expr_statement;
2690   es->value = e->falsevalue;
2691   es->tok = es->value->tok;
2692   is->elseblock = es;
2693
2694   is->visit(this);
2695   relaxed_p = false;
2696   e = 0;
2697   provide (e);
2698 }
2699
2700 void
2701 void_statement_reducer::visit_binary_expression (binary_expression* e)
2702 {
2703   // When the result of a binary operation isn't needed, it's just as good to
2704   // evaluate the operands as sequential statements in a block.
2705
2706   if (session.verbose>2)
2707     clog << "Eliding unused binary " << *e->tok << endl;
2708
2709   block *b = new block;
2710   b->tok = e->tok;
2711
2712   expr_statement *es = new expr_statement;
2713   es->value = e->left;
2714   es->tok = es->value->tok;
2715   b->statements.push_back(es);
2716
2717   es = new expr_statement;
2718   es->value = e->right;
2719   es->tok = es->value->tok;
2720   b->statements.push_back(es);
2721
2722   b->visit(this);
2723   relaxed_p = false;
2724   e = 0;
2725   provide (e);
2726 }
2727
2728 void
2729 void_statement_reducer::visit_unary_expression (unary_expression* e)
2730 {
2731   // When the result of a unary operation isn't needed, it's just as good to
2732   // evaluate the operand directly
2733
2734   if (session.verbose>2)
2735     clog << "Eliding unused unary " << *e->tok << endl;
2736
2737   relaxed_p = false;
2738   e->operand->visit(this);
2739 }
2740
2741 void
2742 void_statement_reducer::visit_comparison (comparison* e)
2743 {
2744   visit_binary_expression(e);
2745 }
2746
2747 void
2748 void_statement_reducer::visit_concatenation (concatenation* e)
2749 {
2750   visit_binary_expression(e);
2751 }
2752
2753 void
2754 void_statement_reducer::visit_functioncall (functioncall* e)
2755 {
2756   // If a function call is pure and its result ignored, we can elide the call
2757   // and just evaluate the arguments in sequence
2758
2759   if (!e->args.size())
2760     {
2761       provide (e);
2762       return;
2763     }
2764
2765   varuse_collecting_visitor vut(session);
2766   vut.traversed.insert (e->referent);
2767   vut.current_function = e->referent;
2768   e->referent->body->visit (& vut);
2769   if (!vut.side_effect_free_wrt (focal_vars))
2770     {
2771       provide (e);
2772       return;
2773     }
2774
2775   if (session.verbose>2)
2776     clog << "Eliding side-effect-free function call " << *e->tok << endl;
2777
2778   block *b = new block;
2779   b->tok = e->tok;
2780
2781   for (unsigned i=0; i<e->args.size(); i++ )
2782     {
2783       expr_statement *es = new expr_statement;
2784       es->value = e->args[i];
2785       es->tok = es->value->tok;
2786       b->statements.push_back(es);
2787     }
2788
2789   b->visit(this);
2790   relaxed_p = false;
2791   e = 0;
2792   provide (e);
2793 }
2794
2795 void
2796 void_statement_reducer::visit_print_format (print_format* e)
2797 {
2798   // When an sprint's return value is ignored, we can simply evaluate the
2799   // arguments in sequence
2800
2801   if (e->print_to_stream || !e->args.size())
2802     {
2803       provide (e);
2804       return;
2805     }
2806
2807   if (session.verbose>2)
2808     clog << "Eliding unused print " << *e->tok << endl;
2809
2810   block *b = new block;
2811   b->tok = e->tok;
2812
2813   for (unsigned i=0; i<e->args.size(); i++ )
2814     {
2815       expr_statement *es = new expr_statement;
2816       es->value = e->args[i];
2817       es->tok = es->value->tok;
2818       b->statements.push_back(es);
2819     }
2820
2821   b->visit(this);
2822   relaxed_p = false;
2823   e = 0;
2824   provide (e);
2825 }
2826
2827 void
2828 void_statement_reducer::visit_target_symbol (target_symbol* e)
2829 {
2830   // When target_symbol isn't needed, it's just as good to
2831   // evaluate any array indexes directly
2832
2833   block *b = new block;
2834   b->tok = e->tok;
2835
2836   for (unsigned i=0; i<e->components.size(); i++ )
2837     {
2838       if (e->components[i].type != target_symbol::comp_expression_array_index)
2839         continue;
2840
2841       expr_statement *es = new expr_statement;
2842       es->value = e->components[i].expr_index;
2843       es->tok = es->value->tok;
2844       b->statements.push_back(es);
2845     }
2846
2847   if (b->statements.empty())
2848     {
2849       delete b;
2850       provide (e);
2851       return;
2852     }
2853
2854   if (session.verbose>2)
2855     clog << "Eliding unused target symbol " << *e->tok << endl;
2856
2857   b->visit(this);
2858   relaxed_p = false;
2859   e = 0;
2860   provide (e);
2861 }
2862
2863 void
2864 void_statement_reducer::visit_cast_op (cast_op* e)
2865 {
2866   // When the result of a cast operation isn't needed, it's just as good to
2867   // evaluate the operand and any array indexes directly
2868
2869   block *b = new block;
2870   b->tok = e->tok;
2871
2872   expr_statement *es = new expr_statement;
2873   es->value = e->operand;
2874   es->tok = es->value->tok;
2875   b->statements.push_back(es);
2876
2877   for (unsigned i=0; i<e->components.size(); i++ )
2878     {
2879       if (e->components[i].type != target_symbol::comp_expression_array_index)
2880         continue;
2881
2882       es = new expr_statement;
2883       es->value = e->components[i].expr_index;
2884       es->tok = es->value->tok;
2885       b->statements.push_back(es);
2886     }
2887
2888   if (session.verbose>2)
2889     clog << "Eliding unused typecast " << *e->tok << endl;
2890
2891   b->visit(this);
2892   relaxed_p = false;
2893   e = 0;
2894   provide (e);
2895 }
2896
2897
2898 void semantic_pass_opt5 (systemtap_session& s, bool& relaxed_p)
2899 {
2900   // Let's simplify statements with unused computed values.
2901
2902   void_statement_reducer vuv (s, relaxed_p);
2903   // This instance may be reused for multiple probe/function body trims.
2904
2905   vuv.focal_vars.insert (s.globals.begin(), s.globals.end());
2906
2907   for (unsigned i=0; i<s.probes.size(); i++)
2908     vuv.replace (s.probes[i]->body);
2909   for (map<string,functiondecl*>::iterator it = s.functions.begin();
2910        it != s.functions.end(); it++)
2911     vuv.replace (it->second->body);
2912 }
2913
2914
2915 struct duplicate_function_remover: public functioncall_traversing_visitor
2916 {
2917   systemtap_session& s;
2918   map<functiondecl*, functiondecl*>& duplicate_function_map;
2919
2920   duplicate_function_remover(systemtap_session& sess,
2921                              map<functiondecl*, functiondecl*>&dfm):
2922     s(sess), duplicate_function_map(dfm) {};
2923
2924   void visit_functioncall (functioncall* e);
2925 };
2926
2927 void
2928 duplicate_function_remover::visit_functioncall (functioncall *e)
2929 {
2930   functioncall_traversing_visitor::visit_functioncall (e);
2931
2932   // If the current function call reference points to a function that
2933   // is a duplicate, replace it.
2934   if (duplicate_function_map.count(e->referent) != 0)
2935     {
2936       if (s.verbose>2)
2937           clog << "Changing " << e->referent->name
2938                << " reference to "
2939                << duplicate_function_map[e->referent]->name
2940                << " reference\n";
2941       e->tok = duplicate_function_map[e->referent]->tok;
2942       e->function = duplicate_function_map[e->referent]->name;
2943       e->referent = duplicate_function_map[e->referent];
2944     }
2945 }
2946
2947 static string
2948 get_functionsig (functiondecl* f)
2949 {
2950   ostringstream s;
2951
2952   // Get the "name:args body" of the function in s.  We have to
2953   // include the args since the function 'x1(a, b)' is different than
2954   // the function 'x2(b, a)' even if the bodies of the two functions
2955   // are exactly the same.
2956   f->printsig(s);
2957   f->body->print(s);
2958
2959   // printsig puts f->name + ':' on the front.  Remove this
2960   // (otherwise, functions would never compare equal).
2961   string str = s.str().erase(0, f->name.size() + 1);
2962
2963   // Return the function signature.
2964   return str;
2965 }
2966
2967 void semantic_pass_opt6 (systemtap_session& s, bool& relaxed_p)
2968 {
2969   // Walk through all the functions, looking for duplicates.
2970   map<string, functiondecl*> functionsig_map;
2971   map<functiondecl*, functiondecl*> duplicate_function_map;
2972
2973
2974   vector<functiondecl*> newly_zapped_functions;
2975   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
2976     {
2977       functiondecl *fd = it->second;
2978       string functionsig = get_functionsig(fd);
2979
2980       if (functionsig_map.count(functionsig) == 0)
2981         {
2982           // This function is unique.  Remember it.
2983           functionsig_map[functionsig] = fd;
2984         }
2985       else
2986         {
2987           // This function is a duplicate.
2988           duplicate_function_map[fd] = functionsig_map[functionsig];
2989           newly_zapped_functions.push_back (fd);
2990           relaxed_p = false;
2991         }
2992     }
2993   for (unsigned i=0; i<newly_zapped_functions.size(); i++)
2994     {
2995       map<string,functiondecl*>::iterator where = s.functions.find (newly_zapped_functions[i]->name);
2996       assert (where != s.functions.end());
2997       s.functions.erase (where);
2998     }
2999
3000
3001   // If we have duplicate functions, traverse down the tree, replacing
3002   // the appropriate function calls.
3003   // duplicate_function_remover::visit_functioncall() handles the
3004   // details of replacing the function calls.
3005   if (duplicate_function_map.size() != 0)
3006     {
3007       duplicate_function_remover dfr (s, duplicate_function_map);
3008
3009       for (unsigned i=0; i < s.probes.size(); i++)
3010         s.probes[i]->body->visit(&dfr);
3011     }
3012 }
3013
3014
3015 static int
3016 semantic_pass_optimize1 (systemtap_session& s)
3017 {
3018   // In this pass, we attempt to rewrite probe/function bodies to
3019   // eliminate some blatantly unnecessary code.  This is run before
3020   // type inference, but after symbol resolution and derived_probe
3021   // creation.  We run an outer "relaxation" loop that repeats the
3022   // optimizations until none of them find anything to remove.
3023
3024   int rc = 0;
3025
3026   bool relaxed_p = false;
3027   unsigned iterations = 0;
3028   while (! relaxed_p)
3029     {
3030       if (pending_interrupts) break;
3031
3032       relaxed_p = true; // until proven otherwise
3033
3034       semantic_pass_opt1 (s, relaxed_p);
3035       semantic_pass_opt2 (s, relaxed_p, iterations); // produce some warnings only on iteration=0
3036       semantic_pass_opt3 (s, relaxed_p);
3037       semantic_pass_opt4 (s, relaxed_p);
3038       semantic_pass_opt5 (s, relaxed_p);
3039
3040       iterations ++;
3041     }
3042
3043   return rc;
3044 }
3045
3046
3047 static int
3048 semantic_pass_optimize2 (systemtap_session& s)
3049 {
3050   // This is run after type inference.  We run an outer "relaxation"
3051   // loop that repeats the optimizations until none of them find
3052   // anything to remove.
3053
3054   int rc = 0;
3055
3056   bool relaxed_p = false;
3057   while (! relaxed_p)
3058     {
3059       if (pending_interrupts) break;
3060       relaxed_p = true; // until proven otherwise
3061
3062       semantic_pass_opt6 (s, relaxed_p);
3063     }
3064
3065   return rc;
3066 }
3067
3068
3069
3070 // ------------------------------------------------------------------------
3071 // type resolution
3072
3073
3074 static int
3075 semantic_pass_types (systemtap_session& s)
3076 {
3077   int rc = 0;
3078
3079   // next pass: type inference
3080   unsigned iterations = 0;
3081   typeresolution_info ti (s);
3082
3083   ti.assert_resolvability = false;
3084   // XXX: maybe convert to exception-based error signalling
3085   while (1)
3086     {
3087       if (pending_interrupts) break;
3088
3089       iterations ++;
3090       ti.num_newly_resolved = 0;
3091       ti.num_still_unresolved = 0;
3092
3093   for (map<string,functiondecl*>::iterator it = s.functions.begin(); it != s.functions.end(); it++)
3094         {
3095           if (pending_interrupts) break;
3096
3097           functiondecl* fd = it->second;
3098           ti.current_probe = 0;
3099           ti.current_function = fd;
3100           ti.t = pe_unknown;
3101           fd->body->visit (& ti);
3102           // NB: we don't have to assert a known type for
3103           // functions here, to permit a "void" function.
3104           // The translator phase will omit the "retvalue".
3105           //
3106           // if (fd->type == pe_unknown)
3107           //   ti.unresolved (fd->tok);
3108         }
3109
3110       for (unsigned j=0; j<s.probes.size(); j++)
3111         {
3112           if (pending_interrupts) break;
3113
3114           derived_probe* pn = s.probes[j];
3115           ti.current_function = 0;
3116           ti.current_probe = pn;
3117           ti.t = pe_unknown;
3118           pn->body->visit (& ti);
3119
3120           probe_point* pp = pn->sole_location();
3121           if (pp->condition)
3122             {
3123               ti.current_function = 0;
3124               ti.current_probe = 0;
3125               ti.t = pe_long; // NB: expected type
3126               pp->condition->visit (& ti);
3127             }
3128         }
3129
3130       for (unsigned j=0; j<s.globals.size(); j++)
3131         {
3132           vardecl* gd = s.globals[j];
3133           if (gd->type == pe_unknown)
3134             ti.unresolved (gd->tok);
3135         }
3136
3137       if (ti.num_newly_resolved == 0) // converged
3138         {
3139           if (ti.num_still_unresolved == 0)
3140             break; // successfully
3141           else if (! ti.assert_resolvability)
3142             ti.assert_resolvability = true; // last pass, with error msgs
3143           else
3144             { // unsuccessful conclusion
3145               rc ++;
3146               break;
3147             }
3148         }
3149     }
3150
3151   return rc + s.num_errors();
3152 }
3153
3154
3155
3156 typeresolution_info::typeresolution_info (systemtap_session& s):
3157   session(s), current_function(0), current_probe(0)
3158 {
3159 }
3160
3161
3162 void
3163 typeresolution_info::visit_literal_number (literal_number* e)
3164 {
3165   assert (e->type == pe_long);
3166   if ((t == e->type) || (t == pe_unknown))
3167     return;
3168
3169   mismatch (e->tok, e->type, t);
3170 }
3171
3172
3173 void
3174 typeresolution_info::visit_literal_string (literal_string* e)
3175 {
3176   assert (e->type == pe_string);
3177   if ((t == e->type) || (t == pe_unknown))
3178     return;
3179
3180   mismatch (e->tok, e->type, t);
3181 }
3182
3183
3184 void
3185 typeresolution_info::visit_logical_or_expr (logical_or_expr *e)
3186 {
3187   visit_binary_expression (e);
3188 }
3189
3190
3191 void
3192 typeresolution_info::visit_logical_and_expr (logical_and_expr *e)
3193 {
3194   visit_binary_expression (e);
3195 }
3196
3197
3198 void
3199 typeresolution_info::visit_comparison (comparison *e)
3200 {
3201   // NB: result of any comparison is an integer!
3202   if (t == pe_stats || t == pe_string)
3203     invalid (e->tok, t);
3204
3205   t = (e->right->type != pe_unknown) ? e->right->type : pe_unknown;
3206   e->left->visit (this);
3207   t = (e->left->type != pe_unknown) ? e->left->type : pe_unknown;
3208   e->right->visit (this);
3209
3210   if (e->left->type != pe_unknown &&
3211       e->right->type != pe_unknown &&
3212       e->left->type != e->right->type)
3213     mismatch (e->tok, e->left->type, e->right->type);
3214
3215   if (e->type == pe_unknown)
3216     {
3217       e->type = pe_long;
3218       resolved (e->tok, e->type);
3219     }
3220 }
3221
3222
3223 void
3224 typeresolution_info::visit_concatenation (concatenation *e)
3225 {
3226   if (t != pe_unknown && t != pe_string)
3227     invalid (e->tok, t);
3228
3229   t = pe_string;
3230   e->left->visit (this);
3231   t = pe_string;
3232   e->right->visit (this);
3233
3234   if (e->type == pe_unknown)
3235     {
3236       e->type = pe_string;
3237       resolved (e->tok, e->type);
3238     }
3239 }
3240
3241
3242 void
3243 typeresolution_info::visit_assignment (assignment *e)
3244 {
3245   if (t == pe_stats)
3246     invalid (e->tok, t);
3247
3248   if (e->op == "<<<") // stats aggregation
3249     {
3250       if (t == pe_string)
3251         invalid (e->tok, t);
3252
3253       t = pe_stats;
3254       e->left->visit (this);
3255       t = pe_long;
3256       e->right->visit (this);
3257       if (e->type == pe_unknown ||
3258           e->type == pe_stats)
3259         {
3260           e->type = pe_long;
3261           resolved (e->tok, e->type);
3262         }
3263     }
3264
3265   else if (e->left->type == pe_stats)
3266     invalid (e->left->tok, e->left->type);
3267
3268   else if (e->right->type == pe_stats)
3269     invalid (e->right->tok, e->right->type);
3270
3271   else if (e->op == "+=" || // numeric only
3272            e->op == "-=" ||
3273            e->op == "*=" ||
3274            e->op == "/=" ||
3275            e->op == "%=" ||
3276            e->op == "&=" ||
3277            e->op == "^=" ||
3278            e->op == "|=" ||
3279            e->op == "<<=" ||
3280            e->op == ">>=" ||
3281            false)
3282     {
3283       visit_binary_expression (e);
3284     }
3285   else if (e->op == ".=" || // string only
3286            false)
3287     {
3288       if (t == pe_long || t == pe_stats)
3289         invalid (e->tok, t);
3290
3291       t = pe_string;
3292       e->left->visit (this);
3293       t = pe_string;
3294       e->right->visit (this);
3295       if (e->type == pe_unknown)
3296         {
3297           e->type = pe_string;
3298           resolved (e->tok, e->type);
3299         }
3300     }
3301   else if (e->op == "=") // overloaded = for string & numeric operands
3302     {
3303       // logic similar to ternary_expression
3304       exp_type sub_type = t;
3305
3306       // Infer types across the l/r values
3307       if (sub_type == pe_unknown && e->type != pe_unknown)
3308         sub_type = e->type;
3309
3310       t = (sub_type != pe_unknown) ? sub_type :
3311         (e->right->type != pe_unknown) ? e->right->type :
3312         pe_unknown;
3313       e->left->visit (this);
3314       t = (sub_type != pe_unknown) ? sub_type :
3315         (e->left->type != pe_unknown) ? e->left->type :
3316         pe_unknown;
3317       e->right->visit (this);
3318
3319       if ((sub_type != pe_unknown) && (e->type == pe_unknown))
3320         {
3321           e->type = sub_type;
3322           resolved (e->tok, e->type);
3323         }
3324       if ((sub_type == pe_unknown) && (e->left->type != pe_unknown))
3325         {
3326           e->type = e->left->type;
3327           resolved (e->tok, e->type);
3328         }
3329
3330       if (e->left->type != pe_unknown &&
3331           e->right->type != pe_unknown &&
3332           e->left->type != e->right->type)
3333         mismatch (e->tok, e->left->type, e->right->type);
3334
3335     }
3336   else
3337     throw semantic_error ("unsupported assignment operator " + e->op);
3338 }
3339
3340
3341 void
3342 typeresolution_info::visit_binary_expression (binary_expression* e)
3343 {
3344   if (t == pe_stats || t == pe_string)
3345     invalid (e->tok, t);
3346
3347   t = pe_long;
3348   e->left->visit (this);
3349   t = pe_long;
3350   e->right->visit (this);
3351
3352   if (e->left->type != pe_unknown &&
3353       e->right->type != pe_unknown &&
3354       e->left->type != e->right->type)
3355     mismatch (e->tok, e->left->type, e->right->type);
3356
3357   if (e->type == pe_unknown)
3358     {
3359       e->type = pe_long;
3360       resolved (e->tok, e->type);
3361     }
3362 }
3363
3364
3365 void
3366 typeresolution_info::visit_pre_crement (pre_crement *e)
3367 {
3368   visit_unary_expression (e);
3369 }
3370
3371
3372 void
3373 typeresolution_info::visit_post_crement (post_crement *e)
3374 {
3375   visit_unary_expression (e);
3376 }
3377
3378
3379 void
3380 typeresolution_info::visit_unary_expression (unary_expression* e)
3381 {
3382   if (t == pe_stats || t == pe_string)
3383     invalid (e->tok, t);
3384
3385   t = pe_long;
3386   e->operand->visit (this);
3387
3388   if (e->type == pe_unknown)
3389     {
3390       e->type = pe_long;
3391       resolved (e->tok, e->type);
3392     }
3393 }
3394
3395
3396 void
3397 typeresolution_info::visit_ternary_expression (ternary_expression* e)
3398 {
3399   exp_type sub_type = t;
3400
3401   t = pe_long;
3402   e->cond->visit (this);
3403
3404   // Infer types across the true/false arms of the ternary expression.
3405
3406   if (sub_type == pe_unknown && e->type != pe_unknown)
3407     sub_type = e->type;
3408   t = sub_type;
3409   e->truevalue->visit (this);
3410   t = sub_type;
3411   e->falsevalue->visit (this);
3412
3413   if ((sub_type == pe_unknown) && (e->type != pe_unknown))
3414     ; // already resolved
3415   else if ((sub_type != pe_unknown) && (e->type == pe_unknown))
3416     {
3417       e->type = sub_type;
3418       resolved (e->tok, e->type);
3419     }
3420   else if ((sub_type == pe_unknown) && (e->truevalue->type != pe_unknown))
3421     {
3422       e->type = e->truevalue->type;
3423       resolved (e->tok, e->type);
3424     }
3425   else if ((sub_type == pe_unknown) && (e->falsevalue->type != pe_unknown))
3426     {
3427       e->type = e->falsevalue->type;
3428       resolved (e->tok, e->type);
3429     }
3430   else if (e->type != sub_type)
3431     mismatch (e->tok, sub_type, e->type);
3432 }
3433
3434
3435 template <class Referrer, class Referent>
3436 void resolve_2types (Referrer* referrer, Referent* referent,
3437                     typeresolution_info* r, exp_type t, bool accept_unknown = false)
3438 {
3439   exp_type& re_type = referrer->type;
3440   const token* re_tok = referrer->tok;
3441   exp_type& te_type = referent->type;
3442   const token* te_tok = referent->tok;
3443
3444   if (t != pe_unknown && re_type == t && re_type == te_type)
3445     ; // do nothing: all three e->types in agreement
3446   else if (t == pe_unknown && re_type != pe_unknown && re_type == te_type)
3447     ; // do nothing: two known e->types in agreement
3448   else if (re_type != pe_unknown && te_type != pe_unknown && re_type != te_type)
3449     r->mismatch (re_tok, re_type, te_type);
3450   else if (re_type != pe_unknown && t != pe_unknown && re_type != t)
3451     r->mismatch (re_tok, re_type, t);
3452   else if (te_type != pe_unknown && t != pe_unknown && te_type != t)
3453     r->mismatch (te_tok, te_type, t);
3454   else if (re_type == pe_unknown && t != pe_unknown)
3455     {
3456       // propagate from upstream
3457       re_type = t;
3458       r->resolved (re_tok, re_type);
3459       // catch re_type/te_type mismatch later
3460     }
3461   else if (re_type == pe_unknown && te_type != pe_unknown)
3462     {
3463       // propagate from referent
3464       re_type = te_type;
3465       r->resolved (re_tok, re_type);
3466       // catch re_type/t mismatch later
3467     }
3468   else if (re_type != pe_unknown && te_type == pe_unknown)
3469     {
3470       // propagate to referent
3471       te_type = re_type;
3472       r->resolved (te_tok, te_type);
3473       // catch re_type/t mismatch later
3474     }
3475   else if (! accept_unknown)
3476     r->unresolved (re_tok);
3477 }
3478
3479
3480 void
3481 typeresolution_info::visit_symbol (symbol* e)
3482 {
3483   assert (e->referent != 0);
3484   resolve_2types (e, e->referent, this, t);
3485 }
3486
3487
3488 void
3489 typeresolution_info::visit_target_symbol (target_symbol* e)
3490 {
3491   if (!e->probe_context_var.empty())
3492     return;
3493
3494   // This occurs only if a target symbol was not resolved over in
3495   // tapset.cxx land, that error was properly suppressed, and the
3496   // later unused-expression-elimination pass didn't get rid of it
3497   // either.  So we have a target symbol that is believed to be of
3498   // genuine use, yet unresolved by the provider.
3499
3500   if (session.verbose > 2)
3501     {
3502       clog << "Resolution problem with ";
3503       if (current_function)
3504         {
3505           clog << "function " << current_function->name << endl;
3506           current_function->body->print (clog);
3507           clog << endl;
3508         }
3509       else if (current_probe)
3510         {
3511           clog << "probe " << current_probe->name << endl;
3512           current_probe->body->print (clog);
3513           clog << endl;
3514         }
3515       else
3516         clog << "other" << endl;
3517     }
3518
3519   if (e->saved_conversion_error)
3520     throw (* (e->saved_conversion_error));
3521   else
3522     throw semantic_error("unresolved target-symbol expression", e->tok);
3523 }
3524
3525
3526 void
3527 typeresolution_info::visit_cast_op (cast_op* e)
3528 {
3529   // Like target_symbol, a cast_op shouldn't survive this far
3530   // unless it was not resolved and its value is really needed.
3531   if (e->saved_conversion_error)
3532     throw (* (e->saved_conversion_error));
3533   else
3534     throw semantic_error("type definition '" + e->type + "' not found", e->tok);
3535 }
3536
3537
3538 void
3539 typeresolution_info::visit_arrayindex (arrayindex* e)
3540 {
3541
3542   symbol *array = NULL;
3543   hist_op *hist = NULL;
3544   classify_indexable(e->base, array, hist);
3545
3546   // Every hist_op has type [int]:int, that is to say, every hist_op
3547   // is a pseudo-one-dimensional integer array type indexed by
3548   // integers (bucket numbers).
3549
3550   if (hist)
3551     {
3552       if (e->indexes.size() != 1)
3553         unresolved (e->tok);
3554       t = pe_long;
3555       e->indexes[0]->visit (this);
3556       if (e->indexes[0]->type != pe_long)
3557         unresolved (e->tok);
3558       hist->visit (this);
3559       if (e->type != pe_long)
3560         {
3561           e->type = pe_long;
3562           resolved (e->tok, pe_long);
3563         }
3564       return;
3565     }
3566
3567   // Now we are left with "normal" map inference and index checking.
3568
3569   assert (array);
3570   assert (array->referent != 0);
3571   resolve_2types (e, array->referent, this, t);
3572
3573   // now resolve the array indexes
3574
3575   // if (e->referent->index_types.size() == 0)
3576   //   // redesignate referent as array
3577   //   e->referent->set_arity (e->indexes.size ());
3578
3579   if (e->indexes.size() != array->referent->index_types.size())
3580     unresolved (e->tok); // symbol resolution should prevent this
3581   else for (unsigned i=0; i<e->indexes.size(); i++)
3582     {
3583       expression* ee = e->indexes[i];
3584       exp_type& ft = array->referent->index_types [i];
3585       t = ft;
3586       ee->visit (this);
3587       exp_type at = ee->type;
3588
3589       if ((at == pe_string || at == pe_long) && ft == pe_unknown)
3590         {
3591           // propagate to formal type
3592           ft = at;
3593           resolved (array->referent->tok, ft);
3594           // uses array decl as there is no token for "formal type"
3595         }
3596       if (at == pe_stats)
3597         invalid (ee->tok, at);
3598       if (ft == pe_stats)
3599         invalid (ee->tok, ft);
3600       if (at != pe_unknown && ft != pe_unknown && ft != at)
3601         mismatch (e->tok, at, ft);
3602       if (at == pe_unknown)
3603           unresolved (ee->tok);
3604     }
3605 }
3606
3607
3608 void
3609 typeresolution_info::visit_functioncall (functioncall* e)
3610 {
3611   assert (e->referent != 0);
3612
3613   resolve_2types (e, e->referent, this, t, true); // accept unknown type
3614
3615   if (e->type == pe_stats)
3616     invalid (e->tok, e->type);
3617
3618   // now resolve the function parameters
3619   if (e->args.size() != e->referent->formal_args.size())
3620     unresolved (e->tok); // symbol resolution should prevent this
3621   else for (unsigned i=0; i<e->args.size(); i++)
3622     {
3623       expression* ee = e->args[i];
3624       exp_type& ft = e->referent->formal_args[i]->type;
3625       const token* fe_tok = e->referent->formal_args[i]->tok;
3626       t = ft;
3627       ee->visit (this);
3628       exp_type at = ee->type;
3629
3630       if (((at == pe_string) || (at == pe_long)) && ft == pe_unknown)
3631         {
3632           // propagate to formal arg
3633           ft = at;
3634           resolved (e->referent->formal_args[i]->tok, ft);
3635         }
3636       if (at == pe_stats)
3637         invalid (e->tok, at);
3638       if (ft == pe_stats)
3639         invalid (fe_tok, ft);
3640       if (at != pe_unknown && ft != pe_unknown && ft != at)
3641         mismatch (e->tok, at, ft);
3642       if (at == pe_unknown)
3643         unresolved (e->tok);
3644     }
3645 }
3646
3647
3648 void
3649 typeresolution_info::visit_block (block* e)
3650 {
3651   for (unsigned i=0; i<e->statements.size(); i++)
3652     {
3653       try
3654         {
3655           t = pe_unknown;
3656           e->statements[i]->visit (this);
3657         }
3658       catch (const semantic_error& e)
3659         {
3660           session.print_error (e);
3661         }
3662     }
3663 }
3664
3665
// No-op: this visitor does nothing for embedded-code nodes.
void
typeresolution_info::visit_embeddedcode (embeddedcode*)
{
}
3670
3671
3672 void
3673 typeresolution_info::visit_if_statement (if_statement* e)
3674 {
3675   t = pe_long;
3676   e->condition->visit (this);
3677
3678   t = pe_unknown;
3679   e->thenblock->visit (this);
3680
3681   if (e->elseblock)
3682     {
3683       t = pe_unknown;
3684       e->elseblock->visit (this);
3685     }
3686 }
3687
3688
3689 void
3690 typeresolution_info::visit_for_loop (for_loop* e)
3691 {
3692   t = pe_unknown;
3693   if (e->init) e->init->visit (this);
3694   t = pe_long;
3695   e->cond->visit (this);
3696   t = pe_unknown;
3697   if (e->incr) e->incr->visit (this);
3698   t = pe_unknown;
3699   e->block->visit (this);
3700 }
3701
3702
3703 void
3704 typeresolution_info::visit_foreach_loop (foreach_loop* e)
3705 {
3706   // See also visit_arrayindex.
3707   // This is different in that, being a statement, we can't assign
3708   // a type to the outer array, only propagate to/from the indexes
3709
3710   // if (e->referent->index_types.size() == 0)
3711   //   // redesignate referent as array
3712   //   e->referent->set_arity (e->indexes.size ());
3713
3714   symbol *array = NULL;
3715   hist_op *hist = NULL;
3716   classify_indexable(e->base, array, hist);
3717
3718   if (hist)
3719     {
3720       if (e->indexes.size() != 1)
3721         unresolved (e->tok);
3722       t = pe_long;
3723       e->indexes[0]->visit (this);
3724       if (e->indexes[0]->type != pe_long)
3725         unresolved (e->tok);
3726       hist->visit (this);
3727     }
3728   else
3729     {
3730       assert (array);
3731       if (e->indexes.size() != array->referent->index_types.size())
3732         unresolved (e->tok); // symbol resolution should prevent this
3733       else for (unsigned i=0; i<e->indexes.size(); i++)
3734         {
3735           expression* ee = e->indexes[i];
3736           exp_type& ft = array->referent->index_types [i];
3737           t = ft;
3738           ee->visit (this);
3739           exp_type at = ee->type;
3740
3741           if ((at == pe_string || at == pe_long) && ft == pe_unknown)
3742             {
3743               // propagate to formal type
3744               ft = at;
3745               resolved (array->referent->tok, ft);
3746               // uses array decl as there is no token for "formal type"
3747             }
3748           if (at == pe_stats)
3749             invalid (ee->tok, at);
3750           if (ft == pe_stats)
3751             invalid (ee->tok, ft);
3752           if (at != pe_unknown && ft != pe_unknown && ft != at)
3753             mismatch (e->tok, at, ft);
3754           if (at == pe_unknown)
3755             unresolved (ee->tok);
3756         }
3757     }
3758
3759   if (e->limit)
3760     {
3761       t = pe_long;
3762       e->limit->visit (this);
3763     }
3764
3765   t = pe_unknown;
3766   e->block->visit (this);
3767 }
3768
3769
// No-op: this visitor does nothing for a null statement.
void
typeresolution_info::visit_null_statement (null_statement*)
{
}
3774
3775
3776 void
3777 typeresolution_info::visit_expr_statement (expr_statement* e)
3778 {
3779   t = pe_unknown;
3780   e->value->visit (this);
3781 }
3782
3783
3784 struct delete_statement_typeresolution_info:
3785   public throwing_visitor
3786 {
3787   typeresolution_info *parent;
3788   delete_statement_typeresolution_info (typeresolution_info *p):
3789     throwing_visitor ("invalid operand of delete expression"),
3790     parent (p)
3791   {}
3792
3793   void visit_arrayindex (arrayindex* e)
3794   {
3795     parent->visit_arrayindex (e);
3796   }
3797
3798   void visit_symbol (symbol* e)
3799   {
3800     exp_type ignored = pe_unknown;
3801     assert (e->referent != 0);
3802     resolve_2types (e, e->referent, parent, ignored);
3803   }
3804 };
3805
3806
3807 void
3808 typeresolution_info::visit_delete_statement (delete_statement* e)
3809 {
3810   delete_statement_typeresolution_info di (this);
3811   t = pe_unknown;
3812   e->value->visit (&di);
3813 }
3814
3815
// No-op: this visitor does nothing for a next statement.
void
typeresolution_info::visit_next_statement (next_statement*)
{
}
3820
3821
// No-op: this visitor does nothing for a break statement.
void
typeresolution_info::visit_break_statement (break_statement*)
{
}
3826
3827
// No-op: this visitor does nothing for a continue statement.
void
typeresolution_info::visit_continue_statement (continue_statement*)
{
}
3832
3833
3834 void
3835 typeresolution_info::visit_array_in (array_in* e)
3836 {
3837   // all unary operators only work on numerics
3838   exp_type t1 = t;
3839   t = pe_unknown; // array value can be anything
3840   e->operand->visit (this);
3841
3842   if (t1 == pe_unknown && e->type != pe_unknown)
3843     ; // already resolved
3844   else if (t1 == pe_string || t1 == pe_stats)
3845     mismatch (e->tok, t1, pe_long);
3846   else if (e->type == pe_unknown)
3847     {
3848       e->type = pe_long;
3849       resolved (e->tok, e->type);
3850     }
3851 }
3852
3853
3854 void
3855 typeresolution_info::visit_return_statement (return_statement* e)
3856 {
3857   // This is like symbol, where the referent is
3858   // the return value of the function.
3859
3860   // translation pass will print error
3861   if (current_function == 0)
3862     return;
3863
3864   exp_type& e_type = current_function->type;
3865   t = current_function->type;
3866   e->value->visit (this);
3867
3868   if (e_type != pe_unknown && e->value->type != pe_unknown
3869       && e_type != e->value->type)
3870     mismatch (current_function->tok, e_type, e->value->type);
3871   if (e_type == pe_unknown &&
3872       (e->value->type == pe_long || e->value->type == pe_string))
3873     {
3874       // propagate non-statistics from value
3875       e_type = e->value->type;
3876       resolved (current_function->tok, e->value->type);
3877     }
3878   if (e->value->type == pe_stats)
3879     invalid (e->value->tok, e->value->type);
3880 }
3881
3882 void
3883 typeresolution_info::visit_print_format (print_format* e)
3884 {
3885   size_t unresolved_args = 0;
3886
3887   if (e->hist)
3888     {
3889       e->hist->visit(this);
3890     }
3891
3892   else if (e->print_with_format)
3893     {
3894       // If there's a format string, we can do both inference *and*
3895       // checking.
3896
3897       // First we extract the subsequence of formatting components
3898       // which are conversions (not just literal string components)
3899
3900       unsigned expected_num_args = 0;
3901       std::vector<print_format::format_component> components;
3902       for (size_t i = 0; i < e->components.size(); ++i)
3903         {
3904           if (e->components[i].type == print_format::conv_unspecified)
3905             throw semantic_error ("Unspecified conversion in print operator format string",
3906                                   e->tok);
3907           else if (e->components[i].type == print_format::conv_literal)
3908             continue;
3909           components.push_back(e->components[i]);
3910           ++expected_num_args;
3911           if (e->components[i].widthtype == print_format::width_dynamic)
3912             ++expected_num_args;
3913           if (e->components[i].prectype == print_format::prec_dynamic)
3914             ++expected_num_args;
3915         }
3916
3917       // Then we check that the number of conversions and the number
3918       // of args agree.
3919
3920       if (expected_num_args != e->args.size())
3921         throw semantic_error ("Wrong number of args to formatted print operator",
3922                               e->tok);
3923
3924       // Then we check that the types of the conversions match the types
3925       // of the args.
3926       unsigned argno = 0;
3927       for (size_t i = 0; i < components.size(); ++i)
3928         {
3929           // Check the dynamic width, if specified
3930           if (components[i].widthtype == print_format::width_dynamic)
3931             {
3932               check_arg_type (pe_long, e->args[argno]);
3933               ++argno;
3934             }
3935
3936           // Check the dynamic precision, if specified
3937           if (components[i].prectype == print_format::prec_dynamic)
3938             {
3939               check_arg_type (pe_long, e->args[argno]);
3940               ++argno;
3941             }
3942
3943           exp_type wanted = pe_unknown;
3944
3945           switch (components[i].type)
3946             {
3947             case print_format::conv_unspecified:
3948             case print_format::conv_literal:
3949               assert (false);
3950               break;
3951
3952             case print_format::conv_signed_decimal:
3953             case print_format::conv_unsigned_decimal:
3954             case print_format::conv_unsigned_octal:
3955             case print_format::conv_unsigned_ptr:
3956             case print_format::conv_unsigned_uppercase_hex:
3957             case print_format::conv_unsigned_lowercase_hex:
3958             case print_format::conv_binary:
3959             case print_format::conv_char:
3960             case print_format::conv_memory:
3961             case print_format::conv_memory_hex:
3962               wanted = pe_long;
3963               break;
3964
3965             case print_format::conv_string:
3966               wanted = pe_string;
3967               break;
3968             }
3969
3970           assert (wanted != pe_unknown);
3971           check_arg_type (wanted, e->args[argno]);
3972           ++argno;
3973         }
3974     }
3975   else
3976     {
3977       // Without a format string, the best we can do is require that
3978       // each argument resolve to a concrete type.
3979       for (size_t i = 0; i < e->args.size(); ++i)
3980         {
3981           t = pe_unknown;
3982           e->args[i]->visit (this);
3983           if (e->args[i]->type == pe_unknown)
3984             {
3985               unresolved (e->args[i]->tok);
3986               ++unresolved_args;
3987             }
3988         }
3989     }
3990
3991   if (unresolved_args == 0)
3992     {
3993       if (e->type == pe_unknown)
3994         {
3995           if (e->print_to_stream)
3996             e->type = pe_long;
3997           else
3998             e->type = pe_string;
3999           resolved (e->tok, e->type);
4000         }
4001     }
4002   else
4003     {
4004       e->type = pe_unknown;
4005       unresolved (e->tok);
4006     }
4007 }
4008
4009
4010 void
4011 typeresolution_info::visit_stat_op (stat_op* e)
4012 {
4013   t = pe_stats;
4014   e->stat->visit (this);
4015   if (e->type == pe_unknown)
4016     {
4017       e->type = pe_long;
4018       resolved (e->tok, e->type);
4019     }
4020   else if (e->type != pe_long)
4021     mismatch (e->tok, e->type, pe_long);
4022 }
4023
4024 void
4025 typeresolution_info::visit_hist_op (hist_op* e)
4026 {
4027   t = pe_stats;
4028   e->stat->visit (this);
4029 }
4030
4031
4032 void
4033 typeresolution_info::check_arg_type (exp_type wanted, expression* arg)
4034 {
4035   t = wanted;
4036   arg->visit (this);
4037
4038   if (arg->type == pe_unknown)
4039     {
4040       arg->type = wanted;
4041       resolved (arg->tok, wanted);
4042     }
4043   else if (arg->type != wanted)
4044     {
4045       mismatch (arg->tok, arg->type, wanted);
4046     }
4047 }
4048
4049
4050 void
4051 typeresolution_info::unresolved (const token* tok)
4052 {
4053   num_still_unresolved ++;
4054
4055   if (assert_resolvability)
4056     {
4057       stringstream msg;
4058       string nm = (current_function ? current_function->name :
4059                    current_probe ? current_probe->name :
4060                    "probe condition");
4061       msg << nm + " with unresolved type";
4062       session.print_error (semantic_error (msg.str(), tok));
4063     }
4064 }
4065
4066
4067 void
4068 typeresolution_info::invalid (const token* tok, exp_type pe)
4069 {
4070   num_still_unresolved ++;
4071
4072   if (assert_resolvability)
4073     {
4074       stringstream msg;
4075       string nm = (current_function ? current_function->name :
4076                    current_probe ? current_probe->name :
4077                    "probe condition");
4078       if (tok && tok->type == tok_operator)
4079         msg << nm + " uses invalid operator";
4080       else
4081         msg << nm + " with invalid type " << pe;
4082       session.print_error (semantic_error (msg.str(), tok));
4083     }
4084 }
4085
4086
4087 void
4088 typeresolution_info::mismatch (const token* tok, exp_type t1, exp_type t2)
4089 {
4090   bool tok_resolved = false;
4091   size_t i;
4092   semantic_error* err1 = 0;
4093   num_still_unresolved ++;
4094
4095   //BZ 9719: for improving type mismatch messages, a semantic error is
4096   //generated with the token where type was first resolved. All such
4097   //resolved tokens, stored in a vector, are matched against their
4098   //content. If an error for the matching token hasn't been printed out
4099   //already, it is and the token pushed in another printed_toks vector
4100
4101   if (assert_resolvability)
4102     {
4103       stringstream msg;
4104       for (i=0; i<resolved_toks.size(); i++)
4105         {
4106           if (resolved_toks[i]->content == tok->content)
4107             {
4108               tok_resolved = true;
4109               break;
4110             }
4111         }
4112       if (!tok_resolved)
4113         {
4114           string nm = (current_function ? current_function->name :
4115                        current_probe ? current_probe->name :
4116                        "probe condition");
4117           msg << nm + " with type mismatch (" << t1 << " vs. " << t2 << ")";
4118         }
4119       else
4120         {
4121           bool tok_printed = false;
4122           for (size_t j=0; j<printed_toks.size(); j++)
4123             {
4124               if (printed_toks[j] == resolved_toks[i])
4125                 {
4126                   tok_printed = true;
4127                   break;
4128                 }
4129             }
4130           string nm = (current_function ? current_function->name :
4131                        current_probe ? current_probe->name :
4132                        "probe condition");
4133           msg << nm + " with type mismatch (" << t1 << " vs. " << t2 << ")";
4134           if (!tok_printed)
4135             {
4136               //error for possible mismatch in the earlier resolved token
4137               printed_toks.push_back (resolved_toks[i]);
4138               stringstream type_msg;
4139               type_msg << nm + " type first inferred here (" << t2 << ")";
4140               err1 = new semantic_error (type_msg.str(), resolved_toks[i]);
4141             }
4142         }
4143       semantic_error err (msg.str(), tok);
4144       err.chain = err1;
4145       session.print_error (err);
4146     }
4147 }
4148
4149
4150 void
4151 typeresolution_info::resolved (const token* tok, exp_type)
4152 {
4153   resolved_toks.push_back (tok);
4154   num_newly_resolved ++;
4155 }
4156
4157 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */