parse.cxx

   1 // recursive descent parser for systemtap scripts
   2 // Copyright (C) 2005-2019 Red Hat Inc.
   3 // Copyright (C) 2006 Intel Corporation.
   4 // Copyright (C) 2007 Bull S.A.S
   5 // Copyright (C) 2014 Peter Kjellstrom <cap@nsc.liu.se>
   6 //
   7 // This file is part of systemtap, and is free software.  You can
   8 // redistribute it and/or modify it under the terms of the GNU General
   9 // Public License (GPL); either version 2, or (at your option) any
  10 // later version.
  11
  12 #include "config.h"
  13 #include "staptree.h"
  14 #include "parse.h"
  15 #include "session.h"
  16 #include "util.h"
  17 #include "stringtable.h"
  18
  19 #include <iostream>
  20
  21 #include <fstream>
  22 #include <cctype>
  23 #include <cstdlib>
  24 #include <cassert>
  25 #include <cerrno>
  26 #include <climits>
  27 #include <sstream>
  28 #include <cstring>
  29 #include <cctype>
  30 #include <iterator>
  31 #include <unordered_set>
  32
  33 extern "C" {
  34 #include <fnmatch.h>
  35 }
  36
  37 using namespace std;
  38
  39
class parser;

// Tokenizer for the script text consumed by the parser.  Only the
// declaration is visible here; the member functions are defined
// elsewhere.
class lexer
{
public:
  bool ate_comment; // current token follows a comment
  bool ate_whitespace; // the most recent token followed whitespace
  bool saw_tokens; // the lexer found tokens (before preprocessing occurred)
  bool check_compatible; // whether to gate features on session.compatible

  // Produce the next token.  The parser treats a null result as end of
  // input.
  token* scan ();
  // The parser's constructor passes (input stream, input name, session,
  // flag); the flag presumably initializes check_compatible -- TODO
  // confirm against the constructor definition.
  lexer (istream&, const string&, systemtap_session&, bool);
  void set_current_file (stapfile* f);
  void set_current_token_chain (const token* tok);
  inline bool has_version (const char* v) const;

  unordered_set<interned_string> keywords;
  // Consulted by the macro pass to warn when a macro name collides with
  // a built-in '@' operator.
  static unordered_set<string> atwords;
private:
  inline int input_get ();
  inline int input_peek (unsigned n=0);
  void input_put (const string&, const token*);
  string input_name;
  string input_contents; // NB: being a temporary, no need to interned_string optimize this object
  const char *input_pointer; // index into input_contents; NB: recompute if input_contents changed!
  const char *input_end;
  unsigned cursor_suspend_count;
  unsigned cursor_suspend_line;
  unsigned cursor_suspend_column;
  unsigned cursor_line;
  unsigned cursor_column;
  systemtap_session& session;
  stapfile* current_file;
  const token* current_token_chain;
};
  74
  75
// Recursive descent parser for a single script input.  It owns the
// lexer and layers two preprocessing passes on top of it: macro
// declaration/expansion first, then %( ... %) conditionals.  Only the
// declaration is visible here; the member functions are defined
// elsewhere in this file.
class parser
{
public:
  // flags is a bitmask of pf_* values; see the constructor definition
  // for how each bit is consumed.
  parser (systemtap_session& s, const string& n, istream& i, unsigned flags=0);
  ~parser ();

  stapfile* parse ();
  probe* parse_synthetic_probe (const token* chain);
  // Used for .stpm macro library files.
  stapfile* parse_library_macros ();

private:
  // State of a preprocessor conditional; tracked per nesting level in
  // pp_state below.
  typedef enum {
      PP_NONE,
      PP_KEEP_THEN,
      PP_SKIP_THEN,
      PP_KEEP_ELSE,
      PP_SKIP_ELSE,
  } pp_state_t;

  struct pp1_activation;

  // A macro parameter binding.  parent_act records the activation that
  // was current at the point of invocation (null at top level).
  struct pp_macrodecl : public macrodecl {
    pp1_activation* parent_act; // used for param bindings
    virtual bool is_closure() { return parent_act != 0; }
    pp_macrodecl () : macrodecl(), parent_act(0) { }
  };

  systemtap_session& session;
  string input_name;
  lexer input; // NB: constructed from input_name, so it must stay declared after it
  bool errs_as_warnings;
  bool privileged;
  bool user_file;
  bool auto_path;
  parse_context context;

  // preprocessing subordinate, first pass (macros)
  // One record per macro (or macro parameter) currently being expanded.
  struct pp1_activation {
    const token* tok; // the invocation token; owned by this record
    unsigned cursor; // position within macro body
    map<string, pp_macrodecl*> params; // parameter name -> bound argument tokens

    macrodecl* curr_macro;

    pp1_activation (const token* tok, macrodecl* curr_macro)
      : tok(tok), cursor(0), curr_macro(curr_macro) { }
    ~pp1_activation ();
  };

  map<string, macrodecl*> pp1_namespace; // macros declared in this input, by name
  vector<pp1_activation*> pp1_state; // expansion stack; back() is the innermost
  const token* next_pp1 ();
  const token* scan_pp1 (bool ignore_macros);
  const token* slurp_pp1_param (vector<const token*>& param);
  const token* slurp_pp1_body (vector<const token*>& body);

  // preprocessing subordinate, final pass (conditionals)
  vector<pair<const token*, pp_state_t> > pp_state;
  const token* scan_pp ();
  const token* skip_pp ();

  // scanning state
  const token* next ();
  const token* peek ();

  // Advance past and throw away current token after peek () or next ().
  void swallow ();

  const token* systemtap_v_seen;
  const token* last_t; // the last value returned by peek() or next()
  const token* next_t; // lookahead token

  // expectations, these swallow the token
  void expect_known (token_type tt, string const & expected);
  void expect_unknown (token_type tt, interned_string & target);
  void expect_unknown2 (token_type tt1, token_type tt2, interned_string & target);

  // convenience forms, these also swallow the token
  void expect_op (string const & expected);
  interned_string expect_op_any (initializer_list<const char*> expected);
  void expect_kw (string const & expected);
  void expect_number (int64_t & expected);
  void expect_ident_or_keyword (interned_string & target);

  // convenience forms, which return true or false, these don't swallow token
  bool peek_op (string const & op);
  bool peek_kw (string const & kw);

  // convenience forms, which return the token
  const token* expect_kw_token (string const & expected);
  const token* expect_ident_or_atword (interned_string & target);

  // Report pe through the session and count it in num_errors.
  void print_error (const parse_error& pe, bool errs_as_warnings = false);
  unsigned num_errors;

private: // nonterminals
  void parse_probe (vector<probe*>&, vector<probe_alias*>&);
  void parse_private (vector<vardecl*>&, vector<probe*>&,
                      string const&, vector<functiondecl*>&);
  void parse_global (vector<vardecl*>&, vector<probe*>&,
                     string const&);
  void do_parse_global (vector<vardecl*>&, vector<probe*>&,
                        string const&, const token*, bool);
  void parse_functiondecl (vector<functiondecl*>&, string const&);
  void do_parse_functiondecl (vector<functiondecl*>&, const token*,
                              string const&, bool);
  embeddedcode* parse_embeddedcode ();
  vector<probe_point*> parse_probe_points ();
  vector<probe_point*> parse_components ();
  vector<probe_point*> parse_component ();
  literal_string* consume_string_literals (const token*);
  literal_string* parse_literal_string ();
  literal* parse_literal ();
  block* parse_stmt_block ();
  try_block* parse_try_block ();
  statement* parse_statement ();
  if_statement* parse_if_statement ();
  for_loop* parse_for_loop ();
  for_loop* parse_while_loop ();
  foreach_loop* parse_foreach_loop ();
  expr_statement* parse_expr_statement ();
  return_statement* parse_return_statement ();
  delete_statement* parse_delete_statement ();
  next_statement* parse_next_statement ();
  break_statement* parse_break_statement ();
  continue_statement* parse_continue_statement ();
  indexable* parse_indexable ();
  const token *parse_hist_op_or_bare_name (hist_op *&hop, interned_string &name);
  target_symbol *parse_target_symbol ();
  cast_op *parse_cast_op ();
  atvar_op *parse_atvar_op ();
  expression* parse_entry_op (const token* t);
  expression* parse_defined_op (const token* t);
  expression* parse_const_op (const token* t);
  expression* parse_perf_op (const token* t);
  expression* parse_target_register (const token* t);
  expression* parse_target_deref (const token* t);
  expression* parse_expression ();
  expression* parse_assignment ();
  expression* parse_ternary ();
  expression* parse_logical_or ();
  expression* parse_logical_and ();
  expression* parse_boolean_or ();
  expression* parse_boolean_xor ();
  expression* parse_boolean_and ();
  expression* parse_array_in ();
  expression* parse_comparison_or_regex_query ();
  expression* parse_shift ();
  expression* parse_concatenation ();
  expression* parse_additive ();
  expression* parse_multiplicative ();
  expression* parse_unary ();
  expression* parse_crement ();
  expression* parse_dwarf_value ();
  expression* parse_value ();
  expression* parse_symbol ();

  bool peek_target_symbol_components ();
  void parse_target_symbol_components (target_symbol* e);
};
 236
 237
 238 // ------------------------------------------------------------------------
 239
 240 stapfile*
 241 parse (systemtap_session& s, const string& n, istream& i, unsigned flags)
 242 {
 243   parser p (s, n, i, flags);
 244   return p.parse ();
 245 }
 246
 247 stapfile*
 248 parse (systemtap_session& s, const string& name, unsigned flags)
 249 {
 250   ifstream i(name.c_str(), ios::in);
 251   if (i.fail())
 252     {
 253       cerr << (file_exists(name)
 254                ? _F("Input file '%s' can't be opened for reading.", name.c_str())
 255                : _F("Input file '%s' is missing.", name.c_str()))
 256            << endl;
 257       return 0;
 258     }
 259
 260   parser p (s, name, i, flags);
 261   return p.parse ();
 262 }
 263
 264 stapfile*
 265 parse_library_macros (systemtap_session& s, const string& name)
 266 {
 267   ifstream i(name.c_str(), ios::in);
 268   if (i.fail())
 269     {
 270       cerr << (file_exists(name)
 271                ? _F("Input file '%s' can't be opened for reading.", name.c_str())
 272                : _F("Input file '%s' is missing.", name.c_str()))
 273            << endl;
 274       return 0;
 275     }
 276
 277   parser p (s, name, i);
 278   return p.parse_library_macros ();
 279 }
 280
 281 probe*
 282 parse_synthetic_probe (systemtap_session &s, istream& i, const token* tok)
 283 {
 284   parser p (s, tok ? tok->location.file->name : "<synthetic>", i);
 285   return p.parse_synthetic_probe (tok);
 286 }
 287
 288 // ------------------------------------------------------------------------
 289
 290 parser::parser (systemtap_session& s, const string &n, istream& i, unsigned flags):
 291   session (s), input_name (n), input (i, input_name, s, !(flags & pf_no_compatible)),
 292   errs_as_warnings(flags & pf_squash_errors), privileged (flags & pf_guru),
 293   user_file (flags & pf_user_file), auto_path (flags & pf_auto_path),
 294   context(con_unknown), systemtap_v_seen(0), last_t (0), next_t (0), num_errors (0)
 295 {
 296 }
 297
 298 parser::~parser()
 299 {
 300 }
 301
 302 static string
 303 tt2str(token_type tt)
 304 {
 305   switch (tt)
 306     {
 307     case tok_junk: return "junk";
 308     case tok_identifier: return "identifier";
 309     case tok_operator: return "operator";
 310     case tok_string: return "string";
 311     case tok_number: return "number";
 312     case tok_embedded: return "embedded-code";
 313     case tok_keyword: return "keyword";
 314     }
 315   return "unknown token";
 316 }
 317
 318 ostream&
 319 operator << (ostream& o, const source_loc& loc)
 320 {
 321   o << loc.file->name << ":"
 322     << loc.line << ":"
 323     << loc.column;
 324
 325   return o;
 326 }
 327
 328 ostream&
 329 operator << (ostream& o, const token& t)
 330 {
 331   o << tt2str(t.type);
 332
 333   if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
 334     {
 335       o << " '";
 336       for (unsigned i=0; i<t.content.length(); i++)
 337         {
 338           char c = t.content[i];
 339           o << (isprint (c) ? c : '?');
 340         }
 341       o << "'";
 342     }
 343
 344   o << " at "
 345     << t.location;
 346
 347   return o;
 348 }
 349
 350
 351 void
 352 parser::print_error  (const parse_error &pe, bool errs_as_warnings)
 353 {
 354   const token *tok = pe.tok ? pe.tok : last_t;
 355   session.print_error(pe, tok, input_name, errs_as_warnings);
 356   num_errors ++;
 357 }
 358
 359
 360
 361
 362 template <typename OPERAND>
 363 bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
 364 {
 365   if (op->type == tok_operator && op->content == "<=")
 366     { return lhs <= rhs; }
 367   else if (op->type == tok_operator && op->content == ">=")
 368     { return lhs >= rhs; }
 369   else if (op->type == tok_operator && op->content == "<")
 370     { return lhs < rhs; }
 371   else if (op->type == tok_operator && op->content == ">")
 372     { return lhs > rhs; }
 373   else if (op->type == tok_operator && op->content == "==")
 374     { return lhs == rhs; }
 375   else if (op->type == tok_operator && op->content == "!=")
 376     { return lhs != rhs; }
 377   else
 378     throw PARSE_ERROR (_("expected comparison operator"), op);
 379 }
 380
 381
 382 // Here, we perform on-the-fly preprocessing in two passes.
 383
 384 // First pass - macro declaration and expansion.
 385 //
 386 // The basic form of a declaration is @define SIGNATURE %( BODY %)
 387 // where SIGNATURE is of the form macro_name (a, b, c, ...)
 388 // and BODY can obtain the parameter contents as @a, @b, @c, ....
 389 // Note that parameterless macros can also be declared.
 390 //
 391 // Macro definitions may not be nested.
 392 // A macro is available textually after it has been defined.
 393 //
 394 // The basic form of a macro invocation
 395 //   for a parameterless macro is @macro_name,
 396 //   for a macro with parameters is @macro_name(param_1, param_2, ...).
 397 //
 398 // NB: this means that a parameterless macro @foo called as @foo(a, b, c)
 399 // leaves its 'parameters' alone, rather than consuming them to result
// in a "too many parameters" error. This may be useful in the unusual
 401 // case of wanting @foo to expand to the name of a function.
 402 //
 403 // Invocations of unknown macros are left unexpanded, to allow
 404 // the continued use of constructs such as @cast, @var, etc.
 405
 406 macrodecl::~macrodecl ()
 407 {
 408   delete tok;
 409   for (vector<const token*>::iterator it = body.begin();
 410        it != body.end(); it++)
 411     delete *it;
 412 }
 413
 414 parser::pp1_activation::~pp1_activation ()
 415 {
 416   delete tok;
 417   if (curr_macro->is_closure()) return; // body is shared with an earlier declaration
 418   for (map<string, pp_macrodecl*>::iterator it = params.begin();
 419        it != params.end(); it++)
 420     delete it->second;
 421 }
 422
 423 // Grab a token from the current input source (main file or macro body):
 424 const token*
 425 parser::next_pp1 ()
 426 {
 427   if (pp1_state.empty())
 428     return input.scan ();
 429
 430   // otherwise, we're inside a macro
 431   pp1_activation* act = pp1_state.back();
 432   unsigned& cursor = act->cursor;
 433   if (cursor < act->curr_macro->body.size())
 434     {
 435       token* t = new token(*act->curr_macro->body[cursor]);
 436       t->chain = new token(*act->tok); // mark chained token
 437       cursor++;
 438       return t;
 439     }
 440   else
 441     return 0; // reached end of macro body
 442 }
 443
// First preprocessing pass: return the next token after macro handling.
//
// '@define' declarations are recorded in pp1_namespace and produce no
// token.  An '@name' operator that matches a parameter of the current
// activation, a macro declared in this input, or a library macro pushes
// a new pp1_activation onto pp1_state, so that next_pp1() replays the
// macro body.  Any other token, including an unrecognized '@' operator,
// is returned to the caller.  Returns 0 at the real end of input.
//
// When ignore_macros is true, '@define' is not processed as a
// declaration (PR18462).
//
// Throws a parse error on malformed declarations or invocations.
const token*
parser::scan_pp1 (bool ignore_macros = false)
{
  while (true)
    {
      const token* t = next_pp1 ();
      if (t == 0) // EOF or end of macro body
        {
          if (pp1_state.empty()) // actual EOF
            return 0;

          // Exit macro and loop around to look for the next token.
          pp1_activation* act = pp1_state.back();
          pp1_state.pop_back(); delete act;
          continue;
        }

      // macro definition
      // PR18462 don't catalog preprocessor-disabled macros
      if (t->type == tok_operator && t->content == "@define" && !ignore_macros)
        {
          if (!pp1_state.empty())
            throw PARSE_ERROR (_("'@define' forbidden inside macro body"), t);
          delete t;

          // handle macro definition
          // (1) consume macro signature
          t = input.scan();
          if (! (t && t->type == tok_identifier))
            throw PARSE_ERROR (_("expected identifier"), t);
          string name = t->content;

          // check for redefinition of existing macro
          if (pp1_namespace.find(name) != pp1_namespace.end())
            {
              parse_error er (ERR_SRC, _F("attempt to redefine macro '@%s' in the same file", name.c_str ()), t);

              // Also point to pp1_namespace[name]->tok, the site of
              // the original definition:
              er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here",
                                             name.c_str()), pp1_namespace[name]->tok);
              throw er;
            }

          // XXX: the above restriction was mostly necessary due to
          // wanting to leave open the possibility of
          // statically-scoped semantics in the future.

          // XXX: this cascades into further parse errors as the
          // parser tries to parse the remaining definition... (e.g.
          // it can't tell that the macro body isn't a conditional,
          // that the uses of parameters aren't nonexistent
          // macros.....)
          if (name == "define")
            throw PARSE_ERROR (_("attempt to redefine '@define'"), t);
          if (input.atwords.count(name))
            session.print_warning (_F("macro redefines built-in operator '@%s'", name.c_str()), t);

          // The declaration is registered before its parameters and
          // body are read, and it takes ownership of the name token.
          macrodecl* decl = (pp1_namespace[name] = new macrodecl);
          decl->tok = t;

          // determine if the macro takes parameters
          bool saw_params = false;
          t = input.scan();
          if (t && t->type == tok_operator && t->content == "(")
            {
              saw_params = true;
              do
                {
                  // drop the '(' or ',' that introduced this parameter
                  delete t;

                  t = input.scan ();
                  if (! (t && t->type == tok_identifier))
                    throw PARSE_ERROR(_("expected identifier"), t);
                  decl->formal_args.push_back(t->content);
                  delete t;

                  t = input.scan ();
                  if (t && t->type == tok_operator && t->content == ",")
                    {
                      continue;
                    }
                  else if (t && t->type == tok_operator && t->content == ")")
                    {
                      delete t;
                      t = input.scan();
                      break;
                    }
                  else
                    {
                      throw PARSE_ERROR (_("expected ',' or ')'"), t);
                    }
                }
              while (true);
            }

          // (2) identify & consume macro body
          if (! (t && t->type == tok_operator && t->content == "%("))
            {
              if (saw_params)
                throw PARSE_ERROR (_("expected '%('"), t);
              else
                throw PARSE_ERROR (_("expected '%(' or '('"), t);
            }
          delete t;

          t = slurp_pp1_body (decl->body);
          if (!t)
            throw PARSE_ERROR (_("incomplete macro definition - missing '%)'"), decl->tok);
          delete t;

          // Now loop around to look for a real token.
          continue;
        }

      // (potential) macro invocation
      if (t->type == tok_operator && t->content[0] == '@')
        {
          const string& name = t->content.substr(1); // strip initial '@'

          // check if name refers to a real parameter or macro
          // Lookup order: parameters of the current activation, then
          // macros declared in this input (skipped while expanding a
          // library macro), then library macros.
          macrodecl* decl;
          pp1_activation* act = pp1_state.empty() ? 0 : pp1_state.back();
          if (act && act->params.find(name) != act->params.end())
            decl = act->params[name];
          else if (!(act && act->curr_macro->context == ctx_library)
                   && pp1_namespace.find(name) != pp1_namespace.end())
            decl = pp1_namespace[name];
          else if (session.library_macros.find(name)
                   != session.library_macros.end())
            decl = session.library_macros[name];
          else // this is an ordinary @operator
            return t;

          // handle macro invocation, taking ownership of t
          pp1_activation *new_act = new pp1_activation(t, decl);
          unsigned num_params = decl->formal_args.size();

          // (1a) restore parameter invocation closure
          if (num_params == 0 && decl->is_closure())
            {
              // NB: decl->parent_act is always safe since the
              // parameter decl (if any) comes from an activation
              // record which is deeper in the stack than new_act.

              // decl is a macro parameter which must be evaluated in
              // the context of the original point of invocation:
              new_act->params = ((pp_macrodecl*)decl)->parent_act->params;
              goto expand;
            }

          // (1b) consume macro parameters (if any)
          if (num_params == 0)
            goto expand;

          // for simplicity, we do not allow macro constructs here
          // -- if we did, we'd have to recursively call scan_pp1()
          t = next_pp1 ();
          if (! (t && t->type == tok_operator && t->content == "("))
            {
              delete new_act;
              throw PARSE_ERROR (_NF
                                    ("expected '(' in invocation of macro '@%s'"
                                     " taking %d parameter",
                                     "expected '(' in invocation of macro '@%s'"
                                     " taking %d parameters",
                                     num_params, name.c_str(), num_params), t);
            }

          // XXX perhaps parse/count the full number of params,
          // so we can say "expected x, found y params" on error?
          for (unsigned i = 0; i < num_params; i++)
            {
              // drop the '(' or ',' that preceded this parameter
              delete t;

              // create parameter closure
              string param_name = decl->formal_args[i];
              pp_macrodecl* p = (new_act->params[param_name]
                                 = new pp_macrodecl);
              p->tok = new token(*new_act->tok);
              p->parent_act = act;
              // NB: *new_act->tok points to invocation, act is NULL at top level

              t = slurp_pp1_param (p->body);

              // check correct usage of ',' or ')'
              if (t == 0) // hit unexpected EOF or end of macro
                {
                  // XXX could we pop the stack and continue parsing
                  // the invocation, allowing macros to construct new
                  // invocations in piecemeal fashion??
                  // Copy the invocation token before new_act (which
                  // owns the original) is deleted.
                  const token* orig_t = new token(*new_act->tok);
                  delete new_act;
                  throw PARSE_ERROR (_("could not find end of macro invocation"), orig_t);
                }
              if (t->type == tok_operator && t->content == ",")
                {
                  if (i + 1 == num_params)
                    {
                      delete new_act;
                      throw PARSE_ERROR (_F("too many parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
                    }
                }
              else if (t->type == tok_operator && t->content == ")")
                {
                  if (i + 1 != num_params)
                    {
                      delete new_act;
                      throw PARSE_ERROR (_F("too few parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
                    }
                }
              else
                {
                  // XXX this is, incidentally, impossible
                  delete new_act;
                  throw PARSE_ERROR(_("expected ',' or ')' after macro parameter"), t);
                }
            }

          // drop the closing ')'
          delete t;

          // (2) set up macro expansion
        expand:
          pp1_state.push_back (new_act);

          // Now loop around to look for a real token.
          continue;
        }

      // Otherwise, we have an ordinary token.
      return t;
    }
}
 677
 678 // Consume a single macro invocation's parameters, heeding nesting
 679 // brackets and stopping on an unbalanced ')' or an unbracketed ','
 680 // (and returning the final separator token).
 681 const token*
 682 parser::slurp_pp1_param (vector<const token*>& param)
 683 {
 684   const token* t = 0;
 685   unsigned nesting = 0;
 686   do
 687     {
 688       t = next_pp1 ();
 689
 690       if (!t)
 691         break;
 692       // [ needed in case macro paramater is used as prefix for array-deref operation
 693       if (t->type == tok_operator && (t->content == "(" || t->content == "["))
 694         ++nesting;
 695       else if (nesting && t->type == tok_operator && (t->content == ")" || t->content == "]"))
 696         --nesting;
 697       else if (!nesting && t->type == tok_operator
 698                && (t->content == ")" || t->content == ","))
 699         break;
 700       param.push_back(t);
 701     }
 702   while (true);
 703   return t; // report ")" or "," or NULL
 704 }
 705
 706
 707 // Consume a macro declaration's body, heeding nested %( %) brackets.
 708 const token*
 709 parser::slurp_pp1_body (vector<const token*>& body)
 710 {
 711   const token* t = 0;
 712   unsigned nesting = 0;
 713   do
 714     {
 715       t = next_pp1 ();
 716
 717       if (!t)
 718         break;
 719       if (t->type == tok_operator && t->content == "%(")
 720         ++nesting;
 721       else if (nesting && t->type == tok_operator && t->content == "%)")
 722         --nesting;
 723       else if (!nesting && t->type == tok_operator && t->content == "%)")
 724         break;
 725       body.push_back(t);
 726     }
 727   while (true);
 728   return t; // report final "%)" or NULL
 729 }
 730
 731 // Used for parsing .stpm files.
 732 stapfile*
 733 parser::parse_library_macros ()
 734 {
 735   stapfile* f = new stapfile;
 736   f->privileged = this->privileged;
 737   input.set_current_file (f);
 738
 739   try
 740     {
 741       const token* t = scan_pp ();
 742
 743       // Currently we only take objection to macro invocations if they
 744       // produce a non-whitespace token after being expanded.
 745
 746       // XXX should we prevent macro invocations even if they expand to empty??
 747
 748       if (t != 0)
 749         throw PARSE_ERROR (_F("unexpected token in library macro file '%s'", input_name.c_str()), t);
 750
 751       // We need to first check whether *any* of the macros are duplicates,
 752       // then commit to including the entire file in the global namespace
 753       // (or not). Yuck.
 754       for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
 755            it != pp1_namespace.end(); it++)
 756         {
 757           string name = it->first;
 758
 759           if (session.library_macros.find(name) != session.library_macros.end())
 760             {
 761               parse_error er(ERR_SRC, _F("duplicate definition of library macro '@%s'", name.c_str()), it->second->tok);
 762               er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here", name.c_str()), session.library_macros[name]->tok);
 763               print_error (er);
 764
 765               delete er.chain;
 766               delete f;
 767               return 0;
 768             }
 769         }
 770
 771     }
 772   catch (const parse_error& pe)
 773     {
 774       print_error (pe, errs_as_warnings);
 775       delete f;
 776       return 0;
 777     }
 778
 779   // If no errors, include the entire file.  Note how this is outside
 780   // of the try-catch block -- no errors possible.
 781   for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
 782        it != pp1_namespace.end(); it++)
 783     {
 784       string name = it->first;
 785
 786       session.library_macros[name] = it->second;
 787       session.library_macros[name]->context = ctx_library;
 788     }
 789
 790   return f;
 791 }
 792
 793 // Second pass - preprocessor conditional expansion.
 794 //
 795 // The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
 796 // where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
 797 //                 or: arch COMPARISON-OP "arch-string"
 798 //                 or: systemtap_v COMPARISON-OP "version-string"
 799 //                 or: systemtap_privilege COMPARISON-OP "privilege-string"
 800 //                 or: CONFIG_foo COMPARISON-OP "config-string"
 801 //                 or: CONFIG_foo COMPARISON-OP number
 802 //                 or: CONFIG_foo COMPARISON-OP CONFIG_bar
 803 //                 or: "string1" COMPARISON-OP "string2"
 804 //                 or: number1 COMPARISON-OP number2
 805 // The %: ELSE-TOKENS part is optional.
 806 //
 807 // e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
 808 // e.g. %( arch != "i?86" %? "foo" %: "baz" %)
 809 // e.g. %( CONFIG_foo %? "foo" %: "baz" %)
 810 //
 811 // Up to an entire %( ... %) expression is processed by a single call
 812 // to this function.  Tokens included by any nested conditions are
 813 // enqueued in a private vector.
 814
 815 bool eval_pp_conditional (systemtap_session& s,
 816                           const token* l, const token* op, const token* r)
 817 {
 818   if (l->type == tok_identifier && (l->content == "kernel_v" ||
 819                                     l->content == "kernel_vr" ||
 820                                     l->content == "systemtap_v"))
 821     {
 822       if (! (r->type == tok_string))
 823         throw PARSE_ERROR (_("expected string literal"), r);
 824
 825       string target_kernel_vr = s.kernel_release;
 826       string target_kernel_v = s.kernel_base_release;
 827       string target;
 828
 829       if (l->content == "kernel_v") target = target_kernel_v;
 830       else if (l->content == "kernel_vr") target = target_kernel_vr;
 831       else if (l->content == "systemtap_v") target = s.compatible;
 832       else assert (0);
 833
 834       string query = r->content;
 835       bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
 836
 837       // collect acceptable strverscmp results.
 838       int rvc_ok1, rvc_ok2;
 839       bool wc_ok = false;
 840       if (op->type == tok_operator && op->content == "<=")
 841         { rvc_ok1 = -1; rvc_ok2 = 0; }
 842       else if (op->type == tok_operator && op->content == ">=")
 843         { rvc_ok1 = 1; rvc_ok2 = 0; }
 844       else if (op->type == tok_operator && op->content == "<")
 845         { rvc_ok1 = -1; rvc_ok2 = -1; }
 846       else if (op->type == tok_operator && op->content == ">")
 847         { rvc_ok1 = 1; rvc_ok2 = 1; }
 848       else if (op->type == tok_operator && op->content == "==")
 849         { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
 850       else if (op->type == tok_operator && op->content == "!=")
 851         { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
 852       else
 853         throw PARSE_ERROR (_("expected comparison operator"), op);
 854
 855       if ((!wc_ok) && rhs_wildcard)
 856         throw PARSE_ERROR (_("wildcard not allowed with order comparison operators"), op);
 857
 858       if (rhs_wildcard)
 859         {
 860           int rvc_result = fnmatch (query.c_str(), target.c_str(),
 861                                     FNM_NOESCAPE); // spooky
 862           bool badness = (rvc_result == 0) ^ (op->content == "==");
 863           return !badness;
 864         }
 865       else
 866         {
 867           int rvc_result = strverscmp (target.c_str(), query.c_str());
 868           // normalize rvc_result
 869           if (rvc_result < 0) rvc_result = -1;
 870           if (rvc_result > 0) rvc_result = 1;
 871           return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
 872         }
 873     }
 874   else if (l->type == tok_identifier && l->content == "systemtap_privilege")
 875     {
 876       string target_privilege =
 877         pr_contains(s.privilege, pr_stapdev) ? "stapdev"
 878         : pr_contains(s.privilege, pr_stapsys) ? "stapsys"
 879         : pr_contains(s.privilege, pr_stapusr) ? "stapusr"
 880         : "none"; /* should be impossible -- s.privilege always one of above */
 881       assert(target_privilege != "none");
 882
 883       if (! (r->type == tok_string))
 884         throw PARSE_ERROR (_("expected string literal"), r);
 885       string query_privilege = r->content;
 886
 887       bool nomatch = (target_privilege != query_privilege);
 888
 889       bool result;
 890       if (op->type == tok_operator && op->content == "==")
 891         result = !nomatch;
 892       else if (op->type == tok_operator && op->content == "!=")
 893         result = nomatch;
 894       else
 895         throw PARSE_ERROR (_("expected '==' or '!='"), op);
 896       /* XXX perhaps allow <= >= and similar comparisons */
 897
 898       return result;
 899     }
 900   else if (l->type == tok_identifier && l->content == "guru_mode")
 901     {
 902       if (! (r->type == tok_number))
 903         throw PARSE_ERROR (_("expected number"), r);
 904       int64_t lhs = (int64_t) s.guru_mode;
 905       int64_t rhs = lex_cast<int64_t>(r->content);
 906       if (!((rhs == 0)||(rhs == 1)))
 907         throw PARSE_ERROR (_("expected 0 or 1"), op);
 908       if (!((op->type == tok_operator && op->content == "==") ||
 909             (op->type == tok_operator && op->content == "!=")))
 910         throw PARSE_ERROR (_("expected '==' or '!='"), op);
 911
 912       return eval_comparison (lhs, op, rhs);
 913     }
 914   else if (l->type == tok_identifier && l->content == "arch")
 915     {
 916       string target_architecture = s.architecture;
 917       if (! (r->type == tok_string))
 918         throw PARSE_ERROR (_("expected string literal"), r);
 919       string query_architecture = r->content;
 920
 921       int nomatch = fnmatch (query_architecture.c_str(),
 922                              target_architecture.c_str(),
 923                              FNM_NOESCAPE); // still spooky
 924
 925       bool result;
 926       if (op->type == tok_operator && op->content == "==")
 927         result = !nomatch;
 928       else if (op->type == tok_operator && op->content == "!=")
 929         result = nomatch;
 930       else
 931         throw PARSE_ERROR (_("expected '==' or '!='"), op);
 932
 933       return result;
 934     }
 935   else if (l->type == tok_identifier && l->content == "runtime")
 936     {
 937       if (! (r->type == tok_string))
 938         throw PARSE_ERROR (_("expected string literal"), r);
 939
 940       string query_runtime = r->content;
 941       string target_runtime;
 942
 943       if (s.runtime_mode == systemtap_session::dyninst_runtime)
 944         target_runtime = "dyninst";
 945       else if (s.runtime_mode == systemtap_session::bpf_runtime)
 946         target_runtime = "bpf";
 947       else
 948         target_runtime = "kernel";
 949
 950       int nomatch = fnmatch (query_runtime.c_str(),
 951                              target_runtime.c_str(),
 952                              FNM_NOESCAPE); // still spooky
 953
 954       bool result;
 955       if (op->type == tok_operator && op->content == "==")
 956         result = !nomatch;
 957       else if (op->type == tok_operator && op->content == "!=")
 958         result = nomatch;
 959       else
 960         throw PARSE_ERROR (_("expected '==' or '!='"), op);
 961
 962       return result;
 963     }
 964   else if (l->type == tok_identifier && l->content.starts_with("CONFIG_"))
 965     {
 966       if (r->type == tok_string)
 967         {
 968           string lhs = s.kernel_config[l->content]; // may be empty
 969           string rhs = r->content;
 970
 971           int nomatch = fnmatch (rhs.c_str(), lhs.c_str(), FNM_NOESCAPE); // still spooky
 972
 973           bool result;
 974           if (op->type == tok_operator && op->content == "==")
 975             result = !nomatch;
 976           else if (op->type == tok_operator && op->content == "!=")
 977             result = nomatch;
 978           else
 979             throw PARSE_ERROR (_("expected '==' or '!='"), op);
 980
 981           return result;
 982         }
 983       else if (r->type == tok_number)
 984         {
 985           const string& lhs_string = s.kernel_config[l->content];
 986           const char* startp = lhs_string.c_str ();
 987           char* endp = (char*) startp;
 988           errno = 0;
 989           int64_t lhs = (int64_t) strtoll (startp, & endp, 0);
 990           if (errno == ERANGE || errno == EINVAL || *endp != '\0')
 991             throw PARSE_ERROR ("Config option value not a number", l);
 992
 993           int64_t rhs = lex_cast<int64_t>(r->content);
 994           return eval_comparison (lhs, op, rhs);
 995         }
 996       else if (r->type == tok_identifier
 997                && r->content.starts_with( "CONFIG_"))
 998         {
 999           // First try to convert both to numbers,
1000           // otherwise threat both as strings.
1001           const string& lhs_string = s.kernel_config[l->content];
1002           const string& rhs_string = s.kernel_config[r->content];
1003           const char* startp = lhs_string.c_str ();
1004           char* endp = (char*) startp;
1005           errno = 0;
1006           int64_t val = (int64_t) strtoll (startp, & endp, 0);
1007           if (errno != ERANGE && errno != EINVAL && *endp == '\0')
1008             {
1009               int64_t lhs = val;
1010               startp = rhs_string.c_str ();
1011               endp = (char*) startp;
1012               errno = 0;
1013               int64_t rhs = (int64_t) strtoll (startp, & endp, 0);
1014               if (errno != ERANGE && errno != EINVAL && *endp == '\0')
1015                 return eval_comparison (lhs, op, rhs);
1016             }
1017
1018           return eval_comparison (lhs_string, op, rhs_string);
1019         }
1020       else
1021         throw PARSE_ERROR (_("expected string, number literal or other CONFIG_... as right side operand"), r);
1022     }
1023   else if (l->type == tok_string && r->type == tok_string)
1024     {
1025       string lhs = l->content;
1026       string rhs = r->content;
1027       return eval_comparison (lhs, op, rhs);
1028       // NB: no wildcarding option here
1029     }
1030   else if (l->type == tok_number && r->type == tok_number)
1031     {
1032       int64_t lhs = lex_cast<int64_t>(l->content);
1033       int64_t rhs = lex_cast<int64_t>(r->content);
1034       return eval_comparison (lhs, op, rhs);
1035       // NB: no wildcarding option here
1036     }
1037   else if (l->type == tok_string && r->type == tok_number
1038             && op->type == tok_operator)
1039     throw PARSE_ERROR (_("expected string literal as right value"), r);
1040   else if (l->type == tok_number && r->type == tok_string
1041             && op->type == tok_operator)
1042     throw PARSE_ERROR (_("expected number literal as right value"), r);
1043
1044   else
1045     throw PARSE_ERROR (_("expected 'arch', 'kernel_v', 'kernel_vr', 'systemtap_v',\n"
1046                          "             'runtime', 'systemtap_privilege', 'CONFIG_...', or\n"
1047                          "             comparison between strings or integers"), l);
1048 }
1049
1050
1051 // Only tokens corresponding to the TRUE statement must be expanded
1052 const token*
1053 parser::scan_pp ()
1054 {
1055   while (true)
1056     {
1057       pp_state_t pp = PP_NONE;
1058       if (!pp_state.empty())
1059         pp = pp_state.back().second;
1060
1061       const token* t = 0;
1062       if (pp == PP_SKIP_THEN || pp == PP_SKIP_ELSE)
1063         t = skip_pp ();
1064       else
1065         t = scan_pp1 ();
1066
1067       if (t == 0) // EOF
1068         {
1069           if (pp != PP_NONE)
1070             {
1071               t = pp_state.back().first;
1072               pp_state.pop_back(); // so skip_some doesn't keep trying to close this
1073               //TRANSLATORS: 'conditional' meaning 'conditional preprocessing'
1074               throw PARSE_ERROR (_("incomplete conditional at end of file"), t);
1075             }
1076           return t;
1077         }
1078
1079       // misplaced preprocessor "then"
1080       if (t->type == tok_operator && t->content == "%?")
1081         throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1082
1083       // preprocessor "else"
1084       if (t->type == tok_operator && t->content == "%:")
1085         {
1086           if (pp == PP_NONE)
1087             throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1088           if (pp == PP_KEEP_ELSE || pp == PP_SKIP_ELSE)
1089             throw PARSE_ERROR (_("invalid conditional - duplicate '%:'"), t);
1090           // XXX: here and elsewhere, error cascades might be avoided
1091           // by dropping tokens until we reach the closing %)
1092
1093           pp_state.back().second = (pp == PP_KEEP_THEN) ?
1094                                    PP_SKIP_ELSE : PP_KEEP_ELSE;
1095           delete t;
1096           continue;
1097         }
1098
1099       // preprocessor close
1100       if (t->type == tok_operator && t->content == "%)")
1101         {
1102           if (pp == PP_NONE)
1103             throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1104           delete pp_state.back().first;
1105           delete t; //this is the closing bracket
1106           pp_state.pop_back();
1107           continue;
1108         }
1109
1110       if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
1111         return t;
1112
1113       // We have a %( - it's time to throw a preprocessing party!
1114
1115       bool result = false;
1116       bool and_result = true;
1117       const token *n = NULL;
1118       do {
1119         const token *l, *op, *r;
1120         l = scan_pp1 ();
1121         op = scan_pp1 ();
1122         r = scan_pp1 ();
1123         if (l == 0 || op == 0 || r == 0)
1124           throw PARSE_ERROR (_("incomplete condition after '%('"), t);
1125         // NB: consider generalizing to consume all tokens until %?, and
1126         // passing that as a vector to an evaluator.
1127
1128         // Do not evaluate the condition if we haven't expanded everything.
1129         // This may occur when having several recursive conditionals.
1130         and_result &= eval_pp_conditional (session, l, op, r);
1131         if(l->content=="systemtap_v")
1132           systemtap_v_seen=r;
1133
1134         else
1135           delete r;
1136
1137         delete l;
1138         delete op;
1139         delete n;
1140
1141         n = scan_pp1 ();
1142         if (n && n->type == tok_operator && n->content == "&&")
1143           continue;
1144         result |= and_result;
1145         and_result = true;
1146         if (! (n && n->type == tok_operator && n->content == "||"))
1147           break;
1148       } while (true);
1149
1150       /*
1151       clog << "PP eval (" << *t << ") == " << result << endl;
1152       */
1153
1154       const token *m = n;
1155       if (! (m && m->type == tok_operator && m->content == "%?"))
1156         throw PARSE_ERROR (_("expected '%?' marker for conditional"), t);
1157       delete m; // "%?"
1158
1159       pp = result ? PP_KEEP_THEN : PP_SKIP_THEN;
1160       pp_state.push_back (make_pair (t, pp));
1161
1162       // Now loop around to look for a real token.
1163     }
1164 }
1165
1166
1167 // Skip over tokens and any errors, heeding
1168 // only nested preprocessor starts and ends.
1169 const token*
1170 parser::skip_pp ()
1171 {
1172   const token* t = 0;
1173   unsigned nesting = 0;
1174   do
1175     {
1176       try
1177         {
1178           t = scan_pp1 (true);
1179         }
1180       catch (const parse_error &e)
1181         {
1182           continue;
1183         }
1184       if (!t)
1185         break;
1186       if (t->type == tok_operator && t->content == "%(")
1187         ++nesting;
1188       else if (nesting && t->type == tok_operator && t->content == "%)")
1189         --nesting;
1190       else if (!nesting && t->type == tok_operator &&
1191                (t->content == "%:" || t->content == "%?" || t->content == "%)"))
1192         break;
1193       delete t;
1194     }
1195   while (true);
1196   return t;
1197 }
1198
1199
1200 const token*
1201 parser::next ()
1202 {
1203   if (! next_t)
1204     next_t = scan_pp ();
1205   if (! next_t)
1206     throw PARSE_ERROR (_("unexpected end-of-file"));
1207
1208   last_t = next_t;
1209   // advance by zeroing next_t
1210   next_t = 0;
1211   return last_t;
1212 }
1213
1214
1215 const token*
1216 parser::peek ()
1217 {
1218   if (! next_t)
1219     next_t = scan_pp ();
1220
1221   // don't advance by zeroing next_t
1222   last_t = next_t;
1223   return next_t;
1224 }
1225
1226
1227 void
1228 parser::swallow ()
1229 {
1230   // can only swallow something last peeked or nexted token.
1231   assert (last_t != 0);
1232   delete last_t;
1233   // advance by zeroing next_t
1234   last_t = next_t = 0;
1235 }
1236
1237
1238 static inline bool
1239 tok_is(token const * t, token_type tt, string const & expected)
1240 {
1241   return t && t->type == tt && t->content == expected;
1242 }
1243
1244
1245 void
1246 parser::expect_known (token_type tt, string const & expected)
1247 {
1248   const token *t = next();
1249   if (! (t && t->type == tt && t->content == expected))
1250     throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
1251   // NB: PR25174 may require consume_string_literals() someday
1252   swallow (); // We are done with it, content was copied.
1253 }
1254
1255
1256 void
1257 parser::expect_unknown (token_type tt, interned_string & target)
1258 {
1259   const token *t = next();
1260   if (!(t && t->type == tt))
1261     throw PARSE_ERROR (_("expected ") + tt2str(tt));
1262   if (t->type==tok_string)
1263     {
1264       literal_string *ls = consume_string_literals (t);
1265       target = ls->value;
1266       delete ls;
1267     }
1268   else
1269     {
1270       target = t->content;
1271       swallow (); // We are done with it, content was copied.
1272     }
1273 }
1274
1275
1276 void
1277 parser::expect_unknown2 (token_type tt1, token_type tt2, interned_string & target)
1278 {
1279   const token *t = next();
1280   if (!(t && (t->type == tt1 || t->type == tt2)))
1281     throw PARSE_ERROR (_F("expected %s or %s", tt2str(tt1).c_str(), tt2str(tt2).c_str()));
1282   // NB: PR25174 may require consume_string_literals() someday
1283   target = t->content;
1284   swallow (); // We are done with it, content was copied.
1285 }
1286
1287
1288 void
1289 parser::expect_op (string const & expected)
1290 {
1291   expect_known (tok_operator, expected);
1292 }
1293
1294 interned_string
1295 parser::expect_op_any (initializer_list<const char*> expected)
1296 {
1297   const token *t = next();
1298   if (t && t->type == tok_operator)
1299     for (auto it = expected.begin(); it != expected.end(); ++it)
1300       if (t->content == *it)
1301         {
1302           interned_string found = t->content;
1303           swallow (); // We are done with it, content was copied.
1304           return found;
1305         }
1306
1307   string msg;
1308   for (auto it = expected.begin(); it != expected.end(); ++it)
1309     {
1310       if (it != expected.begin())
1311         msg.append(" ");
1312       msg.append(*it);
1313     }
1314   throw PARSE_ERROR (_F("expected one of '%s'", msg.c_str()));
1315 }
1316
1317 void
1318 parser::expect_kw (string const & expected)
1319 {
1320   expect_known (tok_keyword, expected);
1321 }
1322
1323 const token*
1324 parser::expect_kw_token (string const & expected)
1325 {
1326   const token *t = next();
1327   if (! (t && t->type == tok_keyword && t->content == expected))
1328     throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
1329   return t;
1330 }
1331
1332 void
1333 parser::expect_number (int64_t & value)
1334 {
1335   bool neg = false;
1336   const token *t = next();
1337   if (t->type == tok_operator && t->content == "-")
1338     {
1339       neg = true;
1340       swallow ();
1341       t = next ();
1342     }
1343   if (!(t && t->type == tok_number))
1344     throw PARSE_ERROR (_("expected number"));
1345
1346   const string& s = t->content;
1347   const char* startp = s.c_str ();
1348   char* endp = (char*) startp;
1349
1350   // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1351   // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1352   // since the lexer only gives us positive digit strings, but we'll
1353   // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1354   errno = 0;
1355   value = (int64_t) strtoull (startp, & endp, 0);
1356   if (errno == ERANGE || errno == EINVAL || *endp != '\0'
1357       || (neg && (unsigned long long) value > 9223372036854775808ULL)
1358       || (unsigned long long) value > 18446744073709551615ULL
1359       || value < -9223372036854775807LL-1)
1360     throw PARSE_ERROR (_("number invalid or out of range"));
1361
1362   if (neg)
1363     value = -value;
1364
1365   swallow (); // We are done with it, content was parsed and copied into value.
1366 }
1367
1368
1369 const token*
1370 parser::expect_ident_or_atword (interned_string & target)
1371 {
1372   const token *t = next();
1373
1374   // accept identifiers and operators beginning in '@':
1375   if (!t || (t->type != tok_identifier
1376              && (t->type != tok_operator || t->content[0] != '@')))
1377     // XXX currently this is only called from parse_hist_op_or_bare_name(),
1378     // so the message is accurate, but keep an eye out in the future:
1379     throw PARSE_ERROR (_F("expected %s or statistical operation", tt2str(tok_identifier).c_str()));
1380
1381   target = t->content;
1382   return t;
1383 }
1384
1385
1386 void
1387 parser::expect_ident_or_keyword (interned_string & target)
1388 {
1389   expect_unknown2 (tok_identifier, tok_keyword, target);
1390 }
1391
1392
1393 bool
1394 parser::peek_op (string const & op)
1395 {
1396   return tok_is (peek(), tok_operator, op);
1397 }
1398
1399
1400 bool
1401 parser::peek_kw (string const & kw)
1402 {
1403   return tok_is (peek(), tok_identifier, kw);
1404 }
1405
1406
1407
1408 lexer::lexer (istream& input, const string& in, systemtap_session& s, bool cc):
1409   ate_comment(false), ate_whitespace(false), saw_tokens(false), check_compatible(cc),
1410   input_name (in), input_pointer (0), input_end (0), cursor_suspend_count(0),
1411   cursor_suspend_line (1), cursor_suspend_column (1), cursor_line (1),
1412   cursor_column (1), session(s), current_file (0), current_token_chain (0)
1413 {
1414   getline(input, input_contents, '\0');
1415
1416   input_pointer = input_contents.data();
1417   input_end = input_contents.data() + input_contents.size();
1418
1419   if (keywords.empty())
1420     {
1421       // NB: adding new keywords is highly disruptive to the language,
1422       // in particular to existing scripts that could be suddenly
1423       // broken.  If done at all, it has to be s.compatible-sensitive,
1424       // and broadly advertised.
1425       keywords.insert("probe");
1426       keywords.insert("global");
1427       if (has_version("3.0"))
1428         keywords.insert("private");
1429       keywords.insert("function");
1430       keywords.insert("if");
1431       keywords.insert("else");
1432       keywords.insert("for");
1433       keywords.insert("foreach");
1434       keywords.insert("in");
1435       keywords.insert("limit");
1436       keywords.insert("return");
1437       keywords.insert("delete");
1438       keywords.insert("while");
1439       keywords.insert("break");
1440       keywords.insert("continue");
1441       keywords.insert("next");
1442       keywords.insert("string");
1443       keywords.insert("long");
1444       keywords.insert("try");
1445       keywords.insert("catch");
1446     }
1447
1448   if (atwords.empty())
1449     {
1450       // NB: adding new @words is mildly disruptive to existing
1451       // scripts that define macros with the same name, but not
1452       // really. The user will merely receive a warning that they are
1453       // redefining an existing operator.
1454
1455       // These are inserted without the actual '@', so we can directly check
1456       // proposed macro names without building a string with that prefix.
1457       atwords.insert("cast");
1458       atwords.insert("defined");
1459       atwords.insert("entry");
1460       atwords.insert("perf");
1461       atwords.insert("var");
1462       atwords.insert("avg");
1463       atwords.insert("count");
1464       atwords.insert("sum");
1465       atwords.insert("min");
1466       atwords.insert("max");
1467       atwords.insert("hist_linear");
1468       atwords.insert("hist_log");
1469       if (has_version("3.1"))
1470         {
1471           atwords.insert("const");
1472           atwords.insert("variance");
1473         }
1474       if (has_version("4.0"))
1475         {
1476           atwords.insert("kregister");
1477           atwords.insert("uregister");
1478           atwords.insert("kderef");
1479           atwords.insert("uderef");
1480         }
1481     }
1482 }
1483
// Definition of the static @word set declared in class lexer; it is
// shared by all lexer objects and is filled in by the lexer constructor
// when found empty.  Entries are stored without the leading '@'.
unordered_set<string> lexer::atwords;
1485
1486 void
1487 lexer::set_current_file (stapfile* f)
1488 {
1489   current_file = f;
1490   if (f)
1491     {
1492       f->file_contents = input_contents;
1493       f->name = input_name;
1494     }
1495 }
1496
1497 void
1498 lexer::set_current_token_chain (const token* tok)
1499 {
1500   current_token_chain = tok;
1501 }
1502
1503 int
1504 lexer::input_peek (unsigned n)
1505 {
1506   if (input_pointer + n >= input_end)
1507     return -1; // EOF
1508   return (unsigned char)*(input_pointer + n);
1509 }
1510
1511
1512 bool
1513 lexer::has_version (const char* v) const
1514 {
1515   return check_compatible
1516     ? strverscmp(session.compatible.c_str(), v) >= 0
1517     : true;
1518 }
1519
1520 int
1521 lexer::input_get ()
1522 {
1523   int c = input_peek();
1524   if (c < 0) return c; // EOF
1525
1526   ++input_pointer;
1527
1528   if (cursor_suspend_count)
1529     {
1530       // Track effect of input_put: preserve previous cursor/line_column
1531       // until all of its characters are consumed.
1532       if (--cursor_suspend_count == 0)
1533         {
1534           cursor_line = cursor_suspend_line;
1535           cursor_column = cursor_suspend_column;
1536         }
1537     }
1538   else
1539     {
1540       // update source cursor
1541       if (c == '\n')
1542         {
1543           cursor_line ++;
1544           cursor_column = 1;
1545         }
1546       else
1547         cursor_column ++;
1548     }
1549
1550   // clog << "[" << (char)c << "]";
1551   return c;
1552 }
1553
1554
1555 void
1556 lexer::input_put (const string& chars, const token* t)
1557 {
1558   size_t pos = input_pointer - input_contents.data();
1559   // clog << "[put:" << chars << " @" << pos << "]";
1560   input_contents.insert (pos, chars);
1561   cursor_suspend_count += chars.size();
1562   cursor_suspend_line = cursor_line;
1563   cursor_suspend_column = cursor_column;
1564   cursor_line = t->location.line;
1565   cursor_column = t->location.column;
1566   input_pointer = input_contents.data() + pos;
1567   input_end = input_contents.data() + input_contents.size();
1568 }
1569
1570
1571 token*
1572 lexer::scan ()
1573 {
1574   ate_comment = false; // reset for each new token
1575   ate_whitespace = false; // reset for each new token
1576
1577   // XXX be very sure to restore old_saw_tokens if we return without a token:
1578   bool old_saw_tokens = saw_tokens;
1579   saw_tokens = true;
1580
1581   token* n = new token;
1582   string token_str; // accumulate here instead of by incremental interning
1583   n->location.file = current_file;
1584   n->chain = current_token_chain;
1585
1586 skip:
1587   bool suspended = (cursor_suspend_count > 0);
1588   n->location.line = cursor_line;
1589   n->location.column = cursor_column;
1590
1591   int c = input_get();
1592   // clog << "{" << (char)c << (char)c2 << "}";
1593   if (c < 0)
1594     {
1595       delete n;
1596       saw_tokens = old_saw_tokens;
1597       return 0;
1598     }
1599
1600   if (isspace (c))
1601     {
1602       ate_whitespace = true;
1603       goto skip;
1604     }
1605
1606   int c2 = input_peek ();
1607
1608   // Paste command line arguments as character streams into
1609   // the beginning of a token.  $1..$999 go through as raw
1610   // characters; @1..@999 are quoted/escaped as strings.
1611   // $# and @# expand to the number of arguments, similarly
1612   // raw or quoted.
1613   if ((c == '$' || c == '@') && (c2 == '#'))
1614     {
1615       token_str.push_back (c);
1616       token_str.push_back (c2);
1617       input_get(); // swallow '#'
1618
1619       if (suspended)
1620         {
1621           n->make_junk(tok_junk_nested_arg);
1622           return n;
1623         }
1624       size_t num_args = session.args.size ();
1625       input_put ((c == '$') ? lex_cast (num_args) : lex_cast_qstring (num_args), n);
1626       token_str.clear();
1627       goto skip;
1628     }
1629   else if ((c == '$' || c == '@') && (isdigit (c2)))
1630     {
1631       unsigned idx = 0;
1632       token_str.push_back (c);
1633       do
1634         {
1635           input_get ();
1636           token_str.push_back (c2);
1637           idx = (idx * 10) + (c2 - '0');
1638           c2 = input_peek ();
1639         } while (c2 > 0 &&
1640                  isdigit (c2) &&
1641                  idx <= session.args.size()); // prevent overflow
1642       if (suspended)
1643         {
1644           n->make_junk(tok_junk_nested_arg);
1645           return n;
1646         }
1647       if (idx == 0 ||
1648           idx-1 >= session.args.size())
1649         {
1650           n->make_junk(tok_junk_invalid_arg);
1651           return n;
1652         }
1653       session.used_args[idx-1] = true;
1654       const string& arg = session.args[idx-1];
1655       input_put ((c == '$') ? arg : lex_cast_qstring (arg), n);
1656       token_str.clear();
1657       goto skip;
1658     }
1659
1660   else if (isalpha (c) || c == '$' || c == '@' || c == '_')
1661     {
1662       token_str = (char) c;
1663       while (isalnum (c2) || c2 == '_' || c2 == '$')
1664         {
1665           input_get ();
1666           token_str.push_back (c2);
1667           c2 = input_peek ();
1668         }
1669       n->content = token_str;
1670
1671       if (n->content[0] == '@')
1672         // makes it easier to detect illegal use of @words:
1673         n->type = tok_operator;
1674       else if (keywords.count(n->content))
1675         n->type = tok_keyword;
1676       else
1677         n->type = tok_identifier;
1678
1679       return n;
1680     }
1681
1682   else if (isdigit (c)) // positive literal
1683     {
1684       n->type = tok_number;
1685       token_str = (char) c;
1686
1687       while (isalnum (c2))
1688         {
1689           // NB: isalnum is very permissive.  We rely on strtol, called in
1690           // parser::parse_literal below, to confirm that the number string
1691           // is correctly formatted and in range.
1692
1693           input_get ();
1694           token_str.push_back (c2);
1695           c2 = input_peek ();
1696         }
1697
1698       n->content = token_str;
1699       return n;
1700     }
1701
1702   else if (c == '\"')
1703     {
1704       n->type = tok_string;
1705       while (1)
1706         {
1707           c = input_get ();
1708
1709           if (c < 0 || c == '\n')
1710             {
1711               n->make_junk(tok_junk_unclosed_quote);
1712               return n;
1713             }
1714           if (c == '\"') // closing double-quotes
1715             break;
1716           else if (c == '\\') // see also input_put
1717             {
1718               c = input_get();
1719               switch (c)
1720                 {
1721                 case 'x':
1722                   if (!has_version("2.3"))
1723                     goto the_default;
1724                   /* FALLTHROUGH */
1725                 case 'a':
1726                 case 'b':
1727                 case 't':
1728                 case 'n':
1729                 case 'v':
1730                 case 'f':
1731                 case 'r':
1732                 case '0' ... '7': // NB: need only match the first digit
1733                 case '\\':
1734                   // Pass these escapes through to the string value
1735                   // being parsed; it will be emitted into a C literal.
1736                   // XXX: PR13371: perhaps we should evaluate them here
1737                   // (and re-quote them during translate.cxx emission).
1738                   token_str.push_back ('\\');
1739
1740                   // fall through
1741                 default: the_default:
1742                   token_str.push_back (c);
1743                   break;
1744                 }
1745             }
1746           else
1747             token_str.push_back (c);
1748         }
1749       n->content = token_str;
1750       return n;
1751     }
1752
1753   else if (ispunct (c))
1754     {
1755       int c3 = input_peek (1);
1756
1757       // NB: if we were to recognize negative numeric literals here,
1758       // we'd introduce another grammar ambiguity:
1759       // 1-1 would be parsed as tok_number(1) and tok_number(-1)
1760       // instead of tok_number(1) tok_operator('-') tok_number(1)
1761
1762       if (c == '#') // shell comment
1763         {
1764           unsigned this_line = cursor_line;
1765           do { c = input_get (); }
1766           while (c >= 0 && cursor_line == this_line);
1767           ate_comment = true;
1768           ate_whitespace = true;
1769           goto skip;
1770         }
1771       else if ((c == '/' && c2 == '/')) // C++ comment
1772         {
1773           unsigned this_line = cursor_line;
1774           do { c = input_get (); }
1775           while (c >= 0 && cursor_line == this_line);
1776           ate_comment = true;
1777           ate_whitespace = true;
1778           goto skip;
1779         }
1780       else if (c == '/' && c2 == '*') // C comment
1781         {
1782           (void) input_get (); // swallow '*' already in c2
1783           c = input_get ();
1784           c2 = input_get ();
1785           while (c2 >= 0)
1786             {
1787               if (c == '*' && c2 == '/')
1788                 break;
1789               c = c2;
1790               c2 = input_get ();
1791             }
1792           ate_comment = true;
1793           ate_whitespace = true;
1794           goto skip;
1795         }
1796       else if (c == '%' && c2 == '{') // embedded code
1797         {
1798           n->type = tok_embedded;
1799           (void) input_get (); // swallow '{' already in c2
1800           c = input_get ();
1801           c2 = input_get ();
1802           while (c2 >= 0)
1803             {
1804               if (c == '%' && c2 == '}')
1805                 {
1806                   n->content = token_str;
1807                   return n;
1808                 }
1809               if (c == '}' && c2 == '%') // possible typo
1810                 session.print_warning (_("possible erroneous closing '}%', use '%}'?"), n);
1811               token_str.push_back (c);
1812               c = c2;
1813               c2 = input_get();
1814             }
1815
1816             n->make_junk(tok_junk_unclosed_embedded);
1817             return n;
1818         }
1819
1820       // We're committed to recognizing at least the first character
1821       // as an operator.
1822       n->type = tok_operator;
1823       token_str = (char) c;
1824
1825       // match all valid operators, in decreasing size order
1826       if ((c == '<' && c2 == '<' && c3 == '<') ||
1827           (c == '>' && c2 == '>' && c3 == '>') ||
1828           (c == '<' && c2 == '<' && c3 == '=') ||
1829           (c == '>' && c2 == '>' && c3 == '='))
1830         {
1831           token_str.push_back (c2);
1832           token_str.push_back (c3);
1833           input_get (); // c2
1834           input_get (); // c3
1835         }
1836       else if ((c == '=' && c2 == '=') ||
1837                (c == '!' && c2 == '=') ||
1838                (c == '<' && c2 == '=') ||
1839                (c == '>' && c2 == '=') ||
1840                (c == '=' && c2 == '~') ||
1841                (c == '!' && c2 == '~') ||
1842                (c == '+' && c2 == '=') ||
1843                (c == '-' && c2 == '=') ||
1844                (c == '*' && c2 == '=') ||
1845                (c == '/' && c2 == '=') ||
1846                (c == '%' && c2 == '=') ||
1847                (c == '&' && c2 == '=') ||
1848                (c == '^' && c2 == '=') ||
1849                (c == '|' && c2 == '=') ||
1850                (c == '.' && c2 == '=') ||
1851                (c == '&' && c2 == '&') ||
1852                (c == '|' && c2 == '|') ||
1853                (c == '+' && c2 == '+') ||
1854                (c == '-' && c2 == '-') ||
1855                (c == '-' && c2 == '>') ||
1856                (c == '<' && c2 == '<') ||
1857                (c == '>' && c2 == '>') ||
1858                // preprocessor tokens
1859                (c == '%' && c2 == '(') ||
1860                (c == '%' && c2 == '?') ||
1861                (c == '%' && c2 == ':') ||
1862                (c == '%' && c2 == ')'))
1863         {
1864           token_str.push_back (c2);
1865           input_get (); // swallow other character
1866         }
1867
1868       n->content = token_str;
1869       return n;
1870     }
1871
1872   else
1873     {
1874       n->type = tok_junk;
1875       ostringstream s;
1876       s << "\\x" << hex << setw(2) << setfill('0') << c;
1877       n->content = s.str();
1878       // signal parser to emit "expected X, found junk" type error
1879       n->make_junk(tok_junk_unknown);
1880       return n;
1881     }
1882 }
1883
1884 // ------------------------------------------------------------------------
1885
1886 void
1887 token::make_junk (token_junk_type junk)
1888 {
1889   type = tok_junk;
1890   junk_type = junk;
1891 }
1892
1893 // ------------------------------------------------------------------------
1894
1895 string
1896 token::junk_message(systemtap_session& session) const
1897 {
1898   switch (junk_type)
1899     {
1900     case tok_junk_nested_arg:
1901       return _("invalid nested substitution of command line arguments");
1902
1903     case tok_junk_invalid_arg:
1904       return _F("command line argument out of range [1-%lu]",
1905                 (unsigned long) session.args.size());
1906
1907     case tok_junk_unclosed_quote:
1908       return _("Could not find matching closing quote");
1909
1910     case tok_junk_unclosed_embedded:
1911       return _("Could not find matching '%}' to close embedded function block");
1912
1913     default:
1914       return _("unknown junk token");
1915     }
1916 }
1917
1918 // ------------------------------------------------------------------------
1919
1920 stapfile*
1921 parser::parse ()
1922 {
1923   stapfile* f = new stapfile;
1924   f->privileged = this->privileged;
1925   input.set_current_file (f);
1926
1927   bool empty = true;
1928
1929   while (1)
1930     {
1931       try
1932         {
1933           systemtap_v_seen = 0;
1934           const token* t = peek ();
1935           if (! t) // nice clean EOF, modulo any preprocessing that occurred
1936             break;
1937
1938           empty = false;
1939           if (t->type == tok_keyword && t->content == "probe")
1940             {
1941               context = con_probe;
1942               parse_probe (f->probes, f->aliases);
1943             }
1944           else if (t->type == tok_keyword && t->content == "private")
1945             {
1946               context = con_unknown;
1947               parse_private (f->globals, f->probes, f->name, f->functions);
1948             }
1949           else if (t->type == tok_keyword && t->content == "global")
1950             {
1951               context = con_global;
1952               parse_global (f->globals, f->probes, f->name);
1953             }
1954           else if (t->type == tok_keyword && t->content == "function")
1955             {
1956               context = con_function;
1957               parse_functiondecl (f->functions, f->name);
1958             }
1959           else if (t->type == tok_embedded)
1960             {
1961               context = con_embedded;
1962               f->embeds.push_back (parse_embeddedcode ());
1963             }
1964           else
1965             {
1966               context = con_unknown;
1967               throw PARSE_ERROR (_("expected 'probe', 'global', 'private', 'function', or '%{'"));
1968             }
1969         }
1970       catch (parse_error& pe)
1971         {
1972           print_error (pe, errs_as_warnings);
1973
1974           // XXX: do we want tok_junk to be able to force skip_some behaviour?
1975           if (pe.skip_some) // for recovery
1976             // Quietly swallow all tokens until the next keyword we can start parsing from.
1977             while (1)
1978               try
1979                 {
1980                   {
1981                     const token* t = peek ();
1982                     if (! t)
1983                       break;
1984                     if (t->type == tok_keyword && t->content == "probe") break;
1985                     else if (t->type == tok_keyword && t->content == "private") break;
1986                     else if (t->type == tok_keyword && t->content == "global") break;
1987                     else if (t->type == tok_keyword && t->content == "function") break;
1988                     else if (t->type == tok_embedded) break;
1989                     swallow (); // swallow it
1990                   }
1991                 }
1992               catch (parse_error& pe2)
1993                 {
1994                   // parse error during recovery ... ugh
1995                   print_error (pe2);
1996                 }
1997         }
1998     }
1999
2000   if (empty && user_file)
2001     {
2002       // vary message depending on whether file was *actually* empty:
2003       cerr << (input.saw_tokens
2004                ? _F("Input file '%s' is empty after preprocessing.", input_name.c_str())
2005                : _F("Input file '%s' is empty.", input_name.c_str()))
2006            << endl;
2007       delete f;
2008       f = 0;
2009     }
2010   else if (num_errors > 0)
2011     {
2012       cerr << _NF("%d parse error.", "%d parse errors.", num_errors, num_errors) << endl;
2013       delete f;
2014       f = 0;
2015     }
2016
2017   input.set_current_file(0);
2018   return f;
2019 }
2020
2021
2022 probe*
2023 parser::parse_synthetic_probe (const token* chain)
2024 {
2025   probe* p = NULL;
2026   stapfile* f = new stapfile;
2027   f->privileged = this->privileged;
2028   f->synthetic = true;
2029   input.set_current_file (f);
2030   input.set_current_token_chain (chain);
2031
2032   try
2033     {
2034       context = con_probe;
2035       parse_probe (f->probes, f->aliases);
2036
2037       if (f->probes.size() != 1 || !f->aliases.empty())
2038         throw PARSE_ERROR (_("expected a single synthetic probe"));
2039       p = f->probes[0];
2040     }
2041   catch (parse_error& pe)
2042     {
2043       print_error (pe, errs_as_warnings);
2044     }
2045
2046   // TODO check for unparsed tokens?
2047
2048   input.set_current_file(0);
2049   input.set_current_token_chain(0);
2050   p->synthetic = true;
2051   return p;
2052 }
2053
2054
2055 void
2056 parser::parse_probe (vector<probe *> & probe_ret,
2057                      vector<probe_alias *> & alias_ret)
2058 {
2059   const token* t0 = next ();
2060   if (! (t0->type == tok_keyword && t0->content == "probe"))
2061     throw PARSE_ERROR (_("expected 'probe'"));
2062
2063   vector<probe_point *> aliases;
2064   vector<probe_point *> locations;
2065
2066   int epilogue_alias = 0;
2067
2068   while (1)
2069     {
2070       vector<probe_point*> pps = parse_probe_points();
2071
2072       const token* t = peek ();
2073       if (pps.size() == 1 && t
2074           && t->type == tok_operator && t->content == "=")
2075         {
2076           if (pps[0]->optional || pps[0]->sufficient)
2077             throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps[0]->components.front()->tok);
2078           aliases.push_back(pps[0]);
2079           swallow ();
2080           continue;
2081         }
2082       else if (pps.size() == 1 && t
2083           && t->type == tok_operator && t->content == "+=")
2084         {
2085           if (pps[0]->optional || pps[0]->sufficient)
2086             throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps[0]->components.front()->tok);
2087           aliases.push_back(pps[0]);
2088           epilogue_alias = 1;
2089           swallow ();
2090           continue;
2091         }
2092       else if (t && t->type == tok_operator && t->content == "{")
2093         {
2094           locations.insert(locations.end(), pps.begin(), pps.end());
2095           break;
2096         }
2097       else
2098         throw PARSE_ERROR (_("expected probe point specifier"));
2099     }
2100
2101   if (aliases.empty())
2102     {
2103       probe* p = new probe;
2104       p->tok = t0;
2105       p->locations = locations;
2106       p->body = parse_stmt_block ();
2107       p->privileged = privileged;
2108       p->systemtap_v_conditional = systemtap_v_seen;
2109       probe_ret.push_back (p);
2110     }
2111   else
2112     {
2113       probe_alias* p = new probe_alias (aliases);
2114       if(epilogue_alias)
2115         p->epilogue_style = true;
2116       else
2117         p->epilogue_style = false;
2118       p->tok = t0;
2119       p->locations = locations;
2120       p->body = parse_stmt_block ();
2121       p->privileged = privileged;
2122       p->systemtap_v_conditional = systemtap_v_seen;
2123       alias_ret.push_back (p);
2124     }
2125 }
2126
2127
2128 embeddedcode*
2129 parser::parse_embeddedcode ()
2130 {
2131   embeddedcode* e = new embeddedcode;
2132   const token* t = next ();
2133   if (t->type != tok_embedded)
2134     throw PARSE_ERROR (_("expected '%{'"));
2135
2136   if (! privileged)
2137     throw PARSE_ERROR (_("embedded code in unprivileged script; need stap -g"),
2138                        false /* don't skip tokens for parse resumption */);
2139
2140   e->tok = t;
2141   e->code = t->content;
2142   return e;
2143 }
2144
2145
2146 block*
2147 parser::parse_stmt_block ()
2148 {
2149   block* pb = new block;
2150
2151   const token* t = next ();
2152   if (! (t->type == tok_operator && t->content == "{"))
2153     throw PARSE_ERROR (_("expected '{'"));
2154
2155   pb->tok = t;
2156
2157   while (1)
2158     {
2159       t = peek ();
2160       if (t && t->type == tok_operator && t->content == "}")
2161         {
2162           swallow ();
2163           break;
2164         }
2165       pb->statements.push_back (parse_statement ());
2166     }
2167
2168   return pb;
2169 }
2170
2171
2172 try_block*
2173 parser::parse_try_block ()
2174 {
2175   try_block* pb = new try_block;
2176
2177   pb->tok = expect_kw_token ("try");
2178   pb->try_block = parse_stmt_block();
2179   expect_kw ("catch");
2180
2181   const token* t = peek ();
2182   if (t != NULL && t->type == tok_operator && t->content == "(")
2183     {
2184       swallow (); // swallow the '('
2185
2186       t = next();
2187       if (! (t->type == tok_identifier))
2188         throw PARSE_ERROR (_("expected identifier"));
2189       symbol* sym = new symbol;
2190       sym->tok = t;
2191       sym->name = t->content;
2192       pb->catch_error_var = sym;
2193
2194       expect_op (")");
2195     }
2196   else
2197     pb->catch_error_var = 0;
2198
2199   pb->catch_block = parse_stmt_block();
2200
2201   return pb;
2202 }
2203
2204
2205
2206 statement*
2207 parser::parse_statement ()
2208 {
2209   statement *ret;
2210   const token* t = peek ();
2211   if (t && t->type == tok_operator && t->content == ";")
2212     return new null_statement (next ());
2213   else if (t && t->type == tok_operator && t->content == "{")
2214     return parse_stmt_block (); // Don't squash semicolons.
2215   else if (t && t->type == tok_keyword && t->content == "try")
2216     return parse_try_block (); // Don't squash semicolons.
2217   else if (t && t->type == tok_keyword && t->content == "if")
2218     return parse_if_statement (); // Don't squash semicolons.
2219   else if (t && t->type == tok_keyword && t->content == "for")
2220     return parse_for_loop (); // Don't squash semicolons.
2221   else if (t && t->type == tok_keyword && t->content == "foreach")
2222     return parse_foreach_loop (); // Don't squash semicolons.
2223   else if (t && t->type == tok_keyword && t->content == "while")
2224     return parse_while_loop (); // Don't squash semicolons.
2225   else if (t && t->type == tok_keyword && t->content == "return")
2226     ret = parse_return_statement ();
2227   else if (t && t->type == tok_keyword && t->content == "delete")
2228     ret = parse_delete_statement ();
2229   else if (t && t->type == tok_keyword && t->content == "break")
2230     ret = parse_break_statement ();
2231   else if (t && t->type == tok_keyword && t->content == "continue")
2232     ret = parse_continue_statement ();
2233   else if (t && t->type == tok_keyword && t->content == "next")
2234     ret = parse_next_statement ();
2235   else if (t && (t->type == tok_operator || // expressions are flexible
2236                  t->type == tok_identifier ||
2237                  t->type == tok_number ||
2238                  t->type == tok_string ||
2239                  t->type == tok_embedded ))
2240     ret = parse_expr_statement ();
2241   // XXX: consider generally accepting tok_embedded here too
2242   else
2243     throw PARSE_ERROR (_("expected statement"));
2244
2245   // Squash "empty" trailing colons after any "non-block-like" statement.
2246   t = peek ();
2247   if (t && t->type == tok_operator && t->content == ";")
2248     {
2249       swallow (); // Silently eat trailing ; after statement
2250     }
2251
2252   return ret;
2253 }
2254
2255 void
2256 parser::parse_private (vector <vardecl*>& globals, vector<probe*>& probes,
2257                        string const & fname, vector<functiondecl*>& functions)
2258 {
2259   const token* t = next ();
2260   if (! (t->type == tok_keyword && t->content == "private"))
2261     throw PARSE_ERROR (_("expected 'private'"));
2262   swallow ();
2263   t = next ();
2264   if (t->type == tok_keyword && t->content == "function")
2265   {
2266     swallow ();
2267     context = con_function;
2268     do_parse_functiondecl(functions, t, fname, true);
2269   }
2270   else if (t->type == tok_keyword && t->content == "global")
2271   {
2272     swallow ();
2273     context = con_global;
2274     t = next ();
2275     if (! (t->type == tok_identifier))
2276       throw PARSE_ERROR (_("expected identifier"));
2277     do_parse_global(globals, probes, fname, t, true);
2278   }
2279   // The `private <identifier>` is an acceptable shorthand
2280   // for `private global <identifier>` per above.
2281   else if (t->type == tok_identifier)
2282   {
2283     context = con_global;
2284     do_parse_global(globals, probes, fname, t, true);
2285   }
2286   else
2287     throw PARSE_ERROR (_("expected 'function' or identifier"));
2288 }
2289
2290 void
2291 parser::parse_global (vector <vardecl*>& globals, vector<probe*>& probes,
2292                       string const & fname)
2293 {
2294   const token* t0 = next ();
2295   if (! (t0->type == tok_keyword && t0->content == "global"))
2296     throw PARSE_ERROR (_("expected 'global' or 'private'"));
2297   swallow ();
2298   do_parse_global(globals, probes, fname, 0, false);
2299 }
2300
2301 void
2302 parser::do_parse_global (vector <vardecl*>& globals, vector<probe*>&,
2303                          string const & fname, const token* t0, bool priv)
2304 {
2305   bool iter0 = true;
2306   const token* t;
2307   while (1)
2308     {
2309       t = (iter0 && priv) ? t0 : next ();
2310       iter0 = false;
2311       if (! (t->type == tok_identifier))
2312         throw PARSE_ERROR (_("expected identifier"));
2313
2314       string gname = "__global_" + string(t->content);
2315       string pname = "__private_" + detox_path(fname) + string(t->content);
2316       string name = priv ? pname : gname;
2317
2318       for (unsigned i=0; i<globals.size(); i++)
2319       {
2320         if (globals[i]->name == name)
2321           throw PARSE_ERROR (_("duplicate global name"));
2322         if ((globals[i]->name == gname) || (globals[i]->name == pname))
2323           throw PARSE_ERROR (_("global versus private variable declaration conflict"));
2324       }
2325
2326       vardecl* d = new vardecl;
2327       d->unmangled_name = t->content;
2328       d->name = name;
2329       d->tok = t;
2330       d->systemtap_v_conditional = systemtap_v_seen;
2331       globals.push_back (d);
2332
2333       t = peek ();
2334
2335       if(t && t->type == tok_operator && t->content == "%") //wrapping
2336         {
2337           d->wrap = true;
2338           swallow ();
2339           t = peek();
2340         }
2341
2342       if (t && t->type == tok_operator && t->content == "[") // array size
2343         {
2344           int64_t size;
2345           swallow ();
2346           expect_number(size);
2347           if (size <= 0 || size > INT_MAX)
2348             throw PARSE_ERROR(_("array size out of range"));
2349           d->maxsize = (int)size;
2350           expect_known(tok_operator, "]");
2351           t = peek ();
2352         }
2353
2354       if (t && t->type == tok_operator && t->content == "=") // initialization
2355         {
2356           if (!d->compatible_arity(0))
2357             throw PARSE_ERROR(_("only scalar globals can be initialized"));
2358           d->set_arity(0, t);
2359           next (); // Don't swallow, set_arity() used the peeked token.
2360           d->init = parse_literal ();
2361           d->type = d->init->type;
2362           t = peek ();
2363         }
2364
2365       if (t && t->type == tok_operator && t->content == ";") // termination
2366         {
2367           swallow ();
2368           break;
2369         }
2370
2371       if (t && t->type == tok_operator && t->content == ",") // next global
2372         {
2373           swallow ();
2374           continue;
2375         }
2376       else
2377         break;
2378     }
2379 }
2380
2381 void
2382 parser::parse_functiondecl (vector<functiondecl*>& functions,
2383                             string const & fname)
2384 {
2385   const token* t = next ();
2386   if (! (t->type == tok_keyword && t->content == "function"))
2387     throw PARSE_ERROR (_("expected 'function'"));
2388   swallow ();
2389   do_parse_functiondecl(functions, t, fname, false);
2390 }
2391
2392 void
2393 parser::do_parse_functiondecl (vector<functiondecl*>& functions, const token* t,
2394                                string const & fname, bool priv)
2395 {
2396   t = next ();
2397   if (! (t->type == tok_identifier)
2398       && ! (t->type == tok_keyword
2399             && (t->content == "string" || t->content == "long")))
2400     throw PARSE_ERROR (_("expected identifier"));
2401
2402   string gname = "__global_" + string(t->content);
2403   string pname = "__private_" + detox_path(fname) + string(t->content);
2404   string name = priv ? pname : gname;
2405   name += "__overload_" + lex_cast(session.overload_count[t->content]++);
2406
2407   functiondecl *fd = new functiondecl ();
2408   fd->unmangled_name = t->content;
2409   fd->name = name;
2410   fd->tok = t;
2411
2412   t = next ();
2413   if (t->type == tok_operator && t->content == ":")
2414     {
2415       swallow ();
2416       t = next ();
2417       if (t->type == tok_keyword && t->content == "string")
2418         fd->type = pe_string;
2419       else if (t->type == tok_keyword && t->content == "long")
2420         fd->type = pe_long;
2421       else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2422       swallow ();
2423
2424       t = next ();
2425     }
2426
2427   if (! (t->type == tok_operator && t->content == "("))
2428     throw PARSE_ERROR (_("expected '('"));
2429   swallow ();
2430
2431   while (1)
2432     {
2433       t = next ();
2434
2435       // permit zero-argument functions
2436       if (t->type == tok_operator && t->content == ")")
2437         {
2438           swallow ();
2439           break;
2440         }
2441       else if (! (t->type == tok_identifier))
2442         throw PARSE_ERROR (_("expected identifier"));
2443       vardecl* vd = new vardecl;
2444       vd->unmangled_name = vd->name = t->content;
2445
2446       for (auto it = fd->formal_args.begin() ; it != fd->formal_args.end(); ++it)
2447         {
2448           string param = vd->unmangled_name;
2449           if ((*it)->unmangled_name == param)
2450             throw PARSE_ERROR(_("duplicate parameter names"));
2451         }
2452
2453       vd->tok = t;
2454       fd->formal_args.push_back (vd);
2455       fd->systemtap_v_conditional = systemtap_v_seen;
2456
2457       t = next ();
2458
2459       if (t->type == tok_operator && t->content == ":")
2460         {
2461           swallow ();
2462           t = next ();
2463           if (t->type == tok_keyword && t->content == "string")
2464             vd->type = pe_string;
2465           else if (t->type == tok_keyword && t->content == "long")
2466             vd->type = pe_long;
2467           else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2468           swallow ();
2469           t = next ();
2470         }
2471       if (t->type == tok_operator && t->content == ")")
2472         {
2473           swallow ();
2474           break;
2475         }
2476       if (t->type == tok_operator && t->content == ",")
2477         {
2478           swallow ();
2479           continue;
2480         }
2481       else
2482         throw PARSE_ERROR (_("expected ',' or ')'"));
2483     }
2484
2485   t = peek();
2486   if (t->type == tok_operator && t->content == ":")
2487     {
2488       swallow();
2489       literal* literal = parse_literal();
2490       literal_number* ln = dynamic_cast<literal_number*>(literal);
2491       if (ln == 0)
2492         throw PARSE_ERROR (_("expected literal number"));
2493       fd->priority = ln->value;
2494
2495       // reserve priority 0 for user script implementation
2496       if (fd->priority < 1)
2497         throw PARSE_ERROR (_("specified priority must be > 0"));
2498       delete literal;
2499     }
2500   else if (user_file)
2501     {
2502       // allow script file implementation override automatically when
2503       // priority not specified
2504       fd->priority = 0;
2505     }
2506
2507   t = peek ();
2508   if (t && t->type == tok_embedded)
2509     fd->body = parse_embeddedcode ();
2510   else
2511     fd->body = parse_stmt_block ();
2512
2513   functions.push_back (fd);
2514 }
2515
2516 vector<probe_point*>
2517 parser::parse_probe_points()
2518 {
2519   vector<probe_point*> pps;
2520   while (1)
2521     {
2522       vector<probe_point*> tail = parse_components();
2523       pps.insert(pps.end(), tail.begin(), tail.end());
2524
2525       const token* t = peek();
2526       if (t && t->type == tok_operator && t->content == ",")
2527         {
2528           swallow();
2529           continue;
2530         }
2531
2532       if (t && t->type == tok_operator
2533           && (t->content == "{" || t->content == "=" ||
2534               t->content == "+="|| t->content == "}"))
2535         break;
2536
2537       throw PARSE_ERROR (_("expected one of ', { } = +='"));
2538     }
2539   return pps;
2540 }
2541
2542 vector<probe_point*>
2543 parser::parse_components()
2544 {
2545   vector<probe_point*> pps;
2546   while (1)
2547     {
2548       vector<probe_point*> suffix = parse_component();
2549
2550       // Cartesian product of components
2551       if (pps.empty())
2552         pps = suffix;
2553       else
2554         {
2555           assert(!suffix.empty());
2556           vector<probe_point*> product;
2557           for (unsigned i = 0; i < pps.size(); i++)
2558             {
2559               if (pps[i]->optional || pps[i]->sufficient || pps[i]->condition)
2560                 throw PARSE_ERROR (_("'?', '!' or condition must only be specified in suffix"),
2561                                    pps[i]->components[0]->tok);
2562               for (unsigned j = 0; j < suffix.size(); j++)
2563                 {
2564                   probe_point* pp = new probe_point;
2565                   pp->components.insert(pp->components.end(),
2566                                         pps[i]->components.begin(),
2567                                         pps[i]->components.end());
2568                   pp->components.insert(pp->components.end(),
2569                                         suffix[j]->components.begin(),
2570                                         suffix[j]->components.end());
2571                   pp->optional = suffix[j]->optional;
2572                   pp->sufficient = suffix[j]->sufficient;
2573                   if (auto_path)
2574                     pp->auto_path = suffix[j]->auto_path;
2575                   pp->condition = suffix[j]->condition;
2576                   product.push_back(pp);
2577                 }
2578             }
2579           for (unsigned i = 0; i < pps.size(); i++) delete pps[i];
2580           for (unsigned i = 0; i < suffix.size(); i++) delete suffix[i];
2581           pps = product;
2582         }
2583
2584       const token* t = peek();
2585       if (t && t->type == tok_operator && t->content == ".")
2586         {
2587           swallow ();
2588           continue;
2589         }
2590
2591       // We only fall through here at the end of        a probe point (past
2592       // all the dotted/parametrized components).
2593
2594       if (t && t->type == tok_operator &&
2595           (t->content == "?" || t->content == "!"))
2596         {
2597           for (unsigned i = 0; i < pps.size(); i++)
2598             {
2599               if (pps[i]->optional || pps[i]->sufficient)
2600                 throw PARSE_ERROR (_("'?' or '!' respecified"));
2601               pps[i]->optional = true;
2602               if (t->content == "!") pps[i]->sufficient = true;
2603             }
2604           // NB: sufficient implies optional
2605           swallow ();
2606           t = peek ();
2607           // fall through
2608         }
2609
2610       if (t && t->type == tok_keyword && t->content == "if")
2611         {
2612           swallow ();
2613           t = peek ();
2614           if (!(t && t->type == tok_operator && t->content == "("))
2615             throw PARSE_ERROR (_("expected '('"));
2616           swallow ();
2617
2618           expression* e = parse_expression();
2619           for (unsigned i = 0; i < pps.size(); i++)
2620             {
2621               if (pps[i]->condition != 0)
2622                 throw PARSE_ERROR (_("condition respecified"));
2623               pps[i]->condition = e;
2624             }
2625
2626           t = peek ();
2627           if (!(t && t->type == tok_operator && t->content == ")"))
2628             throw PARSE_ERROR (_("expected ')'"));
2629           swallow ();
2630         }
2631
2632       break;
2633     }
2634   return pps;
2635 }
2636
2637 vector<probe_point*>
2638 parser::parse_component()
2639 {
2640   const token* t = next ();
2641   if (! (t->type == tok_identifier
2642          // we must allow ".return" and ".function", which are keywords
2643          || t->type == tok_keyword
2644          // we must allow "*", due to being an operator
2645          || (t->type == tok_operator && (t->content == "*" || t->content == "{"))))
2646     throw PARSE_ERROR (_("expected identifier or '*' or '{'"));
2647
2648   if (t && t->type == tok_operator && t->content == "{")
2649     {
2650       swallow();
2651       vector<probe_point*> pps = parse_probe_points();
2652       t = peek();
2653       if (!(t && t->type == tok_operator && t->content == "}"))
2654         throw PARSE_ERROR (_("expected '}'"));
2655       swallow();
2656       return pps;
2657     }
2658   else
2659     {
2660       // loop which reconstitutes an identifier with wildcards
2661       string content = t->content;
2662       bool changed_p = false;
2663       while (1)
2664         {
2665           const token* u = peek();
2666           if (u == NULL)
2667             break;
2668           // ensure pieces of the identifier are adjacent:
2669           if (input.ate_whitespace)
2670             break;
2671           // ensure pieces of the identifier are valid:
2672           if (! (u->type == tok_identifier
2673                  // we must allow arbitrary keywords with a wildcard
2674                  || u->type == tok_keyword
2675                  // we must allow "*", due to being an operator
2676                  || (u->type == tok_operator && u->content == "*")))
2677             break;
2678
2679           // append u to t
2680           content = content + (string)u->content;
2681           changed_p = true;
2682
2683           // consume u
2684           swallow ();
2685         }
2686
2687       if (changed_p)
2688         {
2689           // We've already swallowed the first token and we're not
2690           // putting it back; no one else has a copy; so we can
2691           // safely overwrite its content and reuse it.
2692           const_cast<token*>(t)->content = content;
2693         }
2694
2695       probe_point::component* c = new probe_point::component;
2696       c->functor = t->content;
2697       c->tok = t;
2698       vector<probe_point*> pps;
2699       probe_point* pp = new probe_point;
2700       if (auto_path)
2701         pp->auto_path = input_name;
2702       pp->components.push_back(c);
2703       pps.push_back(pp);
2704       // NB we may add c->arg soon
2705
2706       t = peek ();
2707
2708       // consume optional parameter
2709       if (t && t->type == tok_operator && t->content == "(")
2710         {
2711           swallow (); // consume "("
2712           c->arg = parse_literal ();
2713
2714           t = next ();
2715           if (! (t->type == tok_operator && t->content == ")"))
2716             throw PARSE_ERROR (_("expected ')'"));
2717           swallow ();
2718         }
2719
2720       return pps;
2721     }
2722 }
2723
2724 literal_string*
2725 parser::consume_string_literals(const token *t)
2726 {
2727   literal_string *ls = new literal_string (t->content);
2728
2729   // PR11208: check if the next token is also a string literal;
2730   // auto-concatenate it.  This is complicated to the extent that we
2731   // need to skip intermediate whitespace.
2732   //
2733   // NB for versions prior to 2.0: but don't skip over intervening comments
2734   string concat;
2735   bool p_concat = false;
2736   const token *n = peek();
2737   while (n != NULL && n->type == tok_string
2738          && ! (!input.has_version("2.0") && input.ate_comment))
2739     {
2740       if (!p_concat)
2741         {
2742           concat = t->content;
2743           p_concat = true;
2744         }
2745       concat.append(n->content.data(), n->content.size());
2746       next(); // consume the token
2747       n = peek();
2748     }
2749   if (p_concat)
2750     ls->value = concat;
2751   return ls;
2752 }
2753
2754
2755 // Parse a string literal and perform backslash escaping on the contents:
2756 literal_string*
2757 parser::parse_literal_string ()
2758 {
2759   const token* t = next ();
2760   literal_string* l;
2761   if (t->type == tok_string)
2762     l = consume_string_literals (t);
2763   else
2764     throw PARSE_ERROR (_("expected literal string"));
2765
2766   l->tok = t;
2767   return l;
2768 }
2769
2770
2771 literal*
2772 parser::parse_literal ()
2773 {
2774   const token* t = next ();
2775   literal* l;
2776   if (t->type == tok_string)
2777     {
2778       l = consume_string_literals (t);
2779     }
2780   else
2781     {
2782       bool neg = false;
2783       if (t->type == tok_operator && t->content == "-")
2784         {
2785           neg = true;
2786           swallow ();
2787           t = next ();
2788         }
2789
2790       if (t->type == tok_number)
2791         {
2792           const string& s = t->content;
2793           const char* startp = s.c_str ();
2794           char* endp = (char*) startp;
2795
2796           // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
2797           // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
2798           // since the lexer only gives us positive digit strings, but we'll
2799           // limit it to LLONG_MIN when a '-' operator is fed into the literal.
2800           errno = 0;
2801           long long value = (long long) strtoull (startp, & endp, 0);
2802           if (errno == ERANGE || errno == EINVAL || *endp != '\0'
2803               || (neg && (unsigned long long) value > 9223372036854775808ULL)
2804               || (unsigned long long) value > 18446744073709551615ULL
2805               || value < -9223372036854775807LL-1)
2806             throw PARSE_ERROR (_("number invalid or out of range"));
2807
2808           if (neg)
2809             value = -value;
2810
2811           l = new literal_number (value);
2812         }
2813       else
2814         throw PARSE_ERROR (_("expected literal string or number"));
2815     }
2816
2817   l->tok = t;
2818   return l;
2819 }
2820
2821
2822 if_statement*
2823 parser::parse_if_statement ()
2824 {
2825   const token* t = next ();
2826   if (! (t->type == tok_keyword && t->content == "if"))
2827     throw PARSE_ERROR (_("expected 'if'"));
2828   if_statement* s = new if_statement;
2829   s->tok = t;
2830
2831   t = next ();
2832   if (! (t->type == tok_operator && t->content == "("))
2833     throw PARSE_ERROR (_("expected '('"));
2834   swallow ();
2835
2836   s->condition = parse_expression ();
2837
2838   t = next ();
2839   if (! (t->type == tok_operator && t->content == ")"))
2840     throw PARSE_ERROR (_("expected ')'"));
2841   swallow ();
2842
2843   s->thenblock = parse_statement ();
2844
2845   t = peek ();
2846   if (t && t->type == tok_keyword && t->content == "else")
2847     {
2848       swallow ();
2849       s->elseblock = parse_statement ();
2850     }
2851   else
2852     s->elseblock = 0; // in case not otherwise initialized
2853
2854   return s;
2855 }
2856
2857
2858 expr_statement*
2859 parser::parse_expr_statement ()
2860 {
2861   expr_statement *es = new expr_statement;
2862   const token* t = peek ();
2863   if (t == NULL)
2864     throw PARSE_ERROR (_("expression statement expected"));
2865   // Copy, we only peeked, parse_expression might swallow.
2866   es->tok = new token (*t);
2867   es->value = parse_expression ();
2868   return es;
2869 }
2870
2871
2872 return_statement*
2873 parser::parse_return_statement ()
2874 {
2875   const token* t = next ();
2876   if (! (t->type == tok_keyword && t->content == "return"))
2877     throw PARSE_ERROR (_("expected 'return'"));
2878   if (context != con_function)
2879     throw PARSE_ERROR (_("found 'return' not in function context"));
2880   return_statement* s = new return_statement;
2881   s->tok = t;
2882
2883   t = peek ();
2884   if (t->type == tok_operator && (t->content == ";" || t->content == "}"))
2885     s->value = NULL;  // no return value
2886   else
2887     s->value = parse_expression ();
2888   return s;
2889 }
2890
2891
2892 delete_statement*
2893 parser::parse_delete_statement ()
2894 {
2895   const token* t = next ();
2896   if (! (t->type == tok_keyword && t->content == "delete"))
2897     throw PARSE_ERROR (_("expected 'delete'"));
2898   delete_statement* s = new delete_statement;
2899   s->tok = t;
2900   s->value = parse_expression ();
2901   return s;
2902 }
2903
2904
2905 next_statement*
2906 parser::parse_next_statement ()
2907 {
2908   const token* t = next ();
2909   if (! (t->type == tok_keyword && t->content == "next"))
2910     throw PARSE_ERROR (_("expected 'next'"));
2911   next_statement* s = new next_statement;
2912   s->tok = t;
2913   return s;
2914 }
2915
2916
2917 break_statement*
2918 parser::parse_break_statement ()
2919 {
2920   const token* t = next ();
2921   if (! (t->type == tok_keyword && t->content == "break"))
2922     throw PARSE_ERROR (_("expected 'break'"));
2923   break_statement* s = new break_statement;
2924   s->tok = t;
2925   return s;
2926 }
2927
2928
2929 continue_statement*
2930 parser::parse_continue_statement ()
2931 {
2932   const token* t = next ();
2933   if (! (t->type == tok_keyword && t->content == "continue"))
2934     throw PARSE_ERROR (_("expected 'continue'"));
2935   continue_statement* s = new continue_statement;
2936   s->tok = t;
2937   return s;
2938 }
2939
2940
2941 for_loop*
2942 parser::parse_for_loop ()
2943 {
2944   const token* t = next ();
2945   if (! (t->type == tok_keyword && t->content == "for"))
2946     throw PARSE_ERROR (_("expected 'for'"));
2947   for_loop* s = new for_loop;
2948   s->tok = t;
2949
2950   t = next ();
2951   if (! (t->type == tok_operator && t->content == "("))
2952     throw PARSE_ERROR (_("expected '('"));
2953   swallow ();
2954
2955   // initializer + ";"
2956   t = peek ();
2957   if (t && t->type == tok_operator && t->content == ";")
2958     {
2959       s->init = 0;
2960       swallow ();
2961     }
2962   else
2963     {
2964       s->init = parse_expr_statement ();
2965       t = next ();
2966       if (! (t->type == tok_operator && t->content == ";"))
2967         throw PARSE_ERROR (_("expected ';'"));
2968       swallow ();
2969     }
2970
2971   // condition + ";"
2972   t = peek ();
2973   if (t && t->type == tok_operator && t->content == ";")
2974     {
2975       literal_number* l = new literal_number(1);
2976       s->cond = l;
2977       s->cond->tok = next ();
2978     }
2979   else
2980     {
2981       s->cond = parse_expression ();
2982       t = next ();
2983       if (! (t->type == tok_operator && t->content == ";"))
2984         throw PARSE_ERROR (_("expected ';'"));
2985       swallow ();
2986     }
2987
2988   // increment + ")"
2989   t = peek ();
2990   if (t && t->type == tok_operator && t->content == ")")
2991     {
2992       s->incr = 0;
2993       swallow ();
2994     }
2995   else
2996     {
2997       s->incr = parse_expr_statement ();
2998       t = next ();
2999       if (! (t->type == tok_operator && t->content == ")"))
3000         throw PARSE_ERROR (_("expected ')'"));
3001       swallow ();
3002     }
3003
3004   // block
3005   s->block = parse_statement ();
3006
3007   return s;
3008 }
3009
3010
3011 for_loop*
3012 parser::parse_while_loop ()
3013 {
3014   const token* t = next ();
3015   if (! (t->type == tok_keyword && t->content == "while"))
3016     throw PARSE_ERROR (_("expected 'while'"));
3017   for_loop* s = new for_loop;
3018   s->tok = t;
3019
3020   t = next ();
3021   if (! (t->type == tok_operator && t->content == "("))
3022     throw PARSE_ERROR (_("expected '('"));
3023   swallow ();
3024
3025   // dummy init and incr fields
3026   s->init = 0;
3027   s->incr = 0;
3028
3029   // condition
3030   s->cond = parse_expression ();
3031
3032   t = next ();
3033   if (! (t->type == tok_operator && t->content == ")"))
3034     throw PARSE_ERROR (_("expected ')'"));
3035   swallow ();
3036
3037   // block
3038   s->block = parse_statement ();
3039
3040   return s;
3041 }
3042
3043
3044 foreach_loop*
3045 parser::parse_foreach_loop ()
3046 {
3047   const token* t = next ();
3048   if (! (t->type == tok_keyword && t->content == "foreach"))
3049     throw PARSE_ERROR (_("expected 'foreach'"));
3050   foreach_loop* s = new foreach_loop;
3051   s->tok = t;
3052   s->sort_direction = 0;
3053   s->sort_aggr = sc_none;
3054   s->value = NULL;
3055   s->limit = NULL;
3056
3057   t = next ();
3058   if (! (t->type == tok_operator && t->content == "("))
3059     throw PARSE_ERROR (_("expected '('"));
3060   swallow ();
3061
3062   symbol* lookahead_sym = NULL;
3063   int lookahead_sort = 0;
3064
3065   t = peek ();
3066   if (t && t->type == tok_identifier)
3067     {
3068       next ();
3069       lookahead_sym = new symbol;
3070       lookahead_sym->tok = t;
3071       lookahead_sym->name = t->content;
3072
3073       t = peek ();
3074       if (t && t->type == tok_operator &&
3075           (t->content == "+" || t->content == "-"))
3076         {
3077           lookahead_sort = (t->content == "+") ? 1 : -1;
3078           swallow ();
3079         }
3080
3081       t = peek ();
3082       if (t && t->type == tok_operator && t->content == "=")
3083         {
3084           swallow ();
3085           s->value = lookahead_sym;
3086           if (lookahead_sort)
3087             {
3088               s->sort_direction = lookahead_sort;
3089               s->sort_column = 0;
3090             }
3091           lookahead_sym = NULL;
3092         }
3093     }
3094
3095   // see also parse_array_in
3096
3097   bool parenthesized = false;
3098   t = peek ();
3099   if (!lookahead_sym && t && t->type == tok_operator && t->content == "[")
3100     {
3101       swallow ();
3102       parenthesized = true;
3103     }
3104
3105   if (lookahead_sym)
3106     {
3107       s->indexes.push_back (lookahead_sym);
3108       if (lookahead_sort)
3109         {
3110           s->sort_direction = lookahead_sort;
3111           s->sort_column = 1;
3112         }
3113       lookahead_sym = NULL;
3114     }
3115   else while (1)
3116     {
3117       t = next ();
3118       if (! (t->type == tok_identifier))
3119         throw PARSE_ERROR (_("expected identifier"));
3120       symbol* sym = new symbol;
3121       sym->tok = t;
3122       sym->name = t->content;
3123       s->indexes.push_back (sym);
3124
3125       t = peek ();
3126       if (t && t->type == tok_operator &&
3127           (t->content == "+" || t->content == "-"))
3128         {
3129           if (s->sort_direction)
3130             throw PARSE_ERROR (_("multiple sort directives"));
3131           s->sort_direction = (t->content == "+") ? 1 : -1;
3132           s->sort_column = s->indexes.size();
3133           swallow ();
3134         }
3135
3136       if (parenthesized)
3137         {
3138           t = peek ();
3139           if (t && t->type == tok_operator && t->content == ",")
3140             {
3141               swallow ();
3142               continue;
3143             }
3144           else if (t && t->type == tok_operator && t->content == "]")
3145             {
3146               swallow ();
3147               break;
3148             }
3149           else
3150             throw PARSE_ERROR (_("expected ',' or ']'"));
3151         }
3152       else
3153         break; // expecting only one expression
3154     }
3155
3156   t = next ();
3157   if (! (t->type == tok_keyword && t->content == "in"))
3158     throw PARSE_ERROR (_("expected 'in'"));
3159   swallow ();
3160
3161   s->base = parse_indexable();
3162
3163   // check if there was an array slice that was specified
3164   t = peek();
3165   if (t && t->type == tok_operator && t->content == "[")
3166     {
3167       swallow();
3168       while (1)
3169         {
3170           t = peek();
3171           if (t && t->type == tok_operator && t->content == "*")
3172             {
3173               swallow();
3174               s->array_slice.push_back (NULL);
3175             }
3176           else
3177             s->array_slice.push_back (parse_expression());
3178
3179           t = peek ();
3180           if (t && t->type == tok_operator && t->content == ",")
3181             {
3182               swallow ();
3183               continue;
3184             }
3185           else if (t && t->type == tok_operator && t->content == "]")
3186             {
3187               swallow ();
3188               break;
3189             }
3190           else
3191             throw PARSE_ERROR (_("expected ',' or ']'"));
3192         }
3193     }
3194
3195
3196   // check for atword, see also expect_ident_or_atword,
3197   t = peek ();
3198   if (t && t->type == tok_operator && t->content[0] == '@')
3199     {
3200       if (t->content == "@avg") s->sort_aggr = sc_average;
3201       else if (t->content == "@min") s->sort_aggr = sc_min;
3202       else if (t->content == "@max") s->sort_aggr = sc_max;
3203       else if (t->content == "@count") s->sort_aggr = sc_count;
3204       else if (t->content == "@sum") s->sort_aggr = sc_sum;
3205       else if (t->content == "@variance") s->sort_aggr = sc_variance;
3206       else throw PARSE_ERROR(_("expected statistical operation"));
3207       swallow();
3208
3209       t = peek ();
3210       if (! (t && t->type == tok_operator && (t->content == "+" || t->content == "-")))
3211         throw PARSE_ERROR(_("expected sort directive"));
3212     }
3213
3214   t = peek ();
3215   if (t && t->type == tok_operator &&
3216       (t->content == "+" || t->content == "-"))
3217     {
3218       if (s->sort_direction)
3219         throw PARSE_ERROR (_("multiple sort directives"));
3220       s->sort_direction = (t->content == "+") ? 1 : -1;
3221       s->sort_column = 0;
3222       swallow ();
3223     }
3224
3225   t = peek ();
3226   if (tok_is(t, tok_keyword, "limit"))
3227     {
3228       swallow ();                       // get past the "limit"
3229       s->limit = parse_expression ();
3230     }
3231
3232   t = next ();
3233   if (! (t->type == tok_operator && t->content == ")"))
3234     throw PARSE_ERROR ("expected ')'");
3235   swallow ();
3236
3237   s->block = parse_statement ();
3238   return s;
3239 }
3240
3241
3242 expression*
3243 parser::parse_expression ()
3244 {
3245   return parse_assignment ();
3246 }
3247
3248
3249 expression*
3250 parser::parse_assignment ()
3251 {
3252   expression* op1 = parse_ternary ();
3253
3254   const token* t = peek ();
3255   // right-associative operators
3256   if (t && t->type == tok_operator
3257       && (t->content == "=" ||
3258           t->content == "<<<" ||
3259           t->content == "+=" ||
3260           t->content == "-=" ||
3261           t->content == "*=" ||
3262           t->content == "/=" ||
3263           t->content == "%=" ||
3264           t->content == "<<=" ||
3265           t->content == ">>=" ||
3266           t->content == "&=" ||
3267           t->content == "^=" ||
3268           t->content == "|=" ||
3269           t->content == ".=" ||
3270           false))
3271     {
3272       // NB: lvalueness is checked during elaboration / translation
3273       assignment* e = new assignment;
3274       e->left = op1;
3275       e->op = t->content;
3276       e->tok = t;
3277       next ();
3278       e->right = parse_expression ();
3279       op1 = e;
3280     }
3281
3282   return op1;
3283 }
3284
3285
3286 expression*
3287 parser::parse_ternary ()
3288 {
3289   expression* op1 = parse_logical_or ();
3290
3291   const token* t = peek ();
3292   if (t && t->type == tok_operator && t->content == "?")
3293     {
3294       ternary_expression* e = new ternary_expression;
3295       e->tok = t;
3296       e->cond = op1;
3297       next ();
3298       e->truevalue = parse_expression (); // XXX
3299
3300       t = next ();
3301       if (! (t->type == tok_operator && t->content == ":"))
3302         throw PARSE_ERROR (_("expected ':'"));
3303       swallow ();
3304
3305       if (input.has_version("4.0"))
3306         e->falsevalue = parse_ternary ();
3307       else
3308         e->falsevalue = parse_expression ();
3309       return e;
3310     }
3311   else
3312     return op1;
3313 }
3314
3315
3316 expression*
3317 parser::parse_logical_or ()
3318 {
3319   expression* op1 = parse_logical_and ();
3320
3321   const token* t = peek ();
3322   while (t && t->type == tok_operator && t->content == "||")
3323     {
3324       logical_or_expr* e = new logical_or_expr;
3325       e->tok = t;
3326       e->op = t->content;
3327       e->left = op1;
3328       next ();
3329       e->right = parse_logical_and ();
3330       op1 = e;
3331       t = peek ();
3332     }
3333
3334   return op1;
3335 }
3336
3337
3338 expression*
3339 parser::parse_logical_and ()
3340 {
3341   expression* op1 = parse_boolean_or ();
3342
3343   const token* t = peek ();
3344   while (t && t->type == tok_operator && t->content == "&&")
3345     {
3346       logical_and_expr *e = new logical_and_expr;
3347       e->left = op1;
3348       e->op = t->content;
3349       e->tok = t;
3350       next ();
3351       e->right = parse_boolean_or ();
3352       op1 = e;
3353       t = peek ();
3354     }
3355
3356   return op1;
3357 }
3358
3359
3360 expression*
3361 parser::parse_boolean_or ()
3362 {
3363   expression* op1 = parse_boolean_xor ();
3364
3365   const token* t = peek ();
3366   while (t && t->type == tok_operator && t->content == "|")
3367     {
3368       binary_expression* e = new binary_expression;
3369       e->left = op1;
3370       e->op = t->content;
3371       e->tok = t;
3372       next ();
3373       e->right = parse_boolean_xor ();
3374       op1 = e;
3375       t = peek ();
3376     }
3377
3378   return op1;
3379 }
3380
3381
3382 expression*
3383 parser::parse_boolean_xor ()
3384 {
3385   expression* op1 = parse_boolean_and ();
3386
3387   const token* t = peek ();
3388   while (t && t->type == tok_operator && t->content == "^")
3389     {
3390       binary_expression* e = new binary_expression;
3391       e->left = op1;
3392       e->op = t->content;
3393       e->tok = t;
3394       next ();
3395       e->right = parse_boolean_and ();
3396       op1 = e;
3397       t = peek ();
3398     }
3399
3400   return op1;
3401 }
3402
3403
3404 expression*
3405 parser::parse_boolean_and ()
3406 {
3407   expression* op1 = parse_array_in ();
3408
3409   const token* t = peek ();
3410   while (t && t->type == tok_operator && t->content == "&")
3411     {
3412       binary_expression* e = new binary_expression;
3413       e->left = op1;
3414       e->op = t->content;
3415       e->tok = t;
3416       next ();
3417       e->right = parse_array_in ();
3418       op1 = e;
3419       t = peek ();
3420     }
3421
3422   return op1;
3423 }
3424
3425
3426 expression*
3427 parser::parse_array_in ()
3428 {
3429   // This is a very tricky case.  All these are legit expressions:
3430   // "a in b"  "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
3431   vector<expression*> indexes;
3432   bool parenthesized = false;
3433
3434   const token* t = peek ();
3435   if (t && t->type == tok_operator && t->content == "[")
3436     {
3437       swallow ();
3438       parenthesized = true;
3439     }
3440
3441   while (1)
3442     {
3443       t = peek();
3444       if (t && t->type == tok_operator && t->content == "*" && parenthesized)
3445         {
3446           swallow();
3447           indexes.push_back(NULL);
3448         }
3449       else
3450         {
3451           expression* op1 = parse_comparison_or_regex_query ();
3452           indexes.push_back (op1);
3453         }
3454
3455       if (parenthesized)
3456         {
3457           const token* t = peek ();
3458           if (t && t->type == tok_operator && t->content == ",")
3459             {
3460               swallow ();
3461               continue;
3462             }
3463           else if (t && t->type == tok_operator && t->content == "]")
3464             {
3465               swallow ();
3466               break;
3467             }
3468           else
3469             throw PARSE_ERROR (_("expected ',' or ']'"));
3470         }
3471       else
3472         break; // expecting only one expression
3473     }
3474
3475   t = peek ();
3476   if (t && t->type == tok_keyword && t->content == "in")
3477     {
3478       array_in *e = new array_in;
3479       e->tok = t;
3480       next ();
3481
3482       arrayindex* a = new arrayindex;
3483       a->indexes = indexes;
3484       a->base = parse_indexable();
3485       a->tok = a->base->tok;
3486       e->operand = a;
3487       return e;
3488     }
3489   else if (indexes.size() == 1) // no "in" - need one expression only
3490     return indexes[0];
3491   else
3492     throw PARSE_ERROR (_("unexpected comma-separated expression list"));
3493 }
3494
3495
3496 expression*
3497 parser::parse_comparison_or_regex_query ()
3498 {
3499   expression* op1 = parse_shift ();
3500
3501   // XXX precedence -- perhaps a =~ b == c =~ d --> (a =~ b) == (c =~ d) ?
3502   const token *t = peek();
3503   if (t && t->type == tok_operator
3504       && (t->content == "=~" ||
3505           t->content == "!~"))
3506     {
3507       regex_query* r = new regex_query;
3508       r->left = op1;
3509       r->op = t->content;
3510       r->tok = t;
3511       next ();
3512       r->right = parse_literal_string();
3513       op1 = r;
3514       t = peek ();
3515     }
3516   else while (t && t->type == tok_operator
3517       && (t->content == ">" ||
3518           t->content == "<" ||
3519           t->content == "==" ||
3520           t->content == "!=" ||
3521           t->content == "<=" ||
3522           t->content == ">="))
3523     {
3524       comparison* e = new comparison;
3525       e->left = op1;
3526       e->op = t->content;
3527       e->tok = t;
3528       next ();
3529       e->right = parse_shift ();
3530       op1 = e;
3531       t = peek ();
3532     }
3533
3534   return op1;
3535 }
3536
3537
3538 expression*
3539 parser::parse_shift ()
3540 {
3541   expression* op1 = parse_concatenation ();
3542
3543   const token* t = peek ();
3544   while (t && t->type == tok_operator &&
3545          (t->content == "<<" || t->content == ">>" || t->content == ">>>"))
3546     {
3547       binary_expression* e = new binary_expression;
3548       e->left = op1;
3549       e->op = t->content;
3550       e->tok = t;
3551       next ();
3552       e->right = parse_concatenation ();
3553       op1 = e;
3554       t = peek ();
3555     }
3556
3557   return op1;
3558 }
3559
3560
3561 expression*
3562 parser::parse_concatenation ()
3563 {
3564   expression* op1 = parse_additive ();
3565
3566   const token* t = peek ();
3567   // XXX: the actual awk string-concatenation operator is *whitespace*.
3568   // I don't know how to easily to model that here.
3569   while (t && t->type == tok_operator && t->content == ".")
3570     {
3571       concatenation* e = new concatenation;
3572       e->left = op1;
3573       e->op = t->content;
3574       e->tok = t;
3575       next ();
3576       e->right = parse_additive ();
3577       op1 = e;
3578       t = peek ();
3579     }
3580
3581   return op1;
3582 }
3583
3584
3585 expression*
3586 parser::parse_additive ()
3587 {
3588   expression* op1 = parse_multiplicative ();
3589
3590   const token* t = peek ();
3591   while (t && t->type == tok_operator
3592       && (t->content == "+" || t->content == "-"))
3593     {
3594       binary_expression* e = new binary_expression;
3595       e->op = t->content;
3596       e->left = op1;
3597       e->tok = t;
3598       next ();
3599       e->right = parse_multiplicative ();
3600       op1 = e;
3601       t = peek ();
3602     }
3603
3604   return op1;
3605 }
3606
3607
3608 expression*
3609 parser::parse_multiplicative ()
3610 {
3611   expression* op1 = parse_unary ();
3612
3613   const token* t = peek ();
3614   while (t && t->type == tok_operator
3615       && (t->content == "*" || t->content == "/" || t->content == "%"))
3616     {
3617       binary_expression* e = new binary_expression;
3618       e->op = t->content;
3619       e->left = op1;
3620       e->tok = t;
3621       next ();
3622       e->right = parse_unary ();
3623       op1 = e;
3624       t = peek ();
3625     }
3626
3627   return op1;
3628 }
3629
3630
3631 expression*
3632 parser::parse_unary ()
3633 {
3634   const token* t = peek ();
3635   if (t && t->type == tok_operator
3636       && (t->content == "+" ||
3637           t->content == "-" ||
3638           t->content == "!" ||
3639           t->content == "~" ||
3640           false))
3641     {
3642       unary_expression* e = new unary_expression;
3643       e->op = t->content;
3644       e->tok = t;
3645       next ();
3646       e->operand = parse_unary ();
3647       return e;
3648     }
3649   else
3650     return parse_crement ();
3651 }
3652
3653
3654 expression*
3655 parser::parse_crement () // as in "increment" / "decrement"
3656 {
3657   // NB: Ideally, we'd parse only a symbol as an operand to the
3658   // *crement operators, instead of a general expression value.  We'd
3659   // need more complex lookahead code to tell apart the postfix cases.
3660   // So we just punt, and leave it to pass-3 to signal errors on
3661   // cases like "4++".
3662
3663   const token* t = peek ();
3664   if (t && t->type == tok_operator
3665       && (t->content == "++" || t->content == "--"))
3666     {
3667       pre_crement* e = new pre_crement;
3668       e->op = t->content;
3669       e->tok = t;
3670       next ();
3671       e->operand = parse_dwarf_value ();
3672       return e;
3673     }
3674
3675   // post-crement or non-crement
3676   expression *op1 = parse_dwarf_value ();
3677
3678   t = peek ();
3679   if (t && t->type == tok_operator
3680       && (t->content == "++" || t->content == "--"))
3681     {
3682       post_crement* e = new post_crement;
3683       e->op = t->content;
3684       e->tok = t;
3685       next ();
3686       e->operand = op1;
3687       return e;
3688     }
3689   else
3690     return op1;
3691 }
3692
3693
3694 expression*
3695 parser::parse_dwarf_value ()
3696 {
3697   expression* expr = NULL;
3698   target_symbol* tsym = NULL;
3699
3700   // With '&' we'll definitely be making a target symbol of some sort
3701   const token* addrtok = peek_op ("&") ? next () : NULL;
3702   bool addressof = (addrtok != NULL);
3703
3704   // First try target_symbol types: $var, @cast, and @var.
3705   const token* t = peek ();
3706   if (t && t->type == tok_identifier && t->content[0] == '$')
3707     expr = tsym = parse_target_symbol ();
3708   else if (tok_is (t, tok_operator, "@cast"))
3709     expr = tsym = parse_cast_op ();
3710   else if (tok_is (t, tok_operator, "@var"))
3711     expr = tsym = parse_atvar_op ();
3712   else if (addressof && !input.has_version("2.6"))
3713     // '&' on old version only allowed specific target_symbol types
3714     throw PARSE_ERROR (_("expected @cast, @var or $var"));
3715   else
3716     {
3717       // Otherwise just get a plain value of any sort.
3718       expr = parse_value ();
3719       if (addressof)
3720         {
3721           tsym = dynamic_cast<target_symbol*> (expr);
3722           if (tsym && tsym->addressof)
3723             throw PARSE_ERROR (_("cannot take address more than once"),
3724                                addrtok);
3725         }
3726     }
3727
3728   // If we had '&' or see any target suffixes, that forces a target_symbol.
3729   // For compatibility, we only do this starting with 2.6.
3730   if (!tsym && (addressof || peek_target_symbol_components ())
3731       && input.has_version("2.6"))
3732     {
3733       autocast_op *cop = new autocast_op;
3734       cop->tok = addrtok ?: peek ();
3735       cop->operand = expr;
3736       expr = tsym = cop;
3737     }
3738
3739   if (tsym)
3740     {
3741       // Parse the rest of any kind of target symbol
3742       tsym->addressof = addressof;
3743       parse_target_symbol_components (tsym);
3744     }
3745
3746   return expr;
3747 }
3748
3749
3750 expression*
3751 parser::parse_value ()
3752 {
3753   const token* t = peek ();
3754   if (! t)
3755     throw PARSE_ERROR (_("expected value"));
3756
3757   if (t->type == tok_embedded)
3758     {
3759       if (! privileged)
3760         throw PARSE_ERROR (_("embedded expression code in unprivileged script; need stap -g"), false);
3761
3762       embedded_expr *e = new embedded_expr;
3763       e->tok = t;
3764       e->code = t->content;
3765       next ();
3766       return e;
3767     }
3768
3769   if (t->type == tok_operator && t->content == "(")
3770     {
3771       swallow ();
3772       expression* e = parse_expression ();
3773       t = next ();
3774       if (! (t->type == tok_operator && t->content == ")"))
3775         throw PARSE_ERROR (_("expected ')'"));
3776       swallow ();
3777       return e;
3778     }
3779   else if (t->type == tok_identifier
3780            || (t->type == tok_operator && t->content[0] == '@'))
3781     return parse_symbol ();
3782   else
3783     return parse_literal ();
3784 }
3785
3786
3787 const token *
3788 parser::parse_hist_op_or_bare_name (hist_op *&hop, interned_string &name)
3789 {
3790   hop = NULL;
3791   const token* t = expect_ident_or_atword (name);
3792   if (name == "@hist_linear" || name == "@hist_log")
3793     {
3794       hop = new hist_op;
3795       if (name == "@hist_linear")
3796         hop->htype = hist_linear;
3797       else if (name == "@hist_log")
3798         hop->htype = hist_log;
3799       hop->tok = t;
3800       expect_op("(");
3801       hop->stat = parse_expression ();
3802       int64_t tnum;
3803       if (hop->htype == hist_linear)
3804         {
3805           for (size_t i = 0; i < 3; ++i)
3806             {
3807               expect_op (",");
3808               expect_number (tnum);
3809               hop->params.push_back (tnum);
3810             }
3811         }
3812       expect_op(")");
3813     }
3814   return t;
3815 }
3816
3817
3818 indexable*
3819 parser::parse_indexable ()
3820 {
3821   hist_op *hop = NULL;
3822   interned_string name;
3823   const token *tok = parse_hist_op_or_bare_name(hop, name);
3824   if (hop)
3825     return hop;
3826   else
3827     {
3828       symbol* sym = new symbol;
3829       sym->name = name;
3830       sym->tok = tok;
3831       return sym;
3832     }
3833 }
3834
3835
3836 // var, indexable[index], func(parms), printf("...", ...),
3837 // @defined, @entry, @stat_op(stat)
3838 expression* parser::parse_symbol ()
3839 {
3840   hist_op *hop = NULL;
3841   symbol *sym = NULL;
3842   interned_string name;
3843   unsigned max_params = 0;
3844   const token *t = parse_hist_op_or_bare_name(hop, name);
3845
3846   if (!hop)
3847     {
3848       // If we didn't get a hist_op, then we did get an identifier. We can
3849       // now scrutinize this identifier for the various magic forms of identifier
3850       // (printf, @stat_op...)
3851
3852       // NB: PR11343: @defined() is not incompatible with earlier versions
3853       // of stap, so no need to check session.compatible for 1.2
3854       if (name == "@defined")
3855         return parse_defined_op (t);
3856
3857       if (name == "@const")
3858         return parse_const_op (t);
3859
3860       if (name == "@entry")
3861         return parse_entry_op (t);
3862
3863       if (name == "@perf")
3864         return parse_perf_op (t);
3865
3866       if (input.has_version("4.0"))
3867         {
3868           if (name == "@kregister" || name == "@uregister")
3869             return parse_target_register (t);
3870
3871           if (name == "@kderef" || name == "@uderef")
3872             return parse_target_deref (t);
3873         }
3874
3875       if (name.size() > 0 && name[0] == '@')
3876         {
3877           stat_op *sop = new stat_op;
3878           if (name == "@avg")
3879             sop->ctype = sc_average;
3880           else if (name == "@variance")
3881             sop->ctype = sc_variance, max_params = 1;
3882           else if (name == "@count")
3883             sop->ctype = sc_count;
3884           else if (name == "@sum")
3885             sop->ctype = sc_sum;
3886           else if (name == "@min")
3887             sop->ctype = sc_min;
3888           else if (name == "@max")
3889             sop->ctype = sc_max;
3890           else
3891             throw PARSE_ERROR(_F("unknown operator %s",
3892                                  name.to_string().c_str()));
3893           expect_op("(");
3894           sop->tok = t;
3895           sop->stat = parse_expression ();
3896
3897           while(1)
3898             {
3899               t = next ();
3900               if (t && t->type == tok_operator && t->content == ")")
3901                 {
3902                   swallow ();
3903                   break;
3904                 }
3905                 else if (t && t->type == tok_operator && t->content == ",")
3906                 {
3907                   if (sop->params.size() >= max_params)
3908                     throw PARSE_ERROR(_NF("not more than %d parameter allowed",
3909                                           "not more than %d parameters allowed",
3910                                           max_params+1, max_params+1), t);
3911
3912                   swallow ();
3913                   int64_t tnum;
3914                   expect_number (tnum);
3915                   sop->params.push_back (tnum);
3916                 }
3917             }
3918           return sop;
3919         }
3920
3921       else if (print_format *fmt = print_format::create(t))
3922         {
3923           expect_op("(");
3924           if ((name == "print" || name == "println" ||
3925                name == "sprint" || name == "sprintln") &&
3926               (peek_op("@hist_linear") || peek_op("@hist_log")))
3927             {
3928               // We have a special case where we recognize
3929               // print(@hist_foo(bar)) as a magic print-the-histogram
3930               // construct. This is sort of gross but it avoids
3931               // promoting histogram references to typeful
3932               // expressions.
3933
3934               hop = NULL;
3935               t = parse_hist_op_or_bare_name(hop, name);
3936               assert(hop);
3937
3938               // It is, sadly, possible that even while parsing a
3939               // hist_op, we *mis-guessed* and the user wishes to
3940               // print(@hist_op(foo)[bucket]), a scalar. In that case
3941               // we must parse the arrayindex and print an expression.
3942               //
3943               // XXX: This still fails if the arrayindex is part of a
3944               // larger expression.  To really handle everything, we'd
3945               // need to push back all the hist tokens start over.
3946
3947               if (!peek_op ("["))
3948                 fmt->hist = hop;
3949               else
3950                 {
3951                   // This is simplified version of the
3952                   // multi-array-index parser below, because we can
3953                   // only ever have one index on a histogram anyways.
3954                   expect_op("[");
3955                   struct arrayindex* ai = new arrayindex;
3956                   ai->tok = t;
3957                   ai->base = hop;
3958                   ai->indexes.push_back (parse_expression ());
3959                   expect_op("]");
3960                   fmt->args.push_back(ai);
3961
3962                   // Consume any subsequent arguments.
3963                   while (!peek_op(")"))
3964                     {
3965                       // ')' is not possible here but we want to output a nicer
3966                       // parser error message.
3967                       (void) expect_op_any ({",", ")"});
3968                       expression *e = parse_expression ();
3969                       fmt->args.push_back(e);
3970                     }
3971                 }
3972             }
3973           else
3974             {
3975               int min_args = 0;
3976               bool consumed_arg = false;
3977               if (fmt->print_with_format)
3978                 {
3979                   // Consume and convert a format string. Agreement between the
3980                   // format string and the arguments is postponed to the
3981                   // typechecking phase.
3982                   literal_string* ls = parse_literal_string();
3983                   fmt->raw_components = ls->value;
3984                   delete ls;
3985                   fmt->components = print_format::string_to_components (fmt->raw_components);
3986                   consumed_arg = true;
3987                 }
3988               else if (fmt->print_with_delim)
3989                 {
3990                   // Consume a delimiter to separate arguments.
3991                   literal_string* ls = parse_literal_string();
3992                   fmt->delimiter = ls->value;
3993                   delete ls;
3994                   consumed_arg = true;
3995                   min_args = 2; // so that the delim is used at least once
3996                 }
3997               else if (!fmt->print_with_newline)
3998                 {
3999                   // If we are not printing with a format string, nor with a
4000                   // delim, nor with a newline, then it's either print() or
4001                   // sprint(), both of which require at least one argument (of
4002                   // any type).
4003                   min_args = 1;
4004                 }
4005
4006               // Consume any subsequent arguments.
4007               while (min_args || !peek_op (")"))
4008                 {
4009                   // ')' is not possible here but we want to output a nicer
4010                   // parser error message.
4011                   if (consumed_arg)
4012                     (void) expect_op_any({",", ")"});
4013                   expression *e = parse_expression ();
4014                   fmt->args.push_back(e);
4015                   consumed_arg = true;
4016                   if (min_args)
4017                     --min_args;
4018                 }
4019             }
4020           expect_op(")");
4021           return fmt;
4022         }
4023
4024       else if (peek_op ("(")) // function call
4025         {
4026           swallow ();
4027           struct functioncall* f = new functioncall;
4028           f->tok = t;
4029           f->function = name;
4030           // Allow empty actual parameter list
4031           if (peek_op (")"))
4032             {
4033               swallow ();
4034               return f;
4035             }
4036           while (1)
4037             {
4038               f->args.push_back (parse_expression ());
4039               interned_string op = expect_op_any({")", ","});
4040               if (op == ")")
4041                 break;
4042               else if (op == ",")
4043                 continue;
4044             }
4045           return f;
4046         }
4047
4048       else
4049         {
4050           sym = new symbol;
4051           sym->name = name;
4052           sym->tok = t;
4053         }
4054     }
4055
4056   // By now, either we had a hist_op in the first place, or else
4057   // we had a plain word and it was converted to a symbol.
4058
4059   assert (!hop != !sym); // logical XOR
4060
4061   // All that remains is to check for array indexing
4062
4063   if (peek_op ("[")) // array
4064     {
4065       swallow ();
4066       struct arrayindex* ai = new arrayindex;
4067       ai->tok = t;
4068
4069       if (hop)
4070         ai->base = hop;
4071       else
4072         ai->base = sym;
4073
4074       while (1)
4075         {
4076           if (peek_op("*"))
4077             {
4078               swallow();
4079               ai->indexes.push_back (NULL);
4080             }
4081           else
4082             ai->indexes.push_back (parse_expression ());
4083           interned_string op = expect_op_any({"]", ","});
4084           if (op == "]")
4085             break;
4086           else if (op == ",")
4087             continue;
4088         }
4089
4090       return ai;
4091     }
4092
4093   // If we got to here, we *should* have a symbol; if we have
4094   // a hist_op on its own, it doesn't count as an expression,
4095   // so we throw a parse error.
4096
4097   if (hop)
4098     throw PARSE_ERROR(_("base histogram operator where expression expected"), t);
4099
4100   return sym;
4101 }
4102
4103 // Parse a $var.
4104 target_symbol* parser::parse_target_symbol ()
4105 {
4106   const token* t = next ();
4107   if (t->type == tok_identifier && t->content[0]=='$')
4108     {
4109       // target_symbol time
4110       target_symbol *tsym = new target_symbol;
4111       tsym->tok = t;
4112       tsym->name = t->content;
4113       return tsym;
4114     }
4115
4116   throw PARSE_ERROR (_("expected $var"));
4117 }
4118
4119
4120 // Parse a @cast.
4121 cast_op* parser::parse_cast_op ()
4122 {
4123   const token* t = next ();
4124   if (t->type == tok_operator && t->content == "@cast")
4125     {
4126       cast_op *cop = new cast_op;
4127       cop->tok = t;
4128       cop->name = t->content;
4129       expect_op("(");
4130       cop->operand = parse_expression ();
4131       expect_op(",");
4132       expect_unknown(tok_string, cop->type_name);
4133       if (cop->type_name.empty())
4134         throw PARSE_ERROR (_("expected non-empty string"));
4135       if (peek_op (","))
4136         {
4137           swallow ();
4138           expect_unknown(tok_string, cop->module);
4139         }
4140       expect_op(")");
4141       return cop;
4142     }
4143
4144   throw PARSE_ERROR (_("expected @cast"));
4145 }
4146
4147
4148 // Parse a @var.
4149 atvar_op* parser::parse_atvar_op ()
4150 {
4151   const token* t = next ();
4152   if (t->type == tok_operator && t->content == "@var")
4153     {
4154       atvar_op *aop = new atvar_op;
4155       aop->tok = t;
4156       aop->name = t->content;
4157       expect_op("(");
4158       expect_unknown(tok_string, aop->target_name);
4159       size_t found_at = aop->target_name.find("@");
4160       if (found_at != string::npos)
4161         aop->cu_name = aop->target_name.substr(found_at + 1);
4162       else
4163         aop->cu_name = "";
4164       if (peek_op (","))
4165         {
4166           swallow ();
4167           expect_unknown (tok_string, aop->module);
4168         }
4169       else
4170         aop->module = "";
4171       expect_op(")");
4172       return aop;
4173     }
4174
4175   throw PARSE_ERROR (_("expected @var"));
4176 }
4177
4178
4179 // Parse a @defined().  Given head token has already been consumed.
4180 expression* parser::parse_defined_op (const token* t)
4181 {
4182   defined_op* dop = new defined_op;
4183   dop->tok = t;
4184   expect_op("(");
4185   dop->operand = parse_expression ();
4186   expect_op(")");
4187   return dop;
4188 }
4189
4190
4191 // Parse a @const().  Given head token has already been consumed.
4192 expression* parser::parse_const_op (const token* t)
4193 {
4194   if (! privileged)
4195     throw PARSE_ERROR (_("using @const operator not permitted; need stap -g"),
4196                        false /* don't skip tokens for parse resumption */);
4197
4198   interned_string cnst;
4199   embedded_expr *ee = new embedded_expr;
4200   ee->tok = t;
4201   expect_op("(");
4202   expect_unknown(tok_string, cnst);
4203   if(cnst.empty())
4204     throw PARSE_ERROR (_("expected non-empty string"));
4205   expect_op(")");
4206   ee->code = string("/* pure */ /* unprivileged */ /* stable */ ") + string(cnst);
4207   return ee;
4208 }
4209
4210
4211 // Parse a @entry().  Given head token has already been consumed.
4212 expression* parser::parse_entry_op (const token* t)
4213 {
4214   entry_op* eop = new entry_op;
4215   eop->tok = t;
4216   expect_op("(");
4217   eop->operand = parse_expression ();
4218   expect_op(")");
4219   return eop;
4220 }
4221
4222
4223 // Parse a @perf().  Given head token has already been consumed.
4224 expression* parser::parse_perf_op (const token* t)
4225 {
4226   perf_op* pop = new perf_op;
4227   pop->tok = t;
4228   expect_op("(");
4229   pop->operand = parse_literal_string ();
4230   if (pop->operand->value == "")
4231     throw PARSE_ERROR (_("expected non-empty string"));
4232   expect_op(")");
4233   return pop;
4234 }
4235
4236 // Parse a @kregister or @uregister.  Given head token has already been consumed.
4237 expression* parser::parse_target_register (const token* t)
4238 {
4239   target_register *treg = new target_register;
4240   int64_t regno;
4241   treg->tok = t;
4242   treg->userspace_p = (t->content[1] == 'u');
4243   if (! treg->userspace_p && ! privileged)
4244     throw PARSE_ERROR (_("using @kregister operator not permitted; need stap -g"),
4245                        false /* don't skip tokens for parse resumption */);
4246   expect_op("(");
4247   expect_number(regno);
4248   treg->regno = regno;
4249   expect_op(")");
4250   return treg;
4251 }
4252
4253 // Parse a @kderef or @uderef.  Given head token has already been consumed.
4254 expression* parser::parse_target_deref (const token* t)
4255 {
4256   target_deref *tderef = new target_deref;
4257   int64_t size;
4258   tderef->tok = t;
4259   tderef->userspace_p = (t->content[1] == 'u');
4260   if (! tderef->userspace_p && ! privileged)
4261     throw PARSE_ERROR (_("using @kderef operator not permitted; need stap -g"),
4262                        false /* don't skip tokens for parse resumption */);
4263   expect_op("(");
4264   expect_number(size);
4265   tderef->size = size;
4266   expect_op(",");
4267   tderef->addr = parse_expression();
4268   expect_op(")");
4269   return tderef;
4270 }
4271
4272 bool
4273 parser::peek_target_symbol_components ()
4274 {
4275   const token * t = peek ();
4276   return t &&
4277     ((t->type == tok_operator && (t->content == "->" || t->content == "["))
4278      || (t->type == tok_identifier &&
4279          t->content.find_first_not_of('$') == string::npos));
4280 }
4281
4282 void
4283 parser::parse_target_symbol_components (target_symbol* e)
4284 {
4285   bool pprint = false;
4286
4287   // check for pretty-print in the form $foo$
4288   string base = e->name;
4289   size_t pprint_pos = base.find_last_not_of('$');
4290   if (0 < pprint_pos && pprint_pos < base.length() - 1)
4291     {
4292       string pprint_val = base.substr(pprint_pos + 1);
4293       base.erase(pprint_pos + 1);
4294       e->name = base;
4295       e->components.push_back (target_symbol::component(e->tok, pprint_val, true));
4296       pprint = true;
4297     }
4298
4299   while (!pprint)
4300     {
4301       if (peek_op ("->"))
4302         {
4303           const token* t = next();
4304           interned_string member;
4305           expect_ident_or_keyword (member);
4306
4307           // check for pretty-print in the form $foo->$ or $foo->bar$
4308           pprint_pos = member.find_last_not_of('$');
4309           interned_string pprint_val;
4310           if (pprint_pos == string::npos || pprint_pos < member.length() - 1)
4311             {
4312               pprint_val = member.substr(pprint_pos + 1);
4313               member = member.substr(0, pprint_pos + 1);
4314               pprint = true;
4315             }
4316
4317           if (!member.empty())
4318             e->components.push_back (target_symbol::component(t, member));
4319           if (pprint)
4320             e->components.push_back (target_symbol::component(t, pprint_val, true));
4321         }
4322       else if (peek_op ("["))
4323         {
4324           const token* t = next();
4325           expression* index = parse_expression();
4326           literal_number* ln = dynamic_cast<literal_number*>(index);
4327           if (ln)
4328             e->components.push_back (target_symbol::component(t, ln->value));
4329           else
4330             e->components.push_back (target_symbol::component(t, index));
4331           expect_op ("]");
4332         }
4333       else
4334         break;
4335     }
4336
4337   if (!pprint)
4338     {
4339       // check for pretty-print in the form $foo $
4340       // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$
4341       const token* t = peek();
4342       if (t != NULL && t->type == tok_identifier &&
4343           t->content.find_first_not_of('$') == string::npos)
4344         {
4345           t = next();
4346           e->components.push_back (target_symbol::component(t, t->content, true));
4347           pprint = true;
4348         }
4349     }
4350
4351   if (pprint && (peek_op ("->") || peek_op("[")))
4352     throw PARSE_ERROR(_("-> and [ are not accepted for a pretty-printing variable"));
4353 }
4354
4355 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */