parse.cxx

   1 // recursive descent parser for systemtap scripts
   2 // Copyright (C) 2005-2015 Red Hat Inc.
   3 // Copyright (C) 2006 Intel Corporation.
   4 // Copyright (C) 2007 Bull S.A.S
   5 // Copyright (C) 2014 Peter Kjellstrom <cap@nsc.liu.se>
   6 //
   7 // This file is part of systemtap, and is free software.  You can
   8 // redistribute it and/or modify it under the terms of the GNU General
   9 // Public License (GPL); either version 2, or (at your option) any
  10 // later version.
  11
  12 #include "config.h"
  13 #include "staptree.h"
  14 #include "parse.h"
  15 #include "session.h"
  16 #include "util.h"
  17 #include "stringtable.h"
  18 #include "unordered.h"
  19
  20 #include <iostream>
  21
  22 #include <fstream>
  23 #include <cctype>
  24 #include <cstdlib>
  25 #include <cassert>
  26 #include <cerrno>
  27 #include <climits>
  28 #include <sstream>
  29 #include <cstring>
  30 #include <cctype>
  31 #include <iterator>
  32
  33 extern "C" {
  34 #include <fnmatch.h>
  35 }
  36
  37 using namespace std;
  38
  39
     // Tokenizer for systemtap script text.  The parser pulls tokens one
     // at a time through scan(); callers in this file treat a null
     // result from scan() as end of input.
  40 class lexer
  41 {
  42 public:
  43   bool ate_comment; // current token follows a comment
  44   bool ate_whitespace; // the most recent token followed whitespace
  45   bool saw_tokens; // the lexer found tokens (before preprocessing occurred)
  46   bool check_compatible; // whether to gate features on session.compatible
  47
       // Produce the next token from the input.
  48   token* scan ();
  49   lexer (istream&, const string&, systemtap_session&, bool);
  50   void set_current_file (stapfile* f);
  51   void set_current_token_chain (const token* tok);
  52   inline bool has_version (const char* v) const;
  53
  54   unordered_set<interned_string> keywords;
       // Consulted by the macro preprocessor to warn when a user macro
       // reuses the name of a built-in '@' operator.
  55   static unordered_set<string> atwords;
  56 private:
  57   inline int input_get ();
  58   inline int input_peek (unsigned n=0);
  59   void input_put (const string&, const token*);
  60   string input_name;
  61   string input_contents; // NB: being a temporary, no need to interned_string optimize this object
  62   const char *input_pointer; // index into input_contents; NB: recompute if input_contents changed!
  63   const char *input_end;
       // NOTE(review): the cursor_* fields presumably track the source
       // position used for token locations -- confirm against the
       // lexer implementation.
  64   unsigned cursor_suspend_count;
  65   unsigned cursor_suspend_line;
  66   unsigned cursor_suspend_column;
  67   unsigned cursor_line;
  68   unsigned cursor_column;
  69   systemtap_session& session;
  70   stapfile* current_file;
  71   const token* current_token_chain;
  72 };
  73
  74
     // Recursive descent parser for systemtap scripts.  Tokens reach the
     // grammar routines through two on-the-fly preprocessing passes:
     // macro declaration/expansion (the pp1 members) and conditional
     // expansion (the pp members).
  75 class parser
  76 {
  77 public:
  78   parser (systemtap_session& s, const string& n, istream& i, unsigned flags=0);
  79   ~parser ();
  80
  81   stapfile* parse ();
  82   probe* parse_synthetic_probe (const token* chain);
       // Used for .stpm library macro files; returns 0 on error.
  83   stapfile* parse_library_macros ();
  84
  85 private:
       // NOTE(review): presumably the keep/skip state of the THEN and
       // ELSE arms of a %( ... %) conditional -- confirm in scan_pp().
  86   typedef enum {
  87       PP_NONE,
  88       PP_KEEP_THEN,
  89       PP_SKIP_THEN,
  90       PP_KEEP_ELSE,
  91       PP_SKIP_ELSE,
  92   } pp_state_t;
  93
  94   struct pp1_activation;
  95
       // A macro parameter, represented as a parameterless macro whose
       // body holds the argument tokens.  A non-null parent_act marks
       // it as a closure over the activation it was invoked from.
  96   struct pp_macrodecl : public macrodecl {
  97     pp1_activation* parent_act; // used for param bindings
  98     virtual bool is_closure() { return parent_act != 0; }
  99     pp_macrodecl () : macrodecl(), parent_act(0) { }
 100   };
 101
 102   systemtap_session& session;
 103   string input_name;
 104   lexer input;
 105   bool errs_as_warnings; // set from pf_squash_errors
 106   bool privileged; // set from pf_guru
 107   bool user_file; // set from pf_user_file
 108   parse_context context;
 109
 110   // preprocessing subordinate, first pass (macros)
       // One macro expansion in progress.
 111   struct pp1_activation {
 112     const token* tok; // the invocation token; deleted by the destructor
 113     unsigned cursor; // position within macro body
 114     map<string, pp_macrodecl*> params; // parameter name -> argument closure
 115
 116     macrodecl* curr_macro;
 117
 118     pp1_activation (const token* tok, macrodecl* curr_macro)
 119       : tok(tok), cursor(0), curr_macro(curr_macro) { }
 120     ~pp1_activation ();
 121   };
 122
 123   map<string, macrodecl*> pp1_namespace; // macros @define'd in this input
 124   vector<pp1_activation*> pp1_state; // stack of active expansions, innermost last
 125   const token* next_pp1 ();
 126   const token* scan_pp1 (bool ignore_macros);
 127   const token* slurp_pp1_param (vector<const token*>& param);
 128   const token* slurp_pp1_body (vector<const token*>& body);
 129
 130   // preprocessing subordinate, final pass (conditionals)
 131   vector<pair<const token*, pp_state_t> > pp_state;
 132   const token* scan_pp ();
 133   const token* skip_pp ();
 134
 135   // scanning state
 136   const token* next ();
 137   const token* peek ();
 138
 139   // Advance past and throw away current token after peek () or next ().
 140   void swallow ();
 141
 142   const token* systemtap_v_seen;
 143   const token* last_t; // the last value returned by peek() or next()
 144   const token* next_t; // lookahead token
 145
 146   // expectations, these swallow the token
 147   void expect_known (token_type tt, string const & expected);
 148   void expect_unknown (token_type tt, interned_string & target);
 149   void expect_unknown2 (token_type tt1, token_type tt2, interned_string & target);
 150
 151   // convenience forms, these also swallow the token
 152   void expect_op (string const & expected);
 153   void expect_kw (string const & expected);
 154   void expect_number (int64_t & expected);
 155   void expect_ident_or_keyword (interned_string & target);
 156
 157   // convenience forms, which return true or false, these don't swallow token
 158   bool peek_op (string const & op);
 159   bool peek_kw (string const & kw);
 160
 161   // convenience forms, which return the token
 162   const token* expect_kw_token (string const & expected);
 163   const token* expect_ident_or_atword (interned_string & target);
 164
       // Report PE through the session and count it in num_errors.
 165   void print_error (const parse_error& pe, bool errs_as_warnings = false);
 166   unsigned num_errors;
 167
 168 private: // nonterminals
 169   void parse_probe (vector<probe*>&, vector<probe_alias*>&);
 170   void parse_private (vector<vardecl*>&, vector<probe*>&,
 171                       string const&, vector<functiondecl*>&);
 172   void parse_global (vector<vardecl*>&, vector<probe*>&,
 173                      string const&);
 174   void do_parse_global (vector<vardecl*>&, vector<probe*>&,
 175                         string const&, const token*, bool);
 176   void parse_functiondecl (vector<functiondecl*>&, string const&);
 177   void do_parse_functiondecl (vector<functiondecl*>&, const token*,
 178                               string const&, bool);
 179   embeddedcode* parse_embeddedcode ();
 180   vector<probe_point*> parse_probe_points ();
 181   vector<probe_point*> parse_components ();
 182   vector<probe_point*> parse_component ();
 183   literal_string* consume_string_literals (const token*);
 184   literal_string* parse_literal_string ();
 185   literal* parse_literal ();
 186   block* parse_stmt_block ();
 187   try_block* parse_try_block ();
 188   statement* parse_statement ();
 189   if_statement* parse_if_statement ();
 190   for_loop* parse_for_loop ();
 191   for_loop* parse_while_loop ();
 192   foreach_loop* parse_foreach_loop ();
 193   expr_statement* parse_expr_statement ();
 194   return_statement* parse_return_statement ();
 195   delete_statement* parse_delete_statement ();
 196   next_statement* parse_next_statement ();
 197   break_statement* parse_break_statement ();
 198   continue_statement* parse_continue_statement ();
 199   indexable* parse_indexable ();
 200   const token *parse_hist_op_or_bare_name (hist_op *&hop, interned_string &name);
 201   target_symbol *parse_target_symbol ();
 202   cast_op *parse_cast_op ();
 203   atvar_op *parse_atvar_op ();
 204   expression* parse_entry_op (const token* t);
 205   expression* parse_defined_op (const token* t);
 206   expression* parse_perf_op (const token* t);
 207   expression* parse_expression ();
 208   expression* parse_assignment ();
 209   expression* parse_ternary ();
 210   expression* parse_logical_or ();
 211   expression* parse_logical_and ();
 212   expression* parse_boolean_or ();
 213   expression* parse_boolean_xor ();
 214   expression* parse_boolean_and ();
 215   expression* parse_array_in ();
 216   expression* parse_comparison_or_regex_query ();
 217   expression* parse_shift ();
 218   expression* parse_concatenation ();
 219   expression* parse_additive ();
 220   expression* parse_multiplicative ();
 221   expression* parse_unary ();
 222   expression* parse_crement ();
 223   expression* parse_dwarf_value ();
 224   expression* parse_value ();
 225   expression* parse_symbol ();
 226
 227   bool peek_target_symbol_components ();
 228   void parse_target_symbol_components (target_symbol* e);
 229 };
 230
 231
 232 // ------------------------------------------------------------------------
 233
 234 stapfile*
 235 parse (systemtap_session& s, const string& n, istream& i, unsigned flags)
 236 {
 237   parser p (s, n, i, flags);
 238   return p.parse ();
 239 }
 240
 241 stapfile*
 242 parse (systemtap_session& s, const string& name, unsigned flags)
 243 {
 244   ifstream i(name.c_str(), ios::in);
 245   if (i.fail())
 246     {
 247       cerr << (file_exists(name)
 248                ? _F("Input file '%s' can't be opened for reading.", name.c_str())
 249                : _F("Input file '%s' is missing.", name.c_str()))
 250            << endl;
 251       return 0;
 252     }
 253
 254   parser p (s, name, i, flags);
 255   return p.parse ();
 256 }
 257
 258 stapfile*
 259 parse_library_macros (systemtap_session& s, const string& name)
 260 {
 261   ifstream i(name.c_str(), ios::in);
 262   if (i.fail())
 263     {
 264       cerr << (file_exists(name)
 265                ? _F("Input file '%s' can't be opened for reading.", name.c_str())
 266                : _F("Input file '%s' is missing.", name.c_str()))
 267            << endl;
 268       return 0;
 269     }
 270
 271   parser p (s, name, i);
 272   return p.parse_library_macros ();
 273 }
 274
 275 probe*
 276 parse_synthetic_probe (systemtap_session &s, istream& i, const token* tok)
 277 {
 278   parser p (s, tok ? tok->location.file->name : "<synthetic>", i);
 279   return p.parse_synthetic_probe (tok);
 280 }
 281
 282 // ------------------------------------------------------------------------
 283
 284 parser::parser (systemtap_session& s, const string &n, istream& i, unsigned flags):
 285   session (s), input_name (n), input (i, input_name, s, !(flags & pf_no_compatible)),
 286   errs_as_warnings(flags & pf_squash_errors), privileged (flags & pf_guru),
 287   user_file (flags & pf_user_file), context(con_unknown), systemtap_v_seen(0),
 288   last_t (0), next_t (0), num_errors (0)
 289 {
 290 }
 291
 292 parser::~parser()
 293 {
 294 }
 295
 296 static string
 297 tt2str(token_type tt)
 298 {
 299   switch (tt)
 300     {
 301     case tok_junk: return "junk";
 302     case tok_identifier: return "identifier";
 303     case tok_operator: return "operator";
 304     case tok_string: return "string";
 305     case tok_number: return "number";
 306     case tok_embedded: return "embedded-code";
 307     case tok_keyword: return "keyword";
 308     }
 309   return "unknown token";
 310 }
 311
 312 ostream&
 313 operator << (ostream& o, const source_loc& loc)
 314 {
 315   o << loc.file->name << ":"
 316     << loc.line << ":"
 317     << loc.column;
 318
 319   return o;
 320 }
 321
 322 ostream&
 323 operator << (ostream& o, const token& t)
 324 {
 325   o << tt2str(t.type);
 326
 327   if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
 328     {
 329       o << " '";
 330       for (unsigned i=0; i<t.content.length(); i++)
 331         {
 332           char c = t.content[i];
 333           o << (isprint (c) ? c : '?');
 334         }
 335       o << "'";
 336     }
 337
 338   o << " at "
 339     << t.location;
 340
 341   return o;
 342 }
 343
 344
 345 void
 346 parser::print_error  (const parse_error &pe, bool errs_as_warnings)
 347 {
 348   const token *tok = pe.tok ? pe.tok : last_t;
 349   session.print_error(pe, tok, input_name, errs_as_warnings);
 350   num_errors ++;
 351 }
 352
 353
 354
 355
 356 template <typename OPERAND>
 357 bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
 358 {
 359   if (op->type == tok_operator && op->content == "<=")
 360     { return lhs <= rhs; }
 361   else if (op->type == tok_operator && op->content == ">=")
 362     { return lhs >= rhs; }
 363   else if (op->type == tok_operator && op->content == "<")
 364     { return lhs < rhs; }
 365   else if (op->type == tok_operator && op->content == ">")
 366     { return lhs > rhs; }
 367   else if (op->type == tok_operator && op->content == "==")
 368     { return lhs == rhs; }
 369   else if (op->type == tok_operator && op->content == "!=")
 370     { return lhs != rhs; }
 371   else
 372     throw PARSE_ERROR (_("expected comparison operator"), op);
 373 }
 374
 375
 376 // Here, we perform on-the-fly preprocessing in two passes.
 377
 378 // First pass - macro declaration and expansion.
 379 //
 380 // The basic form of a declaration is @define SIGNATURE %( BODY %)
 381 // where SIGNATURE is of the form macro_name (a, b, c, ...)
 382 // and BODY can obtain the parameter contents as @a, @b, @c, ....
 383 // Note that parameterless macros can also be declared.
 384 //
 385 // Macro definitions may not be nested.
 386 // A macro is available textually after it has been defined.
 387 //
 388 // The basic form of a macro invocation
 389 //   for a parameterless macro is @macro_name,
 390 //   for a macro with parameters is @macro_name(param_1, param_2, ...).
 391 //
 392 // NB: this means that a parameterless macro @foo called as @foo(a, b, c)
 393 // leaves its 'parameters' alone, rather than consuming them to result
 394 // in a "too many parameters error". This may be useful in the unusual
 395 // case of wanting @foo to expand to the name of a function.
 396 //
 397 // Invocations of unknown macros are left unexpanded, to allow
 398 // the continued use of constructs such as @cast, @var, etc.
 399
 400 macrodecl::~macrodecl ()
 401 {
 402   delete tok;
 403   for (vector<const token*>::iterator it = body.begin();
 404        it != body.end(); it++)
 405     delete *it;
 406 }
 407
 408 parser::pp1_activation::~pp1_activation ()
 409 {
 410   delete tok;
 411   if (curr_macro->is_closure()) return; // body is shared with an earlier declaration
 412   for (map<string, pp_macrodecl*>::iterator it = params.begin();
 413        it != params.end(); it++)
 414     delete it->second;
 415 }
 416
 417 // Grab a token from the current input source (main file or macro body):
 418 const token*
 419 parser::next_pp1 ()
 420 {
 421   if (pp1_state.empty())
 422     return input.scan ();
 423
 424   // otherwise, we're inside a macro
 425   pp1_activation* act = pp1_state.back();
 426   unsigned& cursor = act->cursor;
 427   if (cursor < act->curr_macro->body.size())
 428     {
 429       token* t = new token(*act->curr_macro->body[cursor]);
 430       t->chain = new token(*act->tok); // mark chained token
 431       cursor++;
 432       return t;
 433     }
 434   else
 435     return 0; // reached end of macro body
 436 }
 437
     // First preprocessing pass: return the next token after handling
     // macro definitions and expanding macro invocations.  Returns 0 at
     // the real end of input.  When IGNORE_MACROS is true, '@define' is
     // not processed as a definition (PR18462).  Syntax problems in a
     // definition or invocation are reported by throwing a parse error.
 438 const token*
 439 parser::scan_pp1 (bool ignore_macros = false)
 440 {
 441   while (true)
 442     {
 443       const token* t = next_pp1 ();
 444       if (t == 0) // EOF or end of macro body
 445         {
 446           if (pp1_state.empty()) // actual EOF
 447             return 0;
 448
 449           // Exit macro and loop around to look for the next token.
 450           pp1_activation* act = pp1_state.back();
 451           pp1_state.pop_back(); delete act;
 452           continue;
 453         }
 454
 455       // macro definition
 456       // PR18462 don't catalog preprocessor-disabled macros
 457       if (t->type == tok_operator && t->content == "@define" && !ignore_macros)
 458         {
 459           if (!pp1_state.empty())
 460             throw PARSE_ERROR (_("'@define' forbidden inside macro body"), t);
 461           delete t;
 462
 463           // handle macro definition
 464           // (1) consume macro signature
 465           t = input.scan();
 466           if (! (t && t->type == tok_identifier))
 467             throw PARSE_ERROR (_("expected identifier"), t);
 468           string name = t->content;
 469
 470           // check for redefinition of existing macro
 471           if (pp1_namespace.find(name) != pp1_namespace.end())
 472             {
 473               parse_error er (ERR_SRC, _F("attempt to redefine macro '@%s' in the same file", name.c_str ()), t);
 474
 475               // Also point to pp1_namespace[name]->tok, the site of
 476               // the original definition:
 477               er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here",
 478                                              name.c_str()), pp1_namespace[name]->tok);
 479               throw er;
 480             }
 481
 482           // XXX: the above restriction was mostly necessary due to
 483           // wanting to leave open the possibility of
 484           // statically-scoped semantics in the future.
 485
 486           // XXX: this cascades into further parse errors as the
 487           // parser tries to parse the remaining definition... (e.g.
 488           // it can't tell that the macro body isn't a conditional,
 489           // that the uses of parameters aren't nonexistent
 490           // macros.....)
 491           if (name == "define")
 492             throw PARSE_ERROR (_("attempt to redefine '@define'"), t);
 493           if (input.atwords.count(name))
 494             session.print_warning (_F("macro redefines built-in operator '@%s'", name.c_str()), t);
 495
               // NB: the macro is entered into pp1_namespace here, before
               // its parameter list and body have been parsed; decl takes
               // ownership of the name token.
 496           macrodecl* decl = (pp1_namespace[name] = new macrodecl);
 497           decl->tok = t;
 498
 499           // determine if the macro takes parameters
 500           bool saw_params = false;
 501           t = input.scan();
 502           if (t && t->type == tok_operator && t->content == "(")
 503             {
 504               saw_params = true;
 505               do
 506                 {
                       // discards the '(' on the first pass and the ','
                       // separator on later passes
 507                   delete t;
 508
 509                   t = input.scan ();
 510                   if (! (t && t->type == tok_identifier))
 511                     throw PARSE_ERROR(_("expected identifier"), t);
 512                   decl->formal_args.push_back(t->content);
 513                   delete t;
 514
 515                   t = input.scan ();
 516                   if (t && t->type == tok_operator && t->content == ",")
 517                     {
 518                       continue;
 519                     }
 520                   else if (t && t->type == tok_operator && t->content == ")")
 521                     {
 522                       delete t;
 523                       t = input.scan();
 524                       break;
 525                     }
 526                   else
 527                     {
 528                       throw PARSE_ERROR (_("expected ',' or ')'"), t);
 529                     }
 530                 }
 531               while (true);
 532             }
 533
 534           // (2) identify & consume macro body
 535           if (! (t && t->type == tok_operator && t->content == "%("))
 536             {
 537               if (saw_params)
 538                 throw PARSE_ERROR (_("expected '%('"), t);
 539               else
 540                 throw PARSE_ERROR (_("expected '%(' or '('"), t);
 541             }
 542           delete t;
 543
 544           t = slurp_pp1_body (decl->body);
 545           if (!t)
 546             throw PARSE_ERROR (_("incomplete macro definition - missing '%)'"), decl->tok);
 547           delete t;
 548
 549           // Now loop around to look for a real token.
 550           continue;
 551         }
 552
 553       // (potential) macro invocation
 554       if (t->type == tok_operator && t->content[0] == '@')
 555         {
 556           const string& name = t->content.substr(1); // strip initial '@'
 557
 558           // check if name refers to a real parameter or macro
               // Lookup order: parameters of the innermost activation,
               // then macros defined in this file (skipped while expanding
               // a library macro), then session-wide library macros.
 559           macrodecl* decl;
 560           pp1_activation* act = pp1_state.empty() ? 0 : pp1_state.back();
 561           if (act && act->params.find(name) != act->params.end())
 562             decl = act->params[name];
 563           else if (!(act && act->curr_macro->context == ctx_library)
 564                    && pp1_namespace.find(name) != pp1_namespace.end())
 565             decl = pp1_namespace[name];
 566           else if (session.library_macros.find(name)
 567                    != session.library_macros.end())
 568             decl = session.library_macros[name];
 569           else // this is an ordinary @operator
 570             return t;
 571
 572           // handle macro invocation, taking ownership of t
 573           pp1_activation *new_act = new pp1_activation(t, decl);
 574           unsigned num_params = decl->formal_args.size();
 575
 576           // (1a) restore parameter invocation closure
 577           if (num_params == 0 && decl->is_closure())
 578             {
 579               // NB: decl->parent_act is always safe since the
 580               // parameter decl (if any) comes from an activation
 581               // record which is deeper in the stack than new_act.
 582
 583               // decl is a macro parameter which must be evaluated in
 584               // the context of the original point of invocation:
 585               new_act->params = ((pp_macrodecl*)decl)->parent_act->params;
 586               goto expand;
 587             }
 588
 589           // (1b) consume macro parameters (if any)
 590           if (num_params == 0)
 591             goto expand;
 592
 593           // for simplicity, we do not allow macro constructs here
 594           // -- if we did, we'd have to recursively call scan_pp1()
 595           t = next_pp1 ();
 596           if (! (t && t->type == tok_operator && t->content == "("))
 597             {
 598               delete new_act;
 599               throw PARSE_ERROR (_NF
 600                                     ("expected '(' in invocation of macro '@%s'"
 601                                      " taking %d parameter",
 602                                      "expected '(' in invocation of macro '@%s'"
 603                                      " taking %d parameters",
 604                                      num_params, name.c_str(), num_params), t);
 605             }
 606
 607           // XXX perhaps parse/count the full number of params,
 608           // so we can say "expected x, found y params" on error?
 609           for (unsigned i = 0; i < num_params; i++)
 610             {
                   // discards the '(' on the first pass and the ','
                   // separator on later passes
 611               delete t;
 612
 613               // create parameter closure
 614               string param_name = decl->formal_args[i];
 615               pp_macrodecl* p = (new_act->params[param_name]
 616                                  = new pp_macrodecl);
 617               p->tok = new token(*new_act->tok);
 618               p->parent_act = act;
 619               // NB: *new_act->tok points to invocation, act is NULL at top level
 620
 621               t = slurp_pp1_param (p->body);
 622
 623               // check correct usage of ',' or ')'
 624               if (t == 0) // hit unexpected EOF or end of macro
 625                 {
 626                   // XXX could we pop the stack and continue parsing
 627                   // the invocation, allowing macros to construct new
 628                   // invocations in piecemeal fashion??
                       // copy the invocation token first: deleting new_act
                       // deletes the token it owns
 629                   const token* orig_t = new token(*new_act->tok);
 630                   delete new_act;
 631                   throw PARSE_ERROR (_("could not find end of macro invocation"), orig_t);
 632                 }
 633               if (t->type == tok_operator && t->content == ",")
 634                 {
 635                   if (i + 1 == num_params)
 636                     {
 637                       delete new_act;
 638                       throw PARSE_ERROR (_F("too many parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
 639                     }
 640                 }
 641               else if (t->type == tok_operator && t->content == ")")
 642                 {
 643                   if (i + 1 != num_params)
 644                     {
 645                       delete new_act;
 646                       throw PARSE_ERROR (_F("too few parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
 647                     }
 648                 }
 649               else
 650                 {
 651                   // XXX this is, incidentally, impossible
 652                   delete new_act;
 653                   throw PARSE_ERROR(_("expected ',' or ')' after macro parameter"), t);
 654                 }
 655             }
 656
               // discards the closing ')'
 657           delete t;
 658
 659           // (2) set up macro expansion
 660         expand:
 661           pp1_state.push_back (new_act);
 662
 663           // Now loop around to look for a real token.
 664           continue;
 665         }
 666
 667       // Otherwise, we have an ordinary token.
 668       return t;
 669     }
 670 }
 671
 672 // Consume a single macro invocation's parameters, heeding nesting
 673 // brackets and stopping on an unbalanced ')' or an unbracketed ','
 674 // (and returning the final separator token).
 675 const token*
 676 parser::slurp_pp1_param (vector<const token*>& param)
 677 {
 678   const token* t = 0;
 679   unsigned nesting = 0;
 680   do
 681     {
 682       t = next_pp1 ();
 683
 684       if (!t)
 685         break;
 686       // [ needed in case macro paramater is used as prefix for array-deref operation
 687       if (t->type == tok_operator && (t->content == "(" || t->content == "["))
 688         ++nesting;
 689       else if (nesting && t->type == tok_operator && (t->content == ")" || t->content == "]"))
 690         --nesting;
 691       else if (!nesting && t->type == tok_operator
 692                && (t->content == ")" || t->content == ","))
 693         break;
 694       param.push_back(t);
 695     }
 696   while (true);
 697   return t; // report ")" or "," or NULL
 698 }
 699
 700
 701 // Consume a macro declaration's body, heeding nested %( %) brackets.
 702 const token*
 703 parser::slurp_pp1_body (vector<const token*>& body)
 704 {
 705   const token* t = 0;
 706   unsigned nesting = 0;
 707   do
 708     {
 709       t = next_pp1 ();
 710
 711       if (!t)
 712         break;
 713       if (t->type == tok_operator && t->content == "%(")
 714         ++nesting;
 715       else if (nesting && t->type == tok_operator && t->content == "%)")
 716         --nesting;
 717       else if (!nesting && t->type == tok_operator && t->content == "%)")
 718         break;
 719       body.push_back(t);
 720     }
 721   while (true);
 722   return t; // report final "%)" or NULL
 723 }
 724
 725 // Used for parsing .stpm files.
 726 stapfile*
 727 parser::parse_library_macros ()
 728 {
 729   stapfile* f = new stapfile;
 730   input.set_current_file (f);
 731
 732   try
 733     {
 734       const token* t = scan_pp ();
 735
 736       // Currently we only take objection to macro invocations if they
 737       // produce a non-whitespace token after being expanded.
 738
 739       // XXX should we prevent macro invocations even if they expand to empty??
 740
 741       if (t != 0)
 742         throw PARSE_ERROR (_F("unexpected token in library macro file '%s'", input_name.c_str()), t);
 743
 744       // We need to first check whether *any* of the macros are duplicates,
 745       // then commit to including the entire file in the global namespace
 746       // (or not). Yuck.
 747       for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
 748            it != pp1_namespace.end(); it++)
 749         {
 750           string name = it->first;
 751
 752           if (session.library_macros.find(name) != session.library_macros.end())
 753             {
 754               parse_error er(ERR_SRC, _F("duplicate definition of library macro '@%s'", name.c_str()), it->second->tok);
 755               er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here", name.c_str()), session.library_macros[name]->tok);
 756               print_error (er);
 757
 758               delete er.chain;
 759               delete f;
 760               return 0;
 761             }
 762         }
 763
 764     }
 765   catch (const parse_error& pe)
 766     {
 767       print_error (pe, errs_as_warnings);
 768       delete f;
 769       return 0;
 770     }
 771
 772   // If no errors, include the entire file.  Note how this is outside
 773   // of the try-catch block -- no errors possible.
 774   for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
 775        it != pp1_namespace.end(); it++)
 776     {
 777       string name = it->first;
 778
 779       session.library_macros[name] = it->second;
 780       session.library_macros[name]->context = ctx_library;
 781     }
 782
 783   return f;
 784 }
 785
 786 // Second pass - preprocessor conditional expansion.
 787 //
 788 // The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
 789 // where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
 790 //                 or: arch COMPARISON-OP "arch-string"
 791 //                 or: systemtap_v COMPARISON-OP "version-string"
 792 //                 or: systemtap_privilege COMPARISON-OP "privilege-string"
 793 //                 or: CONFIG_foo COMPARISON-OP "config-string"
 794 //                 or: CONFIG_foo COMPARISON-OP number
 795 //                 or: CONFIG_foo COMPARISON-OP CONFIG_bar
 796 //                 or: "string1" COMPARISON-OP "string2"
 797 //                 or: number1 COMPARISON-OP number2
 798 // The %: ELSE-TOKENS part is optional.
 799 //
 800 // e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
 801 // e.g. %( arch != "i?86" %? "foo" %: "baz" %)
 802 // e.g. %( CONFIG_foo %? "foo" %: "baz" %)
 803 //
 804 // Up to an entire %( ... %) expression is processed by a single call
 805 // to this function.  Tokens included by any nested conditions are
 806 // enqueued in a private vector.
 807
 808 bool eval_pp_conditional (systemtap_session& s,
 809                           const token* l, const token* op, const token* r)
 810 {
       // Evaluate one preprocessor comparison "l op r" and return its truth
       // value.  The kind of left operand selects the rules:
       //   kernel_v / kernel_vr / systemtap_v  - version string comparison
       //   systemtap_privilege, arch, runtime  - string (in)equality
       //   guru_mode                           - 0/1 (in)equality
       //   CONFIG_...                          - kernel config lookup
       //   string/string or number/number      - plain comparison
       // Any unsupported operand type or operator raises a parse error that
       // points at the offending token.  None of the tokens is freed here.

       // Version comparisons against the session's kernel release, kernel
       // base release, or systemtap compatibility version.
 811   if (l->type == tok_identifier && (l->content == "kernel_v" ||
 812                                     l->content == "kernel_vr" ||
 813                                     l->content == "systemtap_v"))
 814     {
 815       if (! (r->type == tok_string))
 816         throw PARSE_ERROR (_("expected string literal"), r);
 817
 818       string target_kernel_vr = s.kernel_release;
 819       string target_kernel_v = s.kernel_base_release;
 820       string target;
 821
 822       if (l->content == "kernel_v") target = target_kernel_v;
 823       else if (l->content == "kernel_vr") target = target_kernel_vr;
 824       else if (l->content == "systemtap_v") target = s.compatible;
 825       else assert (0);
 826
 827       string query = r->content;
           // Any glob metacharacter in the right-hand string switches the
           // comparison from version ordering to fnmatch pattern matching.
 828       bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
 829
 830       // collect acceptable strverscmp results.
           // (rvc_ok1, rvc_ok2) are the two normalized results (-1, 0, 1) for
           // which the comparison holds; wc_ok marks operators that may be
           // combined with a wildcard pattern.
 831       int rvc_ok1, rvc_ok2;
 832       bool wc_ok = false;
 833       if (op->type == tok_operator && op->content == "<=")
 834         { rvc_ok1 = -1; rvc_ok2 = 0; }
 835       else if (op->type == tok_operator && op->content == ">=")
 836         { rvc_ok1 = 1; rvc_ok2 = 0; }
 837       else if (op->type == tok_operator && op->content == "<")
 838         { rvc_ok1 = -1; rvc_ok2 = -1; }
 839       else if (op->type == tok_operator && op->content == ">")
 840         { rvc_ok1 = 1; rvc_ok2 = 1; }
 841       else if (op->type == tok_operator && op->content == "==")
 842         { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
 843       else if (op->type == tok_operator && op->content == "!=")
 844         { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
 845       else
 846         throw PARSE_ERROR (_("expected comparison operator"), op);
 847
 848       if ((!wc_ok) && rhs_wildcard)
 849         throw PARSE_ERROR (_("wildcard not allowed with order comparison operators"), op);
 850
 851       if (rhs_wildcard)
 852         {
               // fnmatch yields 0 on a match.  Only "==" and "!=" reach this
               // point, so the result is "matched" for "==" and "did not
               // match" for "!=".
 853           int rvc_result = fnmatch (query.c_str(), target.c_str(),
 854                                     FNM_NOESCAPE); // spooky
 855           bool badness = (rvc_result == 0) ^ (op->content == "==");
 856           return !badness;
 857         }
 858       else
 859         {
 860           int rvc_result = strverscmp (target.c_str(), query.c_str());
 861           // normalize rvc_result
 862           if (rvc_result < 0) rvc_result = -1;
 863           if (rvc_result > 0) rvc_result = 1;
 864           return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
 865         }
 866     }
       // Privilege level: exact string (in)equality against the highest
       // privilege name contained in s.privilege.
 867   else if (l->type == tok_identifier && l->content == "systemtap_privilege")
 868     {
 869       string target_privilege =
 870         pr_contains(s.privilege, pr_stapdev) ? "stapdev"
 871         : pr_contains(s.privilege, pr_stapsys) ? "stapsys"
 872         : pr_contains(s.privilege, pr_stapusr) ? "stapusr"
 873         : "none"; /* should be impossible -- s.privilege always one of above */
 874       assert(target_privilege != "none");
 875
 876       if (! (r->type == tok_string))
 877         throw PARSE_ERROR (_("expected string literal"), r);
 878       string query_privilege = r->content;
 879
 880       bool nomatch = (target_privilege != query_privilege);
 881
 882       bool result;
 883       if (op->type == tok_operator && op->content == "==")
 884         result = !nomatch;
 885       else if (op->type == tok_operator && op->content == "!=")
 886         result = nomatch;
 887       else
 888         throw PARSE_ERROR (_("expected '==' or '!='"), op);
 889       /* XXX perhaps allow <= >= and similar comparisons */
 890
 891       return result;
 892     }
       // Guru mode: the right side must be the number 0 or 1 and the operator
       // "==" or "!=".
 893   else if (l->type == tok_identifier && l->content == "guru_mode")
 894     {
 895       if (! (r->type == tok_number))
 896         throw PARSE_ERROR (_("expected number"), r);
 897       int64_t lhs = (int64_t) s.guru_mode;
 898       int64_t rhs = lex_cast<int64_t>(r->content);
 899       if (!((rhs == 0)||(rhs == 1)))
 900         throw PARSE_ERROR (_("expected 0 or 1"), op);
 901       if (!((op->type == tok_operator && op->content == "==") ||
 902             (op->type == tok_operator && op->content == "!=")))
 903         throw PARSE_ERROR (_("expected '==' or '!='"), op);
 904
 905       return eval_comparison (lhs, op, rhs);
 906     }
       // Architecture: the right-hand string is a glob pattern matched
       // against s.architecture.
 907   else if (l->type == tok_identifier && l->content == "arch")
 908     {
 909       string target_architecture = s.architecture;
 910       if (! (r->type == tok_string))
 911         throw PARSE_ERROR (_("expected string literal"), r);
 912       string query_architecture = r->content;
 913
           // nomatch is nonzero when the pattern does not match.
 914       int nomatch = fnmatch (query_architecture.c_str(),
 915                              target_architecture.c_str(),
 916                              FNM_NOESCAPE); // still spooky
 917
 918       bool result;
 919       if (op->type == tok_operator && op->content == "==")
 920         result = !nomatch;
 921       else if (op->type == tok_operator && op->content == "!=")
 922         result = nomatch;
 923       else
 924         throw PARSE_ERROR (_("expected '==' or '!='"), op);
 925
 926       return result;
 927     }
       // Runtime: the right-hand glob pattern is matched against "dyninst"
       // or "kernel", depending on s.runtime_mode.
 928   else if (l->type == tok_identifier && l->content == "runtime")
 929     {
 930       if (! (r->type == tok_string))
 931         throw PARSE_ERROR (_("expected string literal"), r);
 932
 933       string query_runtime = r->content;
 934       string target_runtime;
 935
 936       target_runtime = (s.runtime_mode == systemtap_session::dyninst_runtime
 937                         ? "dyninst" : "kernel");
 938       int nomatch = fnmatch (query_runtime.c_str(),
 939                              target_runtime.c_str(),
 940                              FNM_NOESCAPE); // still spooky
 941
 942       bool result;
 943       if (op->type == tok_operator && op->content == "==")
 944         result = !nomatch;
 945       else if (op->type == tok_operator && op->content == "!=")
 946         result = nomatch;
 947       else
 948         throw PARSE_ERROR (_("expected '==' or '!='"), op);
 949
 950       return result;
 951     }
       // Kernel configuration options.  The right side may be a string (glob
       // match), a number (numeric comparison), or another CONFIG_ name.
 952   else if (l->type == tok_identifier && l->content.starts_with("CONFIG_"))
 953     {
 954       if (r->type == tok_string)
 955         {
 956           string lhs = s.kernel_config[l->content]; // may be empty
 957           string rhs = r->content;
 958
 959           int nomatch = fnmatch (rhs.c_str(), lhs.c_str(), FNM_NOESCAPE); // still spooky
 960
 961           bool result;
 962           if (op->type == tok_operator && op->content == "==")
 963             result = !nomatch;
 964           else if (op->type == tok_operator && op->content == "!=")
 965             result = nomatch;
 966           else
 967             throw PARSE_ERROR (_("expected '==' or '!='"), op);
 968
 969           return result;
 970         }
 971       else if (r->type == tok_number)
 972         {
               // The config value must parse completely as an integer
               // (base auto-detected by strtoll) to be compared numerically.
               // NOTE(review): an empty value leaves endp at the terminating
               // NUL, so it presumably passes this check and compares as 0
               // unless strtoll reports EINVAL -- confirm this is intended.
 973           const string& lhs_string = s.kernel_config[l->content];
 974           const char* startp = lhs_string.c_str ();
 975           char* endp = (char*) startp;
 976           errno = 0;
 977           int64_t lhs = (int64_t) strtoll (startp, & endp, 0);
 978           if (errno == ERANGE || errno == EINVAL || *endp != '\0')
 979             throw PARSE_ERROR ("Config option value not a number", l);
 980
 981           int64_t rhs = lex_cast<int64_t>(r->content);
 982           return eval_comparison (lhs, op, rhs);
 983         }
 984       else if (r->type == tok_identifier
 985                && r->content.starts_with( "CONFIG_"))
 986         {
 987           // First try to convert both to numbers,
 988           // otherwise treat both as strings.
 989           const string& lhs_string = s.kernel_config[l->content];
 990           const string& rhs_string = s.kernel_config[r->content];
 991           const char* startp = lhs_string.c_str ();
 992           char* endp = (char*) startp;
 993           errno = 0;
 994           int64_t val = (int64_t) strtoll (startp, & endp, 0);
 995           if (errno != ERANGE && errno != EINVAL && *endp == '\0')
 996             {
 997               int64_t lhs = val;
 998               startp = rhs_string.c_str ();
 999               endp = (char*) startp;
1000               errno = 0;
1001               int64_t rhs = (int64_t) strtoll (startp, & endp, 0);
1002               if (errno != ERANGE && errno != EINVAL && *endp == '\0')
1003                 return eval_comparison (lhs, op, rhs);
1004             }
1005
               // At least one side is not a complete integer: compare the
               // raw config strings instead.
1006           return eval_comparison (lhs_string, op, rhs_string);
1007         }
1008       else
1009         throw PARSE_ERROR (_("expected string, number literal or other CONFIG_... as right side operand"), r);
1010     }
       // Literal-to-literal comparisons.
1011   else if (l->type == tok_string && r->type == tok_string)
1012     {
1013       string lhs = l->content;
1014       string rhs = r->content;
1015       return eval_comparison (lhs, op, rhs);
1016       // NB: no wildcarding option here
1017     }
1018   else if (l->type == tok_number && r->type == tok_number)
1019     {
1020       int64_t lhs = lex_cast<int64_t>(l->content);
1021       int64_t rhs = lex_cast<int64_t>(r->content);
1022       return eval_comparison (lhs, op, rhs);
1023       // NB: no wildcarding option here
1024     }
       // Mixed literal types get a more specific message than the generic
       // fallback below.
1025   else if (l->type == tok_string && r->type == tok_number
1026             && op->type == tok_operator)
1027     throw PARSE_ERROR (_("expected string literal as right value"), r);
1028   else if (l->type == tok_number && r->type == tok_string
1029             && op->type == tok_operator)
1030     throw PARSE_ERROR (_("expected number literal as right value"), r);
1031
1032   else
1033     throw PARSE_ERROR (_("expected 'arch', 'kernel_v', 'kernel_vr', 'systemtap_v',\n"
1034                          "             'runtime', 'systemtap_privilege', 'CONFIG_...', or\n"
1035                          "             comparison between strings or integers"), l);
1036 }
1037
1038
1039 // Only tokens corresponding to the TRUE statement must be expanded
     //
     // Return the next token that survives conditional preprocessing, or 0
     // at end of input.  The preprocessor markers %( %? %: %) are consumed
     // here and never returned.  pp_state is a stack of (opening "%(" token,
     // state) pairs, one entry per conditional currently open; the state
     // says whether the "then" or "else" part is being kept or skipped.
     // Misplaced or incomplete conditionals raise parse errors.
1040 const token*
1041 parser::scan_pp ()
1042 {
1043   while (true)
1044     {
           // State of the innermost open conditional, if any.
1045       pp_state_t pp = PP_NONE;
1046       if (!pp_state.empty())
1047         pp = pp_state.back().second;
1048
           // In a skipped branch, let skip_pp() discard tokens up to the
           // next marker that concerns this conditional.
1049       const token* t = 0;
1050       if (pp == PP_SKIP_THEN || pp == PP_SKIP_ELSE)
1051         t = skip_pp ();
1052       else
1053         t = scan_pp1 ();
1054
1055       if (t == 0) // EOF
1056         {
1057           if (pp != PP_NONE)
1058             {
                   // Report the error at the unmatched "%(" token.
1059               t = pp_state.back().first;
1060               pp_state.pop_back(); // so skip_some doesn't keep trying to close this
1061               //TRANSLATORS: 'conditional' meaning 'conditional preprocessing'
1062               throw PARSE_ERROR (_("incomplete conditional at end of file"), t);
1063             }
1064           return t;
1065         }
1066
1067       // misplaced preprocessor "then"
1068       if (t->type == tok_operator && t->content == "%?")
1069         throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1070
1071       // preprocessor "else"
1072       if (t->type == tok_operator && t->content == "%:")
1073         {
1074           if (pp == PP_NONE)
1075             throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1076           if (pp == PP_KEEP_ELSE || pp == PP_SKIP_ELSE)
1077             throw PARSE_ERROR (_("invalid conditional - duplicate '%:'"), t);
1078           // XXX: here and elsewhere, error cascades might be avoided
1079           // by dropping tokens until we reach the closing %)
1080
               // Flip the state: a kept "then" means a skipped "else", and
               // vice versa.
1081           pp_state.back().second = (pp == PP_KEEP_THEN) ?
1082                                    PP_SKIP_ELSE : PP_KEEP_ELSE;
1083           delete t;
1084           continue;
1085         }
1086
1087       // preprocessor close
1088       if (t->type == tok_operator && t->content == "%)")
1089         {
1090           if (pp == PP_NONE)
1091             throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
               // The stack entry owns the opening "%(" token; free it here.
1092           delete pp_state.back().first;
1093           delete t; //this is the closing bracket
1094           pp_state.pop_back();
1095           continue;
1096         }
1097
1098       if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
1099         return t;
1100
1101       // We have a %( - it's time to throw a preprocessing party!
1102
           // The condition is a sequence of "l op r" comparisons joined by
           // && and ||.  and_result accumulates the current &&-run; result
           // accumulates the ||-combination of completed runs.  n holds the
           // token that followed the most recent comparison.
1103       bool result = false;
1104       bool and_result = true;
1105       const token *n = NULL;
1106       do {
1107         const token *l, *op, *r;
1108         l = scan_pp1 ();
1109         op = scan_pp1 ();
1110         r = scan_pp1 ();
1111         if (l == 0 || op == 0 || r == 0)
1112           throw PARSE_ERROR (_("incomplete condition after '%('"), t);
1113         // NB: consider generalizing to consume all tokens until %?, and
1114         // passing that as a vector to an evaluator.
1115
1116         // Do not evaluate the condition if we haven't expanded everything.
1117         // This may occur when having several recursive conditionals.
             // NOTE(review): l, op and r are not freed if the evaluator
             // throws; presumably tolerated on the error path -- confirm.
1118         and_result &= eval_pp_conditional (session, l, op, r);
             // The right operand of a systemtap_v comparison is kept in
             // systemtap_v_seen rather than freed.
1119         if(l->content=="systemtap_v")
1120           systemtap_v_seen=r;
1121
1122         else
1123           delete r;
1124
1125         delete l;
1126         delete op;
             // Frees the && / || connective from the previous iteration
             // (NULL on the first pass).
1127         delete n;
1128
1129         n = scan_pp1 ();
1130         if (n && n->type == tok_operator && n->content == "&&")
1131           continue;
             // The &&-run ended: fold it into the overall result and start
             // a new run.
1132         result |= and_result;
1133         and_result = true;
1134         if (! (n && n->type == tok_operator && n->content == "||"))
1135           break;
1136       } while (true);
1137
1138       /*
1139       clog << "PP eval (" << *t << ") == " << result << endl;
1140       */
1141
           // Whatever ended the condition must be the "%?" marker.
1142       const token *m = n;
1143       if (! (m && m->type == tok_operator && m->content == "%?"))
1144         throw PARSE_ERROR (_("expected '%?' marker for conditional"), t);
1145       delete m; // "%?"
1146
           // Open a new conditional; the stack entry takes over the "%("
           // token t until the matching "%)" or EOF.
1147       pp = result ? PP_KEEP_THEN : PP_SKIP_THEN;
1148       pp_state.push_back (make_pair (t, pp));
1149
1150       // Now loop around to look for a real token.
1151     }
1152 }
1153
1154
1155 // Skip over tokens and any errors, heeding
1156 // only nested preprocessor starts and ends.
1157 const token*
1158 parser::skip_pp ()
1159 {
1160   const token* t = 0;
1161   unsigned nesting = 0;
1162   do
1163     {
1164       try
1165         {
1166           t = scan_pp1 (true);
1167         }
1168       catch (const parse_error &e)
1169         {
1170           continue;
1171         }
1172       if (!t)
1173         break;
1174       if (t->type == tok_operator && t->content == "%(")
1175         ++nesting;
1176       else if (nesting && t->type == tok_operator && t->content == "%)")
1177         --nesting;
1178       else if (!nesting && t->type == tok_operator &&
1179                (t->content == "%:" || t->content == "%?" || t->content == "%)"))
1180         break;
1181       delete t;
1182     }
1183   while (true);
1184   return t;
1185 }
1186
1187
1188 const token*
1189 parser::next ()
1190 {
1191   if (! next_t)
1192     next_t = scan_pp ();
1193   if (! next_t)
1194     throw PARSE_ERROR (_("unexpected end-of-file"));
1195
1196   last_t = next_t;
1197   // advance by zeroing next_t
1198   next_t = 0;
1199   return last_t;
1200 }
1201
1202
1203 const token*
1204 parser::peek ()
1205 {
1206   if (! next_t)
1207     next_t = scan_pp ();
1208
1209   // don't advance by zeroing next_t
1210   last_t = next_t;
1211   return next_t;
1212 }
1213
1214
1215 void
1216 parser::swallow ()
1217 {
1218   // can only swallow something last peeked or nexted token.
1219   assert (last_t != 0);
1220   delete last_t;
1221   // advance by zeroing next_t
1222   last_t = next_t = 0;
1223 }
1224
1225
1226 static inline bool
1227 tok_is(token const * t, token_type tt, string const & expected)
1228 {
1229   return t && t->type == tt && t->content == expected;
1230 }
1231
1232
1233 void
1234 parser::expect_known (token_type tt, string const & expected)
1235 {
1236   const token *t = next();
1237   if (! (t && t->type == tt && t->content == expected))
1238     throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
1239   swallow (); // We are done with it, content was copied.
1240 }
1241
1242
1243 void
1244 parser::expect_unknown (token_type tt, interned_string & target)
1245 {
1246   const token *t = next();
1247   if (!(t && t->type == tt))
1248     throw PARSE_ERROR (_("expected ") + tt2str(tt));
1249   target = t->content;
1250   swallow (); // We are done with it, content was copied.
1251 }
1252
1253
1254 void
1255 parser::expect_unknown2 (token_type tt1, token_type tt2, interned_string & target)
1256 {
1257   const token *t = next();
1258   if (!(t && (t->type == tt1 || t->type == tt2)))
1259     throw PARSE_ERROR (_F("expected %s or %s", tt2str(tt1).c_str(), tt2str(tt2).c_str()));
1260   target = t->content;
1261   swallow (); // We are done with it, content was copied.
1262 }
1263
1264
1265 void
1266 parser::expect_op (string const & expected)
1267 {
1268   expect_known (tok_operator, expected);
1269 }
1270
1271
1272 void
1273 parser::expect_kw (string const & expected)
1274 {
1275   expect_known (tok_keyword, expected);
1276 }
1277
1278 const token*
1279 parser::expect_kw_token (string const & expected)
1280 {
1281   const token *t = next();
1282   if (! (t && t->type == tok_keyword && t->content == expected))
1283     throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
1284   return t;
1285 }
1286
1287 void
1288 parser::expect_number (int64_t & value)
1289 {
1290   bool neg = false;
1291   const token *t = next();
1292   if (t->type == tok_operator && t->content == "-")
1293     {
1294       neg = true;
1295       swallow ();
1296       t = next ();
1297     }
1298   if (!(t && t->type == tok_number))
1299     throw PARSE_ERROR (_("expected number"));
1300
1301   const string& s = t->content;
1302   const char* startp = s.c_str ();
1303   char* endp = (char*) startp;
1304
1305   // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1306   // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1307   // since the lexer only gives us positive digit strings, but we'll
1308   // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1309   errno = 0;
1310   value = (int64_t) strtoull (startp, & endp, 0);
1311   if (errno == ERANGE || errno == EINVAL || *endp != '\0'
1312       || (neg && (unsigned long long) value > 9223372036854775808ULL)
1313       || (unsigned long long) value > 18446744073709551615ULL
1314       || value < -9223372036854775807LL-1)
1315     throw PARSE_ERROR (_("number invalid or out of range"));
1316
1317   if (neg)
1318     value = -value;
1319
1320   swallow (); // We are done with it, content was parsed and copied into value.
1321 }
1322
1323
1324 const token*
1325 parser::expect_ident_or_atword (interned_string & target)
1326 {
1327   const token *t = next();
1328
1329   // accept identifiers and operators beginning in '@':
1330   if (!t || (t->type != tok_identifier
1331              && (t->type != tok_operator || t->content[0] != '@')))
1332     // XXX currently this is only called from parse_hist_op_or_bare_name(),
1333     // so the message is accurate, but keep an eye out in the future:
1334     throw PARSE_ERROR (_F("expected %s or statistical operation", tt2str(tok_identifier).c_str()));
1335
1336   target = t->content;
1337   return t;
1338 }
1339
1340
1341 void
1342 parser::expect_ident_or_keyword (interned_string & target)
1343 {
1344   expect_unknown2 (tok_identifier, tok_keyword, target);
1345 }
1346
1347
1348 bool
1349 parser::peek_op (string const & op)
1350 {
1351   return tok_is (peek(), tok_operator, op);
1352 }
1353
1354
1355 bool
1356 parser::peek_kw (string const & kw)
1357 {
1358   return tok_is (peek(), tok_identifier, kw);
1359 }
1360
1361
1362
1363 lexer::lexer (istream& input, const string& in, systemtap_session& s, bool cc):
1364   ate_comment(false), ate_whitespace(false), saw_tokens(false), check_compatible(cc),
1365   input_name (in), input_pointer (0), input_end (0), cursor_suspend_count(0),
1366   cursor_suspend_line (1), cursor_suspend_column (1), cursor_line (1),
1367   cursor_column (1), session(s), current_file (0), current_token_chain (0)
1368 {
1369   getline(input, input_contents, '\0');
1370
1371   input_pointer = input_contents.data();
1372   input_end = input_contents.data() + input_contents.size();
1373
1374   if (keywords.empty())
1375     {
1376       // NB: adding new keywords is highly disruptive to the language,
1377       // in particular to existing scripts that could be suddenly
1378       // broken.  If done at all, it has to be s.compatible-sensitive,
1379       // and broadly advertised.
1380       keywords.insert("probe");
1381       keywords.insert("global");
1382       if (has_version("3.0"))
1383         keywords.insert("private");
1384       keywords.insert("function");
1385       keywords.insert("if");
1386       keywords.insert("else");
1387       keywords.insert("for");
1388       keywords.insert("foreach");
1389       keywords.insert("in");
1390       keywords.insert("limit");
1391       keywords.insert("return");
1392       keywords.insert("delete");
1393       keywords.insert("while");
1394       keywords.insert("break");
1395       keywords.insert("continue");
1396       keywords.insert("next");
1397       keywords.insert("string");
1398       keywords.insert("long");
1399       keywords.insert("try");
1400       keywords.insert("catch");
1401     }
1402
1403   if (atwords.empty())
1404     {
1405       // NB: adding new @words is mildly disruptive to existing
1406       // scripts that define macros with the same name, but not
1407       // really. The user will merely receive a warning that they are
1408       // redefining an existing operator.
1409
1410       // These are inserted without the actual '@', so we can directly check
1411       // proposed macro names without building a string with that prefix.
1412       atwords.insert("cast");
1413       atwords.insert("defined");
1414       atwords.insert("entry");
1415       atwords.insert("perf");
1416       atwords.insert("var");
1417       atwords.insert("avg");
1418       atwords.insert("count");
1419       atwords.insert("sum");
1420       atwords.insert("min");
1421       atwords.insert("max");
1422       atwords.insert("hist_linear");
1423       atwords.insert("hist_log");
1424     }
1425 }
1426
1427 unordered_set<string> lexer::atwords; // definition of the static member declared in class lexer; filled by the lexer constructor while it is still empty
1428
1429 void
1430 lexer::set_current_file (stapfile* f)
1431 {
1432   current_file = f;
1433   if (f)
1434     {
1435       f->file_contents = input_contents;
1436       f->name = input_name;
1437     }
1438 }
1439
1440 void
1441 lexer::set_current_token_chain (const token* tok)
1442 {
1443   current_token_chain = tok;
1444 }
1445
1446 int
1447 lexer::input_peek (unsigned n)
1448 {
1449   if (input_pointer + n >= input_end)
1450     return -1; // EOF
1451   return (unsigned char)*(input_pointer + n);
1452 }
1453
1454
1455 bool
1456 lexer::has_version (const char* v) const
1457 {
1458   return check_compatible
1459     ? strverscmp(session.compatible.c_str(), v) >= 0
1460     : true;
1461 }
1462
1463 int
1464 lexer::input_get ()
1465 {
1466   int c = input_peek();
1467   if (c < 0) return c; // EOF
1468
1469   ++input_pointer;
1470
1471   if (cursor_suspend_count)
1472     {
1473       // Track effect of input_put: preserve previous cursor/line_column
1474       // until all of its characters are consumed.
1475       if (--cursor_suspend_count == 0)
1476         {
1477           cursor_line = cursor_suspend_line;
1478           cursor_column = cursor_suspend_column;
1479         }
1480     }
1481   else
1482     {
1483       // update source cursor
1484       if (c == '\n')
1485         {
1486           cursor_line ++;
1487           cursor_column = 1;
1488         }
1489       else
1490         cursor_column ++;
1491     }
1492
1493   // clog << "[" << (char)c << "]";
1494   return c;
1495 }
1496
1497
1498 void
1499 lexer::input_put (const string& chars, const token* t)
1500 {
1501   size_t pos = input_pointer - input_contents.data();
1502   // clog << "[put:" << chars << " @" << pos << "]";
1503   input_contents.insert (pos, chars);
1504   cursor_suspend_count += chars.size();
1505   cursor_suspend_line = cursor_line;
1506   cursor_suspend_column = cursor_column;
1507   cursor_line = t->location.line;
1508   cursor_column = t->location.column;
1509   input_pointer = input_contents.data() + pos;
1510   input_end = input_contents.data() + input_contents.size();
1511 }
1512
1513
1514 token*
1515 lexer::scan ()
1516 {
       // Scan and return the next token as a newly allocated object, or 0
       // at end of input.  Whitespace and comments are skipped (recorded in
       // ate_whitespace / ate_comment), and $N / @N / $# / @# command line
       // argument references are pasted into the input and rescanned.
       // Lexical errors are reported by returning a token marked as junk.
1517   ate_comment = false; // reset for each new token
1518   ate_whitespace = false; // reset for each new token
1519
1520   // XXX be very sure to restore old_saw_tokens if we return without a token:
1521   bool old_saw_tokens = saw_tokens;
1522   saw_tokens = true;
1523
1524   token* n = new token;
1525   string token_str; // accumulate here instead of by incremental interning
1526   n->location.file = current_file;
1527   n->chain = current_token_chain;
1528
       // Re-entered after whitespace, comments and argument substitution;
       // each pass records the position where the token would start.
1529 skip:
       // True while characters pasted by input_put() are still unconsumed;
       // used below to reject argument references nested inside arguments.
1530   bool suspended = (cursor_suspend_count > 0);
1531   n->location.line = cursor_line;
1532   n->location.column = cursor_column;
1533
1534   int c = input_get();
1535   // clog << "{" << (char)c << (char)c2 << "}";
1536   if (c < 0)
1537     {
1538       delete n;
1539       saw_tokens = old_saw_tokens;
1540       return 0;
1541     }
1542
1543   if (isspace (c))
1544     {
1545       ate_whitespace = true;
1546       goto skip;
1547     }
1548
       // One character of lookahead (-1 at end of input).
1549   int c2 = input_peek ();
1550
1551   // Paste command line arguments as character streams into
1552   // the beginning of a token.  $1..$999 go through as raw
1553   // characters; @1..@999 are quoted/escaped as strings.
1554   // $# and @# expand to the number of arguments, similarly
1555   // raw or quoted.
1556   if ((c == '$' || c == '@') && (c2 == '#'))
1557     {
1558       token_str.push_back (c);
1559       token_str.push_back (c2);
1560       input_get(); // swallow '#'
1561
1562       if (suspended)
1563         {
1564           n->make_junk(tok_junk_nested_arg);
1565           return n;
1566         }
1567       size_t num_args = session.args.size ();
1568       input_put ((c == '$') ? lex_cast (num_args) : lex_cast_qstring (num_args), n);
1569       token_str.clear();
1570       goto skip;
1571     }
1572   else if ((c == '$' || c == '@') && (isdigit (c2)))
1573     {
           // Accumulate the decimal argument index; the loop stops early
           // once the index already exceeds the number of arguments.
1574       unsigned idx = 0;
1575       token_str.push_back (c);
1576       do
1577         {
1578           input_get ();
1579           token_str.push_back (c2);
1580           idx = (idx * 10) + (c2 - '0');
1581           c2 = input_peek ();
1582         } while (c2 > 0 &&
1583                  isdigit (c2) &&
1584                  idx <= session.args.size()); // prevent overflow
1585       if (suspended)
1586         {
1587           n->make_junk(tok_junk_nested_arg);
1588           return n;
1589         }
1590       if (idx == 0 ||
1591           idx-1 >= session.args.size())
1592         {
1593           n->make_junk(tok_junk_invalid_arg);
1594           return n;
1595         }
1596       const string& arg = session.args[idx-1];
1597       input_put ((c == '$') ? arg : lex_cast_qstring (arg), n);
1598       token_str.clear();
1599       goto skip;
1600     }
1601
       // Identifiers, keywords, $variables and @words.
1602   else if (isalpha (c) || c == '$' || c == '@' || c == '_')
1603     {
1604       token_str = (char) c;
1605       while (isalnum (c2) || c2 == '_' || c2 == '$')
1606         {
1607           input_get ();
1608           token_str.push_back (c2);
1609           c2 = input_peek ();
1610         }
1611       n->content = token_str;
1612
1613       if (n->content[0] == '@')
1614         // makes it easier to detect illegal use of @words:
1615         n->type = tok_operator;
1616       else if (keywords.count(n->content))
1617         n->type = tok_keyword;
1618       else
1619         n->type = tok_identifier;
1620
1621       return n;
1622     }
1623
1624   else if (isdigit (c)) // positive literal
1625     {
1626       n->type = tok_number;
1627       token_str = (char) c;
1628
1629       while (isalnum (c2))
1630         {
1631           // NB: isalnum is very permissive.  We rely on strtol, called in
1632           // parser::parse_literal below, to confirm that the number string
1633           // is correctly formatted and in range.
1634
1635           input_get ();
1636           token_str.push_back (c2);
1637           c2 = input_peek ();
1638         }
1639
1640       n->content = token_str;
1641       return n;
1642     }
1643
       // String literal: runs to the closing double quote on the same line.
1644   else if (c == '\"')
1645     {
1646       n->type = tok_string;
1647       while (1)
1648         {
1649           c = input_get ();
1650
1651           if (c < 0 || c == '\n')
1652             {
1653               n->make_junk(tok_junk_unclosed_quote);
1654               return n;
1655             }
1656           if (c == '\"') // closing double-quotes
1657             break;
1658           else if (c == '\\') // see also input_put
1659             {
                   // Recognized escapes keep their backslash in the token
                   // text; any other escaped character is stored without it.
                   // NOTE(review): at end of input c is negative here and is
                   // still appended by the default case before the next
                   // iteration reports the unclosed quote.
1660               c = input_get();
1661               switch (c)
1662                 {
1663                 case 'x':
1664                   if (!has_version("2.3"))
1665                     goto the_default;
1666                   /* FALLTHROUGH */
1667                 case 'a':
1668                 case 'b':
1669                 case 't':
1670                 case 'n':
1671                 case 'v':
1672                 case 'f':
1673                 case 'r':
1674                 case '0' ... '7': // NB: need only match the first digit
1675                 case '\\':
1676                   // Pass these escapes through to the string value
1677                   // being parsed; it will be emitted into a C literal.
1678                   // XXX: PR13371: perhaps we should evaluate them here
1679                   // (and re-quote them during translate.cxx emission).
1680                   token_str.push_back ('\\');
1681
1682                   // fall through
1683                 default: the_default:
1684                   token_str.push_back (c);
1685                   break;
1686                 }
1687             }
1688           else
1689             token_str.push_back (c);
1690         }
1691       n->content = token_str;
1692       return n;
1693     }
1694
       // Comments, embedded code blocks and operators.
1695   else if (ispunct (c))
1696     {
           // Second character of lookahead, for three-character operators.
1697       int c3 = input_peek (1);
1698
1699       // NB: if we were to recognize negative numeric literals here,
1700       // we'd introduce another grammar ambiguity:
1701       // 1-1 would be parsed as tok_number(1) and tok_number(-1)
1702       // instead of tok_number(1) tok_operator('-') tok_number(1)
1703
1704       if (c == '#') // shell comment
1705         {
               // Consume characters until the cursor leaves this line or
               // the input ends.
1706           unsigned this_line = cursor_line;
1707           do { c = input_get (); }
1708           while (c >= 0 && cursor_line == this_line);
1709           ate_comment = true;
1710           ate_whitespace = true;
1711           goto skip;
1712         }
1713       else if ((c == '/' && c2 == '/')) // C++ comment
1714         {
1715           unsigned this_line = cursor_line;
1716           do { c = input_get (); }
1717           while (c >= 0 && cursor_line == this_line);
1718           ate_comment = true;
1719           ate_whitespace = true;
1720           goto skip;
1721         }
1722       else if (c == '/' && c2 == '*') // C comment
1723         {
               // Slide a two-character window (c, c2) over the input until
               // it reads "*/".  If the input ends first, the loop simply
               // stops and the rescan below returns 0.
1724           (void) input_get (); // swallow '*' already in c2
1725           c = input_get ();
1726           c2 = input_get ();
1727           while (c2 >= 0)
1728             {
1729               if (c == '*' && c2 == '/')
1730                 break;
1731               c = c2;
1732               c2 = input_get ();
1733             }
1734           ate_comment = true;
1735           ate_whitespace = true;
1736           goto skip;
1737         }
1738       else if (c == '%' && c2 == '{') // embedded code
1739         {
               // Collect everything up to the closing "%}" as the token
               // text, using the same two-character window as above.
1740           n->type = tok_embedded;
1741           (void) input_get (); // swallow '{' already in c2
1742           c = input_get ();
1743           c2 = input_get ();
1744           while (c2 >= 0)
1745             {
1746               if (c == '%' && c2 == '}')
1747                 {
1748                   n->content = token_str;
1749                   return n;
1750                 }
1751               if (c == '}' && c2 == '%') // possible typo
1752                 session.print_warning (_("possible erroneous closing '}%', use '%}'?"), n);
1753               token_str.push_back (c);
1754               c = c2;
1755               c2 = input_get();
1756             }
1757
                 // Input ended before the closing "%}".
1758             n->make_junk(tok_junk_unclosed_embedded);
1759             return n;
1760         }
1761
1762       // We're committed to recognizing at least the first character
1763       // as an operator.
1764       n->type = tok_operator;
1765       token_str = (char) c;
1766
1767       // match all valid operators, in decreasing size order
1768       if ((c == '<' && c2 == '<' && c3 == '<') ||
1769           (c == '<' && c2 == '<' && c3 == '=') ||
1770           (c == '>' && c2 == '>' && c3 == '='))
1771         {
1772           token_str.push_back (c2);
1773           token_str.push_back (c3);
1774           input_get (); // c2
1775           input_get (); // c3
1776         }
1777       else if ((c == '=' && c2 == '=') ||
1778                (c == '!' && c2 == '=') ||
1779                (c == '<' && c2 == '=') ||
1780                (c == '>' && c2 == '=') ||
1781                (c == '=' && c2 == '~') ||
1782                (c == '!' && c2 == '~') ||
1783                (c == '+' && c2 == '=') ||
1784                (c == '-' && c2 == '=') ||
1785                (c == '*' && c2 == '=') ||
1786                (c == '/' && c2 == '=') ||
1787                (c == '%' && c2 == '=') ||
1788                (c == '&' && c2 == '=') ||
1789                (c == '^' && c2 == '=') ||
1790                (c == '|' && c2 == '=') ||
1791                (c == '.' && c2 == '=') ||
1792                (c == '&' && c2 == '&') ||
1793                (c == '|' && c2 == '|') ||
1794                (c == '+' && c2 == '+') ||
1795                (c == '-' && c2 == '-') ||
1796                (c == '-' && c2 == '>') ||
1797                (c == '<' && c2 == '<') ||
1798                (c == '>' && c2 == '>') ||
1799                // preprocessor tokens
1800                (c == '%' && c2 == '(') ||
1801                (c == '%' && c2 == '?') ||
1802                (c == '%' && c2 == ':') ||
1803                (c == '%' && c2 == ')'))
1804         {
1805           token_str.push_back (c2);
1806           input_get (); // swallow other character
1807         }
1808
1809       n->content = token_str;
1810       return n;
1811     }
1812
       // Anything else is reported as junk, with the offending character
       // rendered as a hexadecimal escape in the token text.
1813   else
1814     {
1815       n->type = tok_junk;
1816       ostringstream s;
1817       s << "\\x" << hex << setw(2) << setfill('0') << c;
1818       n->content = s.str();
1819       // signal parser to emit "expected X, found junk" type error
1820       n->make_junk(tok_junk_unknown);
1821       return n;
1822     }
1823 }
1824
1825 // ------------------------------------------------------------------------
1826
1827 void
1828 token::make_junk (token_junk_type junk)
1829 {
1830   type = tok_junk;
1831   junk_type = junk;
1832 }
1833
1834 // ------------------------------------------------------------------------
1835
1836 string
1837 token::junk_message(systemtap_session& session) const
1838 {
1839   switch (junk_type)
1840     {
1841     case tok_junk_nested_arg:
1842       return _("invalid nested substitution of command line arguments");
1843
1844     case tok_junk_invalid_arg:
1845       return _F("command line argument out of range [1-%lu]",
1846                 (unsigned long) session.args.size());
1847
1848     case tok_junk_unclosed_quote:
1849       return _("Could not find matching closing quote");
1850
1851     case tok_junk_unclosed_embedded:
1852       return _("Could not find matching '%}' to close embedded function block");
1853
1854     default:
1855       return _("unknown junk token");
1856     }
1857 }
1858
1859 // ------------------------------------------------------------------------
1860
1861 stapfile*
1862 parser::parse ()
1863 {
1864   stapfile* f = new stapfile;
1865   input.set_current_file (f);
1866
1867   bool empty = true;
1868
1869   while (1)
1870     {
1871       try
1872         {
1873           systemtap_v_seen = 0;
1874           const token* t = peek ();
1875           if (! t) // nice clean EOF, modulo any preprocessing that occurred
1876             break;
1877
1878           empty = false;
1879           if (t->type == tok_keyword && t->content == "probe")
1880             {
1881               context = con_probe;
1882               parse_probe (f->probes, f->aliases);
1883             }
1884           else if (t->type == tok_keyword && t->content == "private")
1885             {
1886               context = con_unknown;
1887               parse_private (f->globals, f->probes, f->name, f->functions);
1888             }
1889           else if (t->type == tok_keyword && t->content == "global")
1890             {
1891               context = con_global;
1892               parse_global (f->globals, f->probes, f->name);
1893             }
1894           else if (t->type == tok_keyword && t->content == "function")
1895             {
1896               context = con_function;
1897               parse_functiondecl (f->functions, f->name);
1898             }
1899           else if (t->type == tok_embedded)
1900             {
1901               context = con_embedded;
1902               f->embeds.push_back (parse_embeddedcode ());
1903             }
1904           else
1905             {
1906               context = con_unknown;
1907               throw PARSE_ERROR (_("expected 'probe', 'global', 'private', 'function', or '%{'"));
1908             }
1909         }
1910       catch (parse_error& pe)
1911         {
1912           print_error (pe, errs_as_warnings);
1913
1914           // XXX: do we want tok_junk to be able to force skip_some behaviour?
1915           if (pe.skip_some) // for recovery
1916             // Quietly swallow all tokens until the next keyword we can start parsing from.
1917             while (1)
1918               try
1919                 {
1920                   {
1921                     const token* t = peek ();
1922                     if (! t)
1923                       break;
1924                     if (t->type == tok_keyword && t->content == "probe") break;
1925                     else if (t->type == tok_keyword && t->content == "private") break;
1926                     else if (t->type == tok_keyword && t->content == "global") break;
1927                     else if (t->type == tok_keyword && t->content == "function") break;
1928                     else if (t->type == tok_embedded) break;
1929                     swallow (); // swallow it
1930                   }
1931                 }
1932               catch (parse_error& pe2)
1933                 {
1934                   // parse error during recovery ... ugh
1935                   print_error (pe2);
1936                 }
1937         }
1938     }
1939
1940   if (empty)
1941     {
1942       // vary message depending on whether file was *actually* empty:
1943       cerr << (input.saw_tokens
1944                ? _F("Input file '%s' is empty after preprocessing.", input_name.c_str())
1945                : _F("Input file '%s' is empty.", input_name.c_str()))
1946            << endl;
1947       delete f;
1948       f = 0;
1949     }
1950   else if (num_errors > 0)
1951     {
1952       cerr << _NF("%d parse error.", "%d parse errors.", num_errors, num_errors) << endl;
1953       delete f;
1954       f = 0;
1955     }
1956
1957   input.set_current_file(0);
1958   return f;
1959 }
1960
1961
1962 probe*
1963 parser::parse_synthetic_probe (const token* chain)
1964 {
1965   probe* p = NULL;
1966   stapfile* f = new stapfile;
1967   f->synthetic = true;
1968   input.set_current_file (f);
1969   input.set_current_token_chain (chain);
1970
1971   try
1972     {
1973       context = con_probe;
1974       parse_probe (f->probes, f->aliases);
1975
1976       if (f->probes.size() != 1 || !f->aliases.empty())
1977         throw PARSE_ERROR (_("expected a single synthetic probe"));
1978       p = f->probes[0];
1979     }
1980   catch (parse_error& pe)
1981     {
1982       print_error (pe, errs_as_warnings);
1983     }
1984
1985   // TODO check for unparsed tokens?
1986
1987   input.set_current_file(0);
1988   input.set_current_token_chain(0);
1989   return p;
1990 }
1991
1992
1993 void
1994 parser::parse_probe (vector<probe *> & probe_ret,
1995                      vector<probe_alias *> & alias_ret)
1996 {
1997   const token* t0 = next ();
1998   if (! (t0->type == tok_keyword && t0->content == "probe"))
1999     throw PARSE_ERROR (_("expected 'probe'"));
2000
2001   vector<probe_point *> aliases;
2002   vector<probe_point *> locations;
2003
2004   int epilogue_alias = 0;
2005
2006   while (1)
2007     {
2008       vector<probe_point*> pps = parse_probe_points();
2009
2010       const token* t = peek ();
2011       if (pps.size() == 1 && t
2012           && t->type == tok_operator && t->content == "=")
2013         {
2014           if (pps[0]->optional || pps[0]->sufficient)
2015             throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps[0]->components.front()->tok);
2016           aliases.push_back(pps[0]);
2017           swallow ();
2018           continue;
2019         }
2020       else if (pps.size() == 1 && t
2021           && t->type == tok_operator && t->content == "+=")
2022         {
2023           if (pps[0]->optional || pps[0]->sufficient)
2024             throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps[0]->components.front()->tok);
2025           aliases.push_back(pps[0]);
2026           epilogue_alias = 1;
2027           swallow ();
2028           continue;
2029         }
2030       else if (t && t->type == tok_operator && t->content == "{")
2031         {
2032           locations.insert(locations.end(), pps.begin(), pps.end());
2033           break;
2034         }
2035       else
2036         throw PARSE_ERROR (_("expected probe point specifier"));
2037     }
2038
2039   if (aliases.empty())
2040     {
2041       probe* p = new probe;
2042       p->tok = t0;
2043       p->locations = locations;
2044       p->body = parse_stmt_block ();
2045       p->privileged = privileged;
2046       p->systemtap_v_conditional = systemtap_v_seen;
2047       probe_ret.push_back (p);
2048     }
2049   else
2050     {
2051       probe_alias* p = new probe_alias (aliases);
2052       if(epilogue_alias)
2053         p->epilogue_style = true;
2054       else
2055         p->epilogue_style = false;
2056       p->tok = t0;
2057       p->locations = locations;
2058       p->body = parse_stmt_block ();
2059       p->privileged = privileged;
2060       p->systemtap_v_conditional = systemtap_v_seen;
2061       alias_ret.push_back (p);
2062     }
2063 }
2064
2065
2066 embeddedcode*
2067 parser::parse_embeddedcode ()
2068 {
2069   embeddedcode* e = new embeddedcode;
2070   const token* t = next ();
2071   if (t->type != tok_embedded)
2072     throw PARSE_ERROR (_("expected '%{'"));
2073
2074   if (! privileged)
2075     throw PARSE_ERROR (_("embedded code in unprivileged script; need stap -g"),
2076                        false /* don't skip tokens for parse resumption */);
2077
2078   e->tok = t;
2079   e->code = t->content;
2080   return e;
2081 }
2082
2083
2084 block*
2085 parser::parse_stmt_block ()
2086 {
2087   block* pb = new block;
2088
2089   const token* t = next ();
2090   if (! (t->type == tok_operator && t->content == "{"))
2091     throw PARSE_ERROR (_("expected '{'"));
2092
2093   pb->tok = t;
2094
2095   while (1)
2096     {
2097       t = peek ();
2098       if (t && t->type == tok_operator && t->content == "}")
2099         {
2100           swallow ();
2101           break;
2102         }
2103       pb->statements.push_back (parse_statement ());
2104     }
2105
2106   return pb;
2107 }
2108
2109
2110 try_block*
2111 parser::parse_try_block ()
2112 {
2113   try_block* pb = new try_block;
2114
2115   pb->tok = expect_kw_token ("try");
2116   pb->try_block = parse_stmt_block();
2117   expect_kw ("catch");
2118
2119   const token* t = peek ();
2120   if (t != NULL && t->type == tok_operator && t->content == "(")
2121     {
2122       swallow (); // swallow the '('
2123
2124       t = next();
2125       if (! (t->type == tok_identifier))
2126         throw PARSE_ERROR (_("expected identifier"));
2127       symbol* sym = new symbol;
2128       sym->tok = t;
2129       sym->name = t->content;
2130       pb->catch_error_var = sym;
2131
2132       expect_op (")");
2133     }
2134   else
2135     pb->catch_error_var = 0;
2136
2137   pb->catch_block = parse_stmt_block();
2138
2139   return pb;
2140 }
2141
2142
2143
2144 statement*
2145 parser::parse_statement ()
2146 {
2147   statement *ret;
2148   const token* t = peek ();
2149   if (t && t->type == tok_operator && t->content == ";")
2150     return new null_statement (next ());
2151   else if (t && t->type == tok_operator && t->content == "{")
2152     return parse_stmt_block (); // Don't squash semicolons.
2153   else if (t && t->type == tok_keyword && t->content == "try")
2154     return parse_try_block (); // Don't squash semicolons.
2155   else if (t && t->type == tok_keyword && t->content == "if")
2156     return parse_if_statement (); // Don't squash semicolons.
2157   else if (t && t->type == tok_keyword && t->content == "for")
2158     return parse_for_loop (); // Don't squash semicolons.
2159   else if (t && t->type == tok_keyword && t->content == "foreach")
2160     return parse_foreach_loop (); // Don't squash semicolons.
2161   else if (t && t->type == tok_keyword && t->content == "while")
2162     return parse_while_loop (); // Don't squash semicolons.
2163   else if (t && t->type == tok_keyword && t->content == "return")
2164     ret = parse_return_statement ();
2165   else if (t && t->type == tok_keyword && t->content == "delete")
2166     ret = parse_delete_statement ();
2167   else if (t && t->type == tok_keyword && t->content == "break")
2168     ret = parse_break_statement ();
2169   else if (t && t->type == tok_keyword && t->content == "continue")
2170     ret = parse_continue_statement ();
2171   else if (t && t->type == tok_keyword && t->content == "next")
2172     ret = parse_next_statement ();
2173   else if (t && (t->type == tok_operator || // expressions are flexible
2174                  t->type == tok_identifier ||
2175                  t->type == tok_number ||
2176                  t->type == tok_string ||
2177                  t->type == tok_embedded ))
2178     ret = parse_expr_statement ();
2179   // XXX: consider generally accepting tok_embedded here too
2180   else
2181     throw PARSE_ERROR (_("expected statement"));
2182
2183   // Squash "empty" trailing colons after any "non-block-like" statement.
2184   t = peek ();
2185   if (t && t->type == tok_operator && t->content == ";")
2186     {
2187       swallow (); // Silently eat trailing ; after statement
2188     }
2189
2190   return ret;
2191 }
2192
2193 void
2194 parser::parse_private (vector <vardecl*>& globals, vector<probe*>& probes,
2195                        string const & fname, vector<functiondecl*>& functions)
2196 {
2197   const token* t = next ();
2198   if (! (t->type == tok_keyword && t->content == "private"))
2199     throw PARSE_ERROR (_("expected 'private'"));
2200   swallow ();
2201   t = next ();
2202   if (t->type == tok_keyword && t->content == "function")
2203   {
2204     swallow ();
2205     context = con_function;
2206     do_parse_functiondecl(functions, t, fname, true);
2207   }
2208   else if (t->type == tok_keyword && t->content == "global")
2209   {
2210     swallow ();
2211     context = con_global;
2212     t = next ();
2213     if (! (t->type == tok_identifier))
2214       throw PARSE_ERROR (_("expected identifier"));
2215     do_parse_global(globals, probes, fname, t, true);
2216   }
2217   // The `private <identifier>` is an acceptable shorthand
2218   // for `private global <identifier>` per above.
2219   else if (t->type == tok_identifier)
2220   {
2221     context = con_global;
2222     do_parse_global(globals, probes, fname, t, true);
2223   }
2224   else
2225     throw PARSE_ERROR (_("expected 'function' or identifier"));
2226 }
2227
2228 void
2229 parser::parse_global (vector <vardecl*>& globals, vector<probe*>& probes,
2230                       string const & fname)
2231 {
2232   const token* t0 = next ();
2233   if (! (t0->type == tok_keyword && t0->content == "global"))
2234     throw PARSE_ERROR (_("expected 'global' or 'private'"));
2235   swallow ();
2236   do_parse_global(globals, probes, fname, 0, false);
2237 }
2238
2239 void
2240 parser::do_parse_global (vector <vardecl*>& globals, vector<probe*>&,
2241                          string const & fname, const token* t0, bool priv)
2242 {
2243   bool iter0 = true;
2244   const token* t;
2245   while (1)
2246     {
2247       t = (iter0 && priv) ? t0 : next ();
2248       iter0 = false;
2249       if (! (t->type == tok_identifier))
2250         throw PARSE_ERROR (_("expected identifier"));
2251
2252       string gname = "__global_" + string(t->content);
2253       string pname = "__private_" + detox_path(fname) + string(t->content);
2254       string name = priv ? pname : gname;
2255
2256       for (unsigned i=0; i<globals.size(); i++)
2257       {
2258         if (globals[i]->name == name)
2259           throw PARSE_ERROR (_("duplicate global name"));
2260         if ((globals[i]->name == gname) || (globals[i]->name == pname))
2261           throw PARSE_ERROR (_("global versus private variable declaration conflict"));
2262       }
2263
2264       vardecl* d = new vardecl;
2265       d->unmangled_name = t->content;
2266       d->name = name;
2267       d->tok = t;
2268       d->systemtap_v_conditional = systemtap_v_seen;
2269       globals.push_back (d);
2270
2271       t = peek ();
2272
2273       if(t && t->type == tok_operator && t->content == "%") //wrapping
2274         {
2275           d->wrap = true;
2276           swallow ();
2277           t = peek();
2278         }
2279
2280       if (t && t->type == tok_operator && t->content == "[") // array size
2281         {
2282           int64_t size;
2283           swallow ();
2284           expect_number(size);
2285           if (size <= 0 || size > 1000000) // arbitrary max
2286             throw PARSE_ERROR(_("array size out of range"));
2287           d->maxsize = (int)size;
2288           expect_known(tok_operator, "]");
2289           t = peek ();
2290         }
2291
2292       if (t && t->type == tok_operator && t->content == "=") // initialization
2293         {
2294           if (!d->compatible_arity(0))
2295             throw PARSE_ERROR(_("only scalar globals can be initialized"));
2296           d->set_arity(0, t);
2297           next (); // Don't swallow, set_arity() used the peeked token.
2298           d->init = parse_literal ();
2299           d->type = d->init->type;
2300           t = peek ();
2301         }
2302
2303       if (t && t->type == tok_operator && t->content == ";") // termination
2304         {
2305           swallow ();
2306           break;
2307         }
2308
2309       if (t && t->type == tok_operator && t->content == ",") // next global
2310         {
2311           swallow ();
2312           continue;
2313         }
2314       else
2315         break;
2316     }
2317 }
2318
2319 void
2320 parser::parse_functiondecl (vector<functiondecl*>& functions,
2321                             string const & fname)
2322 {
2323   const token* t = next ();
2324   if (! (t->type == tok_keyword && t->content == "function"))
2325     throw PARSE_ERROR (_("expected 'function'"));
2326   swallow ();
2327   do_parse_functiondecl(functions, t, fname, false);
2328 }
2329
2330 void
2331 parser::do_parse_functiondecl (vector<functiondecl*>& functions, const token* t,
2332                                string const & fname, bool priv)
2333 {
2334   t = next ();
2335   if (! (t->type == tok_identifier)
2336       && ! (t->type == tok_keyword
2337             && (t->content == "string" || t->content == "long")))
2338     throw PARSE_ERROR (_("expected identifier"));
2339
2340   string gname = "__global_" + string(t->content);
2341   string pname = "__private_" + detox_path(fname) + string(t->content);
2342   string name = priv ? pname : gname;
2343   name += "__overload_" + lex_cast(session.overload_count[t->content]++);
2344
2345   functiondecl *fd = new functiondecl ();
2346   fd->unmangled_name = t->content;
2347   fd->name = name;
2348   fd->tok = t;
2349
2350   t = next ();
2351   if (t->type == tok_operator && t->content == ":")
2352     {
2353       swallow ();
2354       t = next ();
2355       if (t->type == tok_keyword && t->content == "string")
2356         fd->type = pe_string;
2357       else if (t->type == tok_keyword && t->content == "long")
2358         fd->type = pe_long;
2359       else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2360       swallow ();
2361
2362       t = next ();
2363     }
2364
2365   if (! (t->type == tok_operator && t->content == "("))
2366     throw PARSE_ERROR (_("expected '('"));
2367   swallow ();
2368
2369   while (1)
2370     {
2371       t = next ();
2372
2373       // permit zero-argument functions
2374       if (t->type == tok_operator && t->content == ")")
2375         {
2376           swallow ();
2377           break;
2378         }
2379       else if (! (t->type == tok_identifier))
2380         throw PARSE_ERROR (_("expected identifier"));
2381       vardecl* vd = new vardecl;
2382       vd->unmangled_name = vd->name = t->content;
2383       vd->tok = t;
2384       fd->formal_args.push_back (vd);
2385       fd->systemtap_v_conditional = systemtap_v_seen;
2386
2387       t = next ();
2388       if (t->type == tok_operator && t->content == ":")
2389         {
2390           swallow ();
2391           t = next ();
2392           if (t->type == tok_keyword && t->content == "string")
2393             vd->type = pe_string;
2394           else if (t->type == tok_keyword && t->content == "long")
2395             vd->type = pe_long;
2396           else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2397           swallow ();
2398           t = next ();
2399         }
2400       if (t->type == tok_operator && t->content == ")")
2401         {
2402           swallow ();
2403           break;
2404         }
2405       if (t->type == tok_operator && t->content == ",")
2406         {
2407           swallow ();
2408           continue;
2409         }
2410       else
2411         throw PARSE_ERROR (_("expected ',' or ')'"));
2412     }
2413
2414   t = peek();
2415   if (t->type == tok_operator && t->content == ":")
2416     {
2417       int64_t priority;
2418       swallow();
2419       expect_number(priority);
2420       fd->priority = priority;
2421       // reserve priority 0 for user script implementation
2422       if (priority < 1)
2423         throw PARSE_ERROR (_("specified priority must be > 0"));
2424     }
2425   else if (user_file)
2426     {
2427       // allow script file implementation override automatically when
2428       // priority not specified
2429       fd->priority = 0;
2430     }
2431
2432   t = peek ();
2433   if (t && t->type == tok_embedded)
2434     fd->body = parse_embeddedcode ();
2435   else
2436     fd->body = parse_stmt_block ();
2437
2438   functions.push_back (fd);
2439 }
2440
2441 vector<probe_point*>
2442 parser::parse_probe_points()
2443 {
2444   vector<probe_point*> pps;
2445   while (1)
2446     {
2447       vector<probe_point*> tail = parse_components();
2448       pps.insert(pps.end(), tail.begin(), tail.end());
2449
2450       const token* t = peek();
2451       if (t && t->type == tok_operator && t->content == ",")
2452         {
2453           swallow();
2454           continue;
2455         }
2456
2457       if (t && t->type == tok_operator
2458           && (t->content == "{" || t->content == "=" ||
2459               t->content == "+="|| t->content == "}"))
2460         break;
2461
2462       throw PARSE_ERROR (_("expected one of ', { } = +='"));
2463     }
2464   return pps;
2465 }
2466
2467 vector<probe_point*>
2468 parser::parse_components()
2469 {
2470   vector<probe_point*> pps;
2471   while (1)
2472     {
2473       vector<probe_point*> suffix = parse_component();
2474
2475       // Cartesian product of components
2476       if (pps.empty())
2477         pps = suffix;
2478       else
2479         {
2480           assert(!suffix.empty());
2481           vector<probe_point*> product;
2482           for (unsigned i = 0; i < pps.size(); i++)
2483             {
2484               if (pps[i]->optional || pps[i]->sufficient || pps[i]->condition)
2485                 throw PARSE_ERROR (_("'?', '!' or condition must only be specified in suffix"),
2486                                    pps[i]->components[0]->tok);
2487               for (unsigned j = 0; j < suffix.size(); j++)
2488                 {
2489                   probe_point* pp = new probe_point;
2490                   pp->components.insert(pp->components.end(),
2491                                         pps[i]->components.begin(), pps[i]->components.end());
2492                   pp->components.insert(pp->components.end(),
2493                                         suffix[j]->components.begin(), suffix[j]->components.end());
2494                   pp->optional = suffix[j]->optional;
2495                   pp->sufficient = suffix[j]->sufficient;
2496                   pp->condition = suffix[j]->condition;
2497                   product.push_back(pp);
2498                 }
2499             }
2500           for (unsigned i = 0; i < pps.size(); i++) delete pps[i];
2501           for (unsigned i = 0; i < suffix.size(); i++) delete suffix[i];
2502           pps = product;
2503         }
2504
2505       const token* t = peek();
2506       if (t && t->type == tok_operator && t->content == ".")
2507         {
2508           swallow ();
2509           continue;
2510         }
2511
2512       // We only fall through here at the end of        a probe point (past
2513       // all the dotted/parametrized components).
2514
2515       if (t && t->type == tok_operator &&
2516           (t->content == "?" || t->content == "!"))
2517         {
2518           for (unsigned i = 0; i < pps.size(); i++)
2519             {
2520               if (pps[i]->optional || pps[i]->sufficient)
2521                 throw PARSE_ERROR (_("'?' or '!' respecified"));
2522               pps[i]->optional = true;
2523               if (t->content == "!") pps[i]->sufficient = true;
2524             }
2525           // NB: sufficient implies optional
2526           swallow ();
2527           t = peek ();
2528           // fall through
2529         }
2530
2531       if (t && t->type == tok_keyword && t->content == "if")
2532         {
2533           swallow ();
2534           t = peek ();
2535           if (!(t && t->type == tok_operator && t->content == "("))
2536             throw PARSE_ERROR (_("expected '('"));
2537           swallow ();
2538
2539           expression* e = parse_expression();
2540           for (unsigned i = 0; i < pps.size(); i++)
2541             {
2542               if (pps[i]->condition != 0)
2543                 throw PARSE_ERROR (_("condition respecified"));
2544               pps[i]->condition = e;
2545             }
2546
2547           t = peek ();
2548           if (!(t && t->type == tok_operator && t->content == ")"))
2549             throw PARSE_ERROR (_("expected ')'"));
2550           swallow ();
2551         }
2552
2553       break;
2554     }
2555   return pps;
2556 }
2557
2558 vector<probe_point*>
2559 parser::parse_component()
2560 {
2561   const token* t = next ();
2562   if (! (t->type == tok_identifier
2563          // we must allow ".return" and ".function", which are keywords
2564          || t->type == tok_keyword
2565          // we must allow "*", due to being an operator
2566          || (t->type == tok_operator && (t->content == "*" || t->content == "{"))))
2567     throw PARSE_ERROR (_("expected identifier or '*' or '{'"));
2568
2569   if (t && t->type == tok_operator && t->content == "{")
2570     {
2571       swallow();
2572       vector<probe_point*> pps = parse_probe_points();
2573       t = peek();
2574       if (!(t && t->type == tok_operator && t->content == "}"))
2575         throw PARSE_ERROR (_("expected '}'"));
2576       swallow();
2577       return pps;
2578     }
2579   else
2580     {
2581       // loop which reconstitutes an identifier with wildcards
2582       string content = t->content;
2583       bool changed_p = false;
2584       while (1)
2585         {
2586           const token* u = peek();
2587           if (u == NULL)
2588             break;
2589           // ensure pieces of the identifier are adjacent:
2590           if (input.ate_whitespace)
2591             break;
2592           // ensure pieces of the identifier are valid:
2593           if (! (u->type == tok_identifier
2594                  // we must allow arbitrary keywords with a wildcard
2595                  || u->type == tok_keyword
2596                  // we must allow "*", due to being an operator
2597                  || (u->type == tok_operator && u->content == "*")))
2598             break;
2599
2600           // append u to t
2601           content = content + (string)u->content;
2602           changed_p = true;
2603
2604           // consume u
2605           swallow ();
2606         }
2607
2608       if (changed_p)
2609         {
2610           // We've already swallowed the first token and we're not
2611           // putting it back; no one else has a copy; so we can
2612           // safely overwrite its content and reuse it.
2613           const_cast<token*>(t)->content = content;
2614         }
2615
2616       probe_point::component* c = new probe_point::component;
2617       c->functor = t->content;
2618       c->tok = t;
2619       vector<probe_point*> pps;
2620       probe_point* pp = new probe_point;
2621       pp->components.push_back(c);
2622       pps.push_back(pp);
2623       // NB we may add c->arg soon
2624
2625       t = peek ();
2626
2627       // consume optional parameter
2628       if (t && t->type == tok_operator && t->content == "(")
2629         {
2630           swallow (); // consume "("
2631           c->arg = parse_literal ();
2632
2633           t = next ();
2634           if (! (t->type == tok_operator && t->content == ")"))
2635             throw PARSE_ERROR (_("expected ')'"));
2636           swallow ();
2637         }
2638       return pps;
2639     }
2640 }
2641
2642 literal_string*
2643 parser::consume_string_literals(const token *t)
2644 {
2645   literal_string *ls = new literal_string (t->content);
2646
2647   // PR11208: check if the next token is also a string literal;
2648   // auto-concatenate it.  This is complicated to the extent that we
2649   // need to skip intermediate whitespace.
2650   //
2651   // NB for versions prior to 2.0: but don't skip over intervening comments
2652   string concat;
2653   bool p_concat = false;
2654   const token *n = peek();
2655   while (n != NULL && n->type == tok_string
2656          && ! (!input.has_version("2.0") && input.ate_comment))
2657     {
2658       if (!p_concat)
2659         {
2660           concat = t->content;
2661           p_concat = true;
2662         }
2663       concat.append(n->content.data(), n->content.size());
2664       next(); // consume the token
2665       n = peek();
2666     }
2667   if (p_concat)
2668     ls->value = concat;
2669   return ls;
2670 }
2671
2672
2673 // Parse a string literal and perform backslash escaping on the contents:
2674 literal_string*
2675 parser::parse_literal_string ()
2676 {
2677   const token* t = next ();
2678   literal_string* l;
2679   if (t->type == tok_string)
2680     l = consume_string_literals (t);
2681   else
2682     throw PARSE_ERROR (_("expected literal string"));
2683
2684   l->tok = t;
2685   return l;
2686 }
2687
2688
2689 literal*
2690 parser::parse_literal ()
2691 {
2692   const token* t = next ();
2693   literal* l;
2694   if (t->type == tok_string)
2695     {
2696       l = consume_string_literals (t);
2697     }
2698   else
2699     {
2700       bool neg = false;
2701       if (t->type == tok_operator && t->content == "-")
2702         {
2703           neg = true;
2704           swallow ();
2705           t = next ();
2706         }
2707
2708       if (t->type == tok_number)
2709         {
2710           const string& s = t->content;
2711           const char* startp = s.c_str ();
2712           char* endp = (char*) startp;
2713
2714           // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
2715           // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
2716           // since the lexer only gives us positive digit strings, but we'll
2717           // limit it to LLONG_MIN when a '-' operator is fed into the literal.
2718           errno = 0;
2719           long long value = (long long) strtoull (startp, & endp, 0);
2720           if (errno == ERANGE || errno == EINVAL || *endp != '\0'
2721               || (neg && (unsigned long long) value > 9223372036854775808ULL)
2722               || (unsigned long long) value > 18446744073709551615ULL
2723               || value < -9223372036854775807LL-1)
2724             throw PARSE_ERROR (_("number invalid or out of range"));
2725
2726           if (neg)
2727             value = -value;
2728
2729           l = new literal_number (value);
2730         }
2731       else
2732         throw PARSE_ERROR (_("expected literal string or number"));
2733     }
2734
2735   l->tok = t;
2736   return l;
2737 }
2738
2739
2740 if_statement*
2741 parser::parse_if_statement ()
2742 {
2743   const token* t = next ();
2744   if (! (t->type == tok_keyword && t->content == "if"))
2745     throw PARSE_ERROR (_("expected 'if'"));
2746   if_statement* s = new if_statement;
2747   s->tok = t;
2748
2749   t = next ();
2750   if (! (t->type == tok_operator && t->content == "("))
2751     throw PARSE_ERROR (_("expected '('"));
2752   swallow ();
2753
2754   s->condition = parse_expression ();
2755
2756   t = next ();
2757   if (! (t->type == tok_operator && t->content == ")"))
2758     throw PARSE_ERROR (_("expected ')'"));
2759   swallow ();
2760
2761   s->thenblock = parse_statement ();
2762
2763   t = peek ();
2764   if (t && t->type == tok_keyword && t->content == "else")
2765     {
2766       swallow ();
2767       s->elseblock = parse_statement ();
2768     }
2769   else
2770     s->elseblock = 0; // in case not otherwise initialized
2771
2772   return s;
2773 }
2774
2775
2776 expr_statement*
2777 parser::parse_expr_statement ()
2778 {
2779   expr_statement *es = new expr_statement;
2780   const token* t = peek ();
2781   if (t == NULL)
2782     throw PARSE_ERROR (_("expression statement expected"));
2783   // Copy, we only peeked, parse_expression might swallow.
2784   es->tok = new token (*t);
2785   es->value = parse_expression ();
2786   return es;
2787 }
2788
2789
2790 return_statement*
2791 parser::parse_return_statement ()
2792 {
2793   const token* t = next ();
2794   if (! (t->type == tok_keyword && t->content == "return"))
2795     throw PARSE_ERROR (_("expected 'return'"));
2796   if (context != con_function)
2797     throw PARSE_ERROR (_("found 'return' not in function context"));
2798   return_statement* s = new return_statement;
2799   s->tok = t;
2800   s->value = parse_expression ();
2801   return s;
2802 }
2803
2804
2805 delete_statement*
2806 parser::parse_delete_statement ()
2807 {
2808   const token* t = next ();
2809   if (! (t->type == tok_keyword && t->content == "delete"))
2810     throw PARSE_ERROR (_("expected 'delete'"));
2811   delete_statement* s = new delete_statement;
2812   s->tok = t;
2813   s->value = parse_expression ();
2814   return s;
2815 }
2816
2817
2818 next_statement*
2819 parser::parse_next_statement ()
2820 {
2821   const token* t = next ();
2822   if (! (t->type == tok_keyword && t->content == "next"))
2823     throw PARSE_ERROR (_("expected 'next'"));
2824   next_statement* s = new next_statement;
2825   s->tok = t;
2826   return s;
2827 }
2828
2829
2830 break_statement*
2831 parser::parse_break_statement ()
2832 {
2833   const token* t = next ();
2834   if (! (t->type == tok_keyword && t->content == "break"))
2835     throw PARSE_ERROR (_("expected 'break'"));
2836   break_statement* s = new break_statement;
2837   s->tok = t;
2838   return s;
2839 }
2840
2841
2842 continue_statement*
2843 parser::parse_continue_statement ()
2844 {
2845   const token* t = next ();
2846   if (! (t->type == tok_keyword && t->content == "continue"))
2847     throw PARSE_ERROR (_("expected 'continue'"));
2848   continue_statement* s = new continue_statement;
2849   s->tok = t;
2850   return s;
2851 }
2852
2853
2854 for_loop*
2855 parser::parse_for_loop ()
2856 {
2857   const token* t = next ();
2858   if (! (t->type == tok_keyword && t->content == "for"))
2859     throw PARSE_ERROR (_("expected 'for'"));
2860   for_loop* s = new for_loop;
2861   s->tok = t;
2862
2863   t = next ();
2864   if (! (t->type == tok_operator && t->content == "("))
2865     throw PARSE_ERROR (_("expected '('"));
2866   swallow ();
2867
2868   // initializer + ";"
2869   t = peek ();
2870   if (t && t->type == tok_operator && t->content == ";")
2871     {
2872       s->init = 0;
2873       swallow ();
2874     }
2875   else
2876     {
2877       s->init = parse_expr_statement ();
2878       t = next ();
2879       if (! (t->type == tok_operator && t->content == ";"))
2880         throw PARSE_ERROR (_("expected ';'"));
2881       swallow ();
2882     }
2883
2884   // condition + ";"
2885   t = peek ();
2886   if (t && t->type == tok_operator && t->content == ";")
2887     {
2888       literal_number* l = new literal_number(1);
2889       s->cond = l;
2890       s->cond->tok = next ();
2891     }
2892   else
2893     {
2894       s->cond = parse_expression ();
2895       t = next ();
2896       if (! (t->type == tok_operator && t->content == ";"))
2897         throw PARSE_ERROR (_("expected ';'"));
2898       swallow ();
2899     }
2900
2901   // increment + ")"
2902   t = peek ();
2903   if (t && t->type == tok_operator && t->content == ")")
2904     {
2905       s->incr = 0;
2906       swallow ();
2907     }
2908   else
2909     {
2910       s->incr = parse_expr_statement ();
2911       t = next ();
2912       if (! (t->type == tok_operator && t->content == ")"))
2913         throw PARSE_ERROR (_("expected ')'"));
2914       swallow ();
2915     }
2916
2917   // block
2918   s->block = parse_statement ();
2919
2920   return s;
2921 }
2922
2923
2924 for_loop*
2925 parser::parse_while_loop ()
2926 {
2927   const token* t = next ();
2928   if (! (t->type == tok_keyword && t->content == "while"))
2929     throw PARSE_ERROR (_("expected 'while'"));
2930   for_loop* s = new for_loop;
2931   s->tok = t;
2932
2933   t = next ();
2934   if (! (t->type == tok_operator && t->content == "("))
2935     throw PARSE_ERROR (_("expected '('"));
2936   swallow ();
2937
2938   // dummy init and incr fields
2939   s->init = 0;
2940   s->incr = 0;
2941
2942   // condition
2943   s->cond = parse_expression ();
2944
2945   t = next ();
2946   if (! (t->type == tok_operator && t->content == ")"))
2947     throw PARSE_ERROR (_("expected ')'"));
2948   swallow ();
2949
2950   // block
2951   s->block = parse_statement ();
2952
2953   return s;
2954 }
2955
2956
2957 foreach_loop*
2958 parser::parse_foreach_loop ()
2959 {
2960   const token* t = next ();
2961   if (! (t->type == tok_keyword && t->content == "foreach"))
2962     throw PARSE_ERROR (_("expected 'foreach'"));
2963   foreach_loop* s = new foreach_loop;
2964   s->tok = t;
2965   s->sort_direction = 0;
2966   s->sort_aggr = sc_none;
2967   s->value = NULL;
2968   s->limit = NULL;
2969
2970   t = next ();
2971   if (! (t->type == tok_operator && t->content == "("))
2972     throw PARSE_ERROR (_("expected '('"));
2973   swallow ();
2974
2975   symbol* lookahead_sym = NULL;
2976   int lookahead_sort = 0;
2977
2978   t = peek ();
2979   if (t && t->type == tok_identifier)
2980     {
2981       next ();
2982       lookahead_sym = new symbol;
2983       lookahead_sym->tok = t;
2984       lookahead_sym->name = t->content;
2985
2986       t = peek ();
2987       if (t && t->type == tok_operator &&
2988           (t->content == "+" || t->content == "-"))
2989         {
2990           lookahead_sort = (t->content == "+") ? 1 : -1;
2991           swallow ();
2992         }
2993
2994       t = peek ();
2995       if (t && t->type == tok_operator && t->content == "=")
2996         {
2997           swallow ();
2998           s->value = lookahead_sym;
2999           if (lookahead_sort)
3000             {
3001               s->sort_direction = lookahead_sort;
3002               s->sort_column = 0;
3003             }
3004           lookahead_sym = NULL;
3005         }
3006     }
3007
3008   // see also parse_array_in
3009
3010   bool parenthesized = false;
3011   t = peek ();
3012   if (!lookahead_sym && t && t->type == tok_operator && t->content == "[")
3013     {
3014       swallow ();
3015       parenthesized = true;
3016     }
3017
3018   if (lookahead_sym)
3019     {
3020       s->indexes.push_back (lookahead_sym);
3021       if (lookahead_sort)
3022         {
3023           s->sort_direction = lookahead_sort;
3024           s->sort_column = 1;
3025         }
3026       lookahead_sym = NULL;
3027     }
3028   else while (1)
3029     {
3030       t = next ();
3031       if (! (t->type == tok_identifier))
3032         throw PARSE_ERROR (_("expected identifier"));
3033       symbol* sym = new symbol;
3034       sym->tok = t;
3035       sym->name = t->content;
3036       s->indexes.push_back (sym);
3037
3038       t = peek ();
3039       if (t && t->type == tok_operator &&
3040           (t->content == "+" || t->content == "-"))
3041         {
3042           if (s->sort_direction)
3043             throw PARSE_ERROR (_("multiple sort directives"));
3044           s->sort_direction = (t->content == "+") ? 1 : -1;
3045           s->sort_column = s->indexes.size();
3046           swallow ();
3047         }
3048
3049       if (parenthesized)
3050         {
3051           t = peek ();
3052           if (t && t->type == tok_operator && t->content == ",")
3053             {
3054               swallow ();
3055               continue;
3056             }
3057           else if (t && t->type == tok_operator && t->content == "]")
3058             {
3059               swallow ();
3060               break;
3061             }
3062           else
3063             throw PARSE_ERROR (_("expected ',' or ']'"));
3064         }
3065       else
3066         break; // expecting only one expression
3067     }
3068
3069   t = next ();
3070   if (! (t->type == tok_keyword && t->content == "in"))
3071     throw PARSE_ERROR (_("expected 'in'"));
3072   swallow ();
3073
3074   s->base = parse_indexable();
3075
3076   // check if there was an array slice that was specified
3077   t = peek();
3078   if (t && t->type == tok_operator && t->content == "[")
3079     {
3080       swallow();
3081       while (1)
3082         {
3083           t = peek();
3084           if (t && t->type == tok_operator && t->content == "*")
3085             {
3086               swallow();
3087               s->array_slice.push_back (NULL);
3088             }
3089           else
3090             s->array_slice.push_back (parse_expression());
3091
3092           t = peek ();
3093           if (t && t->type == tok_operator && t->content == ",")
3094             {
3095               swallow ();
3096               continue;
3097             }
3098           else if (t && t->type == tok_operator && t->content == "]")
3099             {
3100               swallow ();
3101               break;
3102             }
3103           else
3104             throw PARSE_ERROR (_("expected ',' or ']'"));
3105         }
3106     }
3107
3108
3109   // check for atword, see also expect_ident_or_atword,
3110   t = peek ();
3111   if (t && t->type == tok_operator && t->content[0] == '@')
3112     {
3113       if (t->content == "@avg") s->sort_aggr = sc_average;
3114       else if (t->content == "@min") s->sort_aggr = sc_min;
3115       else if (t->content == "@max") s->sort_aggr = sc_max;
3116       else if (t->content == "@count") s->sort_aggr = sc_count;
3117       else if (t->content == "@sum") s->sort_aggr = sc_sum;
3118       else throw PARSE_ERROR(_("expected statistical operation"));
3119       swallow();
3120
3121       t = peek ();
3122       if (! (t && t->type == tok_operator && (t->content == "+" || t->content == "-")))
3123         throw PARSE_ERROR(_("expected sort directive"));
3124     }
3125
3126   t = peek ();
3127   if (t && t->type == tok_operator &&
3128       (t->content == "+" || t->content == "-"))
3129     {
3130       if (s->sort_direction)
3131         throw PARSE_ERROR (_("multiple sort directives"));
3132       s->sort_direction = (t->content == "+") ? 1 : -1;
3133       s->sort_column = 0;
3134       swallow ();
3135     }
3136
3137   t = peek ();
3138   if (tok_is(t, tok_keyword, "limit"))
3139     {
3140       swallow ();                       // get past the "limit"
3141       s->limit = parse_expression ();
3142     }
3143
3144   t = next ();
3145   if (! (t->type == tok_operator && t->content == ")"))
3146     throw PARSE_ERROR ("expected ')'");
3147   swallow ();
3148
3149   s->block = parse_statement ();
3150   return s;
3151 }
3152
3153
3154 expression*
3155 parser::parse_expression ()
3156 {
3157   return parse_assignment ();
3158 }
3159
3160
3161 expression*
3162 parser::parse_assignment ()
3163 {
3164   expression* op1 = parse_ternary ();
3165
3166   const token* t = peek ();
3167   // right-associative operators
3168   if (t && t->type == tok_operator
3169       && (t->content == "=" ||
3170           t->content == "<<<" ||
3171           t->content == "+=" ||
3172           t->content == "-=" ||
3173           t->content == "*=" ||
3174           t->content == "/=" ||
3175           t->content == "%=" ||
3176           t->content == "<<=" ||
3177           t->content == ">>=" ||
3178           t->content == "&=" ||
3179           t->content == "^=" ||
3180           t->content == "|=" ||
3181           t->content == ".=" ||
3182           false))
3183     {
3184       // NB: lvalueness is checked during elaboration / translation
3185       assignment* e = new assignment;
3186       e->left = op1;
3187       e->op = t->content;
3188       e->tok = t;
3189       next ();
3190       e->right = parse_expression ();
3191       op1 = e;
3192     }
3193
3194   return op1;
3195 }
3196
3197
3198 expression*
3199 parser::parse_ternary ()
3200 {
3201   expression* op1 = parse_logical_or ();
3202
3203   const token* t = peek ();
3204   if (t && t->type == tok_operator && t->content == "?")
3205     {
3206       ternary_expression* e = new ternary_expression;
3207       e->tok = t;
3208       e->cond = op1;
3209       next ();
3210       e->truevalue = parse_expression (); // XXX
3211
3212       t = next ();
3213       if (! (t->type == tok_operator && t->content == ":"))
3214         throw PARSE_ERROR (_("expected ':'"));
3215       swallow ();
3216
3217       e->falsevalue = parse_expression (); // XXX
3218       return e;
3219     }
3220   else
3221     return op1;
3222 }
3223
3224
3225 expression*
3226 parser::parse_logical_or ()
3227 {
3228   expression* op1 = parse_logical_and ();
3229
3230   const token* t = peek ();
3231   while (t && t->type == tok_operator && t->content == "||")
3232     {
3233       logical_or_expr* e = new logical_or_expr;
3234       e->tok = t;
3235       e->op = t->content;
3236       e->left = op1;
3237       next ();
3238       e->right = parse_logical_and ();
3239       op1 = e;
3240       t = peek ();
3241     }
3242
3243   return op1;
3244 }
3245
3246
3247 expression*
3248 parser::parse_logical_and ()
3249 {
3250   expression* op1 = parse_boolean_or ();
3251
3252   const token* t = peek ();
3253   while (t && t->type == tok_operator && t->content == "&&")
3254     {
3255       logical_and_expr *e = new logical_and_expr;
3256       e->left = op1;
3257       e->op = t->content;
3258       e->tok = t;
3259       next ();
3260       e->right = parse_boolean_or ();
3261       op1 = e;
3262       t = peek ();
3263     }
3264
3265   return op1;
3266 }
3267
3268
3269 expression*
3270 parser::parse_boolean_or ()
3271 {
3272   expression* op1 = parse_boolean_xor ();
3273
3274   const token* t = peek ();
3275   while (t && t->type == tok_operator && t->content == "|")
3276     {
3277       binary_expression* e = new binary_expression;
3278       e->left = op1;
3279       e->op = t->content;
3280       e->tok = t;
3281       next ();
3282       e->right = parse_boolean_xor ();
3283       op1 = e;
3284       t = peek ();
3285     }
3286
3287   return op1;
3288 }
3289
3290
3291 expression*
3292 parser::parse_boolean_xor ()
3293 {
3294   expression* op1 = parse_boolean_and ();
3295
3296   const token* t = peek ();
3297   while (t && t->type == tok_operator && t->content == "^")
3298     {
3299       binary_expression* e = new binary_expression;
3300       e->left = op1;
3301       e->op = t->content;
3302       e->tok = t;
3303       next ();
3304       e->right = parse_boolean_and ();
3305       op1 = e;
3306       t = peek ();
3307     }
3308
3309   return op1;
3310 }
3311
3312
3313 expression*
3314 parser::parse_boolean_and ()
3315 {
3316   expression* op1 = parse_array_in ();
3317
3318   const token* t = peek ();
3319   while (t && t->type == tok_operator && t->content == "&")
3320     {
3321       binary_expression* e = new binary_expression;
3322       e->left = op1;
3323       e->op = t->content;
3324       e->tok = t;
3325       next ();
3326       e->right = parse_array_in ();
3327       op1 = e;
3328       t = peek ();
3329     }
3330
3331   return op1;
3332 }
3333
3334
3335 expression*
3336 parser::parse_array_in ()
3337 {
3338   // This is a very tricky case.  All these are legit expressions:
3339   // "a in b"  "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
3340   vector<expression*> indexes;
3341   bool parenthesized = false;
3342
3343   const token* t = peek ();
3344   if (t && t->type == tok_operator && t->content == "[")
3345     {
3346       swallow ();
3347       parenthesized = true;
3348     }
3349
3350   while (1)
3351     {
3352       t = peek();
3353       if (t && t->type == tok_operator && t->content == "*" && parenthesized)
3354         {
3355           swallow();
3356           indexes.push_back(NULL);
3357         }
3358       else
3359         {
3360           expression* op1 = parse_comparison_or_regex_query ();
3361           indexes.push_back (op1);
3362         }
3363
3364       if (parenthesized)
3365         {
3366           const token* t = peek ();
3367           if (t && t->type == tok_operator && t->content == ",")
3368             {
3369               swallow ();
3370               continue;
3371             }
3372           else if (t && t->type == tok_operator && t->content == "]")
3373             {
3374               swallow ();
3375               break;
3376             }
3377           else
3378             throw PARSE_ERROR (_("expected ',' or ']'"));
3379         }
3380       else
3381         break; // expecting only one expression
3382     }
3383
3384   t = peek ();
3385   if (t && t->type == tok_keyword && t->content == "in")
3386     {
3387       array_in *e = new array_in;
3388       e->tok = t;
3389       next ();
3390
3391       arrayindex* a = new arrayindex;
3392       a->indexes = indexes;
3393       a->base = parse_indexable();
3394       a->tok = a->base->tok;
3395       e->operand = a;
3396       return e;
3397     }
3398   else if (indexes.size() == 1) // no "in" - need one expression only
3399     return indexes[0];
3400   else
3401     throw PARSE_ERROR (_("unexpected comma-separated expression list"));
3402 }
3403
3404
3405 expression*
3406 parser::parse_comparison_or_regex_query ()
3407 {
3408   expression* op1 = parse_shift ();
3409
3410   // XXX precedence -- perhaps a =~ b == c =~ d --> (a =~ b) == (c =~ d) ?
3411   const token *t = peek();
3412   if (t && t->type == tok_operator
3413       && (t->content == "=~" ||
3414           t->content == "!~"))
3415     {
3416       regex_query* r = new regex_query;
3417       r->left = op1;
3418       r->op = t->content;
3419       r->tok = t;
3420       next ();
3421       r->right = parse_literal_string();
3422       op1 = r;
3423       t = peek ();
3424     }
3425   else while (t && t->type == tok_operator
3426       && (t->content == ">" ||
3427           t->content == "<" ||
3428           t->content == "==" ||
3429           t->content == "!=" ||
3430           t->content == "<=" ||
3431           t->content == ">="))
3432     {
3433       comparison* e = new comparison;
3434       e->left = op1;
3435       e->op = t->content;
3436       e->tok = t;
3437       next ();
3438       e->right = parse_shift ();
3439       op1 = e;
3440       t = peek ();
3441     }
3442
3443   return op1;
3444 }
3445
3446
3447 expression*
3448 parser::parse_shift ()
3449 {
3450   expression* op1 = parse_concatenation ();
3451
3452   const token* t = peek ();
3453   while (t && t->type == tok_operator &&
3454          (t->content == "<<" || t->content == ">>"))
3455     {
3456       binary_expression* e = new binary_expression;
3457       e->left = op1;
3458       e->op = t->content;
3459       e->tok = t;
3460       next ();
3461       e->right = parse_concatenation ();
3462       op1 = e;
3463       t = peek ();
3464     }
3465
3466   return op1;
3467 }
3468
3469
3470 expression*
3471 parser::parse_concatenation ()
3472 {
3473   expression* op1 = parse_additive ();
3474
3475   const token* t = peek ();
3476   // XXX: the actual awk string-concatenation operator is *whitespace*.
3477   // I don't know how to easily to model that here.
3478   while (t && t->type == tok_operator && t->content == ".")
3479     {
3480       concatenation* e = new concatenation;
3481       e->left = op1;
3482       e->op = t->content;
3483       e->tok = t;
3484       next ();
3485       e->right = parse_additive ();
3486       op1 = e;
3487       t = peek ();
3488     }
3489
3490   return op1;
3491 }
3492
3493
3494 expression*
3495 parser::parse_additive ()
3496 {
3497   expression* op1 = parse_multiplicative ();
3498
3499   const token* t = peek ();
3500   while (t && t->type == tok_operator
3501       && (t->content == "+" || t->content == "-"))
3502     {
3503       binary_expression* e = new binary_expression;
3504       e->op = t->content;
3505       e->left = op1;
3506       e->tok = t;
3507       next ();
3508       e->right = parse_multiplicative ();
3509       op1 = e;
3510       t = peek ();
3511     }
3512
3513   return op1;
3514 }
3515
3516
3517 expression*
3518 parser::parse_multiplicative ()
3519 {
3520   expression* op1 = parse_unary ();
3521
3522   const token* t = peek ();
3523   while (t && t->type == tok_operator
3524       && (t->content == "*" || t->content == "/" || t->content == "%"))
3525     {
3526       binary_expression* e = new binary_expression;
3527       e->op = t->content;
3528       e->left = op1;
3529       e->tok = t;
3530       next ();
3531       e->right = parse_unary ();
3532       op1 = e;
3533       t = peek ();
3534     }
3535
3536   return op1;
3537 }
3538
3539
3540 expression*
3541 parser::parse_unary ()
3542 {
3543   const token* t = peek ();
3544   if (t && t->type == tok_operator
3545       && (t->content == "+" ||
3546           t->content == "-" ||
3547           t->content == "!" ||
3548           t->content == "~" ||
3549           false))
3550     {
3551       unary_expression* e = new unary_expression;
3552       e->op = t->content;
3553       e->tok = t;
3554       next ();
3555       e->operand = parse_unary ();
3556       return e;
3557     }
3558   else
3559     return parse_crement ();
3560 }
3561
3562
3563 expression*
3564 parser::parse_crement () // as in "increment" / "decrement"
3565 {
3566   // NB: Ideally, we'd parse only a symbol as an operand to the
3567   // *crement operators, instead of a general expression value.  We'd
3568   // need more complex lookahead code to tell apart the postfix cases.
3569   // So we just punt, and leave it to pass-3 to signal errors on
3570   // cases like "4++".
3571
3572   const token* t = peek ();
3573   if (t && t->type == tok_operator
3574       && (t->content == "++" || t->content == "--"))
3575     {
3576       pre_crement* e = new pre_crement;
3577       e->op = t->content;
3578       e->tok = t;
3579       next ();
3580       e->operand = parse_dwarf_value ();
3581       return e;
3582     }
3583
3584   // post-crement or non-crement
3585   expression *op1 = parse_dwarf_value ();
3586
3587   t = peek ();
3588   if (t && t->type == tok_operator
3589       && (t->content == "++" || t->content == "--"))
3590     {
3591       post_crement* e = new post_crement;
3592       e->op = t->content;
3593       e->tok = t;
3594       next ();
3595       e->operand = op1;
3596       return e;
3597     }
3598   else
3599     return op1;
3600 }
3601
3602
3603 expression*
3604 parser::parse_dwarf_value ()
3605 {
3606   expression* expr = NULL;
3607   target_symbol* tsym = NULL;
3608
3609   // With '&' we'll definitely be making a target symbol of some sort
3610   const token* addrtok = peek_op ("&") ? next () : NULL;
3611   bool addressof = (addrtok != NULL);
3612
3613   // First try target_symbol types: $var, @cast, and @var.
3614   const token* t = peek ();
3615   if (t && t->type == tok_identifier && t->content[0] == '$')
3616     expr = tsym = parse_target_symbol ();
3617   else if (tok_is (t, tok_operator, "@cast"))
3618     expr = tsym = parse_cast_op ();
3619   else if (tok_is (t, tok_operator, "@var"))
3620     expr = tsym = parse_atvar_op ();
3621   else if (addressof && !input.has_version("2.6"))
3622     // '&' on old version only allowed specific target_symbol types
3623     throw PARSE_ERROR (_("expected @cast, @var or $var"));
3624   else
3625     // Otherwise just get a plain value of any sort.
3626     expr = parse_value ();
3627
3628   // If we had '&' or see any target suffixes, that forces a target_symbol.
3629   // For compatibility, we only do this starting with 2.6.
3630   if (!tsym && (addressof || peek_target_symbol_components ())
3631       && input.has_version("2.6"))
3632     {
3633       autocast_op *cop = new autocast_op;
3634       cop->tok = addrtok ?: peek ();
3635       cop->operand = expr;
3636       expr = tsym = cop;
3637     }
3638
3639   if (tsym)
3640     {
3641       // Parse the rest of any kind of target symbol
3642       tsym->addressof = addressof;
3643       parse_target_symbol_components (tsym);
3644     }
3645
3646   return expr;
3647 }
3648
3649
3650 expression*
3651 parser::parse_value ()
3652 {
3653   const token* t = peek ();
3654   if (! t)
3655     throw PARSE_ERROR (_("expected value"));
3656
3657   if (t->type == tok_embedded)
3658     {
3659       if (! privileged)
3660         throw PARSE_ERROR (_("embedded expression code in unprivileged script; need stap -g"), false);
3661
3662       embedded_expr *e = new embedded_expr;
3663       e->tok = t;
3664       e->code = t->content;
3665       next ();
3666       return e;
3667     }
3668
3669   if (t->type == tok_operator && t->content == "(")
3670     {
3671       swallow ();
3672       expression* e = parse_expression ();
3673       t = next ();
3674       if (! (t->type == tok_operator && t->content == ")"))
3675         throw PARSE_ERROR (_("expected ')'"));
3676       swallow ();
3677       return e;
3678     }
3679   else if (t->type == tok_identifier
3680            || (t->type == tok_operator && t->content[0] == '@'))
3681     return parse_symbol ();
3682   else
3683     return parse_literal ();
3684 }
3685
3686
3687 const token *
3688 parser::parse_hist_op_or_bare_name (hist_op *&hop, interned_string &name)
3689 {
3690   hop = NULL;
3691   const token* t = expect_ident_or_atword (name);
3692   if (name == "@hist_linear" || name == "@hist_log")
3693     {
3694       hop = new hist_op;
3695       if (name == "@hist_linear")
3696         hop->htype = hist_linear;
3697       else if (name == "@hist_log")
3698         hop->htype = hist_log;
3699       hop->tok = t;
3700       expect_op("(");
3701       hop->stat = parse_expression ();
3702       int64_t tnum;
3703       if (hop->htype == hist_linear)
3704         {
3705           for (size_t i = 0; i < 3; ++i)
3706             {
3707               expect_op (",");
3708               expect_number (tnum);
3709               hop->params.push_back (tnum);
3710             }
3711         }
3712       expect_op(")");
3713     }
3714   return t;
3715 }
3716
3717
3718 indexable*
3719 parser::parse_indexable ()
3720 {
3721   hist_op *hop = NULL;
3722   interned_string name;
3723   const token *tok = parse_hist_op_or_bare_name(hop, name);
3724   if (hop)
3725     return hop;
3726   else
3727     {
3728       symbol* sym = new symbol;
3729       sym->name = name;
3730       sym->tok = tok;
3731       return sym;
3732     }
3733 }
3734
3735
3736 // var, indexable[index], func(parms), printf("...", ...),
3737 // @defined, @entry, @stat_op(stat)
3738 expression* parser::parse_symbol ()
3739 {
3740   hist_op *hop = NULL;
3741   symbol *sym = NULL;
3742   interned_string name;
3743   const token *t = parse_hist_op_or_bare_name(hop, name);
3744
3745   if (!hop)
3746     {
3747       // If we didn't get a hist_op, then we did get an identifier. We can
3748       // now scrutinize this identifier for the various magic forms of identifier
3749       // (printf, @stat_op...)
3750
3751       // NB: PR11343: @defined() is not incompatible with earlier versions
3752       // of stap, so no need to check session.compatible for 1.2
3753       if (name == "@defined")
3754         return parse_defined_op (t);
3755
3756       if (name == "@entry")
3757         return parse_entry_op (t);
3758
3759       if (name == "@perf")
3760         return parse_perf_op (t);
3761
3762       if (name.size() > 0 && name[0] == '@')
3763         {
3764           stat_op *sop = new stat_op;
3765           if (name == "@avg")
3766             sop->ctype = sc_average;
3767           else if (name == "@count")
3768             sop->ctype = sc_count;
3769           else if (name == "@sum")
3770             sop->ctype = sc_sum;
3771           else if (name == "@min")
3772             sop->ctype = sc_min;
3773           else if (name == "@max")
3774             sop->ctype = sc_max;
3775           else
3776             throw PARSE_ERROR(_F("unknown operator %s",
3777                                  name.to_string().c_str()));
3778           expect_op("(");
3779           sop->tok = t;
3780           sop->stat = parse_expression ();
3781           expect_op(")");
3782           return sop;
3783         }
3784
3785       else if (print_format *fmt = print_format::create(t))
3786         {
3787           expect_op("(");
3788           if ((name == "print" || name == "println" ||
3789                name == "sprint" || name == "sprintln") &&
3790               (peek_op("@hist_linear") || peek_op("@hist_log")))
3791             {
3792               // We have a special case where we recognize
3793               // print(@hist_foo(bar)) as a magic print-the-histogram
3794               // construct. This is sort of gross but it avoids
3795               // promoting histogram references to typeful
3796               // expressions.
3797
3798               hop = NULL;
3799               t = parse_hist_op_or_bare_name(hop, name);
3800               assert(hop);
3801
3802               // It is, sadly, possible that even while parsing a
3803               // hist_op, we *mis-guessed* and the user wishes to
3804               // print(@hist_op(foo)[bucket]), a scalar. In that case
3805               // we must parse the arrayindex and print an expression.
3806               //
3807               // XXX: This still fails if the arrayindex is part of a
3808               // larger expression.  To really handle everything, we'd
3809               // need to push back all the hist tokens start over.
3810
3811               if (!peek_op ("["))
3812                 fmt->hist = hop;
3813               else
3814                 {
3815                   // This is simplified version of the
3816                   // multi-array-index parser below, because we can
3817                   // only ever have one index on a histogram anyways.
3818                   expect_op("[");
3819                   struct arrayindex* ai = new arrayindex;
3820                   ai->tok = t;
3821                   ai->base = hop;
3822                   ai->indexes.push_back (parse_expression ());
3823                   expect_op("]");
3824                   fmt->args.push_back(ai);
3825
3826                   // Consume any subsequent arguments.
3827                   while (!peek_op (")"))
3828                     {
3829                       expect_op(",");
3830                       expression *e = parse_expression ();
3831                       fmt->args.push_back(e);
3832                     }
3833                 }
3834             }
3835           else
3836             {
3837               int min_args = 0;
3838               bool consumed_arg = false;
3839               if (fmt->print_with_format)
3840                 {
3841                   // Consume and convert a format string. Agreement between the
3842                   // format string and the arguments is postponed to the
3843                   // typechecking phase.
3844                   literal_string* ls = parse_literal_string();
3845                   fmt->raw_components = ls->value;
3846                   delete ls;
3847                   fmt->components = print_format::string_to_components (fmt->raw_components);
3848                   consumed_arg = true;
3849                 }
3850               else if (fmt->print_with_delim)
3851                 {
3852                   // Consume a delimiter to separate arguments.
3853                   literal_string* ls = parse_literal_string();
3854                   fmt->delimiter = ls->value;
3855                   delete ls;
3856                   consumed_arg = true;
3857                   min_args = 2; // so that the delim is used at least once
3858                 }
3859               else if (!fmt->print_with_newline)
3860                 {
3861                   // If we are not printing with a format string, nor with a
3862                   // delim, nor with a newline, then it's either print() or
3863                   // sprint(), both of which require at least one argument (of
3864                   // any type).
3865                   min_args = 1;
3866                 }
3867
3868               // Consume any subsequent arguments.
3869               while (min_args || !peek_op (")"))
3870                 {
3871                   if (consumed_arg)
3872                     expect_op(",");
3873                   expression *e = parse_expression ();
3874                   fmt->args.push_back(e);
3875                   consumed_arg = true;
3876                   if (min_args)
3877                     --min_args;
3878                 }
3879             }
3880           expect_op(")");
3881           return fmt;
3882         }
3883
3884       else if (peek_op ("(")) // function call
3885         {
3886           swallow ();
3887           struct functioncall* f = new functioncall;
3888           f->tok = t;
3889           f->function = name;
3890           // Allow empty actual parameter list
3891           if (peek_op (")"))
3892             {
3893               swallow ();
3894               return f;
3895             }
3896           while (1)
3897             {
3898               f->args.push_back (parse_expression ());
3899               if (peek_op (")"))
3900                 {
3901                   swallow ();
3902                   break;
3903                 }
3904               else if (peek_op (","))
3905                 {
3906                   swallow ();
3907                   continue;
3908                 }
3909               else
3910                 throw PARSE_ERROR (_("expected ',' or ')'"));
3911             }
3912           return f;
3913         }
3914
3915       else
3916         {
3917           sym = new symbol;
3918           sym->name = name;
3919           sym->tok = t;
3920         }
3921     }
3922
3923   // By now, either we had a hist_op in the first place, or else
3924   // we had a plain word and it was converted to a symbol.
3925
3926   assert (!hop != !sym); // logical XOR
3927
3928   // All that remains is to check for array indexing
3929
3930   if (peek_op ("[")) // array
3931     {
3932       swallow ();
3933       struct arrayindex* ai = new arrayindex;
3934       ai->tok = t;
3935
3936       if (hop)
3937         ai->base = hop;
3938       else
3939         ai->base = sym;
3940
3941       while (1)
3942         {
3943           if (peek_op("*"))
3944             {
3945               swallow();
3946               ai->indexes.push_back (NULL);
3947             }
3948           else
3949             ai->indexes.push_back (parse_expression ());
3950           if (peek_op ("]"))
3951             {
3952               swallow ();
3953               break;
3954             }
3955           else if (peek_op (","))
3956             {
3957               swallow ();
3958               continue;
3959             }
3960           else
3961             throw PARSE_ERROR (_("expected ',' or ']'"));
3962         }
3963
3964       return ai;
3965     }
3966
3967   // If we got to here, we *should* have a symbol; if we have
3968   // a hist_op on its own, it doesn't count as an expression,
3969   // so we throw a parse error.
3970
3971   if (hop)
3972     throw PARSE_ERROR(_("base histogram operator where expression expected"), t);
3973
3974   return sym;
3975 }
3976
3977 // Parse a $var.
3978 target_symbol* parser::parse_target_symbol ()
3979 {
3980   const token* t = next ();
3981   if (t->type == tok_identifier && t->content[0]=='$')
3982     {
3983       // target_symbol time
3984       target_symbol *tsym = new target_symbol;
3985       tsym->tok = t;
3986       tsym->name = t->content;
3987       return tsym;
3988     }
3989
3990   throw PARSE_ERROR (_("expected $var"));
3991 }
3992
3993
3994 // Parse a @cast.
3995 cast_op* parser::parse_cast_op ()
3996 {
3997   const token* t = next ();
3998   if (t->type == tok_operator && t->content == "@cast")
3999     {
4000       cast_op *cop = new cast_op;
4001       cop->tok = t;
4002       cop->name = t->content;
4003       expect_op("(");
4004       cop->operand = parse_expression ();
4005       expect_op(",");
4006       expect_unknown(tok_string, cop->type_name);
4007       if (cop->type_name.empty())
4008         throw PARSE_ERROR (_("expected non-empty string"));
4009       if (peek_op (","))
4010         {
4011           swallow ();
4012           expect_unknown(tok_string, cop->module);
4013         }
4014       expect_op(")");
4015       return cop;
4016     }
4017
4018   throw PARSE_ERROR (_("expected @cast"));
4019 }
4020
4021
4022 // Parse a @var.
4023 atvar_op* parser::parse_atvar_op ()
4024 {
4025   const token* t = next ();
4026   if (t->type == tok_operator && t->content == "@var")
4027     {
4028       atvar_op *aop = new atvar_op;
4029       aop->tok = t;
4030       aop->name = t->content;
4031       expect_op("(");
4032       expect_unknown(tok_string, aop->target_name);
4033       size_t found_at = aop->target_name.find("@");
4034       if (found_at != string::npos)
4035         aop->cu_name = aop->target_name.substr(found_at + 1);
4036       else
4037         aop->cu_name = "";
4038       if (peek_op (","))
4039         {
4040           swallow ();
4041           expect_unknown (tok_string, aop->module);
4042         }
4043       else
4044         aop->module = "";
4045       expect_op(")");
4046       return aop;
4047     }
4048
4049   throw PARSE_ERROR (_("expected @var"));
4050 }
4051
4052
4053 // Parse a @defined().  Given head token has already been consumed.
4054 expression* parser::parse_defined_op (const token* t)
4055 {
4056   defined_op* dop = new defined_op;
4057   dop->tok = t;
4058   expect_op("(");
4059   dop->operand = parse_expression ();
4060   expect_op(")");
4061   return dop;
4062 }
4063
4064
4065 // Parse a @entry().  Given head token has already been consumed.
4066 expression* parser::parse_entry_op (const token* t)
4067 {
4068   entry_op* eop = new entry_op;
4069   eop->tok = t;
4070   expect_op("(");
4071   eop->operand = parse_expression ();
4072   expect_op(")");
4073   return eop;
4074 }
4075
4076
4077 // Parse a @perf().  Given head token has already been consumed.
4078 expression* parser::parse_perf_op (const token* t)
4079 {
4080   perf_op* pop = new perf_op;
4081   pop->tok = t;
4082   expect_op("(");
4083   pop->operand = parse_literal_string ();
4084   if (pop->operand->value == "")
4085     throw PARSE_ERROR (_("expected non-empty string"));
4086   expect_op(")");
4087   return pop;
4088 }
4089
4090
4091 bool
4092 parser::peek_target_symbol_components ()
4093 {
4094   const token * t = peek ();
4095   return t &&
4096     ((t->type == tok_operator && (t->content == "->" || t->content == "["))
4097      || (t->type == tok_identifier &&
4098          t->content.find_first_not_of('$') == string::npos));
4099 }
4100
4101 void
4102 parser::parse_target_symbol_components (target_symbol* e)
4103 {
4104   bool pprint = false;
4105
4106   // check for pretty-print in the form $foo$
4107   string base = e->name;
4108   size_t pprint_pos = base.find_last_not_of('$');
4109   if (0 < pprint_pos && pprint_pos < base.length() - 1)
4110     {
4111       string pprint_val = base.substr(pprint_pos + 1);
4112       base.erase(pprint_pos + 1);
4113       e->name = base;
4114       e->components.push_back (target_symbol::component(e->tok, pprint_val, true));
4115       pprint = true;
4116     }
4117
4118   while (!pprint)
4119     {
4120       if (peek_op ("->"))
4121         {
4122           const token* t = next();
4123           interned_string member;
4124           expect_ident_or_keyword (member);
4125
4126           // check for pretty-print in the form $foo->$ or $foo->bar$
4127           pprint_pos = member.find_last_not_of('$');
4128           interned_string pprint_val;
4129           if (pprint_pos == string::npos || pprint_pos < member.length() - 1)
4130             {
4131               pprint_val = member.substr(pprint_pos + 1);
4132               member = member.substr(0, pprint_pos + 1);
4133               pprint = true;
4134             }
4135
4136           if (!member.empty())
4137             e->components.push_back (target_symbol::component(t, member));
4138           if (pprint)
4139             e->components.push_back (target_symbol::component(t, pprint_val, true));
4140         }
4141       else if (peek_op ("["))
4142         {
4143           const token* t = next();
4144           expression* index = parse_expression();
4145           literal_number* ln = dynamic_cast<literal_number*>(index);
4146           if (ln)
4147             e->components.push_back (target_symbol::component(t, ln->value));
4148           else
4149             e->components.push_back (target_symbol::component(t, index));
4150           expect_op ("]");
4151         }
4152       else
4153         break;
4154     }
4155
4156   if (!pprint)
4157     {
4158       // check for pretty-print in the form $foo $
4159       // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$
4160       const token* t = peek();
4161       if (t != NULL && t->type == tok_identifier &&
4162           t->content.find_first_not_of('$') == string::npos)
4163         {
4164           t = next();
4165           e->components.push_back (target_symbol::component(t, t->content, true));
4166           pprint = true;
4167         }
4168     }
4169
4170   if (pprint && (peek_op ("->") || peek_op("[")))
4171     throw PARSE_ERROR(_("-> and [ are not accepted for a pretty-printing variable"));
4172 }
4173
4174 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */