parse.cxx

   1 // recursive descent parser for systemtap scripts
   2 // Copyright (C) 2005-2014 Red Hat Inc.
   3 // Copyright (C) 2006 Intel Corporation.
   4 // Copyright (C) 2007 Bull S.A.S
   5 // Copyright (C) 2014 Peter Kjellstrom <cap@nsc.liu.se>
   6 //
   7 // This file is part of systemtap, and is free software.  You can
   8 // redistribute it and/or modify it under the terms of the GNU General
   9 // Public License (GPL); either version 2, or (at your option) any
  10 // later version.
  11
  12 #include "config.h"
  13 #include "staptree.h"
  14 #include "parse.h"
  15 #include "session.h"
  16 #include "util.h"
  17
  18 #include <iostream>
  19
  20 #include <fstream>
  21 #include <cctype>
  22 #include <cstdlib>
  23 #include <cassert>
  24 #include <cerrno>
  25 #include <climits>
  26 #include <sstream>
  27 #include <cstring>
  28 #include <cctype>
  29 #include <iterator>
  30
  31 extern "C" {
  32 #include <fnmatch.h>
  33 }
  34
  35 using namespace std;
  36
  37
     // Tokenizer for systemtap script text.  Each parser holds one lexer
     // (parser::input) and pulls tokens from it through scan().
  38 class lexer
  39 {
  40 public:
  41   bool ate_comment; // current token follows a comment
  42   bool ate_whitespace; // the most recent token followed whitespace
  43   bool saw_tokens; // the lexer found tokens (before preprocessing occurred)
  44
       // Produce the next token.  Callers in this file treat a null result
       // as end of input.
  45   token* scan ();
       // Arguments, as passed by parser::parser: the input stream, the name
       // the input is known by, and the session.
  46   lexer (istream&, const string&, systemtap_session&);
       // Called by parser::parse_library_macros with the stapfile it just
       // created.  NOTE(review): presumably this is the file stamped into
       // token locations -- confirm against the definition.
  47   void set_current_file (stapfile* f);
       // NOTE(review): presumably the token chain attached to scanned tokens
       // (e.g. for synthetic probes) -- definition not visible here.
  48   void set_current_token_chain (const token* tok);
  49
       // NOTE(review): presumably the reserved words of the language.
  50   static set<string> keywords;
       // Built-in '@' operators, stored with their leading '@'; the macro
       // pass consults this to warn when a macro shadows one of them.
  51   static set<string> atwords;
  52 private:
       // Low-level character access.  NOTE(review): exact semantics (EOF
       // value, cursor bookkeeping) are defined outside this view.
  53   inline int input_get ();
  54   inline int input_peek (unsigned n=0);
  55   void input_put (const string&, const token*);
  56   string input_name;
  57   string input_contents;
  58   const char *input_pointer; // index into input_contents
  59   const char *input_end;
       // Line/column tracking.  NOTE(review): the cursor_suspend_* fields
       // look like a saved position used while the cursor is "suspended";
       // confirm against input_put/input_get.
  60   unsigned cursor_suspend_count;
  61   unsigned cursor_suspend_line;
  62   unsigned cursor_suspend_column;
  63   unsigned cursor_line;
  64   unsigned cursor_column;
  65   systemtap_session& session;
  66   stapfile* current_file;
  67   const token* current_token_chain;
  68 };
  69
  70
     // Recursive-descent parser for one input (script file, stream, synthetic
     // probe text, or macro library).  Tokens come from the lexer and pass
     // through a macro pass (scan_pp1) and a conditional pass (scan_pp)
     // before the parse_* nonterminal methods see them.
     // NOTE(review): the exact layering of scan_pp/next/peek is defined
     // outside this declaration -- confirm there.
  71 class parser
  72 {
  73 public:
       // s: session; n: name the input is known by; i: input stream;
       // p: stored in `privileged`.
  74   parser (systemtap_session& s, const string& n, istream& i, bool p);
  75   ~parser ();
  76
       // Entry points used by the free functions parse(),
       // parse_synthetic_probe() and parse_library_macros().
  77   stapfile* parse (bool errs_as_warnings);
  78   probe* parse_synthetic_probe (const token* chain, bool errs_as_warnings);
  79   stapfile* parse_library_macros (bool errs_as_warnings);
  80
  81 private:
       // State of one open %( ... %) conditional; stored next to its opening
       // token in pp_state.  NOTE(review): presumably KEEP/SKIP says whether
       // the current THEN/ELSE branch is being emitted -- confirm in scan_pp.
  82   typedef enum {
  83       PP_NONE,
  84       PP_KEEP_THEN,
  85       PP_SKIP_THEN,
  86       PP_KEEP_ELSE,
  87       PP_SKIP_ELSE,
  88   } pp_state_t;
  89
  90   struct pp1_activation;
  91
       // A macro parameter, represented as a macro declaration.  It is a
       // "closure" when it remembers the activation it was supplied from.
  92   struct pp_macrodecl : public macrodecl {
  93     pp1_activation* parent_act; // used for param bindings
  94     virtual bool is_closure() { return parent_act != 0; }
  95     pp_macrodecl () : macrodecl(), parent_act(0) { }
  96   };
  97
       // NB: the constructor builds `input` from `input_name`, and members
       // are initialized in declaration order, so `input_name` must stay
       // declared before `input`.
  98   systemtap_session& session;
  99   string input_name;
 100   lexer input;
 101   bool privileged;
 102   parse_context context;
 103
 104   // preprocessing subordinate, first pass (macros)
       // One entry of the macro expansion stack (pp1_state).
 105   struct pp1_activation {
 106     const token* tok;
 107     unsigned cursor; // position within macro body
 108     map<string, pp_macrodecl*> params;
 109
 110     macrodecl* curr_macro;
 111
 112     pp1_activation (const token* tok, macrodecl* curr_macro)
 113       : tok(tok), cursor(0), curr_macro(curr_macro) { }
 114     ~pp1_activation ();
 115   };
 116
       // Macros defined so far in this input, by name (without the '@').
 117   map<string, macrodecl*> pp1_namespace;
       // Stack of macro expansions in progress; back() is the innermost.
 118   vector<pp1_activation*> pp1_state;
 119   const token* next_pp1 ();
 120   const token* scan_pp1 ();
 121   const token* slurp_pp1_param (vector<const token*>& param);
 122   const token* slurp_pp1_body (vector<const token*>& body);
 123
 124   // preprocessing subordinate, final pass (conditionals)
 125   vector<pair<const token*, pp_state_t> > pp_state;
 126   const token* scan_pp ();
 127   const token* skip_pp ();
 128
 129   // scanning state
 130   const token* next ();
 131   const token* peek ();
 132
 133   // Advance past and throw away current token after peek () or next ().
 134   void swallow ();
 135
 136   const token* systemtap_v_seen;
 137   const token* last_t; // the last value returned by peek() or next()
 138   const token* next_t; // lookahead token
 139
 140   // expectations, these swallow the token
 141   void expect_known (token_type tt, string const & expected);
 142   void expect_unknown (token_type tt, string & target);
 143   void expect_unknown2 (token_type tt1, token_type tt2, string & target);
 144
 145   // convenience forms, these also swallow the token
 146   void expect_op (string const & expected);
 147   void expect_kw (string const & expected);
 148   void expect_number (int64_t & expected);
 149   void expect_ident_or_keyword (string & target);
 150
 151   // convenience forms, which return true or false, these don't swallow token
 152   bool peek_op (string const & op);
 153   bool peek_kw (string const & kw);
 154
 155   // convenience forms, which return the token
 156   const token* expect_kw_token (string const & expected);
 157   const token* expect_ident_or_atword (string & target);
 158
       // Report pe through the session and count it in num_errors.  When pe
       // carries no token, last_t is used for the location.
 159   void print_error (const parse_error& pe, bool errs_as_warnings = false);
 160   unsigned num_errors;
 161
 162 private: // nonterminals
 163   void parse_probe (vector<probe*>&, vector<probe_alias*>&);
 164   void parse_global (vector<vardecl*>&, vector<probe*>&);
 165   void parse_functiondecl (vector<functiondecl*>&);
 166   embeddedcode* parse_embeddedcode ();
 167   probe_point* parse_probe_point ();
 168   literal_string* consume_string_literals (const token*);
 169   literal_string* parse_literal_string ();
 170   literal* parse_literal ();
 171   block* parse_stmt_block ();
 172   try_block* parse_try_block ();
 173   statement* parse_statement ();
 174   if_statement* parse_if_statement ();
 175   for_loop* parse_for_loop ();
 176   for_loop* parse_while_loop ();
 177   foreach_loop* parse_foreach_loop ();
 178   expr_statement* parse_expr_statement ();
 179   return_statement* parse_return_statement ();
 180   delete_statement* parse_delete_statement ();
 181   next_statement* parse_next_statement ();
 182   break_statement* parse_break_statement ();
 183   continue_statement* parse_continue_statement ();
 184   indexable* parse_indexable ();
 185   const token *parse_hist_op_or_bare_name (hist_op *&hop, string &name);
 186   target_symbol *parse_target_symbol (const token* t);
 187   expression* parse_entry_op (const token* t);
 188   expression* parse_defined_op (const token* t);
 189   expression* parse_perf_op (const token* t);
       // Expression nonterminals.  NOTE(review): the declaration order below
       // appears to run from loosest to tightest binding -- confirm against
       // the definitions.
 190   expression* parse_expression ();
 191   expression* parse_assignment ();
 192   expression* parse_ternary ();
 193   expression* parse_logical_or ();
 194   expression* parse_logical_and ();
 195   expression* parse_boolean_or ();
 196   expression* parse_boolean_xor ();
 197   expression* parse_boolean_and ();
 198   expression* parse_array_in ();
 199   expression* parse_comparison_or_regex_query ();
 200   expression* parse_shift ();
 201   expression* parse_concatenation ();
 202   expression* parse_additive ();
 203   expression* parse_multiplicative ();
 204   expression* parse_unary ();
 205   expression* parse_crement ();
 206   expression* parse_value ();
 207   expression* parse_symbol ();
 208
 209   void parse_target_symbol_components (target_symbol* e);
 210 };
 211
 212
 213 // ------------------------------------------------------------------------
 214
 215 stapfile*
 216 parse (systemtap_session& s, istream& i, bool pr, bool errs_as_warnings)
 217 {
 218   parser p (s, "<input>", i, pr);
 219   return p.parse (errs_as_warnings);
 220 }
 221
 222
 223 stapfile*
 224 parse (systemtap_session& s, const string& name, bool pr, bool errs_as_warnings)
 225 {
 226   ifstream i(name.c_str(), ios::in);
 227   if (i.fail())
 228     {
 229       cerr << (file_exists(name)
 230                ? _F("Input file '%s' can't be opened for reading.", name.c_str())
 231                : _F("Input file '%s' is missing.", name.c_str()))
 232            << endl;
 233       return 0;
 234     }
 235
 236   parser p (s, name, i, pr);
 237   return p.parse (errs_as_warnings);
 238 }
 239
 240 stapfile*
 241 parse_library_macros (systemtap_session& s, const string& name, bool errs_as_warnings)
 242 {
 243   ifstream i(name.c_str(), ios::in);
 244   if (i.fail())
 245     {
 246       cerr << (file_exists(name)
 247                ? _F("Input file '%s' can't be opened for reading.", name.c_str())
 248                : _F("Input file '%s' is missing.", name.c_str()))
 249            << endl;
 250       return 0;
 251     }
 252
 253   parser p (s, name, i, false); // TODOXX pr is ...? should path be full??
 254   return p.parse_library_macros (errs_as_warnings);
 255 }
 256
 257 probe*
 258 parse_synthetic_probe (systemtap_session &s, std::istream& i, const token* tok)
 259 {
 260   parser p (s, "<synthetic>", i, false);
 261   return p.parse_synthetic_probe (tok, false);
 262 }
 263
 264 // ------------------------------------------------------------------------
 265
 266
 267 parser::parser (systemtap_session& s, const string &n, istream& i, bool p):
 268   session (s), input_name (n), input (i, input_name, s), privileged (p),
 269   context(con_unknown), systemtap_v_seen(0), last_t (0), next_t (0), num_errors (0)
 270 {
 271 }
 272
 273 parser::~parser()
 274 {
 275 }
 276
 277 static string
 278 tt2str(token_type tt)
 279 {
 280   switch (tt)
 281     {
 282     case tok_junk: return "junk";
 283     case tok_identifier: return "identifier";
 284     case tok_operator: return "operator";
 285     case tok_string: return "string";
 286     case tok_number: return "number";
 287     case tok_embedded: return "embedded-code";
 288     case tok_keyword: return "keyword";
 289     }
 290   return "unknown token";
 291 }
 292
 293 ostream&
 294 operator << (ostream& o, const source_loc& loc)
 295 {
 296   o << loc.file->name << ":"
 297     << loc.line << ":"
 298     << loc.column;
 299
 300   return o;
 301 }
 302
 303 ostream&
 304 operator << (ostream& o, const token& t)
 305 {
 306   o << tt2str(t.type);
 307
 308   if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
 309     {
 310       o << " '";
 311       for (unsigned i=0; i<t.content.length(); i++)
 312         {
 313           char c = t.content[i];
 314           o << (isprint (c) ? c : '?');
 315         }
 316       o << "'";
 317     }
 318
 319   o << " at "
 320     << t.location;
 321
 322   return o;
 323 }
 324
 325
 326 void
 327 parser::print_error  (const parse_error &pe, bool errs_as_warnings)
 328 {
 329   const token *tok = pe.tok ? pe.tok : last_t;
 330   session.print_error(pe, tok, input_name, errs_as_warnings);
 331   num_errors ++;
 332 }
 333
 334
 335
 336
 337 template <typename OPERAND>
 338 bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
 339 {
 340   if (op->type == tok_operator && op->content == "<=")
 341     { return lhs <= rhs; }
 342   else if (op->type == tok_operator && op->content == ">=")
 343     { return lhs >= rhs; }
 344   else if (op->type == tok_operator && op->content == "<")
 345     { return lhs < rhs; }
 346   else if (op->type == tok_operator && op->content == ">")
 347     { return lhs > rhs; }
 348   else if (op->type == tok_operator && op->content == "==")
 349     { return lhs == rhs; }
 350   else if (op->type == tok_operator && op->content == "!=")
 351     { return lhs != rhs; }
 352   else
 353     throw PARSE_ERROR (_("expected comparison operator"), op);
 354 }
 355
 356
 357 // Here, we perform on-the-fly preprocessing in two passes.
 358
 359 // First pass - macro declaration and expansion.
 360 //
 361 // The basic form of a declaration is @define SIGNATURE %( BODY %)
 362 // where SIGNATURE is of the form macro_name (a, b, c, ...)
 363 // and BODY can obtain the parameter contents as @a, @b, @c, ....
 364 // Note that parameterless macros can also be declared.
 365 //
 366 // Macro definitions may not be nested.
 367 // A macro is available textually after it has been defined.
 368 //
 369 // The basic form of a macro invocation
 370 //   for a parameterless macro is @macro_name,
 371 //   for a macro with parameters is @macro_name(param_1, param_2, ...).
 372 //
 373 // NB: this means that a parameterless macro @foo called as @foo(a, b, c)
 374 // leaves its 'parameters' alone, rather than consuming them and raising
 375 // a "too many parameters" error. This may be useful in the unusual
 376 // case of wanting @foo to expand to the name of a function.
 377 //
 378 // Invocations of unknown macros are left unexpanded, to allow
 379 // the continued use of constructs such as @cast, @var, etc.
 380
 381 macrodecl::~macrodecl ()
 382 {
 383   delete tok;
 384   for (vector<const token*>::iterator it = body.begin();
 385        it != body.end(); it++)
 386     delete *it;
 387 }
 388
 389 parser::pp1_activation::~pp1_activation ()
 390 {
 391   delete tok;
 392   if (curr_macro->is_closure()) return; // body is shared with an earlier declaration
 393   for (map<string, pp_macrodecl*>::iterator it = params.begin();
 394        it != params.end(); it++)
 395     delete it->second;
 396 }
 397
 398 // Grab a token from the current input source (main file or macro body):
 399 const token*
 400 parser::next_pp1 ()
 401 {
 402   if (pp1_state.empty())
 403     return input.scan ();
 404
 405   // otherwise, we're inside a macro
 406   pp1_activation* act = pp1_state.back();
 407   unsigned& cursor = act->cursor;
 408   if (cursor < act->curr_macro->body.size())
 409     {
 410       token* t = new token(*act->curr_macro->body[cursor]);
 411       t->chain = new token(*act->tok); // mark chained token
 412       cursor++;
 413       return t;
 414     }
 415   else
 416     return 0; // reached end of macro body
 417 }
 418
     // First preprocessing pass: return the next token after macro handling,
     // or 0 at the real end of input.
     //
     // Tokens are pulled from next_pp1() and handled as follows:
     //  - when a macro body runs out, its activation is popped off pp1_state
     //    and scanning resumes in the enclosing source;
     //  - '@define NAME [(a, b, ...)] %( BODY %)' is consumed and recorded in
     //    pp1_namespace (only allowed outside of any macro expansion);
     //  - an '@name' operator naming a parameter of the innermost activation,
     //    a macro of this file, or a session library macro (looked up in that
     //    order) starts a new activation, after its actual parameters, if
     //    any, have been collected;
     //  - every other token, including unknown '@' operators, is returned.
     //
     // Malformed definitions or invocations raise a parse error.
 419 const token*
 420 parser::scan_pp1 ()
 421 {
 422   while (true)
 423     {
 424       const token* t = next_pp1 ();
 425       if (t == 0) // EOF or end of macro body
 426         {
 427           if (pp1_state.empty()) // actual EOF
 428             return 0;
 429
 430           // Exit macro and loop around to look for the next token.
 431           pp1_activation* act = pp1_state.back();
 432           pp1_state.pop_back(); delete act;
 433           continue;
 434         }
 435
 436       // macro definition
 437       if (t->type == tok_operator && t->content == "@define")
 438         {
 439           if (!pp1_state.empty())
 440             throw PARSE_ERROR (_("'@define' forbidden inside macro body"), t);
 441           delete t;
 442
 443           // handle macro definition
 444           // (1) consume macro signature
               // pp1_state is known to be empty here, so the signature is read
               // straight from the lexer.
 445           t = input.scan();
 446           if (! (t && t->type == tok_identifier))
 447             throw PARSE_ERROR (_("expected identifier"), t);
 448           string name = t->content;
 449
 450           // check for redefinition of existing macro
 451           if (pp1_namespace.find(name) != pp1_namespace.end())
 452             {
 453               parse_error er (ERR_SRC, _F("attempt to redefine macro '@%s' in the same file", name.c_str ()), t);
 454
 455               // Also point to pp1_namespace[name]->tok, the site of
 456               // the original definition:
 457               er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here",
 458                                              name.c_str()), pp1_namespace[name]->tok);
 459               throw er;
 460             }
 461
 462           // XXX: the above restriction was mostly necessary due to
 463           // wanting to leave open the possibility of
 464           // statically-scoped semantics in the future.
 465
 466           // XXX: this cascades into further parse errors as the
 467           // parser tries to parse the remaining definition... (e.g.
 468           // it can't tell that the macro body isn't a conditional,
 469           // that the uses of parameters aren't nonexistent
 470           // macros.....)
 471           if (name == "define")
 472             throw PARSE_ERROR (_("attempt to redefine '@define'"), t);
 473           if (input.atwords.count("@" + name))
 474             session.print_warning (_F("macro redefines built-in operator '@%s'", name.c_str()), t);
 475
               // The declaration is registered right away and takes over the
               // name token (macrodecl's destructor deletes it).  If a later
               // step throws, the partly built entry stays in pp1_namespace.
 476           macrodecl* decl = (pp1_namespace[name] = new macrodecl);
 477           decl->tok = t;
 478
 479           // determine if the macro takes parameters
 480           bool saw_params = false;
 481           t = input.scan();
 482           if (t && t->type == tok_operator && t->content == "(")
 483             {
 484               saw_params = true;
                   // Each pass deletes the pending '(' or ',' and then expects
                   // an identifier, so an empty list '()' is rejected.
 485               do
 486                 {
 487                   delete t;
 488
 489                   t = input.scan ();
 490                   if (! (t && t->type == tok_identifier))
 491                     throw PARSE_ERROR(_("expected identifier"), t);
 492                   decl->formal_args.push_back(t->content);
 493                   delete t;
 494
 495                   t = input.scan ();
 496                   if (t && t->type == tok_operator && t->content == ",")
 497                     {
 498                       continue;
 499                     }
 500                   else if (t && t->type == tok_operator && t->content == ")")
 501                     {
 502                       delete t;
 503                       t = input.scan();
 504                       break;
 505                     }
 506                   else
 507                     {
 508                       throw PARSE_ERROR (_("expected ',' or ')'"), t);
 509                     }
 510                 }
 511               while (true);
 512             }
 513
 514           // (2) identify & consume macro body
 515           if (! (t && t->type == tok_operator && t->content == "%("))
 516             {
 517               if (saw_params)
 518                 throw PARSE_ERROR (_("expected '%('"), t);
 519               else
 520                 throw PARSE_ERROR (_("expected '%(' or '('"), t);
 521             }
 522           delete t;
 523
               // slurp_pp1_body returns the closing '%)', or 0 if input ended
               // first; the body tokens themselves now belong to decl.
 524           t = slurp_pp1_body (decl->body);
 525           if (!t)
 526             throw PARSE_ERROR (_("incomplete macro definition - missing '%)'"), decl->tok);
 527           delete t;
 528
 529           // Now loop around to look for a real token.
 530           continue;
 531         }
 532
 533       // (potential) macro invocation
 534       if (t->type == tok_operator && t->content[0] == '@')
 535         {
 536           string name = t->content.substr(1); // strip initial '@'
 537
 538           // check if name refers to a real parameter or macro
               // Lookup order: parameters of the innermost activation, then
               // macros of this file (skipped while expanding a library
               // macro), then the session's library macros.
 539           macrodecl* decl;
 540           pp1_activation* act = pp1_state.empty() ? 0 : pp1_state.back();
 541           if (act && act->params.find(name) != act->params.end())
 542             decl = act->params[name];
 543           else if (!(act && act->curr_macro->context == ctx_library)
 544                    && pp1_namespace.find(name) != pp1_namespace.end())
 545             decl = pp1_namespace[name];
 546           else if (session.library_macros.find(name)
 547                    != session.library_macros.end())
 548             decl = session.library_macros[name];
 549           else // this is an ordinary @operator
 550             return t;
 551
 552           // handle macro invocation, taking ownership of t
 553           pp1_activation *new_act = new pp1_activation(t, decl);
 554           unsigned num_params = decl->formal_args.size();
 555
 556           // (1a) restore parameter invocation closure
 557           if (num_params == 0 && decl->is_closure())
 558             {
 559               // NB: decl->parent_act is always safe since the
 560               // parameter decl (if any) comes from an activation
 561               // record which is deeper in the stack than new_act.
 562
 563               // decl is a macro parameter which must be evaluated in
 564               // the context of the original point of invocation:
 565               new_act->params = ((pp_macrodecl*)decl)->parent_act->params;
 566               goto expand;
 567             }
 568
 569           // (1b) consume macro parameters (if any)
 570           if (num_params == 0)
 571             goto expand;
 572
 573           // for simplicity, we do not allow macro constructs here
 574           // -- if we did, we'd have to recursively call scan_pp1()
 575           t = next_pp1 ();
 576           if (! (t && t->type == tok_operator && t->content == "("))
 577             {
 578               delete new_act;
 579               throw PARSE_ERROR (_NF
 580                                     ("expected '(' in invocation of macro '@%s'"
 581                                      " taking %d parameter",
 582                                      "expected '(' in invocation of macro '@%s'"
 583                                      " taking %d parameters",
 584                                      num_params, name.c_str(), num_params), t);
 585             }
 586
 587           // XXX perhaps parse/count the full number of params,
 588           // so we can say "expected x, found y params" on error?
 589           for (unsigned i = 0; i < num_params; i++)
 590             {
                   // t is the separator that introduced this parameter: the
                   // opening '(' on the first pass, a ',' afterwards.
 591               delete t;
 592
 593               // create parameter closure
 594               string param_name = decl->formal_args[i];
 595               pp_macrodecl* p = (new_act->params[param_name]
 596                                  = new pp_macrodecl);
 597               p->tok = new token(*new_act->tok);
 598               p->parent_act = act;
 599               // NB: *new_act->tok points to invocation, act is NULL at top level
 600
 601               t = slurp_pp1_param (p->body);
 602
 603               // check correct usage of ',' or ')'
 604               if (t == 0) // hit unexpected EOF or end of macro
 605                 {
 606                   // XXX could we pop the stack and continue parsing
 607                   // the invocation, allowing macros to construct new
 608                   // invocations in piecemeal fashion??
                       // Copy the invocation token first: deleting new_act
                       // deletes the token it holds.
 609                   const token* orig_t = new token(*new_act->tok);
 610                   delete new_act;
 611                   throw PARSE_ERROR (_("could not find end of macro invocation"), orig_t);
 612                 }
 613               if (t->type == tok_operator && t->content == ",")
 614                 {
 615                   if (i + 1 == num_params)
 616                     {
 617                       delete new_act;
 618                       throw PARSE_ERROR (_F("too many parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
 619                     }
 620                 }
 621               else if (t->type == tok_operator && t->content == ")")
 622                 {
 623                   if (i + 1 != num_params)
 624                     {
 625                       delete new_act;
 626                       throw PARSE_ERROR (_F("too few parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
 627                     }
 628                 }
 629               else
 630                 {
 631                   // XXX this is, incidentally, impossible
 632                   delete new_act;
 633                   throw PARSE_ERROR(_("expected ',' or ')' after macro parameter"), t);
 634                 }
 635             }
 636
               // t is the closing ')' of the invocation at this point.
 637           delete t;
 638
 639           // (2) set up macro expansion
 640         expand:
 641           pp1_state.push_back (new_act);
 642
 643           // Now loop around to look for a real token.
 644           continue;
 645         }
 646
 647       // Otherwise, we have an ordinary token.
 648       return t;
 649     }
 650 }
 651
 652 // Consume a single macro invocation's parameters, heeding nested ( )
 653 // brackets and stopping on an unbalanced ')' or an unbracketed ','
 654 // (and returning the final separator token).
 655 const token*
 656 parser::slurp_pp1_param (vector<const token*>& param)
 657 {
 658   const token* t = 0;
 659   unsigned nesting = 0;
 660   do
 661     {
 662       t = next_pp1 ();
 663
 664       if (!t)
 665         break;
 666       if (t->type == tok_operator && t->content == "(")
 667         ++nesting;
 668       else if (nesting && t->type == tok_operator && t->content == ")")
 669         --nesting;
 670       else if (!nesting && t->type == tok_operator
 671                && (t->content == ")" || t->content == ","))
 672         break;
 673       param.push_back(t);
 674     }
 675   while (true);
 676   return t; // report ")" or "," or NULL
 677 }
 678
 679
 680 // Consume a macro declaration's body, heeding nested %( %) brackets.
 681 const token*
 682 parser::slurp_pp1_body (vector<const token*>& body)
 683 {
 684   const token* t = 0;
 685   unsigned nesting = 0;
 686   do
 687     {
 688       t = next_pp1 ();
 689
 690       if (!t)
 691         break;
 692       if (t->type == tok_operator && t->content == "%(")
 693         ++nesting;
 694       else if (nesting && t->type == tok_operator && t->content == "%)")
 695         --nesting;
 696       else if (!nesting && t->type == tok_operator && t->content == "%)")
 697         break;
 698       body.push_back(t);
 699     }
 700   while (true);
 701   return t; // report final "%)" or NULL
 702 }
 703
 704 // Used for parsing .stpm files.
 705 stapfile*
 706 parser::parse_library_macros (bool errs_as_warnings)
 707 {
 708   stapfile* f = new stapfile;
 709   input.set_current_file (f);
 710
 711   try
 712     {
 713       const token* t = scan_pp1 ();
 714
 715       // Currently we only take objection to macro invocations if they
 716       // produce a non-whitespace token after being expanded.
 717
 718       // XXX should we prevent macro invocations even if they expand to empty??
 719
 720       if (t != 0)
 721         throw PARSE_ERROR (_F("library macro file '%s' contains non-@define construct", input_name.c_str()), t);
 722
 723       // We need to first check whether *any* of the macros are duplicates,
 724       // then commit to including the entire file in the global namespace
 725       // (or not). Yuck.
 726       for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
 727            it != pp1_namespace.end(); it++)
 728         {
 729           string name = it->first;
 730
 731           if (session.library_macros.find(name) != session.library_macros.end())
 732             {
 733               parse_error er(ERR_SRC, _F("duplicate definition of library macro '@%s'", name.c_str()), it->second->tok);
 734               er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here", name.c_str()), session.library_macros[name]->tok);
 735               print_error (er);
 736
 737               delete er.chain;
 738               delete f;
 739               return 0;
 740             }
 741         }
 742
 743     }
 744   catch (const parse_error& pe)
 745     {
 746       print_error (pe, errs_as_warnings);
 747       delete f;
 748       return 0;
 749     }
 750
 751   // If no errors, include the entire file.  Note how this is outside
 752   // of the try-catch block -- no errors possible.
 753   for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
 754        it != pp1_namespace.end(); it++)
 755     {
 756       string name = it->first;
 757
 758       session.library_macros[name] = it->second;
 759       session.library_macros[name]->context = ctx_library;
 760     }
 761
 762   return f;
 763 }
 764
 765 // Second pass - preprocessor conditional expansion.
 766 //
 767 // The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
 768 // where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
 769 //                 or: arch COMPARISON-OP "arch-string"
 770 //                 or: systemtap_v COMPARISON-OP "version-string"
 771 //                 or: systemtap_privilege COMPARISON-OP "privilege-string"
     //                 or: guru_mode COMPARISON-OP number  (only == or !=, against 0 or 1)
 772 //                 or: CONFIG_foo COMPARISON-OP "config-string"
 773 //                 or: CONFIG_foo COMPARISON-OP number
 774 //                 or: CONFIG_foo COMPARISON-OP CONFIG_bar
 775 //                 or: "string1" COMPARISON-OP "string2"
 776 //                 or: number1 COMPARISON-OP number2
 777 // The %: ELSE-TOKENS part is optional.
 778 //
 779 // e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
 780 // e.g. %( arch != "i?86" %? "foo" %: "baz" %)
 781 // e.g. %( CONFIG_foo %? "foo" %: "baz" %)
 782 //
 783 // Up to an entire %( ... %) expression is processed by a single call
 784 // to this function.  Tokens included by any nested conditions are
 785 // enqueued in a private vector.
 786
 787 bool eval_pp_conditional (systemtap_session& s,
 788                           const token* l, const token* op, const token* r)
 789 {
 790   if (l->type == tok_identifier && (l->content == "kernel_v" ||
 791                                     l->content == "kernel_vr" ||
 792                                     l->content == "systemtap_v"))
 793     {
 794       if (! (r->type == tok_string))
 795         throw PARSE_ERROR (_("expected string literal"), r);
 796
 797       string target_kernel_vr = s.kernel_release;
 798       string target_kernel_v = s.kernel_base_release;
 799       string target;
 800
 801       if (l->content == "kernel_v") target = target_kernel_v;
 802       else if (l->content == "kernel_vr") target = target_kernel_vr;
 803       else if (l->content == "systemtap_v") target = s.compatible;
 804       else assert (0);
 805
 806       string query = r->content;
 807       bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
 808
 809       // collect acceptable strverscmp results.
 810       int rvc_ok1, rvc_ok2;
 811       bool wc_ok = false;
 812       if (op->type == tok_operator && op->content == "<=")
 813         { rvc_ok1 = -1; rvc_ok2 = 0; }
 814       else if (op->type == tok_operator && op->content == ">=")
 815         { rvc_ok1 = 1; rvc_ok2 = 0; }
 816       else if (op->type == tok_operator && op->content == "<")
 817         { rvc_ok1 = -1; rvc_ok2 = -1; }
 818       else if (op->type == tok_operator && op->content == ">")
 819         { rvc_ok1 = 1; rvc_ok2 = 1; }
 820       else if (op->type == tok_operator && op->content == "==")
 821         { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
 822       else if (op->type == tok_operator && op->content == "!=")
 823         { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
 824       else
 825         throw PARSE_ERROR (_("expected comparison operator"), op);
 826
 827       if ((!wc_ok) && rhs_wildcard)
 828         throw PARSE_ERROR (_("wildcard not allowed with order comparison operators"), op);
 829
 830       if (rhs_wildcard)
 831         {
 832           int rvc_result = fnmatch (query.c_str(), target.c_str(),
 833                                     FNM_NOESCAPE); // spooky
 834           bool badness = (rvc_result == 0) ^ (op->content == "==");
 835           return !badness;
 836         }
 837       else
 838         {
 839           int rvc_result = strverscmp (target.c_str(), query.c_str());
 840           // normalize rvc_result
 841           if (rvc_result < 0) rvc_result = -1;
 842           if (rvc_result > 0) rvc_result = 1;
 843           return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
 844         }
 845     }
 846   else if (l->type == tok_identifier && l->content == "systemtap_privilege")
 847     {
 848       string target_privilege =
 849         pr_contains(s.privilege, pr_stapdev) ? "stapdev"
 850         : pr_contains(s.privilege, pr_stapsys) ? "stapsys"
 851         : pr_contains(s.privilege, pr_stapusr) ? "stapusr"
 852         : "none"; /* should be impossible -- s.privilege always one of above */
 853       assert(target_privilege != "none");
 854
 855       if (! (r->type == tok_string))
 856         throw PARSE_ERROR (_("expected string literal"), r);
 857       string query_privilege = r->content;
 858
 859       bool nomatch = (target_privilege != query_privilege);
 860
 861       bool result;
 862       if (op->type == tok_operator && op->content == "==")
 863         result = !nomatch;
 864       else if (op->type == tok_operator && op->content == "!=")
 865         result = nomatch;
 866       else
 867         throw PARSE_ERROR (_("expected '==' or '!='"), op);
 868       /* XXX perhaps allow <= >= and similar comparisons */
 869
 870       return result;
 871     }
 872   else if (l->type == tok_identifier && l->content == "guru_mode")
 873     {
 874       if (! (r->type == tok_number))
 875         throw PARSE_ERROR (_("expected number"), r);
 876       int64_t lhs = (int64_t) s.guru_mode;
 877       int64_t rhs = lex_cast<int64_t>(r->content);
 878       if (!((rhs == 0)||(rhs == 1)))
 879         throw PARSE_ERROR (_("expected 0 or 1"), op);
 880       if (!((op->type == tok_operator && op->content == "==") ||
 881             (op->type == tok_operator && op->content == "!=")))
 882         throw PARSE_ERROR (_("expected '==' or '!='"), op);
 883
 884       return eval_comparison (lhs, op, rhs);
 885     }
 886   else if (l->type == tok_identifier && l->content == "arch")
 887     {
 888       string target_architecture = s.architecture;
 889       if (! (r->type == tok_string))
 890         throw PARSE_ERROR (_("expected string literal"), r);
 891       string query_architecture = r->content;
 892
 893       int nomatch = fnmatch (query_architecture.c_str(),
 894                              target_architecture.c_str(),
 895                              FNM_NOESCAPE); // still spooky
 896
 897       bool result;
 898       if (op->type == tok_operator && op->content == "==")
 899         result = !nomatch;
 900       else if (op->type == tok_operator && op->content == "!=")
 901         result = nomatch;
 902       else
 903         throw PARSE_ERROR (_("expected '==' or '!='"), op);
 904
 905       return result;
 906     }
 907   else if (l->type == tok_identifier && l->content == "runtime")
 908     {
 909       if (! (r->type == tok_string))
 910         throw PARSE_ERROR (_("expected string literal"), r);
 911
 912       string query_runtime = r->content;
 913       string target_runtime;
 914
 915       target_runtime = (s.runtime_mode == systemtap_session::dyninst_runtime
 916                         ? "dyninst" : "kernel");
 917       int nomatch = fnmatch (query_runtime.c_str(),
 918                              target_runtime.c_str(),
 919                              FNM_NOESCAPE); // still spooky
 920
 921       bool result;
 922       if (op->type == tok_operator && op->content == "==")
 923         result = !nomatch;
 924       else if (op->type == tok_operator && op->content == "!=")
 925         result = nomatch;
 926       else
 927         throw PARSE_ERROR (_("expected '==' or '!='"), op);
 928
 929       return result;
 930     }
 931   else if (l->type == tok_identifier && startswith(l->content, "CONFIG_"))
 932     {
 933       if (r->type == tok_string)
 934         {
 935           string lhs = s.kernel_config[l->content]; // may be empty
 936           string rhs = r->content;
 937
 938           int nomatch = fnmatch (rhs.c_str(), lhs.c_str(), FNM_NOESCAPE); // still spooky
 939
 940           bool result;
 941           if (op->type == tok_operator && op->content == "==")
 942             result = !nomatch;
 943           else if (op->type == tok_operator && op->content == "!=")
 944             result = nomatch;
 945           else
 946             throw PARSE_ERROR (_("expected '==' or '!='"), op);
 947
 948           return result;
 949         }
 950       else if (r->type == tok_number)
 951         {
 952           const char* startp = s.kernel_config[l->content].c_str ();
 953           char* endp = (char*) startp;
 954           errno = 0;
 955           int64_t lhs = (int64_t) strtoll (startp, & endp, 0);
 956           if (errno == ERANGE || errno == EINVAL || *endp != '\0')
 957             throw PARSE_ERROR ("Config option value not a number", l);
 958
 959           int64_t rhs = lex_cast<int64_t>(r->content);
 960           return eval_comparison (lhs, op, rhs);
 961         }
 962       else if (r->type == tok_identifier
 963                && startswith(r->content, "CONFIG_"))
 964         {
 965           // First try to convert both to numbers,
 966           // otherwise threat both as strings.
 967           const char* startp = s.kernel_config[l->content].c_str ();
 968           char* endp = (char*) startp;
 969           errno = 0;
 970           int64_t val = (int64_t) strtoll (startp, & endp, 0);
 971           if (errno != ERANGE && errno != EINVAL && *endp == '\0')
 972             {
 973               int64_t lhs = val;
 974               startp = s.kernel_config[r->content].c_str ();
 975               endp = (char*) startp;
 976               errno = 0;
 977               int64_t rhs = (int64_t) strtoll (startp, & endp, 0);
 978               if (errno != ERANGE && errno != EINVAL && *endp == '\0')
 979                 return eval_comparison (lhs, op, rhs);
 980             }
 981
 982           string lhs = s.kernel_config[l->content];
 983           string rhs = s.kernel_config[r->content];
 984           return eval_comparison (lhs, op, rhs);
 985         }
 986       else
 987         throw PARSE_ERROR (_("expected string, number literal or other CONFIG_... as right side operand"), r);
 988     }
 989   else if (l->type == tok_string && r->type == tok_string)
 990     {
 991       string lhs = l->content;
 992       string rhs = r->content;
 993       return eval_comparison (lhs, op, rhs);
 994       // NB: no wildcarding option here
 995     }
 996   else if (l->type == tok_number && r->type == tok_number)
 997     {
 998       int64_t lhs = lex_cast<int64_t>(l->content);
 999       int64_t rhs = lex_cast<int64_t>(r->content);
1000       return eval_comparison (lhs, op, rhs);
1001       // NB: no wildcarding option here
1002     }
1003   else if (l->type == tok_string && r->type == tok_number
1004             && op->type == tok_operator)
1005     throw PARSE_ERROR (_("expected string literal as right value"), r);
1006   else if (l->type == tok_number && r->type == tok_string
1007             && op->type == tok_operator)
1008     throw PARSE_ERROR (_("expected number literal as right value"), r);
1009
1010   else
1011     throw PARSE_ERROR (_("expected 'arch', 'kernel_v', 'kernel_vr', 'systemtap_v',\n"
1012                          "             'runtime', 'systemtap_privilege', 'CONFIG_...', or\n"
1013                          "             comparison between strings or integers"), l);
1014 }
1015
1016
1017 // Return the next token left after %( cond %? ... %: ... %) preprocessing, or 0 at EOF.
1018 const token*
1019 parser::scan_pp ()
1020 {
1021   while (true)
1022     {
1023       // State of the innermost open conditional, if any.
1024       pp_state_t pp = pp_state.empty() ? PP_NONE : pp_state.back().second;
1025
1026       // Within a rejected branch, skip_pp() discards tokens up to the next
1027       // relevant preprocessor marker; otherwise read a single token.
1028       const token* t = (pp == PP_SKIP_THEN || pp == PP_SKIP_ELSE)
1029                        ? skip_pp () : scan_pp1 ();
1030
1031       if (t == 0) // EOF
1032         {
1033           if (pp == PP_NONE)
1034             return t;
1035           // A conditional is still open: report it at its opening '%('.
1036           t = pp_state.back().first;
1037           pp_state.pop_back(); // so skip_some doesn't keep trying to close this
1038           //TRANSLATORS: 'conditional' meaning 'conditional preprocessing'
1039           throw PARSE_ERROR (_("incomplete conditional at end of file"), t);
1040         }
1041
1042       const bool is_op = (t->type == tok_operator);
1043
1044       // misplaced preprocessor "then"
1045       if (is_op && t->content == "%?")
1046         throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1047
1048       // preprocessor "else": switch the innermost conditional to its other branch
1049       if (is_op && t->content == "%:")
1050         {
1051           if (pp == PP_NONE)
1052             throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1053           if (pp == PP_KEEP_ELSE || pp == PP_SKIP_ELSE)
1054             throw PARSE_ERROR (_("invalid conditional - duplicate '%:'"), t);
1055           // XXX: here and elsewhere, error cascades might be avoided
1056           // by dropping tokens until we reach the closing %)
1057
1058           pp_state.back().second = (pp == PP_KEEP_THEN) ?
1059                                    PP_SKIP_ELSE : PP_KEEP_ELSE;
1060           delete t;
1061           continue;
1062         }
1063
1064       // preprocessor close: forget the conditional and free its saved '%(' token
1065       if (is_op && t->content == "%)")
1066         {
1067           if (pp == PP_NONE)
1068             throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1069           delete pp_state.back().first;
1070           delete t; //this is the closing bracket
1071           pp_state.pop_back();
1072           continue;
1073         }
1074
1075       if (! (is_op && t->content == "%(")) // ordinary token
1076         return t;
1077
1078       // We have a %( - it's time to throw a preprocessing party!
1079       // The condition is a '||'-separated list of '&&'-chains of "l op r"
1080       // comparisons; every comparison is evaluated (no short-circuiting).
1081       bool result = false;     // OR of the '&&'-chains completed so far
1082       bool and_result = true;  // AND of the comparisons in the current chain
1083       const token *n = NULL;   // token that followed the latest comparison
1084       while (true)
1085         {
1086           const token *l = scan_pp1 ();
1087           const token *op = scan_pp1 ();
1088           const token *r = scan_pp1 ();
1089           if (l == 0 || op == 0 || r == 0)
1090             {
1091               // EOF inside the condition.  The error points at the '%(',
1092               // so free what was read so far (deleting 0 is harmless).
1093               delete l; delete op; delete r; delete n;
1094               throw PARSE_ERROR (_("incomplete condition after '%('"), t);
1095             }
1096           // NB: consider generalizing to consume all tokens until %?, and
1097           // passing that as a vector to an evaluator.
1098
1099           and_result &= eval_pp_conditional (session, l, op, r);
1100           // The right operand of a systemtap_v test is kept in systemtap_v_seen.
1101           if (l->content == "systemtap_v")
1102             systemtap_v_seen = r;
1103           else
1104             delete r;
1105           delete l;
1106           delete op;
1107           delete n; // connective left over from the previous round, if any
1108
1109           n = scan_pp1 ();
1110           if (n && n->type == tok_operator && n->content == "&&")
1111             continue;
1112           result |= and_result; // an '&&'-chain just ended
1113           and_result = true;
1114           if (! (n && n->type == tok_operator && n->content == "||"))
1115             break;
1116         }
1117
1118       // Whatever ended the condition has to be the "%?" marker.
1119       if (! (n && n->type == tok_operator && n->content == "%?"))
1120         {
1121           delete n; // stray token or 0; the error is reported at the '%('
1122           throw PARSE_ERROR (_("expected '%?' marker for conditional"), t);
1123         }
1124       delete n; // "%?"
1125
1126       // Record '%(' and the chosen branch; now loop around to look for a real token.
1127       pp = result ? PP_KEEP_THEN : PP_SKIP_THEN;
1128       pp_state.push_back (make_pair (t, pp));
1129     }
1130 }
1131
1132
1133 // Discard a rejected branch: return the first un-nested "%:", "%?" or "%)", or 0 at EOF.
1134 const token*
1135 parser::skip_pp ()
1136 {
1137   unsigned depth = 0; // nested "%(" opened while skipping
1138   for (;;)
1139     {
1140       const token* tok = 0;
1141       try
1142         {
1143           tok = scan_pp1 ();
1144         }
1145       catch (const parse_error &)
1146         {
1147           continue; // errors raised while reading skipped text are ignored
1148         }
1149       if (tok == 0)
1150         return 0; // EOF
1151       if (tok->type == tok_operator)
1152         {
1153           const string& mark = tok->content;
1154           if (mark == "%(")
1155             ++depth;
1156           else if (depth > 0 && mark == "%)")
1157             --depth;
1158           else if (depth == 0 && (mark == "%:" || mark == "%?" || mark == "%)"))
1159             return tok; // the caller decides what this marker means
1160         }
1161       delete tok;
1162     }
1163 }
1164
1165
1166 const token*
1167 parser::next ()
1168 {
1169   if (! next_t)
1170     next_t = scan_pp ();
1171   if (! next_t)
1172     throw PARSE_ERROR (_("unexpected end-of-file"));
1173
1174   last_t = next_t;
1175   // advance by zeroing next_t
1176   next_t = 0;
1177   return last_t;
1178 }
1179
1180 // Return the upcoming token (0 at end of input) without consuming it.
1181 const token*
1182 parser::peek ()
1183 {
1184   const token* ahead = next_t ? next_t : scan_pp ();
1185   // Leave the token buffered in next_t (no advance) and also record it in
1186   // last_t, which is what swallow() deletes.
1187   next_t = ahead;
1188   last_t = ahead;
1189   return ahead;
1190 }
1191
1192 // Free the token most recently returned by peek() or next() and move past it.
1193 void
1194 parser::swallow ()
1195 {
1196   assert (last_t != 0); // there must be a peeked or nexted token to discard
1197   const token* doomed = last_t;
1198   next_t = 0; // emptying the lookahead as well steps past a merely peeked token
1199   last_t = 0;
1200   delete doomed;
1201 }
1202
1203 // True iff T is non-null, has type TT and its text equals EXPECTED.
1204 static inline bool
1205 tok_is(token const * t, token_type tt, string const & expected)
1206 {
1207   return (t == 0 || t->type != tt) ? false : (t->content == expected);
1208 }
1209
1210 // Consume the next token, which must have type TT and text EXPECTED; throw otherwise.
1211 void
1212 parser::expect_known (token_type tt, string const & expected)
1213 {
1214   const token *got = next();
1215   if (! tok_is (got, tt, expected))
1216     throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
1217   swallow (); // the token itself is not needed any further
1218 }
1219
1220 // Consume the next token, which must have type TT, storing its text in TARGET.
1221 void
1222 parser::expect_unknown (token_type tt, string & target)
1223 {
1224   const token *got = next();
1225   if (got == 0 || got->type != tt)
1226     throw PARSE_ERROR (_("expected ") + tt2str(tt));
1227   target = got->content;
1228   swallow (); // the text now lives in TARGET, so the token can go
1229 }
1230
1231 // Consume the next token, which must have type TT1 or TT2, storing its text in TARGET.
1232 void
1233 parser::expect_unknown2 (token_type tt1, token_type tt2, string & target)
1234 {
1235   const token *got = next();
1236   if (got == 0 || (got->type != tt1 && got->type != tt2))
1237     throw PARSE_ERROR (_F("expected %s or %s", tt2str(tt1).c_str(), tt2str(tt2).c_str()));
1238   target = got->content;
1239   swallow (); // the text now lives in TARGET, so the token can go
1240 }
1241
1242 // Consume the next token, which must be the operator EXPECTED; expect_known() throws otherwise.
1243 void
1244 parser::expect_op (std::string const & expected)
1245 {
1246   expect_known (tok_operator, expected);
1247 }
1248
1249 // Consume the next token, which must be the keyword EXPECTED; expect_known() throws otherwise.
1250 void
1251 parser::expect_kw (std::string const & expected)
1252 {
1253   expect_known (tok_keyword, expected);
1254 }
1255
1256 const token* // the keyword token itself, which is not swallowed here
1257 parser::expect_kw_token (std::string const & expected)
1258 {
1259   const token *kw = next();
1260   if (! tok_is (kw, tok_keyword, expected))
1261     throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
1262   return kw;
1263 }
1264
1265 void
1266 parser::expect_number (int64_t & value)
1267 {
1268   // Consume an integer literal, optionally preceded by a '-' operator
1269   // token, and store it in VALUE.  Throws a parse error otherwise.
1270   bool neg = false;
1271   const token *t = next(); // never 0: next() throws at end of input
1272   if (t->type == tok_operator && t->content == "-")
1273     {
1274       neg = true;
1275       swallow ();
1276       t = next ();
1277     }
1278   if (t->type != tok_number)
1279     throw PARSE_ERROR (_("expected number"));
1280
1281   const char* startp = t->content.c_str ();
1282   char* endp = (char*) startp;
1283   // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX.
1284   // The lexer only gives us unsigned digit strings, so the magnitude is
1285   // parsed as unsigned; behind a '-' it may be at most 2^63 (-LLONG_MIN).
1286   errno = 0;
1287   const unsigned long long magnitude = strtoull (startp, & endp, 0);
1288   value = (int64_t) magnitude;
1289   if (errno == ERANGE || errno == EINVAL || *endp != '\0'
1290       || (neg && magnitude > 9223372036854775808ULL))
1291     throw PARSE_ERROR (_("number invalid or out of range"));
1292
1293   // Negate in unsigned arithmetic: "value = -value" would be a signed
1294   // overflow (undefined behavior) when the magnitude is 2^63.
1295   if (neg)
1296     value = (int64_t) (0ULL - magnitude);
1297   swallow (); // We are done with it, content was parsed and copied into value.
1298 }
1299
1300 // Consume an identifier or '@'-prefixed operator; copy its text to TARGET and return the token.
1301 const token*
1302 parser::expect_ident_or_atword (std::string & target)
1303 {
1304   const token *tok = next();
1305   // accept identifiers and operators beginning in '@':
1306   const bool is_ident = tok && tok->type == tok_identifier;
1307   const bool is_atword = tok && tok->type == tok_operator && tok->content[0] == '@';
1308   if (! (is_ident || is_atword))
1309     // XXX currently this is only called from parse_hist_op_or_bare_name(),
1310     // so the message is accurate, but keep an eye out in the future:
1311     throw PARSE_ERROR (_F("expected %s or statistical operation", tt2str(tok_identifier).c_str()));
1312
1313   target = tok->content;
1314   return tok; // handed back unswallowed
1315 }
1316
1317 // Consume an identifier or keyword token and copy its text into TARGET (via expect_unknown2).
1318 void
1319 parser::expect_ident_or_keyword (std::string & target)
1320 {
1321   expect_unknown2 (tok_identifier, tok_keyword, target);
1322 }
1323
1324 // True iff the upcoming token, left unconsumed, is the operator OP.
1325 bool
1326 parser::peek_op (std::string const & op)
1327 {
1328   return tok_is (peek(), tok_operator, op);
1329 }
1330
1331 // True iff the upcoming token, left unconsumed, is a tok_identifier whose text is KW.
1332 bool
1333 parser::peek_kw (std::string const & kw)
1334 { // NOTE(review): expect_kw() tests for tok_keyword, this tests for tok_identifier -- confirm that is intended.
1335   return tok_is (peek(), tok_identifier, kw);
1336 }
1337
1338
1339 // Read all of INPUT into memory and position the lexer at its start (line 1, column 1).
1340 lexer::lexer (istream& input, const string& in, systemtap_session& s):
1341   ate_comment(false), ate_whitespace(false), saw_tokens(false),
1342   input_name (in), input_pointer (0), input_end (0),
1343   cursor_suspend_count(0), cursor_suspend_line (1), cursor_suspend_column (1),
1344   cursor_line (1), cursor_column (1),
1345   session(s), current_file (0), current_token_chain (0)
1346 {
1347   // With NUL as the delimiter, one getline takes everything up to EOF or the first NUL byte.
1348   getline(input, input_contents, '\0');
1349
1350   input_pointer = input_contents.data();
1351   input_end = input_pointer + input_contents.size();
1352
1353   // Both word tables are static members: only the first lexer built fills them.
1354   if (keywords.empty())
1355     {
1356       // NB: adding new keywords is highly disruptive to the language,
1357       // in particular to existing scripts that could be suddenly
1358       // broken.  If done at all, it has to be s.compatible-sensitive,
1359       // and broadly advertised.
1360       static const char* const reserved_words[] =
1361         {
1362           "probe", "global",
1363           "function", "if",
1364           "else", "for",
1365           "foreach", "in",
1366           "limit", "return",
1367           "delete", "while",
1368           "break", "continue",
1369           "next", "string",
1370           "long", "try",
1371           "catch"
1372         };
1373       const size_t n_reserved = sizeof (reserved_words) / sizeof (reserved_words[0]);
1374       for (size_t i = 0; i < n_reserved; i++)
1375         keywords.insert (reserved_words[i]);
1376     }
1377
1378   if (atwords.empty())
1379     {
1380       // NB: adding new @words is mildly disruptive to existing
1381       // scripts that define macros with the same name, but not
1382       // really. The user will merely receive a warning that they are
1383       // redefining an existing operator.
1384       static const char* const at_operators[] =
1385         {
1386           "@cast", "@defined",
1387           "@entry", "@perf",
1388           "@var", "@avg",
1389           "@count", "@sum",
1390           "@min", "@max",
1391           "@hist_linear", "@hist_log"
1392         };
1393       const size_t n_at = sizeof (at_operators) / sizeof (at_operators[0]);
1394       for (size_t i = 0; i < n_at; i++)
1395         atwords.insert (at_operators[i]);
1396     }
1397 }
1398
1399 set<string> lexer::keywords; // filled in by the first lexer constructed
1400 set<string> lexer::atwords; // likewise
1401
1402 void
1403 lexer::set_current_file (stapfile* f)
1404 {
1405   // Tag later tokens with F; a non-null F also gets this lexer's input name and text.
1406   current_file = f;
1407   if (f == 0)
1408     return;
1409   f->name = input_name;
1410   f->file_contents = input_contents;
1411 }
1412
1413 void
1414 lexer::set_current_token_chain (const token* tok)
1415 {
1416   current_token_chain = tok; // scan() copies this into the chain field of each new token
1417 }
1418
1419 int
1420 lexer::input_peek (unsigned n)
1421 {
1422   // Byte N places past the read pointer (0..255), or -1 for EOF.  N is checked against the length left, so no out-of-range pointer is formed.
1423   const size_t left = input_end - input_pointer;
1424   return (n < left) ? (int) (unsigned char) input_pointer[n] : -1;
1425 }
1426
1427 // Consume one input byte and return it (0..255), or -1 at EOF; keeps the line/column cursor current.
1428 int
1429 lexer::input_get ()
1430 {
1431   const int ch = input_peek();
1432   if (ch < 0)
1433     return ch; // EOF: nothing consumed, cursor untouched
1434
1435   ++input_pointer;
1436
1437   if (cursor_suspend_count == 0)
1438     {
1439       // Ordinary source text: update source cursor.
1440       if (ch == '\n')
1441         {
1442           cursor_line ++;
1443           cursor_column = 1;
1444         }
1445       else
1446         cursor_column ++;
1447       return ch;
1448     }
1449
1450   // Text pasted in by input_put: the cursor stays frozen until the last
1451   // pasted character is consumed, then the saved position is restored.
1452   --cursor_suspend_count;
1453   if (cursor_suspend_count == 0)
1454     {
1455       cursor_line = cursor_suspend_line;
1456       cursor_column = cursor_suspend_column;
1457     }
1458   // clog << "[" << (char)ch << "]";
1459   return ch;
1460 }
1461
1462 // Splice CHARS into the input at the read position, to be scanned next as if located at token T.
1463 void
1464 lexer::input_put (const string& chars, const token* t)
1465 {
1466   const size_t offset = input_pointer - input_contents.data();
1467   input_contents.insert (offset, chars);
1468   // insert() may move the buffer, so both pointers are recomputed.
1469   input_pointer = input_contents.data() + offset;
1470   input_end = input_contents.data() + input_contents.size();
1471   cursor_suspend_count += chars.size(); // input_get counts this down, then restores the saved cursor
1472   cursor_suspend_line = cursor_line;
1473   cursor_suspend_column = cursor_column;
1474   cursor_line = t->location.line; // meanwhile, positions are reported as T's
1475   cursor_column = t->location.column;
1476 }
1477
1478 // Scan and return the next token as a new heap object, or 0 at end of input.
1479 token*
1480 lexer::scan ()
1481 {
1482   ate_comment = false; // reset for each new token
1483   ate_whitespace = false; // reset for each new token
1484
1485   // XXX be very sure to restore old_saw_tokens if we return without a token:
1486   bool old_saw_tokens = saw_tokens;
1487   saw_tokens = true;
1488
1489   token* n = new token;
1490   n->location.file = current_file;
1491   n->chain = current_token_chain;
1492
1493 skip: // restart point after whitespace, comments and argument substitutions
1494   bool suspended = (cursor_suspend_count > 0); // true while reading text pasted in by input_put
1495   n->location.line = cursor_line; // position of the character about to be read
1496   n->location.column = cursor_column;
1497
1498   int c = input_get();
1499   // clog << "{" << (char)c << (char)c2 << "}";
1500   if (c < 0)
1501     {
1502       delete n;
1503       saw_tokens = old_saw_tokens;
1504       return 0;
1505     }
1506
1507   if (isspace (c))
1508     {
1509       ate_whitespace = true;
1510       goto skip;
1511     }
1512
1513   int c2 = input_peek (); // one character of lookahead (-1 at EOF)
1514
1515   // Paste command line arguments as character streams into
1516   // the beginning of a token.  $1..$999 go through as raw
1517   // characters; @1..@999 are quoted/escaped as strings.
1518   // $# and @# expand to the number of arguments, similarly
1519   // raw or quoted.
1520   if ((c == '$' || c == '@') && (c2 == '#'))
1521     {
1522       n->content.push_back (c);
1523       n->content.push_back (c2);
1524       input_get(); // swallow '#'
1525       if (suspended)
1526         {
1527           n->make_junk(_("invalid nested substitution of command line arguments"));
1528           return n;
1529         }
1530       size_t num_args = session.args.size ();
1531       input_put ((c == '$') ? lex_cast (num_args) : lex_cast_qstring (num_args), n);
1532       n->content.clear();
1533       goto skip; // rescan, now reading the pasted text
1534     }
1535   else if ((c == '$' || c == '@') && (isdigit (c2)))
1536     {
1537       n->content.push_back (c);
1538       unsigned idx = 0;
1539       do
1540         {
1541           input_get ();
1542           idx = (idx * 10) + (c2 - '0');
1543           n->content.push_back (c2);
1544           c2 = input_peek ();
1545         } while (c2 > 0 &&
1546                  isdigit (c2) &&
1547                  idx <= session.args.size()); // prevent overflow
1548       if (suspended)
1549         {
1550           n->make_junk(_("invalid nested substitution of command line arguments"));
1551           return n;
1552         }
1553       if (idx == 0 ||
1554           idx-1 >= session.args.size())
1555         {
1556           n->make_junk(_F("command line argument index %lu out of range [1-%lu]",
1557                           (unsigned long) idx, (unsigned long) session.args.size()));
1558           return n;
1559         }
1560       const string& arg = session.args[idx-1];
1561       input_put ((c == '$') ? arg : lex_cast_qstring (arg), n);
1562       n->content.clear();
1563       goto skip; // rescan, now reading the pasted text
1564     }
1565
1566   else if (isalpha (c) || c == '$' || c == '@' || c == '_')
1567     {
1568       n->type = tok_identifier;
1569       n->content = (char) c;
1570       while (isalnum (c2) || c2 == '_' || c2 == '$')
1571         {
1572           input_get ();
1573           n->content.push_back (c2);
1574           c2 = input_peek ();
1575         }
1576
1577       if (keywords.count(n->content))
1578         n->type = tok_keyword;
1579       else if (n->content[0] == '@')
1580         // makes it easier to detect illegal use of @words:
1581         n->type = tok_operator;
1582
1583       return n;
1584     }
1585
1586   else if (isdigit (c)) // positive literal
1587     {
1588       n->type = tok_number;
1589       n->content = (char) c;
1590
1591       while (isalnum (c2))
1592         {
1593           // NB: isalnum is very permissive.  We rely on strtol, called in
1594           // parser::parse_literal below, to confirm that the number string
1595           // is correctly formatted and in range.
1596
1597           input_get ();
1598           n->content.push_back (c2);
1599           c2 = input_peek ();
1600         }
1601       return n;
1602     }
1603
1604   else if (c == '\"')
1605     {
1606       n->type = tok_string;
1607       while (1)
1608         {
1609           c = input_get ();
1610
1611           if (c < 0 || c == '\n') // EOF or end of line before the closing quote
1612             {
1613               n->make_junk(_("Could not find matching closing quote"));
1614               return n;
1615             }
1616           if (c == '\"') // closing double-quotes
1617             break;
1618           else if (c == '\\') // see also input_put
1619             {
1620               c = input_get ();
1621               switch (c)
1622                 {
1623                 case 'x':
1624                   if (strverscmp(session.compatible.c_str(), "2.3") < 0)
1625                     goto the_default;
1626                 case 'a':
1627                 case 'b':
1628                 case 't':
1629                 case 'n':
1630                 case 'v':
1631                 case 'f':
1632                 case 'r':
1633                 case '0' ... '7': // NB: need only match the first digit
1634                 case '\\':
1635                   // Pass these escapes through to the string value
1636                   // being parsed; it will be emitted into a C literal.
1637                   // XXX: PR13371: perhaps we should evaluate them here
1638                   // (and re-quote them during translate.cxx emission).
1639                   n->content.push_back('\\');
1640
1641                   // fall through
1642                 default: the_default:
1643                     n->content.push_back(c); // any other escaped character is kept without its backslash
1644                     break;
1645                 }
1646             }
1647           else
1648             n->content.push_back(c);
1649         }
1650       return n;
1651     }
1652
1653   else if (ispunct (c))
1654     {
1655       int c3 = input_peek (1); // second character of lookahead
1656
1657       // NB: if we were to recognize negative numeric literals here,
1658       // we'd introduce another grammar ambiguity:
1659       // 1-1 would be parsed as tok_number(1) and tok_number(-1)
1660       // instead of tok_number(1) tok_operator('-') tok_number(1)
1661
1662       if (c == '#') // shell comment
1663         {
1664           unsigned this_line = cursor_line;
1665           do { c = input_get (); }
1666           while (c >= 0 && cursor_line == this_line);
1667           ate_comment = true;
1668           ate_whitespace = true;
1669           goto skip;
1670         }
1671       else if ((c == '/' && c2 == '/')) // C++ comment
1672         {
1673           unsigned this_line = cursor_line;
1674           do { c = input_get (); }
1675           while (c >= 0 && cursor_line == this_line);
1676           ate_comment = true;
1677           ate_whitespace = true;
1678           goto skip;
1679         }
1680       else if (c == '/' && c2 == '*') // C comment
1681         {
1682           (void) input_get (); // swallow '*' already in c2
1683           c = input_get ();
1684           c2 = input_get ();
1685           while (c2 >= 0) // NB: an unterminated comment runs to EOF without complaint
1686             {
1687               if (c == '*' && c2 == '/')
1688                 break;
1689               c = c2;
1690               c2 = input_get ();
1691             }
1692           ate_comment = true;
1693           ate_whitespace = true;
1694           goto skip;
1695         }
1696       else if (c == '%' && c2 == '{') // embedded code
1697         {
1698           n->type = tok_embedded;
1699           (void) input_get (); // swallow '{' already in c2
1700           c = input_get ();
1701           c2 = input_get ();
1702           while (c2 >= 0)
1703             {
1704               if (c == '%' && c2 == '}')
1705                 return n;
1706               if (c == '}' && c2 == '%') // possible typo
1707                 session.print_warning (_("possible erroneous closing '}%', use '%}'?"), n);
1708               n->content += c;
1709               c = c2;
1710               c2 = input_get ();
1711             }
1712
1713           n->make_junk(_("Could not find matching '%}' to close embedded function block"));
1714           return n;
1715         }
1716
1717       // We're committed to recognizing at least the first character
1718       // as an operator.
1719       n->type = tok_operator;
1720       n->content = c;
1721
1722       // match all valid operators, in decreasing size order
1723       if ((c == '<' && c2 == '<' && c3 == '<') ||
1724           (c == '<' && c2 == '<' && c3 == '=') ||
1725           (c == '>' && c2 == '>' && c3 == '='))
1726         {
1727           n->content += c2;
1728           n->content += c3;
1729           input_get (); input_get (); // swallow other two characters
1730         }
1731       else if ((c == '=' && c2 == '=') ||
1732                (c == '!' && c2 == '=') ||
1733                (c == '<' && c2 == '=') ||
1734                (c == '>' && c2 == '=') ||
1735                (c == '=' && c2 == '~') ||
1736                (c == '!' && c2 == '~') ||
1737                (c == '+' && c2 == '=') ||
1738                (c == '-' && c2 == '=') ||
1739                (c == '*' && c2 == '=') ||
1740                (c == '/' && c2 == '=') ||
1741                (c == '%' && c2 == '=') ||
1742                (c == '&' && c2 == '=') ||
1743                (c == '^' && c2 == '=') ||
1744                (c == '|' && c2 == '=') ||
1745                (c == '.' && c2 == '=') ||
1746                (c == '&' && c2 == '&') ||
1747                (c == '|' && c2 == '|') ||
1748                (c == '+' && c2 == '+') ||
1749                (c == '-' && c2 == '-') ||
1750                (c == '-' && c2 == '>') ||
1751                (c == '<' && c2 == '<') ||
1752                (c == '>' && c2 == '>') ||
1753                // preprocessor tokens
1754                (c == '%' && c2 == '(') ||
1755                (c == '%' && c2 == '?') ||
1756                (c == '%' && c2 == ':') ||
1757                (c == '%' && c2 == ')'))
1758         {
1759           n->content += c2;
1760           input_get (); // swallow other character
1761         }
1762
1763       return n;
1764     }
1765
1766   else
1767     { // none of the character classes above matched: junk token naming the byte in hex
1768       n->type = tok_junk;
1769       ostringstream s;
1770       s << "\\x" << hex << setw(2) << setfill('0') << c;
1771       n->content = s.str();
1772       n->msg = ""; // signal parser to emit "expected X, found junk" type error
1773       return n;
1774     }
1775 }
1776
1777 // ------------------------------------------------------------------------
1778
1779 void
1780 token::make_junk (const string new_msg)
1781 { // Reclassify this token as tok_junk, recording NEW_MSG as its message.
1782   type = tok_junk;
1783   msg = new_msg;
1784 }
1785
1786 // ------------------------------------------------------------------------
1787
1788 stapfile*
1789 parser::parse (bool errs_as_warnings)
1790 {
1791   stapfile* f = new stapfile;
1792   input.set_current_file (f);
1793
1794   bool empty = true;
1795
1796   while (1)
1797     {
1798       try
1799         {
1800           systemtap_v_seen = 0;
1801           const token* t = peek ();
1802           if (! t) // nice clean EOF, modulo any preprocessing that occurred
1803             break;
1804
1805           empty = false;
1806           if (t->type == tok_keyword && t->content == "probe")
1807             {
1808               context = con_probe;
1809               parse_probe (f->probes, f->aliases);
1810             }
1811           else if (t->type == tok_keyword && t->content == "global")
1812             {
1813               context = con_global;
1814               parse_global (f->globals, f->probes);
1815             }
1816           else if (t->type == tok_keyword && t->content == "function")
1817             {
1818               context = con_function;
1819               parse_functiondecl (f->functions);
1820             }
1821           else if (t->type == tok_embedded)
1822             {
1823               context = con_embedded;
1824               f->embeds.push_back (parse_embeddedcode ());
1825             }
1826           else
1827             {
1828               context = con_unknown;
1829               throw PARSE_ERROR (_("expected 'probe', 'global', 'function', or '%{'"));
1830             }
1831         }
1832       catch (parse_error& pe)
1833         {
1834           print_error (pe, errs_as_warnings);
1835
1836           // XXX: do we want tok_junk to be able to force skip_some behaviour?
1837           if (pe.skip_some) // for recovery
1838             // Quietly swallow all tokens until the next keyword we can start parsing from.
1839             while (1)
1840               try
1841                 {
1842                   {
1843                     const token* t = peek ();
1844                     if (! t)
1845                       break;
1846                     if (t->type == tok_keyword && t->content == "probe") break;
1847                     else if (t->type == tok_keyword && t->content == "global") break;
1848                     else if (t->type == tok_keyword && t->content == "function") break;
1849                     else if (t->type == tok_embedded) break;
1850                     swallow (); // swallow it
1851                   }
1852                 }
1853               catch (parse_error& pe2)
1854                 {
1855                   // parse error during recovery ... ugh
1856                   print_error (pe2);
1857                 }
1858         }
1859     }
1860
1861   if (empty)
1862     {
1863       // vary message depending on whether file was *actually* empty:
1864       cerr << (input.saw_tokens
1865                ? _F("Input file '%s' is empty after preprocessing.", input_name.c_str())
1866                : _F("Input file '%s' is empty.", input_name.c_str()))
1867            << endl;
1868       delete f;
1869       f = 0;
1870     }
1871   else if (num_errors > 0)
1872     {
1873       cerr << _NF("%d parse error.", "%d parse errors.", num_errors, num_errors) << endl;
1874       delete f;
1875       f = 0;
1876     }
1877
1878   input.set_current_file(0);
1879   return f;
1880 }
1881
1882
1883 probe*
1884 parser::parse_synthetic_probe (const token* chain, bool errs_as_warnings)
1885 {
1886   probe* p = NULL;
1887   stapfile* f = new stapfile;
1888   f->synthetic = true;
1889   input.set_current_file (f);
1890   input.set_current_token_chain (chain);
1891
1892   try
1893     {
1894       context = con_probe;
1895       parse_probe (f->probes, f->aliases);
1896
1897       if (f->probes.size() != 1 || !f->aliases.empty())
1898         throw PARSE_ERROR (_("expected a single synthetic probe"));
1899       p = f->probes[0];
1900     }
1901   catch (parse_error& pe)
1902     {
1903       print_error (pe, errs_as_warnings);
1904     }
1905
1906   // TODO check for unparsed tokens?
1907
1908   input.set_current_file(0);
1909   input.set_current_token_chain(0);
1910   return p;
1911 }
1912
1913
1914 void
1915 parser::parse_probe (std::vector<probe *> & probe_ret,
1916                      std::vector<probe_alias *> & alias_ret)
1917 {
1918   const token* t0 = next ();
1919   if (! (t0->type == tok_keyword && t0->content == "probe"))
1920     throw PARSE_ERROR (_("expected 'probe'"));
1921
1922   vector<probe_point *> aliases;
1923   vector<probe_point *> locations;
1924
1925   bool equals_ok = true;
1926
1927   int epilogue_alias = 0;
1928
1929   while (1)
1930     {
1931       probe_point * pp = parse_probe_point ();
1932
1933       const token* t = peek ();
1934       if (equals_ok && t
1935           && t->type == tok_operator && t->content == "=")
1936         {
1937           if (pp->optional || pp->sufficient)
1938             throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pp->components.front()->tok);
1939           aliases.push_back(pp);
1940           swallow ();
1941           continue;
1942         }
1943       else if (equals_ok && t
1944           && t->type == tok_operator && t->content == "+=")
1945         {
1946           if (pp->optional || pp->sufficient)
1947             throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pp->components.front()->tok);
1948           aliases.push_back(pp);
1949           epilogue_alias = 1;
1950           swallow ();
1951           continue;
1952         }
1953       else if (t && t->type == tok_operator && t->content == ",")
1954         {
1955           locations.push_back(pp);
1956           equals_ok = false;
1957           swallow ();
1958           continue;
1959         }
1960       else if (t && t->type == tok_operator && t->content == "{")
1961         {
1962           locations.push_back(pp);
1963           break;
1964         }
1965       else
1966         throw PARSE_ERROR (_("expected probe point specifier"));
1967     }
1968
1969   if (aliases.empty())
1970     {
1971       probe* p = new probe;
1972       p->tok = t0;
1973       p->locations = locations;
1974       p->body = parse_stmt_block ();
1975       p->privileged = privileged;
1976       p->systemtap_v_conditional = systemtap_v_seen;
1977       probe_ret.push_back (p);
1978     }
1979   else
1980     {
1981       probe_alias* p = new probe_alias (aliases);
1982       if(epilogue_alias)
1983         p->epilogue_style = true;
1984       else
1985         p->epilogue_style = false;
1986       p->tok = t0;
1987       p->locations = locations;
1988       p->body = parse_stmt_block ();
1989       p->privileged = privileged;
1990       p->systemtap_v_conditional = systemtap_v_seen;
1991       alias_ret.push_back (p);
1992     }
1993 }
1994
1995
1996 embeddedcode*
1997 parser::parse_embeddedcode ()
1998 {
1999   embeddedcode* e = new embeddedcode;
2000   const token* t = next ();
2001   if (t->type != tok_embedded)
2002     throw PARSE_ERROR (_("expected '%{'"));
2003
2004   if (! privileged)
2005     throw PARSE_ERROR (_("embedded code in unprivileged script; need stap -g"),
2006                        false /* don't skip tokens for parse resumption */);
2007
2008   e->tok = t;
2009   e->code = t->content;
2010   return e;
2011 }
2012
2013
2014 block*
2015 parser::parse_stmt_block ()
2016 {
2017   block* pb = new block;
2018
2019   const token* t = next ();
2020   if (! (t->type == tok_operator && t->content == "{"))
2021     throw PARSE_ERROR (_("expected '{'"));
2022
2023   pb->tok = t;
2024
2025   while (1)
2026     {
2027       t = peek ();
2028       if (t && t->type == tok_operator && t->content == "}")
2029         {
2030           swallow ();
2031           break;
2032         }
2033       pb->statements.push_back (parse_statement ());
2034     }
2035
2036   return pb;
2037 }
2038
2039
2040 try_block*
2041 parser::parse_try_block ()
2042 {
2043   try_block* pb = new try_block;
2044
2045   pb->tok = expect_kw_token ("try");
2046   pb->try_block = parse_stmt_block();
2047   expect_kw ("catch");
2048
2049   const token* t = peek ();
2050   if (t != NULL && t->type == tok_operator && t->content == "(")
2051     {
2052       swallow (); // swallow the '('
2053
2054       t = next();
2055       if (! (t->type == tok_identifier))
2056         throw PARSE_ERROR (_("expected identifier"));
2057       symbol* sym = new symbol;
2058       sym->tok = t;
2059       sym->name = t->content;
2060       pb->catch_error_var = sym;
2061
2062       expect_op (")");
2063     }
2064   else
2065     pb->catch_error_var = 0;
2066
2067   pb->catch_block = parse_stmt_block();
2068
2069   return pb;
2070 }
2071
2072
2073
2074 statement*
2075 parser::parse_statement ()
2076 {
2077   statement *ret;
2078   const token* t = peek ();
2079   if (t && t->type == tok_operator && t->content == ";")
2080     return new null_statement (next ());
2081   else if (t && t->type == tok_operator && t->content == "{")
2082     return parse_stmt_block (); // Don't squash semicolons.
2083   else if (t && t->type == tok_keyword && t->content == "try")
2084     return parse_try_block (); // Don't squash semicolons.
2085   else if (t && t->type == tok_keyword && t->content == "if")
2086     return parse_if_statement (); // Don't squash semicolons.
2087   else if (t && t->type == tok_keyword && t->content == "for")
2088     return parse_for_loop (); // Don't squash semicolons.
2089   else if (t && t->type == tok_keyword && t->content == "foreach")
2090     return parse_foreach_loop (); // Don't squash semicolons.
2091   else if (t && t->type == tok_keyword && t->content == "while")
2092     return parse_while_loop (); // Don't squash semicolons.
2093   else if (t && t->type == tok_keyword && t->content == "return")
2094     ret = parse_return_statement ();
2095   else if (t && t->type == tok_keyword && t->content == "delete")
2096     ret = parse_delete_statement ();
2097   else if (t && t->type == tok_keyword && t->content == "break")
2098     ret = parse_break_statement ();
2099   else if (t && t->type == tok_keyword && t->content == "continue")
2100     ret = parse_continue_statement ();
2101   else if (t && t->type == tok_keyword && t->content == "next")
2102     ret = parse_next_statement ();
2103   else if (t && (t->type == tok_operator || // expressions are flexible
2104                  t->type == tok_identifier ||
2105                  t->type == tok_number ||
2106                  t->type == tok_string ||
2107                  t->type == tok_embedded ))
2108     ret = parse_expr_statement ();
2109   // XXX: consider generally accepting tok_embedded here too
2110   else
2111     throw PARSE_ERROR (_("expected statement"));
2112
2113   // Squash "empty" trailing colons after any "non-block-like" statement.
2114   t = peek ();
2115   if (t && t->type == tok_operator && t->content == ";")
2116     {
2117       swallow (); // Silently eat trailing ; after statement
2118     }
2119
2120   return ret;
2121 }
2122
2123
2124 void
2125 parser::parse_global (vector <vardecl*>& globals, vector<probe*>&)
2126 {
2127   const token* t0 = next ();
2128   if (! (t0->type == tok_keyword && t0->content == "global"))
2129     throw PARSE_ERROR (_("expected 'global'"));
2130   swallow ();
2131
2132   while (1)
2133     {
2134       const token* t = next ();
2135       if (! (t->type == tok_identifier))
2136         throw PARSE_ERROR (_("expected identifier"));
2137
2138       for (unsigned i=0; i<globals.size(); i++)
2139         if (globals[i]->name == t->content)
2140           throw PARSE_ERROR (_("duplicate global name"));
2141
2142       vardecl* d = new vardecl;
2143       d->name = t->content;
2144       d->tok = t;
2145       d->systemtap_v_conditional = systemtap_v_seen;
2146       globals.push_back (d);
2147
2148       t = peek ();
2149
2150       if(t && t->type == tok_operator && t->content == "%") //wrapping
2151         {
2152           d->wrap = true;
2153           swallow ();
2154           t = peek();
2155         }
2156
2157       if (t && t->type == tok_operator && t->content == "[") // array size
2158         {
2159           int64_t size;
2160           swallow ();
2161           expect_number(size);
2162           if (size <= 0 || size > 1000000) // arbitrary max
2163             throw PARSE_ERROR(_("array size out of range"));
2164           d->maxsize = (int)size;
2165           expect_known(tok_operator, "]");
2166           t = peek ();
2167         }
2168
2169       if (t && t->type == tok_operator && t->content == "=") // initialization
2170         {
2171           if (!d->compatible_arity(0))
2172             throw PARSE_ERROR(_("only scalar globals can be initialized"));
2173           d->set_arity(0, t);
2174           next (); // Don't swallow, set_arity() used the peeked token.
2175           d->init = parse_literal ();
2176           d->type = d->init->type;
2177           t = peek ();
2178         }
2179
2180       if (t && t->type == tok_operator && t->content == ";") // termination
2181         {
2182           swallow ();
2183           break;
2184         }
2185
2186       if (t && t->type == tok_operator && t->content == ",") // next global
2187         {
2188           swallow ();
2189           continue;
2190         }
2191       else
2192         break;
2193     }
2194 }
2195
2196
2197 void
2198 parser::parse_functiondecl (std::vector<functiondecl*>& functions)
2199 {
2200   const token* t = next ();
2201   if (! (t->type == tok_keyword && t->content == "function"))
2202     throw PARSE_ERROR (_("expected 'function'"));
2203   swallow ();
2204
2205   t = next ();
2206   if (! (t->type == tok_identifier)
2207       && ! (t->type == tok_keyword
2208             && (t->content == "string" || t->content == "long")))
2209     throw PARSE_ERROR (_("expected identifier"));
2210
2211   for (unsigned i=0; i<functions.size(); i++)
2212     if (functions[i]->name == t->content)
2213       throw PARSE_ERROR (_("duplicate function name"));
2214
2215   functiondecl *fd = new functiondecl ();
2216   fd->name = t->content;
2217   fd->tok = t;
2218
2219   t = next ();
2220   if (t->type == tok_operator && t->content == ":")
2221     {
2222       swallow ();
2223       t = next ();
2224       if (t->type == tok_keyword && t->content == "string")
2225         fd->type = pe_string;
2226       else if (t->type == tok_keyword && t->content == "long")
2227         fd->type = pe_long;
2228       else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2229       swallow ();
2230
2231       t = next ();
2232     }
2233
2234   if (! (t->type == tok_operator && t->content == "("))
2235     throw PARSE_ERROR (_("expected '('"));
2236   swallow ();
2237
2238   while (1)
2239     {
2240       t = next ();
2241
2242       // permit zero-argument functions
2243       if (t->type == tok_operator && t->content == ")")
2244         {
2245           swallow ();
2246           break;
2247         }
2248       else if (! (t->type == tok_identifier))
2249         throw PARSE_ERROR (_("expected identifier"));
2250       vardecl* vd = new vardecl;
2251       vd->name = t->content;
2252       vd->tok = t;
2253       fd->formal_args.push_back (vd);
2254       fd->systemtap_v_conditional = systemtap_v_seen;
2255
2256       t = next ();
2257       if (t->type == tok_operator && t->content == ":")
2258         {
2259           swallow ();
2260           t = next ();
2261           if (t->type == tok_keyword && t->content == "string")
2262             vd->type = pe_string;
2263           else if (t->type == tok_keyword && t->content == "long")
2264             vd->type = pe_long;
2265           else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2266           swallow ();
2267           t = next ();
2268         }
2269       if (t->type == tok_operator && t->content == ")")
2270         {
2271           swallow ();
2272           break;
2273         }
2274       if (t->type == tok_operator && t->content == ",")
2275         {
2276           swallow ();
2277           continue;
2278         }
2279       else
2280         throw PARSE_ERROR (_("expected ',' or ')'"));
2281     }
2282
2283   t = peek ();
2284   if (t && t->type == tok_embedded)
2285     fd->body = parse_embeddedcode ();
2286   else
2287     fd->body = parse_stmt_block ();
2288
2289   functions.push_back (fd);
2290 }
2291
2292
2293 probe_point*
2294 parser::parse_probe_point ()
2295 {
2296   probe_point* pl = new probe_point;
2297
2298   while (1)
2299     {
2300       const token* t = next ();
2301       if (! (t->type == tok_identifier
2302              // we must allow ".return" and ".function", which are keywords
2303              || t->type == tok_keyword
2304              // we must allow "*", due to being an operator
2305              || (t->type == tok_operator && t->content == "*")))
2306         throw PARSE_ERROR (_("expected identifier or '*'"));
2307
2308       // loop which reconstitutes an identifier with wildcards
2309       string content = t->content;
2310       while (1)
2311         {
2312           const token* u = peek();
2313           if (u == NULL)
2314             break;
2315           // ensure pieces of the identifier are adjacent:
2316           if (input.ate_whitespace)
2317             break;
2318           // ensure pieces of the identifier are valid:
2319           if (! (u->type == tok_identifier
2320                  // we must allow arbitrary keywords with a wildcard
2321                  || u->type == tok_keyword
2322                  // we must allow "*", due to being an operator
2323                  || (u->type == tok_operator && u->content == "*")))
2324             break;
2325
2326           // append u to t
2327           content = content + u->content;
2328
2329           // consume u
2330           swallow ();
2331         }
2332       // get around const-ness of t:
2333       token* new_t = new token(*t);
2334       new_t->content = content;
2335       delete t; t = new_t;
2336
2337       probe_point::component* c = new probe_point::component;
2338       c->functor = t->content;
2339       c->tok = t;
2340       pl->components.push_back (c);
2341       // NB we may add c->arg soon
2342
2343       t = peek ();
2344
2345       // consume optional parameter
2346       if (t && t->type == tok_operator && t->content == "(")
2347         {
2348           swallow (); // consume "("
2349           c->arg = parse_literal ();
2350
2351           t = next ();
2352           if (! (t->type == tok_operator && t->content == ")"))
2353             throw PARSE_ERROR (_("expected ')'"));
2354           swallow ();
2355
2356           t = peek ();
2357         }
2358
2359       if (t && t->type == tok_operator && t->content == ".")
2360         {
2361           swallow ();
2362           continue;
2363         }
2364
2365       // We only fall through here at the end of        a probe point (past
2366       // all the dotted/parametrized components).
2367
2368       if (t && t->type == tok_operator &&
2369           (t->content == "?" || t->content == "!"))
2370         {
2371           pl->optional = true;
2372           if (t->content == "!") pl->sufficient = true;
2373           // NB: sufficient implies optional
2374           swallow ();
2375           t = peek ();
2376           // fall through
2377         }
2378
2379       if (t && t->type == tok_keyword && t->content == "if")
2380         {
2381           swallow ();
2382           t = peek ();
2383           if (!(t && t->type == tok_operator && t->content == "("))
2384             throw PARSE_ERROR (_("expected '('"));
2385           swallow ();
2386
2387           pl->condition = parse_expression ();
2388
2389           t = peek ();
2390           if (!(t && t->type == tok_operator && t->content == ")"))
2391             throw PARSE_ERROR (_("expected ')'"));
2392           swallow ();
2393           t = peek ();
2394           // fall through
2395         }
2396
2397       if (t && t->type == tok_operator
2398           && (t->content == "{" || t->content == "," ||
2399               t->content == "=" || t->content == "+=" ))
2400         break;
2401
2402       throw PARSE_ERROR (_("expected one of '. , ( ? ! { = +='"));
2403     }
2404
2405   return pl;
2406 }
2407
2408
2409 literal_string*
2410 parser::consume_string_literals(const token *t)
2411 {
2412   literal_string *ls = new literal_string (t->content);
2413
2414   // PR11208: check if the next token is also a string literal;
2415   // auto-concatenate it.  This is complicated to the extent that we
2416   // need to skip intermediate whitespace.
2417   //
2418   // NB for versions prior to 2.0: but don't skip over intervening comments
2419   const token *n = peek();
2420   while (n != NULL && n->type == tok_string
2421          && ! (strverscmp(session.compatible.c_str(), "2.0") < 0
2422                && input.ate_comment))
2423     {
2424       ls->value.append(next()->content); // consume and append the token
2425       n = peek();
2426     }
2427   return ls;
2428 }
2429
2430
2431 // Parse a string literal and perform backslash escaping on the contents:
2432 literal_string*
2433 parser::parse_literal_string ()
2434 {
2435   const token* t = next ();
2436   literal_string* l;
2437   if (t->type == tok_string)
2438     l = consume_string_literals (t);
2439   else
2440     throw PARSE_ERROR (_("expected literal string"));
2441
2442   l->tok = t;
2443   return l;
2444 }
2445
2446
2447 literal*
2448 parser::parse_literal ()
2449 {
2450   const token* t = next ();
2451   literal* l;
2452   if (t->type == tok_string)
2453     {
2454       l = consume_string_literals (t);
2455     }
2456   else
2457     {
2458       bool neg = false;
2459       if (t->type == tok_operator && t->content == "-")
2460         {
2461           neg = true;
2462           swallow ();
2463           t = next ();
2464         }
2465
2466       if (t->type == tok_number)
2467         {
2468           const char* startp = t->content.c_str ();
2469           char* endp = (char*) startp;
2470
2471           // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
2472           // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
2473           // since the lexer only gives us positive digit strings, but we'll
2474           // limit it to LLONG_MIN when a '-' operator is fed into the literal.
2475           errno = 0;
2476           long long value = (long long) strtoull (startp, & endp, 0);
2477           if (errno == ERANGE || errno == EINVAL || *endp != '\0'
2478               || (neg && (unsigned long long) value > 9223372036854775808ULL)
2479               || (unsigned long long) value > 18446744073709551615ULL
2480               || value < -9223372036854775807LL-1)
2481             throw PARSE_ERROR (_("number invalid or out of range"));
2482
2483           if (neg)
2484             value = -value;
2485
2486           l = new literal_number (value);
2487         }
2488       else
2489         throw PARSE_ERROR (_("expected literal string or number"));
2490     }
2491
2492   l->tok = t;
2493   return l;
2494 }
2495
2496
2497 if_statement*
2498 parser::parse_if_statement ()
2499 {
2500   const token* t = next ();
2501   if (! (t->type == tok_keyword && t->content == "if"))
2502     throw PARSE_ERROR (_("expected 'if'"));
2503   if_statement* s = new if_statement;
2504   s->tok = t;
2505
2506   t = next ();
2507   if (! (t->type == tok_operator && t->content == "("))
2508     throw PARSE_ERROR (_("expected '('"));
2509   swallow ();
2510
2511   s->condition = parse_expression ();
2512
2513   t = next ();
2514   if (! (t->type == tok_operator && t->content == ")"))
2515     throw PARSE_ERROR (_("expected ')'"));
2516   swallow ();
2517
2518   s->thenblock = parse_statement ();
2519
2520   t = peek ();
2521   if (t && t->type == tok_keyword && t->content == "else")
2522     {
2523       swallow ();
2524       s->elseblock = parse_statement ();
2525     }
2526   else
2527     s->elseblock = 0; // in case not otherwise initialized
2528
2529   return s;
2530 }
2531
2532
2533 expr_statement*
2534 parser::parse_expr_statement ()
2535 {
2536   expr_statement *es = new expr_statement;
2537   const token* t = peek ();
2538   if (t == NULL)
2539     throw PARSE_ERROR (_("expression statement expected"));
2540   // Copy, we only peeked, parse_expression might swallow.
2541   es->tok = new token (*t);
2542   es->value = parse_expression ();
2543   return es;
2544 }
2545
2546
2547 return_statement*
2548 parser::parse_return_statement ()
2549 {
2550   const token* t = next ();
2551   if (! (t->type == tok_keyword && t->content == "return"))
2552     throw PARSE_ERROR (_("expected 'return'"));
2553   if (context != con_function)
2554     throw PARSE_ERROR (_("found 'return' not in function context"));
2555   return_statement* s = new return_statement;
2556   s->tok = t;
2557   s->value = parse_expression ();
2558   return s;
2559 }
2560
2561
2562 delete_statement*
2563 parser::parse_delete_statement ()
2564 {
2565   const token* t = next ();
2566   if (! (t->type == tok_keyword && t->content == "delete"))
2567     throw PARSE_ERROR (_("expected 'delete'"));
2568   delete_statement* s = new delete_statement;
2569   s->tok = t;
2570   s->value = parse_expression ();
2571   return s;
2572 }
2573
2574
2575 next_statement*
2576 parser::parse_next_statement ()
2577 {
2578   const token* t = next ();
2579   if (! (t->type == tok_keyword && t->content == "next"))
2580     throw PARSE_ERROR (_("expected 'next'"));
2581   if (context != con_probe)
2582     throw PARSE_ERROR (_("found 'next' not in probe context"));
2583   next_statement* s = new next_statement;
2584   s->tok = t;
2585   return s;
2586 }
2587
2588
2589 break_statement*
2590 parser::parse_break_statement ()
2591 {
2592   const token* t = next ();
2593   if (! (t->type == tok_keyword && t->content == "break"))
2594     throw PARSE_ERROR (_("expected 'break'"));
2595   break_statement* s = new break_statement;
2596   s->tok = t;
2597   return s;
2598 }
2599
2600
2601 continue_statement*
2602 parser::parse_continue_statement ()
2603 {
2604   const token* t = next ();
2605   if (! (t->type == tok_keyword && t->content == "continue"))
2606     throw PARSE_ERROR (_("expected 'continue'"));
2607   continue_statement* s = new continue_statement;
2608   s->tok = t;
2609   return s;
2610 }
2611
2612
2613 for_loop*
2614 parser::parse_for_loop ()
2615 {
2616   const token* t = next ();
2617   if (! (t->type == tok_keyword && t->content == "for"))
2618     throw PARSE_ERROR (_("expected 'for'"));
2619   for_loop* s = new for_loop;
2620   s->tok = t;
2621
2622   t = next ();
2623   if (! (t->type == tok_operator && t->content == "("))
2624     throw PARSE_ERROR (_("expected '('"));
2625   swallow ();
2626
2627   // initializer + ";"
2628   t = peek ();
2629   if (t && t->type == tok_operator && t->content == ";")
2630     {
2631       s->init = 0;
2632       swallow ();
2633     }
2634   else
2635     {
2636       s->init = parse_expr_statement ();
2637       t = next ();
2638       if (! (t->type == tok_operator && t->content == ";"))
2639         throw PARSE_ERROR (_("expected ';'"));
2640       swallow ();
2641     }
2642
2643   // condition + ";"
2644   t = peek ();
2645   if (t && t->type == tok_operator && t->content == ";")
2646     {
2647       literal_number* l = new literal_number(1);
2648       s->cond = l;
2649       s->cond->tok = next ();
2650     }
2651   else
2652     {
2653       s->cond = parse_expression ();
2654       t = next ();
2655       if (! (t->type == tok_operator && t->content == ";"))
2656         throw PARSE_ERROR (_("expected ';'"));
2657       swallow ();
2658     }
2659
2660   // increment + ")"
2661   t = peek ();
2662   if (t && t->type == tok_operator && t->content == ")")
2663     {
2664       s->incr = 0;
2665       swallow ();
2666     }
2667   else
2668     {
2669       s->incr = parse_expr_statement ();
2670       t = next ();
2671       if (! (t->type == tok_operator && t->content == ")"))
2672         throw PARSE_ERROR (_("expected ')'"));
2673       swallow ();
2674     }
2675
2676   // block
2677   s->block = parse_statement ();
2678
2679   return s;
2680 }
2681
2682
2683 for_loop*
2684 parser::parse_while_loop ()
2685 {
2686   const token* t = next ();
2687   if (! (t->type == tok_keyword && t->content == "while"))
2688     throw PARSE_ERROR (_("expected 'while'"));
2689   for_loop* s = new for_loop;
2690   s->tok = t;
2691
2692   t = next ();
2693   if (! (t->type == tok_operator && t->content == "("))
2694     throw PARSE_ERROR (_("expected '('"));
2695   swallow ();
2696
2697   // dummy init and incr fields
2698   s->init = 0;
2699   s->incr = 0;
2700
2701   // condition
2702   s->cond = parse_expression ();
2703
2704   t = next ();
2705   if (! (t->type == tok_operator && t->content == ")"))
2706     throw PARSE_ERROR (_("expected ')'"));
2707   swallow ();
2708
2709   // block
2710   s->block = parse_statement ();
2711
2712   return s;
2713 }
2714
2715
2716 foreach_loop*
2717 parser::parse_foreach_loop ()
2718 {
2719   const token* t = next ();
2720   if (! (t->type == tok_keyword && t->content == "foreach"))
2721     throw PARSE_ERROR (_("expected 'foreach'"));
2722   foreach_loop* s = new foreach_loop;
2723   s->tok = t;
2724   s->sort_direction = 0;
2725   s->sort_aggr = sc_none;
2726   s->value = NULL;
2727   s->limit = NULL;
2728
2729   t = next ();
2730   if (! (t->type == tok_operator && t->content == "("))
2731     throw PARSE_ERROR (_("expected '('"));
2732   swallow ();
2733
2734   symbol* lookahead_sym = NULL;
2735   int lookahead_sort = 0;
2736
2737   t = peek ();
2738   if (t && t->type == tok_identifier)
2739     {
2740       next ();
2741       lookahead_sym = new symbol;
2742       lookahead_sym->tok = t;
2743       lookahead_sym->name = t->content;
2744
2745       t = peek ();
2746       if (t && t->type == tok_operator &&
2747           (t->content == "+" || t->content == "-"))
2748         {
2749           lookahead_sort = (t->content == "+") ? 1 : -1;
2750           swallow ();
2751         }
2752
2753       t = peek ();
2754       if (t && t->type == tok_operator && t->content == "=")
2755         {
2756           swallow ();
2757           s->value = lookahead_sym;
2758           if (lookahead_sort)
2759             {
2760               s->sort_direction = lookahead_sort;
2761               s->sort_column = 0;
2762             }
2763           lookahead_sym = NULL;
2764         }
2765     }
2766
2767   // see also parse_array_in
2768
2769   bool parenthesized = false;
2770   t = peek ();
2771   if (!lookahead_sym && t && t->type == tok_operator && t->content == "[")
2772     {
2773       swallow ();
2774       parenthesized = true;
2775     }
2776
2777   if (lookahead_sym)
2778     {
2779       s->indexes.push_back (lookahead_sym);
2780       if (lookahead_sort)
2781         {
2782           s->sort_direction = lookahead_sort;
2783           s->sort_column = 1;
2784         }
2785       lookahead_sym = NULL;
2786     }
2787   else while (1)
2788     {
2789       t = next ();
2790       if (! (t->type == tok_identifier))
2791         throw PARSE_ERROR (_("expected identifier"));
2792       symbol* sym = new symbol;
2793       sym->tok = t;
2794       sym->name = t->content;
2795       s->indexes.push_back (sym);
2796
2797       t = peek ();
2798       if (t && t->type == tok_operator &&
2799           (t->content == "+" || t->content == "-"))
2800         {
2801           if (s->sort_direction)
2802             throw PARSE_ERROR (_("multiple sort directives"));
2803           s->sort_direction = (t->content == "+") ? 1 : -1;
2804           s->sort_column = s->indexes.size();
2805           swallow ();
2806         }
2807
2808       if (parenthesized)
2809         {
2810           t = peek ();
2811           if (t && t->type == tok_operator && t->content == ",")
2812             {
2813               swallow ();
2814               continue;
2815             }
2816           else if (t && t->type == tok_operator && t->content == "]")
2817             {
2818               swallow ();
2819               break;
2820             }
2821           else
2822             throw PARSE_ERROR (_("expected ',' or ']'"));
2823         }
2824       else
2825         break; // expecting only one expression
2826     }
2827
2828   t = next ();
2829   if (! (t->type == tok_keyword && t->content == "in"))
2830     throw PARSE_ERROR (_("expected 'in'"));
2831   swallow ();
2832
2833   s->base = parse_indexable();
2834
2835   // check for atword, see also expect_ident_or_atword,
2836   t = peek ();
2837   if (t && t->type == tok_operator && t->content[0] == '@')
2838     {
2839       if (t->content == "@avg") s->sort_aggr = sc_average;
2840       else if (t->content == "@min") s->sort_aggr = sc_min;
2841       else if (t->content == "@max") s->sort_aggr = sc_max;
2842       else if (t->content == "@count") s->sort_aggr = sc_count;
2843       else if (t->content == "@sum") s->sort_aggr = sc_sum;
2844       else throw PARSE_ERROR(_("expected statistical operation"));
2845       swallow();
2846
2847       t = peek ();
2848       if (! (t && t->type == tok_operator && (t->content == "+" || t->content == "-")))
2849         throw PARSE_ERROR(_("expected sort directive"));
2850     }
2851
2852   t = peek ();
2853   if (t && t->type == tok_operator &&
2854       (t->content == "+" || t->content == "-"))
2855     {
2856       if (s->sort_direction)
2857         throw PARSE_ERROR (_("multiple sort directives"));
2858       s->sort_direction = (t->content == "+") ? 1 : -1;
2859       s->sort_column = 0;
2860       swallow ();
2861     }
2862
2863   t = peek ();
2864   if (tok_is(t, tok_keyword, "limit"))
2865     {
2866       swallow ();                       // get past the "limit"
2867       s->limit = parse_expression ();
2868     }
2869
2870   t = next ();
2871   if (! (t->type == tok_operator && t->content == ")"))
2872     throw PARSE_ERROR ("expected ')'");
2873   swallow ();
2874
2875   s->block = parse_statement ();
2876   return s;
2877 }
2878
2879
2880 expression*
2881 parser::parse_expression ()
2882 {
2883   return parse_assignment ();
2884 }
2885
2886
2887 expression*
2888 parser::parse_assignment ()
2889 {
2890   expression* op1 = parse_ternary ();
2891
2892   const token* t = peek ();
2893   // right-associative operators
2894   if (t && t->type == tok_operator
2895       && (t->content == "=" ||
2896           t->content == "<<<" ||
2897           t->content == "+=" ||
2898           t->content == "-=" ||
2899           t->content == "*=" ||
2900           t->content == "/=" ||
2901           t->content == "%=" ||
2902           t->content == "<<=" ||
2903           t->content == ">>=" ||
2904           t->content == "&=" ||
2905           t->content == "^=" ||
2906           t->content == "|=" ||
2907           t->content == ".=" ||
2908           false))
2909     {
2910       // NB: lvalueness is checked during elaboration / translation
2911       assignment* e = new assignment;
2912       e->left = op1;
2913       e->op = t->content;
2914       e->tok = t;
2915       next ();
2916       e->right = parse_expression ();
2917       op1 = e;
2918     }
2919
2920   return op1;
2921 }
2922
2923
2924 expression*
2925 parser::parse_ternary ()
2926 {
2927   expression* op1 = parse_logical_or ();
2928
2929   const token* t = peek ();
2930   if (t && t->type == tok_operator && t->content == "?")
2931     {
2932       ternary_expression* e = new ternary_expression;
2933       e->tok = t;
2934       e->cond = op1;
2935       next ();
2936       e->truevalue = parse_expression (); // XXX
2937
2938       t = next ();
2939       if (! (t->type == tok_operator && t->content == ":"))
2940         throw PARSE_ERROR (_("expected ':'"));
2941       swallow ();
2942
2943       e->falsevalue = parse_expression (); // XXX
2944       return e;
2945     }
2946   else
2947     return op1;
2948 }
2949
2950
2951 expression*
2952 parser::parse_logical_or ()
2953 {
2954   expression* op1 = parse_logical_and ();
2955
2956   const token* t = peek ();
2957   while (t && t->type == tok_operator && t->content == "||")
2958     {
2959       logical_or_expr* e = new logical_or_expr;
2960       e->tok = t;
2961       e->op = t->content;
2962       e->left = op1;
2963       next ();
2964       e->right = parse_logical_and ();
2965       op1 = e;
2966       t = peek ();
2967     }
2968
2969   return op1;
2970 }
2971
2972
2973 expression*
2974 parser::parse_logical_and ()
2975 {
2976   expression* op1 = parse_boolean_or ();
2977
2978   const token* t = peek ();
2979   while (t && t->type == tok_operator && t->content == "&&")
2980     {
2981       logical_and_expr *e = new logical_and_expr;
2982       e->left = op1;
2983       e->op = t->content;
2984       e->tok = t;
2985       next ();
2986       e->right = parse_boolean_or ();
2987       op1 = e;
2988       t = peek ();
2989     }
2990
2991   return op1;
2992 }
2993
2994
2995 expression*
2996 parser::parse_boolean_or ()
2997 {
2998   expression* op1 = parse_boolean_xor ();
2999
3000   const token* t = peek ();
3001   while (t && t->type == tok_operator && t->content == "|")
3002     {
3003       binary_expression* e = new binary_expression;
3004       e->left = op1;
3005       e->op = t->content;
3006       e->tok = t;
3007       next ();
3008       e->right = parse_boolean_xor ();
3009       op1 = e;
3010       t = peek ();
3011     }
3012
3013   return op1;
3014 }
3015
3016
3017 expression*
3018 parser::parse_boolean_xor ()
3019 {
3020   expression* op1 = parse_boolean_and ();
3021
3022   const token* t = peek ();
3023   while (t && t->type == tok_operator && t->content == "^")
3024     {
3025       binary_expression* e = new binary_expression;
3026       e->left = op1;
3027       e->op = t->content;
3028       e->tok = t;
3029       next ();
3030       e->right = parse_boolean_and ();
3031       op1 = e;
3032       t = peek ();
3033     }
3034
3035   return op1;
3036 }
3037
3038
3039 expression*
3040 parser::parse_boolean_and ()
3041 {
3042   expression* op1 = parse_array_in ();
3043
3044   const token* t = peek ();
3045   while (t && t->type == tok_operator && t->content == "&")
3046     {
3047       binary_expression* e = new binary_expression;
3048       e->left = op1;
3049       e->op = t->content;
3050       e->tok = t;
3051       next ();
3052       e->right = parse_array_in ();
3053       op1 = e;
3054       t = peek ();
3055     }
3056
3057   return op1;
3058 }
3059
3060
3061 expression*
3062 parser::parse_array_in ()
3063 {
3064   // This is a very tricky case.  All these are legit expressions:
3065   // "a in b"  "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
3066   vector<expression*> indexes;
3067   bool parenthesized = false;
3068
3069   const token* t = peek ();
3070   if (t && t->type == tok_operator && t->content == "[")
3071     {
3072       swallow ();
3073       parenthesized = true;
3074     }
3075
3076   while (1)
3077     {
3078       expression* op1 = parse_comparison_or_regex_query ();
3079       indexes.push_back (op1);
3080
3081       if (parenthesized)
3082         {
3083           const token* t = peek ();
3084           if (t && t->type == tok_operator && t->content == ",")
3085             {
3086               swallow ();
3087               continue;
3088             }
3089           else if (t && t->type == tok_operator && t->content == "]")
3090             {
3091               swallow ();
3092               break;
3093             }
3094           else
3095             throw PARSE_ERROR (_("expected ',' or ']'"));
3096         }
3097       else
3098         break; // expecting only one expression
3099     }
3100
3101   t = peek ();
3102   if (t && t->type == tok_keyword && t->content == "in")
3103     {
3104       array_in *e = new array_in;
3105       e->tok = t;
3106       next ();
3107
3108       arrayindex* a = new arrayindex;
3109       a->indexes = indexes;
3110       a->base = parse_indexable();
3111       a->tok = a->base->tok;
3112       e->operand = a;
3113       return e;
3114     }
3115   else if (indexes.size() == 1) // no "in" - need one expression only
3116     return indexes[0];
3117   else
3118     throw PARSE_ERROR (_("unexpected comma-separated expression list"));
3119 }
3120
3121
3122 expression*
3123 parser::parse_comparison_or_regex_query ()
3124 {
3125   expression* op1 = parse_shift ();
3126
3127   // XXX precedence -- perhaps a =~ b == c =~ d --> (a =~ b) == (c =~ d) ?
3128   const token *t = peek();
3129   if (t && t->type == tok_operator
3130       && (t->content == "=~" ||
3131           t->content == "!~"))
3132     {
3133       regex_query* r = new regex_query;
3134       r->left = op1;
3135       r->op = t->content;
3136       r->tok = t;
3137       next ();
3138       r->right = parse_literal_string();
3139       op1 = r;
3140       t = peek ();
3141     }
3142   else while (t && t->type == tok_operator
3143       && (t->content == ">" ||
3144           t->content == "<" ||
3145           t->content == "==" ||
3146           t->content == "!=" ||
3147           t->content == "<=" ||
3148           t->content == ">="))
3149     {
3150       comparison* e = new comparison;
3151       e->left = op1;
3152       e->op = t->content;
3153       e->tok = t;
3154       next ();
3155       e->right = parse_shift ();
3156       op1 = e;
3157       t = peek ();
3158     }
3159
3160   return op1;
3161 }
3162
3163
3164 expression*
3165 parser::parse_shift ()
3166 {
3167   expression* op1 = parse_concatenation ();
3168
3169   const token* t = peek ();
3170   while (t && t->type == tok_operator &&
3171          (t->content == "<<" || t->content == ">>"))
3172     {
3173       binary_expression* e = new binary_expression;
3174       e->left = op1;
3175       e->op = t->content;
3176       e->tok = t;
3177       next ();
3178       e->right = parse_concatenation ();
3179       op1 = e;
3180       t = peek ();
3181     }
3182
3183   return op1;
3184 }
3185
3186
3187 expression*
3188 parser::parse_concatenation ()
3189 {
3190   expression* op1 = parse_additive ();
3191
3192   const token* t = peek ();
3193   // XXX: the actual awk string-concatenation operator is *whitespace*.
3194   // I don't know how to easily to model that here.
3195   while (t && t->type == tok_operator && t->content == ".")
3196     {
3197       concatenation* e = new concatenation;
3198       e->left = op1;
3199       e->op = t->content;
3200       e->tok = t;
3201       next ();
3202       e->right = parse_additive ();
3203       op1 = e;
3204       t = peek ();
3205     }
3206
3207   return op1;
3208 }
3209
3210
3211 expression*
3212 parser::parse_additive ()
3213 {
3214   expression* op1 = parse_multiplicative ();
3215
3216   const token* t = peek ();
3217   while (t && t->type == tok_operator
3218       && (t->content == "+" || t->content == "-"))
3219     {
3220       binary_expression* e = new binary_expression;
3221       e->op = t->content;
3222       e->left = op1;
3223       e->tok = t;
3224       next ();
3225       e->right = parse_multiplicative ();
3226       op1 = e;
3227       t = peek ();
3228     }
3229
3230   return op1;
3231 }
3232
3233
3234 expression*
3235 parser::parse_multiplicative ()
3236 {
3237   expression* op1 = parse_unary ();
3238
3239   const token* t = peek ();
3240   while (t && t->type == tok_operator
3241       && (t->content == "*" || t->content == "/" || t->content == "%"))
3242     {
3243       binary_expression* e = new binary_expression;
3244       e->op = t->content;
3245       e->left = op1;
3246       e->tok = t;
3247       next ();
3248       e->right = parse_unary ();
3249       op1 = e;
3250       t = peek ();
3251     }
3252
3253   return op1;
3254 }
3255
3256
3257 expression*
3258 parser::parse_unary ()
3259 {
3260   const token* t = peek ();
3261   if (t && t->type == tok_operator
3262       && (t->content == "+" ||
3263           t->content == "-" ||
3264           t->content == "!" ||
3265           t->content == "~" ||
3266           false))
3267     {
3268       unary_expression* e = new unary_expression;
3269       e->op = t->content;
3270       e->tok = t;
3271       next ();
3272       e->operand = parse_unary ();
3273       return e;
3274     }
3275   else
3276     return parse_crement ();
3277 }
3278
3279
3280 expression*
3281 parser::parse_crement () // as in "increment" / "decrement"
3282 {
3283   // NB: Ideally, we'd parse only a symbol as an operand to the
3284   // *crement operators, instead of a general expression value.  We'd
3285   // need more complex lookahead code to tell apart the postfix cases.
3286   // So we just punt, and leave it to pass-3 to signal errors on
3287   // cases like "4++".
3288
3289   const token* t = peek ();
3290   if (t && t->type == tok_operator
3291       && (t->content == "++" || t->content == "--"))
3292     {
3293       pre_crement* e = new pre_crement;
3294       e->op = t->content;
3295       e->tok = t;
3296       next ();
3297       e->operand = parse_value ();
3298       return e;
3299     }
3300
3301   // post-crement or non-crement
3302   expression *op1 = parse_value ();
3303
3304   t = peek ();
3305   if (t && t->type == tok_operator
3306       && (t->content == "++" || t->content == "--"))
3307     {
3308       post_crement* e = new post_crement;
3309       e->op = t->content;
3310       e->tok = t;
3311       next ();
3312       e->operand = op1;
3313       return e;
3314     }
3315   else
3316     return op1;
3317 }
3318
3319
3320 expression*
3321 parser::parse_value ()
3322 {
3323   const token* t = peek ();
3324   if (! t)
3325     throw PARSE_ERROR (_("expected value"));
3326
3327   if (t->type == tok_embedded)
3328     {
3329       if (! privileged)
3330         throw PARSE_ERROR (_("embedded expression code in unprivileged script; need stap -g"), false);
3331
3332       embedded_expr *e = new embedded_expr;
3333       e->tok = t;
3334       e->code = t->content;
3335       next ();
3336       return e;
3337     }
3338
3339   if (t->type == tok_operator && t->content == "(")
3340     {
3341       swallow ();
3342       expression* e = parse_expression ();
3343       t = next ();
3344       if (! (t->type == tok_operator && t->content == ")"))
3345         throw PARSE_ERROR (_("expected ')'"));
3346       swallow ();
3347       return e;
3348     }
3349   else if (t->type == tok_operator && t->content == "&")
3350     {
3351       next (); // Cannot swallow, passing token on...
3352       return parse_target_symbol (t);
3353     }
3354   else if (t->type == tok_identifier
3355            || (t->type == tok_operator && t->content[0] == '@'))
3356     return parse_symbol ();
3357   else
3358     return parse_literal ();
3359 }
3360
3361
3362 const token *
3363 parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name)
3364 {
3365   hop = NULL;
3366   const token* t = expect_ident_or_atword (name);
3367   if (name == "@hist_linear" || name == "@hist_log")
3368     {
3369       hop = new hist_op;
3370       if (name == "@hist_linear")
3371         hop->htype = hist_linear;
3372       else if (name == "@hist_log")
3373         hop->htype = hist_log;
3374       hop->tok = t;
3375       expect_op("(");
3376       hop->stat = parse_expression ();
3377       int64_t tnum;
3378       if (hop->htype == hist_linear)
3379         {
3380           for (size_t i = 0; i < 3; ++i)
3381             {
3382               expect_op (",");
3383               expect_number (tnum);
3384               hop->params.push_back (tnum);
3385             }
3386         }
3387       expect_op(")");
3388     }
3389   return t;
3390 }
3391
3392
3393 indexable*
3394 parser::parse_indexable ()
3395 {
3396   hist_op *hop = NULL;
3397   string name;
3398   const token *tok = parse_hist_op_or_bare_name(hop, name);
3399   if (hop)
3400     return hop;
3401   else
3402     {
3403       symbol* sym = new symbol;
3404       sym->name = name;
3405       sym->tok = tok;
3406       return sym;
3407     }
3408 }
3409
3410
// Parse an expression that starts with an identifier or an @-word:
//   var, indexable[index], func(parms), printf("...", ...), $var,
//   @cast, @defined, @entry, @perf, @var, $var->member, @stat_op(stat)
//
// Dispatch order: histogram operator, target symbols ($var/@cast/@var),
// @defined, @entry, @perf, statistics operators (@avg etc.), the print
// family, function calls, and finally plain symbols.  A plain symbol or
// histogram operator may then be followed by an [index, ...] list.
//
// Throws a parse error for an unknown @-operator, for malformed
// argument/index lists, and for a histogram operator used without an
// index where an expression is expected.
expression* parser::parse_symbol ()
{
  hist_op *hop = NULL;
  symbol *sym = NULL;
  string name;
  const token *t = parse_hist_op_or_bare_name(hop, name);

  if (!hop)
    {
      // If we didn't get a hist_op, then we did get an identifier. We can
      // now scrutinize this identifier for the various magic forms of identifier
      // (printf, @stat_op, and $var...)

      if (name == "@cast"
          || name == "@var"
          || (name.size() > 0 && name[0] == '$'))
        return parse_target_symbol (t);

      // NB: PR11343: @defined() is not incompatible with earlier versions
      // of stap, so no need to check session.compatible for 1.2
      if (name == "@defined")
        return parse_defined_op (t);

      if (name == "@entry")
        return parse_entry_op (t);

      if (name == "@perf")
        return parse_perf_op (t);

      // Any remaining @-word must be one of the statistics extractors.
      if (name.size() > 0 && name[0] == '@')
        {
          stat_op *sop = new stat_op;
          if (name == "@avg")
            sop->ctype = sc_average;
          else if (name == "@count")
            sop->ctype = sc_count;
          else if (name == "@sum")
            sop->ctype = sc_sum;
          else if (name == "@min")
            sop->ctype = sc_min;
          else if (name == "@max")
            sop->ctype = sc_max;
          else
            throw PARSE_ERROR(_("unknown operator ") + name);
          expect_op("(");
          sop->tok = t;
          sop->stat = parse_expression ();
          expect_op(")");
          return sop;
        }

      // print_format::create returns non-NULL when the token names one of
      // the print-family functions; that result selects this branch.
      else if (print_format *fmt = print_format::create(t))
        {
          expect_op("(");
          if ((name == "print" || name == "println" ||
               name == "sprint" || name == "sprintln") &&
              (peek_op("@hist_linear") || peek_op("@hist_log")))
            {
              // We have a special case where we recognize
              // print(@hist_foo(bar)) as a magic print-the-histogram
              // construct. This is sort of gross but it avoids
              // promoting histogram references to typeful
              // expressions.

              hop = NULL;
              t = parse_hist_op_or_bare_name(hop, name);
              assert(hop);

              // It is, sadly, possible that even while parsing a
              // hist_op, we *mis-guessed* and the user wishes to
              // print(@hist_op(foo)[bucket]), a scalar. In that case
              // we must parse the arrayindex and print an expression.
              //
              // XXX: This still fails if the arrayindex is part of a
              // larger expression.  To really handle everything, we'd
              // need to push back all the hist tokens and start over.

              if (!peek_op ("["))
                fmt->hist = hop;
              else
                {
                  // This is a simplified version of the
                  // multi-array-index parser below, because we can
                  // only ever have one index on a histogram anyways.
                  expect_op("[");
                  struct arrayindex* ai = new arrayindex;
                  ai->tok = t;
                  ai->base = hop;
                  ai->indexes.push_back (parse_expression ());
                  expect_op("]");
                  fmt->args.push_back(ai);

                  // Consume any subsequent arguments.
                  while (!peek_op (")"))
                    {
                      expect_op(",");
                      expression *e = parse_expression ();
                      fmt->args.push_back(e);
                    }
                }
            }
          else
            {
              // min_args counts arguments that are still mandatory;
              // consumed_arg records whether a "," must precede the
              // next argument.
              int min_args = 0;
              bool consumed_arg = false;
              if (fmt->print_with_format)
                {
                  // Consume and convert a format string. Agreement between the
                  // format string and the arguments is postponed to the
                  // typechecking phase.
                  string tmp;
                  expect_unknown (tok_string, tmp);
                  fmt->raw_components = tmp;
                  fmt->components = print_format::string_to_components (tmp);
                  consumed_arg = true;
                }
              else if (fmt->print_with_delim)
                {
                  // Consume a delimiter to separate arguments.
                  fmt->delimiter.clear();
                  fmt->delimiter.type = print_format::conv_literal;
                  expect_unknown (tok_string, fmt->delimiter.literal_string);
                  consumed_arg = true;
                  min_args = 2; // so that the delim is used at least once
                }
              else if (!fmt->print_with_newline)
                {
                  // If we are not printing with a format string, nor with a
                  // delim, nor with a newline, then it's either print() or
                  // sprint(), both of which require at least one argument (of
                  // any type).
                  min_args = 1;
                }

              // Consume any subsequent arguments.
              while (min_args || !peek_op (")"))
                {
                  if (consumed_arg)
                    expect_op(",");
                  expression *e = parse_expression ();
                  fmt->args.push_back(e);
                  consumed_arg = true;
                  if (min_args)
                    --min_args;
                }
            }
          expect_op(")");
          return fmt;
        }

      else if (peek_op ("(")) // function call
        {
          swallow ();
          struct functioncall* f = new functioncall;
          f->tok = t;
          f->function = name;
          // Allow empty actual parameter list
          if (peek_op (")"))
            {
              swallow ();
              return f;
            }
          // Otherwise: expression ( "," expression )* ")"
          while (1)
            {
              f->args.push_back (parse_expression ());
              if (peek_op (")"))
                {
                  swallow ();
                  break;
                }
              else if (peek_op (","))
                {
                  swallow ();
                  continue;
                }
              else
                throw PARSE_ERROR (_("expected ',' or ')'"));
            }
          return f;
        }

      else
        {
          sym = new symbol;
          sym->name = name;
          sym->tok = t;
        }
    }

  // By now, either we had a hist_op in the first place, or else
  // we had a plain word and it was converted to a symbol.

  assert (!hop != !sym); // logical XOR

  // All that remains is to check for array indexing

  if (peek_op ("[")) // array
    {
      swallow ();
      struct arrayindex* ai = new arrayindex;
      ai->tok = t;

      if (hop)
        ai->base = hop;
      else
        ai->base = sym;

      // expression ( "," expression )* "]"
      while (1)
        {
          ai->indexes.push_back (parse_expression ());
          if (peek_op ("]"))
            {
              swallow ();
              break;
            }
          else if (peek_op (","))
            {
              swallow ();
              continue;
            }
          else
            throw PARSE_ERROR (_("expected ',' or ']'"));
        }
      return ai;
    }

  // If we got to here, we *should* have a symbol; if we have
  // a hist_op on its own, it doesn't count as an expression,
  // so we throw a parse error.

  if (hop)
    throw PARSE_ERROR(_("base histogram operator where expression expected"), t);

  return sym;
}
3648
3649 // Parse a @cast or $var.  Given head token has already been consumed.
3650 target_symbol* parser::parse_target_symbol (const token* t)
3651 {
3652   bool addressof = false;
3653   if (t->type == tok_operator && t->content == "&")
3654     {
3655       addressof = true;
3656       // Don't delete t before trying next token.
3657       // We might need it in the error message when there is no next token.
3658       const token *next_t = next ();
3659       delete t;
3660       t = next_t;
3661     }
3662
3663   if (t->type == tok_operator && t->content == "@cast")
3664     {
3665       cast_op *cop = new cast_op;
3666       cop->tok = t;
3667       cop->name = t->content;
3668       expect_op("(");
3669       cop->operand = parse_expression ();
3670       expect_op(",");
3671       expect_unknown(tok_string, cop->type_name);
3672       if (peek_op (","))
3673         {
3674           swallow ();
3675           expect_unknown(tok_string, cop->module);
3676         }
3677       expect_op(")");
3678       parse_target_symbol_components(cop);
3679       cop->addressof = addressof;
3680       return cop;
3681     }
3682
3683   if (t->type == tok_identifier && t->content[0]=='$')
3684     {
3685       // target_symbol time
3686       target_symbol *tsym = new target_symbol;
3687       tsym->tok = t;
3688       tsym->name = t->content;
3689       parse_target_symbol_components(tsym);
3690       tsym->addressof = addressof;
3691       return tsym;
3692     }
3693
3694   if (t->type == tok_operator && t->content == "@var")
3695     {
3696       atvar_op *aop = new atvar_op;
3697       aop->tok = t;
3698       aop->name = t->content;
3699       expect_op("(");
3700       expect_unknown(tok_string, aop->target_name);
3701       size_t found_at = aop->target_name.find("@");
3702       if (found_at != string::npos)
3703         aop->cu_name = aop->target_name.substr(found_at + 1);
3704       else
3705         aop->cu_name = "";
3706       if (peek_op (","))
3707         {
3708           swallow ();
3709           expect_unknown (tok_string, aop->module);
3710         }
3711       else
3712         aop->module = "";
3713       expect_op(")");
3714       parse_target_symbol_components(aop);
3715       aop->addressof = addressof;
3716       return aop;
3717     }
3718
3719   throw PARSE_ERROR (_("expected @cast, @var or $var"));
3720 }
3721
3722
3723 // Parse a @defined().  Given head token has already been consumed.
3724 expression* parser::parse_defined_op (const token* t)
3725 {
3726   defined_op* dop = new defined_op;
3727   dop->tok = t;
3728   expect_op("(");
3729   // no need for parse_hist_op... etc., as @defined takes only target_symbols as its operand.
3730   const token* tt = next ();
3731   dop->operand = parse_target_symbol (tt);
3732   expect_op(")");
3733   return dop;
3734 }
3735
3736
3737 // Parse a @entry().  Given head token has already been consumed.
3738 expression* parser::parse_entry_op (const token* t)
3739 {
3740   entry_op* eop = new entry_op;
3741   eop->tok = t;
3742   expect_op("(");
3743   eop->operand = parse_expression ();
3744   expect_op(")");
3745   return eop;
3746 }
3747
3748
3749 // Parse a @perf().  Given head token has already been consumed.
3750 expression* parser::parse_perf_op (const token* t)
3751 {
3752   perf_op* pop = new perf_op;
3753   pop->tok = t;
3754   expect_op("(");
3755   pop->operand = parse_literal_string ();
3756   if (pop->operand->value == "")
3757     throw PARSE_ERROR (_("expected non-empty string"));
3758   expect_op(")");
3759   return pop;
3760 }
3761
3762
3763
// Parse the components that may trail a target symbol E and append
// them to E->components:
//   - "->member" accesses,
//   - "[index]" accesses (a literal number is stored by value, any
//     other expression is stored as an expression),
//   - a pretty-print suffix made only of '$' characters, written either
//     glued to the name ($foo$), glued to a member ($foo->bar$, $foo->$),
//     or as a separate token ($foo[i] $, @cast(...)$).
// A pretty-print suffix ends the component list; a following "->" or
// "[" is a parse error.  E->name is trimmed in place when the suffix is
// glued to it.
void
parser::parse_target_symbol_components (target_symbol* e)
{
  bool pprint = false;

  // check for pretty-print in the form $foo$
  string &base = e->name;
  // Index of the last character that is not '$'; string::npos when the
  // name has no such character.
  size_t pprint_pos = base.find_last_not_of('$');
  // True only when trailing '$' characters follow a non-'$' character
  // located past index 0.  string::npos is the largest size_t value,
  // so it passes the first test but always fails the second.
  if (0 < pprint_pos && pprint_pos < base.length() - 1)
    {
      string pprint_val = base.substr(pprint_pos + 1);
      base.erase(pprint_pos + 1);
      e->components.push_back (target_symbol::component(e->tok, pprint_val, true));
      pprint = true;
    }

  while (!pprint)
    {
      if (peek_op ("->"))
        {
          const token* t = next();
          string member;
          expect_ident_or_keyword (member);

          // check for pretty-print in the form $foo->$ or $foo->bar$
          pprint_pos = member.find_last_not_of('$');
          string pprint_val;
          if (pprint_pos == string::npos || pprint_pos < member.length() - 1)
            {
              // With npos, pprint_pos + 1 wraps around to 0, so the whole
              // member becomes the suffix and member is left empty.
              pprint_val = member.substr(pprint_pos + 1);
              member.erase(pprint_pos + 1);
              pprint = true;
            }

          if (!member.empty())
            e->components.push_back (target_symbol::component(t, member));
          if (pprint)
            e->components.push_back (target_symbol::component(t, pprint_val, true));
        }
      else if (peek_op ("["))
        {
          const token* t = next();
          expression* index = parse_expression();
          // A literal numeric index is recorded by value; anything else
          // is kept as an expression.
          literal_number* ln = dynamic_cast<literal_number*>(index);
          if (ln)
            e->components.push_back (target_symbol::component(t, ln->value));
          else
            e->components.push_back (target_symbol::component(t, index));
          expect_op ("]");
        }
      else
        break;
    }

  if (!pprint)
    {
      // check for pretty-print in the form $foo $
      // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$
      const token* t = peek();
      // An identifier token containing nothing but '$' characters.
      if (t != NULL && t->type == tok_identifier &&
          t->content.find_first_not_of('$') == string::npos)
        {
          t = next();
          e->components.push_back (target_symbol::component(t, t->content, true));
          pprint = true;
        }
    }

  // Nothing may be dereferenced or indexed after a pretty-print suffix.
  if (pprint && (peek_op ("->") || peek_op("[")))
    throw PARSE_ERROR(_("-> and [ are not accepted for a pretty-printing variable"));
}
3835
3836 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */