parse.cxx

   1 // recursive descent parser for systemtap scripts
   2 // Copyright (C) 2005-2009 Red Hat Inc.
   3 // Copyright (C) 2006 Intel Corporation.
   4 // Copyright (C) 2007 Bull S.A.S
   5 //
   6 // This file is part of systemtap, and is free software.  You can
   7 // redistribute it and/or modify it under the terms of the GNU General
   8 // Public License (GPL); either version 2, or (at your option) any
   9 // later version.
  10
  11 #include "config.h"
  12 #include "staptree.h"
  13 #include "parse.h"
  14 #include "session.h"
  15 #include "util.h"
  16
  17 #include <iostream>
  18
  19 #include <fstream>
  20 #include <cctype>
  21 #include <cstdlib>
  22 #include <cassert>
  23 #include <cerrno>
  24 #include <climits>
  25 #include <sstream>
  26 #include <cstring>
  27 #include <cctype>
  28 #include <iterator>
  29
  30 extern "C" {
  31 #include <fnmatch.h>
  32 }
  33
  34 using namespace std;
  35
  36 // ------------------------------------------------------------------------
  37
  38
  39
  40 parser::parser (systemtap_session& s, istream& i, bool p):
  41   session (s),
  42   input_name ("<input>"), free_input (0),
  43   input (i, input_name, s), privileged (p),
  44   context(con_unknown), last_t (0), next_t (0), num_errors (0)
  45 { }
  46
  47 parser::parser (systemtap_session& s, const string& fn, bool p):
  48   session (s),
  49   input_name (fn), free_input (new ifstream (input_name.c_str(), ios::in)),
  50   input (* free_input, input_name, s), privileged (p),
  51   context(con_unknown), last_t (0), next_t (0), num_errors (0)
  52 { }
  53
  54 parser::~parser()
  55 {
  56   if (free_input) delete free_input;
  57 }
  58
  59
  60 stapfile*
  61 parser::parse (systemtap_session& s, std::istream& i, bool pr)
  62 {
  63   parser p (s, i, pr);
  64   return p.parse ();
  65 }
  66
  67
  68 stapfile*
  69 parser::parse (systemtap_session& s, const std::string& n, bool pr)
  70 {
  71   parser p (s, n, pr);
  72   return p.parse ();
  73 }
  74
  75 static string
  76 tt2str(token_type tt)
  77 {
  78   switch (tt)
  79     {
  80     case tok_junk: return "junk";
  81     case tok_identifier: return "identifier";
  82     case tok_operator: return "operator";
  83     case tok_string: return "string";
  84     case tok_number: return "number";
  85     case tok_embedded: return "embedded-code";
  86     case tok_keyword: return "keyword";
  87     }
  88   return "unknown token";
  89 }
  90
  91 ostream&
  92 operator << (ostream& o, const source_loc& loc)
  93 {
  94   o << loc.file << ":"
  95     << loc.line << ":"
  96     << loc.column;
  97
  98   return o;
  99 }
 100
 101 ostream&
 102 operator << (ostream& o, const token& t)
 103 {
 104   o << tt2str(t.type);
 105
 106   if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
 107     {
 108       o << " '";
 109       for (unsigned i=0; i<t.content.length(); i++)
 110         {
 111           char c = t.content[i];
 112           o << (isprint (c) ? c : '?');
 113         }
 114       o << "'";
 115     }
 116
 117   o << " at "
 118     << t.location;
 119
 120   return o;
 121 }
 122
 123
 124 void
 125 parser::print_error  (const parse_error &pe)
 126 {
 127   string align_parse_error ("     ");
 128   cerr << "parse error: " << pe.what () << endl;
 129
 130   if (pe.tok)
 131     {
 132       cerr << "\tat: " << *pe.tok << endl;
 133       session.print_error_source (cerr, align_parse_error, pe.tok);
 134     }
 135   else
 136     {
 137       const token* t = last_t;
 138       if (t)
 139         {
 140           cerr << "\tsaw: " << *t << endl;
 141           session.print_error_source (cerr, align_parse_error, t);
 142         }
 143       else
 144         cerr << "\tsaw: " << input_name << " EOF" << endl;
 145     }
 146
 147   // XXX: make it possible to print the last input line,
 148   // so as to line up an arrow with the specific error column
 149
 150   num_errors ++;
 151 }
 152
 153
 154 const token*
 155 parser::last ()
 156 {
 157   return last_t;
 158 }
 159
 160
 161
 162 template <typename OPERAND>
 163 bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
 164 {
 165   if (op->type == tok_operator && op->content == "<=")
 166     { return lhs <= rhs; }
 167   else if (op->type == tok_operator && op->content == ">=")
 168     { return lhs >= rhs; }
 169   else if (op->type == tok_operator && op->content == "<")
 170     { return lhs < rhs; }
 171   else if (op->type == tok_operator && op->content == ">")
 172     { return lhs > rhs; }
 173   else if (op->type == tok_operator && op->content == "==")
 174     { return lhs == rhs; }
 175   else if (op->type == tok_operator && op->content == "!=")
 176     { return lhs != rhs; }
 177   else
 178     throw parse_error ("expected comparison operator", op);
 179 }
 180
 181
 182 // Here, we perform on-the-fly preprocessing.
 183 // The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
 184 // where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
 185 //                 or: arch COMPARISON-OP "arch-string"
 186 //                 or: "string1" COMPARISON-OP "string2"
 187 //                 or: number1 COMPARISON-OP number2
 188 // The %: ELSE-TOKENS part is optional.
 189 //
 190 // e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
 191 // e.g. %( arch != "i?86" %? "foo" %: "baz" %)
 192 //
// Up to an entire %( ... %) expression is processed by a single call
// to parser::scan_pp below; eval_pp_conditional only evaluates the
// CONDITION part.  Tokens included by any nested conditions are
// enqueued in a private vector.
 196
 197 bool eval_pp_conditional (systemtap_session& s,
 198                           const token* l, const token* op, const token* r)
 199 {
 200   if (l->type == tok_identifier && (l->content == "kernel_v" ||
 201                                     l->content == "kernel_vr"))
 202     {
 203       string target_kernel_vr = s.kernel_release;
 204       string target_kernel_v = s.kernel_base_release;
 205
 206       if (! (r->type == tok_string))
 207         throw parse_error ("expected string literal", r);
 208
 209       string target = (l->content == "kernel_vr" ?
 210                        target_kernel_vr.c_str() :
 211                        target_kernel_v.c_str());
 212       string query = r->content;
 213       bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
 214
 215       // collect acceptable strverscmp results.
 216       int rvc_ok1, rvc_ok2;
 217       bool wc_ok = false;
 218       if (op->type == tok_operator && op->content == "<=")
 219         { rvc_ok1 = -1; rvc_ok2 = 0; }
 220       else if (op->type == tok_operator && op->content == ">=")
 221         { rvc_ok1 = 1; rvc_ok2 = 0; }
 222       else if (op->type == tok_operator && op->content == "<")
 223         { rvc_ok1 = -1; rvc_ok2 = -1; }
 224       else if (op->type == tok_operator && op->content == ">")
 225         { rvc_ok1 = 1; rvc_ok2 = 1; }
 226       else if (op->type == tok_operator && op->content == "==")
 227         { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
 228       else if (op->type == tok_operator && op->content == "!=")
 229         { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
 230       else
 231         throw parse_error ("expected comparison operator", op);
 232
 233       if ((!wc_ok) && rhs_wildcard)
 234         throw parse_error ("wildcard not allowed with order comparison operators", op);
 235
 236       if (rhs_wildcard)
 237         {
 238           int rvc_result = fnmatch (query.c_str(), target.c_str(),
 239                                     FNM_NOESCAPE); // spooky
 240           bool badness = (rvc_result == 0) ^ (op->content == "==");
 241           return !badness;
 242         }
 243       else
 244         {
 245           int rvc_result = strverscmp (target.c_str(), query.c_str());
 246           // normalize rvc_result
 247           if (rvc_result < 0) rvc_result = -1;
 248           if (rvc_result > 0) rvc_result = 1;
 249           return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
 250         }
 251     }
 252   else if (l->type == tok_identifier && l->content == "arch")
 253     {
 254       string target_architecture = s.architecture;
 255       if (! (r->type == tok_string))
 256         throw parse_error ("expected string literal", r);
 257       string query_architecture = r->content;
 258
 259       int nomatch = fnmatch (query_architecture.c_str(),
 260                              target_architecture.c_str(),
 261                              FNM_NOESCAPE); // still spooky
 262
 263       bool result;
 264       if (op->type == tok_operator && op->content == "==")
 265         result = !nomatch;
 266       else if (op->type == tok_operator && op->content == "!=")
 267         result = nomatch;
 268       else
 269         throw parse_error ("expected '==' or '!='", op);
 270
 271       return result;
 272     }
 273   else if (l->type == tok_string && r->type == tok_string)
 274     {
 275       string lhs = l->content;
 276       string rhs = r->content;
 277       return eval_comparison (lhs, op, rhs);
 278       // NB: no wildcarding option here
 279     }
 280   else if (l->type == tok_number && r->type == tok_number)
 281     {
 282       int64_t lhs = lex_cast<int64_t>(l->content);
 283       int64_t rhs = lex_cast<int64_t>(r->content);
 284       return eval_comparison (lhs, op, rhs);
 285       // NB: no wildcarding option here
 286     }
 287   else if (l->type == tok_string && r->type == tok_number
 288             && op->type == tok_operator)
 289     throw parse_error ("expected string literal as right value", r);
 290   else if (l->type == tok_number && r->type == tok_string
 291             && op->type == tok_operator)
 292     throw parse_error ("expected number literal as right value", r);
 293
 294   // XXX: support other forms?  "CONFIG_SMP" ?
 295
 296   else
 297     throw parse_error ("expected 'arch' or 'kernel_v' or 'kernel_vr'\n"
 298                        "             or comparison between strings or integers", l);
 299 }
 300
 301
 302 // Only tokens corresponding to the TRUE statement must be expanded
 303 const token*
 304 parser::scan_pp (bool wildcard)
 305 {
 306   while (true)
 307     {
 308       if (enqueued_pp.size() > 0)
 309         {
 310           const token* t = enqueued_pp[0];
 311           enqueued_pp.erase (enqueued_pp.begin());
 312           return t;
 313         }
 314
 315       const token* t = input.scan (wildcard); // NB: not recursive!
 316       if (t == 0) // EOF
 317         return t;
 318
 319       if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
 320         return t;
 321
 322       // We have a %( - it's time to throw a preprocessing party!
 323
 324       const token *l, *op, *r;
 325       l = input.scan (false); // NB: not recursive, though perhaps could be
 326       op = input.scan (false);
 327       r = input.scan (false);
 328       if (l == 0 || op == 0 || r == 0)
 329         throw parse_error ("incomplete condition after '%('", t);
 330       // NB: consider generalizing to consume all tokens until %?, and
 331       // passing that as a vector to an evaluator.
 332
 333       // Do not evaluate the condition if we haven't expanded everything.
 334       // This may occur when having several recursive conditionals.
 335       bool result = eval_pp_conditional (session, l, op, r);
 336       delete l;
 337       delete op;
 338       delete r;
 339
 340       /*
 341       clog << "PP eval (" << *t << ") == " << result << endl;
 342       */
 343
 344       const token *m = input.scan (); // NB: not recursive
 345       if (! (m && m->type == tok_operator && m->content == "%?"))
 346         throw parse_error ("expected '%?' marker for conditional", t);
 347       delete m; // "%?"
 348
 349       vector<const token*> my_enqueued_pp;
 350
 351       int nesting = 0;
 352       while (true) // consume THEN tokens
 353         {
 354           try
 355             {
 356               m = result ? scan_pp (wildcard) : input.scan (wildcard);
 357             }
 358           catch (const parse_error &e)
 359             {
 360               if (result) throw e; // propagate errors if THEN branch taken
 361               continue;
 362             }
 363
 364           if (m && m->type == tok_operator && m->content == "%(") // nested %(
 365             nesting ++;
 366           if (nesting == 0 && m && (m->type == tok_operator && (m->content == "%:" || // ELSE
 367                                                                 m->content == "%)"))) // END
 368             break;
 369           if (nesting && m && m->type == tok_operator && m->content == "%)") // nested %)
 370             nesting --;
 371
 372           if (!m)
 373             throw parse_error ("incomplete conditional - missing '%:' or '%)'", t);
 374           if (result)
 375             my_enqueued_pp.push_back (m);
 376           if (!result)
 377             delete m; // do nothing, just dispose of unkept THEN token
 378
 379           continue;
 380         }
 381
 382       if (m && m->type == tok_operator && m->content == "%:") // ELSE
 383         {
 384           delete m; // "%:"
 385           int nesting = 0;
 386           while (true)
 387             {
 388               try
 389                 {
 390                   m = result ? input.scan (wildcard) : scan_pp (wildcard);
 391                 }
 392               catch (const parse_error& e)
 393                 {
 394                   if (!result) throw e; // propagate errors if ELSE branch taken
 395                   continue;
 396                 }
 397
 398               if (m && m->type == tok_operator && m->content == "%(") // nested %(
 399                 nesting ++;
 400               if (nesting == 0 && m && m->type == tok_operator && m->content == "%)") // END
 401                 break;
 402               if (nesting && m && m->type == tok_operator && m->content == "%)") // nested %)
 403                 nesting --;
 404
 405               if (!m)
 406                 throw parse_error ("incomplete conditional - missing %)", t);
 407               if (!result)
 408                 my_enqueued_pp.push_back (m);
 409               if (result)
 410                 delete m; // do nothing, just dispose of unkept ELSE token
 411
 412               continue;
 413             }
 414         }
 415
 416       /*
 417       clog << "PP eval (" << *t << ") == " << result << " tokens: " << endl;
 418       for (unsigned k=0; k<my_enqueued_pp.size(); k++)
 419         clog << * my_enqueued_pp[k] << endl;
 420       clog << endl;
 421       */
 422
 423       delete t; // "%("
 424       delete m; // "%)"
 425
 426
 427       // NB: we transcribe the retained tokens here, and not inside
 428       // the THEN/ELSE while loops.  If it were done there, each loop
 429       // would become infinite (each iteration consuming an ordinary
 430       // token the previous one just pushed there).  Guess how I
 431       // figured that out.
 432       enqueued_pp.insert (enqueued_pp.end(),
 433                           my_enqueued_pp.begin(),
 434                           my_enqueued_pp.end());
 435
 436       // Go back to outermost while(true) loop.  We hope that at least
 437       // some THEN or ELSE tokens were enqueued.  If not, around we go
 438       // again, until EOF.
 439     }
 440 }
 441
 442
 443 const token*
 444 parser::next (bool wildcard)
 445 {
 446   if (! next_t)
 447     next_t = scan_pp (wildcard);
 448   if (! next_t)
 449     throw parse_error ("unexpected end-of-file");
 450
 451   last_t = next_t;
 452   // advance by zeroing next_t
 453   next_t = 0;
 454   return last_t;
 455 }
 456
 457
 458 const token*
 459 parser::peek (bool wildcard)
 460 {
 461   if (! next_t)
 462     next_t = scan_pp (wildcard);
 463
 464   // don't advance by zeroing next_t
 465   last_t = next_t;
 466   return next_t;
 467 }
 468
 469
 470 static inline bool
 471 tok_is(token const * t, token_type tt, string const & expected)
 472 {
 473   return t && t->type == tt && t->content == expected;
 474 }
 475
 476
 477 const token*
 478 parser::expect_known (token_type tt, string const & expected)
 479 {
 480   const token *t = next();
 481   if (! (t && t->type == tt && t->content == expected))
 482     throw parse_error ("expected '" + expected + "'");
 483   return t;
 484 }
 485
 486
 487 const token*
 488 parser::expect_unknown (token_type tt, string & target)
 489 {
 490   const token *t = next();
 491   if (!(t && t->type == tt))
 492     throw parse_error ("expected " + tt2str(tt));
 493   target = t->content;
 494   return t;
 495 }
 496
 497
 498 const token*
 499 parser::expect_unknown2 (token_type tt1, token_type tt2, string & target)
 500 {
 501   const token *t = next();
 502   if (!(t && (t->type == tt1 || t->type == tt2)))
 503     throw parse_error ("expected " + tt2str(tt1) + " or " + tt2str(tt2));
 504   target = t->content;
 505   return t;
 506 }
 507
 508
 509 const token*
 510 parser::expect_op (std::string const & expected)
 511 {
 512   return expect_known (tok_operator, expected);
 513 }
 514
 515
 516 const token*
 517 parser::expect_kw (std::string const & expected)
 518 {
 519   return expect_known (tok_identifier, expected);
 520 }
 521
 522 const token*
 523 parser::expect_number (int64_t & value)
 524 {
 525   bool neg = false;
 526   const token *t = next();
 527   if (t->type == tok_operator && t->content == "-")
 528     {
 529       neg = true;
 530       t = next ();
 531     }
 532   if (!(t && t->type == tok_number))
 533     throw parse_error ("expected number");
 534
 535   const char* startp = t->content.c_str ();
 536   char* endp = (char*) startp;
 537
 538   // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
 539   // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
 540   // since the lexer only gives us positive digit strings, but we'll
 541   // limit it to LLONG_MIN when a '-' operator is fed into the literal.
 542   errno = 0;
 543   value = (int64_t) strtoull (startp, & endp, 0);
 544   if (errno == ERANGE || errno == EINVAL || *endp != '\0'
 545       || (neg && (unsigned long long) value > 9223372036854775808ULL)
 546       || (unsigned long long) value > 18446744073709551615ULL
 547       || value < -9223372036854775807LL-1)
 548     throw parse_error ("number invalid or out of range");
 549
 550   if (neg)
 551     value = -value;
 552
 553   return t;
 554 }
 555
 556
 557 const token*
 558 parser::expect_ident (std::string & target)
 559 {
 560   return expect_unknown (tok_identifier, target);
 561 }
 562
 563
 564 const token*
 565 parser::expect_ident_or_keyword (std::string & target)
 566 {
 567   return expect_unknown2 (tok_identifier, tok_keyword, target);
 568 }
 569
 570
 571 bool
 572 parser::peek_op (std::string const & op)
 573 {
 574   return tok_is (peek(), tok_operator, op);
 575 }
 576
 577
 578 bool
 579 parser::peek_kw (std::string const & kw)
 580 {
 581   return tok_is (peek(), tok_identifier, kw);
 582 }
 583
 584
 585
 586 lexer::lexer (istream& i, const string& in, systemtap_session& s):
 587   input (i), input_name (in), input_contents (""),
 588   input_pointer (0), cursor_suspend_count(0),
 589   cursor_line (1), cursor_column (1), session(s),
 590   current_file (0)
 591 {
 592   char c;
 593   while(input.get(c))
 594     input_contents.push_back(c);
 595 }
 596
 597 std::string
 598 lexer::get_input_contents ()
 599 {
 600   return input_contents;
 601 }
 602
 603 void
 604 lexer::set_current_file (stapfile* f)
 605 {
 606   current_file = f;
 607 }
 608
 609 int
 610 lexer::input_peek (unsigned n)
 611 {
 612   if (input_contents.size() > (input_pointer + n))
 613     return (int)(unsigned char)input_contents[input_pointer+n];
 614   else
 615     return -1;
 616 }
 617
 618
 619 int
 620 lexer::input_get ()
 621 {
 622   int c = input_peek (0);
 623   input_pointer ++;
 624
 625   if (c < 0) return c; // EOF
 626
 627   if (cursor_suspend_count)
 628     // Track effect of input_put: preserve previous cursor/line_column
 629     // until all of its characters are consumed.
 630     cursor_suspend_count --;
 631   else
 632     {
 633       // update source cursor
 634       if (c == '\n')
 635         {
 636           cursor_line ++;
 637           cursor_column = 1;
 638         }
 639       else
 640         cursor_column ++;
 641     }
 642
 643   // clog << "[" << (char)c << "]";
 644   return c;
 645 }
 646
 647
 648 void
 649 lexer::input_put (const string& chars)
 650 {
 651   // clog << "[put:" << chars << " @" << input_pointer << "]";
 652   input_contents.insert (input_contents.begin() + input_pointer, chars.begin(), chars.end());
 653   cursor_suspend_count += chars.size();
 654 }
 655
 656
 657 token*
 658 lexer::scan (bool wildcard)
 659 {
 660   token* n = new token;
 661   n->location.file = input_name;
 662   if (current_file)
 663     n->location.stap_file = current_file;
 664
 665   unsigned semiskipped_p = 0;
 666
 667  skip:
 668   n->location.line = cursor_line;
 669   n->location.column = cursor_column;
 670
 671  semiskip:
 672   if (semiskipped_p > 1)
 673     {
 674       input_get ();
 675       throw parse_error ("invalid nested substitution of command line arguments");
 676     }
 677
 678   int c = input_get();
 679   int c2 = input_peek ();
 680   // clog << "{" << (char)c << (char)c2 << "}";
 681   if (c < 0)
 682     {
 683       delete n;
 684       return 0;
 685     }
 686
 687   if (isspace (c))
 688     goto skip;
 689
 690   // Paste command line arguments as character streams into
 691   // the beginning of a token.  $1..$999 go through as raw
 692   // characters; @1..@999 are quoted/escaped as strings.
 693   // $# and @# expand to the number of arguments, similarly
 694   // raw or quoted.
 695   if ((c == '$' || c == '@') &&
 696       (c2 == '#'))
 697     {
 698       input_get(); // swallow '#'
 699       stringstream converter;
 700       converter << session.args.size ();
 701       if (c == '$') input_put (converter.str());
 702       else input_put (lex_cast_qstring (converter.str()));
 703       semiskipped_p ++;
 704       goto semiskip;
 705     }
 706   else if ((c == '$' || c == '@') &&
 707            (isdigit (c2)))
 708     {
 709       unsigned idx = 0;
 710       do
 711         {
 712           input_get ();
 713           idx = (idx * 10) + (c2 - '0');
 714           c2 = input_peek ();
 715         } while (c2 > 0 &&
 716                  isdigit (c2) &&
 717                  idx <= session.args.size()); // prevent overflow
 718       if (idx == 0 ||
 719           idx-1 >= session.args.size())
 720         throw parse_error ("command line argument index " + lex_cast<string>(idx)
 721                            + " out of range [1-" + lex_cast<string>(session.args.size()) + "]", n);
 722
 723       string arg = session.args[idx-1];
 724       if (c == '$') input_put (arg);
 725       else input_put (lex_cast_qstring (arg));
 726       semiskipped_p ++;
 727       goto semiskip;
 728     }
 729
 730   else if (isalpha (c) || c == '$' || c == '@' || c == '_' ||
 731            (wildcard && c == '*'))
 732     {
 733       n->type = tok_identifier;
 734       n->content = (char) c;
 735       while (isalnum (c2) || c2 == '_' || c2 == '$' ||
 736              (wildcard && c2 == '*'))
 737         {
 738           input_get ();
 739           n->content.push_back (c2);
 740           c2 = input_peek ();
 741         }
 742
 743       if (n->content    == "probe"
 744           || n->content == "global"
 745           || n->content == "function"
 746           || n->content == "if"
 747           || n->content == "else"
 748           || n->content == "for"
 749           || n->content == "foreach"
 750           || n->content == "in"
 751           || n->content == "limit"
 752           || n->content == "return"
 753           || n->content == "delete"
 754           || n->content == "while"
 755           || n->content == "break"
 756           || n->content == "continue"
 757           || n->content == "next"
 758           || n->content == "string"
 759           || n->content == "long")
 760         n->type = tok_keyword;
 761
 762       return n;
 763     }
 764
 765   else if (isdigit (c)) // positive literal
 766     {
 767       n->type = tok_number;
 768       n->content = (char) c;
 769
 770       while (1)
 771         {
 772           int c2 = input_peek ();
 773           if (c2 < 0)
 774             break;
 775
 776           // NB: isalnum is very permissive.  We rely on strtol, called in
 777           // parser::parse_literal below, to confirm that the number string
 778           // is correctly formatted and in range.
 779
 780           if (isalnum (c2))
 781             {
 782               n->content.push_back (c2);
 783               input_get ();
 784             }
 785           else
 786             break;
 787         }
 788       return n;
 789     }
 790
 791   else if (c == '\"')
 792     {
 793       n->type = tok_string;
 794       while (1)
 795         {
 796           c = input_get ();
 797
 798           if (c < 0 || c == '\n')
 799             {
 800               n->type = tok_junk;
 801               break;
 802             }
 803           if (c == '\"') // closing double-quotes
 804             break;
 805           else if (c == '\\') // see also input_put
 806             {
 807               c = input_get ();
 808               switch (c)
 809                 {
 810                 case 'a':
 811                 case 'b':
 812                 case 't':
 813                 case 'n':
 814                 case 'v':
 815                 case 'f':
 816                 case 'r':
 817                 case '0' ... '7': // NB: need only match the first digit
 818                 case '\\':
 819                   // Pass these escapes through to the string value
 820                   // being parsed; it will be emitted into a C literal.
 821
 822                   n->content.push_back('\\');
 823
 824                   // fall through
 825                 default:
 826                   n->content.push_back(c);
 827                   break;
 828                 }
 829             }
 830           else
 831             n->content.push_back(c);
 832         }
 833       return n;
 834     }
 835
 836   else if (ispunct (c))
 837     {
 838       int c2 = input_peek ();
 839       int c3 = input_peek (1);
 840       string s1 = string("") + (char) c;
 841       string s2 = (c2 > 0 ? s1 + (char) c2 : s1);
 842       string s3 = (c3 > 0 ? s2 + (char) c3 : s2);
 843
 844       // NB: if we were to recognize negative numeric literals here,
 845       // we'd introduce another grammar ambiguity:
 846       // 1-1 would be parsed as tok_number(1) and tok_number(-1)
 847       // instead of tok_number(1) tok_operator('-') tok_number(1)
 848
 849       if (s1 == "#") // shell comment
 850         {
 851           unsigned this_line = cursor_line;
 852           do { c = input_get (); }
 853           while (c >= 0 && cursor_line == this_line);
 854           goto skip;
 855         }
 856       else if (s2 == "//") // C++ comment
 857         {
 858           unsigned this_line = cursor_line;
 859           do { c = input_get (); }
 860           while (c >= 0 && cursor_line == this_line);
 861           goto skip;
 862         }
 863       else if (c == '/' && c2 == '*') // C comment
 864         {
 865           c2 = input_get ();
 866           unsigned chars = 0;
 867           while (c2 >= 0)
 868             {
 869               chars ++; // track this to prevent "/*/" from being accepted
 870               c = c2;
 871               c2 = input_get ();
 872               if (chars > 1 && c == '*' && c2 == '/')
 873                 break;
 874             }
 875           goto skip;
 876         }
 877       else if (c == '%' && c2 == '{') // embedded code
 878         {
 879           n->type = tok_embedded;
 880           (void) input_get (); // swallow '{' already in c2
 881           while (true)
 882             {
 883               c = input_get ();
 884               if (c < 0) // EOF
 885                 {
 886                   n->type = tok_junk;
 887                   break;
 888                 }
 889               if (c == '%')
 890                 {
 891                   c2 = input_peek ();
 892                   if (c2 == '}')
 893                     {
 894                       (void) input_get (); // swallow '}' too
 895                       break;
 896                     }
 897                 }
 898               n->content += c;
 899             }
 900           return n;
 901         }
 902
 903       // We're committed to recognizing at least the first character
 904       // as an operator.
 905       n->type = tok_operator;
 906
 907       // match all valid operators, in decreasing size order
 908       if (s3 == "<<<" ||
 909           s3 == "<<=" ||
 910           s3 == ">>=")
 911         {
 912           n->content = s3;
 913           input_get (); input_get (); // swallow other two characters
 914         }
 915       else if (s2 == "==" ||
 916                s2 == "!=" ||
 917                s2 == "<=" ||
 918                s2 == ">=" ||
 919                s2 == "+=" ||
 920                s2 == "-=" ||
 921                s2 == "*=" ||
 922                s2 == "/=" ||
 923                s2 == "%=" ||
 924                s2 == "&=" ||
 925                s2 == "^=" ||
 926                s2 == "|=" ||
 927                s2 == ".=" ||
 928                s2 == "&&" ||
 929                s2 == "||" ||
 930                s2 == "++" ||
 931                s2 == "--" ||
 932                s2 == "->" ||
 933                s2 == "<<" ||
 934                s2 == ">>" ||
 935                // preprocessor tokens
 936                s2 == "%(" ||
 937                s2 == "%?" ||
 938                s2 == "%:" ||
 939                s2 == "%)")
 940         {
 941           n->content = s2;
 942           input_get (); // swallow other character
 943         }
 944       else
 945         {
 946           n->content = s1;
 947         }
 948
 949       return n;
 950     }
 951
 952   else
 953     {
 954       n->type = tok_junk;
 955       n->content = (char) c;
 956       return n;
 957     }
 958 }
 959
 960
 961 // ------------------------------------------------------------------------
 962
 963 stapfile*
 964 parser::parse ()
 965 {
 966   stapfile* f = new stapfile;
 967   input.set_current_file (f);
 968   f->file_contents = input.get_input_contents ();
 969   f->name = input_name;
 970
 971   bool empty = true;
 972
 973   while (1)
 974     {
 975       try
 976         {
 977           const token* t = peek ();
 978           if (! t) // nice clean EOF
 979             break;
 980
 981           empty = false;
 982           if (t->type == tok_keyword && t->content == "probe")
 983             {
 984               context = con_probe;
 985               parse_probe (f->probes, f->aliases);
 986             }
 987           else if (t->type == tok_keyword && t->content == "global")
 988             {
 989               context = con_global;
 990               parse_global (f->globals, f->probes);
 991             }
 992           else if (t->type == tok_keyword && t->content == "function")
 993             {
 994               context = con_function;
 995               parse_functiondecl (f->functions);
 996             }
 997           else if (t->type == tok_embedded)
 998             {
 999               context = con_embedded;
1000               f->embeds.push_back (parse_embeddedcode ());
1001             }
1002           else
1003             {
1004               context = con_unknown;
1005               throw parse_error ("expected 'probe', 'global', 'function', or '%{'");
1006             }
1007         }
1008       catch (parse_error& pe)
1009         {
1010           print_error (pe);
1011           if (pe.skip_some) // for recovery
1012             try
1013               {
1014                 // Quietly swallow all tokens until the next '}'.
1015                 while (1)
1016                   {
1017                     const token* t = peek ();
1018                     if (! t)
1019                       break;
1020                     next ();
1021                     if (t->type == tok_operator && t->content == "}")
1022                       break;
1023                   }
1024               }
1025             catch (parse_error& pe2)
1026               {
1027                 // parse error during recovery ... ugh
1028                 print_error (pe2);
1029               }
1030         }
1031     }
1032
1033   if (empty)
1034     {
1035       cerr << "Input file '" << input_name << "' is empty or missing." << endl;
1036       delete f;
1037       input.set_current_file (0);
1038       return 0;
1039     }
1040   else if (num_errors > 0)
1041     {
1042       cerr << num_errors << " parse error(s)." << endl;
1043       delete f;
1044       input.set_current_file (0);
1045       return 0;
1046     }
1047
1048   input.set_current_file (0);
1049   return f;
1050 }
1051
1052
1053 void
1054 parser::parse_probe (std::vector<probe *> & probe_ret,
1055                      std::vector<probe_alias *> & alias_ret)
1056 {
1057   const token* t0 = next ();
1058   if (! (t0->type == tok_keyword && t0->content == "probe"))
1059     throw parse_error ("expected 'probe'");
1060
1061   vector<probe_point *> aliases;
1062   vector<probe_point *> locations;
1063
1064   bool equals_ok = true;
1065
1066   int epilogue_alias = 0;
1067
1068   while (1)
1069     {
1070       probe_point * pp = parse_probe_point ();
1071
1072       const token* t = peek ();
1073       if (equals_ok && t
1074           && t->type == tok_operator && t->content == "=")
1075         {
1076           if (pp->optional || pp->sufficient)
1077             throw parse_error ("probe point alias name cannot be optional nor sufficient", pp->tok);
1078           aliases.push_back(pp);
1079           next ();
1080           continue;
1081         }
1082       else if (equals_ok && t
1083           && t->type == tok_operator && t->content == "+=")
1084         {
1085           if (pp->optional || pp->sufficient)
1086             throw parse_error ("probe point alias name cannot be optional nor sufficient", pp->tok);
1087           aliases.push_back(pp);
1088           epilogue_alias = 1;
1089           next ();
1090           continue;
1091         }
1092       else if (t && t->type == tok_operator && t->content == ",")
1093         {
1094           locations.push_back(pp);
1095           equals_ok = false;
1096           next ();
1097           continue;
1098         }
1099       else if (t && t->type == tok_operator && t->content == "{")
1100         {
1101           locations.push_back(pp);
1102           break;
1103         }
1104       else
1105         throw parse_error ("expected probe point specifier");
1106     }
1107
1108   if (aliases.empty())
1109     {
1110       probe* p = new probe;
1111       p->tok = t0;
1112       p->locations = locations;
1113       p->body = parse_stmt_block ();
1114       p->privileged = privileged;
1115       probe_ret.push_back (p);
1116     }
1117   else
1118     {
1119       probe_alias* p = new probe_alias (aliases);
1120       if(epilogue_alias)
1121         p->epilogue_style = true;
1122       else
1123         p->epilogue_style = false;
1124       p->tok = t0;
1125       p->locations = locations;
1126       p->body = parse_stmt_block ();
1127       p->privileged = privileged;
1128       alias_ret.push_back (p);
1129     }
1130 }
1131
1132
1133 embeddedcode*
1134 parser::parse_embeddedcode ()
1135 {
1136   embeddedcode* e = new embeddedcode;
1137   const token* t = next ();
1138   if (t->type != tok_embedded)
1139     throw parse_error ("expected '%{'");
1140
1141   if (! privileged)
1142     throw parse_error ("embedded code in unprivileged script",
1143                        false /* don't skip tokens for parse resumption */);
1144
1145   e->tok = t;
1146   e->code = t->content;
1147   return e;
1148 }
1149
1150
1151 block*
1152 parser::parse_stmt_block ()
1153 {
1154   block* pb = new block;
1155
1156   const token* t = next ();
1157   if (! (t->type == tok_operator && t->content == "{"))
1158     throw parse_error ("expected '{'");
1159
1160   pb->tok = t;
1161
1162   while (1)
1163     {
1164       try
1165         {
1166           t = peek ();
1167           if (t && t->type == tok_operator && t->content == "}")
1168             {
1169               next ();
1170               break;
1171             }
1172
1173           pb->statements.push_back (parse_statement ());
1174         }
1175       catch (parse_error& pe)
1176         {
1177           print_error (pe);
1178
1179           // Quietly swallow all tokens until the next ';' or '}'.
1180           while (1)
1181             {
1182               const token* t = peek ();
1183               if (! t) return 0;
1184               next ();
1185               if (t->type == tok_operator
1186                   && (t->content == "}" || t->content == ";"))
1187                 break;
1188             }
1189         }
1190     }
1191
1192   return pb;
1193 }
1194
1195
1196 statement*
1197 parser::parse_statement ()
1198 {
1199   const token* t = peek ();
1200   if (t && t->type == tok_operator && t->content == ";")
1201     {
1202       null_statement* n = new null_statement ();
1203       n->tok = next ();
1204       return n;
1205     }
1206   else if (t && t->type == tok_operator && t->content == "{")
1207     return parse_stmt_block ();
1208   else if (t && t->type == tok_keyword && t->content == "if")
1209     return parse_if_statement ();
1210   else if (t && t->type == tok_keyword && t->content == "for")
1211     return parse_for_loop ();
1212   else if (t && t->type == tok_keyword && t->content == "foreach")
1213     return parse_foreach_loop ();
1214   else if (t && t->type == tok_keyword && t->content == "return")
1215     return parse_return_statement ();
1216   else if (t && t->type == tok_keyword && t->content == "delete")
1217     return parse_delete_statement ();
1218   else if (t && t->type == tok_keyword && t->content == "while")
1219     return parse_while_loop ();
1220   else if (t && t->type == tok_keyword && t->content == "break")
1221     return parse_break_statement ();
1222   else if (t && t->type == tok_keyword && t->content == "continue")
1223     return parse_continue_statement ();
1224   else if (t && t->type == tok_keyword && t->content == "next")
1225     return parse_next_statement ();
1226   // XXX: "do/while" statement?
1227   else if (t && (t->type == tok_operator || // expressions are flexible
1228                  t->type == tok_identifier ||
1229                  t->type == tok_number ||
1230                  t->type == tok_string))
1231     return parse_expr_statement ();
1232   // XXX: consider generally accepting tok_embedded here too
1233   else
1234     throw parse_error ("expected statement");
1235 }
1236
1237
1238 void
1239 parser::parse_global (vector <vardecl*>& globals, vector<probe*>&)
1240 {
1241   const token* t0 = next ();
1242   if (! (t0->type == tok_keyword && t0->content == "global"))
1243     throw parse_error ("expected 'global'");
1244
1245   while (1)
1246     {
1247       const token* t = next ();
1248       if (! (t->type == tok_identifier))
1249         throw parse_error ("expected identifier");
1250
1251       for (unsigned i=0; i<globals.size(); i++)
1252         if (globals[i]->name == t->content)
1253           throw parse_error ("duplicate global name");
1254
1255       vardecl* d = new vardecl;
1256       d->name = t->content;
1257       d->tok = t;
1258       globals.push_back (d);
1259
1260       t = peek ();
1261
1262       if (t && t->type == tok_operator && t->content == "[") // array size
1263         {
1264           int64_t size;
1265           next ();
1266           expect_number(size);
1267           if (size <= 0 || size > 1000000) // arbitrary max
1268             throw parse_error("array size out of range");
1269           d->maxsize = (int)size;
1270           expect_known(tok_operator, "]");
1271           t = peek ();
1272         }
1273
1274       if (t && t->type == tok_operator && t->content == "=") // initialization
1275         {
1276           if (!d->compatible_arity(0))
1277             throw parse_error("only scalar globals can be initialized");
1278           d->set_arity(0);
1279           next ();
1280           d->init = parse_literal ();
1281           d->type = d->init->type;
1282           t = peek ();
1283         }
1284
1285       if (t && t->type == tok_operator && t->content == ";") // termination
1286           next();
1287
1288       if (t && t->type == tok_operator && t->content == ",") // next global
1289         {
1290           next ();
1291           continue;
1292         }
1293       else
1294         break;
1295     }
1296 }
1297
1298
1299 void
1300 parser::parse_functiondecl (std::vector<functiondecl*>& functions)
1301 {
1302   const token* t = next ();
1303   if (! (t->type == tok_keyword && t->content == "function"))
1304     throw parse_error ("expected 'function'");
1305
1306
1307   t = next ();
1308   if (! (t->type == tok_identifier)
1309       && ! (t->type == tok_keyword
1310             && (t->content == "string" || t->content == "long")))
1311     throw parse_error ("expected identifier");
1312
1313   for (unsigned i=0; i<functions.size(); i++)
1314     if (functions[i]->name == t->content)
1315       throw parse_error ("duplicate function name");
1316
1317   functiondecl *fd = new functiondecl ();
1318   fd->name = t->content;
1319   fd->tok = t;
1320
1321   t = next ();
1322   if (t->type == tok_operator && t->content == ":")
1323     {
1324       t = next ();
1325       if (t->type == tok_keyword && t->content == "string")
1326         fd->type = pe_string;
1327       else if (t->type == tok_keyword && t->content == "long")
1328         fd->type = pe_long;
1329       else throw parse_error ("expected 'string' or 'long'");
1330
1331       t = next ();
1332     }
1333
1334   if (! (t->type == tok_operator && t->content == "("))
1335     throw parse_error ("expected '('");
1336
1337   while (1)
1338     {
1339       t = next ();
1340
1341       // permit zero-argument fuctions
1342       if (t->type == tok_operator && t->content == ")")
1343         break;
1344       else if (! (t->type == tok_identifier))
1345         throw parse_error ("expected identifier");
1346       vardecl* vd = new vardecl;
1347       vd->name = t->content;
1348       vd->tok = t;
1349       fd->formal_args.push_back (vd);
1350
1351       t = next ();
1352       if (t->type == tok_operator && t->content == ":")
1353         {
1354           t = next ();
1355           if (t->type == tok_keyword && t->content == "string")
1356             vd->type = pe_string;
1357           else if (t->type == tok_keyword && t->content == "long")
1358             vd->type = pe_long;
1359           else throw parse_error ("expected 'string' or 'long'");
1360
1361           t = next ();
1362         }
1363       if (t->type == tok_operator && t->content == ")")
1364         break;
1365       if (t->type == tok_operator && t->content == ",")
1366         continue;
1367       else
1368         throw parse_error ("expected ',' or ')'");
1369     }
1370
1371   t = peek ();
1372   if (t && t->type == tok_embedded)
1373     fd->body = parse_embeddedcode ();
1374   else
1375     fd->body = parse_stmt_block ();
1376
1377   functions.push_back (fd);
1378 }
1379
1380
1381 probe_point*
1382 parser::parse_probe_point ()
1383 {
1384   probe_point* pl = new probe_point;
1385
1386   while (1)
1387     {
1388       const token* t = next (true); // wildcard scanning here
1389       if (! (t->type == tok_identifier
1390              // we must allow ".return" and ".function", which are keywords
1391              || t->type == tok_keyword))
1392         throw parse_error ("expected identifier or '*'");
1393
1394       if (pl->tok == 0) pl->tok = t;
1395
1396       probe_point::component* c = new probe_point::component;
1397       c->functor = t->content;
1398       pl->components.push_back (c);
1399       // NB we may add c->arg soon
1400
1401       t = peek ();
1402
1403       // consume optional parameter
1404       if (t && t->type == tok_operator && t->content == "(")
1405         {
1406           next (); // consume "("
1407           c->arg = parse_literal ();
1408
1409           t = next ();
1410           if (! (t->type == tok_operator && t->content == ")"))
1411             throw parse_error ("expected ')'");
1412
1413           t = peek ();
1414         }
1415
1416       if (t && t->type == tok_operator && t->content == ".")
1417         {
1418           next ();
1419           continue;
1420         }
1421
1422       // We only fall through here at the end of a probe point (past
1423       // all the dotted/parametrized components).
1424
1425       if (t && t->type == tok_operator &&
1426           (t->content == "?" || t->content == "!"))
1427         {
1428           pl->optional = true;
1429           if (t->content == "!") pl->sufficient = true;
1430           // NB: sufficient implies optional
1431           next ();
1432           t = peek ();
1433           // fall through
1434         }
1435
1436       if (t && t->type == tok_keyword && t->content == "if")
1437         {
1438           next ();
1439           t = peek ();
1440           if (t && ! (t->type == tok_operator && t->content == "("))
1441             throw parse_error ("expected '('");
1442           next ();
1443
1444           pl->condition = parse_expression ();
1445
1446           t = peek ();
1447           if (t && ! (t->type == tok_operator && t->content == ")"))
1448             throw parse_error ("expected ')'");
1449           next ();
1450           t = peek ();
1451           // fall through
1452         }
1453
1454       if (t && t->type == tok_operator
1455           && (t->content == "{" || t->content == "," ||
1456               t->content == "=" || t->content == "+=" ))
1457         break;
1458
1459       throw parse_error ("expected one of '. , ( ? ! { = +='");
1460     }
1461
1462   return pl;
1463 }
1464
1465
1466 literal*
1467 parser::parse_literal ()
1468 {
1469   const token* t = next ();
1470   literal* l;
1471   if (t->type == tok_string)
1472     l = new literal_string (t->content);
1473   else
1474     {
1475       bool neg = false;
1476       if (t->type == tok_operator && t->content == "-")
1477         {
1478           neg = true;
1479           t = next ();
1480         }
1481
1482       if (t->type == tok_number)
1483         {
1484           const char* startp = t->content.c_str ();
1485           char* endp = (char*) startp;
1486
1487           // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1488           // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1489           // since the lexer only gives us positive digit strings, but we'll
1490           // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1491           errno = 0;
1492           long long value = (long long) strtoull (startp, & endp, 0);
1493           if (errno == ERANGE || errno == EINVAL || *endp != '\0'
1494               || (neg && (unsigned long long) value > 9223372036854775808ULL)
1495               || (unsigned long long) value > 18446744073709551615ULL
1496               || value < -9223372036854775807LL-1)
1497             throw parse_error ("number invalid or out of range");
1498
1499           if (neg)
1500             value = -value;
1501
1502           l = new literal_number (value);
1503         }
1504       else
1505         throw parse_error ("expected literal string or number");
1506     }
1507
1508   l->tok = t;
1509   return l;
1510 }
1511
1512
1513 if_statement*
1514 parser::parse_if_statement ()
1515 {
1516   const token* t = next ();
1517   if (! (t->type == tok_keyword && t->content == "if"))
1518     throw parse_error ("expected 'if'");
1519   if_statement* s = new if_statement;
1520   s->tok = t;
1521
1522   t = next ();
1523   if (! (t->type == tok_operator && t->content == "("))
1524     throw parse_error ("expected '('");
1525
1526   s->condition = parse_expression ();
1527
1528   t = next ();
1529   if (! (t->type == tok_operator && t->content == ")"))
1530     throw parse_error ("expected ')'");
1531
1532   s->thenblock = parse_statement ();
1533
1534   t = peek ();
1535   if (t && t->type == tok_keyword && t->content == "else")
1536     {
1537       next ();
1538       s->elseblock = parse_statement ();
1539     }
1540   else
1541     s->elseblock = 0; // in case not otherwise initialized
1542
1543   return s;
1544 }
1545
1546
1547 expr_statement*
1548 parser::parse_expr_statement ()
1549 {
1550   expr_statement *es = new expr_statement;
1551   const token* t = peek ();
1552   es->tok = t;
1553   es->value = parse_expression ();
1554   return es;
1555 }
1556
1557
1558 return_statement*
1559 parser::parse_return_statement ()
1560 {
1561   const token* t = next ();
1562   if (! (t->type == tok_keyword && t->content == "return"))
1563     throw parse_error ("expected 'return'");
1564   if (context != con_function)
1565     throw parse_error ("found 'return' not in function context");
1566   return_statement* s = new return_statement;
1567   s->tok = t;
1568   s->value = parse_expression ();
1569   return s;
1570 }
1571
1572
1573 delete_statement*
1574 parser::parse_delete_statement ()
1575 {
1576   const token* t = next ();
1577   if (! (t->type == tok_keyword && t->content == "delete"))
1578     throw parse_error ("expected 'delete'");
1579   delete_statement* s = new delete_statement;
1580   s->tok = t;
1581   s->value = parse_expression ();
1582   return s;
1583 }
1584
1585
1586 next_statement*
1587 parser::parse_next_statement ()
1588 {
1589   const token* t = next ();
1590   if (! (t->type == tok_keyword && t->content == "next"))
1591     throw parse_error ("expected 'next'");
1592   if (context != con_probe)
1593     throw parse_error ("found 'next' not in probe context");
1594   next_statement* s = new next_statement;
1595   s->tok = t;
1596   return s;
1597 }
1598
1599
1600 break_statement*
1601 parser::parse_break_statement ()
1602 {
1603   const token* t = next ();
1604   if (! (t->type == tok_keyword && t->content == "break"))
1605     throw parse_error ("expected 'break'");
1606   break_statement* s = new break_statement;
1607   s->tok = t;
1608   return s;
1609 }
1610
1611
1612 continue_statement*
1613 parser::parse_continue_statement ()
1614 {
1615   const token* t = next ();
1616   if (! (t->type == tok_keyword && t->content == "continue"))
1617     throw parse_error ("expected 'continue'");
1618   continue_statement* s = new continue_statement;
1619   s->tok = t;
1620   return s;
1621 }
1622
1623
1624 for_loop*
1625 parser::parse_for_loop ()
1626 {
1627   const token* t = next ();
1628   if (! (t->type == tok_keyword && t->content == "for"))
1629     throw parse_error ("expected 'for'");
1630   for_loop* s = new for_loop;
1631   s->tok = t;
1632
1633   t = next ();
1634   if (! (t->type == tok_operator && t->content == "("))
1635     throw parse_error ("expected '('");
1636
1637   // initializer + ";"
1638   t = peek ();
1639   if (t && t->type == tok_operator && t->content == ";")
1640     {
1641       s->init = 0;
1642       next ();
1643     }
1644   else
1645     {
1646       s->init = parse_expr_statement ();
1647       t = next ();
1648       if (! (t->type == tok_operator && t->content == ";"))
1649         throw parse_error ("expected ';'");
1650     }
1651
1652   // condition + ";"
1653   t = peek ();
1654   if (t && t->type == tok_operator && t->content == ";")
1655     {
1656       literal_number* l = new literal_number(1);
1657       s->cond = l;
1658       s->cond->tok = next ();
1659     }
1660   else
1661     {
1662       s->cond = parse_expression ();
1663       t = next ();
1664       if (! (t->type == tok_operator && t->content == ";"))
1665         throw parse_error ("expected ';'");
1666     }
1667
1668   // increment + ")"
1669   t = peek ();
1670   if (t && t->type == tok_operator && t->content == ")")
1671     {
1672       s->incr = 0;
1673       next ();
1674     }
1675   else
1676     {
1677       s->incr = parse_expr_statement ();
1678       t = next ();
1679       if (! (t->type == tok_operator && t->content == ")"))
1680         throw parse_error ("expected ')'");
1681     }
1682
1683   // block
1684   s->block = parse_statement ();
1685
1686   return s;
1687 }
1688
1689
1690 for_loop*
1691 parser::parse_while_loop ()
1692 {
1693   const token* t = next ();
1694   if (! (t->type == tok_keyword && t->content == "while"))
1695     throw parse_error ("expected 'while'");
1696   for_loop* s = new for_loop;
1697   s->tok = t;
1698
1699   t = next ();
1700   if (! (t->type == tok_operator && t->content == "("))
1701     throw parse_error ("expected '('");
1702
1703   // dummy init and incr fields
1704   s->init = 0;
1705   s->incr = 0;
1706
1707   // condition
1708   s->cond = parse_expression ();
1709
1710   t = next ();
1711   if (! (t->type == tok_operator && t->content == ")"))
1712     throw parse_error ("expected ')'");
1713
1714   // block
1715   s->block = parse_statement ();
1716
1717   return s;
1718 }
1719
1720
1721 foreach_loop*
1722 parser::parse_foreach_loop ()
1723 {
1724   const token* t = next ();
1725   if (! (t->type == tok_keyword && t->content == "foreach"))
1726     throw parse_error ("expected 'foreach'");
1727   foreach_loop* s = new foreach_loop;
1728   s->tok = t;
1729   s->sort_direction = 0;
1730   s->limit = NULL;
1731
1732   t = next ();
1733   if (! (t->type == tok_operator && t->content == "("))
1734     throw parse_error ("expected '('");
1735
1736   // see also parse_array_in
1737
1738   bool parenthesized = false;
1739   t = peek ();
1740   if (t && t->type == tok_operator && t->content == "[")
1741     {
1742       next ();
1743       parenthesized = true;
1744     }
1745
1746   while (1)
1747     {
1748       t = next ();
1749       if (! (t->type == tok_identifier))
1750         throw parse_error ("expected identifier");
1751       symbol* sym = new symbol;
1752       sym->tok = t;
1753       sym->name = t->content;
1754       s->indexes.push_back (sym);
1755
1756       t = peek ();
1757       if (t && t->type == tok_operator &&
1758           (t->content == "+" || t->content == "-"))
1759         {
1760           if (s->sort_direction)
1761             throw parse_error ("multiple sort directives");
1762           s->sort_direction = (t->content == "+") ? 1 : -1;
1763           s->sort_column = s->indexes.size();
1764           next();
1765         }
1766
1767       if (parenthesized)
1768         {
1769           t = peek ();
1770           if (t && t->type == tok_operator && t->content == ",")
1771             {
1772               next ();
1773               continue;
1774             }
1775           else if (t && t->type == tok_operator && t->content == "]")
1776             {
1777               next ();
1778               break;
1779             }
1780           else
1781             throw parse_error ("expected ',' or ']'");
1782         }
1783       else
1784         break; // expecting only one expression
1785     }
1786
1787   t = next ();
1788   if (! (t->type == tok_keyword && t->content == "in"))
1789     throw parse_error ("expected 'in'");
1790
1791   s->base = parse_indexable();
1792
1793   t = peek ();
1794   if (t && t->type == tok_operator &&
1795       (t->content == "+" || t->content == "-"))
1796     {
1797       if (s->sort_direction)
1798         throw parse_error ("multiple sort directives");
1799       s->sort_direction = (t->content == "+") ? 1 : -1;
1800       s->sort_column = 0;
1801       next();
1802     }
1803
1804   t = peek ();
1805   if (tok_is(t, tok_keyword, "limit"))
1806     {
1807       next ();                          // get past the "limit"
1808       s->limit = parse_expression ();
1809     }
1810
1811   t = next ();
1812   if (! (t->type == tok_operator && t->content == ")"))
1813     throw parse_error ("expected ')'");
1814
1815   s->block = parse_statement ();
1816   return s;
1817 }
1818
1819
1820 expression*
1821 parser::parse_expression ()
1822 {
1823   return parse_assignment ();
1824 }
1825
1826
1827 expression*
1828 parser::parse_assignment ()
1829 {
1830   expression* op1 = parse_ternary ();
1831
1832   const token* t = peek ();
1833   // right-associative operators
1834   if (t && t->type == tok_operator
1835       && (t->content == "=" ||
1836           t->content == "<<<" ||
1837           t->content == "+=" ||
1838           t->content == "-=" ||
1839           t->content == "*=" ||
1840           t->content == "/=" ||
1841           t->content == "%=" ||
1842           t->content == "<<=" ||
1843           t->content == ">>=" ||
1844           t->content == "&=" ||
1845           t->content == "^=" ||
1846           t->content == "|=" ||
1847           t->content == ".=" ||
1848           false))
1849     {
1850       // NB: lvalueness is checked during elaboration / translation
1851       assignment* e = new assignment;
1852       e->left = op1;
1853       e->op = t->content;
1854       e->tok = t;
1855       next ();
1856       e->right = parse_expression ();
1857       op1 = e;
1858     }
1859
1860   return op1;
1861 }
1862
1863
1864 expression*
1865 parser::parse_ternary ()
1866 {
1867   expression* op1 = parse_logical_or ();
1868
1869   const token* t = peek ();
1870   if (t && t->type == tok_operator && t->content == "?")
1871     {
1872       ternary_expression* e = new ternary_expression;
1873       e->tok = t;
1874       e->cond = op1;
1875       next ();
1876       e->truevalue = parse_expression (); // XXX
1877
1878       t = next ();
1879       if (! (t->type == tok_operator && t->content == ":"))
1880         throw parse_error ("expected ':'");
1881
1882       e->falsevalue = parse_expression (); // XXX
1883       return e;
1884     }
1885   else
1886     return op1;
1887 }
1888
1889
1890 expression*
1891 parser::parse_logical_or ()
1892 {
1893   expression* op1 = parse_logical_and ();
1894
1895   const token* t = peek ();
1896   while (t && t->type == tok_operator && t->content == "||")
1897     {
1898       logical_or_expr* e = new logical_or_expr;
1899       e->tok = t;
1900       e->op = t->content;
1901       e->left = op1;
1902       next ();
1903       e->right = parse_logical_and ();
1904       op1 = e;
1905       t = peek ();
1906     }
1907
1908   return op1;
1909 }
1910
1911
1912 expression*
1913 parser::parse_logical_and ()
1914 {
1915   expression* op1 = parse_boolean_or ();
1916
1917   const token* t = peek ();
1918   while (t && t->type == tok_operator && t->content == "&&")
1919     {
1920       logical_and_expr *e = new logical_and_expr;
1921       e->left = op1;
1922       e->op = t->content;
1923       e->tok = t;
1924       next ();
1925       e->right = parse_boolean_or ();
1926       op1 = e;
1927       t = peek ();
1928     }
1929
1930   return op1;
1931 }
1932
1933
1934 expression*
1935 parser::parse_boolean_or ()
1936 {
1937   expression* op1 = parse_boolean_xor ();
1938
1939   const token* t = peek ();
1940   while (t && t->type == tok_operator && t->content == "|")
1941     {
1942       binary_expression* e = new binary_expression;
1943       e->left = op1;
1944       e->op = t->content;
1945       e->tok = t;
1946       next ();
1947       e->right = parse_boolean_xor ();
1948       op1 = e;
1949       t = peek ();
1950     }
1951
1952   return op1;
1953 }
1954
1955
1956 expression*
1957 parser::parse_boolean_xor ()
1958 {
1959   expression* op1 = parse_boolean_and ();
1960
1961   const token* t = peek ();
1962   while (t && t->type == tok_operator && t->content == "^")
1963     {
1964       binary_expression* e = new binary_expression;
1965       e->left = op1;
1966       e->op = t->content;
1967       e->tok = t;
1968       next ();
1969       e->right = parse_boolean_and ();
1970       op1 = e;
1971       t = peek ();
1972     }
1973
1974   return op1;
1975 }
1976
1977
1978 expression*
1979 parser::parse_boolean_and ()
1980 {
1981   expression* op1 = parse_array_in ();
1982
1983   const token* t = peek ();
1984   while (t && t->type == tok_operator && t->content == "&")
1985     {
1986       binary_expression* e = new binary_expression;
1987       e->left = op1;
1988       e->op = t->content;
1989       e->tok = t;
1990       next ();
1991       e->right = parse_array_in ();
1992       op1 = e;
1993       t = peek ();
1994     }
1995
1996   return op1;
1997 }
1998
1999
2000 expression*
2001 parser::parse_array_in ()
2002 {
2003   // This is a very tricky case.  All these are legit expressions:
2004   // "a in b"  "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
2005   vector<expression*> indexes;
2006   bool parenthesized = false;
2007
2008   const token* t = peek ();
2009   if (t && t->type == tok_operator && t->content == "[")
2010     {
2011       next ();
2012       parenthesized = true;
2013     }
2014
2015   while (1)
2016     {
2017       expression* op1 = parse_comparison ();
2018       indexes.push_back (op1);
2019
2020       if (parenthesized)
2021         {
2022           const token* t = peek ();
2023           if (t && t->type == tok_operator && t->content == ",")
2024             {
2025               next ();
2026               continue;
2027             }
2028           else if (t && t->type == tok_operator && t->content == "]")
2029             {
2030               next ();
2031               break;
2032             }
2033           else
2034             throw parse_error ("expected ',' or ']'");
2035         }
2036       else
2037         break; // expecting only one expression
2038     }
2039
2040   t = peek ();
2041   if (t && t->type == tok_keyword && t->content == "in")
2042     {
2043       array_in *e = new array_in;
2044       e->tok = t;
2045       next (); // swallow "in"
2046
2047       arrayindex* a = new arrayindex;
2048       a->indexes = indexes;
2049       a->base = parse_indexable();
2050       a->tok = a->base->get_tok();
2051       e->operand = a;
2052       return e;
2053     }
2054   else if (indexes.size() == 1) // no "in" - need one expression only
2055     return indexes[0];
2056   else
2057     throw parse_error ("unexpected comma-separated expression list");
2058 }
2059
2060
2061 expression*
2062 parser::parse_comparison ()
2063 {
2064   expression* op1 = parse_shift ();
2065
2066   const token* t = peek ();
2067   while (t && t->type == tok_operator
2068       && (t->content == ">" ||
2069           t->content == "<" ||
2070           t->content == "==" ||
2071           t->content == "!=" ||
2072           t->content == "<=" ||
2073           t->content == ">="))
2074     {
2075       comparison* e = new comparison;
2076       e->left = op1;
2077       e->op = t->content;
2078       e->tok = t;
2079       next ();
2080       e->right = parse_shift ();
2081       op1 = e;
2082       t = peek ();
2083     }
2084
2085   return op1;
2086 }
2087
2088
2089 expression*
2090 parser::parse_shift ()
2091 {
2092   expression* op1 = parse_concatenation ();
2093
2094   const token* t = peek ();
2095   while (t && t->type == tok_operator &&
2096          (t->content == "<<" || t->content == ">>"))
2097     {
2098       binary_expression* e = new binary_expression;
2099       e->left = op1;
2100       e->op = t->content;
2101       e->tok = t;
2102       next ();
2103       e->right = parse_concatenation ();
2104       op1 = e;
2105       t = peek ();
2106     }
2107
2108   return op1;
2109 }
2110
2111
2112 expression*
2113 parser::parse_concatenation ()
2114 {
2115   expression* op1 = parse_additive ();
2116
2117   const token* t = peek ();
2118   // XXX: the actual awk string-concatenation operator is *whitespace*.
2119   // I don't know how to easily to model that here.
2120   while (t && t->type == tok_operator && t->content == ".")
2121     {
2122       concatenation* e = new concatenation;
2123       e->left = op1;
2124       e->op = t->content;
2125       e->tok = t;
2126       next ();
2127       e->right = parse_additive ();
2128       op1 = e;
2129       t = peek ();
2130     }
2131
2132   return op1;
2133 }
2134
2135
2136 expression*
2137 parser::parse_additive ()
2138 {
2139   expression* op1 = parse_multiplicative ();
2140
2141   const token* t = peek ();
2142   while (t && t->type == tok_operator
2143       && (t->content == "+" || t->content == "-"))
2144     {
2145       binary_expression* e = new binary_expression;
2146       e->op = t->content;
2147       e->left = op1;
2148       e->tok = t;
2149       next ();
2150       e->right = parse_multiplicative ();
2151       op1 = e;
2152       t = peek ();
2153     }
2154
2155   return op1;
2156 }
2157
2158
2159 expression*
2160 parser::parse_multiplicative ()
2161 {
2162   expression* op1 = parse_unary ();
2163
2164   const token* t = peek ();
2165   while (t && t->type == tok_operator
2166       && (t->content == "*" || t->content == "/" || t->content == "%"))
2167     {
2168       binary_expression* e = new binary_expression;
2169       e->op = t->content;
2170       e->left = op1;
2171       e->tok = t;
2172       next ();
2173       e->right = parse_unary ();
2174       op1 = e;
2175       t = peek ();
2176     }
2177
2178   return op1;
2179 }
2180
2181
2182 expression*
2183 parser::parse_unary ()
2184 {
2185   const token* t = peek ();
2186   if (t && t->type == tok_operator
2187       && (t->content == "+" ||
2188           t->content == "-" ||
2189           t->content == "!" ||
2190           t->content == "~" ||
2191           false))
2192     {
2193       unary_expression* e = new unary_expression;
2194       e->op = t->content;
2195       e->tok = t;
2196       next ();
2197       e->operand = parse_crement ();
2198       return e;
2199     }
2200   else
2201     return parse_crement ();
2202 }
2203
2204
2205 expression*
2206 parser::parse_crement () // as in "increment" / "decrement"
2207 {
2208   // NB: Ideally, we'd parse only a symbol as an operand to the
2209   // *crement operators, instead of a general expression value.  We'd
2210   // need more complex lookahead code to tell apart the postfix cases.
2211   // So we just punt, and leave it to pass-3 to signal errors on
2212   // cases like "4++".
2213
2214   const token* t = peek ();
2215   if (t && t->type == tok_operator
2216       && (t->content == "++" || t->content == "--"))
2217     {
2218       pre_crement* e = new pre_crement;
2219       e->op = t->content;
2220       e->tok = t;
2221       next ();
2222       e->operand = parse_value ();
2223       return e;
2224     }
2225
2226   // post-crement or non-crement
2227   expression *op1 = parse_value ();
2228
2229   t = peek ();
2230   if (t && t->type == tok_operator
2231       && (t->content == "++" || t->content == "--"))
2232     {
2233       post_crement* e = new post_crement;
2234       e->op = t->content;
2235       e->tok = t;
2236       next ();
2237       e->operand = op1;
2238       return e;
2239     }
2240   else
2241     return op1;
2242 }
2243
2244
2245 expression*
2246 parser::parse_value ()
2247 {
2248   const token* t = peek ();
2249   if (! t)
2250     throw parse_error ("expected value");
2251
2252   if (t->type == tok_operator && t->content == "(")
2253     {
2254       next ();
2255       expression* e = parse_expression ();
2256       t = next ();
2257       if (! (t->type == tok_operator && t->content == ")"))
2258         throw parse_error ("expected ')'");
2259       return e;
2260     }
2261   else if (t->type == tok_identifier)
2262     return parse_symbol ();
2263   else
2264     return parse_literal ();
2265 }
2266
2267
2268 const token *
2269 parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name)
2270 {
2271   hop = NULL;
2272   const token* t = expect_ident (name);
2273   if (name == "@hist_linear" || name == "@hist_log")
2274     {
2275       hop = new hist_op;
2276       if (name == "@hist_linear")
2277         hop->htype = hist_linear;
2278       else if (name == "@hist_log")
2279         hop->htype = hist_log;
2280       hop->tok = t;
2281       expect_op("(");
2282       hop->stat = parse_expression ();
2283       int64_t tnum;
2284       if (hop->htype == hist_linear)
2285         {
2286           for (size_t i = 0; i < 3; ++i)
2287             {
2288               expect_op (",");
2289               expect_number (tnum);
2290               hop->params.push_back (tnum);
2291             }
2292         }
2293       expect_op(")");
2294     }
2295   return t;
2296 }
2297
2298
2299 indexable*
2300 parser::parse_indexable ()
2301 {
2302   hist_op *hop = NULL;
2303   string name;
2304   const token *tok = parse_hist_op_or_bare_name(hop, name);
2305   if (hop)
2306     return hop;
2307   else
2308     {
2309       symbol* sym = new symbol;
2310       sym->name = name;
2311       sym->tok = tok;
2312       return sym;
2313     }
2314 }
2315
2316
2317 // var, indexable[index], func(parms), printf("...", ...), $var, $var->member, @stat_op(stat)
2318 expression*
2319 parser::parse_symbol ()
2320 {
2321   hist_op *hop = NULL;
2322   symbol *sym = NULL;
2323   string name;
2324   const token *t = parse_hist_op_or_bare_name(hop, name);
2325
2326   if (!hop)
2327     {
2328       // If we didn't get a hist_op, then we did get an identifier. We can
2329       // now scrutinize this identifier for the various magic forms of identifier
2330       // (printf, @stat_op, and $var...)
2331
2332       bool pf_stream, pf_format, pf_delim, pf_newline, pf_char;
2333
2334       if (name == "@cast")
2335         {
2336           // type-punning time
2337           cast_op *cop = new cast_op;
2338           cop->tok = t;
2339           cop->base_name = name;
2340           expect_op("(");
2341           cop->operand = parse_expression ();
2342           expect_op(",");
2343           expect_unknown(tok_string, cop->type);
2344           // types never start with "struct<space>" or "union<space>",
2345           // so gobble it up.
2346           if (cop->type.compare(0, 7, "struct ") == 0)
2347             cop->type = cop->type.substr(7);
2348           if (cop->type.compare(0, 6, "union ") == 0)
2349             cop->type = cop->type.substr(6);
2350           if (peek_op (","))
2351             {
2352               next();
2353               expect_unknown(tok_string, cop->module);
2354             }
2355           expect_op(")");
2356           while (true)
2357             {
2358               string c;
2359               if (peek_op ("->"))
2360                 {
2361                   next();
2362                   expect_ident_or_keyword (c);
2363                   cop->components.push_back
2364                     (make_pair (target_symbol::comp_struct_member, c));
2365                 }
2366               else if (peek_op ("["))
2367                 {
2368                   next();
2369                   expect_unknown (tok_number, c);
2370                   expect_op ("]");
2371                   cop->components.push_back
2372                     (make_pair (target_symbol::comp_literal_array_index, c));
2373                 }
2374               else
2375                 break;
2376             }
2377           // if there aren't any dereferences, then the cast is pointless
2378           if (cop->components.empty())
2379             {
2380               expression *op = cop->operand;
2381               delete cop;
2382               return op;
2383             }
2384           return cop;
2385         }
2386
2387       else if (name.size() > 0 && name[0] == '@')
2388         {
2389           stat_op *sop = new stat_op;
2390           if (name == "@avg")
2391             sop->ctype = sc_average;
2392           else if (name == "@count")
2393             sop->ctype = sc_count;
2394           else if (name == "@sum")
2395             sop->ctype = sc_sum;
2396           else if (name == "@min")
2397             sop->ctype = sc_min;
2398           else if (name == "@max")
2399             sop->ctype = sc_max;
2400           else
2401             throw parse_error("unknown statistic operator " + name);
2402           expect_op("(");
2403           sop->tok = t;
2404           sop->stat = parse_expression ();
2405           expect_op(")");
2406           return sop;
2407         }
2408
2409       else if (print_format::parse_print(name,
2410          pf_stream, pf_format, pf_delim, pf_newline, pf_char))
2411         {
2412           print_format *fmt = new print_format;
2413           fmt->tok = t;
2414           fmt->print_to_stream = pf_stream;
2415           fmt->print_with_format = pf_format;
2416           fmt->print_with_delim = pf_delim;
2417           fmt->print_with_newline = pf_newline;
2418           fmt->print_char = pf_char;
2419
2420           expect_op("(");
2421           if ((name == "print" || name == "println") &&
2422               (peek_kw("@hist_linear") || peek_kw("@hist_log")))
2423             {
2424               // We have a special case where we recognize
2425               // print(@hist_foo(bar)) as a magic print-the-histogram
2426               // construct. This is sort of gross but it avoids
2427               // promoting histogram references to typeful
2428               // expressions.
2429
2430               hop = NULL;
2431               t = parse_hist_op_or_bare_name(hop, name);
2432               assert(hop);
2433
2434               // It is, sadly, possible that even while parsing a
2435               // hist_op, we *mis-guessed* and the user wishes to
2436               // print(@hist_op(foo)[bucket]), a scalar. In that case
2437               // we must parse the arrayindex and print an expression.
2438
2439               if (!peek_op ("["))
2440                 fmt->hist = hop;
2441               else
2442                 {
2443                   // This is simplified version of the
2444                   // multi-array-index parser below, because we can
2445                   // only ever have one index on a histogram anyways.
2446                   expect_op("[");
2447                   struct arrayindex* ai = new arrayindex;
2448                   ai->tok = t;
2449                   ai->base = hop;
2450                   ai->indexes.push_back (parse_expression ());
2451                   expect_op("]");
2452                   fmt->args.push_back(ai);
2453                 }
2454             }
2455           else
2456             {
2457               int min_args = 0;
2458               if (fmt->print_with_format)
2459                 {
2460                   // Consume and convert a format string. Agreement between the
2461                   // format string and the arguments is postponed to the
2462                   // typechecking phase.
2463                   string tmp;
2464                   expect_unknown (tok_string, tmp);
2465                   fmt->raw_components = tmp;
2466                   fmt->components = print_format::string_to_components (tmp);
2467                 }
2468               else if (fmt->print_with_delim)
2469                 {
2470                   // Consume a delimiter to separate arguments.
2471                   fmt->delimiter.clear();
2472                   fmt->delimiter.type = print_format::conv_literal;
2473                   expect_unknown (tok_string, fmt->delimiter.literal_string);
2474                   min_args = 2;
2475                 }
2476               else
2477                 {
2478                   // If we are not printing with a format string, we must have
2479                   // at least one argument (of any type).
2480                   expression *e = parse_expression ();
2481                   fmt->args.push_back(e);
2482                 }
2483
2484               // Consume any subsequent arguments.
2485               while (min_args || !peek_op (")"))
2486                 {
2487                   expect_op(",");
2488                   expression *e = parse_expression ();
2489                   fmt->args.push_back(e);
2490                   if (min_args)
2491                     --min_args;
2492                 }
2493             }
2494           expect_op(")");
2495           return fmt;
2496         }
2497
2498       else if (name.size() > 0 && name[0] == '$')
2499         {
2500           // target_symbol time
2501           target_symbol *tsym = new target_symbol;
2502           tsym->tok = t;
2503           tsym->base_name = name;
2504           while (true)
2505             {
2506               string c;
2507               if (peek_op ("->"))
2508                 {
2509                   next();
2510                   expect_ident_or_keyword (c);
2511                   tsym->components.push_back
2512                     (make_pair (target_symbol::comp_struct_member, c));
2513                 }
2514               else if (peek_op ("["))
2515                 {
2516                   next();
2517                   expect_unknown (tok_number, c);
2518                   expect_op ("]");
2519                   tsym->components.push_back
2520                     (make_pair (target_symbol::comp_literal_array_index, c));
2521                 }
2522               else
2523                 break;
2524             }
2525           return tsym;
2526         }
2527
2528       else if (peek_op ("(")) // function call
2529         {
2530           next ();
2531           struct functioncall* f = new functioncall;
2532           f->tok = t;
2533           f->function = name;
2534           // Allow empty actual parameter list
2535           if (peek_op (")"))
2536             {
2537               next ();
2538               return f;
2539             }
2540           while (1)
2541             {
2542               f->args.push_back (parse_expression ());
2543               if (peek_op (")"))
2544                 {
2545                   next();
2546                   break;
2547                 }
2548               else if (peek_op (","))
2549                 {
2550                   next();
2551                   continue;
2552                 }
2553               else
2554                 throw parse_error ("expected ',' or ')'");
2555             }
2556           return f;
2557         }
2558
2559       else
2560         {
2561           sym = new symbol;
2562           sym->name = name;
2563           sym->tok = t;
2564         }
2565     }
2566
2567   // By now, either we had a hist_op in the first place, or else
2568   // we had a plain word and it was converted to a symbol.
2569
2570   assert (!hop != !sym); // logical XOR
2571
2572   // All that remains is to check for array indexing
2573
2574   if (peek_op ("[")) // array
2575     {
2576       next ();
2577       struct arrayindex* ai = new arrayindex;
2578       ai->tok = t;
2579
2580       if (hop)
2581         ai->base = hop;
2582       else
2583         ai->base = sym;
2584
2585       while (1)
2586         {
2587           ai->indexes.push_back (parse_expression ());
2588           if (peek_op ("]"))
2589             {
2590               next();
2591               break;
2592             }
2593           else if (peek_op (","))
2594             {
2595               next();
2596               continue;
2597             }
2598           else
2599             throw parse_error ("expected ',' or ']'");
2600         }
2601       return ai;
2602     }
2603
2604   // If we got to here, we *should* have a symbol; if we have
2605   // a hist_op on its own, it doesn't count as an expression,
2606   // so we throw a parse error.
2607
2608   if (hop)
2609     throw parse_error("base histogram operator where expression expected", t);
2610
2611   return sym;
2612 }
2613
2614 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */