]> sourceware.org Git - systemtap.git/blame - parse.cxx
2006-06-02 David Smith <dsmith@redhat.com>
[systemtap.git] / parse.cxx
CommitLineData
2f1a1aea 1// recursive descent parser for systemtap scripts
213bee8f 2// Copyright (C) 2005-2006 Red Hat Inc.
77a5c1f9 3// Copyright (C) 2006 Intel Corporation.
69c68955
FCE
4//
5// This file is part of systemtap, and is free software. You can
6// redistribute it and/or modify it under the terms of the GNU General
7// Public License (GPL); either version 2, or (at your option) any
8// later version.
2f1a1aea 9
2b066ec1 10#include "config.h"
2f1a1aea
FCE
11#include "staptree.h"
12#include "parse.h"
177a8ead 13#include "session.h"
2b066ec1
FCE
14#include <iostream>
15#include <fstream>
2f1a1aea 16#include <cctype>
9c0c0e46 17#include <cstdlib>
29e64872 18#include <cassert>
9c0c0e46
FCE
19#include <cerrno>
20#include <climits>
57b73400 21#include <sstream>
2f1a1aea
FCE
22
23using namespace std;
24
25// ------------------------------------------------------------------------
26
bb2e3076
FCE
27
28
177a8ead
FCE
29parser::parser (systemtap_session& s, istream& i, bool p):
30 session (s),
24cb178f 31 input_name ("<input>"), free_input (0),
213bee8f 32 input (i, input_name, s), privileged (p),
6e213f58 33 context(con_unknown), last_t (0), next_t (0), num_errors (0)
2f1a1aea
FCE
34{ }
35
177a8ead
FCE
36parser::parser (systemtap_session& s, const string& fn, bool p):
37 session (s),
2f1a1aea 38 input_name (fn), free_input (new ifstream (input_name.c_str(), ios::in)),
213bee8f 39 input (* free_input, input_name, s), privileged (p),
6e213f58 40 context(con_unknown), last_t (0), next_t (0), num_errors (0)
2f1a1aea
FCE
41{ }
42
43parser::~parser()
44{
45 if (free_input) delete free_input;
46}
47
48
82919855 49stapfile*
177a8ead 50parser::parse (systemtap_session& s, std::istream& i, bool pr)
82919855 51{
177a8ead 52 parser p (s, i, pr);
82919855
FCE
53 return p.parse ();
54}
55
56
57stapfile*
177a8ead 58parser::parse (systemtap_session& s, const std::string& n, bool pr)
82919855 59{
177a8ead 60 parser p (s, n, pr);
82919855
FCE
61 return p.parse ();
62}
63
d7f3e0c5
GH
64static string
65tt2str(token_type tt)
66{
67 switch (tt)
68 {
69 case tok_junk: return "junk";
70 case tok_identifier: return "identifier";
71 case tok_operator: return "operator";
72 case tok_string: return "string";
73 case tok_number: return "number";
74 case tok_embedded: return "embedded-code";
6e213f58 75 case tok_keyword: return "keyword";
d7f3e0c5
GH
76 }
77 return "unknown token";
78}
82919855 79
0323ed4d
WC
80ostream&
81operator << (ostream& o, const source_loc& loc)
82{
83 o << loc.file << ":"
84 << loc.line << ":"
85 << loc.column;
86
87 return o;
88}
89
56099f08
FCE
90ostream&
91operator << (ostream& o, const token& t)
92{
d7f3e0c5 93 o << tt2str(t.type);
56099f08 94
6e213f58 95 if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
56099f08 96 {
24cb178f
FCE
97 o << " '";
98 for (unsigned i=0; i<t.content.length(); i++)
99 {
100 char c = t.content[i];
101 o << (isprint (c) ? c : '?');
102 }
103 o << "'";
56099f08 104 }
56099f08
FCE
105
106 o << " at "
0323ed4d 107 << t.location;
56099f08
FCE
108
109 return o;
110}
111
112
2f1a1aea
FCE
113void
114parser::print_error (const parse_error &pe)
115{
116 cerr << "parse error: " << pe.what () << endl;
117
177a8ead
FCE
118 if (pe.tok)
119 {
120 cerr << "\tat: " << *pe.tok << endl;
121 }
2f1a1aea 122 else
177a8ead
FCE
123 {
124 const token* t = last_t;
125 if (t)
126 cerr << "\tsaw: " << *t << endl;
127 else
128 cerr << "\tsaw: " << input_name << " EOF" << endl;
129 }
2f1a1aea
FCE
130
131 // XXX: make it possible to print the last input line,
132 // so as to line up an arrow with the specific error column
133
134 num_errors ++;
135}
136
137
138const token*
139parser::last ()
140{
141 return last_t;
142}
143
144
177a8ead
FCE
145// Here, we perform on-the-fly preprocessing.
146// The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
44ce8ed5
FCE
147// where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
148// or: arch COMPARISON-OP "arch-string"
149// The %: ELSE-TOKENS part is optional.
177a8ead
FCE
150//
151// e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
44ce8ed5 152// e.g. %( arch != "i686" %? "foo" %: "baz" %)
177a8ead
FCE
153//
154// Up to an entire %( ... %) expression is processed by a single call
155// to this function. Tokens included by any nested conditions are
156// enqueued in a private vector.
157
158bool eval_pp_conditional (systemtap_session& s,
159 const token* l, const token* op, const token* r)
160{
44ce8ed5
FCE
161 if (l->type == tok_identifier && (l->content == "kernel_v" ||
162 l->content == "kernel_vr"))
163 {
164 string target_kernel_vr = s.kernel_release;
165 string target_kernel_v = target_kernel_vr;
166 // cut off any release code suffix
77a5c1f9 167 string::size_type dr = target_kernel_vr.find ('-');
44ce8ed5
FCE
168 if (dr > 0 && dr != string::npos)
169 target_kernel_v = target_kernel_vr.substr (0, dr);
170
171 if (! (r->type == tok_string))
172 throw parse_error ("expected string literal", r);
173 string query_kernel_vr = r->content;
174
175 // collect acceptable strverscmp results.
176 int rvc_ok1, rvc_ok2;
177 if (op->type == tok_operator && op->content == "<=")
178 { rvc_ok1 = -1; rvc_ok2 = 0; }
179 else if (op->type == tok_operator && op->content == ">=")
180 { rvc_ok1 = 1; rvc_ok2 = 0; }
181 else if (op->type == tok_operator && op->content == "<")
182 { rvc_ok1 = -1; rvc_ok2 = -1; }
183 else if (op->type == tok_operator && op->content == ">")
184 { rvc_ok1 = 1; rvc_ok2 = 1; }
185 else if (op->type == tok_operator && op->content == "==")
186 { rvc_ok1 = 0; rvc_ok2 = 0; }
187 else if (op->type == tok_operator && op->content == "!=")
188 { rvc_ok1 = -1; rvc_ok2 = 1; }
189 else
190 throw parse_error ("expected comparison operator", op);
191
192 int rvc_result = strverscmp ((l->content == "kernel_vr" ?
193 target_kernel_vr.c_str() :
194 target_kernel_v.c_str()),
195 query_kernel_vr.c_str());
196 // normalize rvc_result
197 if (rvc_result < 0) rvc_result = -1;
198 if (rvc_result > 0) rvc_result = 1;
199
200 return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
201 }
202 else if (l->type == tok_identifier && l->content == "arch")
203 {
204 string target_architecture = s.architecture;
205 if (! (r->type == tok_string))
206 throw parse_error ("expected string literal", r);
207 string query_architecture = r->content;
208
209 bool result;
210 if (op->type == tok_operator && op->content == "==")
211 result = target_architecture == query_architecture;
212 else if (op->type == tok_operator && op->content == "!=")
213 result = target_architecture != query_architecture;
214 else
215 throw parse_error ("expected '==' or '!='", op);
216
217 return result;
218 }
219 // XXX: support other forms? "CONFIG_SMP" ?
177a8ead 220 else
44ce8ed5 221 throw parse_error ("expected 'arch' or 'kernel_v' or 'kernel_vr'", l);
177a8ead
FCE
222}
223
224
225const token*
226parser::scan_pp ()
227{
228 while (true)
229 {
230 if (enqueued_pp.size() > 0)
231 {
232 const token* t = enqueued_pp[0];
233 enqueued_pp.erase (enqueued_pp.begin());
234 return t;
235 }
236
237 const token* t = input.scan (); // NB: not recursive!
238 if (t == 0) // EOF
239 return t;
240
241 if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
242 return t;
243
244 // We have a %( - it's time to throw a preprocessing party!
245
246 const token *l, *op, *r;
247 l = input.scan (); // NB: not recursive, though perhaps could be
248 op = input.scan ();
249 r = input.scan ();
250 if (l == 0 || op == 0 || r == 0)
251 throw parse_error ("incomplete condition after '%('", t);
252 // NB: consider generalizing to consume all tokens until %?, and
253 // passing that as a vector to an evaluator.
254
255 bool result = eval_pp_conditional (session, l, op, r);
70c743d8
JS
256 delete l;
257 delete op;
258 delete r;
177a8ead
FCE
259
260 const token *m = input.scan (); // NB: not recursive
261 if (! (m && m->type == tok_operator && m->content == "%?"))
262 throw parse_error ("expected '%?' marker for conditional", t);
70c743d8 263 delete m; // "%?"
177a8ead
FCE
264
265 vector<const token*> my_enqueued_pp;
266
267 while (true) // consume THEN tokens
268 {
269 m = scan_pp (); // NB: recursive
270 if (m == 0)
271 throw parse_error ("missing THEN tokens for conditional", t);
272
273 if (m->type == tok_operator && (m->content == "%:" || // ELSE
274 m->content == "%)")) // END
275 break;
276 // enqueue token
277 if (result)
278 my_enqueued_pp.push_back (m);
70c743d8
JS
279 else
280 delete m; // unused token
177a8ead
FCE
281 // continue
282 }
283
284 if (m && m->type == tok_operator && m->content == "%:") // ELSE
70c743d8
JS
285 {
286 delete m; // "%:"
287 while (true)
288 {
289 m = scan_pp (); // NB: recursive
290 if (m == 0)
291 throw parse_error ("missing ELSE tokens for conditional", t);
292
293 if (m->type == tok_operator && m->content == "%)") // END
294 break;
295 // enqueue token
296 if (! result)
297 my_enqueued_pp.push_back (m);
298 else
299 delete m; // unused token
300 // continue
301 }
302 }
303 delete t; // "%("
304 delete m; // "%)"
177a8ead
FCE
305
306 // NB: we transcribe the retained tokens here, and not inside
307 // the THEN/ELSE while loops. If it were done there, each loop
308 // would become infinite (each iteration consuming an ordinary
309 // token the previous one just pushed there). Guess how I
310 // figured that out.
311 enqueued_pp.insert (enqueued_pp.end(),
312 my_enqueued_pp.begin(),
313 my_enqueued_pp.end());
314
315 // Go back to outermost while(true) loop. We hope that at least
316 // some THEN or ELSE tokens were enqueued. If not, around we go
317 // again, until EOF.
318 }
319}
320
321
2f1a1aea
FCE
322const token*
323parser::next ()
324{
325 if (! next_t)
177a8ead 326 next_t = scan_pp ();
2f1a1aea
FCE
327 if (! next_t)
328 throw parse_error ("unexpected end-of-file");
329
2f1a1aea
FCE
330 last_t = next_t;
331 // advance by zeroing next_t
332 next_t = 0;
333 return last_t;
334}
335
336
337const token*
338parser::peek ()
339{
340 if (! next_t)
177a8ead 341 next_t = scan_pp ();
2f1a1aea
FCE
342
343 // don't advance by zeroing next_t
344 last_t = next_t;
345 return next_t;
346}
347
348
d7f3e0c5
GH
349static inline bool
350tok_is(token const * t, token_type tt, string const & expected)
351{
352 return t && t->type == tt && t->content == expected;
353}
354
355
356const token*
357parser::expect_known (token_type tt, string const & expected)
358{
359 const token *t = next();
57b73400 360 if (! (t && t->type == tt && t->content == expected))
d7f3e0c5
GH
361 throw parse_error ("expected '" + expected + "'");
362 return t;
363}
364
365
366const token*
367parser::expect_unknown (token_type tt, string & target)
368{
369 const token *t = next();
370 if (!(t && t->type == tt))
371 throw parse_error ("expected " + tt2str(tt));
372 target = t->content;
373 return t;
374}
375
376
377const token*
378parser::expect_op (std::string const & expected)
379{
380 return expect_known (tok_operator, expected);
381}
382
383
384const token*
385parser::expect_kw (std::string const & expected)
386{
387 return expect_known (tok_identifier, expected);
388}
389
57b73400
GH
390const token*
391parser::expect_number (int64_t & expected)
392{
393 std::string tmp;
394 token const * tt = expect_unknown (tok_number, tmp);
395 istringstream iss(tmp);
396 iss >> expected;
397 return tt;
398}
399
d7f3e0c5
GH
400
401const token*
402parser::expect_ident (std::string & target)
403{
404 return expect_unknown (tok_identifier, target);
405}
406
407
408bool
409parser::peek_op (std::string const & op)
410{
411 return tok_is (peek(), tok_operator, op);
412}
413
414
415bool
416parser::peek_kw (std::string const & kw)
417{
418 return tok_is (peek(), tok_identifier, kw);
419}
420
421
422
213bee8f
FCE
423lexer::lexer (istream& i, const string& in, systemtap_session& s):
424 input (i), input_name (in), cursor_line (1), cursor_column (1), session(s)
2f1a1aea
FCE
425{ }
426
bb2e3076
FCE
427
428int
429lexer::input_peek (unsigned n)
430{
431 while (lookahead.size() <= n)
432 {
433 int c = input.get ();
434 lookahead.push_back (input ? c : -1);
435 }
436 return lookahead[n];
437}
438
439
2f1a1aea
FCE
440int
441lexer::input_get ()
442{
bb2e3076
FCE
443 int c = input_peek (0);
444 lookahead.erase (lookahead.begin ());
445
446 if (c < 0) return c; // EOF
447
2f1a1aea
FCE
448 // update source cursor
449 if (c == '\n')
450 {
451 cursor_line ++;
452 cursor_column = 1;
453 }
454 else
455 cursor_column ++;
456
457 return c;
458}
459
460
461token*
462lexer::scan ()
463{
464 token* n = new token;
465 n->location.file = input_name;
466
467 skip:
468 n->location.line = cursor_line;
469 n->location.column = cursor_column;
470
471 int c = input_get();
472 if (c < 0)
473 {
474 delete n;
475 return 0;
476 }
477
478 if (isspace (c))
479 goto skip;
480
d02548c0 481 else if (isalpha (c) || c == '$' || c == '@' || c == '_')
2f1a1aea
FCE
482 {
483 n->type = tok_identifier;
484 n->content = (char) c;
485 while (1)
486 {
bb2e3076 487 int c2 = input_peek ();
2f1a1aea
FCE
488 if (! input)
489 break;
0fefb486 490 if ((isalnum(c2) || c2 == '_' || c2 == '$'))
2f1a1aea
FCE
491 {
492 n->content.push_back(c2);
493 input_get ();
494 }
495 else
496 break;
497 }
213bee8f
FCE
498
499 // Expand command line arguments to literals. $1 .. $999 as
500 // numbers and @1 .. @999 as strings.
501 if (n->content[0] == '@' || n->content[0] == '$')
502 {
503 string idxstr = n->content.substr(1);
504 const char* startp = idxstr.c_str();
505 char *endp;
506 errno = 0;
507 unsigned long idx = strtoul (startp, &endp, 10);
508 if (endp == startp)
509 ; // no numbers at all - leave alone as identifier
510 else
511 {
512 // Use @1/$1 as the base, not @0/$0. Thus the idx-1.
513 if (errno == ERANGE || errno == EINVAL || *endp != '\0' ||
514 idx == 0 || idx-1 >= session.args.size ())
515 throw parse_error ("command line argument index invalid or out of range");
516
517 string arg = session.args[idx-1];
518 n->type = (n->content[0] == '@') ? tok_string : tok_number;
519 n->content = arg;
520 }
521 }
6e213f58
DS
522 else
523 {
524 if (n->content == "probe"
525 || n->content == "global"
526 || n->content == "function"
527 || n->content == "if"
528 || n->content == "else"
529 || n->content == "for"
530 || n->content == "foreach"
531 || n->content == "in"
532 || n->content == "return"
533 || n->content == "delete"
534 || n->content == "while"
535 || n->content == "break"
536 || n->content == "continue"
537 || n->content == "next"
538 || n->content == "string"
539 || n->content == "long")
540 n->type = tok_keyword;
541 }
213bee8f 542
2f1a1aea
FCE
543 return n;
544 }
545
3a20432b 546 else if (isdigit (c)) // positive literal
2f1a1aea 547 {
2f1a1aea 548 n->type = tok_number;
9c0c0e46
FCE
549 n->content = (char) c;
550
2f1a1aea
FCE
551 while (1)
552 {
bb2e3076 553 int c2 = input_peek ();
2f1a1aea
FCE
554 if (! input)
555 break;
9c0c0e46
FCE
556
557 // NB: isalnum is very permissive. We rely on strtol, called in
558 // parser::parse_literal below, to confirm that the number string
559 // is correctly formatted and in range.
560
561 if (isalnum (c2))
2f1a1aea 562 {
9c0c0e46 563 n->content.push_back (c2);
2f1a1aea
FCE
564 input_get ();
565 }
566 else
567 break;
568 }
569 return n;
570 }
571
572 else if (c == '\"')
573 {
574 n->type = tok_string;
575 while (1)
576 {
577 c = input_get ();
578
579 if (! input || c == '\n')
580 {
581 n->type = tok_junk;
582 break;
583 }
584 if (c == '\"') // closing double-quotes
585 break;
586 else if (c == '\\')
7d46afb8
GH
587 {
588 c = input_get ();
589 switch (c)
590 {
591 case 'a':
592 case 'b':
593 case 't':
594 case 'n':
595 case 'v':
596 case 'f':
597 case 'r':
f03954fd 598 case '0' ... '7': // NB: need only match the first digit
7d46afb8
GH
599 case '\\':
600
601 // Pass these escapes through to the string value
f03954fd 602 // beign parsed; it will be emitted into a C literal.
7d46afb8
GH
603
604 n->content.push_back('\\');
605
606 default:
607
608 n->content.push_back(c);
609 break;
610 }
2f1a1aea
FCE
611 }
612 else
613 n->content.push_back(c);
614 }
615 return n;
616 }
617
618 else if (ispunct (c))
619 {
bb2e3076
FCE
620 int c2 = input_peek ();
621 int c3 = input_peek (1);
622 string s1 = string("") + (char) c;
623 string s2 = (c2 > 0 ? s1 + (char) c2 : s1);
624 string s3 = (c3 > 0 ? s2 + (char) c3 : s2);
2f1a1aea 625
3a20432b
FCE
626 // NB: if we were to recognize negative numeric literals here,
627 // we'd introduce another grammar ambiguity:
628 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
629 // instead of tok_number(1) tok_operator('-') tok_number(1)
630
bb2e3076 631 if (s1 == "#") // shell comment
2f1a1aea
FCE
632 {
633 unsigned this_line = cursor_line;
bb2e3076
FCE
634 do { c = input_get (); }
635 while (c >= 0 && cursor_line == this_line);
2f1a1aea
FCE
636 goto skip;
637 }
bb2e3076 638 else if (s2 == "//") // C++ comment
63a7c90e
FCE
639 {
640 unsigned this_line = cursor_line;
bb2e3076
FCE
641 do { c = input_get (); }
642 while (c >= 0 && cursor_line == this_line);
63a7c90e
FCE
643 goto skip;
644 }
645 else if (c == '/' && c2 == '*') // C comment
646 {
647 c2 = input_get ();
648 unsigned chars = 0;
bb2e3076 649 while (c2 >= 0)
63a7c90e
FCE
650 {
651 chars ++; // track this to prevent "/*/" from being accepted
652 c = c2;
653 c2 = input_get ();
654 if (chars > 1 && c == '*' && c2 == '/')
bb2e3076 655 break;
63a7c90e 656 }
bb2e3076 657 goto skip;
63a7c90e 658 }
54dfabe9
FCE
659 else if (c == '%' && c2 == '{') // embedded code
660 {
661 n->type = tok_embedded;
662 (void) input_get (); // swallow '{' already in c2
663 while (true)
664 {
665 c = input_get ();
2ba0c474 666 if (c < 0) // EOF
54dfabe9
FCE
667 {
668 n->type = tok_junk;
669 break;
670 }
671 if (c == '%')
672 {
673 c2 = input_peek ();
674 if (c2 == '}')
675 {
676 (void) input_get (); // swallow '}' too
677 break;
678 }
679 }
680 n->content += c;
681 }
682 return n;
683 }
2f1a1aea 684
bb2e3076
FCE
685 // We're committed to recognizing at least the first character
686 // as an operator.
2f1a1aea 687 n->type = tok_operator;
2f1a1aea 688
bb2e3076
FCE
689 // match all valid operators, in decreasing size order
690 if (s3 == "<<<" ||
691 s3 == "<<=" ||
692 s3 == ">>=")
82919855 693 {
bb2e3076
FCE
694 n->content = s3;
695 input_get (); input_get (); // swallow other two characters
696 }
697 else if (s2 == "==" ||
698 s2 == "!=" ||
699 s2 == "<=" ||
700 s2 == ">=" ||
701 s2 == "+=" ||
702 s2 == "-=" ||
703 s2 == "*=" ||
704 s2 == "/=" ||
705 s2 == "%=" ||
706 s2 == "&=" ||
707 s2 == "^=" ||
708 s2 == "|=" ||
d5d7c2cc 709 s2 == ".=" ||
bb2e3076
FCE
710 s2 == "&&" ||
711 s2 == "||" ||
712 s2 == "++" ||
713 s2 == "--" ||
714 s2 == "->" ||
715 s2 == "<<" ||
177a8ead
FCE
716 s2 == ">>" ||
717 // preprocessor tokens
718 s2 == "%(" ||
719 s2 == "%?" ||
720 s2 == "%:" ||
721 s2 == "%)")
bb2e3076
FCE
722 {
723 n->content = s2;
724 input_get (); // swallow other character
725 }
726 else
727 {
728 n->content = s1;
82919855 729 }
2f1a1aea
FCE
730
731 return n;
732 }
733
734 else
735 {
736 n->type = tok_junk;
737 n->content = (char) c;
738 return n;
739 }
740}
741
742
743// ------------------------------------------------------------------------
744
745stapfile*
746parser::parse ()
747{
748 stapfile* f = new stapfile;
749 f->name = input_name;
56099f08
FCE
750
751 bool empty = true;
752
2f1a1aea
FCE
753 while (1)
754 {
755 try
756 {
757 const token* t = peek ();
56099f08 758 if (! t) // nice clean EOF
2f1a1aea
FCE
759 break;
760
56099f08 761 empty = false;
6e213f58
DS
762 if (t->type == tok_keyword && t->content == "probe")
763 {
764 context = con_probe;
765 parse_probe (f->probes, f->aliases);
766 }
767 else if (t->type == tok_keyword && t->content == "global")
768 {
769 context = con_global;
770 parse_global (f->globals);
771 }
772 else if (t->type == tok_keyword && t->content == "function")
773 {
774 context = con_function;
775 parse_functiondecl (f->functions);
776 }
54dfabe9 777 else if (t->type == tok_embedded)
6e213f58
DS
778 {
779 context = con_embedded;
780 f->embeds.push_back (parse_embeddedcode ());
781 }
2f1a1aea 782 else
6e213f58
DS
783 {
784 context = con_unknown;
785 throw parse_error ("expected 'probe', 'global', 'function', or '%{'");
786 }
2f1a1aea
FCE
787 }
788 catch (parse_error& pe)
789 {
790 print_error (pe);
177a8ead
FCE
791 try
792 {
793 // Quietly swallow all tokens until the next '}'.
794 while (1)
795 {
796 const token* t = peek ();
797 if (! t)
798 break;
799 next ();
800 if (t->type == tok_operator && t->content == "}")
801 break;
802 }
803 }
804 catch (parse_error& pe2)
805 {
806 // parse error during recovery ... ugh
807 print_error (pe2);
808 }
809 }
2f1a1aea
FCE
810 }
811
56099f08
FCE
812 if (empty)
813 {
814 cerr << "Input file '" << input_name << "' is empty or missing." << endl;
815 delete f;
816 return 0;
817 }
818 else if (num_errors > 0)
2f1a1aea
FCE
819 {
820 cerr << num_errors << " parse error(s)." << endl;
821 delete f;
56099f08 822 return 0;
2f1a1aea
FCE
823 }
824
825 return f;
826}
827
828
20c6c071 829void
54dfabe9
FCE
830parser::parse_probe (std::vector<probe *> & probe_ret,
831 std::vector<probe_alias *> & alias_ret)
2f1a1aea 832{
82919855 833 const token* t0 = next ();
6e213f58 834 if (! (t0->type == tok_keyword && t0->content == "probe"))
82919855
FCE
835 throw parse_error ("expected 'probe'");
836
20c6c071
GH
837 vector<probe_point *> aliases;
838 vector<probe_point *> locations;
839
840 bool equals_ok = true;
82919855 841
97266278
LG
842 int epilogue_alias = 0;
843
2f1a1aea
FCE
844 while (1)
845 {
b4ceace2
FCE
846 probe_point * pp = parse_probe_point ();
847
848 const token* t = peek ();
849 if (equals_ok && t
850 && t->type == tok_operator && t->content == "=")
851 {
852 aliases.push_back(pp);
853 next ();
854 continue;
855 }
97266278
LG
856 else if (equals_ok && t
857 && t->type == tok_operator && t->content == "+=")
858 {
859 aliases.push_back(pp);
860 epilogue_alias = 1;
861 next ();
862 continue;
863 }
864
b4ceace2
FCE
865 else if (t && t->type == tok_operator && t->content == ",")
866 {
867 locations.push_back(pp);
868 equals_ok = false;
869 next ();
870 continue;
871 }
872 else if (t && t->type == tok_operator && t->content == "{")
873 {
874 locations.push_back(pp);
875 break;
876 }
2f1a1aea 877 else
9c0c0e46 878 throw parse_error ("expected probe point specifier");
2f1a1aea 879 }
20c6c071 880
20c6c071
GH
881 if (aliases.empty())
882 {
54dfabe9
FCE
883 probe* p = new probe;
884 p->tok = t0;
885 p->locations = locations;
886 p->body = parse_stmt_block ();
887 probe_ret.push_back (p);
20c6c071
GH
888 }
889 else
890 {
54dfabe9 891 probe_alias* p = new probe_alias (aliases);
97266278
LG
892 if(epilogue_alias)
893 p->epilogue_style = true;
894 else
895 p->epilogue_style = false;
54dfabe9
FCE
896 p->tok = t0;
897 p->locations = locations;
898 p->body = parse_stmt_block ();
899 alias_ret.push_back (p);
20c6c071 900 }
54dfabe9 901}
20c6c071 902
54dfabe9
FCE
903
904embeddedcode*
905parser::parse_embeddedcode ()
906{
907 embeddedcode* e = new embeddedcode;
908 const token* t = next ();
909 if (t->type != tok_embedded)
24cb178f
FCE
910 throw parse_error ("expected '%{'");
911
912 if (! privileged)
913 throw parse_error ("embedded code in unprivileged script");
54dfabe9
FCE
914
915 e->tok = t;
916 e->code = t->content;
917 return e;
2f1a1aea
FCE
918}
919
920
921block*
56099f08 922parser::parse_stmt_block ()
2f1a1aea
FCE
923{
924 block* pb = new block;
925
56099f08
FCE
926 const token* t = next ();
927 if (! (t->type == tok_operator && t->content == "{"))
928 throw parse_error ("expected '{'");
929
930 pb->tok = t;
2b066ec1 931
2f1a1aea
FCE
932 while (1)
933 {
934 try
935 {
2b066ec1
FCE
936 t = peek ();
937 if (t && t->type == tok_operator && t->content == "}")
938 {
939 next ();
940 break;
941 }
942
2f1a1aea 943 pb->statements.push_back (parse_statement ());
2f1a1aea
FCE
944 }
945 catch (parse_error& pe)
946 {
947 print_error (pe);
54dfabe9 948
2f1a1aea
FCE
949 // Quietly swallow all tokens until the next ';' or '}'.
950 while (1)
951 {
952 const token* t = peek ();
54dfabe9 953 if (! t) return 0;
2f1a1aea 954 next ();
54dfabe9
FCE
955 if (t->type == tok_operator
956 && (t->content == "}" || t->content == ";"))
2f1a1aea
FCE
957 break;
958 }
959 }
960 }
961
962 return pb;
963}
964
965
966statement*
967parser::parse_statement ()
968{
969 const token* t = peek ();
970 if (t && t->type == tok_operator && t->content == ";")
971 {
69c68955
FCE
972 null_statement* n = new null_statement ();
973 n->tok = next ();
974 return n;
2f1a1aea
FCE
975 }
976 else if (t && t->type == tok_operator && t->content == "{")
56099f08 977 return parse_stmt_block ();
6e213f58 978 else if (t && t->type == tok_keyword && t->content == "if")
56099f08 979 return parse_if_statement ();
6e213f58 980 else if (t && t->type == tok_keyword && t->content == "for")
69c68955 981 return parse_for_loop ();
6e213f58 982 else if (t && t->type == tok_keyword && t->content == "foreach")
69c68955 983 return parse_foreach_loop ();
6e213f58 984 else if (t && t->type == tok_keyword && t->content == "return")
56099f08 985 return parse_return_statement ();
6e213f58 986 else if (t && t->type == tok_keyword && t->content == "delete")
56099f08 987 return parse_delete_statement ();
6e213f58 988 else if (t && t->type == tok_keyword && t->content == "while")
f3c26ea5 989 return parse_while_loop ();
6e213f58 990 else if (t && t->type == tok_keyword && t->content == "break")
f3c26ea5 991 return parse_break_statement ();
6e213f58 992 else if (t && t->type == tok_keyword && t->content == "continue")
f3c26ea5 993 return parse_continue_statement ();
6e213f58 994 else if (t && t->type == tok_keyword && t->content == "next")
f3c26ea5
FCE
995 return parse_next_statement ();
996 // XXX: "do/while" statement?
2f1a1aea
FCE
997 else if (t && (t->type == tok_operator || // expressions are flexible
998 t->type == tok_identifier ||
999 t->type == tok_number ||
1000 t->type == tok_string))
69c68955 1001 return parse_expr_statement ();
54dfabe9 1002 // XXX: consider generally accepting tok_embedded here too
2f1a1aea
FCE
1003 else
1004 throw parse_error ("expected statement");
1005}
1006
1007
56099f08 1008void
07c17d67 1009parser::parse_global (vector <vardecl*>& globals)
2f1a1aea 1010{
82919855 1011 const token* t0 = next ();
6e213f58 1012 if (! (t0->type == tok_keyword && t0->content == "global"))
82919855
FCE
1013 throw parse_error ("expected 'global'");
1014
56099f08
FCE
1015 while (1)
1016 {
1017 const token* t = next ();
1018 if (! (t->type == tok_identifier))
1019 throw parse_error ("expected identifier");
1020
2b066ec1
FCE
1021 for (unsigned i=0; i<globals.size(); i++)
1022 if (globals[i]->name == t->content)
57b73400
GH
1023 throw parse_error ("duplicate global name");
1024
24cb178f
FCE
1025 vardecl* d = new vardecl;
1026 d->name = t->content;
1027 d->tok = t;
1028 globals.push_back (d);
56099f08 1029
82919855
FCE
1030 t = peek ();
1031 if (t && t->type == tok_operator && t->content == ",")
1032 {
1033 next ();
1034 continue;
1035 }
56099f08 1036 else
82919855 1037 break;
56099f08
FCE
1038 }
1039}
1040
1041
24cb178f
FCE
1042void
1043parser::parse_functiondecl (std::vector<functiondecl*>& functions)
56099f08 1044{
82919855 1045 const token* t = next ();
6e213f58 1046 if (! (t->type == tok_keyword && t->content == "function"))
82919855
FCE
1047 throw parse_error ("expected 'function'");
1048
56099f08 1049
82919855 1050 t = next ();
6e213f58
DS
1051 if (! (t->type == tok_identifier)
1052 && ! (t->type == tok_keyword
1053 && (t->content == "string" || t->content == "long")))
56099f08 1054 throw parse_error ("expected identifier");
24cb178f
FCE
1055
1056 for (unsigned i=0; i<functions.size(); i++)
1057 if (functions[i]->name == t->content)
1058 throw parse_error ("duplicate function name");
1059
1060 functiondecl *fd = new functiondecl ();
56099f08
FCE
1061 fd->name = t->content;
1062 fd->tok = t;
1063
1064 t = next ();
6a505121
FCE
1065 if (t->type == tok_operator && t->content == ":")
1066 {
1067 t = next ();
6e213f58 1068 if (t->type == tok_keyword && t->content == "string")
6a505121 1069 fd->type = pe_string;
6e213f58 1070 else if (t->type == tok_keyword && t->content == "long")
6a505121
FCE
1071 fd->type = pe_long;
1072 else throw parse_error ("expected 'string' or 'long'");
1073
1074 t = next ();
1075 }
1076
56099f08
FCE
1077 if (! (t->type == tok_operator && t->content == "("))
1078 throw parse_error ("expected '('");
1079
1080 while (1)
1081 {
1082 t = next ();
1083
1084 // permit zero-argument fuctions
1085 if (t->type == tok_operator && t->content == ")")
1086 break;
1087 else if (! (t->type == tok_identifier))
1088 throw parse_error ("expected identifier");
1089 vardecl* vd = new vardecl;
1090 vd->name = t->content;
1091 vd->tok = t;
1092 fd->formal_args.push_back (vd);
1093
1094 t = next ();
6a505121
FCE
1095 if (t->type == tok_operator && t->content == ":")
1096 {
1097 t = next ();
6e213f58 1098 if (t->type == tok_keyword && t->content == "string")
6a505121 1099 vd->type = pe_string;
6e213f58 1100 else if (t->type == tok_keyword && t->content == "long")
6a505121
FCE
1101 vd->type = pe_long;
1102 else throw parse_error ("expected 'string' or 'long'");
1103
1104 t = next ();
1105 }
56099f08
FCE
1106 if (t->type == tok_operator && t->content == ")")
1107 break;
1108 if (t->type == tok_operator && t->content == ",")
1109 continue;
1110 else
1111 throw parse_error ("expected ',' or ')'");
1112 }
1113
54dfabe9
FCE
1114 t = peek ();
1115 if (t && t->type == tok_embedded)
1116 fd->body = parse_embeddedcode ();
1117 else
1118 fd->body = parse_stmt_block ();
24cb178f
FCE
1119
1120 functions.push_back (fd);
2f1a1aea
FCE
1121}
1122
1123
9c0c0e46
FCE
1124probe_point*
1125parser::parse_probe_point ()
2f1a1aea 1126{
9c0c0e46 1127 probe_point* pl = new probe_point;
2f1a1aea 1128
9c0c0e46 1129 while (1)
2f1a1aea 1130 {
9c0c0e46 1131 const token* t = next ();
6e213f58
DS
1132 if (! (t->type == tok_identifier
1133 // we must allow ".return" and ".function", which are keywords
1134 || t->type == tok_keyword
1135 || (t->type == tok_operator && t->content == "*")))
b4ceace2 1136 throw parse_error ("expected identifier or '*'");
9c0c0e46
FCE
1137
1138 if (pl->tok == 0) pl->tok = t;
1139
1140 probe_point::component* c = new probe_point::component;
1141 c->functor = t->content;
1142 pl->components.push_back (c);
1143 // NB though we still may add c->arg soon
1144
a477f3f1 1145 const token* last_t = t;
9c0c0e46 1146 t = peek ();
a477f3f1
DS
1147
1148 // We need to keep going until we find something other than a
1149 // '*' or identifier, since a probe point wildcard can be
1150 // something like "*a", "*a*", "a*b", "a*b*", etc.
1151 while (t &&
1152 // case 1: '*{identifier}'
1153 ((last_t->type == tok_operator && last_t->content == "*"
1154 && (t->type == tok_identifier || t->type == tok_keyword))
1155 // case 2: '{identifier}*'
1156 || ((last_t->type == tok_identifier
1157 || last_t->type == tok_keyword)
1158 && t->type == tok_operator && t->content == "*")))
1159 {
1160 c->functor += t->content;
1161 next (); // consume the identifier or '*'
1162
1163 last_t = t;
1164 t = peek ();
1165 }
1166
9c0c0e46 1167 if (t && t->type == tok_operator
97266278
LG
1168 && (t->content == "{" || t->content == "," || t->content == "="
1169 || t->content == "+=" ))
9c0c0e46
FCE
1170 break;
1171
1172 if (t && t->type == tok_operator && t->content == "(")
1173 {
1174 next (); // consume "("
1175 c->arg = parse_literal ();
1176
1177 t = next ();
1178 if (! (t->type == tok_operator && t->content == ")"))
1179 throw parse_error ("expected ')'");
1180
1181 t = peek ();
1182 if (t && t->type == tok_operator
20c6c071 1183 && (t->content == "{" || t->content == "," || t->content == "="))
9c0c0e46 1184 break;
2b066ec1
FCE
1185 else if (t && t->type == tok_operator &&
1186 t->content == "(")
1187 throw parse_error ("unexpected '.' or ',' or '{'");
9c0c0e46
FCE
1188 }
1189 // fall through
1190
1191 if (t && t->type == tok_operator && t->content == ".")
1192 next ();
1193 else
97266278 1194 throw parse_error ("expected '.' or ',' or '(' or '{' or '=' or '+='");
2f1a1aea
FCE
1195 }
1196
1197 return pl;
1198}
1199
1200
1201literal*
1202parser::parse_literal ()
1203{
1204 const token* t = next ();
56099f08 1205 literal* l;
2f1a1aea 1206 if (t->type == tok_string)
56099f08 1207 l = new literal_string (t->content);
2f1a1aea 1208 else if (t->type == tok_number)
9c0c0e46
FCE
1209 {
1210 const char* startp = t->content.c_str ();
1211 char* endp = (char*) startp;
1212
3a20432b
FCE
1213 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1214 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1215 // since the lexer only gives us positive digit strings.
9c0c0e46 1216 errno = 0;
3a20432b 1217 long long value = (long long) strtoull (startp, & endp, 0);
9c0c0e46 1218 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
3a20432b
FCE
1219 || (unsigned long long) value > 18446744073709551615ULL
1220 || value < -9223372036854775807LL-1)
9c0c0e46
FCE
1221 throw parse_error ("number invalid or out of range");
1222
3a20432b 1223 l = new literal_number (value);
9c0c0e46 1224 }
2f1a1aea
FCE
1225 else
1226 throw parse_error ("expected literal string or number");
56099f08
FCE
1227
1228 l->tok = t;
1229 return l;
2f1a1aea
FCE
1230}
1231
1232
1233if_statement*
1234parser::parse_if_statement ()
1235{
1236 const token* t = next ();
6e213f58 1237 if (! (t->type == tok_keyword && t->content == "if"))
56099f08
FCE
1238 throw parse_error ("expected 'if'");
1239 if_statement* s = new if_statement;
1240 s->tok = t;
1241
1242 t = next ();
2f1a1aea
FCE
1243 if (! (t->type == tok_operator && t->content == "("))
1244 throw parse_error ("expected '('");
1245
2f1a1aea
FCE
1246 s->condition = parse_expression ();
1247
1248 t = next ();
1249 if (! (t->type == tok_operator && t->content == ")"))
1250 throw parse_error ("expected ')'");
1251
1252 s->thenblock = parse_statement ();
1253
1254 t = peek ();
6e213f58 1255 if (t && t->type == tok_keyword && t->content == "else")
2f1a1aea
FCE
1256 {
1257 next ();
1258 s->elseblock = parse_statement ();
1259 }
ed10c639
FCE
1260 else
1261 s->elseblock = 0; // in case not otherwise initialized
2f1a1aea
FCE
1262
1263 return s;
1264}
1265
1266
69c68955
FCE
1267expr_statement*
1268parser::parse_expr_statement ()
1269{
1270 expr_statement *es = new expr_statement;
1271 const token* t = peek ();
1272 es->tok = t;
1273 es->value = parse_expression ();
1274 return es;
1275}
1276
1277
56099f08
FCE
1278return_statement*
1279parser::parse_return_statement ()
1280{
1281 const token* t = next ();
6e213f58 1282 if (! (t->type == tok_keyword && t->content == "return"))
56099f08 1283 throw parse_error ("expected 'return'");
6e213f58
DS
1284 if (context != con_function)
1285 throw parse_error ("found 'return' not in function context");
56099f08
FCE
1286 return_statement* s = new return_statement;
1287 s->tok = t;
1288 s->value = parse_expression ();
1289 return s;
1290}
1291
1292
1293delete_statement*
1294parser::parse_delete_statement ()
1295{
1296 const token* t = next ();
6e213f58 1297 if (! (t->type == tok_keyword && t->content == "delete"))
56099f08
FCE
1298 throw parse_error ("expected 'delete'");
1299 delete_statement* s = new delete_statement;
1300 s->tok = t;
1301 s->value = parse_expression ();
1302 return s;
1303}
1304
1305
f3c26ea5
FCE
1306next_statement*
1307parser::parse_next_statement ()
1308{
1309 const token* t = next ();
6e213f58 1310 if (! (t->type == tok_keyword && t->content == "next"))
f3c26ea5 1311 throw parse_error ("expected 'next'");
6e213f58
DS
1312 if (context != con_probe)
1313 throw parse_error ("found 'next' not in probe context");
f3c26ea5
FCE
1314 next_statement* s = new next_statement;
1315 s->tok = t;
1316 return s;
1317}
1318
1319
1320break_statement*
1321parser::parse_break_statement ()
1322{
1323 const token* t = next ();
6e213f58 1324 if (! (t->type == tok_keyword && t->content == "break"))
f3c26ea5
FCE
1325 throw parse_error ("expected 'break'");
1326 break_statement* s = new break_statement;
1327 s->tok = t;
1328 return s;
1329}
1330
1331
1332continue_statement*
1333parser::parse_continue_statement ()
1334{
1335 const token* t = next ();
6e213f58 1336 if (! (t->type == tok_keyword && t->content == "continue"))
f3c26ea5
FCE
1337 throw parse_error ("expected 'continue'");
1338 continue_statement* s = new continue_statement;
1339 s->tok = t;
1340 return s;
1341}
1342
1343
69c68955
FCE
1344for_loop*
1345parser::parse_for_loop ()
1346{
f3c26ea5 1347 const token* t = next ();
6e213f58 1348 if (! (t->type == tok_keyword && t->content == "for"))
f3c26ea5
FCE
1349 throw parse_error ("expected 'for'");
1350 for_loop* s = new for_loop;
1351 s->tok = t;
1352
1353 t = next ();
1354 if (! (t->type == tok_operator && t->content == "("))
1355 throw parse_error ("expected '('");
1356
1357 // initializer + ";"
1358 t = peek ();
1359 if (t && t->type == tok_operator && t->content == ";")
1360 {
cbfbbf69
FCE
1361 s->init = 0;
1362 next ();
f3c26ea5
FCE
1363 }
1364 else
1365 {
1366 s->init = parse_expr_statement ();
1367 t = next ();
1368 if (! (t->type == tok_operator && t->content == ";"))
1369 throw parse_error ("expected ';'");
1370 }
1371
1372 // condition + ";"
1373 t = peek ();
1374 if (t && t->type == tok_operator && t->content == ";")
1375 {
1376 literal_number* l = new literal_number(1);
1377 s->cond = l;
1378 s->cond->tok = next ();
1379 }
1380 else
1381 {
1382 s->cond = parse_expression ();
1383 t = next ();
1384 if (! (t->type == tok_operator && t->content == ";"))
1385 throw parse_error ("expected ';'");
1386 }
1387
1388 // increment + ")"
1389 t = peek ();
1390 if (t && t->type == tok_operator && t->content == ")")
1391 {
cbfbbf69
FCE
1392 s->incr = 0;
1393 next ();
f3c26ea5
FCE
1394 }
1395 else
1396 {
1397 s->incr = parse_expr_statement ();
1398 t = next ();
1399 if (! (t->type == tok_operator && t->content == ")"))
c958a431 1400 throw parse_error ("expected ')'");
f3c26ea5
FCE
1401 }
1402
1403 // block
1404 s->block = parse_statement ();
1405
1406 return s;
1407}
1408
1409
1410for_loop*
1411parser::parse_while_loop ()
1412{
1413 const token* t = next ();
6e213f58 1414 if (! (t->type == tok_keyword && t->content == "while"))
f3c26ea5
FCE
1415 throw parse_error ("expected 'while'");
1416 for_loop* s = new for_loop;
1417 s->tok = t;
1418
1419 t = next ();
1420 if (! (t->type == tok_operator && t->content == "("))
1421 throw parse_error ("expected '('");
1422
1423 // dummy init and incr fields
cbfbbf69
FCE
1424 s->init = 0;
1425 s->incr = 0;
f3c26ea5
FCE
1426
1427 // condition
1428 s->cond = parse_expression ();
1429
f3c26ea5
FCE
1430 t = next ();
1431 if (! (t->type == tok_operator && t->content == ")"))
1432 throw parse_error ("expected ')'");
1433
1434 // block
1435 s->block = parse_statement ();
1436
1437 return s;
69c68955
FCE
1438}
1439
1440
1441foreach_loop*
1442parser::parse_foreach_loop ()
1443{
1444 const token* t = next ();
6e213f58 1445 if (! (t->type == tok_keyword && t->content == "foreach"))
69c68955
FCE
1446 throw parse_error ("expected 'foreach'");
1447 foreach_loop* s = new foreach_loop;
1448 s->tok = t;
93484556 1449 s->sort_direction = 0;
69c68955
FCE
1450
1451 t = next ();
1452 if (! (t->type == tok_operator && t->content == "("))
1453 throw parse_error ("expected '('");
1454
1455 // see also parse_array_in
1456
1457 bool parenthesized = false;
1458 t = peek ();
1459 if (t && t->type == tok_operator && t->content == "[")
1460 {
1461 next ();
1462 parenthesized = true;
1463 }
1464
1465 while (1)
1466 {
1467 t = next ();
1468 if (! (t->type == tok_identifier))
1469 throw parse_error ("expected identifier");
1470 symbol* sym = new symbol;
1471 sym->tok = t;
1472 sym->name = t->content;
1473 s->indexes.push_back (sym);
1474
93484556
FCE
1475 t = peek ();
1476 if (t && t->type == tok_operator &&
1477 (t->content == "+" || t->content == "-"))
1478 {
1479 if (s->sort_direction)
1480 throw parse_error ("multiple sort directives");
1481 s->sort_direction = (t->content == "+") ? 1 : -1;
1482 s->sort_column = s->indexes.size();
1483 next();
1484 }
1485
69c68955
FCE
1486 if (parenthesized)
1487 {
93484556 1488 t = peek ();
69c68955
FCE
1489 if (t && t->type == tok_operator && t->content == ",")
1490 {
1491 next ();
1492 continue;
1493 }
1494 else if (t && t->type == tok_operator && t->content == "]")
1495 {
1496 next ();
1497 break;
1498 }
1499 else
1500 throw parse_error ("expected ',' or ']'");
1501 }
1502 else
1503 break; // expecting only one expression
1504 }
1505
1506 t = next ();
6e213f58 1507 if (! (t->type == tok_keyword && t->content == "in"))
69c68955 1508 throw parse_error ("expected 'in'");
d02548c0
GH
1509
1510 s->base = parse_indexable();
69c68955 1511
93484556
FCE
1512 t = peek ();
1513 if (t && t->type == tok_operator &&
1514 (t->content == "+" || t->content == "-"))
1515 {
1516 if (s->sort_direction)
1517 throw parse_error ("multiple sort directives");
1518 s->sort_direction = (t->content == "+") ? 1 : -1;
1519 s->sort_column = 0;
1520 next();
1521 }
1522
69c68955
FCE
1523 t = next ();
1524 if (! (t->type == tok_operator && t->content == ")"))
1525 throw parse_error ("expected ')'");
1526
1527 s->block = parse_statement ();
1528 return s;
1529}
1530
1531
2f1a1aea
FCE
1532expression*
1533parser::parse_expression ()
1534{
1535 return parse_assignment ();
1536}
1537
2f1a1aea
FCE
1538
1539expression*
1540parser::parse_assignment ()
1541{
1542 expression* op1 = parse_ternary ();
1543
1544 const token* t = peek ();
82919855
FCE
1545 // right-associative operators
1546 if (t && t->type == tok_operator
2f1a1aea 1547 && (t->content == "=" ||
82919855 1548 t->content == "<<<" ||
2f1a1aea 1549 t->content == "+=" ||
bb2e3076
FCE
1550 t->content == "-=" ||
1551 t->content == "*=" ||
1552 t->content == "/=" ||
1553 t->content == "%=" ||
1554 t->content == "<<=" ||
1555 t->content == ">>=" ||
1556 t->content == "&=" ||
1557 t->content == "^=" ||
1558 t->content == "|=" ||
d5d7c2cc 1559 t->content == ".=" ||
bb2e3076 1560 false))
2f1a1aea 1561 {
bb2e3076 1562 // NB: lvalueness is checked during elaboration / translation
2f1a1aea 1563 assignment* e = new assignment;
56099f08 1564 e->left = op1;
2f1a1aea 1565 e->op = t->content;
56099f08 1566 e->tok = t;
2f1a1aea 1567 next ();
82919855 1568 e->right = parse_expression ();
56099f08 1569 op1 = e;
2f1a1aea 1570 }
56099f08
FCE
1571
1572 return op1;
2f1a1aea
FCE
1573}
1574
1575
1576expression*
1577parser::parse_ternary ()
1578{
1579 expression* op1 = parse_logical_or ();
1580
1581 const token* t = peek ();
1582 if (t && t->type == tok_operator && t->content == "?")
1583 {
2f1a1aea 1584 ternary_expression* e = new ternary_expression;
56099f08 1585 e->tok = t;
2f1a1aea 1586 e->cond = op1;
56099f08
FCE
1587 next ();
1588 e->truevalue = parse_expression (); // XXX
2f1a1aea
FCE
1589
1590 t = next ();
1591 if (! (t->type == tok_operator && t->content == ":"))
1592 throw parse_error ("expected ':'");
1593
56099f08 1594 e->falsevalue = parse_expression (); // XXX
2f1a1aea
FCE
1595 return e;
1596 }
1597 else
1598 return op1;
1599}
1600
1601
1602expression*
1603parser::parse_logical_or ()
1604{
1605 expression* op1 = parse_logical_and ();
1606
1607 const token* t = peek ();
56099f08 1608 while (t && t->type == tok_operator && t->content == "||")
2f1a1aea 1609 {
2f1a1aea 1610 logical_or_expr* e = new logical_or_expr;
56099f08
FCE
1611 e->tok = t;
1612 e->op = t->content;
2f1a1aea 1613 e->left = op1;
56099f08
FCE
1614 next ();
1615 e->right = parse_logical_and ();
1616 op1 = e;
1617 t = peek ();
2f1a1aea 1618 }
56099f08
FCE
1619
1620 return op1;
2f1a1aea
FCE
1621}
1622
1623
1624expression*
1625parser::parse_logical_and ()
1626{
bb2e3076 1627 expression* op1 = parse_boolean_or ();
2f1a1aea
FCE
1628
1629 const token* t = peek ();
56099f08 1630 while (t && t->type == tok_operator && t->content == "&&")
2f1a1aea 1631 {
2f1a1aea
FCE
1632 logical_and_expr *e = new logical_and_expr;
1633 e->left = op1;
56099f08
FCE
1634 e->op = t->content;
1635 e->tok = t;
1636 next ();
bb2e3076
FCE
1637 e->right = parse_boolean_or ();
1638 op1 = e;
1639 t = peek ();
1640 }
1641
1642 return op1;
1643}
1644
1645
1646expression*
1647parser::parse_boolean_or ()
1648{
1649 expression* op1 = parse_boolean_xor ();
1650
1651 const token* t = peek ();
1652 while (t && t->type == tok_operator && t->content == "|")
1653 {
1654 binary_expression* e = new binary_expression;
1655 e->left = op1;
1656 e->op = t->content;
1657 e->tok = t;
1658 next ();
1659 e->right = parse_boolean_xor ();
1660 op1 = e;
1661 t = peek ();
1662 }
1663
1664 return op1;
1665}
1666
1667
1668expression*
1669parser::parse_boolean_xor ()
1670{
1671 expression* op1 = parse_boolean_and ();
1672
1673 const token* t = peek ();
1674 while (t && t->type == tok_operator && t->content == "^")
1675 {
1676 binary_expression* e = new binary_expression;
1677 e->left = op1;
1678 e->op = t->content;
1679 e->tok = t;
1680 next ();
1681 e->right = parse_boolean_and ();
1682 op1 = e;
1683 t = peek ();
1684 }
1685
1686 return op1;
1687}
1688
1689
1690expression*
1691parser::parse_boolean_and ()
1692{
1693 expression* op1 = parse_array_in ();
1694
1695 const token* t = peek ();
1696 while (t && t->type == tok_operator && t->content == "&")
1697 {
1698 binary_expression* e = new binary_expression;
1699 e->left = op1;
1700 e->op = t->content;
1701 e->tok = t;
1702 next ();
56099f08
FCE
1703 e->right = parse_array_in ();
1704 op1 = e;
1705 t = peek ();
2f1a1aea 1706 }
56099f08
FCE
1707
1708 return op1;
2f1a1aea
FCE
1709}
1710
1711
1712expression*
1713parser::parse_array_in ()
1714{
ce10591c 1715 // This is a very tricky case. All these are legit expressions:
69c68955 1716 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
ce10591c
FCE
1717 vector<expression*> indexes;
1718 bool parenthesized = false;
2f1a1aea
FCE
1719
1720 const token* t = peek ();
69c68955 1721 if (t && t->type == tok_operator && t->content == "[")
ce10591c
FCE
1722 {
1723 next ();
1724 parenthesized = true;
1725 }
1726
1727 while (1)
1728 {
1729 expression* op1 = parse_comparison ();
1730 indexes.push_back (op1);
1731
1732 if (parenthesized)
1733 {
1734 const token* t = peek ();
1735 if (t && t->type == tok_operator && t->content == ",")
1736 {
1737 next ();
1738 continue;
1739 }
69c68955 1740 else if (t && t->type == tok_operator && t->content == "]")
ce10591c
FCE
1741 {
1742 next ();
1743 break;
1744 }
1745 else
69c68955 1746 throw parse_error ("expected ',' or ']'");
ce10591c
FCE
1747 }
1748 else
1749 break; // expecting only one expression
1750 }
1751
1752 t = peek ();
6e213f58 1753 if (t && t->type == tok_keyword && t->content == "in")
2f1a1aea 1754 {
2f1a1aea 1755 array_in *e = new array_in;
56099f08 1756 e->tok = t;
ce10591c
FCE
1757 next (); // swallow "in"
1758
1759 arrayindex* a = new arrayindex;
1760 a->indexes = indexes;
d02548c0
GH
1761 a->base = parse_indexable();
1762 a->tok = a->base->get_tok();
ce10591c 1763 e->operand = a;
2f1a1aea
FCE
1764 return e;
1765 }
ce10591c
FCE
1766 else if (indexes.size() == 1) // no "in" - need one expression only
1767 return indexes[0];
2f1a1aea 1768 else
ce10591c 1769 throw parse_error ("unexpected comma-separated expression list");
2f1a1aea
FCE
1770}
1771
1772
1773expression*
1774parser::parse_comparison ()
1775{
bb2e3076 1776 expression* op1 = parse_shift ();
2f1a1aea
FCE
1777
1778 const token* t = peek ();
56099f08 1779 while (t && t->type == tok_operator
553d27a5
FCE
1780 && (t->content == ">" ||
1781 t->content == "<" ||
1782 t->content == "==" ||
1783 t->content == "!=" ||
1784 t->content == "<=" ||
bb2e3076 1785 t->content == ">="))
2f1a1aea
FCE
1786 {
1787 comparison* e = new comparison;
1788 e->left = op1;
1789 e->op = t->content;
56099f08 1790 e->tok = t;
2f1a1aea 1791 next ();
bb2e3076
FCE
1792 e->right = parse_shift ();
1793 op1 = e;
1794 t = peek ();
1795 }
1796
1797 return op1;
1798}
1799
1800
1801expression*
1802parser::parse_shift ()
1803{
1804 expression* op1 = parse_concatenation ();
1805
1806 const token* t = peek ();
1807 while (t && t->type == tok_operator &&
1808 (t->content == "<<" || t->content == ">>"))
1809 {
1810 binary_expression* e = new binary_expression;
1811 e->left = op1;
1812 e->op = t->content;
1813 e->tok = t;
1814 next ();
56099f08
FCE
1815 e->right = parse_concatenation ();
1816 op1 = e;
1817 t = peek ();
2f1a1aea 1818 }
56099f08
FCE
1819
1820 return op1;
2f1a1aea
FCE
1821}
1822
1823
1824expression*
1825parser::parse_concatenation ()
1826{
1827 expression* op1 = parse_additive ();
1828
1829 const token* t = peek ();
1830 // XXX: the actual awk string-concatenation operator is *whitespace*.
1831 // I don't know how to easily to model that here.
56099f08 1832 while (t && t->type == tok_operator && t->content == ".")
2f1a1aea
FCE
1833 {
1834 concatenation* e = new concatenation;
1835 e->left = op1;
1836 e->op = t->content;
56099f08 1837 e->tok = t;
2f1a1aea 1838 next ();
56099f08
FCE
1839 e->right = parse_additive ();
1840 op1 = e;
1841 t = peek ();
2f1a1aea 1842 }
56099f08
FCE
1843
1844 return op1;
2f1a1aea
FCE
1845}
1846
1847
1848expression*
1849parser::parse_additive ()
1850{
1851 expression* op1 = parse_multiplicative ();
1852
1853 const token* t = peek ();
56099f08 1854 while (t && t->type == tok_operator
2f1a1aea
FCE
1855 && (t->content == "+" || t->content == "-"))
1856 {
1857 binary_expression* e = new binary_expression;
1858 e->op = t->content;
1859 e->left = op1;
56099f08 1860 e->tok = t;
2f1a1aea 1861 next ();
56099f08
FCE
1862 e->right = parse_multiplicative ();
1863 op1 = e;
1864 t = peek ();
2f1a1aea 1865 }
56099f08
FCE
1866
1867 return op1;
2f1a1aea
FCE
1868}
1869
1870
1871expression*
1872parser::parse_multiplicative ()
1873{
1874 expression* op1 = parse_unary ();
1875
1876 const token* t = peek ();
56099f08 1877 while (t && t->type == tok_operator
2f1a1aea
FCE
1878 && (t->content == "*" || t->content == "/" || t->content == "%"))
1879 {
1880 binary_expression* e = new binary_expression;
1881 e->op = t->content;
1882 e->left = op1;
56099f08 1883 e->tok = t;
2f1a1aea 1884 next ();
56099f08
FCE
1885 e->right = parse_unary ();
1886 op1 = e;
1887 t = peek ();
2f1a1aea 1888 }
56099f08
FCE
1889
1890 return op1;
2f1a1aea
FCE
1891}
1892
1893
1894expression*
1895parser::parse_unary ()
1896{
1897 const token* t = peek ();
1898 if (t && t->type == tok_operator
bb2e3076
FCE
1899 && (t->content == "+" ||
1900 t->content == "-" ||
1901 t->content == "!" ||
1902 t->content == "~" ||
1903 false))
2f1a1aea
FCE
1904 {
1905 unary_expression* e = new unary_expression;
1906 e->op = t->content;
56099f08 1907 e->tok = t;
2f1a1aea 1908 next ();
3a20432b 1909 e->operand = parse_crement ();
2f1a1aea
FCE
1910 return e;
1911 }
1912 else
bb2e3076 1913 return parse_crement ();
2f1a1aea
FCE
1914}
1915
1916
1917expression*
1918parser::parse_crement () // as in "increment" / "decrement"
1919{
cbfbbf69
FCE
1920 // NB: Ideally, we'd parse only a symbol as an operand to the
1921 // *crement operators, instead of a general expression value. We'd
1922 // need more complex lookahead code to tell apart the postfix cases.
1923 // So we just punt, and leave it to pass-3 to signal errors on
1924 // cases like "4++".
1925
2f1a1aea
FCE
1926 const token* t = peek ();
1927 if (t && t->type == tok_operator
1928 && (t->content == "++" || t->content == "--"))
1929 {
1930 pre_crement* e = new pre_crement;
1931 e->op = t->content;
56099f08 1932 e->tok = t;
2f1a1aea
FCE
1933 next ();
1934 e->operand = parse_value ();
1935 return e;
1936 }
1937
1938 // post-crement or non-crement
1939 expression *op1 = parse_value ();
1940
1941 t = peek ();
1942 if (t && t->type == tok_operator
1943 && (t->content == "++" || t->content == "--"))
1944 {
1945 post_crement* e = new post_crement;
1946 e->op = t->content;
56099f08 1947 e->tok = t;
2f1a1aea
FCE
1948 next ();
1949 e->operand = op1;
1950 return e;
1951 }
1952 else
1953 return op1;
1954}
1955
1956
1957expression*
1958parser::parse_value ()
1959{
1960 const token* t = peek ();
1961 if (! t)
1962 throw parse_error ("expected value");
1963
1964 if (t->type == tok_operator && t->content == "(")
1965 {
1966 next ();
1967 expression* e = parse_expression ();
1968 t = next ();
1969 if (! (t->type == tok_operator && t->content == ")"))
1970 throw parse_error ("expected ')'");
1971 return e;
1972 }
1973 else if (t->type == tok_identifier)
1974 return parse_symbol ();
1975 else
1976 return parse_literal ();
1977}
1978
1979
d02548c0
GH
1980const token *
1981parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name)
1982{
1983 hop = NULL;
1984 const token* t = expect_ident (name);
1985 if (name == "@hist_linear" || name == "@hist_log")
1986 {
1987 hop = new hist_op;
1988 if (name == "@hist_linear")
1989 hop->htype = hist_linear;
1990 else if (name == "@hist_log")
1991 hop->htype = hist_log;
1992 hop->tok = t;
1993 expect_op("(");
1994 hop->stat = parse_expression ();
1995 int64_t tnum;
1996 if (hop->htype == hist_linear)
1997 {
1998 for (size_t i = 0; i < 3; ++i)
1999 {
2000 expect_op (",");
2001 expect_number (tnum);
2002 hop->params.push_back (tnum);
2003 }
2004 }
2005 else
2006 {
2007 assert(hop->htype == hist_log);
2008 if (peek_op (","))
2009 {
2010 expect_op (",");
2011 expect_number (tnum);
2012 hop->params.push_back (tnum);
2013 }
2014 else
2015 {
2016 // FIXME (magic value): Logarithmic histograms get 64
2017 // buckets by default.
2018 hop->params.push_back (64);
2019 }
2020 }
2021 expect_op(")");
2022 }
2023 return t;
2024}
2025
2026
2027indexable*
2028parser::parse_indexable ()
2029{
2030 hist_op *hop = NULL;
2031 string name;
2032 const token *tok = parse_hist_op_or_bare_name(hop, name);
2033 if (hop)
2034 return hop;
2035 else
2036 {
2037 symbol* sym = new symbol;
2038 sym->name = name;
2039 sym->tok = tok;
2040 return sym;
2041 }
2042}
2043
2044
2045// var, indexable[index], func(parms), printf("...", ...), $var, $var->member, @stat_op(stat)
2f1a1aea 2046expression*
0fefb486 2047parser::parse_symbol ()
2f1a1aea 2048{
d02548c0
GH
2049 hist_op *hop = NULL;
2050 symbol *sym = NULL;
d7f3e0c5 2051 string name;
d02548c0
GH
2052 const token *t = parse_hist_op_or_bare_name(hop, name);
2053
2054 if (!hop)
0fefb486 2055 {
d02548c0
GH
2056 // If we didn't get a hist_op, then we did get an identifier. We can
2057 // now scrutinize this identifier for the various magic forms of identifier
2058 // (printf, @stat_op, and $var...)
2059
2060 if (name.size() > 0 && name[0] == '@')
d7f3e0c5 2061 {
d02548c0
GH
2062 stat_op *sop = new stat_op;
2063 if (name == "@avg")
2064 sop->ctype = sc_average;
2065 else if (name == "@count")
2066 sop->ctype = sc_count;
2067 else if (name == "@sum")
2068 sop->ctype = sc_sum;
2069 else if (name == "@min")
2070 sop->ctype = sc_min;
2071 else if (name == "@max")
2072 sop->ctype = sc_max;
2073 else
2074 throw parse_error("unknown statistic operator " + name);
2075 expect_op("(");
2076 sop->tok = t;
2077 sop->stat = parse_expression ();
2078 expect_op(")");
2079 return sop;
2080 }
2081
2082 else if (name.size() > 0 && (name == "print"
2083 || name == "sprint"
2084 || name == "printf"
01133ccb
LG
2085 || name == "sprintf"
2086 || name == "lket_trace_extra"))
d02548c0
GH
2087 {
2088 print_format *fmt = new print_format;
2089 fmt->tok = t;
2090 fmt->print_with_format = (name[name.size() - 1] == 'f');
2091 fmt->print_to_stream = (name[0] == 'p');
01133ccb
LG
2092
2093 fmt->lket_trace_extra = false;
2094
2095 if(name == "lket_trace_extra")
2096 {
2097 fmt->print_with_format = true;
2098 fmt->print_to_stream = true;
2099 fmt->lket_trace_extra = true;
2100 }
2101
d02548c0
GH
2102 expect_op("(");
2103 if (fmt->print_with_format)
2104 {
2105 // Consume and convert a format string, and any subsequent
2106 // arguments. Agreement between the format string and the
2107 // arguments is postponed to the typechecking phase.
2108 string tmp;
2109 expect_unknown (tok_string, tmp);
a9c62ac9 2110 fmt->raw_components = tmp;
d02548c0
GH
2111 fmt->components = print_format::string_to_components (tmp);
2112 while (!peek_op (")"))
2113 {
2114 expect_op(",");
2115 expression *e = parse_expression ();
2116 fmt->args.push_back(e);
2117 }
d7f3e0c5 2118 }
a4636912
GH
2119 else if (name == "print" &&
2120 (peek_kw("@hist_linear") ||
2121 peek_kw("@hist_log")))
2122 {
2123 // We have a special case where we recognize
2124 // print(@hist_foo(bar)) as a magic print-the-histogram
2125 // construct. This is sort of gross but it avoids
2126 // promoting histogram references to typeful
2127 // expressions.
1bbeef03
GH
2128
2129 hop = NULL;
2130 t = parse_hist_op_or_bare_name(hop, name);
2131 assert(hop);
2132
2133 // It is, sadly, possible that even while parsing a
2134 // hist_op, we *mis-guessed* and the user wishes to
2135 // print(@hist_op(foo)[bucket]), a scalar. In that case
2136 // we must parse the arrayindex and print an expression.
2137
2138 if (!peek_op ("["))
2139 fmt->hist = hop;
2140 else
2141 {
2142 // This is simplified version of the
2143 // multi-array-index parser below, because we can
2144 // only ever have one index on a histogram anyways.
2145 expect_op("[");
2146 struct arrayindex* ai = new arrayindex;
2147 ai->tok = t;
2148 ai->base = hop;
2149 ai->indexes.push_back (parse_expression ());
2150 expect_op("]");
2151 fmt->args.push_back(ai);
2152 }
a4636912 2153 }
d7f3e0c5 2154 else
d02548c0
GH
2155 {
2156 // If we are not printing with a format string, we permit
2157 // exactly one argument (of any type).
2158 expression *e = parse_expression ();
2159 fmt->args.push_back(e);
2160 }
2161 expect_op(")");
2162 return fmt;
2163 }
2164
2165 else if (name.size() > 0 && name[0] == '$')
2166 {
2167 // target_symbol time
2168 target_symbol *tsym = new target_symbol;
2169 tsym->tok = t;
2170 tsym->base_name = name;
2171 while (true)
2172 {
2173 string c;
2174 if (peek_op ("->"))
2175 {
2176 next();
2177 expect_ident (c);
2178 tsym->components.push_back
2179 (make_pair (target_symbol::comp_struct_member, c));
2180 }
2181 else if (peek_op ("["))
2182 {
2183 next();
2184 expect_unknown (tok_number, c);
2185 expect_op ("]");
2186 tsym->components.push_back
2187 (make_pair (target_symbol::comp_literal_array_index, c));
2188 }
2189 else
2190 break;
2191 }
2192 return tsym;
2193 }
2194
2195 else if (peek_op ("(")) // function call
2196 {
2197 next ();
2198 struct functioncall* f = new functioncall;
2199 f->tok = t;
2200 f->function = name;
2201 // Allow empty actual parameter list
2202 if (peek_op (")"))
2203 {
2204 next ();
2205 return f;
2206 }
2207 while (1)
2208 {
2209 f->args.push_back (parse_expression ());
2210 if (peek_op (")"))
2211 {
2212 next();
2213 break;
2214 }
2215 else if (peek_op (","))
2216 {
2217 next();
2218 continue;
2219 }
2220 else
2221 throw parse_error ("expected ',' or ')'");
2222 }
2223 return f;
2224 }
2225
2226 else
2227 {
2228 sym = new symbol;
2229 sym->name = name;
2230 sym->tok = t;
d7f3e0c5 2231 }
0fefb486 2232 }
d7f3e0c5 2233
d02548c0
GH
2234 // By now, either we had a hist_op in the first place, or else
2235 // we had a plain word and it was converted to a symbol.
2236
70c743d8 2237 assert (!hop != !sym); // logical XOR
d02548c0
GH
2238
2239 // All that remains is to check for array indexing
2240
d7f3e0c5 2241 if (peek_op ("[")) // array
2f1a1aea
FCE
2242 {
2243 next ();
2244 struct arrayindex* ai = new arrayindex;
d02548c0
GH
2245 ai->tok = t;
2246
2247 if (hop)
2248 ai->base = hop;
2249 else
2250 ai->base = sym;
2251
2f1a1aea
FCE
2252 while (1)
2253 {
2254 ai->indexes.push_back (parse_expression ());
d7f3e0c5
GH
2255 if (peek_op ("]"))
2256 {
2257 next();
2258 break;
2259 }
2260 else if (peek_op (","))
2261 {
2262 next();
2263 continue;
2264 }
2f1a1aea
FCE
2265 else
2266 throw parse_error ("expected ',' or ']'");
2267 }
2268 return ai;
2269 }
d02548c0
GH
2270
2271 // If we got to here, we *should* have a symbol; if we have
2272 // a hist_op on its own, it doesn't count as an expression,
2273 // so we throw a parse error.
2274
2275 if (hop)
2276 throw parse_error("base histogram operator where expression expected", t);
2277
2278 return sym;
2f1a1aea 2279}
56099f08 2280
This page took 0.286527 seconds and 5 git commands to generate.