]> sourceware.org Git - systemtap.git/blob - parse.cxx
Version bumps for 0.9.6 release
[systemtap.git] / parse.cxx
1 // recursive descent parser for systemtap scripts
2 // Copyright (C) 2005-2009 Red Hat Inc.
3 // Copyright (C) 2006 Intel Corporation.
4 // Copyright (C) 2007 Bull S.A.S
5 //
6 // This file is part of systemtap, and is free software. You can
7 // redistribute it and/or modify it under the terms of the GNU General
8 // Public License (GPL); either version 2, or (at your option) any
9 // later version.
10
11 #include "config.h"
12 #include "staptree.h"
13 #include "parse.h"
14 #include "session.h"
15 #include "util.h"
16
17 #include <iostream>
18
19 #include <fstream>
20 #include <cctype>
21 #include <cstdlib>
22 #include <cassert>
23 #include <cerrno>
24 #include <climits>
25 #include <sstream>
26 #include <cstring>
27 #include <cctype>
28 #include <iterator>
29
30 extern "C" {
31 #include <fnmatch.h>
32 }
33
34 using namespace std;
35
36 // ------------------------------------------------------------------------
37
38
39
40 parser::parser (systemtap_session& s, istream& i, bool p):
41 session (s),
42 input_name ("<input>"), free_input (0),
43 input (i, input_name, s), privileged (p),
44 context(con_unknown), last_t (0), next_t (0), num_errors (0)
45 { }
46
47 parser::parser (systemtap_session& s, const string& fn, bool p):
48 session (s),
49 input_name (fn), free_input (new ifstream (input_name.c_str(), ios::in)),
50 input (* free_input, input_name, s), privileged (p),
51 context(con_unknown), last_t (0), next_t (0), num_errors (0)
52 { }
53
54 parser::~parser()
55 {
56 if (free_input) delete free_input;
57 }
58
59
60 stapfile*
61 parser::parse (systemtap_session& s, std::istream& i, bool pr)
62 {
63 parser p (s, i, pr);
64 return p.parse ();
65 }
66
67
68 stapfile*
69 parser::parse (systemtap_session& s, const std::string& n, bool pr)
70 {
71 parser p (s, n, pr);
72 return p.parse ();
73 }
74
75 static string
76 tt2str(token_type tt)
77 {
78 switch (tt)
79 {
80 case tok_junk: return "junk";
81 case tok_identifier: return "identifier";
82 case tok_operator: return "operator";
83 case tok_string: return "string";
84 case tok_number: return "number";
85 case tok_embedded: return "embedded-code";
86 case tok_keyword: return "keyword";
87 }
88 return "unknown token";
89 }
90
91 ostream&
92 operator << (ostream& o, const source_loc& loc)
93 {
94 o << loc.file << ":"
95 << loc.line << ":"
96 << loc.column;
97
98 return o;
99 }
100
101 ostream&
102 operator << (ostream& o, const token& t)
103 {
104 o << tt2str(t.type);
105
106 if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
107 {
108 o << " '";
109 for (unsigned i=0; i<t.content.length(); i++)
110 {
111 char c = t.content[i];
112 o << (isprint (c) ? c : '?');
113 }
114 o << "'";
115 }
116
117 o << " at "
118 << t.location;
119
120 return o;
121 }
122
123
124 void
125 parser::print_error (const parse_error &pe)
126 {
127 string align_parse_error (" ");
128 cerr << "parse error: " << pe.what () << endl;
129
130 if (pe.tok)
131 {
132 cerr << "\tat: " << *pe.tok << endl;
133 session.print_error_source (cerr, align_parse_error, pe.tok);
134 }
135 else
136 {
137 const token* t = last_t;
138 if (t)
139 {
140 cerr << "\tsaw: " << *t << endl;
141 session.print_error_source (cerr, align_parse_error, t);
142 }
143 else
144 cerr << "\tsaw: " << input_name << " EOF" << endl;
145 }
146
147 // XXX: make it possible to print the last input line,
148 // so as to line up an arrow with the specific error column
149
150 num_errors ++;
151 }
152
153
154 const token*
155 parser::last ()
156 {
157 return last_t;
158 }
159
160
161
162 template <typename OPERAND>
163 bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
164 {
165 if (op->type == tok_operator && op->content == "<=")
166 { return lhs <= rhs; }
167 else if (op->type == tok_operator && op->content == ">=")
168 { return lhs >= rhs; }
169 else if (op->type == tok_operator && op->content == "<")
170 { return lhs < rhs; }
171 else if (op->type == tok_operator && op->content == ">")
172 { return lhs > rhs; }
173 else if (op->type == tok_operator && op->content == "==")
174 { return lhs == rhs; }
175 else if (op->type == tok_operator && op->content == "!=")
176 { return lhs != rhs; }
177 else
178 throw parse_error ("expected comparison operator", op);
179 }
180
181
182 // Here, we perform on-the-fly preprocessing.
183 // The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
184 // where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
185 // or: arch COMPARISON-OP "arch-string"
186 // or: "string1" COMPARISON-OP "string2"
187 // or: number1 COMPARISON-OP number2
188 // The %: ELSE-TOKENS part is optional.
189 //
190 // e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
191 // e.g. %( arch != "i?86" %? "foo" %: "baz" %)
192 //
193 // Up to an entire %( ... %) expression is processed by a single call
194 // to this function. Tokens included by any nested conditions are
195 // enqueued in a private vector.
196
197 bool eval_pp_conditional (systemtap_session& s,
198 const token* l, const token* op, const token* r)
199 {
200 if (l->type == tok_identifier && (l->content == "kernel_v" ||
201 l->content == "kernel_vr"))
202 {
203 string target_kernel_vr = s.kernel_release;
204 string target_kernel_v = s.kernel_base_release;
205
206 if (! (r->type == tok_string))
207 throw parse_error ("expected string literal", r);
208
209 string target = (l->content == "kernel_vr" ?
210 target_kernel_vr.c_str() :
211 target_kernel_v.c_str());
212 string query = r->content;
213 bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
214
215 // collect acceptable strverscmp results.
216 int rvc_ok1, rvc_ok2;
217 bool wc_ok = false;
218 if (op->type == tok_operator && op->content == "<=")
219 { rvc_ok1 = -1; rvc_ok2 = 0; }
220 else if (op->type == tok_operator && op->content == ">=")
221 { rvc_ok1 = 1; rvc_ok2 = 0; }
222 else if (op->type == tok_operator && op->content == "<")
223 { rvc_ok1 = -1; rvc_ok2 = -1; }
224 else if (op->type == tok_operator && op->content == ">")
225 { rvc_ok1 = 1; rvc_ok2 = 1; }
226 else if (op->type == tok_operator && op->content == "==")
227 { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
228 else if (op->type == tok_operator && op->content == "!=")
229 { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
230 else
231 throw parse_error ("expected comparison operator", op);
232
233 if ((!wc_ok) && rhs_wildcard)
234 throw parse_error ("wildcard not allowed with order comparison operators", op);
235
236 if (rhs_wildcard)
237 {
238 int rvc_result = fnmatch (query.c_str(), target.c_str(),
239 FNM_NOESCAPE); // spooky
240 bool badness = (rvc_result == 0) ^ (op->content == "==");
241 return !badness;
242 }
243 else
244 {
245 int rvc_result = strverscmp (target.c_str(), query.c_str());
246 // normalize rvc_result
247 if (rvc_result < 0) rvc_result = -1;
248 if (rvc_result > 0) rvc_result = 1;
249 return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
250 }
251 }
252 else if (l->type == tok_identifier && l->content == "arch")
253 {
254 string target_architecture = s.architecture;
255 if (! (r->type == tok_string))
256 throw parse_error ("expected string literal", r);
257 string query_architecture = r->content;
258
259 int nomatch = fnmatch (query_architecture.c_str(),
260 target_architecture.c_str(),
261 FNM_NOESCAPE); // still spooky
262
263 bool result;
264 if (op->type == tok_operator && op->content == "==")
265 result = !nomatch;
266 else if (op->type == tok_operator && op->content == "!=")
267 result = nomatch;
268 else
269 throw parse_error ("expected '==' or '!='", op);
270
271 return result;
272 }
273 else if (l->type == tok_string && r->type == tok_string)
274 {
275 string lhs = l->content;
276 string rhs = r->content;
277 return eval_comparison (lhs, op, rhs);
278 // NB: no wildcarding option here
279 }
280 else if (l->type == tok_number && r->type == tok_number)
281 {
282 int64_t lhs = lex_cast<int64_t>(l->content);
283 int64_t rhs = lex_cast<int64_t>(r->content);
284 return eval_comparison (lhs, op, rhs);
285 // NB: no wildcarding option here
286 }
287 else if (l->type == tok_string && r->type == tok_number
288 && op->type == tok_operator)
289 throw parse_error ("expected string literal as right value", r);
290 else if (l->type == tok_number && r->type == tok_string
291 && op->type == tok_operator)
292 throw parse_error ("expected number literal as right value", r);
293
294 // XXX: support other forms? "CONFIG_SMP" ?
295
296 else
297 throw parse_error ("expected 'arch' or 'kernel_v' or 'kernel_vr'\n"
298 " or comparison between strings or integers", l);
299 }
300
301
302 // Only tokens corresponding to the TRUE statement must be expanded
303 const token*
304 parser::scan_pp (bool wildcard)
305 {
306 while (true)
307 {
308 if (enqueued_pp.size() > 0)
309 {
310 const token* t = enqueued_pp[0];
311 enqueued_pp.erase (enqueued_pp.begin());
312 return t;
313 }
314
315 const token* t = input.scan (wildcard); // NB: not recursive!
316 if (t == 0) // EOF
317 return t;
318
319 if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
320 return t;
321
322 // We have a %( - it's time to throw a preprocessing party!
323
324 const token *l, *op, *r;
325 l = input.scan (false); // NB: not recursive, though perhaps could be
326 op = input.scan (false);
327 r = input.scan (false);
328 if (l == 0 || op == 0 || r == 0)
329 throw parse_error ("incomplete condition after '%('", t);
330 // NB: consider generalizing to consume all tokens until %?, and
331 // passing that as a vector to an evaluator.
332
333 // Do not evaluate the condition if we haven't expanded everything.
334 // This may occur when having several recursive conditionals.
335 bool result = eval_pp_conditional (session, l, op, r);
336 delete l;
337 delete op;
338 delete r;
339
340 /*
341 clog << "PP eval (" << *t << ") == " << result << endl;
342 */
343
344 const token *m = input.scan (); // NB: not recursive
345 if (! (m && m->type == tok_operator && m->content == "%?"))
346 throw parse_error ("expected '%?' marker for conditional", t);
347 delete m; // "%?"
348
349 vector<const token*> my_enqueued_pp;
350
351 int nesting = 0;
352 while (true) // consume THEN tokens
353 {
354 try
355 {
356 m = result ? scan_pp (wildcard) : input.scan (wildcard);
357 }
358 catch (const parse_error &e)
359 {
360 if (result) throw e; // propagate errors if THEN branch taken
361 continue;
362 }
363
364 if (m && m->type == tok_operator && m->content == "%(") // nested %(
365 nesting ++;
366 if (nesting == 0 && m && (m->type == tok_operator && (m->content == "%:" || // ELSE
367 m->content == "%)"))) // END
368 break;
369 if (nesting && m && m->type == tok_operator && m->content == "%)") // nested %)
370 nesting --;
371
372 if (!m)
373 throw parse_error ("incomplete conditional - missing '%:' or '%)'", t);
374 if (result)
375 my_enqueued_pp.push_back (m);
376 if (!result)
377 delete m; // do nothing, just dispose of unkept THEN token
378
379 continue;
380 }
381
382 if (m && m->type == tok_operator && m->content == "%:") // ELSE
383 {
384 delete m; // "%:"
385 int nesting = 0;
386 while (true)
387 {
388 try
389 {
390 m = result ? input.scan (wildcard) : scan_pp (wildcard);
391 }
392 catch (const parse_error& e)
393 {
394 if (!result) throw e; // propagate errors if ELSE branch taken
395 continue;
396 }
397
398 if (m && m->type == tok_operator && m->content == "%(") // nested %(
399 nesting ++;
400 if (nesting == 0 && m && m->type == tok_operator && m->content == "%)") // END
401 break;
402 if (nesting && m && m->type == tok_operator && m->content == "%)") // nested %)
403 nesting --;
404
405 if (!m)
406 throw parse_error ("incomplete conditional - missing %)", t);
407 if (!result)
408 my_enqueued_pp.push_back (m);
409 if (result)
410 delete m; // do nothing, just dispose of unkept ELSE token
411
412 continue;
413 }
414 }
415
416 /*
417 clog << "PP eval (" << *t << ") == " << result << " tokens: " << endl;
418 for (unsigned k=0; k<my_enqueued_pp.size(); k++)
419 clog << * my_enqueued_pp[k] << endl;
420 clog << endl;
421 */
422
423 delete t; // "%("
424 delete m; // "%)"
425
426
427 // NB: we transcribe the retained tokens here, and not inside
428 // the THEN/ELSE while loops. If it were done there, each loop
429 // would become infinite (each iteration consuming an ordinary
430 // token the previous one just pushed there). Guess how I
431 // figured that out.
432 enqueued_pp.insert (enqueued_pp.end(),
433 my_enqueued_pp.begin(),
434 my_enqueued_pp.end());
435
436 // Go back to outermost while(true) loop. We hope that at least
437 // some THEN or ELSE tokens were enqueued. If not, around we go
438 // again, until EOF.
439 }
440 }
441
442
443 const token*
444 parser::next (bool wildcard)
445 {
446 if (! next_t)
447 next_t = scan_pp (wildcard);
448 if (! next_t)
449 throw parse_error ("unexpected end-of-file");
450
451 last_t = next_t;
452 // advance by zeroing next_t
453 next_t = 0;
454 return last_t;
455 }
456
457
458 const token*
459 parser::peek (bool wildcard)
460 {
461 if (! next_t)
462 next_t = scan_pp (wildcard);
463
464 // don't advance by zeroing next_t
465 last_t = next_t;
466 return next_t;
467 }
468
469
470 static inline bool
471 tok_is(token const * t, token_type tt, string const & expected)
472 {
473 return t && t->type == tt && t->content == expected;
474 }
475
476
477 const token*
478 parser::expect_known (token_type tt, string const & expected)
479 {
480 const token *t = next();
481 if (! (t && t->type == tt && t->content == expected))
482 throw parse_error ("expected '" + expected + "'");
483 return t;
484 }
485
486
487 const token*
488 parser::expect_unknown (token_type tt, string & target)
489 {
490 const token *t = next();
491 if (!(t && t->type == tt))
492 throw parse_error ("expected " + tt2str(tt));
493 target = t->content;
494 return t;
495 }
496
497
498 const token*
499 parser::expect_unknown2 (token_type tt1, token_type tt2, string & target)
500 {
501 const token *t = next();
502 if (!(t && (t->type == tt1 || t->type == tt2)))
503 throw parse_error ("expected " + tt2str(tt1) + " or " + tt2str(tt2));
504 target = t->content;
505 return t;
506 }
507
508
509 const token*
510 parser::expect_op (std::string const & expected)
511 {
512 return expect_known (tok_operator, expected);
513 }
514
515
516 const token*
517 parser::expect_kw (std::string const & expected)
518 {
519 return expect_known (tok_identifier, expected);
520 }
521
522 const token*
523 parser::expect_number (int64_t & value)
524 {
525 bool neg = false;
526 const token *t = next();
527 if (t->type == tok_operator && t->content == "-")
528 {
529 neg = true;
530 t = next ();
531 }
532 if (!(t && t->type == tok_number))
533 throw parse_error ("expected number");
534
535 const char* startp = t->content.c_str ();
536 char* endp = (char*) startp;
537
538 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
539 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
540 // since the lexer only gives us positive digit strings, but we'll
541 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
542 errno = 0;
543 value = (int64_t) strtoull (startp, & endp, 0);
544 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
545 || (neg && (unsigned long long) value > 9223372036854775808ULL)
546 || (unsigned long long) value > 18446744073709551615ULL
547 || value < -9223372036854775807LL-1)
548 throw parse_error ("number invalid or out of range");
549
550 if (neg)
551 value = -value;
552
553 return t;
554 }
555
556
557 const token*
558 parser::expect_ident (std::string & target)
559 {
560 return expect_unknown (tok_identifier, target);
561 }
562
563
564 const token*
565 parser::expect_ident_or_keyword (std::string & target)
566 {
567 return expect_unknown2 (tok_identifier, tok_keyword, target);
568 }
569
570
571 bool
572 parser::peek_op (std::string const & op)
573 {
574 return tok_is (peek(), tok_operator, op);
575 }
576
577
578 bool
579 parser::peek_kw (std::string const & kw)
580 {
581 return tok_is (peek(), tok_identifier, kw);
582 }
583
584
585
586 lexer::lexer (istream& i, const string& in, systemtap_session& s):
587 input (i), input_name (in), input_contents (""),
588 input_pointer (0), cursor_suspend_count(0),
589 cursor_line (1), cursor_column (1), session(s),
590 current_file (0)
591 {
592 char c;
593 while(input.get(c))
594 input_contents.push_back(c);
595 }
596
597 std::string
598 lexer::get_input_contents ()
599 {
600 return input_contents;
601 }
602
603 void
604 lexer::set_current_file (stapfile* f)
605 {
606 current_file = f;
607 }
608
609 int
610 lexer::input_peek (unsigned n)
611 {
612 if (input_contents.size() > (input_pointer + n))
613 return (int)(unsigned char)input_contents[input_pointer+n];
614 else
615 return -1;
616 }
617
618
619 int
620 lexer::input_get ()
621 {
622 int c = input_peek (0);
623 input_pointer ++;
624
625 if (c < 0) return c; // EOF
626
627 if (cursor_suspend_count)
628 // Track effect of input_put: preserve previous cursor/line_column
629 // until all of its characters are consumed.
630 cursor_suspend_count --;
631 else
632 {
633 // update source cursor
634 if (c == '\n')
635 {
636 cursor_line ++;
637 cursor_column = 1;
638 }
639 else
640 cursor_column ++;
641 }
642
643 // clog << "[" << (char)c << "]";
644 return c;
645 }
646
647
648 void
649 lexer::input_put (const string& chars)
650 {
651 // clog << "[put:" << chars << " @" << input_pointer << "]";
652 input_contents.insert (input_contents.begin() + input_pointer, chars.begin(), chars.end());
653 cursor_suspend_count += chars.size();
654 }
655
656
657 token*
658 lexer::scan (bool wildcard)
659 {
660 token* n = new token;
661 n->location.file = input_name;
662 if (current_file)
663 n->location.stap_file = current_file;
664
665 unsigned semiskipped_p = 0;
666
667 skip:
668 n->location.line = cursor_line;
669 n->location.column = cursor_column;
670
671 semiskip:
672 if (semiskipped_p > 1)
673 {
674 input_get ();
675 throw parse_error ("invalid nested substitution of command line arguments");
676 }
677
678 int c = input_get();
679 int c2 = input_peek ();
680 // clog << "{" << (char)c << (char)c2 << "}";
681 if (c < 0)
682 {
683 delete n;
684 return 0;
685 }
686
687 if (isspace (c))
688 goto skip;
689
690 // Paste command line arguments as character streams into
691 // the beginning of a token. $1..$999 go through as raw
692 // characters; @1..@999 are quoted/escaped as strings.
693 // $# and @# expand to the number of arguments, similarly
694 // raw or quoted.
695 if ((c == '$' || c == '@') &&
696 (c2 == '#'))
697 {
698 input_get(); // swallow '#'
699 stringstream converter;
700 converter << session.args.size ();
701 if (c == '$') input_put (converter.str());
702 else input_put (lex_cast_qstring (converter.str()));
703 semiskipped_p ++;
704 goto semiskip;
705 }
706 else if ((c == '$' || c == '@') &&
707 (isdigit (c2)))
708 {
709 unsigned idx = 0;
710 do
711 {
712 input_get ();
713 idx = (idx * 10) + (c2 - '0');
714 c2 = input_peek ();
715 } while (c2 > 0 &&
716 isdigit (c2) &&
717 idx <= session.args.size()); // prevent overflow
718 if (idx == 0 ||
719 idx-1 >= session.args.size())
720 throw parse_error ("command line argument index " + lex_cast<string>(idx)
721 + " out of range [1-" + lex_cast<string>(session.args.size()) + "]", n);
722
723 string arg = session.args[idx-1];
724 if (c == '$') input_put (arg);
725 else input_put (lex_cast_qstring (arg));
726 semiskipped_p ++;
727 goto semiskip;
728 }
729
730 else if (isalpha (c) || c == '$' || c == '@' || c == '_' ||
731 (wildcard && c == '*'))
732 {
733 n->type = tok_identifier;
734 n->content = (char) c;
735 while (isalnum (c2) || c2 == '_' || c2 == '$' ||
736 (wildcard && c2 == '*'))
737 {
738 input_get ();
739 n->content.push_back (c2);
740 c2 = input_peek ();
741 }
742
743 if (n->content == "probe"
744 || n->content == "global"
745 || n->content == "function"
746 || n->content == "if"
747 || n->content == "else"
748 || n->content == "for"
749 || n->content == "foreach"
750 || n->content == "in"
751 || n->content == "limit"
752 || n->content == "return"
753 || n->content == "delete"
754 || n->content == "while"
755 || n->content == "break"
756 || n->content == "continue"
757 || n->content == "next"
758 || n->content == "string"
759 || n->content == "long")
760 n->type = tok_keyword;
761
762 return n;
763 }
764
765 else if (isdigit (c)) // positive literal
766 {
767 n->type = tok_number;
768 n->content = (char) c;
769
770 while (1)
771 {
772 int c2 = input_peek ();
773 if (c2 < 0)
774 break;
775
776 // NB: isalnum is very permissive. We rely on strtol, called in
777 // parser::parse_literal below, to confirm that the number string
778 // is correctly formatted and in range.
779
780 if (isalnum (c2))
781 {
782 n->content.push_back (c2);
783 input_get ();
784 }
785 else
786 break;
787 }
788 return n;
789 }
790
791 else if (c == '\"')
792 {
793 n->type = tok_string;
794 while (1)
795 {
796 c = input_get ();
797
798 if (c < 0 || c == '\n')
799 {
800 n->type = tok_junk;
801 break;
802 }
803 if (c == '\"') // closing double-quotes
804 break;
805 else if (c == '\\') // see also input_put
806 {
807 c = input_get ();
808 switch (c)
809 {
810 case 'a':
811 case 'b':
812 case 't':
813 case 'n':
814 case 'v':
815 case 'f':
816 case 'r':
817 case '0' ... '7': // NB: need only match the first digit
818 case '\\':
819 // Pass these escapes through to the string value
820 // being parsed; it will be emitted into a C literal.
821
822 n->content.push_back('\\');
823
824 // fall through
825 default:
826 n->content.push_back(c);
827 break;
828 }
829 }
830 else
831 n->content.push_back(c);
832 }
833 return n;
834 }
835
836 else if (ispunct (c))
837 {
838 int c2 = input_peek ();
839 int c3 = input_peek (1);
840 string s1 = string("") + (char) c;
841 string s2 = (c2 > 0 ? s1 + (char) c2 : s1);
842 string s3 = (c3 > 0 ? s2 + (char) c3 : s2);
843
844 // NB: if we were to recognize negative numeric literals here,
845 // we'd introduce another grammar ambiguity:
846 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
847 // instead of tok_number(1) tok_operator('-') tok_number(1)
848
849 if (s1 == "#") // shell comment
850 {
851 unsigned this_line = cursor_line;
852 do { c = input_get (); }
853 while (c >= 0 && cursor_line == this_line);
854 goto skip;
855 }
856 else if (s2 == "//") // C++ comment
857 {
858 unsigned this_line = cursor_line;
859 do { c = input_get (); }
860 while (c >= 0 && cursor_line == this_line);
861 goto skip;
862 }
863 else if (c == '/' && c2 == '*') // C comment
864 {
865 c2 = input_get ();
866 unsigned chars = 0;
867 while (c2 >= 0)
868 {
869 chars ++; // track this to prevent "/*/" from being accepted
870 c = c2;
871 c2 = input_get ();
872 if (chars > 1 && c == '*' && c2 == '/')
873 break;
874 }
875 goto skip;
876 }
877 else if (c == '%' && c2 == '{') // embedded code
878 {
879 n->type = tok_embedded;
880 (void) input_get (); // swallow '{' already in c2
881 while (true)
882 {
883 c = input_get ();
884 if (c < 0) // EOF
885 {
886 n->type = tok_junk;
887 break;
888 }
889 if (c == '%')
890 {
891 c2 = input_peek ();
892 if (c2 == '}')
893 {
894 (void) input_get (); // swallow '}' too
895 break;
896 }
897 }
898 n->content += c;
899 }
900 return n;
901 }
902
903 // We're committed to recognizing at least the first character
904 // as an operator.
905 n->type = tok_operator;
906
907 // match all valid operators, in decreasing size order
908 if (s3 == "<<<" ||
909 s3 == "<<=" ||
910 s3 == ">>=")
911 {
912 n->content = s3;
913 input_get (); input_get (); // swallow other two characters
914 }
915 else if (s2 == "==" ||
916 s2 == "!=" ||
917 s2 == "<=" ||
918 s2 == ">=" ||
919 s2 == "+=" ||
920 s2 == "-=" ||
921 s2 == "*=" ||
922 s2 == "/=" ||
923 s2 == "%=" ||
924 s2 == "&=" ||
925 s2 == "^=" ||
926 s2 == "|=" ||
927 s2 == ".=" ||
928 s2 == "&&" ||
929 s2 == "||" ||
930 s2 == "++" ||
931 s2 == "--" ||
932 s2 == "->" ||
933 s2 == "<<" ||
934 s2 == ">>" ||
935 // preprocessor tokens
936 s2 == "%(" ||
937 s2 == "%?" ||
938 s2 == "%:" ||
939 s2 == "%)")
940 {
941 n->content = s2;
942 input_get (); // swallow other character
943 }
944 else
945 {
946 n->content = s1;
947 }
948
949 return n;
950 }
951
952 else
953 {
954 n->type = tok_junk;
955 n->content = (char) c;
956 return n;
957 }
958 }
959
960
961 // ------------------------------------------------------------------------
962
963 stapfile*
964 parser::parse ()
965 {
966 stapfile* f = new stapfile;
967 input.set_current_file (f);
968 f->file_contents = input.get_input_contents ();
969 f->name = input_name;
970
971 bool empty = true;
972
973 while (1)
974 {
975 try
976 {
977 const token* t = peek ();
978 if (! t) // nice clean EOF
979 break;
980
981 empty = false;
982 if (t->type == tok_keyword && t->content == "probe")
983 {
984 context = con_probe;
985 parse_probe (f->probes, f->aliases);
986 }
987 else if (t->type == tok_keyword && t->content == "global")
988 {
989 context = con_global;
990 parse_global (f->globals, f->probes);
991 }
992 else if (t->type == tok_keyword && t->content == "function")
993 {
994 context = con_function;
995 parse_functiondecl (f->functions);
996 }
997 else if (t->type == tok_embedded)
998 {
999 context = con_embedded;
1000 f->embeds.push_back (parse_embeddedcode ());
1001 }
1002 else
1003 {
1004 context = con_unknown;
1005 throw parse_error ("expected 'probe', 'global', 'function', or '%{'");
1006 }
1007 }
1008 catch (parse_error& pe)
1009 {
1010 print_error (pe);
1011 if (pe.skip_some) // for recovery
1012 try
1013 {
1014 // Quietly swallow all tokens until the next '}'.
1015 while (1)
1016 {
1017 const token* t = peek ();
1018 if (! t)
1019 break;
1020 next ();
1021 if (t->type == tok_operator && t->content == "}")
1022 break;
1023 }
1024 }
1025 catch (parse_error& pe2)
1026 {
1027 // parse error during recovery ... ugh
1028 print_error (pe2);
1029 }
1030 }
1031 }
1032
1033 if (empty)
1034 {
1035 cerr << "Input file '" << input_name << "' is empty or missing." << endl;
1036 delete f;
1037 input.set_current_file (0);
1038 return 0;
1039 }
1040 else if (num_errors > 0)
1041 {
1042 cerr << num_errors << " parse error(s)." << endl;
1043 delete f;
1044 input.set_current_file (0);
1045 return 0;
1046 }
1047
1048 input.set_current_file (0);
1049 return f;
1050 }
1051
1052
1053 void
1054 parser::parse_probe (std::vector<probe *> & probe_ret,
1055 std::vector<probe_alias *> & alias_ret)
1056 {
1057 const token* t0 = next ();
1058 if (! (t0->type == tok_keyword && t0->content == "probe"))
1059 throw parse_error ("expected 'probe'");
1060
1061 vector<probe_point *> aliases;
1062 vector<probe_point *> locations;
1063
1064 bool equals_ok = true;
1065
1066 int epilogue_alias = 0;
1067
1068 while (1)
1069 {
1070 probe_point * pp = parse_probe_point ();
1071
1072 const token* t = peek ();
1073 if (equals_ok && t
1074 && t->type == tok_operator && t->content == "=")
1075 {
1076 if (pp->optional || pp->sufficient)
1077 throw parse_error ("probe point alias name cannot be optional nor sufficient", pp->tok);
1078 aliases.push_back(pp);
1079 next ();
1080 continue;
1081 }
1082 else if (equals_ok && t
1083 && t->type == tok_operator && t->content == "+=")
1084 {
1085 if (pp->optional || pp->sufficient)
1086 throw parse_error ("probe point alias name cannot be optional nor sufficient", pp->tok);
1087 aliases.push_back(pp);
1088 epilogue_alias = 1;
1089 next ();
1090 continue;
1091 }
1092 else if (t && t->type == tok_operator && t->content == ",")
1093 {
1094 locations.push_back(pp);
1095 equals_ok = false;
1096 next ();
1097 continue;
1098 }
1099 else if (t && t->type == tok_operator && t->content == "{")
1100 {
1101 locations.push_back(pp);
1102 break;
1103 }
1104 else
1105 throw parse_error ("expected probe point specifier");
1106 }
1107
1108 if (aliases.empty())
1109 {
1110 probe* p = new probe;
1111 p->tok = t0;
1112 p->locations = locations;
1113 p->body = parse_stmt_block ();
1114 p->privileged = privileged;
1115 probe_ret.push_back (p);
1116 }
1117 else
1118 {
1119 probe_alias* p = new probe_alias (aliases);
1120 if(epilogue_alias)
1121 p->epilogue_style = true;
1122 else
1123 p->epilogue_style = false;
1124 p->tok = t0;
1125 p->locations = locations;
1126 p->body = parse_stmt_block ();
1127 p->privileged = privileged;
1128 alias_ret.push_back (p);
1129 }
1130 }
1131
1132
1133 embeddedcode*
1134 parser::parse_embeddedcode ()
1135 {
1136 embeddedcode* e = new embeddedcode;
1137 const token* t = next ();
1138 if (t->type != tok_embedded)
1139 throw parse_error ("expected '%{'");
1140
1141 if (! privileged)
1142 throw parse_error ("embedded code in unprivileged script",
1143 false /* don't skip tokens for parse resumption */);
1144
1145 e->tok = t;
1146 e->code = t->content;
1147 return e;
1148 }
1149
1150
1151 block*
1152 parser::parse_stmt_block ()
1153 {
1154 block* pb = new block;
1155
1156 const token* t = next ();
1157 if (! (t->type == tok_operator && t->content == "{"))
1158 throw parse_error ("expected '{'");
1159
1160 pb->tok = t;
1161
1162 while (1)
1163 {
1164 try
1165 {
1166 t = peek ();
1167 if (t && t->type == tok_operator && t->content == "}")
1168 {
1169 next ();
1170 break;
1171 }
1172
1173 pb->statements.push_back (parse_statement ());
1174 }
1175 catch (parse_error& pe)
1176 {
1177 print_error (pe);
1178
1179 // Quietly swallow all tokens until the next ';' or '}'.
1180 while (1)
1181 {
1182 const token* t = peek ();
1183 if (! t) return 0;
1184 next ();
1185 if (t->type == tok_operator
1186 && (t->content == "}" || t->content == ";"))
1187 break;
1188 }
1189 }
1190 }
1191
1192 return pb;
1193 }
1194
1195
1196 statement*
1197 parser::parse_statement ()
1198 {
1199 const token* t = peek ();
1200 if (t && t->type == tok_operator && t->content == ";")
1201 {
1202 null_statement* n = new null_statement ();
1203 n->tok = next ();
1204 return n;
1205 }
1206 else if (t && t->type == tok_operator && t->content == "{")
1207 return parse_stmt_block ();
1208 else if (t && t->type == tok_keyword && t->content == "if")
1209 return parse_if_statement ();
1210 else if (t && t->type == tok_keyword && t->content == "for")
1211 return parse_for_loop ();
1212 else if (t && t->type == tok_keyword && t->content == "foreach")
1213 return parse_foreach_loop ();
1214 else if (t && t->type == tok_keyword && t->content == "return")
1215 return parse_return_statement ();
1216 else if (t && t->type == tok_keyword && t->content == "delete")
1217 return parse_delete_statement ();
1218 else if (t && t->type == tok_keyword && t->content == "while")
1219 return parse_while_loop ();
1220 else if (t && t->type == tok_keyword && t->content == "break")
1221 return parse_break_statement ();
1222 else if (t && t->type == tok_keyword && t->content == "continue")
1223 return parse_continue_statement ();
1224 else if (t && t->type == tok_keyword && t->content == "next")
1225 return parse_next_statement ();
1226 // XXX: "do/while" statement?
1227 else if (t && (t->type == tok_operator || // expressions are flexible
1228 t->type == tok_identifier ||
1229 t->type == tok_number ||
1230 t->type == tok_string))
1231 return parse_expr_statement ();
1232 // XXX: consider generally accepting tok_embedded here too
1233 else
1234 throw parse_error ("expected statement");
1235 }
1236
1237
1238 void
1239 parser::parse_global (vector <vardecl*>& globals, vector<probe*>&)
1240 {
1241 const token* t0 = next ();
1242 if (! (t0->type == tok_keyword && t0->content == "global"))
1243 throw parse_error ("expected 'global'");
1244
1245 while (1)
1246 {
1247 const token* t = next ();
1248 if (! (t->type == tok_identifier))
1249 throw parse_error ("expected identifier");
1250
1251 for (unsigned i=0; i<globals.size(); i++)
1252 if (globals[i]->name == t->content)
1253 throw parse_error ("duplicate global name");
1254
1255 vardecl* d = new vardecl;
1256 d->name = t->content;
1257 d->tok = t;
1258 globals.push_back (d);
1259
1260 t = peek ();
1261
1262 if (t && t->type == tok_operator && t->content == "[") // array size
1263 {
1264 int64_t size;
1265 next ();
1266 expect_number(size);
1267 if (size <= 0 || size > 1000000) // arbitrary max
1268 throw parse_error("array size out of range");
1269 d->maxsize = (int)size;
1270 expect_known(tok_operator, "]");
1271 t = peek ();
1272 }
1273
1274 if (t && t->type == tok_operator && t->content == "=") // initialization
1275 {
1276 if (!d->compatible_arity(0))
1277 throw parse_error("only scalar globals can be initialized");
1278 d->set_arity(0);
1279 next ();
1280 d->init = parse_literal ();
1281 d->type = d->init->type;
1282 t = peek ();
1283 }
1284
1285 if (t && t->type == tok_operator && t->content == ";") // termination
1286 next();
1287
1288 if (t && t->type == tok_operator && t->content == ",") // next global
1289 {
1290 next ();
1291 continue;
1292 }
1293 else
1294 break;
1295 }
1296 }
1297
1298
1299 void
1300 parser::parse_functiondecl (std::vector<functiondecl*>& functions)
1301 {
1302 const token* t = next ();
1303 if (! (t->type == tok_keyword && t->content == "function"))
1304 throw parse_error ("expected 'function'");
1305
1306
1307 t = next ();
1308 if (! (t->type == tok_identifier)
1309 && ! (t->type == tok_keyword
1310 && (t->content == "string" || t->content == "long")))
1311 throw parse_error ("expected identifier");
1312
1313 for (unsigned i=0; i<functions.size(); i++)
1314 if (functions[i]->name == t->content)
1315 throw parse_error ("duplicate function name");
1316
1317 functiondecl *fd = new functiondecl ();
1318 fd->name = t->content;
1319 fd->tok = t;
1320
1321 t = next ();
1322 if (t->type == tok_operator && t->content == ":")
1323 {
1324 t = next ();
1325 if (t->type == tok_keyword && t->content == "string")
1326 fd->type = pe_string;
1327 else if (t->type == tok_keyword && t->content == "long")
1328 fd->type = pe_long;
1329 else throw parse_error ("expected 'string' or 'long'");
1330
1331 t = next ();
1332 }
1333
1334 if (! (t->type == tok_operator && t->content == "("))
1335 throw parse_error ("expected '('");
1336
1337 while (1)
1338 {
1339 t = next ();
1340
1341 // permit zero-argument fuctions
1342 if (t->type == tok_operator && t->content == ")")
1343 break;
1344 else if (! (t->type == tok_identifier))
1345 throw parse_error ("expected identifier");
1346 vardecl* vd = new vardecl;
1347 vd->name = t->content;
1348 vd->tok = t;
1349 fd->formal_args.push_back (vd);
1350
1351 t = next ();
1352 if (t->type == tok_operator && t->content == ":")
1353 {
1354 t = next ();
1355 if (t->type == tok_keyword && t->content == "string")
1356 vd->type = pe_string;
1357 else if (t->type == tok_keyword && t->content == "long")
1358 vd->type = pe_long;
1359 else throw parse_error ("expected 'string' or 'long'");
1360
1361 t = next ();
1362 }
1363 if (t->type == tok_operator && t->content == ")")
1364 break;
1365 if (t->type == tok_operator && t->content == ",")
1366 continue;
1367 else
1368 throw parse_error ("expected ',' or ')'");
1369 }
1370
1371 t = peek ();
1372 if (t && t->type == tok_embedded)
1373 fd->body = parse_embeddedcode ();
1374 else
1375 fd->body = parse_stmt_block ();
1376
1377 functions.push_back (fd);
1378 }
1379
1380
1381 probe_point*
1382 parser::parse_probe_point ()
1383 {
1384 probe_point* pl = new probe_point;
1385
1386 while (1)
1387 {
1388 const token* t = next (true); // wildcard scanning here
1389 if (! (t->type == tok_identifier
1390 // we must allow ".return" and ".function", which are keywords
1391 || t->type == tok_keyword))
1392 throw parse_error ("expected identifier or '*'");
1393
1394 if (pl->tok == 0) pl->tok = t;
1395
1396 probe_point::component* c = new probe_point::component;
1397 c->functor = t->content;
1398 pl->components.push_back (c);
1399 // NB we may add c->arg soon
1400
1401 t = peek ();
1402
1403 // consume optional parameter
1404 if (t && t->type == tok_operator && t->content == "(")
1405 {
1406 next (); // consume "("
1407 c->arg = parse_literal ();
1408
1409 t = next ();
1410 if (! (t->type == tok_operator && t->content == ")"))
1411 throw parse_error ("expected ')'");
1412
1413 t = peek ();
1414 }
1415
1416 if (t && t->type == tok_operator && t->content == ".")
1417 {
1418 next ();
1419 continue;
1420 }
1421
1422 // We only fall through here at the end of a probe point (past
1423 // all the dotted/parametrized components).
1424
1425 if (t && t->type == tok_operator &&
1426 (t->content == "?" || t->content == "!"))
1427 {
1428 pl->optional = true;
1429 if (t->content == "!") pl->sufficient = true;
1430 // NB: sufficient implies optional
1431 next ();
1432 t = peek ();
1433 // fall through
1434 }
1435
1436 if (t && t->type == tok_keyword && t->content == "if")
1437 {
1438 next ();
1439 t = peek ();
1440 if (t && ! (t->type == tok_operator && t->content == "("))
1441 throw parse_error ("expected '('");
1442 next ();
1443
1444 pl->condition = parse_expression ();
1445
1446 t = peek ();
1447 if (t && ! (t->type == tok_operator && t->content == ")"))
1448 throw parse_error ("expected ')'");
1449 next ();
1450 t = peek ();
1451 // fall through
1452 }
1453
1454 if (t && t->type == tok_operator
1455 && (t->content == "{" || t->content == "," ||
1456 t->content == "=" || t->content == "+=" ))
1457 break;
1458
1459 throw parse_error ("expected one of '. , ( ? ! { = +='");
1460 }
1461
1462 return pl;
1463 }
1464
1465
1466 literal*
1467 parser::parse_literal ()
1468 {
1469 const token* t = next ();
1470 literal* l;
1471 if (t->type == tok_string)
1472 l = new literal_string (t->content);
1473 else
1474 {
1475 bool neg = false;
1476 if (t->type == tok_operator && t->content == "-")
1477 {
1478 neg = true;
1479 t = next ();
1480 }
1481
1482 if (t->type == tok_number)
1483 {
1484 const char* startp = t->content.c_str ();
1485 char* endp = (char*) startp;
1486
1487 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1488 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1489 // since the lexer only gives us positive digit strings, but we'll
1490 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1491 errno = 0;
1492 long long value = (long long) strtoull (startp, & endp, 0);
1493 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
1494 || (neg && (unsigned long long) value > 9223372036854775808ULL)
1495 || (unsigned long long) value > 18446744073709551615ULL
1496 || value < -9223372036854775807LL-1)
1497 throw parse_error ("number invalid or out of range");
1498
1499 if (neg)
1500 value = -value;
1501
1502 l = new literal_number (value);
1503 }
1504 else
1505 throw parse_error ("expected literal string or number");
1506 }
1507
1508 l->tok = t;
1509 return l;
1510 }
1511
1512
1513 if_statement*
1514 parser::parse_if_statement ()
1515 {
1516 const token* t = next ();
1517 if (! (t->type == tok_keyword && t->content == "if"))
1518 throw parse_error ("expected 'if'");
1519 if_statement* s = new if_statement;
1520 s->tok = t;
1521
1522 t = next ();
1523 if (! (t->type == tok_operator && t->content == "("))
1524 throw parse_error ("expected '('");
1525
1526 s->condition = parse_expression ();
1527
1528 t = next ();
1529 if (! (t->type == tok_operator && t->content == ")"))
1530 throw parse_error ("expected ')'");
1531
1532 s->thenblock = parse_statement ();
1533
1534 t = peek ();
1535 if (t && t->type == tok_keyword && t->content == "else")
1536 {
1537 next ();
1538 s->elseblock = parse_statement ();
1539 }
1540 else
1541 s->elseblock = 0; // in case not otherwise initialized
1542
1543 return s;
1544 }
1545
1546
1547 expr_statement*
1548 parser::parse_expr_statement ()
1549 {
1550 expr_statement *es = new expr_statement;
1551 const token* t = peek ();
1552 es->tok = t;
1553 es->value = parse_expression ();
1554 return es;
1555 }
1556
1557
1558 return_statement*
1559 parser::parse_return_statement ()
1560 {
1561 const token* t = next ();
1562 if (! (t->type == tok_keyword && t->content == "return"))
1563 throw parse_error ("expected 'return'");
1564 if (context != con_function)
1565 throw parse_error ("found 'return' not in function context");
1566 return_statement* s = new return_statement;
1567 s->tok = t;
1568 s->value = parse_expression ();
1569 return s;
1570 }
1571
1572
1573 delete_statement*
1574 parser::parse_delete_statement ()
1575 {
1576 const token* t = next ();
1577 if (! (t->type == tok_keyword && t->content == "delete"))
1578 throw parse_error ("expected 'delete'");
1579 delete_statement* s = new delete_statement;
1580 s->tok = t;
1581 s->value = parse_expression ();
1582 return s;
1583 }
1584
1585
1586 next_statement*
1587 parser::parse_next_statement ()
1588 {
1589 const token* t = next ();
1590 if (! (t->type == tok_keyword && t->content == "next"))
1591 throw parse_error ("expected 'next'");
1592 if (context != con_probe)
1593 throw parse_error ("found 'next' not in probe context");
1594 next_statement* s = new next_statement;
1595 s->tok = t;
1596 return s;
1597 }
1598
1599
1600 break_statement*
1601 parser::parse_break_statement ()
1602 {
1603 const token* t = next ();
1604 if (! (t->type == tok_keyword && t->content == "break"))
1605 throw parse_error ("expected 'break'");
1606 break_statement* s = new break_statement;
1607 s->tok = t;
1608 return s;
1609 }
1610
1611
1612 continue_statement*
1613 parser::parse_continue_statement ()
1614 {
1615 const token* t = next ();
1616 if (! (t->type == tok_keyword && t->content == "continue"))
1617 throw parse_error ("expected 'continue'");
1618 continue_statement* s = new continue_statement;
1619 s->tok = t;
1620 return s;
1621 }
1622
1623
1624 for_loop*
1625 parser::parse_for_loop ()
1626 {
1627 const token* t = next ();
1628 if (! (t->type == tok_keyword && t->content == "for"))
1629 throw parse_error ("expected 'for'");
1630 for_loop* s = new for_loop;
1631 s->tok = t;
1632
1633 t = next ();
1634 if (! (t->type == tok_operator && t->content == "("))
1635 throw parse_error ("expected '('");
1636
1637 // initializer + ";"
1638 t = peek ();
1639 if (t && t->type == tok_operator && t->content == ";")
1640 {
1641 s->init = 0;
1642 next ();
1643 }
1644 else
1645 {
1646 s->init = parse_expr_statement ();
1647 t = next ();
1648 if (! (t->type == tok_operator && t->content == ";"))
1649 throw parse_error ("expected ';'");
1650 }
1651
1652 // condition + ";"
1653 t = peek ();
1654 if (t && t->type == tok_operator && t->content == ";")
1655 {
1656 literal_number* l = new literal_number(1);
1657 s->cond = l;
1658 s->cond->tok = next ();
1659 }
1660 else
1661 {
1662 s->cond = parse_expression ();
1663 t = next ();
1664 if (! (t->type == tok_operator && t->content == ";"))
1665 throw parse_error ("expected ';'");
1666 }
1667
1668 // increment + ")"
1669 t = peek ();
1670 if (t && t->type == tok_operator && t->content == ")")
1671 {
1672 s->incr = 0;
1673 next ();
1674 }
1675 else
1676 {
1677 s->incr = parse_expr_statement ();
1678 t = next ();
1679 if (! (t->type == tok_operator && t->content == ")"))
1680 throw parse_error ("expected ')'");
1681 }
1682
1683 // block
1684 s->block = parse_statement ();
1685
1686 return s;
1687 }
1688
1689
1690 for_loop*
1691 parser::parse_while_loop ()
1692 {
1693 const token* t = next ();
1694 if (! (t->type == tok_keyword && t->content == "while"))
1695 throw parse_error ("expected 'while'");
1696 for_loop* s = new for_loop;
1697 s->tok = t;
1698
1699 t = next ();
1700 if (! (t->type == tok_operator && t->content == "("))
1701 throw parse_error ("expected '('");
1702
1703 // dummy init and incr fields
1704 s->init = 0;
1705 s->incr = 0;
1706
1707 // condition
1708 s->cond = parse_expression ();
1709
1710 t = next ();
1711 if (! (t->type == tok_operator && t->content == ")"))
1712 throw parse_error ("expected ')'");
1713
1714 // block
1715 s->block = parse_statement ();
1716
1717 return s;
1718 }
1719
1720
1721 foreach_loop*
1722 parser::parse_foreach_loop ()
1723 {
1724 const token* t = next ();
1725 if (! (t->type == tok_keyword && t->content == "foreach"))
1726 throw parse_error ("expected 'foreach'");
1727 foreach_loop* s = new foreach_loop;
1728 s->tok = t;
1729 s->sort_direction = 0;
1730 s->limit = NULL;
1731
1732 t = next ();
1733 if (! (t->type == tok_operator && t->content == "("))
1734 throw parse_error ("expected '('");
1735
1736 // see also parse_array_in
1737
1738 bool parenthesized = false;
1739 t = peek ();
1740 if (t && t->type == tok_operator && t->content == "[")
1741 {
1742 next ();
1743 parenthesized = true;
1744 }
1745
1746 while (1)
1747 {
1748 t = next ();
1749 if (! (t->type == tok_identifier))
1750 throw parse_error ("expected identifier");
1751 symbol* sym = new symbol;
1752 sym->tok = t;
1753 sym->name = t->content;
1754 s->indexes.push_back (sym);
1755
1756 t = peek ();
1757 if (t && t->type == tok_operator &&
1758 (t->content == "+" || t->content == "-"))
1759 {
1760 if (s->sort_direction)
1761 throw parse_error ("multiple sort directives");
1762 s->sort_direction = (t->content == "+") ? 1 : -1;
1763 s->sort_column = s->indexes.size();
1764 next();
1765 }
1766
1767 if (parenthesized)
1768 {
1769 t = peek ();
1770 if (t && t->type == tok_operator && t->content == ",")
1771 {
1772 next ();
1773 continue;
1774 }
1775 else if (t && t->type == tok_operator && t->content == "]")
1776 {
1777 next ();
1778 break;
1779 }
1780 else
1781 throw parse_error ("expected ',' or ']'");
1782 }
1783 else
1784 break; // expecting only one expression
1785 }
1786
1787 t = next ();
1788 if (! (t->type == tok_keyword && t->content == "in"))
1789 throw parse_error ("expected 'in'");
1790
1791 s->base = parse_indexable();
1792
1793 t = peek ();
1794 if (t && t->type == tok_operator &&
1795 (t->content == "+" || t->content == "-"))
1796 {
1797 if (s->sort_direction)
1798 throw parse_error ("multiple sort directives");
1799 s->sort_direction = (t->content == "+") ? 1 : -1;
1800 s->sort_column = 0;
1801 next();
1802 }
1803
1804 t = peek ();
1805 if (tok_is(t, tok_keyword, "limit"))
1806 {
1807 next (); // get past the "limit"
1808 s->limit = parse_expression ();
1809 }
1810
1811 t = next ();
1812 if (! (t->type == tok_operator && t->content == ")"))
1813 throw parse_error ("expected ')'");
1814
1815 s->block = parse_statement ();
1816 return s;
1817 }
1818
1819
1820 expression*
1821 parser::parse_expression ()
1822 {
1823 return parse_assignment ();
1824 }
1825
1826
1827 expression*
1828 parser::parse_assignment ()
1829 {
1830 expression* op1 = parse_ternary ();
1831
1832 const token* t = peek ();
1833 // right-associative operators
1834 if (t && t->type == tok_operator
1835 && (t->content == "=" ||
1836 t->content == "<<<" ||
1837 t->content == "+=" ||
1838 t->content == "-=" ||
1839 t->content == "*=" ||
1840 t->content == "/=" ||
1841 t->content == "%=" ||
1842 t->content == "<<=" ||
1843 t->content == ">>=" ||
1844 t->content == "&=" ||
1845 t->content == "^=" ||
1846 t->content == "|=" ||
1847 t->content == ".=" ||
1848 false))
1849 {
1850 // NB: lvalueness is checked during elaboration / translation
1851 assignment* e = new assignment;
1852 e->left = op1;
1853 e->op = t->content;
1854 e->tok = t;
1855 next ();
1856 e->right = parse_expression ();
1857 op1 = e;
1858 }
1859
1860 return op1;
1861 }
1862
1863
1864 expression*
1865 parser::parse_ternary ()
1866 {
1867 expression* op1 = parse_logical_or ();
1868
1869 const token* t = peek ();
1870 if (t && t->type == tok_operator && t->content == "?")
1871 {
1872 ternary_expression* e = new ternary_expression;
1873 e->tok = t;
1874 e->cond = op1;
1875 next ();
1876 e->truevalue = parse_expression (); // XXX
1877
1878 t = next ();
1879 if (! (t->type == tok_operator && t->content == ":"))
1880 throw parse_error ("expected ':'");
1881
1882 e->falsevalue = parse_expression (); // XXX
1883 return e;
1884 }
1885 else
1886 return op1;
1887 }
1888
1889
1890 expression*
1891 parser::parse_logical_or ()
1892 {
1893 expression* op1 = parse_logical_and ();
1894
1895 const token* t = peek ();
1896 while (t && t->type == tok_operator && t->content == "||")
1897 {
1898 logical_or_expr* e = new logical_or_expr;
1899 e->tok = t;
1900 e->op = t->content;
1901 e->left = op1;
1902 next ();
1903 e->right = parse_logical_and ();
1904 op1 = e;
1905 t = peek ();
1906 }
1907
1908 return op1;
1909 }
1910
1911
1912 expression*
1913 parser::parse_logical_and ()
1914 {
1915 expression* op1 = parse_boolean_or ();
1916
1917 const token* t = peek ();
1918 while (t && t->type == tok_operator && t->content == "&&")
1919 {
1920 logical_and_expr *e = new logical_and_expr;
1921 e->left = op1;
1922 e->op = t->content;
1923 e->tok = t;
1924 next ();
1925 e->right = parse_boolean_or ();
1926 op1 = e;
1927 t = peek ();
1928 }
1929
1930 return op1;
1931 }
1932
1933
1934 expression*
1935 parser::parse_boolean_or ()
1936 {
1937 expression* op1 = parse_boolean_xor ();
1938
1939 const token* t = peek ();
1940 while (t && t->type == tok_operator && t->content == "|")
1941 {
1942 binary_expression* e = new binary_expression;
1943 e->left = op1;
1944 e->op = t->content;
1945 e->tok = t;
1946 next ();
1947 e->right = parse_boolean_xor ();
1948 op1 = e;
1949 t = peek ();
1950 }
1951
1952 return op1;
1953 }
1954
1955
1956 expression*
1957 parser::parse_boolean_xor ()
1958 {
1959 expression* op1 = parse_boolean_and ();
1960
1961 const token* t = peek ();
1962 while (t && t->type == tok_operator && t->content == "^")
1963 {
1964 binary_expression* e = new binary_expression;
1965 e->left = op1;
1966 e->op = t->content;
1967 e->tok = t;
1968 next ();
1969 e->right = parse_boolean_and ();
1970 op1 = e;
1971 t = peek ();
1972 }
1973
1974 return op1;
1975 }
1976
1977
1978 expression*
1979 parser::parse_boolean_and ()
1980 {
1981 expression* op1 = parse_array_in ();
1982
1983 const token* t = peek ();
1984 while (t && t->type == tok_operator && t->content == "&")
1985 {
1986 binary_expression* e = new binary_expression;
1987 e->left = op1;
1988 e->op = t->content;
1989 e->tok = t;
1990 next ();
1991 e->right = parse_array_in ();
1992 op1 = e;
1993 t = peek ();
1994 }
1995
1996 return op1;
1997 }
1998
1999
2000 expression*
2001 parser::parse_array_in ()
2002 {
2003 // This is a very tricky case. All these are legit expressions:
2004 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
2005 vector<expression*> indexes;
2006 bool parenthesized = false;
2007
2008 const token* t = peek ();
2009 if (t && t->type == tok_operator && t->content == "[")
2010 {
2011 next ();
2012 parenthesized = true;
2013 }
2014
2015 while (1)
2016 {
2017 expression* op1 = parse_comparison ();
2018 indexes.push_back (op1);
2019
2020 if (parenthesized)
2021 {
2022 const token* t = peek ();
2023 if (t && t->type == tok_operator && t->content == ",")
2024 {
2025 next ();
2026 continue;
2027 }
2028 else if (t && t->type == tok_operator && t->content == "]")
2029 {
2030 next ();
2031 break;
2032 }
2033 else
2034 throw parse_error ("expected ',' or ']'");
2035 }
2036 else
2037 break; // expecting only one expression
2038 }
2039
2040 t = peek ();
2041 if (t && t->type == tok_keyword && t->content == "in")
2042 {
2043 array_in *e = new array_in;
2044 e->tok = t;
2045 next (); // swallow "in"
2046
2047 arrayindex* a = new arrayindex;
2048 a->indexes = indexes;
2049 a->base = parse_indexable();
2050 a->tok = a->base->get_tok();
2051 e->operand = a;
2052 return e;
2053 }
2054 else if (indexes.size() == 1) // no "in" - need one expression only
2055 return indexes[0];
2056 else
2057 throw parse_error ("unexpected comma-separated expression list");
2058 }
2059
2060
2061 expression*
2062 parser::parse_comparison ()
2063 {
2064 expression* op1 = parse_shift ();
2065
2066 const token* t = peek ();
2067 while (t && t->type == tok_operator
2068 && (t->content == ">" ||
2069 t->content == "<" ||
2070 t->content == "==" ||
2071 t->content == "!=" ||
2072 t->content == "<=" ||
2073 t->content == ">="))
2074 {
2075 comparison* e = new comparison;
2076 e->left = op1;
2077 e->op = t->content;
2078 e->tok = t;
2079 next ();
2080 e->right = parse_shift ();
2081 op1 = e;
2082 t = peek ();
2083 }
2084
2085 return op1;
2086 }
2087
2088
2089 expression*
2090 parser::parse_shift ()
2091 {
2092 expression* op1 = parse_concatenation ();
2093
2094 const token* t = peek ();
2095 while (t && t->type == tok_operator &&
2096 (t->content == "<<" || t->content == ">>"))
2097 {
2098 binary_expression* e = new binary_expression;
2099 e->left = op1;
2100 e->op = t->content;
2101 e->tok = t;
2102 next ();
2103 e->right = parse_concatenation ();
2104 op1 = e;
2105 t = peek ();
2106 }
2107
2108 return op1;
2109 }
2110
2111
2112 expression*
2113 parser::parse_concatenation ()
2114 {
2115 expression* op1 = parse_additive ();
2116
2117 const token* t = peek ();
2118 // XXX: the actual awk string-concatenation operator is *whitespace*.
2119 // I don't know how to easily to model that here.
2120 while (t && t->type == tok_operator && t->content == ".")
2121 {
2122 concatenation* e = new concatenation;
2123 e->left = op1;
2124 e->op = t->content;
2125 e->tok = t;
2126 next ();
2127 e->right = parse_additive ();
2128 op1 = e;
2129 t = peek ();
2130 }
2131
2132 return op1;
2133 }
2134
2135
2136 expression*
2137 parser::parse_additive ()
2138 {
2139 expression* op1 = parse_multiplicative ();
2140
2141 const token* t = peek ();
2142 while (t && t->type == tok_operator
2143 && (t->content == "+" || t->content == "-"))
2144 {
2145 binary_expression* e = new binary_expression;
2146 e->op = t->content;
2147 e->left = op1;
2148 e->tok = t;
2149 next ();
2150 e->right = parse_multiplicative ();
2151 op1 = e;
2152 t = peek ();
2153 }
2154
2155 return op1;
2156 }
2157
2158
2159 expression*
2160 parser::parse_multiplicative ()
2161 {
2162 expression* op1 = parse_unary ();
2163
2164 const token* t = peek ();
2165 while (t && t->type == tok_operator
2166 && (t->content == "*" || t->content == "/" || t->content == "%"))
2167 {
2168 binary_expression* e = new binary_expression;
2169 e->op = t->content;
2170 e->left = op1;
2171 e->tok = t;
2172 next ();
2173 e->right = parse_unary ();
2174 op1 = e;
2175 t = peek ();
2176 }
2177
2178 return op1;
2179 }
2180
2181
2182 expression*
2183 parser::parse_unary ()
2184 {
2185 const token* t = peek ();
2186 if (t && t->type == tok_operator
2187 && (t->content == "+" ||
2188 t->content == "-" ||
2189 t->content == "!" ||
2190 t->content == "~" ||
2191 false))
2192 {
2193 unary_expression* e = new unary_expression;
2194 e->op = t->content;
2195 e->tok = t;
2196 next ();
2197 e->operand = parse_crement ();
2198 return e;
2199 }
2200 else
2201 return parse_crement ();
2202 }
2203
2204
2205 expression*
2206 parser::parse_crement () // as in "increment" / "decrement"
2207 {
2208 // NB: Ideally, we'd parse only a symbol as an operand to the
2209 // *crement operators, instead of a general expression value. We'd
2210 // need more complex lookahead code to tell apart the postfix cases.
2211 // So we just punt, and leave it to pass-3 to signal errors on
2212 // cases like "4++".
2213
2214 const token* t = peek ();
2215 if (t && t->type == tok_operator
2216 && (t->content == "++" || t->content == "--"))
2217 {
2218 pre_crement* e = new pre_crement;
2219 e->op = t->content;
2220 e->tok = t;
2221 next ();
2222 e->operand = parse_value ();
2223 return e;
2224 }
2225
2226 // post-crement or non-crement
2227 expression *op1 = parse_value ();
2228
2229 t = peek ();
2230 if (t && t->type == tok_operator
2231 && (t->content == "++" || t->content == "--"))
2232 {
2233 post_crement* e = new post_crement;
2234 e->op = t->content;
2235 e->tok = t;
2236 next ();
2237 e->operand = op1;
2238 return e;
2239 }
2240 else
2241 return op1;
2242 }
2243
2244
2245 expression*
2246 parser::parse_value ()
2247 {
2248 const token* t = peek ();
2249 if (! t)
2250 throw parse_error ("expected value");
2251
2252 if (t->type == tok_operator && t->content == "(")
2253 {
2254 next ();
2255 expression* e = parse_expression ();
2256 t = next ();
2257 if (! (t->type == tok_operator && t->content == ")"))
2258 throw parse_error ("expected ')'");
2259 return e;
2260 }
2261 else if (t->type == tok_identifier)
2262 return parse_symbol ();
2263 else
2264 return parse_literal ();
2265 }
2266
2267
2268 const token *
2269 parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name)
2270 {
2271 hop = NULL;
2272 const token* t = expect_ident (name);
2273 if (name == "@hist_linear" || name == "@hist_log")
2274 {
2275 hop = new hist_op;
2276 if (name == "@hist_linear")
2277 hop->htype = hist_linear;
2278 else if (name == "@hist_log")
2279 hop->htype = hist_log;
2280 hop->tok = t;
2281 expect_op("(");
2282 hop->stat = parse_expression ();
2283 int64_t tnum;
2284 if (hop->htype == hist_linear)
2285 {
2286 for (size_t i = 0; i < 3; ++i)
2287 {
2288 expect_op (",");
2289 expect_number (tnum);
2290 hop->params.push_back (tnum);
2291 }
2292 }
2293 expect_op(")");
2294 }
2295 return t;
2296 }
2297
2298
2299 indexable*
2300 parser::parse_indexable ()
2301 {
2302 hist_op *hop = NULL;
2303 string name;
2304 const token *tok = parse_hist_op_or_bare_name(hop, name);
2305 if (hop)
2306 return hop;
2307 else
2308 {
2309 symbol* sym = new symbol;
2310 sym->name = name;
2311 sym->tok = tok;
2312 return sym;
2313 }
2314 }
2315
2316
2317 // var, indexable[index], func(parms), printf("...", ...), $var, $var->member, @stat_op(stat)
2318 expression*
2319 parser::parse_symbol ()
2320 {
2321 hist_op *hop = NULL;
2322 symbol *sym = NULL;
2323 string name;
2324 const token *t = parse_hist_op_or_bare_name(hop, name);
2325
2326 if (!hop)
2327 {
2328 // If we didn't get a hist_op, then we did get an identifier. We can
2329 // now scrutinize this identifier for the various magic forms of identifier
2330 // (printf, @stat_op, and $var...)
2331
2332 bool pf_stream, pf_format, pf_delim, pf_newline, pf_char;
2333
2334 if (name == "@cast")
2335 {
2336 // type-punning time
2337 cast_op *cop = new cast_op;
2338 cop->tok = t;
2339 cop->base_name = name;
2340 expect_op("(");
2341 cop->operand = parse_expression ();
2342 expect_op(",");
2343 expect_unknown(tok_string, cop->type);
2344 // types never start with "struct<space>" or "union<space>",
2345 // so gobble it up.
2346 if (cop->type.compare(0, 7, "struct ") == 0)
2347 cop->type = cop->type.substr(7);
2348 if (cop->type.compare(0, 6, "union ") == 0)
2349 cop->type = cop->type.substr(6);
2350 if (peek_op (","))
2351 {
2352 next();
2353 expect_unknown(tok_string, cop->module);
2354 }
2355 expect_op(")");
2356 while (true)
2357 {
2358 string c;
2359 if (peek_op ("->"))
2360 {
2361 next();
2362 expect_ident_or_keyword (c);
2363 cop->components.push_back
2364 (make_pair (target_symbol::comp_struct_member, c));
2365 }
2366 else if (peek_op ("["))
2367 {
2368 next();
2369 expect_unknown (tok_number, c);
2370 expect_op ("]");
2371 cop->components.push_back
2372 (make_pair (target_symbol::comp_literal_array_index, c));
2373 }
2374 else
2375 break;
2376 }
2377 // if there aren't any dereferences, then the cast is pointless
2378 if (cop->components.empty())
2379 {
2380 expression *op = cop->operand;
2381 delete cop;
2382 return op;
2383 }
2384 return cop;
2385 }
2386
2387 else if (name.size() > 0 && name[0] == '@')
2388 {
2389 stat_op *sop = new stat_op;
2390 if (name == "@avg")
2391 sop->ctype = sc_average;
2392 else if (name == "@count")
2393 sop->ctype = sc_count;
2394 else if (name == "@sum")
2395 sop->ctype = sc_sum;
2396 else if (name == "@min")
2397 sop->ctype = sc_min;
2398 else if (name == "@max")
2399 sop->ctype = sc_max;
2400 else
2401 throw parse_error("unknown statistic operator " + name);
2402 expect_op("(");
2403 sop->tok = t;
2404 sop->stat = parse_expression ();
2405 expect_op(")");
2406 return sop;
2407 }
2408
2409 else if (print_format::parse_print(name,
2410 pf_stream, pf_format, pf_delim, pf_newline, pf_char))
2411 {
2412 print_format *fmt = new print_format;
2413 fmt->tok = t;
2414 fmt->print_to_stream = pf_stream;
2415 fmt->print_with_format = pf_format;
2416 fmt->print_with_delim = pf_delim;
2417 fmt->print_with_newline = pf_newline;
2418 fmt->print_char = pf_char;
2419
2420 expect_op("(");
2421 if ((name == "print" || name == "println") &&
2422 (peek_kw("@hist_linear") || peek_kw("@hist_log")))
2423 {
2424 // We have a special case where we recognize
2425 // print(@hist_foo(bar)) as a magic print-the-histogram
2426 // construct. This is sort of gross but it avoids
2427 // promoting histogram references to typeful
2428 // expressions.
2429
2430 hop = NULL;
2431 t = parse_hist_op_or_bare_name(hop, name);
2432 assert(hop);
2433
2434 // It is, sadly, possible that even while parsing a
2435 // hist_op, we *mis-guessed* and the user wishes to
2436 // print(@hist_op(foo)[bucket]), a scalar. In that case
2437 // we must parse the arrayindex and print an expression.
2438
2439 if (!peek_op ("["))
2440 fmt->hist = hop;
2441 else
2442 {
2443 // This is simplified version of the
2444 // multi-array-index parser below, because we can
2445 // only ever have one index on a histogram anyways.
2446 expect_op("[");
2447 struct arrayindex* ai = new arrayindex;
2448 ai->tok = t;
2449 ai->base = hop;
2450 ai->indexes.push_back (parse_expression ());
2451 expect_op("]");
2452 fmt->args.push_back(ai);
2453 }
2454 }
2455 else
2456 {
2457 int min_args = 0;
2458 if (fmt->print_with_format)
2459 {
2460 // Consume and convert a format string. Agreement between the
2461 // format string and the arguments is postponed to the
2462 // typechecking phase.
2463 string tmp;
2464 expect_unknown (tok_string, tmp);
2465 fmt->raw_components = tmp;
2466 fmt->components = print_format::string_to_components (tmp);
2467 }
2468 else if (fmt->print_with_delim)
2469 {
2470 // Consume a delimiter to separate arguments.
2471 fmt->delimiter.clear();
2472 fmt->delimiter.type = print_format::conv_literal;
2473 expect_unknown (tok_string, fmt->delimiter.literal_string);
2474 min_args = 2;
2475 }
2476 else
2477 {
2478 // If we are not printing with a format string, we must have
2479 // at least one argument (of any type).
2480 expression *e = parse_expression ();
2481 fmt->args.push_back(e);
2482 }
2483
2484 // Consume any subsequent arguments.
2485 while (min_args || !peek_op (")"))
2486 {
2487 expect_op(",");
2488 expression *e = parse_expression ();
2489 fmt->args.push_back(e);
2490 if (min_args)
2491 --min_args;
2492 }
2493 }
2494 expect_op(")");
2495 return fmt;
2496 }
2497
2498 else if (name.size() > 0 && name[0] == '$')
2499 {
2500 // target_symbol time
2501 target_symbol *tsym = new target_symbol;
2502 tsym->tok = t;
2503 tsym->base_name = name;
2504 while (true)
2505 {
2506 string c;
2507 if (peek_op ("->"))
2508 {
2509 next();
2510 expect_ident_or_keyword (c);
2511 tsym->components.push_back
2512 (make_pair (target_symbol::comp_struct_member, c));
2513 }
2514 else if (peek_op ("["))
2515 {
2516 next();
2517 expect_unknown (tok_number, c);
2518 expect_op ("]");
2519 tsym->components.push_back
2520 (make_pair (target_symbol::comp_literal_array_index, c));
2521 }
2522 else
2523 break;
2524 }
2525 return tsym;
2526 }
2527
2528 else if (peek_op ("(")) // function call
2529 {
2530 next ();
2531 struct functioncall* f = new functioncall;
2532 f->tok = t;
2533 f->function = name;
2534 // Allow empty actual parameter list
2535 if (peek_op (")"))
2536 {
2537 next ();
2538 return f;
2539 }
2540 while (1)
2541 {
2542 f->args.push_back (parse_expression ());
2543 if (peek_op (")"))
2544 {
2545 next();
2546 break;
2547 }
2548 else if (peek_op (","))
2549 {
2550 next();
2551 continue;
2552 }
2553 else
2554 throw parse_error ("expected ',' or ')'");
2555 }
2556 return f;
2557 }
2558
2559 else
2560 {
2561 sym = new symbol;
2562 sym->name = name;
2563 sym->tok = t;
2564 }
2565 }
2566
2567 // By now, either we had a hist_op in the first place, or else
2568 // we had a plain word and it was converted to a symbol.
2569
2570 assert (!hop != !sym); // logical XOR
2571
2572 // All that remains is to check for array indexing
2573
2574 if (peek_op ("[")) // array
2575 {
2576 next ();
2577 struct arrayindex* ai = new arrayindex;
2578 ai->tok = t;
2579
2580 if (hop)
2581 ai->base = hop;
2582 else
2583 ai->base = sym;
2584
2585 while (1)
2586 {
2587 ai->indexes.push_back (parse_expression ());
2588 if (peek_op ("]"))
2589 {
2590 next();
2591 break;
2592 }
2593 else if (peek_op (","))
2594 {
2595 next();
2596 continue;
2597 }
2598 else
2599 throw parse_error ("expected ',' or ']'");
2600 }
2601 return ai;
2602 }
2603
2604 // If we got to here, we *should* have a symbol; if we have
2605 // a hist_op on its own, it doesn't count as an expression,
2606 // so we throw a parse error.
2607
2608 if (hop)
2609 throw parse_error("base histogram operator where expression expected", t);
2610
2611 return sym;
2612 }
2613
2614 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.153825 seconds and 5 git commands to generate.