]> sourceware.org Git - systemtap.git/blame - parse.cxx
2005-11-28 Martin Hunt <hunt@redhat.com>
[systemtap.git] / parse.cxx
CommitLineData
2f1a1aea 1// recursive descent parser for systemtap scripts
69c68955
FCE
2// Copyright (C) 2005 Red Hat Inc.
3//
4// This file is part of systemtap, and is free software. You can
5// redistribute it and/or modify it under the terms of the GNU General
6// Public License (GPL); either version 2, or (at your option) any
7// later version.
2f1a1aea 8
2b066ec1 9#include "config.h"
2f1a1aea
FCE
10#include "staptree.h"
11#include "parse.h"
177a8ead 12#include "session.h"
2b066ec1
FCE
13#include <iostream>
14#include <fstream>
2f1a1aea 15#include <cctype>
9c0c0e46 16#include <cstdlib>
9c0c0e46
FCE
17#include <cerrno>
18#include <climits>
57b73400 19#include <sstream>
2f1a1aea
FCE
20
21using namespace std;
22
23// ------------------------------------------------------------------------
24
bb2e3076
FCE
25
26
177a8ead
FCE
27parser::parser (systemtap_session& s, istream& i, bool p):
28 session (s),
24cb178f
FCE
29 input_name ("<input>"), free_input (0),
30 input (i, input_name), privileged (p),
2f1a1aea
FCE
31 last_t (0), next_t (0), num_errors (0)
32{ }
33
177a8ead
FCE
34parser::parser (systemtap_session& s, const string& fn, bool p):
35 session (s),
2f1a1aea 36 input_name (fn), free_input (new ifstream (input_name.c_str(), ios::in)),
24cb178f 37 input (* free_input, input_name), privileged (p),
2f1a1aea
FCE
38 last_t (0), next_t (0), num_errors (0)
39{ }
40
41parser::~parser()
42{
43 if (free_input) delete free_input;
44}
45
46
82919855 47stapfile*
177a8ead 48parser::parse (systemtap_session& s, std::istream& i, bool pr)
82919855 49{
177a8ead 50 parser p (s, i, pr);
82919855
FCE
51 return p.parse ();
52}
53
54
55stapfile*
177a8ead 56parser::parse (systemtap_session& s, const std::string& n, bool pr)
82919855 57{
177a8ead 58 parser p (s, n, pr);
82919855
FCE
59 return p.parse ();
60}
61
d7f3e0c5
GH
62static string
63tt2str(token_type tt)
64{
65 switch (tt)
66 {
67 case tok_junk: return "junk";
68 case tok_identifier: return "identifier";
69 case tok_operator: return "operator";
70 case tok_string: return "string";
71 case tok_number: return "number";
72 case tok_embedded: return "embedded-code";
73 }
74 return "unknown token";
75}
82919855 76
56099f08
FCE
77ostream&
78operator << (ostream& o, const token& t)
79{
d7f3e0c5 80 o << tt2str(t.type);
56099f08 81
24cb178f 82 if (t.type != tok_embedded) // XXX: other types?
56099f08 83 {
24cb178f
FCE
84 o << " '";
85 for (unsigned i=0; i<t.content.length(); i++)
86 {
87 char c = t.content[i];
88 o << (isprint (c) ? c : '?');
89 }
90 o << "'";
56099f08 91 }
56099f08
FCE
92
93 o << " at "
94 << t.location.file << ":"
95 << t.location.line << ":"
96 << t.location.column;
97
98 return o;
99}
100
101
2f1a1aea
FCE
102void
103parser::print_error (const parse_error &pe)
104{
105 cerr << "parse error: " << pe.what () << endl;
106
177a8ead
FCE
107 if (pe.tok)
108 {
109 cerr << "\tat: " << *pe.tok << endl;
110 }
2f1a1aea 111 else
177a8ead
FCE
112 {
113 const token* t = last_t;
114 if (t)
115 cerr << "\tsaw: " << *t << endl;
116 else
117 cerr << "\tsaw: " << input_name << " EOF" << endl;
118 }
2f1a1aea
FCE
119
120 // XXX: make it possible to print the last input line,
121 // so as to line up an arrow with the specific error column
122
123 num_errors ++;
124}
125
126
127const token*
128parser::last ()
129{
130 return last_t;
131}
132
133
177a8ead
FCE
// Here, we perform on-the-fly preprocessing.
// The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
// where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
//                 or: arch COMPARISON-OP "arch-string"
// The %: ELSE-TOKENS part is optional.
//
// e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
// e.g. %( arch != "i686" %? "foo" %: "baz" %)
//
// Up to an entire %( ... %) expression is processed by a single call
// to parser::scan_pp.  Tokens included by any nested conditions are
// enqueued in a private vector.
146
147bool eval_pp_conditional (systemtap_session& s,
148 const token* l, const token* op, const token* r)
149{
44ce8ed5
FCE
150 if (l->type == tok_identifier && (l->content == "kernel_v" ||
151 l->content == "kernel_vr"))
152 {
153 string target_kernel_vr = s.kernel_release;
154 string target_kernel_v = target_kernel_vr;
155 // cut off any release code suffix
156 string::size_type dr = target_kernel_vr.rfind ('-');
157 if (dr > 0 && dr != string::npos)
158 target_kernel_v = target_kernel_vr.substr (0, dr);
159
160 if (! (r->type == tok_string))
161 throw parse_error ("expected string literal", r);
162 string query_kernel_vr = r->content;
163
164 // collect acceptable strverscmp results.
165 int rvc_ok1, rvc_ok2;
166 if (op->type == tok_operator && op->content == "<=")
167 { rvc_ok1 = -1; rvc_ok2 = 0; }
168 else if (op->type == tok_operator && op->content == ">=")
169 { rvc_ok1 = 1; rvc_ok2 = 0; }
170 else if (op->type == tok_operator && op->content == "<")
171 { rvc_ok1 = -1; rvc_ok2 = -1; }
172 else if (op->type == tok_operator && op->content == ">")
173 { rvc_ok1 = 1; rvc_ok2 = 1; }
174 else if (op->type == tok_operator && op->content == "==")
175 { rvc_ok1 = 0; rvc_ok2 = 0; }
176 else if (op->type == tok_operator && op->content == "!=")
177 { rvc_ok1 = -1; rvc_ok2 = 1; }
178 else
179 throw parse_error ("expected comparison operator", op);
180
181 int rvc_result = strverscmp ((l->content == "kernel_vr" ?
182 target_kernel_vr.c_str() :
183 target_kernel_v.c_str()),
184 query_kernel_vr.c_str());
185 // normalize rvc_result
186 if (rvc_result < 0) rvc_result = -1;
187 if (rvc_result > 0) rvc_result = 1;
188
189 return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
190 }
191 else if (l->type == tok_identifier && l->content == "arch")
192 {
193 string target_architecture = s.architecture;
194 if (! (r->type == tok_string))
195 throw parse_error ("expected string literal", r);
196 string query_architecture = r->content;
197
198 bool result;
199 if (op->type == tok_operator && op->content == "==")
200 result = target_architecture == query_architecture;
201 else if (op->type == tok_operator && op->content == "!=")
202 result = target_architecture != query_architecture;
203 else
204 throw parse_error ("expected '==' or '!='", op);
205
206 return result;
207 }
208 // XXX: support other forms? "CONFIG_SMP" ?
177a8ead 209 else
44ce8ed5 210 throw parse_error ("expected 'arch' or 'kernel_v' or 'kernel_vr'", l);
177a8ead
FCE
211}
212
213
214const token*
215parser::scan_pp ()
216{
217 while (true)
218 {
219 if (enqueued_pp.size() > 0)
220 {
221 const token* t = enqueued_pp[0];
222 enqueued_pp.erase (enqueued_pp.begin());
223 return t;
224 }
225
226 const token* t = input.scan (); // NB: not recursive!
227 if (t == 0) // EOF
228 return t;
229
230 if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
231 return t;
232
233 // We have a %( - it's time to throw a preprocessing party!
234
235 const token *l, *op, *r;
236 l = input.scan (); // NB: not recursive, though perhaps could be
237 op = input.scan ();
238 r = input.scan ();
239 if (l == 0 || op == 0 || r == 0)
240 throw parse_error ("incomplete condition after '%('", t);
241 // NB: consider generalizing to consume all tokens until %?, and
242 // passing that as a vector to an evaluator.
243
244 bool result = eval_pp_conditional (session, l, op, r);
245
246 const token *m = input.scan (); // NB: not recursive
247 if (! (m && m->type == tok_operator && m->content == "%?"))
248 throw parse_error ("expected '%?' marker for conditional", t);
249
250 vector<const token*> my_enqueued_pp;
251
252 while (true) // consume THEN tokens
253 {
254 m = scan_pp (); // NB: recursive
255 if (m == 0)
256 throw parse_error ("missing THEN tokens for conditional", t);
257
258 if (m->type == tok_operator && (m->content == "%:" || // ELSE
259 m->content == "%)")) // END
260 break;
261 // enqueue token
262 if (result)
263 my_enqueued_pp.push_back (m);
264 // continue
265 }
266
267 if (m && m->type == tok_operator && m->content == "%:") // ELSE
268 while (true)
269 {
270 m = scan_pp (); // NB: recursive
271 if (m == 0)
272 throw parse_error ("missing ELSE tokens for conditional", t);
273
274 if (m->type == tok_operator && m->content == "%)") // END
275 break;
276 // enqueue token
277 if (! result)
278 my_enqueued_pp.push_back (m);
279 // continue
280 }
281
282 // NB: we transcribe the retained tokens here, and not inside
283 // the THEN/ELSE while loops. If it were done there, each loop
284 // would become infinite (each iteration consuming an ordinary
285 // token the previous one just pushed there). Guess how I
286 // figured that out.
287 enqueued_pp.insert (enqueued_pp.end(),
288 my_enqueued_pp.begin(),
289 my_enqueued_pp.end());
290
291 // Go back to outermost while(true) loop. We hope that at least
292 // some THEN or ELSE tokens were enqueued. If not, around we go
293 // again, until EOF.
294 }
295}
296
297
2f1a1aea
FCE
298const token*
299parser::next ()
300{
301 if (! next_t)
177a8ead 302 next_t = scan_pp ();
2f1a1aea
FCE
303 if (! next_t)
304 throw parse_error ("unexpected end-of-file");
305
2f1a1aea
FCE
306 last_t = next_t;
307 // advance by zeroing next_t
308 next_t = 0;
309 return last_t;
310}
311
312
313const token*
314parser::peek ()
315{
316 if (! next_t)
177a8ead 317 next_t = scan_pp ();
2f1a1aea
FCE
318
319 // don't advance by zeroing next_t
320 last_t = next_t;
321 return next_t;
322}
323
324
d7f3e0c5
GH
325static inline bool
326tok_is(token const * t, token_type tt, string const & expected)
327{
328 return t && t->type == tt && t->content == expected;
329}
330
331
332const token*
333parser::expect_known (token_type tt, string const & expected)
334{
335 const token *t = next();
57b73400 336 if (! (t && t->type == tt && t->content == expected))
d7f3e0c5
GH
337 throw parse_error ("expected '" + expected + "'");
338 return t;
339}
340
341
342const token*
343parser::expect_unknown (token_type tt, string & target)
344{
345 const token *t = next();
346 if (!(t && t->type == tt))
347 throw parse_error ("expected " + tt2str(tt));
348 target = t->content;
349 return t;
350}
351
352
353const token*
354parser::expect_op (std::string const & expected)
355{
356 return expect_known (tok_operator, expected);
357}
358
359
360const token*
361parser::expect_kw (std::string const & expected)
362{
363 return expect_known (tok_identifier, expected);
364}
365
57b73400
GH
366const token*
367parser::expect_number (int64_t & expected)
368{
369 std::string tmp;
370 token const * tt = expect_unknown (tok_number, tmp);
371 istringstream iss(tmp);
372 iss >> expected;
373 return tt;
374}
375
d7f3e0c5
GH
376
377const token*
378parser::expect_ident (std::string & target)
379{
380 return expect_unknown (tok_identifier, target);
381}
382
383
384bool
385parser::peek_op (std::string const & op)
386{
387 return tok_is (peek(), tok_operator, op);
388}
389
390
391bool
392parser::peek_kw (std::string const & kw)
393{
394 return tok_is (peek(), tok_identifier, kw);
395}
396
397
398
2f1a1aea
FCE
399lexer::lexer (istream& i, const string& in):
400 input (i), input_name (in), cursor_line (1), cursor_column (1)
401{ }
402
bb2e3076
FCE
403
404int
405lexer::input_peek (unsigned n)
406{
407 while (lookahead.size() <= n)
408 {
409 int c = input.get ();
410 lookahead.push_back (input ? c : -1);
411 }
412 return lookahead[n];
413}
414
415
2f1a1aea
FCE
416int
417lexer::input_get ()
418{
bb2e3076
FCE
419 int c = input_peek (0);
420 lookahead.erase (lookahead.begin ());
421
422 if (c < 0) return c; // EOF
423
2f1a1aea
FCE
424 // update source cursor
425 if (c == '\n')
426 {
427 cursor_line ++;
428 cursor_column = 1;
429 }
430 else
431 cursor_column ++;
432
433 return c;
434}
435
436
437token*
438lexer::scan ()
439{
440 token* n = new token;
441 n->location.file = input_name;
442
443 skip:
444 n->location.line = cursor_line;
445 n->location.column = cursor_column;
446
447 int c = input_get();
448 if (c < 0)
449 {
450 delete n;
451 return 0;
452 }
453
454 if (isspace (c))
455 goto skip;
456
d02548c0 457 else if (isalpha (c) || c == '$' || c == '@' || c == '_')
2f1a1aea
FCE
458 {
459 n->type = tok_identifier;
460 n->content = (char) c;
461 while (1)
462 {
bb2e3076 463 int c2 = input_peek ();
2f1a1aea
FCE
464 if (! input)
465 break;
0fefb486 466 if ((isalnum(c2) || c2 == '_' || c2 == '$'))
2f1a1aea
FCE
467 {
468 n->content.push_back(c2);
469 input_get ();
470 }
471 else
472 break;
473 }
474 return n;
475 }
476
3a20432b 477 else if (isdigit (c)) // positive literal
2f1a1aea 478 {
2f1a1aea 479 n->type = tok_number;
9c0c0e46
FCE
480 n->content = (char) c;
481
2f1a1aea
FCE
482 while (1)
483 {
bb2e3076 484 int c2 = input_peek ();
2f1a1aea
FCE
485 if (! input)
486 break;
9c0c0e46
FCE
487
488 // NB: isalnum is very permissive. We rely on strtol, called in
489 // parser::parse_literal below, to confirm that the number string
490 // is correctly formatted and in range.
491
492 if (isalnum (c2))
2f1a1aea 493 {
9c0c0e46 494 n->content.push_back (c2);
2f1a1aea
FCE
495 input_get ();
496 }
497 else
498 break;
499 }
500 return n;
501 }
502
503 else if (c == '\"')
504 {
505 n->type = tok_string;
506 while (1)
507 {
508 c = input_get ();
509
510 if (! input || c == '\n')
511 {
512 n->type = tok_junk;
513 break;
514 }
515 if (c == '\"') // closing double-quotes
516 break;
517 else if (c == '\\')
7d46afb8
GH
518 {
519 c = input_get ();
520 switch (c)
521 {
522 case 'a':
523 case 'b':
524 case 't':
525 case 'n':
526 case 'v':
527 case 'f':
528 case 'r':
529 case '\\':
530
531 // Pass these escapes through to the string value
532 // beign parsed; it will "likely" be emitted into
533 // a C literal.
534 //
535 // XXX: verify this assumption.
536
537 n->content.push_back('\\');
538
539 default:
540
541 n->content.push_back(c);
542 break;
543 }
2f1a1aea
FCE
544 }
545 else
546 n->content.push_back(c);
547 }
548 return n;
549 }
550
551 else if (ispunct (c))
552 {
bb2e3076
FCE
553 int c2 = input_peek ();
554 int c3 = input_peek (1);
555 string s1 = string("") + (char) c;
556 string s2 = (c2 > 0 ? s1 + (char) c2 : s1);
557 string s3 = (c3 > 0 ? s2 + (char) c3 : s2);
2f1a1aea 558
3a20432b
FCE
559 // NB: if we were to recognize negative numeric literals here,
560 // we'd introduce another grammar ambiguity:
561 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
562 // instead of tok_number(1) tok_operator('-') tok_number(1)
563
bb2e3076 564 if (s1 == "#") // shell comment
2f1a1aea
FCE
565 {
566 unsigned this_line = cursor_line;
bb2e3076
FCE
567 do { c = input_get (); }
568 while (c >= 0 && cursor_line == this_line);
2f1a1aea
FCE
569 goto skip;
570 }
bb2e3076 571 else if (s2 == "//") // C++ comment
63a7c90e
FCE
572 {
573 unsigned this_line = cursor_line;
bb2e3076
FCE
574 do { c = input_get (); }
575 while (c >= 0 && cursor_line == this_line);
63a7c90e
FCE
576 goto skip;
577 }
578 else if (c == '/' && c2 == '*') // C comment
579 {
580 c2 = input_get ();
581 unsigned chars = 0;
bb2e3076 582 while (c2 >= 0)
63a7c90e
FCE
583 {
584 chars ++; // track this to prevent "/*/" from being accepted
585 c = c2;
586 c2 = input_get ();
587 if (chars > 1 && c == '*' && c2 == '/')
bb2e3076 588 break;
63a7c90e 589 }
bb2e3076 590 goto skip;
63a7c90e 591 }
54dfabe9
FCE
592 else if (c == '%' && c2 == '{') // embedded code
593 {
594 n->type = tok_embedded;
595 (void) input_get (); // swallow '{' already in c2
596 while (true)
597 {
598 c = input_get ();
599 if (c == 0) // EOF
600 {
601 n->type = tok_junk;
602 break;
603 }
604 if (c == '%')
605 {
606 c2 = input_peek ();
607 if (c2 == '}')
608 {
609 (void) input_get (); // swallow '}' too
610 break;
611 }
612 }
613 n->content += c;
614 }
615 return n;
616 }
2f1a1aea 617
bb2e3076
FCE
618 // We're committed to recognizing at least the first character
619 // as an operator.
2f1a1aea 620 n->type = tok_operator;
2f1a1aea 621
bb2e3076
FCE
622 // match all valid operators, in decreasing size order
623 if (s3 == "<<<" ||
624 s3 == "<<=" ||
625 s3 == ">>=")
82919855 626 {
bb2e3076
FCE
627 n->content = s3;
628 input_get (); input_get (); // swallow other two characters
629 }
630 else if (s2 == "==" ||
631 s2 == "!=" ||
632 s2 == "<=" ||
633 s2 == ">=" ||
634 s2 == "+=" ||
635 s2 == "-=" ||
636 s2 == "*=" ||
637 s2 == "/=" ||
638 s2 == "%=" ||
639 s2 == "&=" ||
640 s2 == "^=" ||
641 s2 == "|=" ||
d5d7c2cc 642 s2 == ".=" ||
bb2e3076
FCE
643 s2 == "&&" ||
644 s2 == "||" ||
645 s2 == "++" ||
646 s2 == "--" ||
647 s2 == "->" ||
648 s2 == "<<" ||
177a8ead
FCE
649 s2 == ">>" ||
650 // preprocessor tokens
651 s2 == "%(" ||
652 s2 == "%?" ||
653 s2 == "%:" ||
654 s2 == "%)")
bb2e3076
FCE
655 {
656 n->content = s2;
657 input_get (); // swallow other character
658 }
659 else
660 {
661 n->content = s1;
82919855 662 }
2f1a1aea
FCE
663
664 return n;
665 }
666
667 else
668 {
669 n->type = tok_junk;
670 n->content = (char) c;
671 return n;
672 }
673}
674
675
676// ------------------------------------------------------------------------
677
678stapfile*
679parser::parse ()
680{
681 stapfile* f = new stapfile;
682 f->name = input_name;
56099f08
FCE
683
684 bool empty = true;
685
2f1a1aea
FCE
686 while (1)
687 {
688 try
689 {
690 const token* t = peek ();
56099f08 691 if (! t) // nice clean EOF
2f1a1aea
FCE
692 break;
693
56099f08 694 empty = false;
2f1a1aea 695 if (t->type == tok_identifier && t->content == "probe")
54dfabe9 696 parse_probe (f->probes, f->aliases);
2f1a1aea 697 else if (t->type == tok_identifier && t->content == "global")
07c17d67 698 parse_global (f->globals);
56099f08 699 else if (t->type == tok_identifier && t->content == "function")
24cb178f 700 parse_functiondecl (f->functions);
54dfabe9
FCE
701 else if (t->type == tok_embedded)
702 f->embeds.push_back (parse_embeddedcode ());
2f1a1aea 703 else
24cb178f 704 throw parse_error ("expected 'probe', 'global', 'function', or '%{'");
2f1a1aea
FCE
705 }
706 catch (parse_error& pe)
707 {
708 print_error (pe);
177a8ead
FCE
709 try
710 {
711 // Quietly swallow all tokens until the next '}'.
712 while (1)
713 {
714 const token* t = peek ();
715 if (! t)
716 break;
717 next ();
718 if (t->type == tok_operator && t->content == "}")
719 break;
720 }
721 }
722 catch (parse_error& pe2)
723 {
724 // parse error during recovery ... ugh
725 print_error (pe2);
726 }
727 }
2f1a1aea
FCE
728 }
729
56099f08
FCE
730 if (empty)
731 {
732 cerr << "Input file '" << input_name << "' is empty or missing." << endl;
733 delete f;
734 return 0;
735 }
736 else if (num_errors > 0)
2f1a1aea
FCE
737 {
738 cerr << num_errors << " parse error(s)." << endl;
739 delete f;
56099f08 740 return 0;
2f1a1aea
FCE
741 }
742
743 return f;
744}
745
746
20c6c071 747void
54dfabe9
FCE
748parser::parse_probe (std::vector<probe *> & probe_ret,
749 std::vector<probe_alias *> & alias_ret)
2f1a1aea 750{
82919855
FCE
751 const token* t0 = next ();
752 if (! (t0->type == tok_identifier && t0->content == "probe"))
753 throw parse_error ("expected 'probe'");
754
20c6c071
GH
755 vector<probe_point *> aliases;
756 vector<probe_point *> locations;
757
758 bool equals_ok = true;
82919855 759
2f1a1aea
FCE
760 while (1)
761 {
b4ceace2
FCE
762 probe_point * pp = parse_probe_point ();
763
764 const token* t = peek ();
765 if (equals_ok && t
766 && t->type == tok_operator && t->content == "=")
767 {
768 aliases.push_back(pp);
769 next ();
770 continue;
771 }
772 else if (t && t->type == tok_operator && t->content == ",")
773 {
774 locations.push_back(pp);
775 equals_ok = false;
776 next ();
777 continue;
778 }
779 else if (t && t->type == tok_operator && t->content == "{")
780 {
781 locations.push_back(pp);
782 break;
783 }
2f1a1aea 784 else
9c0c0e46 785 throw parse_error ("expected probe point specifier");
2f1a1aea 786 }
20c6c071 787
20c6c071
GH
788 if (aliases.empty())
789 {
54dfabe9
FCE
790 probe* p = new probe;
791 p->tok = t0;
792 p->locations = locations;
793 p->body = parse_stmt_block ();
794 probe_ret.push_back (p);
20c6c071
GH
795 }
796 else
797 {
54dfabe9
FCE
798 probe_alias* p = new probe_alias (aliases);
799 p->tok = t0;
800 p->locations = locations;
801 p->body = parse_stmt_block ();
802 alias_ret.push_back (p);
20c6c071 803 }
54dfabe9 804}
20c6c071 805
54dfabe9
FCE
806
807embeddedcode*
808parser::parse_embeddedcode ()
809{
810 embeddedcode* e = new embeddedcode;
811 const token* t = next ();
812 if (t->type != tok_embedded)
24cb178f
FCE
813 throw parse_error ("expected '%{'");
814
815 if (! privileged)
816 throw parse_error ("embedded code in unprivileged script");
54dfabe9
FCE
817
818 e->tok = t;
819 e->code = t->content;
820 return e;
2f1a1aea
FCE
821}
822
823
824block*
56099f08 825parser::parse_stmt_block ()
2f1a1aea
FCE
826{
827 block* pb = new block;
828
56099f08
FCE
829 const token* t = next ();
830 if (! (t->type == tok_operator && t->content == "{"))
831 throw parse_error ("expected '{'");
832
833 pb->tok = t;
2b066ec1 834
2f1a1aea
FCE
835 while (1)
836 {
837 try
838 {
2b066ec1
FCE
839 t = peek ();
840 if (t && t->type == tok_operator && t->content == "}")
841 {
842 next ();
843 break;
844 }
845
2f1a1aea 846 pb->statements.push_back (parse_statement ());
2f1a1aea
FCE
847 }
848 catch (parse_error& pe)
849 {
850 print_error (pe);
54dfabe9 851
2f1a1aea
FCE
852 // Quietly swallow all tokens until the next ';' or '}'.
853 while (1)
854 {
855 const token* t = peek ();
54dfabe9 856 if (! t) return 0;
2f1a1aea 857 next ();
54dfabe9
FCE
858 if (t->type == tok_operator
859 && (t->content == "}" || t->content == ";"))
2f1a1aea
FCE
860 break;
861 }
862 }
863 }
864
865 return pb;
866}
867
868
869statement*
870parser::parse_statement ()
871{
872 const token* t = peek ();
873 if (t && t->type == tok_operator && t->content == ";")
874 {
69c68955
FCE
875 null_statement* n = new null_statement ();
876 n->tok = next ();
877 return n;
2f1a1aea
FCE
878 }
879 else if (t && t->type == tok_operator && t->content == "{")
56099f08 880 return parse_stmt_block ();
2f1a1aea 881 else if (t && t->type == tok_identifier && t->content == "if")
56099f08 882 return parse_if_statement ();
69c68955
FCE
883 else if (t && t->type == tok_identifier && t->content == "for")
884 return parse_for_loop ();
69c68955
FCE
885 else if (t && t->type == tok_identifier && t->content == "foreach")
886 return parse_foreach_loop ();
56099f08
FCE
887 else if (t && t->type == tok_identifier && t->content == "return")
888 return parse_return_statement ();
889 else if (t && t->type == tok_identifier && t->content == "delete")
890 return parse_delete_statement ();
f3c26ea5
FCE
891 else if (t && t->type == tok_identifier && t->content == "while")
892 return parse_while_loop ();
893 else if (t && t->type == tok_identifier && t->content == "break")
894 return parse_break_statement ();
895 else if (t && t->type == tok_identifier && t->content == "continue")
896 return parse_continue_statement ();
897 else if (t && t->type == tok_identifier && t->content == "next")
898 return parse_next_statement ();
899 // XXX: "do/while" statement?
2f1a1aea
FCE
900 else if (t && (t->type == tok_operator || // expressions are flexible
901 t->type == tok_identifier ||
902 t->type == tok_number ||
903 t->type == tok_string))
69c68955 904 return parse_expr_statement ();
54dfabe9 905 // XXX: consider generally accepting tok_embedded here too
2f1a1aea
FCE
906 else
907 throw parse_error ("expected statement");
908}
909
910
56099f08 911void
07c17d67 912parser::parse_global (vector <vardecl*>& globals)
2f1a1aea 913{
82919855
FCE
914 const token* t0 = next ();
915 if (! (t0->type == tok_identifier && t0->content == "global"))
916 throw parse_error ("expected 'global'");
917
56099f08
FCE
918 while (1)
919 {
920 const token* t = next ();
921 if (! (t->type == tok_identifier))
922 throw parse_error ("expected identifier");
923
2b066ec1
FCE
924 for (unsigned i=0; i<globals.size(); i++)
925 if (globals[i]->name == t->content)
57b73400
GH
926 throw parse_error ("duplicate global name");
927
24cb178f
FCE
928 vardecl* d = new vardecl;
929 d->name = t->content;
930 d->tok = t;
931 globals.push_back (d);
56099f08 932
82919855
FCE
933 t = peek ();
934 if (t && t->type == tok_operator && t->content == ",")
935 {
936 next ();
937 continue;
938 }
56099f08 939 else
82919855 940 break;
56099f08
FCE
941 }
942}
943
944
24cb178f
FCE
945void
946parser::parse_functiondecl (std::vector<functiondecl*>& functions)
56099f08 947{
82919855
FCE
948 const token* t = next ();
949 if (! (t->type == tok_identifier && t->content == "function"))
950 throw parse_error ("expected 'function'");
951
56099f08 952
82919855 953 t = next ();
56099f08
FCE
954 if (! (t->type == tok_identifier))
955 throw parse_error ("expected identifier");
24cb178f
FCE
956
957 for (unsigned i=0; i<functions.size(); i++)
958 if (functions[i]->name == t->content)
959 throw parse_error ("duplicate function name");
960
961 functiondecl *fd = new functiondecl ();
56099f08
FCE
962 fd->name = t->content;
963 fd->tok = t;
964
965 t = next ();
6a505121
FCE
966 if (t->type == tok_operator && t->content == ":")
967 {
968 t = next ();
969 if (t->type == tok_identifier && t->content == "string")
970 fd->type = pe_string;
971 else if (t->type == tok_identifier && t->content == "long")
972 fd->type = pe_long;
973 else throw parse_error ("expected 'string' or 'long'");
974
975 t = next ();
976 }
977
56099f08
FCE
978 if (! (t->type == tok_operator && t->content == "("))
979 throw parse_error ("expected '('");
980
981 while (1)
982 {
983 t = next ();
984
985 // permit zero-argument fuctions
986 if (t->type == tok_operator && t->content == ")")
987 break;
988 else if (! (t->type == tok_identifier))
989 throw parse_error ("expected identifier");
990 vardecl* vd = new vardecl;
991 vd->name = t->content;
992 vd->tok = t;
993 fd->formal_args.push_back (vd);
994
995 t = next ();
6a505121
FCE
996 if (t->type == tok_operator && t->content == ":")
997 {
998 t = next ();
999 if (t->type == tok_identifier && t->content == "string")
1000 vd->type = pe_string;
1001 else if (t->type == tok_identifier && t->content == "long")
1002 vd->type = pe_long;
1003 else throw parse_error ("expected 'string' or 'long'");
1004
1005 t = next ();
1006 }
56099f08
FCE
1007 if (t->type == tok_operator && t->content == ")")
1008 break;
1009 if (t->type == tok_operator && t->content == ",")
1010 continue;
1011 else
1012 throw parse_error ("expected ',' or ')'");
1013 }
1014
54dfabe9
FCE
1015 t = peek ();
1016 if (t && t->type == tok_embedded)
1017 fd->body = parse_embeddedcode ();
1018 else
1019 fd->body = parse_stmt_block ();
24cb178f
FCE
1020
1021 functions.push_back (fd);
2f1a1aea
FCE
1022}
1023
1024
9c0c0e46
FCE
1025probe_point*
1026parser::parse_probe_point ()
2f1a1aea 1027{
9c0c0e46 1028 probe_point* pl = new probe_point;
2f1a1aea 1029
9c0c0e46 1030 while (1)
2f1a1aea 1031 {
9c0c0e46 1032 const token* t = next ();
b4ceace2
FCE
1033 if (! (t->type == tok_identifier ||
1034 (t->type == tok_operator && t->content == "*")))
1035 throw parse_error ("expected identifier or '*'");
9c0c0e46
FCE
1036
1037 if (pl->tok == 0) pl->tok = t;
1038
1039 probe_point::component* c = new probe_point::component;
1040 c->functor = t->content;
1041 pl->components.push_back (c);
1042 // NB though we still may add c->arg soon
1043
1044 t = peek ();
1045 if (t && t->type == tok_operator
20c6c071 1046 && (t->content == "{" || t->content == "," || t->content == "="))
9c0c0e46
FCE
1047 break;
1048
1049 if (t && t->type == tok_operator && t->content == "(")
1050 {
1051 next (); // consume "("
1052 c->arg = parse_literal ();
1053
1054 t = next ();
1055 if (! (t->type == tok_operator && t->content == ")"))
1056 throw parse_error ("expected ')'");
1057
1058 t = peek ();
1059 if (t && t->type == tok_operator
20c6c071 1060 && (t->content == "{" || t->content == "," || t->content == "="))
9c0c0e46 1061 break;
2b066ec1
FCE
1062 else if (t && t->type == tok_operator &&
1063 t->content == "(")
1064 throw parse_error ("unexpected '.' or ',' or '{'");
9c0c0e46
FCE
1065 }
1066 // fall through
1067
1068 if (t && t->type == tok_operator && t->content == ".")
1069 next ();
1070 else
20c6c071 1071 throw parse_error ("expected '.' or ',' or '(' or '{' or '='");
2f1a1aea
FCE
1072 }
1073
1074 return pl;
1075}
1076
1077
1078literal*
1079parser::parse_literal ()
1080{
1081 const token* t = next ();
56099f08 1082 literal* l;
2f1a1aea 1083 if (t->type == tok_string)
56099f08 1084 l = new literal_string (t->content);
2f1a1aea 1085 else if (t->type == tok_number)
9c0c0e46
FCE
1086 {
1087 const char* startp = t->content.c_str ();
1088 char* endp = (char*) startp;
1089
3a20432b
FCE
1090 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1091 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1092 // since the lexer only gives us positive digit strings.
9c0c0e46 1093 errno = 0;
3a20432b 1094 long long value = (long long) strtoull (startp, & endp, 0);
9c0c0e46 1095 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
3a20432b
FCE
1096 || (unsigned long long) value > 18446744073709551615ULL
1097 || value < -9223372036854775807LL-1)
9c0c0e46
FCE
1098 throw parse_error ("number invalid or out of range");
1099
3a20432b 1100 l = new literal_number (value);
9c0c0e46 1101 }
2f1a1aea
FCE
1102 else
1103 throw parse_error ("expected literal string or number");
56099f08
FCE
1104
1105 l->tok = t;
1106 return l;
2f1a1aea
FCE
1107}
1108
1109
1110if_statement*
1111parser::parse_if_statement ()
1112{
1113 const token* t = next ();
56099f08
FCE
1114 if (! (t->type == tok_identifier && t->content == "if"))
1115 throw parse_error ("expected 'if'");
1116 if_statement* s = new if_statement;
1117 s->tok = t;
1118
1119 t = next ();
2f1a1aea
FCE
1120 if (! (t->type == tok_operator && t->content == "("))
1121 throw parse_error ("expected '('");
1122
2f1a1aea
FCE
1123 s->condition = parse_expression ();
1124
1125 t = next ();
1126 if (! (t->type == tok_operator && t->content == ")"))
1127 throw parse_error ("expected ')'");
1128
1129 s->thenblock = parse_statement ();
1130
1131 t = peek ();
1132 if (t && t->type == tok_identifier && t->content == "else")
1133 {
1134 next ();
1135 s->elseblock = parse_statement ();
1136 }
ed10c639
FCE
1137 else
1138 s->elseblock = 0; // in case not otherwise initialized
2f1a1aea
FCE
1139
1140 return s;
1141}
1142
1143
69c68955
FCE
1144expr_statement*
1145parser::parse_expr_statement ()
1146{
1147 expr_statement *es = new expr_statement;
1148 const token* t = peek ();
1149 es->tok = t;
1150 es->value = parse_expression ();
1151 return es;
1152}
1153
1154
56099f08
FCE
1155return_statement*
1156parser::parse_return_statement ()
1157{
1158 const token* t = next ();
1159 if (! (t->type == tok_identifier && t->content == "return"))
1160 throw parse_error ("expected 'return'");
1161 return_statement* s = new return_statement;
1162 s->tok = t;
1163 s->value = parse_expression ();
1164 return s;
1165}
1166
1167
1168delete_statement*
1169parser::parse_delete_statement ()
1170{
1171 const token* t = next ();
1172 if (! (t->type == tok_identifier && t->content == "delete"))
1173 throw parse_error ("expected 'delete'");
1174 delete_statement* s = new delete_statement;
1175 s->tok = t;
1176 s->value = parse_expression ();
1177 return s;
1178}
1179
1180
f3c26ea5
FCE
1181next_statement*
1182parser::parse_next_statement ()
1183{
1184 const token* t = next ();
1185 if (! (t->type == tok_identifier && t->content == "next"))
1186 throw parse_error ("expected 'next'");
1187 next_statement* s = new next_statement;
1188 s->tok = t;
1189 return s;
1190}
1191
1192
1193break_statement*
1194parser::parse_break_statement ()
1195{
1196 const token* t = next ();
1197 if (! (t->type == tok_identifier && t->content == "break"))
1198 throw parse_error ("expected 'break'");
1199 break_statement* s = new break_statement;
1200 s->tok = t;
1201 return s;
1202}
1203
1204
1205continue_statement*
1206parser::parse_continue_statement ()
1207{
1208 const token* t = next ();
1209 if (! (t->type == tok_identifier && t->content == "continue"))
1210 throw parse_error ("expected 'continue'");
1211 continue_statement* s = new continue_statement;
1212 s->tok = t;
1213 return s;
1214}
1215
1216
69c68955
FCE
1217for_loop*
1218parser::parse_for_loop ()
1219{
f3c26ea5
FCE
1220 const token* t = next ();
1221 if (! (t->type == tok_identifier && t->content == "for"))
1222 throw parse_error ("expected 'for'");
1223 for_loop* s = new for_loop;
1224 s->tok = t;
1225
1226 t = next ();
1227 if (! (t->type == tok_operator && t->content == "("))
1228 throw parse_error ("expected '('");
1229
1230 // initializer + ";"
1231 t = peek ();
1232 if (t && t->type == tok_operator && t->content == ";")
1233 {
1234 literal_number* l = new literal_number(0);
1235 expr_statement* es = new expr_statement;
1236 es->value = l;
1237 s->init = es;
1238 es->value->tok = es->tok = next ();
1239 }
1240 else
1241 {
1242 s->init = parse_expr_statement ();
1243 t = next ();
1244 if (! (t->type == tok_operator && t->content == ";"))
1245 throw parse_error ("expected ';'");
1246 }
1247
1248 // condition + ";"
1249 t = peek ();
1250 if (t && t->type == tok_operator && t->content == ";")
1251 {
1252 literal_number* l = new literal_number(1);
1253 s->cond = l;
1254 s->cond->tok = next ();
1255 }
1256 else
1257 {
1258 s->cond = parse_expression ();
1259 t = next ();
1260 if (! (t->type == tok_operator && t->content == ";"))
1261 throw parse_error ("expected ';'");
1262 }
1263
1264 // increment + ")"
1265 t = peek ();
1266 if (t && t->type == tok_operator && t->content == ")")
1267 {
1268 literal_number* l = new literal_number(2);
1269 expr_statement* es = new expr_statement;
1270 es->value = l;
1271 s->incr = es;
1272 es->value->tok = es->tok = next ();
1273 }
1274 else
1275 {
1276 s->incr = parse_expr_statement ();
1277 t = next ();
1278 if (! (t->type == tok_operator && t->content == ")"))
1279 throw parse_error ("expected ';'");
1280 }
1281
1282 // block
1283 s->block = parse_statement ();
1284
1285 return s;
1286}
1287
1288
1289for_loop*
1290parser::parse_while_loop ()
1291{
1292 const token* t = next ();
1293 if (! (t->type == tok_identifier && t->content == "while"))
1294 throw parse_error ("expected 'while'");
1295 for_loop* s = new for_loop;
1296 s->tok = t;
1297
1298 t = next ();
1299 if (! (t->type == tok_operator && t->content == "("))
1300 throw parse_error ("expected '('");
1301
1302 // dummy init and incr fields
1303 literal_number* l = new literal_number(0);
1304 expr_statement* es = new expr_statement;
1305 es->value = l;
1306 s->init = es;
1307 es->value->tok = es->tok = t;
1308
1309 l = new literal_number(2);
1310 es = new expr_statement;
1311 es->value = l;
1312 s->incr = es;
1313 es->value->tok = es->tok = t;
1314
1315
1316 // condition
1317 s->cond = parse_expression ();
1318
1319
1320 t = next ();
1321 if (! (t->type == tok_operator && t->content == ")"))
1322 throw parse_error ("expected ')'");
1323
1324 // block
1325 s->block = parse_statement ();
1326
1327 return s;
69c68955
FCE
1328}
1329
1330
1331foreach_loop*
1332parser::parse_foreach_loop ()
1333{
1334 const token* t = next ();
1335 if (! (t->type == tok_identifier && t->content == "foreach"))
1336 throw parse_error ("expected 'foreach'");
1337 foreach_loop* s = new foreach_loop;
1338 s->tok = t;
93484556 1339 s->sort_direction = 0;
69c68955
FCE
1340
1341 t = next ();
1342 if (! (t->type == tok_operator && t->content == "("))
1343 throw parse_error ("expected '('");
1344
1345 // see also parse_array_in
1346
1347 bool parenthesized = false;
1348 t = peek ();
1349 if (t && t->type == tok_operator && t->content == "[")
1350 {
1351 next ();
1352 parenthesized = true;
1353 }
1354
1355 while (1)
1356 {
1357 t = next ();
1358 if (! (t->type == tok_identifier))
1359 throw parse_error ("expected identifier");
1360 symbol* sym = new symbol;
1361 sym->tok = t;
1362 sym->name = t->content;
1363 s->indexes.push_back (sym);
1364
93484556
FCE
1365 t = peek ();
1366 if (t && t->type == tok_operator &&
1367 (t->content == "+" || t->content == "-"))
1368 {
1369 if (s->sort_direction)
1370 throw parse_error ("multiple sort directives");
1371 s->sort_direction = (t->content == "+") ? 1 : -1;
1372 s->sort_column = s->indexes.size();
1373 next();
1374 }
1375
69c68955
FCE
1376 if (parenthesized)
1377 {
93484556 1378 t = peek ();
69c68955
FCE
1379 if (t && t->type == tok_operator && t->content == ",")
1380 {
1381 next ();
1382 continue;
1383 }
1384 else if (t && t->type == tok_operator && t->content == "]")
1385 {
1386 next ();
1387 break;
1388 }
1389 else
1390 throw parse_error ("expected ',' or ']'");
1391 }
1392 else
1393 break; // expecting only one expression
1394 }
1395
1396 t = next ();
1397 if (! (t->type == tok_identifier && t->content == "in"))
1398 throw parse_error ("expected 'in'");
d02548c0
GH
1399
1400 s->base = parse_indexable();
69c68955 1401
93484556
FCE
1402 t = peek ();
1403 if (t && t->type == tok_operator &&
1404 (t->content == "+" || t->content == "-"))
1405 {
1406 if (s->sort_direction)
1407 throw parse_error ("multiple sort directives");
1408 s->sort_direction = (t->content == "+") ? 1 : -1;
1409 s->sort_column = 0;
1410 next();
1411 }
1412
69c68955
FCE
1413 t = next ();
1414 if (! (t->type == tok_operator && t->content == ")"))
1415 throw parse_error ("expected ')'");
1416
1417 s->block = parse_statement ();
1418 return s;
1419}
1420
1421
2f1a1aea
FCE
1422expression*
1423parser::parse_expression ()
1424{
1425 return parse_assignment ();
1426}
1427
2f1a1aea
FCE
1428
1429expression*
1430parser::parse_assignment ()
1431{
1432 expression* op1 = parse_ternary ();
1433
1434 const token* t = peek ();
82919855
FCE
1435 // right-associative operators
1436 if (t && t->type == tok_operator
2f1a1aea 1437 && (t->content == "=" ||
82919855 1438 t->content == "<<<" ||
2f1a1aea 1439 t->content == "+=" ||
bb2e3076
FCE
1440 t->content == "-=" ||
1441 t->content == "*=" ||
1442 t->content == "/=" ||
1443 t->content == "%=" ||
1444 t->content == "<<=" ||
1445 t->content == ">>=" ||
1446 t->content == "&=" ||
1447 t->content == "^=" ||
1448 t->content == "|=" ||
d5d7c2cc 1449 t->content == ".=" ||
bb2e3076 1450 false))
2f1a1aea 1451 {
bb2e3076 1452 // NB: lvalueness is checked during elaboration / translation
2f1a1aea 1453 assignment* e = new assignment;
56099f08 1454 e->left = op1;
2f1a1aea 1455 e->op = t->content;
56099f08 1456 e->tok = t;
2f1a1aea 1457 next ();
82919855 1458 e->right = parse_expression ();
56099f08 1459 op1 = e;
2f1a1aea 1460 }
56099f08
FCE
1461
1462 return op1;
2f1a1aea
FCE
1463}
1464
1465
1466expression*
1467parser::parse_ternary ()
1468{
1469 expression* op1 = parse_logical_or ();
1470
1471 const token* t = peek ();
1472 if (t && t->type == tok_operator && t->content == "?")
1473 {
2f1a1aea 1474 ternary_expression* e = new ternary_expression;
56099f08 1475 e->tok = t;
2f1a1aea 1476 e->cond = op1;
56099f08
FCE
1477 next ();
1478 e->truevalue = parse_expression (); // XXX
2f1a1aea
FCE
1479
1480 t = next ();
1481 if (! (t->type == tok_operator && t->content == ":"))
1482 throw parse_error ("expected ':'");
1483
56099f08 1484 e->falsevalue = parse_expression (); // XXX
2f1a1aea
FCE
1485 return e;
1486 }
1487 else
1488 return op1;
1489}
1490
1491
1492expression*
1493parser::parse_logical_or ()
1494{
1495 expression* op1 = parse_logical_and ();
1496
1497 const token* t = peek ();
56099f08 1498 while (t && t->type == tok_operator && t->content == "||")
2f1a1aea 1499 {
2f1a1aea 1500 logical_or_expr* e = new logical_or_expr;
56099f08
FCE
1501 e->tok = t;
1502 e->op = t->content;
2f1a1aea 1503 e->left = op1;
56099f08
FCE
1504 next ();
1505 e->right = parse_logical_and ();
1506 op1 = e;
1507 t = peek ();
2f1a1aea 1508 }
56099f08
FCE
1509
1510 return op1;
2f1a1aea
FCE
1511}
1512
1513
1514expression*
1515parser::parse_logical_and ()
1516{
bb2e3076 1517 expression* op1 = parse_boolean_or ();
2f1a1aea
FCE
1518
1519 const token* t = peek ();
56099f08 1520 while (t && t->type == tok_operator && t->content == "&&")
2f1a1aea 1521 {
2f1a1aea
FCE
1522 logical_and_expr *e = new logical_and_expr;
1523 e->left = op1;
56099f08
FCE
1524 e->op = t->content;
1525 e->tok = t;
1526 next ();
bb2e3076
FCE
1527 e->right = parse_boolean_or ();
1528 op1 = e;
1529 t = peek ();
1530 }
1531
1532 return op1;
1533}
1534
1535
1536expression*
1537parser::parse_boolean_or ()
1538{
1539 expression* op1 = parse_boolean_xor ();
1540
1541 const token* t = peek ();
1542 while (t && t->type == tok_operator && t->content == "|")
1543 {
1544 binary_expression* e = new binary_expression;
1545 e->left = op1;
1546 e->op = t->content;
1547 e->tok = t;
1548 next ();
1549 e->right = parse_boolean_xor ();
1550 op1 = e;
1551 t = peek ();
1552 }
1553
1554 return op1;
1555}
1556
1557
1558expression*
1559parser::parse_boolean_xor ()
1560{
1561 expression* op1 = parse_boolean_and ();
1562
1563 const token* t = peek ();
1564 while (t && t->type == tok_operator && t->content == "^")
1565 {
1566 binary_expression* e = new binary_expression;
1567 e->left = op1;
1568 e->op = t->content;
1569 e->tok = t;
1570 next ();
1571 e->right = parse_boolean_and ();
1572 op1 = e;
1573 t = peek ();
1574 }
1575
1576 return op1;
1577}
1578
1579
1580expression*
1581parser::parse_boolean_and ()
1582{
1583 expression* op1 = parse_array_in ();
1584
1585 const token* t = peek ();
1586 while (t && t->type == tok_operator && t->content == "&")
1587 {
1588 binary_expression* e = new binary_expression;
1589 e->left = op1;
1590 e->op = t->content;
1591 e->tok = t;
1592 next ();
56099f08
FCE
1593 e->right = parse_array_in ();
1594 op1 = e;
1595 t = peek ();
2f1a1aea 1596 }
56099f08
FCE
1597
1598 return op1;
2f1a1aea
FCE
1599}
1600
1601
1602expression*
1603parser::parse_array_in ()
1604{
ce10591c 1605 // This is a very tricky case. All these are legit expressions:
69c68955 1606 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
ce10591c
FCE
1607 vector<expression*> indexes;
1608 bool parenthesized = false;
2f1a1aea
FCE
1609
1610 const token* t = peek ();
69c68955 1611 if (t && t->type == tok_operator && t->content == "[")
ce10591c
FCE
1612 {
1613 next ();
1614 parenthesized = true;
1615 }
1616
1617 while (1)
1618 {
1619 expression* op1 = parse_comparison ();
1620 indexes.push_back (op1);
1621
1622 if (parenthesized)
1623 {
1624 const token* t = peek ();
1625 if (t && t->type == tok_operator && t->content == ",")
1626 {
1627 next ();
1628 continue;
1629 }
69c68955 1630 else if (t && t->type == tok_operator && t->content == "]")
ce10591c
FCE
1631 {
1632 next ();
1633 break;
1634 }
1635 else
69c68955 1636 throw parse_error ("expected ',' or ']'");
ce10591c
FCE
1637 }
1638 else
1639 break; // expecting only one expression
1640 }
1641
1642 t = peek ();
2f1a1aea
FCE
1643 if (t && t->type == tok_identifier && t->content == "in")
1644 {
2f1a1aea 1645 array_in *e = new array_in;
56099f08 1646 e->tok = t;
ce10591c
FCE
1647 next (); // swallow "in"
1648
1649 arrayindex* a = new arrayindex;
1650 a->indexes = indexes;
d02548c0
GH
1651 a->base = parse_indexable();
1652 a->tok = a->base->get_tok();
ce10591c 1653 e->operand = a;
2f1a1aea
FCE
1654 return e;
1655 }
ce10591c
FCE
1656 else if (indexes.size() == 1) // no "in" - need one expression only
1657 return indexes[0];
2f1a1aea 1658 else
ce10591c 1659 throw parse_error ("unexpected comma-separated expression list");
2f1a1aea
FCE
1660}
1661
1662
1663expression*
1664parser::parse_comparison ()
1665{
bb2e3076 1666 expression* op1 = parse_shift ();
2f1a1aea
FCE
1667
1668 const token* t = peek ();
56099f08 1669 while (t && t->type == tok_operator
553d27a5
FCE
1670 && (t->content == ">" ||
1671 t->content == "<" ||
1672 t->content == "==" ||
1673 t->content == "!=" ||
1674 t->content == "<=" ||
bb2e3076 1675 t->content == ">="))
2f1a1aea
FCE
1676 {
1677 comparison* e = new comparison;
1678 e->left = op1;
1679 e->op = t->content;
56099f08 1680 e->tok = t;
2f1a1aea 1681 next ();
bb2e3076
FCE
1682 e->right = parse_shift ();
1683 op1 = e;
1684 t = peek ();
1685 }
1686
1687 return op1;
1688}
1689
1690
1691expression*
1692parser::parse_shift ()
1693{
1694 expression* op1 = parse_concatenation ();
1695
1696 const token* t = peek ();
1697 while (t && t->type == tok_operator &&
1698 (t->content == "<<" || t->content == ">>"))
1699 {
1700 binary_expression* e = new binary_expression;
1701 e->left = op1;
1702 e->op = t->content;
1703 e->tok = t;
1704 next ();
56099f08
FCE
1705 e->right = parse_concatenation ();
1706 op1 = e;
1707 t = peek ();
2f1a1aea 1708 }
56099f08
FCE
1709
1710 return op1;
2f1a1aea
FCE
1711}
1712
1713
1714expression*
1715parser::parse_concatenation ()
1716{
1717 expression* op1 = parse_additive ();
1718
1719 const token* t = peek ();
1720 // XXX: the actual awk string-concatenation operator is *whitespace*.
1721 // I don't know how to easily to model that here.
56099f08 1722 while (t && t->type == tok_operator && t->content == ".")
2f1a1aea
FCE
1723 {
1724 concatenation* e = new concatenation;
1725 e->left = op1;
1726 e->op = t->content;
56099f08 1727 e->tok = t;
2f1a1aea 1728 next ();
56099f08
FCE
1729 e->right = parse_additive ();
1730 op1 = e;
1731 t = peek ();
2f1a1aea 1732 }
56099f08
FCE
1733
1734 return op1;
2f1a1aea
FCE
1735}
1736
1737
1738expression*
1739parser::parse_additive ()
1740{
1741 expression* op1 = parse_multiplicative ();
1742
1743 const token* t = peek ();
56099f08 1744 while (t && t->type == tok_operator
2f1a1aea
FCE
1745 && (t->content == "+" || t->content == "-"))
1746 {
1747 binary_expression* e = new binary_expression;
1748 e->op = t->content;
1749 e->left = op1;
56099f08 1750 e->tok = t;
2f1a1aea 1751 next ();
56099f08
FCE
1752 e->right = parse_multiplicative ();
1753 op1 = e;
1754 t = peek ();
2f1a1aea 1755 }
56099f08
FCE
1756
1757 return op1;
2f1a1aea
FCE
1758}
1759
1760
1761expression*
1762parser::parse_multiplicative ()
1763{
1764 expression* op1 = parse_unary ();
1765
1766 const token* t = peek ();
56099f08 1767 while (t && t->type == tok_operator
2f1a1aea
FCE
1768 && (t->content == "*" || t->content == "/" || t->content == "%"))
1769 {
1770 binary_expression* e = new binary_expression;
1771 e->op = t->content;
1772 e->left = op1;
56099f08 1773 e->tok = t;
2f1a1aea 1774 next ();
56099f08
FCE
1775 e->right = parse_unary ();
1776 op1 = e;
1777 t = peek ();
2f1a1aea 1778 }
56099f08
FCE
1779
1780 return op1;
2f1a1aea
FCE
1781}
1782
1783
1784expression*
1785parser::parse_unary ()
1786{
1787 const token* t = peek ();
1788 if (t && t->type == tok_operator
bb2e3076
FCE
1789 && (t->content == "+" ||
1790 t->content == "-" ||
1791 t->content == "!" ||
1792 t->content == "~" ||
1793 false))
2f1a1aea
FCE
1794 {
1795 unary_expression* e = new unary_expression;
1796 e->op = t->content;
56099f08 1797 e->tok = t;
2f1a1aea 1798 next ();
3a20432b 1799 e->operand = parse_crement ();
2f1a1aea
FCE
1800 return e;
1801 }
1802 else
bb2e3076 1803 return parse_crement ();
2f1a1aea
FCE
1804}
1805
1806
1807expression*
1808parser::parse_crement () // as in "increment" / "decrement"
1809{
1810 const token* t = peek ();
1811 if (t && t->type == tok_operator
1812 && (t->content == "++" || t->content == "--"))
1813 {
1814 pre_crement* e = new pre_crement;
1815 e->op = t->content;
56099f08 1816 e->tok = t;
2f1a1aea
FCE
1817 next ();
1818 e->operand = parse_value ();
1819 return e;
1820 }
1821
1822 // post-crement or non-crement
1823 expression *op1 = parse_value ();
1824
1825 t = peek ();
1826 if (t && t->type == tok_operator
1827 && (t->content == "++" || t->content == "--"))
1828 {
1829 post_crement* e = new post_crement;
1830 e->op = t->content;
56099f08 1831 e->tok = t;
2f1a1aea
FCE
1832 next ();
1833 e->operand = op1;
1834 return e;
1835 }
1836 else
1837 return op1;
1838}
1839
1840
1841expression*
1842parser::parse_value ()
1843{
1844 const token* t = peek ();
1845 if (! t)
1846 throw parse_error ("expected value");
1847
1848 if (t->type == tok_operator && t->content == "(")
1849 {
1850 next ();
1851 expression* e = parse_expression ();
1852 t = next ();
1853 if (! (t->type == tok_operator && t->content == ")"))
1854 throw parse_error ("expected ')'");
1855 return e;
1856 }
1857 else if (t->type == tok_identifier)
1858 return parse_symbol ();
1859 else
1860 return parse_literal ();
1861}
1862
1863
d02548c0
GH
1864const token *
1865parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name)
1866{
1867 hop = NULL;
1868 const token* t = expect_ident (name);
1869 if (name == "@hist_linear" || name == "@hist_log")
1870 {
1871 hop = new hist_op;
1872 if (name == "@hist_linear")
1873 hop->htype = hist_linear;
1874 else if (name == "@hist_log")
1875 hop->htype = hist_log;
1876 hop->tok = t;
1877 expect_op("(");
1878 hop->stat = parse_expression ();
1879 int64_t tnum;
1880 if (hop->htype == hist_linear)
1881 {
1882 for (size_t i = 0; i < 3; ++i)
1883 {
1884 expect_op (",");
1885 expect_number (tnum);
1886 hop->params.push_back (tnum);
1887 }
1888 }
1889 else
1890 {
1891 assert(hop->htype == hist_log);
1892 if (peek_op (","))
1893 {
1894 expect_op (",");
1895 expect_number (tnum);
1896 hop->params.push_back (tnum);
1897 }
1898 else
1899 {
1900 // FIXME (magic value): Logarithmic histograms get 64
1901 // buckets by default.
1902 hop->params.push_back (64);
1903 }
1904 }
1905 expect_op(")");
1906 }
1907 return t;
1908}
1909
1910
1911indexable*
1912parser::parse_indexable ()
1913{
1914 hist_op *hop = NULL;
1915 string name;
1916 const token *tok = parse_hist_op_or_bare_name(hop, name);
1917 if (hop)
1918 return hop;
1919 else
1920 {
1921 symbol* sym = new symbol;
1922 sym->name = name;
1923 sym->tok = tok;
1924 return sym;
1925 }
1926}
1927
1928
1929// var, indexable[index], func(parms), printf("...", ...), $var, $var->member, @stat_op(stat)
2f1a1aea 1930expression*
0fefb486 1931parser::parse_symbol ()
2f1a1aea 1932{
d02548c0
GH
1933 hist_op *hop = NULL;
1934 symbol *sym = NULL;
d7f3e0c5 1935 string name;
d02548c0
GH
1936 const token *t = parse_hist_op_or_bare_name(hop, name);
1937
1938 if (!hop)
0fefb486 1939 {
d02548c0
GH
1940 // If we didn't get a hist_op, then we did get an identifier. We can
1941 // now scrutinize this identifier for the various magic forms of identifier
1942 // (printf, @stat_op, and $var...)
1943
1944 if (name.size() > 0 && name[0] == '@')
d7f3e0c5 1945 {
d02548c0
GH
1946 stat_op *sop = new stat_op;
1947 if (name == "@avg")
1948 sop->ctype = sc_average;
1949 else if (name == "@count")
1950 sop->ctype = sc_count;
1951 else if (name == "@sum")
1952 sop->ctype = sc_sum;
1953 else if (name == "@min")
1954 sop->ctype = sc_min;
1955 else if (name == "@max")
1956 sop->ctype = sc_max;
1957 else
1958 throw parse_error("unknown statistic operator " + name);
1959 expect_op("(");
1960 sop->tok = t;
1961 sop->stat = parse_expression ();
1962 expect_op(")");
1963 return sop;
1964 }
1965
1966 else if (name.size() > 0 && (name == "print"
1967 || name == "sprint"
1968 || name == "printf"
1969 || name == "sprintf"))
1970 {
1971 print_format *fmt = new print_format;
1972 fmt->tok = t;
1973 fmt->print_with_format = (name[name.size() - 1] == 'f');
1974 fmt->print_to_stream = (name[0] == 'p');
1975 expect_op("(");
1976 if (fmt->print_with_format)
1977 {
1978 // Consume and convert a format string, and any subsequent
1979 // arguments. Agreement between the format string and the
1980 // arguments is postponed to the typechecking phase.
1981 string tmp;
1982 expect_unknown (tok_string, tmp);
1983 fmt->components = print_format::string_to_components (tmp);
1984 while (!peek_op (")"))
1985 {
1986 expect_op(",");
1987 expression *e = parse_expression ();
1988 fmt->args.push_back(e);
1989 }
d7f3e0c5 1990 }
d7f3e0c5 1991 else
d02548c0
GH
1992 {
1993 // If we are not printing with a format string, we permit
1994 // exactly one argument (of any type).
1995 expression *e = parse_expression ();
1996 fmt->args.push_back(e);
1997 }
1998 expect_op(")");
1999 return fmt;
2000 }
2001
2002 else if (name.size() > 0 && name[0] == '$')
2003 {
2004 // target_symbol time
2005 target_symbol *tsym = new target_symbol;
2006 tsym->tok = t;
2007 tsym->base_name = name;
2008 while (true)
2009 {
2010 string c;
2011 if (peek_op ("->"))
2012 {
2013 next();
2014 expect_ident (c);
2015 tsym->components.push_back
2016 (make_pair (target_symbol::comp_struct_member, c));
2017 }
2018 else if (peek_op ("["))
2019 {
2020 next();
2021 expect_unknown (tok_number, c);
2022 expect_op ("]");
2023 tsym->components.push_back
2024 (make_pair (target_symbol::comp_literal_array_index, c));
2025 }
2026 else
2027 break;
2028 }
2029 return tsym;
2030 }
2031
2032 else if (peek_op ("(")) // function call
2033 {
2034 next ();
2035 struct functioncall* f = new functioncall;
2036 f->tok = t;
2037 f->function = name;
2038 // Allow empty actual parameter list
2039 if (peek_op (")"))
2040 {
2041 next ();
2042 return f;
2043 }
2044 while (1)
2045 {
2046 f->args.push_back (parse_expression ());
2047 if (peek_op (")"))
2048 {
2049 next();
2050 break;
2051 }
2052 else if (peek_op (","))
2053 {
2054 next();
2055 continue;
2056 }
2057 else
2058 throw parse_error ("expected ',' or ')'");
2059 }
2060 return f;
2061 }
2062
2063 else
2064 {
2065 sym = new symbol;
2066 sym->name = name;
2067 sym->tok = t;
d7f3e0c5 2068 }
0fefb486 2069 }
d7f3e0c5 2070
d02548c0
GH
2071 // By now, either we had a hist_op in the first place, or else
2072 // we had a plain word and it was converted to a symbol.
2073
2074 assert (hop || sym);
2075
2076 // All that remains is to check for array indexing
2077
d7f3e0c5 2078 if (peek_op ("[")) // array
2f1a1aea
FCE
2079 {
2080 next ();
2081 struct arrayindex* ai = new arrayindex;
d02548c0
GH
2082 ai->tok = t;
2083
2084 if (hop)
2085 ai->base = hop;
2086 else
2087 ai->base = sym;
2088
2f1a1aea
FCE
2089 while (1)
2090 {
2091 ai->indexes.push_back (parse_expression ());
d7f3e0c5
GH
2092 if (peek_op ("]"))
2093 {
2094 next();
2095 break;
2096 }
2097 else if (peek_op (","))
2098 {
2099 next();
2100 continue;
2101 }
2f1a1aea
FCE
2102 else
2103 throw parse_error ("expected ',' or ']'");
2104 }
2105 return ai;
2106 }
d02548c0
GH
2107
2108 // If we got to here, we *should* have a symbol; if we have
2109 // a hist_op on its own, it doesn't count as an expression,
2110 // so we throw a parse error.
2111
2112 if (hop)
2113 throw parse_error("base histogram operator where expression expected", t);
2114
2115 return sym;
2f1a1aea 2116}
56099f08 2117
This page took 0.245531 seconds and 5 git commands to generate.