]> sourceware.org Git - systemtap.git/blame - parse.cxx
2005-07-28 Martin Hunt <hunt@redhat.com>
[systemtap.git] / parse.cxx
CommitLineData
// recursive descent parser for systemtap scripts
// Copyright (C) 2005 Red Hat Inc.
//
// This file is part of systemtap, and is free software.  You can
// redistribute it and/or modify it under the terms of the GNU General
// Public License (GPL); either version 2, or (at your option) any
// later version.
2f1a1aea 8
2b066ec1 9#include "config.h"
2f1a1aea
FCE
10#include "staptree.h"
11#include "parse.h"
2b066ec1
FCE
12#include <iostream>
13#include <fstream>
2f1a1aea 14#include <cctype>
9c0c0e46 15#include <cstdlib>
9c0c0e46
FCE
16#include <cerrno>
17#include <climits>
2f1a1aea
FCE
18
19using namespace std;
20
21// ------------------------------------------------------------------------
22
bb2e3076
FCE
23
24
2f1a1aea
FCE
25parser::parser (istream& i):
26 input_name ("<input>"), free_input (0), input (i, input_name),
27 last_t (0), next_t (0), num_errors (0)
28{ }
29
30parser::parser (const string& fn):
31 input_name (fn), free_input (new ifstream (input_name.c_str(), ios::in)),
32 input (* free_input, input_name),
33 last_t (0), next_t (0), num_errors (0)
34{ }
35
36parser::~parser()
37{
38 if (free_input) delete free_input;
39}
40
41
82919855
FCE
42stapfile*
43parser::parse (std::istream& i)
44{
45 parser p (i);
46 return p.parse ();
47}
48
49
50stapfile*
51parser::parse (const std::string& n)
52{
53 parser p (n);
54 return p.parse ();
55}
56
57
56099f08
FCE
58ostream&
59operator << (ostream& o, const token& t)
60{
61 o << (t.type == tok_junk ? "junk" :
62 t.type == tok_identifier ? "identifier" :
63 t.type == tok_operator ? "operator" :
64 t.type == tok_string ? "string" :
65 t.type == tok_number ? "number" :
54dfabe9 66 t.type == tok_embedded ? "embedded-code" :
56099f08
FCE
67 "unknown token");
68
54dfabe9 69 // XXX: filter out embedded-code contents?
56099f08
FCE
70 o << " '";
71 for (unsigned i=0; i<t.content.length(); i++)
72 {
73 char c = t.content[i];
74 o << (isprint (c) ? c : '?');
75 }
76 o << "'";
77
78 o << " at "
79 << t.location.file << ":"
80 << t.location.line << ":"
81 << t.location.column;
82
83 return o;
84}
85
86
2f1a1aea
FCE
87void
88parser::print_error (const parse_error &pe)
89{
90 cerr << "parse error: " << pe.what () << endl;
91
92 const token* t = last_t;
93 if (t)
56099f08 94 cerr << "\tsaw: " << *t << endl;
2f1a1aea 95 else
56099f08 96 cerr << "\tsaw: " << input_name << " EOF" << endl;
2f1a1aea
FCE
97
98 // XXX: make it possible to print the last input line,
99 // so as to line up an arrow with the specific error column
100
101 num_errors ++;
102}
103
104
105const token*
106parser::last ()
107{
108 return last_t;
109}
110
111
112const token*
113parser::next ()
114{
115 if (! next_t)
116 next_t = input.scan ();
117 if (! next_t)
118 throw parse_error ("unexpected end-of-file");
119
2f1a1aea
FCE
120 last_t = next_t;
121 // advance by zeroing next_t
122 next_t = 0;
123 return last_t;
124}
125
126
127const token*
128parser::peek ()
129{
130 if (! next_t)
131 next_t = input.scan ();
132
133 // cerr << "{" << (next_t ? next_t->content : "null") << "}";
134
135 // don't advance by zeroing next_t
136 last_t = next_t;
137 return next_t;
138}
139
140
141lexer::lexer (istream& i, const string& in):
142 input (i), input_name (in), cursor_line (1), cursor_column (1)
143{ }
144
bb2e3076
FCE
145
146int
147lexer::input_peek (unsigned n)
148{
149 while (lookahead.size() <= n)
150 {
151 int c = input.get ();
152 lookahead.push_back (input ? c : -1);
153 }
154 return lookahead[n];
155}
156
157
2f1a1aea
FCE
158int
159lexer::input_get ()
160{
bb2e3076
FCE
161 int c = input_peek (0);
162 lookahead.erase (lookahead.begin ());
163
164 if (c < 0) return c; // EOF
165
2f1a1aea
FCE
166 // update source cursor
167 if (c == '\n')
168 {
169 cursor_line ++;
170 cursor_column = 1;
171 }
172 else
173 cursor_column ++;
174
175 return c;
176}
177
178
179token*
180lexer::scan ()
181{
182 token* n = new token;
183 n->location.file = input_name;
184
185 skip:
186 n->location.line = cursor_line;
187 n->location.column = cursor_column;
188
189 int c = input_get();
190 if (c < 0)
191 {
192 delete n;
193 return 0;
194 }
195
196 if (isspace (c))
197 goto skip;
198
54dfabe9 199 else if (isalpha (c) || c == '$' || c == '_')
2f1a1aea
FCE
200 {
201 n->type = tok_identifier;
202 n->content = (char) c;
203 while (1)
204 {
bb2e3076 205 int c2 = input_peek ();
2f1a1aea
FCE
206 if (! input)
207 break;
0fefb486 208 if ((isalnum(c2) || c2 == '_' || c2 == '$'))
2f1a1aea
FCE
209 {
210 n->content.push_back(c2);
211 input_get ();
212 }
213 else
214 break;
215 }
216 return n;
217 }
218
219 else if (isdigit (c))
220 {
2f1a1aea 221 n->type = tok_number;
9c0c0e46
FCE
222 n->content = (char) c;
223
2f1a1aea
FCE
224 while (1)
225 {
bb2e3076 226 int c2 = input_peek ();
2f1a1aea
FCE
227 if (! input)
228 break;
9c0c0e46
FCE
229
230 // NB: isalnum is very permissive. We rely on strtol, called in
231 // parser::parse_literal below, to confirm that the number string
232 // is correctly formatted and in range.
233
234 if (isalnum (c2))
2f1a1aea 235 {
9c0c0e46 236 n->content.push_back (c2);
2f1a1aea
FCE
237 input_get ();
238 }
239 else
240 break;
241 }
242 return n;
243 }
244
245 else if (c == '\"')
246 {
247 n->type = tok_string;
248 while (1)
249 {
250 c = input_get ();
251
252 if (! input || c == '\n')
253 {
254 n->type = tok_junk;
255 break;
256 }
257 if (c == '\"') // closing double-quotes
258 break;
259 else if (c == '\\')
260 {
261 // XXX: handle escape sequences
262 }
263 else
264 n->content.push_back(c);
265 }
266 return n;
267 }
268
269 else if (ispunct (c))
270 {
bb2e3076
FCE
271 int c2 = input_peek ();
272 int c3 = input_peek (1);
273 string s1 = string("") + (char) c;
274 string s2 = (c2 > 0 ? s1 + (char) c2 : s1);
275 string s3 = (c3 > 0 ? s2 + (char) c3 : s2);
2f1a1aea 276
bb2e3076 277 if (s1 == "#") // shell comment
2f1a1aea
FCE
278 {
279 unsigned this_line = cursor_line;
bb2e3076
FCE
280 do { c = input_get (); }
281 while (c >= 0 && cursor_line == this_line);
2f1a1aea
FCE
282 goto skip;
283 }
bb2e3076 284 else if (s2 == "//") // C++ comment
63a7c90e
FCE
285 {
286 unsigned this_line = cursor_line;
bb2e3076
FCE
287 do { c = input_get (); }
288 while (c >= 0 && cursor_line == this_line);
63a7c90e
FCE
289 goto skip;
290 }
291 else if (c == '/' && c2 == '*') // C comment
292 {
293 c2 = input_get ();
294 unsigned chars = 0;
bb2e3076 295 while (c2 >= 0)
63a7c90e
FCE
296 {
297 chars ++; // track this to prevent "/*/" from being accepted
298 c = c2;
299 c2 = input_get ();
300 if (chars > 1 && c == '*' && c2 == '/')
bb2e3076 301 break;
63a7c90e 302 }
bb2e3076 303 goto skip;
63a7c90e 304 }
54dfabe9
FCE
305 else if (c == '%' && c2 == '{') // embedded code
306 {
307 n->type = tok_embedded;
308 (void) input_get (); // swallow '{' already in c2
309 while (true)
310 {
311 c = input_get ();
312 if (c == 0) // EOF
313 {
314 n->type = tok_junk;
315 break;
316 }
317 if (c == '%')
318 {
319 c2 = input_peek ();
320 if (c2 == '}')
321 {
322 (void) input_get (); // swallow '}' too
323 break;
324 }
325 }
326 n->content += c;
327 }
328 return n;
329 }
2f1a1aea 330
bb2e3076
FCE
331 // We're committed to recognizing at least the first character
332 // as an operator.
2f1a1aea 333 n->type = tok_operator;
2f1a1aea 334
bb2e3076
FCE
335 // match all valid operators, in decreasing size order
336 if (s3 == "<<<" ||
337 s3 == "<<=" ||
338 s3 == ">>=")
82919855 339 {
bb2e3076
FCE
340 n->content = s3;
341 input_get (); input_get (); // swallow other two characters
342 }
343 else if (s2 == "==" ||
344 s2 == "!=" ||
345 s2 == "<=" ||
346 s2 == ">=" ||
347 s2 == "+=" ||
348 s2 == "-=" ||
349 s2 == "*=" ||
350 s2 == "/=" ||
351 s2 == "%=" ||
352 s2 == "&=" ||
353 s2 == "^=" ||
354 s2 == "|=" ||
355 s2 == "&&" ||
356 s2 == "||" ||
357 s2 == "++" ||
358 s2 == "--" ||
359 s2 == "->" ||
360 s2 == "<<" ||
361 s2 == ">>")
362 {
363 n->content = s2;
364 input_get (); // swallow other character
365 }
366 else
367 {
368 n->content = s1;
82919855 369 }
2f1a1aea
FCE
370
371 return n;
372 }
373
374 else
375 {
376 n->type = tok_junk;
377 n->content = (char) c;
378 return n;
379 }
380}
381
382
383// ------------------------------------------------------------------------
384
385stapfile*
386parser::parse ()
387{
388 stapfile* f = new stapfile;
389 f->name = input_name;
56099f08
FCE
390
391 bool empty = true;
392
2f1a1aea
FCE
393 while (1)
394 {
395 try
396 {
397 const token* t = peek ();
56099f08 398 if (! t) // nice clean EOF
2f1a1aea
FCE
399 break;
400
56099f08 401 empty = false;
2f1a1aea 402 if (t->type == tok_identifier && t->content == "probe")
54dfabe9 403 parse_probe (f->probes, f->aliases);
2f1a1aea 404 else if (t->type == tok_identifier && t->content == "global")
82919855 405 parse_global (f->globals);
56099f08 406 else if (t->type == tok_identifier && t->content == "function")
82919855 407 f->functions.push_back (parse_functiondecl ());
54dfabe9
FCE
408 else if (t->type == tok_embedded)
409 f->embeds.push_back (parse_embeddedcode ());
2f1a1aea 410 else
54dfabe9 411 throw parse_error ("expected 'probe', 'global', 'function', or embedded code");
2f1a1aea
FCE
412 }
413 catch (parse_error& pe)
414 {
415 print_error (pe);
416 // Quietly swallow all tokens until the next '}'.
417 while (1)
418 {
419 const token* t = peek ();
420 if (! t)
421 break;
422 next ();
423 if (t->type == tok_operator && t->content == "}")
424 break;
425 }
426 }
427 }
428
56099f08
FCE
429 if (empty)
430 {
431 cerr << "Input file '" << input_name << "' is empty or missing." << endl;
432 delete f;
433 return 0;
434 }
435 else if (num_errors > 0)
2f1a1aea
FCE
436 {
437 cerr << num_errors << " parse error(s)." << endl;
438 delete f;
56099f08 439 return 0;
2f1a1aea
FCE
440 }
441
442 return f;
443}
444
445
20c6c071 446void
54dfabe9
FCE
447parser::parse_probe (std::vector<probe *> & probe_ret,
448 std::vector<probe_alias *> & alias_ret)
2f1a1aea 449{
82919855
FCE
450 const token* t0 = next ();
451 if (! (t0->type == tok_identifier && t0->content == "probe"))
452 throw parse_error ("expected 'probe'");
453
20c6c071
GH
454 vector<probe_point *> aliases;
455 vector<probe_point *> locations;
456
457 bool equals_ok = true;
82919855 458
2f1a1aea
FCE
459 while (1)
460 {
461 const token *t = peek ();
462 if (t && t->type == tok_identifier)
463 {
20c6c071 464 probe_point * pp = parse_probe_point ();
2f1a1aea 465
56099f08 466 t = peek ();
20c6c071
GH
467 if (equals_ok && t
468 && t->type == tok_operator && t->content == "=")
56099f08 469 {
20c6c071
GH
470 aliases.push_back(pp);
471 next ();
472 continue;
473 }
474 else if (t && t->type == tok_operator && t->content == ",")
475 {
476 locations.push_back(pp);
477 equals_ok = false;
56099f08
FCE
478 next ();
479 continue;
480 }
481 else if (t && t->type == tok_operator && t->content == "{")
20c6c071
GH
482 {
483 locations.push_back(pp);
484 break;
485 }
2f1a1aea 486 else
9c0c0e46 487 throw parse_error ("expected ',' or '{'");
2f1a1aea
FCE
488 // XXX: unify logic with that in parse_symbol()
489 }
490 else
9c0c0e46 491 throw parse_error ("expected probe point specifier");
2f1a1aea 492 }
20c6c071 493
20c6c071
GH
494 if (aliases.empty())
495 {
54dfabe9
FCE
496 probe* p = new probe;
497 p->tok = t0;
498 p->locations = locations;
499 p->body = parse_stmt_block ();
500 probe_ret.push_back (p);
20c6c071
GH
501 }
502 else
503 {
54dfabe9
FCE
504 probe_alias* p = new probe_alias (aliases);
505 p->tok = t0;
506 p->locations = locations;
507 p->body = parse_stmt_block ();
508 alias_ret.push_back (p);
20c6c071 509 }
54dfabe9 510}
20c6c071 511
54dfabe9
FCE
512
513embeddedcode*
514parser::parse_embeddedcode ()
515{
516 embeddedcode* e = new embeddedcode;
517 const token* t = next ();
518 if (t->type != tok_embedded)
519 throw parse_error ("expected embedded code");
520
521 e->tok = t;
522 e->code = t->content;
523 return e;
2f1a1aea
FCE
524}
525
526
527block*
56099f08 528parser::parse_stmt_block ()
2f1a1aea
FCE
529{
530 block* pb = new block;
531
56099f08
FCE
532 const token* t = next ();
533 if (! (t->type == tok_operator && t->content == "{"))
534 throw parse_error ("expected '{'");
535
536 pb->tok = t;
2b066ec1 537
2f1a1aea
FCE
538 while (1)
539 {
540 try
541 {
2b066ec1
FCE
542 t = peek ();
543 if (t && t->type == tok_operator && t->content == "}")
544 {
545 next ();
546 break;
547 }
548
2f1a1aea 549 pb->statements.push_back (parse_statement ());
2f1a1aea
FCE
550 }
551 catch (parse_error& pe)
552 {
553 print_error (pe);
54dfabe9 554
2f1a1aea
FCE
555 // Quietly swallow all tokens until the next ';' or '}'.
556 while (1)
557 {
558 const token* t = peek ();
54dfabe9 559 if (! t) return 0;
2f1a1aea 560 next ();
54dfabe9
FCE
561 if (t->type == tok_operator
562 && (t->content == "}" || t->content == ";"))
2f1a1aea
FCE
563 break;
564 }
565 }
566 }
567
568 return pb;
569}
570
571
572statement*
573parser::parse_statement ()
574{
575 const token* t = peek ();
576 if (t && t->type == tok_operator && t->content == ";")
577 {
69c68955
FCE
578 null_statement* n = new null_statement ();
579 n->tok = next ();
580 return n;
2f1a1aea
FCE
581 }
582 else if (t && t->type == tok_operator && t->content == "{")
56099f08 583 return parse_stmt_block ();
2f1a1aea 584 else if (t && t->type == tok_identifier && t->content == "if")
56099f08 585 return parse_if_statement ();
69c68955
FCE
586 else if (t && t->type == tok_identifier && t->content == "for")
587 return parse_for_loop ();
69c68955
FCE
588 else if (t && t->type == tok_identifier && t->content == "foreach")
589 return parse_foreach_loop ();
56099f08
FCE
590 else if (t && t->type == tok_identifier && t->content == "return")
591 return parse_return_statement ();
592 else if (t && t->type == tok_identifier && t->content == "delete")
593 return parse_delete_statement ();
f3c26ea5
FCE
594 else if (t && t->type == tok_identifier && t->content == "while")
595 return parse_while_loop ();
596 else if (t && t->type == tok_identifier && t->content == "break")
597 return parse_break_statement ();
598 else if (t && t->type == tok_identifier && t->content == "continue")
599 return parse_continue_statement ();
600 else if (t && t->type == tok_identifier && t->content == "next")
601 return parse_next_statement ();
602 // XXX: "do/while" statement?
2f1a1aea
FCE
603 else if (t && (t->type == tok_operator || // expressions are flexible
604 t->type == tok_identifier ||
605 t->type == tok_number ||
606 t->type == tok_string))
69c68955 607 return parse_expr_statement ();
54dfabe9 608 // XXX: consider generally accepting tok_embedded here too
2f1a1aea
FCE
609 else
610 throw parse_error ("expected statement");
611}
612
613
56099f08
FCE
614void
615parser::parse_global (vector <vardecl*>& globals)
2f1a1aea 616{
82919855
FCE
617 const token* t0 = next ();
618 if (! (t0->type == tok_identifier && t0->content == "global"))
619 throw parse_error ("expected 'global'");
620
56099f08
FCE
621 while (1)
622 {
623 const token* t = next ();
624 if (! (t->type == tok_identifier))
625 throw parse_error ("expected identifier");
626
2b066ec1
FCE
627 bool dupe = false;
628 for (unsigned i=0; i<globals.size(); i++)
629 if (globals[i]->name == t->content)
630 dupe = true;
631
632 if (! dupe)
633 {
634 vardecl* d = new vardecl;
635 d->name = t->content;
636 d->tok = t;
637 globals.push_back (d);
638 }
56099f08 639
82919855
FCE
640 t = peek ();
641 if (t && t->type == tok_operator && t->content == ",")
642 {
643 next ();
644 continue;
645 }
56099f08 646 else
82919855 647 break;
56099f08
FCE
648 }
649}
650
651
652functiondecl*
653parser::parse_functiondecl ()
654{
82919855
FCE
655 const token* t = next ();
656 if (! (t->type == tok_identifier && t->content == "function"))
657 throw parse_error ("expected 'function'");
658
56099f08
FCE
659 functiondecl *fd = new functiondecl ();
660
82919855 661 t = next ();
56099f08
FCE
662 if (! (t->type == tok_identifier))
663 throw parse_error ("expected identifier");
664 fd->name = t->content;
665 fd->tok = t;
666
667 t = next ();
668 if (! (t->type == tok_operator && t->content == "("))
669 throw parse_error ("expected '('");
670
671 while (1)
672 {
673 t = next ();
674
675 // permit zero-argument fuctions
676 if (t->type == tok_operator && t->content == ")")
677 break;
678 else if (! (t->type == tok_identifier))
679 throw parse_error ("expected identifier");
680 vardecl* vd = new vardecl;
681 vd->name = t->content;
682 vd->tok = t;
683 fd->formal_args.push_back (vd);
684
685 t = next ();
686 if (t->type == tok_operator && t->content == ")")
687 break;
688 if (t->type == tok_operator && t->content == ",")
689 continue;
690 else
691 throw parse_error ("expected ',' or ')'");
692 }
693
54dfabe9
FCE
694 t = peek ();
695 if (t && t->type == tok_embedded)
696 fd->body = parse_embeddedcode ();
697 else
698 fd->body = parse_stmt_block ();
56099f08 699 return fd;
2f1a1aea
FCE
700}
701
702
9c0c0e46
FCE
703probe_point*
704parser::parse_probe_point ()
2f1a1aea 705{
9c0c0e46 706 probe_point* pl = new probe_point;
2f1a1aea 707
2b066ec1 708 // XXX: add support for probe point aliases
f4b28491 709 // e.g. probe alias = foo { ... }
9c0c0e46 710 while (1)
2f1a1aea 711 {
9c0c0e46
FCE
712 const token* t = next ();
713 if (t->type != tok_identifier)
714 throw parse_error ("expected identifier");
715
716 if (pl->tok == 0) pl->tok = t;
717
718 probe_point::component* c = new probe_point::component;
719 c->functor = t->content;
720 pl->components.push_back (c);
721 // NB though we still may add c->arg soon
722
723 t = peek ();
724 if (t && t->type == tok_operator
20c6c071 725 && (t->content == "{" || t->content == "," || t->content == "="))
9c0c0e46
FCE
726 break;
727
728 if (t && t->type == tok_operator && t->content == "(")
729 {
730 next (); // consume "("
731 c->arg = parse_literal ();
732
733 t = next ();
734 if (! (t->type == tok_operator && t->content == ")"))
735 throw parse_error ("expected ')'");
736
737 t = peek ();
738 if (t && t->type == tok_operator
20c6c071 739 && (t->content == "{" || t->content == "," || t->content == "="))
9c0c0e46 740 break;
2b066ec1
FCE
741 else if (t && t->type == tok_operator &&
742 t->content == "(")
743 throw parse_error ("unexpected '.' or ',' or '{'");
9c0c0e46
FCE
744 }
745 // fall through
746
747 if (t && t->type == tok_operator && t->content == ".")
748 next ();
749 else
20c6c071 750 throw parse_error ("expected '.' or ',' or '(' or '{' or '='");
2f1a1aea
FCE
751 }
752
753 return pl;
754}
755
756
757literal*
758parser::parse_literal ()
759{
760 const token* t = next ();
56099f08 761 literal* l;
2f1a1aea 762 if (t->type == tok_string)
56099f08 763 l = new literal_string (t->content);
2f1a1aea 764 else if (t->type == tok_number)
9c0c0e46
FCE
765 {
766 const char* startp = t->content.c_str ();
767 char* endp = (char*) startp;
768
769 // NB: we allow controlled overflow from LONG_MIN .. ULONG_MAX
770 errno = 0;
771 long long value = strtoll (startp, & endp, 0);
772 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
3f43362a 773 || value > 4294967295LL || value < (-2147483647LL-1))
9c0c0e46
FCE
774 throw parse_error ("number invalid or out of range");
775
776 long value2 = (long) value;
777 l = new literal_number (value2);
778 }
2f1a1aea
FCE
779 else
780 throw parse_error ("expected literal string or number");
56099f08
FCE
781
782 l->tok = t;
783 return l;
2f1a1aea
FCE
784}
785
786
787if_statement*
788parser::parse_if_statement ()
789{
790 const token* t = next ();
56099f08
FCE
791 if (! (t->type == tok_identifier && t->content == "if"))
792 throw parse_error ("expected 'if'");
793 if_statement* s = new if_statement;
794 s->tok = t;
795
796 t = next ();
2f1a1aea
FCE
797 if (! (t->type == tok_operator && t->content == "("))
798 throw parse_error ("expected '('");
799
2f1a1aea
FCE
800 s->condition = parse_expression ();
801
802 t = next ();
803 if (! (t->type == tok_operator && t->content == ")"))
804 throw parse_error ("expected ')'");
805
806 s->thenblock = parse_statement ();
807
808 t = peek ();
809 if (t && t->type == tok_identifier && t->content == "else")
810 {
811 next ();
812 s->elseblock = parse_statement ();
813 }
814
815 return s;
816}
817
818
69c68955
FCE
819expr_statement*
820parser::parse_expr_statement ()
821{
822 expr_statement *es = new expr_statement;
823 const token* t = peek ();
824 es->tok = t;
825 es->value = parse_expression ();
826 return es;
827}
828
829
56099f08
FCE
830return_statement*
831parser::parse_return_statement ()
832{
833 const token* t = next ();
834 if (! (t->type == tok_identifier && t->content == "return"))
835 throw parse_error ("expected 'return'");
836 return_statement* s = new return_statement;
837 s->tok = t;
838 s->value = parse_expression ();
839 return s;
840}
841
842
843delete_statement*
844parser::parse_delete_statement ()
845{
846 const token* t = next ();
847 if (! (t->type == tok_identifier && t->content == "delete"))
848 throw parse_error ("expected 'delete'");
849 delete_statement* s = new delete_statement;
850 s->tok = t;
851 s->value = parse_expression ();
852 return s;
853}
854
855
f3c26ea5
FCE
856next_statement*
857parser::parse_next_statement ()
858{
859 const token* t = next ();
860 if (! (t->type == tok_identifier && t->content == "next"))
861 throw parse_error ("expected 'next'");
862 next_statement* s = new next_statement;
863 s->tok = t;
864 return s;
865}
866
867
868break_statement*
869parser::parse_break_statement ()
870{
871 const token* t = next ();
872 if (! (t->type == tok_identifier && t->content == "break"))
873 throw parse_error ("expected 'break'");
874 break_statement* s = new break_statement;
875 s->tok = t;
876 return s;
877}
878
879
880continue_statement*
881parser::parse_continue_statement ()
882{
883 const token* t = next ();
884 if (! (t->type == tok_identifier && t->content == "continue"))
885 throw parse_error ("expected 'continue'");
886 continue_statement* s = new continue_statement;
887 s->tok = t;
888 return s;
889}
890
891
69c68955
FCE
892for_loop*
893parser::parse_for_loop ()
894{
f3c26ea5
FCE
895 const token* t = next ();
896 if (! (t->type == tok_identifier && t->content == "for"))
897 throw parse_error ("expected 'for'");
898 for_loop* s = new for_loop;
899 s->tok = t;
900
901 t = next ();
902 if (! (t->type == tok_operator && t->content == "("))
903 throw parse_error ("expected '('");
904
905 // initializer + ";"
906 t = peek ();
907 if (t && t->type == tok_operator && t->content == ";")
908 {
909 literal_number* l = new literal_number(0);
910 expr_statement* es = new expr_statement;
911 es->value = l;
912 s->init = es;
913 es->value->tok = es->tok = next ();
914 }
915 else
916 {
917 s->init = parse_expr_statement ();
918 t = next ();
919 if (! (t->type == tok_operator && t->content == ";"))
920 throw parse_error ("expected ';'");
921 }
922
923 // condition + ";"
924 t = peek ();
925 if (t && t->type == tok_operator && t->content == ";")
926 {
927 literal_number* l = new literal_number(1);
928 s->cond = l;
929 s->cond->tok = next ();
930 }
931 else
932 {
933 s->cond = parse_expression ();
934 t = next ();
935 if (! (t->type == tok_operator && t->content == ";"))
936 throw parse_error ("expected ';'");
937 }
938
939 // increment + ")"
940 t = peek ();
941 if (t && t->type == tok_operator && t->content == ")")
942 {
943 literal_number* l = new literal_number(2);
944 expr_statement* es = new expr_statement;
945 es->value = l;
946 s->incr = es;
947 es->value->tok = es->tok = next ();
948 }
949 else
950 {
951 s->incr = parse_expr_statement ();
952 t = next ();
953 if (! (t->type == tok_operator && t->content == ")"))
954 throw parse_error ("expected ';'");
955 }
956
957 // block
958 s->block = parse_statement ();
959
960 return s;
961}
962
963
964for_loop*
965parser::parse_while_loop ()
966{
967 const token* t = next ();
968 if (! (t->type == tok_identifier && t->content == "while"))
969 throw parse_error ("expected 'while'");
970 for_loop* s = new for_loop;
971 s->tok = t;
972
973 t = next ();
974 if (! (t->type == tok_operator && t->content == "("))
975 throw parse_error ("expected '('");
976
977 // dummy init and incr fields
978 literal_number* l = new literal_number(0);
979 expr_statement* es = new expr_statement;
980 es->value = l;
981 s->init = es;
982 es->value->tok = es->tok = t;
983
984 l = new literal_number(2);
985 es = new expr_statement;
986 es->value = l;
987 s->incr = es;
988 es->value->tok = es->tok = t;
989
990
991 // condition
992 s->cond = parse_expression ();
993
994
995 t = next ();
996 if (! (t->type == tok_operator && t->content == ")"))
997 throw parse_error ("expected ')'");
998
999 // block
1000 s->block = parse_statement ();
1001
1002 return s;
69c68955
FCE
1003}
1004
1005
1006foreach_loop*
1007parser::parse_foreach_loop ()
1008{
1009 const token* t = next ();
1010 if (! (t->type == tok_identifier && t->content == "foreach"))
1011 throw parse_error ("expected 'foreach'");
1012 foreach_loop* s = new foreach_loop;
1013 s->tok = t;
1014
1015 t = next ();
1016 if (! (t->type == tok_operator && t->content == "("))
1017 throw parse_error ("expected '('");
1018
1019 // see also parse_array_in
1020
1021 bool parenthesized = false;
1022 t = peek ();
1023 if (t && t->type == tok_operator && t->content == "[")
1024 {
1025 next ();
1026 parenthesized = true;
1027 }
1028
1029 while (1)
1030 {
1031 t = next ();
1032 if (! (t->type == tok_identifier))
1033 throw parse_error ("expected identifier");
1034 symbol* sym = new symbol;
1035 sym->tok = t;
1036 sym->name = t->content;
1037 s->indexes.push_back (sym);
1038
1039 if (parenthesized)
1040 {
1041 const token* t = peek ();
1042 if (t && t->type == tok_operator && t->content == ",")
1043 {
1044 next ();
1045 continue;
1046 }
1047 else if (t && t->type == tok_operator && t->content == "]")
1048 {
1049 next ();
1050 break;
1051 }
1052 else
1053 throw parse_error ("expected ',' or ']'");
1054 }
1055 else
1056 break; // expecting only one expression
1057 }
1058
1059 t = next ();
1060 if (! (t->type == tok_identifier && t->content == "in"))
1061 throw parse_error ("expected 'in'");
1062
1063 t = next ();
1064 if (t->type != tok_identifier)
1065 throw parse_error ("expected identifier");
1066 s->base = t->content;
1067
1068 t = next ();
1069 if (! (t->type == tok_operator && t->content == ")"))
1070 throw parse_error ("expected ')'");
1071
1072 s->block = parse_statement ();
1073 return s;
1074}
1075
1076
2f1a1aea
FCE
1077expression*
1078parser::parse_expression ()
1079{
1080 return parse_assignment ();
1081}
1082
2f1a1aea
FCE
1083
1084expression*
1085parser::parse_assignment ()
1086{
1087 expression* op1 = parse_ternary ();
1088
1089 const token* t = peek ();
82919855
FCE
1090 // right-associative operators
1091 if (t && t->type == tok_operator
2f1a1aea 1092 && (t->content == "=" ||
82919855 1093 t->content == "<<<" ||
2f1a1aea 1094 t->content == "+=" ||
bb2e3076
FCE
1095 t->content == "-=" ||
1096 t->content == "*=" ||
1097 t->content == "/=" ||
1098 t->content == "%=" ||
1099 t->content == "<<=" ||
1100 t->content == ">>=" ||
1101 t->content == "&=" ||
1102 t->content == "^=" ||
1103 t->content == "|=" ||
1104 false))
2f1a1aea 1105 {
bb2e3076 1106 // NB: lvalueness is checked during elaboration / translation
2f1a1aea 1107 assignment* e = new assignment;
56099f08 1108 e->left = op1;
2f1a1aea 1109 e->op = t->content;
56099f08 1110 e->tok = t;
2f1a1aea 1111 next ();
82919855 1112 e->right = parse_expression ();
56099f08 1113 op1 = e;
2f1a1aea 1114 }
56099f08
FCE
1115
1116 return op1;
2f1a1aea
FCE
1117}
1118
1119
1120expression*
1121parser::parse_ternary ()
1122{
1123 expression* op1 = parse_logical_or ();
1124
1125 const token* t = peek ();
1126 if (t && t->type == tok_operator && t->content == "?")
1127 {
2f1a1aea 1128 ternary_expression* e = new ternary_expression;
56099f08 1129 e->tok = t;
2f1a1aea 1130 e->cond = op1;
56099f08
FCE
1131 next ();
1132 e->truevalue = parse_expression (); // XXX
2f1a1aea
FCE
1133
1134 t = next ();
1135 if (! (t->type == tok_operator && t->content == ":"))
1136 throw parse_error ("expected ':'");
1137
56099f08 1138 e->falsevalue = parse_expression (); // XXX
2f1a1aea
FCE
1139 return e;
1140 }
1141 else
1142 return op1;
1143}
1144
1145
1146expression*
1147parser::parse_logical_or ()
1148{
1149 expression* op1 = parse_logical_and ();
1150
1151 const token* t = peek ();
56099f08 1152 while (t && t->type == tok_operator && t->content == "||")
2f1a1aea 1153 {
2f1a1aea 1154 logical_or_expr* e = new logical_or_expr;
56099f08
FCE
1155 e->tok = t;
1156 e->op = t->content;
2f1a1aea 1157 e->left = op1;
56099f08
FCE
1158 next ();
1159 e->right = parse_logical_and ();
1160 op1 = e;
1161 t = peek ();
2f1a1aea 1162 }
56099f08
FCE
1163
1164 return op1;
2f1a1aea
FCE
1165}
1166
1167
1168expression*
1169parser::parse_logical_and ()
1170{
bb2e3076 1171 expression* op1 = parse_boolean_or ();
2f1a1aea
FCE
1172
1173 const token* t = peek ();
56099f08 1174 while (t && t->type == tok_operator && t->content == "&&")
2f1a1aea 1175 {
2f1a1aea
FCE
1176 logical_and_expr *e = new logical_and_expr;
1177 e->left = op1;
56099f08
FCE
1178 e->op = t->content;
1179 e->tok = t;
1180 next ();
bb2e3076
FCE
1181 e->right = parse_boolean_or ();
1182 op1 = e;
1183 t = peek ();
1184 }
1185
1186 return op1;
1187}
1188
1189
1190expression*
1191parser::parse_boolean_or ()
1192{
1193 expression* op1 = parse_boolean_xor ();
1194
1195 const token* t = peek ();
1196 while (t && t->type == tok_operator && t->content == "|")
1197 {
1198 binary_expression* e = new binary_expression;
1199 e->left = op1;
1200 e->op = t->content;
1201 e->tok = t;
1202 next ();
1203 e->right = parse_boolean_xor ();
1204 op1 = e;
1205 t = peek ();
1206 }
1207
1208 return op1;
1209}
1210
1211
1212expression*
1213parser::parse_boolean_xor ()
1214{
1215 expression* op1 = parse_boolean_and ();
1216
1217 const token* t = peek ();
1218 while (t && t->type == tok_operator && t->content == "^")
1219 {
1220 binary_expression* e = new binary_expression;
1221 e->left = op1;
1222 e->op = t->content;
1223 e->tok = t;
1224 next ();
1225 e->right = parse_boolean_and ();
1226 op1 = e;
1227 t = peek ();
1228 }
1229
1230 return op1;
1231}
1232
1233
1234expression*
1235parser::parse_boolean_and ()
1236{
1237 expression* op1 = parse_array_in ();
1238
1239 const token* t = peek ();
1240 while (t && t->type == tok_operator && t->content == "&")
1241 {
1242 binary_expression* e = new binary_expression;
1243 e->left = op1;
1244 e->op = t->content;
1245 e->tok = t;
1246 next ();
56099f08
FCE
1247 e->right = parse_array_in ();
1248 op1 = e;
1249 t = peek ();
2f1a1aea 1250 }
56099f08
FCE
1251
1252 return op1;
2f1a1aea
FCE
1253}
1254
1255
1256expression*
1257parser::parse_array_in ()
1258{
ce10591c 1259 // This is a very tricky case. All these are legit expressions:
69c68955 1260 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
ce10591c
FCE
1261 vector<expression*> indexes;
1262 bool parenthesized = false;
2f1a1aea
FCE
1263
1264 const token* t = peek ();
69c68955 1265 if (t && t->type == tok_operator && t->content == "[")
ce10591c
FCE
1266 {
1267 next ();
1268 parenthesized = true;
1269 }
1270
1271 while (1)
1272 {
1273 expression* op1 = parse_comparison ();
1274 indexes.push_back (op1);
1275
1276 if (parenthesized)
1277 {
1278 const token* t = peek ();
1279 if (t && t->type == tok_operator && t->content == ",")
1280 {
1281 next ();
1282 continue;
1283 }
69c68955 1284 else if (t && t->type == tok_operator && t->content == "]")
ce10591c
FCE
1285 {
1286 next ();
1287 break;
1288 }
1289 else
69c68955 1290 throw parse_error ("expected ',' or ']'");
ce10591c
FCE
1291 }
1292 else
1293 break; // expecting only one expression
1294 }
1295
1296 t = peek ();
2f1a1aea
FCE
1297 if (t && t->type == tok_identifier && t->content == "in")
1298 {
2f1a1aea 1299 array_in *e = new array_in;
56099f08 1300 e->tok = t;
ce10591c
FCE
1301 next (); // swallow "in"
1302
1303 arrayindex* a = new arrayindex;
1304 a->indexes = indexes;
1305
1306 t = next ();
1307 if (t->type != tok_identifier)
1308 throw parse_error ("expected identifier");
1309 a->tok = t;
1310 a->base = t->content;
1311
1312 e->operand = a;
2f1a1aea
FCE
1313 return e;
1314 }
ce10591c
FCE
1315 else if (indexes.size() == 1) // no "in" - need one expression only
1316 return indexes[0];
2f1a1aea 1317 else
ce10591c 1318 throw parse_error ("unexpected comma-separated expression list");
2f1a1aea
FCE
1319}
1320
1321
1322expression*
1323parser::parse_comparison ()
1324{
bb2e3076 1325 expression* op1 = parse_shift ();
2f1a1aea
FCE
1326
1327 const token* t = peek ();
56099f08 1328 while (t && t->type == tok_operator
553d27a5
FCE
1329 && (t->content == ">" ||
1330 t->content == "<" ||
1331 t->content == "==" ||
1332 t->content == "!=" ||
1333 t->content == "<=" ||
bb2e3076 1334 t->content == ">="))
2f1a1aea
FCE
1335 {
1336 comparison* e = new comparison;
1337 e->left = op1;
1338 e->op = t->content;
56099f08 1339 e->tok = t;
2f1a1aea 1340 next ();
bb2e3076
FCE
1341 e->right = parse_shift ();
1342 op1 = e;
1343 t = peek ();
1344 }
1345
1346 return op1;
1347}
1348
1349
1350expression*
1351parser::parse_shift ()
1352{
1353 expression* op1 = parse_concatenation ();
1354
1355 const token* t = peek ();
1356 while (t && t->type == tok_operator &&
1357 (t->content == "<<" || t->content == ">>"))
1358 {
1359 binary_expression* e = new binary_expression;
1360 e->left = op1;
1361 e->op = t->content;
1362 e->tok = t;
1363 next ();
56099f08
FCE
1364 e->right = parse_concatenation ();
1365 op1 = e;
1366 t = peek ();
2f1a1aea 1367 }
56099f08
FCE
1368
1369 return op1;
2f1a1aea
FCE
1370}
1371
1372
1373expression*
1374parser::parse_concatenation ()
1375{
1376 expression* op1 = parse_additive ();
1377
1378 const token* t = peek ();
1379 // XXX: the actual awk string-concatenation operator is *whitespace*.
1380 // I don't know how to easily to model that here.
56099f08 1381 while (t && t->type == tok_operator && t->content == ".")
2f1a1aea
FCE
1382 {
1383 concatenation* e = new concatenation;
1384 e->left = op1;
1385 e->op = t->content;
56099f08 1386 e->tok = t;
2f1a1aea 1387 next ();
56099f08
FCE
1388 e->right = parse_additive ();
1389 op1 = e;
1390 t = peek ();
2f1a1aea 1391 }
56099f08
FCE
1392
1393 return op1;
2f1a1aea
FCE
1394}
1395
1396
1397expression*
1398parser::parse_additive ()
1399{
1400 expression* op1 = parse_multiplicative ();
1401
1402 const token* t = peek ();
56099f08 1403 while (t && t->type == tok_operator
2f1a1aea
FCE
1404 && (t->content == "+" || t->content == "-"))
1405 {
1406 binary_expression* e = new binary_expression;
1407 e->op = t->content;
1408 e->left = op1;
56099f08 1409 e->tok = t;
2f1a1aea 1410 next ();
56099f08
FCE
1411 e->right = parse_multiplicative ();
1412 op1 = e;
1413 t = peek ();
2f1a1aea 1414 }
56099f08
FCE
1415
1416 return op1;
2f1a1aea
FCE
1417}
1418
1419
1420expression*
1421parser::parse_multiplicative ()
1422{
1423 expression* op1 = parse_unary ();
1424
1425 const token* t = peek ();
56099f08 1426 while (t && t->type == tok_operator
2f1a1aea
FCE
1427 && (t->content == "*" || t->content == "/" || t->content == "%"))
1428 {
1429 binary_expression* e = new binary_expression;
1430 e->op = t->content;
1431 e->left = op1;
56099f08 1432 e->tok = t;
2f1a1aea 1433 next ();
56099f08
FCE
1434 e->right = parse_unary ();
1435 op1 = e;
1436 t = peek ();
2f1a1aea 1437 }
56099f08
FCE
1438
1439 return op1;
2f1a1aea
FCE
1440}
1441
1442
1443expression*
1444parser::parse_unary ()
1445{
1446 const token* t = peek ();
1447 if (t && t->type == tok_operator
bb2e3076
FCE
1448 && (t->content == "+" ||
1449 t->content == "-" ||
1450 t->content == "!" ||
1451 t->content == "~" ||
1452 false))
2f1a1aea
FCE
1453 {
1454 unary_expression* e = new unary_expression;
1455 e->op = t->content;
56099f08 1456 e->tok = t;
2f1a1aea
FCE
1457 next ();
1458 e->operand = parse_expression ();
1459 return e;
1460 }
1461 else
bb2e3076 1462 return parse_crement ();
2f1a1aea
FCE
1463}
1464
1465
1466expression*
1467parser::parse_crement () // as in "increment" / "decrement"
1468{
1469 const token* t = peek ();
1470 if (t && t->type == tok_operator
1471 && (t->content == "++" || t->content == "--"))
1472 {
1473 pre_crement* e = new pre_crement;
1474 e->op = t->content;
56099f08 1475 e->tok = t;
2f1a1aea
FCE
1476 next ();
1477 e->operand = parse_value ();
1478 return e;
1479 }
1480
1481 // post-crement or non-crement
1482 expression *op1 = parse_value ();
1483
1484 t = peek ();
1485 if (t && t->type == tok_operator
1486 && (t->content == "++" || t->content == "--"))
1487 {
1488 post_crement* e = new post_crement;
1489 e->op = t->content;
56099f08 1490 e->tok = t;
2f1a1aea
FCE
1491 next ();
1492 e->operand = op1;
1493 return e;
1494 }
1495 else
1496 return op1;
1497}
1498
1499
1500expression*
1501parser::parse_value ()
1502{
1503 const token* t = peek ();
1504 if (! t)
1505 throw parse_error ("expected value");
1506
1507 if (t->type == tok_operator && t->content == "(")
1508 {
1509 next ();
1510 expression* e = parse_expression ();
1511 t = next ();
1512 if (! (t->type == tok_operator && t->content == ")"))
1513 throw parse_error ("expected ')'");
1514 return e;
1515 }
1516 else if (t->type == tok_identifier)
1517 return parse_symbol ();
1518 else
1519 return parse_literal ();
1520}
1521
1522
0fefb486 1523// var, var[index], func(parms), thread->var, process->var
2f1a1aea 1524expression*
0fefb486 1525parser::parse_symbol ()
2f1a1aea
FCE
1526{
1527 const token* t = next ();
1528 if (t->type != tok_identifier)
1529 throw parse_error ("expected identifier");
56099f08 1530 const token* t2 = t;
2f1a1aea 1531 string name = t->content;
56099f08 1532
2f1a1aea 1533 t = peek ();
0fefb486
FCE
1534 if (t && t->type == tok_operator && t->content == "->")
1535 {
1536 // shorthand for process- or thread-specific array element
1537 // map "thread->VAR" to "VAR[$tid]",
1538 // and "process->VAR" to "VAR[$pid]"
1539 symbol* sym = new symbol;
1540 if (name == "thread")
1541 sym->name = "$tid";
1542 else if (name == "process")
1543 sym->name = "$pid";
1544 else
1545 throw parse_error ("expected 'thread->' or 'process->'");
1546 struct token* t2prime = new token (*t2);
1547 t2prime->content = sym->name;
1548 sym->tok = t2prime;
1549
1550 next (); // swallow "->"
1551 t = next ();
1552 if (! (t->type == tok_identifier))
1553 throw parse_error ("expected identifier");
1554
1555 struct arrayindex* ai = new arrayindex;
1556 ai->tok = t;
1557 ai->base = t->content;
1558 ai->indexes.push_back (sym);
1559 return ai;
1560 }
1561 else if (t && t->type == tok_operator && t->content == "[") // array
2f1a1aea
FCE
1562 {
1563 next ();
1564 struct arrayindex* ai = new arrayindex;
56099f08
FCE
1565 ai->tok = t2;
1566 ai->base = name;
2f1a1aea
FCE
1567 while (1)
1568 {
1569 ai->indexes.push_back (parse_expression ());
1570 t = next ();
1571 if (t->type == tok_operator && t->content == "]")
1572 break;
1573 if (t->type == tok_operator && t->content == ",")
1574 continue;
1575 else
1576 throw parse_error ("expected ',' or ']'");
1577 }
1578 return ai;
1579 }
1580 else if (t && t->type == tok_operator && t->content == "(") // function call
1581 {
1582 next ();
1583 struct functioncall* f = new functioncall;
56099f08
FCE
1584 f->tok = t2;
1585 f->function = name;
82919855
FCE
1586 // Allow empty actual parameter list
1587 const token* t3 = peek ();
1588 if (t3 && t3->type == tok_operator && t3->content == ")")
1589 {
1590 next ();
1591 return f;
1592 }
2f1a1aea 1593 while (1)
82919855
FCE
1594 {
1595 f->args.push_back (parse_expression ());
1596 t = next ();
1597 if (t->type == tok_operator && t->content == ")")
1598 break;
1599 if (t->type == tok_operator && t->content == ",")
1600 continue;
1601 else
1602 throw parse_error ("expected ',' or ')'");
1603 }
2f1a1aea
FCE
1604 return f;
1605 }
1606 else
1607 {
56099f08
FCE
1608 symbol* sym = new symbol;
1609 sym->name = name;
1610 sym->tok = t2;
1611 return sym;
2f1a1aea
FCE
1612 }
1613}
56099f08 1614
This page took 0.197367 seconds and 5 git commands to generate.