]>
Commit | Line | Data |
---|---|---|
2f1a1aea | 1 | // recursive descent parser for systemtap scripts |
69c68955 FCE |
2 | // Copyright (C) 2005 Red Hat Inc. |
3 | // | |
4 | // This file is part of systemtap, and is free software. You can | |
5 | // redistribute it and/or modify it under the terms of the GNU General | |
6 | // Public License (GPL); either version 2, or (at your option) any | |
7 | // later version. | |
2f1a1aea | 8 | |
2b066ec1 | 9 | #include "config.h" |
2f1a1aea FCE |
10 | #include "staptree.h" |
11 | #include "parse.h" | |
2b066ec1 FCE |
12 | #include <iostream> |
13 | #include <fstream> | |
2f1a1aea | 14 | #include <cctype> |
9c0c0e46 | 15 | #include <cstdlib> |
9c0c0e46 FCE |
16 | #include <cerrno> |
17 | #include <climits> | |
2f1a1aea FCE |
18 | |
19 | using namespace std; | |
20 | ||
21 | // ------------------------------------------------------------------------ | |
22 | ||
bb2e3076 FCE |
23 | |
24 | ||
2f1a1aea FCE |
25 | parser::parser (istream& i): |
26 | input_name ("<input>"), free_input (0), input (i, input_name), | |
27 | last_t (0), next_t (0), num_errors (0) | |
28 | { } | |
29 | ||
30 | parser::parser (const string& fn): | |
31 | input_name (fn), free_input (new ifstream (input_name.c_str(), ios::in)), | |
32 | input (* free_input, input_name), | |
33 | last_t (0), next_t (0), num_errors (0) | |
34 | { } | |
35 | ||
36 | parser::~parser() | |
37 | { | |
38 | if (free_input) delete free_input; | |
39 | } | |
40 | ||
41 | ||
82919855 FCE |
42 | stapfile* |
43 | parser::parse (std::istream& i) | |
44 | { | |
45 | parser p (i); | |
46 | return p.parse (); | |
47 | } | |
48 | ||
49 | ||
50 | stapfile* | |
51 | parser::parse (const std::string& n) | |
52 | { | |
53 | parser p (n); | |
54 | return p.parse (); | |
55 | } | |
56 | ||
57 | ||
56099f08 FCE |
58 | ostream& |
59 | operator << (ostream& o, const token& t) | |
60 | { | |
61 | o << (t.type == tok_junk ? "junk" : | |
62 | t.type == tok_identifier ? "identifier" : | |
63 | t.type == tok_operator ? "operator" : | |
64 | t.type == tok_string ? "string" : | |
65 | t.type == tok_number ? "number" : | |
54dfabe9 | 66 | t.type == tok_embedded ? "embedded-code" : |
56099f08 FCE |
67 | "unknown token"); |
68 | ||
54dfabe9 | 69 | // XXX: filter out embedded-code contents? |
56099f08 FCE |
70 | o << " '"; |
71 | for (unsigned i=0; i<t.content.length(); i++) | |
72 | { | |
73 | char c = t.content[i]; | |
74 | o << (isprint (c) ? c : '?'); | |
75 | } | |
76 | o << "'"; | |
77 | ||
78 | o << " at " | |
79 | << t.location.file << ":" | |
80 | << t.location.line << ":" | |
81 | << t.location.column; | |
82 | ||
83 | return o; | |
84 | } | |
85 | ||
86 | ||
2f1a1aea FCE |
87 | void |
88 | parser::print_error (const parse_error &pe) | |
89 | { | |
90 | cerr << "parse error: " << pe.what () << endl; | |
91 | ||
92 | const token* t = last_t; | |
93 | if (t) | |
56099f08 | 94 | cerr << "\tsaw: " << *t << endl; |
2f1a1aea | 95 | else |
56099f08 | 96 | cerr << "\tsaw: " << input_name << " EOF" << endl; |
2f1a1aea FCE |
97 | |
98 | // XXX: make it possible to print the last input line, | |
99 | // so as to line up an arrow with the specific error column | |
100 | ||
101 | num_errors ++; | |
102 | } | |
103 | ||
104 | ||
105 | const token* | |
106 | parser::last () | |
107 | { | |
108 | return last_t; | |
109 | } | |
110 | ||
111 | ||
112 | const token* | |
113 | parser::next () | |
114 | { | |
115 | if (! next_t) | |
116 | next_t = input.scan (); | |
117 | if (! next_t) | |
118 | throw parse_error ("unexpected end-of-file"); | |
119 | ||
2f1a1aea FCE |
120 | last_t = next_t; |
121 | // advance by zeroing next_t | |
122 | next_t = 0; | |
123 | return last_t; | |
124 | } | |
125 | ||
126 | ||
127 | const token* | |
128 | parser::peek () | |
129 | { | |
130 | if (! next_t) | |
131 | next_t = input.scan (); | |
132 | ||
133 | // cerr << "{" << (next_t ? next_t->content : "null") << "}"; | |
134 | ||
135 | // don't advance by zeroing next_t | |
136 | last_t = next_t; | |
137 | return next_t; | |
138 | } | |
139 | ||
140 | ||
141 | lexer::lexer (istream& i, const string& in): | |
142 | input (i), input_name (in), cursor_line (1), cursor_column (1) | |
143 | { } | |
144 | ||
bb2e3076 FCE |
145 | |
146 | int | |
147 | lexer::input_peek (unsigned n) | |
148 | { | |
149 | while (lookahead.size() <= n) | |
150 | { | |
151 | int c = input.get (); | |
152 | lookahead.push_back (input ? c : -1); | |
153 | } | |
154 | return lookahead[n]; | |
155 | } | |
156 | ||
157 | ||
2f1a1aea FCE |
158 | int |
159 | lexer::input_get () | |
160 | { | |
bb2e3076 FCE |
161 | int c = input_peek (0); |
162 | lookahead.erase (lookahead.begin ()); | |
163 | ||
164 | if (c < 0) return c; // EOF | |
165 | ||
2f1a1aea FCE |
166 | // update source cursor |
167 | if (c == '\n') | |
168 | { | |
169 | cursor_line ++; | |
170 | cursor_column = 1; | |
171 | } | |
172 | else | |
173 | cursor_column ++; | |
174 | ||
175 | return c; | |
176 | } | |
177 | ||
178 | ||
179 | token* | |
180 | lexer::scan () | |
181 | { | |
182 | token* n = new token; | |
183 | n->location.file = input_name; | |
184 | ||
185 | skip: | |
186 | n->location.line = cursor_line; | |
187 | n->location.column = cursor_column; | |
188 | ||
189 | int c = input_get(); | |
190 | if (c < 0) | |
191 | { | |
192 | delete n; | |
193 | return 0; | |
194 | } | |
195 | ||
196 | if (isspace (c)) | |
197 | goto skip; | |
198 | ||
54dfabe9 | 199 | else if (isalpha (c) || c == '$' || c == '_') |
2f1a1aea FCE |
200 | { |
201 | n->type = tok_identifier; | |
202 | n->content = (char) c; | |
203 | while (1) | |
204 | { | |
bb2e3076 | 205 | int c2 = input_peek (); |
2f1a1aea FCE |
206 | if (! input) |
207 | break; | |
0fefb486 | 208 | if ((isalnum(c2) || c2 == '_' || c2 == '$')) |
2f1a1aea FCE |
209 | { |
210 | n->content.push_back(c2); | |
211 | input_get (); | |
212 | } | |
213 | else | |
214 | break; | |
215 | } | |
216 | return n; | |
217 | } | |
218 | ||
219 | else if (isdigit (c)) | |
220 | { | |
2f1a1aea | 221 | n->type = tok_number; |
9c0c0e46 FCE |
222 | n->content = (char) c; |
223 | ||
2f1a1aea FCE |
224 | while (1) |
225 | { | |
bb2e3076 | 226 | int c2 = input_peek (); |
2f1a1aea FCE |
227 | if (! input) |
228 | break; | |
9c0c0e46 FCE |
229 | |
230 | // NB: isalnum is very permissive. We rely on strtol, called in | |
231 | // parser::parse_literal below, to confirm that the number string | |
232 | // is correctly formatted and in range. | |
233 | ||
234 | if (isalnum (c2)) | |
2f1a1aea | 235 | { |
9c0c0e46 | 236 | n->content.push_back (c2); |
2f1a1aea FCE |
237 | input_get (); |
238 | } | |
239 | else | |
240 | break; | |
241 | } | |
242 | return n; | |
243 | } | |
244 | ||
245 | else if (c == '\"') | |
246 | { | |
247 | n->type = tok_string; | |
248 | while (1) | |
249 | { | |
250 | c = input_get (); | |
251 | ||
252 | if (! input || c == '\n') | |
253 | { | |
254 | n->type = tok_junk; | |
255 | break; | |
256 | } | |
257 | if (c == '\"') // closing double-quotes | |
258 | break; | |
259 | else if (c == '\\') | |
260 | { | |
261 | // XXX: handle escape sequences | |
262 | } | |
263 | else | |
264 | n->content.push_back(c); | |
265 | } | |
266 | return n; | |
267 | } | |
268 | ||
269 | else if (ispunct (c)) | |
270 | { | |
bb2e3076 FCE |
271 | int c2 = input_peek (); |
272 | int c3 = input_peek (1); | |
273 | string s1 = string("") + (char) c; | |
274 | string s2 = (c2 > 0 ? s1 + (char) c2 : s1); | |
275 | string s3 = (c3 > 0 ? s2 + (char) c3 : s2); | |
2f1a1aea | 276 | |
bb2e3076 | 277 | if (s1 == "#") // shell comment |
2f1a1aea FCE |
278 | { |
279 | unsigned this_line = cursor_line; | |
bb2e3076 FCE |
280 | do { c = input_get (); } |
281 | while (c >= 0 && cursor_line == this_line); | |
2f1a1aea FCE |
282 | goto skip; |
283 | } | |
bb2e3076 | 284 | else if (s2 == "//") // C++ comment |
63a7c90e FCE |
285 | { |
286 | unsigned this_line = cursor_line; | |
bb2e3076 FCE |
287 | do { c = input_get (); } |
288 | while (c >= 0 && cursor_line == this_line); | |
63a7c90e FCE |
289 | goto skip; |
290 | } | |
291 | else if (c == '/' && c2 == '*') // C comment | |
292 | { | |
293 | c2 = input_get (); | |
294 | unsigned chars = 0; | |
bb2e3076 | 295 | while (c2 >= 0) |
63a7c90e FCE |
296 | { |
297 | chars ++; // track this to prevent "/*/" from being accepted | |
298 | c = c2; | |
299 | c2 = input_get (); | |
300 | if (chars > 1 && c == '*' && c2 == '/') | |
bb2e3076 | 301 | break; |
63a7c90e | 302 | } |
bb2e3076 | 303 | goto skip; |
63a7c90e | 304 | } |
54dfabe9 FCE |
305 | else if (c == '%' && c2 == '{') // embedded code |
306 | { | |
307 | n->type = tok_embedded; | |
308 | (void) input_get (); // swallow '{' already in c2 | |
309 | while (true) | |
310 | { | |
311 | c = input_get (); | |
312 | if (c == 0) // EOF | |
313 | { | |
314 | n->type = tok_junk; | |
315 | break; | |
316 | } | |
317 | if (c == '%') | |
318 | { | |
319 | c2 = input_peek (); | |
320 | if (c2 == '}') | |
321 | { | |
322 | (void) input_get (); // swallow '}' too | |
323 | break; | |
324 | } | |
325 | } | |
326 | n->content += c; | |
327 | } | |
328 | return n; | |
329 | } | |
2f1a1aea | 330 | |
bb2e3076 FCE |
331 | // We're committed to recognizing at least the first character |
332 | // as an operator. | |
2f1a1aea | 333 | n->type = tok_operator; |
2f1a1aea | 334 | |
bb2e3076 FCE |
335 | // match all valid operators, in decreasing size order |
336 | if (s3 == "<<<" || | |
337 | s3 == "<<=" || | |
338 | s3 == ">>=") | |
82919855 | 339 | { |
bb2e3076 FCE |
340 | n->content = s3; |
341 | input_get (); input_get (); // swallow other two characters | |
342 | } | |
343 | else if (s2 == "==" || | |
344 | s2 == "!=" || | |
345 | s2 == "<=" || | |
346 | s2 == ">=" || | |
347 | s2 == "+=" || | |
348 | s2 == "-=" || | |
349 | s2 == "*=" || | |
350 | s2 == "/=" || | |
351 | s2 == "%=" || | |
352 | s2 == "&=" || | |
353 | s2 == "^=" || | |
354 | s2 == "|=" || | |
355 | s2 == "&&" || | |
356 | s2 == "||" || | |
357 | s2 == "++" || | |
358 | s2 == "--" || | |
359 | s2 == "->" || | |
360 | s2 == "<<" || | |
361 | s2 == ">>") | |
362 | { | |
363 | n->content = s2; | |
364 | input_get (); // swallow other character | |
365 | } | |
366 | else | |
367 | { | |
368 | n->content = s1; | |
82919855 | 369 | } |
2f1a1aea FCE |
370 | |
371 | return n; | |
372 | } | |
373 | ||
374 | else | |
375 | { | |
376 | n->type = tok_junk; | |
377 | n->content = (char) c; | |
378 | return n; | |
379 | } | |
380 | } | |
381 | ||
382 | ||
383 | // ------------------------------------------------------------------------ | |
384 | ||
385 | stapfile* | |
386 | parser::parse () | |
387 | { | |
388 | stapfile* f = new stapfile; | |
389 | f->name = input_name; | |
56099f08 FCE |
390 | |
391 | bool empty = true; | |
392 | ||
2f1a1aea FCE |
393 | while (1) |
394 | { | |
395 | try | |
396 | { | |
397 | const token* t = peek (); | |
56099f08 | 398 | if (! t) // nice clean EOF |
2f1a1aea FCE |
399 | break; |
400 | ||
56099f08 | 401 | empty = false; |
2f1a1aea | 402 | if (t->type == tok_identifier && t->content == "probe") |
54dfabe9 | 403 | parse_probe (f->probes, f->aliases); |
2f1a1aea | 404 | else if (t->type == tok_identifier && t->content == "global") |
82919855 | 405 | parse_global (f->globals); |
56099f08 | 406 | else if (t->type == tok_identifier && t->content == "function") |
82919855 | 407 | f->functions.push_back (parse_functiondecl ()); |
54dfabe9 FCE |
408 | else if (t->type == tok_embedded) |
409 | f->embeds.push_back (parse_embeddedcode ()); | |
2f1a1aea | 410 | else |
54dfabe9 | 411 | throw parse_error ("expected 'probe', 'global', 'function', or embedded code"); |
2f1a1aea FCE |
412 | } |
413 | catch (parse_error& pe) | |
414 | { | |
415 | print_error (pe); | |
416 | // Quietly swallow all tokens until the next '}'. | |
417 | while (1) | |
418 | { | |
419 | const token* t = peek (); | |
420 | if (! t) | |
421 | break; | |
422 | next (); | |
423 | if (t->type == tok_operator && t->content == "}") | |
424 | break; | |
425 | } | |
426 | } | |
427 | } | |
428 | ||
56099f08 FCE |
429 | if (empty) |
430 | { | |
431 | cerr << "Input file '" << input_name << "' is empty or missing." << endl; | |
432 | delete f; | |
433 | return 0; | |
434 | } | |
435 | else if (num_errors > 0) | |
2f1a1aea FCE |
436 | { |
437 | cerr << num_errors << " parse error(s)." << endl; | |
438 | delete f; | |
56099f08 | 439 | return 0; |
2f1a1aea FCE |
440 | } |
441 | ||
442 | return f; | |
443 | } | |
444 | ||
445 | ||
20c6c071 | 446 | void |
54dfabe9 FCE |
447 | parser::parse_probe (std::vector<probe *> & probe_ret, |
448 | std::vector<probe_alias *> & alias_ret) | |
2f1a1aea | 449 | { |
82919855 FCE |
450 | const token* t0 = next (); |
451 | if (! (t0->type == tok_identifier && t0->content == "probe")) | |
452 | throw parse_error ("expected 'probe'"); | |
453 | ||
20c6c071 GH |
454 | vector<probe_point *> aliases; |
455 | vector<probe_point *> locations; | |
456 | ||
457 | bool equals_ok = true; | |
82919855 | 458 | |
2f1a1aea FCE |
459 | while (1) |
460 | { | |
461 | const token *t = peek (); | |
462 | if (t && t->type == tok_identifier) | |
463 | { | |
20c6c071 | 464 | probe_point * pp = parse_probe_point (); |
2f1a1aea | 465 | |
56099f08 | 466 | t = peek (); |
20c6c071 GH |
467 | if (equals_ok && t |
468 | && t->type == tok_operator && t->content == "=") | |
56099f08 | 469 | { |
20c6c071 GH |
470 | aliases.push_back(pp); |
471 | next (); | |
472 | continue; | |
473 | } | |
474 | else if (t && t->type == tok_operator && t->content == ",") | |
475 | { | |
476 | locations.push_back(pp); | |
477 | equals_ok = false; | |
56099f08 FCE |
478 | next (); |
479 | continue; | |
480 | } | |
481 | else if (t && t->type == tok_operator && t->content == "{") | |
20c6c071 GH |
482 | { |
483 | locations.push_back(pp); | |
484 | break; | |
485 | } | |
2f1a1aea | 486 | else |
9c0c0e46 | 487 | throw parse_error ("expected ',' or '{'"); |
2f1a1aea FCE |
488 | // XXX: unify logic with that in parse_symbol() |
489 | } | |
490 | else | |
9c0c0e46 | 491 | throw parse_error ("expected probe point specifier"); |
2f1a1aea | 492 | } |
20c6c071 | 493 | |
20c6c071 GH |
494 | if (aliases.empty()) |
495 | { | |
54dfabe9 FCE |
496 | probe* p = new probe; |
497 | p->tok = t0; | |
498 | p->locations = locations; | |
499 | p->body = parse_stmt_block (); | |
500 | probe_ret.push_back (p); | |
20c6c071 GH |
501 | } |
502 | else | |
503 | { | |
54dfabe9 FCE |
504 | probe_alias* p = new probe_alias (aliases); |
505 | p->tok = t0; | |
506 | p->locations = locations; | |
507 | p->body = parse_stmt_block (); | |
508 | alias_ret.push_back (p); | |
20c6c071 | 509 | } |
54dfabe9 | 510 | } |
20c6c071 | 511 | |
54dfabe9 FCE |
512 | |
513 | embeddedcode* | |
514 | parser::parse_embeddedcode () | |
515 | { | |
516 | embeddedcode* e = new embeddedcode; | |
517 | const token* t = next (); | |
518 | if (t->type != tok_embedded) | |
519 | throw parse_error ("expected embedded code"); | |
520 | ||
521 | e->tok = t; | |
522 | e->code = t->content; | |
523 | return e; | |
2f1a1aea FCE |
524 | } |
525 | ||
526 | ||
527 | block* | |
56099f08 | 528 | parser::parse_stmt_block () |
2f1a1aea FCE |
529 | { |
530 | block* pb = new block; | |
531 | ||
56099f08 FCE |
532 | const token* t = next (); |
533 | if (! (t->type == tok_operator && t->content == "{")) | |
534 | throw parse_error ("expected '{'"); | |
535 | ||
536 | pb->tok = t; | |
2b066ec1 | 537 | |
2f1a1aea FCE |
538 | while (1) |
539 | { | |
540 | try | |
541 | { | |
2b066ec1 FCE |
542 | t = peek (); |
543 | if (t && t->type == tok_operator && t->content == "}") | |
544 | { | |
545 | next (); | |
546 | break; | |
547 | } | |
548 | ||
2f1a1aea | 549 | pb->statements.push_back (parse_statement ()); |
2f1a1aea FCE |
550 | } |
551 | catch (parse_error& pe) | |
552 | { | |
553 | print_error (pe); | |
54dfabe9 | 554 | |
2f1a1aea FCE |
555 | // Quietly swallow all tokens until the next ';' or '}'. |
556 | while (1) | |
557 | { | |
558 | const token* t = peek (); | |
54dfabe9 | 559 | if (! t) return 0; |
2f1a1aea | 560 | next (); |
54dfabe9 FCE |
561 | if (t->type == tok_operator |
562 | && (t->content == "}" || t->content == ";")) | |
2f1a1aea FCE |
563 | break; |
564 | } | |
565 | } | |
566 | } | |
567 | ||
568 | return pb; | |
569 | } | |
570 | ||
571 | ||
572 | statement* | |
573 | parser::parse_statement () | |
574 | { | |
575 | const token* t = peek (); | |
576 | if (t && t->type == tok_operator && t->content == ";") | |
577 | { | |
69c68955 FCE |
578 | null_statement* n = new null_statement (); |
579 | n->tok = next (); | |
580 | return n; | |
2f1a1aea FCE |
581 | } |
582 | else if (t && t->type == tok_operator && t->content == "{") | |
56099f08 | 583 | return parse_stmt_block (); |
2f1a1aea | 584 | else if (t && t->type == tok_identifier && t->content == "if") |
56099f08 | 585 | return parse_if_statement (); |
69c68955 FCE |
586 | else if (t && t->type == tok_identifier && t->content == "for") |
587 | return parse_for_loop (); | |
69c68955 FCE |
588 | else if (t && t->type == tok_identifier && t->content == "foreach") |
589 | return parse_foreach_loop (); | |
56099f08 FCE |
590 | else if (t && t->type == tok_identifier && t->content == "return") |
591 | return parse_return_statement (); | |
592 | else if (t && t->type == tok_identifier && t->content == "delete") | |
593 | return parse_delete_statement (); | |
f3c26ea5 FCE |
594 | else if (t && t->type == tok_identifier && t->content == "while") |
595 | return parse_while_loop (); | |
596 | else if (t && t->type == tok_identifier && t->content == "break") | |
597 | return parse_break_statement (); | |
598 | else if (t && t->type == tok_identifier && t->content == "continue") | |
599 | return parse_continue_statement (); | |
600 | else if (t && t->type == tok_identifier && t->content == "next") | |
601 | return parse_next_statement (); | |
602 | // XXX: "do/while" statement? | |
2f1a1aea FCE |
603 | else if (t && (t->type == tok_operator || // expressions are flexible |
604 | t->type == tok_identifier || | |
605 | t->type == tok_number || | |
606 | t->type == tok_string)) | |
69c68955 | 607 | return parse_expr_statement (); |
54dfabe9 | 608 | // XXX: consider generally accepting tok_embedded here too |
2f1a1aea FCE |
609 | else |
610 | throw parse_error ("expected statement"); | |
611 | } | |
612 | ||
613 | ||
56099f08 FCE |
614 | void |
615 | parser::parse_global (vector <vardecl*>& globals) | |
2f1a1aea | 616 | { |
82919855 FCE |
617 | const token* t0 = next (); |
618 | if (! (t0->type == tok_identifier && t0->content == "global")) | |
619 | throw parse_error ("expected 'global'"); | |
620 | ||
56099f08 FCE |
621 | while (1) |
622 | { | |
623 | const token* t = next (); | |
624 | if (! (t->type == tok_identifier)) | |
625 | throw parse_error ("expected identifier"); | |
626 | ||
2b066ec1 FCE |
627 | bool dupe = false; |
628 | for (unsigned i=0; i<globals.size(); i++) | |
629 | if (globals[i]->name == t->content) | |
630 | dupe = true; | |
631 | ||
632 | if (! dupe) | |
633 | { | |
634 | vardecl* d = new vardecl; | |
635 | d->name = t->content; | |
636 | d->tok = t; | |
637 | globals.push_back (d); | |
638 | } | |
56099f08 | 639 | |
82919855 FCE |
640 | t = peek (); |
641 | if (t && t->type == tok_operator && t->content == ",") | |
642 | { | |
643 | next (); | |
644 | continue; | |
645 | } | |
56099f08 | 646 | else |
82919855 | 647 | break; |
56099f08 FCE |
648 | } |
649 | } | |
650 | ||
651 | ||
652 | functiondecl* | |
653 | parser::parse_functiondecl () | |
654 | { | |
82919855 FCE |
655 | const token* t = next (); |
656 | if (! (t->type == tok_identifier && t->content == "function")) | |
657 | throw parse_error ("expected 'function'"); | |
658 | ||
56099f08 FCE |
659 | functiondecl *fd = new functiondecl (); |
660 | ||
82919855 | 661 | t = next (); |
56099f08 FCE |
662 | if (! (t->type == tok_identifier)) |
663 | throw parse_error ("expected identifier"); | |
664 | fd->name = t->content; | |
665 | fd->tok = t; | |
666 | ||
667 | t = next (); | |
668 | if (! (t->type == tok_operator && t->content == "(")) | |
669 | throw parse_error ("expected '('"); | |
670 | ||
671 | while (1) | |
672 | { | |
673 | t = next (); | |
674 | ||
675 | // permit zero-argument fuctions | |
676 | if (t->type == tok_operator && t->content == ")") | |
677 | break; | |
678 | else if (! (t->type == tok_identifier)) | |
679 | throw parse_error ("expected identifier"); | |
680 | vardecl* vd = new vardecl; | |
681 | vd->name = t->content; | |
682 | vd->tok = t; | |
683 | fd->formal_args.push_back (vd); | |
684 | ||
685 | t = next (); | |
686 | if (t->type == tok_operator && t->content == ")") | |
687 | break; | |
688 | if (t->type == tok_operator && t->content == ",") | |
689 | continue; | |
690 | else | |
691 | throw parse_error ("expected ',' or ')'"); | |
692 | } | |
693 | ||
54dfabe9 FCE |
694 | t = peek (); |
695 | if (t && t->type == tok_embedded) | |
696 | fd->body = parse_embeddedcode (); | |
697 | else | |
698 | fd->body = parse_stmt_block (); | |
56099f08 | 699 | return fd; |
2f1a1aea FCE |
700 | } |
701 | ||
702 | ||
9c0c0e46 FCE |
703 | probe_point* |
704 | parser::parse_probe_point () | |
2f1a1aea | 705 | { |
9c0c0e46 | 706 | probe_point* pl = new probe_point; |
2f1a1aea | 707 | |
2b066ec1 | 708 | // XXX: add support for probe point aliases |
f4b28491 | 709 | // e.g. probe alias = foo { ... } |
9c0c0e46 | 710 | while (1) |
2f1a1aea | 711 | { |
9c0c0e46 FCE |
712 | const token* t = next (); |
713 | if (t->type != tok_identifier) | |
714 | throw parse_error ("expected identifier"); | |
715 | ||
716 | if (pl->tok == 0) pl->tok = t; | |
717 | ||
718 | probe_point::component* c = new probe_point::component; | |
719 | c->functor = t->content; | |
720 | pl->components.push_back (c); | |
721 | // NB though we still may add c->arg soon | |
722 | ||
723 | t = peek (); | |
724 | if (t && t->type == tok_operator | |
20c6c071 | 725 | && (t->content == "{" || t->content == "," || t->content == "=")) |
9c0c0e46 FCE |
726 | break; |
727 | ||
728 | if (t && t->type == tok_operator && t->content == "(") | |
729 | { | |
730 | next (); // consume "(" | |
731 | c->arg = parse_literal (); | |
732 | ||
733 | t = next (); | |
734 | if (! (t->type == tok_operator && t->content == ")")) | |
735 | throw parse_error ("expected ')'"); | |
736 | ||
737 | t = peek (); | |
738 | if (t && t->type == tok_operator | |
20c6c071 | 739 | && (t->content == "{" || t->content == "," || t->content == "=")) |
9c0c0e46 | 740 | break; |
2b066ec1 FCE |
741 | else if (t && t->type == tok_operator && |
742 | t->content == "(") | |
743 | throw parse_error ("unexpected '.' or ',' or '{'"); | |
9c0c0e46 FCE |
744 | } |
745 | // fall through | |
746 | ||
747 | if (t && t->type == tok_operator && t->content == ".") | |
748 | next (); | |
749 | else | |
20c6c071 | 750 | throw parse_error ("expected '.' or ',' or '(' or '{' or '='"); |
2f1a1aea FCE |
751 | } |
752 | ||
753 | return pl; | |
754 | } | |
755 | ||
756 | ||
757 | literal* | |
758 | parser::parse_literal () | |
759 | { | |
760 | const token* t = next (); | |
56099f08 | 761 | literal* l; |
2f1a1aea | 762 | if (t->type == tok_string) |
56099f08 | 763 | l = new literal_string (t->content); |
2f1a1aea | 764 | else if (t->type == tok_number) |
9c0c0e46 FCE |
765 | { |
766 | const char* startp = t->content.c_str (); | |
767 | char* endp = (char*) startp; | |
768 | ||
769 | // NB: we allow controlled overflow from LONG_MIN .. ULONG_MAX | |
770 | errno = 0; | |
771 | long long value = strtoll (startp, & endp, 0); | |
772 | if (errno == ERANGE || errno == EINVAL || *endp != '\0' | |
3f43362a | 773 | || value > 4294967295LL || value < (-2147483647LL-1)) |
9c0c0e46 FCE |
774 | throw parse_error ("number invalid or out of range"); |
775 | ||
776 | long value2 = (long) value; | |
777 | l = new literal_number (value2); | |
778 | } | |
2f1a1aea FCE |
779 | else |
780 | throw parse_error ("expected literal string or number"); | |
56099f08 FCE |
781 | |
782 | l->tok = t; | |
783 | return l; | |
2f1a1aea FCE |
784 | } |
785 | ||
786 | ||
787 | if_statement* | |
788 | parser::parse_if_statement () | |
789 | { | |
790 | const token* t = next (); | |
56099f08 FCE |
791 | if (! (t->type == tok_identifier && t->content == "if")) |
792 | throw parse_error ("expected 'if'"); | |
793 | if_statement* s = new if_statement; | |
794 | s->tok = t; | |
795 | ||
796 | t = next (); | |
2f1a1aea FCE |
797 | if (! (t->type == tok_operator && t->content == "(")) |
798 | throw parse_error ("expected '('"); | |
799 | ||
2f1a1aea FCE |
800 | s->condition = parse_expression (); |
801 | ||
802 | t = next (); | |
803 | if (! (t->type == tok_operator && t->content == ")")) | |
804 | throw parse_error ("expected ')'"); | |
805 | ||
806 | s->thenblock = parse_statement (); | |
807 | ||
808 | t = peek (); | |
809 | if (t && t->type == tok_identifier && t->content == "else") | |
810 | { | |
811 | next (); | |
812 | s->elseblock = parse_statement (); | |
813 | } | |
814 | ||
815 | return s; | |
816 | } | |
817 | ||
818 | ||
69c68955 FCE |
819 | expr_statement* |
820 | parser::parse_expr_statement () | |
821 | { | |
822 | expr_statement *es = new expr_statement; | |
823 | const token* t = peek (); | |
824 | es->tok = t; | |
825 | es->value = parse_expression (); | |
826 | return es; | |
827 | } | |
828 | ||
829 | ||
56099f08 FCE |
830 | return_statement* |
831 | parser::parse_return_statement () | |
832 | { | |
833 | const token* t = next (); | |
834 | if (! (t->type == tok_identifier && t->content == "return")) | |
835 | throw parse_error ("expected 'return'"); | |
836 | return_statement* s = new return_statement; | |
837 | s->tok = t; | |
838 | s->value = parse_expression (); | |
839 | return s; | |
840 | } | |
841 | ||
842 | ||
843 | delete_statement* | |
844 | parser::parse_delete_statement () | |
845 | { | |
846 | const token* t = next (); | |
847 | if (! (t->type == tok_identifier && t->content == "delete")) | |
848 | throw parse_error ("expected 'delete'"); | |
849 | delete_statement* s = new delete_statement; | |
850 | s->tok = t; | |
851 | s->value = parse_expression (); | |
852 | return s; | |
853 | } | |
854 | ||
855 | ||
f3c26ea5 FCE |
856 | next_statement* |
857 | parser::parse_next_statement () | |
858 | { | |
859 | const token* t = next (); | |
860 | if (! (t->type == tok_identifier && t->content == "next")) | |
861 | throw parse_error ("expected 'next'"); | |
862 | next_statement* s = new next_statement; | |
863 | s->tok = t; | |
864 | return s; | |
865 | } | |
866 | ||
867 | ||
868 | break_statement* | |
869 | parser::parse_break_statement () | |
870 | { | |
871 | const token* t = next (); | |
872 | if (! (t->type == tok_identifier && t->content == "break")) | |
873 | throw parse_error ("expected 'break'"); | |
874 | break_statement* s = new break_statement; | |
875 | s->tok = t; | |
876 | return s; | |
877 | } | |
878 | ||
879 | ||
880 | continue_statement* | |
881 | parser::parse_continue_statement () | |
882 | { | |
883 | const token* t = next (); | |
884 | if (! (t->type == tok_identifier && t->content == "continue")) | |
885 | throw parse_error ("expected 'continue'"); | |
886 | continue_statement* s = new continue_statement; | |
887 | s->tok = t; | |
888 | return s; | |
889 | } | |
890 | ||
891 | ||
69c68955 FCE |
892 | for_loop* |
893 | parser::parse_for_loop () | |
894 | { | |
f3c26ea5 FCE |
895 | const token* t = next (); |
896 | if (! (t->type == tok_identifier && t->content == "for")) | |
897 | throw parse_error ("expected 'for'"); | |
898 | for_loop* s = new for_loop; | |
899 | s->tok = t; | |
900 | ||
901 | t = next (); | |
902 | if (! (t->type == tok_operator && t->content == "(")) | |
903 | throw parse_error ("expected '('"); | |
904 | ||
905 | // initializer + ";" | |
906 | t = peek (); | |
907 | if (t && t->type == tok_operator && t->content == ";") | |
908 | { | |
909 | literal_number* l = new literal_number(0); | |
910 | expr_statement* es = new expr_statement; | |
911 | es->value = l; | |
912 | s->init = es; | |
913 | es->value->tok = es->tok = next (); | |
914 | } | |
915 | else | |
916 | { | |
917 | s->init = parse_expr_statement (); | |
918 | t = next (); | |
919 | if (! (t->type == tok_operator && t->content == ";")) | |
920 | throw parse_error ("expected ';'"); | |
921 | } | |
922 | ||
923 | // condition + ";" | |
924 | t = peek (); | |
925 | if (t && t->type == tok_operator && t->content == ";") | |
926 | { | |
927 | literal_number* l = new literal_number(1); | |
928 | s->cond = l; | |
929 | s->cond->tok = next (); | |
930 | } | |
931 | else | |
932 | { | |
933 | s->cond = parse_expression (); | |
934 | t = next (); | |
935 | if (! (t->type == tok_operator && t->content == ";")) | |
936 | throw parse_error ("expected ';'"); | |
937 | } | |
938 | ||
939 | // increment + ")" | |
940 | t = peek (); | |
941 | if (t && t->type == tok_operator && t->content == ")") | |
942 | { | |
943 | literal_number* l = new literal_number(2); | |
944 | expr_statement* es = new expr_statement; | |
945 | es->value = l; | |
946 | s->incr = es; | |
947 | es->value->tok = es->tok = next (); | |
948 | } | |
949 | else | |
950 | { | |
951 | s->incr = parse_expr_statement (); | |
952 | t = next (); | |
953 | if (! (t->type == tok_operator && t->content == ")")) | |
954 | throw parse_error ("expected ';'"); | |
955 | } | |
956 | ||
957 | // block | |
958 | s->block = parse_statement (); | |
959 | ||
960 | return s; | |
961 | } | |
962 | ||
963 | ||
964 | for_loop* | |
965 | parser::parse_while_loop () | |
966 | { | |
967 | const token* t = next (); | |
968 | if (! (t->type == tok_identifier && t->content == "while")) | |
969 | throw parse_error ("expected 'while'"); | |
970 | for_loop* s = new for_loop; | |
971 | s->tok = t; | |
972 | ||
973 | t = next (); | |
974 | if (! (t->type == tok_operator && t->content == "(")) | |
975 | throw parse_error ("expected '('"); | |
976 | ||
977 | // dummy init and incr fields | |
978 | literal_number* l = new literal_number(0); | |
979 | expr_statement* es = new expr_statement; | |
980 | es->value = l; | |
981 | s->init = es; | |
982 | es->value->tok = es->tok = t; | |
983 | ||
984 | l = new literal_number(2); | |
985 | es = new expr_statement; | |
986 | es->value = l; | |
987 | s->incr = es; | |
988 | es->value->tok = es->tok = t; | |
989 | ||
990 | ||
991 | // condition | |
992 | s->cond = parse_expression (); | |
993 | ||
994 | ||
995 | t = next (); | |
996 | if (! (t->type == tok_operator && t->content == ")")) | |
997 | throw parse_error ("expected ')'"); | |
998 | ||
999 | // block | |
1000 | s->block = parse_statement (); | |
1001 | ||
1002 | return s; | |
69c68955 FCE |
1003 | } |
1004 | ||
1005 | ||
1006 | foreach_loop* | |
1007 | parser::parse_foreach_loop () | |
1008 | { | |
1009 | const token* t = next (); | |
1010 | if (! (t->type == tok_identifier && t->content == "foreach")) | |
1011 | throw parse_error ("expected 'foreach'"); | |
1012 | foreach_loop* s = new foreach_loop; | |
1013 | s->tok = t; | |
1014 | ||
1015 | t = next (); | |
1016 | if (! (t->type == tok_operator && t->content == "(")) | |
1017 | throw parse_error ("expected '('"); | |
1018 | ||
1019 | // see also parse_array_in | |
1020 | ||
1021 | bool parenthesized = false; | |
1022 | t = peek (); | |
1023 | if (t && t->type == tok_operator && t->content == "[") | |
1024 | { | |
1025 | next (); | |
1026 | parenthesized = true; | |
1027 | } | |
1028 | ||
1029 | while (1) | |
1030 | { | |
1031 | t = next (); | |
1032 | if (! (t->type == tok_identifier)) | |
1033 | throw parse_error ("expected identifier"); | |
1034 | symbol* sym = new symbol; | |
1035 | sym->tok = t; | |
1036 | sym->name = t->content; | |
1037 | s->indexes.push_back (sym); | |
1038 | ||
1039 | if (parenthesized) | |
1040 | { | |
1041 | const token* t = peek (); | |
1042 | if (t && t->type == tok_operator && t->content == ",") | |
1043 | { | |
1044 | next (); | |
1045 | continue; | |
1046 | } | |
1047 | else if (t && t->type == tok_operator && t->content == "]") | |
1048 | { | |
1049 | next (); | |
1050 | break; | |
1051 | } | |
1052 | else | |
1053 | throw parse_error ("expected ',' or ']'"); | |
1054 | } | |
1055 | else | |
1056 | break; // expecting only one expression | |
1057 | } | |
1058 | ||
1059 | t = next (); | |
1060 | if (! (t->type == tok_identifier && t->content == "in")) | |
1061 | throw parse_error ("expected 'in'"); | |
1062 | ||
1063 | t = next (); | |
1064 | if (t->type != tok_identifier) | |
1065 | throw parse_error ("expected identifier"); | |
1066 | s->base = t->content; | |
1067 | ||
1068 | t = next (); | |
1069 | if (! (t->type == tok_operator && t->content == ")")) | |
1070 | throw parse_error ("expected ')'"); | |
1071 | ||
1072 | s->block = parse_statement (); | |
1073 | return s; | |
1074 | } | |
1075 | ||
1076 | ||
2f1a1aea FCE |
1077 | expression* |
1078 | parser::parse_expression () | |
1079 | { | |
1080 | return parse_assignment (); | |
1081 | } | |
1082 | ||
2f1a1aea FCE |
1083 | |
1084 | expression* | |
1085 | parser::parse_assignment () | |
1086 | { | |
1087 | expression* op1 = parse_ternary (); | |
1088 | ||
1089 | const token* t = peek (); | |
82919855 FCE |
1090 | // right-associative operators |
1091 | if (t && t->type == tok_operator | |
2f1a1aea | 1092 | && (t->content == "=" || |
82919855 | 1093 | t->content == "<<<" || |
2f1a1aea | 1094 | t->content == "+=" || |
bb2e3076 FCE |
1095 | t->content == "-=" || |
1096 | t->content == "*=" || | |
1097 | t->content == "/=" || | |
1098 | t->content == "%=" || | |
1099 | t->content == "<<=" || | |
1100 | t->content == ">>=" || | |
1101 | t->content == "&=" || | |
1102 | t->content == "^=" || | |
1103 | t->content == "|=" || | |
1104 | false)) | |
2f1a1aea | 1105 | { |
bb2e3076 | 1106 | // NB: lvalueness is checked during elaboration / translation |
2f1a1aea | 1107 | assignment* e = new assignment; |
56099f08 | 1108 | e->left = op1; |
2f1a1aea | 1109 | e->op = t->content; |
56099f08 | 1110 | e->tok = t; |
2f1a1aea | 1111 | next (); |
82919855 | 1112 | e->right = parse_expression (); |
56099f08 | 1113 | op1 = e; |
2f1a1aea | 1114 | } |
56099f08 FCE |
1115 | |
1116 | return op1; | |
2f1a1aea FCE |
1117 | } |
1118 | ||
1119 | ||
1120 | expression* | |
1121 | parser::parse_ternary () | |
1122 | { | |
1123 | expression* op1 = parse_logical_or (); | |
1124 | ||
1125 | const token* t = peek (); | |
1126 | if (t && t->type == tok_operator && t->content == "?") | |
1127 | { | |
2f1a1aea | 1128 | ternary_expression* e = new ternary_expression; |
56099f08 | 1129 | e->tok = t; |
2f1a1aea | 1130 | e->cond = op1; |
56099f08 FCE |
1131 | next (); |
1132 | e->truevalue = parse_expression (); // XXX | |
2f1a1aea FCE |
1133 | |
1134 | t = next (); | |
1135 | if (! (t->type == tok_operator && t->content == ":")) | |
1136 | throw parse_error ("expected ':'"); | |
1137 | ||
56099f08 | 1138 | e->falsevalue = parse_expression (); // XXX |
2f1a1aea FCE |
1139 | return e; |
1140 | } | |
1141 | else | |
1142 | return op1; | |
1143 | } | |
1144 | ||
1145 | ||
1146 | expression* | |
1147 | parser::parse_logical_or () | |
1148 | { | |
1149 | expression* op1 = parse_logical_and (); | |
1150 | ||
1151 | const token* t = peek (); | |
56099f08 | 1152 | while (t && t->type == tok_operator && t->content == "||") |
2f1a1aea | 1153 | { |
2f1a1aea | 1154 | logical_or_expr* e = new logical_or_expr; |
56099f08 FCE |
1155 | e->tok = t; |
1156 | e->op = t->content; | |
2f1a1aea | 1157 | e->left = op1; |
56099f08 FCE |
1158 | next (); |
1159 | e->right = parse_logical_and (); | |
1160 | op1 = e; | |
1161 | t = peek (); | |
2f1a1aea | 1162 | } |
56099f08 FCE |
1163 | |
1164 | return op1; | |
2f1a1aea FCE |
1165 | } |
1166 | ||
1167 | ||
1168 | expression* | |
1169 | parser::parse_logical_and () | |
1170 | { | |
bb2e3076 | 1171 | expression* op1 = parse_boolean_or (); |
2f1a1aea FCE |
1172 | |
1173 | const token* t = peek (); | |
56099f08 | 1174 | while (t && t->type == tok_operator && t->content == "&&") |
2f1a1aea | 1175 | { |
2f1a1aea FCE |
1176 | logical_and_expr *e = new logical_and_expr; |
1177 | e->left = op1; | |
56099f08 FCE |
1178 | e->op = t->content; |
1179 | e->tok = t; | |
1180 | next (); | |
bb2e3076 FCE |
1181 | e->right = parse_boolean_or (); |
1182 | op1 = e; | |
1183 | t = peek (); | |
1184 | } | |
1185 | ||
1186 | return op1; | |
1187 | } | |
1188 | ||
1189 | ||
1190 | expression* | |
1191 | parser::parse_boolean_or () | |
1192 | { | |
1193 | expression* op1 = parse_boolean_xor (); | |
1194 | ||
1195 | const token* t = peek (); | |
1196 | while (t && t->type == tok_operator && t->content == "|") | |
1197 | { | |
1198 | binary_expression* e = new binary_expression; | |
1199 | e->left = op1; | |
1200 | e->op = t->content; | |
1201 | e->tok = t; | |
1202 | next (); | |
1203 | e->right = parse_boolean_xor (); | |
1204 | op1 = e; | |
1205 | t = peek (); | |
1206 | } | |
1207 | ||
1208 | return op1; | |
1209 | } | |
1210 | ||
1211 | ||
1212 | expression* | |
1213 | parser::parse_boolean_xor () | |
1214 | { | |
1215 | expression* op1 = parse_boolean_and (); | |
1216 | ||
1217 | const token* t = peek (); | |
1218 | while (t && t->type == tok_operator && t->content == "^") | |
1219 | { | |
1220 | binary_expression* e = new binary_expression; | |
1221 | e->left = op1; | |
1222 | e->op = t->content; | |
1223 | e->tok = t; | |
1224 | next (); | |
1225 | e->right = parse_boolean_and (); | |
1226 | op1 = e; | |
1227 | t = peek (); | |
1228 | } | |
1229 | ||
1230 | return op1; | |
1231 | } | |
1232 | ||
1233 | ||
1234 | expression* | |
1235 | parser::parse_boolean_and () | |
1236 | { | |
1237 | expression* op1 = parse_array_in (); | |
1238 | ||
1239 | const token* t = peek (); | |
1240 | while (t && t->type == tok_operator && t->content == "&") | |
1241 | { | |
1242 | binary_expression* e = new binary_expression; | |
1243 | e->left = op1; | |
1244 | e->op = t->content; | |
1245 | e->tok = t; | |
1246 | next (); | |
56099f08 FCE |
1247 | e->right = parse_array_in (); |
1248 | op1 = e; | |
1249 | t = peek (); | |
2f1a1aea | 1250 | } |
56099f08 FCE |
1251 | |
1252 | return op1; | |
2f1a1aea FCE |
1253 | } |
1254 | ||
1255 | ||
1256 | expression* | |
1257 | parser::parse_array_in () | |
1258 | { | |
ce10591c | 1259 | // This is a very tricky case. All these are legit expressions: |
69c68955 | 1260 | // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b" |
ce10591c FCE |
1261 | vector<expression*> indexes; |
1262 | bool parenthesized = false; | |
2f1a1aea FCE |
1263 | |
1264 | const token* t = peek (); | |
69c68955 | 1265 | if (t && t->type == tok_operator && t->content == "[") |
ce10591c FCE |
1266 | { |
1267 | next (); | |
1268 | parenthesized = true; | |
1269 | } | |
1270 | ||
1271 | while (1) | |
1272 | { | |
1273 | expression* op1 = parse_comparison (); | |
1274 | indexes.push_back (op1); | |
1275 | ||
1276 | if (parenthesized) | |
1277 | { | |
1278 | const token* t = peek (); | |
1279 | if (t && t->type == tok_operator && t->content == ",") | |
1280 | { | |
1281 | next (); | |
1282 | continue; | |
1283 | } | |
69c68955 | 1284 | else if (t && t->type == tok_operator && t->content == "]") |
ce10591c FCE |
1285 | { |
1286 | next (); | |
1287 | break; | |
1288 | } | |
1289 | else | |
69c68955 | 1290 | throw parse_error ("expected ',' or ']'"); |
ce10591c FCE |
1291 | } |
1292 | else | |
1293 | break; // expecting only one expression | |
1294 | } | |
1295 | ||
1296 | t = peek (); | |
2f1a1aea FCE |
1297 | if (t && t->type == tok_identifier && t->content == "in") |
1298 | { | |
2f1a1aea | 1299 | array_in *e = new array_in; |
56099f08 | 1300 | e->tok = t; |
ce10591c FCE |
1301 | next (); // swallow "in" |
1302 | ||
1303 | arrayindex* a = new arrayindex; | |
1304 | a->indexes = indexes; | |
1305 | ||
1306 | t = next (); | |
1307 | if (t->type != tok_identifier) | |
1308 | throw parse_error ("expected identifier"); | |
1309 | a->tok = t; | |
1310 | a->base = t->content; | |
1311 | ||
1312 | e->operand = a; | |
2f1a1aea FCE |
1313 | return e; |
1314 | } | |
ce10591c FCE |
1315 | else if (indexes.size() == 1) // no "in" - need one expression only |
1316 | return indexes[0]; | |
2f1a1aea | 1317 | else |
ce10591c | 1318 | throw parse_error ("unexpected comma-separated expression list"); |
2f1a1aea FCE |
1319 | } |
1320 | ||
1321 | ||
1322 | expression* | |
1323 | parser::parse_comparison () | |
1324 | { | |
bb2e3076 | 1325 | expression* op1 = parse_shift (); |
2f1a1aea FCE |
1326 | |
1327 | const token* t = peek (); | |
56099f08 | 1328 | while (t && t->type == tok_operator |
553d27a5 FCE |
1329 | && (t->content == ">" || |
1330 | t->content == "<" || | |
1331 | t->content == "==" || | |
1332 | t->content == "!=" || | |
1333 | t->content == "<=" || | |
bb2e3076 | 1334 | t->content == ">=")) |
2f1a1aea FCE |
1335 | { |
1336 | comparison* e = new comparison; | |
1337 | e->left = op1; | |
1338 | e->op = t->content; | |
56099f08 | 1339 | e->tok = t; |
2f1a1aea | 1340 | next (); |
bb2e3076 FCE |
1341 | e->right = parse_shift (); |
1342 | op1 = e; | |
1343 | t = peek (); | |
1344 | } | |
1345 | ||
1346 | return op1; | |
1347 | } | |
1348 | ||
1349 | ||
1350 | expression* | |
1351 | parser::parse_shift () | |
1352 | { | |
1353 | expression* op1 = parse_concatenation (); | |
1354 | ||
1355 | const token* t = peek (); | |
1356 | while (t && t->type == tok_operator && | |
1357 | (t->content == "<<" || t->content == ">>")) | |
1358 | { | |
1359 | binary_expression* e = new binary_expression; | |
1360 | e->left = op1; | |
1361 | e->op = t->content; | |
1362 | e->tok = t; | |
1363 | next (); | |
56099f08 FCE |
1364 | e->right = parse_concatenation (); |
1365 | op1 = e; | |
1366 | t = peek (); | |
2f1a1aea | 1367 | } |
56099f08 FCE |
1368 | |
1369 | return op1; | |
2f1a1aea FCE |
1370 | } |
1371 | ||
1372 | ||
1373 | expression* | |
1374 | parser::parse_concatenation () | |
1375 | { | |
1376 | expression* op1 = parse_additive (); | |
1377 | ||
1378 | const token* t = peek (); | |
1379 | // XXX: the actual awk string-concatenation operator is *whitespace*. | |
1380 | // I don't know how to easily to model that here. | |
56099f08 | 1381 | while (t && t->type == tok_operator && t->content == ".") |
2f1a1aea FCE |
1382 | { |
1383 | concatenation* e = new concatenation; | |
1384 | e->left = op1; | |
1385 | e->op = t->content; | |
56099f08 | 1386 | e->tok = t; |
2f1a1aea | 1387 | next (); |
56099f08 FCE |
1388 | e->right = parse_additive (); |
1389 | op1 = e; | |
1390 | t = peek (); | |
2f1a1aea | 1391 | } |
56099f08 FCE |
1392 | |
1393 | return op1; | |
2f1a1aea FCE |
1394 | } |
1395 | ||
1396 | ||
1397 | expression* | |
1398 | parser::parse_additive () | |
1399 | { | |
1400 | expression* op1 = parse_multiplicative (); | |
1401 | ||
1402 | const token* t = peek (); | |
56099f08 | 1403 | while (t && t->type == tok_operator |
2f1a1aea FCE |
1404 | && (t->content == "+" || t->content == "-")) |
1405 | { | |
1406 | binary_expression* e = new binary_expression; | |
1407 | e->op = t->content; | |
1408 | e->left = op1; | |
56099f08 | 1409 | e->tok = t; |
2f1a1aea | 1410 | next (); |
56099f08 FCE |
1411 | e->right = parse_multiplicative (); |
1412 | op1 = e; | |
1413 | t = peek (); | |
2f1a1aea | 1414 | } |
56099f08 FCE |
1415 | |
1416 | return op1; | |
2f1a1aea FCE |
1417 | } |
1418 | ||
1419 | ||
1420 | expression* | |
1421 | parser::parse_multiplicative () | |
1422 | { | |
1423 | expression* op1 = parse_unary (); | |
1424 | ||
1425 | const token* t = peek (); | |
56099f08 | 1426 | while (t && t->type == tok_operator |
2f1a1aea FCE |
1427 | && (t->content == "*" || t->content == "/" || t->content == "%")) |
1428 | { | |
1429 | binary_expression* e = new binary_expression; | |
1430 | e->op = t->content; | |
1431 | e->left = op1; | |
56099f08 | 1432 | e->tok = t; |
2f1a1aea | 1433 | next (); |
56099f08 FCE |
1434 | e->right = parse_unary (); |
1435 | op1 = e; | |
1436 | t = peek (); | |
2f1a1aea | 1437 | } |
56099f08 FCE |
1438 | |
1439 | return op1; | |
2f1a1aea FCE |
1440 | } |
1441 | ||
1442 | ||
1443 | expression* | |
1444 | parser::parse_unary () | |
1445 | { | |
1446 | const token* t = peek (); | |
1447 | if (t && t->type == tok_operator | |
bb2e3076 FCE |
1448 | && (t->content == "+" || |
1449 | t->content == "-" || | |
1450 | t->content == "!" || | |
1451 | t->content == "~" || | |
1452 | false)) | |
2f1a1aea FCE |
1453 | { |
1454 | unary_expression* e = new unary_expression; | |
1455 | e->op = t->content; | |
56099f08 | 1456 | e->tok = t; |
2f1a1aea FCE |
1457 | next (); |
1458 | e->operand = parse_expression (); | |
1459 | return e; | |
1460 | } | |
1461 | else | |
bb2e3076 | 1462 | return parse_crement (); |
2f1a1aea FCE |
1463 | } |
1464 | ||
1465 | ||
1466 | expression* | |
1467 | parser::parse_crement () // as in "increment" / "decrement" | |
1468 | { | |
1469 | const token* t = peek (); | |
1470 | if (t && t->type == tok_operator | |
1471 | && (t->content == "++" || t->content == "--")) | |
1472 | { | |
1473 | pre_crement* e = new pre_crement; | |
1474 | e->op = t->content; | |
56099f08 | 1475 | e->tok = t; |
2f1a1aea FCE |
1476 | next (); |
1477 | e->operand = parse_value (); | |
1478 | return e; | |
1479 | } | |
1480 | ||
1481 | // post-crement or non-crement | |
1482 | expression *op1 = parse_value (); | |
1483 | ||
1484 | t = peek (); | |
1485 | if (t && t->type == tok_operator | |
1486 | && (t->content == "++" || t->content == "--")) | |
1487 | { | |
1488 | post_crement* e = new post_crement; | |
1489 | e->op = t->content; | |
56099f08 | 1490 | e->tok = t; |
2f1a1aea FCE |
1491 | next (); |
1492 | e->operand = op1; | |
1493 | return e; | |
1494 | } | |
1495 | else | |
1496 | return op1; | |
1497 | } | |
1498 | ||
1499 | ||
1500 | expression* | |
1501 | parser::parse_value () | |
1502 | { | |
1503 | const token* t = peek (); | |
1504 | if (! t) | |
1505 | throw parse_error ("expected value"); | |
1506 | ||
1507 | if (t->type == tok_operator && t->content == "(") | |
1508 | { | |
1509 | next (); | |
1510 | expression* e = parse_expression (); | |
1511 | t = next (); | |
1512 | if (! (t->type == tok_operator && t->content == ")")) | |
1513 | throw parse_error ("expected ')'"); | |
1514 | return e; | |
1515 | } | |
1516 | else if (t->type == tok_identifier) | |
1517 | return parse_symbol (); | |
1518 | else | |
1519 | return parse_literal (); | |
1520 | } | |
1521 | ||
1522 | ||
0fefb486 | 1523 | // var, var[index], func(parms), thread->var, process->var |
2f1a1aea | 1524 | expression* |
0fefb486 | 1525 | parser::parse_symbol () |
2f1a1aea FCE |
1526 | { |
1527 | const token* t = next (); | |
1528 | if (t->type != tok_identifier) | |
1529 | throw parse_error ("expected identifier"); | |
56099f08 | 1530 | const token* t2 = t; |
2f1a1aea | 1531 | string name = t->content; |
56099f08 | 1532 | |
2f1a1aea | 1533 | t = peek (); |
0fefb486 FCE |
1534 | if (t && t->type == tok_operator && t->content == "->") |
1535 | { | |
1536 | // shorthand for process- or thread-specific array element | |
1537 | // map "thread->VAR" to "VAR[$tid]", | |
1538 | // and "process->VAR" to "VAR[$pid]" | |
1539 | symbol* sym = new symbol; | |
1540 | if (name == "thread") | |
1541 | sym->name = "$tid"; | |
1542 | else if (name == "process") | |
1543 | sym->name = "$pid"; | |
1544 | else | |
1545 | throw parse_error ("expected 'thread->' or 'process->'"); | |
1546 | struct token* t2prime = new token (*t2); | |
1547 | t2prime->content = sym->name; | |
1548 | sym->tok = t2prime; | |
1549 | ||
1550 | next (); // swallow "->" | |
1551 | t = next (); | |
1552 | if (! (t->type == tok_identifier)) | |
1553 | throw parse_error ("expected identifier"); | |
1554 | ||
1555 | struct arrayindex* ai = new arrayindex; | |
1556 | ai->tok = t; | |
1557 | ai->base = t->content; | |
1558 | ai->indexes.push_back (sym); | |
1559 | return ai; | |
1560 | } | |
1561 | else if (t && t->type == tok_operator && t->content == "[") // array | |
2f1a1aea FCE |
1562 | { |
1563 | next (); | |
1564 | struct arrayindex* ai = new arrayindex; | |
56099f08 FCE |
1565 | ai->tok = t2; |
1566 | ai->base = name; | |
2f1a1aea FCE |
1567 | while (1) |
1568 | { | |
1569 | ai->indexes.push_back (parse_expression ()); | |
1570 | t = next (); | |
1571 | if (t->type == tok_operator && t->content == "]") | |
1572 | break; | |
1573 | if (t->type == tok_operator && t->content == ",") | |
1574 | continue; | |
1575 | else | |
1576 | throw parse_error ("expected ',' or ']'"); | |
1577 | } | |
1578 | return ai; | |
1579 | } | |
1580 | else if (t && t->type == tok_operator && t->content == "(") // function call | |
1581 | { | |
1582 | next (); | |
1583 | struct functioncall* f = new functioncall; | |
56099f08 FCE |
1584 | f->tok = t2; |
1585 | f->function = name; | |
82919855 FCE |
1586 | // Allow empty actual parameter list |
1587 | const token* t3 = peek (); | |
1588 | if (t3 && t3->type == tok_operator && t3->content == ")") | |
1589 | { | |
1590 | next (); | |
1591 | return f; | |
1592 | } | |
2f1a1aea | 1593 | while (1) |
82919855 FCE |
1594 | { |
1595 | f->args.push_back (parse_expression ()); | |
1596 | t = next (); | |
1597 | if (t->type == tok_operator && t->content == ")") | |
1598 | break; | |
1599 | if (t->type == tok_operator && t->content == ",") | |
1600 | continue; | |
1601 | else | |
1602 | throw parse_error ("expected ',' or ')'"); | |
1603 | } | |
2f1a1aea FCE |
1604 | return f; |
1605 | } | |
1606 | else | |
1607 | { | |
56099f08 FCE |
1608 | symbol* sym = new symbol; |
1609 | sym->name = name; | |
1610 | sym->tok = t2; | |
1611 | return sym; | |
2f1a1aea FCE |
1612 | } |
1613 | } | |
56099f08 | 1614 |