]>
Commit | Line | Data |
---|---|---|
2f1a1aea | 1 | // recursive descent parser for systemtap scripts |
69c68955 FCE |
2 | // Copyright (C) 2005 Red Hat Inc. |
3 | // | |
4 | // This file is part of systemtap, and is free software. You can | |
5 | // redistribute it and/or modify it under the terms of the GNU General | |
6 | // Public License (GPL); either version 2, or (at your option) any | |
7 | // later version. | |
2f1a1aea | 8 | |
2b066ec1 | 9 | #include "config.h" |
2f1a1aea FCE |
10 | #include "staptree.h" |
11 | #include "parse.h" | |
177a8ead | 12 | #include "session.h" |
2b066ec1 FCE |
13 | #include <iostream> |
14 | #include <fstream> | |
2f1a1aea | 15 | #include <cctype> |
9c0c0e46 | 16 | #include <cstdlib> |
9c0c0e46 FCE |
17 | #include <cerrno> |
18 | #include <climits> | |
57b73400 | 19 | #include <sstream> |
2f1a1aea FCE |
20 | |
21 | using namespace std; | |
22 | ||
23 | // ------------------------------------------------------------------------ | |
24 | ||
bb2e3076 FCE |
25 | |
26 | ||
177a8ead FCE |
27 | parser::parser (systemtap_session& s, istream& i, bool p): |
28 | session (s), | |
24cb178f FCE |
29 | input_name ("<input>"), free_input (0), |
30 | input (i, input_name), privileged (p), | |
2f1a1aea FCE |
31 | last_t (0), next_t (0), num_errors (0) |
32 | { } | |
33 | ||
177a8ead FCE |
34 | parser::parser (systemtap_session& s, const string& fn, bool p): |
35 | session (s), | |
2f1a1aea | 36 | input_name (fn), free_input (new ifstream (input_name.c_str(), ios::in)), |
24cb178f | 37 | input (* free_input, input_name), privileged (p), |
2f1a1aea FCE |
38 | last_t (0), next_t (0), num_errors (0) |
39 | { } | |
40 | ||
41 | parser::~parser() | |
42 | { | |
43 | if (free_input) delete free_input; | |
44 | } | |
45 | ||
46 | ||
82919855 | 47 | stapfile* |
177a8ead | 48 | parser::parse (systemtap_session& s, std::istream& i, bool pr) |
82919855 | 49 | { |
177a8ead | 50 | parser p (s, i, pr); |
82919855 FCE |
51 | return p.parse (); |
52 | } | |
53 | ||
54 | ||
55 | stapfile* | |
177a8ead | 56 | parser::parse (systemtap_session& s, const std::string& n, bool pr) |
82919855 | 57 | { |
177a8ead | 58 | parser p (s, n, pr); |
82919855 FCE |
59 | return p.parse (); |
60 | } | |
61 | ||
d7f3e0c5 GH |
62 | static string |
63 | tt2str(token_type tt) | |
64 | { | |
65 | switch (tt) | |
66 | { | |
67 | case tok_junk: return "junk"; | |
68 | case tok_identifier: return "identifier"; | |
69 | case tok_operator: return "operator"; | |
70 | case tok_string: return "string"; | |
71 | case tok_number: return "number"; | |
72 | case tok_embedded: return "embedded-code"; | |
73 | } | |
74 | return "unknown token"; | |
75 | } | |
82919855 | 76 | |
56099f08 FCE |
77 | ostream& |
78 | operator << (ostream& o, const token& t) | |
79 | { | |
d7f3e0c5 | 80 | o << tt2str(t.type); |
56099f08 | 81 | |
24cb178f | 82 | if (t.type != tok_embedded) // XXX: other types? |
56099f08 | 83 | { |
24cb178f FCE |
84 | o << " '"; |
85 | for (unsigned i=0; i<t.content.length(); i++) | |
86 | { | |
87 | char c = t.content[i]; | |
88 | o << (isprint (c) ? c : '?'); | |
89 | } | |
90 | o << "'"; | |
56099f08 | 91 | } |
56099f08 FCE |
92 | |
93 | o << " at " | |
94 | << t.location.file << ":" | |
95 | << t.location.line << ":" | |
96 | << t.location.column; | |
97 | ||
98 | return o; | |
99 | } | |
100 | ||
101 | ||
2f1a1aea FCE |
102 | void |
103 | parser::print_error (const parse_error &pe) | |
104 | { | |
105 | cerr << "parse error: " << pe.what () << endl; | |
106 | ||
177a8ead FCE |
107 | if (pe.tok) |
108 | { | |
109 | cerr << "\tat: " << *pe.tok << endl; | |
110 | } | |
2f1a1aea | 111 | else |
177a8ead FCE |
112 | { |
113 | const token* t = last_t; | |
114 | if (t) | |
115 | cerr << "\tsaw: " << *t << endl; | |
116 | else | |
117 | cerr << "\tsaw: " << input_name << " EOF" << endl; | |
118 | } | |
2f1a1aea FCE |
119 | |
120 | // XXX: make it possible to print the last input line, | |
121 | // so as to line up an arrow with the specific error column | |
122 | ||
123 | num_errors ++; | |
124 | } | |
125 | ||
126 | ||
127 | const token* | |
128 | parser::last () | |
129 | { | |
130 | return last_t; | |
131 | } | |
132 | ||
133 | ||
177a8ead FCE |
134 | // Here, we perform on-the-fly preprocessing. |
135 | // The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %) | |
44ce8ed5 FCE |
136 | // where CONDITION is: kernel_v[r] COMPARISON-OP "version-string" |
137 | // or: arch COMPARISON-OP "arch-string" | |
138 | // The %: ELSE-TOKENS part is optional. | |
177a8ead FCE |
139 | // |
140 | // e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %) | |
44ce8ed5 | 141 | // e.g. %( arch != "i686" %? "foo" %: "baz" %) |
177a8ead FCE |
142 | // |
143 | // Up to an entire %( ... %) expression is processed by a single call | |
144 | // to this function. Tokens included by any nested conditions are | |
145 | // enqueued in a private vector. | |
146 | ||
147 | bool eval_pp_conditional (systemtap_session& s, | |
148 | const token* l, const token* op, const token* r) | |
149 | { | |
44ce8ed5 FCE |
150 | if (l->type == tok_identifier && (l->content == "kernel_v" || |
151 | l->content == "kernel_vr")) | |
152 | { | |
153 | string target_kernel_vr = s.kernel_release; | |
154 | string target_kernel_v = target_kernel_vr; | |
155 | // cut off any release code suffix | |
156 | string::size_type dr = target_kernel_vr.rfind ('-'); | |
157 | if (dr > 0 && dr != string::npos) | |
158 | target_kernel_v = target_kernel_vr.substr (0, dr); | |
159 | ||
160 | if (! (r->type == tok_string)) | |
161 | throw parse_error ("expected string literal", r); | |
162 | string query_kernel_vr = r->content; | |
163 | ||
164 | // collect acceptable strverscmp results. | |
165 | int rvc_ok1, rvc_ok2; | |
166 | if (op->type == tok_operator && op->content == "<=") | |
167 | { rvc_ok1 = -1; rvc_ok2 = 0; } | |
168 | else if (op->type == tok_operator && op->content == ">=") | |
169 | { rvc_ok1 = 1; rvc_ok2 = 0; } | |
170 | else if (op->type == tok_operator && op->content == "<") | |
171 | { rvc_ok1 = -1; rvc_ok2 = -1; } | |
172 | else if (op->type == tok_operator && op->content == ">") | |
173 | { rvc_ok1 = 1; rvc_ok2 = 1; } | |
174 | else if (op->type == tok_operator && op->content == "==") | |
175 | { rvc_ok1 = 0; rvc_ok2 = 0; } | |
176 | else if (op->type == tok_operator && op->content == "!=") | |
177 | { rvc_ok1 = -1; rvc_ok2 = 1; } | |
178 | else | |
179 | throw parse_error ("expected comparison operator", op); | |
180 | ||
181 | int rvc_result = strverscmp ((l->content == "kernel_vr" ? | |
182 | target_kernel_vr.c_str() : | |
183 | target_kernel_v.c_str()), | |
184 | query_kernel_vr.c_str()); | |
185 | // normalize rvc_result | |
186 | if (rvc_result < 0) rvc_result = -1; | |
187 | if (rvc_result > 0) rvc_result = 1; | |
188 | ||
189 | return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2); | |
190 | } | |
191 | else if (l->type == tok_identifier && l->content == "arch") | |
192 | { | |
193 | string target_architecture = s.architecture; | |
194 | if (! (r->type == tok_string)) | |
195 | throw parse_error ("expected string literal", r); | |
196 | string query_architecture = r->content; | |
197 | ||
198 | bool result; | |
199 | if (op->type == tok_operator && op->content == "==") | |
200 | result = target_architecture == query_architecture; | |
201 | else if (op->type == tok_operator && op->content == "!=") | |
202 | result = target_architecture != query_architecture; | |
203 | else | |
204 | throw parse_error ("expected '==' or '!='", op); | |
205 | ||
206 | return result; | |
207 | } | |
208 | // XXX: support other forms? "CONFIG_SMP" ? | |
177a8ead | 209 | else |
44ce8ed5 | 210 | throw parse_error ("expected 'arch' or 'kernel_v' or 'kernel_vr'", l); |
177a8ead FCE |
211 | } |
212 | ||
213 | ||
214 | const token* | |
215 | parser::scan_pp () | |
216 | { | |
217 | while (true) | |
218 | { | |
219 | if (enqueued_pp.size() > 0) | |
220 | { | |
221 | const token* t = enqueued_pp[0]; | |
222 | enqueued_pp.erase (enqueued_pp.begin()); | |
223 | return t; | |
224 | } | |
225 | ||
226 | const token* t = input.scan (); // NB: not recursive! | |
227 | if (t == 0) // EOF | |
228 | return t; | |
229 | ||
230 | if (! (t->type == tok_operator && t->content == "%(")) // ordinary token | |
231 | return t; | |
232 | ||
233 | // We have a %( - it's time to throw a preprocessing party! | |
234 | ||
235 | const token *l, *op, *r; | |
236 | l = input.scan (); // NB: not recursive, though perhaps could be | |
237 | op = input.scan (); | |
238 | r = input.scan (); | |
239 | if (l == 0 || op == 0 || r == 0) | |
240 | throw parse_error ("incomplete condition after '%('", t); | |
241 | // NB: consider generalizing to consume all tokens until %?, and | |
242 | // passing that as a vector to an evaluator. | |
243 | ||
244 | bool result = eval_pp_conditional (session, l, op, r); | |
245 | ||
246 | const token *m = input.scan (); // NB: not recursive | |
247 | if (! (m && m->type == tok_operator && m->content == "%?")) | |
248 | throw parse_error ("expected '%?' marker for conditional", t); | |
249 | ||
250 | vector<const token*> my_enqueued_pp; | |
251 | ||
252 | while (true) // consume THEN tokens | |
253 | { | |
254 | m = scan_pp (); // NB: recursive | |
255 | if (m == 0) | |
256 | throw parse_error ("missing THEN tokens for conditional", t); | |
257 | ||
258 | if (m->type == tok_operator && (m->content == "%:" || // ELSE | |
259 | m->content == "%)")) // END | |
260 | break; | |
261 | // enqueue token | |
262 | if (result) | |
263 | my_enqueued_pp.push_back (m); | |
264 | // continue | |
265 | } | |
266 | ||
267 | if (m && m->type == tok_operator && m->content == "%:") // ELSE | |
268 | while (true) | |
269 | { | |
270 | m = scan_pp (); // NB: recursive | |
271 | if (m == 0) | |
272 | throw parse_error ("missing ELSE tokens for conditional", t); | |
273 | ||
274 | if (m->type == tok_operator && m->content == "%)") // END | |
275 | break; | |
276 | // enqueue token | |
277 | if (! result) | |
278 | my_enqueued_pp.push_back (m); | |
279 | // continue | |
280 | } | |
281 | ||
282 | // NB: we transcribe the retained tokens here, and not inside | |
283 | // the THEN/ELSE while loops. If it were done there, each loop | |
284 | // would become infinite (each iteration consuming an ordinary | |
285 | // token the previous one just pushed there). Guess how I | |
286 | // figured that out. | |
287 | enqueued_pp.insert (enqueued_pp.end(), | |
288 | my_enqueued_pp.begin(), | |
289 | my_enqueued_pp.end()); | |
290 | ||
291 | // Go back to outermost while(true) loop. We hope that at least | |
292 | // some THEN or ELSE tokens were enqueued. If not, around we go | |
293 | // again, until EOF. | |
294 | } | |
295 | } | |
296 | ||
297 | ||
2f1a1aea FCE |
298 | const token* |
299 | parser::next () | |
300 | { | |
301 | if (! next_t) | |
177a8ead | 302 | next_t = scan_pp (); |
2f1a1aea FCE |
303 | if (! next_t) |
304 | throw parse_error ("unexpected end-of-file"); | |
305 | ||
2f1a1aea FCE |
306 | last_t = next_t; |
307 | // advance by zeroing next_t | |
308 | next_t = 0; | |
309 | return last_t; | |
310 | } | |
311 | ||
312 | ||
313 | const token* | |
314 | parser::peek () | |
315 | { | |
316 | if (! next_t) | |
177a8ead | 317 | next_t = scan_pp (); |
2f1a1aea FCE |
318 | |
319 | // don't advance by zeroing next_t | |
320 | last_t = next_t; | |
321 | return next_t; | |
322 | } | |
323 | ||
324 | ||
d7f3e0c5 GH |
325 | static inline bool |
326 | tok_is(token const * t, token_type tt, string const & expected) | |
327 | { | |
328 | return t && t->type == tt && t->content == expected; | |
329 | } | |
330 | ||
331 | ||
332 | const token* | |
333 | parser::expect_known (token_type tt, string const & expected) | |
334 | { | |
335 | const token *t = next(); | |
57b73400 | 336 | if (! (t && t->type == tt && t->content == expected)) |
d7f3e0c5 GH |
337 | throw parse_error ("expected '" + expected + "'"); |
338 | return t; | |
339 | } | |
340 | ||
341 | ||
342 | const token* | |
343 | parser::expect_unknown (token_type tt, string & target) | |
344 | { | |
345 | const token *t = next(); | |
346 | if (!(t && t->type == tt)) | |
347 | throw parse_error ("expected " + tt2str(tt)); | |
348 | target = t->content; | |
349 | return t; | |
350 | } | |
351 | ||
352 | ||
353 | const token* | |
354 | parser::expect_op (std::string const & expected) | |
355 | { | |
356 | return expect_known (tok_operator, expected); | |
357 | } | |
358 | ||
359 | ||
360 | const token* | |
361 | parser::expect_kw (std::string const & expected) | |
362 | { | |
363 | return expect_known (tok_identifier, expected); | |
364 | } | |
365 | ||
57b73400 GH |
366 | const token* |
367 | parser::expect_number (int64_t & expected) | |
368 | { | |
369 | std::string tmp; | |
370 | token const * tt = expect_unknown (tok_number, tmp); | |
371 | istringstream iss(tmp); | |
372 | iss >> expected; | |
373 | return tt; | |
374 | } | |
375 | ||
d7f3e0c5 GH |
376 | |
377 | const token* | |
378 | parser::expect_ident (std::string & target) | |
379 | { | |
380 | return expect_unknown (tok_identifier, target); | |
381 | } | |
382 | ||
383 | ||
384 | bool | |
385 | parser::peek_op (std::string const & op) | |
386 | { | |
387 | return tok_is (peek(), tok_operator, op); | |
388 | } | |
389 | ||
390 | ||
391 | bool | |
392 | parser::peek_kw (std::string const & kw) | |
393 | { | |
394 | return tok_is (peek(), tok_identifier, kw); | |
395 | } | |
396 | ||
397 | ||
398 | ||
2f1a1aea FCE |
399 | lexer::lexer (istream& i, const string& in): |
400 | input (i), input_name (in), cursor_line (1), cursor_column (1) | |
401 | { } | |
402 | ||
bb2e3076 FCE |
403 | |
404 | int | |
405 | lexer::input_peek (unsigned n) | |
406 | { | |
407 | while (lookahead.size() <= n) | |
408 | { | |
409 | int c = input.get (); | |
410 | lookahead.push_back (input ? c : -1); | |
411 | } | |
412 | return lookahead[n]; | |
413 | } | |
414 | ||
415 | ||
2f1a1aea FCE |
416 | int |
417 | lexer::input_get () | |
418 | { | |
bb2e3076 FCE |
419 | int c = input_peek (0); |
420 | lookahead.erase (lookahead.begin ()); | |
421 | ||
422 | if (c < 0) return c; // EOF | |
423 | ||
2f1a1aea FCE |
424 | // update source cursor |
425 | if (c == '\n') | |
426 | { | |
427 | cursor_line ++; | |
428 | cursor_column = 1; | |
429 | } | |
430 | else | |
431 | cursor_column ++; | |
432 | ||
433 | return c; | |
434 | } | |
435 | ||
436 | ||
437 | token* | |
438 | lexer::scan () | |
439 | { | |
440 | token* n = new token; | |
441 | n->location.file = input_name; | |
442 | ||
443 | skip: | |
444 | n->location.line = cursor_line; | |
445 | n->location.column = cursor_column; | |
446 | ||
447 | int c = input_get(); | |
448 | if (c < 0) | |
449 | { | |
450 | delete n; | |
451 | return 0; | |
452 | } | |
453 | ||
454 | if (isspace (c)) | |
455 | goto skip; | |
456 | ||
d02548c0 | 457 | else if (isalpha (c) || c == '$' || c == '@' || c == '_') |
2f1a1aea FCE |
458 | { |
459 | n->type = tok_identifier; | |
460 | n->content = (char) c; | |
461 | while (1) | |
462 | { | |
bb2e3076 | 463 | int c2 = input_peek (); |
2f1a1aea FCE |
464 | if (! input) |
465 | break; | |
0fefb486 | 466 | if ((isalnum(c2) || c2 == '_' || c2 == '$')) |
2f1a1aea FCE |
467 | { |
468 | n->content.push_back(c2); | |
469 | input_get (); | |
470 | } | |
471 | else | |
472 | break; | |
473 | } | |
474 | return n; | |
475 | } | |
476 | ||
3a20432b | 477 | else if (isdigit (c)) // positive literal |
2f1a1aea | 478 | { |
2f1a1aea | 479 | n->type = tok_number; |
9c0c0e46 FCE |
480 | n->content = (char) c; |
481 | ||
2f1a1aea FCE |
482 | while (1) |
483 | { | |
bb2e3076 | 484 | int c2 = input_peek (); |
2f1a1aea FCE |
485 | if (! input) |
486 | break; | |
9c0c0e46 FCE |
487 | |
488 | // NB: isalnum is very permissive. We rely on strtol, called in | |
489 | // parser::parse_literal below, to confirm that the number string | |
490 | // is correctly formatted and in range. | |
491 | ||
492 | if (isalnum (c2)) | |
2f1a1aea | 493 | { |
9c0c0e46 | 494 | n->content.push_back (c2); |
2f1a1aea FCE |
495 | input_get (); |
496 | } | |
497 | else | |
498 | break; | |
499 | } | |
500 | return n; | |
501 | } | |
502 | ||
503 | else if (c == '\"') | |
504 | { | |
505 | n->type = tok_string; | |
506 | while (1) | |
507 | { | |
508 | c = input_get (); | |
509 | ||
510 | if (! input || c == '\n') | |
511 | { | |
512 | n->type = tok_junk; | |
513 | break; | |
514 | } | |
515 | if (c == '\"') // closing double-quotes | |
516 | break; | |
517 | else if (c == '\\') | |
7d46afb8 GH |
518 | { |
519 | c = input_get (); | |
520 | switch (c) | |
521 | { | |
522 | case 'a': | |
523 | case 'b': | |
524 | case 't': | |
525 | case 'n': | |
526 | case 'v': | |
527 | case 'f': | |
528 | case 'r': | |
529 | case '\\': | |
530 | ||
531 | // Pass these escapes through to the string value | |
532 | // beign parsed; it will "likely" be emitted into | |
533 | // a C literal. | |
534 | // | |
535 | // XXX: verify this assumption. | |
536 | ||
537 | n->content.push_back('\\'); | |
538 | ||
539 | default: | |
540 | ||
541 | n->content.push_back(c); | |
542 | break; | |
543 | } | |
2f1a1aea FCE |
544 | } |
545 | else | |
546 | n->content.push_back(c); | |
547 | } | |
548 | return n; | |
549 | } | |
550 | ||
551 | else if (ispunct (c)) | |
552 | { | |
bb2e3076 FCE |
553 | int c2 = input_peek (); |
554 | int c3 = input_peek (1); | |
555 | string s1 = string("") + (char) c; | |
556 | string s2 = (c2 > 0 ? s1 + (char) c2 : s1); | |
557 | string s3 = (c3 > 0 ? s2 + (char) c3 : s2); | |
2f1a1aea | 558 | |
3a20432b FCE |
559 | // NB: if we were to recognize negative numeric literals here, |
560 | // we'd introduce another grammar ambiguity: | |
561 | // 1-1 would be parsed as tok_number(1) and tok_number(-1) | |
562 | // instead of tok_number(1) tok_operator('-') tok_number(1) | |
563 | ||
bb2e3076 | 564 | if (s1 == "#") // shell comment |
2f1a1aea FCE |
565 | { |
566 | unsigned this_line = cursor_line; | |
bb2e3076 FCE |
567 | do { c = input_get (); } |
568 | while (c >= 0 && cursor_line == this_line); | |
2f1a1aea FCE |
569 | goto skip; |
570 | } | |
bb2e3076 | 571 | else if (s2 == "//") // C++ comment |
63a7c90e FCE |
572 | { |
573 | unsigned this_line = cursor_line; | |
bb2e3076 FCE |
574 | do { c = input_get (); } |
575 | while (c >= 0 && cursor_line == this_line); | |
63a7c90e FCE |
576 | goto skip; |
577 | } | |
578 | else if (c == '/' && c2 == '*') // C comment | |
579 | { | |
580 | c2 = input_get (); | |
581 | unsigned chars = 0; | |
bb2e3076 | 582 | while (c2 >= 0) |
63a7c90e FCE |
583 | { |
584 | chars ++; // track this to prevent "/*/" from being accepted | |
585 | c = c2; | |
586 | c2 = input_get (); | |
587 | if (chars > 1 && c == '*' && c2 == '/') | |
bb2e3076 | 588 | break; |
63a7c90e | 589 | } |
bb2e3076 | 590 | goto skip; |
63a7c90e | 591 | } |
54dfabe9 FCE |
592 | else if (c == '%' && c2 == '{') // embedded code |
593 | { | |
594 | n->type = tok_embedded; | |
595 | (void) input_get (); // swallow '{' already in c2 | |
596 | while (true) | |
597 | { | |
598 | c = input_get (); | |
599 | if (c == 0) // EOF | |
600 | { | |
601 | n->type = tok_junk; | |
602 | break; | |
603 | } | |
604 | if (c == '%') | |
605 | { | |
606 | c2 = input_peek (); | |
607 | if (c2 == '}') | |
608 | { | |
609 | (void) input_get (); // swallow '}' too | |
610 | break; | |
611 | } | |
612 | } | |
613 | n->content += c; | |
614 | } | |
615 | return n; | |
616 | } | |
2f1a1aea | 617 | |
bb2e3076 FCE |
618 | // We're committed to recognizing at least the first character |
619 | // as an operator. | |
2f1a1aea | 620 | n->type = tok_operator; |
2f1a1aea | 621 | |
bb2e3076 FCE |
622 | // match all valid operators, in decreasing size order |
623 | if (s3 == "<<<" || | |
624 | s3 == "<<=" || | |
625 | s3 == ">>=") | |
82919855 | 626 | { |
bb2e3076 FCE |
627 | n->content = s3; |
628 | input_get (); input_get (); // swallow other two characters | |
629 | } | |
630 | else if (s2 == "==" || | |
631 | s2 == "!=" || | |
632 | s2 == "<=" || | |
633 | s2 == ">=" || | |
634 | s2 == "+=" || | |
635 | s2 == "-=" || | |
636 | s2 == "*=" || | |
637 | s2 == "/=" || | |
638 | s2 == "%=" || | |
639 | s2 == "&=" || | |
640 | s2 == "^=" || | |
641 | s2 == "|=" || | |
d5d7c2cc | 642 | s2 == ".=" || |
bb2e3076 FCE |
643 | s2 == "&&" || |
644 | s2 == "||" || | |
645 | s2 == "++" || | |
646 | s2 == "--" || | |
647 | s2 == "->" || | |
648 | s2 == "<<" || | |
177a8ead FCE |
649 | s2 == ">>" || |
650 | // preprocessor tokens | |
651 | s2 == "%(" || | |
652 | s2 == "%?" || | |
653 | s2 == "%:" || | |
654 | s2 == "%)") | |
bb2e3076 FCE |
655 | { |
656 | n->content = s2; | |
657 | input_get (); // swallow other character | |
658 | } | |
659 | else | |
660 | { | |
661 | n->content = s1; | |
82919855 | 662 | } |
2f1a1aea FCE |
663 | |
664 | return n; | |
665 | } | |
666 | ||
667 | else | |
668 | { | |
669 | n->type = tok_junk; | |
670 | n->content = (char) c; | |
671 | return n; | |
672 | } | |
673 | } | |
674 | ||
675 | ||
676 | // ------------------------------------------------------------------------ | |
677 | ||
678 | stapfile* | |
679 | parser::parse () | |
680 | { | |
681 | stapfile* f = new stapfile; | |
682 | f->name = input_name; | |
56099f08 FCE |
683 | |
684 | bool empty = true; | |
685 | ||
2f1a1aea FCE |
686 | while (1) |
687 | { | |
688 | try | |
689 | { | |
690 | const token* t = peek (); | |
56099f08 | 691 | if (! t) // nice clean EOF |
2f1a1aea FCE |
692 | break; |
693 | ||
56099f08 | 694 | empty = false; |
2f1a1aea | 695 | if (t->type == tok_identifier && t->content == "probe") |
54dfabe9 | 696 | parse_probe (f->probes, f->aliases); |
2f1a1aea | 697 | else if (t->type == tok_identifier && t->content == "global") |
07c17d67 | 698 | parse_global (f->globals); |
56099f08 | 699 | else if (t->type == tok_identifier && t->content == "function") |
24cb178f | 700 | parse_functiondecl (f->functions); |
54dfabe9 FCE |
701 | else if (t->type == tok_embedded) |
702 | f->embeds.push_back (parse_embeddedcode ()); | |
2f1a1aea | 703 | else |
24cb178f | 704 | throw parse_error ("expected 'probe', 'global', 'function', or '%{'"); |
2f1a1aea FCE |
705 | } |
706 | catch (parse_error& pe) | |
707 | { | |
708 | print_error (pe); | |
177a8ead FCE |
709 | try |
710 | { | |
711 | // Quietly swallow all tokens until the next '}'. | |
712 | while (1) | |
713 | { | |
714 | const token* t = peek (); | |
715 | if (! t) | |
716 | break; | |
717 | next (); | |
718 | if (t->type == tok_operator && t->content == "}") | |
719 | break; | |
720 | } | |
721 | } | |
722 | catch (parse_error& pe2) | |
723 | { | |
724 | // parse error during recovery ... ugh | |
725 | print_error (pe2); | |
726 | } | |
727 | } | |
2f1a1aea FCE |
728 | } |
729 | ||
56099f08 FCE |
730 | if (empty) |
731 | { | |
732 | cerr << "Input file '" << input_name << "' is empty or missing." << endl; | |
733 | delete f; | |
734 | return 0; | |
735 | } | |
736 | else if (num_errors > 0) | |
2f1a1aea FCE |
737 | { |
738 | cerr << num_errors << " parse error(s)." << endl; | |
739 | delete f; | |
56099f08 | 740 | return 0; |
2f1a1aea FCE |
741 | } |
742 | ||
743 | return f; | |
744 | } | |
745 | ||
746 | ||
20c6c071 | 747 | void |
54dfabe9 FCE |
748 | parser::parse_probe (std::vector<probe *> & probe_ret, |
749 | std::vector<probe_alias *> & alias_ret) | |
2f1a1aea | 750 | { |
82919855 FCE |
751 | const token* t0 = next (); |
752 | if (! (t0->type == tok_identifier && t0->content == "probe")) | |
753 | throw parse_error ("expected 'probe'"); | |
754 | ||
20c6c071 GH |
755 | vector<probe_point *> aliases; |
756 | vector<probe_point *> locations; | |
757 | ||
758 | bool equals_ok = true; | |
82919855 | 759 | |
2f1a1aea FCE |
760 | while (1) |
761 | { | |
b4ceace2 FCE |
762 | probe_point * pp = parse_probe_point (); |
763 | ||
764 | const token* t = peek (); | |
765 | if (equals_ok && t | |
766 | && t->type == tok_operator && t->content == "=") | |
767 | { | |
768 | aliases.push_back(pp); | |
769 | next (); | |
770 | continue; | |
771 | } | |
772 | else if (t && t->type == tok_operator && t->content == ",") | |
773 | { | |
774 | locations.push_back(pp); | |
775 | equals_ok = false; | |
776 | next (); | |
777 | continue; | |
778 | } | |
779 | else if (t && t->type == tok_operator && t->content == "{") | |
780 | { | |
781 | locations.push_back(pp); | |
782 | break; | |
783 | } | |
2f1a1aea | 784 | else |
9c0c0e46 | 785 | throw parse_error ("expected probe point specifier"); |
2f1a1aea | 786 | } |
20c6c071 | 787 | |
20c6c071 GH |
788 | if (aliases.empty()) |
789 | { | |
54dfabe9 FCE |
790 | probe* p = new probe; |
791 | p->tok = t0; | |
792 | p->locations = locations; | |
793 | p->body = parse_stmt_block (); | |
794 | probe_ret.push_back (p); | |
20c6c071 GH |
795 | } |
796 | else | |
797 | { | |
54dfabe9 FCE |
798 | probe_alias* p = new probe_alias (aliases); |
799 | p->tok = t0; | |
800 | p->locations = locations; | |
801 | p->body = parse_stmt_block (); | |
802 | alias_ret.push_back (p); | |
20c6c071 | 803 | } |
54dfabe9 | 804 | } |
20c6c071 | 805 | |
54dfabe9 FCE |
806 | |
807 | embeddedcode* | |
808 | parser::parse_embeddedcode () | |
809 | { | |
810 | embeddedcode* e = new embeddedcode; | |
811 | const token* t = next (); | |
812 | if (t->type != tok_embedded) | |
24cb178f FCE |
813 | throw parse_error ("expected '%{'"); |
814 | ||
815 | if (! privileged) | |
816 | throw parse_error ("embedded code in unprivileged script"); | |
54dfabe9 FCE |
817 | |
818 | e->tok = t; | |
819 | e->code = t->content; | |
820 | return e; | |
2f1a1aea FCE |
821 | } |
822 | ||
823 | ||
824 | block* | |
56099f08 | 825 | parser::parse_stmt_block () |
2f1a1aea FCE |
826 | { |
827 | block* pb = new block; | |
828 | ||
56099f08 FCE |
829 | const token* t = next (); |
830 | if (! (t->type == tok_operator && t->content == "{")) | |
831 | throw parse_error ("expected '{'"); | |
832 | ||
833 | pb->tok = t; | |
2b066ec1 | 834 | |
2f1a1aea FCE |
835 | while (1) |
836 | { | |
837 | try | |
838 | { | |
2b066ec1 FCE |
839 | t = peek (); |
840 | if (t && t->type == tok_operator && t->content == "}") | |
841 | { | |
842 | next (); | |
843 | break; | |
844 | } | |
845 | ||
2f1a1aea | 846 | pb->statements.push_back (parse_statement ()); |
2f1a1aea FCE |
847 | } |
848 | catch (parse_error& pe) | |
849 | { | |
850 | print_error (pe); | |
54dfabe9 | 851 | |
2f1a1aea FCE |
852 | // Quietly swallow all tokens until the next ';' or '}'. |
853 | while (1) | |
854 | { | |
855 | const token* t = peek (); | |
54dfabe9 | 856 | if (! t) return 0; |
2f1a1aea | 857 | next (); |
54dfabe9 FCE |
858 | if (t->type == tok_operator |
859 | && (t->content == "}" || t->content == ";")) | |
2f1a1aea FCE |
860 | break; |
861 | } | |
862 | } | |
863 | } | |
864 | ||
865 | return pb; | |
866 | } | |
867 | ||
868 | ||
869 | statement* | |
870 | parser::parse_statement () | |
871 | { | |
872 | const token* t = peek (); | |
873 | if (t && t->type == tok_operator && t->content == ";") | |
874 | { | |
69c68955 FCE |
875 | null_statement* n = new null_statement (); |
876 | n->tok = next (); | |
877 | return n; | |
2f1a1aea FCE |
878 | } |
879 | else if (t && t->type == tok_operator && t->content == "{") | |
56099f08 | 880 | return parse_stmt_block (); |
2f1a1aea | 881 | else if (t && t->type == tok_identifier && t->content == "if") |
56099f08 | 882 | return parse_if_statement (); |
69c68955 FCE |
883 | else if (t && t->type == tok_identifier && t->content == "for") |
884 | return parse_for_loop (); | |
69c68955 FCE |
885 | else if (t && t->type == tok_identifier && t->content == "foreach") |
886 | return parse_foreach_loop (); | |
56099f08 FCE |
887 | else if (t && t->type == tok_identifier && t->content == "return") |
888 | return parse_return_statement (); | |
889 | else if (t && t->type == tok_identifier && t->content == "delete") | |
890 | return parse_delete_statement (); | |
f3c26ea5 FCE |
891 | else if (t && t->type == tok_identifier && t->content == "while") |
892 | return parse_while_loop (); | |
893 | else if (t && t->type == tok_identifier && t->content == "break") | |
894 | return parse_break_statement (); | |
895 | else if (t && t->type == tok_identifier && t->content == "continue") | |
896 | return parse_continue_statement (); | |
897 | else if (t && t->type == tok_identifier && t->content == "next") | |
898 | return parse_next_statement (); | |
899 | // XXX: "do/while" statement? | |
2f1a1aea FCE |
900 | else if (t && (t->type == tok_operator || // expressions are flexible |
901 | t->type == tok_identifier || | |
902 | t->type == tok_number || | |
903 | t->type == tok_string)) | |
69c68955 | 904 | return parse_expr_statement (); |
54dfabe9 | 905 | // XXX: consider generally accepting tok_embedded here too |
2f1a1aea FCE |
906 | else |
907 | throw parse_error ("expected statement"); | |
908 | } | |
909 | ||
910 | ||
56099f08 | 911 | void |
07c17d67 | 912 | parser::parse_global (vector <vardecl*>& globals) |
2f1a1aea | 913 | { |
82919855 FCE |
914 | const token* t0 = next (); |
915 | if (! (t0->type == tok_identifier && t0->content == "global")) | |
916 | throw parse_error ("expected 'global'"); | |
917 | ||
56099f08 FCE |
918 | while (1) |
919 | { | |
920 | const token* t = next (); | |
921 | if (! (t->type == tok_identifier)) | |
922 | throw parse_error ("expected identifier"); | |
923 | ||
2b066ec1 FCE |
924 | for (unsigned i=0; i<globals.size(); i++) |
925 | if (globals[i]->name == t->content) | |
57b73400 GH |
926 | throw parse_error ("duplicate global name"); |
927 | ||
24cb178f FCE |
928 | vardecl* d = new vardecl; |
929 | d->name = t->content; | |
930 | d->tok = t; | |
931 | globals.push_back (d); | |
56099f08 | 932 | |
82919855 FCE |
933 | t = peek (); |
934 | if (t && t->type == tok_operator && t->content == ",") | |
935 | { | |
936 | next (); | |
937 | continue; | |
938 | } | |
56099f08 | 939 | else |
82919855 | 940 | break; |
56099f08 FCE |
941 | } |
942 | } | |
943 | ||
944 | ||
24cb178f FCE |
945 | void |
946 | parser::parse_functiondecl (std::vector<functiondecl*>& functions) | |
56099f08 | 947 | { |
82919855 FCE |
948 | const token* t = next (); |
949 | if (! (t->type == tok_identifier && t->content == "function")) | |
950 | throw parse_error ("expected 'function'"); | |
951 | ||
56099f08 | 952 | |
82919855 | 953 | t = next (); |
56099f08 FCE |
954 | if (! (t->type == tok_identifier)) |
955 | throw parse_error ("expected identifier"); | |
24cb178f FCE |
956 | |
957 | for (unsigned i=0; i<functions.size(); i++) | |
958 | if (functions[i]->name == t->content) | |
959 | throw parse_error ("duplicate function name"); | |
960 | ||
961 | functiondecl *fd = new functiondecl (); | |
56099f08 FCE |
962 | fd->name = t->content; |
963 | fd->tok = t; | |
964 | ||
965 | t = next (); | |
6a505121 FCE |
966 | if (t->type == tok_operator && t->content == ":") |
967 | { | |
968 | t = next (); | |
969 | if (t->type == tok_identifier && t->content == "string") | |
970 | fd->type = pe_string; | |
971 | else if (t->type == tok_identifier && t->content == "long") | |
972 | fd->type = pe_long; | |
973 | else throw parse_error ("expected 'string' or 'long'"); | |
974 | ||
975 | t = next (); | |
976 | } | |
977 | ||
56099f08 FCE |
978 | if (! (t->type == tok_operator && t->content == "(")) |
979 | throw parse_error ("expected '('"); | |
980 | ||
981 | while (1) | |
982 | { | |
983 | t = next (); | |
984 | ||
985 | // permit zero-argument fuctions | |
986 | if (t->type == tok_operator && t->content == ")") | |
987 | break; | |
988 | else if (! (t->type == tok_identifier)) | |
989 | throw parse_error ("expected identifier"); | |
990 | vardecl* vd = new vardecl; | |
991 | vd->name = t->content; | |
992 | vd->tok = t; | |
993 | fd->formal_args.push_back (vd); | |
994 | ||
995 | t = next (); | |
6a505121 FCE |
996 | if (t->type == tok_operator && t->content == ":") |
997 | { | |
998 | t = next (); | |
999 | if (t->type == tok_identifier && t->content == "string") | |
1000 | vd->type = pe_string; | |
1001 | else if (t->type == tok_identifier && t->content == "long") | |
1002 | vd->type = pe_long; | |
1003 | else throw parse_error ("expected 'string' or 'long'"); | |
1004 | ||
1005 | t = next (); | |
1006 | } | |
56099f08 FCE |
1007 | if (t->type == tok_operator && t->content == ")") |
1008 | break; | |
1009 | if (t->type == tok_operator && t->content == ",") | |
1010 | continue; | |
1011 | else | |
1012 | throw parse_error ("expected ',' or ')'"); | |
1013 | } | |
1014 | ||
54dfabe9 FCE |
1015 | t = peek (); |
1016 | if (t && t->type == tok_embedded) | |
1017 | fd->body = parse_embeddedcode (); | |
1018 | else | |
1019 | fd->body = parse_stmt_block (); | |
24cb178f FCE |
1020 | |
1021 | functions.push_back (fd); | |
2f1a1aea FCE |
1022 | } |
1023 | ||
1024 | ||
9c0c0e46 FCE |
1025 | probe_point* |
1026 | parser::parse_probe_point () | |
2f1a1aea | 1027 | { |
9c0c0e46 | 1028 | probe_point* pl = new probe_point; |
2f1a1aea | 1029 | |
9c0c0e46 | 1030 | while (1) |
2f1a1aea | 1031 | { |
9c0c0e46 | 1032 | const token* t = next (); |
b4ceace2 FCE |
1033 | if (! (t->type == tok_identifier || |
1034 | (t->type == tok_operator && t->content == "*"))) | |
1035 | throw parse_error ("expected identifier or '*'"); | |
9c0c0e46 FCE |
1036 | |
1037 | if (pl->tok == 0) pl->tok = t; | |
1038 | ||
1039 | probe_point::component* c = new probe_point::component; | |
1040 | c->functor = t->content; | |
1041 | pl->components.push_back (c); | |
1042 | // NB though we still may add c->arg soon | |
1043 | ||
1044 | t = peek (); | |
1045 | if (t && t->type == tok_operator | |
20c6c071 | 1046 | && (t->content == "{" || t->content == "," || t->content == "=")) |
9c0c0e46 FCE |
1047 | break; |
1048 | ||
1049 | if (t && t->type == tok_operator && t->content == "(") | |
1050 | { | |
1051 | next (); // consume "(" | |
1052 | c->arg = parse_literal (); | |
1053 | ||
1054 | t = next (); | |
1055 | if (! (t->type == tok_operator && t->content == ")")) | |
1056 | throw parse_error ("expected ')'"); | |
1057 | ||
1058 | t = peek (); | |
1059 | if (t && t->type == tok_operator | |
20c6c071 | 1060 | && (t->content == "{" || t->content == "," || t->content == "=")) |
9c0c0e46 | 1061 | break; |
2b066ec1 FCE |
1062 | else if (t && t->type == tok_operator && |
1063 | t->content == "(") | |
1064 | throw parse_error ("unexpected '.' or ',' or '{'"); | |
9c0c0e46 FCE |
1065 | } |
1066 | // fall through | |
1067 | ||
1068 | if (t && t->type == tok_operator && t->content == ".") | |
1069 | next (); | |
1070 | else | |
20c6c071 | 1071 | throw parse_error ("expected '.' or ',' or '(' or '{' or '='"); |
2f1a1aea FCE |
1072 | } |
1073 | ||
1074 | return pl; | |
1075 | } | |
1076 | ||
1077 | ||
1078 | literal* | |
1079 | parser::parse_literal () | |
1080 | { | |
1081 | const token* t = next (); | |
56099f08 | 1082 | literal* l; |
2f1a1aea | 1083 | if (t->type == tok_string) |
56099f08 | 1084 | l = new literal_string (t->content); |
2f1a1aea | 1085 | else if (t->type == tok_number) |
9c0c0e46 FCE |
1086 | { |
1087 | const char* startp = t->content.c_str (); | |
1088 | char* endp = (char*) startp; | |
1089 | ||
3a20432b FCE |
1090 | // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX |
1091 | // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX, | |
1092 | // since the lexer only gives us positive digit strings. | |
9c0c0e46 | 1093 | errno = 0; |
3a20432b | 1094 | long long value = (long long) strtoull (startp, & endp, 0); |
9c0c0e46 | 1095 | if (errno == ERANGE || errno == EINVAL || *endp != '\0' |
3a20432b FCE |
1096 | || (unsigned long long) value > 18446744073709551615ULL |
1097 | || value < -9223372036854775807LL-1) | |
9c0c0e46 FCE |
1098 | throw parse_error ("number invalid or out of range"); |
1099 | ||
3a20432b | 1100 | l = new literal_number (value); |
9c0c0e46 | 1101 | } |
2f1a1aea FCE |
1102 | else |
1103 | throw parse_error ("expected literal string or number"); | |
56099f08 FCE |
1104 | |
1105 | l->tok = t; | |
1106 | return l; | |
2f1a1aea FCE |
1107 | } |
1108 | ||
1109 | ||
1110 | if_statement* | |
1111 | parser::parse_if_statement () | |
1112 | { | |
1113 | const token* t = next (); | |
56099f08 FCE |
1114 | if (! (t->type == tok_identifier && t->content == "if")) |
1115 | throw parse_error ("expected 'if'"); | |
1116 | if_statement* s = new if_statement; | |
1117 | s->tok = t; | |
1118 | ||
1119 | t = next (); | |
2f1a1aea FCE |
1120 | if (! (t->type == tok_operator && t->content == "(")) |
1121 | throw parse_error ("expected '('"); | |
1122 | ||
2f1a1aea FCE |
1123 | s->condition = parse_expression (); |
1124 | ||
1125 | t = next (); | |
1126 | if (! (t->type == tok_operator && t->content == ")")) | |
1127 | throw parse_error ("expected ')'"); | |
1128 | ||
1129 | s->thenblock = parse_statement (); | |
1130 | ||
1131 | t = peek (); | |
1132 | if (t && t->type == tok_identifier && t->content == "else") | |
1133 | { | |
1134 | next (); | |
1135 | s->elseblock = parse_statement (); | |
1136 | } | |
ed10c639 FCE |
1137 | else |
1138 | s->elseblock = 0; // in case not otherwise initialized | |
2f1a1aea FCE |
1139 | |
1140 | return s; | |
1141 | } | |
1142 | ||
1143 | ||
69c68955 FCE |
1144 | expr_statement* |
1145 | parser::parse_expr_statement () | |
1146 | { | |
1147 | expr_statement *es = new expr_statement; | |
1148 | const token* t = peek (); | |
1149 | es->tok = t; | |
1150 | es->value = parse_expression (); | |
1151 | return es; | |
1152 | } | |
1153 | ||
1154 | ||
56099f08 FCE |
1155 | return_statement* |
1156 | parser::parse_return_statement () | |
1157 | { | |
1158 | const token* t = next (); | |
1159 | if (! (t->type == tok_identifier && t->content == "return")) | |
1160 | throw parse_error ("expected 'return'"); | |
1161 | return_statement* s = new return_statement; | |
1162 | s->tok = t; | |
1163 | s->value = parse_expression (); | |
1164 | return s; | |
1165 | } | |
1166 | ||
1167 | ||
1168 | delete_statement* | |
1169 | parser::parse_delete_statement () | |
1170 | { | |
1171 | const token* t = next (); | |
1172 | if (! (t->type == tok_identifier && t->content == "delete")) | |
1173 | throw parse_error ("expected 'delete'"); | |
1174 | delete_statement* s = new delete_statement; | |
1175 | s->tok = t; | |
1176 | s->value = parse_expression (); | |
1177 | return s; | |
1178 | } | |
1179 | ||
1180 | ||
f3c26ea5 FCE |
1181 | next_statement* |
1182 | parser::parse_next_statement () | |
1183 | { | |
1184 | const token* t = next (); | |
1185 | if (! (t->type == tok_identifier && t->content == "next")) | |
1186 | throw parse_error ("expected 'next'"); | |
1187 | next_statement* s = new next_statement; | |
1188 | s->tok = t; | |
1189 | return s; | |
1190 | } | |
1191 | ||
1192 | ||
1193 | break_statement* | |
1194 | parser::parse_break_statement () | |
1195 | { | |
1196 | const token* t = next (); | |
1197 | if (! (t->type == tok_identifier && t->content == "break")) | |
1198 | throw parse_error ("expected 'break'"); | |
1199 | break_statement* s = new break_statement; | |
1200 | s->tok = t; | |
1201 | return s; | |
1202 | } | |
1203 | ||
1204 | ||
1205 | continue_statement* | |
1206 | parser::parse_continue_statement () | |
1207 | { | |
1208 | const token* t = next (); | |
1209 | if (! (t->type == tok_identifier && t->content == "continue")) | |
1210 | throw parse_error ("expected 'continue'"); | |
1211 | continue_statement* s = new continue_statement; | |
1212 | s->tok = t; | |
1213 | return s; | |
1214 | } | |
1215 | ||
1216 | ||
69c68955 FCE |
1217 | for_loop* |
1218 | parser::parse_for_loop () | |
1219 | { | |
f3c26ea5 FCE |
1220 | const token* t = next (); |
1221 | if (! (t->type == tok_identifier && t->content == "for")) | |
1222 | throw parse_error ("expected 'for'"); | |
1223 | for_loop* s = new for_loop; | |
1224 | s->tok = t; | |
1225 | ||
1226 | t = next (); | |
1227 | if (! (t->type == tok_operator && t->content == "(")) | |
1228 | throw parse_error ("expected '('"); | |
1229 | ||
1230 | // initializer + ";" | |
1231 | t = peek (); | |
1232 | if (t && t->type == tok_operator && t->content == ";") | |
1233 | { | |
1234 | literal_number* l = new literal_number(0); | |
1235 | expr_statement* es = new expr_statement; | |
1236 | es->value = l; | |
1237 | s->init = es; | |
1238 | es->value->tok = es->tok = next (); | |
1239 | } | |
1240 | else | |
1241 | { | |
1242 | s->init = parse_expr_statement (); | |
1243 | t = next (); | |
1244 | if (! (t->type == tok_operator && t->content == ";")) | |
1245 | throw parse_error ("expected ';'"); | |
1246 | } | |
1247 | ||
1248 | // condition + ";" | |
1249 | t = peek (); | |
1250 | if (t && t->type == tok_operator && t->content == ";") | |
1251 | { | |
1252 | literal_number* l = new literal_number(1); | |
1253 | s->cond = l; | |
1254 | s->cond->tok = next (); | |
1255 | } | |
1256 | else | |
1257 | { | |
1258 | s->cond = parse_expression (); | |
1259 | t = next (); | |
1260 | if (! (t->type == tok_operator && t->content == ";")) | |
1261 | throw parse_error ("expected ';'"); | |
1262 | } | |
1263 | ||
1264 | // increment + ")" | |
1265 | t = peek (); | |
1266 | if (t && t->type == tok_operator && t->content == ")") | |
1267 | { | |
1268 | literal_number* l = new literal_number(2); | |
1269 | expr_statement* es = new expr_statement; | |
1270 | es->value = l; | |
1271 | s->incr = es; | |
1272 | es->value->tok = es->tok = next (); | |
1273 | } | |
1274 | else | |
1275 | { | |
1276 | s->incr = parse_expr_statement (); | |
1277 | t = next (); | |
1278 | if (! (t->type == tok_operator && t->content == ")")) | |
1279 | throw parse_error ("expected ';'"); | |
1280 | } | |
1281 | ||
1282 | // block | |
1283 | s->block = parse_statement (); | |
1284 | ||
1285 | return s; | |
1286 | } | |
1287 | ||
1288 | ||
1289 | for_loop* | |
1290 | parser::parse_while_loop () | |
1291 | { | |
1292 | const token* t = next (); | |
1293 | if (! (t->type == tok_identifier && t->content == "while")) | |
1294 | throw parse_error ("expected 'while'"); | |
1295 | for_loop* s = new for_loop; | |
1296 | s->tok = t; | |
1297 | ||
1298 | t = next (); | |
1299 | if (! (t->type == tok_operator && t->content == "(")) | |
1300 | throw parse_error ("expected '('"); | |
1301 | ||
1302 | // dummy init and incr fields | |
1303 | literal_number* l = new literal_number(0); | |
1304 | expr_statement* es = new expr_statement; | |
1305 | es->value = l; | |
1306 | s->init = es; | |
1307 | es->value->tok = es->tok = t; | |
1308 | ||
1309 | l = new literal_number(2); | |
1310 | es = new expr_statement; | |
1311 | es->value = l; | |
1312 | s->incr = es; | |
1313 | es->value->tok = es->tok = t; | |
1314 | ||
1315 | ||
1316 | // condition | |
1317 | s->cond = parse_expression (); | |
1318 | ||
1319 | ||
1320 | t = next (); | |
1321 | if (! (t->type == tok_operator && t->content == ")")) | |
1322 | throw parse_error ("expected ')'"); | |
1323 | ||
1324 | // block | |
1325 | s->block = parse_statement (); | |
1326 | ||
1327 | return s; | |
69c68955 FCE |
1328 | } |
1329 | ||
1330 | ||
1331 | foreach_loop* | |
1332 | parser::parse_foreach_loop () | |
1333 | { | |
1334 | const token* t = next (); | |
1335 | if (! (t->type == tok_identifier && t->content == "foreach")) | |
1336 | throw parse_error ("expected 'foreach'"); | |
1337 | foreach_loop* s = new foreach_loop; | |
1338 | s->tok = t; | |
93484556 | 1339 | s->sort_direction = 0; |
69c68955 FCE |
1340 | |
1341 | t = next (); | |
1342 | if (! (t->type == tok_operator && t->content == "(")) | |
1343 | throw parse_error ("expected '('"); | |
1344 | ||
1345 | // see also parse_array_in | |
1346 | ||
1347 | bool parenthesized = false; | |
1348 | t = peek (); | |
1349 | if (t && t->type == tok_operator && t->content == "[") | |
1350 | { | |
1351 | next (); | |
1352 | parenthesized = true; | |
1353 | } | |
1354 | ||
1355 | while (1) | |
1356 | { | |
1357 | t = next (); | |
1358 | if (! (t->type == tok_identifier)) | |
1359 | throw parse_error ("expected identifier"); | |
1360 | symbol* sym = new symbol; | |
1361 | sym->tok = t; | |
1362 | sym->name = t->content; | |
1363 | s->indexes.push_back (sym); | |
1364 | ||
93484556 FCE |
1365 | t = peek (); |
1366 | if (t && t->type == tok_operator && | |
1367 | (t->content == "+" || t->content == "-")) | |
1368 | { | |
1369 | if (s->sort_direction) | |
1370 | throw parse_error ("multiple sort directives"); | |
1371 | s->sort_direction = (t->content == "+") ? 1 : -1; | |
1372 | s->sort_column = s->indexes.size(); | |
1373 | next(); | |
1374 | } | |
1375 | ||
69c68955 FCE |
1376 | if (parenthesized) |
1377 | { | |
93484556 | 1378 | t = peek (); |
69c68955 FCE |
1379 | if (t && t->type == tok_operator && t->content == ",") |
1380 | { | |
1381 | next (); | |
1382 | continue; | |
1383 | } | |
1384 | else if (t && t->type == tok_operator && t->content == "]") | |
1385 | { | |
1386 | next (); | |
1387 | break; | |
1388 | } | |
1389 | else | |
1390 | throw parse_error ("expected ',' or ']'"); | |
1391 | } | |
1392 | else | |
1393 | break; // expecting only one expression | |
1394 | } | |
1395 | ||
1396 | t = next (); | |
1397 | if (! (t->type == tok_identifier && t->content == "in")) | |
1398 | throw parse_error ("expected 'in'"); | |
d02548c0 GH |
1399 | |
1400 | s->base = parse_indexable(); | |
69c68955 | 1401 | |
93484556 FCE |
1402 | t = peek (); |
1403 | if (t && t->type == tok_operator && | |
1404 | (t->content == "+" || t->content == "-")) | |
1405 | { | |
1406 | if (s->sort_direction) | |
1407 | throw parse_error ("multiple sort directives"); | |
1408 | s->sort_direction = (t->content == "+") ? 1 : -1; | |
1409 | s->sort_column = 0; | |
1410 | next(); | |
1411 | } | |
1412 | ||
69c68955 FCE |
1413 | t = next (); |
1414 | if (! (t->type == tok_operator && t->content == ")")) | |
1415 | throw parse_error ("expected ')'"); | |
1416 | ||
1417 | s->block = parse_statement (); | |
1418 | return s; | |
1419 | } | |
1420 | ||
1421 | ||
2f1a1aea FCE |
1422 | expression* |
1423 | parser::parse_expression () | |
1424 | { | |
1425 | return parse_assignment (); | |
1426 | } | |
1427 | ||
2f1a1aea FCE |
1428 | |
1429 | expression* | |
1430 | parser::parse_assignment () | |
1431 | { | |
1432 | expression* op1 = parse_ternary (); | |
1433 | ||
1434 | const token* t = peek (); | |
82919855 FCE |
1435 | // right-associative operators |
1436 | if (t && t->type == tok_operator | |
2f1a1aea | 1437 | && (t->content == "=" || |
82919855 | 1438 | t->content == "<<<" || |
2f1a1aea | 1439 | t->content == "+=" || |
bb2e3076 FCE |
1440 | t->content == "-=" || |
1441 | t->content == "*=" || | |
1442 | t->content == "/=" || | |
1443 | t->content == "%=" || | |
1444 | t->content == "<<=" || | |
1445 | t->content == ">>=" || | |
1446 | t->content == "&=" || | |
1447 | t->content == "^=" || | |
1448 | t->content == "|=" || | |
d5d7c2cc | 1449 | t->content == ".=" || |
bb2e3076 | 1450 | false)) |
2f1a1aea | 1451 | { |
bb2e3076 | 1452 | // NB: lvalueness is checked during elaboration / translation |
2f1a1aea | 1453 | assignment* e = new assignment; |
56099f08 | 1454 | e->left = op1; |
2f1a1aea | 1455 | e->op = t->content; |
56099f08 | 1456 | e->tok = t; |
2f1a1aea | 1457 | next (); |
82919855 | 1458 | e->right = parse_expression (); |
56099f08 | 1459 | op1 = e; |
2f1a1aea | 1460 | } |
56099f08 FCE |
1461 | |
1462 | return op1; | |
2f1a1aea FCE |
1463 | } |
1464 | ||
1465 | ||
1466 | expression* | |
1467 | parser::parse_ternary () | |
1468 | { | |
1469 | expression* op1 = parse_logical_or (); | |
1470 | ||
1471 | const token* t = peek (); | |
1472 | if (t && t->type == tok_operator && t->content == "?") | |
1473 | { | |
2f1a1aea | 1474 | ternary_expression* e = new ternary_expression; |
56099f08 | 1475 | e->tok = t; |
2f1a1aea | 1476 | e->cond = op1; |
56099f08 FCE |
1477 | next (); |
1478 | e->truevalue = parse_expression (); // XXX | |
2f1a1aea FCE |
1479 | |
1480 | t = next (); | |
1481 | if (! (t->type == tok_operator && t->content == ":")) | |
1482 | throw parse_error ("expected ':'"); | |
1483 | ||
56099f08 | 1484 | e->falsevalue = parse_expression (); // XXX |
2f1a1aea FCE |
1485 | return e; |
1486 | } | |
1487 | else | |
1488 | return op1; | |
1489 | } | |
1490 | ||
1491 | ||
1492 | expression* | |
1493 | parser::parse_logical_or () | |
1494 | { | |
1495 | expression* op1 = parse_logical_and (); | |
1496 | ||
1497 | const token* t = peek (); | |
56099f08 | 1498 | while (t && t->type == tok_operator && t->content == "||") |
2f1a1aea | 1499 | { |
2f1a1aea | 1500 | logical_or_expr* e = new logical_or_expr; |
56099f08 FCE |
1501 | e->tok = t; |
1502 | e->op = t->content; | |
2f1a1aea | 1503 | e->left = op1; |
56099f08 FCE |
1504 | next (); |
1505 | e->right = parse_logical_and (); | |
1506 | op1 = e; | |
1507 | t = peek (); | |
2f1a1aea | 1508 | } |
56099f08 FCE |
1509 | |
1510 | return op1; | |
2f1a1aea FCE |
1511 | } |
1512 | ||
1513 | ||
1514 | expression* | |
1515 | parser::parse_logical_and () | |
1516 | { | |
bb2e3076 | 1517 | expression* op1 = parse_boolean_or (); |
2f1a1aea FCE |
1518 | |
1519 | const token* t = peek (); | |
56099f08 | 1520 | while (t && t->type == tok_operator && t->content == "&&") |
2f1a1aea | 1521 | { |
2f1a1aea FCE |
1522 | logical_and_expr *e = new logical_and_expr; |
1523 | e->left = op1; | |
56099f08 FCE |
1524 | e->op = t->content; |
1525 | e->tok = t; | |
1526 | next (); | |
bb2e3076 FCE |
1527 | e->right = parse_boolean_or (); |
1528 | op1 = e; | |
1529 | t = peek (); | |
1530 | } | |
1531 | ||
1532 | return op1; | |
1533 | } | |
1534 | ||
1535 | ||
1536 | expression* | |
1537 | parser::parse_boolean_or () | |
1538 | { | |
1539 | expression* op1 = parse_boolean_xor (); | |
1540 | ||
1541 | const token* t = peek (); | |
1542 | while (t && t->type == tok_operator && t->content == "|") | |
1543 | { | |
1544 | binary_expression* e = new binary_expression; | |
1545 | e->left = op1; | |
1546 | e->op = t->content; | |
1547 | e->tok = t; | |
1548 | next (); | |
1549 | e->right = parse_boolean_xor (); | |
1550 | op1 = e; | |
1551 | t = peek (); | |
1552 | } | |
1553 | ||
1554 | return op1; | |
1555 | } | |
1556 | ||
1557 | ||
1558 | expression* | |
1559 | parser::parse_boolean_xor () | |
1560 | { | |
1561 | expression* op1 = parse_boolean_and (); | |
1562 | ||
1563 | const token* t = peek (); | |
1564 | while (t && t->type == tok_operator && t->content == "^") | |
1565 | { | |
1566 | binary_expression* e = new binary_expression; | |
1567 | e->left = op1; | |
1568 | e->op = t->content; | |
1569 | e->tok = t; | |
1570 | next (); | |
1571 | e->right = parse_boolean_and (); | |
1572 | op1 = e; | |
1573 | t = peek (); | |
1574 | } | |
1575 | ||
1576 | return op1; | |
1577 | } | |
1578 | ||
1579 | ||
1580 | expression* | |
1581 | parser::parse_boolean_and () | |
1582 | { | |
1583 | expression* op1 = parse_array_in (); | |
1584 | ||
1585 | const token* t = peek (); | |
1586 | while (t && t->type == tok_operator && t->content == "&") | |
1587 | { | |
1588 | binary_expression* e = new binary_expression; | |
1589 | e->left = op1; | |
1590 | e->op = t->content; | |
1591 | e->tok = t; | |
1592 | next (); | |
56099f08 FCE |
1593 | e->right = parse_array_in (); |
1594 | op1 = e; | |
1595 | t = peek (); | |
2f1a1aea | 1596 | } |
56099f08 FCE |
1597 | |
1598 | return op1; | |
2f1a1aea FCE |
1599 | } |
1600 | ||
1601 | ||
1602 | expression* | |
1603 | parser::parse_array_in () | |
1604 | { | |
ce10591c | 1605 | // This is a very tricky case. All these are legit expressions: |
69c68955 | 1606 | // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b" |
ce10591c FCE |
1607 | vector<expression*> indexes; |
1608 | bool parenthesized = false; | |
2f1a1aea FCE |
1609 | |
1610 | const token* t = peek (); | |
69c68955 | 1611 | if (t && t->type == tok_operator && t->content == "[") |
ce10591c FCE |
1612 | { |
1613 | next (); | |
1614 | parenthesized = true; | |
1615 | } | |
1616 | ||
1617 | while (1) | |
1618 | { | |
1619 | expression* op1 = parse_comparison (); | |
1620 | indexes.push_back (op1); | |
1621 | ||
1622 | if (parenthesized) | |
1623 | { | |
1624 | const token* t = peek (); | |
1625 | if (t && t->type == tok_operator && t->content == ",") | |
1626 | { | |
1627 | next (); | |
1628 | continue; | |
1629 | } | |
69c68955 | 1630 | else if (t && t->type == tok_operator && t->content == "]") |
ce10591c FCE |
1631 | { |
1632 | next (); | |
1633 | break; | |
1634 | } | |
1635 | else | |
69c68955 | 1636 | throw parse_error ("expected ',' or ']'"); |
ce10591c FCE |
1637 | } |
1638 | else | |
1639 | break; // expecting only one expression | |
1640 | } | |
1641 | ||
1642 | t = peek (); | |
2f1a1aea FCE |
1643 | if (t && t->type == tok_identifier && t->content == "in") |
1644 | { | |
2f1a1aea | 1645 | array_in *e = new array_in; |
56099f08 | 1646 | e->tok = t; |
ce10591c FCE |
1647 | next (); // swallow "in" |
1648 | ||
1649 | arrayindex* a = new arrayindex; | |
1650 | a->indexes = indexes; | |
d02548c0 GH |
1651 | a->base = parse_indexable(); |
1652 | a->tok = a->base->get_tok(); | |
ce10591c | 1653 | e->operand = a; |
2f1a1aea FCE |
1654 | return e; |
1655 | } | |
ce10591c FCE |
1656 | else if (indexes.size() == 1) // no "in" - need one expression only |
1657 | return indexes[0]; | |
2f1a1aea | 1658 | else |
ce10591c | 1659 | throw parse_error ("unexpected comma-separated expression list"); |
2f1a1aea FCE |
1660 | } |
1661 | ||
1662 | ||
1663 | expression* | |
1664 | parser::parse_comparison () | |
1665 | { | |
bb2e3076 | 1666 | expression* op1 = parse_shift (); |
2f1a1aea FCE |
1667 | |
1668 | const token* t = peek (); | |
56099f08 | 1669 | while (t && t->type == tok_operator |
553d27a5 FCE |
1670 | && (t->content == ">" || |
1671 | t->content == "<" || | |
1672 | t->content == "==" || | |
1673 | t->content == "!=" || | |
1674 | t->content == "<=" || | |
bb2e3076 | 1675 | t->content == ">=")) |
2f1a1aea FCE |
1676 | { |
1677 | comparison* e = new comparison; | |
1678 | e->left = op1; | |
1679 | e->op = t->content; | |
56099f08 | 1680 | e->tok = t; |
2f1a1aea | 1681 | next (); |
bb2e3076 FCE |
1682 | e->right = parse_shift (); |
1683 | op1 = e; | |
1684 | t = peek (); | |
1685 | } | |
1686 | ||
1687 | return op1; | |
1688 | } | |
1689 | ||
1690 | ||
1691 | expression* | |
1692 | parser::parse_shift () | |
1693 | { | |
1694 | expression* op1 = parse_concatenation (); | |
1695 | ||
1696 | const token* t = peek (); | |
1697 | while (t && t->type == tok_operator && | |
1698 | (t->content == "<<" || t->content == ">>")) | |
1699 | { | |
1700 | binary_expression* e = new binary_expression; | |
1701 | e->left = op1; | |
1702 | e->op = t->content; | |
1703 | e->tok = t; | |
1704 | next (); | |
56099f08 FCE |
1705 | e->right = parse_concatenation (); |
1706 | op1 = e; | |
1707 | t = peek (); | |
2f1a1aea | 1708 | } |
56099f08 FCE |
1709 | |
1710 | return op1; | |
2f1a1aea FCE |
1711 | } |
1712 | ||
1713 | ||
1714 | expression* | |
1715 | parser::parse_concatenation () | |
1716 | { | |
1717 | expression* op1 = parse_additive (); | |
1718 | ||
1719 | const token* t = peek (); | |
1720 | // XXX: the actual awk string-concatenation operator is *whitespace*. | |
1721 | // I don't know how to easily to model that here. | |
56099f08 | 1722 | while (t && t->type == tok_operator && t->content == ".") |
2f1a1aea FCE |
1723 | { |
1724 | concatenation* e = new concatenation; | |
1725 | e->left = op1; | |
1726 | e->op = t->content; | |
56099f08 | 1727 | e->tok = t; |
2f1a1aea | 1728 | next (); |
56099f08 FCE |
1729 | e->right = parse_additive (); |
1730 | op1 = e; | |
1731 | t = peek (); | |
2f1a1aea | 1732 | } |
56099f08 FCE |
1733 | |
1734 | return op1; | |
2f1a1aea FCE |
1735 | } |
1736 | ||
1737 | ||
1738 | expression* | |
1739 | parser::parse_additive () | |
1740 | { | |
1741 | expression* op1 = parse_multiplicative (); | |
1742 | ||
1743 | const token* t = peek (); | |
56099f08 | 1744 | while (t && t->type == tok_operator |
2f1a1aea FCE |
1745 | && (t->content == "+" || t->content == "-")) |
1746 | { | |
1747 | binary_expression* e = new binary_expression; | |
1748 | e->op = t->content; | |
1749 | e->left = op1; | |
56099f08 | 1750 | e->tok = t; |
2f1a1aea | 1751 | next (); |
56099f08 FCE |
1752 | e->right = parse_multiplicative (); |
1753 | op1 = e; | |
1754 | t = peek (); | |
2f1a1aea | 1755 | } |
56099f08 FCE |
1756 | |
1757 | return op1; | |
2f1a1aea FCE |
1758 | } |
1759 | ||
1760 | ||
1761 | expression* | |
1762 | parser::parse_multiplicative () | |
1763 | { | |
1764 | expression* op1 = parse_unary (); | |
1765 | ||
1766 | const token* t = peek (); | |
56099f08 | 1767 | while (t && t->type == tok_operator |
2f1a1aea FCE |
1768 | && (t->content == "*" || t->content == "/" || t->content == "%")) |
1769 | { | |
1770 | binary_expression* e = new binary_expression; | |
1771 | e->op = t->content; | |
1772 | e->left = op1; | |
56099f08 | 1773 | e->tok = t; |
2f1a1aea | 1774 | next (); |
56099f08 FCE |
1775 | e->right = parse_unary (); |
1776 | op1 = e; | |
1777 | t = peek (); | |
2f1a1aea | 1778 | } |
56099f08 FCE |
1779 | |
1780 | return op1; | |
2f1a1aea FCE |
1781 | } |
1782 | ||
1783 | ||
1784 | expression* | |
1785 | parser::parse_unary () | |
1786 | { | |
1787 | const token* t = peek (); | |
1788 | if (t && t->type == tok_operator | |
bb2e3076 FCE |
1789 | && (t->content == "+" || |
1790 | t->content == "-" || | |
1791 | t->content == "!" || | |
1792 | t->content == "~" || | |
1793 | false)) | |
2f1a1aea FCE |
1794 | { |
1795 | unary_expression* e = new unary_expression; | |
1796 | e->op = t->content; | |
56099f08 | 1797 | e->tok = t; |
2f1a1aea | 1798 | next (); |
3a20432b | 1799 | e->operand = parse_crement (); |
2f1a1aea FCE |
1800 | return e; |
1801 | } | |
1802 | else | |
bb2e3076 | 1803 | return parse_crement (); |
2f1a1aea FCE |
1804 | } |
1805 | ||
1806 | ||
1807 | expression* | |
1808 | parser::parse_crement () // as in "increment" / "decrement" | |
1809 | { | |
1810 | const token* t = peek (); | |
1811 | if (t && t->type == tok_operator | |
1812 | && (t->content == "++" || t->content == "--")) | |
1813 | { | |
1814 | pre_crement* e = new pre_crement; | |
1815 | e->op = t->content; | |
56099f08 | 1816 | e->tok = t; |
2f1a1aea FCE |
1817 | next (); |
1818 | e->operand = parse_value (); | |
1819 | return e; | |
1820 | } | |
1821 | ||
1822 | // post-crement or non-crement | |
1823 | expression *op1 = parse_value (); | |
1824 | ||
1825 | t = peek (); | |
1826 | if (t && t->type == tok_operator | |
1827 | && (t->content == "++" || t->content == "--")) | |
1828 | { | |
1829 | post_crement* e = new post_crement; | |
1830 | e->op = t->content; | |
56099f08 | 1831 | e->tok = t; |
2f1a1aea FCE |
1832 | next (); |
1833 | e->operand = op1; | |
1834 | return e; | |
1835 | } | |
1836 | else | |
1837 | return op1; | |
1838 | } | |
1839 | ||
1840 | ||
1841 | expression* | |
1842 | parser::parse_value () | |
1843 | { | |
1844 | const token* t = peek (); | |
1845 | if (! t) | |
1846 | throw parse_error ("expected value"); | |
1847 | ||
1848 | if (t->type == tok_operator && t->content == "(") | |
1849 | { | |
1850 | next (); | |
1851 | expression* e = parse_expression (); | |
1852 | t = next (); | |
1853 | if (! (t->type == tok_operator && t->content == ")")) | |
1854 | throw parse_error ("expected ')'"); | |
1855 | return e; | |
1856 | } | |
1857 | else if (t->type == tok_identifier) | |
1858 | return parse_symbol (); | |
1859 | else | |
1860 | return parse_literal (); | |
1861 | } | |
1862 | ||
1863 | ||
d02548c0 GH |
1864 | const token * |
1865 | parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name) | |
1866 | { | |
1867 | hop = NULL; | |
1868 | const token* t = expect_ident (name); | |
1869 | if (name == "@hist_linear" || name == "@hist_log") | |
1870 | { | |
1871 | hop = new hist_op; | |
1872 | if (name == "@hist_linear") | |
1873 | hop->htype = hist_linear; | |
1874 | else if (name == "@hist_log") | |
1875 | hop->htype = hist_log; | |
1876 | hop->tok = t; | |
1877 | expect_op("("); | |
1878 | hop->stat = parse_expression (); | |
1879 | int64_t tnum; | |
1880 | if (hop->htype == hist_linear) | |
1881 | { | |
1882 | for (size_t i = 0; i < 3; ++i) | |
1883 | { | |
1884 | expect_op (","); | |
1885 | expect_number (tnum); | |
1886 | hop->params.push_back (tnum); | |
1887 | } | |
1888 | } | |
1889 | else | |
1890 | { | |
1891 | assert(hop->htype == hist_log); | |
1892 | if (peek_op (",")) | |
1893 | { | |
1894 | expect_op (","); | |
1895 | expect_number (tnum); | |
1896 | hop->params.push_back (tnum); | |
1897 | } | |
1898 | else | |
1899 | { | |
1900 | // FIXME (magic value): Logarithmic histograms get 64 | |
1901 | // buckets by default. | |
1902 | hop->params.push_back (64); | |
1903 | } | |
1904 | } | |
1905 | expect_op(")"); | |
1906 | } | |
1907 | return t; | |
1908 | } | |
1909 | ||
1910 | ||
1911 | indexable* | |
1912 | parser::parse_indexable () | |
1913 | { | |
1914 | hist_op *hop = NULL; | |
1915 | string name; | |
1916 | const token *tok = parse_hist_op_or_bare_name(hop, name); | |
1917 | if (hop) | |
1918 | return hop; | |
1919 | else | |
1920 | { | |
1921 | symbol* sym = new symbol; | |
1922 | sym->name = name; | |
1923 | sym->tok = tok; | |
1924 | return sym; | |
1925 | } | |
1926 | } | |
1927 | ||
1928 | ||
1929 | // var, indexable[index], func(parms), printf("...", ...), $var, $var->member, @stat_op(stat) | |
2f1a1aea | 1930 | expression* |
0fefb486 | 1931 | parser::parse_symbol () |
2f1a1aea | 1932 | { |
d02548c0 GH |
1933 | hist_op *hop = NULL; |
1934 | symbol *sym = NULL; | |
d7f3e0c5 | 1935 | string name; |
d02548c0 GH |
1936 | const token *t = parse_hist_op_or_bare_name(hop, name); |
1937 | ||
1938 | if (!hop) | |
0fefb486 | 1939 | { |
d02548c0 GH |
1940 | // If we didn't get a hist_op, then we did get an identifier. We can |
1941 | // now scrutinize this identifier for the various magic forms of identifier | |
1942 | // (printf, @stat_op, and $var...) | |
1943 | ||
1944 | if (name.size() > 0 && name[0] == '@') | |
d7f3e0c5 | 1945 | { |
d02548c0 GH |
1946 | stat_op *sop = new stat_op; |
1947 | if (name == "@avg") | |
1948 | sop->ctype = sc_average; | |
1949 | else if (name == "@count") | |
1950 | sop->ctype = sc_count; | |
1951 | else if (name == "@sum") | |
1952 | sop->ctype = sc_sum; | |
1953 | else if (name == "@min") | |
1954 | sop->ctype = sc_min; | |
1955 | else if (name == "@max") | |
1956 | sop->ctype = sc_max; | |
1957 | else | |
1958 | throw parse_error("unknown statistic operator " + name); | |
1959 | expect_op("("); | |
1960 | sop->tok = t; | |
1961 | sop->stat = parse_expression (); | |
1962 | expect_op(")"); | |
1963 | return sop; | |
1964 | } | |
1965 | ||
1966 | else if (name.size() > 0 && (name == "print" | |
1967 | || name == "sprint" | |
1968 | || name == "printf" | |
1969 | || name == "sprintf")) | |
1970 | { | |
1971 | print_format *fmt = new print_format; | |
1972 | fmt->tok = t; | |
1973 | fmt->print_with_format = (name[name.size() - 1] == 'f'); | |
1974 | fmt->print_to_stream = (name[0] == 'p'); | |
1975 | expect_op("("); | |
1976 | if (fmt->print_with_format) | |
1977 | { | |
1978 | // Consume and convert a format string, and any subsequent | |
1979 | // arguments. Agreement between the format string and the | |
1980 | // arguments is postponed to the typechecking phase. | |
1981 | string tmp; | |
1982 | expect_unknown (tok_string, tmp); | |
1983 | fmt->components = print_format::string_to_components (tmp); | |
1984 | while (!peek_op (")")) | |
1985 | { | |
1986 | expect_op(","); | |
1987 | expression *e = parse_expression (); | |
1988 | fmt->args.push_back(e); | |
1989 | } | |
d7f3e0c5 | 1990 | } |
d7f3e0c5 | 1991 | else |
d02548c0 GH |
1992 | { |
1993 | // If we are not printing with a format string, we permit | |
1994 | // exactly one argument (of any type). | |
1995 | expression *e = parse_expression (); | |
1996 | fmt->args.push_back(e); | |
1997 | } | |
1998 | expect_op(")"); | |
1999 | return fmt; | |
2000 | } | |
2001 | ||
2002 | else if (name.size() > 0 && name[0] == '$') | |
2003 | { | |
2004 | // target_symbol time | |
2005 | target_symbol *tsym = new target_symbol; | |
2006 | tsym->tok = t; | |
2007 | tsym->base_name = name; | |
2008 | while (true) | |
2009 | { | |
2010 | string c; | |
2011 | if (peek_op ("->")) | |
2012 | { | |
2013 | next(); | |
2014 | expect_ident (c); | |
2015 | tsym->components.push_back | |
2016 | (make_pair (target_symbol::comp_struct_member, c)); | |
2017 | } | |
2018 | else if (peek_op ("[")) | |
2019 | { | |
2020 | next(); | |
2021 | expect_unknown (tok_number, c); | |
2022 | expect_op ("]"); | |
2023 | tsym->components.push_back | |
2024 | (make_pair (target_symbol::comp_literal_array_index, c)); | |
2025 | } | |
2026 | else | |
2027 | break; | |
2028 | } | |
2029 | return tsym; | |
2030 | } | |
2031 | ||
2032 | else if (peek_op ("(")) // function call | |
2033 | { | |
2034 | next (); | |
2035 | struct functioncall* f = new functioncall; | |
2036 | f->tok = t; | |
2037 | f->function = name; | |
2038 | // Allow empty actual parameter list | |
2039 | if (peek_op (")")) | |
2040 | { | |
2041 | next (); | |
2042 | return f; | |
2043 | } | |
2044 | while (1) | |
2045 | { | |
2046 | f->args.push_back (parse_expression ()); | |
2047 | if (peek_op (")")) | |
2048 | { | |
2049 | next(); | |
2050 | break; | |
2051 | } | |
2052 | else if (peek_op (",")) | |
2053 | { | |
2054 | next(); | |
2055 | continue; | |
2056 | } | |
2057 | else | |
2058 | throw parse_error ("expected ',' or ')'"); | |
2059 | } | |
2060 | return f; | |
2061 | } | |
2062 | ||
2063 | else | |
2064 | { | |
2065 | sym = new symbol; | |
2066 | sym->name = name; | |
2067 | sym->tok = t; | |
d7f3e0c5 | 2068 | } |
0fefb486 | 2069 | } |
d7f3e0c5 | 2070 | |
d02548c0 GH |
2071 | // By now, either we had a hist_op in the first place, or else |
2072 | // we had a plain word and it was converted to a symbol. | |
2073 | ||
2074 | assert (hop || sym); | |
2075 | ||
2076 | // All that remains is to check for array indexing | |
2077 | ||
d7f3e0c5 | 2078 | if (peek_op ("[")) // array |
2f1a1aea FCE |
2079 | { |
2080 | next (); | |
2081 | struct arrayindex* ai = new arrayindex; | |
d02548c0 GH |
2082 | ai->tok = t; |
2083 | ||
2084 | if (hop) | |
2085 | ai->base = hop; | |
2086 | else | |
2087 | ai->base = sym; | |
2088 | ||
2f1a1aea FCE |
2089 | while (1) |
2090 | { | |
2091 | ai->indexes.push_back (parse_expression ()); | |
d7f3e0c5 GH |
2092 | if (peek_op ("]")) |
2093 | { | |
2094 | next(); | |
2095 | break; | |
2096 | } | |
2097 | else if (peek_op (",")) | |
2098 | { | |
2099 | next(); | |
2100 | continue; | |
2101 | } | |
2f1a1aea FCE |
2102 | else |
2103 | throw parse_error ("expected ',' or ']'"); | |
2104 | } | |
2105 | return ai; | |
2106 | } | |
d02548c0 GH |
2107 | |
2108 | // If we got to here, we *should* have a symbol; if we have | |
2109 | // a hist_op on its own, it doesn't count as an expression, | |
2110 | // so we throw a parse error. | |
2111 | ||
2112 | if (hop) | |
2113 | throw parse_error("base histogram operator where expression expected", t); | |
2114 | ||
2115 | return sym; | |
2f1a1aea | 2116 | } |
56099f08 | 2117 |