]> sourceware.org Git - systemtap.git/blame - parse.cxx
PR2168: Support foreach over pmap histograms
[systemtap.git] / parse.cxx
CommitLineData
2f1a1aea 1// recursive descent parser for systemtap scripts
f4fe2e93 2// Copyright (C) 2005-2010 Red Hat Inc.
77a5c1f9 3// Copyright (C) 2006 Intel Corporation.
5811366a 4// Copyright (C) 2007 Bull S.A.S
69c68955
FCE
5//
6// This file is part of systemtap, and is free software. You can
7// redistribute it and/or modify it under the terms of the GNU General
8// Public License (GPL); either version 2, or (at your option) any
9// later version.
2f1a1aea 10
2b066ec1 11#include "config.h"
2f1a1aea
FCE
12#include "staptree.h"
13#include "parse.h"
177a8ead 14#include "session.h"
3f99432c
FCE
15#include "util.h"
16
2b066ec1 17#include <iostream>
eacb10ce 18
2b066ec1 19#include <fstream>
2f1a1aea 20#include <cctype>
9c0c0e46 21#include <cstdlib>
29e64872 22#include <cassert>
9c0c0e46
FCE
23#include <cerrno>
24#include <climits>
57b73400 25#include <sstream>
f74fb737 26#include <cstring>
3f99432c 27#include <cctype>
eacb10ce
FCE
28#include <iterator>
29
7a468d68
FCE
30extern "C" {
31#include <fnmatch.h>
32}
2f1a1aea
FCE
33
34using namespace std;
35
36// ------------------------------------------------------------------------
37
bb2e3076
FCE
38
39
177a8ead
FCE
40parser::parser (systemtap_session& s, istream& i, bool p):
41 session (s),
24cb178f 42 input_name ("<input>"), free_input (0),
213bee8f 43 input (i, input_name, s), privileged (p),
6e213f58 44 context(con_unknown), last_t (0), next_t (0), num_errors (0)
2f1a1aea
FCE
45{ }
46
177a8ead
FCE
47parser::parser (systemtap_session& s, const string& fn, bool p):
48 session (s),
2f1a1aea 49 input_name (fn), free_input (new ifstream (input_name.c_str(), ios::in)),
213bee8f 50 input (* free_input, input_name, s), privileged (p),
6e213f58 51 context(con_unknown), last_t (0), next_t (0), num_errors (0)
2f1a1aea
FCE
52{ }
53
54parser::~parser()
55{
56 if (free_input) delete free_input;
57}
58
59
82919855 60stapfile*
177a8ead 61parser::parse (systemtap_session& s, std::istream& i, bool pr)
82919855 62{
177a8ead 63 parser p (s, i, pr);
82919855
FCE
64 return p.parse ();
65}
66
67
68stapfile*
177a8ead 69parser::parse (systemtap_session& s, const std::string& n, bool pr)
82919855 70{
177a8ead 71 parser p (s, n, pr);
82919855
FCE
72 return p.parse ();
73}
74
d7f3e0c5
GH
75static string
76tt2str(token_type tt)
77{
78 switch (tt)
79 {
80 case tok_junk: return "junk";
81 case tok_identifier: return "identifier";
82 case tok_operator: return "operator";
83 case tok_string: return "string";
84 case tok_number: return "number";
85 case tok_embedded: return "embedded-code";
6e213f58 86 case tok_keyword: return "keyword";
d7f3e0c5
GH
87 }
88 return "unknown token";
89}
82919855 90
0323ed4d
WC
91ostream&
92operator << (ostream& o, const source_loc& loc)
93{
a704a23b 94 o << loc.file->name << ":"
0323ed4d
WC
95 << loc.line << ":"
96 << loc.column;
97
98 return o;
99}
100
56099f08
FCE
101ostream&
102operator << (ostream& o, const token& t)
103{
d7f3e0c5 104 o << tt2str(t.type);
56099f08 105
6e213f58 106 if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
56099f08 107 {
24cb178f
FCE
108 o << " '";
109 for (unsigned i=0; i<t.content.length(); i++)
110 {
111 char c = t.content[i];
112 o << (isprint (c) ? c : '?');
113 }
114 o << "'";
56099f08 115 }
56099f08 116
dff50e09 117 o << " at "
0323ed4d 118 << t.location;
56099f08
FCE
119
120 return o;
121}
122
123
dff50e09 124void
2f1a1aea
FCE
125parser::print_error (const parse_error &pe)
126{
1b1b4ceb 127 string align_parse_error (" ");
2f1a1aea
FCE
128 cerr << "parse error: " << pe.what () << endl;
129
177a8ead
FCE
130 if (pe.tok)
131 {
132 cerr << "\tat: " << *pe.tok << endl;
1b1b4ceb 133 session.print_error_source (cerr, align_parse_error, pe.tok);
177a8ead 134 }
2f1a1aea 135 else
177a8ead
FCE
136 {
137 const token* t = last_t;
138 if (t)
1b1b4ceb
RA
139 {
140 cerr << "\tsaw: " << *t << endl;
141 session.print_error_source (cerr, align_parse_error, t);
142 }
177a8ead
FCE
143 else
144 cerr << "\tsaw: " << input_name << " EOF" << endl;
145 }
2f1a1aea
FCE
146
147 // XXX: make it possible to print the last input line,
148 // so as to line up an arrow with the specific error column
149
150 num_errors ++;
151}
152
153
dff50e09 154const token*
2f1a1aea
FCE
155parser::last ()
156{
157 return last_t;
158}
159
160
c434ec7e
FCE
161
162template <typename OPERAND>
163bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
164{
165 if (op->type == tok_operator && op->content == "<=")
166 { return lhs <= rhs; }
167 else if (op->type == tok_operator && op->content == ">=")
168 { return lhs >= rhs; }
169 else if (op->type == tok_operator && op->content == "<")
170 { return lhs < rhs; }
171 else if (op->type == tok_operator && op->content == ">")
172 { return lhs > rhs; }
173 else if (op->type == tok_operator && op->content == "==")
174 { return lhs == rhs; }
175 else if (op->type == tok_operator && op->content == "!=")
176 { return lhs != rhs; }
177 else
178 throw parse_error ("expected comparison operator", op);
179}
180
181
177a8ead
FCE
182// Here, we perform on-the-fly preprocessing.
183// The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
44ce8ed5
FCE
184// where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
185// or: arch COMPARISON-OP "arch-string"
db135493 186// or: systemtap_v COMPARISON-OP "version-string"
561079c8 187// or: CONFIG_foo COMPARISON-OP "config-string"
717a457b 188// or: CONFIG_foo COMPARISON-OP number
4227f98d 189// or: CONFIG_foo COMPARISON-OP CONFIG_bar
5811366a
FCE
190// or: "string1" COMPARISON-OP "string2"
191// or: number1 COMPARISON-OP number2
44ce8ed5 192// The %: ELSE-TOKENS part is optional.
177a8ead
FCE
193//
194// e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
c434ec7e 195// e.g. %( arch != "i?86" %? "foo" %: "baz" %)
561079c8 196// e.g. %( CONFIG_foo %? "foo" %: "baz" %)
177a8ead
FCE
197//
198// Up to an entire %( ... %) expression is processed by a single call
199// to this function. Tokens included by any nested conditions are
200// enqueued in a private vector.
201
202bool eval_pp_conditional (systemtap_session& s,
203 const token* l, const token* op, const token* r)
204{
44ce8ed5 205 if (l->type == tok_identifier && (l->content == "kernel_v" ||
db135493
FCE
206 l->content == "kernel_vr" ||
207 l->content == "systemtap_v"))
44ce8ed5 208 {
db135493
FCE
209 if (! (r->type == tok_string))
210 throw parse_error ("expected string literal", r);
211
44ce8ed5 212 string target_kernel_vr = s.kernel_release;
197a4d62 213 string target_kernel_v = s.kernel_base_release;
db135493 214 string target;
dff50e09 215
db135493
FCE
216 if (l->content == "kernel_v") target = target_kernel_v;
217 else if (l->content == "kernel_vr") target = target_kernel_vr;
218 else if (l->content == "systemtap_v") target = s.compatible;
219 else assert (0);
7a468d68 220
7a468d68
FCE
221 string query = r->content;
222 bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
223
44ce8ed5
FCE
224 // collect acceptable strverscmp results.
225 int rvc_ok1, rvc_ok2;
7a468d68 226 bool wc_ok = false;
44ce8ed5
FCE
227 if (op->type == tok_operator && op->content == "<=")
228 { rvc_ok1 = -1; rvc_ok2 = 0; }
229 else if (op->type == tok_operator && op->content == ">=")
230 { rvc_ok1 = 1; rvc_ok2 = 0; }
231 else if (op->type == tok_operator && op->content == "<")
232 { rvc_ok1 = -1; rvc_ok2 = -1; }
233 else if (op->type == tok_operator && op->content == ">")
234 { rvc_ok1 = 1; rvc_ok2 = 1; }
235 else if (op->type == tok_operator && op->content == "==")
7a468d68 236 { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
44ce8ed5 237 else if (op->type == tok_operator && op->content == "!=")
7a468d68 238 { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
44ce8ed5
FCE
239 else
240 throw parse_error ("expected comparison operator", op);
7a468d68
FCE
241
242 if ((!wc_ok) && rhs_wildcard)
243 throw parse_error ("wildcard not allowed with order comparison operators", op);
244
245 if (rhs_wildcard)
246 {
247 int rvc_result = fnmatch (query.c_str(), target.c_str(),
248 FNM_NOESCAPE); // spooky
249 bool badness = (rvc_result == 0) ^ (op->content == "==");
250 return !badness;
251 }
252 else
253 {
254 int rvc_result = strverscmp (target.c_str(), query.c_str());
255 // normalize rvc_result
256 if (rvc_result < 0) rvc_result = -1;
257 if (rvc_result > 0) rvc_result = 1;
258 return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
259 }
44ce8ed5
FCE
260 }
261 else if (l->type == tok_identifier && l->content == "arch")
262 {
263 string target_architecture = s.architecture;
264 if (! (r->type == tok_string))
265 throw parse_error ("expected string literal", r);
266 string query_architecture = r->content;
dff50e09 267
7a468d68
FCE
268 int nomatch = fnmatch (query_architecture.c_str(),
269 target_architecture.c_str(),
270 FNM_NOESCAPE); // still spooky
271
561079c8
FCE
272 bool result;
273 if (op->type == tok_operator && op->content == "==")
274 result = !nomatch;
275 else if (op->type == tok_operator && op->content == "!=")
276 result = nomatch;
277 else
278 throw parse_error ("expected '==' or '!='", op);
279
280 return result;
281 }
60d98537 282 else if (l->type == tok_identifier && startswith(l->content, "CONFIG_"))
561079c8 283 {
717a457b
MW
284 if (r->type == tok_string)
285 {
286 string lhs = s.kernel_config[l->content]; // may be empty
287 string rhs = r->content;
561079c8 288
717a457b 289 int nomatch = fnmatch (rhs.c_str(), lhs.c_str(), FNM_NOESCAPE); // still spooky
561079c8 290
717a457b
MW
291 bool result;
292 if (op->type == tok_operator && op->content == "==")
293 result = !nomatch;
294 else if (op->type == tok_operator && op->content == "!=")
295 result = nomatch;
296 else
297 throw parse_error ("expected '==' or '!='", op);
dff50e09 298
717a457b
MW
299 return result;
300 }
301 else if (r->type == tok_number)
302 {
303 const char* startp = s.kernel_config[l->content].c_str ();
304 char* endp = (char*) startp;
305 errno = 0;
306 int64_t lhs = (int64_t) strtoll (startp, & endp, 0);
307 if (errno == ERANGE || errno == EINVAL || *endp != '\0')
308 throw parse_error ("Config option value not a number", l);
309
310 int64_t rhs = lex_cast<int64_t>(r->content);
311 return eval_comparison (lhs, op, rhs);
312 }
4227f98d 313 else if (r->type == tok_identifier
60d98537 314 && startswith(r->content, "CONFIG_"))
4227f98d
MW
315 {
316 // First try to convert both to numbers,
317 // otherwise threat both as strings.
318 const char* startp = s.kernel_config[l->content].c_str ();
319 char* endp = (char*) startp;
320 errno = 0;
321 int64_t val = (int64_t) strtoll (startp, & endp, 0);
322 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
323 {
324 int64_t lhs = val;
325 startp = s.kernel_config[r->content].c_str ();
326 endp = (char*) startp;
327 errno = 0;
328 int64_t rhs = (int64_t) strtoll (startp, & endp, 0);
329 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
330 return eval_comparison (lhs, op, rhs);
331 }
332
333 string lhs = s.kernel_config[l->content];
334 string rhs = s.kernel_config[r->content];
335 return eval_comparison (lhs, op, rhs);
336 }
717a457b 337 else
4227f98d 338 throw parse_error ("expected string, number literal or other CONFIG_... as right value", r);
dff50e09 339 }
c434ec7e 340 else if (l->type == tok_string && r->type == tok_string)
5811366a 341 {
c434ec7e
FCE
342 string lhs = l->content;
343 string rhs = r->content;
344 return eval_comparison (lhs, op, rhs);
345 // NB: no wildcarding option here
346 }
347 else if (l->type == tok_number && r->type == tok_number)
348 {
349 int64_t lhs = lex_cast<int64_t>(l->content);
350 int64_t rhs = lex_cast<int64_t>(r->content);
351 return eval_comparison (lhs, op, rhs);
7a468d68 352 // NB: no wildcarding option here
5811366a
FCE
353 }
354 else if (l->type == tok_string && r->type == tok_number
355 && op->type == tok_operator)
356 throw parse_error ("expected string literal as right value", r);
357 else if (l->type == tok_number && r->type == tok_string
358 && op->type == tok_operator)
359 throw parse_error ("expected number literal as right value", r);
c434ec7e 360
177a8ead 361 else
561079c8 362 throw parse_error ("expected 'arch' or 'kernel_v' or 'kernel_vr' or 'CONFIG_...'\n"
5811366a 363 " or comparison between strings or integers", l);
177a8ead
FCE
364}
365
366
5811366a 367// Only tokens corresponding to the TRUE statement must be expanded
177a8ead 368const token*
3f847830 369parser::scan_pp (bool wildcard)
177a8ead
FCE
370{
371 while (true)
372 {
373 if (enqueued_pp.size() > 0)
374 {
375 const token* t = enqueued_pp[0];
376 enqueued_pp.erase (enqueued_pp.begin());
377 return t;
378 }
379
3f847830 380 const token* t = input.scan (wildcard); // NB: not recursive!
177a8ead
FCE
381 if (t == 0) // EOF
382 return t;
dff50e09 383
177a8ead
FCE
384 if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
385 return t;
386
387 // We have a %( - it's time to throw a preprocessing party!
388
2d7881bf
PP
389 bool result = false;
390 bool and_result = true;
391 const token *n = NULL;
392 do {
393 const token *l, *op, *r;
394 l = input.scan (false); // NB: not recursive, though perhaps could be
395 op = input.scan (false);
396 r = input.scan (false);
397 if (l == 0 || op == 0 || r == 0)
398 throw parse_error ("incomplete condition after '%('", t);
399 // NB: consider generalizing to consume all tokens until %?, and
400 // passing that as a vector to an evaluator.
401
402 // Do not evaluate the condition if we haven't expanded everything.
403 // This may occur when having several recursive conditionals.
404 and_result &= eval_pp_conditional (session, l, op, r);
405 delete l;
406 delete op;
407 delete r;
408 delete n;
409
410 n = input.scan ();
411 if (n && n->type == tok_operator && n->content == "&&")
412 continue;
413 result |= and_result;
414 and_result = true;
415 if (! (n && n->type == tok_operator && n->content == "||"))
416 break;
417 } while (true);
3f847830
FCE
418
419 /*
420 clog << "PP eval (" << *t << ") == " << result << endl;
421 */
422
2d7881bf 423 const token *m = n; // NB: not recursive
177a8ead
FCE
424 if (! (m && m->type == tok_operator && m->content == "%?"))
425 throw parse_error ("expected '%?' marker for conditional", t);
70c743d8 426 delete m; // "%?"
177a8ead
FCE
427
428 vector<const token*> my_enqueued_pp;
3f847830
FCE
429
430 int nesting = 0;
c28668ea 431 int then = 0;
177a8ead
FCE
432 while (true) // consume THEN tokens
433 {
3f847830
FCE
434 try
435 {
436 m = result ? scan_pp (wildcard) : input.scan (wildcard);
437 }
438 catch (const parse_error &e)
439 {
dff50e09 440 if (result) throw e; // propagate errors if THEN branch taken
d57671d3 441 continue;
3f847830
FCE
442 }
443
444 if (m && m->type == tok_operator && m->content == "%(") // nested %(
445 nesting ++;
c28668ea
WH
446 if (m && m->type == tok_operator && m->content == "%?") {
447 then ++;
448 if (nesting != then)
449 throw parse_error ("incomplete conditional - missing '%('", m);
450 }
3f847830
FCE
451 if (nesting == 0 && m && (m->type == tok_operator && (m->content == "%:" || // ELSE
452 m->content == "%)"))) // END
177a8ead 453 break;
c28668ea 454 if (nesting && m && m->type == tok_operator && m->content == "%)") { // nested %)
3f847830 455 nesting --;
c28668ea
WH
456 then --;
457 }
3f847830 458
d57671d3
FCE
459 if (!m)
460 throw parse_error ("incomplete conditional - missing '%:' or '%)'", t);
461 if (result)
177a8ead 462 my_enqueued_pp.push_back (m);
d57671d3 463 if (!result)
3f847830
FCE
464 delete m; // do nothing, just dispose of unkept THEN token
465
466 continue;
177a8ead 467 }
dff50e09 468
177a8ead 469 if (m && m->type == tok_operator && m->content == "%:") // ELSE
70c743d8
JS
470 {
471 delete m; // "%:"
3f847830 472 int nesting = 0;
c28668ea 473 int then = 0;
70c743d8
JS
474 while (true)
475 {
3f847830
FCE
476 try
477 {
478 m = result ? input.scan (wildcard) : scan_pp (wildcard);
dff50e09 479 }
3f847830
FCE
480 catch (const parse_error& e)
481 {
dff50e09 482 if (!result) throw e; // propagate errors if ELSE branch taken
d57671d3 483 continue;
3f847830
FCE
484 }
485
486 if (m && m->type == tok_operator && m->content == "%(") // nested %(
487 nesting ++;
c28668ea
WH
488 if (m && m->type == tok_operator && m->content == "%?") {
489 then ++;
490 if (nesting != then)
491 throw parse_error ("incomplete conditional - missing '%('", m);
492 }
3f847830 493 if (nesting == 0 && m && m->type == tok_operator && m->content == "%)") // END
70c743d8 494 break;
c28668ea 495 if (nesting && m && m->type == tok_operator && m->content == "%)") { // nested %)
3f847830 496 nesting --;
c28668ea
WH
497 then --;
498 }
3f847830 499
d57671d3 500 if (!m)
3f847830 501 throw parse_error ("incomplete conditional - missing %)", t);
d57671d3 502 if (!result)
dff50e09 503 my_enqueued_pp.push_back (m);
d57671d3 504 if (result)
3f847830
FCE
505 delete m; // do nothing, just dispose of unkept ELSE token
506
507 continue;
70c743d8
JS
508 }
509 }
3f847830
FCE
510
511 /*
512 clog << "PP eval (" << *t << ") == " << result << " tokens: " << endl;
513 for (unsigned k=0; k<my_enqueued_pp.size(); k++)
514 clog << * my_enqueued_pp[k] << endl;
515 clog << endl;
516 */
517
70c743d8
JS
518 delete t; // "%("
519 delete m; // "%)"
177a8ead 520
3f847830 521
177a8ead
FCE
522 // NB: we transcribe the retained tokens here, and not inside
523 // the THEN/ELSE while loops. If it were done there, each loop
524 // would become infinite (each iteration consuming an ordinary
525 // token the previous one just pushed there). Guess how I
526 // figured that out.
527 enqueued_pp.insert (enqueued_pp.end(),
528 my_enqueued_pp.begin(),
529 my_enqueued_pp.end());
530
531 // Go back to outermost while(true) loop. We hope that at least
532 // some THEN or ELSE tokens were enqueued. If not, around we go
533 // again, until EOF.
534 }
535}
536
537
2f1a1aea 538const token*
0c218afb 539parser::next (bool wildcard)
2f1a1aea
FCE
540{
541 if (! next_t)
0c218afb 542 next_t = scan_pp (wildcard);
2f1a1aea
FCE
543 if (! next_t)
544 throw parse_error ("unexpected end-of-file");
545
2f1a1aea
FCE
546 last_t = next_t;
547 // advance by zeroing next_t
548 next_t = 0;
549 return last_t;
550}
551
552
553const token*
0c218afb 554parser::peek (bool wildcard)
2f1a1aea
FCE
555{
556 if (! next_t)
0c218afb 557 next_t = scan_pp (wildcard);
2f1a1aea
FCE
558
559 // don't advance by zeroing next_t
560 last_t = next_t;
561 return next_t;
562}
563
564
d7f3e0c5
GH
565static inline bool
566tok_is(token const * t, token_type tt, string const & expected)
567{
568 return t && t->type == tt && t->content == expected;
569}
570
571
dff50e09 572const token*
d7f3e0c5
GH
573parser::expect_known (token_type tt, string const & expected)
574{
575 const token *t = next();
57b73400 576 if (! (t && t->type == tt && t->content == expected))
d7f3e0c5
GH
577 throw parse_error ("expected '" + expected + "'");
578 return t;
579}
580
581
dff50e09 582const token*
d7f3e0c5
GH
583parser::expect_unknown (token_type tt, string & target)
584{
585 const token *t = next();
586 if (!(t && t->type == tt))
587 throw parse_error ("expected " + tt2str(tt));
588 target = t->content;
589 return t;
590}
591
592
dff50e09 593const token*
493ee224
DS
594parser::expect_unknown2 (token_type tt1, token_type tt2, string & target)
595{
596 const token *t = next();
597 if (!(t && (t->type == tt1 || t->type == tt2)))
598 throw parse_error ("expected " + tt2str(tt1) + " or " + tt2str(tt2));
599 target = t->content;
600 return t;
601}
602
603
dff50e09 604const token*
d7f3e0c5
GH
605parser::expect_op (std::string const & expected)
606{
607 return expect_known (tok_operator, expected);
608}
609
610
dff50e09 611const token*
d7f3e0c5
GH
612parser::expect_kw (std::string const & expected)
613{
f4fe2e93 614 return expect_known (tok_keyword, expected);
d7f3e0c5
GH
615}
616
dff50e09 617const token*
e38723d2 618parser::expect_number (int64_t & value)
57b73400 619{
e38723d2
MH
620 bool neg = false;
621 const token *t = next();
622 if (t->type == tok_operator && t->content == "-")
623 {
624 neg = true;
625 t = next ();
626 }
627 if (!(t && t->type == tok_number))
628 throw parse_error ("expected number");
629
630 const char* startp = t->content.c_str ();
631 char* endp = (char*) startp;
632
633 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
634 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
635 // since the lexer only gives us positive digit strings, but we'll
636 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
637 errno = 0;
638 value = (int64_t) strtoull (startp, & endp, 0);
639 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
640 || (neg && (unsigned long long) value > 9223372036854775808ULL)
641 || (unsigned long long) value > 18446744073709551615ULL
642 || value < -9223372036854775807LL-1)
dff50e09
FCE
643 throw parse_error ("number invalid or out of range");
644
e38723d2
MH
645 if (neg)
646 value = -value;
647
648 return t;
57b73400
GH
649}
650
d7f3e0c5 651
dff50e09 652const token*
d7f3e0c5
GH
653parser::expect_ident (std::string & target)
654{
655 return expect_unknown (tok_identifier, target);
656}
657
658
dff50e09 659const token*
493ee224
DS
660parser::expect_ident_or_keyword (std::string & target)
661{
662 return expect_unknown2 (tok_identifier, tok_keyword, target);
663}
664
665
dff50e09 666bool
d7f3e0c5
GH
667parser::peek_op (std::string const & op)
668{
669 return tok_is (peek(), tok_operator, op);
670}
671
672
dff50e09 673bool
d7f3e0c5
GH
674parser::peek_kw (std::string const & kw)
675{
676 return tok_is (peek(), tok_identifier, kw);
677}
678
679
680
66c7d4c1 681lexer::lexer (istream& input, const string& in, systemtap_session& s):
2203b032 682 input_name (in), input_pointer (0), input_end (0),
9300f661
JS
683 cursor_suspend_count(0), cursor_suspend_line (1), cursor_suspend_column (1),
684 cursor_line (1), cursor_column (1),
66c7d4c1 685 session(s), current_file (0)
eacb10ce 686{
66c7d4c1 687 getline(input, input_contents, '\0');
2203b032 688
66c7d4c1
JS
689 input_pointer = input_contents.data();
690 input_end = input_contents.data() + input_contents.size();
691
692 if (keywords.empty())
693 {
694 keywords.insert("probe");
695 keywords.insert("global");
696 keywords.insert("function");
697 keywords.insert("if");
698 keywords.insert("else");
699 keywords.insert("for");
700 keywords.insert("foreach");
701 keywords.insert("in");
702 keywords.insert("limit");
703 keywords.insert("return");
704 keywords.insert("delete");
705 keywords.insert("while");
706 keywords.insert("break");
707 keywords.insert("continue");
708 keywords.insert("next");
709 keywords.insert("string");
710 keywords.insert("long");
f4fe2e93
FCE
711 keywords.insert("try");
712 keywords.insert("catch");
66c7d4c1 713 }
eacb10ce 714}
2f1a1aea 715
66c7d4c1
JS
716set<string> lexer::keywords;
717
1b1b4ceb
RA
718void
719lexer::set_current_file (stapfile* f)
720{
721 current_file = f;
2203b032
JS
722 if (f)
723 {
724 f->file_contents = input_contents;
725 f->name = input_name;
726 }
1b1b4ceb 727}
bb2e3076
FCE
728
729int
730lexer::input_peek (unsigned n)
731{
66c7d4c1
JS
732 if (input_pointer + n >= input_end)
733 return -1; // EOF
734 return (unsigned char)*(input_pointer + n);
bb2e3076
FCE
735}
736
737
dff50e09 738int
2f1a1aea
FCE
739lexer::input_get ()
740{
66c7d4c1 741 int c = input_peek();
bb2e3076
FCE
742 if (c < 0) return c; // EOF
743
66c7d4c1
JS
744 ++input_pointer;
745
3f99432c 746 if (cursor_suspend_count)
9300f661
JS
747 {
748 // Track effect of input_put: preserve previous cursor/line_column
749 // until all of its characters are consumed.
750 if (--cursor_suspend_count == 0)
751 {
752 cursor_line = cursor_suspend_line;
753 cursor_column = cursor_suspend_column;
754 }
755 }
3f99432c 756 else
2f1a1aea 757 {
3f99432c
FCE
758 // update source cursor
759 if (c == '\n')
760 {
761 cursor_line ++;
762 cursor_column = 1;
763 }
764 else
765 cursor_column ++;
2f1a1aea 766 }
2f1a1aea 767
eacb10ce 768 // clog << "[" << (char)c << "]";
2f1a1aea
FCE
769 return c;
770}
771
772
3f99432c 773void
9300f661 774lexer::input_put (const string& chars, const token* t)
3f99432c 775{
66c7d4c1
JS
776 size_t pos = input_pointer - input_contents.data();
777 // clog << "[put:" << chars << " @" << pos << "]";
778 input_contents.insert (pos, chars);
eacb10ce 779 cursor_suspend_count += chars.size();
9300f661
JS
780 cursor_suspend_line = cursor_line;
781 cursor_suspend_column = cursor_column;
782 cursor_line = t->location.line;
783 cursor_column = t->location.column;
66c7d4c1
JS
784 input_pointer = input_contents.data() + pos;
785 input_end = input_contents.data() + input_contents.size();
3f99432c
FCE
786}
787
788
2f1a1aea 789token*
3f847830 790lexer::scan (bool wildcard)
2f1a1aea
FCE
791{
792 token* n = new token;
2203b032 793 n->location.file = current_file;
2f1a1aea 794
9300f661
JS
795skip:
796 bool suspended = (cursor_suspend_count > 0);
2f1a1aea
FCE
797 n->location.line = cursor_line;
798 n->location.column = cursor_column;
799
800 int c = input_get();
3f99432c 801 // clog << "{" << (char)c << (char)c2 << "}";
2f1a1aea
FCE
802 if (c < 0)
803 {
804 delete n;
805 return 0;
806 }
807
808 if (isspace (c))
809 goto skip;
810
66c7d4c1
JS
811 int c2 = input_peek ();
812
3f99432c
FCE
813 // Paste command line arguments as character streams into
814 // the beginning of a token. $1..$999 go through as raw
815 // characters; @1..@999 are quoted/escaped as strings.
816 // $# and @# expand to the number of arguments, similarly
817 // raw or quoted.
9300f661 818 if ((c == '$' || c == '@') && (c2 == '#'))
3f99432c 819 {
9300f661
JS
820 n->content.push_back (c);
821 n->content.push_back (c2);
3f99432c 822 input_get(); // swallow '#'
9300f661
JS
823 if (suspended)
824 throw parse_error ("invalid nested substitution of command line arguments", n);
825 size_t num_args = session.args.size ();
826 input_put ((c == '$') ? lex_cast (num_args) : lex_cast_qstring (num_args), n);
827 n->content.clear();
828 goto skip;
3f99432c 829 }
9300f661 830 else if ((c == '$' || c == '@') && (isdigit (c2)))
3f99432c 831 {
9300f661 832 n->content.push_back (c);
3f99432c
FCE
833 unsigned idx = 0;
834 do
835 {
836 input_get ();
837 idx = (idx * 10) + (c2 - '0');
9300f661 838 n->content.push_back (c2);
3f99432c
FCE
839 c2 = input_peek ();
840 } while (c2 > 0 &&
dff50e09 841 isdigit (c2) &&
3f99432c 842 idx <= session.args.size()); // prevent overflow
9300f661
JS
843 if (suspended)
844 throw parse_error ("invalid nested substitution of command line arguments", n);
3f99432c
FCE
845 if (idx == 0 ||
846 idx-1 >= session.args.size())
aca66a36
JS
847 throw parse_error ("command line argument index " + lex_cast(idx)
848 + " out of range [1-" + lex_cast(session.args.size()) + "]", n);
9300f661
JS
849 const string& arg = session.args[idx-1];
850 input_put ((c == '$') ? arg : lex_cast_qstring (arg), n);
851 n->content.clear();
852 goto skip;
3f99432c
FCE
853 }
854
0c218afb
MH
855 else if (isalpha (c) || c == '$' || c == '@' || c == '_' ||
856 (wildcard && c == '*'))
2f1a1aea
FCE
857 {
858 n->type = tok_identifier;
859 n->content = (char) c;
0c218afb
MH
860 while (isalnum (c2) || c2 == '_' || c2 == '$' ||
861 (wildcard && c2 == '*'))
2f1a1aea 862 {
3f99432c
FCE
863 input_get ();
864 n->content.push_back (c2);
865 c2 = input_peek ();
6e213f58 866 }
213bee8f 867
66c7d4c1 868 if (keywords.count(n->content))
3f99432c 869 n->type = tok_keyword;
dff50e09 870
2f1a1aea
FCE
871 return n;
872 }
873
3a20432b 874 else if (isdigit (c)) // positive literal
2f1a1aea 875 {
2f1a1aea 876 n->type = tok_number;
9c0c0e46
FCE
877 n->content = (char) c;
878
66c7d4c1 879 while (isalnum (c2))
2f1a1aea 880 {
9c0c0e46
FCE
881 // NB: isalnum is very permissive. We rely on strtol, called in
882 // parser::parse_literal below, to confirm that the number string
883 // is correctly formatted and in range.
884
66c7d4c1
JS
885 input_get ();
886 n->content.push_back (c2);
887 c2 = input_peek ();
2f1a1aea
FCE
888 }
889 return n;
890 }
891
892 else if (c == '\"')
893 {
894 n->type = tok_string;
489e3d51 895 another_string:
2f1a1aea
FCE
896 while (1)
897 {
898 c = input_get ();
899
3f99432c 900 if (c < 0 || c == '\n')
2f1a1aea 901 {
72cdb9cd 902 throw parse_error("Could not find matching closing quote", n);
2f1a1aea
FCE
903 }
904 if (c == '\"') // closing double-quotes
905 break;
3f99432c 906 else if (c == '\\') // see also input_put
dff50e09 907 {
7d46afb8
GH
908 c = input_get ();
909 switch (c)
910 {
911 case 'a':
912 case 'b':
913 case 't':
914 case 'n':
915 case 'v':
916 case 'f':
917 case 'r':
f03954fd 918 case '0' ... '7': // NB: need only match the first digit
7d46afb8 919 case '\\':
7d46afb8 920 // Pass these escapes through to the string value
dff50e09 921 // being parsed; it will be emitted into a C literal.
7d46afb8
GH
922
923 n->content.push_back('\\');
924
3f99432c 925 // fall through
7d46afb8 926 default:
7d46afb8
GH
927 n->content.push_back(c);
928 break;
929 }
2f1a1aea
FCE
930 }
931 else
932 n->content.push_back(c);
933 }
489e3d51
FCE
934 // PR11208: check if the next token is also a string literal; auto-concatenate it
935 // This is complicated to the extent that we need to skip intermediate whitespace.
936 // XXX: but not comments
937 unsigned nspace = 0;
938 do {
939 c = input_peek(nspace++);
940 if (c == '\"') // new string literal?
941 {
942 // consume all whitespace plus the opening quote
943 while (nspace-- > 0) input_get();
944 goto another_string; // and append the rest to this token
945 }
946 } while (isspace(c));
2f1a1aea
FCE
947 return n;
948 }
949
950 else if (ispunct (c))
951 {
bb2e3076 952 int c3 = input_peek (1);
2f1a1aea 953
3a20432b
FCE
954 // NB: if we were to recognize negative numeric literals here,
955 // we'd introduce another grammar ambiguity:
956 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
957 // instead of tok_number(1) tok_operator('-') tok_number(1)
958
66c7d4c1 959 if (c == '#') // shell comment
2f1a1aea
FCE
960 {
961 unsigned this_line = cursor_line;
bb2e3076
FCE
962 do { c = input_get (); }
963 while (c >= 0 && cursor_line == this_line);
2f1a1aea
FCE
964 goto skip;
965 }
66c7d4c1 966 else if ((c == '/' && c2 == '/')) // C++ comment
63a7c90e
FCE
967 {
968 unsigned this_line = cursor_line;
bb2e3076
FCE
969 do { c = input_get (); }
970 while (c >= 0 && cursor_line == this_line);
63a7c90e
FCE
971 goto skip;
972 }
973 else if (c == '/' && c2 == '*') // C comment
974 {
66c7d4c1
JS
975 (void) input_get (); // swallow '*' already in c2
976 c = input_get ();
63a7c90e 977 c2 = input_get ();
bb2e3076 978 while (c2 >= 0)
63a7c90e 979 {
66c7d4c1
JS
980 if (c == '*' && c2 == '/')
981 break;
63a7c90e
FCE
982 c = c2;
983 c2 = input_get ();
63a7c90e 984 }
bb2e3076 985 goto skip;
63a7c90e 986 }
54dfabe9
FCE
987 else if (c == '%' && c2 == '{') // embedded code
988 {
989 n->type = tok_embedded;
990 (void) input_get (); // swallow '{' already in c2
66c7d4c1
JS
991 c = input_get ();
992 c2 = input_get ();
993 while (c2 >= 0)
54dfabe9 994 {
66c7d4c1
JS
995 if (c == '%' && c2 == '}')
996 return n;
54dfabe9 997 n->content += c;
66c7d4c1
JS
998 c = c2;
999 c2 = input_get ();
54dfabe9 1000 }
72cdb9cd
CW
1001
1002 throw parse_error ("Could not find matching '%}' to close embedded function block", n);
54dfabe9 1003 }
2f1a1aea 1004
bb2e3076
FCE
1005 // We're committed to recognizing at least the first character
1006 // as an operator.
2f1a1aea 1007 n->type = tok_operator;
66c7d4c1 1008 n->content = c;
2f1a1aea 1009
bb2e3076 1010 // match all valid operators, in decreasing size order
66c7d4c1
JS
1011 if ((c == '<' && c2 == '<' && c3 == '<') ||
1012 (c == '<' && c2 == '<' && c3 == '=') ||
1013 (c == '>' && c2 == '>' && c3 == '='))
82919855 1014 {
66c7d4c1
JS
1015 n->content += c2;
1016 n->content += c3;
bb2e3076
FCE
1017 input_get (); input_get (); // swallow other two characters
1018 }
66c7d4c1
JS
1019 else if ((c == '=' && c2 == '=') ||
1020 (c == '!' && c2 == '=') ||
1021 (c == '<' && c2 == '=') ||
1022 (c == '>' && c2 == '=') ||
1023 (c == '+' && c2 == '=') ||
1024 (c == '-' && c2 == '=') ||
1025 (c == '*' && c2 == '=') ||
1026 (c == '/' && c2 == '=') ||
1027 (c == '%' && c2 == '=') ||
1028 (c == '&' && c2 == '=') ||
1029 (c == '^' && c2 == '=') ||
1030 (c == '|' && c2 == '=') ||
1031 (c == '.' && c2 == '=') ||
1032 (c == '&' && c2 == '&') ||
1033 (c == '|' && c2 == '|') ||
1034 (c == '+' && c2 == '+') ||
1035 (c == '-' && c2 == '-') ||
1036 (c == '-' && c2 == '>') ||
1037 (c == '<' && c2 == '<') ||
1038 (c == '>' && c2 == '>') ||
177a8ead 1039 // preprocessor tokens
66c7d4c1
JS
1040 (c == '%' && c2 == '(') ||
1041 (c == '%' && c2 == '?') ||
1042 (c == '%' && c2 == ':') ||
1043 (c == '%' && c2 == ')'))
bb2e3076 1044 {
66c7d4c1 1045 n->content += c2;
bb2e3076 1046 input_get (); // swallow other character
dff50e09 1047 }
2f1a1aea
FCE
1048
1049 return n;
1050 }
1051
1052 else
1053 {
1054 n->type = tok_junk;
1055 n->content = (char) c;
1056 return n;
1057 }
1058}
1059
1060
1061// ------------------------------------------------------------------------
1062
1063stapfile*
1064parser::parse ()
1065{
1066 stapfile* f = new stapfile;
1b1b4ceb 1067 input.set_current_file (f);
56099f08
FCE
1068
1069 bool empty = true;
1070
2f1a1aea
FCE
1071 while (1)
1072 {
1073 try
1074 {
1075 const token* t = peek ();
56099f08 1076 if (! t) // nice clean EOF
2f1a1aea
FCE
1077 break;
1078
56099f08 1079 empty = false;
6e213f58
DS
1080 if (t->type == tok_keyword && t->content == "probe")
1081 {
1082 context = con_probe;
1083 parse_probe (f->probes, f->aliases);
1084 }
1085 else if (t->type == tok_keyword && t->content == "global")
1086 {
1087 context = con_global;
4b5f3e45 1088 parse_global (f->globals, f->probes);
6e213f58
DS
1089 }
1090 else if (t->type == tok_keyword && t->content == "function")
1091 {
1092 context = con_function;
1093 parse_functiondecl (f->functions);
1094 }
54dfabe9 1095 else if (t->type == tok_embedded)
6e213f58
DS
1096 {
1097 context = con_embedded;
1098 f->embeds.push_back (parse_embeddedcode ());
1099 }
2f1a1aea 1100 else
6e213f58
DS
1101 {
1102 context = con_unknown;
1103 throw parse_error ("expected 'probe', 'global', 'function', or '%{'");
1104 }
2f1a1aea
FCE
1105 }
1106 catch (parse_error& pe)
1107 {
1108 print_error (pe);
cd7116b8 1109 if (pe.skip_some) // for recovery
dff50e09 1110 try
cd7116b8
FCE
1111 {
1112 // Quietly swallow all tokens until the next '}'.
1113 while (1)
1114 {
1115 const token* t = peek ();
1116 if (! t)
1117 break;
1118 next ();
1119 if (t->type == tok_operator && t->content == "}")
1120 break;
1121 }
1122 }
1123 catch (parse_error& pe2)
1124 {
1125 // parse error during recovery ... ugh
1126 print_error (pe2);
1127 }
177a8ead 1128 }
2f1a1aea
FCE
1129 }
1130
56099f08
FCE
1131 if (empty)
1132 {
1133 cerr << "Input file '" << input_name << "' is empty or missing." << endl;
1134 delete f;
2203b032 1135 f = 0;
56099f08
FCE
1136 }
1137 else if (num_errors > 0)
2f1a1aea
FCE
1138 {
1139 cerr << num_errors << " parse error(s)." << endl;
1140 delete f;
2203b032 1141 f = 0;
2f1a1aea 1142 }
dff50e09 1143
2203b032 1144 input.set_current_file(0);
2f1a1aea
FCE
1145 return f;
1146}
1147
1148
20c6c071 1149void
54dfabe9
FCE
1150parser::parse_probe (std::vector<probe *> & probe_ret,
1151 std::vector<probe_alias *> & alias_ret)
2f1a1aea 1152{
82919855 1153 const token* t0 = next ();
6e213f58 1154 if (! (t0->type == tok_keyword && t0->content == "probe"))
82919855
FCE
1155 throw parse_error ("expected 'probe'");
1156
20c6c071
GH
1157 vector<probe_point *> aliases;
1158 vector<probe_point *> locations;
1159
1160 bool equals_ok = true;
82919855 1161
97266278
LG
1162 int epilogue_alias = 0;
1163
2f1a1aea
FCE
1164 while (1)
1165 {
b4ceace2 1166 probe_point * pp = parse_probe_point ();
dff50e09 1167
b4ceace2 1168 const token* t = peek ();
dff50e09 1169 if (equals_ok && t
b4ceace2
FCE
1170 && t->type == tok_operator && t->content == "=")
1171 {
1ad820e3 1172 if (pp->optional || pp->sufficient)
f1a0157a 1173 throw parse_error ("probe point alias name cannot be optional nor sufficient", pp->components.front()->tok);
b4ceace2
FCE
1174 aliases.push_back(pp);
1175 next ();
1176 continue;
1177 }
dff50e09 1178 else if (equals_ok && t
97266278
LG
1179 && t->type == tok_operator && t->content == "+=")
1180 {
1ad820e3 1181 if (pp->optional || pp->sufficient)
f1a0157a 1182 throw parse_error ("probe point alias name cannot be optional nor sufficient", pp->components.front()->tok);
97266278
LG
1183 aliases.push_back(pp);
1184 epilogue_alias = 1;
1185 next ();
1186 continue;
1187 }
b4ceace2
FCE
1188 else if (t && t->type == tok_operator && t->content == ",")
1189 {
1190 locations.push_back(pp);
1191 equals_ok = false;
1192 next ();
1193 continue;
1194 }
1195 else if (t && t->type == tok_operator && t->content == "{")
1196 {
1197 locations.push_back(pp);
1198 break;
1199 }
2f1a1aea 1200 else
9c0c0e46 1201 throw parse_error ("expected probe point specifier");
2f1a1aea 1202 }
20c6c071 1203
20c6c071
GH
1204 if (aliases.empty())
1205 {
54dfabe9
FCE
1206 probe* p = new probe;
1207 p->tok = t0;
1208 p->locations = locations;
1209 p->body = parse_stmt_block ();
37ebca01 1210 p->privileged = privileged;
54dfabe9 1211 probe_ret.push_back (p);
20c6c071
GH
1212 }
1213 else
1214 {
54dfabe9 1215 probe_alias* p = new probe_alias (aliases);
97266278
LG
1216 if(epilogue_alias)
1217 p->epilogue_style = true;
1218 else
1219 p->epilogue_style = false;
54dfabe9
FCE
1220 p->tok = t0;
1221 p->locations = locations;
1222 p->body = parse_stmt_block ();
37ebca01 1223 p->privileged = privileged;
54dfabe9 1224 alias_ret.push_back (p);
20c6c071 1225 }
54dfabe9 1226}
20c6c071 1227
54dfabe9
FCE
1228
1229embeddedcode*
1230parser::parse_embeddedcode ()
1231{
1232 embeddedcode* e = new embeddedcode;
1233 const token* t = next ();
1234 if (t->type != tok_embedded)
24cb178f
FCE
1235 throw parse_error ("expected '%{'");
1236
1237 if (! privileged)
cd7116b8
FCE
1238 throw parse_error ("embedded code in unprivileged script",
1239 false /* don't skip tokens for parse resumption */);
54dfabe9
FCE
1240
1241 e->tok = t;
1242 e->code = t->content;
1243 return e;
2f1a1aea
FCE
1244}
1245
1246
1247block*
56099f08 1248parser::parse_stmt_block ()
2f1a1aea
FCE
1249{
1250 block* pb = new block;
1251
56099f08
FCE
1252 const token* t = next ();
1253 if (! (t->type == tok_operator && t->content == "{"))
1254 throw parse_error ("expected '{'");
1255
1256 pb->tok = t;
2b066ec1 1257
2f1a1aea
FCE
1258 while (1)
1259 {
1260 try
1261 {
2b066ec1
FCE
1262 t = peek ();
1263 if (t && t->type == tok_operator && t->content == "}")
1264 {
1265 next ();
1266 break;
1267 }
1268
2f1a1aea 1269 pb->statements.push_back (parse_statement ());
2f1a1aea
FCE
1270 }
1271 catch (parse_error& pe)
1272 {
1273 print_error (pe);
54dfabe9 1274
2f1a1aea
FCE
1275 // Quietly swallow all tokens until the next ';' or '}'.
1276 while (1)
1277 {
1278 const token* t = peek ();
54dfabe9 1279 if (! t) return 0;
2f1a1aea 1280 next ();
54dfabe9
FCE
1281 if (t->type == tok_operator
1282 && (t->content == "}" || t->content == ";"))
2f1a1aea
FCE
1283 break;
1284 }
1285 }
1286 }
1287
1288 return pb;
1289}
1290
1291
f4fe2e93
FCE
1292try_block*
1293parser::parse_try_block ()
1294{
1295 try_block* pb = new try_block;
1296
1297 pb->tok = expect_kw ("try");
1298 pb->try_block = parse_stmt_block();
1299 expect_kw ("catch");
1300
1301 const token* t = peek ();
1302 if (t->type == tok_operator && t->content == "(")
1303 {
1304 next (); // swallow the '('
1305
1306 t = next();
1307 if (! (t->type == tok_identifier))
1308 throw parse_error ("expected identifier");
1309 symbol* sym = new symbol;
1310 sym->tok = t;
1311 sym->name = t->content;
1312 pb->catch_error_var = sym;
1313
1314 expect_op (")");
1315 }
1316 else
1317 pb->catch_error_var = 0;
1318
1319 pb->catch_block = parse_stmt_block();
1320
1321 return pb;
1322}
1323
1324
1325
2f1a1aea
FCE
1326statement*
1327parser::parse_statement ()
1328{
40b71c47 1329 statement *ret;
2f1a1aea
FCE
1330 const token* t = peek ();
1331 if (t && t->type == tok_operator && t->content == ";")
f946b10f 1332 return new null_statement (next ());
dff50e09 1333 else if (t && t->type == tok_operator && t->content == "{")
40b71c47 1334 return parse_stmt_block (); // Don't squash semicolons.
f4fe2e93
FCE
1335 else if (t && t->type == tok_keyword && t->content == "try")
1336 return parse_try_block (); // Don't squash semicolons.
6e213f58 1337 else if (t && t->type == tok_keyword && t->content == "if")
40b71c47 1338 return parse_if_statement (); // Don't squash semicolons.
6e213f58 1339 else if (t && t->type == tok_keyword && t->content == "for")
40b71c47 1340 return parse_for_loop (); // Don't squash semicolons.
6e213f58 1341 else if (t && t->type == tok_keyword && t->content == "foreach")
40b71c47
MW
1342 return parse_foreach_loop (); // Don't squash semicolons.
1343 else if (t && t->type == tok_keyword && t->content == "while")
1344 return parse_while_loop (); // Don't squash semicolons.
6e213f58 1345 else if (t && t->type == tok_keyword && t->content == "return")
40b71c47 1346 ret = parse_return_statement ();
6e213f58 1347 else if (t && t->type == tok_keyword && t->content == "delete")
40b71c47 1348 ret = parse_delete_statement ();
6e213f58 1349 else if (t && t->type == tok_keyword && t->content == "break")
40b71c47 1350 ret = parse_break_statement ();
6e213f58 1351 else if (t && t->type == tok_keyword && t->content == "continue")
40b71c47 1352 ret = parse_continue_statement ();
6e213f58 1353 else if (t && t->type == tok_keyword && t->content == "next")
40b71c47 1354 ret = parse_next_statement ();
2f1a1aea
FCE
1355 else if (t && (t->type == tok_operator || // expressions are flexible
1356 t->type == tok_identifier ||
1357 t->type == tok_number ||
1358 t->type == tok_string))
40b71c47 1359 ret = parse_expr_statement ();
54dfabe9 1360 // XXX: consider generally accepting tok_embedded here too
2f1a1aea
FCE
1361 else
1362 throw parse_error ("expected statement");
40b71c47
MW
1363
1364 // Squash "empty" trailing colons after any "non-block-like" statement.
1365 t = peek ();
1366 if (t && t->type == tok_operator && t->content == ";")
1367 {
1368 next (); // Silently eat trailing ; after statement
1369 }
1370
1371 return ret;
2f1a1aea
FCE
1372}
1373
1374
56099f08 1375void
78f6bba6 1376parser::parse_global (vector <vardecl*>& globals, vector<probe*>&)
2f1a1aea 1377{
82919855 1378 const token* t0 = next ();
6e213f58 1379 if (! (t0->type == tok_keyword && t0->content == "global"))
82919855
FCE
1380 throw parse_error ("expected 'global'");
1381
56099f08
FCE
1382 while (1)
1383 {
1384 const token* t = next ();
1385 if (! (t->type == tok_identifier))
1386 throw parse_error ("expected identifier");
1387
2b066ec1
FCE
1388 for (unsigned i=0; i<globals.size(); i++)
1389 if (globals[i]->name == t->content)
57b73400 1390 throw parse_error ("duplicate global name");
dff50e09 1391
24cb178f
FCE
1392 vardecl* d = new vardecl;
1393 d->name = t->content;
1394 d->tok = t;
1395 globals.push_back (d);
56099f08 1396
82919855 1397 t = peek ();
ef474d24
JS
1398
1399 if (t && t->type == tok_operator && t->content == "[") // array size
1400 {
1401 int64_t size;
1402 next ();
1403 expect_number(size);
1404 if (size <= 0 || size > 1000000) // arbitrary max
1405 throw parse_error("array size out of range");
1406 d->maxsize = (int)size;
1407 expect_known(tok_operator, "]");
1408 t = peek ();
1409 }
1410
4b5f3e45 1411 if (t && t->type == tok_operator && t->content == "=") // initialization
ef474d24
JS
1412 {
1413 if (!d->compatible_arity(0))
1414 throw parse_error("only scalar globals can be initialized");
1415 d->set_arity(0);
1416 next ();
1417 d->init = parse_literal ();
1418 d->type = d->init->type;
1419 t = peek ();
1420 }
4b5f3e45 1421
c3799d72
AM
1422 if (t && t->type == tok_operator && t->content == ";") // termination
1423 next();
1424
4b5f3e45 1425 if (t && t->type == tok_operator && t->content == ",") // next global
82919855
FCE
1426 {
1427 next ();
1428 continue;
1429 }
56099f08 1430 else
82919855 1431 break;
56099f08
FCE
1432 }
1433}
1434
1435
24cb178f
FCE
1436void
1437parser::parse_functiondecl (std::vector<functiondecl*>& functions)
56099f08 1438{
82919855 1439 const token* t = next ();
6e213f58 1440 if (! (t->type == tok_keyword && t->content == "function"))
82919855
FCE
1441 throw parse_error ("expected 'function'");
1442
56099f08 1443
82919855 1444 t = next ();
6e213f58
DS
1445 if (! (t->type == tok_identifier)
1446 && ! (t->type == tok_keyword
1447 && (t->content == "string" || t->content == "long")))
56099f08 1448 throw parse_error ("expected identifier");
24cb178f
FCE
1449
1450 for (unsigned i=0; i<functions.size(); i++)
1451 if (functions[i]->name == t->content)
1452 throw parse_error ("duplicate function name");
1453
1454 functiondecl *fd = new functiondecl ();
56099f08
FCE
1455 fd->name = t->content;
1456 fd->tok = t;
1457
1458 t = next ();
6a505121
FCE
1459 if (t->type == tok_operator && t->content == ":")
1460 {
1461 t = next ();
6e213f58 1462 if (t->type == tok_keyword && t->content == "string")
6a505121 1463 fd->type = pe_string;
6e213f58 1464 else if (t->type == tok_keyword && t->content == "long")
6a505121
FCE
1465 fd->type = pe_long;
1466 else throw parse_error ("expected 'string' or 'long'");
1467
1468 t = next ();
1469 }
1470
56099f08
FCE
1471 if (! (t->type == tok_operator && t->content == "("))
1472 throw parse_error ("expected '('");
1473
1474 while (1)
1475 {
1476 t = next ();
1477
1478 // permit zero-argument fuctions
1479 if (t->type == tok_operator && t->content == ")")
1480 break;
1481 else if (! (t->type == tok_identifier))
1482 throw parse_error ("expected identifier");
1483 vardecl* vd = new vardecl;
1484 vd->name = t->content;
1485 vd->tok = t;
1486 fd->formal_args.push_back (vd);
1487
1488 t = next ();
6a505121
FCE
1489 if (t->type == tok_operator && t->content == ":")
1490 {
1491 t = next ();
6e213f58 1492 if (t->type == tok_keyword && t->content == "string")
6a505121 1493 vd->type = pe_string;
6e213f58 1494 else if (t->type == tok_keyword && t->content == "long")
6a505121
FCE
1495 vd->type = pe_long;
1496 else throw parse_error ("expected 'string' or 'long'");
dff50e09 1497
6a505121
FCE
1498 t = next ();
1499 }
56099f08
FCE
1500 if (t->type == tok_operator && t->content == ")")
1501 break;
1502 if (t->type == tok_operator && t->content == ",")
1503 continue;
1504 else
1505 throw parse_error ("expected ',' or ')'");
1506 }
1507
54dfabe9
FCE
1508 t = peek ();
1509 if (t && t->type == tok_embedded)
1510 fd->body = parse_embeddedcode ();
1511 else
1512 fd->body = parse_stmt_block ();
24cb178f
FCE
1513
1514 functions.push_back (fd);
2f1a1aea
FCE
1515}
1516
1517
9c0c0e46
FCE
1518probe_point*
1519parser::parse_probe_point ()
2f1a1aea 1520{
9c0c0e46 1521 probe_point* pl = new probe_point;
2f1a1aea 1522
9c0c0e46 1523 while (1)
2f1a1aea 1524 {
0c218afb 1525 const token* t = next (true); // wildcard scanning here
6e213f58
DS
1526 if (! (t->type == tok_identifier
1527 // we must allow ".return" and ".function", which are keywords
0c218afb 1528 || t->type == tok_keyword))
b4ceace2 1529 throw parse_error ("expected identifier or '*'");
9c0c0e46 1530
9c0c0e46
FCE
1531
1532 probe_point::component* c = new probe_point::component;
1533 c->functor = t->content;
f1a0157a 1534 c->tok = t;
9c0c0e46 1535 pl->components.push_back (c);
6e3347a9 1536 // NB we may add c->arg soon
9c0c0e46
FCE
1537
1538 t = peek ();
a477f3f1 1539
6e3347a9 1540 // consume optional parameter
9c0c0e46
FCE
1541 if (t && t->type == tok_operator && t->content == "(")
1542 {
1543 next (); // consume "("
1544 c->arg = parse_literal ();
1545
1546 t = next ();
1547 if (! (t->type == tok_operator && t->content == ")"))
1548 throw parse_error ("expected ')'");
1549
1550 t = peek ();
9c0c0e46 1551 }
9c0c0e46
FCE
1552
1553 if (t && t->type == tok_operator && t->content == ".")
6e3347a9
FCE
1554 {
1555 next ();
1556 continue;
1557 }
1558
f1a0157a 1559 // We only fall through here at the end of a probe point (past
6e3347a9
FCE
1560 // all the dotted/parametrized components).
1561
d898100a
FCE
1562 if (t && t->type == tok_operator &&
1563 (t->content == "?" || t->content == "!"))
6e3347a9
FCE
1564 {
1565 pl->optional = true;
d898100a
FCE
1566 if (t->content == "!") pl->sufficient = true;
1567 // NB: sufficient implies optional
6e3347a9
FCE
1568 next ();
1569 t = peek ();
1570 // fall through
cbbe8080
MH
1571 }
1572
1573 if (t && t->type == tok_keyword && t->content == "if")
1574 {
1575 next ();
1576 t = peek ();
75686668 1577 if (t && ! (t->type == tok_operator && t->content == "("))
cbbe8080
MH
1578 throw parse_error ("expected '('");
1579 next ();
1580
1581 pl->condition = parse_expression ();
1582
1583 t = peek ();
75686668 1584 if (t && ! (t->type == tok_operator && t->content == ")"))
cbbe8080
MH
1585 throw parse_error ("expected ')'");
1586 next ();
1587 t = peek ();
1588 // fall through
6e3347a9
FCE
1589 }
1590
dff50e09 1591 if (t && t->type == tok_operator
6e3347a9
FCE
1592 && (t->content == "{" || t->content == "," ||
1593 t->content == "=" || t->content == "+=" ))
1594 break;
dff50e09 1595
d898100a 1596 throw parse_error ("expected one of '. , ( ? ! { = +='");
2f1a1aea
FCE
1597 }
1598
1599 return pl;
1600}
1601
1602
1603literal*
1604parser::parse_literal ()
1605{
1606 const token* t = next ();
56099f08 1607 literal* l;
2f1a1aea 1608 if (t->type == tok_string)
56099f08 1609 l = new literal_string (t->content);
16e8f21f 1610 else
9c0c0e46 1611 {
16e8f21f
JS
1612 bool neg = false;
1613 if (t->type == tok_operator && t->content == "-")
1614 {
1615 neg = true;
1616 t = next ();
1617 }
1618
1619 if (t->type == tok_number)
1620 {
1621 const char* startp = t->content.c_str ();
1622 char* endp = (char*) startp;
1623
1624 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1625 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
79e6d33f
JS
1626 // since the lexer only gives us positive digit strings, but we'll
1627 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
16e8f21f
JS
1628 errno = 0;
1629 long long value = (long long) strtoull (startp, & endp, 0);
16e8f21f 1630 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
79e6d33f 1631 || (neg && (unsigned long long) value > 9223372036854775808ULL)
16e8f21f
JS
1632 || (unsigned long long) value > 18446744073709551615ULL
1633 || value < -9223372036854775807LL-1)
dff50e09 1634 throw parse_error ("number invalid or out of range");
16e8f21f 1635
79e6d33f
JS
1636 if (neg)
1637 value = -value;
1638
16e8f21f
JS
1639 l = new literal_number (value);
1640 }
1641 else
1642 throw parse_error ("expected literal string or number");
9c0c0e46 1643 }
56099f08
FCE
1644
1645 l->tok = t;
1646 return l;
2f1a1aea
FCE
1647}
1648
1649
1650if_statement*
1651parser::parse_if_statement ()
1652{
1653 const token* t = next ();
6e213f58 1654 if (! (t->type == tok_keyword && t->content == "if"))
56099f08
FCE
1655 throw parse_error ("expected 'if'");
1656 if_statement* s = new if_statement;
1657 s->tok = t;
1658
1659 t = next ();
2f1a1aea
FCE
1660 if (! (t->type == tok_operator && t->content == "("))
1661 throw parse_error ("expected '('");
1662
2f1a1aea
FCE
1663 s->condition = parse_expression ();
1664
1665 t = next ();
1666 if (! (t->type == tok_operator && t->content == ")"))
1667 throw parse_error ("expected ')'");
1668
1669 s->thenblock = parse_statement ();
1670
1671 t = peek ();
6e213f58 1672 if (t && t->type == tok_keyword && t->content == "else")
2f1a1aea
FCE
1673 {
1674 next ();
1675 s->elseblock = parse_statement ();
1676 }
ed10c639
FCE
1677 else
1678 s->elseblock = 0; // in case not otherwise initialized
2f1a1aea
FCE
1679
1680 return s;
1681}
1682
1683
69c68955
FCE
1684expr_statement*
1685parser::parse_expr_statement ()
1686{
1687 expr_statement *es = new expr_statement;
1688 const token* t = peek ();
1689 es->tok = t;
1690 es->value = parse_expression ();
1691 return es;
1692}
1693
1694
56099f08
FCE
1695return_statement*
1696parser::parse_return_statement ()
1697{
1698 const token* t = next ();
6e213f58 1699 if (! (t->type == tok_keyword && t->content == "return"))
56099f08 1700 throw parse_error ("expected 'return'");
6e213f58
DS
1701 if (context != con_function)
1702 throw parse_error ("found 'return' not in function context");
56099f08
FCE
1703 return_statement* s = new return_statement;
1704 s->tok = t;
1705 s->value = parse_expression ();
1706 return s;
1707}
1708
1709
1710delete_statement*
1711parser::parse_delete_statement ()
1712{
1713 const token* t = next ();
6e213f58 1714 if (! (t->type == tok_keyword && t->content == "delete"))
56099f08
FCE
1715 throw parse_error ("expected 'delete'");
1716 delete_statement* s = new delete_statement;
1717 s->tok = t;
1718 s->value = parse_expression ();
1719 return s;
1720}
1721
1722
f3c26ea5
FCE
1723next_statement*
1724parser::parse_next_statement ()
1725{
1726 const token* t = next ();
6e213f58 1727 if (! (t->type == tok_keyword && t->content == "next"))
f3c26ea5 1728 throw parse_error ("expected 'next'");
6e213f58
DS
1729 if (context != con_probe)
1730 throw parse_error ("found 'next' not in probe context");
f3c26ea5
FCE
1731 next_statement* s = new next_statement;
1732 s->tok = t;
1733 return s;
1734}
1735
1736
1737break_statement*
1738parser::parse_break_statement ()
1739{
1740 const token* t = next ();
6e213f58 1741 if (! (t->type == tok_keyword && t->content == "break"))
f3c26ea5
FCE
1742 throw parse_error ("expected 'break'");
1743 break_statement* s = new break_statement;
1744 s->tok = t;
1745 return s;
1746}
1747
1748
1749continue_statement*
1750parser::parse_continue_statement ()
1751{
1752 const token* t = next ();
6e213f58 1753 if (! (t->type == tok_keyword && t->content == "continue"))
f3c26ea5
FCE
1754 throw parse_error ("expected 'continue'");
1755 continue_statement* s = new continue_statement;
1756 s->tok = t;
1757 return s;
1758}
1759
1760
69c68955
FCE
1761for_loop*
1762parser::parse_for_loop ()
1763{
f3c26ea5 1764 const token* t = next ();
6e213f58 1765 if (! (t->type == tok_keyword && t->content == "for"))
f3c26ea5
FCE
1766 throw parse_error ("expected 'for'");
1767 for_loop* s = new for_loop;
1768 s->tok = t;
1769
1770 t = next ();
1771 if (! (t->type == tok_operator && t->content == "("))
1772 throw parse_error ("expected '('");
1773
1774 // initializer + ";"
1775 t = peek ();
1776 if (t && t->type == tok_operator && t->content == ";")
1777 {
cbfbbf69
FCE
1778 s->init = 0;
1779 next ();
f3c26ea5
FCE
1780 }
1781 else
1782 {
1783 s->init = parse_expr_statement ();
1784 t = next ();
1785 if (! (t->type == tok_operator && t->content == ";"))
1786 throw parse_error ("expected ';'");
1787 }
1788
1789 // condition + ";"
1790 t = peek ();
1791 if (t && t->type == tok_operator && t->content == ";")
1792 {
1793 literal_number* l = new literal_number(1);
1794 s->cond = l;
1795 s->cond->tok = next ();
1796 }
1797 else
1798 {
1799 s->cond = parse_expression ();
1800 t = next ();
1801 if (! (t->type == tok_operator && t->content == ";"))
1802 throw parse_error ("expected ';'");
1803 }
dff50e09 1804
f3c26ea5
FCE
1805 // increment + ")"
1806 t = peek ();
1807 if (t && t->type == tok_operator && t->content == ")")
1808 {
cbfbbf69
FCE
1809 s->incr = 0;
1810 next ();
f3c26ea5
FCE
1811 }
1812 else
1813 {
1814 s->incr = parse_expr_statement ();
1815 t = next ();
1816 if (! (t->type == tok_operator && t->content == ")"))
c958a431 1817 throw parse_error ("expected ')'");
f3c26ea5
FCE
1818 }
1819
1820 // block
1821 s->block = parse_statement ();
1822
1823 return s;
1824}
1825
1826
1827for_loop*
1828parser::parse_while_loop ()
1829{
1830 const token* t = next ();
6e213f58 1831 if (! (t->type == tok_keyword && t->content == "while"))
f3c26ea5
FCE
1832 throw parse_error ("expected 'while'");
1833 for_loop* s = new for_loop;
1834 s->tok = t;
1835
1836 t = next ();
1837 if (! (t->type == tok_operator && t->content == "("))
1838 throw parse_error ("expected '('");
1839
1840 // dummy init and incr fields
cbfbbf69
FCE
1841 s->init = 0;
1842 s->incr = 0;
f3c26ea5
FCE
1843
1844 // condition
1845 s->cond = parse_expression ();
1846
f3c26ea5
FCE
1847 t = next ();
1848 if (! (t->type == tok_operator && t->content == ")"))
1849 throw parse_error ("expected ')'");
dff50e09 1850
f3c26ea5
FCE
1851 // block
1852 s->block = parse_statement ();
1853
1854 return s;
69c68955
FCE
1855}
1856
1857
1858foreach_loop*
1859parser::parse_foreach_loop ()
1860{
1861 const token* t = next ();
6e213f58 1862 if (! (t->type == tok_keyword && t->content == "foreach"))
69c68955
FCE
1863 throw parse_error ("expected 'foreach'");
1864 foreach_loop* s = new foreach_loop;
1865 s->tok = t;
93484556 1866 s->sort_direction = 0;
c261711d 1867 s->value = NULL;
27f21e8c 1868 s->limit = NULL;
69c68955
FCE
1869
1870 t = next ();
1871 if (! (t->type == tok_operator && t->content == "("))
1872 throw parse_error ("expected '('");
1873
c261711d
JS
1874 symbol* lookahead_sym = NULL;
1875 int lookahead_sort = 0;
1876
1877 t = peek ();
1878 if (t && t->type == tok_identifier)
1879 {
1880 next ();
1881 lookahead_sym = new symbol;
1882 lookahead_sym->tok = t;
1883 lookahead_sym->name = t->content;
1884
1885 t = peek ();
1886 if (t && t->type == tok_operator &&
1887 (t->content == "+" || t->content == "-"))
1888 {
1889 next ();
1890 lookahead_sort = (t->content == "+") ? 1 : -1;
1891 }
1892
1893 t = peek ();
1894 if (t && t->type == tok_operator && t->content == "=")
1895 {
1896 next ();
1897 s->value = lookahead_sym;
1898 if (lookahead_sort)
1899 {
1900 s->sort_direction = lookahead_sort;
1901 s->sort_column = 0;
1902 }
1903 lookahead_sym = NULL;
1904 }
1905 }
1906
69c68955
FCE
1907 // see also parse_array_in
1908
1909 bool parenthesized = false;
1910 t = peek ();
c261711d 1911 if (!lookahead_sym && t && t->type == tok_operator && t->content == "[")
69c68955
FCE
1912 {
1913 next ();
1914 parenthesized = true;
1915 }
1916
c261711d
JS
1917 if (lookahead_sym)
1918 {
1919 s->indexes.push_back (lookahead_sym);
1920 if (lookahead_sort)
1921 {
1922 s->sort_direction = lookahead_sort;
1923 s->sort_column = 1;
1924 }
1925 lookahead_sym = NULL;
1926 }
1927 else while (1)
69c68955
FCE
1928 {
1929 t = next ();
1930 if (! (t->type == tok_identifier))
1931 throw parse_error ("expected identifier");
1932 symbol* sym = new symbol;
1933 sym->tok = t;
1934 sym->name = t->content;
1935 s->indexes.push_back (sym);
1936
93484556
FCE
1937 t = peek ();
1938 if (t && t->type == tok_operator &&
1939 (t->content == "+" || t->content == "-"))
1940 {
1941 if (s->sort_direction)
1942 throw parse_error ("multiple sort directives");
1943 s->sort_direction = (t->content == "+") ? 1 : -1;
1944 s->sort_column = s->indexes.size();
1945 next();
1946 }
1947
69c68955
FCE
1948 if (parenthesized)
1949 {
93484556 1950 t = peek ();
69c68955
FCE
1951 if (t && t->type == tok_operator && t->content == ",")
1952 {
1953 next ();
1954 continue;
1955 }
1956 else if (t && t->type == tok_operator && t->content == "]")
1957 {
1958 next ();
1959 break;
1960 }
dff50e09 1961 else
69c68955
FCE
1962 throw parse_error ("expected ',' or ']'");
1963 }
1964 else
1965 break; // expecting only one expression
1966 }
1967
1968 t = next ();
6e213f58 1969 if (! (t->type == tok_keyword && t->content == "in"))
69c68955 1970 throw parse_error ("expected 'in'");
dff50e09 1971
d02548c0 1972 s->base = parse_indexable();
69c68955 1973
93484556
FCE
1974 t = peek ();
1975 if (t && t->type == tok_operator &&
1976 (t->content == "+" || t->content == "-"))
1977 {
1978 if (s->sort_direction)
1979 throw parse_error ("multiple sort directives");
1980 s->sort_direction = (t->content == "+") ? 1 : -1;
1981 s->sort_column = 0;
1982 next();
1983 }
1984
27f21e8c
DS
1985 t = peek ();
1986 if (tok_is(t, tok_keyword, "limit"))
1987 {
1988 next (); // get past the "limit"
1989 s->limit = parse_expression ();
1990 }
1991
69c68955
FCE
1992 t = next ();
1993 if (! (t->type == tok_operator && t->content == ")"))
1994 throw parse_error ("expected ')'");
1995
1996 s->block = parse_statement ();
1997 return s;
1998}
1999
2000
2f1a1aea
FCE
2001expression*
2002parser::parse_expression ()
2003{
2004 return parse_assignment ();
2005}
2006
2f1a1aea
FCE
2007
2008expression*
2009parser::parse_assignment ()
2010{
2011 expression* op1 = parse_ternary ();
2012
2013 const token* t = peek ();
82919855 2014 // right-associative operators
dff50e09 2015 if (t && t->type == tok_operator
2f1a1aea 2016 && (t->content == "=" ||
82919855 2017 t->content == "<<<" ||
2f1a1aea 2018 t->content == "+=" ||
bb2e3076
FCE
2019 t->content == "-=" ||
2020 t->content == "*=" ||
2021 t->content == "/=" ||
2022 t->content == "%=" ||
2023 t->content == "<<=" ||
2024 t->content == ">>=" ||
2025 t->content == "&=" ||
2026 t->content == "^=" ||
2027 t->content == "|=" ||
d5d7c2cc 2028 t->content == ".=" ||
dff50e09 2029 false))
2f1a1aea 2030 {
bb2e3076 2031 // NB: lvalueness is checked during elaboration / translation
2f1a1aea 2032 assignment* e = new assignment;
56099f08 2033 e->left = op1;
2f1a1aea 2034 e->op = t->content;
56099f08 2035 e->tok = t;
2f1a1aea 2036 next ();
82919855 2037 e->right = parse_expression ();
56099f08 2038 op1 = e;
2f1a1aea 2039 }
56099f08
FCE
2040
2041 return op1;
2f1a1aea
FCE
2042}
2043
2044
2045expression*
2046parser::parse_ternary ()
2047{
2048 expression* op1 = parse_logical_or ();
2049
2050 const token* t = peek ();
2051 if (t && t->type == tok_operator && t->content == "?")
2052 {
2f1a1aea 2053 ternary_expression* e = new ternary_expression;
56099f08 2054 e->tok = t;
2f1a1aea 2055 e->cond = op1;
56099f08
FCE
2056 next ();
2057 e->truevalue = parse_expression (); // XXX
2f1a1aea
FCE
2058
2059 t = next ();
2060 if (! (t->type == tok_operator && t->content == ":"))
2061 throw parse_error ("expected ':'");
2062
56099f08 2063 e->falsevalue = parse_expression (); // XXX
2f1a1aea
FCE
2064 return e;
2065 }
2066 else
2067 return op1;
2068}
2069
2070
2071expression*
2072parser::parse_logical_or ()
2073{
2074 expression* op1 = parse_logical_and ();
dff50e09 2075
2f1a1aea 2076 const token* t = peek ();
56099f08 2077 while (t && t->type == tok_operator && t->content == "||")
2f1a1aea 2078 {
2f1a1aea 2079 logical_or_expr* e = new logical_or_expr;
56099f08
FCE
2080 e->tok = t;
2081 e->op = t->content;
2f1a1aea 2082 e->left = op1;
56099f08
FCE
2083 next ();
2084 e->right = parse_logical_and ();
2085 op1 = e;
2086 t = peek ();
2f1a1aea 2087 }
56099f08
FCE
2088
2089 return op1;
2f1a1aea
FCE
2090}
2091
2092
2093expression*
2094parser::parse_logical_and ()
2095{
bb2e3076 2096 expression* op1 = parse_boolean_or ();
2f1a1aea
FCE
2097
2098 const token* t = peek ();
56099f08 2099 while (t && t->type == tok_operator && t->content == "&&")
2f1a1aea 2100 {
2f1a1aea
FCE
2101 logical_and_expr *e = new logical_and_expr;
2102 e->left = op1;
56099f08
FCE
2103 e->op = t->content;
2104 e->tok = t;
2105 next ();
bb2e3076
FCE
2106 e->right = parse_boolean_or ();
2107 op1 = e;
2108 t = peek ();
2109 }
2110
2111 return op1;
2112}
2113
2114
2115expression*
2116parser::parse_boolean_or ()
2117{
2118 expression* op1 = parse_boolean_xor ();
2119
2120 const token* t = peek ();
2121 while (t && t->type == tok_operator && t->content == "|")
2122 {
2123 binary_expression* e = new binary_expression;
2124 e->left = op1;
2125 e->op = t->content;
2126 e->tok = t;
2127 next ();
2128 e->right = parse_boolean_xor ();
2129 op1 = e;
2130 t = peek ();
2131 }
2132
2133 return op1;
2134}
2135
2136
2137expression*
2138parser::parse_boolean_xor ()
2139{
2140 expression* op1 = parse_boolean_and ();
2141
2142 const token* t = peek ();
2143 while (t && t->type == tok_operator && t->content == "^")
2144 {
2145 binary_expression* e = new binary_expression;
2146 e->left = op1;
2147 e->op = t->content;
2148 e->tok = t;
2149 next ();
2150 e->right = parse_boolean_and ();
2151 op1 = e;
2152 t = peek ();
2153 }
2154
2155 return op1;
2156}
2157
2158
2159expression*
2160parser::parse_boolean_and ()
2161{
2162 expression* op1 = parse_array_in ();
2163
2164 const token* t = peek ();
2165 while (t && t->type == tok_operator && t->content == "&")
2166 {
2167 binary_expression* e = new binary_expression;
2168 e->left = op1;
2169 e->op = t->content;
2170 e->tok = t;
2171 next ();
56099f08
FCE
2172 e->right = parse_array_in ();
2173 op1 = e;
2174 t = peek ();
2f1a1aea 2175 }
56099f08
FCE
2176
2177 return op1;
2f1a1aea
FCE
2178}
2179
2180
2181expression*
2182parser::parse_array_in ()
2183{
ce10591c 2184 // This is a very tricky case. All these are legit expressions:
69c68955 2185 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
ce10591c
FCE
2186 vector<expression*> indexes;
2187 bool parenthesized = false;
2f1a1aea
FCE
2188
2189 const token* t = peek ();
69c68955 2190 if (t && t->type == tok_operator && t->content == "[")
ce10591c
FCE
2191 {
2192 next ();
2193 parenthesized = true;
2194 }
2195
2196 while (1)
2197 {
2198 expression* op1 = parse_comparison ();
2199 indexes.push_back (op1);
2200
2201 if (parenthesized)
2202 {
2203 const token* t = peek ();
2204 if (t && t->type == tok_operator && t->content == ",")
2205 {
2206 next ();
2207 continue;
2208 }
69c68955 2209 else if (t && t->type == tok_operator && t->content == "]")
ce10591c
FCE
2210 {
2211 next ();
2212 break;
2213 }
dff50e09 2214 else
69c68955 2215 throw parse_error ("expected ',' or ']'");
ce10591c
FCE
2216 }
2217 else
2218 break; // expecting only one expression
2219 }
2220
2221 t = peek ();
6e213f58 2222 if (t && t->type == tok_keyword && t->content == "in")
2f1a1aea 2223 {
2f1a1aea 2224 array_in *e = new array_in;
56099f08 2225 e->tok = t;
ce10591c
FCE
2226 next (); // swallow "in"
2227
2228 arrayindex* a = new arrayindex;
2229 a->indexes = indexes;
d02548c0
GH
2230 a->base = parse_indexable();
2231 a->tok = a->base->get_tok();
ce10591c 2232 e->operand = a;
2f1a1aea
FCE
2233 return e;
2234 }
ce10591c
FCE
2235 else if (indexes.size() == 1) // no "in" - need one expression only
2236 return indexes[0];
2f1a1aea 2237 else
ce10591c 2238 throw parse_error ("unexpected comma-separated expression list");
2f1a1aea
FCE
2239}
2240
2241
2242expression*
2243parser::parse_comparison ()
2244{
bb2e3076 2245 expression* op1 = parse_shift ();
2f1a1aea
FCE
2246
2247 const token* t = peek ();
dff50e09 2248 while (t && t->type == tok_operator
553d27a5
FCE
2249 && (t->content == ">" ||
2250 t->content == "<" ||
2251 t->content == "==" ||
2252 t->content == "!=" ||
2253 t->content == "<=" ||
bb2e3076 2254 t->content == ">="))
2f1a1aea
FCE
2255 {
2256 comparison* e = new comparison;
2257 e->left = op1;
2258 e->op = t->content;
56099f08 2259 e->tok = t;
2f1a1aea 2260 next ();
bb2e3076
FCE
2261 e->right = parse_shift ();
2262 op1 = e;
2263 t = peek ();
2264 }
2265
2266 return op1;
2267}
2268
2269
2270expression*
2271parser::parse_shift ()
2272{
2273 expression* op1 = parse_concatenation ();
2274
2275 const token* t = peek ();
dff50e09 2276 while (t && t->type == tok_operator &&
bb2e3076
FCE
2277 (t->content == "<<" || t->content == ">>"))
2278 {
2279 binary_expression* e = new binary_expression;
2280 e->left = op1;
2281 e->op = t->content;
2282 e->tok = t;
2283 next ();
56099f08
FCE
2284 e->right = parse_concatenation ();
2285 op1 = e;
2286 t = peek ();
2f1a1aea 2287 }
56099f08
FCE
2288
2289 return op1;
2f1a1aea
FCE
2290}
2291
2292
2293expression*
2294parser::parse_concatenation ()
2295{
2296 expression* op1 = parse_additive ();
2297
2298 const token* t = peek ();
2299 // XXX: the actual awk string-concatenation operator is *whitespace*.
2300 // I don't know how to easily to model that here.
56099f08 2301 while (t && t->type == tok_operator && t->content == ".")
2f1a1aea
FCE
2302 {
2303 concatenation* e = new concatenation;
2304 e->left = op1;
2305 e->op = t->content;
56099f08 2306 e->tok = t;
2f1a1aea 2307 next ();
56099f08
FCE
2308 e->right = parse_additive ();
2309 op1 = e;
2310 t = peek ();
2f1a1aea 2311 }
56099f08
FCE
2312
2313 return op1;
2f1a1aea
FCE
2314}
2315
2316
2317expression*
2318parser::parse_additive ()
2319{
2320 expression* op1 = parse_multiplicative ();
2321
2322 const token* t = peek ();
dff50e09 2323 while (t && t->type == tok_operator
2f1a1aea
FCE
2324 && (t->content == "+" || t->content == "-"))
2325 {
2326 binary_expression* e = new binary_expression;
2327 e->op = t->content;
2328 e->left = op1;
56099f08 2329 e->tok = t;
2f1a1aea 2330 next ();
56099f08
FCE
2331 e->right = parse_multiplicative ();
2332 op1 = e;
2333 t = peek ();
2f1a1aea 2334 }
56099f08
FCE
2335
2336 return op1;
2f1a1aea
FCE
2337}
2338
2339
2340expression*
2341parser::parse_multiplicative ()
2342{
2343 expression* op1 = parse_unary ();
2344
2345 const token* t = peek ();
dff50e09 2346 while (t && t->type == tok_operator
2f1a1aea
FCE
2347 && (t->content == "*" || t->content == "/" || t->content == "%"))
2348 {
2349 binary_expression* e = new binary_expression;
2350 e->op = t->content;
2351 e->left = op1;
56099f08 2352 e->tok = t;
2f1a1aea 2353 next ();
56099f08
FCE
2354 e->right = parse_unary ();
2355 op1 = e;
2356 t = peek ();
2f1a1aea 2357 }
56099f08
FCE
2358
2359 return op1;
2f1a1aea
FCE
2360}
2361
2362
2363expression*
2364parser::parse_unary ()
2365{
2366 const token* t = peek ();
dff50e09
FCE
2367 if (t && t->type == tok_operator
2368 && (t->content == "+" ||
2369 t->content == "-" ||
bb2e3076
FCE
2370 t->content == "!" ||
2371 t->content == "~" ||
2372 false))
2f1a1aea
FCE
2373 {
2374 unary_expression* e = new unary_expression;
2375 e->op = t->content;
56099f08 2376 e->tok = t;
2f1a1aea 2377 next ();
1cb79a72 2378 e->operand = parse_unary ();
2f1a1aea
FCE
2379 return e;
2380 }
2381 else
bb2e3076 2382 return parse_crement ();
2f1a1aea
FCE
2383}
2384
2385
2386expression*
2387parser::parse_crement () // as in "increment" / "decrement"
2388{
cbfbbf69
FCE
2389 // NB: Ideally, we'd parse only a symbol as an operand to the
2390 // *crement operators, instead of a general expression value. We'd
2391 // need more complex lookahead code to tell apart the postfix cases.
2392 // So we just punt, and leave it to pass-3 to signal errors on
2393 // cases like "4++".
2394
2f1a1aea 2395 const token* t = peek ();
dff50e09 2396 if (t && t->type == tok_operator
2f1a1aea
FCE
2397 && (t->content == "++" || t->content == "--"))
2398 {
2399 pre_crement* e = new pre_crement;
2400 e->op = t->content;
56099f08 2401 e->tok = t;
2f1a1aea
FCE
2402 next ();
2403 e->operand = parse_value ();
2404 return e;
2405 }
2406
2407 // post-crement or non-crement
2408 expression *op1 = parse_value ();
dff50e09 2409
2f1a1aea 2410 t = peek ();
dff50e09 2411 if (t && t->type == tok_operator
2f1a1aea
FCE
2412 && (t->content == "++" || t->content == "--"))
2413 {
2414 post_crement* e = new post_crement;
2415 e->op = t->content;
56099f08 2416 e->tok = t;
2f1a1aea
FCE
2417 next ();
2418 e->operand = op1;
2419 return e;
2420 }
2421 else
2422 return op1;
2423}
2424
2425
2426expression*
2427parser::parse_value ()
2428{
2429 const token* t = peek ();
2430 if (! t)
2431 throw parse_error ("expected value");
2432
2433 if (t->type == tok_operator && t->content == "(")
2434 {
2435 next ();
2436 expression* e = parse_expression ();
2437 t = next ();
2438 if (! (t->type == tok_operator && t->content == ")"))
2439 throw parse_error ("expected ')'");
2440 return e;
2441 }
03c75a4a
JS
2442 else if (t->type == tok_operator && t->content == "&")
2443 {
2444 next ();
30263a73
FCE
2445 t = next ();
2446 target_symbol *ts = parse_target_symbol (t);
03c75a4a
JS
2447 ts->addressof = true;
2448 return ts;
2449 }
2f1a1aea
FCE
2450 else if (t->type == tok_identifier)
2451 return parse_symbol ();
2452 else
2453 return parse_literal ();
2454}
2455
2456
d02548c0
GH
2457const token *
2458parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name)
2459{
2460 hop = NULL;
2461 const token* t = expect_ident (name);
2462 if (name == "@hist_linear" || name == "@hist_log")
2463 {
2464 hop = new hist_op;
2465 if (name == "@hist_linear")
2466 hop->htype = hist_linear;
2467 else if (name == "@hist_log")
2468 hop->htype = hist_log;
2469 hop->tok = t;
2470 expect_op("(");
2471 hop->stat = parse_expression ();
2472 int64_t tnum;
2473 if (hop->htype == hist_linear)
2474 {
2475 for (size_t i = 0; i < 3; ++i)
2476 {
2477 expect_op (",");
2478 expect_number (tnum);
2479 hop->params.push_back (tnum);
2480 }
2481 }
d02548c0
GH
2482 expect_op(")");
2483 }
2484 return t;
2485}
2486
2487
2488indexable*
2489parser::parse_indexable ()
2490{
2491 hist_op *hop = NULL;
2492 string name;
2493 const token *tok = parse_hist_op_or_bare_name(hop, name);
2494 if (hop)
2495 return hop;
2496 else
2497 {
2498 symbol* sym = new symbol;
2499 sym->name = name;
2500 sym->tok = tok;
2501 return sym;
2502 }
2503}
2504
2505
2506// var, indexable[index], func(parms), printf("...", ...), $var, $var->member, @stat_op(stat)
30263a73 2507expression* parser::parse_symbol ()
2f1a1aea 2508{
d02548c0
GH
2509 hist_op *hop = NULL;
2510 symbol *sym = NULL;
d7f3e0c5 2511 string name;
d02548c0
GH
2512 const token *t = parse_hist_op_or_bare_name(hop, name);
2513
2514 if (!hop)
0fefb486 2515 {
dff50e09 2516 // If we didn't get a hist_op, then we did get an identifier. We can
d02548c0
GH
2517 // now scrutinize this identifier for the various magic forms of identifier
2518 // (printf, @stat_op, and $var...)
2519
30263a73
FCE
2520 if (name == "@cast" || (name.size()>0 && name[0] == '$'))
2521 return parse_target_symbol (t);
9b5af295 2522
db135493
FCE
2523 // NB: PR11343: @defined() is not incompatible with earlier versions
2524 // of stap, so no need to check session.compatible for 1.2
30263a73
FCE
2525 if (name == "@defined")
2526 return parse_defined_op (t);
2527
9b5af295 2528 else if (name.size() > 0 && name[0] == '@')
d7f3e0c5 2529 {
d02548c0
GH
2530 stat_op *sop = new stat_op;
2531 if (name == "@avg")
2532 sop->ctype = sc_average;
2533 else if (name == "@count")
2534 sop->ctype = sc_count;
2535 else if (name == "@sum")
2536 sop->ctype = sc_sum;
2537 else if (name == "@min")
2538 sop->ctype = sc_min;
2539 else if (name == "@max")
2540 sop->ctype = sc_max;
2541 else
2542 throw parse_error("unknown statistic operator " + name);
2543 expect_op("(");
2544 sop->tok = t;
2545 sop->stat = parse_expression ();
2546 expect_op(")");
2547 return sop;
2548 }
dff50e09 2549
d5e178c1 2550 else if (print_format *fmt = print_format::create(t))
d02548c0 2551 {
d02548c0 2552 expect_op("(");
b15c465c
PP
2553 if ((name == "print" || name == "println" ||
2554 name == "sprint" || name == "sprintln") &&
3cb17058 2555 (peek_kw("@hist_linear") || peek_kw("@hist_log")))
a4636912
GH
2556 {
2557 // We have a special case where we recognize
2558 // print(@hist_foo(bar)) as a magic print-the-histogram
2559 // construct. This is sort of gross but it avoids
2560 // promoting histogram references to typeful
2561 // expressions.
dff50e09 2562
1bbeef03
GH
2563 hop = NULL;
2564 t = parse_hist_op_or_bare_name(hop, name);
2565 assert(hop);
dff50e09 2566
1bbeef03
GH
2567 // It is, sadly, possible that even while parsing a
2568 // hist_op, we *mis-guessed* and the user wishes to
2569 // print(@hist_op(foo)[bucket]), a scalar. In that case
2570 // we must parse the arrayindex and print an expression.
dff50e09 2571
1bbeef03
GH
2572 if (!peek_op ("["))
2573 fmt->hist = hop;
2574 else
2575 {
2576 // This is simplified version of the
2577 // multi-array-index parser below, because we can
2578 // only ever have one index on a histogram anyways.
2579 expect_op("[");
2580 struct arrayindex* ai = new arrayindex;
2581 ai->tok = t;
2582 ai->base = hop;
2583 ai->indexes.push_back (parse_expression ());
2584 expect_op("]");
2585 fmt->args.push_back(ai);
2586 }
a4636912 2587 }
d7f3e0c5 2588 else
d02548c0 2589 {
3cb17058
JS
2590 int min_args = 0;
2591 if (fmt->print_with_format)
2592 {
2593 // Consume and convert a format string. Agreement between the
2594 // format string and the arguments is postponed to the
2595 // typechecking phase.
2596 string tmp;
2597 expect_unknown (tok_string, tmp);
2598 fmt->raw_components = tmp;
2599 fmt->components = print_format::string_to_components (tmp);
2600 }
2601 else if (fmt->print_with_delim)
2602 {
2603 // Consume a delimiter to separate arguments.
2604 fmt->delimiter.clear();
2605 fmt->delimiter.type = print_format::conv_literal;
2606 expect_unknown (tok_string, fmt->delimiter.literal_string);
2607 min_args = 2;
2608 }
2609 else
2610 {
2611 // If we are not printing with a format string, we must have
2612 // at least one argument (of any type).
2613 expression *e = parse_expression ();
2614 fmt->args.push_back(e);
2615 }
2616
2617 // Consume any subsequent arguments.
2618 while (min_args || !peek_op (")"))
2619 {
2620 expect_op(",");
2621 expression *e = parse_expression ();
2622 fmt->args.push_back(e);
2623 if (min_args)
2624 --min_args;
2625 }
d02548c0
GH
2626 }
2627 expect_op(")");
2628 return fmt;
2629 }
dff50e09 2630
d02548c0
GH
2631 else if (peek_op ("(")) // function call
2632 {
2633 next ();
2634 struct functioncall* f = new functioncall;
2635 f->tok = t;
2636 f->function = name;
2637 // Allow empty actual parameter list
2638 if (peek_op (")"))
2639 {
2640 next ();
2641 return f;
2642 }
2643 while (1)
2644 {
2645 f->args.push_back (parse_expression ());
2646 if (peek_op (")"))
2647 {
2648 next();
2649 break;
2650 }
2651 else if (peek_op (","))
2652 {
2653 next();
2654 continue;
2655 }
2656 else
2657 throw parse_error ("expected ',' or ')'");
2658 }
2659 return f;
2660 }
2661
2662 else
2663 {
2664 sym = new symbol;
2665 sym->name = name;
2666 sym->tok = t;
d7f3e0c5 2667 }
0fefb486 2668 }
dff50e09
FCE
2669
2670 // By now, either we had a hist_op in the first place, or else
d02548c0
GH
2671 // we had a plain word and it was converted to a symbol.
2672
70c743d8 2673 assert (!hop != !sym); // logical XOR
d02548c0
GH
2674
2675 // All that remains is to check for array indexing
2676
d7f3e0c5 2677 if (peek_op ("[")) // array
2f1a1aea
FCE
2678 {
2679 next ();
2680 struct arrayindex* ai = new arrayindex;
d02548c0
GH
2681 ai->tok = t;
2682
2683 if (hop)
2684 ai->base = hop;
2685 else
2686 ai->base = sym;
2687
2f1a1aea
FCE
2688 while (1)
2689 {
2690 ai->indexes.push_back (parse_expression ());
d7f3e0c5 2691 if (peek_op ("]"))
dff50e09
FCE
2692 {
2693 next();
2694 break;
d7f3e0c5
GH
2695 }
2696 else if (peek_op (","))
2697 {
2698 next();
2699 continue;
2700 }
2f1a1aea
FCE
2701 else
2702 throw parse_error ("expected ',' or ']'");
2703 }
2704 return ai;
2705 }
d02548c0
GH
2706
2707 // If we got to here, we *should* have a symbol; if we have
2708 // a hist_op on its own, it doesn't count as an expression,
2709 // so we throw a parse error.
2710
2711 if (hop)
2712 throw parse_error("base histogram operator where expression expected", t);
dff50e09
FCE
2713
2714 return sym;
2f1a1aea 2715}
56099f08 2716
81931eab 2717
30263a73
FCE
2718// Parse a @cast or $var. Given head token has already been consumed.
2719target_symbol* parser::parse_target_symbol (const token* t)
2720{
2721 if (t->type == tok_identifier && t->content == "@cast")
2722 {
2723 cast_op *cop = new cast_op;
2724 cop->tok = t;
2725 cop->base_name = t->content;
2726 expect_op("(");
2727 cop->operand = parse_expression ();
2728 expect_op(",");
2729 expect_unknown(tok_string, cop->type);
2730 // types never start with "struct<space>" or "union<space>",
2731 // so gobble it up.
60d98537 2732 if (startswith(cop->type, "struct "))
30263a73 2733 cop->type = cop->type.substr(7);
60d98537 2734 if (startswith(cop->type, "union "))
30263a73
FCE
2735 cop->type = cop->type.substr(6);
2736 if (peek_op (","))
2737 {
2738 next();
2739 expect_unknown(tok_string, cop->module);
2740 }
2741 expect_op(")");
2742 parse_target_symbol_components(cop);
2743 return cop;
2744 }
2745
2746 if (t->type == tok_identifier && t->content[0]=='$')
2747 {
2748 // target_symbol time
2749 target_symbol *tsym = new target_symbol;
2750 tsym->tok = t;
2751 tsym->base_name = t->content;
2752 parse_target_symbol_components(tsym);
2753 return tsym;
2754 }
2755
2756 throw parse_error ("expected @cast or $var");
2757}
2758
2759
2760// Parse a @defined(). Given head token has already been consumed.
2761expression* parser::parse_defined_op (const token* t)
2762{
2763 defined_op* dop = new defined_op;
2764 dop->tok = t;
2765 expect_op("(");
2766 string nm;
2767 // no need for parse_hist_op... etc., as @defined takes only target_symbols as its operand.
2768 const token* tt = expect_ident (nm);
2769 dop->operand = parse_target_symbol (tt);
2770 expect_op(")");
2771 return dop;
2772}
2773
2774
2775
81931eab
JS
2776void
2777parser::parse_target_symbol_components (target_symbol* e)
2778{
2779 while (true)
2780 {
81931eab
JS
2781 if (peek_op ("->"))
2782 {
c67847a0
JS
2783 const token* t = next();
2784 string member;
2785 expect_ident_or_keyword (member);
2786 e->components.push_back (target_symbol::component(t, member));
81931eab
JS
2787 }
2788 else if (peek_op ("["))
2789 {
c67847a0 2790 const token* t = next();
6fda2dff
JS
2791 expression* index = parse_expression();
2792 literal_number* ln = dynamic_cast<literal_number*>(index);
2793 if (ln)
2794 e->components.push_back (target_symbol::component(t, ln->value));
2795 else
2796 e->components.push_back (target_symbol::component(t, index));
81931eab 2797 expect_op ("]");
81931eab
JS
2798 }
2799 else
2800 break;
2801 }
2802}
2803
73267b89 2804/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.424274 seconds and 5 git commands to generate.