]> sourceware.org Git - systemtap.git/blame - parse.cxx
Allow CONFIG_foo COMPARISON-OP number in preprocessor conditionals.
[systemtap.git] / parse.cxx
CommitLineData
2f1a1aea 1// recursive descent parser for systemtap scripts
482fe2af 2// Copyright (C) 2005-2009 Red Hat Inc.
77a5c1f9 3// Copyright (C) 2006 Intel Corporation.
5811366a 4// Copyright (C) 2007 Bull S.A.S
69c68955
FCE
5//
6// This file is part of systemtap, and is free software. You can
7// redistribute it and/or modify it under the terms of the GNU General
8// Public License (GPL); either version 2, or (at your option) any
9// later version.
2f1a1aea 10
2b066ec1 11#include "config.h"
2f1a1aea
FCE
12#include "staptree.h"
13#include "parse.h"
177a8ead 14#include "session.h"
3f99432c
FCE
15#include "util.h"
16
2b066ec1 17#include <iostream>
eacb10ce 18
2b066ec1 19#include <fstream>
2f1a1aea 20#include <cctype>
9c0c0e46 21#include <cstdlib>
29e64872 22#include <cassert>
9c0c0e46
FCE
23#include <cerrno>
24#include <climits>
57b73400 25#include <sstream>
f74fb737 26#include <cstring>
3f99432c 27#include <cctype>
eacb10ce
FCE
28#include <iterator>
29
7a468d68
FCE
30extern "C" {
31#include <fnmatch.h>
32}
2f1a1aea
FCE
33
34using namespace std;
35
36// ------------------------------------------------------------------------
37
bb2e3076
FCE
38
39
177a8ead
FCE
40parser::parser (systemtap_session& s, istream& i, bool p):
41 session (s),
24cb178f 42 input_name ("<input>"), free_input (0),
213bee8f 43 input (i, input_name, s), privileged (p),
6e213f58 44 context(con_unknown), last_t (0), next_t (0), num_errors (0)
2f1a1aea
FCE
45{ }
46
177a8ead
FCE
47parser::parser (systemtap_session& s, const string& fn, bool p):
48 session (s),
2f1a1aea 49 input_name (fn), free_input (new ifstream (input_name.c_str(), ios::in)),
213bee8f 50 input (* free_input, input_name, s), privileged (p),
6e213f58 51 context(con_unknown), last_t (0), next_t (0), num_errors (0)
2f1a1aea
FCE
52{ }
53
54parser::~parser()
55{
56 if (free_input) delete free_input;
57}
58
59
82919855 60stapfile*
177a8ead 61parser::parse (systemtap_session& s, std::istream& i, bool pr)
82919855 62{
177a8ead 63 parser p (s, i, pr);
82919855
FCE
64 return p.parse ();
65}
66
67
68stapfile*
177a8ead 69parser::parse (systemtap_session& s, const std::string& n, bool pr)
82919855 70{
177a8ead 71 parser p (s, n, pr);
82919855
FCE
72 return p.parse ();
73}
74
d7f3e0c5
GH
75static string
76tt2str(token_type tt)
77{
78 switch (tt)
79 {
80 case tok_junk: return "junk";
81 case tok_identifier: return "identifier";
82 case tok_operator: return "operator";
83 case tok_string: return "string";
84 case tok_number: return "number";
85 case tok_embedded: return "embedded-code";
6e213f58 86 case tok_keyword: return "keyword";
d7f3e0c5
GH
87 }
88 return "unknown token";
89}
82919855 90
0323ed4d
WC
91ostream&
92operator << (ostream& o, const source_loc& loc)
93{
a704a23b 94 o << loc.file->name << ":"
0323ed4d
WC
95 << loc.line << ":"
96 << loc.column;
97
98 return o;
99}
100
56099f08
FCE
101ostream&
102operator << (ostream& o, const token& t)
103{
d7f3e0c5 104 o << tt2str(t.type);
56099f08 105
6e213f58 106 if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
56099f08 107 {
24cb178f
FCE
108 o << " '";
109 for (unsigned i=0; i<t.content.length(); i++)
110 {
111 char c = t.content[i];
112 o << (isprint (c) ? c : '?');
113 }
114 o << "'";
56099f08 115 }
56099f08 116
dff50e09 117 o << " at "
0323ed4d 118 << t.location;
56099f08
FCE
119
120 return o;
121}
122
123
dff50e09 124void
2f1a1aea
FCE
125parser::print_error (const parse_error &pe)
126{
1b1b4ceb 127 string align_parse_error (" ");
2f1a1aea
FCE
128 cerr << "parse error: " << pe.what () << endl;
129
177a8ead
FCE
130 if (pe.tok)
131 {
132 cerr << "\tat: " << *pe.tok << endl;
1b1b4ceb 133 session.print_error_source (cerr, align_parse_error, pe.tok);
177a8ead 134 }
2f1a1aea 135 else
177a8ead
FCE
136 {
137 const token* t = last_t;
138 if (t)
1b1b4ceb
RA
139 {
140 cerr << "\tsaw: " << *t << endl;
141 session.print_error_source (cerr, align_parse_error, t);
142 }
177a8ead
FCE
143 else
144 cerr << "\tsaw: " << input_name << " EOF" << endl;
145 }
2f1a1aea
FCE
146
147 // XXX: make it possible to print the last input line,
148 // so as to line up an arrow with the specific error column
149
150 num_errors ++;
151}
152
153
dff50e09 154const token*
2f1a1aea
FCE
155parser::last ()
156{
157 return last_t;
158}
159
160
c434ec7e
FCE
161
162template <typename OPERAND>
163bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
164{
165 if (op->type == tok_operator && op->content == "<=")
166 { return lhs <= rhs; }
167 else if (op->type == tok_operator && op->content == ">=")
168 { return lhs >= rhs; }
169 else if (op->type == tok_operator && op->content == "<")
170 { return lhs < rhs; }
171 else if (op->type == tok_operator && op->content == ">")
172 { return lhs > rhs; }
173 else if (op->type == tok_operator && op->content == "==")
174 { return lhs == rhs; }
175 else if (op->type == tok_operator && op->content == "!=")
176 { return lhs != rhs; }
177 else
178 throw parse_error ("expected comparison operator", op);
179}
180
181
177a8ead
FCE
182// Here, we perform on-the-fly preprocessing.
183// The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
44ce8ed5
FCE
184// where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
185// or: arch COMPARISON-OP "arch-string"
561079c8 186// or: CONFIG_foo COMPARISON-OP "config-string"
717a457b 187// or: CONFIG_foo COMPARISON-OP number
5811366a
FCE
188// or: "string1" COMPARISON-OP "string2"
189// or: number1 COMPARISON-OP number2
44ce8ed5 190// The %: ELSE-TOKENS part is optional.
177a8ead
FCE
191//
192// e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
c434ec7e 193// e.g. %( arch != "i?86" %? "foo" %: "baz" %)
561079c8 194// e.g. %( CONFIG_foo == "y" %? "foo" %: "baz" %)
177a8ead
FCE
195//
196// Up to an entire %( ... %) expression is processed by a single call
197// to this function. Tokens included by any nested conditions are
198// enqueued in a private vector.
199
200bool eval_pp_conditional (systemtap_session& s,
201 const token* l, const token* op, const token* r)
202{
44ce8ed5
FCE
203 if (l->type == tok_identifier && (l->content == "kernel_v" ||
204 l->content == "kernel_vr"))
205 {
206 string target_kernel_vr = s.kernel_release;
197a4d62 207 string target_kernel_v = s.kernel_base_release;
dff50e09 208
44ce8ed5
FCE
209 if (! (r->type == tok_string))
210 throw parse_error ("expected string literal", r);
7a468d68 211
dff50e09 212 string target = (l->content == "kernel_vr" ?
7a468d68
FCE
213 target_kernel_vr.c_str() :
214 target_kernel_v.c_str());
215 string query = r->content;
216 bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
217
44ce8ed5
FCE
218 // collect acceptable strverscmp results.
219 int rvc_ok1, rvc_ok2;
7a468d68 220 bool wc_ok = false;
44ce8ed5
FCE
221 if (op->type == tok_operator && op->content == "<=")
222 { rvc_ok1 = -1; rvc_ok2 = 0; }
223 else if (op->type == tok_operator && op->content == ">=")
224 { rvc_ok1 = 1; rvc_ok2 = 0; }
225 else if (op->type == tok_operator && op->content == "<")
226 { rvc_ok1 = -1; rvc_ok2 = -1; }
227 else if (op->type == tok_operator && op->content == ">")
228 { rvc_ok1 = 1; rvc_ok2 = 1; }
229 else if (op->type == tok_operator && op->content == "==")
7a468d68 230 { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
44ce8ed5 231 else if (op->type == tok_operator && op->content == "!=")
7a468d68 232 { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
44ce8ed5
FCE
233 else
234 throw parse_error ("expected comparison operator", op);
7a468d68
FCE
235
236 if ((!wc_ok) && rhs_wildcard)
237 throw parse_error ("wildcard not allowed with order comparison operators", op);
238
239 if (rhs_wildcard)
240 {
241 int rvc_result = fnmatch (query.c_str(), target.c_str(),
242 FNM_NOESCAPE); // spooky
243 bool badness = (rvc_result == 0) ^ (op->content == "==");
244 return !badness;
245 }
246 else
247 {
248 int rvc_result = strverscmp (target.c_str(), query.c_str());
249 // normalize rvc_result
250 if (rvc_result < 0) rvc_result = -1;
251 if (rvc_result > 0) rvc_result = 1;
252 return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
253 }
44ce8ed5
FCE
254 }
255 else if (l->type == tok_identifier && l->content == "arch")
256 {
257 string target_architecture = s.architecture;
258 if (! (r->type == tok_string))
259 throw parse_error ("expected string literal", r);
260 string query_architecture = r->content;
dff50e09 261
7a468d68
FCE
262 int nomatch = fnmatch (query_architecture.c_str(),
263 target_architecture.c_str(),
264 FNM_NOESCAPE); // still spooky
265
561079c8
FCE
266 bool result;
267 if (op->type == tok_operator && op->content == "==")
268 result = !nomatch;
269 else if (op->type == tok_operator && op->content == "!=")
270 result = nomatch;
271 else
272 throw parse_error ("expected '==' or '!='", op);
273
274 return result;
275 }
717a457b 276 else if (l->type == tok_identifier && l->content.substr(0,7) == "CONFIG_")
561079c8 277 {
717a457b
MW
278 if (r->type == tok_string)
279 {
280 string lhs = s.kernel_config[l->content]; // may be empty
281 string rhs = r->content;
561079c8 282
717a457b 283 int nomatch = fnmatch (rhs.c_str(), lhs.c_str(), FNM_NOESCAPE); // still spooky
561079c8 284
717a457b
MW
285 bool result;
286 if (op->type == tok_operator && op->content == "==")
287 result = !nomatch;
288 else if (op->type == tok_operator && op->content == "!=")
289 result = nomatch;
290 else
291 throw parse_error ("expected '==' or '!='", op);
dff50e09 292
717a457b
MW
293 return result;
294 }
295 else if (r->type == tok_number)
296 {
297 const char* startp = s.kernel_config[l->content].c_str ();
298 char* endp = (char*) startp;
299 errno = 0;
300 int64_t lhs = (int64_t) strtoll (startp, & endp, 0);
301 if (errno == ERANGE || errno == EINVAL || *endp != '\0')
302 throw parse_error ("Config option value not a number", l);
303
304 int64_t rhs = lex_cast<int64_t>(r->content);
305 return eval_comparison (lhs, op, rhs);
306 }
307 else
308 throw parse_error ("expected string or number literal as right value",
309 r);
dff50e09 310 }
c434ec7e 311 else if (l->type == tok_string && r->type == tok_string)
5811366a 312 {
c434ec7e
FCE
313 string lhs = l->content;
314 string rhs = r->content;
315 return eval_comparison (lhs, op, rhs);
316 // NB: no wildcarding option here
317 }
318 else if (l->type == tok_number && r->type == tok_number)
319 {
320 int64_t lhs = lex_cast<int64_t>(l->content);
321 int64_t rhs = lex_cast<int64_t>(r->content);
322 return eval_comparison (lhs, op, rhs);
7a468d68 323 // NB: no wildcarding option here
5811366a
FCE
324 }
325 else if (l->type == tok_string && r->type == tok_number
326 && op->type == tok_operator)
327 throw parse_error ("expected string literal as right value", r);
328 else if (l->type == tok_number && r->type == tok_string
329 && op->type == tok_operator)
330 throw parse_error ("expected number literal as right value", r);
c434ec7e 331
177a8ead 332 else
561079c8 333 throw parse_error ("expected 'arch' or 'kernel_v' or 'kernel_vr' or 'CONFIG_...'\n"
5811366a 334 " or comparison between strings or integers", l);
177a8ead
FCE
335}
336
337
5811366a 338// Only tokens corresponding to the TRUE statement must be expanded
177a8ead 339const token*
3f847830 340parser::scan_pp (bool wildcard)
177a8ead
FCE
341{
342 while (true)
343 {
344 if (enqueued_pp.size() > 0)
345 {
346 const token* t = enqueued_pp[0];
347 enqueued_pp.erase (enqueued_pp.begin());
348 return t;
349 }
350
3f847830 351 const token* t = input.scan (wildcard); // NB: not recursive!
177a8ead
FCE
352 if (t == 0) // EOF
353 return t;
dff50e09 354
177a8ead
FCE
355 if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
356 return t;
357
358 // We have a %( - it's time to throw a preprocessing party!
359
2d7881bf
PP
360 bool result = false;
361 bool and_result = true;
362 const token *n = NULL;
363 do {
364 const token *l, *op, *r;
365 l = input.scan (false); // NB: not recursive, though perhaps could be
366 op = input.scan (false);
367 r = input.scan (false);
368 if (l == 0 || op == 0 || r == 0)
369 throw parse_error ("incomplete condition after '%('", t);
370 // NB: consider generalizing to consume all tokens until %?, and
371 // passing that as a vector to an evaluator.
372
373 // Do not evaluate the condition if we haven't expanded everything.
374 // This may occur when having several recursive conditionals.
375 and_result &= eval_pp_conditional (session, l, op, r);
376 delete l;
377 delete op;
378 delete r;
379 delete n;
380
381 n = input.scan ();
382 if (n && n->type == tok_operator && n->content == "&&")
383 continue;
384 result |= and_result;
385 and_result = true;
386 if (! (n && n->type == tok_operator && n->content == "||"))
387 break;
388 } while (true);
3f847830
FCE
389
390 /*
391 clog << "PP eval (" << *t << ") == " << result << endl;
392 */
393
2d7881bf 394 const token *m = n; // NB: not recursive
177a8ead
FCE
395 if (! (m && m->type == tok_operator && m->content == "%?"))
396 throw parse_error ("expected '%?' marker for conditional", t);
70c743d8 397 delete m; // "%?"
177a8ead
FCE
398
399 vector<const token*> my_enqueued_pp;
3f847830
FCE
400
401 int nesting = 0;
c28668ea 402 int then = 0;
177a8ead
FCE
403 while (true) // consume THEN tokens
404 {
3f847830
FCE
405 try
406 {
407 m = result ? scan_pp (wildcard) : input.scan (wildcard);
408 }
409 catch (const parse_error &e)
410 {
dff50e09 411 if (result) throw e; // propagate errors if THEN branch taken
d57671d3 412 continue;
3f847830
FCE
413 }
414
415 if (m && m->type == tok_operator && m->content == "%(") // nested %(
416 nesting ++;
c28668ea
WH
417 if (m && m->type == tok_operator && m->content == "%?") {
418 then ++;
419 if (nesting != then)
420 throw parse_error ("incomplete conditional - missing '%('", m);
421 }
3f847830
FCE
422 if (nesting == 0 && m && (m->type == tok_operator && (m->content == "%:" || // ELSE
423 m->content == "%)"))) // END
177a8ead 424 break;
c28668ea 425 if (nesting && m && m->type == tok_operator && m->content == "%)") { // nested %)
3f847830 426 nesting --;
c28668ea
WH
427 then --;
428 }
3f847830 429
d57671d3
FCE
430 if (!m)
431 throw parse_error ("incomplete conditional - missing '%:' or '%)'", t);
432 if (result)
177a8ead 433 my_enqueued_pp.push_back (m);
d57671d3 434 if (!result)
3f847830
FCE
435 delete m; // do nothing, just dispose of unkept THEN token
436
437 continue;
177a8ead 438 }
dff50e09 439
177a8ead 440 if (m && m->type == tok_operator && m->content == "%:") // ELSE
70c743d8
JS
441 {
442 delete m; // "%:"
3f847830 443 int nesting = 0;
c28668ea 444 int then = 0;
70c743d8
JS
445 while (true)
446 {
3f847830
FCE
447 try
448 {
449 m = result ? input.scan (wildcard) : scan_pp (wildcard);
dff50e09 450 }
3f847830
FCE
451 catch (const parse_error& e)
452 {
dff50e09 453 if (!result) throw e; // propagate errors if ELSE branch taken
d57671d3 454 continue;
3f847830
FCE
455 }
456
457 if (m && m->type == tok_operator && m->content == "%(") // nested %(
458 nesting ++;
c28668ea
WH
459 if (m && m->type == tok_operator && m->content == "%?") {
460 then ++;
461 if (nesting != then)
462 throw parse_error ("incomplete conditional - missing '%('", m);
463 }
3f847830 464 if (nesting == 0 && m && m->type == tok_operator && m->content == "%)") // END
70c743d8 465 break;
c28668ea 466 if (nesting && m && m->type == tok_operator && m->content == "%)") { // nested %)
3f847830 467 nesting --;
c28668ea
WH
468 then --;
469 }
3f847830 470
d57671d3 471 if (!m)
3f847830 472 throw parse_error ("incomplete conditional - missing %)", t);
d57671d3 473 if (!result)
dff50e09 474 my_enqueued_pp.push_back (m);
d57671d3 475 if (result)
3f847830
FCE
476 delete m; // do nothing, just dispose of unkept ELSE token
477
478 continue;
70c743d8
JS
479 }
480 }
3f847830
FCE
481
482 /*
483 clog << "PP eval (" << *t << ") == " << result << " tokens: " << endl;
484 for (unsigned k=0; k<my_enqueued_pp.size(); k++)
485 clog << * my_enqueued_pp[k] << endl;
486 clog << endl;
487 */
488
70c743d8
JS
489 delete t; // "%("
490 delete m; // "%)"
177a8ead 491
3f847830 492
177a8ead
FCE
493 // NB: we transcribe the retained tokens here, and not inside
494 // the THEN/ELSE while loops. If it were done there, each loop
495 // would become infinite (each iteration consuming an ordinary
496 // token the previous one just pushed there). Guess how I
497 // figured that out.
498 enqueued_pp.insert (enqueued_pp.end(),
499 my_enqueued_pp.begin(),
500 my_enqueued_pp.end());
501
502 // Go back to outermost while(true) loop. We hope that at least
503 // some THEN or ELSE tokens were enqueued. If not, around we go
504 // again, until EOF.
505 }
506}
507
508
2f1a1aea 509const token*
0c218afb 510parser::next (bool wildcard)
2f1a1aea
FCE
511{
512 if (! next_t)
0c218afb 513 next_t = scan_pp (wildcard);
2f1a1aea
FCE
514 if (! next_t)
515 throw parse_error ("unexpected end-of-file");
516
2f1a1aea
FCE
517 last_t = next_t;
518 // advance by zeroing next_t
519 next_t = 0;
520 return last_t;
521}
522
523
524const token*
0c218afb 525parser::peek (bool wildcard)
2f1a1aea
FCE
526{
527 if (! next_t)
0c218afb 528 next_t = scan_pp (wildcard);
2f1a1aea
FCE
529
530 // don't advance by zeroing next_t
531 last_t = next_t;
532 return next_t;
533}
534
535
d7f3e0c5
GH
536static inline bool
537tok_is(token const * t, token_type tt, string const & expected)
538{
539 return t && t->type == tt && t->content == expected;
540}
541
542
dff50e09 543const token*
d7f3e0c5
GH
544parser::expect_known (token_type tt, string const & expected)
545{
546 const token *t = next();
57b73400 547 if (! (t && t->type == tt && t->content == expected))
d7f3e0c5
GH
548 throw parse_error ("expected '" + expected + "'");
549 return t;
550}
551
552
dff50e09 553const token*
d7f3e0c5
GH
554parser::expect_unknown (token_type tt, string & target)
555{
556 const token *t = next();
557 if (!(t && t->type == tt))
558 throw parse_error ("expected " + tt2str(tt));
559 target = t->content;
560 return t;
561}
562
563
dff50e09 564const token*
493ee224
DS
565parser::expect_unknown2 (token_type tt1, token_type tt2, string & target)
566{
567 const token *t = next();
568 if (!(t && (t->type == tt1 || t->type == tt2)))
569 throw parse_error ("expected " + tt2str(tt1) + " or " + tt2str(tt2));
570 target = t->content;
571 return t;
572}
573
574
dff50e09 575const token*
d7f3e0c5
GH
576parser::expect_op (std::string const & expected)
577{
578 return expect_known (tok_operator, expected);
579}
580
581
dff50e09 582const token*
d7f3e0c5
GH
583parser::expect_kw (std::string const & expected)
584{
585 return expect_known (tok_identifier, expected);
586}
587
dff50e09 588const token*
e38723d2 589parser::expect_number (int64_t & value)
57b73400 590{
e38723d2
MH
591 bool neg = false;
592 const token *t = next();
593 if (t->type == tok_operator && t->content == "-")
594 {
595 neg = true;
596 t = next ();
597 }
598 if (!(t && t->type == tok_number))
599 throw parse_error ("expected number");
600
601 const char* startp = t->content.c_str ();
602 char* endp = (char*) startp;
603
604 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
605 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
606 // since the lexer only gives us positive digit strings, but we'll
607 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
608 errno = 0;
609 value = (int64_t) strtoull (startp, & endp, 0);
610 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
611 || (neg && (unsigned long long) value > 9223372036854775808ULL)
612 || (unsigned long long) value > 18446744073709551615ULL
613 || value < -9223372036854775807LL-1)
dff50e09
FCE
614 throw parse_error ("number invalid or out of range");
615
e38723d2
MH
616 if (neg)
617 value = -value;
618
619 return t;
57b73400
GH
620}
621
d7f3e0c5 622
dff50e09 623const token*
d7f3e0c5
GH
624parser::expect_ident (std::string & target)
625{
626 return expect_unknown (tok_identifier, target);
627}
628
629
dff50e09 630const token*
493ee224
DS
631parser::expect_ident_or_keyword (std::string & target)
632{
633 return expect_unknown2 (tok_identifier, tok_keyword, target);
634}
635
636
dff50e09 637bool
d7f3e0c5
GH
638parser::peek_op (std::string const & op)
639{
640 return tok_is (peek(), tok_operator, op);
641}
642
643
dff50e09 644bool
d7f3e0c5
GH
645parser::peek_kw (std::string const & kw)
646{
647 return tok_is (peek(), tok_identifier, kw);
648}
649
650
651
66c7d4c1 652lexer::lexer (istream& input, const string& in, systemtap_session& s):
2203b032 653 input_name (in), input_pointer (0), input_end (0),
9300f661
JS
654 cursor_suspend_count(0), cursor_suspend_line (1), cursor_suspend_column (1),
655 cursor_line (1), cursor_column (1),
66c7d4c1 656 session(s), current_file (0)
eacb10ce 657{
66c7d4c1 658 getline(input, input_contents, '\0');
2203b032 659
66c7d4c1
JS
660 input_pointer = input_contents.data();
661 input_end = input_contents.data() + input_contents.size();
662
663 if (keywords.empty())
664 {
665 keywords.insert("probe");
666 keywords.insert("global");
667 keywords.insert("function");
668 keywords.insert("if");
669 keywords.insert("else");
670 keywords.insert("for");
671 keywords.insert("foreach");
672 keywords.insert("in");
673 keywords.insert("limit");
674 keywords.insert("return");
675 keywords.insert("delete");
676 keywords.insert("while");
677 keywords.insert("break");
678 keywords.insert("continue");
679 keywords.insert("next");
680 keywords.insert("string");
681 keywords.insert("long");
682 }
eacb10ce 683}
2f1a1aea 684
66c7d4c1
JS
685set<string> lexer::keywords;
686
1b1b4ceb
RA
687void
688lexer::set_current_file (stapfile* f)
689{
690 current_file = f;
2203b032
JS
691 if (f)
692 {
693 f->file_contents = input_contents;
694 f->name = input_name;
695 }
1b1b4ceb 696}
bb2e3076
FCE
697
698int
699lexer::input_peek (unsigned n)
700{
66c7d4c1
JS
701 if (input_pointer + n >= input_end)
702 return -1; // EOF
703 return (unsigned char)*(input_pointer + n);
bb2e3076
FCE
704}
705
706
dff50e09 707int
2f1a1aea
FCE
708lexer::input_get ()
709{
66c7d4c1 710 int c = input_peek();
bb2e3076
FCE
711 if (c < 0) return c; // EOF
712
66c7d4c1
JS
713 ++input_pointer;
714
3f99432c 715 if (cursor_suspend_count)
9300f661
JS
716 {
717 // Track effect of input_put: preserve previous cursor/line_column
718 // until all of its characters are consumed.
719 if (--cursor_suspend_count == 0)
720 {
721 cursor_line = cursor_suspend_line;
722 cursor_column = cursor_suspend_column;
723 }
724 }
3f99432c 725 else
2f1a1aea 726 {
3f99432c
FCE
727 // update source cursor
728 if (c == '\n')
729 {
730 cursor_line ++;
731 cursor_column = 1;
732 }
733 else
734 cursor_column ++;
2f1a1aea 735 }
2f1a1aea 736
eacb10ce 737 // clog << "[" << (char)c << "]";
2f1a1aea
FCE
738 return c;
739}
740
741
3f99432c 742void
9300f661 743lexer::input_put (const string& chars, const token* t)
3f99432c 744{
66c7d4c1
JS
745 size_t pos = input_pointer - input_contents.data();
746 // clog << "[put:" << chars << " @" << pos << "]";
747 input_contents.insert (pos, chars);
eacb10ce 748 cursor_suspend_count += chars.size();
9300f661
JS
749 cursor_suspend_line = cursor_line;
750 cursor_suspend_column = cursor_column;
751 cursor_line = t->location.line;
752 cursor_column = t->location.column;
66c7d4c1
JS
753 input_pointer = input_contents.data() + pos;
754 input_end = input_contents.data() + input_contents.size();
3f99432c
FCE
755}
756
757
2f1a1aea 758token*
3f847830 759lexer::scan (bool wildcard)
2f1a1aea
FCE
760{
761 token* n = new token;
2203b032 762 n->location.file = current_file;
2f1a1aea 763
9300f661
JS
764skip:
765 bool suspended = (cursor_suspend_count > 0);
2f1a1aea
FCE
766 n->location.line = cursor_line;
767 n->location.column = cursor_column;
768
769 int c = input_get();
3f99432c 770 // clog << "{" << (char)c << (char)c2 << "}";
2f1a1aea
FCE
771 if (c < 0)
772 {
773 delete n;
774 return 0;
775 }
776
777 if (isspace (c))
778 goto skip;
779
66c7d4c1
JS
780 int c2 = input_peek ();
781
3f99432c
FCE
782 // Paste command line arguments as character streams into
783 // the beginning of a token. $1..$999 go through as raw
784 // characters; @1..@999 are quoted/escaped as strings.
785 // $# and @# expand to the number of arguments, similarly
786 // raw or quoted.
9300f661 787 if ((c == '$' || c == '@') && (c2 == '#'))
3f99432c 788 {
9300f661
JS
789 n->content.push_back (c);
790 n->content.push_back (c2);
3f99432c 791 input_get(); // swallow '#'
9300f661
JS
792 if (suspended)
793 throw parse_error ("invalid nested substitution of command line arguments", n);
794 size_t num_args = session.args.size ();
795 input_put ((c == '$') ? lex_cast (num_args) : lex_cast_qstring (num_args), n);
796 n->content.clear();
797 goto skip;
3f99432c 798 }
9300f661 799 else if ((c == '$' || c == '@') && (isdigit (c2)))
3f99432c 800 {
9300f661 801 n->content.push_back (c);
3f99432c
FCE
802 unsigned idx = 0;
803 do
804 {
805 input_get ();
806 idx = (idx * 10) + (c2 - '0');
9300f661 807 n->content.push_back (c2);
3f99432c
FCE
808 c2 = input_peek ();
809 } while (c2 > 0 &&
dff50e09 810 isdigit (c2) &&
3f99432c 811 idx <= session.args.size()); // prevent overflow
9300f661
JS
812 if (suspended)
813 throw parse_error ("invalid nested substitution of command line arguments", n);
3f99432c
FCE
814 if (idx == 0 ||
815 idx-1 >= session.args.size())
aca66a36
JS
816 throw parse_error ("command line argument index " + lex_cast(idx)
817 + " out of range [1-" + lex_cast(session.args.size()) + "]", n);
9300f661
JS
818 const string& arg = session.args[idx-1];
819 input_put ((c == '$') ? arg : lex_cast_qstring (arg), n);
820 n->content.clear();
821 goto skip;
3f99432c
FCE
822 }
823
0c218afb
MH
824 else if (isalpha (c) || c == '$' || c == '@' || c == '_' ||
825 (wildcard && c == '*'))
2f1a1aea
FCE
826 {
827 n->type = tok_identifier;
828 n->content = (char) c;
0c218afb
MH
829 while (isalnum (c2) || c2 == '_' || c2 == '$' ||
830 (wildcard && c2 == '*'))
2f1a1aea 831 {
3f99432c
FCE
832 input_get ();
833 n->content.push_back (c2);
834 c2 = input_peek ();
6e213f58 835 }
213bee8f 836
66c7d4c1 837 if (keywords.count(n->content))
3f99432c 838 n->type = tok_keyword;
dff50e09 839
2f1a1aea
FCE
840 return n;
841 }
842
3a20432b 843 else if (isdigit (c)) // positive literal
2f1a1aea 844 {
2f1a1aea 845 n->type = tok_number;
9c0c0e46
FCE
846 n->content = (char) c;
847
66c7d4c1 848 while (isalnum (c2))
2f1a1aea 849 {
9c0c0e46
FCE
850 // NB: isalnum is very permissive. We rely on strtol, called in
851 // parser::parse_literal below, to confirm that the number string
852 // is correctly formatted and in range.
853
66c7d4c1
JS
854 input_get ();
855 n->content.push_back (c2);
856 c2 = input_peek ();
2f1a1aea
FCE
857 }
858 return n;
859 }
860
861 else if (c == '\"')
862 {
863 n->type = tok_string;
864 while (1)
865 {
866 c = input_get ();
867
3f99432c 868 if (c < 0 || c == '\n')
2f1a1aea
FCE
869 {
870 n->type = tok_junk;
871 break;
872 }
873 if (c == '\"') // closing double-quotes
874 break;
3f99432c 875 else if (c == '\\') // see also input_put
dff50e09 876 {
7d46afb8
GH
877 c = input_get ();
878 switch (c)
879 {
880 case 'a':
881 case 'b':
882 case 't':
883 case 'n':
884 case 'v':
885 case 'f':
886 case 'r':
f03954fd 887 case '0' ... '7': // NB: need only match the first digit
7d46afb8 888 case '\\':
7d46afb8 889 // Pass these escapes through to the string value
dff50e09 890 // being parsed; it will be emitted into a C literal.
7d46afb8
GH
891
892 n->content.push_back('\\');
893
3f99432c 894 // fall through
7d46afb8 895 default:
7d46afb8
GH
896 n->content.push_back(c);
897 break;
898 }
2f1a1aea
FCE
899 }
900 else
901 n->content.push_back(c);
902 }
903 return n;
904 }
905
906 else if (ispunct (c))
907 {
bb2e3076 908 int c3 = input_peek (1);
2f1a1aea 909
3a20432b
FCE
910 // NB: if we were to recognize negative numeric literals here,
911 // we'd introduce another grammar ambiguity:
912 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
913 // instead of tok_number(1) tok_operator('-') tok_number(1)
914
66c7d4c1 915 if (c == '#') // shell comment
2f1a1aea
FCE
916 {
917 unsigned this_line = cursor_line;
bb2e3076
FCE
918 do { c = input_get (); }
919 while (c >= 0 && cursor_line == this_line);
2f1a1aea
FCE
920 goto skip;
921 }
66c7d4c1 922 else if ((c == '/' && c2 == '/')) // C++ comment
63a7c90e
FCE
923 {
924 unsigned this_line = cursor_line;
bb2e3076
FCE
925 do { c = input_get (); }
926 while (c >= 0 && cursor_line == this_line);
63a7c90e
FCE
927 goto skip;
928 }
929 else if (c == '/' && c2 == '*') // C comment
930 {
66c7d4c1
JS
931 (void) input_get (); // swallow '*' already in c2
932 c = input_get ();
63a7c90e 933 c2 = input_get ();
bb2e3076 934 while (c2 >= 0)
63a7c90e 935 {
66c7d4c1
JS
936 if (c == '*' && c2 == '/')
937 break;
63a7c90e
FCE
938 c = c2;
939 c2 = input_get ();
63a7c90e 940 }
bb2e3076 941 goto skip;
63a7c90e 942 }
54dfabe9
FCE
943 else if (c == '%' && c2 == '{') // embedded code
944 {
945 n->type = tok_embedded;
946 (void) input_get (); // swallow '{' already in c2
66c7d4c1
JS
947 c = input_get ();
948 c2 = input_get ();
949 while (c2 >= 0)
54dfabe9 950 {
66c7d4c1
JS
951 if (c == '%' && c2 == '}')
952 return n;
54dfabe9 953 n->content += c;
66c7d4c1
JS
954 c = c2;
955 c2 = input_get ();
54dfabe9 956 }
66c7d4c1 957 n->type = tok_junk;
54dfabe9
FCE
958 return n;
959 }
2f1a1aea 960
bb2e3076
FCE
961 // We're committed to recognizing at least the first character
962 // as an operator.
2f1a1aea 963 n->type = tok_operator;
66c7d4c1 964 n->content = c;
2f1a1aea 965
bb2e3076 966 // match all valid operators, in decreasing size order
66c7d4c1
JS
967 if ((c == '<' && c2 == '<' && c3 == '<') ||
968 (c == '<' && c2 == '<' && c3 == '=') ||
969 (c == '>' && c2 == '>' && c3 == '='))
82919855 970 {
66c7d4c1
JS
971 n->content += c2;
972 n->content += c3;
bb2e3076
FCE
973 input_get (); input_get (); // swallow other two characters
974 }
66c7d4c1
JS
975 else if ((c == '=' && c2 == '=') ||
976 (c == '!' && c2 == '=') ||
977 (c == '<' && c2 == '=') ||
978 (c == '>' && c2 == '=') ||
979 (c == '+' && c2 == '=') ||
980 (c == '-' && c2 == '=') ||
981 (c == '*' && c2 == '=') ||
982 (c == '/' && c2 == '=') ||
983 (c == '%' && c2 == '=') ||
984 (c == '&' && c2 == '=') ||
985 (c == '^' && c2 == '=') ||
986 (c == '|' && c2 == '=') ||
987 (c == '.' && c2 == '=') ||
988 (c == '&' && c2 == '&') ||
989 (c == '|' && c2 == '|') ||
990 (c == '+' && c2 == '+') ||
991 (c == '-' && c2 == '-') ||
992 (c == '-' && c2 == '>') ||
993 (c == '<' && c2 == '<') ||
994 (c == '>' && c2 == '>') ||
177a8ead 995 // preprocessor tokens
66c7d4c1
JS
996 (c == '%' && c2 == '(') ||
997 (c == '%' && c2 == '?') ||
998 (c == '%' && c2 == ':') ||
999 (c == '%' && c2 == ')'))
bb2e3076 1000 {
66c7d4c1 1001 n->content += c2;
bb2e3076 1002 input_get (); // swallow other character
dff50e09 1003 }
2f1a1aea
FCE
1004
1005 return n;
1006 }
1007
1008 else
1009 {
1010 n->type = tok_junk;
1011 n->content = (char) c;
1012 return n;
1013 }
1014}
1015
1016
1017// ------------------------------------------------------------------------
1018
1019stapfile*
1020parser::parse ()
1021{
1022 stapfile* f = new stapfile;
1b1b4ceb 1023 input.set_current_file (f);
56099f08
FCE
1024
1025 bool empty = true;
1026
2f1a1aea
FCE
1027 while (1)
1028 {
1029 try
1030 {
1031 const token* t = peek ();
56099f08 1032 if (! t) // nice clean EOF
2f1a1aea
FCE
1033 break;
1034
56099f08 1035 empty = false;
6e213f58
DS
1036 if (t->type == tok_keyword && t->content == "probe")
1037 {
1038 context = con_probe;
1039 parse_probe (f->probes, f->aliases);
1040 }
1041 else if (t->type == tok_keyword && t->content == "global")
1042 {
1043 context = con_global;
4b5f3e45 1044 parse_global (f->globals, f->probes);
6e213f58
DS
1045 }
1046 else if (t->type == tok_keyword && t->content == "function")
1047 {
1048 context = con_function;
1049 parse_functiondecl (f->functions);
1050 }
54dfabe9 1051 else if (t->type == tok_embedded)
6e213f58
DS
1052 {
1053 context = con_embedded;
1054 f->embeds.push_back (parse_embeddedcode ());
1055 }
2f1a1aea 1056 else
6e213f58
DS
1057 {
1058 context = con_unknown;
1059 throw parse_error ("expected 'probe', 'global', 'function', or '%{'");
1060 }
2f1a1aea
FCE
1061 }
1062 catch (parse_error& pe)
1063 {
1064 print_error (pe);
cd7116b8 1065 if (pe.skip_some) // for recovery
dff50e09 1066 try
cd7116b8
FCE
1067 {
1068 // Quietly swallow all tokens until the next '}'.
1069 while (1)
1070 {
1071 const token* t = peek ();
1072 if (! t)
1073 break;
1074 next ();
1075 if (t->type == tok_operator && t->content == "}")
1076 break;
1077 }
1078 }
1079 catch (parse_error& pe2)
1080 {
1081 // parse error during recovery ... ugh
1082 print_error (pe2);
1083 }
177a8ead 1084 }
2f1a1aea
FCE
1085 }
1086
56099f08
FCE
1087 if (empty)
1088 {
1089 cerr << "Input file '" << input_name << "' is empty or missing." << endl;
1090 delete f;
2203b032 1091 f = 0;
56099f08
FCE
1092 }
1093 else if (num_errors > 0)
2f1a1aea
FCE
1094 {
1095 cerr << num_errors << " parse error(s)." << endl;
1096 delete f;
2203b032 1097 f = 0;
2f1a1aea 1098 }
dff50e09 1099
2203b032 1100 input.set_current_file(0);
2f1a1aea
FCE
1101 return f;
1102}
1103
1104
20c6c071 1105void
54dfabe9
FCE
1106parser::parse_probe (std::vector<probe *> & probe_ret,
1107 std::vector<probe_alias *> & alias_ret)
2f1a1aea 1108{
82919855 1109 const token* t0 = next ();
6e213f58 1110 if (! (t0->type == tok_keyword && t0->content == "probe"))
82919855
FCE
1111 throw parse_error ("expected 'probe'");
1112
20c6c071
GH
1113 vector<probe_point *> aliases;
1114 vector<probe_point *> locations;
1115
1116 bool equals_ok = true;
82919855 1117
97266278
LG
1118 int epilogue_alias = 0;
1119
2f1a1aea
FCE
1120 while (1)
1121 {
b4ceace2 1122 probe_point * pp = parse_probe_point ();
dff50e09 1123
b4ceace2 1124 const token* t = peek ();
dff50e09 1125 if (equals_ok && t
b4ceace2
FCE
1126 && t->type == tok_operator && t->content == "=")
1127 {
1ad820e3 1128 if (pp->optional || pp->sufficient)
f1a0157a 1129 throw parse_error ("probe point alias name cannot be optional nor sufficient", pp->components.front()->tok);
b4ceace2
FCE
1130 aliases.push_back(pp);
1131 next ();
1132 continue;
1133 }
dff50e09 1134 else if (equals_ok && t
97266278
LG
1135 && t->type == tok_operator && t->content == "+=")
1136 {
1ad820e3 1137 if (pp->optional || pp->sufficient)
f1a0157a 1138 throw parse_error ("probe point alias name cannot be optional nor sufficient", pp->components.front()->tok);
97266278
LG
1139 aliases.push_back(pp);
1140 epilogue_alias = 1;
1141 next ();
1142 continue;
1143 }
b4ceace2
FCE
1144 else if (t && t->type == tok_operator && t->content == ",")
1145 {
1146 locations.push_back(pp);
1147 equals_ok = false;
1148 next ();
1149 continue;
1150 }
1151 else if (t && t->type == tok_operator && t->content == "{")
1152 {
1153 locations.push_back(pp);
1154 break;
1155 }
2f1a1aea 1156 else
9c0c0e46 1157 throw parse_error ("expected probe point specifier");
2f1a1aea 1158 }
20c6c071 1159
20c6c071
GH
1160 if (aliases.empty())
1161 {
54dfabe9
FCE
1162 probe* p = new probe;
1163 p->tok = t0;
1164 p->locations = locations;
1165 p->body = parse_stmt_block ();
37ebca01 1166 p->privileged = privileged;
54dfabe9 1167 probe_ret.push_back (p);
20c6c071
GH
1168 }
1169 else
1170 {
54dfabe9 1171 probe_alias* p = new probe_alias (aliases);
97266278
LG
1172 if(epilogue_alias)
1173 p->epilogue_style = true;
1174 else
1175 p->epilogue_style = false;
54dfabe9
FCE
1176 p->tok = t0;
1177 p->locations = locations;
1178 p->body = parse_stmt_block ();
37ebca01 1179 p->privileged = privileged;
54dfabe9 1180 alias_ret.push_back (p);
20c6c071 1181 }
54dfabe9 1182}
20c6c071 1183
54dfabe9
FCE
1184
1185embeddedcode*
1186parser::parse_embeddedcode ()
1187{
1188 embeddedcode* e = new embeddedcode;
1189 const token* t = next ();
1190 if (t->type != tok_embedded)
24cb178f
FCE
1191 throw parse_error ("expected '%{'");
1192
1193 if (! privileged)
cd7116b8
FCE
1194 throw parse_error ("embedded code in unprivileged script",
1195 false /* don't skip tokens for parse resumption */);
54dfabe9
FCE
1196
1197 e->tok = t;
1198 e->code = t->content;
1199 return e;
2f1a1aea
FCE
1200}
1201
1202
1203block*
56099f08 1204parser::parse_stmt_block ()
2f1a1aea
FCE
1205{
1206 block* pb = new block;
1207
56099f08
FCE
1208 const token* t = next ();
1209 if (! (t->type == tok_operator && t->content == "{"))
1210 throw parse_error ("expected '{'");
1211
1212 pb->tok = t;
2b066ec1 1213
2f1a1aea
FCE
1214 while (1)
1215 {
1216 try
1217 {
2b066ec1
FCE
1218 t = peek ();
1219 if (t && t->type == tok_operator && t->content == "}")
1220 {
1221 next ();
1222 break;
1223 }
1224
2f1a1aea 1225 pb->statements.push_back (parse_statement ());
2f1a1aea
FCE
1226 }
1227 catch (parse_error& pe)
1228 {
1229 print_error (pe);
54dfabe9 1230
2f1a1aea
FCE
1231 // Quietly swallow all tokens until the next ';' or '}'.
1232 while (1)
1233 {
1234 const token* t = peek ();
54dfabe9 1235 if (! t) return 0;
2f1a1aea 1236 next ();
54dfabe9
FCE
1237 if (t->type == tok_operator
1238 && (t->content == "}" || t->content == ";"))
2f1a1aea
FCE
1239 break;
1240 }
1241 }
1242 }
1243
1244 return pb;
1245}
1246
1247
1248statement*
1249parser::parse_statement ()
1250{
40b71c47 1251 statement *ret;
2f1a1aea
FCE
1252 const token* t = peek ();
1253 if (t && t->type == tok_operator && t->content == ";")
1254 {
69c68955
FCE
1255 null_statement* n = new null_statement ();
1256 n->tok = next ();
1257 return n;
2f1a1aea 1258 }
dff50e09 1259 else if (t && t->type == tok_operator && t->content == "{")
40b71c47 1260 return parse_stmt_block (); // Don't squash semicolons.
6e213f58 1261 else if (t && t->type == tok_keyword && t->content == "if")
40b71c47 1262 return parse_if_statement (); // Don't squash semicolons.
6e213f58 1263 else if (t && t->type == tok_keyword && t->content == "for")
40b71c47 1264 return parse_for_loop (); // Don't squash semicolons.
6e213f58 1265 else if (t && t->type == tok_keyword && t->content == "foreach")
40b71c47
MW
1266 return parse_foreach_loop (); // Don't squash semicolons.
1267 else if (t && t->type == tok_keyword && t->content == "while")
1268 return parse_while_loop (); // Don't squash semicolons.
6e213f58 1269 else if (t && t->type == tok_keyword && t->content == "return")
40b71c47 1270 ret = parse_return_statement ();
6e213f58 1271 else if (t && t->type == tok_keyword && t->content == "delete")
40b71c47 1272 ret = parse_delete_statement ();
6e213f58 1273 else if (t && t->type == tok_keyword && t->content == "break")
40b71c47 1274 ret = parse_break_statement ();
6e213f58 1275 else if (t && t->type == tok_keyword && t->content == "continue")
40b71c47 1276 ret = parse_continue_statement ();
6e213f58 1277 else if (t && t->type == tok_keyword && t->content == "next")
40b71c47 1278 ret = parse_next_statement ();
2f1a1aea
FCE
1279 else if (t && (t->type == tok_operator || // expressions are flexible
1280 t->type == tok_identifier ||
1281 t->type == tok_number ||
1282 t->type == tok_string))
40b71c47 1283 ret = parse_expr_statement ();
54dfabe9 1284 // XXX: consider generally accepting tok_embedded here too
2f1a1aea
FCE
1285 else
1286 throw parse_error ("expected statement");
40b71c47
MW
1287
1288 // Squash "empty" trailing colons after any "non-block-like" statement.
1289 t = peek ();
1290 if (t && t->type == tok_operator && t->content == ";")
1291 {
1292 next (); // Silently eat trailing ; after statement
1293 }
1294
1295 return ret;
2f1a1aea
FCE
1296}
1297
1298
56099f08 1299void
78f6bba6 1300parser::parse_global (vector <vardecl*>& globals, vector<probe*>&)
2f1a1aea 1301{
82919855 1302 const token* t0 = next ();
6e213f58 1303 if (! (t0->type == tok_keyword && t0->content == "global"))
82919855
FCE
1304 throw parse_error ("expected 'global'");
1305
56099f08
FCE
1306 while (1)
1307 {
1308 const token* t = next ();
1309 if (! (t->type == tok_identifier))
1310 throw parse_error ("expected identifier");
1311
2b066ec1
FCE
1312 for (unsigned i=0; i<globals.size(); i++)
1313 if (globals[i]->name == t->content)
57b73400 1314 throw parse_error ("duplicate global name");
dff50e09 1315
24cb178f
FCE
1316 vardecl* d = new vardecl;
1317 d->name = t->content;
1318 d->tok = t;
1319 globals.push_back (d);
56099f08 1320
82919855 1321 t = peek ();
ef474d24
JS
1322
1323 if (t && t->type == tok_operator && t->content == "[") // array size
1324 {
1325 int64_t size;
1326 next ();
1327 expect_number(size);
1328 if (size <= 0 || size > 1000000) // arbitrary max
1329 throw parse_error("array size out of range");
1330 d->maxsize = (int)size;
1331 expect_known(tok_operator, "]");
1332 t = peek ();
1333 }
1334
4b5f3e45 1335 if (t && t->type == tok_operator && t->content == "=") // initialization
ef474d24
JS
1336 {
1337 if (!d->compatible_arity(0))
1338 throw parse_error("only scalar globals can be initialized");
1339 d->set_arity(0);
1340 next ();
1341 d->init = parse_literal ();
1342 d->type = d->init->type;
1343 t = peek ();
1344 }
4b5f3e45 1345
c3799d72
AM
1346 if (t && t->type == tok_operator && t->content == ";") // termination
1347 next();
1348
4b5f3e45 1349 if (t && t->type == tok_operator && t->content == ",") // next global
82919855
FCE
1350 {
1351 next ();
1352 continue;
1353 }
56099f08 1354 else
82919855 1355 break;
56099f08
FCE
1356 }
1357}
1358
1359
24cb178f
FCE
1360void
1361parser::parse_functiondecl (std::vector<functiondecl*>& functions)
56099f08 1362{
82919855 1363 const token* t = next ();
6e213f58 1364 if (! (t->type == tok_keyword && t->content == "function"))
82919855
FCE
1365 throw parse_error ("expected 'function'");
1366
56099f08 1367
82919855 1368 t = next ();
6e213f58
DS
1369 if (! (t->type == tok_identifier)
1370 && ! (t->type == tok_keyword
1371 && (t->content == "string" || t->content == "long")))
56099f08 1372 throw parse_error ("expected identifier");
24cb178f
FCE
1373
1374 for (unsigned i=0; i<functions.size(); i++)
1375 if (functions[i]->name == t->content)
1376 throw parse_error ("duplicate function name");
1377
1378 functiondecl *fd = new functiondecl ();
56099f08
FCE
1379 fd->name = t->content;
1380 fd->tok = t;
1381
1382 t = next ();
6a505121
FCE
1383 if (t->type == tok_operator && t->content == ":")
1384 {
1385 t = next ();
6e213f58 1386 if (t->type == tok_keyword && t->content == "string")
6a505121 1387 fd->type = pe_string;
6e213f58 1388 else if (t->type == tok_keyword && t->content == "long")
6a505121
FCE
1389 fd->type = pe_long;
1390 else throw parse_error ("expected 'string' or 'long'");
1391
1392 t = next ();
1393 }
1394
56099f08
FCE
1395 if (! (t->type == tok_operator && t->content == "("))
1396 throw parse_error ("expected '('");
1397
1398 while (1)
1399 {
1400 t = next ();
1401
1402 // permit zero-argument fuctions
1403 if (t->type == tok_operator && t->content == ")")
1404 break;
1405 else if (! (t->type == tok_identifier))
1406 throw parse_error ("expected identifier");
1407 vardecl* vd = new vardecl;
1408 vd->name = t->content;
1409 vd->tok = t;
1410 fd->formal_args.push_back (vd);
1411
1412 t = next ();
6a505121
FCE
1413 if (t->type == tok_operator && t->content == ":")
1414 {
1415 t = next ();
6e213f58 1416 if (t->type == tok_keyword && t->content == "string")
6a505121 1417 vd->type = pe_string;
6e213f58 1418 else if (t->type == tok_keyword && t->content == "long")
6a505121
FCE
1419 vd->type = pe_long;
1420 else throw parse_error ("expected 'string' or 'long'");
dff50e09 1421
6a505121
FCE
1422 t = next ();
1423 }
56099f08
FCE
1424 if (t->type == tok_operator && t->content == ")")
1425 break;
1426 if (t->type == tok_operator && t->content == ",")
1427 continue;
1428 else
1429 throw parse_error ("expected ',' or ')'");
1430 }
1431
54dfabe9
FCE
1432 t = peek ();
1433 if (t && t->type == tok_embedded)
1434 fd->body = parse_embeddedcode ();
1435 else
1436 fd->body = parse_stmt_block ();
24cb178f
FCE
1437
1438 functions.push_back (fd);
2f1a1aea
FCE
1439}
1440
1441
9c0c0e46
FCE
1442probe_point*
1443parser::parse_probe_point ()
2f1a1aea 1444{
9c0c0e46 1445 probe_point* pl = new probe_point;
2f1a1aea 1446
9c0c0e46 1447 while (1)
2f1a1aea 1448 {
0c218afb 1449 const token* t = next (true); // wildcard scanning here
6e213f58
DS
1450 if (! (t->type == tok_identifier
1451 // we must allow ".return" and ".function", which are keywords
0c218afb 1452 || t->type == tok_keyword))
b4ceace2 1453 throw parse_error ("expected identifier or '*'");
9c0c0e46 1454
9c0c0e46
FCE
1455
1456 probe_point::component* c = new probe_point::component;
1457 c->functor = t->content;
f1a0157a 1458 c->tok = t;
9c0c0e46 1459 pl->components.push_back (c);
6e3347a9 1460 // NB we may add c->arg soon
9c0c0e46
FCE
1461
1462 t = peek ();
a477f3f1 1463
6e3347a9 1464 // consume optional parameter
9c0c0e46
FCE
1465 if (t && t->type == tok_operator && t->content == "(")
1466 {
1467 next (); // consume "("
1468 c->arg = parse_literal ();
1469
1470 t = next ();
1471 if (! (t->type == tok_operator && t->content == ")"))
1472 throw parse_error ("expected ')'");
1473
1474 t = peek ();
9c0c0e46 1475 }
9c0c0e46
FCE
1476
1477 if (t && t->type == tok_operator && t->content == ".")
6e3347a9
FCE
1478 {
1479 next ();
1480 continue;
1481 }
1482
f1a0157a 1483 // We only fall through here at the end of a probe point (past
6e3347a9
FCE
1484 // all the dotted/parametrized components).
1485
d898100a
FCE
1486 if (t && t->type == tok_operator &&
1487 (t->content == "?" || t->content == "!"))
6e3347a9
FCE
1488 {
1489 pl->optional = true;
d898100a
FCE
1490 if (t->content == "!") pl->sufficient = true;
1491 // NB: sufficient implies optional
6e3347a9
FCE
1492 next ();
1493 t = peek ();
1494 // fall through
cbbe8080
MH
1495 }
1496
1497 if (t && t->type == tok_keyword && t->content == "if")
1498 {
1499 next ();
1500 t = peek ();
75686668 1501 if (t && ! (t->type == tok_operator && t->content == "("))
cbbe8080
MH
1502 throw parse_error ("expected '('");
1503 next ();
1504
1505 pl->condition = parse_expression ();
1506
1507 t = peek ();
75686668 1508 if (t && ! (t->type == tok_operator && t->content == ")"))
cbbe8080
MH
1509 throw parse_error ("expected ')'");
1510 next ();
1511 t = peek ();
1512 // fall through
6e3347a9
FCE
1513 }
1514
dff50e09 1515 if (t && t->type == tok_operator
6e3347a9
FCE
1516 && (t->content == "{" || t->content == "," ||
1517 t->content == "=" || t->content == "+=" ))
1518 break;
dff50e09 1519
d898100a 1520 throw parse_error ("expected one of '. , ( ? ! { = +='");
2f1a1aea
FCE
1521 }
1522
1523 return pl;
1524}
1525
1526
1527literal*
1528parser::parse_literal ()
1529{
1530 const token* t = next ();
56099f08 1531 literal* l;
2f1a1aea 1532 if (t->type == tok_string)
56099f08 1533 l = new literal_string (t->content);
16e8f21f 1534 else
9c0c0e46 1535 {
16e8f21f
JS
1536 bool neg = false;
1537 if (t->type == tok_operator && t->content == "-")
1538 {
1539 neg = true;
1540 t = next ();
1541 }
1542
1543 if (t->type == tok_number)
1544 {
1545 const char* startp = t->content.c_str ();
1546 char* endp = (char*) startp;
1547
1548 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1549 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
79e6d33f
JS
1550 // since the lexer only gives us positive digit strings, but we'll
1551 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
16e8f21f
JS
1552 errno = 0;
1553 long long value = (long long) strtoull (startp, & endp, 0);
16e8f21f 1554 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
79e6d33f 1555 || (neg && (unsigned long long) value > 9223372036854775808ULL)
16e8f21f
JS
1556 || (unsigned long long) value > 18446744073709551615ULL
1557 || value < -9223372036854775807LL-1)
dff50e09 1558 throw parse_error ("number invalid or out of range");
16e8f21f 1559
79e6d33f
JS
1560 if (neg)
1561 value = -value;
1562
16e8f21f
JS
1563 l = new literal_number (value);
1564 }
1565 else
1566 throw parse_error ("expected literal string or number");
9c0c0e46 1567 }
56099f08
FCE
1568
1569 l->tok = t;
1570 return l;
2f1a1aea
FCE
1571}
1572
1573
1574if_statement*
1575parser::parse_if_statement ()
1576{
1577 const token* t = next ();
6e213f58 1578 if (! (t->type == tok_keyword && t->content == "if"))
56099f08
FCE
1579 throw parse_error ("expected 'if'");
1580 if_statement* s = new if_statement;
1581 s->tok = t;
1582
1583 t = next ();
2f1a1aea
FCE
1584 if (! (t->type == tok_operator && t->content == "("))
1585 throw parse_error ("expected '('");
1586
2f1a1aea
FCE
1587 s->condition = parse_expression ();
1588
1589 t = next ();
1590 if (! (t->type == tok_operator && t->content == ")"))
1591 throw parse_error ("expected ')'");
1592
1593 s->thenblock = parse_statement ();
1594
1595 t = peek ();
6e213f58 1596 if (t && t->type == tok_keyword && t->content == "else")
2f1a1aea
FCE
1597 {
1598 next ();
1599 s->elseblock = parse_statement ();
1600 }
ed10c639
FCE
1601 else
1602 s->elseblock = 0; // in case not otherwise initialized
2f1a1aea
FCE
1603
1604 return s;
1605}
1606
1607
69c68955
FCE
1608expr_statement*
1609parser::parse_expr_statement ()
1610{
1611 expr_statement *es = new expr_statement;
1612 const token* t = peek ();
1613 es->tok = t;
1614 es->value = parse_expression ();
1615 return es;
1616}
1617
1618
56099f08
FCE
1619return_statement*
1620parser::parse_return_statement ()
1621{
1622 const token* t = next ();
6e213f58 1623 if (! (t->type == tok_keyword && t->content == "return"))
56099f08 1624 throw parse_error ("expected 'return'");
6e213f58
DS
1625 if (context != con_function)
1626 throw parse_error ("found 'return' not in function context");
56099f08
FCE
1627 return_statement* s = new return_statement;
1628 s->tok = t;
1629 s->value = parse_expression ();
1630 return s;
1631}
1632
1633
1634delete_statement*
1635parser::parse_delete_statement ()
1636{
1637 const token* t = next ();
6e213f58 1638 if (! (t->type == tok_keyword && t->content == "delete"))
56099f08
FCE
1639 throw parse_error ("expected 'delete'");
1640 delete_statement* s = new delete_statement;
1641 s->tok = t;
1642 s->value = parse_expression ();
1643 return s;
1644}
1645
1646
f3c26ea5
FCE
1647next_statement*
1648parser::parse_next_statement ()
1649{
1650 const token* t = next ();
6e213f58 1651 if (! (t->type == tok_keyword && t->content == "next"))
f3c26ea5 1652 throw parse_error ("expected 'next'");
6e213f58
DS
1653 if (context != con_probe)
1654 throw parse_error ("found 'next' not in probe context");
f3c26ea5
FCE
1655 next_statement* s = new next_statement;
1656 s->tok = t;
1657 return s;
1658}
1659
1660
1661break_statement*
1662parser::parse_break_statement ()
1663{
1664 const token* t = next ();
6e213f58 1665 if (! (t->type == tok_keyword && t->content == "break"))
f3c26ea5
FCE
1666 throw parse_error ("expected 'break'");
1667 break_statement* s = new break_statement;
1668 s->tok = t;
1669 return s;
1670}
1671
1672
1673continue_statement*
1674parser::parse_continue_statement ()
1675{
1676 const token* t = next ();
6e213f58 1677 if (! (t->type == tok_keyword && t->content == "continue"))
f3c26ea5
FCE
1678 throw parse_error ("expected 'continue'");
1679 continue_statement* s = new continue_statement;
1680 s->tok = t;
1681 return s;
1682}
1683
1684
69c68955
FCE
1685for_loop*
1686parser::parse_for_loop ()
1687{
f3c26ea5 1688 const token* t = next ();
6e213f58 1689 if (! (t->type == tok_keyword && t->content == "for"))
f3c26ea5
FCE
1690 throw parse_error ("expected 'for'");
1691 for_loop* s = new for_loop;
1692 s->tok = t;
1693
1694 t = next ();
1695 if (! (t->type == tok_operator && t->content == "("))
1696 throw parse_error ("expected '('");
1697
1698 // initializer + ";"
1699 t = peek ();
1700 if (t && t->type == tok_operator && t->content == ";")
1701 {
cbfbbf69
FCE
1702 s->init = 0;
1703 next ();
f3c26ea5
FCE
1704 }
1705 else
1706 {
1707 s->init = parse_expr_statement ();
1708 t = next ();
1709 if (! (t->type == tok_operator && t->content == ";"))
1710 throw parse_error ("expected ';'");
1711 }
1712
1713 // condition + ";"
1714 t = peek ();
1715 if (t && t->type == tok_operator && t->content == ";")
1716 {
1717 literal_number* l = new literal_number(1);
1718 s->cond = l;
1719 s->cond->tok = next ();
1720 }
1721 else
1722 {
1723 s->cond = parse_expression ();
1724 t = next ();
1725 if (! (t->type == tok_operator && t->content == ";"))
1726 throw parse_error ("expected ';'");
1727 }
dff50e09 1728
f3c26ea5
FCE
1729 // increment + ")"
1730 t = peek ();
1731 if (t && t->type == tok_operator && t->content == ")")
1732 {
cbfbbf69
FCE
1733 s->incr = 0;
1734 next ();
f3c26ea5
FCE
1735 }
1736 else
1737 {
1738 s->incr = parse_expr_statement ();
1739 t = next ();
1740 if (! (t->type == tok_operator && t->content == ")"))
c958a431 1741 throw parse_error ("expected ')'");
f3c26ea5
FCE
1742 }
1743
1744 // block
1745 s->block = parse_statement ();
1746
1747 return s;
1748}
1749
1750
1751for_loop*
1752parser::parse_while_loop ()
1753{
1754 const token* t = next ();
6e213f58 1755 if (! (t->type == tok_keyword && t->content == "while"))
f3c26ea5
FCE
1756 throw parse_error ("expected 'while'");
1757 for_loop* s = new for_loop;
1758 s->tok = t;
1759
1760 t = next ();
1761 if (! (t->type == tok_operator && t->content == "("))
1762 throw parse_error ("expected '('");
1763
1764 // dummy init and incr fields
cbfbbf69
FCE
1765 s->init = 0;
1766 s->incr = 0;
f3c26ea5
FCE
1767
1768 // condition
1769 s->cond = parse_expression ();
1770
f3c26ea5
FCE
1771 t = next ();
1772 if (! (t->type == tok_operator && t->content == ")"))
1773 throw parse_error ("expected ')'");
dff50e09 1774
f3c26ea5
FCE
1775 // block
1776 s->block = parse_statement ();
1777
1778 return s;
69c68955
FCE
1779}
1780
1781
1782foreach_loop*
1783parser::parse_foreach_loop ()
1784{
1785 const token* t = next ();
6e213f58 1786 if (! (t->type == tok_keyword && t->content == "foreach"))
69c68955
FCE
1787 throw parse_error ("expected 'foreach'");
1788 foreach_loop* s = new foreach_loop;
1789 s->tok = t;
93484556 1790 s->sort_direction = 0;
27f21e8c 1791 s->limit = NULL;
69c68955
FCE
1792
1793 t = next ();
1794 if (! (t->type == tok_operator && t->content == "("))
1795 throw parse_error ("expected '('");
1796
1797 // see also parse_array_in
1798
1799 bool parenthesized = false;
1800 t = peek ();
1801 if (t && t->type == tok_operator && t->content == "[")
1802 {
1803 next ();
1804 parenthesized = true;
1805 }
1806
1807 while (1)
1808 {
1809 t = next ();
1810 if (! (t->type == tok_identifier))
1811 throw parse_error ("expected identifier");
1812 symbol* sym = new symbol;
1813 sym->tok = t;
1814 sym->name = t->content;
1815 s->indexes.push_back (sym);
1816
93484556
FCE
1817 t = peek ();
1818 if (t && t->type == tok_operator &&
1819 (t->content == "+" || t->content == "-"))
1820 {
1821 if (s->sort_direction)
1822 throw parse_error ("multiple sort directives");
1823 s->sort_direction = (t->content == "+") ? 1 : -1;
1824 s->sort_column = s->indexes.size();
1825 next();
1826 }
1827
69c68955
FCE
1828 if (parenthesized)
1829 {
93484556 1830 t = peek ();
69c68955
FCE
1831 if (t && t->type == tok_operator && t->content == ",")
1832 {
1833 next ();
1834 continue;
1835 }
1836 else if (t && t->type == tok_operator && t->content == "]")
1837 {
1838 next ();
1839 break;
1840 }
dff50e09 1841 else
69c68955
FCE
1842 throw parse_error ("expected ',' or ']'");
1843 }
1844 else
1845 break; // expecting only one expression
1846 }
1847
1848 t = next ();
6e213f58 1849 if (! (t->type == tok_keyword && t->content == "in"))
69c68955 1850 throw parse_error ("expected 'in'");
dff50e09 1851
d02548c0 1852 s->base = parse_indexable();
69c68955 1853
93484556
FCE
1854 t = peek ();
1855 if (t && t->type == tok_operator &&
1856 (t->content == "+" || t->content == "-"))
1857 {
1858 if (s->sort_direction)
1859 throw parse_error ("multiple sort directives");
1860 s->sort_direction = (t->content == "+") ? 1 : -1;
1861 s->sort_column = 0;
1862 next();
1863 }
1864
27f21e8c
DS
1865 t = peek ();
1866 if (tok_is(t, tok_keyword, "limit"))
1867 {
1868 next (); // get past the "limit"
1869 s->limit = parse_expression ();
1870 }
1871
69c68955
FCE
1872 t = next ();
1873 if (! (t->type == tok_operator && t->content == ")"))
1874 throw parse_error ("expected ')'");
1875
1876 s->block = parse_statement ();
1877 return s;
1878}
1879
1880
2f1a1aea
FCE
1881expression*
1882parser::parse_expression ()
1883{
1884 return parse_assignment ();
1885}
1886
2f1a1aea
FCE
1887
1888expression*
1889parser::parse_assignment ()
1890{
1891 expression* op1 = parse_ternary ();
1892
1893 const token* t = peek ();
82919855 1894 // right-associative operators
dff50e09 1895 if (t && t->type == tok_operator
2f1a1aea 1896 && (t->content == "=" ||
82919855 1897 t->content == "<<<" ||
2f1a1aea 1898 t->content == "+=" ||
bb2e3076
FCE
1899 t->content == "-=" ||
1900 t->content == "*=" ||
1901 t->content == "/=" ||
1902 t->content == "%=" ||
1903 t->content == "<<=" ||
1904 t->content == ">>=" ||
1905 t->content == "&=" ||
1906 t->content == "^=" ||
1907 t->content == "|=" ||
d5d7c2cc 1908 t->content == ".=" ||
dff50e09 1909 false))
2f1a1aea 1910 {
bb2e3076 1911 // NB: lvalueness is checked during elaboration / translation
2f1a1aea 1912 assignment* e = new assignment;
56099f08 1913 e->left = op1;
2f1a1aea 1914 e->op = t->content;
56099f08 1915 e->tok = t;
2f1a1aea 1916 next ();
82919855 1917 e->right = parse_expression ();
56099f08 1918 op1 = e;
2f1a1aea 1919 }
56099f08
FCE
1920
1921 return op1;
2f1a1aea
FCE
1922}
1923
1924
1925expression*
1926parser::parse_ternary ()
1927{
1928 expression* op1 = parse_logical_or ();
1929
1930 const token* t = peek ();
1931 if (t && t->type == tok_operator && t->content == "?")
1932 {
2f1a1aea 1933 ternary_expression* e = new ternary_expression;
56099f08 1934 e->tok = t;
2f1a1aea 1935 e->cond = op1;
56099f08
FCE
1936 next ();
1937 e->truevalue = parse_expression (); // XXX
2f1a1aea
FCE
1938
1939 t = next ();
1940 if (! (t->type == tok_operator && t->content == ":"))
1941 throw parse_error ("expected ':'");
1942
56099f08 1943 e->falsevalue = parse_expression (); // XXX
2f1a1aea
FCE
1944 return e;
1945 }
1946 else
1947 return op1;
1948}
1949
1950
1951expression*
1952parser::parse_logical_or ()
1953{
1954 expression* op1 = parse_logical_and ();
dff50e09 1955
2f1a1aea 1956 const token* t = peek ();
56099f08 1957 while (t && t->type == tok_operator && t->content == "||")
2f1a1aea 1958 {
2f1a1aea 1959 logical_or_expr* e = new logical_or_expr;
56099f08
FCE
1960 e->tok = t;
1961 e->op = t->content;
2f1a1aea 1962 e->left = op1;
56099f08
FCE
1963 next ();
1964 e->right = parse_logical_and ();
1965 op1 = e;
1966 t = peek ();
2f1a1aea 1967 }
56099f08
FCE
1968
1969 return op1;
2f1a1aea
FCE
1970}
1971
1972
1973expression*
1974parser::parse_logical_and ()
1975{
bb2e3076 1976 expression* op1 = parse_boolean_or ();
2f1a1aea
FCE
1977
1978 const token* t = peek ();
56099f08 1979 while (t && t->type == tok_operator && t->content == "&&")
2f1a1aea 1980 {
2f1a1aea
FCE
1981 logical_and_expr *e = new logical_and_expr;
1982 e->left = op1;
56099f08
FCE
1983 e->op = t->content;
1984 e->tok = t;
1985 next ();
bb2e3076
FCE
1986 e->right = parse_boolean_or ();
1987 op1 = e;
1988 t = peek ();
1989 }
1990
1991 return op1;
1992}
1993
1994
1995expression*
1996parser::parse_boolean_or ()
1997{
1998 expression* op1 = parse_boolean_xor ();
1999
2000 const token* t = peek ();
2001 while (t && t->type == tok_operator && t->content == "|")
2002 {
2003 binary_expression* e = new binary_expression;
2004 e->left = op1;
2005 e->op = t->content;
2006 e->tok = t;
2007 next ();
2008 e->right = parse_boolean_xor ();
2009 op1 = e;
2010 t = peek ();
2011 }
2012
2013 return op1;
2014}
2015
2016
2017expression*
2018parser::parse_boolean_xor ()
2019{
2020 expression* op1 = parse_boolean_and ();
2021
2022 const token* t = peek ();
2023 while (t && t->type == tok_operator && t->content == "^")
2024 {
2025 binary_expression* e = new binary_expression;
2026 e->left = op1;
2027 e->op = t->content;
2028 e->tok = t;
2029 next ();
2030 e->right = parse_boolean_and ();
2031 op1 = e;
2032 t = peek ();
2033 }
2034
2035 return op1;
2036}
2037
2038
2039expression*
2040parser::parse_boolean_and ()
2041{
2042 expression* op1 = parse_array_in ();
2043
2044 const token* t = peek ();
2045 while (t && t->type == tok_operator && t->content == "&")
2046 {
2047 binary_expression* e = new binary_expression;
2048 e->left = op1;
2049 e->op = t->content;
2050 e->tok = t;
2051 next ();
56099f08
FCE
2052 e->right = parse_array_in ();
2053 op1 = e;
2054 t = peek ();
2f1a1aea 2055 }
56099f08
FCE
2056
2057 return op1;
2f1a1aea
FCE
2058}
2059
2060
2061expression*
2062parser::parse_array_in ()
2063{
ce10591c 2064 // This is a very tricky case. All these are legit expressions:
69c68955 2065 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
ce10591c
FCE
2066 vector<expression*> indexes;
2067 bool parenthesized = false;
2f1a1aea
FCE
2068
2069 const token* t = peek ();
69c68955 2070 if (t && t->type == tok_operator && t->content == "[")
ce10591c
FCE
2071 {
2072 next ();
2073 parenthesized = true;
2074 }
2075
2076 while (1)
2077 {
2078 expression* op1 = parse_comparison ();
2079 indexes.push_back (op1);
2080
2081 if (parenthesized)
2082 {
2083 const token* t = peek ();
2084 if (t && t->type == tok_operator && t->content == ",")
2085 {
2086 next ();
2087 continue;
2088 }
69c68955 2089 else if (t && t->type == tok_operator && t->content == "]")
ce10591c
FCE
2090 {
2091 next ();
2092 break;
2093 }
dff50e09 2094 else
69c68955 2095 throw parse_error ("expected ',' or ']'");
ce10591c
FCE
2096 }
2097 else
2098 break; // expecting only one expression
2099 }
2100
2101 t = peek ();
6e213f58 2102 if (t && t->type == tok_keyword && t->content == "in")
2f1a1aea 2103 {
2f1a1aea 2104 array_in *e = new array_in;
56099f08 2105 e->tok = t;
ce10591c
FCE
2106 next (); // swallow "in"
2107
2108 arrayindex* a = new arrayindex;
2109 a->indexes = indexes;
d02548c0
GH
2110 a->base = parse_indexable();
2111 a->tok = a->base->get_tok();
ce10591c 2112 e->operand = a;
2f1a1aea
FCE
2113 return e;
2114 }
ce10591c
FCE
2115 else if (indexes.size() == 1) // no "in" - need one expression only
2116 return indexes[0];
2f1a1aea 2117 else
ce10591c 2118 throw parse_error ("unexpected comma-separated expression list");
2f1a1aea
FCE
2119}
2120
2121
2122expression*
2123parser::parse_comparison ()
2124{
bb2e3076 2125 expression* op1 = parse_shift ();
2f1a1aea
FCE
2126
2127 const token* t = peek ();
dff50e09 2128 while (t && t->type == tok_operator
553d27a5
FCE
2129 && (t->content == ">" ||
2130 t->content == "<" ||
2131 t->content == "==" ||
2132 t->content == "!=" ||
2133 t->content == "<=" ||
bb2e3076 2134 t->content == ">="))
2f1a1aea
FCE
2135 {
2136 comparison* e = new comparison;
2137 e->left = op1;
2138 e->op = t->content;
56099f08 2139 e->tok = t;
2f1a1aea 2140 next ();
bb2e3076
FCE
2141 e->right = parse_shift ();
2142 op1 = e;
2143 t = peek ();
2144 }
2145
2146 return op1;
2147}
2148
2149
2150expression*
2151parser::parse_shift ()
2152{
2153 expression* op1 = parse_concatenation ();
2154
2155 const token* t = peek ();
dff50e09 2156 while (t && t->type == tok_operator &&
bb2e3076
FCE
2157 (t->content == "<<" || t->content == ">>"))
2158 {
2159 binary_expression* e = new binary_expression;
2160 e->left = op1;
2161 e->op = t->content;
2162 e->tok = t;
2163 next ();
56099f08
FCE
2164 e->right = parse_concatenation ();
2165 op1 = e;
2166 t = peek ();
2f1a1aea 2167 }
56099f08
FCE
2168
2169 return op1;
2f1a1aea
FCE
2170}
2171
2172
2173expression*
2174parser::parse_concatenation ()
2175{
2176 expression* op1 = parse_additive ();
2177
2178 const token* t = peek ();
2179 // XXX: the actual awk string-concatenation operator is *whitespace*.
2180 // I don't know how to easily to model that here.
56099f08 2181 while (t && t->type == tok_operator && t->content == ".")
2f1a1aea
FCE
2182 {
2183 concatenation* e = new concatenation;
2184 e->left = op1;
2185 e->op = t->content;
56099f08 2186 e->tok = t;
2f1a1aea 2187 next ();
56099f08
FCE
2188 e->right = parse_additive ();
2189 op1 = e;
2190 t = peek ();
2f1a1aea 2191 }
56099f08
FCE
2192
2193 return op1;
2f1a1aea
FCE
2194}
2195
2196
2197expression*
2198parser::parse_additive ()
2199{
2200 expression* op1 = parse_multiplicative ();
2201
2202 const token* t = peek ();
dff50e09 2203 while (t && t->type == tok_operator
2f1a1aea
FCE
2204 && (t->content == "+" || t->content == "-"))
2205 {
2206 binary_expression* e = new binary_expression;
2207 e->op = t->content;
2208 e->left = op1;
56099f08 2209 e->tok = t;
2f1a1aea 2210 next ();
56099f08
FCE
2211 e->right = parse_multiplicative ();
2212 op1 = e;
2213 t = peek ();
2f1a1aea 2214 }
56099f08
FCE
2215
2216 return op1;
2f1a1aea
FCE
2217}
2218
2219
2220expression*
2221parser::parse_multiplicative ()
2222{
2223 expression* op1 = parse_unary ();
2224
2225 const token* t = peek ();
dff50e09 2226 while (t && t->type == tok_operator
2f1a1aea
FCE
2227 && (t->content == "*" || t->content == "/" || t->content == "%"))
2228 {
2229 binary_expression* e = new binary_expression;
2230 e->op = t->content;
2231 e->left = op1;
56099f08 2232 e->tok = t;
2f1a1aea 2233 next ();
56099f08
FCE
2234 e->right = parse_unary ();
2235 op1 = e;
2236 t = peek ();
2f1a1aea 2237 }
56099f08
FCE
2238
2239 return op1;
2f1a1aea
FCE
2240}
2241
2242
2243expression*
2244parser::parse_unary ()
2245{
2246 const token* t = peek ();
dff50e09
FCE
2247 if (t && t->type == tok_operator
2248 && (t->content == "+" ||
2249 t->content == "-" ||
bb2e3076
FCE
2250 t->content == "!" ||
2251 t->content == "~" ||
2252 false))
2f1a1aea
FCE
2253 {
2254 unary_expression* e = new unary_expression;
2255 e->op = t->content;
56099f08 2256 e->tok = t;
2f1a1aea 2257 next ();
3a20432b 2258 e->operand = parse_crement ();
2f1a1aea
FCE
2259 return e;
2260 }
2261 else
bb2e3076 2262 return parse_crement ();
2f1a1aea
FCE
2263}
2264
2265
2266expression*
2267parser::parse_crement () // as in "increment" / "decrement"
2268{
cbfbbf69
FCE
2269 // NB: Ideally, we'd parse only a symbol as an operand to the
2270 // *crement operators, instead of a general expression value. We'd
2271 // need more complex lookahead code to tell apart the postfix cases.
2272 // So we just punt, and leave it to pass-3 to signal errors on
2273 // cases like "4++".
2274
2f1a1aea 2275 const token* t = peek ();
dff50e09 2276 if (t && t->type == tok_operator
2f1a1aea
FCE
2277 && (t->content == "++" || t->content == "--"))
2278 {
2279 pre_crement* e = new pre_crement;
2280 e->op = t->content;
56099f08 2281 e->tok = t;
2f1a1aea
FCE
2282 next ();
2283 e->operand = parse_value ();
2284 return e;
2285 }
2286
2287 // post-crement or non-crement
2288 expression *op1 = parse_value ();
dff50e09 2289
2f1a1aea 2290 t = peek ();
dff50e09 2291 if (t && t->type == tok_operator
2f1a1aea
FCE
2292 && (t->content == "++" || t->content == "--"))
2293 {
2294 post_crement* e = new post_crement;
2295 e->op = t->content;
56099f08 2296 e->tok = t;
2f1a1aea
FCE
2297 next ();
2298 e->operand = op1;
2299 return e;
2300 }
2301 else
2302 return op1;
2303}
2304
2305
2306expression*
2307parser::parse_value ()
2308{
2309 const token* t = peek ();
2310 if (! t)
2311 throw parse_error ("expected value");
2312
2313 if (t->type == tok_operator && t->content == "(")
2314 {
2315 next ();
2316 expression* e = parse_expression ();
2317 t = next ();
2318 if (! (t->type == tok_operator && t->content == ")"))
2319 throw parse_error ("expected ')'");
2320 return e;
2321 }
03c75a4a
JS
2322 else if (t->type == tok_operator && t->content == "&")
2323 {
2324 next ();
2325 t = peek ();
2326 if (t->type != tok_identifier ||
2327 (t->content != "@cast" && t->content[0] != '$'))
2328 throw parse_error ("expected @cast or $var");
2329
2330 target_symbol *ts = static_cast<target_symbol*>(parse_symbol());
2331 ts->addressof = true;
2332 return ts;
2333 }
2f1a1aea
FCE
2334 else if (t->type == tok_identifier)
2335 return parse_symbol ();
2336 else
2337 return parse_literal ();
2338}
2339
2340
d02548c0
GH
2341const token *
2342parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name)
2343{
2344 hop = NULL;
2345 const token* t = expect_ident (name);
2346 if (name == "@hist_linear" || name == "@hist_log")
2347 {
2348 hop = new hist_op;
2349 if (name == "@hist_linear")
2350 hop->htype = hist_linear;
2351 else if (name == "@hist_log")
2352 hop->htype = hist_log;
2353 hop->tok = t;
2354 expect_op("(");
2355 hop->stat = parse_expression ();
2356 int64_t tnum;
2357 if (hop->htype == hist_linear)
2358 {
2359 for (size_t i = 0; i < 3; ++i)
2360 {
2361 expect_op (",");
2362 expect_number (tnum);
2363 hop->params.push_back (tnum);
2364 }
2365 }
d02548c0
GH
2366 expect_op(")");
2367 }
2368 return t;
2369}
2370
2371
2372indexable*
2373parser::parse_indexable ()
2374{
2375 hist_op *hop = NULL;
2376 string name;
2377 const token *tok = parse_hist_op_or_bare_name(hop, name);
2378 if (hop)
2379 return hop;
2380 else
2381 {
2382 symbol* sym = new symbol;
2383 sym->name = name;
2384 sym->tok = tok;
2385 return sym;
2386 }
2387}
2388
2389
2390// var, indexable[index], func(parms), printf("...", ...), $var, $var->member, @stat_op(stat)
2f1a1aea 2391expression*
dff50e09 2392parser::parse_symbol ()
2f1a1aea 2393{
d02548c0
GH
2394 hist_op *hop = NULL;
2395 symbol *sym = NULL;
d7f3e0c5 2396 string name;
d02548c0
GH
2397 const token *t = parse_hist_op_or_bare_name(hop, name);
2398
2399 if (!hop)
0fefb486 2400 {
dff50e09 2401 // If we didn't get a hist_op, then we did get an identifier. We can
d02548c0
GH
2402 // now scrutinize this identifier for the various magic forms of identifier
2403 // (printf, @stat_op, and $var...)
2404
9b5af295
JS
2405 if (name == "@cast")
2406 {
2407 // type-punning time
2408 cast_op *cop = new cast_op;
2409 cop->tok = t;
2410 cop->base_name = name;
2411 expect_op("(");
2412 cop->operand = parse_expression ();
2413 expect_op(",");
2414 expect_unknown(tok_string, cop->type);
c15b6083
MW
2415 // types never start with "struct<space>" or "union<space>",
2416 // so gobble it up.
2417 if (cop->type.compare(0, 7, "struct ") == 0)
2418 cop->type = cop->type.substr(7);
2419 if (cop->type.compare(0, 6, "union ") == 0)
2420 cop->type = cop->type.substr(6);
9b5af295
JS
2421 if (peek_op (","))
2422 {
2423 next();
2424 expect_unknown(tok_string, cop->module);
2425 }
2426 expect_op(")");
81931eab
JS
2427 parse_target_symbol_components(cop);
2428
9b5af295
JS
2429 // if there aren't any dereferences, then the cast is pointless
2430 if (cop->components.empty())
2431 {
2432 expression *op = cop->operand;
2433 delete cop;
2434 return op;
2435 }
2436 return cop;
2437 }
2438
2439 else if (name.size() > 0 && name[0] == '@')
d7f3e0c5 2440 {
d02548c0
GH
2441 stat_op *sop = new stat_op;
2442 if (name == "@avg")
2443 sop->ctype = sc_average;
2444 else if (name == "@count")
2445 sop->ctype = sc_count;
2446 else if (name == "@sum")
2447 sop->ctype = sc_sum;
2448 else if (name == "@min")
2449 sop->ctype = sc_min;
2450 else if (name == "@max")
2451 sop->ctype = sc_max;
2452 else
2453 throw parse_error("unknown statistic operator " + name);
2454 expect_op("(");
2455 sop->tok = t;
2456 sop->stat = parse_expression ();
2457 expect_op(")");
2458 return sop;
2459 }
dff50e09 2460
d5e178c1 2461 else if (print_format *fmt = print_format::create(t))
d02548c0 2462 {
d02548c0 2463 expect_op("(");
b15c465c
PP
2464 if ((name == "print" || name == "println" ||
2465 name == "sprint" || name == "sprintln") &&
3cb17058 2466 (peek_kw("@hist_linear") || peek_kw("@hist_log")))
a4636912
GH
2467 {
2468 // We have a special case where we recognize
2469 // print(@hist_foo(bar)) as a magic print-the-histogram
2470 // construct. This is sort of gross but it avoids
2471 // promoting histogram references to typeful
2472 // expressions.
dff50e09 2473
1bbeef03
GH
2474 hop = NULL;
2475 t = parse_hist_op_or_bare_name(hop, name);
2476 assert(hop);
dff50e09 2477
1bbeef03
GH
2478 // It is, sadly, possible that even while parsing a
2479 // hist_op, we *mis-guessed* and the user wishes to
2480 // print(@hist_op(foo)[bucket]), a scalar. In that case
2481 // we must parse the arrayindex and print an expression.
dff50e09 2482
1bbeef03
GH
2483 if (!peek_op ("["))
2484 fmt->hist = hop;
2485 else
2486 {
2487 // This is simplified version of the
2488 // multi-array-index parser below, because we can
2489 // only ever have one index on a histogram anyways.
2490 expect_op("[");
2491 struct arrayindex* ai = new arrayindex;
2492 ai->tok = t;
2493 ai->base = hop;
2494 ai->indexes.push_back (parse_expression ());
2495 expect_op("]");
2496 fmt->args.push_back(ai);
2497 }
a4636912 2498 }
d7f3e0c5 2499 else
d02548c0 2500 {
3cb17058
JS
2501 int min_args = 0;
2502 if (fmt->print_with_format)
2503 {
2504 // Consume and convert a format string. Agreement between the
2505 // format string and the arguments is postponed to the
2506 // typechecking phase.
2507 string tmp;
2508 expect_unknown (tok_string, tmp);
2509 fmt->raw_components = tmp;
2510 fmt->components = print_format::string_to_components (tmp);
2511 }
2512 else if (fmt->print_with_delim)
2513 {
2514 // Consume a delimiter to separate arguments.
2515 fmt->delimiter.clear();
2516 fmt->delimiter.type = print_format::conv_literal;
2517 expect_unknown (tok_string, fmt->delimiter.literal_string);
2518 min_args = 2;
2519 }
2520 else
2521 {
2522 // If we are not printing with a format string, we must have
2523 // at least one argument (of any type).
2524 expression *e = parse_expression ();
2525 fmt->args.push_back(e);
2526 }
2527
2528 // Consume any subsequent arguments.
2529 while (min_args || !peek_op (")"))
2530 {
2531 expect_op(",");
2532 expression *e = parse_expression ();
2533 fmt->args.push_back(e);
2534 if (min_args)
2535 --min_args;
2536 }
d02548c0
GH
2537 }
2538 expect_op(")");
2539 return fmt;
2540 }
dff50e09 2541
d02548c0
GH
2542 else if (name.size() > 0 && name[0] == '$')
2543 {
2544 // target_symbol time
2545 target_symbol *tsym = new target_symbol;
2546 tsym->tok = t;
2547 tsym->base_name = name;
81931eab 2548 parse_target_symbol_components(tsym);
d02548c0
GH
2549 return tsym;
2550 }
2551
2552 else if (peek_op ("(")) // function call
2553 {
2554 next ();
2555 struct functioncall* f = new functioncall;
2556 f->tok = t;
2557 f->function = name;
2558 // Allow empty actual parameter list
2559 if (peek_op (")"))
2560 {
2561 next ();
2562 return f;
2563 }
2564 while (1)
2565 {
2566 f->args.push_back (parse_expression ());
2567 if (peek_op (")"))
2568 {
2569 next();
2570 break;
2571 }
2572 else if (peek_op (","))
2573 {
2574 next();
2575 continue;
2576 }
2577 else
2578 throw parse_error ("expected ',' or ')'");
2579 }
2580 return f;
2581 }
2582
2583 else
2584 {
2585 sym = new symbol;
2586 sym->name = name;
2587 sym->tok = t;
d7f3e0c5 2588 }
0fefb486 2589 }
dff50e09
FCE
2590
2591 // By now, either we had a hist_op in the first place, or else
d02548c0
GH
2592 // we had a plain word and it was converted to a symbol.
2593
70c743d8 2594 assert (!hop != !sym); // logical XOR
d02548c0
GH
2595
2596 // All that remains is to check for array indexing
2597
d7f3e0c5 2598 if (peek_op ("[")) // array
2f1a1aea
FCE
2599 {
2600 next ();
2601 struct arrayindex* ai = new arrayindex;
d02548c0
GH
2602 ai->tok = t;
2603
2604 if (hop)
2605 ai->base = hop;
2606 else
2607 ai->base = sym;
2608
2f1a1aea
FCE
2609 while (1)
2610 {
2611 ai->indexes.push_back (parse_expression ());
d7f3e0c5 2612 if (peek_op ("]"))
dff50e09
FCE
2613 {
2614 next();
2615 break;
d7f3e0c5
GH
2616 }
2617 else if (peek_op (","))
2618 {
2619 next();
2620 continue;
2621 }
2f1a1aea
FCE
2622 else
2623 throw parse_error ("expected ',' or ']'");
2624 }
2625 return ai;
2626 }
d02548c0
GH
2627
2628 // If we got to here, we *should* have a symbol; if we have
2629 // a hist_op on its own, it doesn't count as an expression,
2630 // so we throw a parse error.
2631
2632 if (hop)
2633 throw parse_error("base histogram operator where expression expected", t);
dff50e09
FCE
2634
2635 return sym;
2f1a1aea 2636}
56099f08 2637
81931eab
JS
2638
2639void
2640parser::parse_target_symbol_components (target_symbol* e)
2641{
2642 while (true)
2643 {
81931eab
JS
2644 if (peek_op ("->"))
2645 {
c67847a0
JS
2646 const token* t = next();
2647 string member;
2648 expect_ident_or_keyword (member);
2649 e->components.push_back (target_symbol::component(t, member));
81931eab
JS
2650 }
2651 else if (peek_op ("["))
2652 {
c67847a0 2653 const token* t = next();
6fda2dff
JS
2654 expression* index = parse_expression();
2655 literal_number* ln = dynamic_cast<literal_number*>(index);
2656 if (ln)
2657 e->components.push_back (target_symbol::component(t, ln->value));
2658 else
2659 e->components.push_back (target_symbol::component(t, index));
81931eab 2660 expect_op ("]");
81931eab
JS
2661 }
2662 else
2663 break;
2664 }
2665}
2666
73267b89 2667/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.370976 seconds and 5 git commands to generate.