]> sourceware.org Git - systemtap.git/blame - parse.cxx
build: fix !HAVE_NSS case
[systemtap.git] / parse.cxx
CommitLineData
2f1a1aea 1// recursive descent parser for systemtap scripts
482fe2af 2// Copyright (C) 2005-2009 Red Hat Inc.
77a5c1f9 3// Copyright (C) 2006 Intel Corporation.
5811366a 4// Copyright (C) 2007 Bull S.A.S
69c68955
FCE
5//
6// This file is part of systemtap, and is free software. You can
7// redistribute it and/or modify it under the terms of the GNU General
8// Public License (GPL); either version 2, or (at your option) any
9// later version.
2f1a1aea 10
2b066ec1 11#include "config.h"
2f1a1aea
FCE
12#include "staptree.h"
13#include "parse.h"
177a8ead 14#include "session.h"
3f99432c
FCE
15#include "util.h"
16
2b066ec1 17#include <iostream>
eacb10ce 18
2b066ec1 19#include <fstream>
2f1a1aea 20#include <cctype>
9c0c0e46 21#include <cstdlib>
29e64872 22#include <cassert>
9c0c0e46
FCE
23#include <cerrno>
24#include <climits>
57b73400 25#include <sstream>
f74fb737 26#include <cstring>
3f99432c 27#include <cctype>
eacb10ce
FCE
28#include <iterator>
29
7a468d68
FCE
30extern "C" {
31#include <fnmatch.h>
32}
2f1a1aea
FCE
33
34using namespace std;
35
36// ------------------------------------------------------------------------
37
bb2e3076
FCE
38
39
177a8ead
FCE
40parser::parser (systemtap_session& s, istream& i, bool p):
41 session (s),
24cb178f 42 input_name ("<input>"), free_input (0),
213bee8f 43 input (i, input_name, s), privileged (p),
6e213f58 44 context(con_unknown), last_t (0), next_t (0), num_errors (0)
2f1a1aea
FCE
45{ }
46
177a8ead
FCE
47parser::parser (systemtap_session& s, const string& fn, bool p):
48 session (s),
2f1a1aea 49 input_name (fn), free_input (new ifstream (input_name.c_str(), ios::in)),
213bee8f 50 input (* free_input, input_name, s), privileged (p),
6e213f58 51 context(con_unknown), last_t (0), next_t (0), num_errors (0)
2f1a1aea
FCE
52{ }
53
54parser::~parser()
55{
56 if (free_input) delete free_input;
57}
58
59
82919855 60stapfile*
177a8ead 61parser::parse (systemtap_session& s, std::istream& i, bool pr)
82919855 62{
177a8ead 63 parser p (s, i, pr);
82919855
FCE
64 return p.parse ();
65}
66
67
68stapfile*
177a8ead 69parser::parse (systemtap_session& s, const std::string& n, bool pr)
82919855 70{
177a8ead 71 parser p (s, n, pr);
82919855
FCE
72 return p.parse ();
73}
74
d7f3e0c5
GH
75static string
76tt2str(token_type tt)
77{
78 switch (tt)
79 {
80 case tok_junk: return "junk";
81 case tok_identifier: return "identifier";
82 case tok_operator: return "operator";
83 case tok_string: return "string";
84 case tok_number: return "number";
85 case tok_embedded: return "embedded-code";
6e213f58 86 case tok_keyword: return "keyword";
d7f3e0c5
GH
87 }
88 return "unknown token";
89}
82919855 90
0323ed4d
WC
91ostream&
92operator << (ostream& o, const source_loc& loc)
93{
a704a23b 94 o << loc.file->name << ":"
0323ed4d
WC
95 << loc.line << ":"
96 << loc.column;
97
98 return o;
99}
100
56099f08
FCE
101ostream&
102operator << (ostream& o, const token& t)
103{
d7f3e0c5 104 o << tt2str(t.type);
56099f08 105
6e213f58 106 if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
56099f08 107 {
24cb178f
FCE
108 o << " '";
109 for (unsigned i=0; i<t.content.length(); i++)
110 {
111 char c = t.content[i];
112 o << (isprint (c) ? c : '?');
113 }
114 o << "'";
56099f08 115 }
56099f08 116
dff50e09 117 o << " at "
0323ed4d 118 << t.location;
56099f08
FCE
119
120 return o;
121}
122
123
dff50e09 124void
2f1a1aea
FCE
125parser::print_error (const parse_error &pe)
126{
1b1b4ceb 127 string align_parse_error (" ");
2f1a1aea
FCE
128 cerr << "parse error: " << pe.what () << endl;
129
177a8ead
FCE
130 if (pe.tok)
131 {
132 cerr << "\tat: " << *pe.tok << endl;
1b1b4ceb 133 session.print_error_source (cerr, align_parse_error, pe.tok);
177a8ead 134 }
2f1a1aea 135 else
177a8ead
FCE
136 {
137 const token* t = last_t;
138 if (t)
1b1b4ceb
RA
139 {
140 cerr << "\tsaw: " << *t << endl;
141 session.print_error_source (cerr, align_parse_error, t);
142 }
177a8ead
FCE
143 else
144 cerr << "\tsaw: " << input_name << " EOF" << endl;
145 }
2f1a1aea
FCE
146
147 // XXX: make it possible to print the last input line,
148 // so as to line up an arrow with the specific error column
149
150 num_errors ++;
151}
152
153
dff50e09 154const token*
2f1a1aea
FCE
155parser::last ()
156{
157 return last_t;
158}
159
160
c434ec7e
FCE
161
162template <typename OPERAND>
163bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
164{
165 if (op->type == tok_operator && op->content == "<=")
166 { return lhs <= rhs; }
167 else if (op->type == tok_operator && op->content == ">=")
168 { return lhs >= rhs; }
169 else if (op->type == tok_operator && op->content == "<")
170 { return lhs < rhs; }
171 else if (op->type == tok_operator && op->content == ">")
172 { return lhs > rhs; }
173 else if (op->type == tok_operator && op->content == "==")
174 { return lhs == rhs; }
175 else if (op->type == tok_operator && op->content == "!=")
176 { return lhs != rhs; }
177 else
178 throw parse_error ("expected comparison operator", op);
179}
180
181
177a8ead
FCE
182// Here, we perform on-the-fly preprocessing.
183// The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
44ce8ed5
FCE
184// where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
185// or: arch COMPARISON-OP "arch-string"
5811366a
FCE
186// or: "string1" COMPARISON-OP "string2"
187// or: number1 COMPARISON-OP number2
44ce8ed5 188// The %: ELSE-TOKENS part is optional.
177a8ead
FCE
189//
190// e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
c434ec7e 191// e.g. %( arch != "i?86" %? "foo" %: "baz" %)
177a8ead
FCE
192//
193// Up to an entire %( ... %) expression is processed by a single call
194// to this function. Tokens included by any nested conditions are
195// enqueued in a private vector.
196
197bool eval_pp_conditional (systemtap_session& s,
198 const token* l, const token* op, const token* r)
199{
44ce8ed5
FCE
200 if (l->type == tok_identifier && (l->content == "kernel_v" ||
201 l->content == "kernel_vr"))
202 {
203 string target_kernel_vr = s.kernel_release;
197a4d62 204 string target_kernel_v = s.kernel_base_release;
dff50e09 205
44ce8ed5
FCE
206 if (! (r->type == tok_string))
207 throw parse_error ("expected string literal", r);
7a468d68 208
dff50e09 209 string target = (l->content == "kernel_vr" ?
7a468d68
FCE
210 target_kernel_vr.c_str() :
211 target_kernel_v.c_str());
212 string query = r->content;
213 bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
214
44ce8ed5
FCE
215 // collect acceptable strverscmp results.
216 int rvc_ok1, rvc_ok2;
7a468d68 217 bool wc_ok = false;
44ce8ed5
FCE
218 if (op->type == tok_operator && op->content == "<=")
219 { rvc_ok1 = -1; rvc_ok2 = 0; }
220 else if (op->type == tok_operator && op->content == ">=")
221 { rvc_ok1 = 1; rvc_ok2 = 0; }
222 else if (op->type == tok_operator && op->content == "<")
223 { rvc_ok1 = -1; rvc_ok2 = -1; }
224 else if (op->type == tok_operator && op->content == ">")
225 { rvc_ok1 = 1; rvc_ok2 = 1; }
226 else if (op->type == tok_operator && op->content == "==")
7a468d68 227 { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
44ce8ed5 228 else if (op->type == tok_operator && op->content == "!=")
7a468d68 229 { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
44ce8ed5
FCE
230 else
231 throw parse_error ("expected comparison operator", op);
7a468d68
FCE
232
233 if ((!wc_ok) && rhs_wildcard)
234 throw parse_error ("wildcard not allowed with order comparison operators", op);
235
236 if (rhs_wildcard)
237 {
238 int rvc_result = fnmatch (query.c_str(), target.c_str(),
239 FNM_NOESCAPE); // spooky
240 bool badness = (rvc_result == 0) ^ (op->content == "==");
241 return !badness;
242 }
243 else
244 {
245 int rvc_result = strverscmp (target.c_str(), query.c_str());
246 // normalize rvc_result
247 if (rvc_result < 0) rvc_result = -1;
248 if (rvc_result > 0) rvc_result = 1;
249 return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
250 }
44ce8ed5
FCE
251 }
252 else if (l->type == tok_identifier && l->content == "arch")
253 {
254 string target_architecture = s.architecture;
255 if (! (r->type == tok_string))
256 throw parse_error ("expected string literal", r);
257 string query_architecture = r->content;
dff50e09 258
7a468d68
FCE
259 int nomatch = fnmatch (query_architecture.c_str(),
260 target_architecture.c_str(),
261 FNM_NOESCAPE); // still spooky
262
44ce8ed5
FCE
263 bool result;
264 if (op->type == tok_operator && op->content == "==")
7a468d68 265 result = !nomatch;
44ce8ed5 266 else if (op->type == tok_operator && op->content == "!=")
7a468d68 267 result = nomatch;
44ce8ed5
FCE
268 else
269 throw parse_error ("expected '==' or '!='", op);
dff50e09 270
44ce8ed5 271 return result;
dff50e09 272 }
c434ec7e 273 else if (l->type == tok_string && r->type == tok_string)
5811366a 274 {
c434ec7e
FCE
275 string lhs = l->content;
276 string rhs = r->content;
277 return eval_comparison (lhs, op, rhs);
278 // NB: no wildcarding option here
279 }
280 else if (l->type == tok_number && r->type == tok_number)
281 {
282 int64_t lhs = lex_cast<int64_t>(l->content);
283 int64_t rhs = lex_cast<int64_t>(r->content);
284 return eval_comparison (lhs, op, rhs);
7a468d68 285 // NB: no wildcarding option here
5811366a
FCE
286 }
287 else if (l->type == tok_string && r->type == tok_number
288 && op->type == tok_operator)
289 throw parse_error ("expected string literal as right value", r);
290 else if (l->type == tok_number && r->type == tok_string
291 && op->type == tok_operator)
292 throw parse_error ("expected number literal as right value", r);
c434ec7e 293
44ce8ed5 294 // XXX: support other forms? "CONFIG_SMP" ?
c434ec7e 295
177a8ead 296 else
5811366a
FCE
297 throw parse_error ("expected 'arch' or 'kernel_v' or 'kernel_vr'\n"
298 " or comparison between strings or integers", l);
177a8ead
FCE
299}
300
301
5811366a 302// Only tokens corresponding to the TRUE statement must be expanded
177a8ead 303const token*
3f847830 304parser::scan_pp (bool wildcard)
177a8ead
FCE
305{
306 while (true)
307 {
308 if (enqueued_pp.size() > 0)
309 {
310 const token* t = enqueued_pp[0];
311 enqueued_pp.erase (enqueued_pp.begin());
312 return t;
313 }
314
3f847830 315 const token* t = input.scan (wildcard); // NB: not recursive!
177a8ead
FCE
316 if (t == 0) // EOF
317 return t;
dff50e09 318
177a8ead
FCE
319 if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
320 return t;
321
322 // We have a %( - it's time to throw a preprocessing party!
323
2d7881bf
PP
324 bool result = false;
325 bool and_result = true;
326 const token *n = NULL;
327 do {
328 const token *l, *op, *r;
329 l = input.scan (false); // NB: not recursive, though perhaps could be
330 op = input.scan (false);
331 r = input.scan (false);
332 if (l == 0 || op == 0 || r == 0)
333 throw parse_error ("incomplete condition after '%('", t);
334 // NB: consider generalizing to consume all tokens until %?, and
335 // passing that as a vector to an evaluator.
336
337 // Do not evaluate the condition if we haven't expanded everything.
338 // This may occur when having several recursive conditionals.
339 and_result &= eval_pp_conditional (session, l, op, r);
340 delete l;
341 delete op;
342 delete r;
343 delete n;
344
345 n = input.scan ();
346 if (n && n->type == tok_operator && n->content == "&&")
347 continue;
348 result |= and_result;
349 and_result = true;
350 if (! (n && n->type == tok_operator && n->content == "||"))
351 break;
352 } while (true);
3f847830
FCE
353
354 /*
355 clog << "PP eval (" << *t << ") == " << result << endl;
356 */
357
2d7881bf 358 const token *m = n; // NB: not recursive
177a8ead
FCE
359 if (! (m && m->type == tok_operator && m->content == "%?"))
360 throw parse_error ("expected '%?' marker for conditional", t);
70c743d8 361 delete m; // "%?"
177a8ead
FCE
362
363 vector<const token*> my_enqueued_pp;
3f847830
FCE
364
365 int nesting = 0;
177a8ead
FCE
366 while (true) // consume THEN tokens
367 {
3f847830
FCE
368 try
369 {
370 m = result ? scan_pp (wildcard) : input.scan (wildcard);
371 }
372 catch (const parse_error &e)
373 {
dff50e09 374 if (result) throw e; // propagate errors if THEN branch taken
d57671d3 375 continue;
3f847830
FCE
376 }
377
378 if (m && m->type == tok_operator && m->content == "%(") // nested %(
379 nesting ++;
380 if (nesting == 0 && m && (m->type == tok_operator && (m->content == "%:" || // ELSE
381 m->content == "%)"))) // END
177a8ead 382 break;
3f847830
FCE
383 if (nesting && m && m->type == tok_operator && m->content == "%)") // nested %)
384 nesting --;
385
d57671d3
FCE
386 if (!m)
387 throw parse_error ("incomplete conditional - missing '%:' or '%)'", t);
388 if (result)
177a8ead 389 my_enqueued_pp.push_back (m);
d57671d3 390 if (!result)
3f847830
FCE
391 delete m; // do nothing, just dispose of unkept THEN token
392
393 continue;
177a8ead 394 }
dff50e09 395
177a8ead 396 if (m && m->type == tok_operator && m->content == "%:") // ELSE
70c743d8
JS
397 {
398 delete m; // "%:"
3f847830 399 int nesting = 0;
70c743d8
JS
400 while (true)
401 {
3f847830
FCE
402 try
403 {
404 m = result ? input.scan (wildcard) : scan_pp (wildcard);
dff50e09 405 }
3f847830
FCE
406 catch (const parse_error& e)
407 {
dff50e09 408 if (!result) throw e; // propagate errors if ELSE branch taken
d57671d3 409 continue;
3f847830
FCE
410 }
411
412 if (m && m->type == tok_operator && m->content == "%(") // nested %(
413 nesting ++;
414 if (nesting == 0 && m && m->type == tok_operator && m->content == "%)") // END
70c743d8 415 break;
3f847830
FCE
416 if (nesting && m && m->type == tok_operator && m->content == "%)") // nested %)
417 nesting --;
418
d57671d3 419 if (!m)
3f847830 420 throw parse_error ("incomplete conditional - missing %)", t);
d57671d3 421 if (!result)
dff50e09 422 my_enqueued_pp.push_back (m);
d57671d3 423 if (result)
3f847830
FCE
424 delete m; // do nothing, just dispose of unkept ELSE token
425
426 continue;
70c743d8
JS
427 }
428 }
3f847830
FCE
429
430 /*
431 clog << "PP eval (" << *t << ") == " << result << " tokens: " << endl;
432 for (unsigned k=0; k<my_enqueued_pp.size(); k++)
433 clog << * my_enqueued_pp[k] << endl;
434 clog << endl;
435 */
436
70c743d8
JS
437 delete t; // "%("
438 delete m; // "%)"
177a8ead 439
3f847830 440
177a8ead
FCE
441 // NB: we transcribe the retained tokens here, and not inside
442 // the THEN/ELSE while loops. If it were done there, each loop
443 // would become infinite (each iteration consuming an ordinary
444 // token the previous one just pushed there). Guess how I
445 // figured that out.
446 enqueued_pp.insert (enqueued_pp.end(),
447 my_enqueued_pp.begin(),
448 my_enqueued_pp.end());
449
450 // Go back to outermost while(true) loop. We hope that at least
451 // some THEN or ELSE tokens were enqueued. If not, around we go
452 // again, until EOF.
453 }
454}
455
456
2f1a1aea 457const token*
0c218afb 458parser::next (bool wildcard)
2f1a1aea
FCE
459{
460 if (! next_t)
0c218afb 461 next_t = scan_pp (wildcard);
2f1a1aea
FCE
462 if (! next_t)
463 throw parse_error ("unexpected end-of-file");
464
2f1a1aea
FCE
465 last_t = next_t;
466 // advance by zeroing next_t
467 next_t = 0;
468 return last_t;
469}
470
471
472const token*
0c218afb 473parser::peek (bool wildcard)
2f1a1aea
FCE
474{
475 if (! next_t)
0c218afb 476 next_t = scan_pp (wildcard);
2f1a1aea
FCE
477
478 // don't advance by zeroing next_t
479 last_t = next_t;
480 return next_t;
481}
482
483
d7f3e0c5
GH
484static inline bool
485tok_is(token const * t, token_type tt, string const & expected)
486{
487 return t && t->type == tt && t->content == expected;
488}
489
490
dff50e09 491const token*
d7f3e0c5
GH
492parser::expect_known (token_type tt, string const & expected)
493{
494 const token *t = next();
57b73400 495 if (! (t && t->type == tt && t->content == expected))
d7f3e0c5
GH
496 throw parse_error ("expected '" + expected + "'");
497 return t;
498}
499
500
dff50e09 501const token*
d7f3e0c5
GH
502parser::expect_unknown (token_type tt, string & target)
503{
504 const token *t = next();
505 if (!(t && t->type == tt))
506 throw parse_error ("expected " + tt2str(tt));
507 target = t->content;
508 return t;
509}
510
511
dff50e09 512const token*
493ee224
DS
513parser::expect_unknown2 (token_type tt1, token_type tt2, string & target)
514{
515 const token *t = next();
516 if (!(t && (t->type == tt1 || t->type == tt2)))
517 throw parse_error ("expected " + tt2str(tt1) + " or " + tt2str(tt2));
518 target = t->content;
519 return t;
520}
521
522
dff50e09 523const token*
d7f3e0c5
GH
524parser::expect_op (std::string const & expected)
525{
526 return expect_known (tok_operator, expected);
527}
528
529
dff50e09 530const token*
d7f3e0c5
GH
531parser::expect_kw (std::string const & expected)
532{
533 return expect_known (tok_identifier, expected);
534}
535
dff50e09 536const token*
e38723d2 537parser::expect_number (int64_t & value)
57b73400 538{
e38723d2
MH
539 bool neg = false;
540 const token *t = next();
541 if (t->type == tok_operator && t->content == "-")
542 {
543 neg = true;
544 t = next ();
545 }
546 if (!(t && t->type == tok_number))
547 throw parse_error ("expected number");
548
549 const char* startp = t->content.c_str ();
550 char* endp = (char*) startp;
551
552 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
553 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
554 // since the lexer only gives us positive digit strings, but we'll
555 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
556 errno = 0;
557 value = (int64_t) strtoull (startp, & endp, 0);
558 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
559 || (neg && (unsigned long long) value > 9223372036854775808ULL)
560 || (unsigned long long) value > 18446744073709551615ULL
561 || value < -9223372036854775807LL-1)
dff50e09
FCE
562 throw parse_error ("number invalid or out of range");
563
e38723d2
MH
564 if (neg)
565 value = -value;
566
567 return t;
57b73400
GH
568}
569
d7f3e0c5 570
dff50e09 571const token*
d7f3e0c5
GH
572parser::expect_ident (std::string & target)
573{
574 return expect_unknown (tok_identifier, target);
575}
576
577
dff50e09 578const token*
493ee224
DS
579parser::expect_ident_or_keyword (std::string & target)
580{
581 return expect_unknown2 (tok_identifier, tok_keyword, target);
582}
583
584
dff50e09 585bool
d7f3e0c5
GH
586parser::peek_op (std::string const & op)
587{
588 return tok_is (peek(), tok_operator, op);
589}
590
591
dff50e09 592bool
d7f3e0c5
GH
593parser::peek_kw (std::string const & kw)
594{
595 return tok_is (peek(), tok_identifier, kw);
596}
597
598
599
66c7d4c1 600lexer::lexer (istream& input, const string& in, systemtap_session& s):
2203b032 601 input_name (in), input_pointer (0), input_end (0),
66c7d4c1
JS
602 cursor_suspend_count(0), cursor_line (1), cursor_column (1),
603 session(s), current_file (0)
eacb10ce 604{
66c7d4c1 605 getline(input, input_contents, '\0');
2203b032 606
66c7d4c1
JS
607 input_pointer = input_contents.data();
608 input_end = input_contents.data() + input_contents.size();
609
610 if (keywords.empty())
611 {
612 keywords.insert("probe");
613 keywords.insert("global");
614 keywords.insert("function");
615 keywords.insert("if");
616 keywords.insert("else");
617 keywords.insert("for");
618 keywords.insert("foreach");
619 keywords.insert("in");
620 keywords.insert("limit");
621 keywords.insert("return");
622 keywords.insert("delete");
623 keywords.insert("while");
624 keywords.insert("break");
625 keywords.insert("continue");
626 keywords.insert("next");
627 keywords.insert("string");
628 keywords.insert("long");
629 }
eacb10ce 630}
2f1a1aea 631
66c7d4c1
JS
632set<string> lexer::keywords;
633
1b1b4ceb
RA
634void
635lexer::set_current_file (stapfile* f)
636{
637 current_file = f;
2203b032
JS
638 if (f)
639 {
640 f->file_contents = input_contents;
641 f->name = input_name;
642 }
1b1b4ceb 643}
bb2e3076
FCE
644
645int
646lexer::input_peek (unsigned n)
647{
66c7d4c1
JS
648 if (input_pointer + n >= input_end)
649 return -1; // EOF
650 return (unsigned char)*(input_pointer + n);
bb2e3076
FCE
651}
652
653
dff50e09 654int
2f1a1aea
FCE
655lexer::input_get ()
656{
66c7d4c1 657 int c = input_peek();
bb2e3076
FCE
658 if (c < 0) return c; // EOF
659
66c7d4c1
JS
660 ++input_pointer;
661
3f99432c
FCE
662 if (cursor_suspend_count)
663 // Track effect of input_put: preserve previous cursor/line_column
664 // until all of its characters are consumed.
665 cursor_suspend_count --;
666 else
2f1a1aea 667 {
3f99432c
FCE
668 // update source cursor
669 if (c == '\n')
670 {
671 cursor_line ++;
672 cursor_column = 1;
673 }
674 else
675 cursor_column ++;
2f1a1aea 676 }
2f1a1aea 677
eacb10ce 678 // clog << "[" << (char)c << "]";
2f1a1aea
FCE
679 return c;
680}
681
682
3f99432c
FCE
683void
684lexer::input_put (const string& chars)
685{
66c7d4c1
JS
686 size_t pos = input_pointer - input_contents.data();
687 // clog << "[put:" << chars << " @" << pos << "]";
688 input_contents.insert (pos, chars);
eacb10ce 689 cursor_suspend_count += chars.size();
66c7d4c1
JS
690 input_pointer = input_contents.data() + pos;
691 input_end = input_contents.data() + input_contents.size();
3f99432c
FCE
692}
693
694
2f1a1aea 695token*
3f847830 696lexer::scan (bool wildcard)
2f1a1aea
FCE
697{
698 token* n = new token;
2203b032 699 n->location.file = current_file;
2f1a1aea 700
3f99432c
FCE
701 unsigned semiskipped_p = 0;
702
2f1a1aea
FCE
703 skip:
704 n->location.line = cursor_line;
705 n->location.column = cursor_column;
706
3f99432c
FCE
707 semiskip:
708 if (semiskipped_p > 1)
709 {
710 input_get ();
711 throw parse_error ("invalid nested substitution of command line arguments");
712 }
713
2f1a1aea 714 int c = input_get();
3f99432c 715 // clog << "{" << (char)c << (char)c2 << "}";
2f1a1aea
FCE
716 if (c < 0)
717 {
718 delete n;
719 return 0;
720 }
721
722 if (isspace (c))
723 goto skip;
724
66c7d4c1
JS
725 int c2 = input_peek ();
726
3f99432c
FCE
727 // Paste command line arguments as character streams into
728 // the beginning of a token. $1..$999 go through as raw
729 // characters; @1..@999 are quoted/escaped as strings.
730 // $# and @# expand to the number of arguments, similarly
731 // raw or quoted.
3f847830 732 if ((c == '$' || c == '@') &&
3f99432c
FCE
733 (c2 == '#'))
734 {
735 input_get(); // swallow '#'
736 stringstream converter;
737 converter << session.args.size ();
738 if (c == '$') input_put (converter.str());
739 else input_put (lex_cast_qstring (converter.str()));
740 semiskipped_p ++;
741 goto semiskip;
742 }
3f847830 743 else if ((c == '$' || c == '@') &&
3f99432c
FCE
744 (isdigit (c2)))
745 {
746 unsigned idx = 0;
747 do
748 {
749 input_get ();
750 idx = (idx * 10) + (c2 - '0');
751 c2 = input_peek ();
752 } while (c2 > 0 &&
dff50e09 753 isdigit (c2) &&
3f99432c
FCE
754 idx <= session.args.size()); // prevent overflow
755 if (idx == 0 ||
756 idx-1 >= session.args.size())
aca66a36
JS
757 throw parse_error ("command line argument index " + lex_cast(idx)
758 + " out of range [1-" + lex_cast(session.args.size()) + "]", n);
3f99432c
FCE
759 string arg = session.args[idx-1];
760 if (c == '$') input_put (arg);
761 else input_put (lex_cast_qstring (arg));
762 semiskipped_p ++;
763 goto semiskip;
764 }
765
0c218afb
MH
766 else if (isalpha (c) || c == '$' || c == '@' || c == '_' ||
767 (wildcard && c == '*'))
2f1a1aea
FCE
768 {
769 n->type = tok_identifier;
770 n->content = (char) c;
0c218afb
MH
771 while (isalnum (c2) || c2 == '_' || c2 == '$' ||
772 (wildcard && c2 == '*'))
2f1a1aea 773 {
3f99432c
FCE
774 input_get ();
775 n->content.push_back (c2);
776 c2 = input_peek ();
6e213f58 777 }
213bee8f 778
66c7d4c1 779 if (keywords.count(n->content))
3f99432c 780 n->type = tok_keyword;
dff50e09 781
2f1a1aea
FCE
782 return n;
783 }
784
3a20432b 785 else if (isdigit (c)) // positive literal
2f1a1aea 786 {
2f1a1aea 787 n->type = tok_number;
9c0c0e46
FCE
788 n->content = (char) c;
789
66c7d4c1 790 while (isalnum (c2))
2f1a1aea 791 {
9c0c0e46
FCE
792 // NB: isalnum is very permissive. We rely on strtol, called in
793 // parser::parse_literal below, to confirm that the number string
794 // is correctly formatted and in range.
795
66c7d4c1
JS
796 input_get ();
797 n->content.push_back (c2);
798 c2 = input_peek ();
2f1a1aea
FCE
799 }
800 return n;
801 }
802
803 else if (c == '\"')
804 {
805 n->type = tok_string;
806 while (1)
807 {
808 c = input_get ();
809
3f99432c 810 if (c < 0 || c == '\n')
2f1a1aea
FCE
811 {
812 n->type = tok_junk;
813 break;
814 }
815 if (c == '\"') // closing double-quotes
816 break;
3f99432c 817 else if (c == '\\') // see also input_put
dff50e09 818 {
7d46afb8
GH
819 c = input_get ();
820 switch (c)
821 {
822 case 'a':
823 case 'b':
824 case 't':
825 case 'n':
826 case 'v':
827 case 'f':
828 case 'r':
f03954fd 829 case '0' ... '7': // NB: need only match the first digit
7d46afb8 830 case '\\':
7d46afb8 831 // Pass these escapes through to the string value
dff50e09 832 // being parsed; it will be emitted into a C literal.
7d46afb8
GH
833
834 n->content.push_back('\\');
835
3f99432c 836 // fall through
7d46afb8 837 default:
7d46afb8
GH
838 n->content.push_back(c);
839 break;
840 }
2f1a1aea
FCE
841 }
842 else
843 n->content.push_back(c);
844 }
845 return n;
846 }
847
848 else if (ispunct (c))
849 {
bb2e3076 850 int c3 = input_peek (1);
2f1a1aea 851
3a20432b
FCE
852 // NB: if we were to recognize negative numeric literals here,
853 // we'd introduce another grammar ambiguity:
854 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
855 // instead of tok_number(1) tok_operator('-') tok_number(1)
856
66c7d4c1 857 if (c == '#') // shell comment
2f1a1aea
FCE
858 {
859 unsigned this_line = cursor_line;
bb2e3076
FCE
860 do { c = input_get (); }
861 while (c >= 0 && cursor_line == this_line);
2f1a1aea
FCE
862 goto skip;
863 }
66c7d4c1 864 else if ((c == '/' && c2 == '/')) // C++ comment
63a7c90e
FCE
865 {
866 unsigned this_line = cursor_line;
bb2e3076
FCE
867 do { c = input_get (); }
868 while (c >= 0 && cursor_line == this_line);
63a7c90e
FCE
869 goto skip;
870 }
871 else if (c == '/' && c2 == '*') // C comment
872 {
66c7d4c1
JS
873 (void) input_get (); // swallow '*' already in c2
874 c = input_get ();
63a7c90e 875 c2 = input_get ();
bb2e3076 876 while (c2 >= 0)
63a7c90e 877 {
66c7d4c1
JS
878 if (c == '*' && c2 == '/')
879 break;
63a7c90e
FCE
880 c = c2;
881 c2 = input_get ();
63a7c90e 882 }
bb2e3076 883 goto skip;
63a7c90e 884 }
54dfabe9
FCE
885 else if (c == '%' && c2 == '{') // embedded code
886 {
887 n->type = tok_embedded;
888 (void) input_get (); // swallow '{' already in c2
66c7d4c1
JS
889 c = input_get ();
890 c2 = input_get ();
891 while (c2 >= 0)
54dfabe9 892 {
66c7d4c1
JS
893 if (c == '%' && c2 == '}')
894 return n;
54dfabe9 895 n->content += c;
66c7d4c1
JS
896 c = c2;
897 c2 = input_get ();
54dfabe9 898 }
66c7d4c1 899 n->type = tok_junk;
54dfabe9
FCE
900 return n;
901 }
2f1a1aea 902
bb2e3076
FCE
903 // We're committed to recognizing at least the first character
904 // as an operator.
2f1a1aea 905 n->type = tok_operator;
66c7d4c1 906 n->content = c;
2f1a1aea 907
bb2e3076 908 // match all valid operators, in decreasing size order
66c7d4c1
JS
909 if ((c == '<' && c2 == '<' && c3 == '<') ||
910 (c == '<' && c2 == '<' && c3 == '=') ||
911 (c == '>' && c2 == '>' && c3 == '='))
82919855 912 {
66c7d4c1
JS
913 n->content += c2;
914 n->content += c3;
bb2e3076
FCE
915 input_get (); input_get (); // swallow other two characters
916 }
66c7d4c1
JS
917 else if ((c == '=' && c2 == '=') ||
918 (c == '!' && c2 == '=') ||
919 (c == '<' && c2 == '=') ||
920 (c == '>' && c2 == '=') ||
921 (c == '+' && c2 == '=') ||
922 (c == '-' && c2 == '=') ||
923 (c == '*' && c2 == '=') ||
924 (c == '/' && c2 == '=') ||
925 (c == '%' && c2 == '=') ||
926 (c == '&' && c2 == '=') ||
927 (c == '^' && c2 == '=') ||
928 (c == '|' && c2 == '=') ||
929 (c == '.' && c2 == '=') ||
930 (c == '&' && c2 == '&') ||
931 (c == '|' && c2 == '|') ||
932 (c == '+' && c2 == '+') ||
933 (c == '-' && c2 == '-') ||
934 (c == '-' && c2 == '>') ||
935 (c == '<' && c2 == '<') ||
936 (c == '>' && c2 == '>') ||
177a8ead 937 // preprocessor tokens
66c7d4c1
JS
938 (c == '%' && c2 == '(') ||
939 (c == '%' && c2 == '?') ||
940 (c == '%' && c2 == ':') ||
941 (c == '%' && c2 == ')'))
bb2e3076 942 {
66c7d4c1 943 n->content += c2;
bb2e3076 944 input_get (); // swallow other character
dff50e09 945 }
2f1a1aea
FCE
946
947 return n;
948 }
949
950 else
951 {
952 n->type = tok_junk;
953 n->content = (char) c;
954 return n;
955 }
956}
957
958
959// ------------------------------------------------------------------------
960
961stapfile*
962parser::parse ()
963{
964 stapfile* f = new stapfile;
1b1b4ceb 965 input.set_current_file (f);
56099f08
FCE
966
967 bool empty = true;
968
2f1a1aea
FCE
969 while (1)
970 {
971 try
972 {
973 const token* t = peek ();
56099f08 974 if (! t) // nice clean EOF
2f1a1aea
FCE
975 break;
976
56099f08 977 empty = false;
6e213f58
DS
978 if (t->type == tok_keyword && t->content == "probe")
979 {
980 context = con_probe;
981 parse_probe (f->probes, f->aliases);
982 }
983 else if (t->type == tok_keyword && t->content == "global")
984 {
985 context = con_global;
4b5f3e45 986 parse_global (f->globals, f->probes);
6e213f58
DS
987 }
988 else if (t->type == tok_keyword && t->content == "function")
989 {
990 context = con_function;
991 parse_functiondecl (f->functions);
992 }
54dfabe9 993 else if (t->type == tok_embedded)
6e213f58
DS
994 {
995 context = con_embedded;
996 f->embeds.push_back (parse_embeddedcode ());
997 }
2f1a1aea 998 else
6e213f58
DS
999 {
1000 context = con_unknown;
1001 throw parse_error ("expected 'probe', 'global', 'function', or '%{'");
1002 }
2f1a1aea
FCE
1003 }
1004 catch (parse_error& pe)
1005 {
1006 print_error (pe);
cd7116b8 1007 if (pe.skip_some) // for recovery
dff50e09 1008 try
cd7116b8
FCE
1009 {
1010 // Quietly swallow all tokens until the next '}'.
1011 while (1)
1012 {
1013 const token* t = peek ();
1014 if (! t)
1015 break;
1016 next ();
1017 if (t->type == tok_operator && t->content == "}")
1018 break;
1019 }
1020 }
1021 catch (parse_error& pe2)
1022 {
1023 // parse error during recovery ... ugh
1024 print_error (pe2);
1025 }
177a8ead 1026 }
2f1a1aea
FCE
1027 }
1028
56099f08
FCE
1029 if (empty)
1030 {
1031 cerr << "Input file '" << input_name << "' is empty or missing." << endl;
1032 delete f;
2203b032 1033 f = 0;
56099f08
FCE
1034 }
1035 else if (num_errors > 0)
2f1a1aea
FCE
1036 {
1037 cerr << num_errors << " parse error(s)." << endl;
1038 delete f;
2203b032 1039 f = 0;
2f1a1aea 1040 }
dff50e09 1041
2203b032 1042 input.set_current_file(0);
2f1a1aea
FCE
1043 return f;
1044}
1045
1046
20c6c071 1047void
54dfabe9
FCE
1048parser::parse_probe (std::vector<probe *> & probe_ret,
1049 std::vector<probe_alias *> & alias_ret)
2f1a1aea 1050{
82919855 1051 const token* t0 = next ();
6e213f58 1052 if (! (t0->type == tok_keyword && t0->content == "probe"))
82919855
FCE
1053 throw parse_error ("expected 'probe'");
1054
20c6c071
GH
1055 vector<probe_point *> aliases;
1056 vector<probe_point *> locations;
1057
1058 bool equals_ok = true;
82919855 1059
97266278
LG
1060 int epilogue_alias = 0;
1061
2f1a1aea
FCE
1062 while (1)
1063 {
b4ceace2 1064 probe_point * pp = parse_probe_point ();
dff50e09 1065
b4ceace2 1066 const token* t = peek ();
dff50e09 1067 if (equals_ok && t
b4ceace2
FCE
1068 && t->type == tok_operator && t->content == "=")
1069 {
1ad820e3
FCE
1070 if (pp->optional || pp->sufficient)
1071 throw parse_error ("probe point alias name cannot be optional nor sufficient", pp->tok);
b4ceace2
FCE
1072 aliases.push_back(pp);
1073 next ();
1074 continue;
1075 }
dff50e09 1076 else if (equals_ok && t
97266278
LG
1077 && t->type == tok_operator && t->content == "+=")
1078 {
1ad820e3
FCE
1079 if (pp->optional || pp->sufficient)
1080 throw parse_error ("probe point alias name cannot be optional nor sufficient", pp->tok);
97266278
LG
1081 aliases.push_back(pp);
1082 epilogue_alias = 1;
1083 next ();
1084 continue;
1085 }
b4ceace2
FCE
1086 else if (t && t->type == tok_operator && t->content == ",")
1087 {
1088 locations.push_back(pp);
1089 equals_ok = false;
1090 next ();
1091 continue;
1092 }
1093 else if (t && t->type == tok_operator && t->content == "{")
1094 {
1095 locations.push_back(pp);
1096 break;
1097 }
2f1a1aea 1098 else
9c0c0e46 1099 throw parse_error ("expected probe point specifier");
2f1a1aea 1100 }
20c6c071 1101
20c6c071
GH
1102 if (aliases.empty())
1103 {
54dfabe9
FCE
1104 probe* p = new probe;
1105 p->tok = t0;
1106 p->locations = locations;
1107 p->body = parse_stmt_block ();
37ebca01 1108 p->privileged = privileged;
54dfabe9 1109 probe_ret.push_back (p);
20c6c071
GH
1110 }
1111 else
1112 {
54dfabe9 1113 probe_alias* p = new probe_alias (aliases);
97266278
LG
1114 if(epilogue_alias)
1115 p->epilogue_style = true;
1116 else
1117 p->epilogue_style = false;
54dfabe9
FCE
1118 p->tok = t0;
1119 p->locations = locations;
1120 p->body = parse_stmt_block ();
37ebca01 1121 p->privileged = privileged;
54dfabe9 1122 alias_ret.push_back (p);
20c6c071 1123 }
54dfabe9 1124}
20c6c071 1125
54dfabe9
FCE
1126
1127embeddedcode*
1128parser::parse_embeddedcode ()
1129{
1130 embeddedcode* e = new embeddedcode;
1131 const token* t = next ();
1132 if (t->type != tok_embedded)
24cb178f
FCE
1133 throw parse_error ("expected '%{'");
1134
1135 if (! privileged)
cd7116b8
FCE
1136 throw parse_error ("embedded code in unprivileged script",
1137 false /* don't skip tokens for parse resumption */);
54dfabe9
FCE
1138
1139 e->tok = t;
1140 e->code = t->content;
1141 return e;
2f1a1aea
FCE
1142}
1143
1144
1145block*
56099f08 1146parser::parse_stmt_block ()
2f1a1aea
FCE
1147{
1148 block* pb = new block;
1149
56099f08
FCE
1150 const token* t = next ();
1151 if (! (t->type == tok_operator && t->content == "{"))
1152 throw parse_error ("expected '{'");
1153
1154 pb->tok = t;
2b066ec1 1155
2f1a1aea
FCE
1156 while (1)
1157 {
1158 try
1159 {
2b066ec1
FCE
1160 t = peek ();
1161 if (t && t->type == tok_operator && t->content == "}")
1162 {
1163 next ();
1164 break;
1165 }
1166
2f1a1aea 1167 pb->statements.push_back (parse_statement ());
2f1a1aea
FCE
1168 }
1169 catch (parse_error& pe)
1170 {
1171 print_error (pe);
54dfabe9 1172
2f1a1aea
FCE
1173 // Quietly swallow all tokens until the next ';' or '}'.
1174 while (1)
1175 {
1176 const token* t = peek ();
54dfabe9 1177 if (! t) return 0;
2f1a1aea 1178 next ();
54dfabe9
FCE
1179 if (t->type == tok_operator
1180 && (t->content == "}" || t->content == ";"))
2f1a1aea
FCE
1181 break;
1182 }
1183 }
1184 }
1185
1186 return pb;
1187}
1188
1189
1190statement*
1191parser::parse_statement ()
1192{
1193 const token* t = peek ();
1194 if (t && t->type == tok_operator && t->content == ";")
1195 {
69c68955
FCE
1196 null_statement* n = new null_statement ();
1197 n->tok = next ();
1198 return n;
2f1a1aea 1199 }
dff50e09 1200 else if (t && t->type == tok_operator && t->content == "{")
56099f08 1201 return parse_stmt_block ();
6e213f58 1202 else if (t && t->type == tok_keyword && t->content == "if")
56099f08 1203 return parse_if_statement ();
6e213f58 1204 else if (t && t->type == tok_keyword && t->content == "for")
69c68955 1205 return parse_for_loop ();
6e213f58 1206 else if (t && t->type == tok_keyword && t->content == "foreach")
69c68955 1207 return parse_foreach_loop ();
6e213f58 1208 else if (t && t->type == tok_keyword && t->content == "return")
56099f08 1209 return parse_return_statement ();
6e213f58 1210 else if (t && t->type == tok_keyword && t->content == "delete")
56099f08 1211 return parse_delete_statement ();
6e213f58 1212 else if (t && t->type == tok_keyword && t->content == "while")
f3c26ea5 1213 return parse_while_loop ();
6e213f58 1214 else if (t && t->type == tok_keyword && t->content == "break")
f3c26ea5 1215 return parse_break_statement ();
6e213f58 1216 else if (t && t->type == tok_keyword && t->content == "continue")
f3c26ea5 1217 return parse_continue_statement ();
6e213f58 1218 else if (t && t->type == tok_keyword && t->content == "next")
f3c26ea5
FCE
1219 return parse_next_statement ();
1220 // XXX: "do/while" statement?
2f1a1aea
FCE
1221 else if (t && (t->type == tok_operator || // expressions are flexible
1222 t->type == tok_identifier ||
1223 t->type == tok_number ||
1224 t->type == tok_string))
69c68955 1225 return parse_expr_statement ();
54dfabe9 1226 // XXX: consider generally accepting tok_embedded here too
2f1a1aea
FCE
1227 else
1228 throw parse_error ("expected statement");
1229}
1230
1231
56099f08 1232void
78f6bba6 1233parser::parse_global (vector <vardecl*>& globals, vector<probe*>&)
2f1a1aea 1234{
82919855 1235 const token* t0 = next ();
6e213f58 1236 if (! (t0->type == tok_keyword && t0->content == "global"))
82919855
FCE
1237 throw parse_error ("expected 'global'");
1238
56099f08
FCE
1239 while (1)
1240 {
1241 const token* t = next ();
1242 if (! (t->type == tok_identifier))
1243 throw parse_error ("expected identifier");
1244
2b066ec1
FCE
1245 for (unsigned i=0; i<globals.size(); i++)
1246 if (globals[i]->name == t->content)
57b73400 1247 throw parse_error ("duplicate global name");
dff50e09 1248
24cb178f
FCE
1249 vardecl* d = new vardecl;
1250 d->name = t->content;
1251 d->tok = t;
1252 globals.push_back (d);
56099f08 1253
82919855 1254 t = peek ();
ef474d24
JS
1255
1256 if (t && t->type == tok_operator && t->content == "[") // array size
1257 {
1258 int64_t size;
1259 next ();
1260 expect_number(size);
1261 if (size <= 0 || size > 1000000) // arbitrary max
1262 throw parse_error("array size out of range");
1263 d->maxsize = (int)size;
1264 expect_known(tok_operator, "]");
1265 t = peek ();
1266 }
1267
4b5f3e45 1268 if (t && t->type == tok_operator && t->content == "=") // initialization
ef474d24
JS
1269 {
1270 if (!d->compatible_arity(0))
1271 throw parse_error("only scalar globals can be initialized");
1272 d->set_arity(0);
1273 next ();
1274 d->init = parse_literal ();
1275 d->type = d->init->type;
1276 t = peek ();
1277 }
4b5f3e45 1278
c3799d72
AM
1279 if (t && t->type == tok_operator && t->content == ";") // termination
1280 next();
1281
4b5f3e45 1282 if (t && t->type == tok_operator && t->content == ",") // next global
82919855
FCE
1283 {
1284 next ();
1285 continue;
1286 }
56099f08 1287 else
82919855 1288 break;
56099f08
FCE
1289 }
1290}
1291
1292
24cb178f
FCE
1293void
1294parser::parse_functiondecl (std::vector<functiondecl*>& functions)
56099f08 1295{
82919855 1296 const token* t = next ();
6e213f58 1297 if (! (t->type == tok_keyword && t->content == "function"))
82919855
FCE
1298 throw parse_error ("expected 'function'");
1299
56099f08 1300
82919855 1301 t = next ();
6e213f58
DS
1302 if (! (t->type == tok_identifier)
1303 && ! (t->type == tok_keyword
1304 && (t->content == "string" || t->content == "long")))
56099f08 1305 throw parse_error ("expected identifier");
24cb178f
FCE
1306
1307 for (unsigned i=0; i<functions.size(); i++)
1308 if (functions[i]->name == t->content)
1309 throw parse_error ("duplicate function name");
1310
1311 functiondecl *fd = new functiondecl ();
56099f08
FCE
1312 fd->name = t->content;
1313 fd->tok = t;
1314
1315 t = next ();
6a505121
FCE
1316 if (t->type == tok_operator && t->content == ":")
1317 {
1318 t = next ();
6e213f58 1319 if (t->type == tok_keyword && t->content == "string")
6a505121 1320 fd->type = pe_string;
6e213f58 1321 else if (t->type == tok_keyword && t->content == "long")
6a505121
FCE
1322 fd->type = pe_long;
1323 else throw parse_error ("expected 'string' or 'long'");
1324
1325 t = next ();
1326 }
1327
56099f08
FCE
1328 if (! (t->type == tok_operator && t->content == "("))
1329 throw parse_error ("expected '('");
1330
1331 while (1)
1332 {
1333 t = next ();
1334
1335 // permit zero-argument fuctions
1336 if (t->type == tok_operator && t->content == ")")
1337 break;
1338 else if (! (t->type == tok_identifier))
1339 throw parse_error ("expected identifier");
1340 vardecl* vd = new vardecl;
1341 vd->name = t->content;
1342 vd->tok = t;
1343 fd->formal_args.push_back (vd);
1344
1345 t = next ();
6a505121
FCE
1346 if (t->type == tok_operator && t->content == ":")
1347 {
1348 t = next ();
6e213f58 1349 if (t->type == tok_keyword && t->content == "string")
6a505121 1350 vd->type = pe_string;
6e213f58 1351 else if (t->type == tok_keyword && t->content == "long")
6a505121
FCE
1352 vd->type = pe_long;
1353 else throw parse_error ("expected 'string' or 'long'");
dff50e09 1354
6a505121
FCE
1355 t = next ();
1356 }
56099f08
FCE
1357 if (t->type == tok_operator && t->content == ")")
1358 break;
1359 if (t->type == tok_operator && t->content == ",")
1360 continue;
1361 else
1362 throw parse_error ("expected ',' or ')'");
1363 }
1364
54dfabe9
FCE
1365 t = peek ();
1366 if (t && t->type == tok_embedded)
1367 fd->body = parse_embeddedcode ();
1368 else
1369 fd->body = parse_stmt_block ();
24cb178f
FCE
1370
1371 functions.push_back (fd);
2f1a1aea
FCE
1372}
1373
1374
9c0c0e46
FCE
1375probe_point*
1376parser::parse_probe_point ()
2f1a1aea 1377{
9c0c0e46 1378 probe_point* pl = new probe_point;
2f1a1aea 1379
9c0c0e46 1380 while (1)
2f1a1aea 1381 {
0c218afb 1382 const token* t = next (true); // wildcard scanning here
6e213f58
DS
1383 if (! (t->type == tok_identifier
1384 // we must allow ".return" and ".function", which are keywords
0c218afb 1385 || t->type == tok_keyword))
b4ceace2 1386 throw parse_error ("expected identifier or '*'");
9c0c0e46
FCE
1387
1388 if (pl->tok == 0) pl->tok = t;
1389
1390 probe_point::component* c = new probe_point::component;
1391 c->functor = t->content;
1392 pl->components.push_back (c);
6e3347a9 1393 // NB we may add c->arg soon
9c0c0e46
FCE
1394
1395 t = peek ();
a477f3f1 1396
6e3347a9 1397 // consume optional parameter
9c0c0e46
FCE
1398 if (t && t->type == tok_operator && t->content == "(")
1399 {
1400 next (); // consume "("
1401 c->arg = parse_literal ();
1402
1403 t = next ();
1404 if (! (t->type == tok_operator && t->content == ")"))
1405 throw parse_error ("expected ')'");
1406
1407 t = peek ();
9c0c0e46 1408 }
9c0c0e46
FCE
1409
1410 if (t && t->type == tok_operator && t->content == ".")
6e3347a9
FCE
1411 {
1412 next ();
1413 continue;
1414 }
1415
1416 // We only fall through here at the end of a probe point (past
1417 // all the dotted/parametrized components).
1418
d898100a
FCE
1419 if (t && t->type == tok_operator &&
1420 (t->content == "?" || t->content == "!"))
6e3347a9
FCE
1421 {
1422 pl->optional = true;
d898100a
FCE
1423 if (t->content == "!") pl->sufficient = true;
1424 // NB: sufficient implies optional
6e3347a9
FCE
1425 next ();
1426 t = peek ();
1427 // fall through
cbbe8080
MH
1428 }
1429
1430 if (t && t->type == tok_keyword && t->content == "if")
1431 {
1432 next ();
1433 t = peek ();
75686668 1434 if (t && ! (t->type == tok_operator && t->content == "("))
cbbe8080
MH
1435 throw parse_error ("expected '('");
1436 next ();
1437
1438 pl->condition = parse_expression ();
1439
1440 t = peek ();
75686668 1441 if (t && ! (t->type == tok_operator && t->content == ")"))
cbbe8080
MH
1442 throw parse_error ("expected ')'");
1443 next ();
1444 t = peek ();
1445 // fall through
6e3347a9
FCE
1446 }
1447
dff50e09 1448 if (t && t->type == tok_operator
6e3347a9
FCE
1449 && (t->content == "{" || t->content == "," ||
1450 t->content == "=" || t->content == "+=" ))
1451 break;
dff50e09 1452
d898100a 1453 throw parse_error ("expected one of '. , ( ? ! { = +='");
2f1a1aea
FCE
1454 }
1455
1456 return pl;
1457}
1458
1459
1460literal*
1461parser::parse_literal ()
1462{
1463 const token* t = next ();
56099f08 1464 literal* l;
2f1a1aea 1465 if (t->type == tok_string)
56099f08 1466 l = new literal_string (t->content);
16e8f21f 1467 else
9c0c0e46 1468 {
16e8f21f
JS
1469 bool neg = false;
1470 if (t->type == tok_operator && t->content == "-")
1471 {
1472 neg = true;
1473 t = next ();
1474 }
1475
1476 if (t->type == tok_number)
1477 {
1478 const char* startp = t->content.c_str ();
1479 char* endp = (char*) startp;
1480
1481 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1482 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
79e6d33f
JS
1483 // since the lexer only gives us positive digit strings, but we'll
1484 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
16e8f21f
JS
1485 errno = 0;
1486 long long value = (long long) strtoull (startp, & endp, 0);
16e8f21f 1487 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
79e6d33f 1488 || (neg && (unsigned long long) value > 9223372036854775808ULL)
16e8f21f
JS
1489 || (unsigned long long) value > 18446744073709551615ULL
1490 || value < -9223372036854775807LL-1)
dff50e09 1491 throw parse_error ("number invalid or out of range");
16e8f21f 1492
79e6d33f
JS
1493 if (neg)
1494 value = -value;
1495
16e8f21f
JS
1496 l = new literal_number (value);
1497 }
1498 else
1499 throw parse_error ("expected literal string or number");
9c0c0e46 1500 }
56099f08
FCE
1501
1502 l->tok = t;
1503 return l;
2f1a1aea
FCE
1504}
1505
1506
1507if_statement*
1508parser::parse_if_statement ()
1509{
1510 const token* t = next ();
6e213f58 1511 if (! (t->type == tok_keyword && t->content == "if"))
56099f08
FCE
1512 throw parse_error ("expected 'if'");
1513 if_statement* s = new if_statement;
1514 s->tok = t;
1515
1516 t = next ();
2f1a1aea
FCE
1517 if (! (t->type == tok_operator && t->content == "("))
1518 throw parse_error ("expected '('");
1519
2f1a1aea
FCE
1520 s->condition = parse_expression ();
1521
1522 t = next ();
1523 if (! (t->type == tok_operator && t->content == ")"))
1524 throw parse_error ("expected ')'");
1525
1526 s->thenblock = parse_statement ();
1527
1528 t = peek ();
6e213f58 1529 if (t && t->type == tok_keyword && t->content == "else")
2f1a1aea
FCE
1530 {
1531 next ();
1532 s->elseblock = parse_statement ();
1533 }
ed10c639
FCE
1534 else
1535 s->elseblock = 0; // in case not otherwise initialized
2f1a1aea
FCE
1536
1537 return s;
1538}
1539
1540
69c68955
FCE
1541expr_statement*
1542parser::parse_expr_statement ()
1543{
1544 expr_statement *es = new expr_statement;
1545 const token* t = peek ();
1546 es->tok = t;
1547 es->value = parse_expression ();
1548 return es;
1549}
1550
1551
56099f08
FCE
1552return_statement*
1553parser::parse_return_statement ()
1554{
1555 const token* t = next ();
6e213f58 1556 if (! (t->type == tok_keyword && t->content == "return"))
56099f08 1557 throw parse_error ("expected 'return'");
6e213f58
DS
1558 if (context != con_function)
1559 throw parse_error ("found 'return' not in function context");
56099f08
FCE
1560 return_statement* s = new return_statement;
1561 s->tok = t;
1562 s->value = parse_expression ();
1563 return s;
1564}
1565
1566
1567delete_statement*
1568parser::parse_delete_statement ()
1569{
1570 const token* t = next ();
6e213f58 1571 if (! (t->type == tok_keyword && t->content == "delete"))
56099f08
FCE
1572 throw parse_error ("expected 'delete'");
1573 delete_statement* s = new delete_statement;
1574 s->tok = t;
1575 s->value = parse_expression ();
1576 return s;
1577}
1578
1579
f3c26ea5
FCE
1580next_statement*
1581parser::parse_next_statement ()
1582{
1583 const token* t = next ();
6e213f58 1584 if (! (t->type == tok_keyword && t->content == "next"))
f3c26ea5 1585 throw parse_error ("expected 'next'");
6e213f58
DS
1586 if (context != con_probe)
1587 throw parse_error ("found 'next' not in probe context");
f3c26ea5
FCE
1588 next_statement* s = new next_statement;
1589 s->tok = t;
1590 return s;
1591}
1592
1593
1594break_statement*
1595parser::parse_break_statement ()
1596{
1597 const token* t = next ();
6e213f58 1598 if (! (t->type == tok_keyword && t->content == "break"))
f3c26ea5
FCE
1599 throw parse_error ("expected 'break'");
1600 break_statement* s = new break_statement;
1601 s->tok = t;
1602 return s;
1603}
1604
1605
1606continue_statement*
1607parser::parse_continue_statement ()
1608{
1609 const token* t = next ();
6e213f58 1610 if (! (t->type == tok_keyword && t->content == "continue"))
f3c26ea5
FCE
1611 throw parse_error ("expected 'continue'");
1612 continue_statement* s = new continue_statement;
1613 s->tok = t;
1614 return s;
1615}
1616
1617
69c68955
FCE
1618for_loop*
1619parser::parse_for_loop ()
1620{
f3c26ea5 1621 const token* t = next ();
6e213f58 1622 if (! (t->type == tok_keyword && t->content == "for"))
f3c26ea5
FCE
1623 throw parse_error ("expected 'for'");
1624 for_loop* s = new for_loop;
1625 s->tok = t;
1626
1627 t = next ();
1628 if (! (t->type == tok_operator && t->content == "("))
1629 throw parse_error ("expected '('");
1630
1631 // initializer + ";"
1632 t = peek ();
1633 if (t && t->type == tok_operator && t->content == ";")
1634 {
cbfbbf69
FCE
1635 s->init = 0;
1636 next ();
f3c26ea5
FCE
1637 }
1638 else
1639 {
1640 s->init = parse_expr_statement ();
1641 t = next ();
1642 if (! (t->type == tok_operator && t->content == ";"))
1643 throw parse_error ("expected ';'");
1644 }
1645
1646 // condition + ";"
1647 t = peek ();
1648 if (t && t->type == tok_operator && t->content == ";")
1649 {
1650 literal_number* l = new literal_number(1);
1651 s->cond = l;
1652 s->cond->tok = next ();
1653 }
1654 else
1655 {
1656 s->cond = parse_expression ();
1657 t = next ();
1658 if (! (t->type == tok_operator && t->content == ";"))
1659 throw parse_error ("expected ';'");
1660 }
dff50e09 1661
f3c26ea5
FCE
1662 // increment + ")"
1663 t = peek ();
1664 if (t && t->type == tok_operator && t->content == ")")
1665 {
cbfbbf69
FCE
1666 s->incr = 0;
1667 next ();
f3c26ea5
FCE
1668 }
1669 else
1670 {
1671 s->incr = parse_expr_statement ();
1672 t = next ();
1673 if (! (t->type == tok_operator && t->content == ")"))
c958a431 1674 throw parse_error ("expected ')'");
f3c26ea5
FCE
1675 }
1676
1677 // block
1678 s->block = parse_statement ();
1679
1680 return s;
1681}
1682
1683
1684for_loop*
1685parser::parse_while_loop ()
1686{
1687 const token* t = next ();
6e213f58 1688 if (! (t->type == tok_keyword && t->content == "while"))
f3c26ea5
FCE
1689 throw parse_error ("expected 'while'");
1690 for_loop* s = new for_loop;
1691 s->tok = t;
1692
1693 t = next ();
1694 if (! (t->type == tok_operator && t->content == "("))
1695 throw parse_error ("expected '('");
1696
1697 // dummy init and incr fields
cbfbbf69
FCE
1698 s->init = 0;
1699 s->incr = 0;
f3c26ea5
FCE
1700
1701 // condition
1702 s->cond = parse_expression ();
1703
f3c26ea5
FCE
1704 t = next ();
1705 if (! (t->type == tok_operator && t->content == ")"))
1706 throw parse_error ("expected ')'");
dff50e09 1707
f3c26ea5
FCE
1708 // block
1709 s->block = parse_statement ();
1710
1711 return s;
69c68955
FCE
1712}
1713
1714
1715foreach_loop*
1716parser::parse_foreach_loop ()
1717{
1718 const token* t = next ();
6e213f58 1719 if (! (t->type == tok_keyword && t->content == "foreach"))
69c68955
FCE
1720 throw parse_error ("expected 'foreach'");
1721 foreach_loop* s = new foreach_loop;
1722 s->tok = t;
93484556 1723 s->sort_direction = 0;
27f21e8c 1724 s->limit = NULL;
69c68955
FCE
1725
1726 t = next ();
1727 if (! (t->type == tok_operator && t->content == "("))
1728 throw parse_error ("expected '('");
1729
1730 // see also parse_array_in
1731
1732 bool parenthesized = false;
1733 t = peek ();
1734 if (t && t->type == tok_operator && t->content == "[")
1735 {
1736 next ();
1737 parenthesized = true;
1738 }
1739
1740 while (1)
1741 {
1742 t = next ();
1743 if (! (t->type == tok_identifier))
1744 throw parse_error ("expected identifier");
1745 symbol* sym = new symbol;
1746 sym->tok = t;
1747 sym->name = t->content;
1748 s->indexes.push_back (sym);
1749
93484556
FCE
1750 t = peek ();
1751 if (t && t->type == tok_operator &&
1752 (t->content == "+" || t->content == "-"))
1753 {
1754 if (s->sort_direction)
1755 throw parse_error ("multiple sort directives");
1756 s->sort_direction = (t->content == "+") ? 1 : -1;
1757 s->sort_column = s->indexes.size();
1758 next();
1759 }
1760
69c68955
FCE
1761 if (parenthesized)
1762 {
93484556 1763 t = peek ();
69c68955
FCE
1764 if (t && t->type == tok_operator && t->content == ",")
1765 {
1766 next ();
1767 continue;
1768 }
1769 else if (t && t->type == tok_operator && t->content == "]")
1770 {
1771 next ();
1772 break;
1773 }
dff50e09 1774 else
69c68955
FCE
1775 throw parse_error ("expected ',' or ']'");
1776 }
1777 else
1778 break; // expecting only one expression
1779 }
1780
1781 t = next ();
6e213f58 1782 if (! (t->type == tok_keyword && t->content == "in"))
69c68955 1783 throw parse_error ("expected 'in'");
dff50e09 1784
d02548c0 1785 s->base = parse_indexable();
69c68955 1786
93484556
FCE
1787 t = peek ();
1788 if (t && t->type == tok_operator &&
1789 (t->content == "+" || t->content == "-"))
1790 {
1791 if (s->sort_direction)
1792 throw parse_error ("multiple sort directives");
1793 s->sort_direction = (t->content == "+") ? 1 : -1;
1794 s->sort_column = 0;
1795 next();
1796 }
1797
27f21e8c
DS
1798 t = peek ();
1799 if (tok_is(t, tok_keyword, "limit"))
1800 {
1801 next (); // get past the "limit"
1802 s->limit = parse_expression ();
1803 }
1804
69c68955
FCE
1805 t = next ();
1806 if (! (t->type == tok_operator && t->content == ")"))
1807 throw parse_error ("expected ')'");
1808
1809 s->block = parse_statement ();
1810 return s;
1811}
1812
1813
2f1a1aea
FCE
1814expression*
1815parser::parse_expression ()
1816{
1817 return parse_assignment ();
1818}
1819
2f1a1aea
FCE
1820
1821expression*
1822parser::parse_assignment ()
1823{
1824 expression* op1 = parse_ternary ();
1825
1826 const token* t = peek ();
82919855 1827 // right-associative operators
dff50e09 1828 if (t && t->type == tok_operator
2f1a1aea 1829 && (t->content == "=" ||
82919855 1830 t->content == "<<<" ||
2f1a1aea 1831 t->content == "+=" ||
bb2e3076
FCE
1832 t->content == "-=" ||
1833 t->content == "*=" ||
1834 t->content == "/=" ||
1835 t->content == "%=" ||
1836 t->content == "<<=" ||
1837 t->content == ">>=" ||
1838 t->content == "&=" ||
1839 t->content == "^=" ||
1840 t->content == "|=" ||
d5d7c2cc 1841 t->content == ".=" ||
dff50e09 1842 false))
2f1a1aea 1843 {
bb2e3076 1844 // NB: lvalueness is checked during elaboration / translation
2f1a1aea 1845 assignment* e = new assignment;
56099f08 1846 e->left = op1;
2f1a1aea 1847 e->op = t->content;
56099f08 1848 e->tok = t;
2f1a1aea 1849 next ();
82919855 1850 e->right = parse_expression ();
56099f08 1851 op1 = e;
2f1a1aea 1852 }
56099f08
FCE
1853
1854 return op1;
2f1a1aea
FCE
1855}
1856
1857
1858expression*
1859parser::parse_ternary ()
1860{
1861 expression* op1 = parse_logical_or ();
1862
1863 const token* t = peek ();
1864 if (t && t->type == tok_operator && t->content == "?")
1865 {
2f1a1aea 1866 ternary_expression* e = new ternary_expression;
56099f08 1867 e->tok = t;
2f1a1aea 1868 e->cond = op1;
56099f08
FCE
1869 next ();
1870 e->truevalue = parse_expression (); // XXX
2f1a1aea
FCE
1871
1872 t = next ();
1873 if (! (t->type == tok_operator && t->content == ":"))
1874 throw parse_error ("expected ':'");
1875
56099f08 1876 e->falsevalue = parse_expression (); // XXX
2f1a1aea
FCE
1877 return e;
1878 }
1879 else
1880 return op1;
1881}
1882
1883
1884expression*
1885parser::parse_logical_or ()
1886{
1887 expression* op1 = parse_logical_and ();
dff50e09 1888
2f1a1aea 1889 const token* t = peek ();
56099f08 1890 while (t && t->type == tok_operator && t->content == "||")
2f1a1aea 1891 {
2f1a1aea 1892 logical_or_expr* e = new logical_or_expr;
56099f08
FCE
1893 e->tok = t;
1894 e->op = t->content;
2f1a1aea 1895 e->left = op1;
56099f08
FCE
1896 next ();
1897 e->right = parse_logical_and ();
1898 op1 = e;
1899 t = peek ();
2f1a1aea 1900 }
56099f08
FCE
1901
1902 return op1;
2f1a1aea
FCE
1903}
1904
1905
1906expression*
1907parser::parse_logical_and ()
1908{
bb2e3076 1909 expression* op1 = parse_boolean_or ();
2f1a1aea
FCE
1910
1911 const token* t = peek ();
56099f08 1912 while (t && t->type == tok_operator && t->content == "&&")
2f1a1aea 1913 {
2f1a1aea
FCE
1914 logical_and_expr *e = new logical_and_expr;
1915 e->left = op1;
56099f08
FCE
1916 e->op = t->content;
1917 e->tok = t;
1918 next ();
bb2e3076
FCE
1919 e->right = parse_boolean_or ();
1920 op1 = e;
1921 t = peek ();
1922 }
1923
1924 return op1;
1925}
1926
1927
1928expression*
1929parser::parse_boolean_or ()
1930{
1931 expression* op1 = parse_boolean_xor ();
1932
1933 const token* t = peek ();
1934 while (t && t->type == tok_operator && t->content == "|")
1935 {
1936 binary_expression* e = new binary_expression;
1937 e->left = op1;
1938 e->op = t->content;
1939 e->tok = t;
1940 next ();
1941 e->right = parse_boolean_xor ();
1942 op1 = e;
1943 t = peek ();
1944 }
1945
1946 return op1;
1947}
1948
1949
1950expression*
1951parser::parse_boolean_xor ()
1952{
1953 expression* op1 = parse_boolean_and ();
1954
1955 const token* t = peek ();
1956 while (t && t->type == tok_operator && t->content == "^")
1957 {
1958 binary_expression* e = new binary_expression;
1959 e->left = op1;
1960 e->op = t->content;
1961 e->tok = t;
1962 next ();
1963 e->right = parse_boolean_and ();
1964 op1 = e;
1965 t = peek ();
1966 }
1967
1968 return op1;
1969}
1970
1971
1972expression*
1973parser::parse_boolean_and ()
1974{
1975 expression* op1 = parse_array_in ();
1976
1977 const token* t = peek ();
1978 while (t && t->type == tok_operator && t->content == "&")
1979 {
1980 binary_expression* e = new binary_expression;
1981 e->left = op1;
1982 e->op = t->content;
1983 e->tok = t;
1984 next ();
56099f08
FCE
1985 e->right = parse_array_in ();
1986 op1 = e;
1987 t = peek ();
2f1a1aea 1988 }
56099f08
FCE
1989
1990 return op1;
2f1a1aea
FCE
1991}
1992
1993
1994expression*
1995parser::parse_array_in ()
1996{
ce10591c 1997 // This is a very tricky case. All these are legit expressions:
69c68955 1998 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
ce10591c
FCE
1999 vector<expression*> indexes;
2000 bool parenthesized = false;
2f1a1aea
FCE
2001
2002 const token* t = peek ();
69c68955 2003 if (t && t->type == tok_operator && t->content == "[")
ce10591c
FCE
2004 {
2005 next ();
2006 parenthesized = true;
2007 }
2008
2009 while (1)
2010 {
2011 expression* op1 = parse_comparison ();
2012 indexes.push_back (op1);
2013
2014 if (parenthesized)
2015 {
2016 const token* t = peek ();
2017 if (t && t->type == tok_operator && t->content == ",")
2018 {
2019 next ();
2020 continue;
2021 }
69c68955 2022 else if (t && t->type == tok_operator && t->content == "]")
ce10591c
FCE
2023 {
2024 next ();
2025 break;
2026 }
dff50e09 2027 else
69c68955 2028 throw parse_error ("expected ',' or ']'");
ce10591c
FCE
2029 }
2030 else
2031 break; // expecting only one expression
2032 }
2033
2034 t = peek ();
6e213f58 2035 if (t && t->type == tok_keyword && t->content == "in")
2f1a1aea 2036 {
2f1a1aea 2037 array_in *e = new array_in;
56099f08 2038 e->tok = t;
ce10591c
FCE
2039 next (); // swallow "in"
2040
2041 arrayindex* a = new arrayindex;
2042 a->indexes = indexes;
d02548c0
GH
2043 a->base = parse_indexable();
2044 a->tok = a->base->get_tok();
ce10591c 2045 e->operand = a;
2f1a1aea
FCE
2046 return e;
2047 }
ce10591c
FCE
2048 else if (indexes.size() == 1) // no "in" - need one expression only
2049 return indexes[0];
2f1a1aea 2050 else
ce10591c 2051 throw parse_error ("unexpected comma-separated expression list");
2f1a1aea
FCE
2052}
2053
2054
2055expression*
2056parser::parse_comparison ()
2057{
bb2e3076 2058 expression* op1 = parse_shift ();
2f1a1aea
FCE
2059
2060 const token* t = peek ();
dff50e09 2061 while (t && t->type == tok_operator
553d27a5
FCE
2062 && (t->content == ">" ||
2063 t->content == "<" ||
2064 t->content == "==" ||
2065 t->content == "!=" ||
2066 t->content == "<=" ||
bb2e3076 2067 t->content == ">="))
2f1a1aea
FCE
2068 {
2069 comparison* e = new comparison;
2070 e->left = op1;
2071 e->op = t->content;
56099f08 2072 e->tok = t;
2f1a1aea 2073 next ();
bb2e3076
FCE
2074 e->right = parse_shift ();
2075 op1 = e;
2076 t = peek ();
2077 }
2078
2079 return op1;
2080}
2081
2082
2083expression*
2084parser::parse_shift ()
2085{
2086 expression* op1 = parse_concatenation ();
2087
2088 const token* t = peek ();
dff50e09 2089 while (t && t->type == tok_operator &&
bb2e3076
FCE
2090 (t->content == "<<" || t->content == ">>"))
2091 {
2092 binary_expression* e = new binary_expression;
2093 e->left = op1;
2094 e->op = t->content;
2095 e->tok = t;
2096 next ();
56099f08
FCE
2097 e->right = parse_concatenation ();
2098 op1 = e;
2099 t = peek ();
2f1a1aea 2100 }
56099f08
FCE
2101
2102 return op1;
2f1a1aea
FCE
2103}
2104
2105
2106expression*
2107parser::parse_concatenation ()
2108{
2109 expression* op1 = parse_additive ();
2110
2111 const token* t = peek ();
2112 // XXX: the actual awk string-concatenation operator is *whitespace*.
2113 // I don't know how to easily to model that here.
56099f08 2114 while (t && t->type == tok_operator && t->content == ".")
2f1a1aea
FCE
2115 {
2116 concatenation* e = new concatenation;
2117 e->left = op1;
2118 e->op = t->content;
56099f08 2119 e->tok = t;
2f1a1aea 2120 next ();
56099f08
FCE
2121 e->right = parse_additive ();
2122 op1 = e;
2123 t = peek ();
2f1a1aea 2124 }
56099f08
FCE
2125
2126 return op1;
2f1a1aea
FCE
2127}
2128
2129
2130expression*
2131parser::parse_additive ()
2132{
2133 expression* op1 = parse_multiplicative ();
2134
2135 const token* t = peek ();
dff50e09 2136 while (t && t->type == tok_operator
2f1a1aea
FCE
2137 && (t->content == "+" || t->content == "-"))
2138 {
2139 binary_expression* e = new binary_expression;
2140 e->op = t->content;
2141 e->left = op1;
56099f08 2142 e->tok = t;
2f1a1aea 2143 next ();
56099f08
FCE
2144 e->right = parse_multiplicative ();
2145 op1 = e;
2146 t = peek ();
2f1a1aea 2147 }
56099f08
FCE
2148
2149 return op1;
2f1a1aea
FCE
2150}
2151
2152
2153expression*
2154parser::parse_multiplicative ()
2155{
2156 expression* op1 = parse_unary ();
2157
2158 const token* t = peek ();
dff50e09 2159 while (t && t->type == tok_operator
2f1a1aea
FCE
2160 && (t->content == "*" || t->content == "/" || t->content == "%"))
2161 {
2162 binary_expression* e = new binary_expression;
2163 e->op = t->content;
2164 e->left = op1;
56099f08 2165 e->tok = t;
2f1a1aea 2166 next ();
56099f08
FCE
2167 e->right = parse_unary ();
2168 op1 = e;
2169 t = peek ();
2f1a1aea 2170 }
56099f08
FCE
2171
2172 return op1;
2f1a1aea
FCE
2173}
2174
2175
2176expression*
2177parser::parse_unary ()
2178{
2179 const token* t = peek ();
dff50e09
FCE
2180 if (t && t->type == tok_operator
2181 && (t->content == "+" ||
2182 t->content == "-" ||
bb2e3076
FCE
2183 t->content == "!" ||
2184 t->content == "~" ||
2185 false))
2f1a1aea
FCE
2186 {
2187 unary_expression* e = new unary_expression;
2188 e->op = t->content;
56099f08 2189 e->tok = t;
2f1a1aea 2190 next ();
3a20432b 2191 e->operand = parse_crement ();
2f1a1aea
FCE
2192 return e;
2193 }
2194 else
bb2e3076 2195 return parse_crement ();
2f1a1aea
FCE
2196}
2197
2198
2199expression*
2200parser::parse_crement () // as in "increment" / "decrement"
2201{
cbfbbf69
FCE
2202 // NB: Ideally, we'd parse only a symbol as an operand to the
2203 // *crement operators, instead of a general expression value. We'd
2204 // need more complex lookahead code to tell apart the postfix cases.
2205 // So we just punt, and leave it to pass-3 to signal errors on
2206 // cases like "4++".
2207
2f1a1aea 2208 const token* t = peek ();
dff50e09 2209 if (t && t->type == tok_operator
2f1a1aea
FCE
2210 && (t->content == "++" || t->content == "--"))
2211 {
2212 pre_crement* e = new pre_crement;
2213 e->op = t->content;
56099f08 2214 e->tok = t;
2f1a1aea
FCE
2215 next ();
2216 e->operand = parse_value ();
2217 return e;
2218 }
2219
2220 // post-crement or non-crement
2221 expression *op1 = parse_value ();
dff50e09 2222
2f1a1aea 2223 t = peek ();
dff50e09 2224 if (t && t->type == tok_operator
2f1a1aea
FCE
2225 && (t->content == "++" || t->content == "--"))
2226 {
2227 post_crement* e = new post_crement;
2228 e->op = t->content;
56099f08 2229 e->tok = t;
2f1a1aea
FCE
2230 next ();
2231 e->operand = op1;
2232 return e;
2233 }
2234 else
2235 return op1;
2236}
2237
2238
2239expression*
2240parser::parse_value ()
2241{
2242 const token* t = peek ();
2243 if (! t)
2244 throw parse_error ("expected value");
2245
2246 if (t->type == tok_operator && t->content == "(")
2247 {
2248 next ();
2249 expression* e = parse_expression ();
2250 t = next ();
2251 if (! (t->type == tok_operator && t->content == ")"))
2252 throw parse_error ("expected ')'");
2253 return e;
2254 }
03c75a4a
JS
2255 else if (t->type == tok_operator && t->content == "&")
2256 {
2257 next ();
2258 t = peek ();
2259 if (t->type != tok_identifier ||
2260 (t->content != "@cast" && t->content[0] != '$'))
2261 throw parse_error ("expected @cast or $var");
2262
2263 target_symbol *ts = static_cast<target_symbol*>(parse_symbol());
2264 ts->addressof = true;
2265 return ts;
2266 }
2f1a1aea
FCE
2267 else if (t->type == tok_identifier)
2268 return parse_symbol ();
2269 else
2270 return parse_literal ();
2271}
2272
2273
d02548c0
GH
2274const token *
2275parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name)
2276{
2277 hop = NULL;
2278 const token* t = expect_ident (name);
2279 if (name == "@hist_linear" || name == "@hist_log")
2280 {
2281 hop = new hist_op;
2282 if (name == "@hist_linear")
2283 hop->htype = hist_linear;
2284 else if (name == "@hist_log")
2285 hop->htype = hist_log;
2286 hop->tok = t;
2287 expect_op("(");
2288 hop->stat = parse_expression ();
2289 int64_t tnum;
2290 if (hop->htype == hist_linear)
2291 {
2292 for (size_t i = 0; i < 3; ++i)
2293 {
2294 expect_op (",");
2295 expect_number (tnum);
2296 hop->params.push_back (tnum);
2297 }
2298 }
d02548c0
GH
2299 expect_op(")");
2300 }
2301 return t;
2302}
2303
2304
2305indexable*
2306parser::parse_indexable ()
2307{
2308 hist_op *hop = NULL;
2309 string name;
2310 const token *tok = parse_hist_op_or_bare_name(hop, name);
2311 if (hop)
2312 return hop;
2313 else
2314 {
2315 symbol* sym = new symbol;
2316 sym->name = name;
2317 sym->tok = tok;
2318 return sym;
2319 }
2320}
2321
2322
2323// var, indexable[index], func(parms), printf("...", ...), $var, $var->member, @stat_op(stat)
2f1a1aea 2324expression*
dff50e09 2325parser::parse_symbol ()
2f1a1aea 2326{
d02548c0
GH
2327 hist_op *hop = NULL;
2328 symbol *sym = NULL;
d7f3e0c5 2329 string name;
d02548c0
GH
2330 const token *t = parse_hist_op_or_bare_name(hop, name);
2331
2332 if (!hop)
0fefb486 2333 {
dff50e09 2334 // If we didn't get a hist_op, then we did get an identifier. We can
d02548c0
GH
2335 // now scrutinize this identifier for the various magic forms of identifier
2336 // (printf, @stat_op, and $var...)
2337
32784362 2338 bool pf_stream, pf_format, pf_delim, pf_newline, pf_char;
3cb17058 2339
9b5af295
JS
2340 if (name == "@cast")
2341 {
2342 // type-punning time
2343 cast_op *cop = new cast_op;
2344 cop->tok = t;
2345 cop->base_name = name;
2346 expect_op("(");
2347 cop->operand = parse_expression ();
2348 expect_op(",");
2349 expect_unknown(tok_string, cop->type);
c15b6083
MW
2350 // types never start with "struct<space>" or "union<space>",
2351 // so gobble it up.
2352 if (cop->type.compare(0, 7, "struct ") == 0)
2353 cop->type = cop->type.substr(7);
2354 if (cop->type.compare(0, 6, "union ") == 0)
2355 cop->type = cop->type.substr(6);
9b5af295
JS
2356 if (peek_op (","))
2357 {
2358 next();
2359 expect_unknown(tok_string, cop->module);
2360 }
2361 expect_op(")");
81931eab
JS
2362 parse_target_symbol_components(cop);
2363
9b5af295
JS
2364 // if there aren't any dereferences, then the cast is pointless
2365 if (cop->components.empty())
2366 {
2367 expression *op = cop->operand;
2368 delete cop;
2369 return op;
2370 }
2371 return cop;
2372 }
2373
2374 else if (name.size() > 0 && name[0] == '@')
d7f3e0c5 2375 {
d02548c0
GH
2376 stat_op *sop = new stat_op;
2377 if (name == "@avg")
2378 sop->ctype = sc_average;
2379 else if (name == "@count")
2380 sop->ctype = sc_count;
2381 else if (name == "@sum")
2382 sop->ctype = sc_sum;
2383 else if (name == "@min")
2384 sop->ctype = sc_min;
2385 else if (name == "@max")
2386 sop->ctype = sc_max;
2387 else
2388 throw parse_error("unknown statistic operator " + name);
2389 expect_op("(");
2390 sop->tok = t;
2391 sop->stat = parse_expression ();
2392 expect_op(")");
2393 return sop;
2394 }
dff50e09 2395
3cb17058 2396 else if (print_format::parse_print(name,
32784362 2397 pf_stream, pf_format, pf_delim, pf_newline, pf_char))
d02548c0
GH
2398 {
2399 print_format *fmt = new print_format;
2400 fmt->tok = t;
3cb17058
JS
2401 fmt->print_to_stream = pf_stream;
2402 fmt->print_with_format = pf_format;
2403 fmt->print_with_delim = pf_delim;
2404 fmt->print_with_newline = pf_newline;
32784362 2405 fmt->print_char = pf_char;
01133ccb 2406
d02548c0 2407 expect_op("(");
2f5a9fd3 2408 if ((name == "print" || name == "println") &&
3cb17058 2409 (peek_kw("@hist_linear") || peek_kw("@hist_log")))
a4636912
GH
2410 {
2411 // We have a special case where we recognize
2412 // print(@hist_foo(bar)) as a magic print-the-histogram
2413 // construct. This is sort of gross but it avoids
2414 // promoting histogram references to typeful
2415 // expressions.
dff50e09 2416
1bbeef03
GH
2417 hop = NULL;
2418 t = parse_hist_op_or_bare_name(hop, name);
2419 assert(hop);
dff50e09 2420
1bbeef03
GH
2421 // It is, sadly, possible that even while parsing a
2422 // hist_op, we *mis-guessed* and the user wishes to
2423 // print(@hist_op(foo)[bucket]), a scalar. In that case
2424 // we must parse the arrayindex and print an expression.
dff50e09 2425
1bbeef03
GH
2426 if (!peek_op ("["))
2427 fmt->hist = hop;
2428 else
2429 {
2430 // This is simplified version of the
2431 // multi-array-index parser below, because we can
2432 // only ever have one index on a histogram anyways.
2433 expect_op("[");
2434 struct arrayindex* ai = new arrayindex;
2435 ai->tok = t;
2436 ai->base = hop;
2437 ai->indexes.push_back (parse_expression ());
2438 expect_op("]");
2439 fmt->args.push_back(ai);
2440 }
a4636912 2441 }
d7f3e0c5 2442 else
d02548c0 2443 {
3cb17058
JS
2444 int min_args = 0;
2445 if (fmt->print_with_format)
2446 {
2447 // Consume and convert a format string. Agreement between the
2448 // format string and the arguments is postponed to the
2449 // typechecking phase.
2450 string tmp;
2451 expect_unknown (tok_string, tmp);
2452 fmt->raw_components = tmp;
2453 fmt->components = print_format::string_to_components (tmp);
2454 }
2455 else if (fmt->print_with_delim)
2456 {
2457 // Consume a delimiter to separate arguments.
2458 fmt->delimiter.clear();
2459 fmt->delimiter.type = print_format::conv_literal;
2460 expect_unknown (tok_string, fmt->delimiter.literal_string);
2461 min_args = 2;
2462 }
2463 else
2464 {
2465 // If we are not printing with a format string, we must have
2466 // at least one argument (of any type).
2467 expression *e = parse_expression ();
2468 fmt->args.push_back(e);
2469 }
2470
2471 // Consume any subsequent arguments.
2472 while (min_args || !peek_op (")"))
2473 {
2474 expect_op(",");
2475 expression *e = parse_expression ();
2476 fmt->args.push_back(e);
2477 if (min_args)
2478 --min_args;
2479 }
d02548c0
GH
2480 }
2481 expect_op(")");
2482 return fmt;
2483 }
dff50e09 2484
d02548c0
GH
2485 else if (name.size() > 0 && name[0] == '$')
2486 {
2487 // target_symbol time
2488 target_symbol *tsym = new target_symbol;
2489 tsym->tok = t;
2490 tsym->base_name = name;
81931eab 2491 parse_target_symbol_components(tsym);
d02548c0
GH
2492 return tsym;
2493 }
2494
2495 else if (peek_op ("(")) // function call
2496 {
2497 next ();
2498 struct functioncall* f = new functioncall;
2499 f->tok = t;
2500 f->function = name;
2501 // Allow empty actual parameter list
2502 if (peek_op (")"))
2503 {
2504 next ();
2505 return f;
2506 }
2507 while (1)
2508 {
2509 f->args.push_back (parse_expression ());
2510 if (peek_op (")"))
2511 {
2512 next();
2513 break;
2514 }
2515 else if (peek_op (","))
2516 {
2517 next();
2518 continue;
2519 }
2520 else
2521 throw parse_error ("expected ',' or ')'");
2522 }
2523 return f;
2524 }
2525
2526 else
2527 {
2528 sym = new symbol;
2529 sym->name = name;
2530 sym->tok = t;
d7f3e0c5 2531 }
0fefb486 2532 }
dff50e09
FCE
2533
2534 // By now, either we had a hist_op in the first place, or else
d02548c0
GH
2535 // we had a plain word and it was converted to a symbol.
2536
70c743d8 2537 assert (!hop != !sym); // logical XOR
d02548c0
GH
2538
2539 // All that remains is to check for array indexing
2540
d7f3e0c5 2541 if (peek_op ("[")) // array
2f1a1aea
FCE
2542 {
2543 next ();
2544 struct arrayindex* ai = new arrayindex;
d02548c0
GH
2545 ai->tok = t;
2546
2547 if (hop)
2548 ai->base = hop;
2549 else
2550 ai->base = sym;
2551
2f1a1aea
FCE
2552 while (1)
2553 {
2554 ai->indexes.push_back (parse_expression ());
d7f3e0c5 2555 if (peek_op ("]"))
dff50e09
FCE
2556 {
2557 next();
2558 break;
d7f3e0c5
GH
2559 }
2560 else if (peek_op (","))
2561 {
2562 next();
2563 continue;
2564 }
2f1a1aea
FCE
2565 else
2566 throw parse_error ("expected ',' or ']'");
2567 }
2568 return ai;
2569 }
d02548c0
GH
2570
2571 // If we got to here, we *should* have a symbol; if we have
2572 // a hist_op on its own, it doesn't count as an expression,
2573 // so we throw a parse error.
2574
2575 if (hop)
2576 throw parse_error("base histogram operator where expression expected", t);
dff50e09
FCE
2577
2578 return sym;
2f1a1aea 2579}
56099f08 2580
81931eab
JS
2581
2582void
2583parser::parse_target_symbol_components (target_symbol* e)
2584{
2585 while (true)
2586 {
81931eab
JS
2587 if (peek_op ("->"))
2588 {
c67847a0
JS
2589 const token* t = next();
2590 string member;
2591 expect_ident_or_keyword (member);
2592 e->components.push_back (target_symbol::component(t, member));
81931eab
JS
2593 }
2594 else if (peek_op ("["))
2595 {
c67847a0 2596 const token* t = next();
6fda2dff
JS
2597 expression* index = parse_expression();
2598 literal_number* ln = dynamic_cast<literal_number*>(index);
2599 if (ln)
2600 e->components.push_back (target_symbol::component(t, ln->value));
2601 else
2602 e->components.push_back (target_symbol::component(t, index));
81931eab 2603 expect_op ("]");
81931eab
JS
2604 }
2605 else
2606 break;
2607 }
2608}
2609
73267b89 2610/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.370652 seconds and 5 git commands to generate.