]> sourceware.org Git - systemtap.git/blame - parse.cxx
Consolidate task_finder/vma tracker initialization.
[systemtap.git] / parse.cxx
CommitLineData
2f1a1aea 1// recursive descent parser for systemtap scripts
f4fe2e93 2// Copyright (C) 2005-2010 Red Hat Inc.
77a5c1f9 3// Copyright (C) 2006 Intel Corporation.
5811366a 4// Copyright (C) 2007 Bull S.A.S
69c68955
FCE
5//
6// This file is part of systemtap, and is free software. You can
7// redistribute it and/or modify it under the terms of the GNU General
8// Public License (GPL); either version 2, or (at your option) any
9// later version.
2f1a1aea 10
2b066ec1 11#include "config.h"
2f1a1aea
FCE
12#include "staptree.h"
13#include "parse.h"
177a8ead 14#include "session.h"
3f99432c
FCE
15#include "util.h"
16
2b066ec1 17#include <iostream>
eacb10ce 18
2b066ec1 19#include <fstream>
2f1a1aea 20#include <cctype>
9c0c0e46 21#include <cstdlib>
29e64872 22#include <cassert>
9c0c0e46
FCE
23#include <cerrno>
24#include <climits>
57b73400 25#include <sstream>
f74fb737 26#include <cstring>
3f99432c 27#include <cctype>
eacb10ce
FCE
28#include <iterator>
29
7a468d68
FCE
30extern "C" {
31#include <fnmatch.h>
32}
2f1a1aea
FCE
33
34using namespace std;
35
c18f07f8
JS
36
37class lexer
38{
39public:
c5be7511 40 bool ate_comment; // the most recent token followed a comment
c18f07f8
JS
41 token* scan (bool wildcard=false);
42 lexer (istream&, const string&, systemtap_session&);
43 void set_current_file (stapfile* f);
44
45private:
46 inline int input_get ();
47 inline int input_peek (unsigned n=0);
48 void input_put (const string&, const token*);
49 string input_name;
50 string input_contents;
51 const char *input_pointer; // index into input_contents
52 const char *input_end;
53 unsigned cursor_suspend_count;
54 unsigned cursor_suspend_line;
55 unsigned cursor_suspend_column;
56 unsigned cursor_line;
57 unsigned cursor_column;
58 systemtap_session& session;
59 stapfile* current_file;
60 static set<string> keywords;
61};
62
63
64class parser
65{
66public:
67 parser (systemtap_session& s, istream& i, bool p);
68 parser (systemtap_session& s, const string& n, bool p);
69 ~parser ();
70
71 stapfile* parse ();
72
73private:
74 typedef enum {
75 PP_NONE,
76 PP_KEEP_THEN,
77 PP_SKIP_THEN,
78 PP_KEEP_ELSE,
79 PP_SKIP_ELSE,
80 } pp_state_t;
81
82 systemtap_session& session;
83 string input_name;
84 istream* free_input;
85 lexer input;
86 bool privileged;
87 parse_context context;
88
89 // preprocessing subordinate
90 vector<pair<const token*, pp_state_t> > pp_state;
91 const token* scan_pp (bool wildcard=false);
92 const token* skip_pp ();
93
94 // scanning state
95 const token* last ();
96 const token* next (bool wildcard=false);
97 const token* peek (bool wildcard=false);
98
99 const token* last_t; // the last value returned by peek() or next()
100 const token* next_t; // lookahead token
101
102 // expectations
103 const token* expect_known (token_type tt, string const & expected);
104 const token* expect_unknown (token_type tt, string & target);
105 const token* expect_unknown2 (token_type tt1, token_type tt2,
106 string & target);
107
108 // convenience forms
109 const token* expect_op (string const & expected);
110 const token* expect_kw (string const & expected);
111 const token* expect_number (int64_t & expected);
112 const token* expect_ident (string & target);
113 const token* expect_ident_or_keyword (string & target);
114 bool peek_op (string const & op);
115 bool peek_kw (string const & kw);
116
117 void print_error (const parse_error& pe);
118 unsigned num_errors;
119
120private: // nonterminals
121 void parse_probe (vector<probe*>&, vector<probe_alias*>&);
122 void parse_global (vector<vardecl*>&, vector<probe*>&);
123 void parse_functiondecl (vector<functiondecl*>&);
124 embeddedcode* parse_embeddedcode ();
125 probe_point* parse_probe_point ();
126 literal* parse_literal ();
127 block* parse_stmt_block ();
128 try_block* parse_try_block ();
129 statement* parse_statement ();
130 if_statement* parse_if_statement ();
131 for_loop* parse_for_loop ();
132 for_loop* parse_while_loop ();
133 foreach_loop* parse_foreach_loop ();
134 expr_statement* parse_expr_statement ();
135 return_statement* parse_return_statement ();
136 delete_statement* parse_delete_statement ();
137 next_statement* parse_next_statement ();
138 break_statement* parse_break_statement ();
139 continue_statement* parse_continue_statement ();
140 indexable* parse_indexable ();
141 const token *parse_hist_op_or_bare_name (hist_op *&hop, string &name);
142 target_symbol *parse_target_symbol (const token* t);
143 expression* parse_defined_op (const token* t);
144 expression* parse_expression ();
145 expression* parse_assignment ();
146 expression* parse_ternary ();
147 expression* parse_logical_or ();
148 expression* parse_logical_and ();
149 expression* parse_boolean_or ();
150 expression* parse_boolean_xor ();
151 expression* parse_boolean_and ();
152 expression* parse_array_in ();
153 expression* parse_comparison ();
154 expression* parse_shift ();
155 expression* parse_concatenation ();
156 expression* parse_additive ();
157 expression* parse_multiplicative ();
158 expression* parse_unary ();
159 expression* parse_crement ();
160 expression* parse_value ();
161 expression* parse_symbol ();
162
163 void parse_target_symbol_components (target_symbol* e);
164};
165
166
2f1a1aea
FCE
167// ------------------------------------------------------------------------
168
c18f07f8
JS
169stapfile*
170parse (systemtap_session& s, istream& i, bool pr)
171{
172 parser p (s, i, pr);
173 return p.parse ();
174}
175
176
177stapfile*
178parse (systemtap_session& s, const string& n, bool pr)
179{
180 parser p (s, n, pr);
181 return p.parse ();
182}
183
184// ------------------------------------------------------------------------
bb2e3076
FCE
185
186
177a8ead
FCE
187parser::parser (systemtap_session& s, istream& i, bool p):
188 session (s),
24cb178f 189 input_name ("<input>"), free_input (0),
213bee8f 190 input (i, input_name, s), privileged (p),
6e213f58 191 context(con_unknown), last_t (0), next_t (0), num_errors (0)
2f1a1aea
FCE
192{ }
193
177a8ead
FCE
194parser::parser (systemtap_session& s, const string& fn, bool p):
195 session (s),
2f1a1aea 196 input_name (fn), free_input (new ifstream (input_name.c_str(), ios::in)),
213bee8f 197 input (* free_input, input_name, s), privileged (p),
6e213f58 198 context(con_unknown), last_t (0), next_t (0), num_errors (0)
2f1a1aea
FCE
199{ }
200
201parser::~parser()
202{
203 if (free_input) delete free_input;
204}
205
d7f3e0c5
GH
206static string
207tt2str(token_type tt)
208{
209 switch (tt)
210 {
211 case tok_junk: return "junk";
212 case tok_identifier: return "identifier";
213 case tok_operator: return "operator";
214 case tok_string: return "string";
215 case tok_number: return "number";
216 case tok_embedded: return "embedded-code";
6e213f58 217 case tok_keyword: return "keyword";
d7f3e0c5
GH
218 }
219 return "unknown token";
220}
82919855 221
0323ed4d
WC
222ostream&
223operator << (ostream& o, const source_loc& loc)
224{
a704a23b 225 o << loc.file->name << ":"
0323ed4d
WC
226 << loc.line << ":"
227 << loc.column;
228
229 return o;
230}
231
56099f08
FCE
232ostream&
233operator << (ostream& o, const token& t)
234{
d7f3e0c5 235 o << tt2str(t.type);
56099f08 236
6e213f58 237 if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
56099f08 238 {
24cb178f
FCE
239 o << " '";
240 for (unsigned i=0; i<t.content.length(); i++)
241 {
242 char c = t.content[i];
243 o << (isprint (c) ? c : '?');
244 }
245 o << "'";
56099f08 246 }
56099f08 247
dff50e09 248 o << " at "
0323ed4d 249 << t.location;
56099f08
FCE
250
251 return o;
252}
253
254
dff50e09 255void
2f1a1aea
FCE
256parser::print_error (const parse_error &pe)
257{
1b1b4ceb 258 string align_parse_error (" ");
2f1a1aea
FCE
259 cerr << "parse error: " << pe.what () << endl;
260
177a8ead
FCE
261 if (pe.tok)
262 {
263 cerr << "\tat: " << *pe.tok << endl;
1b1b4ceb 264 session.print_error_source (cerr, align_parse_error, pe.tok);
177a8ead 265 }
2f1a1aea 266 else
177a8ead
FCE
267 {
268 const token* t = last_t;
269 if (t)
1b1b4ceb
RA
270 {
271 cerr << "\tsaw: " << *t << endl;
272 session.print_error_source (cerr, align_parse_error, t);
273 }
177a8ead
FCE
274 else
275 cerr << "\tsaw: " << input_name << " EOF" << endl;
276 }
2f1a1aea
FCE
277
278 // XXX: make it possible to print the last input line,
279 // so as to line up an arrow with the specific error column
280
281 num_errors ++;
282}
283
284
dff50e09 285const token*
2f1a1aea
FCE
286parser::last ()
287{
288 return last_t;
289}
290
291
c434ec7e
FCE
292
293template <typename OPERAND>
294bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
295{
296 if (op->type == tok_operator && op->content == "<=")
297 { return lhs <= rhs; }
298 else if (op->type == tok_operator && op->content == ">=")
299 { return lhs >= rhs; }
300 else if (op->type == tok_operator && op->content == "<")
301 { return lhs < rhs; }
302 else if (op->type == tok_operator && op->content == ">")
303 { return lhs > rhs; }
304 else if (op->type == tok_operator && op->content == "==")
305 { return lhs == rhs; }
306 else if (op->type == tok_operator && op->content == "!=")
307 { return lhs != rhs; }
308 else
309 throw parse_error ("expected comparison operator", op);
310}
311
312
177a8ead
FCE
313// Here, we perform on-the-fly preprocessing.
314// The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
44ce8ed5
FCE
315// where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
316// or: arch COMPARISON-OP "arch-string"
db135493 317// or: systemtap_v COMPARISON-OP "version-string"
561079c8 318// or: CONFIG_foo COMPARISON-OP "config-string"
717a457b 319// or: CONFIG_foo COMPARISON-OP number
4227f98d 320// or: CONFIG_foo COMPARISON-OP CONFIG_bar
5811366a
FCE
321// or: "string1" COMPARISON-OP "string2"
322// or: number1 COMPARISON-OP number2
44ce8ed5 323// The %: ELSE-TOKENS part is optional.
177a8ead
FCE
324//
325// e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
c434ec7e 326// e.g. %( arch != "i?86" %? "foo" %: "baz" %)
561079c8 327// e.g. %( CONFIG_foo %? "foo" %: "baz" %)
177a8ead
FCE
328//
329// Up to an entire %( ... %) expression is processed by a single call
330// to this function. Tokens included by any nested conditions are
331// enqueued in a private vector.
332
333bool eval_pp_conditional (systemtap_session& s,
334 const token* l, const token* op, const token* r)
335{
44ce8ed5 336 if (l->type == tok_identifier && (l->content == "kernel_v" ||
db135493
FCE
337 l->content == "kernel_vr" ||
338 l->content == "systemtap_v"))
44ce8ed5 339 {
db135493
FCE
340 if (! (r->type == tok_string))
341 throw parse_error ("expected string literal", r);
342
44ce8ed5 343 string target_kernel_vr = s.kernel_release;
197a4d62 344 string target_kernel_v = s.kernel_base_release;
db135493 345 string target;
dff50e09 346
db135493
FCE
347 if (l->content == "kernel_v") target = target_kernel_v;
348 else if (l->content == "kernel_vr") target = target_kernel_vr;
349 else if (l->content == "systemtap_v") target = s.compatible;
350 else assert (0);
7a468d68 351
7a468d68
FCE
352 string query = r->content;
353 bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
354
44ce8ed5
FCE
355 // collect acceptable strverscmp results.
356 int rvc_ok1, rvc_ok2;
7a468d68 357 bool wc_ok = false;
44ce8ed5
FCE
358 if (op->type == tok_operator && op->content == "<=")
359 { rvc_ok1 = -1; rvc_ok2 = 0; }
360 else if (op->type == tok_operator && op->content == ">=")
361 { rvc_ok1 = 1; rvc_ok2 = 0; }
362 else if (op->type == tok_operator && op->content == "<")
363 { rvc_ok1 = -1; rvc_ok2 = -1; }
364 else if (op->type == tok_operator && op->content == ">")
365 { rvc_ok1 = 1; rvc_ok2 = 1; }
366 else if (op->type == tok_operator && op->content == "==")
7a468d68 367 { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
44ce8ed5 368 else if (op->type == tok_operator && op->content == "!=")
7a468d68 369 { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
44ce8ed5
FCE
370 else
371 throw parse_error ("expected comparison operator", op);
7a468d68
FCE
372
373 if ((!wc_ok) && rhs_wildcard)
374 throw parse_error ("wildcard not allowed with order comparison operators", op);
375
376 if (rhs_wildcard)
377 {
378 int rvc_result = fnmatch (query.c_str(), target.c_str(),
379 FNM_NOESCAPE); // spooky
380 bool badness = (rvc_result == 0) ^ (op->content == "==");
381 return !badness;
382 }
383 else
384 {
385 int rvc_result = strverscmp (target.c_str(), query.c_str());
386 // normalize rvc_result
387 if (rvc_result < 0) rvc_result = -1;
388 if (rvc_result > 0) rvc_result = 1;
389 return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
390 }
44ce8ed5
FCE
391 }
392 else if (l->type == tok_identifier && l->content == "arch")
393 {
394 string target_architecture = s.architecture;
395 if (! (r->type == tok_string))
396 throw parse_error ("expected string literal", r);
397 string query_architecture = r->content;
dff50e09 398
7a468d68
FCE
399 int nomatch = fnmatch (query_architecture.c_str(),
400 target_architecture.c_str(),
401 FNM_NOESCAPE); // still spooky
402
561079c8
FCE
403 bool result;
404 if (op->type == tok_operator && op->content == "==")
405 result = !nomatch;
406 else if (op->type == tok_operator && op->content == "!=")
407 result = nomatch;
408 else
409 throw parse_error ("expected '==' or '!='", op);
410
411 return result;
412 }
60d98537 413 else if (l->type == tok_identifier && startswith(l->content, "CONFIG_"))
561079c8 414 {
717a457b
MW
415 if (r->type == tok_string)
416 {
417 string lhs = s.kernel_config[l->content]; // may be empty
418 string rhs = r->content;
561079c8 419
717a457b 420 int nomatch = fnmatch (rhs.c_str(), lhs.c_str(), FNM_NOESCAPE); // still spooky
561079c8 421
717a457b
MW
422 bool result;
423 if (op->type == tok_operator && op->content == "==")
424 result = !nomatch;
425 else if (op->type == tok_operator && op->content == "!=")
426 result = nomatch;
427 else
428 throw parse_error ("expected '==' or '!='", op);
dff50e09 429
717a457b
MW
430 return result;
431 }
432 else if (r->type == tok_number)
433 {
434 const char* startp = s.kernel_config[l->content].c_str ();
435 char* endp = (char*) startp;
436 errno = 0;
437 int64_t lhs = (int64_t) strtoll (startp, & endp, 0);
438 if (errno == ERANGE || errno == EINVAL || *endp != '\0')
439 throw parse_error ("Config option value not a number", l);
440
441 int64_t rhs = lex_cast<int64_t>(r->content);
442 return eval_comparison (lhs, op, rhs);
443 }
4227f98d 444 else if (r->type == tok_identifier
60d98537 445 && startswith(r->content, "CONFIG_"))
4227f98d
MW
446 {
447 // First try to convert both to numbers,
448 // otherwise threat both as strings.
449 const char* startp = s.kernel_config[l->content].c_str ();
450 char* endp = (char*) startp;
451 errno = 0;
452 int64_t val = (int64_t) strtoll (startp, & endp, 0);
453 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
454 {
455 int64_t lhs = val;
456 startp = s.kernel_config[r->content].c_str ();
457 endp = (char*) startp;
458 errno = 0;
459 int64_t rhs = (int64_t) strtoll (startp, & endp, 0);
460 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
461 return eval_comparison (lhs, op, rhs);
462 }
463
464 string lhs = s.kernel_config[l->content];
465 string rhs = s.kernel_config[r->content];
466 return eval_comparison (lhs, op, rhs);
467 }
717a457b 468 else
4227f98d 469 throw parse_error ("expected string, number literal or other CONFIG_... as right value", r);
dff50e09 470 }
c434ec7e 471 else if (l->type == tok_string && r->type == tok_string)
5811366a 472 {
c434ec7e
FCE
473 string lhs = l->content;
474 string rhs = r->content;
475 return eval_comparison (lhs, op, rhs);
476 // NB: no wildcarding option here
477 }
478 else if (l->type == tok_number && r->type == tok_number)
479 {
480 int64_t lhs = lex_cast<int64_t>(l->content);
481 int64_t rhs = lex_cast<int64_t>(r->content);
482 return eval_comparison (lhs, op, rhs);
7a468d68 483 // NB: no wildcarding option here
5811366a
FCE
484 }
485 else if (l->type == tok_string && r->type == tok_number
486 && op->type == tok_operator)
487 throw parse_error ("expected string literal as right value", r);
488 else if (l->type == tok_number && r->type == tok_string
489 && op->type == tok_operator)
490 throw parse_error ("expected number literal as right value", r);
c434ec7e 491
177a8ead 492 else
561079c8 493 throw parse_error ("expected 'arch' or 'kernel_v' or 'kernel_vr' or 'CONFIG_...'\n"
5811366a 494 " or comparison between strings or integers", l);
177a8ead
FCE
495}
496
497
5811366a 498// Only tokens corresponding to the TRUE statement must be expanded
177a8ead 499const token*
3f847830 500parser::scan_pp (bool wildcard)
177a8ead
FCE
501{
502 while (true)
503 {
e92f2566
JS
504 pp_state_t pp = PP_NONE;
505 if (!pp_state.empty())
506 pp = pp_state.back().second;
507
508 const token* t = 0;
509 if (pp == PP_SKIP_THEN || pp == PP_SKIP_ELSE)
510 t = skip_pp ();
511 else
512 t = input.scan (wildcard);
513
514 if (t == 0) // EOF
177a8ead 515 {
e92f2566
JS
516 if (pp != PP_NONE)
517 {
518 t = pp_state.back().first;
519 pp_state.pop_back(); // so skip_some doesn't keep trying to close this
520 throw parse_error ("incomplete conditional at end of file", t);
521 }
177a8ead
FCE
522 return t;
523 }
524
e92f2566
JS
525 // misplaced preprocessor "then"
526 if (t->type == tok_operator && t->content == "%?")
527 throw parse_error ("incomplete conditional - missing '%('", t);
528
529 // preprocessor "else"
530 if (t->type == tok_operator && t->content == "%:")
531 {
532 if (pp == PP_NONE)
533 throw parse_error ("incomplete conditional - missing '%('", t);
534 if (pp == PP_KEEP_ELSE || pp == PP_SKIP_ELSE)
535 throw parse_error ("invalid conditional - duplicate '%:'", t);
536
537 pp_state.back().second = (pp == PP_KEEP_THEN) ?
538 PP_SKIP_ELSE : PP_KEEP_ELSE;
539 delete t;
540 continue;
541 }
542
543 // preprocessor close
544 if (t->type == tok_operator && t->content == "%)")
545 {
546 if (pp == PP_NONE)
547 throw parse_error ("incomplete conditional - missing '%('", t);
548 delete pp_state.back().first;
549 delete t;
550 pp_state.pop_back();
551 continue;
552 }
dff50e09 553
177a8ead
FCE
554 if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
555 return t;
556
557 // We have a %( - it's time to throw a preprocessing party!
558
2d7881bf
PP
559 bool result = false;
560 bool and_result = true;
561 const token *n = NULL;
562 do {
563 const token *l, *op, *r;
e92f2566 564 l = input.scan (false);
2d7881bf
PP
565 op = input.scan (false);
566 r = input.scan (false);
567 if (l == 0 || op == 0 || r == 0)
568 throw parse_error ("incomplete condition after '%('", t);
569 // NB: consider generalizing to consume all tokens until %?, and
570 // passing that as a vector to an evaluator.
571
572 // Do not evaluate the condition if we haven't expanded everything.
573 // This may occur when having several recursive conditionals.
574 and_result &= eval_pp_conditional (session, l, op, r);
575 delete l;
576 delete op;
577 delete r;
578 delete n;
579
580 n = input.scan ();
581 if (n && n->type == tok_operator && n->content == "&&")
582 continue;
583 result |= and_result;
584 and_result = true;
585 if (! (n && n->type == tok_operator && n->content == "||"))
586 break;
587 } while (true);
3f847830
FCE
588
589 /*
590 clog << "PP eval (" << *t << ") == " << result << endl;
591 */
592
e92f2566 593 const token *m = n;
177a8ead
FCE
594 if (! (m && m->type == tok_operator && m->content == "%?"))
595 throw parse_error ("expected '%?' marker for conditional", t);
70c743d8 596 delete m; // "%?"
177a8ead 597
e92f2566
JS
598 pp = result ? PP_KEEP_THEN : PP_SKIP_THEN;
599 pp_state.push_back (make_pair (t, pp));
3f847830 600
e92f2566
JS
601 // Now loop around to look for a real token.
602 }
603}
3f847830 604
3f847830 605
e92f2566
JS
606// Skip over tokens and any errors, heeding
607// only nested preprocessor starts and ends.
608const token*
609parser::skip_pp ()
610{
611 const token* t = 0;
612 unsigned nesting = 0;
613 do
614 {
615 try
616 {
617 t = input.scan ();
177a8ead 618 }
e92f2566 619 catch (const parse_error &e)
70c743d8 620 {
e92f2566 621 continue;
70c743d8 622 }
e92f2566
JS
623 if (!t)
624 break;
625 if (t->type == tok_operator && t->content == "%(")
626 ++nesting;
627 else if (nesting && t->type == tok_operator && t->content == "%)")
628 --nesting;
629 else if (!nesting && t->type == tok_operator &&
630 (t->content == "%:" || t->content == "%?" || t->content == "%)"))
631 break;
632 delete t;
177a8ead 633 }
e92f2566
JS
634 while (true);
635 return t;
177a8ead
FCE
636}
637
638
2f1a1aea 639const token*
0c218afb 640parser::next (bool wildcard)
2f1a1aea
FCE
641{
642 if (! next_t)
0c218afb 643 next_t = scan_pp (wildcard);
2f1a1aea
FCE
644 if (! next_t)
645 throw parse_error ("unexpected end-of-file");
646
2f1a1aea
FCE
647 last_t = next_t;
648 // advance by zeroing next_t
649 next_t = 0;
650 return last_t;
651}
652
653
654const token*
0c218afb 655parser::peek (bool wildcard)
2f1a1aea
FCE
656{
657 if (! next_t)
0c218afb 658 next_t = scan_pp (wildcard);
2f1a1aea
FCE
659
660 // don't advance by zeroing next_t
661 last_t = next_t;
662 return next_t;
663}
664
665
d7f3e0c5
GH
666static inline bool
667tok_is(token const * t, token_type tt, string const & expected)
668{
669 return t && t->type == tt && t->content == expected;
670}
671
672
dff50e09 673const token*
d7f3e0c5
GH
674parser::expect_known (token_type tt, string const & expected)
675{
676 const token *t = next();
57b73400 677 if (! (t && t->type == tt && t->content == expected))
d7f3e0c5
GH
678 throw parse_error ("expected '" + expected + "'");
679 return t;
680}
681
682
dff50e09 683const token*
d7f3e0c5
GH
684parser::expect_unknown (token_type tt, string & target)
685{
686 const token *t = next();
687 if (!(t && t->type == tt))
688 throw parse_error ("expected " + tt2str(tt));
689 target = t->content;
690 return t;
691}
692
693
dff50e09 694const token*
493ee224
DS
695parser::expect_unknown2 (token_type tt1, token_type tt2, string & target)
696{
697 const token *t = next();
698 if (!(t && (t->type == tt1 || t->type == tt2)))
699 throw parse_error ("expected " + tt2str(tt1) + " or " + tt2str(tt2));
700 target = t->content;
701 return t;
702}
703
704
dff50e09 705const token*
d7f3e0c5
GH
706parser::expect_op (std::string const & expected)
707{
708 return expect_known (tok_operator, expected);
709}
710
711
dff50e09 712const token*
d7f3e0c5
GH
713parser::expect_kw (std::string const & expected)
714{
f4fe2e93 715 return expect_known (tok_keyword, expected);
d7f3e0c5
GH
716}
717
dff50e09 718const token*
e38723d2 719parser::expect_number (int64_t & value)
57b73400 720{
e38723d2
MH
721 bool neg = false;
722 const token *t = next();
723 if (t->type == tok_operator && t->content == "-")
724 {
725 neg = true;
726 t = next ();
727 }
728 if (!(t && t->type == tok_number))
729 throw parse_error ("expected number");
730
731 const char* startp = t->content.c_str ();
732 char* endp = (char*) startp;
733
734 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
735 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
736 // since the lexer only gives us positive digit strings, but we'll
737 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
738 errno = 0;
739 value = (int64_t) strtoull (startp, & endp, 0);
740 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
741 || (neg && (unsigned long long) value > 9223372036854775808ULL)
742 || (unsigned long long) value > 18446744073709551615ULL
743 || value < -9223372036854775807LL-1)
dff50e09
FCE
744 throw parse_error ("number invalid or out of range");
745
e38723d2
MH
746 if (neg)
747 value = -value;
748
749 return t;
57b73400
GH
750}
751
d7f3e0c5 752
dff50e09 753const token*
d7f3e0c5
GH
754parser::expect_ident (std::string & target)
755{
756 return expect_unknown (tok_identifier, target);
757}
758
759
dff50e09 760const token*
493ee224
DS
761parser::expect_ident_or_keyword (std::string & target)
762{
763 return expect_unknown2 (tok_identifier, tok_keyword, target);
764}
765
766
dff50e09 767bool
d7f3e0c5
GH
768parser::peek_op (std::string const & op)
769{
770 return tok_is (peek(), tok_operator, op);
771}
772
773
dff50e09 774bool
d7f3e0c5
GH
775parser::peek_kw (std::string const & kw)
776{
777 return tok_is (peek(), tok_identifier, kw);
778}
779
780
781
66c7d4c1 782lexer::lexer (istream& input, const string& in, systemtap_session& s):
c5be7511 783 ate_comment(false), input_name (in), input_pointer (0), input_end (0),
9300f661
JS
784 cursor_suspend_count(0), cursor_suspend_line (1), cursor_suspend_column (1),
785 cursor_line (1), cursor_column (1),
66c7d4c1 786 session(s), current_file (0)
eacb10ce 787{
66c7d4c1 788 getline(input, input_contents, '\0');
2203b032 789
66c7d4c1
JS
790 input_pointer = input_contents.data();
791 input_end = input_contents.data() + input_contents.size();
792
793 if (keywords.empty())
794 {
795 keywords.insert("probe");
796 keywords.insert("global");
797 keywords.insert("function");
798 keywords.insert("if");
799 keywords.insert("else");
800 keywords.insert("for");
801 keywords.insert("foreach");
802 keywords.insert("in");
803 keywords.insert("limit");
804 keywords.insert("return");
805 keywords.insert("delete");
806 keywords.insert("while");
807 keywords.insert("break");
808 keywords.insert("continue");
809 keywords.insert("next");
810 keywords.insert("string");
811 keywords.insert("long");
f4fe2e93
FCE
812 keywords.insert("try");
813 keywords.insert("catch");
66c7d4c1 814 }
eacb10ce 815}
2f1a1aea 816
66c7d4c1
JS
817set<string> lexer::keywords;
818
1b1b4ceb
RA
819void
820lexer::set_current_file (stapfile* f)
821{
822 current_file = f;
2203b032
JS
823 if (f)
824 {
825 f->file_contents = input_contents;
826 f->name = input_name;
827 }
1b1b4ceb 828}
bb2e3076
FCE
829
830int
831lexer::input_peek (unsigned n)
832{
66c7d4c1
JS
833 if (input_pointer + n >= input_end)
834 return -1; // EOF
835 return (unsigned char)*(input_pointer + n);
bb2e3076
FCE
836}
837
838
dff50e09 839int
2f1a1aea
FCE
840lexer::input_get ()
841{
66c7d4c1 842 int c = input_peek();
bb2e3076
FCE
843 if (c < 0) return c; // EOF
844
66c7d4c1
JS
845 ++input_pointer;
846
3f99432c 847 if (cursor_suspend_count)
9300f661
JS
848 {
849 // Track effect of input_put: preserve previous cursor/line_column
850 // until all of its characters are consumed.
851 if (--cursor_suspend_count == 0)
852 {
853 cursor_line = cursor_suspend_line;
854 cursor_column = cursor_suspend_column;
855 }
856 }
3f99432c 857 else
2f1a1aea 858 {
3f99432c
FCE
859 // update source cursor
860 if (c == '\n')
861 {
862 cursor_line ++;
863 cursor_column = 1;
864 }
865 else
866 cursor_column ++;
2f1a1aea 867 }
2f1a1aea 868
eacb10ce 869 // clog << "[" << (char)c << "]";
2f1a1aea
FCE
870 return c;
871}
872
873
3f99432c 874void
9300f661 875lexer::input_put (const string& chars, const token* t)
3f99432c 876{
66c7d4c1
JS
877 size_t pos = input_pointer - input_contents.data();
878 // clog << "[put:" << chars << " @" << pos << "]";
879 input_contents.insert (pos, chars);
eacb10ce 880 cursor_suspend_count += chars.size();
9300f661
JS
881 cursor_suspend_line = cursor_line;
882 cursor_suspend_column = cursor_column;
883 cursor_line = t->location.line;
884 cursor_column = t->location.column;
66c7d4c1
JS
885 input_pointer = input_contents.data() + pos;
886 input_end = input_contents.data() + input_contents.size();
3f99432c
FCE
887}
888
889
2f1a1aea 890token*
3f847830 891lexer::scan (bool wildcard)
2f1a1aea 892{
c5be7511 893 ate_comment = false; // reset for each new token
2f1a1aea 894 token* n = new token;
2203b032 895 n->location.file = current_file;
2f1a1aea 896
9300f661
JS
897skip:
898 bool suspended = (cursor_suspend_count > 0);
2f1a1aea
FCE
899 n->location.line = cursor_line;
900 n->location.column = cursor_column;
901
902 int c = input_get();
3f99432c 903 // clog << "{" << (char)c << (char)c2 << "}";
2f1a1aea
FCE
904 if (c < 0)
905 {
906 delete n;
907 return 0;
908 }
909
910 if (isspace (c))
911 goto skip;
912
66c7d4c1
JS
913 int c2 = input_peek ();
914
3f99432c
FCE
915 // Paste command line arguments as character streams into
916 // the beginning of a token. $1..$999 go through as raw
917 // characters; @1..@999 are quoted/escaped as strings.
918 // $# and @# expand to the number of arguments, similarly
919 // raw or quoted.
9300f661 920 if ((c == '$' || c == '@') && (c2 == '#'))
3f99432c 921 {
9300f661
JS
922 n->content.push_back (c);
923 n->content.push_back (c2);
3f99432c 924 input_get(); // swallow '#'
9300f661
JS
925 if (suspended)
926 throw parse_error ("invalid nested substitution of command line arguments", n);
927 size_t num_args = session.args.size ();
928 input_put ((c == '$') ? lex_cast (num_args) : lex_cast_qstring (num_args), n);
929 n->content.clear();
930 goto skip;
3f99432c 931 }
9300f661 932 else if ((c == '$' || c == '@') && (isdigit (c2)))
3f99432c 933 {
9300f661 934 n->content.push_back (c);
3f99432c
FCE
935 unsigned idx = 0;
936 do
937 {
938 input_get ();
939 idx = (idx * 10) + (c2 - '0');
9300f661 940 n->content.push_back (c2);
3f99432c
FCE
941 c2 = input_peek ();
942 } while (c2 > 0 &&
dff50e09 943 isdigit (c2) &&
3f99432c 944 idx <= session.args.size()); // prevent overflow
9300f661
JS
945 if (suspended)
946 throw parse_error ("invalid nested substitution of command line arguments", n);
3f99432c
FCE
947 if (idx == 0 ||
948 idx-1 >= session.args.size())
aca66a36
JS
949 throw parse_error ("command line argument index " + lex_cast(idx)
950 + " out of range [1-" + lex_cast(session.args.size()) + "]", n);
9300f661
JS
951 const string& arg = session.args[idx-1];
952 input_put ((c == '$') ? arg : lex_cast_qstring (arg), n);
953 n->content.clear();
954 goto skip;
3f99432c
FCE
955 }
956
0c218afb
MH
957 else if (isalpha (c) || c == '$' || c == '@' || c == '_' ||
958 (wildcard && c == '*'))
2f1a1aea
FCE
959 {
960 n->type = tok_identifier;
961 n->content = (char) c;
0c218afb
MH
962 while (isalnum (c2) || c2 == '_' || c2 == '$' ||
963 (wildcard && c2 == '*'))
2f1a1aea 964 {
3f99432c
FCE
965 input_get ();
966 n->content.push_back (c2);
967 c2 = input_peek ();
6e213f58 968 }
213bee8f 969
66c7d4c1 970 if (keywords.count(n->content))
3f99432c 971 n->type = tok_keyword;
dff50e09 972
2f1a1aea
FCE
973 return n;
974 }
975
3a20432b 976 else if (isdigit (c)) // positive literal
2f1a1aea 977 {
2f1a1aea 978 n->type = tok_number;
9c0c0e46
FCE
979 n->content = (char) c;
980
66c7d4c1 981 while (isalnum (c2))
2f1a1aea 982 {
9c0c0e46
FCE
983 // NB: isalnum is very permissive. We rely on strtol, called in
984 // parser::parse_literal below, to confirm that the number string
985 // is correctly formatted and in range.
986
66c7d4c1
JS
987 input_get ();
988 n->content.push_back (c2);
989 c2 = input_peek ();
2f1a1aea
FCE
990 }
991 return n;
992 }
993
994 else if (c == '\"')
995 {
996 n->type = tok_string;
997 while (1)
998 {
999 c = input_get ();
1000
3f99432c 1001 if (c < 0 || c == '\n')
2f1a1aea 1002 {
72cdb9cd 1003 throw parse_error("Could not find matching closing quote", n);
2f1a1aea
FCE
1004 }
1005 if (c == '\"') // closing double-quotes
1006 break;
3f99432c 1007 else if (c == '\\') // see also input_put
dff50e09 1008 {
7d46afb8
GH
1009 c = input_get ();
1010 switch (c)
1011 {
1012 case 'a':
1013 case 'b':
1014 case 't':
1015 case 'n':
1016 case 'v':
1017 case 'f':
1018 case 'r':
f03954fd 1019 case '0' ... '7': // NB: need only match the first digit
7d46afb8 1020 case '\\':
7d46afb8 1021 // Pass these escapes through to the string value
dff50e09 1022 // being parsed; it will be emitted into a C literal.
7d46afb8
GH
1023
1024 n->content.push_back('\\');
1025
3f99432c 1026 // fall through
7d46afb8 1027 default:
7d46afb8
GH
1028 n->content.push_back(c);
1029 break;
1030 }
2f1a1aea
FCE
1031 }
1032 else
1033 n->content.push_back(c);
1034 }
1035 return n;
1036 }
1037
1038 else if (ispunct (c))
1039 {
bb2e3076 1040 int c3 = input_peek (1);
2f1a1aea 1041
3a20432b
FCE
1042 // NB: if we were to recognize negative numeric literals here,
1043 // we'd introduce another grammar ambiguity:
1044 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
1045 // instead of tok_number(1) tok_operator('-') tok_number(1)
1046
66c7d4c1 1047 if (c == '#') // shell comment
2f1a1aea
FCE
1048 {
1049 unsigned this_line = cursor_line;
bb2e3076
FCE
1050 do { c = input_get (); }
1051 while (c >= 0 && cursor_line == this_line);
c5be7511 1052 ate_comment = true;
2f1a1aea
FCE
1053 goto skip;
1054 }
66c7d4c1 1055 else if ((c == '/' && c2 == '/')) // C++ comment
63a7c90e
FCE
1056 {
1057 unsigned this_line = cursor_line;
bb2e3076
FCE
1058 do { c = input_get (); }
1059 while (c >= 0 && cursor_line == this_line);
c5be7511 1060 ate_comment = true;
63a7c90e
FCE
1061 goto skip;
1062 }
1063 else if (c == '/' && c2 == '*') // C comment
1064 {
66c7d4c1
JS
1065 (void) input_get (); // swallow '*' already in c2
1066 c = input_get ();
63a7c90e 1067 c2 = input_get ();
bb2e3076 1068 while (c2 >= 0)
63a7c90e 1069 {
66c7d4c1
JS
1070 if (c == '*' && c2 == '/')
1071 break;
63a7c90e
FCE
1072 c = c2;
1073 c2 = input_get ();
63a7c90e 1074 }
c5be7511 1075 ate_comment = true;
bb2e3076 1076 goto skip;
63a7c90e 1077 }
54dfabe9
FCE
1078 else if (c == '%' && c2 == '{') // embedded code
1079 {
1080 n->type = tok_embedded;
1081 (void) input_get (); // swallow '{' already in c2
66c7d4c1
JS
1082 c = input_get ();
1083 c2 = input_get ();
1084 while (c2 >= 0)
54dfabe9 1085 {
66c7d4c1
JS
1086 if (c == '%' && c2 == '}')
1087 return n;
54dfabe9 1088 n->content += c;
66c7d4c1
JS
1089 c = c2;
1090 c2 = input_get ();
54dfabe9 1091 }
72cdb9cd
CW
1092
1093 throw parse_error ("Could not find matching '%}' to close embedded function block", n);
54dfabe9 1094 }
2f1a1aea 1095
bb2e3076
FCE
1096 // We're committed to recognizing at least the first character
1097 // as an operator.
2f1a1aea 1098 n->type = tok_operator;
66c7d4c1 1099 n->content = c;
2f1a1aea 1100
bb2e3076 1101 // match all valid operators, in decreasing size order
66c7d4c1
JS
1102 if ((c == '<' && c2 == '<' && c3 == '<') ||
1103 (c == '<' && c2 == '<' && c3 == '=') ||
1104 (c == '>' && c2 == '>' && c3 == '='))
82919855 1105 {
66c7d4c1
JS
1106 n->content += c2;
1107 n->content += c3;
bb2e3076
FCE
1108 input_get (); input_get (); // swallow other two characters
1109 }
66c7d4c1
JS
1110 else if ((c == '=' && c2 == '=') ||
1111 (c == '!' && c2 == '=') ||
1112 (c == '<' && c2 == '=') ||
1113 (c == '>' && c2 == '=') ||
1114 (c == '+' && c2 == '=') ||
1115 (c == '-' && c2 == '=') ||
1116 (c == '*' && c2 == '=') ||
1117 (c == '/' && c2 == '=') ||
1118 (c == '%' && c2 == '=') ||
1119 (c == '&' && c2 == '=') ||
1120 (c == '^' && c2 == '=') ||
1121 (c == '|' && c2 == '=') ||
1122 (c == '.' && c2 == '=') ||
1123 (c == '&' && c2 == '&') ||
1124 (c == '|' && c2 == '|') ||
1125 (c == '+' && c2 == '+') ||
1126 (c == '-' && c2 == '-') ||
1127 (c == '-' && c2 == '>') ||
1128 (c == '<' && c2 == '<') ||
1129 (c == '>' && c2 == '>') ||
177a8ead 1130 // preprocessor tokens
66c7d4c1
JS
1131 (c == '%' && c2 == '(') ||
1132 (c == '%' && c2 == '?') ||
1133 (c == '%' && c2 == ':') ||
1134 (c == '%' && c2 == ')'))
bb2e3076 1135 {
66c7d4c1 1136 n->content += c2;
bb2e3076 1137 input_get (); // swallow other character
dff50e09 1138 }
2f1a1aea
FCE
1139
1140 return n;
1141 }
1142
1143 else
1144 {
1145 n->type = tok_junk;
1146 n->content = (char) c;
1147 return n;
1148 }
1149}
1150
1151
1152// ------------------------------------------------------------------------
1153
1154stapfile*
1155parser::parse ()
1156{
1157 stapfile* f = new stapfile;
1b1b4ceb 1158 input.set_current_file (f);
56099f08
FCE
1159
1160 bool empty = true;
1161
2f1a1aea
FCE
1162 while (1)
1163 {
1164 try
1165 {
1166 const token* t = peek ();
56099f08 1167 if (! t) // nice clean EOF
2f1a1aea
FCE
1168 break;
1169
56099f08 1170 empty = false;
6e213f58
DS
1171 if (t->type == tok_keyword && t->content == "probe")
1172 {
1173 context = con_probe;
1174 parse_probe (f->probes, f->aliases);
1175 }
1176 else if (t->type == tok_keyword && t->content == "global")
1177 {
1178 context = con_global;
4b5f3e45 1179 parse_global (f->globals, f->probes);
6e213f58
DS
1180 }
1181 else if (t->type == tok_keyword && t->content == "function")
1182 {
1183 context = con_function;
1184 parse_functiondecl (f->functions);
1185 }
54dfabe9 1186 else if (t->type == tok_embedded)
6e213f58
DS
1187 {
1188 context = con_embedded;
1189 f->embeds.push_back (parse_embeddedcode ());
1190 }
2f1a1aea 1191 else
6e213f58
DS
1192 {
1193 context = con_unknown;
1194 throw parse_error ("expected 'probe', 'global', 'function', or '%{'");
1195 }
2f1a1aea
FCE
1196 }
1197 catch (parse_error& pe)
1198 {
1199 print_error (pe);
cd7116b8 1200 if (pe.skip_some) // for recovery
dff50e09 1201 try
cd7116b8
FCE
1202 {
1203 // Quietly swallow all tokens until the next '}'.
1204 while (1)
1205 {
1206 const token* t = peek ();
1207 if (! t)
1208 break;
1209 next ();
1210 if (t->type == tok_operator && t->content == "}")
1211 break;
1212 }
1213 }
1214 catch (parse_error& pe2)
1215 {
1216 // parse error during recovery ... ugh
1217 print_error (pe2);
1218 }
177a8ead 1219 }
2f1a1aea
FCE
1220 }
1221
56099f08
FCE
1222 if (empty)
1223 {
1224 cerr << "Input file '" << input_name << "' is empty or missing." << endl;
1225 delete f;
2203b032 1226 f = 0;
56099f08
FCE
1227 }
1228 else if (num_errors > 0)
2f1a1aea
FCE
1229 {
1230 cerr << num_errors << " parse error(s)." << endl;
1231 delete f;
2203b032 1232 f = 0;
2f1a1aea 1233 }
dff50e09 1234
2203b032 1235 input.set_current_file(0);
2f1a1aea
FCE
1236 return f;
1237}
1238
1239
20c6c071 1240void
54dfabe9
FCE
1241parser::parse_probe (std::vector<probe *> & probe_ret,
1242 std::vector<probe_alias *> & alias_ret)
2f1a1aea 1243{
82919855 1244 const token* t0 = next ();
6e213f58 1245 if (! (t0->type == tok_keyword && t0->content == "probe"))
82919855
FCE
1246 throw parse_error ("expected 'probe'");
1247
20c6c071
GH
1248 vector<probe_point *> aliases;
1249 vector<probe_point *> locations;
1250
1251 bool equals_ok = true;
82919855 1252
97266278
LG
1253 int epilogue_alias = 0;
1254
2f1a1aea
FCE
1255 while (1)
1256 {
b4ceace2 1257 probe_point * pp = parse_probe_point ();
dff50e09 1258
b4ceace2 1259 const token* t = peek ();
dff50e09 1260 if (equals_ok && t
b4ceace2
FCE
1261 && t->type == tok_operator && t->content == "=")
1262 {
1ad820e3 1263 if (pp->optional || pp->sufficient)
f1a0157a 1264 throw parse_error ("probe point alias name cannot be optional nor sufficient", pp->components.front()->tok);
b4ceace2
FCE
1265 aliases.push_back(pp);
1266 next ();
1267 continue;
1268 }
dff50e09 1269 else if (equals_ok && t
97266278
LG
1270 && t->type == tok_operator && t->content == "+=")
1271 {
1ad820e3 1272 if (pp->optional || pp->sufficient)
f1a0157a 1273 throw parse_error ("probe point alias name cannot be optional nor sufficient", pp->components.front()->tok);
97266278
LG
1274 aliases.push_back(pp);
1275 epilogue_alias = 1;
1276 next ();
1277 continue;
1278 }
b4ceace2
FCE
1279 else if (t && t->type == tok_operator && t->content == ",")
1280 {
1281 locations.push_back(pp);
1282 equals_ok = false;
1283 next ();
1284 continue;
1285 }
1286 else if (t && t->type == tok_operator && t->content == "{")
1287 {
1288 locations.push_back(pp);
1289 break;
1290 }
2f1a1aea 1291 else
9c0c0e46 1292 throw parse_error ("expected probe point specifier");
2f1a1aea 1293 }
20c6c071 1294
20c6c071
GH
1295 if (aliases.empty())
1296 {
54dfabe9
FCE
1297 probe* p = new probe;
1298 p->tok = t0;
1299 p->locations = locations;
1300 p->body = parse_stmt_block ();
37ebca01 1301 p->privileged = privileged;
54dfabe9 1302 probe_ret.push_back (p);
20c6c071
GH
1303 }
1304 else
1305 {
54dfabe9 1306 probe_alias* p = new probe_alias (aliases);
97266278
LG
1307 if(epilogue_alias)
1308 p->epilogue_style = true;
1309 else
1310 p->epilogue_style = false;
54dfabe9
FCE
1311 p->tok = t0;
1312 p->locations = locations;
1313 p->body = parse_stmt_block ();
37ebca01 1314 p->privileged = privileged;
54dfabe9 1315 alias_ret.push_back (p);
20c6c071 1316 }
54dfabe9 1317}
20c6c071 1318
54dfabe9
FCE
1319
1320embeddedcode*
1321parser::parse_embeddedcode ()
1322{
1323 embeddedcode* e = new embeddedcode;
1324 const token* t = next ();
1325 if (t->type != tok_embedded)
24cb178f
FCE
1326 throw parse_error ("expected '%{'");
1327
1328 if (! privileged)
cd7116b8
FCE
1329 throw parse_error ("embedded code in unprivileged script",
1330 false /* don't skip tokens for parse resumption */);
54dfabe9
FCE
1331
1332 e->tok = t;
1333 e->code = t->content;
1334 return e;
2f1a1aea
FCE
1335}
1336
1337
1338block*
56099f08 1339parser::parse_stmt_block ()
2f1a1aea
FCE
1340{
1341 block* pb = new block;
1342
56099f08
FCE
1343 const token* t = next ();
1344 if (! (t->type == tok_operator && t->content == "{"))
1345 throw parse_error ("expected '{'");
1346
1347 pb->tok = t;
2b066ec1 1348
2f1a1aea
FCE
1349 while (1)
1350 {
1351 try
1352 {
2b066ec1
FCE
1353 t = peek ();
1354 if (t && t->type == tok_operator && t->content == "}")
1355 {
1356 next ();
1357 break;
1358 }
1359
2f1a1aea 1360 pb->statements.push_back (parse_statement ());
2f1a1aea
FCE
1361 }
1362 catch (parse_error& pe)
1363 {
1364 print_error (pe);
54dfabe9 1365
2f1a1aea
FCE
1366 // Quietly swallow all tokens until the next ';' or '}'.
1367 while (1)
1368 {
1369 const token* t = peek ();
54dfabe9 1370 if (! t) return 0;
2f1a1aea 1371 next ();
54dfabe9
FCE
1372 if (t->type == tok_operator
1373 && (t->content == "}" || t->content == ";"))
2f1a1aea
FCE
1374 break;
1375 }
1376 }
1377 }
1378
1379 return pb;
1380}
1381
1382
f4fe2e93
FCE
1383try_block*
1384parser::parse_try_block ()
1385{
1386 try_block* pb = new try_block;
1387
1388 pb->tok = expect_kw ("try");
1389 pb->try_block = parse_stmt_block();
1390 expect_kw ("catch");
1391
1392 const token* t = peek ();
1393 if (t->type == tok_operator && t->content == "(")
1394 {
1395 next (); // swallow the '('
1396
1397 t = next();
1398 if (! (t->type == tok_identifier))
1399 throw parse_error ("expected identifier");
1400 symbol* sym = new symbol;
1401 sym->tok = t;
1402 sym->name = t->content;
1403 pb->catch_error_var = sym;
1404
1405 expect_op (")");
1406 }
1407 else
1408 pb->catch_error_var = 0;
1409
1410 pb->catch_block = parse_stmt_block();
1411
1412 return pb;
1413}
1414
1415
1416
2f1a1aea
FCE
1417statement*
1418parser::parse_statement ()
1419{
40b71c47 1420 statement *ret;
2f1a1aea
FCE
1421 const token* t = peek ();
1422 if (t && t->type == tok_operator && t->content == ";")
f946b10f 1423 return new null_statement (next ());
dff50e09 1424 else if (t && t->type == tok_operator && t->content == "{")
40b71c47 1425 return parse_stmt_block (); // Don't squash semicolons.
f4fe2e93
FCE
1426 else if (t && t->type == tok_keyword && t->content == "try")
1427 return parse_try_block (); // Don't squash semicolons.
6e213f58 1428 else if (t && t->type == tok_keyword && t->content == "if")
40b71c47 1429 return parse_if_statement (); // Don't squash semicolons.
6e213f58 1430 else if (t && t->type == tok_keyword && t->content == "for")
40b71c47 1431 return parse_for_loop (); // Don't squash semicolons.
6e213f58 1432 else if (t && t->type == tok_keyword && t->content == "foreach")
40b71c47
MW
1433 return parse_foreach_loop (); // Don't squash semicolons.
1434 else if (t && t->type == tok_keyword && t->content == "while")
1435 return parse_while_loop (); // Don't squash semicolons.
6e213f58 1436 else if (t && t->type == tok_keyword && t->content == "return")
40b71c47 1437 ret = parse_return_statement ();
6e213f58 1438 else if (t && t->type == tok_keyword && t->content == "delete")
40b71c47 1439 ret = parse_delete_statement ();
6e213f58 1440 else if (t && t->type == tok_keyword && t->content == "break")
40b71c47 1441 ret = parse_break_statement ();
6e213f58 1442 else if (t && t->type == tok_keyword && t->content == "continue")
40b71c47 1443 ret = parse_continue_statement ();
6e213f58 1444 else if (t && t->type == tok_keyword && t->content == "next")
40b71c47 1445 ret = parse_next_statement ();
2f1a1aea
FCE
1446 else if (t && (t->type == tok_operator || // expressions are flexible
1447 t->type == tok_identifier ||
1448 t->type == tok_number ||
7d902887
FCE
1449 t->type == tok_string ||
1450 t->type == tok_embedded ))
40b71c47 1451 ret = parse_expr_statement ();
54dfabe9 1452 // XXX: consider generally accepting tok_embedded here too
2f1a1aea
FCE
1453 else
1454 throw parse_error ("expected statement");
40b71c47
MW
1455
1456 // Squash "empty" trailing colons after any "non-block-like" statement.
1457 t = peek ();
1458 if (t && t->type == tok_operator && t->content == ";")
1459 {
1460 next (); // Silently eat trailing ; after statement
1461 }
1462
1463 return ret;
2f1a1aea
FCE
1464}
1465
1466
56099f08 1467void
78f6bba6 1468parser::parse_global (vector <vardecl*>& globals, vector<probe*>&)
2f1a1aea 1469{
82919855 1470 const token* t0 = next ();
6e213f58 1471 if (! (t0->type == tok_keyword && t0->content == "global"))
82919855
FCE
1472 throw parse_error ("expected 'global'");
1473
56099f08
FCE
1474 while (1)
1475 {
1476 const token* t = next ();
1477 if (! (t->type == tok_identifier))
1478 throw parse_error ("expected identifier");
1479
2b066ec1
FCE
1480 for (unsigned i=0; i<globals.size(); i++)
1481 if (globals[i]->name == t->content)
57b73400 1482 throw parse_error ("duplicate global name");
dff50e09 1483
24cb178f
FCE
1484 vardecl* d = new vardecl;
1485 d->name = t->content;
1486 d->tok = t;
1487 globals.push_back (d);
56099f08 1488
82919855 1489 t = peek ();
ef474d24
JS
1490
1491 if (t && t->type == tok_operator && t->content == "[") // array size
1492 {
1493 int64_t size;
1494 next ();
1495 expect_number(size);
1496 if (size <= 0 || size > 1000000) // arbitrary max
1497 throw parse_error("array size out of range");
1498 d->maxsize = (int)size;
1499 expect_known(tok_operator, "]");
1500 t = peek ();
1501 }
1502
4b5f3e45 1503 if (t && t->type == tok_operator && t->content == "=") // initialization
ef474d24
JS
1504 {
1505 if (!d->compatible_arity(0))
1506 throw parse_error("only scalar globals can be initialized");
58701b78 1507 d->set_arity(0, t);
ef474d24
JS
1508 next ();
1509 d->init = parse_literal ();
1510 d->type = d->init->type;
1511 t = peek ();
1512 }
4b5f3e45 1513
c3799d72
AM
1514 if (t && t->type == tok_operator && t->content == ";") // termination
1515 next();
1516
4b5f3e45 1517 if (t && t->type == tok_operator && t->content == ",") // next global
82919855
FCE
1518 {
1519 next ();
1520 continue;
1521 }
56099f08 1522 else
82919855 1523 break;
56099f08
FCE
1524 }
1525}
1526
1527
24cb178f
FCE
1528void
1529parser::parse_functiondecl (std::vector<functiondecl*>& functions)
56099f08 1530{
82919855 1531 const token* t = next ();
6e213f58 1532 if (! (t->type == tok_keyword && t->content == "function"))
82919855
FCE
1533 throw parse_error ("expected 'function'");
1534
56099f08 1535
82919855 1536 t = next ();
6e213f58
DS
1537 if (! (t->type == tok_identifier)
1538 && ! (t->type == tok_keyword
1539 && (t->content == "string" || t->content == "long")))
56099f08 1540 throw parse_error ("expected identifier");
24cb178f
FCE
1541
1542 for (unsigned i=0; i<functions.size(); i++)
1543 if (functions[i]->name == t->content)
1544 throw parse_error ("duplicate function name");
1545
1546 functiondecl *fd = new functiondecl ();
56099f08
FCE
1547 fd->name = t->content;
1548 fd->tok = t;
1549
1550 t = next ();
6a505121
FCE
1551 if (t->type == tok_operator && t->content == ":")
1552 {
1553 t = next ();
6e213f58 1554 if (t->type == tok_keyword && t->content == "string")
6a505121 1555 fd->type = pe_string;
6e213f58 1556 else if (t->type == tok_keyword && t->content == "long")
6a505121
FCE
1557 fd->type = pe_long;
1558 else throw parse_error ("expected 'string' or 'long'");
1559
1560 t = next ();
1561 }
1562
56099f08
FCE
1563 if (! (t->type == tok_operator && t->content == "("))
1564 throw parse_error ("expected '('");
1565
1566 while (1)
1567 {
1568 t = next ();
1569
1570 // permit zero-argument fuctions
1571 if (t->type == tok_operator && t->content == ")")
1572 break;
1573 else if (! (t->type == tok_identifier))
1574 throw parse_error ("expected identifier");
1575 vardecl* vd = new vardecl;
1576 vd->name = t->content;
1577 vd->tok = t;
1578 fd->formal_args.push_back (vd);
1579
1580 t = next ();
6a505121
FCE
1581 if (t->type == tok_operator && t->content == ":")
1582 {
1583 t = next ();
6e213f58 1584 if (t->type == tok_keyword && t->content == "string")
6a505121 1585 vd->type = pe_string;
6e213f58 1586 else if (t->type == tok_keyword && t->content == "long")
6a505121
FCE
1587 vd->type = pe_long;
1588 else throw parse_error ("expected 'string' or 'long'");
dff50e09 1589
6a505121
FCE
1590 t = next ();
1591 }
56099f08
FCE
1592 if (t->type == tok_operator && t->content == ")")
1593 break;
1594 if (t->type == tok_operator && t->content == ",")
1595 continue;
1596 else
1597 throw parse_error ("expected ',' or ')'");
1598 }
1599
54dfabe9
FCE
1600 t = peek ();
1601 if (t && t->type == tok_embedded)
1602 fd->body = parse_embeddedcode ();
1603 else
1604 fd->body = parse_stmt_block ();
24cb178f
FCE
1605
1606 functions.push_back (fd);
2f1a1aea
FCE
1607}
1608
1609
9c0c0e46
FCE
1610probe_point*
1611parser::parse_probe_point ()
2f1a1aea 1612{
9c0c0e46 1613 probe_point* pl = new probe_point;
2f1a1aea 1614
9c0c0e46 1615 while (1)
2f1a1aea 1616 {
0c218afb 1617 const token* t = next (true); // wildcard scanning here
6e213f58
DS
1618 if (! (t->type == tok_identifier
1619 // we must allow ".return" and ".function", which are keywords
0c218afb 1620 || t->type == tok_keyword))
b4ceace2 1621 throw parse_error ("expected identifier or '*'");
9c0c0e46 1622
9c0c0e46
FCE
1623
1624 probe_point::component* c = new probe_point::component;
1625 c->functor = t->content;
f1a0157a 1626 c->tok = t;
9c0c0e46 1627 pl->components.push_back (c);
6e3347a9 1628 // NB we may add c->arg soon
9c0c0e46
FCE
1629
1630 t = peek ();
a477f3f1 1631
6e3347a9 1632 // consume optional parameter
9c0c0e46
FCE
1633 if (t && t->type == tok_operator && t->content == "(")
1634 {
1635 next (); // consume "("
1636 c->arg = parse_literal ();
1637
1638 t = next ();
1639 if (! (t->type == tok_operator && t->content == ")"))
1640 throw parse_error ("expected ')'");
1641
1642 t = peek ();
9c0c0e46 1643 }
9c0c0e46
FCE
1644
1645 if (t && t->type == tok_operator && t->content == ".")
6e3347a9
FCE
1646 {
1647 next ();
1648 continue;
1649 }
1650
f1a0157a 1651 // We only fall through here at the end of a probe point (past
6e3347a9
FCE
1652 // all the dotted/parametrized components).
1653
d898100a
FCE
1654 if (t && t->type == tok_operator &&
1655 (t->content == "?" || t->content == "!"))
6e3347a9
FCE
1656 {
1657 pl->optional = true;
d898100a
FCE
1658 if (t->content == "!") pl->sufficient = true;
1659 // NB: sufficient implies optional
6e3347a9
FCE
1660 next ();
1661 t = peek ();
1662 // fall through
cbbe8080
MH
1663 }
1664
1665 if (t && t->type == tok_keyword && t->content == "if")
1666 {
1667 next ();
1668 t = peek ();
75686668 1669 if (t && ! (t->type == tok_operator && t->content == "("))
cbbe8080
MH
1670 throw parse_error ("expected '('");
1671 next ();
1672
1673 pl->condition = parse_expression ();
1674
1675 t = peek ();
75686668 1676 if (t && ! (t->type == tok_operator && t->content == ")"))
cbbe8080
MH
1677 throw parse_error ("expected ')'");
1678 next ();
1679 t = peek ();
1680 // fall through
6e3347a9
FCE
1681 }
1682
dff50e09 1683 if (t && t->type == tok_operator
6e3347a9
FCE
1684 && (t->content == "{" || t->content == "," ||
1685 t->content == "=" || t->content == "+=" ))
1686 break;
dff50e09 1687
d898100a 1688 throw parse_error ("expected one of '. , ( ? ! { = +='");
2f1a1aea
FCE
1689 }
1690
1691 return pl;
1692}
1693
1694
1695literal*
1696parser::parse_literal ()
1697{
1698 const token* t = next ();
56099f08 1699 literal* l;
2f1a1aea 1700 if (t->type == tok_string)
c5be7511
JS
1701 {
1702 literal_string *ls = new literal_string (t->content);
1703
1704 // PR11208: check if the next token is also a string literal; auto-concatenate it
1705 // This is complicated to the extent that we need to skip intermediate whitespace.
1706 // XXX: but not comments
1707 while (peek()->type == tok_string && !input.ate_comment)
1708 ls->value.append(next()->content); // consume and append the token
1709
1710 l = ls;
1711 }
16e8f21f 1712 else
9c0c0e46 1713 {
16e8f21f
JS
1714 bool neg = false;
1715 if (t->type == tok_operator && t->content == "-")
1716 {
1717 neg = true;
1718 t = next ();
1719 }
1720
1721 if (t->type == tok_number)
1722 {
1723 const char* startp = t->content.c_str ();
1724 char* endp = (char*) startp;
1725
1726 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1727 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
79e6d33f
JS
1728 // since the lexer only gives us positive digit strings, but we'll
1729 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
16e8f21f
JS
1730 errno = 0;
1731 long long value = (long long) strtoull (startp, & endp, 0);
16e8f21f 1732 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
79e6d33f 1733 || (neg && (unsigned long long) value > 9223372036854775808ULL)
16e8f21f
JS
1734 || (unsigned long long) value > 18446744073709551615ULL
1735 || value < -9223372036854775807LL-1)
dff50e09 1736 throw parse_error ("number invalid or out of range");
16e8f21f 1737
79e6d33f
JS
1738 if (neg)
1739 value = -value;
1740
16e8f21f
JS
1741 l = new literal_number (value);
1742 }
1743 else
1744 throw parse_error ("expected literal string or number");
9c0c0e46 1745 }
56099f08
FCE
1746
1747 l->tok = t;
1748 return l;
2f1a1aea
FCE
1749}
1750
1751
1752if_statement*
1753parser::parse_if_statement ()
1754{
1755 const token* t = next ();
6e213f58 1756 if (! (t->type == tok_keyword && t->content == "if"))
56099f08
FCE
1757 throw parse_error ("expected 'if'");
1758 if_statement* s = new if_statement;
1759 s->tok = t;
1760
1761 t = next ();
2f1a1aea
FCE
1762 if (! (t->type == tok_operator && t->content == "("))
1763 throw parse_error ("expected '('");
1764
2f1a1aea
FCE
1765 s->condition = parse_expression ();
1766
1767 t = next ();
1768 if (! (t->type == tok_operator && t->content == ")"))
1769 throw parse_error ("expected ')'");
1770
1771 s->thenblock = parse_statement ();
1772
1773 t = peek ();
6e213f58 1774 if (t && t->type == tok_keyword && t->content == "else")
2f1a1aea
FCE
1775 {
1776 next ();
1777 s->elseblock = parse_statement ();
1778 }
ed10c639
FCE
1779 else
1780 s->elseblock = 0; // in case not otherwise initialized
2f1a1aea
FCE
1781
1782 return s;
1783}
1784
1785
69c68955
FCE
1786expr_statement*
1787parser::parse_expr_statement ()
1788{
1789 expr_statement *es = new expr_statement;
1790 const token* t = peek ();
1791 es->tok = t;
1792 es->value = parse_expression ();
1793 return es;
1794}
1795
1796
56099f08
FCE
1797return_statement*
1798parser::parse_return_statement ()
1799{
1800 const token* t = next ();
6e213f58 1801 if (! (t->type == tok_keyword && t->content == "return"))
56099f08 1802 throw parse_error ("expected 'return'");
6e213f58
DS
1803 if (context != con_function)
1804 throw parse_error ("found 'return' not in function context");
56099f08
FCE
1805 return_statement* s = new return_statement;
1806 s->tok = t;
1807 s->value = parse_expression ();
1808 return s;
1809}
1810
1811
1812delete_statement*
1813parser::parse_delete_statement ()
1814{
1815 const token* t = next ();
6e213f58 1816 if (! (t->type == tok_keyword && t->content == "delete"))
56099f08
FCE
1817 throw parse_error ("expected 'delete'");
1818 delete_statement* s = new delete_statement;
1819 s->tok = t;
1820 s->value = parse_expression ();
1821 return s;
1822}
1823
1824
f3c26ea5
FCE
1825next_statement*
1826parser::parse_next_statement ()
1827{
1828 const token* t = next ();
6e213f58 1829 if (! (t->type == tok_keyword && t->content == "next"))
f3c26ea5 1830 throw parse_error ("expected 'next'");
6e213f58
DS
1831 if (context != con_probe)
1832 throw parse_error ("found 'next' not in probe context");
f3c26ea5
FCE
1833 next_statement* s = new next_statement;
1834 s->tok = t;
1835 return s;
1836}
1837
1838
1839break_statement*
1840parser::parse_break_statement ()
1841{
1842 const token* t = next ();
6e213f58 1843 if (! (t->type == tok_keyword && t->content == "break"))
f3c26ea5
FCE
1844 throw parse_error ("expected 'break'");
1845 break_statement* s = new break_statement;
1846 s->tok = t;
1847 return s;
1848}
1849
1850
1851continue_statement*
1852parser::parse_continue_statement ()
1853{
1854 const token* t = next ();
6e213f58 1855 if (! (t->type == tok_keyword && t->content == "continue"))
f3c26ea5
FCE
1856 throw parse_error ("expected 'continue'");
1857 continue_statement* s = new continue_statement;
1858 s->tok = t;
1859 return s;
1860}
1861
1862
69c68955
FCE
1863for_loop*
1864parser::parse_for_loop ()
1865{
f3c26ea5 1866 const token* t = next ();
6e213f58 1867 if (! (t->type == tok_keyword && t->content == "for"))
f3c26ea5
FCE
1868 throw parse_error ("expected 'for'");
1869 for_loop* s = new for_loop;
1870 s->tok = t;
1871
1872 t = next ();
1873 if (! (t->type == tok_operator && t->content == "("))
1874 throw parse_error ("expected '('");
1875
1876 // initializer + ";"
1877 t = peek ();
1878 if (t && t->type == tok_operator && t->content == ";")
1879 {
cbfbbf69
FCE
1880 s->init = 0;
1881 next ();
f3c26ea5
FCE
1882 }
1883 else
1884 {
1885 s->init = parse_expr_statement ();
1886 t = next ();
1887 if (! (t->type == tok_operator && t->content == ";"))
1888 throw parse_error ("expected ';'");
1889 }
1890
1891 // condition + ";"
1892 t = peek ();
1893 if (t && t->type == tok_operator && t->content == ";")
1894 {
1895 literal_number* l = new literal_number(1);
1896 s->cond = l;
1897 s->cond->tok = next ();
1898 }
1899 else
1900 {
1901 s->cond = parse_expression ();
1902 t = next ();
1903 if (! (t->type == tok_operator && t->content == ";"))
1904 throw parse_error ("expected ';'");
1905 }
dff50e09 1906
f3c26ea5
FCE
1907 // increment + ")"
1908 t = peek ();
1909 if (t && t->type == tok_operator && t->content == ")")
1910 {
cbfbbf69
FCE
1911 s->incr = 0;
1912 next ();
f3c26ea5
FCE
1913 }
1914 else
1915 {
1916 s->incr = parse_expr_statement ();
1917 t = next ();
1918 if (! (t->type == tok_operator && t->content == ")"))
c958a431 1919 throw parse_error ("expected ')'");
f3c26ea5
FCE
1920 }
1921
1922 // block
1923 s->block = parse_statement ();
1924
1925 return s;
1926}
1927
1928
1929for_loop*
1930parser::parse_while_loop ()
1931{
1932 const token* t = next ();
6e213f58 1933 if (! (t->type == tok_keyword && t->content == "while"))
f3c26ea5
FCE
1934 throw parse_error ("expected 'while'");
1935 for_loop* s = new for_loop;
1936 s->tok = t;
1937
1938 t = next ();
1939 if (! (t->type == tok_operator && t->content == "("))
1940 throw parse_error ("expected '('");
1941
1942 // dummy init and incr fields
cbfbbf69
FCE
1943 s->init = 0;
1944 s->incr = 0;
f3c26ea5
FCE
1945
1946 // condition
1947 s->cond = parse_expression ();
1948
f3c26ea5
FCE
1949 t = next ();
1950 if (! (t->type == tok_operator && t->content == ")"))
1951 throw parse_error ("expected ')'");
dff50e09 1952
f3c26ea5
FCE
1953 // block
1954 s->block = parse_statement ();
1955
1956 return s;
69c68955
FCE
1957}
1958
1959
1960foreach_loop*
1961parser::parse_foreach_loop ()
1962{
1963 const token* t = next ();
6e213f58 1964 if (! (t->type == tok_keyword && t->content == "foreach"))
69c68955
FCE
1965 throw parse_error ("expected 'foreach'");
1966 foreach_loop* s = new foreach_loop;
1967 s->tok = t;
93484556 1968 s->sort_direction = 0;
c261711d 1969 s->value = NULL;
27f21e8c 1970 s->limit = NULL;
69c68955
FCE
1971
1972 t = next ();
1973 if (! (t->type == tok_operator && t->content == "("))
1974 throw parse_error ("expected '('");
1975
c261711d
JS
1976 symbol* lookahead_sym = NULL;
1977 int lookahead_sort = 0;
1978
1979 t = peek ();
1980 if (t && t->type == tok_identifier)
1981 {
1982 next ();
1983 lookahead_sym = new symbol;
1984 lookahead_sym->tok = t;
1985 lookahead_sym->name = t->content;
1986
1987 t = peek ();
1988 if (t && t->type == tok_operator &&
1989 (t->content == "+" || t->content == "-"))
1990 {
1991 next ();
1992 lookahead_sort = (t->content == "+") ? 1 : -1;
1993 }
1994
1995 t = peek ();
1996 if (t && t->type == tok_operator && t->content == "=")
1997 {
1998 next ();
1999 s->value = lookahead_sym;
2000 if (lookahead_sort)
2001 {
2002 s->sort_direction = lookahead_sort;
2003 s->sort_column = 0;
2004 }
2005 lookahead_sym = NULL;
2006 }
2007 }
2008
69c68955
FCE
2009 // see also parse_array_in
2010
2011 bool parenthesized = false;
2012 t = peek ();
c261711d 2013 if (!lookahead_sym && t && t->type == tok_operator && t->content == "[")
69c68955
FCE
2014 {
2015 next ();
2016 parenthesized = true;
2017 }
2018
c261711d
JS
2019 if (lookahead_sym)
2020 {
2021 s->indexes.push_back (lookahead_sym);
2022 if (lookahead_sort)
2023 {
2024 s->sort_direction = lookahead_sort;
2025 s->sort_column = 1;
2026 }
2027 lookahead_sym = NULL;
2028 }
2029 else while (1)
69c68955
FCE
2030 {
2031 t = next ();
2032 if (! (t->type == tok_identifier))
2033 throw parse_error ("expected identifier");
2034 symbol* sym = new symbol;
2035 sym->tok = t;
2036 sym->name = t->content;
2037 s->indexes.push_back (sym);
2038
93484556
FCE
2039 t = peek ();
2040 if (t && t->type == tok_operator &&
2041 (t->content == "+" || t->content == "-"))
2042 {
2043 if (s->sort_direction)
2044 throw parse_error ("multiple sort directives");
2045 s->sort_direction = (t->content == "+") ? 1 : -1;
2046 s->sort_column = s->indexes.size();
2047 next();
2048 }
2049
69c68955
FCE
2050 if (parenthesized)
2051 {
93484556 2052 t = peek ();
69c68955
FCE
2053 if (t && t->type == tok_operator && t->content == ",")
2054 {
2055 next ();
2056 continue;
2057 }
2058 else if (t && t->type == tok_operator && t->content == "]")
2059 {
2060 next ();
2061 break;
2062 }
dff50e09 2063 else
69c68955
FCE
2064 throw parse_error ("expected ',' or ']'");
2065 }
2066 else
2067 break; // expecting only one expression
2068 }
2069
2070 t = next ();
6e213f58 2071 if (! (t->type == tok_keyword && t->content == "in"))
69c68955 2072 throw parse_error ("expected 'in'");
dff50e09 2073
d02548c0 2074 s->base = parse_indexable();
69c68955 2075
93484556
FCE
2076 t = peek ();
2077 if (t && t->type == tok_operator &&
2078 (t->content == "+" || t->content == "-"))
2079 {
2080 if (s->sort_direction)
2081 throw parse_error ("multiple sort directives");
2082 s->sort_direction = (t->content == "+") ? 1 : -1;
2083 s->sort_column = 0;
2084 next();
2085 }
2086
27f21e8c
DS
2087 t = peek ();
2088 if (tok_is(t, tok_keyword, "limit"))
2089 {
2090 next (); // get past the "limit"
2091 s->limit = parse_expression ();
2092 }
2093
69c68955
FCE
2094 t = next ();
2095 if (! (t->type == tok_operator && t->content == ")"))
2096 throw parse_error ("expected ')'");
2097
2098 s->block = parse_statement ();
2099 return s;
2100}
2101
2102
2f1a1aea
FCE
2103expression*
2104parser::parse_expression ()
2105{
2106 return parse_assignment ();
2107}
2108
2f1a1aea
FCE
2109
2110expression*
2111parser::parse_assignment ()
2112{
2113 expression* op1 = parse_ternary ();
2114
2115 const token* t = peek ();
82919855 2116 // right-associative operators
dff50e09 2117 if (t && t->type == tok_operator
2f1a1aea 2118 && (t->content == "=" ||
82919855 2119 t->content == "<<<" ||
2f1a1aea 2120 t->content == "+=" ||
bb2e3076
FCE
2121 t->content == "-=" ||
2122 t->content == "*=" ||
2123 t->content == "/=" ||
2124 t->content == "%=" ||
2125 t->content == "<<=" ||
2126 t->content == ">>=" ||
2127 t->content == "&=" ||
2128 t->content == "^=" ||
2129 t->content == "|=" ||
d5d7c2cc 2130 t->content == ".=" ||
dff50e09 2131 false))
2f1a1aea 2132 {
bb2e3076 2133 // NB: lvalueness is checked during elaboration / translation
2f1a1aea 2134 assignment* e = new assignment;
56099f08 2135 e->left = op1;
2f1a1aea 2136 e->op = t->content;
56099f08 2137 e->tok = t;
2f1a1aea 2138 next ();
82919855 2139 e->right = parse_expression ();
56099f08 2140 op1 = e;
2f1a1aea 2141 }
56099f08
FCE
2142
2143 return op1;
2f1a1aea
FCE
2144}
2145
2146
2147expression*
2148parser::parse_ternary ()
2149{
2150 expression* op1 = parse_logical_or ();
2151
2152 const token* t = peek ();
2153 if (t && t->type == tok_operator && t->content == "?")
2154 {
2f1a1aea 2155 ternary_expression* e = new ternary_expression;
56099f08 2156 e->tok = t;
2f1a1aea 2157 e->cond = op1;
56099f08
FCE
2158 next ();
2159 e->truevalue = parse_expression (); // XXX
2f1a1aea
FCE
2160
2161 t = next ();
2162 if (! (t->type == tok_operator && t->content == ":"))
2163 throw parse_error ("expected ':'");
2164
56099f08 2165 e->falsevalue = parse_expression (); // XXX
2f1a1aea
FCE
2166 return e;
2167 }
2168 else
2169 return op1;
2170}
2171
2172
2173expression*
2174parser::parse_logical_or ()
2175{
2176 expression* op1 = parse_logical_and ();
dff50e09 2177
2f1a1aea 2178 const token* t = peek ();
56099f08 2179 while (t && t->type == tok_operator && t->content == "||")
2f1a1aea 2180 {
2f1a1aea 2181 logical_or_expr* e = new logical_or_expr;
56099f08
FCE
2182 e->tok = t;
2183 e->op = t->content;
2f1a1aea 2184 e->left = op1;
56099f08
FCE
2185 next ();
2186 e->right = parse_logical_and ();
2187 op1 = e;
2188 t = peek ();
2f1a1aea 2189 }
56099f08
FCE
2190
2191 return op1;
2f1a1aea
FCE
2192}
2193
2194
2195expression*
2196parser::parse_logical_and ()
2197{
bb2e3076 2198 expression* op1 = parse_boolean_or ();
2f1a1aea
FCE
2199
2200 const token* t = peek ();
56099f08 2201 while (t && t->type == tok_operator && t->content == "&&")
2f1a1aea 2202 {
2f1a1aea
FCE
2203 logical_and_expr *e = new logical_and_expr;
2204 e->left = op1;
56099f08
FCE
2205 e->op = t->content;
2206 e->tok = t;
2207 next ();
bb2e3076
FCE
2208 e->right = parse_boolean_or ();
2209 op1 = e;
2210 t = peek ();
2211 }
2212
2213 return op1;
2214}
2215
2216
2217expression*
2218parser::parse_boolean_or ()
2219{
2220 expression* op1 = parse_boolean_xor ();
2221
2222 const token* t = peek ();
2223 while (t && t->type == tok_operator && t->content == "|")
2224 {
2225 binary_expression* e = new binary_expression;
2226 e->left = op1;
2227 e->op = t->content;
2228 e->tok = t;
2229 next ();
2230 e->right = parse_boolean_xor ();
2231 op1 = e;
2232 t = peek ();
2233 }
2234
2235 return op1;
2236}
2237
2238
2239expression*
2240parser::parse_boolean_xor ()
2241{
2242 expression* op1 = parse_boolean_and ();
2243
2244 const token* t = peek ();
2245 while (t && t->type == tok_operator && t->content == "^")
2246 {
2247 binary_expression* e = new binary_expression;
2248 e->left = op1;
2249 e->op = t->content;
2250 e->tok = t;
2251 next ();
2252 e->right = parse_boolean_and ();
2253 op1 = e;
2254 t = peek ();
2255 }
2256
2257 return op1;
2258}
2259
2260
2261expression*
2262parser::parse_boolean_and ()
2263{
2264 expression* op1 = parse_array_in ();
2265
2266 const token* t = peek ();
2267 while (t && t->type == tok_operator && t->content == "&")
2268 {
2269 binary_expression* e = new binary_expression;
2270 e->left = op1;
2271 e->op = t->content;
2272 e->tok = t;
2273 next ();
56099f08
FCE
2274 e->right = parse_array_in ();
2275 op1 = e;
2276 t = peek ();
2f1a1aea 2277 }
56099f08
FCE
2278
2279 return op1;
2f1a1aea
FCE
2280}
2281
2282
2283expression*
2284parser::parse_array_in ()
2285{
ce10591c 2286 // This is a very tricky case. All these are legit expressions:
69c68955 2287 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
ce10591c
FCE
2288 vector<expression*> indexes;
2289 bool parenthesized = false;
2f1a1aea
FCE
2290
2291 const token* t = peek ();
69c68955 2292 if (t && t->type == tok_operator && t->content == "[")
ce10591c
FCE
2293 {
2294 next ();
2295 parenthesized = true;
2296 }
2297
2298 while (1)
2299 {
2300 expression* op1 = parse_comparison ();
2301 indexes.push_back (op1);
2302
2303 if (parenthesized)
2304 {
2305 const token* t = peek ();
2306 if (t && t->type == tok_operator && t->content == ",")
2307 {
2308 next ();
2309 continue;
2310 }
69c68955 2311 else if (t && t->type == tok_operator && t->content == "]")
ce10591c
FCE
2312 {
2313 next ();
2314 break;
2315 }
dff50e09 2316 else
69c68955 2317 throw parse_error ("expected ',' or ']'");
ce10591c
FCE
2318 }
2319 else
2320 break; // expecting only one expression
2321 }
2322
2323 t = peek ();
6e213f58 2324 if (t && t->type == tok_keyword && t->content == "in")
2f1a1aea 2325 {
2f1a1aea 2326 array_in *e = new array_in;
56099f08 2327 e->tok = t;
ce10591c
FCE
2328 next (); // swallow "in"
2329
2330 arrayindex* a = new arrayindex;
2331 a->indexes = indexes;
d02548c0
GH
2332 a->base = parse_indexable();
2333 a->tok = a->base->get_tok();
ce10591c 2334 e->operand = a;
2f1a1aea
FCE
2335 return e;
2336 }
ce10591c
FCE
2337 else if (indexes.size() == 1) // no "in" - need one expression only
2338 return indexes[0];
2f1a1aea 2339 else
ce10591c 2340 throw parse_error ("unexpected comma-separated expression list");
2f1a1aea
FCE
2341}
2342
2343
2344expression*
2345parser::parse_comparison ()
2346{
bb2e3076 2347 expression* op1 = parse_shift ();
2f1a1aea
FCE
2348
2349 const token* t = peek ();
dff50e09 2350 while (t && t->type == tok_operator
553d27a5
FCE
2351 && (t->content == ">" ||
2352 t->content == "<" ||
2353 t->content == "==" ||
2354 t->content == "!=" ||
2355 t->content == "<=" ||
bb2e3076 2356 t->content == ">="))
2f1a1aea
FCE
2357 {
2358 comparison* e = new comparison;
2359 e->left = op1;
2360 e->op = t->content;
56099f08 2361 e->tok = t;
2f1a1aea 2362 next ();
bb2e3076
FCE
2363 e->right = parse_shift ();
2364 op1 = e;
2365 t = peek ();
2366 }
2367
2368 return op1;
2369}
2370
2371
2372expression*
2373parser::parse_shift ()
2374{
2375 expression* op1 = parse_concatenation ();
2376
2377 const token* t = peek ();
dff50e09 2378 while (t && t->type == tok_operator &&
bb2e3076
FCE
2379 (t->content == "<<" || t->content == ">>"))
2380 {
2381 binary_expression* e = new binary_expression;
2382 e->left = op1;
2383 e->op = t->content;
2384 e->tok = t;
2385 next ();
56099f08
FCE
2386 e->right = parse_concatenation ();
2387 op1 = e;
2388 t = peek ();
2f1a1aea 2389 }
56099f08
FCE
2390
2391 return op1;
2f1a1aea
FCE
2392}
2393
2394
2395expression*
2396parser::parse_concatenation ()
2397{
2398 expression* op1 = parse_additive ();
2399
2400 const token* t = peek ();
2401 // XXX: the actual awk string-concatenation operator is *whitespace*.
2402 // I don't know how to easily to model that here.
56099f08 2403 while (t && t->type == tok_operator && t->content == ".")
2f1a1aea
FCE
2404 {
2405 concatenation* e = new concatenation;
2406 e->left = op1;
2407 e->op = t->content;
56099f08 2408 e->tok = t;
2f1a1aea 2409 next ();
56099f08
FCE
2410 e->right = parse_additive ();
2411 op1 = e;
2412 t = peek ();
2f1a1aea 2413 }
56099f08
FCE
2414
2415 return op1;
2f1a1aea
FCE
2416}
2417
2418
2419expression*
2420parser::parse_additive ()
2421{
2422 expression* op1 = parse_multiplicative ();
2423
2424 const token* t = peek ();
dff50e09 2425 while (t && t->type == tok_operator
2f1a1aea
FCE
2426 && (t->content == "+" || t->content == "-"))
2427 {
2428 binary_expression* e = new binary_expression;
2429 e->op = t->content;
2430 e->left = op1;
56099f08 2431 e->tok = t;
2f1a1aea 2432 next ();
56099f08
FCE
2433 e->right = parse_multiplicative ();
2434 op1 = e;
2435 t = peek ();
2f1a1aea 2436 }
56099f08
FCE
2437
2438 return op1;
2f1a1aea
FCE
2439}
2440
2441
2442expression*
2443parser::parse_multiplicative ()
2444{
2445 expression* op1 = parse_unary ();
2446
2447 const token* t = peek ();
dff50e09 2448 while (t && t->type == tok_operator
2f1a1aea
FCE
2449 && (t->content == "*" || t->content == "/" || t->content == "%"))
2450 {
2451 binary_expression* e = new binary_expression;
2452 e->op = t->content;
2453 e->left = op1;
56099f08 2454 e->tok = t;
2f1a1aea 2455 next ();
56099f08
FCE
2456 e->right = parse_unary ();
2457 op1 = e;
2458 t = peek ();
2f1a1aea 2459 }
56099f08
FCE
2460
2461 return op1;
2f1a1aea
FCE
2462}
2463
2464
2465expression*
2466parser::parse_unary ()
2467{
2468 const token* t = peek ();
dff50e09
FCE
2469 if (t && t->type == tok_operator
2470 && (t->content == "+" ||
2471 t->content == "-" ||
bb2e3076
FCE
2472 t->content == "!" ||
2473 t->content == "~" ||
2474 false))
2f1a1aea
FCE
2475 {
2476 unary_expression* e = new unary_expression;
2477 e->op = t->content;
56099f08 2478 e->tok = t;
2f1a1aea 2479 next ();
1cb79a72 2480 e->operand = parse_unary ();
2f1a1aea
FCE
2481 return e;
2482 }
2483 else
bb2e3076 2484 return parse_crement ();
2f1a1aea
FCE
2485}
2486
2487
2488expression*
2489parser::parse_crement () // as in "increment" / "decrement"
2490{
cbfbbf69
FCE
2491 // NB: Ideally, we'd parse only a symbol as an operand to the
2492 // *crement operators, instead of a general expression value. We'd
2493 // need more complex lookahead code to tell apart the postfix cases.
2494 // So we just punt, and leave it to pass-3 to signal errors on
2495 // cases like "4++".
2496
2f1a1aea 2497 const token* t = peek ();
dff50e09 2498 if (t && t->type == tok_operator
2f1a1aea
FCE
2499 && (t->content == "++" || t->content == "--"))
2500 {
2501 pre_crement* e = new pre_crement;
2502 e->op = t->content;
56099f08 2503 e->tok = t;
2f1a1aea
FCE
2504 next ();
2505 e->operand = parse_value ();
2506 return e;
2507 }
2508
2509 // post-crement or non-crement
2510 expression *op1 = parse_value ();
dff50e09 2511
2f1a1aea 2512 t = peek ();
dff50e09 2513 if (t && t->type == tok_operator
2f1a1aea
FCE
2514 && (t->content == "++" || t->content == "--"))
2515 {
2516 post_crement* e = new post_crement;
2517 e->op = t->content;
56099f08 2518 e->tok = t;
2f1a1aea
FCE
2519 next ();
2520 e->operand = op1;
2521 return e;
2522 }
2523 else
2524 return op1;
2525}
2526
2527
2528expression*
2529parser::parse_value ()
2530{
2531 const token* t = peek ();
2532 if (! t)
2533 throw parse_error ("expected value");
2534
7d902887
FCE
2535 if (t->type == tok_embedded)
2536 {
2537 next ();
2538 if (! privileged)
2539 throw parse_error ("embedded expression code in unprivileged script", false);
2540
2541 embedded_expr *e = new embedded_expr;
2542 e->tok = t;
2543 e->code = t->content;
2544 return e;
2545 }
2546
2f1a1aea
FCE
2547 if (t->type == tok_operator && t->content == "(")
2548 {
2549 next ();
2550 expression* e = parse_expression ();
2551 t = next ();
2552 if (! (t->type == tok_operator && t->content == ")"))
2553 throw parse_error ("expected ')'");
2554 return e;
2555 }
03c75a4a
JS
2556 else if (t->type == tok_operator && t->content == "&")
2557 {
2558 next ();
d48afc20 2559 return parse_target_symbol (t);
03c75a4a 2560 }
2f1a1aea
FCE
2561 else if (t->type == tok_identifier)
2562 return parse_symbol ();
2563 else
2564 return parse_literal ();
2565}
2566
2567
d02548c0
GH
2568const token *
2569parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name)
2570{
2571 hop = NULL;
2572 const token* t = expect_ident (name);
2573 if (name == "@hist_linear" || name == "@hist_log")
2574 {
2575 hop = new hist_op;
2576 if (name == "@hist_linear")
2577 hop->htype = hist_linear;
2578 else if (name == "@hist_log")
2579 hop->htype = hist_log;
2580 hop->tok = t;
2581 expect_op("(");
2582 hop->stat = parse_expression ();
2583 int64_t tnum;
2584 if (hop->htype == hist_linear)
2585 {
2586 for (size_t i = 0; i < 3; ++i)
2587 {
2588 expect_op (",");
2589 expect_number (tnum);
2590 hop->params.push_back (tnum);
2591 }
2592 }
d02548c0
GH
2593 expect_op(")");
2594 }
2595 return t;
2596}
2597
2598
2599indexable*
2600parser::parse_indexable ()
2601{
2602 hist_op *hop = NULL;
2603 string name;
2604 const token *tok = parse_hist_op_or_bare_name(hop, name);
2605 if (hop)
2606 return hop;
2607 else
2608 {
2609 symbol* sym = new symbol;
2610 sym->name = name;
2611 sym->tok = tok;
2612 return sym;
2613 }
2614}
2615
2616
2617// var, indexable[index], func(parms), printf("...", ...), $var, $var->member, @stat_op(stat)
30263a73 2618expression* parser::parse_symbol ()
2f1a1aea 2619{
d02548c0
GH
2620 hist_op *hop = NULL;
2621 symbol *sym = NULL;
d7f3e0c5 2622 string name;
d02548c0
GH
2623 const token *t = parse_hist_op_or_bare_name(hop, name);
2624
2625 if (!hop)
0fefb486 2626 {
dff50e09 2627 // If we didn't get a hist_op, then we did get an identifier. We can
d02548c0
GH
2628 // now scrutinize this identifier for the various magic forms of identifier
2629 // (printf, @stat_op, and $var...)
2630
30263a73
FCE
2631 if (name == "@cast" || (name.size()>0 && name[0] == '$'))
2632 return parse_target_symbol (t);
9b5af295 2633
db135493
FCE
2634 // NB: PR11343: @defined() is not incompatible with earlier versions
2635 // of stap, so no need to check session.compatible for 1.2
30263a73
FCE
2636 if (name == "@defined")
2637 return parse_defined_op (t);
2638
9b5af295 2639 else if (name.size() > 0 && name[0] == '@')
d7f3e0c5 2640 {
d02548c0
GH
2641 stat_op *sop = new stat_op;
2642 if (name == "@avg")
2643 sop->ctype = sc_average;
2644 else if (name == "@count")
2645 sop->ctype = sc_count;
2646 else if (name == "@sum")
2647 sop->ctype = sc_sum;
2648 else if (name == "@min")
2649 sop->ctype = sc_min;
2650 else if (name == "@max")
2651 sop->ctype = sc_max;
2652 else
2653 throw parse_error("unknown statistic operator " + name);
2654 expect_op("(");
2655 sop->tok = t;
2656 sop->stat = parse_expression ();
2657 expect_op(")");
2658 return sop;
2659 }
dff50e09 2660
d5e178c1 2661 else if (print_format *fmt = print_format::create(t))
d02548c0 2662 {
d02548c0 2663 expect_op("(");
b15c465c
PP
2664 if ((name == "print" || name == "println" ||
2665 name == "sprint" || name == "sprintln") &&
3cb17058 2666 (peek_kw("@hist_linear") || peek_kw("@hist_log")))
a4636912
GH
2667 {
2668 // We have a special case where we recognize
2669 // print(@hist_foo(bar)) as a magic print-the-histogram
2670 // construct. This is sort of gross but it avoids
2671 // promoting histogram references to typeful
2672 // expressions.
dff50e09 2673
1bbeef03
GH
2674 hop = NULL;
2675 t = parse_hist_op_or_bare_name(hop, name);
2676 assert(hop);
dff50e09 2677
1bbeef03
GH
2678 // It is, sadly, possible that even while parsing a
2679 // hist_op, we *mis-guessed* and the user wishes to
2680 // print(@hist_op(foo)[bucket]), a scalar. In that case
2681 // we must parse the arrayindex and print an expression.
839325a1
JS
2682 //
2683 // XXX: This still fails if the arrayindex is part of a
2684 // larger expression. To really handle everything, we'd
2685 // need to push back all the hist tokens start over.
dff50e09 2686
1bbeef03
GH
2687 if (!peek_op ("["))
2688 fmt->hist = hop;
2689 else
2690 {
2691 // This is simplified version of the
2692 // multi-array-index parser below, because we can
2693 // only ever have one index on a histogram anyways.
2694 expect_op("[");
2695 struct arrayindex* ai = new arrayindex;
2696 ai->tok = t;
2697 ai->base = hop;
2698 ai->indexes.push_back (parse_expression ());
2699 expect_op("]");
2700 fmt->args.push_back(ai);
839325a1
JS
2701
2702 // Consume any subsequent arguments.
2703 while (!peek_op (")"))
2704 {
2705 expect_op(",");
2706 expression *e = parse_expression ();
2707 fmt->args.push_back(e);
2708 }
1bbeef03 2709 }
a4636912 2710 }
d7f3e0c5 2711 else
d02548c0 2712 {
3cb17058
JS
2713 int min_args = 0;
2714 if (fmt->print_with_format)
2715 {
2716 // Consume and convert a format string. Agreement between the
2717 // format string and the arguments is postponed to the
2718 // typechecking phase.
2719 string tmp;
2720 expect_unknown (tok_string, tmp);
2721 fmt->raw_components = tmp;
2722 fmt->components = print_format::string_to_components (tmp);
2723 }
2724 else if (fmt->print_with_delim)
2725 {
2726 // Consume a delimiter to separate arguments.
2727 fmt->delimiter.clear();
2728 fmt->delimiter.type = print_format::conv_literal;
2729 expect_unknown (tok_string, fmt->delimiter.literal_string);
2730 min_args = 2;
2731 }
2732 else
2733 {
2734 // If we are not printing with a format string, we must have
2735 // at least one argument (of any type).
2736 expression *e = parse_expression ();
2737 fmt->args.push_back(e);
2738 }
2739
2740 // Consume any subsequent arguments.
2741 while (min_args || !peek_op (")"))
2742 {
2743 expect_op(",");
2744 expression *e = parse_expression ();
2745 fmt->args.push_back(e);
2746 if (min_args)
2747 --min_args;
2748 }
d02548c0
GH
2749 }
2750 expect_op(")");
2751 return fmt;
2752 }
dff50e09 2753
d02548c0
GH
2754 else if (peek_op ("(")) // function call
2755 {
2756 next ();
2757 struct functioncall* f = new functioncall;
2758 f->tok = t;
2759 f->function = name;
2760 // Allow empty actual parameter list
2761 if (peek_op (")"))
2762 {
2763 next ();
2764 return f;
2765 }
2766 while (1)
2767 {
2768 f->args.push_back (parse_expression ());
2769 if (peek_op (")"))
2770 {
2771 next();
2772 break;
2773 }
2774 else if (peek_op (","))
2775 {
2776 next();
2777 continue;
2778 }
2779 else
2780 throw parse_error ("expected ',' or ')'");
2781 }
2782 return f;
2783 }
2784
2785 else
2786 {
2787 sym = new symbol;
2788 sym->name = name;
2789 sym->tok = t;
d7f3e0c5 2790 }
0fefb486 2791 }
dff50e09
FCE
2792
2793 // By now, either we had a hist_op in the first place, or else
d02548c0
GH
2794 // we had a plain word and it was converted to a symbol.
2795
70c743d8 2796 assert (!hop != !sym); // logical XOR
d02548c0
GH
2797
2798 // All that remains is to check for array indexing
2799
d7f3e0c5 2800 if (peek_op ("[")) // array
2f1a1aea
FCE
2801 {
2802 next ();
2803 struct arrayindex* ai = new arrayindex;
d02548c0
GH
2804 ai->tok = t;
2805
2806 if (hop)
2807 ai->base = hop;
2808 else
2809 ai->base = sym;
2810
2f1a1aea
FCE
2811 while (1)
2812 {
2813 ai->indexes.push_back (parse_expression ());
d7f3e0c5 2814 if (peek_op ("]"))
dff50e09
FCE
2815 {
2816 next();
2817 break;
d7f3e0c5
GH
2818 }
2819 else if (peek_op (","))
2820 {
2821 next();
2822 continue;
2823 }
2f1a1aea
FCE
2824 else
2825 throw parse_error ("expected ',' or ']'");
2826 }
2827 return ai;
2828 }
d02548c0
GH
2829
2830 // If we got to here, we *should* have a symbol; if we have
2831 // a hist_op on its own, it doesn't count as an expression,
2832 // so we throw a parse error.
2833
2834 if (hop)
2835 throw parse_error("base histogram operator where expression expected", t);
dff50e09
FCE
2836
2837 return sym;
2f1a1aea 2838}
56099f08 2839
81931eab 2840
30263a73
FCE
2841// Parse a @cast or $var. Given head token has already been consumed.
2842target_symbol* parser::parse_target_symbol (const token* t)
2843{
d48afc20
JS
2844 bool addressof = false;
2845 if (t->type == tok_operator && t->content == "&")
2846 {
2847 addressof = true;
2848 t = next ();
2849 }
2850
30263a73
FCE
2851 if (t->type == tok_identifier && t->content == "@cast")
2852 {
2853 cast_op *cop = new cast_op;
2854 cop->tok = t;
2855 cop->base_name = t->content;
2856 expect_op("(");
2857 cop->operand = parse_expression ();
2858 expect_op(",");
2859 expect_unknown(tok_string, cop->type);
2860 // types never start with "struct<space>" or "union<space>",
2861 // so gobble it up.
60d98537 2862 if (startswith(cop->type, "struct "))
30263a73 2863 cop->type = cop->type.substr(7);
60d98537 2864 if (startswith(cop->type, "union "))
30263a73
FCE
2865 cop->type = cop->type.substr(6);
2866 if (peek_op (","))
2867 {
2868 next();
2869 expect_unknown(tok_string, cop->module);
2870 }
2871 expect_op(")");
2872 parse_target_symbol_components(cop);
d48afc20 2873 cop->addressof = addressof;
30263a73
FCE
2874 return cop;
2875 }
2876
2877 if (t->type == tok_identifier && t->content[0]=='$')
2878 {
2879 // target_symbol time
2880 target_symbol *tsym = new target_symbol;
2881 tsym->tok = t;
2882 tsym->base_name = t->content;
2883 parse_target_symbol_components(tsym);
d48afc20 2884 tsym->addressof = addressof;
30263a73
FCE
2885 return tsym;
2886 }
2887
2888 throw parse_error ("expected @cast or $var");
2889}
2890
2891
2892// Parse a @defined(). Given head token has already been consumed.
2893expression* parser::parse_defined_op (const token* t)
2894{
2895 defined_op* dop = new defined_op;
2896 dop->tok = t;
2897 expect_op("(");
30263a73 2898 // no need for parse_hist_op... etc., as @defined takes only target_symbols as its operand.
d48afc20 2899 const token* tt = next ();
30263a73
FCE
2900 dop->operand = parse_target_symbol (tt);
2901 expect_op(")");
2902 return dop;
2903}
2904
2905
2906
81931eab
JS
2907void
2908parser::parse_target_symbol_components (target_symbol* e)
2909{
5f36109e
JS
2910 bool pprint = false;
2911
2912 // check for pretty-print in the form $foo$
2913 string &base = e->base_name;
2914 size_t pprint_pos = base.find_last_not_of('$');
2915 if (0 < pprint_pos && pprint_pos < base.length() - 1)
2916 {
2917 string pprint_val = base.substr(pprint_pos + 1);
2918 base.erase(pprint_pos + 1);
2919 e->components.push_back (target_symbol::component(e->tok, pprint_val, true));
2920 pprint = true;
2921 }
2922
2923 while (!pprint)
81931eab 2924 {
81931eab
JS
2925 if (peek_op ("->"))
2926 {
c67847a0
JS
2927 const token* t = next();
2928 string member;
2929 expect_ident_or_keyword (member);
5f36109e
JS
2930
2931 // check for pretty-print in the form $foo->$ or $foo->bar$
2932 pprint_pos = member.find_last_not_of('$');
2933 string pprint_val;
2934 if (pprint_pos == string::npos || pprint_pos < member.length() - 1)
2935 {
2936 pprint_val = member.substr(pprint_pos + 1);
2937 member.erase(pprint_pos + 1);
2938 pprint = true;
2939 }
2940
2941 if (!member.empty())
2942 e->components.push_back (target_symbol::component(t, member));
2943 if (pprint)
2944 e->components.push_back (target_symbol::component(t, pprint_val, true));
81931eab
JS
2945 }
2946 else if (peek_op ("["))
2947 {
c67847a0 2948 const token* t = next();
6fda2dff
JS
2949 expression* index = parse_expression();
2950 literal_number* ln = dynamic_cast<literal_number*>(index);
2951 if (ln)
2952 e->components.push_back (target_symbol::component(t, ln->value));
2953 else
2954 e->components.push_back (target_symbol::component(t, index));
81931eab 2955 expect_op ("]");
81931eab
JS
2956 }
2957 else
2958 break;
2959 }
5f36109e
JS
2960
2961 if (!pprint)
2962 {
2963 // check for pretty-print in the form $foo $
2964 // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$
2965 const token* t = peek();
2966 if (t->type == tok_identifier &&
2967 t->content.find_first_not_of('$') == string::npos)
2968 {
2969 t = next();
2970 e->components.push_back (target_symbol::component(t, t->content, true));
2971 pprint = true;
2972 }
2973 }
2974
2975 if (pprint && (peek_op ("->") || peek_op("[")))
2976 throw parse_error("can't dereference after pretty-printing");
81931eab
JS
2977}
2978
73267b89 2979/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.465782 seconds and 5 git commands to generate.