]> sourceware.org Git - systemtap.git/blame - parse.cxx
stapdyn: retry incomplete/EINTR transport writes
[systemtap.git] / parse.cxx
CommitLineData
2f1a1aea 1// recursive descent parser for systemtap scripts
3e6a17ee 2// Copyright (C) 2005-2013 Red Hat Inc.
77a5c1f9 3// Copyright (C) 2006 Intel Corporation.
5811366a 4// Copyright (C) 2007 Bull S.A.S
69c68955
FCE
5//
6// This file is part of systemtap, and is free software. You can
7// redistribute it and/or modify it under the terms of the GNU General
8// Public License (GPL); either version 2, or (at your option) any
9// later version.
2f1a1aea 10
2b066ec1 11#include "config.h"
2f1a1aea
FCE
12#include "staptree.h"
13#include "parse.h"
177a8ead 14#include "session.h"
3f99432c
FCE
15#include "util.h"
16
2b066ec1 17#include <iostream>
eacb10ce 18
2b066ec1 19#include <fstream>
2f1a1aea 20#include <cctype>
9c0c0e46 21#include <cstdlib>
29e64872 22#include <cassert>
9c0c0e46
FCE
23#include <cerrno>
24#include <climits>
57b73400 25#include <sstream>
f74fb737 26#include <cstring>
3f99432c 27#include <cctype>
eacb10ce
FCE
28#include <iterator>
29
7a468d68
FCE
30extern "C" {
31#include <fnmatch.h>
32}
2f1a1aea
FCE
33
34using namespace std;
35
c18f07f8
JS
36
37class lexer
38{
39public:
fee28e5c 40 bool ate_comment; // current token follows a comment
b5477cd9 41 bool ate_whitespace; // the most recent token followed whitespace
534aad8b
SM
42 bool saw_tokens; // the lexer found tokens (before preprocessing occurred)
43
b5477cd9 44 token* scan ();
c18f07f8
JS
45 lexer (istream&, const string&, systemtap_session&);
46 void set_current_file (stapfile* f);
47
2524d1fd
SM
48 static set<string> keywords;
49 static set<string> atwords;
c18f07f8
JS
50private:
51 inline int input_get ();
52 inline int input_peek (unsigned n=0);
53 void input_put (const string&, const token*);
54 string input_name;
55 string input_contents;
56 const char *input_pointer; // index into input_contents
57 const char *input_end;
58 unsigned cursor_suspend_count;
59 unsigned cursor_suspend_line;
60 unsigned cursor_suspend_column;
61 unsigned cursor_line;
62 unsigned cursor_column;
63 systemtap_session& session;
64 stapfile* current_file;
c18f07f8
JS
65};
66
67
68class parser
69{
70public:
4cd32d8c 71 parser (systemtap_session& s, const string& n, istream& i, bool p);
c18f07f8
JS
72 ~parser ();
73
74 stapfile* parse ();
fe410f52 75 stapfile* parse_library_macros ();
c18f07f8
JS
76
77private:
78 typedef enum {
79 PP_NONE,
80 PP_KEEP_THEN,
81 PP_SKIP_THEN,
82 PP_KEEP_ELSE,
83 PP_SKIP_ELSE,
84 } pp_state_t;
85
534aad8b
SM
86 struct pp1_activation;
87
fe410f52
SM
88 struct pp_macrodecl : public macrodecl {
89 pp1_activation* parent_act; // used for param bindings
90 virtual bool is_closure() { return parent_act != 0; }
91 pp_macrodecl () : macrodecl(), parent_act(0) { }
534aad8b
SM
92 };
93
c18f07f8
JS
94 systemtap_session& session;
95 string input_name;
c18f07f8
JS
96 lexer input;
97 bool privileged;
98 parse_context context;
99
534aad8b
SM
100 // preprocessing subordinate, first pass (macros)
101 struct pp1_activation {
102 const token* tok;
103 unsigned cursor; // position within macro body
104 map<string, pp_macrodecl*> params;
534aad8b 105
fe410f52 106 macrodecl* curr_macro;
534aad8b 107
fe410f52
SM
108 pp1_activation (const token tok, macrodecl* curr_macro)
109 : tok(new token(tok)), cursor(0), curr_macro(curr_macro) { }
534aad8b
SM
110 ~pp1_activation ();
111 };
112
fe410f52 113 map<string, macrodecl*> pp1_namespace;
534aad8b
SM
114 vector<pp1_activation*> pp1_state;
115 const token* next_pp1 ();
116 const token* scan_pp1 ();
117 const token* slurp_pp1_param (vector<const token*>& param);
118 const token* slurp_pp1_body (vector<const token*>& body);
119
120 // preprocessing subordinate, final pass (conditionals)
c18f07f8 121 vector<pair<const token*, pp_state_t> > pp_state;
b5477cd9 122 const token* scan_pp ();
c18f07f8
JS
123 const token* skip_pp ();
124
125 // scanning state
b5477cd9
SM
126 const token* next ();
127 const token* peek ();
c18f07f8 128
731a5359
MW
129 // Advance past and throw away current token after peek () or next ().
130 void swallow ();
131
a07a2c28 132 const token* systemtap_v_seen;
c18f07f8
JS
133 const token* last_t; // the last value returned by peek() or next()
134 const token* next_t; // lookahead token
135
731a5359
MW
136 // expectations, these swallow the token
137 void expect_known (token_type tt, string const & expected);
138 void expect_unknown (token_type tt, string & target);
139 void expect_unknown2 (token_type tt1, token_type tt2, string & target);
140
141 // convenience forms, these also swallow the token
142 void expect_op (string const & expected);
143 void expect_kw (string const & expected);
144 void expect_number (int64_t & expected);
145 void expect_ident_or_keyword (string & target);
146
147 // convenience forms, which return true or false, these don't swallow token
c18f07f8
JS
148 bool peek_op (string const & op);
149 bool peek_kw (string const & kw);
150
731a5359
MW
151 // convenience forms, which return the token
152 const token* expect_kw_token (string const & expected);
153 const token* expect_ident_or_atword (string & target);
154
c18f07f8
JS
155 void print_error (const parse_error& pe);
156 unsigned num_errors;
157
158private: // nonterminals
159 void parse_probe (vector<probe*>&, vector<probe_alias*>&);
160 void parse_global (vector<vardecl*>&, vector<probe*>&);
161 void parse_functiondecl (vector<functiondecl*>&);
162 embeddedcode* parse_embeddedcode ();
163 probe_point* parse_probe_point ();
d24f1ff4
SM
164 literal_string* consume_string_literals (const token*);
165 literal_string* parse_literal_string ();
c18f07f8
JS
166 literal* parse_literal ();
167 block* parse_stmt_block ();
168 try_block* parse_try_block ();
169 statement* parse_statement ();
170 if_statement* parse_if_statement ();
171 for_loop* parse_for_loop ();
172 for_loop* parse_while_loop ();
173 foreach_loop* parse_foreach_loop ();
174 expr_statement* parse_expr_statement ();
175 return_statement* parse_return_statement ();
176 delete_statement* parse_delete_statement ();
177 next_statement* parse_next_statement ();
178 break_statement* parse_break_statement ();
179 continue_statement* parse_continue_statement ();
180 indexable* parse_indexable ();
181 const token *parse_hist_op_or_bare_name (hist_op *&hop, string &name);
182 target_symbol *parse_target_symbol (const token* t);
8cc799a5 183 expression* parse_entry_op (const token* t);
c18f07f8 184 expression* parse_defined_op (const token* t);
3689db05 185 expression* parse_perf_op (const token* t);
c18f07f8
JS
186 expression* parse_expression ();
187 expression* parse_assignment ();
188 expression* parse_ternary ();
189 expression* parse_logical_or ();
190 expression* parse_logical_and ();
191 expression* parse_boolean_or ();
192 expression* parse_boolean_xor ();
193 expression* parse_boolean_and ();
194 expression* parse_array_in ();
93daaca8 195 expression* parse_comparison_or_regex_query ();
c18f07f8
JS
196 expression* parse_shift ();
197 expression* parse_concatenation ();
198 expression* parse_additive ();
199 expression* parse_multiplicative ();
200 expression* parse_unary ();
201 expression* parse_crement ();
202 expression* parse_value ();
203 expression* parse_symbol ();
204
205 void parse_target_symbol_components (target_symbol* e);
206};
207
208
2f1a1aea
FCE
209// ------------------------------------------------------------------------
210
c18f07f8
JS
211stapfile*
212parse (systemtap_session& s, istream& i, bool pr)
213{
4cd32d8c 214 parser p (s, "<input>", i, pr);
c18f07f8
JS
215 return p.parse ();
216}
217
218
219stapfile*
4cd32d8c 220parse (systemtap_session& s, const string& name, bool pr)
c18f07f8 221{
4cd32d8c
JS
222 ifstream i(name.c_str(), ios::in);
223 if (i.fail())
224 {
225 cerr << (file_exists(name)
226 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
227 : _F("Input file '%s' is missing.", name.c_str()))
228 << endl;
229 return 0;
230 }
231
232 parser p (s, name, i, pr);
c18f07f8
JS
233 return p.parse ();
234}
235
fe410f52
SM
236stapfile*
237parse_library_macros (systemtap_session& s, const string& name)
238{
239 ifstream i(name.c_str(), ios::in);
240 if (i.fail())
241 {
242 cerr << (file_exists(name)
243 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
244 : _F("Input file '%s' is missing.", name.c_str()))
245 << endl;
246 return 0;
247 }
248
249 parser p (s, name, i, false); // TODOXX pr is ...? should path be full??
250 return p.parse_library_macros ();
251}
252
c18f07f8 253// ------------------------------------------------------------------------
bb2e3076
FCE
254
255
4cd32d8c
JS
256parser::parser (systemtap_session& s, const string &n, istream& i, bool p):
257 session (s), input_name (n), input (i, input_name, s), privileged (p),
a07a2c28 258 context(con_unknown), systemtap_v_seen(0), last_t (0), next_t (0), num_errors (0)
4cd32d8c
JS
259{
260}
2f1a1aea
FCE
261
262parser::~parser()
263{
2f1a1aea
FCE
264}
265
d7f3e0c5
GH
266static string
267tt2str(token_type tt)
268{
269 switch (tt)
270 {
271 case tok_junk: return "junk";
272 case tok_identifier: return "identifier";
273 case tok_operator: return "operator";
274 case tok_string: return "string";
275 case tok_number: return "number";
276 case tok_embedded: return "embedded-code";
6e213f58 277 case tok_keyword: return "keyword";
d7f3e0c5
GH
278 }
279 return "unknown token";
280}
82919855 281
0323ed4d
WC
282ostream&
283operator << (ostream& o, const source_loc& loc)
284{
a704a23b 285 o << loc.file->name << ":"
0323ed4d
WC
286 << loc.line << ":"
287 << loc.column;
288
289 return o;
290}
291
56099f08
FCE
292ostream&
293operator << (ostream& o, const token& t)
294{
d7f3e0c5 295 o << tt2str(t.type);
56099f08 296
6e213f58 297 if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
56099f08 298 {
24cb178f
FCE
299 o << " '";
300 for (unsigned i=0; i<t.content.length(); i++)
301 {
302 char c = t.content[i];
303 o << (isprint (c) ? c : '?');
304 }
305 o << "'";
56099f08 306 }
56099f08 307
dff50e09 308 o << " at "
0323ed4d 309 << t.location;
56099f08
FCE
310
311 return o;
312}
313
314
dff50e09 315void
2f1a1aea
FCE
316parser::print_error (const parse_error &pe)
317{
1b1b4ceb 318 string align_parse_error (" ");
2f1a1aea 319
16fc963f
SM
320 const token *tok = pe.tok ? pe.tok : last_t;
321
322 // print either pe.what() or a deferred error from the lexer
323 bool found_junk = false;
324 if (tok && tok->type == tok_junk && tok->msg != "")
177a8ead 325 {
16fc963f
SM
326 found_junk = true;
327 cerr << _("parse error: ") << tok->msg << endl;
177a8ead 328 }
2f1a1aea 329 else
177a8ead 330 {
16fc963f
SM
331 cerr << _("parse error: ") << pe.what() << endl;
332 }
333
334 // NB: It makes sense for lexer errors to always override parser
335 // errors, since the original obvious scheme was for the lexer to
336 // throw an exception before the token reached the parser.
337
338 if (pe.tok || found_junk)
339 {
340 cerr << _("\tat: ") << *tok << endl;
341 session.print_error_source (cerr, align_parse_error, tok);
342 }
343 else if (tok) // "expected" type error
344 {
345 cerr << _("\tsaw: ") << *tok << endl;
346 session.print_error_source (cerr, align_parse_error, tok);
347 }
348 else
349 {
350 cerr << _("\tsaw: ") << input_name << " EOF" << endl;
177a8ead 351 }
2f1a1aea 352
534aad8b
SM
353 // print chained macro invocations
354 while (tok && tok->chain) {
355 tok = tok->chain;
356 cerr << _("\tin expansion of macro: ") << *tok << endl;
357 session.print_error_source (cerr, align_parse_error, tok);
358 }
359
2f1a1aea
FCE
360 num_errors ++;
361}
362
363
2f1a1aea 364
c434ec7e
FCE
365
366template <typename OPERAND>
367bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
368{
369 if (op->type == tok_operator && op->content == "<=")
370 { return lhs <= rhs; }
371 else if (op->type == tok_operator && op->content == ">=")
372 { return lhs >= rhs; }
373 else if (op->type == tok_operator && op->content == "<")
374 { return lhs < rhs; }
375 else if (op->type == tok_operator && op->content == ">")
376 { return lhs > rhs; }
377 else if (op->type == tok_operator && op->content == "==")
378 { return lhs == rhs; }
379 else if (op->type == tok_operator && op->content == "!=")
380 { return lhs != rhs; }
381 else
2677d2fb 382 throw parse_error (_("expected comparison operator"), op);
c434ec7e
FCE
383}
384
385
534aad8b
SM
386// Here, we perform on-the-fly preprocessing in two passes.
387
388// First pass - macro declaration and expansion.
389//
390// The basic form of a declaration is @define SIGNATURE %( BODY %)
391// where SIGNATURE is of the form macro_name (a, b, c, ...)
392// and BODY can obtain the parameter contents as @a, @b, @c, ....
393// Note that parameterless macros can also be declared.
394//
3932c705 395// Macro definitions may not be nested.
534aad8b
SM
396// A macro is available textually after it has been defined.
397//
398// The basic form of a macro invocation
399// for a parameterless macro is @macro_name,
400// for a macro with parameters is @macro_name(param_1, param_2, ...).
401//
402// TODOXXX NB: this means that a parameterless macro @foo called as
403// @foo(a, b, c) leaves its 'parameters' alone, rather than consuming
404// them to result in a "too many parameters error".
405//
406// Invocations of unknown macros are left unexpanded, to allow
407// the continued use of constructs such as @cast, @var, etc.
408
fe410f52 409macrodecl::~macrodecl ()
534aad8b
SM
410{
411 delete tok;
412 for (vector<const token*>::iterator it = body.begin();
413 it != body.end(); it++)
414 delete *it;
415}
416
417parser::pp1_activation::~pp1_activation ()
418{
419 delete tok;
fe410f52 420 if (curr_macro->is_closure()) return; // body is shared with an earlier declaration
534aad8b
SM
421 for (map<string, pp_macrodecl*>::iterator it = params.begin();
422 it != params.end(); it++)
423 delete it->second;
424}
425
426// Grab a token from the current input source (main file or macro body):
427const token*
428parser::next_pp1 ()
429{
430 if (pp1_state.empty())
431 return input.scan ();
432
433 // otherwise, we're inside a macro
434 pp1_activation* act = pp1_state.back();
435 unsigned& cursor = act->cursor;
436 if (cursor < act->curr_macro->body.size())
437 {
438 token* t = new token(*act->curr_macro->body[cursor]);
0002fc51 439 t->chain = new token(*act->tok); // mark chained token
534aad8b
SM
440 cursor++;
441 return t;
442 }
443 else
444 return 0; // reached end of macro body
445}
446
447const token*
448parser::scan_pp1 ()
449{
450 while (true)
451 {
452 const token* t = next_pp1 ();
453 if (t == 0) // EOF or end of macro body
454 {
455 if (pp1_state.empty()) // actual EOF
456 return 0;
457
458 // Exit macro and loop around to look for the next token.
459 pp1_activation* act = pp1_state.back();
460 pp1_state.pop_back(); delete act;
461 continue;
462 }
463
464 // macro definition
465 if (t->type == tok_operator && t->content == "@define")
466 {
467 if (!pp1_state.empty())
468 throw parse_error (_("'@define' forbidden inside macro body"), t);
469 delete t;
470
471 // handle macro definition
472 // (1) consume macro signature
3932c705 473 t = input.scan();
534aad8b
SM
474 if (! (t && t->type == tok_identifier))
475 throw parse_error (_("expected identifier"), t);
476 string name = t->content;
477
478 // check for redefinition of existing macro
479 if (pp1_namespace.find(name) != pp1_namespace.end())
480 // TODOXXX use a slightly different chaining hack to also point to
481 // pp1_namespace[name]->tok, the site of the original definition?
482 throw parse_error (_F("attempt to redefine macro '@%s' in the same file", name.c_str ()), t);
483 // TODOXXX this is only really necessary if we want to leave open the possibility of statically-scoped semantics in the future...?
484
1d94e4e5 485 // XXX this cascades into further parse errors as the
534aad8b
SM
486 // parser tries to parse the remaining definition...
487 if (name == "define")
488 throw parse_error (_("attempt to redefine '@define'"), t);
489 if (input.atwords.count("@" + name))
490 session.print_warning (_F("macro redefines built-in operator '@%s'", name.c_str()), t);
491
fe410f52
SM
492 macrodecl* decl = (pp1_namespace[name] = new macrodecl);
493 decl->tok = t;
534aad8b
SM
494
495 // determine if the macro takes parameters
4ac28d7e 496 bool saw_params = false;
3932c705
SM
497 t = input.scan();
498 if (t && t->type == tok_operator && t->content == "(")
4ac28d7e
SM
499 {
500 saw_params = true;
501 do
502 {
503 delete t;
504
505 t = input.scan ();
506 if (! (t && t->type == tok_identifier))
507 throw parse_error(_("expected identifier"), t);
fe410f52 508 decl->formal_args.push_back(t->content);
4ac28d7e
SM
509 delete t;
510
511 t = input.scan ();
512 if (t && t->type == tok_operator && t->content == ",")
513 {
514 continue;
515 }
516 else if (t && t->type == tok_operator && t->content == ")")
517 {
518 delete t;
519 t = input.scan();
520 break;
521 }
522 else
523 {
524 throw parse_error (_("expected ',' or ')'"), t);
525 }
526 }
527 while (true);
528 }
534aad8b
SM
529
530 // (2) identify & consume macro body
3932c705 531 if (! (t && t->type == tok_operator && t->content == "%("))
4ac28d7e
SM
532 {
533 if (saw_params)
534 throw parse_error (_("expected '%('"), t);
535 else
536 throw parse_error (_("expected '%(' or '('"), t);
537 }
3932c705 538 delete t;
534aad8b 539
3932c705
SM
540 t = slurp_pp1_body (decl->body);
541 if (!t)
542 throw parse_error (_("incomplete macro definition - missing '%)'"), decl->tok);
543 delete t;
534aad8b
SM
544
545 // Now loop around to look for a real token.
546 continue;
547 }
548
549 // (potential) macro invocation
550 if (t->type == tok_operator && t->content[0] == '@')
551 {
552 string name = t->content.substr(1); // strip initial '@'
553
554 // check if name refers to a real parameter or macro
fe410f52 555 macrodecl* decl;
534aad8b
SM
556 pp1_activation* act = pp1_state.empty() ? 0 : pp1_state.back();
557 if (act && act->params.find(name) != act->params.end())
558 decl = act->params[name];
fe410f52
SM
559 else if (!(act && act->curr_macro->context == ctx_library)
560 && pp1_namespace.find(name) != pp1_namespace.end())
534aad8b 561 decl = pp1_namespace[name];
fe410f52
SM
562 else if (session.library_macros.find(name)
563 != session.library_macros.end())
564 decl = session.library_macros[name];
534aad8b
SM
565 else // this is an ordinary @operator
566 return t;
567
568 // handle macro invocation
569 pp1_activation *new_act = new pp1_activation(*t, decl);
fe410f52 570 unsigned num_params = decl->formal_args.size();
534aad8b
SM
571
572 // (1a) restore parameter invocation closure
fe410f52 573 if (num_params == 0 && decl->is_closure())
534aad8b
SM
574 {
575 // NB: decl->parent_act is always safe since the
576 // parameter decl (if any) comes from an activation
577 // record which deeper in the stack than new_act.
578
579 // decl is a macro parameter which must be evaluated in
580 // the context of the original point of invocation:
fe410f52 581 new_act->params = ((pp_macrodecl*)decl)->parent_act->params;
534aad8b
SM
582 goto expand;
583 }
584
585 // (1b) consume macro parameters (if any)
586 if (num_params == 0)
587 goto expand;
588
589 // for simplicity, we do not allow macro constructs here
590 // -- if we did, we'd have to recursively call scan_pp1()
591 t = next_pp1 ();
39566df2 592 if (! (t && t->type == tok_operator && t->content == "("))
534aad8b
SM
593 {
594 delete new_act;
52c2652f 595 throw parse_error (_NF
534aad8b 596 ("expected '(' in invocation of macro '@%s'"
f499dee5 597 " taking %d parameter",
534aad8b 598 "expected '(' in invocation of macro '@%s'"
f499dee5 599 " taking %d parameters",
52c2652f 600 num_params, name.c_str(), num_params), t);
534aad8b
SM
601 }
602
603 // XXX perhaps parse/count the full number of params,
604 // so we can say "expected x, found y params" on error?
605 for (unsigned i = 0; i < num_params; i++)
606 {
607 delete t;
608
609 // create parameter closure
fe410f52 610 string param_name = decl->formal_args[i];
534aad8b 611 pp_macrodecl* p = (new_act->params[param_name]
fe410f52
SM
612 = new pp_macrodecl);
613 p->tok = new token(*new_act->tok);
614 p->parent_act = act;
534aad8b
SM
615 // NB: *new_act->tok points to invocation, act is NULL at top level
616
617 t = slurp_pp1_param (p->body);
618
619 // check correct usage of ',' or ')'
620 if (t == 0) // hit unexpected EOF or end of macro
621 {
622 // XXX could we pop the stack and continue parsing
623 // the invocation, allowing macros to construct new
624 // invocations in piecemeal fashion??
625 const token* orig_t = new token(*new_act->tok);
626 delete new_act;
627 throw parse_error (_("could not find end of macro invocation"), orig_t);
628 }
629 if (t->type == tok_operator && t->content == ",")
630 {
631 if (i + 1 == num_params)
632 {
633 delete new_act;
634 throw parse_error (_F("too many parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
635 }
636 }
637 else if (t->type == tok_operator && t->content == ")")
638 {
639 if (i + 1 != num_params)
640 {
641 delete new_act;
642 throw parse_error (_F("too few parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
643 }
644 }
645 else
646 {
647 // XXX this is, incidentally, impossible
648 delete new_act;
649 throw parse_error(_("expected ',' or ')' after macro parameter"), t);
650 }
651 }
652
653 delete t;
654
655 // (2) set up macro expansion
656 expand:
657 pp1_state.push_back (new_act);
658
659 // Now loop around to look for a real token.
660 continue;
661 }
662
663 // Otherwise, we have an ordinary token.
664 return t;
665 }
666}
667
668// Consume a single macro invocation's parameters, heeding nested ( )
669// brackets and stopping on an unbalanced ')' or an unbracketed ','
670// (and returning the final separator token).
671const token*
672parser::slurp_pp1_param (vector<const token*>& param)
673{
674 const token* t = 0;
675 unsigned nesting = 0;
676 do
677 {
678 t = next_pp1 ();
679
680 if (!t)
681 break;
682 if (t->type == tok_operator && t->content == "(")
683 ++nesting;
684 else if (nesting && t->type == tok_operator && t->content == ")")
685 --nesting;
686 else if (!nesting && t->type == tok_operator
687 && (t->content == ")" || t->content == ","))
688 break;
689 param.push_back(t);
690 }
691 while (true);
692 return t; // report ")" or "," or NULL
693}
694
695
696// Consume a macro declaration's body, heeding nested %( %) brackets.
697const token*
698parser::slurp_pp1_body (vector<const token*>& body)
699{
700 const token* t = 0;
701 unsigned nesting = 0;
702 do
703 {
704 t = next_pp1 ();
705
706 if (!t)
707 break;
708 if (t->type == tok_operator && t->content == "%(")
709 ++nesting;
710 else if (nesting && t->type == tok_operator && t->content == "%)")
711 --nesting;
712 else if (!nesting && t->type == tok_operator && t->content == "%)")
713 break;
714 body.push_back(t);
715 }
716 while (true);
717 return t; // report final "%)" or NULL
718}
719
fe410f52
SM
720// Used for parsing .stpm files.
721stapfile*
722parser::parse_library_macros ()
723{
724 stapfile* f = new stapfile;
725 input.set_current_file (f);
726
727 try
728 {
729 const token* t = scan_pp1 ();
730
731 // Currently we only take objection to macro invocations if they
732 // produce a non-whitespace token after being expanded.
733
734 // XXX should we prevent macro invocations even if they expand to empty??
735
736 if (t != 0)
737 throw parse_error (_F("library macro file '%s' contains non-@define construct", input_name.c_str()), t);
738
739 // We need to first check whether *any* of the macros are duplicates,
740 // then commit to including the entire file in the global namespace
741 // (or not). Yuck.
742 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
743 it != pp1_namespace.end(); it++)
744 {
745 string name = it->first;
746
747 if (session.library_macros.find(name) != session.library_macros.end())
748 {
749 // XXX ugly hack simulates chaining
750 parse_error* er1 = new parse_error (_F("duplicate definition of library macro '%s'", name.c_str()), it->second->tok);
751 parse_error* er2 = new parse_error (_("location of original definition was"), session.library_macros[name]->tok);
752 print_error (*er1);
753 print_error (*er2);
754 delete er1; delete er2;
755
756 delete f;
757 return 0;
758 }
759 }
760
761 }
762 catch (const parse_error& pe)
763 {
764 print_error (pe);
765 delete f;
766 return 0;
767 }
768
769 // If no errors, include the entire file. Note how this is outside
770 // of the try-catch block -- no errors possible.
771 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
772 it != pp1_namespace.end(); it++)
773 {
774 string name = it->first;
775
776 session.library_macros[name] = it->second;
777 session.library_macros[name]->context = ctx_library;
778 // TODOXXX be sure declaration is retained and not deleted
779 }
780
781 return f;
782}
783
534aad8b
SM
784// Second pass - preprocessor conditional expansion.
785//
177a8ead 786// The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
44ce8ed5
FCE
787// where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
788// or: arch COMPARISON-OP "arch-string"
db135493 789// or: systemtap_v COMPARISON-OP "version-string"
2e6dd9d0 790// or: systemtap_privilege COMPARISON-OP "privilege-string"
561079c8 791// or: CONFIG_foo COMPARISON-OP "config-string"
717a457b 792// or: CONFIG_foo COMPARISON-OP number
4227f98d 793// or: CONFIG_foo COMPARISON-OP CONFIG_bar
5811366a
FCE
794// or: "string1" COMPARISON-OP "string2"
795// or: number1 COMPARISON-OP number2
44ce8ed5 796// The %: ELSE-TOKENS part is optional.
177a8ead
FCE
797//
798// e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
c434ec7e 799// e.g. %( arch != "i?86" %? "foo" %: "baz" %)
561079c8 800// e.g. %( CONFIG_foo %? "foo" %: "baz" %)
177a8ead
FCE
801//
802// Up to an entire %( ... %) expression is processed by a single call
803// to this function. Tokens included by any nested conditions are
804// enqueued in a private vector.
805
806bool eval_pp_conditional (systemtap_session& s,
807 const token* l, const token* op, const token* r)
808{
44ce8ed5 809 if (l->type == tok_identifier && (l->content == "kernel_v" ||
db135493
FCE
810 l->content == "kernel_vr" ||
811 l->content == "systemtap_v"))
44ce8ed5 812 {
db135493 813 if (! (r->type == tok_string))
2677d2fb 814 throw parse_error (_("expected string literal"), r);
db135493 815
44ce8ed5 816 string target_kernel_vr = s.kernel_release;
197a4d62 817 string target_kernel_v = s.kernel_base_release;
db135493 818 string target;
dff50e09 819
db135493
FCE
820 if (l->content == "kernel_v") target = target_kernel_v;
821 else if (l->content == "kernel_vr") target = target_kernel_vr;
822 else if (l->content == "systemtap_v") target = s.compatible;
823 else assert (0);
7a468d68 824
7a468d68
FCE
825 string query = r->content;
826 bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
827
44ce8ed5
FCE
828 // collect acceptable strverscmp results.
829 int rvc_ok1, rvc_ok2;
7a468d68 830 bool wc_ok = false;
44ce8ed5
FCE
831 if (op->type == tok_operator && op->content == "<=")
832 { rvc_ok1 = -1; rvc_ok2 = 0; }
833 else if (op->type == tok_operator && op->content == ">=")
834 { rvc_ok1 = 1; rvc_ok2 = 0; }
835 else if (op->type == tok_operator && op->content == "<")
836 { rvc_ok1 = -1; rvc_ok2 = -1; }
837 else if (op->type == tok_operator && op->content == ">")
838 { rvc_ok1 = 1; rvc_ok2 = 1; }
839 else if (op->type == tok_operator && op->content == "==")
7a468d68 840 { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
44ce8ed5 841 else if (op->type == tok_operator && op->content == "!=")
7a468d68 842 { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
44ce8ed5 843 else
2677d2fb 844 throw parse_error (_("expected comparison operator"), op);
7a468d68
FCE
845
846 if ((!wc_ok) && rhs_wildcard)
2677d2fb 847 throw parse_error (_("wildcard not allowed with order comparison operators"), op);
7a468d68
FCE
848
849 if (rhs_wildcard)
850 {
851 int rvc_result = fnmatch (query.c_str(), target.c_str(),
852 FNM_NOESCAPE); // spooky
853 bool badness = (rvc_result == 0) ^ (op->content == "==");
854 return !badness;
855 }
856 else
857 {
858 int rvc_result = strverscmp (target.c_str(), query.c_str());
859 // normalize rvc_result
860 if (rvc_result < 0) rvc_result = -1;
861 if (rvc_result > 0) rvc_result = 1;
862 return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
863 }
44ce8ed5 864 }
2e6dd9d0
SM
865 else if (l->type == tok_identifier && l->content == "systemtap_privilege")
866 {
867 string target_privilege =
cba5b802
SM
868 /* XXX perhaps include a "guru" state */
869 pr_contains(s.privilege, pr_stapdev) ? "stapdev"
870 : pr_contains(s.privilege, pr_stapsys) ? "stapsys"
871 : pr_contains(s.privilege, pr_stapusr) ? "stapusr"
2e6dd9d0
SM
872 : "none"; /* should be impossible -- s.privilege always one of above */
873 assert(target_privilege != "none");
874
875 if (! (r->type == tok_string))
876 throw parse_error (_("expected string literal"), r);
877 string query_privilege = r->content;
878
879 bool nomatch = (target_privilege != query_privilege);
880
881 bool result;
882 if (op->type == tok_operator && op->content == "==")
883 result = !nomatch;
884 else if (op->type == tok_operator && op->content == "!=")
885 result = nomatch;
886 else
887 throw parse_error (_("expected '==' or '!='"), op);
cba5b802 888 /* XXX perhaps allow <= >= and similar comparisons */
2e6dd9d0
SM
889
890 return result;
891 }
44ce8ed5
FCE
892 else if (l->type == tok_identifier && l->content == "arch")
893 {
894 string target_architecture = s.architecture;
895 if (! (r->type == tok_string))
2677d2fb 896 throw parse_error (_("expected string literal"), r);
44ce8ed5 897 string query_architecture = r->content;
dff50e09 898
7a468d68
FCE
899 int nomatch = fnmatch (query_architecture.c_str(),
900 target_architecture.c_str(),
901 FNM_NOESCAPE); // still spooky
902
561079c8
FCE
903 bool result;
904 if (op->type == tok_operator && op->content == "==")
905 result = !nomatch;
906 else if (op->type == tok_operator && op->content == "!=")
907 result = nomatch;
908 else
2677d2fb 909 throw parse_error (_("expected '==' or '!='"), op);
561079c8
FCE
910
911 return result;
912 }
60d98537 913 else if (l->type == tok_identifier && startswith(l->content, "CONFIG_"))
561079c8 914 {
717a457b
MW
915 if (r->type == tok_string)
916 {
917 string lhs = s.kernel_config[l->content]; // may be empty
918 string rhs = r->content;
561079c8 919
717a457b 920 int nomatch = fnmatch (rhs.c_str(), lhs.c_str(), FNM_NOESCAPE); // still spooky
561079c8 921
717a457b
MW
922 bool result;
923 if (op->type == tok_operator && op->content == "==")
924 result = !nomatch;
925 else if (op->type == tok_operator && op->content == "!=")
926 result = nomatch;
927 else
2677d2fb 928 throw parse_error (_("expected '==' or '!='"), op);
dff50e09 929
717a457b
MW
930 return result;
931 }
932 else if (r->type == tok_number)
933 {
934 const char* startp = s.kernel_config[l->content].c_str ();
935 char* endp = (char*) startp;
936 errno = 0;
937 int64_t lhs = (int64_t) strtoll (startp, & endp, 0);
938 if (errno == ERANGE || errno == EINVAL || *endp != '\0')
939 throw parse_error ("Config option value not a number", l);
940
941 int64_t rhs = lex_cast<int64_t>(r->content);
942 return eval_comparison (lhs, op, rhs);
943 }
4227f98d 944 else if (r->type == tok_identifier
60d98537 945 && startswith(r->content, "CONFIG_"))
4227f98d
MW
946 {
947 // First try to convert both to numbers,
948 // otherwise threat both as strings.
949 const char* startp = s.kernel_config[l->content].c_str ();
950 char* endp = (char*) startp;
951 errno = 0;
952 int64_t val = (int64_t) strtoll (startp, & endp, 0);
953 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
954 {
955 int64_t lhs = val;
956 startp = s.kernel_config[r->content].c_str ();
957 endp = (char*) startp;
958 errno = 0;
959 int64_t rhs = (int64_t) strtoll (startp, & endp, 0);
960 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
961 return eval_comparison (lhs, op, rhs);
962 }
963
964 string lhs = s.kernel_config[l->content];
965 string rhs = s.kernel_config[r->content];
966 return eval_comparison (lhs, op, rhs);
967 }
717a457b 968 else
ce0f6648 969 throw parse_error (_("expected string, number literal or other CONFIG_... as right side operand"), r);
dff50e09 970 }
c434ec7e 971 else if (l->type == tok_string && r->type == tok_string)
5811366a 972 {
c434ec7e
FCE
973 string lhs = l->content;
974 string rhs = r->content;
975 return eval_comparison (lhs, op, rhs);
976 // NB: no wildcarding option here
977 }
978 else if (l->type == tok_number && r->type == tok_number)
979 {
980 int64_t lhs = lex_cast<int64_t>(l->content);
981 int64_t rhs = lex_cast<int64_t>(r->content);
982 return eval_comparison (lhs, op, rhs);
7a468d68 983 // NB: no wildcarding option here
5811366a
FCE
984 }
985 else if (l->type == tok_string && r->type == tok_number
986 && op->type == tok_operator)
2677d2fb 987 throw parse_error (_("expected string literal as right value"), r);
5811366a
FCE
988 else if (l->type == tok_number && r->type == tok_string
989 && op->type == tok_operator)
2677d2fb 990 throw parse_error (_("expected number literal as right value"), r);
c434ec7e 991
177a8ead 992 else
2677d2fb
LB
993 throw parse_error (_("expected 'arch' or 'kernel_v' or 'kernel_vr' or 'CONFIG_...'\n"
994 " or comparison between strings or integers"), l);
177a8ead
FCE
995}
996
997
5811366a 998// Only tokens corresponding to the TRUE statement must be expanded
177a8ead 999const token*
b5477cd9 1000parser::scan_pp ()
177a8ead
FCE
1001{
1002 while (true)
1003 {
e92f2566
JS
1004 pp_state_t pp = PP_NONE;
1005 if (!pp_state.empty())
1006 pp = pp_state.back().second;
1007
1008 const token* t = 0;
1009 if (pp == PP_SKIP_THEN || pp == PP_SKIP_ELSE)
1010 t = skip_pp ();
1011 else
534aad8b 1012 t = scan_pp1 ();
e92f2566
JS
1013
1014 if (t == 0) // EOF
177a8ead 1015 {
e92f2566
JS
1016 if (pp != PP_NONE)
1017 {
1018 t = pp_state.back().first;
1019 pp_state.pop_back(); // so skip_some doesn't keep trying to close this
ce0f6648 1020 //TRANSLATORS: 'conditional' meaning 'conditional preprocessing'
2677d2fb 1021 throw parse_error (_("incomplete conditional at end of file"), t);
e92f2566 1022 }
177a8ead
FCE
1023 return t;
1024 }
1025
e92f2566
JS
1026 // misplaced preprocessor "then"
1027 if (t->type == tok_operator && t->content == "%?")
2677d2fb 1028 throw parse_error (_("incomplete conditional - missing '%('"), t);
e92f2566
JS
1029
1030 // preprocessor "else"
1031 if (t->type == tok_operator && t->content == "%:")
1032 {
1033 if (pp == PP_NONE)
2677d2fb 1034 throw parse_error (_("incomplete conditional - missing '%('"), t);
e92f2566 1035 if (pp == PP_KEEP_ELSE || pp == PP_SKIP_ELSE)
2677d2fb 1036 throw parse_error (_("invalid conditional - duplicate '%:'"), t);
1d94e4e5
SM
1037 // XXX: here and elsewhere, error cascades might be avoided
1038 // by dropping tokens until we reach the closing %)
e92f2566
JS
1039
1040 pp_state.back().second = (pp == PP_KEEP_THEN) ?
1041 PP_SKIP_ELSE : PP_KEEP_ELSE;
1042 delete t;
1043 continue;
1044 }
1045
1046 // preprocessor close
1047 if (t->type == tok_operator && t->content == "%)")
1048 {
1049 if (pp == PP_NONE)
2677d2fb 1050 throw parse_error (_("incomplete conditional - missing '%('"), t);
e92f2566 1051 delete pp_state.back().first;
a07a2c28 1052 delete t; //this is the closing bracket
e92f2566
JS
1053 pp_state.pop_back();
1054 continue;
1055 }
dff50e09 1056
177a8ead
FCE
1057 if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
1058 return t;
1059
1060 // We have a %( - it's time to throw a preprocessing party!
1061
2d7881bf
PP
1062 bool result = false;
1063 bool and_result = true;
1064 const token *n = NULL;
1065 do {
1066 const token *l, *op, *r;
534aad8b
SM
1067 l = scan_pp1 ();
1068 op = scan_pp1 ();
1069 r = scan_pp1 ();
2d7881bf 1070 if (l == 0 || op == 0 || r == 0)
2677d2fb 1071 throw parse_error (_("incomplete condition after '%('"), t);
2d7881bf
PP
1072 // NB: consider generalizing to consume all tokens until %?, and
1073 // passing that as a vector to an evaluator.
1074
1075 // Do not evaluate the condition if we haven't expanded everything.
1076 // This may occur when having several recursive conditionals.
1077 and_result &= eval_pp_conditional (session, l, op, r);
a07a2c28
LB
1078 if(l->content=="systemtap_v")
1079 systemtap_v_seen=r;
1080
1081 else
1082 delete r;
1083
2d7881bf
PP
1084 delete l;
1085 delete op;
2d7881bf
PP
1086 delete n;
1087
534aad8b 1088 n = scan_pp1 ();
2d7881bf
PP
1089 if (n && n->type == tok_operator && n->content == "&&")
1090 continue;
1091 result |= and_result;
1092 and_result = true;
1093 if (! (n && n->type == tok_operator && n->content == "||"))
1094 break;
1095 } while (true);
3f847830
FCE
1096
1097 /*
1098 clog << "PP eval (" << *t << ") == " << result << endl;
1099 */
1100
e92f2566 1101 const token *m = n;
177a8ead 1102 if (! (m && m->type == tok_operator && m->content == "%?"))
2677d2fb 1103 throw parse_error (_("expected '%?' marker for conditional"), t);
70c743d8 1104 delete m; // "%?"
177a8ead 1105
e92f2566
JS
1106 pp = result ? PP_KEEP_THEN : PP_SKIP_THEN;
1107 pp_state.push_back (make_pair (t, pp));
3f847830 1108
e92f2566
JS
1109 // Now loop around to look for a real token.
1110 }
1111}
3f847830 1112
3f847830 1113
e92f2566
JS
1114// Skip over tokens and any errors, heeding
1115// only nested preprocessor starts and ends.
1116const token*
1117parser::skip_pp ()
1118{
1119 const token* t = 0;
1120 unsigned nesting = 0;
1121 do
1122 {
1123 try
1124 {
534aad8b 1125 t = scan_pp1 ();
177a8ead 1126 }
e92f2566 1127 catch (const parse_error &e)
70c743d8 1128 {
e92f2566 1129 continue;
70c743d8 1130 }
e92f2566
JS
1131 if (!t)
1132 break;
1133 if (t->type == tok_operator && t->content == "%(")
1134 ++nesting;
1135 else if (nesting && t->type == tok_operator && t->content == "%)")
1136 --nesting;
1137 else if (!nesting && t->type == tok_operator &&
1138 (t->content == "%:" || t->content == "%?" || t->content == "%)"))
1139 break;
1140 delete t;
177a8ead 1141 }
e92f2566
JS
1142 while (true);
1143 return t;
177a8ead
FCE
1144}
1145
1146
2f1a1aea 1147const token*
b5477cd9 1148parser::next ()
2f1a1aea
FCE
1149{
1150 if (! next_t)
b5477cd9 1151 next_t = scan_pp ();
2f1a1aea 1152 if (! next_t)
2677d2fb 1153 throw parse_error (_("unexpected end-of-file"));
2f1a1aea 1154
2f1a1aea
FCE
1155 last_t = next_t;
1156 // advance by zeroing next_t
1157 next_t = 0;
1158 return last_t;
1159}
1160
1161
1162const token*
b5477cd9 1163parser::peek ()
2f1a1aea
FCE
1164{
1165 if (! next_t)
b5477cd9 1166 next_t = scan_pp ();
2f1a1aea
FCE
1167
1168 // don't advance by zeroing next_t
1169 last_t = next_t;
1170 return next_t;
1171}
1172
1173
731a5359
MW
1174void
1175parser::swallow ()
1176{
1177 // can only swallow something last peeked or nexted token.
1178 assert (last_t != 0);
1179 delete last_t;
1180 // advance by zeroing next_t
1181 last_t = next_t = 0;
1182}
1183
1184
d7f3e0c5
GH
1185static inline bool
1186tok_is(token const * t, token_type tt, string const & expected)
1187{
1188 return t && t->type == tt && t->content == expected;
1189}
1190
1191
731a5359 1192void
d7f3e0c5
GH
1193parser::expect_known (token_type tt, string const & expected)
1194{
1195 const token *t = next();
57b73400 1196 if (! (t && t->type == tt && t->content == expected))
ce0f6648 1197 throw parse_error (_F("expected '%s'", expected.c_str()));
731a5359 1198 swallow (); // We are done with it, content was copied.
d7f3e0c5
GH
1199}
1200
1201
731a5359 1202void
d7f3e0c5
GH
1203parser::expect_unknown (token_type tt, string & target)
1204{
1205 const token *t = next();
1206 if (!(t && t->type == tt))
2677d2fb 1207 throw parse_error (_("expected ") + tt2str(tt));
d7f3e0c5 1208 target = t->content;
731a5359 1209 swallow (); // We are done with it, content was copied.
d7f3e0c5
GH
1210}
1211
1212
731a5359 1213void
493ee224
DS
1214parser::expect_unknown2 (token_type tt1, token_type tt2, string & target)
1215{
1216 const token *t = next();
1217 if (!(t && (t->type == tt1 || t->type == tt2)))
dd90d565 1218 throw parse_error (_F("expected %s or %s", tt2str(tt1).c_str(), tt2str(tt2).c_str()));
493ee224 1219 target = t->content;
731a5359 1220 swallow (); // We are done with it, content was copied.
493ee224
DS
1221}
1222
1223
731a5359 1224void
d7f3e0c5
GH
1225parser::expect_op (std::string const & expected)
1226{
731a5359 1227 expect_known (tok_operator, expected);
d7f3e0c5
GH
1228}
1229
1230
731a5359 1231void
d7f3e0c5
GH
1232parser::expect_kw (std::string const & expected)
1233{
731a5359 1234 expect_known (tok_keyword, expected);
d7f3e0c5
GH
1235}
1236
dff50e09 1237const token*
731a5359
MW
1238parser::expect_kw_token (std::string const & expected)
1239{
1240 const token *t = next();
1241 if (! (t && t->type == tok_keyword && t->content == expected))
1242 throw parse_error (_F("expected '%s'", expected.c_str()));
1243 return t;
1244}
1245
1246void
e38723d2 1247parser::expect_number (int64_t & value)
57b73400 1248{
e38723d2
MH
1249 bool neg = false;
1250 const token *t = next();
1251 if (t->type == tok_operator && t->content == "-")
1252 {
1253 neg = true;
731a5359 1254 swallow ();
e38723d2
MH
1255 t = next ();
1256 }
1257 if (!(t && t->type == tok_number))
2677d2fb 1258 throw parse_error (_("expected number"));
e38723d2
MH
1259
1260 const char* startp = t->content.c_str ();
1261 char* endp = (char*) startp;
1262
1263 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1264 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1265 // since the lexer only gives us positive digit strings, but we'll
1266 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1267 errno = 0;
1268 value = (int64_t) strtoull (startp, & endp, 0);
1269 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
1270 || (neg && (unsigned long long) value > 9223372036854775808ULL)
1271 || (unsigned long long) value > 18446744073709551615ULL
1272 || value < -9223372036854775807LL-1)
2677d2fb 1273 throw parse_error (_("number invalid or out of range"));
dff50e09 1274
e38723d2
MH
1275 if (neg)
1276 value = -value;
1277
731a5359 1278 swallow (); // We are done with it, content was parsed and copied into value.
57b73400
GH
1279}
1280
d7f3e0c5 1281
dff50e09 1282const token*
50cc7cd5 1283parser::expect_ident_or_atword (std::string & target)
d7f3e0c5 1284{
06219d6f
SM
1285 const token *t = next();
1286
1287 // accept identifiers and operators beginning in '@':
1288 if (!t || (t->type != tok_identifier
1289 && (t->type != tok_operator || t->content[0] != '@')))
1290 // XXX currently this is only called from parse_hist_op_or_bare_name(),
1291 // so the message is accurate, but keep an eye out in the future:
dd90d565 1292 throw parse_error (_F("expected %s or statistical operation", tt2str(tok_identifier).c_str()));
06219d6f
SM
1293
1294 target = t->content;
1295 return t;
d7f3e0c5
GH
1296}
1297
1298
731a5359 1299void
493ee224
DS
1300parser::expect_ident_or_keyword (std::string & target)
1301{
731a5359 1302 expect_unknown2 (tok_identifier, tok_keyword, target);
493ee224
DS
1303}
1304
1305
dff50e09 1306bool
d7f3e0c5
GH
1307parser::peek_op (std::string const & op)
1308{
1309 return tok_is (peek(), tok_operator, op);
1310}
1311
1312
dff50e09 1313bool
d7f3e0c5
GH
1314parser::peek_kw (std::string const & kw)
1315{
1316 return tok_is (peek(), tok_identifier, kw);
1317}
1318
1319
1320
66c7d4c1 1321lexer::lexer (istream& input, const string& in, systemtap_session& s):
03ba36d9
SM
1322 ate_comment(false), ate_whitespace(false), saw_tokens(false),
1323 input_name (in), input_pointer (0), input_end (0), cursor_suspend_count(0),
1324 cursor_suspend_line (1), cursor_suspend_column (1), cursor_line (1),
1325 cursor_column (1), session(s), current_file (0)
eacb10ce 1326{
66c7d4c1 1327 getline(input, input_contents, '\0');
2203b032 1328
66c7d4c1
JS
1329 input_pointer = input_contents.data();
1330 input_end = input_contents.data() + input_contents.size();
1331
1332 if (keywords.empty())
1333 {
3a7ec735
FCE
1334 // NB: adding new keywords is highly disruptive to the language,
1335 // in particular to existing scripts that could be suddenly
1336 // broken. If done at all, it has to be s.compatible-sensitive,
1337 // and broadly advertised.
66c7d4c1
JS
1338 keywords.insert("probe");
1339 keywords.insert("global");
1340 keywords.insert("function");
1341 keywords.insert("if");
1342 keywords.insert("else");
1343 keywords.insert("for");
1344 keywords.insert("foreach");
1345 keywords.insert("in");
1346 keywords.insert("limit");
1347 keywords.insert("return");
1348 keywords.insert("delete");
1349 keywords.insert("while");
1350 keywords.insert("break");
1351 keywords.insert("continue");
1352 keywords.insert("next");
1353 keywords.insert("string");
1354 keywords.insert("long");
f4fe2e93
FCE
1355 keywords.insert("try");
1356 keywords.insert("catch");
66c7d4c1 1357 }
2524d1fd
SM
1358
1359 if (atwords.empty())
1360 {
1361 // NB: adding new @words is mildly disruptive to existing
1362 // scripts that define macros with the same name, but not
1363 // really. The user will merely receive a warning that they are
1364 // redefining an existing operator.
1365 atwords.insert("@cast");
1366 atwords.insert("@defined");
1367 atwords.insert("@entry");
1368 atwords.insert("@var");
1369 atwords.insert("@avg");
1370 atwords.insert("@count");
1371 atwords.insert("@sum");
1372 atwords.insert("@min");
1373 atwords.insert("@max");
1374 atwords.insert("@hist_linear");
1375 atwords.insert("@hist_log");
1376 }
eacb10ce 1377}
2f1a1aea 1378
66c7d4c1 1379set<string> lexer::keywords;
2524d1fd 1380set<string> lexer::atwords;
66c7d4c1 1381
1b1b4ceb
RA
1382void
1383lexer::set_current_file (stapfile* f)
1384{
1385 current_file = f;
2203b032
JS
1386 if (f)
1387 {
1388 f->file_contents = input_contents;
1389 f->name = input_name;
1390 }
1b1b4ceb 1391}
bb2e3076
FCE
1392
1393int
1394lexer::input_peek (unsigned n)
1395{
66c7d4c1
JS
1396 if (input_pointer + n >= input_end)
1397 return -1; // EOF
1398 return (unsigned char)*(input_pointer + n);
bb2e3076
FCE
1399}
1400
1401
dff50e09 1402int
2f1a1aea
FCE
1403lexer::input_get ()
1404{
66c7d4c1 1405 int c = input_peek();
bb2e3076
FCE
1406 if (c < 0) return c; // EOF
1407
66c7d4c1
JS
1408 ++input_pointer;
1409
3f99432c 1410 if (cursor_suspend_count)
9300f661
JS
1411 {
1412 // Track effect of input_put: preserve previous cursor/line_column
1413 // until all of its characters are consumed.
1414 if (--cursor_suspend_count == 0)
1415 {
1416 cursor_line = cursor_suspend_line;
1417 cursor_column = cursor_suspend_column;
1418 }
1419 }
3f99432c 1420 else
2f1a1aea 1421 {
3f99432c
FCE
1422 // update source cursor
1423 if (c == '\n')
1424 {
1425 cursor_line ++;
1426 cursor_column = 1;
1427 }
1428 else
1429 cursor_column ++;
2f1a1aea 1430 }
2f1a1aea 1431
eacb10ce 1432 // clog << "[" << (char)c << "]";
2f1a1aea
FCE
1433 return c;
1434}
1435
1436
3f99432c 1437void
9300f661 1438lexer::input_put (const string& chars, const token* t)
3f99432c 1439{
66c7d4c1
JS
1440 size_t pos = input_pointer - input_contents.data();
1441 // clog << "[put:" << chars << " @" << pos << "]";
1442 input_contents.insert (pos, chars);
eacb10ce 1443 cursor_suspend_count += chars.size();
9300f661
JS
1444 cursor_suspend_line = cursor_line;
1445 cursor_suspend_column = cursor_column;
1446 cursor_line = t->location.line;
1447 cursor_column = t->location.column;
66c7d4c1
JS
1448 input_pointer = input_contents.data() + pos;
1449 input_end = input_contents.data() + input_contents.size();
3f99432c
FCE
1450}
1451
1452
2f1a1aea 1453token*
b5477cd9 1454lexer::scan ()
2f1a1aea 1455{
fee28e5c 1456 ate_comment = false; // reset for each new token
b5477cd9 1457 ate_whitespace = false; // reset for each new token
534aad8b
SM
1458
1459 // XXX be very sure to restore old_saw_tokens if we return without a token:
1460 bool old_saw_tokens = saw_tokens;
1461 saw_tokens = true;
1462
2f1a1aea 1463 token* n = new token;
2203b032 1464 n->location.file = current_file;
534aad8b 1465 n->chain = NULL; // important safety dance
2f1a1aea 1466
9300f661
JS
1467skip:
1468 bool suspended = (cursor_suspend_count > 0);
2f1a1aea
FCE
1469 n->location.line = cursor_line;
1470 n->location.column = cursor_column;
1471
1472 int c = input_get();
3f99432c 1473 // clog << "{" << (char)c << (char)c2 << "}";
2f1a1aea
FCE
1474 if (c < 0)
1475 {
1476 delete n;
534aad8b 1477 saw_tokens = old_saw_tokens;
2f1a1aea
FCE
1478 return 0;
1479 }
1480
1481 if (isspace (c))
b5477cd9
SM
1482 {
1483 ate_whitespace = true;
1484 goto skip;
1485 }
2f1a1aea 1486
66c7d4c1
JS
1487 int c2 = input_peek ();
1488
3f99432c
FCE
1489 // Paste command line arguments as character streams into
1490 // the beginning of a token. $1..$999 go through as raw
1491 // characters; @1..@999 are quoted/escaped as strings.
1492 // $# and @# expand to the number of arguments, similarly
1493 // raw or quoted.
9300f661 1494 if ((c == '$' || c == '@') && (c2 == '#'))
3f99432c 1495 {
9300f661
JS
1496 n->content.push_back (c);
1497 n->content.push_back (c2);
3f99432c 1498 input_get(); // swallow '#'
9300f661 1499 if (suspended)
16fc963f
SM
1500 {
1501 n->make_junk(_("invalid nested substitution of command line arguments"));
1502 return n;
1503 }
9300f661
JS
1504 size_t num_args = session.args.size ();
1505 input_put ((c == '$') ? lex_cast (num_args) : lex_cast_qstring (num_args), n);
1506 n->content.clear();
1507 goto skip;
3f99432c 1508 }
9300f661 1509 else if ((c == '$' || c == '@') && (isdigit (c2)))
3f99432c 1510 {
9300f661 1511 n->content.push_back (c);
3f99432c
FCE
1512 unsigned idx = 0;
1513 do
1514 {
1515 input_get ();
1516 idx = (idx * 10) + (c2 - '0');
9300f661 1517 n->content.push_back (c2);
3f99432c
FCE
1518 c2 = input_peek ();
1519 } while (c2 > 0 &&
dff50e09 1520 isdigit (c2) &&
3f99432c 1521 idx <= session.args.size()); // prevent overflow
16fc963f
SM
1522 if (suspended)
1523 {
1524 n->make_junk(_("invalid nested substitution of command line arguments"));
1525 return n;
1526 }
3f99432c
FCE
1527 if (idx == 0 ||
1528 idx-1 >= session.args.size())
16fc963f
SM
1529 {
1530 n->make_junk(_F("command line argument index %lu out of range [1-%lu]",
1531 (unsigned long) idx, (unsigned long) session.args.size()));
1532 return n;
1533 }
9300f661
JS
1534 const string& arg = session.args[idx-1];
1535 input_put ((c == '$') ? arg : lex_cast_qstring (arg), n);
1536 n->content.clear();
1537 goto skip;
3f99432c
FCE
1538 }
1539
b5477cd9 1540 else if (isalpha (c) || c == '$' || c == '@' || c == '_')
2f1a1aea
FCE
1541 {
1542 n->type = tok_identifier;
1543 n->content = (char) c;
b5477cd9 1544 while (isalnum (c2) || c2 == '_' || c2 == '$')
2f1a1aea 1545 {
3f99432c
FCE
1546 input_get ();
1547 n->content.push_back (c2);
1548 c2 = input_peek ();
6e213f58 1549 }
213bee8f 1550
66c7d4c1 1551 if (keywords.count(n->content))
3f99432c 1552 n->type = tok_keyword;
06219d6f 1553 else if (n->content[0] == '@')
dd90d565 1554 // makes it easier to detect illegal use of @words:
06219d6f 1555 n->type = tok_operator;
dff50e09 1556
2f1a1aea
FCE
1557 return n;
1558 }
1559
3a20432b 1560 else if (isdigit (c)) // positive literal
2f1a1aea 1561 {
2f1a1aea 1562 n->type = tok_number;
9c0c0e46
FCE
1563 n->content = (char) c;
1564
66c7d4c1 1565 while (isalnum (c2))
2f1a1aea 1566 {
9c0c0e46
FCE
1567 // NB: isalnum is very permissive. We rely on strtol, called in
1568 // parser::parse_literal below, to confirm that the number string
1569 // is correctly formatted and in range.
1570
66c7d4c1
JS
1571 input_get ();
1572 n->content.push_back (c2);
1573 c2 = input_peek ();
2f1a1aea
FCE
1574 }
1575 return n;
1576 }
1577
1578 else if (c == '\"')
1579 {
1580 n->type = tok_string;
1581 while (1)
1582 {
1583 c = input_get ();
1584
3f99432c 1585 if (c < 0 || c == '\n')
2f1a1aea 1586 {
16fc963f
SM
1587 n->make_junk(_("Could not find matching closing quote"));
1588 return n;
2f1a1aea
FCE
1589 }
1590 if (c == '\"') // closing double-quotes
1591 break;
3f99432c 1592 else if (c == '\\') // see also input_put
dff50e09 1593 {
7d46afb8
GH
1594 c = input_get ();
1595 switch (c)
1596 {
1597 case 'a':
1598 case 'b':
1599 case 't':
1600 case 'n':
1601 case 'v':
1602 case 'f':
1603 case 'r':
f03954fd 1604 case '0' ... '7': // NB: need only match the first digit
7d46afb8 1605 case '\\':
7d46afb8 1606 // Pass these escapes through to the string value
dff50e09 1607 // being parsed; it will be emitted into a C literal.
7d46afb8
GH
1608
1609 n->content.push_back('\\');
1610
3f99432c 1611 // fall through
7d46afb8 1612 default:
7d46afb8
GH
1613 n->content.push_back(c);
1614 break;
1615 }
2f1a1aea
FCE
1616 }
1617 else
1618 n->content.push_back(c);
1619 }
1620 return n;
1621 }
1622
1623 else if (ispunct (c))
1624 {
bb2e3076 1625 int c3 = input_peek (1);
2f1a1aea 1626
3a20432b
FCE
1627 // NB: if we were to recognize negative numeric literals here,
1628 // we'd introduce another grammar ambiguity:
1629 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
1630 // instead of tok_number(1) tok_operator('-') tok_number(1)
1631
66c7d4c1 1632 if (c == '#') // shell comment
2f1a1aea
FCE
1633 {
1634 unsigned this_line = cursor_line;
bb2e3076
FCE
1635 do { c = input_get (); }
1636 while (c >= 0 && cursor_line == this_line);
fee28e5c 1637 ate_comment = true;
b5477cd9 1638 ate_whitespace = true;
2f1a1aea
FCE
1639 goto skip;
1640 }
66c7d4c1 1641 else if ((c == '/' && c2 == '/')) // C++ comment
63a7c90e
FCE
1642 {
1643 unsigned this_line = cursor_line;
bb2e3076
FCE
1644 do { c = input_get (); }
1645 while (c >= 0 && cursor_line == this_line);
fee28e5c 1646 ate_comment = true;
b5477cd9 1647 ate_whitespace = true;
63a7c90e
FCE
1648 goto skip;
1649 }
1650 else if (c == '/' && c2 == '*') // C comment
1651 {
66c7d4c1
JS
1652 (void) input_get (); // swallow '*' already in c2
1653 c = input_get ();
63a7c90e 1654 c2 = input_get ();
bb2e3076 1655 while (c2 >= 0)
63a7c90e 1656 {
66c7d4c1
JS
1657 if (c == '*' && c2 == '/')
1658 break;
63a7c90e
FCE
1659 c = c2;
1660 c2 = input_get ();
63a7c90e 1661 }
fee28e5c 1662 ate_comment = true;
b5477cd9 1663 ate_whitespace = true;
bb2e3076 1664 goto skip;
63a7c90e 1665 }
54dfabe9
FCE
1666 else if (c == '%' && c2 == '{') // embedded code
1667 {
1668 n->type = tok_embedded;
1669 (void) input_get (); // swallow '{' already in c2
66c7d4c1
JS
1670 c = input_get ();
1671 c2 = input_get ();
1672 while (c2 >= 0)
54dfabe9 1673 {
66c7d4c1
JS
1674 if (c == '%' && c2 == '}')
1675 return n;
54dfabe9 1676 n->content += c;
66c7d4c1
JS
1677 c = c2;
1678 c2 = input_get ();
54dfabe9 1679 }
72cdb9cd 1680
16fc963f
SM
1681 n->make_junk(_("Could not find matching '%}' to close embedded function block"));
1682 return n;
54dfabe9 1683 }
2f1a1aea 1684
bb2e3076
FCE
1685 // We're committed to recognizing at least the first character
1686 // as an operator.
2f1a1aea 1687 n->type = tok_operator;
66c7d4c1 1688 n->content = c;
2f1a1aea 1689
bb2e3076 1690 // match all valid operators, in decreasing size order
66c7d4c1
JS
1691 if ((c == '<' && c2 == '<' && c3 == '<') ||
1692 (c == '<' && c2 == '<' && c3 == '=') ||
1693 (c == '>' && c2 == '>' && c3 == '='))
82919855 1694 {
66c7d4c1
JS
1695 n->content += c2;
1696 n->content += c3;
bb2e3076
FCE
1697 input_get (); input_get (); // swallow other two characters
1698 }
66c7d4c1
JS
1699 else if ((c == '=' && c2 == '=') ||
1700 (c == '!' && c2 == '=') ||
1701 (c == '<' && c2 == '=') ||
1702 (c == '>' && c2 == '=') ||
93daaca8
SM
1703 (c == '=' && c2 == '~') ||
1704 (c == '!' && c2 == '~') ||
66c7d4c1
JS
1705 (c == '+' && c2 == '=') ||
1706 (c == '-' && c2 == '=') ||
1707 (c == '*' && c2 == '=') ||
1708 (c == '/' && c2 == '=') ||
1709 (c == '%' && c2 == '=') ||
1710 (c == '&' && c2 == '=') ||
1711 (c == '^' && c2 == '=') ||
1712 (c == '|' && c2 == '=') ||
1713 (c == '.' && c2 == '=') ||
1714 (c == '&' && c2 == '&') ||
1715 (c == '|' && c2 == '|') ||
1716 (c == '+' && c2 == '+') ||
1717 (c == '-' && c2 == '-') ||
1718 (c == '-' && c2 == '>') ||
1719 (c == '<' && c2 == '<') ||
1720 (c == '>' && c2 == '>') ||
177a8ead 1721 // preprocessor tokens
66c7d4c1
JS
1722 (c == '%' && c2 == '(') ||
1723 (c == '%' && c2 == '?') ||
1724 (c == '%' && c2 == ':') ||
1725 (c == '%' && c2 == ')'))
bb2e3076 1726 {
66c7d4c1 1727 n->content += c2;
bb2e3076 1728 input_get (); // swallow other character
dff50e09 1729 }
2f1a1aea
FCE
1730
1731 return n;
1732 }
1733
1734 else
1735 {
1736 n->type = tok_junk;
e3795795
FCE
1737 ostringstream s;
1738 s << "\\x" << hex << setw(2) << setfill('0') << c;
1739 n->content = s.str();
16fc963f 1740 n->msg = ""; // signal parser to emit "expected X, found junk" type error
2f1a1aea
FCE
1741 return n;
1742 }
1743}
1744
16fc963f
SM
1745// ------------------------------------------------------------------------
1746
1747void
1748token::make_junk (const string new_msg)
1749{
1750 type = tok_junk;
1751 msg = new_msg;
1752}
2f1a1aea
FCE
1753
1754// ------------------------------------------------------------------------
1755
1756stapfile*
1757parser::parse ()
1758{
1759 stapfile* f = new stapfile;
1b1b4ceb 1760 input.set_current_file (f);
56099f08
FCE
1761
1762 bool empty = true;
1763
2f1a1aea
FCE
1764 while (1)
1765 {
1766 try
1767 {
a07a2c28 1768 systemtap_v_seen = 0;
2f1a1aea 1769 const token* t = peek ();
534aad8b 1770 if (! t) // nice clean EOF, modulo any preprocessing that occurred
2f1a1aea
FCE
1771 break;
1772
56099f08 1773 empty = false;
6e213f58
DS
1774 if (t->type == tok_keyword && t->content == "probe")
1775 {
1776 context = con_probe;
1777 parse_probe (f->probes, f->aliases);
1778 }
1779 else if (t->type == tok_keyword && t->content == "global")
1780 {
1781 context = con_global;
4b5f3e45 1782 parse_global (f->globals, f->probes);
6e213f58
DS
1783 }
1784 else if (t->type == tok_keyword && t->content == "function")
1785 {
1786 context = con_function;
1787 parse_functiondecl (f->functions);
1788 }
54dfabe9 1789 else if (t->type == tok_embedded)
6e213f58
DS
1790 {
1791 context = con_embedded;
1792 f->embeds.push_back (parse_embeddedcode ());
1793 }
2f1a1aea 1794 else
6e213f58
DS
1795 {
1796 context = con_unknown;
2677d2fb 1797 throw parse_error (_("expected 'probe', 'global', 'function', or '%{'"));
6e213f58 1798 }
2f1a1aea
FCE
1799 }
1800 catch (parse_error& pe)
1801 {
1802 print_error (pe);
16fc963f
SM
1803
1804 // XXX: do we want tok_junk to be able to force skip_some behaviour?
cd7116b8 1805 if (pe.skip_some) // for recovery
46954f1d
FCE
1806 // Quietly swallow all tokens until the next keyword we can start parsing from.
1807 while (1)
1808 try
1809 {
cd7116b8
FCE
1810 {
1811 const token* t = peek ();
1812 if (! t)
1813 break;
46954f1d
FCE
1814 if (t->type == tok_keyword && t->content == "probe") break;
1815 else if (t->type == tok_keyword && t->content == "global") break;
1816 else if (t->type == tok_keyword && t->content == "function") break;
1817 else if (t->type == tok_embedded) break;
731a5359 1818 swallow (); // swallow it
cd7116b8 1819 }
46954f1d
FCE
1820 }
1821 catch (parse_error& pe2)
1822 {
1823 // parse error during recovery ... ugh
1824 print_error (pe2);
1825 }
177a8ead 1826 }
2f1a1aea
FCE
1827 }
1828
56099f08
FCE
1829 if (empty)
1830 {
534aad8b
SM
1831 // vary message depending on whether file was *actually* empty:
1832 cerr << (input.saw_tokens
1833 ? _F("Input file '%s' is empty after preprocessing.", input_name.c_str())
4cd32d8c 1834 : _F("Input file '%s' is empty.", input_name.c_str()))
534aad8b 1835 << endl;
56099f08 1836 delete f;
2203b032 1837 f = 0;
56099f08
FCE
1838 }
1839 else if (num_errors > 0)
2f1a1aea 1840 {
52c2652f 1841 cerr << _NF("%d parse error.", "%d parse errors.", num_errors, num_errors) << endl;
2f1a1aea 1842 delete f;
2203b032 1843 f = 0;
2f1a1aea 1844 }
dff50e09 1845
2203b032 1846 input.set_current_file(0);
2f1a1aea
FCE
1847 return f;
1848}
1849
1850
20c6c071 1851void
54dfabe9
FCE
1852parser::parse_probe (std::vector<probe *> & probe_ret,
1853 std::vector<probe_alias *> & alias_ret)
2f1a1aea 1854{
82919855 1855 const token* t0 = next ();
6e213f58 1856 if (! (t0->type == tok_keyword && t0->content == "probe"))
2677d2fb 1857 throw parse_error (_("expected 'probe'"));
82919855 1858
20c6c071
GH
1859 vector<probe_point *> aliases;
1860 vector<probe_point *> locations;
1861
1862 bool equals_ok = true;
82919855 1863
97266278
LG
1864 int epilogue_alias = 0;
1865
2f1a1aea
FCE
1866 while (1)
1867 {
b4ceace2 1868 probe_point * pp = parse_probe_point ();
dff50e09 1869
b4ceace2 1870 const token* t = peek ();
dff50e09 1871 if (equals_ok && t
b4ceace2
FCE
1872 && t->type == tok_operator && t->content == "=")
1873 {
1ad820e3 1874 if (pp->optional || pp->sufficient)
2677d2fb 1875 throw parse_error (_("probe point alias name cannot be optional nor sufficient"), pp->components.front()->tok);
b4ceace2 1876 aliases.push_back(pp);
731a5359 1877 swallow ();
b4ceace2
FCE
1878 continue;
1879 }
dff50e09 1880 else if (equals_ok && t
97266278
LG
1881 && t->type == tok_operator && t->content == "+=")
1882 {
1ad820e3 1883 if (pp->optional || pp->sufficient)
2677d2fb 1884 throw parse_error (_("probe point alias name cannot be optional nor sufficient"), pp->components.front()->tok);
97266278
LG
1885 aliases.push_back(pp);
1886 epilogue_alias = 1;
731a5359 1887 swallow ();
97266278
LG
1888 continue;
1889 }
b4ceace2
FCE
1890 else if (t && t->type == tok_operator && t->content == ",")
1891 {
1892 locations.push_back(pp);
1893 equals_ok = false;
731a5359 1894 swallow ();
b4ceace2
FCE
1895 continue;
1896 }
1897 else if (t && t->type == tok_operator && t->content == "{")
1898 {
1899 locations.push_back(pp);
1900 break;
1901 }
2f1a1aea 1902 else
2677d2fb 1903 throw parse_error (_("expected probe point specifier"));
2f1a1aea 1904 }
20c6c071 1905
20c6c071
GH
1906 if (aliases.empty())
1907 {
54dfabe9
FCE
1908 probe* p = new probe;
1909 p->tok = t0;
1910 p->locations = locations;
1911 p->body = parse_stmt_block ();
37ebca01 1912 p->privileged = privileged;
a07a2c28 1913 p->systemtap_v_conditional = systemtap_v_seen;
54dfabe9 1914 probe_ret.push_back (p);
20c6c071
GH
1915 }
1916 else
1917 {
54dfabe9 1918 probe_alias* p = new probe_alias (aliases);
97266278
LG
1919 if(epilogue_alias)
1920 p->epilogue_style = true;
1921 else
1922 p->epilogue_style = false;
54dfabe9
FCE
1923 p->tok = t0;
1924 p->locations = locations;
1925 p->body = parse_stmt_block ();
37ebca01 1926 p->privileged = privileged;
a07a2c28 1927 p->systemtap_v_conditional = systemtap_v_seen;
54dfabe9 1928 alias_ret.push_back (p);
20c6c071 1929 }
54dfabe9 1930}
20c6c071 1931
54dfabe9
FCE
1932
1933embeddedcode*
1934parser::parse_embeddedcode ()
1935{
1936 embeddedcode* e = new embeddedcode;
1937 const token* t = next ();
1938 if (t->type != tok_embedded)
2677d2fb 1939 throw parse_error (_("expected '%{'"));
24cb178f
FCE
1940
1941 if (! privileged)
efb02738 1942 throw parse_error (_("embedded code in unprivileged script; need stap -g"),
cd7116b8 1943 false /* don't skip tokens for parse resumption */);
54dfabe9
FCE
1944
1945 e->tok = t;
1946 e->code = t->content;
1947 return e;
2f1a1aea
FCE
1948}
1949
1950
1951block*
56099f08 1952parser::parse_stmt_block ()
2f1a1aea
FCE
1953{
1954 block* pb = new block;
1955
56099f08
FCE
1956 const token* t = next ();
1957 if (! (t->type == tok_operator && t->content == "{"))
2677d2fb 1958 throw parse_error (_("expected '{'"));
56099f08
FCE
1959
1960 pb->tok = t;
2b066ec1 1961
2f1a1aea
FCE
1962 while (1)
1963 {
46954f1d
FCE
1964 t = peek ();
1965 if (t && t->type == tok_operator && t->content == "}")
1966 {
731a5359 1967 swallow ();
46954f1d
FCE
1968 break;
1969 }
1970 pb->statements.push_back (parse_statement ());
2f1a1aea
FCE
1971 }
1972
1973 return pb;
1974}
1975
1976
f4fe2e93
FCE
1977try_block*
1978parser::parse_try_block ()
1979{
1980 try_block* pb = new try_block;
1981
731a5359 1982 pb->tok = expect_kw_token ("try");
f4fe2e93
FCE
1983 pb->try_block = parse_stmt_block();
1984 expect_kw ("catch");
1985
1986 const token* t = peek ();
3819d181 1987 if (t != NULL && t->type == tok_operator && t->content == "(")
f4fe2e93 1988 {
731a5359 1989 swallow (); // swallow the '('
f4fe2e93
FCE
1990
1991 t = next();
1992 if (! (t->type == tok_identifier))
2677d2fb 1993 throw parse_error (_("expected identifier"));
f4fe2e93
FCE
1994 symbol* sym = new symbol;
1995 sym->tok = t;
1996 sym->name = t->content;
1997 pb->catch_error_var = sym;
1998
1999 expect_op (")");
2000 }
2001 else
2002 pb->catch_error_var = 0;
2003
2004 pb->catch_block = parse_stmt_block();
2005
2006 return pb;
2007}
2008
2009
2010
2f1a1aea
FCE
2011statement*
2012parser::parse_statement ()
2013{
40b71c47 2014 statement *ret;
2f1a1aea
FCE
2015 const token* t = peek ();
2016 if (t && t->type == tok_operator && t->content == ";")
f946b10f 2017 return new null_statement (next ());
dff50e09 2018 else if (t && t->type == tok_operator && t->content == "{")
40b71c47 2019 return parse_stmt_block (); // Don't squash semicolons.
f4fe2e93
FCE
2020 else if (t && t->type == tok_keyword && t->content == "try")
2021 return parse_try_block (); // Don't squash semicolons.
6e213f58 2022 else if (t && t->type == tok_keyword && t->content == "if")
40b71c47 2023 return parse_if_statement (); // Don't squash semicolons.
6e213f58 2024 else if (t && t->type == tok_keyword && t->content == "for")
40b71c47 2025 return parse_for_loop (); // Don't squash semicolons.
6e213f58 2026 else if (t && t->type == tok_keyword && t->content == "foreach")
40b71c47
MW
2027 return parse_foreach_loop (); // Don't squash semicolons.
2028 else if (t && t->type == tok_keyword && t->content == "while")
2029 return parse_while_loop (); // Don't squash semicolons.
6e213f58 2030 else if (t && t->type == tok_keyword && t->content == "return")
40b71c47 2031 ret = parse_return_statement ();
6e213f58 2032 else if (t && t->type == tok_keyword && t->content == "delete")
40b71c47 2033 ret = parse_delete_statement ();
6e213f58 2034 else if (t && t->type == tok_keyword && t->content == "break")
40b71c47 2035 ret = parse_break_statement ();
6e213f58 2036 else if (t && t->type == tok_keyword && t->content == "continue")
40b71c47 2037 ret = parse_continue_statement ();
6e213f58 2038 else if (t && t->type == tok_keyword && t->content == "next")
40b71c47 2039 ret = parse_next_statement ();
2f1a1aea
FCE
2040 else if (t && (t->type == tok_operator || // expressions are flexible
2041 t->type == tok_identifier ||
2042 t->type == tok_number ||
7d902887
FCE
2043 t->type == tok_string ||
2044 t->type == tok_embedded ))
40b71c47 2045 ret = parse_expr_statement ();
54dfabe9 2046 // XXX: consider generally accepting tok_embedded here too
2f1a1aea 2047 else
2677d2fb 2048 throw parse_error (_("expected statement"));
40b71c47
MW
2049
2050 // Squash "empty" trailing colons after any "non-block-like" statement.
2051 t = peek ();
2052 if (t && t->type == tok_operator && t->content == ";")
2053 {
731a5359 2054 swallow (); // Silently eat trailing ; after statement
40b71c47
MW
2055 }
2056
2057 return ret;
2f1a1aea
FCE
2058}
2059
2060
56099f08 2061void
78f6bba6 2062parser::parse_global (vector <vardecl*>& globals, vector<probe*>&)
2f1a1aea 2063{
82919855 2064 const token* t0 = next ();
6e213f58 2065 if (! (t0->type == tok_keyword && t0->content == "global"))
2677d2fb 2066 throw parse_error (_("expected 'global'"));
731a5359 2067 swallow ();
82919855 2068
56099f08
FCE
2069 while (1)
2070 {
2071 const token* t = next ();
2072 if (! (t->type == tok_identifier))
2677d2fb 2073 throw parse_error (_("expected identifier"));
56099f08 2074
2b066ec1
FCE
2075 for (unsigned i=0; i<globals.size(); i++)
2076 if (globals[i]->name == t->content)
2677d2fb 2077 throw parse_error (_("duplicate global name"));
dff50e09 2078
24cb178f
FCE
2079 vardecl* d = new vardecl;
2080 d->name = t->content;
2081 d->tok = t;
a07a2c28 2082 d->systemtap_v_conditional = systemtap_v_seen;
24cb178f 2083 globals.push_back (d);
56099f08 2084
82919855 2085 t = peek ();
ef474d24 2086
74e6cc92
CM
2087 if(t && t->type == tok_operator && t->content == "%") //wrapping
2088 {
2089 d->wrap = true;
731a5359 2090 swallow ();
74e6cc92
CM
2091 t = peek();
2092 }
2093
ef474d24
JS
2094 if (t && t->type == tok_operator && t->content == "[") // array size
2095 {
2096 int64_t size;
731a5359 2097 swallow ();
ef474d24
JS
2098 expect_number(size);
2099 if (size <= 0 || size > 1000000) // arbitrary max
2677d2fb 2100 throw parse_error(_("array size out of range"));
ef474d24
JS
2101 d->maxsize = (int)size;
2102 expect_known(tok_operator, "]");
2103 t = peek ();
2104 }
2105
4b5f3e45 2106 if (t && t->type == tok_operator && t->content == "=") // initialization
ef474d24
JS
2107 {
2108 if (!d->compatible_arity(0))
2677d2fb 2109 throw parse_error(_("only scalar globals can be initialized"));
58701b78 2110 d->set_arity(0, t);
731a5359 2111 next (); // Don't swallow, set_arity() used the peeked token.
ef474d24
JS
2112 d->init = parse_literal ();
2113 d->type = d->init->type;
2114 t = peek ();
2115 }
4b5f3e45 2116
c3799d72 2117 if (t && t->type == tok_operator && t->content == ";") // termination
950da622 2118 {
731a5359 2119 swallow ();
950da622
MW
2120 break;
2121 }
c3799d72 2122
4b5f3e45 2123 if (t && t->type == tok_operator && t->content == ",") // next global
82919855 2124 {
731a5359 2125 swallow ();
82919855
FCE
2126 continue;
2127 }
56099f08 2128 else
82919855 2129 break;
56099f08
FCE
2130 }
2131}
2132
2133
24cb178f
FCE
2134void
2135parser::parse_functiondecl (std::vector<functiondecl*>& functions)
56099f08 2136{
82919855 2137 const token* t = next ();
6e213f58 2138 if (! (t->type == tok_keyword && t->content == "function"))
2677d2fb 2139 throw parse_error (_("expected 'function'"));
731a5359 2140 swallow ();
56099f08 2141
82919855 2142 t = next ();
6e213f58
DS
2143 if (! (t->type == tok_identifier)
2144 && ! (t->type == tok_keyword
2145 && (t->content == "string" || t->content == "long")))
2677d2fb 2146 throw parse_error (_("expected identifier"));
24cb178f
FCE
2147
2148 for (unsigned i=0; i<functions.size(); i++)
2149 if (functions[i]->name == t->content)
2677d2fb 2150 throw parse_error (_("duplicate function name"));
24cb178f
FCE
2151
2152 functiondecl *fd = new functiondecl ();
56099f08
FCE
2153 fd->name = t->content;
2154 fd->tok = t;
2155
2156 t = next ();
6a505121
FCE
2157 if (t->type == tok_operator && t->content == ":")
2158 {
731a5359 2159 swallow ();
6a505121 2160 t = next ();
6e213f58 2161 if (t->type == tok_keyword && t->content == "string")
6a505121 2162 fd->type = pe_string;
6e213f58 2163 else if (t->type == tok_keyword && t->content == "long")
6a505121 2164 fd->type = pe_long;
2677d2fb 2165 else throw parse_error (_("expected 'string' or 'long'"));
731a5359 2166 swallow ();
6a505121
FCE
2167
2168 t = next ();
2169 }
2170
56099f08 2171 if (! (t->type == tok_operator && t->content == "("))
2677d2fb 2172 throw parse_error (_("expected '('"));
731a5359 2173 swallow ();
56099f08
FCE
2174
2175 while (1)
2176 {
2177 t = next ();
2178
100a540e 2179 // permit zero-argument functions
56099f08 2180 if (t->type == tok_operator && t->content == ")")
731a5359
MW
2181 {
2182 swallow ();
2183 break;
2184 }
56099f08 2185 else if (! (t->type == tok_identifier))
2677d2fb 2186 throw parse_error (_("expected identifier"));
56099f08
FCE
2187 vardecl* vd = new vardecl;
2188 vd->name = t->content;
2189 vd->tok = t;
2190 fd->formal_args.push_back (vd);
a07a2c28 2191 fd->systemtap_v_conditional = systemtap_v_seen;
56099f08
FCE
2192
2193 t = next ();
6a505121
FCE
2194 if (t->type == tok_operator && t->content == ":")
2195 {
731a5359 2196 swallow ();
6a505121 2197 t = next ();
6e213f58 2198 if (t->type == tok_keyword && t->content == "string")
6a505121 2199 vd->type = pe_string;
6e213f58 2200 else if (t->type == tok_keyword && t->content == "long")
6a505121 2201 vd->type = pe_long;
2677d2fb 2202 else throw parse_error (_("expected 'string' or 'long'"));
731a5359 2203 swallow ();
6a505121
FCE
2204 t = next ();
2205 }
56099f08 2206 if (t->type == tok_operator && t->content == ")")
731a5359
MW
2207 {
2208 swallow ();
2209 break;
2210 }
56099f08 2211 if (t->type == tok_operator && t->content == ",")
731a5359
MW
2212 {
2213 swallow ();
2214 continue;
2215 }
56099f08 2216 else
2677d2fb 2217 throw parse_error (_("expected ',' or ')'"));
56099f08
FCE
2218 }
2219
54dfabe9
FCE
2220 t = peek ();
2221 if (t && t->type == tok_embedded)
2222 fd->body = parse_embeddedcode ();
2223 else
2224 fd->body = parse_stmt_block ();
24cb178f
FCE
2225
2226 functions.push_back (fd);
2f1a1aea
FCE
2227}
2228
2229
9c0c0e46
FCE
2230probe_point*
2231parser::parse_probe_point ()
2f1a1aea 2232{
9c0c0e46 2233 probe_point* pl = new probe_point;
2f1a1aea 2234
9c0c0e46 2235 while (1)
2f1a1aea 2236 {
b5477cd9 2237 const token* t = next ();
6e213f58
DS
2238 if (! (t->type == tok_identifier
2239 // we must allow ".return" and ".function", which are keywords
b5477cd9
SM
2240 || t->type == tok_keyword
2241 // we must allow "*", due to being an operator
2242 || (t->type == tok_operator && t->content == "*")))
2677d2fb 2243 throw parse_error (_("expected identifier or '*'"));
9c0c0e46 2244
b5477cd9
SM
2245 // loop which reconstitutes an identifier with wildcards
2246 string content = t->content;
2247 while (1)
2248 {
2249 const token* u = peek();
3819d181
MW
2250 if (u == NULL)
2251 break;
b5477cd9
SM
2252 // ensure pieces of the identifier are adjacent:
2253 if (input.ate_whitespace)
2254 break;
2255 // ensure pieces of the identifier are valid:
2256 if (! (u->type == tok_identifier
2257 // we must allow arbitrary keywords with a wildcard
2258 || u->type == tok_keyword
2259 // we must allow "*", due to being an operator
2260 || (u->type == tok_operator && u->content == "*")))
2261 break;
2262
2263 // append u to t
2264 content = content + u->content;
2265
2266 // consume u
731a5359 2267 swallow ();
b5477cd9 2268 }
534aad8b
SM
2269 // get around const-ness of t:
2270 token* new_t = new token(*t);
b5477cd9
SM
2271 new_t->content = content;
2272 delete t; t = new_t;
9c0c0e46
FCE
2273
2274 probe_point::component* c = new probe_point::component;
2275 c->functor = t->content;
f1a0157a 2276 c->tok = t;
9c0c0e46 2277 pl->components.push_back (c);
6e3347a9 2278 // NB we may add c->arg soon
9c0c0e46
FCE
2279
2280 t = peek ();
a477f3f1 2281
6e3347a9 2282 // consume optional parameter
9c0c0e46
FCE
2283 if (t && t->type == tok_operator && t->content == "(")
2284 {
731a5359 2285 swallow (); // consume "("
9c0c0e46
FCE
2286 c->arg = parse_literal ();
2287
2288 t = next ();
2289 if (! (t->type == tok_operator && t->content == ")"))
2677d2fb 2290 throw parse_error (_("expected ')'"));
731a5359 2291 swallow ();
9c0c0e46
FCE
2292
2293 t = peek ();
9c0c0e46 2294 }
9c0c0e46
FCE
2295
2296 if (t && t->type == tok_operator && t->content == ".")
6e3347a9 2297 {
731a5359 2298 swallow ();
6e3347a9
FCE
2299 continue;
2300 }
2301
f1a0157a 2302 // We only fall through here at the end of a probe point (past
6e3347a9
FCE
2303 // all the dotted/parametrized components).
2304
d898100a
FCE
2305 if (t && t->type == tok_operator &&
2306 (t->content == "?" || t->content == "!"))
6e3347a9
FCE
2307 {
2308 pl->optional = true;
d898100a
FCE
2309 if (t->content == "!") pl->sufficient = true;
2310 // NB: sufficient implies optional
731a5359 2311 swallow ();
6e3347a9
FCE
2312 t = peek ();
2313 // fall through
cbbe8080
MH
2314 }
2315
2316 if (t && t->type == tok_keyword && t->content == "if")
2317 {
731a5359 2318 swallow ();
cbbe8080 2319 t = peek ();
00917a8a 2320 if (!(t && t->type == tok_operator && t->content == "("))
2677d2fb 2321 throw parse_error (_("expected '('"));
731a5359 2322 swallow ();
cbbe8080
MH
2323
2324 pl->condition = parse_expression ();
2325
2326 t = peek ();
00917a8a 2327 if (!(t && t->type == tok_operator && t->content == ")"))
2677d2fb 2328 throw parse_error (_("expected ')'"));
731a5359 2329 swallow ();
cbbe8080
MH
2330 t = peek ();
2331 // fall through
6e3347a9
FCE
2332 }
2333
dff50e09 2334 if (t && t->type == tok_operator
6e3347a9
FCE
2335 && (t->content == "{" || t->content == "," ||
2336 t->content == "=" || t->content == "+=" ))
2337 break;
dff50e09 2338
2677d2fb 2339 throw parse_error (_("expected one of '. , ( ? ! { = +='"));
2f1a1aea
FCE
2340 }
2341
2342 return pl;
2343}
2344
2345
d24f1ff4
SM
2346literal_string*
2347parser::consume_string_literals(const token *t)
2348{
2349 literal_string *ls = new literal_string (t->content);
2350
2351 // PR11208: check if the next token is also a string literal;
2352 // auto-concatenate it. This is complicated to the extent that we
2353 // need to skip intermediate whitespace.
2354 //
2355 // NB for versions prior to 2.0: but don't skip over intervening comments
2356 const token *n = peek();
2357 while (n != NULL && n->type == tok_string
2358 && ! (strverscmp(session.compatible.c_str(), "2.0") < 0
2359 && input.ate_comment))
2360 {
2361 ls->value.append(next()->content); // consume and append the token
2362 n = peek();
2363 }
2364 return ls;
2365}
2366
2367
2368// Parse a string literal and perform backslash escaping on the contents:
2369literal_string*
2370parser::parse_literal_string ()
2371{
2372 const token* t = next ();
2373 literal_string* l;
2374 if (t->type == tok_string)
2375 l = consume_string_literals (t);
2376 else
6a420ae9 2377 throw parse_error (_("expected literal string"));
d24f1ff4
SM
2378
2379 l->tok = t;
2380 return l;
2381}
2382
2383
2f1a1aea
FCE
2384literal*
2385parser::parse_literal ()
2386{
2387 const token* t = next ();
56099f08 2388 literal* l;
2f1a1aea 2389 if (t->type == tok_string)
c5be7511 2390 {
d24f1ff4 2391 l = consume_string_literals (t);
c5be7511 2392 }
16e8f21f 2393 else
9c0c0e46 2394 {
16e8f21f
JS
2395 bool neg = false;
2396 if (t->type == tok_operator && t->content == "-")
2397 {
2398 neg = true;
731a5359 2399 swallow ();
16e8f21f
JS
2400 t = next ();
2401 }
2402
2403 if (t->type == tok_number)
2404 {
2405 const char* startp = t->content.c_str ();
2406 char* endp = (char*) startp;
2407
2408 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
2409 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
79e6d33f
JS
2410 // since the lexer only gives us positive digit strings, but we'll
2411 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
16e8f21f
JS
2412 errno = 0;
2413 long long value = (long long) strtoull (startp, & endp, 0);
16e8f21f 2414 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
79e6d33f 2415 || (neg && (unsigned long long) value > 9223372036854775808ULL)
16e8f21f
JS
2416 || (unsigned long long) value > 18446744073709551615ULL
2417 || value < -9223372036854775807LL-1)
2677d2fb 2418 throw parse_error (_("number invalid or out of range"));
16e8f21f 2419
79e6d33f
JS
2420 if (neg)
2421 value = -value;
2422
16e8f21f
JS
2423 l = new literal_number (value);
2424 }
2425 else
2677d2fb 2426 throw parse_error (_("expected literal string or number"));
9c0c0e46 2427 }
56099f08
FCE
2428
2429 l->tok = t;
2430 return l;
2f1a1aea
FCE
2431}
2432
2433
2434if_statement*
2435parser::parse_if_statement ()
2436{
2437 const token* t = next ();
6e213f58 2438 if (! (t->type == tok_keyword && t->content == "if"))
2677d2fb 2439 throw parse_error (_("expected 'if'"));
56099f08
FCE
2440 if_statement* s = new if_statement;
2441 s->tok = t;
2442
2443 t = next ();
2f1a1aea 2444 if (! (t->type == tok_operator && t->content == "("))
2677d2fb 2445 throw parse_error (_("expected '('"));
731a5359 2446 swallow ();
2f1a1aea 2447
2f1a1aea
FCE
2448 s->condition = parse_expression ();
2449
2450 t = next ();
2451 if (! (t->type == tok_operator && t->content == ")"))
2677d2fb 2452 throw parse_error (_("expected ')'"));
731a5359 2453 swallow ();
2f1a1aea
FCE
2454
2455 s->thenblock = parse_statement ();
2456
2457 t = peek ();
6e213f58 2458 if (t && t->type == tok_keyword && t->content == "else")
2f1a1aea 2459 {
731a5359 2460 swallow ();
2f1a1aea
FCE
2461 s->elseblock = parse_statement ();
2462 }
ed10c639
FCE
2463 else
2464 s->elseblock = 0; // in case not otherwise initialized
2f1a1aea
FCE
2465
2466 return s;
2467}
2468
2469
69c68955
FCE
2470expr_statement*
2471parser::parse_expr_statement ()
2472{
2473 expr_statement *es = new expr_statement;
2474 const token* t = peek ();
5e58d11c
MW
2475 if (t == NULL)
2476 throw parse_error (_("expression statement expected"));
731a5359
MW
2477 // Copy, we only peeked, parse_expression might swallow.
2478 es->tok = new token (*t);
69c68955
FCE
2479 es->value = parse_expression ();
2480 return es;
2481}
2482
2483
56099f08
FCE
2484return_statement*
2485parser::parse_return_statement ()
2486{
2487 const token* t = next ();
6e213f58 2488 if (! (t->type == tok_keyword && t->content == "return"))
2677d2fb 2489 throw parse_error (_("expected 'return'"));
6e213f58 2490 if (context != con_function)
2677d2fb 2491 throw parse_error (_("found 'return' not in function context"));
56099f08
FCE
2492 return_statement* s = new return_statement;
2493 s->tok = t;
2494 s->value = parse_expression ();
2495 return s;
2496}
2497
2498
2499delete_statement*
2500parser::parse_delete_statement ()
2501{
2502 const token* t = next ();
6e213f58 2503 if (! (t->type == tok_keyword && t->content == "delete"))
2677d2fb 2504 throw parse_error (_("expected 'delete'"));
56099f08
FCE
2505 delete_statement* s = new delete_statement;
2506 s->tok = t;
2507 s->value = parse_expression ();
2508 return s;
2509}
2510
2511
f3c26ea5
FCE
2512next_statement*
2513parser::parse_next_statement ()
2514{
2515 const token* t = next ();
6e213f58 2516 if (! (t->type == tok_keyword && t->content == "next"))
2677d2fb 2517 throw parse_error (_("expected 'next'"));
6e213f58 2518 if (context != con_probe)
2677d2fb 2519 throw parse_error (_("found 'next' not in probe context"));
f3c26ea5
FCE
2520 next_statement* s = new next_statement;
2521 s->tok = t;
2522 return s;
2523}
2524
2525
2526break_statement*
2527parser::parse_break_statement ()
2528{
2529 const token* t = next ();
6e213f58 2530 if (! (t->type == tok_keyword && t->content == "break"))
2677d2fb 2531 throw parse_error (_("expected 'break'"));
f3c26ea5
FCE
2532 break_statement* s = new break_statement;
2533 s->tok = t;
2534 return s;
2535}
2536
2537
2538continue_statement*
2539parser::parse_continue_statement ()
2540{
2541 const token* t = next ();
6e213f58 2542 if (! (t->type == tok_keyword && t->content == "continue"))
2677d2fb 2543 throw parse_error (_("expected 'continue'"));
f3c26ea5
FCE
2544 continue_statement* s = new continue_statement;
2545 s->tok = t;
2546 return s;
2547}
2548
2549
69c68955
FCE
2550for_loop*
2551parser::parse_for_loop ()
2552{
f3c26ea5 2553 const token* t = next ();
6e213f58 2554 if (! (t->type == tok_keyword && t->content == "for"))
2677d2fb 2555 throw parse_error (_("expected 'for'"));
f3c26ea5
FCE
2556 for_loop* s = new for_loop;
2557 s->tok = t;
2558
2559 t = next ();
2560 if (! (t->type == tok_operator && t->content == "("))
2677d2fb 2561 throw parse_error (_("expected '('"));
731a5359 2562 swallow ();
f3c26ea5
FCE
2563
2564 // initializer + ";"
2565 t = peek ();
2566 if (t && t->type == tok_operator && t->content == ";")
2567 {
cbfbbf69 2568 s->init = 0;
731a5359 2569 swallow ();
f3c26ea5
FCE
2570 }
2571 else
2572 {
2573 s->init = parse_expr_statement ();
2574 t = next ();
2575 if (! (t->type == tok_operator && t->content == ";"))
2677d2fb 2576 throw parse_error (_("expected ';'"));
731a5359 2577 swallow ();
f3c26ea5
FCE
2578 }
2579
2580 // condition + ";"
2581 t = peek ();
2582 if (t && t->type == tok_operator && t->content == ";")
2583 {
2584 literal_number* l = new literal_number(1);
2585 s->cond = l;
2586 s->cond->tok = next ();
2587 }
2588 else
2589 {
2590 s->cond = parse_expression ();
2591 t = next ();
2592 if (! (t->type == tok_operator && t->content == ";"))
2677d2fb 2593 throw parse_error (_("expected ';'"));
731a5359 2594 swallow ();
f3c26ea5 2595 }
dff50e09 2596
f3c26ea5
FCE
2597 // increment + ")"
2598 t = peek ();
2599 if (t && t->type == tok_operator && t->content == ")")
2600 {
cbfbbf69 2601 s->incr = 0;
731a5359 2602 swallow ();
f3c26ea5
FCE
2603 }
2604 else
2605 {
2606 s->incr = parse_expr_statement ();
2607 t = next ();
2608 if (! (t->type == tok_operator && t->content == ")"))
2677d2fb 2609 throw parse_error (_("expected ')'"));
731a5359 2610 swallow ();
f3c26ea5
FCE
2611 }
2612
2613 // block
2614 s->block = parse_statement ();
2615
2616 return s;
2617}
2618
2619
2620for_loop*
2621parser::parse_while_loop ()
2622{
2623 const token* t = next ();
6e213f58 2624 if (! (t->type == tok_keyword && t->content == "while"))
2677d2fb 2625 throw parse_error (_("expected 'while'"));
f3c26ea5
FCE
2626 for_loop* s = new for_loop;
2627 s->tok = t;
2628
2629 t = next ();
2630 if (! (t->type == tok_operator && t->content == "("))
2677d2fb 2631 throw parse_error (_("expected '('"));
731a5359 2632 swallow ();
f3c26ea5
FCE
2633
2634 // dummy init and incr fields
cbfbbf69
FCE
2635 s->init = 0;
2636 s->incr = 0;
f3c26ea5
FCE
2637
2638 // condition
2639 s->cond = parse_expression ();
2640
f3c26ea5
FCE
2641 t = next ();
2642 if (! (t->type == tok_operator && t->content == ")"))
2677d2fb 2643 throw parse_error (_("expected ')'"));
731a5359 2644 swallow ();
dff50e09 2645
f3c26ea5
FCE
2646 // block
2647 s->block = parse_statement ();
2648
2649 return s;
69c68955
FCE
2650}
2651
2652
2653foreach_loop*
2654parser::parse_foreach_loop ()
2655{
2656 const token* t = next ();
6e213f58 2657 if (! (t->type == tok_keyword && t->content == "foreach"))
2677d2fb 2658 throw parse_error (_("expected 'foreach'"));
69c68955
FCE
2659 foreach_loop* s = new foreach_loop;
2660 s->tok = t;
93484556 2661 s->sort_direction = 0;
fd5689dc 2662 s->sort_aggr = sc_none;
c261711d 2663 s->value = NULL;
27f21e8c 2664 s->limit = NULL;
69c68955
FCE
2665
2666 t = next ();
2667 if (! (t->type == tok_operator && t->content == "("))
2677d2fb 2668 throw parse_error (_("expected '('"));
731a5359 2669 swallow ();
69c68955 2670
c261711d
JS
2671 symbol* lookahead_sym = NULL;
2672 int lookahead_sort = 0;
2673
2674 t = peek ();
2675 if (t && t->type == tok_identifier)
2676 {
2677 next ();
2678 lookahead_sym = new symbol;
2679 lookahead_sym->tok = t;
2680 lookahead_sym->name = t->content;
2681
2682 t = peek ();
2683 if (t && t->type == tok_operator &&
2684 (t->content == "+" || t->content == "-"))
2685 {
c261711d 2686 lookahead_sort = (t->content == "+") ? 1 : -1;
731a5359 2687 swallow ();
c261711d
JS
2688 }
2689
2690 t = peek ();
2691 if (t && t->type == tok_operator && t->content == "=")
2692 {
731a5359 2693 swallow ();
c261711d
JS
2694 s->value = lookahead_sym;
2695 if (lookahead_sort)
2696 {
2697 s->sort_direction = lookahead_sort;
2698 s->sort_column = 0;
2699 }
2700 lookahead_sym = NULL;
2701 }
2702 }
2703
69c68955
FCE
2704 // see also parse_array_in
2705
2706 bool parenthesized = false;
2707 t = peek ();
c261711d 2708 if (!lookahead_sym && t && t->type == tok_operator && t->content == "[")
69c68955 2709 {
731a5359 2710 swallow ();
69c68955
FCE
2711 parenthesized = true;
2712 }
2713
c261711d
JS
2714 if (lookahead_sym)
2715 {
2716 s->indexes.push_back (lookahead_sym);
2717 if (lookahead_sort)
2718 {
2719 s->sort_direction = lookahead_sort;
2720 s->sort_column = 1;
2721 }
2722 lookahead_sym = NULL;
2723 }
2724 else while (1)
69c68955
FCE
2725 {
2726 t = next ();
2727 if (! (t->type == tok_identifier))
2677d2fb 2728 throw parse_error (_("expected identifier"));
69c68955
FCE
2729 symbol* sym = new symbol;
2730 sym->tok = t;
2731 sym->name = t->content;
2732 s->indexes.push_back (sym);
2733
93484556
FCE
2734 t = peek ();
2735 if (t && t->type == tok_operator &&
2736 (t->content == "+" || t->content == "-"))
2737 {
2738 if (s->sort_direction)
2677d2fb 2739 throw parse_error (_("multiple sort directives"));
93484556
FCE
2740 s->sort_direction = (t->content == "+") ? 1 : -1;
2741 s->sort_column = s->indexes.size();
731a5359 2742 swallow ();
93484556
FCE
2743 }
2744
69c68955
FCE
2745 if (parenthesized)
2746 {
93484556 2747 t = peek ();
69c68955
FCE
2748 if (t && t->type == tok_operator && t->content == ",")
2749 {
731a5359 2750 swallow ();
69c68955
FCE
2751 continue;
2752 }
2753 else if (t && t->type == tok_operator && t->content == "]")
2754 {
731a5359 2755 swallow ();
69c68955
FCE
2756 break;
2757 }
dff50e09 2758 else
2677d2fb 2759 throw parse_error (_("expected ',' or ']'"));
69c68955
FCE
2760 }
2761 else
2762 break; // expecting only one expression
2763 }
2764
2765 t = next ();
6e213f58 2766 if (! (t->type == tok_keyword && t->content == "in"))
2677d2fb 2767 throw parse_error (_("expected 'in'"));
731a5359 2768 swallow ();
dff50e09 2769
d02548c0 2770 s->base = parse_indexable();
69c68955 2771
fd5689dc
FCE
2772 // check for atword, see also expect_ident_or_atword,
2773 t = peek ();
2774 if (t && t->type == tok_operator && t->content[0] == '@')
2775 {
2776 if (t->content == "@avg") s->sort_aggr = sc_average;
2777 else if (t->content == "@min") s->sort_aggr = sc_min;
2778 else if (t->content == "@max") s->sort_aggr = sc_max;
2779 else if (t->content == "@count") s->sort_aggr = sc_count;
2780 else if (t->content == "@sum") s->sort_aggr = sc_sum;
2781 else throw parse_error(_("expected statistical operation"));
2782 swallow();
2783
2784 t = peek ();
2785 if (! (t && t->type == tok_operator && (t->content == "+" || t->content == "-")))
2786 throw parse_error(_("expected sort directive"));
2787 }
2788
93484556
FCE
2789 t = peek ();
2790 if (t && t->type == tok_operator &&
2791 (t->content == "+" || t->content == "-"))
2792 {
2793 if (s->sort_direction)
2677d2fb 2794 throw parse_error (_("multiple sort directives"));
93484556
FCE
2795 s->sort_direction = (t->content == "+") ? 1 : -1;
2796 s->sort_column = 0;
731a5359 2797 swallow ();
93484556
FCE
2798 }
2799
27f21e8c
DS
2800 t = peek ();
2801 if (tok_is(t, tok_keyword, "limit"))
2802 {
731a5359 2803 swallow (); // get past the "limit"
27f21e8c
DS
2804 s->limit = parse_expression ();
2805 }
2806
69c68955
FCE
2807 t = next ();
2808 if (! (t->type == tok_operator && t->content == ")"))
2809 throw parse_error ("expected ')'");
731a5359 2810 swallow ();
69c68955
FCE
2811
2812 s->block = parse_statement ();
2813 return s;
2814}
2815
2816
2f1a1aea
FCE
2817expression*
2818parser::parse_expression ()
2819{
2820 return parse_assignment ();
2821}
2822
2f1a1aea
FCE
2823
2824expression*
2825parser::parse_assignment ()
2826{
2827 expression* op1 = parse_ternary ();
2828
2829 const token* t = peek ();
82919855 2830 // right-associative operators
dff50e09 2831 if (t && t->type == tok_operator
2f1a1aea 2832 && (t->content == "=" ||
82919855 2833 t->content == "<<<" ||
2f1a1aea 2834 t->content == "+=" ||
bb2e3076
FCE
2835 t->content == "-=" ||
2836 t->content == "*=" ||
2837 t->content == "/=" ||
2838 t->content == "%=" ||
2839 t->content == "<<=" ||
2840 t->content == ">>=" ||
2841 t->content == "&=" ||
2842 t->content == "^=" ||
2843 t->content == "|=" ||
d5d7c2cc 2844 t->content == ".=" ||
dff50e09 2845 false))
2f1a1aea 2846 {
bb2e3076 2847 // NB: lvalueness is checked during elaboration / translation
2f1a1aea 2848 assignment* e = new assignment;
56099f08 2849 e->left = op1;
2f1a1aea 2850 e->op = t->content;
56099f08 2851 e->tok = t;
2f1a1aea 2852 next ();
82919855 2853 e->right = parse_expression ();
56099f08 2854 op1 = e;
2f1a1aea 2855 }
56099f08
FCE
2856
2857 return op1;
2f1a1aea
FCE
2858}
2859
2860
2861expression*
2862parser::parse_ternary ()
2863{
2864 expression* op1 = parse_logical_or ();
2865
2866 const token* t = peek ();
2867 if (t && t->type == tok_operator && t->content == "?")
2868 {
2f1a1aea 2869 ternary_expression* e = new ternary_expression;
56099f08 2870 e->tok = t;
2f1a1aea 2871 e->cond = op1;
56099f08
FCE
2872 next ();
2873 e->truevalue = parse_expression (); // XXX
2f1a1aea
FCE
2874
2875 t = next ();
2876 if (! (t->type == tok_operator && t->content == ":"))
2677d2fb 2877 throw parse_error (_("expected ':'"));
731a5359 2878 swallow ();
2f1a1aea 2879
56099f08 2880 e->falsevalue = parse_expression (); // XXX
2f1a1aea
FCE
2881 return e;
2882 }
2883 else
2884 return op1;
2885}
2886
2887
2888expression*
2889parser::parse_logical_or ()
2890{
2891 expression* op1 = parse_logical_and ();
dff50e09 2892
2f1a1aea 2893 const token* t = peek ();
56099f08 2894 while (t && t->type == tok_operator && t->content == "||")
2f1a1aea 2895 {
2f1a1aea 2896 logical_or_expr* e = new logical_or_expr;
56099f08
FCE
2897 e->tok = t;
2898 e->op = t->content;
2f1a1aea 2899 e->left = op1;
56099f08
FCE
2900 next ();
2901 e->right = parse_logical_and ();
2902 op1 = e;
2903 t = peek ();
2f1a1aea 2904 }
56099f08
FCE
2905
2906 return op1;
2f1a1aea
FCE
2907}
2908
2909
2910expression*
2911parser::parse_logical_and ()
2912{
bb2e3076 2913 expression* op1 = parse_boolean_or ();
2f1a1aea
FCE
2914
2915 const token* t = peek ();
56099f08 2916 while (t && t->type == tok_operator && t->content == "&&")
2f1a1aea 2917 {
2f1a1aea
FCE
2918 logical_and_expr *e = new logical_and_expr;
2919 e->left = op1;
56099f08
FCE
2920 e->op = t->content;
2921 e->tok = t;
2922 next ();
bb2e3076
FCE
2923 e->right = parse_boolean_or ();
2924 op1 = e;
2925 t = peek ();
2926 }
2927
2928 return op1;
2929}
2930
2931
2932expression*
2933parser::parse_boolean_or ()
2934{
2935 expression* op1 = parse_boolean_xor ();
2936
2937 const token* t = peek ();
2938 while (t && t->type == tok_operator && t->content == "|")
2939 {
2940 binary_expression* e = new binary_expression;
2941 e->left = op1;
2942 e->op = t->content;
2943 e->tok = t;
2944 next ();
2945 e->right = parse_boolean_xor ();
2946 op1 = e;
2947 t = peek ();
2948 }
2949
2950 return op1;
2951}
2952
2953
2954expression*
2955parser::parse_boolean_xor ()
2956{
2957 expression* op1 = parse_boolean_and ();
2958
2959 const token* t = peek ();
2960 while (t && t->type == tok_operator && t->content == "^")
2961 {
2962 binary_expression* e = new binary_expression;
2963 e->left = op1;
2964 e->op = t->content;
2965 e->tok = t;
2966 next ();
2967 e->right = parse_boolean_and ();
2968 op1 = e;
2969 t = peek ();
2970 }
2971
2972 return op1;
2973}
2974
2975
2976expression*
2977parser::parse_boolean_and ()
2978{
2979 expression* op1 = parse_array_in ();
2980
2981 const token* t = peek ();
2982 while (t && t->type == tok_operator && t->content == "&")
2983 {
2984 binary_expression* e = new binary_expression;
2985 e->left = op1;
2986 e->op = t->content;
2987 e->tok = t;
2988 next ();
56099f08
FCE
2989 e->right = parse_array_in ();
2990 op1 = e;
2991 t = peek ();
2f1a1aea 2992 }
56099f08
FCE
2993
2994 return op1;
2f1a1aea
FCE
2995}
2996
2997
2998expression*
2999parser::parse_array_in ()
3000{
ce10591c 3001 // This is a very tricky case. All these are legit expressions:
69c68955 3002 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
ce10591c
FCE
3003 vector<expression*> indexes;
3004 bool parenthesized = false;
2f1a1aea
FCE
3005
3006 const token* t = peek ();
69c68955 3007 if (t && t->type == tok_operator && t->content == "[")
ce10591c 3008 {
731a5359 3009 swallow ();
ce10591c
FCE
3010 parenthesized = true;
3011 }
3012
3013 while (1)
3014 {
93daaca8 3015 expression* op1 = parse_comparison_or_regex_query ();
ce10591c
FCE
3016 indexes.push_back (op1);
3017
3018 if (parenthesized)
3019 {
3020 const token* t = peek ();
3021 if (t && t->type == tok_operator && t->content == ",")
3022 {
731a5359 3023 swallow ();
ce10591c
FCE
3024 continue;
3025 }
69c68955 3026 else if (t && t->type == tok_operator && t->content == "]")
ce10591c 3027 {
731a5359 3028 swallow ();
ce10591c
FCE
3029 break;
3030 }
dff50e09 3031 else
2677d2fb 3032 throw parse_error (_("expected ',' or ']'"));
ce10591c
FCE
3033 }
3034 else
3035 break; // expecting only one expression
3036 }
3037
3038 t = peek ();
6e213f58 3039 if (t && t->type == tok_keyword && t->content == "in")
2f1a1aea 3040 {
2f1a1aea 3041 array_in *e = new array_in;
56099f08 3042 e->tok = t;
731a5359 3043 next ();
ce10591c
FCE
3044
3045 arrayindex* a = new arrayindex;
3046 a->indexes = indexes;
d02548c0 3047 a->base = parse_indexable();
d15d767c 3048 a->tok = a->base->tok;
ce10591c 3049 e->operand = a;
2f1a1aea
FCE
3050 return e;
3051 }
ce10591c
FCE
3052 else if (indexes.size() == 1) // no "in" - need one expression only
3053 return indexes[0];
2f1a1aea 3054 else
2677d2fb 3055 throw parse_error (_("unexpected comma-separated expression list"));
2f1a1aea
FCE
3056}
3057
3058
3059expression*
93daaca8 3060parser::parse_comparison_or_regex_query ()
2f1a1aea 3061{
bb2e3076 3062 expression* op1 = parse_shift ();
2f1a1aea 3063
93daaca8
SM
3064 // TODOXXX for now, =~ is nonassociative
3065 // TODOXXX maybe instead a =~ b == c =~ d --> (a =~ b) == (c =~ d) ??
3066 const token *t = peek();
3067 if (t && t->type == tok_operator
3068 && (t->content == "=~" ||
3069 t->content == "!~"))
3070 {
3071 regex_query* r = new regex_query;
3072 r->left = op1;
3073 r->op = t->content;
3074 r->tok = t;
3075 next ();
d24f1ff4 3076 r->right = r->re = parse_literal_string();
93daaca8
SM
3077 op1 = r;
3078 t = peek ();
3079 }
3080 else while (t && t->type == tok_operator
553d27a5
FCE
3081 && (t->content == ">" ||
3082 t->content == "<" ||
3083 t->content == "==" ||
3084 t->content == "!=" ||
3085 t->content == "<=" ||
bb2e3076 3086 t->content == ">="))
2f1a1aea
FCE
3087 {
3088 comparison* e = new comparison;
3089 e->left = op1;
3090 e->op = t->content;
56099f08 3091 e->tok = t;
2f1a1aea 3092 next ();
bb2e3076
FCE
3093 e->right = parse_shift ();
3094 op1 = e;
3095 t = peek ();
3096 }
3097
3098 return op1;
3099}
3100
3101
3102expression*
3103parser::parse_shift ()
3104{
3105 expression* op1 = parse_concatenation ();
3106
3107 const token* t = peek ();
dff50e09 3108 while (t && t->type == tok_operator &&
bb2e3076
FCE
3109 (t->content == "<<" || t->content == ">>"))
3110 {
3111 binary_expression* e = new binary_expression;
3112 e->left = op1;
3113 e->op = t->content;
3114 e->tok = t;
3115 next ();
56099f08
FCE
3116 e->right = parse_concatenation ();
3117 op1 = e;
3118 t = peek ();
2f1a1aea 3119 }
56099f08
FCE
3120
3121 return op1;
2f1a1aea
FCE
3122}
3123
3124
3125expression*
3126parser::parse_concatenation ()
3127{
3128 expression* op1 = parse_additive ();
3129
3130 const token* t = peek ();
3131 // XXX: the actual awk string-concatenation operator is *whitespace*.
3132 // I don't know how to easily to model that here.
56099f08 3133 while (t && t->type == tok_operator && t->content == ".")
2f1a1aea
FCE
3134 {
3135 concatenation* e = new concatenation;
3136 e->left = op1;
3137 e->op = t->content;
56099f08 3138 e->tok = t;
2f1a1aea 3139 next ();
56099f08
FCE
3140 e->right = parse_additive ();
3141 op1 = e;
3142 t = peek ();
2f1a1aea 3143 }
56099f08
FCE
3144
3145 return op1;
2f1a1aea
FCE
3146}
3147
3148
3149expression*
3150parser::parse_additive ()
3151{
3152 expression* op1 = parse_multiplicative ();
3153
3154 const token* t = peek ();
dff50e09 3155 while (t && t->type == tok_operator
2f1a1aea
FCE
3156 && (t->content == "+" || t->content == "-"))
3157 {
3158 binary_expression* e = new binary_expression;
3159 e->op = t->content;
3160 e->left = op1;
56099f08 3161 e->tok = t;
2f1a1aea 3162 next ();
56099f08
FCE
3163 e->right = parse_multiplicative ();
3164 op1 = e;
3165 t = peek ();
2f1a1aea 3166 }
56099f08
FCE
3167
3168 return op1;
2f1a1aea
FCE
3169}
3170
3171
3172expression*
3173parser::parse_multiplicative ()
3174{
3175 expression* op1 = parse_unary ();
3176
3177 const token* t = peek ();
dff50e09 3178 while (t && t->type == tok_operator
2f1a1aea
FCE
3179 && (t->content == "*" || t->content == "/" || t->content == "%"))
3180 {
3181 binary_expression* e = new binary_expression;
3182 e->op = t->content;
3183 e->left = op1;
56099f08 3184 e->tok = t;
2f1a1aea 3185 next ();
56099f08
FCE
3186 e->right = parse_unary ();
3187 op1 = e;
3188 t = peek ();
2f1a1aea 3189 }
56099f08
FCE
3190
3191 return op1;
2f1a1aea
FCE
3192}
3193
3194
3195expression*
3196parser::parse_unary ()
3197{
3198 const token* t = peek ();
dff50e09
FCE
3199 if (t && t->type == tok_operator
3200 && (t->content == "+" ||
3201 t->content == "-" ||
bb2e3076
FCE
3202 t->content == "!" ||
3203 t->content == "~" ||
3204 false))
2f1a1aea
FCE
3205 {
3206 unary_expression* e = new unary_expression;
3207 e->op = t->content;
56099f08 3208 e->tok = t;
2f1a1aea 3209 next ();
1cb79a72 3210 e->operand = parse_unary ();
2f1a1aea
FCE
3211 return e;
3212 }
3213 else
bb2e3076 3214 return parse_crement ();
2f1a1aea
FCE
3215}
3216
3217
3218expression*
3219parser::parse_crement () // as in "increment" / "decrement"
3220{
cbfbbf69
FCE
3221 // NB: Ideally, we'd parse only a symbol as an operand to the
3222 // *crement operators, instead of a general expression value. We'd
3223 // need more complex lookahead code to tell apart the postfix cases.
3224 // So we just punt, and leave it to pass-3 to signal errors on
3225 // cases like "4++".
3226
2f1a1aea 3227 const token* t = peek ();
dff50e09 3228 if (t && t->type == tok_operator
2f1a1aea
FCE
3229 && (t->content == "++" || t->content == "--"))
3230 {
3231 pre_crement* e = new pre_crement;
3232 e->op = t->content;
56099f08 3233 e->tok = t;
2f1a1aea
FCE
3234 next ();
3235 e->operand = parse_value ();
3236 return e;
3237 }
3238
3239 // post-crement or non-crement
3240 expression *op1 = parse_value ();
dff50e09 3241
2f1a1aea 3242 t = peek ();
dff50e09 3243 if (t && t->type == tok_operator
2f1a1aea
FCE
3244 && (t->content == "++" || t->content == "--"))
3245 {
3246 post_crement* e = new post_crement;
3247 e->op = t->content;
56099f08 3248 e->tok = t;
2f1a1aea
FCE
3249 next ();
3250 e->operand = op1;
3251 return e;
3252 }
3253 else
3254 return op1;
3255}
3256
3257
3258expression*
3259parser::parse_value ()
3260{
3261 const token* t = peek ();
3262 if (! t)
2677d2fb 3263 throw parse_error (_("expected value"));
2f1a1aea 3264
7d902887
FCE
3265 if (t->type == tok_embedded)
3266 {
7d902887 3267 if (! privileged)
efb02738 3268 throw parse_error (_("embedded expression code in unprivileged script; need stap -g"), false);
7d902887
FCE
3269
3270 embedded_expr *e = new embedded_expr;
3271 e->tok = t;
3272 e->code = t->content;
731a5359 3273 next ();
7d902887
FCE
3274 return e;
3275 }
3276
2f1a1aea
FCE
3277 if (t->type == tok_operator && t->content == "(")
3278 {
731a5359 3279 swallow ();
2f1a1aea
FCE
3280 expression* e = parse_expression ();
3281 t = next ();
3282 if (! (t->type == tok_operator && t->content == ")"))
2677d2fb 3283 throw parse_error (_("expected ')'"));
731a5359 3284 swallow ();
2f1a1aea
FCE
3285 return e;
3286 }
03c75a4a
JS
3287 else if (t->type == tok_operator && t->content == "&")
3288 {
731a5359 3289 next (); // Cannot swallow, passing token on...
d48afc20 3290 return parse_target_symbol (t);
03c75a4a 3291 }
06219d6f
SM
3292 else if (t->type == tok_identifier
3293 || (t->type == tok_operator && t->content[0] == '@'))
2f1a1aea
FCE
3294 return parse_symbol ();
3295 else
3296 return parse_literal ();
3297}
3298
3299
d02548c0
GH
3300const token *
3301parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name)
3302{
3303 hop = NULL;
50cc7cd5 3304 const token* t = expect_ident_or_atword (name);
d02548c0
GH
3305 if (name == "@hist_linear" || name == "@hist_log")
3306 {
3307 hop = new hist_op;
3308 if (name == "@hist_linear")
3309 hop->htype = hist_linear;
3310 else if (name == "@hist_log")
3311 hop->htype = hist_log;
3312 hop->tok = t;
3313 expect_op("(");
3314 hop->stat = parse_expression ();
3315 int64_t tnum;
3316 if (hop->htype == hist_linear)
3317 {
3318 for (size_t i = 0; i < 3; ++i)
3319 {
3320 expect_op (",");
3321 expect_number (tnum);
3322 hop->params.push_back (tnum);
3323 }
3324 }
d02548c0
GH
3325 expect_op(")");
3326 }
3327 return t;
3328}
3329
3330
3331indexable*
3332parser::parse_indexable ()
3333{
3334 hist_op *hop = NULL;
3335 string name;
3336 const token *tok = parse_hist_op_or_bare_name(hop, name);
3337 if (hop)
3338 return hop;
3339 else
3340 {
3341 symbol* sym = new symbol;
3342 sym->name = name;
3343 sym->tok = tok;
3344 return sym;
3345 }
3346}
3347
3348
cc9001af
MW
3349// var, indexable[index], func(parms), printf("...", ...), $var,r
3350// @cast, @defined, @entry, @var, $var->member, @stat_op(stat)
30263a73 3351expression* parser::parse_symbol ()
2f1a1aea 3352{
d02548c0
GH
3353 hist_op *hop = NULL;
3354 symbol *sym = NULL;
d7f3e0c5 3355 string name;
d02548c0
GH
3356 const token *t = parse_hist_op_or_bare_name(hop, name);
3357
3358 if (!hop)
0fefb486 3359 {
dff50e09 3360 // If we didn't get a hist_op, then we did get an identifier. We can
d02548c0
GH
3361 // now scrutinize this identifier for the various magic forms of identifier
3362 // (printf, @stat_op, and $var...)
3363
cc9001af
MW
3364 if (name == "@cast"
3365 || name == "@var"
3366 || (name.size() > 0 && name[0] == '$'))
30263a73 3367 return parse_target_symbol (t);
9b5af295 3368
db135493
FCE
3369 // NB: PR11343: @defined() is not incompatible with earlier versions
3370 // of stap, so no need to check session.compatible for 1.2
30263a73
FCE
3371 if (name == "@defined")
3372 return parse_defined_op (t);
8cc799a5
JS
3373
3374 if (name == "@entry")
3375 return parse_entry_op (t);
3376
3689db05
SC
3377 if (name == "@perf")
3378 return parse_perf_op (t);
3379
cc9001af 3380 if (name.size() > 0 && name[0] == '@')
d7f3e0c5 3381 {
d02548c0
GH
3382 stat_op *sop = new stat_op;
3383 if (name == "@avg")
3384 sop->ctype = sc_average;
3385 else if (name == "@count")
3386 sop->ctype = sc_count;
3387 else if (name == "@sum")
3388 sop->ctype = sc_sum;
3389 else if (name == "@min")
3390 sop->ctype = sc_min;
3391 else if (name == "@max")
3392 sop->ctype = sc_max;
3393 else
cc9001af 3394 throw parse_error(_("unknown operator ") + name);
d02548c0
GH
3395 expect_op("(");
3396 sop->tok = t;
3397 sop->stat = parse_expression ();
3398 expect_op(")");
3399 return sop;
3400 }
dff50e09 3401
d5e178c1 3402 else if (print_format *fmt = print_format::create(t))
d02548c0 3403 {
d02548c0 3404 expect_op("(");
b15c465c
PP
3405 if ((name == "print" || name == "println" ||
3406 name == "sprint" || name == "sprintln") &&
f34254da 3407 (peek_op("@hist_linear") || peek_op("@hist_log")))
a4636912
GH
3408 {
3409 // We have a special case where we recognize
3410 // print(@hist_foo(bar)) as a magic print-the-histogram
3411 // construct. This is sort of gross but it avoids
3412 // promoting histogram references to typeful
3413 // expressions.
dff50e09 3414
1bbeef03
GH
3415 hop = NULL;
3416 t = parse_hist_op_or_bare_name(hop, name);
3417 assert(hop);
dff50e09 3418
1bbeef03
GH
3419 // It is, sadly, possible that even while parsing a
3420 // hist_op, we *mis-guessed* and the user wishes to
3421 // print(@hist_op(foo)[bucket]), a scalar. In that case
3422 // we must parse the arrayindex and print an expression.
839325a1
JS
3423 //
3424 // XXX: This still fails if the arrayindex is part of a
3425 // larger expression. To really handle everything, we'd
3426 // need to push back all the hist tokens start over.
dff50e09 3427
1bbeef03
GH
3428 if (!peek_op ("["))
3429 fmt->hist = hop;
3430 else
3431 {
3432 // This is simplified version of the
3433 // multi-array-index parser below, because we can
3434 // only ever have one index on a histogram anyways.
3435 expect_op("[");
3436 struct arrayindex* ai = new arrayindex;
3437 ai->tok = t;
3438 ai->base = hop;
3439 ai->indexes.push_back (parse_expression ());
3440 expect_op("]");
3441 fmt->args.push_back(ai);
839325a1
JS
3442
3443 // Consume any subsequent arguments.
3444 while (!peek_op (")"))
3445 {
3446 expect_op(",");
3447 expression *e = parse_expression ();
3448 fmt->args.push_back(e);
3449 }
1bbeef03 3450 }
a4636912 3451 }
d7f3e0c5 3452 else
d02548c0 3453 {
3cb17058
JS
3454 int min_args = 0;
3455 if (fmt->print_with_format)
3456 {
3457 // Consume and convert a format string. Agreement between the
3458 // format string and the arguments is postponed to the
3459 // typechecking phase.
3460 string tmp;
3461 expect_unknown (tok_string, tmp);
3462 fmt->raw_components = tmp;
3463 fmt->components = print_format::string_to_components (tmp);
3464 }
3465 else if (fmt->print_with_delim)
3466 {
3467 // Consume a delimiter to separate arguments.
3468 fmt->delimiter.clear();
3469 fmt->delimiter.type = print_format::conv_literal;
3470 expect_unknown (tok_string, fmt->delimiter.literal_string);
3471 min_args = 2;
3472 }
3473 else
3474 {
3475 // If we are not printing with a format string, we must have
3476 // at least one argument (of any type).
3477 expression *e = parse_expression ();
3478 fmt->args.push_back(e);
3479 }
3480
3481 // Consume any subsequent arguments.
3482 while (min_args || !peek_op (")"))
3483 {
3484 expect_op(",");
3485 expression *e = parse_expression ();
3486 fmt->args.push_back(e);
3487 if (min_args)
3488 --min_args;
3489 }
d02548c0
GH
3490 }
3491 expect_op(")");
3492 return fmt;
3493 }
dff50e09 3494
d02548c0
GH
3495 else if (peek_op ("(")) // function call
3496 {
731a5359 3497 swallow ();
d02548c0
GH
3498 struct functioncall* f = new functioncall;
3499 f->tok = t;
3500 f->function = name;
3501 // Allow empty actual parameter list
3502 if (peek_op (")"))
3503 {
731a5359 3504 swallow ();
d02548c0
GH
3505 return f;
3506 }
3507 while (1)
3508 {
3509 f->args.push_back (parse_expression ());
3510 if (peek_op (")"))
3511 {
731a5359 3512 swallow ();
d02548c0
GH
3513 break;
3514 }
3515 else if (peek_op (","))
3516 {
731a5359 3517 swallow ();
d02548c0
GH
3518 continue;
3519 }
3520 else
2677d2fb 3521 throw parse_error (_("expected ',' or ')'"));
d02548c0
GH
3522 }
3523 return f;
3524 }
3525
3526 else
3527 {
3528 sym = new symbol;
3529 sym->name = name;
3530 sym->tok = t;
d7f3e0c5 3531 }
0fefb486 3532 }
dff50e09
FCE
3533
3534 // By now, either we had a hist_op in the first place, or else
d02548c0
GH
3535 // we had a plain word and it was converted to a symbol.
3536
70c743d8 3537 assert (!hop != !sym); // logical XOR
d02548c0
GH
3538
3539 // All that remains is to check for array indexing
3540
d7f3e0c5 3541 if (peek_op ("[")) // array
2f1a1aea 3542 {
731a5359 3543 swallow ();
2f1a1aea 3544 struct arrayindex* ai = new arrayindex;
d02548c0
GH
3545 ai->tok = t;
3546
3547 if (hop)
3548 ai->base = hop;
3549 else
3550 ai->base = sym;
3551
2f1a1aea
FCE
3552 while (1)
3553 {
3554 ai->indexes.push_back (parse_expression ());
d7f3e0c5 3555 if (peek_op ("]"))
dff50e09 3556 {
731a5359 3557 swallow ();
dff50e09 3558 break;
d7f3e0c5
GH
3559 }
3560 else if (peek_op (","))
3561 {
731a5359 3562 swallow ();
d7f3e0c5
GH
3563 continue;
3564 }
2f1a1aea 3565 else
2677d2fb 3566 throw parse_error (_("expected ',' or ']'"));
2f1a1aea
FCE
3567 }
3568 return ai;
3569 }
d02548c0
GH
3570
3571 // If we got to here, we *should* have a symbol; if we have
3572 // a hist_op on its own, it doesn't count as an expression,
3573 // so we throw a parse error.
3574
3575 if (hop)
2677d2fb 3576 throw parse_error(_("base histogram operator where expression expected"), t);
dff50e09
FCE
3577
3578 return sym;
2f1a1aea 3579}
56099f08 3580
30263a73
FCE
3581// Parse a @cast or $var. Given head token has already been consumed.
3582target_symbol* parser::parse_target_symbol (const token* t)
3583{
d48afc20
JS
3584 bool addressof = false;
3585 if (t->type == tok_operator && t->content == "&")
3586 {
3587 addressof = true;
3819d181
MW
3588 // Don't delete t before trying next token.
3589 // We might need it in the error message when there is no next token.
3590 const token *next_t = next ();
731a5359 3591 delete t;
3819d181 3592 t = next_t;
d48afc20
JS
3593 }
3594
06219d6f 3595 if (t->type == tok_operator && t->content == "@cast")
30263a73
FCE
3596 {
3597 cast_op *cop = new cast_op;
3598 cop->tok = t;
277c21bc 3599 cop->name = t->content;
30263a73
FCE
3600 expect_op("(");
3601 cop->operand = parse_expression ();
3602 expect_op(",");
7f6b80bd 3603 expect_unknown(tok_string, cop->type_name);
30263a73
FCE
3604 if (peek_op (","))
3605 {
731a5359 3606 swallow ();
30263a73
FCE
3607 expect_unknown(tok_string, cop->module);
3608 }
3609 expect_op(")");
3610 parse_target_symbol_components(cop);
d48afc20 3611 cop->addressof = addressof;
30263a73
FCE
3612 return cop;
3613 }
3614
3615 if (t->type == tok_identifier && t->content[0]=='$')
3616 {
3617 // target_symbol time
3618 target_symbol *tsym = new target_symbol;
3619 tsym->tok = t;
277c21bc 3620 tsym->name = t->content;
cc9001af 3621 tsym->target_name = "";
bfa7e523 3622 tsym->cu_name = "";
30263a73 3623 parse_target_symbol_components(tsym);
d48afc20 3624 tsym->addressof = addressof;
30263a73
FCE
3625 return tsym;
3626 }
3627
06219d6f 3628 if (t->type == tok_operator && t->content == "@var")
cc9001af
MW
3629 {
3630 target_symbol *tsym = new target_symbol;
3631 tsym->tok = t;
3632 tsym->name = t->content;
3633 expect_op("(");
3634 expect_unknown(tok_string, tsym->target_name);
bfa7e523
MW
3635 size_t found_at = tsym->target_name.find("@");
3636 if (found_at != string::npos)
3637 tsym->cu_name = tsym->target_name.substr(found_at + 1);
3638 else
3639 tsym->cu_name = "";
cc9001af
MW
3640 expect_op(")");
3641 parse_target_symbol_components(tsym);
3642 tsym->addressof = addressof;
3643 return tsym;
3644 }
3645
3646 throw parse_error (_("expected @cast, @var or $var"));
30263a73
FCE
3647}
3648
3649
3650// Parse a @defined(). Given head token has already been consumed.
3651expression* parser::parse_defined_op (const token* t)
3652{
3653 defined_op* dop = new defined_op;
3654 dop->tok = t;
3655 expect_op("(");
30263a73 3656 // no need for parse_hist_op... etc., as @defined takes only target_symbols as its operand.
d48afc20 3657 const token* tt = next ();
30263a73
FCE
3658 dop->operand = parse_target_symbol (tt);
3659 expect_op(")");
3660 return dop;
3661}
3662
3663
8cc799a5
JS
3664// Parse a @entry(). Given head token has already been consumed.
3665expression* parser::parse_entry_op (const token* t)
3666{
3667 entry_op* eop = new entry_op;
3668 eop->tok = t;
3669 expect_op("(");
3670 eop->operand = parse_expression ();
3671 expect_op(")");
3672 return eop;
3673}
3674
3675
3689db05
SC
3676// Parse a @perf(). Given head token has already been consumed.
3677expression* parser::parse_perf_op (const token* t)
3678{
3679 perf_op* pop = new perf_op;
3e6a17ee
SC
3680
3681 if (strverscmp(session.compatible.c_str(), "2.1") < 0)
3682 throw parse_error (_("expected @cast, @var or $var"));
3683
3689db05
SC
3684 pop->tok = t;
3685 expect_op("(");
ace7c23f
FCE
3686 pop->operand = parse_literal_string ();
3687 if (pop->operand->value == "")
3688 throw parse_error (_("expected non-empty string"));
3689db05
SC
3689 expect_op(")");
3690 return pop;
3691}
3692
3693
30263a73 3694
81931eab
JS
3695void
3696parser::parse_target_symbol_components (target_symbol* e)
3697{
5f36109e
JS
3698 bool pprint = false;
3699
3700 // check for pretty-print in the form $foo$
277c21bc 3701 string &base = e->name;
5f36109e
JS
3702 size_t pprint_pos = base.find_last_not_of('$');
3703 if (0 < pprint_pos && pprint_pos < base.length() - 1)
3704 {
3705 string pprint_val = base.substr(pprint_pos + 1);
3706 base.erase(pprint_pos + 1);
3707 e->components.push_back (target_symbol::component(e->tok, pprint_val, true));
3708 pprint = true;
3709 }
3710
3711 while (!pprint)
81931eab 3712 {
81931eab
JS
3713 if (peek_op ("->"))
3714 {
c67847a0
JS
3715 const token* t = next();
3716 string member;
3717 expect_ident_or_keyword (member);
5f36109e
JS
3718
3719 // check for pretty-print in the form $foo->$ or $foo->bar$
3720 pprint_pos = member.find_last_not_of('$');
3721 string pprint_val;
3722 if (pprint_pos == string::npos || pprint_pos < member.length() - 1)
3723 {
3724 pprint_val = member.substr(pprint_pos + 1);
3725 member.erase(pprint_pos + 1);
3726 pprint = true;
3727 }
3728
3729 if (!member.empty())
3730 e->components.push_back (target_symbol::component(t, member));
3731 if (pprint)
3732 e->components.push_back (target_symbol::component(t, pprint_val, true));
81931eab
JS
3733 }
3734 else if (peek_op ("["))
3735 {
c67847a0 3736 const token* t = next();
6fda2dff
JS
3737 expression* index = parse_expression();
3738 literal_number* ln = dynamic_cast<literal_number*>(index);
3739 if (ln)
3740 e->components.push_back (target_symbol::component(t, ln->value));
3741 else
3742 e->components.push_back (target_symbol::component(t, index));
81931eab 3743 expect_op ("]");
81931eab
JS
3744 }
3745 else
3746 break;
3747 }
5f36109e
JS
3748
3749 if (!pprint)
3750 {
3751 // check for pretty-print in the form $foo $
3752 // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$
3753 const token* t = peek();
3819d181 3754 if (t != NULL && t->type == tok_identifier &&
5f36109e
JS
3755 t->content.find_first_not_of('$') == string::npos)
3756 {
3757 t = next();
3758 e->components.push_back (target_symbol::component(t, t->content, true));
3759 pprint = true;
3760 }
3761 }
3762
3763 if (pprint && (peek_op ("->") || peek_op("[")))
ce0f6648 3764 throw parse_error(_("-> and [ are not accepted for a pretty-printing variable"));
81931eab
JS
3765}
3766
73267b89 3767/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.602051 seconds and 5 git commands to generate.