]> sourceware.org Git - systemtap.git/blame - parse.cxx
Correct systemtap.spec to match support currently available on aarch64
[systemtap.git] / parse.cxx
CommitLineData
2f1a1aea 1// recursive descent parser for systemtap scripts
3e6a17ee 2// Copyright (C) 2005-2013 Red Hat Inc.
77a5c1f9 3// Copyright (C) 2006 Intel Corporation.
5811366a 4// Copyright (C) 2007 Bull S.A.S
69c68955
FCE
5//
6// This file is part of systemtap, and is free software. You can
7// redistribute it and/or modify it under the terms of the GNU General
8// Public License (GPL); either version 2, or (at your option) any
9// later version.
2f1a1aea 10
2b066ec1 11#include "config.h"
2f1a1aea
FCE
12#include "staptree.h"
13#include "parse.h"
177a8ead 14#include "session.h"
3f99432c
FCE
15#include "util.h"
16
2b066ec1 17#include <iostream>
eacb10ce 18
2b066ec1 19#include <fstream>
2f1a1aea 20#include <cctype>
9c0c0e46 21#include <cstdlib>
29e64872 22#include <cassert>
9c0c0e46
FCE
23#include <cerrno>
24#include <climits>
57b73400 25#include <sstream>
f74fb737 26#include <cstring>
3f99432c 27#include <cctype>
eacb10ce
FCE
28#include <iterator>
29
7a468d68
FCE
30extern "C" {
31#include <fnmatch.h>
32}
2f1a1aea
FCE
33
34using namespace std;
35
c18f07f8
JS
36
37class lexer
38{
39public:
fee28e5c 40 bool ate_comment; // current token follows a comment
b5477cd9 41 bool ate_whitespace; // the most recent token followed whitespace
534aad8b
SM
42 bool saw_tokens; // the lexer found tokens (before preprocessing occurred)
43
b5477cd9 44 token* scan ();
c18f07f8
JS
45 lexer (istream&, const string&, systemtap_session&);
46 void set_current_file (stapfile* f);
47
2524d1fd
SM
48 static set<string> keywords;
49 static set<string> atwords;
c18f07f8
JS
50private:
51 inline int input_get ();
52 inline int input_peek (unsigned n=0);
53 void input_put (const string&, const token*);
54 string input_name;
55 string input_contents;
56 const char *input_pointer; // index into input_contents
57 const char *input_end;
58 unsigned cursor_suspend_count;
59 unsigned cursor_suspend_line;
60 unsigned cursor_suspend_column;
61 unsigned cursor_line;
62 unsigned cursor_column;
63 systemtap_session& session;
64 stapfile* current_file;
c18f07f8
JS
65};
66
67
68class parser
69{
70public:
4cd32d8c 71 parser (systemtap_session& s, const string& n, istream& i, bool p);
c18f07f8
JS
72 ~parser ();
73
74 stapfile* parse ();
fe410f52 75 stapfile* parse_library_macros ();
c18f07f8
JS
76
77private:
78 typedef enum {
79 PP_NONE,
80 PP_KEEP_THEN,
81 PP_SKIP_THEN,
82 PP_KEEP_ELSE,
83 PP_SKIP_ELSE,
84 } pp_state_t;
85
534aad8b
SM
86 struct pp1_activation;
87
fe410f52
SM
88 struct pp_macrodecl : public macrodecl {
89 pp1_activation* parent_act; // used for param bindings
90 virtual bool is_closure() { return parent_act != 0; }
91 pp_macrodecl () : macrodecl(), parent_act(0) { }
534aad8b
SM
92 };
93
c18f07f8
JS
94 systemtap_session& session;
95 string input_name;
c18f07f8
JS
96 lexer input;
97 bool privileged;
98 parse_context context;
99
534aad8b
SM
100 // preprocessing subordinate, first pass (macros)
101 struct pp1_activation {
102 const token* tok;
103 unsigned cursor; // position within macro body
104 map<string, pp_macrodecl*> params;
534aad8b 105
fe410f52 106 macrodecl* curr_macro;
534aad8b 107
fe410f52
SM
108 pp1_activation (const token tok, macrodecl* curr_macro)
109 : tok(new token(tok)), cursor(0), curr_macro(curr_macro) { }
534aad8b
SM
110 ~pp1_activation ();
111 };
112
fe410f52 113 map<string, macrodecl*> pp1_namespace;
534aad8b
SM
114 vector<pp1_activation*> pp1_state;
115 const token* next_pp1 ();
116 const token* scan_pp1 ();
117 const token* slurp_pp1_param (vector<const token*>& param);
118 const token* slurp_pp1_body (vector<const token*>& body);
119
120 // preprocessing subordinate, final pass (conditionals)
c18f07f8 121 vector<pair<const token*, pp_state_t> > pp_state;
b5477cd9 122 const token* scan_pp ();
c18f07f8
JS
123 const token* skip_pp ();
124
125 // scanning state
b5477cd9
SM
126 const token* next ();
127 const token* peek ();
c18f07f8 128
731a5359
MW
129 // Advance past and throw away current token after peek () or next ().
130 void swallow ();
131
a07a2c28 132 const token* systemtap_v_seen;
c18f07f8
JS
133 const token* last_t; // the last value returned by peek() or next()
134 const token* next_t; // lookahead token
135
731a5359
MW
136 // expectations, these swallow the token
137 void expect_known (token_type tt, string const & expected);
138 void expect_unknown (token_type tt, string & target);
139 void expect_unknown2 (token_type tt1, token_type tt2, string & target);
140
141 // convenience forms, these also swallow the token
142 void expect_op (string const & expected);
143 void expect_kw (string const & expected);
144 void expect_number (int64_t & expected);
145 void expect_ident_or_keyword (string & target);
146
147 // convenience forms, which return true or false, these don't swallow token
c18f07f8
JS
148 bool peek_op (string const & op);
149 bool peek_kw (string const & kw);
150
731a5359
MW
151 // convenience forms, which return the token
152 const token* expect_kw_token (string const & expected);
153 const token* expect_ident_or_atword (string & target);
154
c18f07f8
JS
155 void print_error (const parse_error& pe);
156 unsigned num_errors;
157
158private: // nonterminals
159 void parse_probe (vector<probe*>&, vector<probe_alias*>&);
160 void parse_global (vector<vardecl*>&, vector<probe*>&);
161 void parse_functiondecl (vector<functiondecl*>&);
162 embeddedcode* parse_embeddedcode ();
163 probe_point* parse_probe_point ();
d24f1ff4
SM
164 literal_string* consume_string_literals (const token*);
165 literal_string* parse_literal_string ();
c18f07f8
JS
166 literal* parse_literal ();
167 block* parse_stmt_block ();
168 try_block* parse_try_block ();
169 statement* parse_statement ();
170 if_statement* parse_if_statement ();
171 for_loop* parse_for_loop ();
172 for_loop* parse_while_loop ();
173 foreach_loop* parse_foreach_loop ();
174 expr_statement* parse_expr_statement ();
175 return_statement* parse_return_statement ();
176 delete_statement* parse_delete_statement ();
177 next_statement* parse_next_statement ();
178 break_statement* parse_break_statement ();
179 continue_statement* parse_continue_statement ();
180 indexable* parse_indexable ();
181 const token *parse_hist_op_or_bare_name (hist_op *&hop, string &name);
182 target_symbol *parse_target_symbol (const token* t);
8cc799a5 183 expression* parse_entry_op (const token* t);
c18f07f8 184 expression* parse_defined_op (const token* t);
3689db05 185 expression* parse_perf_op (const token* t);
c18f07f8
JS
186 expression* parse_expression ();
187 expression* parse_assignment ();
188 expression* parse_ternary ();
189 expression* parse_logical_or ();
190 expression* parse_logical_and ();
191 expression* parse_boolean_or ();
192 expression* parse_boolean_xor ();
193 expression* parse_boolean_and ();
194 expression* parse_array_in ();
93daaca8 195 expression* parse_comparison_or_regex_query ();
c18f07f8
JS
196 expression* parse_shift ();
197 expression* parse_concatenation ();
198 expression* parse_additive ();
199 expression* parse_multiplicative ();
200 expression* parse_unary ();
201 expression* parse_crement ();
202 expression* parse_value ();
203 expression* parse_symbol ();
204
205 void parse_target_symbol_components (target_symbol* e);
206};
207
208
2f1a1aea
FCE
209// ------------------------------------------------------------------------
210
c18f07f8
JS
211stapfile*
212parse (systemtap_session& s, istream& i, bool pr)
213{
4cd32d8c 214 parser p (s, "<input>", i, pr);
c18f07f8
JS
215 return p.parse ();
216}
217
218
219stapfile*
4cd32d8c 220parse (systemtap_session& s, const string& name, bool pr)
c18f07f8 221{
4cd32d8c
JS
222 ifstream i(name.c_str(), ios::in);
223 if (i.fail())
224 {
225 cerr << (file_exists(name)
226 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
227 : _F("Input file '%s' is missing.", name.c_str()))
228 << endl;
229 return 0;
230 }
231
232 parser p (s, name, i, pr);
c18f07f8
JS
233 return p.parse ();
234}
235
fe410f52
SM
236stapfile*
237parse_library_macros (systemtap_session& s, const string& name)
238{
239 ifstream i(name.c_str(), ios::in);
240 if (i.fail())
241 {
242 cerr << (file_exists(name)
243 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
244 : _F("Input file '%s' is missing.", name.c_str()))
245 << endl;
246 return 0;
247 }
248
249 parser p (s, name, i, false); // TODOXX pr is ...? should path be full??
250 return p.parse_library_macros ();
251}
252
c18f07f8 253// ------------------------------------------------------------------------
bb2e3076
FCE
254
255
4cd32d8c
JS
256parser::parser (systemtap_session& s, const string &n, istream& i, bool p):
257 session (s), input_name (n), input (i, input_name, s), privileged (p),
a07a2c28 258 context(con_unknown), systemtap_v_seen(0), last_t (0), next_t (0), num_errors (0)
4cd32d8c
JS
259{
260}
2f1a1aea
FCE
261
262parser::~parser()
263{
2f1a1aea
FCE
264}
265
d7f3e0c5
GH
266static string
267tt2str(token_type tt)
268{
269 switch (tt)
270 {
271 case tok_junk: return "junk";
272 case tok_identifier: return "identifier";
273 case tok_operator: return "operator";
274 case tok_string: return "string";
275 case tok_number: return "number";
276 case tok_embedded: return "embedded-code";
6e213f58 277 case tok_keyword: return "keyword";
d7f3e0c5
GH
278 }
279 return "unknown token";
280}
82919855 281
0323ed4d
WC
282ostream&
283operator << (ostream& o, const source_loc& loc)
284{
a704a23b 285 o << loc.file->name << ":"
0323ed4d
WC
286 << loc.line << ":"
287 << loc.column;
288
289 return o;
290}
291
56099f08
FCE
292ostream&
293operator << (ostream& o, const token& t)
294{
d7f3e0c5 295 o << tt2str(t.type);
56099f08 296
6e213f58 297 if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
56099f08 298 {
24cb178f
FCE
299 o << " '";
300 for (unsigned i=0; i<t.content.length(); i++)
301 {
302 char c = t.content[i];
303 o << (isprint (c) ? c : '?');
304 }
305 o << "'";
56099f08 306 }
56099f08 307
dff50e09 308 o << " at "
0323ed4d 309 << t.location;
56099f08
FCE
310
311 return o;
312}
313
314
dff50e09 315void
2f1a1aea
FCE
316parser::print_error (const parse_error &pe)
317{
16fc963f 318 const token *tok = pe.tok ? pe.tok : last_t;
6b7d7185 319 session.print_error(pe, tok, input_name);
2f1a1aea
FCE
320 num_errors ++;
321}
322
323
2f1a1aea 324
c434ec7e
FCE
325
326template <typename OPERAND>
327bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
328{
329 if (op->type == tok_operator && op->content == "<=")
330 { return lhs <= rhs; }
331 else if (op->type == tok_operator && op->content == ">=")
332 { return lhs >= rhs; }
333 else if (op->type == tok_operator && op->content == "<")
334 { return lhs < rhs; }
335 else if (op->type == tok_operator && op->content == ">")
336 { return lhs > rhs; }
337 else if (op->type == tok_operator && op->content == "==")
338 { return lhs == rhs; }
339 else if (op->type == tok_operator && op->content == "!=")
340 { return lhs != rhs; }
341 else
2677d2fb 342 throw parse_error (_("expected comparison operator"), op);
c434ec7e
FCE
343}
344
345
534aad8b
SM
346// Here, we perform on-the-fly preprocessing in two passes.
347
348// First pass - macro declaration and expansion.
349//
350// The basic form of a declaration is @define SIGNATURE %( BODY %)
351// where SIGNATURE is of the form macro_name (a, b, c, ...)
352// and BODY can obtain the parameter contents as @a, @b, @c, ....
353// Note that parameterless macros can also be declared.
354//
3932c705 355// Macro definitions may not be nested.
534aad8b
SM
356// A macro is available textually after it has been defined.
357//
358// The basic form of a macro invocation
359// for a parameterless macro is @macro_name,
360// for a macro with parameters is @macro_name(param_1, param_2, ...).
361//
26718dbe
SM
362// NB: this means that a parameterless macro @foo called as @foo(a, b, c)
363// leaves its 'parameters' alone, rather than consuming them to result
364// in a "too many parameters error". This may be useful in the unusual
365// case of wanting @foo to expand to the name of a function.
534aad8b
SM
366//
367// Invocations of unknown macros are left unexpanded, to allow
368// the continued use of constructs such as @cast, @var, etc.
369
fe410f52 370macrodecl::~macrodecl ()
534aad8b
SM
371{
372 delete tok;
373 for (vector<const token*>::iterator it = body.begin();
374 it != body.end(); it++)
375 delete *it;
376}
377
378parser::pp1_activation::~pp1_activation ()
379{
380 delete tok;
fe410f52 381 if (curr_macro->is_closure()) return; // body is shared with an earlier declaration
534aad8b
SM
382 for (map<string, pp_macrodecl*>::iterator it = params.begin();
383 it != params.end(); it++)
384 delete it->second;
385}
386
387// Grab a token from the current input source (main file or macro body):
388const token*
389parser::next_pp1 ()
390{
391 if (pp1_state.empty())
392 return input.scan ();
393
394 // otherwise, we're inside a macro
395 pp1_activation* act = pp1_state.back();
396 unsigned& cursor = act->cursor;
397 if (cursor < act->curr_macro->body.size())
398 {
399 token* t = new token(*act->curr_macro->body[cursor]);
0002fc51 400 t->chain = new token(*act->tok); // mark chained token
534aad8b
SM
401 cursor++;
402 return t;
403 }
404 else
405 return 0; // reached end of macro body
406}
407
408const token*
409parser::scan_pp1 ()
410{
411 while (true)
412 {
413 const token* t = next_pp1 ();
414 if (t == 0) // EOF or end of macro body
415 {
416 if (pp1_state.empty()) // actual EOF
417 return 0;
418
419 // Exit macro and loop around to look for the next token.
420 pp1_activation* act = pp1_state.back();
421 pp1_state.pop_back(); delete act;
422 continue;
423 }
424
425 // macro definition
426 if (t->type == tok_operator && t->content == "@define")
427 {
428 if (!pp1_state.empty())
429 throw parse_error (_("'@define' forbidden inside macro body"), t);
430 delete t;
431
432 // handle macro definition
433 // (1) consume macro signature
3932c705 434 t = input.scan();
534aad8b
SM
435 if (! (t && t->type == tok_identifier))
436 throw parse_error (_("expected identifier"), t);
437 string name = t->content;
438
439 // check for redefinition of existing macro
440 if (pp1_namespace.find(name) != pp1_namespace.end())
78ab2de3
SM
441 {
442 parse_error er (_F("attempt to redefine macro '@%s' in the same file", name.c_str ()), t);
443
444 // Also point to pp1_namespace[name]->tok, the site of
445 // the original definition:
446 er.chain = new parse_error (_F("macro '@%s' first defined here",
447 name.c_str()), pp1_namespace[name]->tok);
448 throw er;
449 }
450
451 // XXX: the above restriction was mostly necessary due to
452 // wanting to leave open the possibility of
453 // statically-scoped semantics in the future.
26718dbe
SM
454
455 // XXX: this cascades into further parse errors as the
456 // parser tries to parse the remaining definition... (e.g.
457 // it can't tell that the macro body isn't a conditional,
458 // that the uses of parameters aren't nonexistent
459 // macros.....)
534aad8b
SM
460 if (name == "define")
461 throw parse_error (_("attempt to redefine '@define'"), t);
462 if (input.atwords.count("@" + name))
463 session.print_warning (_F("macro redefines built-in operator '@%s'", name.c_str()), t);
464
fe410f52
SM
465 macrodecl* decl = (pp1_namespace[name] = new macrodecl);
466 decl->tok = t;
534aad8b
SM
467
468 // determine if the macro takes parameters
4ac28d7e 469 bool saw_params = false;
3932c705
SM
470 t = input.scan();
471 if (t && t->type == tok_operator && t->content == "(")
4ac28d7e
SM
472 {
473 saw_params = true;
474 do
475 {
476 delete t;
477
478 t = input.scan ();
479 if (! (t && t->type == tok_identifier))
480 throw parse_error(_("expected identifier"), t);
fe410f52 481 decl->formal_args.push_back(t->content);
4ac28d7e
SM
482 delete t;
483
484 t = input.scan ();
485 if (t && t->type == tok_operator && t->content == ",")
486 {
487 continue;
488 }
489 else if (t && t->type == tok_operator && t->content == ")")
490 {
491 delete t;
492 t = input.scan();
493 break;
494 }
495 else
496 {
497 throw parse_error (_("expected ',' or ')'"), t);
498 }
499 }
500 while (true);
501 }
534aad8b
SM
502
503 // (2) identify & consume macro body
3932c705 504 if (! (t && t->type == tok_operator && t->content == "%("))
4ac28d7e
SM
505 {
506 if (saw_params)
507 throw parse_error (_("expected '%('"), t);
508 else
509 throw parse_error (_("expected '%(' or '('"), t);
510 }
3932c705 511 delete t;
534aad8b 512
3932c705
SM
513 t = slurp_pp1_body (decl->body);
514 if (!t)
515 throw parse_error (_("incomplete macro definition - missing '%)'"), decl->tok);
516 delete t;
534aad8b
SM
517
518 // Now loop around to look for a real token.
519 continue;
520 }
521
522 // (potential) macro invocation
523 if (t->type == tok_operator && t->content[0] == '@')
524 {
525 string name = t->content.substr(1); // strip initial '@'
526
527 // check if name refers to a real parameter or macro
fe410f52 528 macrodecl* decl;
534aad8b
SM
529 pp1_activation* act = pp1_state.empty() ? 0 : pp1_state.back();
530 if (act && act->params.find(name) != act->params.end())
531 decl = act->params[name];
fe410f52
SM
532 else if (!(act && act->curr_macro->context == ctx_library)
533 && pp1_namespace.find(name) != pp1_namespace.end())
534aad8b 534 decl = pp1_namespace[name];
fe410f52
SM
535 else if (session.library_macros.find(name)
536 != session.library_macros.end())
537 decl = session.library_macros[name];
534aad8b
SM
538 else // this is an ordinary @operator
539 return t;
540
541 // handle macro invocation
542 pp1_activation *new_act = new pp1_activation(*t, decl);
fe410f52 543 unsigned num_params = decl->formal_args.size();
534aad8b
SM
544
545 // (1a) restore parameter invocation closure
fe410f52 546 if (num_params == 0 && decl->is_closure())
534aad8b
SM
547 {
548 // NB: decl->parent_act is always safe since the
549 // parameter decl (if any) comes from an activation
550 // record which deeper in the stack than new_act.
551
552 // decl is a macro parameter which must be evaluated in
553 // the context of the original point of invocation:
fe410f52 554 new_act->params = ((pp_macrodecl*)decl)->parent_act->params;
534aad8b
SM
555 goto expand;
556 }
557
558 // (1b) consume macro parameters (if any)
559 if (num_params == 0)
560 goto expand;
561
562 // for simplicity, we do not allow macro constructs here
563 // -- if we did, we'd have to recursively call scan_pp1()
564 t = next_pp1 ();
39566df2 565 if (! (t && t->type == tok_operator && t->content == "("))
534aad8b
SM
566 {
567 delete new_act;
52c2652f 568 throw parse_error (_NF
534aad8b 569 ("expected '(' in invocation of macro '@%s'"
f499dee5 570 " taking %d parameter",
534aad8b 571 "expected '(' in invocation of macro '@%s'"
f499dee5 572 " taking %d parameters",
52c2652f 573 num_params, name.c_str(), num_params), t);
534aad8b
SM
574 }
575
576 // XXX perhaps parse/count the full number of params,
577 // so we can say "expected x, found y params" on error?
578 for (unsigned i = 0; i < num_params; i++)
579 {
580 delete t;
581
582 // create parameter closure
fe410f52 583 string param_name = decl->formal_args[i];
534aad8b 584 pp_macrodecl* p = (new_act->params[param_name]
fe410f52
SM
585 = new pp_macrodecl);
586 p->tok = new token(*new_act->tok);
587 p->parent_act = act;
534aad8b
SM
588 // NB: *new_act->tok points to invocation, act is NULL at top level
589
590 t = slurp_pp1_param (p->body);
591
592 // check correct usage of ',' or ')'
593 if (t == 0) // hit unexpected EOF or end of macro
594 {
595 // XXX could we pop the stack and continue parsing
596 // the invocation, allowing macros to construct new
597 // invocations in piecemeal fashion??
598 const token* orig_t = new token(*new_act->tok);
599 delete new_act;
600 throw parse_error (_("could not find end of macro invocation"), orig_t);
601 }
602 if (t->type == tok_operator && t->content == ",")
603 {
604 if (i + 1 == num_params)
605 {
606 delete new_act;
607 throw parse_error (_F("too many parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
608 }
609 }
610 else if (t->type == tok_operator && t->content == ")")
611 {
612 if (i + 1 != num_params)
613 {
614 delete new_act;
615 throw parse_error (_F("too few parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
616 }
617 }
618 else
619 {
620 // XXX this is, incidentally, impossible
621 delete new_act;
622 throw parse_error(_("expected ',' or ')' after macro parameter"), t);
623 }
624 }
625
626 delete t;
627
628 // (2) set up macro expansion
629 expand:
630 pp1_state.push_back (new_act);
631
632 // Now loop around to look for a real token.
633 continue;
634 }
635
636 // Otherwise, we have an ordinary token.
637 return t;
638 }
639}
640
641// Consume a single macro invocation's parameters, heeding nested ( )
642// brackets and stopping on an unbalanced ')' or an unbracketed ','
643// (and returning the final separator token).
644const token*
645parser::slurp_pp1_param (vector<const token*>& param)
646{
647 const token* t = 0;
648 unsigned nesting = 0;
649 do
650 {
651 t = next_pp1 ();
652
653 if (!t)
654 break;
655 if (t->type == tok_operator && t->content == "(")
656 ++nesting;
657 else if (nesting && t->type == tok_operator && t->content == ")")
658 --nesting;
659 else if (!nesting && t->type == tok_operator
660 && (t->content == ")" || t->content == ","))
661 break;
662 param.push_back(t);
663 }
664 while (true);
665 return t; // report ")" or "," or NULL
666}
667
668
669// Consume a macro declaration's body, heeding nested %( %) brackets.
670const token*
671parser::slurp_pp1_body (vector<const token*>& body)
672{
673 const token* t = 0;
674 unsigned nesting = 0;
675 do
676 {
677 t = next_pp1 ();
678
679 if (!t)
680 break;
681 if (t->type == tok_operator && t->content == "%(")
682 ++nesting;
683 else if (nesting && t->type == tok_operator && t->content == "%)")
684 --nesting;
685 else if (!nesting && t->type == tok_operator && t->content == "%)")
686 break;
687 body.push_back(t);
688 }
689 while (true);
690 return t; // report final "%)" or NULL
691}
692
fe410f52
SM
693// Used for parsing .stpm files.
694stapfile*
695parser::parse_library_macros ()
696{
697 stapfile* f = new stapfile;
698 input.set_current_file (f);
699
700 try
701 {
702 const token* t = scan_pp1 ();
703
704 // Currently we only take objection to macro invocations if they
705 // produce a non-whitespace token after being expanded.
706
707 // XXX should we prevent macro invocations even if they expand to empty??
708
709 if (t != 0)
710 throw parse_error (_F("library macro file '%s' contains non-@define construct", input_name.c_str()), t);
711
712 // We need to first check whether *any* of the macros are duplicates,
713 // then commit to including the entire file in the global namespace
714 // (or not). Yuck.
715 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
716 it != pp1_namespace.end(); it++)
717 {
718 string name = it->first;
719
720 if (session.library_macros.find(name) != session.library_macros.end())
721 {
78ab2de3
SM
722 parse_error er(_F("duplicate definition of library macro '@%s'", name.c_str()), it->second->tok);
723 er.chain = new parse_error (_F("macro '@%s' first defined here", name.c_str()), session.library_macros[name]->tok);
724 print_error (er);
fe410f52 725
78ab2de3 726 delete er.chain;
fe410f52
SM
727 delete f;
728 return 0;
729 }
730 }
731
732 }
733 catch (const parse_error& pe)
734 {
735 print_error (pe);
736 delete f;
737 return 0;
738 }
739
740 // If no errors, include the entire file. Note how this is outside
741 // of the try-catch block -- no errors possible.
742 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
743 it != pp1_namespace.end(); it++)
744 {
745 string name = it->first;
746
747 session.library_macros[name] = it->second;
748 session.library_macros[name]->context = ctx_library;
fe410f52
SM
749 }
750
751 return f;
752}
753
534aad8b
SM
754// Second pass - preprocessor conditional expansion.
755//
177a8ead 756// The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
44ce8ed5
FCE
757// where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
758// or: arch COMPARISON-OP "arch-string"
db135493 759// or: systemtap_v COMPARISON-OP "version-string"
2e6dd9d0 760// or: systemtap_privilege COMPARISON-OP "privilege-string"
561079c8 761// or: CONFIG_foo COMPARISON-OP "config-string"
717a457b 762// or: CONFIG_foo COMPARISON-OP number
4227f98d 763// or: CONFIG_foo COMPARISON-OP CONFIG_bar
5811366a
FCE
764// or: "string1" COMPARISON-OP "string2"
765// or: number1 COMPARISON-OP number2
44ce8ed5 766// The %: ELSE-TOKENS part is optional.
177a8ead
FCE
767//
768// e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
c434ec7e 769// e.g. %( arch != "i?86" %? "foo" %: "baz" %)
561079c8 770// e.g. %( CONFIG_foo %? "foo" %: "baz" %)
177a8ead
FCE
771//
772// Up to an entire %( ... %) expression is processed by a single call
773// to this function. Tokens included by any nested conditions are
774// enqueued in a private vector.
775
776bool eval_pp_conditional (systemtap_session& s,
777 const token* l, const token* op, const token* r)
778{
44ce8ed5 779 if (l->type == tok_identifier && (l->content == "kernel_v" ||
db135493
FCE
780 l->content == "kernel_vr" ||
781 l->content == "systemtap_v"))
44ce8ed5 782 {
db135493 783 if (! (r->type == tok_string))
2677d2fb 784 throw parse_error (_("expected string literal"), r);
db135493 785
44ce8ed5 786 string target_kernel_vr = s.kernel_release;
197a4d62 787 string target_kernel_v = s.kernel_base_release;
db135493 788 string target;
dff50e09 789
db135493
FCE
790 if (l->content == "kernel_v") target = target_kernel_v;
791 else if (l->content == "kernel_vr") target = target_kernel_vr;
792 else if (l->content == "systemtap_v") target = s.compatible;
793 else assert (0);
7a468d68 794
7a468d68
FCE
795 string query = r->content;
796 bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
797
44ce8ed5
FCE
798 // collect acceptable strverscmp results.
799 int rvc_ok1, rvc_ok2;
7a468d68 800 bool wc_ok = false;
44ce8ed5
FCE
801 if (op->type == tok_operator && op->content == "<=")
802 { rvc_ok1 = -1; rvc_ok2 = 0; }
803 else if (op->type == tok_operator && op->content == ">=")
804 { rvc_ok1 = 1; rvc_ok2 = 0; }
805 else if (op->type == tok_operator && op->content == "<")
806 { rvc_ok1 = -1; rvc_ok2 = -1; }
807 else if (op->type == tok_operator && op->content == ">")
808 { rvc_ok1 = 1; rvc_ok2 = 1; }
809 else if (op->type == tok_operator && op->content == "==")
7a468d68 810 { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
44ce8ed5 811 else if (op->type == tok_operator && op->content == "!=")
7a468d68 812 { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
44ce8ed5 813 else
2677d2fb 814 throw parse_error (_("expected comparison operator"), op);
7a468d68
FCE
815
816 if ((!wc_ok) && rhs_wildcard)
2677d2fb 817 throw parse_error (_("wildcard not allowed with order comparison operators"), op);
7a468d68
FCE
818
819 if (rhs_wildcard)
820 {
821 int rvc_result = fnmatch (query.c_str(), target.c_str(),
822 FNM_NOESCAPE); // spooky
823 bool badness = (rvc_result == 0) ^ (op->content == "==");
824 return !badness;
825 }
826 else
827 {
828 int rvc_result = strverscmp (target.c_str(), query.c_str());
829 // normalize rvc_result
830 if (rvc_result < 0) rvc_result = -1;
831 if (rvc_result > 0) rvc_result = 1;
832 return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
833 }
44ce8ed5 834 }
2e6dd9d0
SM
835 else if (l->type == tok_identifier && l->content == "systemtap_privilege")
836 {
837 string target_privilege =
cba5b802
SM
838 /* XXX perhaps include a "guru" state */
839 pr_contains(s.privilege, pr_stapdev) ? "stapdev"
840 : pr_contains(s.privilege, pr_stapsys) ? "stapsys"
841 : pr_contains(s.privilege, pr_stapusr) ? "stapusr"
2e6dd9d0
SM
842 : "none"; /* should be impossible -- s.privilege always one of above */
843 assert(target_privilege != "none");
844
845 if (! (r->type == tok_string))
846 throw parse_error (_("expected string literal"), r);
847 string query_privilege = r->content;
848
849 bool nomatch = (target_privilege != query_privilege);
850
851 bool result;
852 if (op->type == tok_operator && op->content == "==")
853 result = !nomatch;
854 else if (op->type == tok_operator && op->content == "!=")
855 result = nomatch;
856 else
857 throw parse_error (_("expected '==' or '!='"), op);
cba5b802 858 /* XXX perhaps allow <= >= and similar comparisons */
2e6dd9d0
SM
859
860 return result;
861 }
44ce8ed5
FCE
862 else if (l->type == tok_identifier && l->content == "arch")
863 {
864 string target_architecture = s.architecture;
865 if (! (r->type == tok_string))
2677d2fb 866 throw parse_error (_("expected string literal"), r);
44ce8ed5 867 string query_architecture = r->content;
dff50e09 868
7a468d68
FCE
869 int nomatch = fnmatch (query_architecture.c_str(),
870 target_architecture.c_str(),
871 FNM_NOESCAPE); // still spooky
872
561079c8
FCE
873 bool result;
874 if (op->type == tok_operator && op->content == "==")
875 result = !nomatch;
876 else if (op->type == tok_operator && op->content == "!=")
877 result = nomatch;
878 else
2677d2fb 879 throw parse_error (_("expected '==' or '!='"), op);
561079c8 880
d9677d7b
DS
881 return result;
882 }
883 else if (l->type == tok_identifier && l->content == "runtime")
884 {
885 if (! (r->type == tok_string))
886 throw parse_error (_("expected string literal"), r);
887
888 string query_runtime = r->content;
889 string target_runtime;
890
891 target_runtime = (s.runtime_mode == systemtap_session::dyninst_runtime
892 ? "dyninst" : "kernel");
893 int nomatch = fnmatch (query_runtime.c_str(),
894 target_runtime.c_str(),
895 FNM_NOESCAPE); // still spooky
896
897 bool result;
898 if (op->type == tok_operator && op->content == "==")
899 result = !nomatch;
900 else if (op->type == tok_operator && op->content == "!=")
901 result = nomatch;
902 else
903 throw parse_error (_("expected '==' or '!='"), op);
904
561079c8
FCE
905 return result;
906 }
60d98537 907 else if (l->type == tok_identifier && startswith(l->content, "CONFIG_"))
561079c8 908 {
717a457b
MW
909 if (r->type == tok_string)
910 {
911 string lhs = s.kernel_config[l->content]; // may be empty
912 string rhs = r->content;
561079c8 913
717a457b 914 int nomatch = fnmatch (rhs.c_str(), lhs.c_str(), FNM_NOESCAPE); // still spooky
561079c8 915
717a457b
MW
916 bool result;
917 if (op->type == tok_operator && op->content == "==")
918 result = !nomatch;
919 else if (op->type == tok_operator && op->content == "!=")
920 result = nomatch;
921 else
2677d2fb 922 throw parse_error (_("expected '==' or '!='"), op);
dff50e09 923
717a457b
MW
924 return result;
925 }
926 else if (r->type == tok_number)
927 {
928 const char* startp = s.kernel_config[l->content].c_str ();
929 char* endp = (char*) startp;
930 errno = 0;
931 int64_t lhs = (int64_t) strtoll (startp, & endp, 0);
932 if (errno == ERANGE || errno == EINVAL || *endp != '\0')
933 throw parse_error ("Config option value not a number", l);
934
935 int64_t rhs = lex_cast<int64_t>(r->content);
936 return eval_comparison (lhs, op, rhs);
937 }
4227f98d 938 else if (r->type == tok_identifier
60d98537 939 && startswith(r->content, "CONFIG_"))
4227f98d
MW
940 {
941 // First try to convert both to numbers,
942 // otherwise threat both as strings.
943 const char* startp = s.kernel_config[l->content].c_str ();
944 char* endp = (char*) startp;
945 errno = 0;
946 int64_t val = (int64_t) strtoll (startp, & endp, 0);
947 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
948 {
949 int64_t lhs = val;
950 startp = s.kernel_config[r->content].c_str ();
951 endp = (char*) startp;
952 errno = 0;
953 int64_t rhs = (int64_t) strtoll (startp, & endp, 0);
954 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
955 return eval_comparison (lhs, op, rhs);
956 }
957
958 string lhs = s.kernel_config[l->content];
959 string rhs = s.kernel_config[r->content];
960 return eval_comparison (lhs, op, rhs);
961 }
717a457b 962 else
ce0f6648 963 throw parse_error (_("expected string, number literal or other CONFIG_... as right side operand"), r);
dff50e09 964 }
c434ec7e 965 else if (l->type == tok_string && r->type == tok_string)
5811366a 966 {
c434ec7e
FCE
967 string lhs = l->content;
968 string rhs = r->content;
969 return eval_comparison (lhs, op, rhs);
970 // NB: no wildcarding option here
971 }
972 else if (l->type == tok_number && r->type == tok_number)
973 {
974 int64_t lhs = lex_cast<int64_t>(l->content);
975 int64_t rhs = lex_cast<int64_t>(r->content);
976 return eval_comparison (lhs, op, rhs);
7a468d68 977 // NB: no wildcarding option here
5811366a
FCE
978 }
979 else if (l->type == tok_string && r->type == tok_number
980 && op->type == tok_operator)
2677d2fb 981 throw parse_error (_("expected string literal as right value"), r);
5811366a
FCE
982 else if (l->type == tok_number && r->type == tok_string
983 && op->type == tok_operator)
2677d2fb 984 throw parse_error (_("expected number literal as right value"), r);
c434ec7e 985
177a8ead 986 else
d9677d7b
DS
987 throw parse_error (_("expected 'arch', 'kernel_v', 'kernel_vr', 'systemtap_v',\n"
988 " 'runtime', 'systemtap_privilege', 'CONFIG_...', or\n"
989 " comparison between strings or integers"), l);
177a8ead
FCE
990}
991
992
5811366a 993// Only tokens corresponding to the TRUE statement must be expanded
177a8ead 994const token*
b5477cd9 995parser::scan_pp ()
177a8ead
FCE
996{
997 while (true)
998 {
e92f2566
JS
999 pp_state_t pp = PP_NONE;
1000 if (!pp_state.empty())
1001 pp = pp_state.back().second;
1002
1003 const token* t = 0;
1004 if (pp == PP_SKIP_THEN || pp == PP_SKIP_ELSE)
1005 t = skip_pp ();
1006 else
534aad8b 1007 t = scan_pp1 ();
e92f2566
JS
1008
1009 if (t == 0) // EOF
177a8ead 1010 {
e92f2566
JS
1011 if (pp != PP_NONE)
1012 {
1013 t = pp_state.back().first;
1014 pp_state.pop_back(); // so skip_some doesn't keep trying to close this
ce0f6648 1015 //TRANSLATORS: 'conditional' meaning 'conditional preprocessing'
2677d2fb 1016 throw parse_error (_("incomplete conditional at end of file"), t);
e92f2566 1017 }
177a8ead
FCE
1018 return t;
1019 }
1020
e92f2566
JS
1021 // misplaced preprocessor "then"
1022 if (t->type == tok_operator && t->content == "%?")
2677d2fb 1023 throw parse_error (_("incomplete conditional - missing '%('"), t);
e92f2566
JS
1024
1025 // preprocessor "else"
1026 if (t->type == tok_operator && t->content == "%:")
1027 {
1028 if (pp == PP_NONE)
2677d2fb 1029 throw parse_error (_("incomplete conditional - missing '%('"), t);
e92f2566 1030 if (pp == PP_KEEP_ELSE || pp == PP_SKIP_ELSE)
2677d2fb 1031 throw parse_error (_("invalid conditional - duplicate '%:'"), t);
1d94e4e5
SM
1032 // XXX: here and elsewhere, error cascades might be avoided
1033 // by dropping tokens until we reach the closing %)
e92f2566
JS
1034
1035 pp_state.back().second = (pp == PP_KEEP_THEN) ?
1036 PP_SKIP_ELSE : PP_KEEP_ELSE;
1037 delete t;
1038 continue;
1039 }
1040
1041 // preprocessor close
1042 if (t->type == tok_operator && t->content == "%)")
1043 {
1044 if (pp == PP_NONE)
2677d2fb 1045 throw parse_error (_("incomplete conditional - missing '%('"), t);
e92f2566 1046 delete pp_state.back().first;
a07a2c28 1047 delete t; //this is the closing bracket
e92f2566
JS
1048 pp_state.pop_back();
1049 continue;
1050 }
dff50e09 1051
177a8ead
FCE
1052 if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
1053 return t;
1054
1055 // We have a %( - it's time to throw a preprocessing party!
1056
2d7881bf
PP
1057 bool result = false;
1058 bool and_result = true;
1059 const token *n = NULL;
1060 do {
1061 const token *l, *op, *r;
534aad8b
SM
1062 l = scan_pp1 ();
1063 op = scan_pp1 ();
1064 r = scan_pp1 ();
2d7881bf 1065 if (l == 0 || op == 0 || r == 0)
2677d2fb 1066 throw parse_error (_("incomplete condition after '%('"), t);
2d7881bf
PP
1067 // NB: consider generalizing to consume all tokens until %?, and
1068 // passing that as a vector to an evaluator.
1069
1070 // Do not evaluate the condition if we haven't expanded everything.
1071 // This may occur when having several recursive conditionals.
1072 and_result &= eval_pp_conditional (session, l, op, r);
a07a2c28
LB
1073 if(l->content=="systemtap_v")
1074 systemtap_v_seen=r;
1075
1076 else
1077 delete r;
1078
2d7881bf
PP
1079 delete l;
1080 delete op;
2d7881bf
PP
1081 delete n;
1082
534aad8b 1083 n = scan_pp1 ();
2d7881bf
PP
1084 if (n && n->type == tok_operator && n->content == "&&")
1085 continue;
1086 result |= and_result;
1087 and_result = true;
1088 if (! (n && n->type == tok_operator && n->content == "||"))
1089 break;
1090 } while (true);
3f847830
FCE
1091
1092 /*
1093 clog << "PP eval (" << *t << ") == " << result << endl;
1094 */
1095
e92f2566 1096 const token *m = n;
177a8ead 1097 if (! (m && m->type == tok_operator && m->content == "%?"))
2677d2fb 1098 throw parse_error (_("expected '%?' marker for conditional"), t);
70c743d8 1099 delete m; // "%?"
177a8ead 1100
e92f2566
JS
1101 pp = result ? PP_KEEP_THEN : PP_SKIP_THEN;
1102 pp_state.push_back (make_pair (t, pp));
3f847830 1103
e92f2566
JS
1104 // Now loop around to look for a real token.
1105 }
1106}
3f847830 1107
3f847830 1108
e92f2566
JS
1109// Skip over tokens and any errors, heeding
1110// only nested preprocessor starts and ends.
1111const token*
1112parser::skip_pp ()
1113{
1114 const token* t = 0;
1115 unsigned nesting = 0;
1116 do
1117 {
1118 try
1119 {
534aad8b 1120 t = scan_pp1 ();
177a8ead 1121 }
e92f2566 1122 catch (const parse_error &e)
70c743d8 1123 {
e92f2566 1124 continue;
70c743d8 1125 }
e92f2566
JS
1126 if (!t)
1127 break;
1128 if (t->type == tok_operator && t->content == "%(")
1129 ++nesting;
1130 else if (nesting && t->type == tok_operator && t->content == "%)")
1131 --nesting;
1132 else if (!nesting && t->type == tok_operator &&
1133 (t->content == "%:" || t->content == "%?" || t->content == "%)"))
1134 break;
1135 delete t;
177a8ead 1136 }
e92f2566
JS
1137 while (true);
1138 return t;
177a8ead
FCE
1139}
1140
1141
2f1a1aea 1142const token*
b5477cd9 1143parser::next ()
2f1a1aea
FCE
1144{
1145 if (! next_t)
b5477cd9 1146 next_t = scan_pp ();
2f1a1aea 1147 if (! next_t)
2677d2fb 1148 throw parse_error (_("unexpected end-of-file"));
2f1a1aea 1149
2f1a1aea
FCE
1150 last_t = next_t;
1151 // advance by zeroing next_t
1152 next_t = 0;
1153 return last_t;
1154}
1155
1156
1157const token*
b5477cd9 1158parser::peek ()
2f1a1aea
FCE
1159{
1160 if (! next_t)
b5477cd9 1161 next_t = scan_pp ();
2f1a1aea
FCE
1162
1163 // don't advance by zeroing next_t
1164 last_t = next_t;
1165 return next_t;
1166}
1167
1168
731a5359
MW
1169void
1170parser::swallow ()
1171{
1172 // can only swallow something last peeked or nexted token.
1173 assert (last_t != 0);
1174 delete last_t;
1175 // advance by zeroing next_t
1176 last_t = next_t = 0;
1177}
1178
1179
d7f3e0c5
GH
1180static inline bool
1181tok_is(token const * t, token_type tt, string const & expected)
1182{
1183 return t && t->type == tt && t->content == expected;
1184}
1185
1186
731a5359 1187void
d7f3e0c5
GH
1188parser::expect_known (token_type tt, string const & expected)
1189{
1190 const token *t = next();
57b73400 1191 if (! (t && t->type == tt && t->content == expected))
ce0f6648 1192 throw parse_error (_F("expected '%s'", expected.c_str()));
731a5359 1193 swallow (); // We are done with it, content was copied.
d7f3e0c5
GH
1194}
1195
1196
731a5359 1197void
d7f3e0c5
GH
1198parser::expect_unknown (token_type tt, string & target)
1199{
1200 const token *t = next();
1201 if (!(t && t->type == tt))
2677d2fb 1202 throw parse_error (_("expected ") + tt2str(tt));
d7f3e0c5 1203 target = t->content;
731a5359 1204 swallow (); // We are done with it, content was copied.
d7f3e0c5
GH
1205}
1206
1207
731a5359 1208void
493ee224
DS
1209parser::expect_unknown2 (token_type tt1, token_type tt2, string & target)
1210{
1211 const token *t = next();
1212 if (!(t && (t->type == tt1 || t->type == tt2)))
dd90d565 1213 throw parse_error (_F("expected %s or %s", tt2str(tt1).c_str(), tt2str(tt2).c_str()));
493ee224 1214 target = t->content;
731a5359 1215 swallow (); // We are done with it, content was copied.
493ee224
DS
1216}
1217
1218
731a5359 1219void
d7f3e0c5
GH
1220parser::expect_op (std::string const & expected)
1221{
731a5359 1222 expect_known (tok_operator, expected);
d7f3e0c5
GH
1223}
1224
1225
731a5359 1226void
d7f3e0c5
GH
1227parser::expect_kw (std::string const & expected)
1228{
731a5359 1229 expect_known (tok_keyword, expected);
d7f3e0c5
GH
1230}
1231
dff50e09 1232const token*
731a5359
MW
1233parser::expect_kw_token (std::string const & expected)
1234{
1235 const token *t = next();
1236 if (! (t && t->type == tok_keyword && t->content == expected))
1237 throw parse_error (_F("expected '%s'", expected.c_str()));
1238 return t;
1239}
1240
1241void
e38723d2 1242parser::expect_number (int64_t & value)
57b73400 1243{
e38723d2
MH
1244 bool neg = false;
1245 const token *t = next();
1246 if (t->type == tok_operator && t->content == "-")
1247 {
1248 neg = true;
731a5359 1249 swallow ();
e38723d2
MH
1250 t = next ();
1251 }
1252 if (!(t && t->type == tok_number))
2677d2fb 1253 throw parse_error (_("expected number"));
e38723d2
MH
1254
1255 const char* startp = t->content.c_str ();
1256 char* endp = (char*) startp;
1257
1258 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1259 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1260 // since the lexer only gives us positive digit strings, but we'll
1261 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1262 errno = 0;
1263 value = (int64_t) strtoull (startp, & endp, 0);
1264 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
1265 || (neg && (unsigned long long) value > 9223372036854775808ULL)
1266 || (unsigned long long) value > 18446744073709551615ULL
1267 || value < -9223372036854775807LL-1)
2677d2fb 1268 throw parse_error (_("number invalid or out of range"));
dff50e09 1269
e38723d2
MH
1270 if (neg)
1271 value = -value;
1272
731a5359 1273 swallow (); // We are done with it, content was parsed and copied into value.
57b73400
GH
1274}
1275
d7f3e0c5 1276
dff50e09 1277const token*
50cc7cd5 1278parser::expect_ident_or_atword (std::string & target)
d7f3e0c5 1279{
06219d6f
SM
1280 const token *t = next();
1281
1282 // accept identifiers and operators beginning in '@':
1283 if (!t || (t->type != tok_identifier
1284 && (t->type != tok_operator || t->content[0] != '@')))
1285 // XXX currently this is only called from parse_hist_op_or_bare_name(),
1286 // so the message is accurate, but keep an eye out in the future:
dd90d565 1287 throw parse_error (_F("expected %s or statistical operation", tt2str(tok_identifier).c_str()));
06219d6f
SM
1288
1289 target = t->content;
1290 return t;
d7f3e0c5
GH
1291}
1292
1293
731a5359 1294void
493ee224
DS
1295parser::expect_ident_or_keyword (std::string & target)
1296{
731a5359 1297 expect_unknown2 (tok_identifier, tok_keyword, target);
493ee224
DS
1298}
1299
1300
dff50e09 1301bool
d7f3e0c5
GH
1302parser::peek_op (std::string const & op)
1303{
1304 return tok_is (peek(), tok_operator, op);
1305}
1306
1307
dff50e09 1308bool
d7f3e0c5
GH
1309parser::peek_kw (std::string const & kw)
1310{
1311 return tok_is (peek(), tok_identifier, kw);
1312}
1313
1314
1315
66c7d4c1 1316lexer::lexer (istream& input, const string& in, systemtap_session& s):
03ba36d9
SM
1317 ate_comment(false), ate_whitespace(false), saw_tokens(false),
1318 input_name (in), input_pointer (0), input_end (0), cursor_suspend_count(0),
1319 cursor_suspend_line (1), cursor_suspend_column (1), cursor_line (1),
1320 cursor_column (1), session(s), current_file (0)
eacb10ce 1321{
66c7d4c1 1322 getline(input, input_contents, '\0');
2203b032 1323
66c7d4c1
JS
1324 input_pointer = input_contents.data();
1325 input_end = input_contents.data() + input_contents.size();
1326
1327 if (keywords.empty())
1328 {
3a7ec735
FCE
1329 // NB: adding new keywords is highly disruptive to the language,
1330 // in particular to existing scripts that could be suddenly
1331 // broken. If done at all, it has to be s.compatible-sensitive,
1332 // and broadly advertised.
66c7d4c1
JS
1333 keywords.insert("probe");
1334 keywords.insert("global");
1335 keywords.insert("function");
1336 keywords.insert("if");
1337 keywords.insert("else");
1338 keywords.insert("for");
1339 keywords.insert("foreach");
1340 keywords.insert("in");
1341 keywords.insert("limit");
1342 keywords.insert("return");
1343 keywords.insert("delete");
1344 keywords.insert("while");
1345 keywords.insert("break");
1346 keywords.insert("continue");
1347 keywords.insert("next");
1348 keywords.insert("string");
1349 keywords.insert("long");
f4fe2e93
FCE
1350 keywords.insert("try");
1351 keywords.insert("catch");
66c7d4c1 1352 }
2524d1fd
SM
1353
1354 if (atwords.empty())
1355 {
1356 // NB: adding new @words is mildly disruptive to existing
1357 // scripts that define macros with the same name, but not
1358 // really. The user will merely receive a warning that they are
1359 // redefining an existing operator.
1360 atwords.insert("@cast");
1361 atwords.insert("@defined");
1362 atwords.insert("@entry");
1363 atwords.insert("@var");
1364 atwords.insert("@avg");
1365 atwords.insert("@count");
1366 atwords.insert("@sum");
1367 atwords.insert("@min");
1368 atwords.insert("@max");
1369 atwords.insert("@hist_linear");
1370 atwords.insert("@hist_log");
1371 }
eacb10ce 1372}
2f1a1aea 1373
66c7d4c1 1374set<string> lexer::keywords;
2524d1fd 1375set<string> lexer::atwords;
66c7d4c1 1376
1b1b4ceb
RA
1377void
1378lexer::set_current_file (stapfile* f)
1379{
1380 current_file = f;
2203b032
JS
1381 if (f)
1382 {
1383 f->file_contents = input_contents;
1384 f->name = input_name;
1385 }
1b1b4ceb 1386}
bb2e3076
FCE
1387
1388int
1389lexer::input_peek (unsigned n)
1390{
66c7d4c1
JS
1391 if (input_pointer + n >= input_end)
1392 return -1; // EOF
1393 return (unsigned char)*(input_pointer + n);
bb2e3076
FCE
1394}
1395
1396
dff50e09 1397int
2f1a1aea
FCE
1398lexer::input_get ()
1399{
66c7d4c1 1400 int c = input_peek();
bb2e3076
FCE
1401 if (c < 0) return c; // EOF
1402
66c7d4c1
JS
1403 ++input_pointer;
1404
3f99432c 1405 if (cursor_suspend_count)
9300f661
JS
1406 {
1407 // Track effect of input_put: preserve previous cursor/line_column
1408 // until all of its characters are consumed.
1409 if (--cursor_suspend_count == 0)
1410 {
1411 cursor_line = cursor_suspend_line;
1412 cursor_column = cursor_suspend_column;
1413 }
1414 }
3f99432c 1415 else
2f1a1aea 1416 {
3f99432c
FCE
1417 // update source cursor
1418 if (c == '\n')
1419 {
1420 cursor_line ++;
1421 cursor_column = 1;
1422 }
1423 else
1424 cursor_column ++;
2f1a1aea 1425 }
2f1a1aea 1426
eacb10ce 1427 // clog << "[" << (char)c << "]";
2f1a1aea
FCE
1428 return c;
1429}
1430
1431
3f99432c 1432void
9300f661 1433lexer::input_put (const string& chars, const token* t)
3f99432c 1434{
66c7d4c1
JS
1435 size_t pos = input_pointer - input_contents.data();
1436 // clog << "[put:" << chars << " @" << pos << "]";
1437 input_contents.insert (pos, chars);
eacb10ce 1438 cursor_suspend_count += chars.size();
9300f661
JS
1439 cursor_suspend_line = cursor_line;
1440 cursor_suspend_column = cursor_column;
1441 cursor_line = t->location.line;
1442 cursor_column = t->location.column;
66c7d4c1
JS
1443 input_pointer = input_contents.data() + pos;
1444 input_end = input_contents.data() + input_contents.size();
3f99432c
FCE
1445}
1446
1447
2f1a1aea 1448token*
b5477cd9 1449lexer::scan ()
2f1a1aea 1450{
fee28e5c 1451 ate_comment = false; // reset for each new token
b5477cd9 1452 ate_whitespace = false; // reset for each new token
534aad8b
SM
1453
1454 // XXX be very sure to restore old_saw_tokens if we return without a token:
1455 bool old_saw_tokens = saw_tokens;
1456 saw_tokens = true;
1457
2f1a1aea 1458 token* n = new token;
2203b032 1459 n->location.file = current_file;
534aad8b 1460 n->chain = NULL; // important safety dance
2f1a1aea 1461
9300f661
JS
1462skip:
1463 bool suspended = (cursor_suspend_count > 0);
2f1a1aea
FCE
1464 n->location.line = cursor_line;
1465 n->location.column = cursor_column;
1466
1467 int c = input_get();
3f99432c 1468 // clog << "{" << (char)c << (char)c2 << "}";
2f1a1aea
FCE
1469 if (c < 0)
1470 {
1471 delete n;
534aad8b 1472 saw_tokens = old_saw_tokens;
2f1a1aea
FCE
1473 return 0;
1474 }
1475
1476 if (isspace (c))
b5477cd9
SM
1477 {
1478 ate_whitespace = true;
1479 goto skip;
1480 }
2f1a1aea 1481
66c7d4c1
JS
1482 int c2 = input_peek ();
1483
3f99432c
FCE
1484 // Paste command line arguments as character streams into
1485 // the beginning of a token. $1..$999 go through as raw
1486 // characters; @1..@999 are quoted/escaped as strings.
1487 // $# and @# expand to the number of arguments, similarly
1488 // raw or quoted.
9300f661 1489 if ((c == '$' || c == '@') && (c2 == '#'))
3f99432c 1490 {
9300f661
JS
1491 n->content.push_back (c);
1492 n->content.push_back (c2);
3f99432c 1493 input_get(); // swallow '#'
9300f661 1494 if (suspended)
16fc963f
SM
1495 {
1496 n->make_junk(_("invalid nested substitution of command line arguments"));
1497 return n;
1498 }
9300f661
JS
1499 size_t num_args = session.args.size ();
1500 input_put ((c == '$') ? lex_cast (num_args) : lex_cast_qstring (num_args), n);
1501 n->content.clear();
1502 goto skip;
3f99432c 1503 }
9300f661 1504 else if ((c == '$' || c == '@') && (isdigit (c2)))
3f99432c 1505 {
9300f661 1506 n->content.push_back (c);
3f99432c
FCE
1507 unsigned idx = 0;
1508 do
1509 {
1510 input_get ();
1511 idx = (idx * 10) + (c2 - '0');
9300f661 1512 n->content.push_back (c2);
3f99432c
FCE
1513 c2 = input_peek ();
1514 } while (c2 > 0 &&
dff50e09 1515 isdigit (c2) &&
3f99432c 1516 idx <= session.args.size()); // prevent overflow
16fc963f
SM
1517 if (suspended)
1518 {
1519 n->make_junk(_("invalid nested substitution of command line arguments"));
1520 return n;
1521 }
3f99432c
FCE
1522 if (idx == 0 ||
1523 idx-1 >= session.args.size())
16fc963f
SM
1524 {
1525 n->make_junk(_F("command line argument index %lu out of range [1-%lu]",
1526 (unsigned long) idx, (unsigned long) session.args.size()));
1527 return n;
1528 }
9300f661
JS
1529 const string& arg = session.args[idx-1];
1530 input_put ((c == '$') ? arg : lex_cast_qstring (arg), n);
1531 n->content.clear();
1532 goto skip;
3f99432c
FCE
1533 }
1534
b5477cd9 1535 else if (isalpha (c) || c == '$' || c == '@' || c == '_')
2f1a1aea
FCE
1536 {
1537 n->type = tok_identifier;
1538 n->content = (char) c;
b5477cd9 1539 while (isalnum (c2) || c2 == '_' || c2 == '$')
2f1a1aea 1540 {
3f99432c
FCE
1541 input_get ();
1542 n->content.push_back (c2);
1543 c2 = input_peek ();
6e213f58 1544 }
213bee8f 1545
66c7d4c1 1546 if (keywords.count(n->content))
3f99432c 1547 n->type = tok_keyword;
06219d6f 1548 else if (n->content[0] == '@')
dd90d565 1549 // makes it easier to detect illegal use of @words:
06219d6f 1550 n->type = tok_operator;
dff50e09 1551
2f1a1aea
FCE
1552 return n;
1553 }
1554
3a20432b 1555 else if (isdigit (c)) // positive literal
2f1a1aea 1556 {
2f1a1aea 1557 n->type = tok_number;
9c0c0e46
FCE
1558 n->content = (char) c;
1559
66c7d4c1 1560 while (isalnum (c2))
2f1a1aea 1561 {
9c0c0e46
FCE
1562 // NB: isalnum is very permissive. We rely on strtol, called in
1563 // parser::parse_literal below, to confirm that the number string
1564 // is correctly formatted and in range.
1565
66c7d4c1
JS
1566 input_get ();
1567 n->content.push_back (c2);
1568 c2 = input_peek ();
2f1a1aea
FCE
1569 }
1570 return n;
1571 }
1572
1573 else if (c == '\"')
1574 {
1575 n->type = tok_string;
1576 while (1)
1577 {
1578 c = input_get ();
1579
3f99432c 1580 if (c < 0 || c == '\n')
2f1a1aea 1581 {
16fc963f
SM
1582 n->make_junk(_("Could not find matching closing quote"));
1583 return n;
2f1a1aea
FCE
1584 }
1585 if (c == '\"') // closing double-quotes
1586 break;
3f99432c 1587 else if (c == '\\') // see also input_put
dff50e09 1588 {
7d46afb8
GH
1589 c = input_get ();
1590 switch (c)
1591 {
ef8a6134
SM
1592 case 'x':
1593 if (strverscmp(session.compatible.c_str(), "2.3") < 0)
1594 goto the_default;
7d46afb8
GH
1595 case 'a':
1596 case 'b':
1597 case 't':
1598 case 'n':
1599 case 'v':
1600 case 'f':
1601 case 'r':
f03954fd 1602 case '0' ... '7': // NB: need only match the first digit
7d46afb8 1603 case '\\':
7d46afb8 1604 // Pass these escapes through to the string value
dff50e09 1605 // being parsed; it will be emitted into a C literal.
c7c8d469
FCE
1606 // XXX: PR13371: perhaps we should evaluate them here
1607 // (and re-quote them during translate.cxx emission).
7d46afb8
GH
1608 n->content.push_back('\\');
1609
3f99432c 1610 // fall through
ef8a6134
SM
1611 default: the_default:
1612 n->content.push_back(c);
1613 break;
7d46afb8 1614 }
2f1a1aea
FCE
1615 }
1616 else
1617 n->content.push_back(c);
1618 }
1619 return n;
1620 }
1621
1622 else if (ispunct (c))
1623 {
bb2e3076 1624 int c3 = input_peek (1);
2f1a1aea 1625
3a20432b
FCE
1626 // NB: if we were to recognize negative numeric literals here,
1627 // we'd introduce another grammar ambiguity:
1628 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
1629 // instead of tok_number(1) tok_operator('-') tok_number(1)
1630
66c7d4c1 1631 if (c == '#') // shell comment
2f1a1aea
FCE
1632 {
1633 unsigned this_line = cursor_line;
bb2e3076
FCE
1634 do { c = input_get (); }
1635 while (c >= 0 && cursor_line == this_line);
fee28e5c 1636 ate_comment = true;
b5477cd9 1637 ate_whitespace = true;
2f1a1aea
FCE
1638 goto skip;
1639 }
66c7d4c1 1640 else if ((c == '/' && c2 == '/')) // C++ comment
63a7c90e
FCE
1641 {
1642 unsigned this_line = cursor_line;
bb2e3076
FCE
1643 do { c = input_get (); }
1644 while (c >= 0 && cursor_line == this_line);
fee28e5c 1645 ate_comment = true;
b5477cd9 1646 ate_whitespace = true;
63a7c90e
FCE
1647 goto skip;
1648 }
1649 else if (c == '/' && c2 == '*') // C comment
1650 {
66c7d4c1
JS
1651 (void) input_get (); // swallow '*' already in c2
1652 c = input_get ();
63a7c90e 1653 c2 = input_get ();
bb2e3076 1654 while (c2 >= 0)
63a7c90e 1655 {
66c7d4c1
JS
1656 if (c == '*' && c2 == '/')
1657 break;
63a7c90e
FCE
1658 c = c2;
1659 c2 = input_get ();
63a7c90e 1660 }
fee28e5c 1661 ate_comment = true;
b5477cd9 1662 ate_whitespace = true;
bb2e3076 1663 goto skip;
63a7c90e 1664 }
54dfabe9
FCE
1665 else if (c == '%' && c2 == '{') // embedded code
1666 {
1667 n->type = tok_embedded;
1668 (void) input_get (); // swallow '{' already in c2
66c7d4c1
JS
1669 c = input_get ();
1670 c2 = input_get ();
1671 while (c2 >= 0)
54dfabe9 1672 {
66c7d4c1
JS
1673 if (c == '%' && c2 == '}')
1674 return n;
ebbf9df4
FCE
1675 if (c == '}' && c2 == '%') // possible typo
1676 session.print_warning (_("possible erroneous closing '}%', use '%}'?"), n);
54dfabe9 1677 n->content += c;
66c7d4c1
JS
1678 c = c2;
1679 c2 = input_get ();
54dfabe9 1680 }
72cdb9cd 1681
16fc963f
SM
1682 n->make_junk(_("Could not find matching '%}' to close embedded function block"));
1683 return n;
54dfabe9 1684 }
2f1a1aea 1685
bb2e3076
FCE
1686 // We're committed to recognizing at least the first character
1687 // as an operator.
2f1a1aea 1688 n->type = tok_operator;
66c7d4c1 1689 n->content = c;
2f1a1aea 1690
bb2e3076 1691 // match all valid operators, in decreasing size order
66c7d4c1
JS
1692 if ((c == '<' && c2 == '<' && c3 == '<') ||
1693 (c == '<' && c2 == '<' && c3 == '=') ||
1694 (c == '>' && c2 == '>' && c3 == '='))
82919855 1695 {
66c7d4c1
JS
1696 n->content += c2;
1697 n->content += c3;
bb2e3076
FCE
1698 input_get (); input_get (); // swallow other two characters
1699 }
66c7d4c1
JS
1700 else if ((c == '=' && c2 == '=') ||
1701 (c == '!' && c2 == '=') ||
1702 (c == '<' && c2 == '=') ||
1703 (c == '>' && c2 == '=') ||
93daaca8
SM
1704 (c == '=' && c2 == '~') ||
1705 (c == '!' && c2 == '~') ||
66c7d4c1
JS
1706 (c == '+' && c2 == '=') ||
1707 (c == '-' && c2 == '=') ||
1708 (c == '*' && c2 == '=') ||
1709 (c == '/' && c2 == '=') ||
1710 (c == '%' && c2 == '=') ||
1711 (c == '&' && c2 == '=') ||
1712 (c == '^' && c2 == '=') ||
1713 (c == '|' && c2 == '=') ||
1714 (c == '.' && c2 == '=') ||
1715 (c == '&' && c2 == '&') ||
1716 (c == '|' && c2 == '|') ||
1717 (c == '+' && c2 == '+') ||
1718 (c == '-' && c2 == '-') ||
1719 (c == '-' && c2 == '>') ||
1720 (c == '<' && c2 == '<') ||
1721 (c == '>' && c2 == '>') ||
177a8ead 1722 // preprocessor tokens
66c7d4c1
JS
1723 (c == '%' && c2 == '(') ||
1724 (c == '%' && c2 == '?') ||
1725 (c == '%' && c2 == ':') ||
1726 (c == '%' && c2 == ')'))
bb2e3076 1727 {
66c7d4c1 1728 n->content += c2;
bb2e3076 1729 input_get (); // swallow other character
dff50e09 1730 }
2f1a1aea
FCE
1731
1732 return n;
1733 }
1734
1735 else
1736 {
1737 n->type = tok_junk;
e3795795
FCE
1738 ostringstream s;
1739 s << "\\x" << hex << setw(2) << setfill('0') << c;
1740 n->content = s.str();
16fc963f 1741 n->msg = ""; // signal parser to emit "expected X, found junk" type error
2f1a1aea
FCE
1742 return n;
1743 }
1744}
1745
16fc963f
SM
1746// ------------------------------------------------------------------------
1747
1748void
1749token::make_junk (const string new_msg)
1750{
1751 type = tok_junk;
1752 msg = new_msg;
1753}
2f1a1aea
FCE
1754
1755// ------------------------------------------------------------------------
1756
1757stapfile*
1758parser::parse ()
1759{
1760 stapfile* f = new stapfile;
1b1b4ceb 1761 input.set_current_file (f);
56099f08
FCE
1762
1763 bool empty = true;
1764
2f1a1aea
FCE
1765 while (1)
1766 {
1767 try
1768 {
a07a2c28 1769 systemtap_v_seen = 0;
2f1a1aea 1770 const token* t = peek ();
534aad8b 1771 if (! t) // nice clean EOF, modulo any preprocessing that occurred
2f1a1aea
FCE
1772 break;
1773
56099f08 1774 empty = false;
6e213f58
DS
1775 if (t->type == tok_keyword && t->content == "probe")
1776 {
1777 context = con_probe;
1778 parse_probe (f->probes, f->aliases);
1779 }
1780 else if (t->type == tok_keyword && t->content == "global")
1781 {
1782 context = con_global;
4b5f3e45 1783 parse_global (f->globals, f->probes);
6e213f58
DS
1784 }
1785 else if (t->type == tok_keyword && t->content == "function")
1786 {
1787 context = con_function;
1788 parse_functiondecl (f->functions);
1789 }
54dfabe9 1790 else if (t->type == tok_embedded)
6e213f58
DS
1791 {
1792 context = con_embedded;
1793 f->embeds.push_back (parse_embeddedcode ());
1794 }
2f1a1aea 1795 else
6e213f58
DS
1796 {
1797 context = con_unknown;
2677d2fb 1798 throw parse_error (_("expected 'probe', 'global', 'function', or '%{'"));
6e213f58 1799 }
2f1a1aea
FCE
1800 }
1801 catch (parse_error& pe)
1802 {
1803 print_error (pe);
16fc963f
SM
1804
1805 // XXX: do we want tok_junk to be able to force skip_some behaviour?
cd7116b8 1806 if (pe.skip_some) // for recovery
46954f1d
FCE
1807 // Quietly swallow all tokens until the next keyword we can start parsing from.
1808 while (1)
1809 try
1810 {
cd7116b8
FCE
1811 {
1812 const token* t = peek ();
1813 if (! t)
1814 break;
46954f1d
FCE
1815 if (t->type == tok_keyword && t->content == "probe") break;
1816 else if (t->type == tok_keyword && t->content == "global") break;
1817 else if (t->type == tok_keyword && t->content == "function") break;
1818 else if (t->type == tok_embedded) break;
731a5359 1819 swallow (); // swallow it
cd7116b8 1820 }
46954f1d
FCE
1821 }
1822 catch (parse_error& pe2)
1823 {
1824 // parse error during recovery ... ugh
1825 print_error (pe2);
1826 }
177a8ead 1827 }
2f1a1aea
FCE
1828 }
1829
56099f08
FCE
1830 if (empty)
1831 {
534aad8b
SM
1832 // vary message depending on whether file was *actually* empty:
1833 cerr << (input.saw_tokens
1834 ? _F("Input file '%s' is empty after preprocessing.", input_name.c_str())
4cd32d8c 1835 : _F("Input file '%s' is empty.", input_name.c_str()))
534aad8b 1836 << endl;
56099f08 1837 delete f;
2203b032 1838 f = 0;
56099f08
FCE
1839 }
1840 else if (num_errors > 0)
2f1a1aea 1841 {
52c2652f 1842 cerr << _NF("%d parse error.", "%d parse errors.", num_errors, num_errors) << endl;
2f1a1aea 1843 delete f;
2203b032 1844 f = 0;
2f1a1aea 1845 }
dff50e09 1846
2203b032 1847 input.set_current_file(0);
2f1a1aea
FCE
1848 return f;
1849}
1850
1851
20c6c071 1852void
54dfabe9
FCE
1853parser::parse_probe (std::vector<probe *> & probe_ret,
1854 std::vector<probe_alias *> & alias_ret)
2f1a1aea 1855{
82919855 1856 const token* t0 = next ();
6e213f58 1857 if (! (t0->type == tok_keyword && t0->content == "probe"))
2677d2fb 1858 throw parse_error (_("expected 'probe'"));
82919855 1859
20c6c071
GH
1860 vector<probe_point *> aliases;
1861 vector<probe_point *> locations;
1862
1863 bool equals_ok = true;
82919855 1864
97266278
LG
1865 int epilogue_alias = 0;
1866
2f1a1aea
FCE
1867 while (1)
1868 {
b4ceace2 1869 probe_point * pp = parse_probe_point ();
dff50e09 1870
b4ceace2 1871 const token* t = peek ();
dff50e09 1872 if (equals_ok && t
b4ceace2
FCE
1873 && t->type == tok_operator && t->content == "=")
1874 {
1ad820e3 1875 if (pp->optional || pp->sufficient)
2677d2fb 1876 throw parse_error (_("probe point alias name cannot be optional nor sufficient"), pp->components.front()->tok);
b4ceace2 1877 aliases.push_back(pp);
731a5359 1878 swallow ();
b4ceace2
FCE
1879 continue;
1880 }
dff50e09 1881 else if (equals_ok && t
97266278
LG
1882 && t->type == tok_operator && t->content == "+=")
1883 {
1ad820e3 1884 if (pp->optional || pp->sufficient)
2677d2fb 1885 throw parse_error (_("probe point alias name cannot be optional nor sufficient"), pp->components.front()->tok);
97266278
LG
1886 aliases.push_back(pp);
1887 epilogue_alias = 1;
731a5359 1888 swallow ();
97266278
LG
1889 continue;
1890 }
b4ceace2
FCE
1891 else if (t && t->type == tok_operator && t->content == ",")
1892 {
1893 locations.push_back(pp);
1894 equals_ok = false;
731a5359 1895 swallow ();
b4ceace2
FCE
1896 continue;
1897 }
1898 else if (t && t->type == tok_operator && t->content == "{")
1899 {
1900 locations.push_back(pp);
1901 break;
1902 }
2f1a1aea 1903 else
2677d2fb 1904 throw parse_error (_("expected probe point specifier"));
2f1a1aea 1905 }
20c6c071 1906
20c6c071
GH
1907 if (aliases.empty())
1908 {
54dfabe9
FCE
1909 probe* p = new probe;
1910 p->tok = t0;
1911 p->locations = locations;
1912 p->body = parse_stmt_block ();
37ebca01 1913 p->privileged = privileged;
a07a2c28 1914 p->systemtap_v_conditional = systemtap_v_seen;
54dfabe9 1915 probe_ret.push_back (p);
20c6c071
GH
1916 }
1917 else
1918 {
54dfabe9 1919 probe_alias* p = new probe_alias (aliases);
97266278
LG
1920 if(epilogue_alias)
1921 p->epilogue_style = true;
1922 else
1923 p->epilogue_style = false;
54dfabe9
FCE
1924 p->tok = t0;
1925 p->locations = locations;
1926 p->body = parse_stmt_block ();
37ebca01 1927 p->privileged = privileged;
a07a2c28 1928 p->systemtap_v_conditional = systemtap_v_seen;
54dfabe9 1929 alias_ret.push_back (p);
20c6c071 1930 }
54dfabe9 1931}
20c6c071 1932
54dfabe9
FCE
1933
1934embeddedcode*
1935parser::parse_embeddedcode ()
1936{
1937 embeddedcode* e = new embeddedcode;
1938 const token* t = next ();
1939 if (t->type != tok_embedded)
2677d2fb 1940 throw parse_error (_("expected '%{'"));
24cb178f
FCE
1941
1942 if (! privileged)
efb02738 1943 throw parse_error (_("embedded code in unprivileged script; need stap -g"),
cd7116b8 1944 false /* don't skip tokens for parse resumption */);
54dfabe9
FCE
1945
1946 e->tok = t;
1947 e->code = t->content;
1948 return e;
2f1a1aea
FCE
1949}
1950
1951
1952block*
56099f08 1953parser::parse_stmt_block ()
2f1a1aea
FCE
1954{
1955 block* pb = new block;
1956
56099f08
FCE
1957 const token* t = next ();
1958 if (! (t->type == tok_operator && t->content == "{"))
2677d2fb 1959 throw parse_error (_("expected '{'"));
56099f08
FCE
1960
1961 pb->tok = t;
2b066ec1 1962
2f1a1aea
FCE
1963 while (1)
1964 {
46954f1d
FCE
1965 t = peek ();
1966 if (t && t->type == tok_operator && t->content == "}")
1967 {
731a5359 1968 swallow ();
46954f1d
FCE
1969 break;
1970 }
1971 pb->statements.push_back (parse_statement ());
2f1a1aea
FCE
1972 }
1973
1974 return pb;
1975}
1976
1977
f4fe2e93
FCE
1978try_block*
1979parser::parse_try_block ()
1980{
1981 try_block* pb = new try_block;
1982
731a5359 1983 pb->tok = expect_kw_token ("try");
f4fe2e93
FCE
1984 pb->try_block = parse_stmt_block();
1985 expect_kw ("catch");
1986
1987 const token* t = peek ();
3819d181 1988 if (t != NULL && t->type == tok_operator && t->content == "(")
f4fe2e93 1989 {
731a5359 1990 swallow (); // swallow the '('
f4fe2e93
FCE
1991
1992 t = next();
1993 if (! (t->type == tok_identifier))
2677d2fb 1994 throw parse_error (_("expected identifier"));
f4fe2e93
FCE
1995 symbol* sym = new symbol;
1996 sym->tok = t;
1997 sym->name = t->content;
1998 pb->catch_error_var = sym;
1999
2000 expect_op (")");
2001 }
2002 else
2003 pb->catch_error_var = 0;
2004
2005 pb->catch_block = parse_stmt_block();
2006
2007 return pb;
2008}
2009
2010
2011
2f1a1aea
FCE
2012statement*
2013parser::parse_statement ()
2014{
40b71c47 2015 statement *ret;
2f1a1aea
FCE
2016 const token* t = peek ();
2017 if (t && t->type == tok_operator && t->content == ";")
f946b10f 2018 return new null_statement (next ());
dff50e09 2019 else if (t && t->type == tok_operator && t->content == "{")
40b71c47 2020 return parse_stmt_block (); // Don't squash semicolons.
f4fe2e93
FCE
2021 else if (t && t->type == tok_keyword && t->content == "try")
2022 return parse_try_block (); // Don't squash semicolons.
6e213f58 2023 else if (t && t->type == tok_keyword && t->content == "if")
40b71c47 2024 return parse_if_statement (); // Don't squash semicolons.
6e213f58 2025 else if (t && t->type == tok_keyword && t->content == "for")
40b71c47 2026 return parse_for_loop (); // Don't squash semicolons.
6e213f58 2027 else if (t && t->type == tok_keyword && t->content == "foreach")
40b71c47
MW
2028 return parse_foreach_loop (); // Don't squash semicolons.
2029 else if (t && t->type == tok_keyword && t->content == "while")
2030 return parse_while_loop (); // Don't squash semicolons.
6e213f58 2031 else if (t && t->type == tok_keyword && t->content == "return")
40b71c47 2032 ret = parse_return_statement ();
6e213f58 2033 else if (t && t->type == tok_keyword && t->content == "delete")
40b71c47 2034 ret = parse_delete_statement ();
6e213f58 2035 else if (t && t->type == tok_keyword && t->content == "break")
40b71c47 2036 ret = parse_break_statement ();
6e213f58 2037 else if (t && t->type == tok_keyword && t->content == "continue")
40b71c47 2038 ret = parse_continue_statement ();
6e213f58 2039 else if (t && t->type == tok_keyword && t->content == "next")
40b71c47 2040 ret = parse_next_statement ();
2f1a1aea
FCE
2041 else if (t && (t->type == tok_operator || // expressions are flexible
2042 t->type == tok_identifier ||
2043 t->type == tok_number ||
7d902887
FCE
2044 t->type == tok_string ||
2045 t->type == tok_embedded ))
40b71c47 2046 ret = parse_expr_statement ();
54dfabe9 2047 // XXX: consider generally accepting tok_embedded here too
2f1a1aea 2048 else
2677d2fb 2049 throw parse_error (_("expected statement"));
40b71c47
MW
2050
2051 // Squash "empty" trailing colons after any "non-block-like" statement.
2052 t = peek ();
2053 if (t && t->type == tok_operator && t->content == ";")
2054 {
731a5359 2055 swallow (); // Silently eat trailing ; after statement
40b71c47
MW
2056 }
2057
2058 return ret;
2f1a1aea
FCE
2059}
2060
2061
56099f08 2062void
78f6bba6 2063parser::parse_global (vector <vardecl*>& globals, vector<probe*>&)
2f1a1aea 2064{
82919855 2065 const token* t0 = next ();
6e213f58 2066 if (! (t0->type == tok_keyword && t0->content == "global"))
2677d2fb 2067 throw parse_error (_("expected 'global'"));
731a5359 2068 swallow ();
82919855 2069
56099f08
FCE
2070 while (1)
2071 {
2072 const token* t = next ();
2073 if (! (t->type == tok_identifier))
2677d2fb 2074 throw parse_error (_("expected identifier"));
56099f08 2075
2b066ec1
FCE
2076 for (unsigned i=0; i<globals.size(); i++)
2077 if (globals[i]->name == t->content)
2677d2fb 2078 throw parse_error (_("duplicate global name"));
dff50e09 2079
24cb178f
FCE
2080 vardecl* d = new vardecl;
2081 d->name = t->content;
2082 d->tok = t;
a07a2c28 2083 d->systemtap_v_conditional = systemtap_v_seen;
24cb178f 2084 globals.push_back (d);
56099f08 2085
82919855 2086 t = peek ();
ef474d24 2087
74e6cc92
CM
2088 if(t && t->type == tok_operator && t->content == "%") //wrapping
2089 {
2090 d->wrap = true;
731a5359 2091 swallow ();
74e6cc92
CM
2092 t = peek();
2093 }
2094
ef474d24
JS
2095 if (t && t->type == tok_operator && t->content == "[") // array size
2096 {
2097 int64_t size;
731a5359 2098 swallow ();
ef474d24
JS
2099 expect_number(size);
2100 if (size <= 0 || size > 1000000) // arbitrary max
2677d2fb 2101 throw parse_error(_("array size out of range"));
ef474d24
JS
2102 d->maxsize = (int)size;
2103 expect_known(tok_operator, "]");
2104 t = peek ();
2105 }
2106
4b5f3e45 2107 if (t && t->type == tok_operator && t->content == "=") // initialization
ef474d24
JS
2108 {
2109 if (!d->compatible_arity(0))
2677d2fb 2110 throw parse_error(_("only scalar globals can be initialized"));
58701b78 2111 d->set_arity(0, t);
731a5359 2112 next (); // Don't swallow, set_arity() used the peeked token.
ef474d24
JS
2113 d->init = parse_literal ();
2114 d->type = d->init->type;
2115 t = peek ();
2116 }
4b5f3e45 2117
c3799d72 2118 if (t && t->type == tok_operator && t->content == ";") // termination
950da622 2119 {
731a5359 2120 swallow ();
950da622
MW
2121 break;
2122 }
c3799d72 2123
4b5f3e45 2124 if (t && t->type == tok_operator && t->content == ",") // next global
82919855 2125 {
731a5359 2126 swallow ();
82919855
FCE
2127 continue;
2128 }
56099f08 2129 else
82919855 2130 break;
56099f08
FCE
2131 }
2132}
2133
2134
24cb178f
FCE
2135void
2136parser::parse_functiondecl (std::vector<functiondecl*>& functions)
56099f08 2137{
82919855 2138 const token* t = next ();
6e213f58 2139 if (! (t->type == tok_keyword && t->content == "function"))
2677d2fb 2140 throw parse_error (_("expected 'function'"));
731a5359 2141 swallow ();
56099f08 2142
82919855 2143 t = next ();
6e213f58
DS
2144 if (! (t->type == tok_identifier)
2145 && ! (t->type == tok_keyword
2146 && (t->content == "string" || t->content == "long")))
2677d2fb 2147 throw parse_error (_("expected identifier"));
24cb178f
FCE
2148
2149 for (unsigned i=0; i<functions.size(); i++)
2150 if (functions[i]->name == t->content)
2677d2fb 2151 throw parse_error (_("duplicate function name"));
24cb178f
FCE
2152
2153 functiondecl *fd = new functiondecl ();
56099f08
FCE
2154 fd->name = t->content;
2155 fd->tok = t;
2156
2157 t = next ();
6a505121
FCE
2158 if (t->type == tok_operator && t->content == ":")
2159 {
731a5359 2160 swallow ();
6a505121 2161 t = next ();
6e213f58 2162 if (t->type == tok_keyword && t->content == "string")
6a505121 2163 fd->type = pe_string;
6e213f58 2164 else if (t->type == tok_keyword && t->content == "long")
6a505121 2165 fd->type = pe_long;
2677d2fb 2166 else throw parse_error (_("expected 'string' or 'long'"));
731a5359 2167 swallow ();
6a505121
FCE
2168
2169 t = next ();
2170 }
2171
56099f08 2172 if (! (t->type == tok_operator && t->content == "("))
2677d2fb 2173 throw parse_error (_("expected '('"));
731a5359 2174 swallow ();
56099f08
FCE
2175
2176 while (1)
2177 {
2178 t = next ();
2179
100a540e 2180 // permit zero-argument functions
56099f08 2181 if (t->type == tok_operator && t->content == ")")
731a5359
MW
2182 {
2183 swallow ();
2184 break;
2185 }
56099f08 2186 else if (! (t->type == tok_identifier))
2677d2fb 2187 throw parse_error (_("expected identifier"));
56099f08
FCE
2188 vardecl* vd = new vardecl;
2189 vd->name = t->content;
2190 vd->tok = t;
2191 fd->formal_args.push_back (vd);
a07a2c28 2192 fd->systemtap_v_conditional = systemtap_v_seen;
56099f08
FCE
2193
2194 t = next ();
6a505121
FCE
2195 if (t->type == tok_operator && t->content == ":")
2196 {
731a5359 2197 swallow ();
6a505121 2198 t = next ();
6e213f58 2199 if (t->type == tok_keyword && t->content == "string")
6a505121 2200 vd->type = pe_string;
6e213f58 2201 else if (t->type == tok_keyword && t->content == "long")
6a505121 2202 vd->type = pe_long;
2677d2fb 2203 else throw parse_error (_("expected 'string' or 'long'"));
731a5359 2204 swallow ();
6a505121
FCE
2205 t = next ();
2206 }
56099f08 2207 if (t->type == tok_operator && t->content == ")")
731a5359
MW
2208 {
2209 swallow ();
2210 break;
2211 }
56099f08 2212 if (t->type == tok_operator && t->content == ",")
731a5359
MW
2213 {
2214 swallow ();
2215 continue;
2216 }
56099f08 2217 else
2677d2fb 2218 throw parse_error (_("expected ',' or ')'"));
56099f08
FCE
2219 }
2220
54dfabe9
FCE
2221 t = peek ();
2222 if (t && t->type == tok_embedded)
2223 fd->body = parse_embeddedcode ();
2224 else
2225 fd->body = parse_stmt_block ();
24cb178f
FCE
2226
2227 functions.push_back (fd);
2f1a1aea
FCE
2228}
2229
2230
9c0c0e46
FCE
2231probe_point*
2232parser::parse_probe_point ()
2f1a1aea 2233{
9c0c0e46 2234 probe_point* pl = new probe_point;
2f1a1aea 2235
9c0c0e46 2236 while (1)
2f1a1aea 2237 {
b5477cd9 2238 const token* t = next ();
6e213f58
DS
2239 if (! (t->type == tok_identifier
2240 // we must allow ".return" and ".function", which are keywords
b5477cd9
SM
2241 || t->type == tok_keyword
2242 // we must allow "*", due to being an operator
2243 || (t->type == tok_operator && t->content == "*")))
2677d2fb 2244 throw parse_error (_("expected identifier or '*'"));
9c0c0e46 2245
b5477cd9
SM
2246 // loop which reconstitutes an identifier with wildcards
2247 string content = t->content;
2248 while (1)
2249 {
2250 const token* u = peek();
3819d181
MW
2251 if (u == NULL)
2252 break;
b5477cd9
SM
2253 // ensure pieces of the identifier are adjacent:
2254 if (input.ate_whitespace)
2255 break;
2256 // ensure pieces of the identifier are valid:
2257 if (! (u->type == tok_identifier
2258 // we must allow arbitrary keywords with a wildcard
2259 || u->type == tok_keyword
2260 // we must allow "*", due to being an operator
2261 || (u->type == tok_operator && u->content == "*")))
2262 break;
2263
2264 // append u to t
2265 content = content + u->content;
2266
2267 // consume u
731a5359 2268 swallow ();
b5477cd9 2269 }
534aad8b
SM
2270 // get around const-ness of t:
2271 token* new_t = new token(*t);
b5477cd9
SM
2272 new_t->content = content;
2273 delete t; t = new_t;
9c0c0e46
FCE
2274
2275 probe_point::component* c = new probe_point::component;
2276 c->functor = t->content;
f1a0157a 2277 c->tok = t;
9c0c0e46 2278 pl->components.push_back (c);
6e3347a9 2279 // NB we may add c->arg soon
9c0c0e46
FCE
2280
2281 t = peek ();
a477f3f1 2282
6e3347a9 2283 // consume optional parameter
9c0c0e46
FCE
2284 if (t && t->type == tok_operator && t->content == "(")
2285 {
731a5359 2286 swallow (); // consume "("
9c0c0e46
FCE
2287 c->arg = parse_literal ();
2288
2289 t = next ();
2290 if (! (t->type == tok_operator && t->content == ")"))
2677d2fb 2291 throw parse_error (_("expected ')'"));
731a5359 2292 swallow ();
9c0c0e46
FCE
2293
2294 t = peek ();
9c0c0e46 2295 }
9c0c0e46
FCE
2296
2297 if (t && t->type == tok_operator && t->content == ".")
6e3347a9 2298 {
731a5359 2299 swallow ();
6e3347a9
FCE
2300 continue;
2301 }
2302
f1a0157a 2303 // We only fall through here at the end of a probe point (past
6e3347a9
FCE
2304 // all the dotted/parametrized components).
2305
d898100a
FCE
2306 if (t && t->type == tok_operator &&
2307 (t->content == "?" || t->content == "!"))
6e3347a9
FCE
2308 {
2309 pl->optional = true;
d898100a
FCE
2310 if (t->content == "!") pl->sufficient = true;
2311 // NB: sufficient implies optional
731a5359 2312 swallow ();
6e3347a9
FCE
2313 t = peek ();
2314 // fall through
cbbe8080
MH
2315 }
2316
2317 if (t && t->type == tok_keyword && t->content == "if")
2318 {
731a5359 2319 swallow ();
cbbe8080 2320 t = peek ();
00917a8a 2321 if (!(t && t->type == tok_operator && t->content == "("))
2677d2fb 2322 throw parse_error (_("expected '('"));
731a5359 2323 swallow ();
cbbe8080
MH
2324
2325 pl->condition = parse_expression ();
2326
2327 t = peek ();
00917a8a 2328 if (!(t && t->type == tok_operator && t->content == ")"))
2677d2fb 2329 throw parse_error (_("expected ')'"));
731a5359 2330 swallow ();
cbbe8080
MH
2331 t = peek ();
2332 // fall through
6e3347a9
FCE
2333 }
2334
dff50e09 2335 if (t && t->type == tok_operator
6e3347a9
FCE
2336 && (t->content == "{" || t->content == "," ||
2337 t->content == "=" || t->content == "+=" ))
2338 break;
dff50e09 2339
2677d2fb 2340 throw parse_error (_("expected one of '. , ( ? ! { = +='"));
2f1a1aea
FCE
2341 }
2342
2343 return pl;
2344}
2345
2346
d24f1ff4
SM
2347literal_string*
2348parser::consume_string_literals(const token *t)
2349{
2350 literal_string *ls = new literal_string (t->content);
2351
2352 // PR11208: check if the next token is also a string literal;
2353 // auto-concatenate it. This is complicated to the extent that we
2354 // need to skip intermediate whitespace.
2355 //
2356 // NB for versions prior to 2.0: but don't skip over intervening comments
2357 const token *n = peek();
2358 while (n != NULL && n->type == tok_string
2359 && ! (strverscmp(session.compatible.c_str(), "2.0") < 0
2360 && input.ate_comment))
2361 {
2362 ls->value.append(next()->content); // consume and append the token
2363 n = peek();
2364 }
2365 return ls;
2366}
2367
2368
2369// Parse a string literal and perform backslash escaping on the contents:
2370literal_string*
2371parser::parse_literal_string ()
2372{
2373 const token* t = next ();
2374 literal_string* l;
2375 if (t->type == tok_string)
2376 l = consume_string_literals (t);
2377 else
6a420ae9 2378 throw parse_error (_("expected literal string"));
d24f1ff4
SM
2379
2380 l->tok = t;
2381 return l;
2382}
2383
2384
2f1a1aea
FCE
2385literal*
2386parser::parse_literal ()
2387{
2388 const token* t = next ();
56099f08 2389 literal* l;
2f1a1aea 2390 if (t->type == tok_string)
c5be7511 2391 {
d24f1ff4 2392 l = consume_string_literals (t);
c5be7511 2393 }
16e8f21f 2394 else
9c0c0e46 2395 {
16e8f21f
JS
2396 bool neg = false;
2397 if (t->type == tok_operator && t->content == "-")
2398 {
2399 neg = true;
731a5359 2400 swallow ();
16e8f21f
JS
2401 t = next ();
2402 }
2403
2404 if (t->type == tok_number)
2405 {
2406 const char* startp = t->content.c_str ();
2407 char* endp = (char*) startp;
2408
2409 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
2410 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
79e6d33f
JS
2411 // since the lexer only gives us positive digit strings, but we'll
2412 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
16e8f21f
JS
2413 errno = 0;
2414 long long value = (long long) strtoull (startp, & endp, 0);
16e8f21f 2415 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
79e6d33f 2416 || (neg && (unsigned long long) value > 9223372036854775808ULL)
16e8f21f
JS
2417 || (unsigned long long) value > 18446744073709551615ULL
2418 || value < -9223372036854775807LL-1)
2677d2fb 2419 throw parse_error (_("number invalid or out of range"));
16e8f21f 2420
79e6d33f
JS
2421 if (neg)
2422 value = -value;
2423
16e8f21f
JS
2424 l = new literal_number (value);
2425 }
2426 else
2677d2fb 2427 throw parse_error (_("expected literal string or number"));
9c0c0e46 2428 }
56099f08
FCE
2429
2430 l->tok = t;
2431 return l;
2f1a1aea
FCE
2432}
2433
2434
2435if_statement*
2436parser::parse_if_statement ()
2437{
2438 const token* t = next ();
6e213f58 2439 if (! (t->type == tok_keyword && t->content == "if"))
2677d2fb 2440 throw parse_error (_("expected 'if'"));
56099f08
FCE
2441 if_statement* s = new if_statement;
2442 s->tok = t;
2443
2444 t = next ();
2f1a1aea 2445 if (! (t->type == tok_operator && t->content == "("))
2677d2fb 2446 throw parse_error (_("expected '('"));
731a5359 2447 swallow ();
2f1a1aea 2448
2f1a1aea
FCE
2449 s->condition = parse_expression ();
2450
2451 t = next ();
2452 if (! (t->type == tok_operator && t->content == ")"))
2677d2fb 2453 throw parse_error (_("expected ')'"));
731a5359 2454 swallow ();
2f1a1aea
FCE
2455
2456 s->thenblock = parse_statement ();
2457
2458 t = peek ();
6e213f58 2459 if (t && t->type == tok_keyword && t->content == "else")
2f1a1aea 2460 {
731a5359 2461 swallow ();
2f1a1aea
FCE
2462 s->elseblock = parse_statement ();
2463 }
ed10c639
FCE
2464 else
2465 s->elseblock = 0; // in case not otherwise initialized
2f1a1aea
FCE
2466
2467 return s;
2468}
2469
2470
69c68955
FCE
2471expr_statement*
2472parser::parse_expr_statement ()
2473{
2474 expr_statement *es = new expr_statement;
2475 const token* t = peek ();
5e58d11c
MW
2476 if (t == NULL)
2477 throw parse_error (_("expression statement expected"));
731a5359
MW
2478 // Copy, we only peeked, parse_expression might swallow.
2479 es->tok = new token (*t);
69c68955
FCE
2480 es->value = parse_expression ();
2481 return es;
2482}
2483
2484
56099f08
FCE
2485return_statement*
2486parser::parse_return_statement ()
2487{
2488 const token* t = next ();
6e213f58 2489 if (! (t->type == tok_keyword && t->content == "return"))
2677d2fb 2490 throw parse_error (_("expected 'return'"));
6e213f58 2491 if (context != con_function)
2677d2fb 2492 throw parse_error (_("found 'return' not in function context"));
56099f08
FCE
2493 return_statement* s = new return_statement;
2494 s->tok = t;
2495 s->value = parse_expression ();
2496 return s;
2497}
2498
2499
2500delete_statement*
2501parser::parse_delete_statement ()
2502{
2503 const token* t = next ();
6e213f58 2504 if (! (t->type == tok_keyword && t->content == "delete"))
2677d2fb 2505 throw parse_error (_("expected 'delete'"));
56099f08
FCE
2506 delete_statement* s = new delete_statement;
2507 s->tok = t;
2508 s->value = parse_expression ();
2509 return s;
2510}
2511
2512
f3c26ea5
FCE
2513next_statement*
2514parser::parse_next_statement ()
2515{
2516 const token* t = next ();
6e213f58 2517 if (! (t->type == tok_keyword && t->content == "next"))
2677d2fb 2518 throw parse_error (_("expected 'next'"));
6e213f58 2519 if (context != con_probe)
2677d2fb 2520 throw parse_error (_("found 'next' not in probe context"));
f3c26ea5
FCE
2521 next_statement* s = new next_statement;
2522 s->tok = t;
2523 return s;
2524}
2525
2526
2527break_statement*
2528parser::parse_break_statement ()
2529{
2530 const token* t = next ();
6e213f58 2531 if (! (t->type == tok_keyword && t->content == "break"))
2677d2fb 2532 throw parse_error (_("expected 'break'"));
f3c26ea5
FCE
2533 break_statement* s = new break_statement;
2534 s->tok = t;
2535 return s;
2536}
2537
2538
2539continue_statement*
2540parser::parse_continue_statement ()
2541{
2542 const token* t = next ();
6e213f58 2543 if (! (t->type == tok_keyword && t->content == "continue"))
2677d2fb 2544 throw parse_error (_("expected 'continue'"));
f3c26ea5
FCE
2545 continue_statement* s = new continue_statement;
2546 s->tok = t;
2547 return s;
2548}
2549
2550
69c68955
FCE
2551for_loop*
2552parser::parse_for_loop ()
2553{
f3c26ea5 2554 const token* t = next ();
6e213f58 2555 if (! (t->type == tok_keyword && t->content == "for"))
2677d2fb 2556 throw parse_error (_("expected 'for'"));
f3c26ea5
FCE
2557 for_loop* s = new for_loop;
2558 s->tok = t;
2559
2560 t = next ();
2561 if (! (t->type == tok_operator && t->content == "("))
2677d2fb 2562 throw parse_error (_("expected '('"));
731a5359 2563 swallow ();
f3c26ea5
FCE
2564
2565 // initializer + ";"
2566 t = peek ();
2567 if (t && t->type == tok_operator && t->content == ";")
2568 {
cbfbbf69 2569 s->init = 0;
731a5359 2570 swallow ();
f3c26ea5
FCE
2571 }
2572 else
2573 {
2574 s->init = parse_expr_statement ();
2575 t = next ();
2576 if (! (t->type == tok_operator && t->content == ";"))
2677d2fb 2577 throw parse_error (_("expected ';'"));
731a5359 2578 swallow ();
f3c26ea5
FCE
2579 }
2580
2581 // condition + ";"
2582 t = peek ();
2583 if (t && t->type == tok_operator && t->content == ";")
2584 {
2585 literal_number* l = new literal_number(1);
2586 s->cond = l;
2587 s->cond->tok = next ();
2588 }
2589 else
2590 {
2591 s->cond = parse_expression ();
2592 t = next ();
2593 if (! (t->type == tok_operator && t->content == ";"))
2677d2fb 2594 throw parse_error (_("expected ';'"));
731a5359 2595 swallow ();
f3c26ea5 2596 }
dff50e09 2597
f3c26ea5
FCE
2598 // increment + ")"
2599 t = peek ();
2600 if (t && t->type == tok_operator && t->content == ")")
2601 {
cbfbbf69 2602 s->incr = 0;
731a5359 2603 swallow ();
f3c26ea5
FCE
2604 }
2605 else
2606 {
2607 s->incr = parse_expr_statement ();
2608 t = next ();
2609 if (! (t->type == tok_operator && t->content == ")"))
2677d2fb 2610 throw parse_error (_("expected ')'"));
731a5359 2611 swallow ();
f3c26ea5
FCE
2612 }
2613
2614 // block
2615 s->block = parse_statement ();
2616
2617 return s;
2618}
2619
2620
2621for_loop*
2622parser::parse_while_loop ()
2623{
2624 const token* t = next ();
6e213f58 2625 if (! (t->type == tok_keyword && t->content == "while"))
2677d2fb 2626 throw parse_error (_("expected 'while'"));
f3c26ea5
FCE
2627 for_loop* s = new for_loop;
2628 s->tok = t;
2629
2630 t = next ();
2631 if (! (t->type == tok_operator && t->content == "("))
2677d2fb 2632 throw parse_error (_("expected '('"));
731a5359 2633 swallow ();
f3c26ea5
FCE
2634
2635 // dummy init and incr fields
cbfbbf69
FCE
2636 s->init = 0;
2637 s->incr = 0;
f3c26ea5
FCE
2638
2639 // condition
2640 s->cond = parse_expression ();
2641
f3c26ea5
FCE
2642 t = next ();
2643 if (! (t->type == tok_operator && t->content == ")"))
2677d2fb 2644 throw parse_error (_("expected ')'"));
731a5359 2645 swallow ();
dff50e09 2646
f3c26ea5
FCE
2647 // block
2648 s->block = parse_statement ();
2649
2650 return s;
69c68955
FCE
2651}
2652
2653
2654foreach_loop*
2655parser::parse_foreach_loop ()
2656{
2657 const token* t = next ();
6e213f58 2658 if (! (t->type == tok_keyword && t->content == "foreach"))
2677d2fb 2659 throw parse_error (_("expected 'foreach'"));
69c68955
FCE
2660 foreach_loop* s = new foreach_loop;
2661 s->tok = t;
93484556 2662 s->sort_direction = 0;
fd5689dc 2663 s->sort_aggr = sc_none;
c261711d 2664 s->value = NULL;
27f21e8c 2665 s->limit = NULL;
69c68955
FCE
2666
2667 t = next ();
2668 if (! (t->type == tok_operator && t->content == "("))
2677d2fb 2669 throw parse_error (_("expected '('"));
731a5359 2670 swallow ();
69c68955 2671
c261711d
JS
2672 symbol* lookahead_sym = NULL;
2673 int lookahead_sort = 0;
2674
2675 t = peek ();
2676 if (t && t->type == tok_identifier)
2677 {
2678 next ();
2679 lookahead_sym = new symbol;
2680 lookahead_sym->tok = t;
2681 lookahead_sym->name = t->content;
2682
2683 t = peek ();
2684 if (t && t->type == tok_operator &&
2685 (t->content == "+" || t->content == "-"))
2686 {
c261711d 2687 lookahead_sort = (t->content == "+") ? 1 : -1;
731a5359 2688 swallow ();
c261711d
JS
2689 }
2690
2691 t = peek ();
2692 if (t && t->type == tok_operator && t->content == "=")
2693 {
731a5359 2694 swallow ();
c261711d
JS
2695 s->value = lookahead_sym;
2696 if (lookahead_sort)
2697 {
2698 s->sort_direction = lookahead_sort;
2699 s->sort_column = 0;
2700 }
2701 lookahead_sym = NULL;
2702 }
2703 }
2704
69c68955
FCE
2705 // see also parse_array_in
2706
2707 bool parenthesized = false;
2708 t = peek ();
c261711d 2709 if (!lookahead_sym && t && t->type == tok_operator && t->content == "[")
69c68955 2710 {
731a5359 2711 swallow ();
69c68955
FCE
2712 parenthesized = true;
2713 }
2714
c261711d
JS
2715 if (lookahead_sym)
2716 {
2717 s->indexes.push_back (lookahead_sym);
2718 if (lookahead_sort)
2719 {
2720 s->sort_direction = lookahead_sort;
2721 s->sort_column = 1;
2722 }
2723 lookahead_sym = NULL;
2724 }
2725 else while (1)
69c68955
FCE
2726 {
2727 t = next ();
2728 if (! (t->type == tok_identifier))
2677d2fb 2729 throw parse_error (_("expected identifier"));
69c68955
FCE
2730 symbol* sym = new symbol;
2731 sym->tok = t;
2732 sym->name = t->content;
2733 s->indexes.push_back (sym);
2734
93484556
FCE
2735 t = peek ();
2736 if (t && t->type == tok_operator &&
2737 (t->content == "+" || t->content == "-"))
2738 {
2739 if (s->sort_direction)
2677d2fb 2740 throw parse_error (_("multiple sort directives"));
93484556
FCE
2741 s->sort_direction = (t->content == "+") ? 1 : -1;
2742 s->sort_column = s->indexes.size();
731a5359 2743 swallow ();
93484556
FCE
2744 }
2745
69c68955
FCE
2746 if (parenthesized)
2747 {
93484556 2748 t = peek ();
69c68955
FCE
2749 if (t && t->type == tok_operator && t->content == ",")
2750 {
731a5359 2751 swallow ();
69c68955
FCE
2752 continue;
2753 }
2754 else if (t && t->type == tok_operator && t->content == "]")
2755 {
731a5359 2756 swallow ();
69c68955
FCE
2757 break;
2758 }
dff50e09 2759 else
2677d2fb 2760 throw parse_error (_("expected ',' or ']'"));
69c68955
FCE
2761 }
2762 else
2763 break; // expecting only one expression
2764 }
2765
2766 t = next ();
6e213f58 2767 if (! (t->type == tok_keyword && t->content == "in"))
2677d2fb 2768 throw parse_error (_("expected 'in'"));
731a5359 2769 swallow ();
dff50e09 2770
d02548c0 2771 s->base = parse_indexable();
69c68955 2772
fd5689dc
FCE
2773 // check for atword, see also expect_ident_or_atword,
2774 t = peek ();
2775 if (t && t->type == tok_operator && t->content[0] == '@')
2776 {
2777 if (t->content == "@avg") s->sort_aggr = sc_average;
2778 else if (t->content == "@min") s->sort_aggr = sc_min;
2779 else if (t->content == "@max") s->sort_aggr = sc_max;
2780 else if (t->content == "@count") s->sort_aggr = sc_count;
2781 else if (t->content == "@sum") s->sort_aggr = sc_sum;
2782 else throw parse_error(_("expected statistical operation"));
2783 swallow();
2784
2785 t = peek ();
2786 if (! (t && t->type == tok_operator && (t->content == "+" || t->content == "-")))
2787 throw parse_error(_("expected sort directive"));
2788 }
2789
93484556
FCE
2790 t = peek ();
2791 if (t && t->type == tok_operator &&
2792 (t->content == "+" || t->content == "-"))
2793 {
2794 if (s->sort_direction)
2677d2fb 2795 throw parse_error (_("multiple sort directives"));
93484556
FCE
2796 s->sort_direction = (t->content == "+") ? 1 : -1;
2797 s->sort_column = 0;
731a5359 2798 swallow ();
93484556
FCE
2799 }
2800
27f21e8c
DS
2801 t = peek ();
2802 if (tok_is(t, tok_keyword, "limit"))
2803 {
731a5359 2804 swallow (); // get past the "limit"
27f21e8c
DS
2805 s->limit = parse_expression ();
2806 }
2807
69c68955
FCE
2808 t = next ();
2809 if (! (t->type == tok_operator && t->content == ")"))
2810 throw parse_error ("expected ')'");
731a5359 2811 swallow ();
69c68955
FCE
2812
2813 s->block = parse_statement ();
2814 return s;
2815}
2816
2817
2f1a1aea
FCE
2818expression*
2819parser::parse_expression ()
2820{
2821 return parse_assignment ();
2822}
2823
2f1a1aea
FCE
2824
2825expression*
2826parser::parse_assignment ()
2827{
2828 expression* op1 = parse_ternary ();
2829
2830 const token* t = peek ();
82919855 2831 // right-associative operators
dff50e09 2832 if (t && t->type == tok_operator
2f1a1aea 2833 && (t->content == "=" ||
82919855 2834 t->content == "<<<" ||
2f1a1aea 2835 t->content == "+=" ||
bb2e3076
FCE
2836 t->content == "-=" ||
2837 t->content == "*=" ||
2838 t->content == "/=" ||
2839 t->content == "%=" ||
2840 t->content == "<<=" ||
2841 t->content == ">>=" ||
2842 t->content == "&=" ||
2843 t->content == "^=" ||
2844 t->content == "|=" ||
d5d7c2cc 2845 t->content == ".=" ||
dff50e09 2846 false))
2f1a1aea 2847 {
bb2e3076 2848 // NB: lvalueness is checked during elaboration / translation
2f1a1aea 2849 assignment* e = new assignment;
56099f08 2850 e->left = op1;
2f1a1aea 2851 e->op = t->content;
56099f08 2852 e->tok = t;
2f1a1aea 2853 next ();
82919855 2854 e->right = parse_expression ();
56099f08 2855 op1 = e;
2f1a1aea 2856 }
56099f08
FCE
2857
2858 return op1;
2f1a1aea
FCE
2859}
2860
2861
2862expression*
2863parser::parse_ternary ()
2864{
2865 expression* op1 = parse_logical_or ();
2866
2867 const token* t = peek ();
2868 if (t && t->type == tok_operator && t->content == "?")
2869 {
2f1a1aea 2870 ternary_expression* e = new ternary_expression;
56099f08 2871 e->tok = t;
2f1a1aea 2872 e->cond = op1;
56099f08
FCE
2873 next ();
2874 e->truevalue = parse_expression (); // XXX
2f1a1aea
FCE
2875
2876 t = next ();
2877 if (! (t->type == tok_operator && t->content == ":"))
2677d2fb 2878 throw parse_error (_("expected ':'"));
731a5359 2879 swallow ();
2f1a1aea 2880
56099f08 2881 e->falsevalue = parse_expression (); // XXX
2f1a1aea
FCE
2882 return e;
2883 }
2884 else
2885 return op1;
2886}
2887
2888
2889expression*
2890parser::parse_logical_or ()
2891{
2892 expression* op1 = parse_logical_and ();
dff50e09 2893
2f1a1aea 2894 const token* t = peek ();
56099f08 2895 while (t && t->type == tok_operator && t->content == "||")
2f1a1aea 2896 {
2f1a1aea 2897 logical_or_expr* e = new logical_or_expr;
56099f08
FCE
2898 e->tok = t;
2899 e->op = t->content;
2f1a1aea 2900 e->left = op1;
56099f08
FCE
2901 next ();
2902 e->right = parse_logical_and ();
2903 op1 = e;
2904 t = peek ();
2f1a1aea 2905 }
56099f08
FCE
2906
2907 return op1;
2f1a1aea
FCE
2908}
2909
2910
2911expression*
2912parser::parse_logical_and ()
2913{
bb2e3076 2914 expression* op1 = parse_boolean_or ();
2f1a1aea
FCE
2915
2916 const token* t = peek ();
56099f08 2917 while (t && t->type == tok_operator && t->content == "&&")
2f1a1aea 2918 {
2f1a1aea
FCE
2919 logical_and_expr *e = new logical_and_expr;
2920 e->left = op1;
56099f08
FCE
2921 e->op = t->content;
2922 e->tok = t;
2923 next ();
bb2e3076
FCE
2924 e->right = parse_boolean_or ();
2925 op1 = e;
2926 t = peek ();
2927 }
2928
2929 return op1;
2930}
2931
2932
2933expression*
2934parser::parse_boolean_or ()
2935{
2936 expression* op1 = parse_boolean_xor ();
2937
2938 const token* t = peek ();
2939 while (t && t->type == tok_operator && t->content == "|")
2940 {
2941 binary_expression* e = new binary_expression;
2942 e->left = op1;
2943 e->op = t->content;
2944 e->tok = t;
2945 next ();
2946 e->right = parse_boolean_xor ();
2947 op1 = e;
2948 t = peek ();
2949 }
2950
2951 return op1;
2952}
2953
2954
2955expression*
2956parser::parse_boolean_xor ()
2957{
2958 expression* op1 = parse_boolean_and ();
2959
2960 const token* t = peek ();
2961 while (t && t->type == tok_operator && t->content == "^")
2962 {
2963 binary_expression* e = new binary_expression;
2964 e->left = op1;
2965 e->op = t->content;
2966 e->tok = t;
2967 next ();
2968 e->right = parse_boolean_and ();
2969 op1 = e;
2970 t = peek ();
2971 }
2972
2973 return op1;
2974}
2975
2976
2977expression*
2978parser::parse_boolean_and ()
2979{
2980 expression* op1 = parse_array_in ();
2981
2982 const token* t = peek ();
2983 while (t && t->type == tok_operator && t->content == "&")
2984 {
2985 binary_expression* e = new binary_expression;
2986 e->left = op1;
2987 e->op = t->content;
2988 e->tok = t;
2989 next ();
56099f08
FCE
2990 e->right = parse_array_in ();
2991 op1 = e;
2992 t = peek ();
2f1a1aea 2993 }
56099f08
FCE
2994
2995 return op1;
2f1a1aea
FCE
2996}
2997
2998
2999expression*
3000parser::parse_array_in ()
3001{
ce10591c 3002 // This is a very tricky case. All these are legit expressions:
69c68955 3003 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
ce10591c
FCE
3004 vector<expression*> indexes;
3005 bool parenthesized = false;
2f1a1aea
FCE
3006
3007 const token* t = peek ();
69c68955 3008 if (t && t->type == tok_operator && t->content == "[")
ce10591c 3009 {
731a5359 3010 swallow ();
ce10591c
FCE
3011 parenthesized = true;
3012 }
3013
3014 while (1)
3015 {
93daaca8 3016 expression* op1 = parse_comparison_or_regex_query ();
ce10591c
FCE
3017 indexes.push_back (op1);
3018
3019 if (parenthesized)
3020 {
3021 const token* t = peek ();
3022 if (t && t->type == tok_operator && t->content == ",")
3023 {
731a5359 3024 swallow ();
ce10591c
FCE
3025 continue;
3026 }
69c68955 3027 else if (t && t->type == tok_operator && t->content == "]")
ce10591c 3028 {
731a5359 3029 swallow ();
ce10591c
FCE
3030 break;
3031 }
dff50e09 3032 else
2677d2fb 3033 throw parse_error (_("expected ',' or ']'"));
ce10591c
FCE
3034 }
3035 else
3036 break; // expecting only one expression
3037 }
3038
3039 t = peek ();
6e213f58 3040 if (t && t->type == tok_keyword && t->content == "in")
2f1a1aea 3041 {
2f1a1aea 3042 array_in *e = new array_in;
56099f08 3043 e->tok = t;
731a5359 3044 next ();
ce10591c
FCE
3045
3046 arrayindex* a = new arrayindex;
3047 a->indexes = indexes;
d02548c0 3048 a->base = parse_indexable();
d15d767c 3049 a->tok = a->base->tok;
ce10591c 3050 e->operand = a;
2f1a1aea
FCE
3051 return e;
3052 }
ce10591c
FCE
3053 else if (indexes.size() == 1) // no "in" - need one expression only
3054 return indexes[0];
2f1a1aea 3055 else
2677d2fb 3056 throw parse_error (_("unexpected comma-separated expression list"));
2f1a1aea
FCE
3057}
3058
3059
3060expression*
93daaca8 3061parser::parse_comparison_or_regex_query ()
2f1a1aea 3062{
bb2e3076 3063 expression* op1 = parse_shift ();
2f1a1aea 3064
557abe61 3065 // XXX precedence -- perhaps a =~ b == c =~ d --> (a =~ b) == (c =~ d) ?
93daaca8
SM
3066 const token *t = peek();
3067 if (t && t->type == tok_operator
3068 && (t->content == "=~" ||
3069 t->content == "!~"))
3070 {
3071 regex_query* r = new regex_query;
3072 r->left = op1;
3073 r->op = t->content;
3074 r->tok = t;
3075 next ();
d3bc48f0 3076 r->right = parse_literal_string();
93daaca8
SM
3077 op1 = r;
3078 t = peek ();
3079 }
3080 else while (t && t->type == tok_operator
553d27a5
FCE
3081 && (t->content == ">" ||
3082 t->content == "<" ||
3083 t->content == "==" ||
3084 t->content == "!=" ||
3085 t->content == "<=" ||
bb2e3076 3086 t->content == ">="))
2f1a1aea
FCE
3087 {
3088 comparison* e = new comparison;
3089 e->left = op1;
3090 e->op = t->content;
56099f08 3091 e->tok = t;
2f1a1aea 3092 next ();
bb2e3076
FCE
3093 e->right = parse_shift ();
3094 op1 = e;
3095 t = peek ();
3096 }
3097
3098 return op1;
3099}
3100
3101
3102expression*
3103parser::parse_shift ()
3104{
3105 expression* op1 = parse_concatenation ();
3106
3107 const token* t = peek ();
dff50e09 3108 while (t && t->type == tok_operator &&
bb2e3076
FCE
3109 (t->content == "<<" || t->content == ">>"))
3110 {
3111 binary_expression* e = new binary_expression;
3112 e->left = op1;
3113 e->op = t->content;
3114 e->tok = t;
3115 next ();
56099f08
FCE
3116 e->right = parse_concatenation ();
3117 op1 = e;
3118 t = peek ();
2f1a1aea 3119 }
56099f08
FCE
3120
3121 return op1;
2f1a1aea
FCE
3122}
3123
3124
3125expression*
3126parser::parse_concatenation ()
3127{
3128 expression* op1 = parse_additive ();
3129
3130 const token* t = peek ();
3131 // XXX: the actual awk string-concatenation operator is *whitespace*.
3132 // I don't know how to easily to model that here.
56099f08 3133 while (t && t->type == tok_operator && t->content == ".")
2f1a1aea
FCE
3134 {
3135 concatenation* e = new concatenation;
3136 e->left = op1;
3137 e->op = t->content;
56099f08 3138 e->tok = t;
2f1a1aea 3139 next ();
56099f08
FCE
3140 e->right = parse_additive ();
3141 op1 = e;
3142 t = peek ();
2f1a1aea 3143 }
56099f08
FCE
3144
3145 return op1;
2f1a1aea
FCE
3146}
3147
3148
3149expression*
3150parser::parse_additive ()
3151{
3152 expression* op1 = parse_multiplicative ();
3153
3154 const token* t = peek ();
dff50e09 3155 while (t && t->type == tok_operator
2f1a1aea
FCE
3156 && (t->content == "+" || t->content == "-"))
3157 {
3158 binary_expression* e = new binary_expression;
3159 e->op = t->content;
3160 e->left = op1;
56099f08 3161 e->tok = t;
2f1a1aea 3162 next ();
56099f08
FCE
3163 e->right = parse_multiplicative ();
3164 op1 = e;
3165 t = peek ();
2f1a1aea 3166 }
56099f08
FCE
3167
3168 return op1;
2f1a1aea
FCE
3169}
3170
3171
3172expression*
3173parser::parse_multiplicative ()
3174{
3175 expression* op1 = parse_unary ();
3176
3177 const token* t = peek ();
dff50e09 3178 while (t && t->type == tok_operator
2f1a1aea
FCE
3179 && (t->content == "*" || t->content == "/" || t->content == "%"))
3180 {
3181 binary_expression* e = new binary_expression;
3182 e->op = t->content;
3183 e->left = op1;
56099f08 3184 e->tok = t;
2f1a1aea 3185 next ();
56099f08
FCE
3186 e->right = parse_unary ();
3187 op1 = e;
3188 t = peek ();
2f1a1aea 3189 }
56099f08
FCE
3190
3191 return op1;
2f1a1aea
FCE
3192}
3193
3194
3195expression*
3196parser::parse_unary ()
3197{
3198 const token* t = peek ();
dff50e09
FCE
3199 if (t && t->type == tok_operator
3200 && (t->content == "+" ||
3201 t->content == "-" ||
bb2e3076
FCE
3202 t->content == "!" ||
3203 t->content == "~" ||
3204 false))
2f1a1aea
FCE
3205 {
3206 unary_expression* e = new unary_expression;
3207 e->op = t->content;
56099f08 3208 e->tok = t;
2f1a1aea 3209 next ();
1cb79a72 3210 e->operand = parse_unary ();
2f1a1aea
FCE
3211 return e;
3212 }
3213 else
bb2e3076 3214 return parse_crement ();
2f1a1aea
FCE
3215}
3216
3217
3218expression*
3219parser::parse_crement () // as in "increment" / "decrement"
3220{
cbfbbf69
FCE
3221 // NB: Ideally, we'd parse only a symbol as an operand to the
3222 // *crement operators, instead of a general expression value. We'd
3223 // need more complex lookahead code to tell apart the postfix cases.
3224 // So we just punt, and leave it to pass-3 to signal errors on
3225 // cases like "4++".
3226
2f1a1aea 3227 const token* t = peek ();
dff50e09 3228 if (t && t->type == tok_operator
2f1a1aea
FCE
3229 && (t->content == "++" || t->content == "--"))
3230 {
3231 pre_crement* e = new pre_crement;
3232 e->op = t->content;
56099f08 3233 e->tok = t;
2f1a1aea
FCE
3234 next ();
3235 e->operand = parse_value ();
3236 return e;
3237 }
3238
3239 // post-crement or non-crement
3240 expression *op1 = parse_value ();
dff50e09 3241
2f1a1aea 3242 t = peek ();
dff50e09 3243 if (t && t->type == tok_operator
2f1a1aea
FCE
3244 && (t->content == "++" || t->content == "--"))
3245 {
3246 post_crement* e = new post_crement;
3247 e->op = t->content;
56099f08 3248 e->tok = t;
2f1a1aea
FCE
3249 next ();
3250 e->operand = op1;
3251 return e;
3252 }
3253 else
3254 return op1;
3255}
3256
3257
3258expression*
3259parser::parse_value ()
3260{
3261 const token* t = peek ();
3262 if (! t)
2677d2fb 3263 throw parse_error (_("expected value"));
2f1a1aea 3264
7d902887
FCE
3265 if (t->type == tok_embedded)
3266 {
7d902887 3267 if (! privileged)
efb02738 3268 throw parse_error (_("embedded expression code in unprivileged script; need stap -g"), false);
7d902887
FCE
3269
3270 embedded_expr *e = new embedded_expr;
3271 e->tok = t;
3272 e->code = t->content;
731a5359 3273 next ();
7d902887
FCE
3274 return e;
3275 }
3276
2f1a1aea
FCE
3277 if (t->type == tok_operator && t->content == "(")
3278 {
731a5359 3279 swallow ();
2f1a1aea
FCE
3280 expression* e = parse_expression ();
3281 t = next ();
3282 if (! (t->type == tok_operator && t->content == ")"))
2677d2fb 3283 throw parse_error (_("expected ')'"));
731a5359 3284 swallow ();
2f1a1aea
FCE
3285 return e;
3286 }
03c75a4a
JS
3287 else if (t->type == tok_operator && t->content == "&")
3288 {
731a5359 3289 next (); // Cannot swallow, passing token on...
d48afc20 3290 return parse_target_symbol (t);
03c75a4a 3291 }
06219d6f
SM
3292 else if (t->type == tok_identifier
3293 || (t->type == tok_operator && t->content[0] == '@'))
2f1a1aea
FCE
3294 return parse_symbol ();
3295 else
3296 return parse_literal ();
3297}
3298
3299
d02548c0
GH
3300const token *
3301parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name)
3302{
3303 hop = NULL;
50cc7cd5 3304 const token* t = expect_ident_or_atword (name);
d02548c0
GH
3305 if (name == "@hist_linear" || name == "@hist_log")
3306 {
3307 hop = new hist_op;
3308 if (name == "@hist_linear")
3309 hop->htype = hist_linear;
3310 else if (name == "@hist_log")
3311 hop->htype = hist_log;
3312 hop->tok = t;
3313 expect_op("(");
3314 hop->stat = parse_expression ();
3315 int64_t tnum;
3316 if (hop->htype == hist_linear)
3317 {
3318 for (size_t i = 0; i < 3; ++i)
3319 {
3320 expect_op (",");
3321 expect_number (tnum);
3322 hop->params.push_back (tnum);
3323 }
3324 }
d02548c0
GH
3325 expect_op(")");
3326 }
3327 return t;
3328}
3329
3330
3331indexable*
3332parser::parse_indexable ()
3333{
3334 hist_op *hop = NULL;
3335 string name;
3336 const token *tok = parse_hist_op_or_bare_name(hop, name);
3337 if (hop)
3338 return hop;
3339 else
3340 {
3341 symbol* sym = new symbol;
3342 sym->name = name;
3343 sym->tok = tok;
3344 return sym;
3345 }
3346}
3347
3348
cc9001af
MW
3349// var, indexable[index], func(parms), printf("...", ...), $var,r
3350// @cast, @defined, @entry, @var, $var->member, @stat_op(stat)
30263a73 3351expression* parser::parse_symbol ()
2f1a1aea 3352{
d02548c0
GH
3353 hist_op *hop = NULL;
3354 symbol *sym = NULL;
d7f3e0c5 3355 string name;
d02548c0
GH
3356 const token *t = parse_hist_op_or_bare_name(hop, name);
3357
3358 if (!hop)
0fefb486 3359 {
dff50e09 3360 // If we didn't get a hist_op, then we did get an identifier. We can
d02548c0
GH
3361 // now scrutinize this identifier for the various magic forms of identifier
3362 // (printf, @stat_op, and $var...)
3363
cc9001af
MW
3364 if (name == "@cast"
3365 || name == "@var"
3366 || (name.size() > 0 && name[0] == '$'))
30263a73 3367 return parse_target_symbol (t);
9b5af295 3368
db135493
FCE
3369 // NB: PR11343: @defined() is not incompatible with earlier versions
3370 // of stap, so no need to check session.compatible for 1.2
30263a73
FCE
3371 if (name == "@defined")
3372 return parse_defined_op (t);
8cc799a5
JS
3373
3374 if (name == "@entry")
3375 return parse_entry_op (t);
3376
3689db05
SC
3377 if (name == "@perf")
3378 return parse_perf_op (t);
3379
cc9001af 3380 if (name.size() > 0 && name[0] == '@')
d7f3e0c5 3381 {
d02548c0
GH
3382 stat_op *sop = new stat_op;
3383 if (name == "@avg")
3384 sop->ctype = sc_average;
3385 else if (name == "@count")
3386 sop->ctype = sc_count;
3387 else if (name == "@sum")
3388 sop->ctype = sc_sum;
3389 else if (name == "@min")
3390 sop->ctype = sc_min;
3391 else if (name == "@max")
3392 sop->ctype = sc_max;
3393 else
cc9001af 3394 throw parse_error(_("unknown operator ") + name);
d02548c0
GH
3395 expect_op("(");
3396 sop->tok = t;
3397 sop->stat = parse_expression ();
3398 expect_op(")");
3399 return sop;
3400 }
dff50e09 3401
d5e178c1 3402 else if (print_format *fmt = print_format::create(t))
d02548c0 3403 {
d02548c0 3404 expect_op("(");
b15c465c
PP
3405 if ((name == "print" || name == "println" ||
3406 name == "sprint" || name == "sprintln") &&
f34254da 3407 (peek_op("@hist_linear") || peek_op("@hist_log")))
a4636912
GH
3408 {
3409 // We have a special case where we recognize
3410 // print(@hist_foo(bar)) as a magic print-the-histogram
3411 // construct. This is sort of gross but it avoids
3412 // promoting histogram references to typeful
3413 // expressions.
dff50e09 3414
1bbeef03
GH
3415 hop = NULL;
3416 t = parse_hist_op_or_bare_name(hop, name);
3417 assert(hop);
dff50e09 3418
1bbeef03
GH
3419 // It is, sadly, possible that even while parsing a
3420 // hist_op, we *mis-guessed* and the user wishes to
3421 // print(@hist_op(foo)[bucket]), a scalar. In that case
3422 // we must parse the arrayindex and print an expression.
839325a1
JS
3423 //
3424 // XXX: This still fails if the arrayindex is part of a
3425 // larger expression. To really handle everything, we'd
3426 // need to push back all the hist tokens start over.
dff50e09 3427
1bbeef03
GH
3428 if (!peek_op ("["))
3429 fmt->hist = hop;
3430 else
3431 {
3432 // This is simplified version of the
3433 // multi-array-index parser below, because we can
3434 // only ever have one index on a histogram anyways.
3435 expect_op("[");
3436 struct arrayindex* ai = new arrayindex;
3437 ai->tok = t;
3438 ai->base = hop;
3439 ai->indexes.push_back (parse_expression ());
3440 expect_op("]");
3441 fmt->args.push_back(ai);
839325a1
JS
3442
3443 // Consume any subsequent arguments.
3444 while (!peek_op (")"))
3445 {
3446 expect_op(",");
3447 expression *e = parse_expression ();
3448 fmt->args.push_back(e);
3449 }
1bbeef03 3450 }
a4636912 3451 }
d7f3e0c5 3452 else
d02548c0 3453 {
3cb17058
JS
3454 int min_args = 0;
3455 if (fmt->print_with_format)
3456 {
3457 // Consume and convert a format string. Agreement between the
3458 // format string and the arguments is postponed to the
3459 // typechecking phase.
3460 string tmp;
3461 expect_unknown (tok_string, tmp);
3462 fmt->raw_components = tmp;
3463 fmt->components = print_format::string_to_components (tmp);
3464 }
3465 else if (fmt->print_with_delim)
3466 {
3467 // Consume a delimiter to separate arguments.
3468 fmt->delimiter.clear();
3469 fmt->delimiter.type = print_format::conv_literal;
3470 expect_unknown (tok_string, fmt->delimiter.literal_string);
3471 min_args = 2;
3472 }
3473 else
3474 {
3475 // If we are not printing with a format string, we must have
3476 // at least one argument (of any type).
3477 expression *e = parse_expression ();
3478 fmt->args.push_back(e);
3479 }
3480
3481 // Consume any subsequent arguments.
3482 while (min_args || !peek_op (")"))
3483 {
3484 expect_op(",");
3485 expression *e = parse_expression ();
3486 fmt->args.push_back(e);
3487 if (min_args)
3488 --min_args;
3489 }
d02548c0
GH
3490 }
3491 expect_op(")");
3492 return fmt;
3493 }
dff50e09 3494
d02548c0
GH
3495 else if (peek_op ("(")) // function call
3496 {
731a5359 3497 swallow ();
d02548c0
GH
3498 struct functioncall* f = new functioncall;
3499 f->tok = t;
3500 f->function = name;
3501 // Allow empty actual parameter list
3502 if (peek_op (")"))
3503 {
731a5359 3504 swallow ();
d02548c0
GH
3505 return f;
3506 }
3507 while (1)
3508 {
3509 f->args.push_back (parse_expression ());
3510 if (peek_op (")"))
3511 {
731a5359 3512 swallow ();
d02548c0
GH
3513 break;
3514 }
3515 else if (peek_op (","))
3516 {
731a5359 3517 swallow ();
d02548c0
GH
3518 continue;
3519 }
3520 else
2677d2fb 3521 throw parse_error (_("expected ',' or ')'"));
d02548c0
GH
3522 }
3523 return f;
3524 }
3525
3526 else
3527 {
3528 sym = new symbol;
3529 sym->name = name;
3530 sym->tok = t;
d7f3e0c5 3531 }
0fefb486 3532 }
dff50e09
FCE
3533
3534 // By now, either we had a hist_op in the first place, or else
d02548c0
GH
3535 // we had a plain word and it was converted to a symbol.
3536
70c743d8 3537 assert (!hop != !sym); // logical XOR
d02548c0
GH
3538
3539 // All that remains is to check for array indexing
3540
d7f3e0c5 3541 if (peek_op ("[")) // array
2f1a1aea 3542 {
731a5359 3543 swallow ();
2f1a1aea 3544 struct arrayindex* ai = new arrayindex;
d02548c0
GH
3545 ai->tok = t;
3546
3547 if (hop)
3548 ai->base = hop;
3549 else
3550 ai->base = sym;
3551
2f1a1aea
FCE
3552 while (1)
3553 {
3554 ai->indexes.push_back (parse_expression ());
d7f3e0c5 3555 if (peek_op ("]"))
dff50e09 3556 {
731a5359 3557 swallow ();
dff50e09 3558 break;
d7f3e0c5
GH
3559 }
3560 else if (peek_op (","))
3561 {
731a5359 3562 swallow ();
d7f3e0c5
GH
3563 continue;
3564 }
2f1a1aea 3565 else
2677d2fb 3566 throw parse_error (_("expected ',' or ']'"));
2f1a1aea
FCE
3567 }
3568 return ai;
3569 }
d02548c0
GH
3570
3571 // If we got to here, we *should* have a symbol; if we have
3572 // a hist_op on its own, it doesn't count as an expression,
3573 // so we throw a parse error.
3574
3575 if (hop)
2677d2fb 3576 throw parse_error(_("base histogram operator where expression expected"), t);
dff50e09
FCE
3577
3578 return sym;
2f1a1aea 3579}
56099f08 3580
30263a73
FCE
3581// Parse a @cast or $var. Given head token has already been consumed.
3582target_symbol* parser::parse_target_symbol (const token* t)
3583{
d48afc20
JS
3584 bool addressof = false;
3585 if (t->type == tok_operator && t->content == "&")
3586 {
3587 addressof = true;
3819d181
MW
3588 // Don't delete t before trying next token.
3589 // We might need it in the error message when there is no next token.
3590 const token *next_t = next ();
731a5359 3591 delete t;
3819d181 3592 t = next_t;
d48afc20
JS
3593 }
3594
06219d6f 3595 if (t->type == tok_operator && t->content == "@cast")
30263a73
FCE
3596 {
3597 cast_op *cop = new cast_op;
3598 cop->tok = t;
277c21bc 3599 cop->name = t->content;
30263a73
FCE
3600 expect_op("(");
3601 cop->operand = parse_expression ();
3602 expect_op(",");
7f6b80bd 3603 expect_unknown(tok_string, cop->type_name);
30263a73
FCE
3604 if (peek_op (","))
3605 {
731a5359 3606 swallow ();
30263a73
FCE
3607 expect_unknown(tok_string, cop->module);
3608 }
3609 expect_op(")");
3610 parse_target_symbol_components(cop);
d48afc20 3611 cop->addressof = addressof;
30263a73
FCE
3612 return cop;
3613 }
3614
3615 if (t->type == tok_identifier && t->content[0]=='$')
3616 {
3617 // target_symbol time
3618 target_symbol *tsym = new target_symbol;
3619 tsym->tok = t;
277c21bc 3620 tsym->name = t->content;
30263a73 3621 parse_target_symbol_components(tsym);
d48afc20 3622 tsym->addressof = addressof;
30263a73
FCE
3623 return tsym;
3624 }
3625
06219d6f 3626 if (t->type == tok_operator && t->content == "@var")
cc9001af 3627 {
bd1fcbad
YZ
3628 atvar_op *aop = new atvar_op;
3629 aop->tok = t;
3630 aop->name = t->content;
cc9001af 3631 expect_op("(");
bd1fcbad
YZ
3632 expect_unknown(tok_string, aop->target_name);
3633 size_t found_at = aop->target_name.find("@");
bfa7e523 3634 if (found_at != string::npos)
bd1fcbad 3635 aop->cu_name = aop->target_name.substr(found_at + 1);
bfa7e523 3636 else
bd1fcbad
YZ
3637 aop->cu_name = "";
3638 if (peek_op (","))
3639 {
3640 swallow ();
3641 expect_unknown (tok_string, aop->module);
3642 }
3643 else
3644 aop->module = "";
cc9001af 3645 expect_op(")");
bd1fcbad
YZ
3646 parse_target_symbol_components(aop);
3647 aop->addressof = addressof;
3648 return aop;
cc9001af
MW
3649 }
3650
3651 throw parse_error (_("expected @cast, @var or $var"));
30263a73
FCE
3652}
3653
3654
3655// Parse a @defined(). Given head token has already been consumed.
3656expression* parser::parse_defined_op (const token* t)
3657{
3658 defined_op* dop = new defined_op;
3659 dop->tok = t;
3660 expect_op("(");
30263a73 3661 // no need for parse_hist_op... etc., as @defined takes only target_symbols as its operand.
d48afc20 3662 const token* tt = next ();
30263a73
FCE
3663 dop->operand = parse_target_symbol (tt);
3664 expect_op(")");
3665 return dop;
3666}
3667
3668
8cc799a5
JS
3669// Parse a @entry(). Given head token has already been consumed.
3670expression* parser::parse_entry_op (const token* t)
3671{
3672 entry_op* eop = new entry_op;
3673 eop->tok = t;
3674 expect_op("(");
3675 eop->operand = parse_expression ();
3676 expect_op(")");
3677 return eop;
3678}
3679
3680
3689db05
SC
3681// Parse a @perf(). Given head token has already been consumed.
3682expression* parser::parse_perf_op (const token* t)
3683{
3684 perf_op* pop = new perf_op;
3e6a17ee
SC
3685
3686 if (strverscmp(session.compatible.c_str(), "2.1") < 0)
3687 throw parse_error (_("expected @cast, @var or $var"));
3688
3689db05
SC
3689 pop->tok = t;
3690 expect_op("(");
ace7c23f
FCE
3691 pop->operand = parse_literal_string ();
3692 if (pop->operand->value == "")
3693 throw parse_error (_("expected non-empty string"));
3689db05
SC
3694 expect_op(")");
3695 return pop;
3696}
3697
3698
30263a73 3699
81931eab
JS
3700void
3701parser::parse_target_symbol_components (target_symbol* e)
3702{
5f36109e
JS
3703 bool pprint = false;
3704
3705 // check for pretty-print in the form $foo$
277c21bc 3706 string &base = e->name;
5f36109e
JS
3707 size_t pprint_pos = base.find_last_not_of('$');
3708 if (0 < pprint_pos && pprint_pos < base.length() - 1)
3709 {
3710 string pprint_val = base.substr(pprint_pos + 1);
3711 base.erase(pprint_pos + 1);
3712 e->components.push_back (target_symbol::component(e->tok, pprint_val, true));
3713 pprint = true;
3714 }
3715
3716 while (!pprint)
81931eab 3717 {
81931eab
JS
3718 if (peek_op ("->"))
3719 {
c67847a0
JS
3720 const token* t = next();
3721 string member;
3722 expect_ident_or_keyword (member);
5f36109e
JS
3723
3724 // check for pretty-print in the form $foo->$ or $foo->bar$
3725 pprint_pos = member.find_last_not_of('$');
3726 string pprint_val;
3727 if (pprint_pos == string::npos || pprint_pos < member.length() - 1)
3728 {
3729 pprint_val = member.substr(pprint_pos + 1);
3730 member.erase(pprint_pos + 1);
3731 pprint = true;
3732 }
3733
3734 if (!member.empty())
3735 e->components.push_back (target_symbol::component(t, member));
3736 if (pprint)
3737 e->components.push_back (target_symbol::component(t, pprint_val, true));
81931eab
JS
3738 }
3739 else if (peek_op ("["))
3740 {
c67847a0 3741 const token* t = next();
6fda2dff
JS
3742 expression* index = parse_expression();
3743 literal_number* ln = dynamic_cast<literal_number*>(index);
3744 if (ln)
3745 e->components.push_back (target_symbol::component(t, ln->value));
3746 else
3747 e->components.push_back (target_symbol::component(t, index));
81931eab 3748 expect_op ("]");
81931eab
JS
3749 }
3750 else
3751 break;
3752 }
5f36109e
JS
3753
3754 if (!pprint)
3755 {
3756 // check for pretty-print in the form $foo $
3757 // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$
3758 const token* t = peek();
3819d181 3759 if (t != NULL && t->type == tok_identifier &&
5f36109e
JS
3760 t->content.find_first_not_of('$') == string::npos)
3761 {
3762 t = next();
3763 e->components.push_back (target_symbol::component(t, t->content, true));
3764 pprint = true;
3765 }
3766 }
3767
3768 if (pprint && (peek_op ("->") || peek_op("[")))
ce0f6648 3769 throw parse_error(_("-> and [ are not accepted for a pretty-printing variable"));
81931eab
JS
3770}
3771
73267b89 3772/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.672377 seconds and 5 git commands to generate.