]> sourceware.org Git - systemtap.git/blob - parse.cxx
buildrun.cxx: adapt to kernel 5.4+
[systemtap.git] / parse.cxx
1 // recursive descent parser for systemtap scripts
2 // Copyright (C) 2005-2019 Red Hat Inc.
3 // Copyright (C) 2006 Intel Corporation.
4 // Copyright (C) 2007 Bull S.A.S
5 // Copyright (C) 2014 Peter Kjellstrom <cap@nsc.liu.se>
6 //
7 // This file is part of systemtap, and is free software. You can
8 // redistribute it and/or modify it under the terms of the GNU General
9 // Public License (GPL); either version 2, or (at your option) any
10 // later version.
11
12 #include "config.h"
13 #include "staptree.h"
14 #include "parse.h"
15 #include "session.h"
16 #include "util.h"
17 #include "stringtable.h"
18
19 #include <iostream>
20
21 #include <fstream>
22 #include <cctype>
23 #include <cstdlib>
24 #include <cassert>
25 #include <cerrno>
26 #include <climits>
27 #include <sstream>
28 #include <cstring>
29 #include <cctype>
30 #include <iterator>
31 #include <unordered_set>
32
33 extern "C" {
34 #include <fnmatch.h>
35 }
36
37 using namespace std;
38
39
40 class parser;
// Tokenizer for one systemtap input.  This is the declaration only; the
// member function bodies are not part of it.
41 class lexer
42 {
43 public:
44 bool ate_comment; // current token follows a comment
45 bool ate_whitespace; // the most recent token followed whitespace
46 bool saw_tokens; // the lexer found tokens (before preprocessing occurred)
47 bool check_compatible; // whether to gate features on session.compatible
48
// Produce the next token.  parser::next_pp1() hands the result straight
// back to its caller, which treats a null pointer as end of input.
49 token* scan ();
// parser's constructor passes, in order: the input stream, the input name,
// the session, and !(flags & pf_no_compatible) -- presumably the initial
// value of check_compatible (TODO confirm against the definition).
50 lexer (istream&, const string&, systemtap_session&, bool);
51 void set_current_file (stapfile* f);
52 void set_current_token_chain (const token* tok);
53 inline bool has_version (const char* v) const;
54
55 unordered_set<interned_string> keywords;
// Shared by all lexers.  parser::scan_pp1() consults it to warn when a
// user macro takes the name of a built-in @operator.
56 static unordered_set<string> atwords;
57 private:
58 inline int input_get ();
59 inline int input_peek (unsigned n=0);
60 void input_put (const string&, const token*);
61 string input_name;
62 string input_contents; // NB: being a temporary, no need to interned_string optimize this object
63 const char *input_pointer; // index into input_contents; NB: recompute if input_contents changed!
64 const char *input_end;
65 unsigned cursor_suspend_count;
66 unsigned cursor_suspend_line;
67 unsigned cursor_suspend_column;
68 unsigned cursor_line;
69 unsigned cursor_column;
70 systemtap_session& session;
71 stapfile* current_file;
72 const token* current_token_chain;
73 };
74
75
// Recursive descent parser for one systemtap input.  Tokens reach the
// grammar routines through two preprocessing layers declared below: a
// macro pass (the pp1_* members) and a conditional pass (the pp_* members).
76 class parser
77 {
78 public:
// s: session; n: input name used in diagnostics; i: stream to read;
// flags: mask of pf_* bits decoded by the constructor.
79 parser (systemtap_session& s, const string& n, istream& i, unsigned flags=0);
80 ~parser ();
81
82 stapfile* parse ();
83 probe* parse_synthetic_probe (const token* chain);
// Used for .stpm files; returns 0 after reporting an error.
84 stapfile* parse_library_macros ();
85
86 private:
// State of one open %( ... %) conditional, stored in pp_state.
87 typedef enum {
88 PP_NONE,
89 PP_KEEP_THEN,
90 PP_SKIP_THEN,
91 PP_KEEP_ELSE,
92 PP_SKIP_ELSE,
93 } pp_state_t;
94
95 struct pp1_activation;
96
// A macro parameter captured at an invocation.  scan_pp1() sets
// parent_act to the activation that was current at the call site
// (null at top level).
97 struct pp_macrodecl : public macrodecl {
98 pp1_activation* parent_act; // used for param bindings
99 virtual bool is_closure() { return parent_act != 0; }
100 pp_macrodecl () : macrodecl(), parent_act(0) { }
101 };
102
// NB: input_name is declared before input because the constructor
// passes input_name to input's constructor.
103 systemtap_session& session;
104 string input_name;
105 lexer input;
106 bool errs_as_warnings;
107 bool privileged;
108 bool user_file;
109 bool auto_path;
110 parse_context context;
111
112 // preprocessing subordinate, first pass (macros)
// One in-progress macro expansion; pp1_state is a stack of these.
113 struct pp1_activation {
114 const token* tok;
115 unsigned cursor; // position within macro body
116 map<string, pp_macrodecl*> params;
117
118 macrodecl* curr_macro;
119
120 pp1_activation (const token* tok, macrodecl* curr_macro)
121 : tok(tok), cursor(0), curr_macro(curr_macro) { }
122 ~pp1_activation ();
123 };
124
// Macros declared with @define in this input, keyed by name.
125 map<string, macrodecl*> pp1_namespace;
126 vector<pp1_activation*> pp1_state;
127 const token* next_pp1 ();
128 const token* scan_pp1 (bool ignore_macros);
129 const token* slurp_pp1_param (vector<const token*>& param);
130 const token* slurp_pp1_body (vector<const token*>& body);
131
132 // preprocessing subordinate, final pass (conditionals)
133 vector<pair<const token*, pp_state_t> > pp_state;
134 const token* scan_pp ();
135 const token* skip_pp ();
136
137 // scanning state
138 const token* next ();
139 const token* peek ();
140
141 // Advance past and throw away current token after peek () or next ().
142 void swallow ();
143
144 const token* systemtap_v_seen;
145 const token* last_t; // the last value returned by peek() or next()
146 const token* next_t; // lookahead token
147
148 // expectations, these swallow the token
149 void expect_known (token_type tt, string const & expected);
150 void expect_unknown (token_type tt, interned_string & target);
151 void expect_unknown2 (token_type tt1, token_type tt2, interned_string & target);
152
153 // convenience forms, these also swallow the token
154 void expect_op (string const & expected);
155 interned_string expect_op_any (initializer_list<const char*> expected);
156 void expect_kw (string const & expected);
157 void expect_number (int64_t & expected);
158 void expect_ident_or_keyword (interned_string & target);
159
160 // convenience forms, which return true or false, these don't swallow token
161 bool peek_op (string const & op);
162 bool peek_kw (string const & kw);
163
164 // convenience forms, which return the token
165 const token* expect_kw_token (string const & expected);
166 const token* expect_ident_or_atword (interned_string & target);
167
// Report pe through the session and count it in num_errors.
168 void print_error (const parse_error& pe, bool errs_as_warnings = false);
169 unsigned num_errors;
170
171 private: // nonterminals
172 void parse_probe (vector<probe*>&, vector<probe_alias*>&);
173 void parse_private (vector<vardecl*>&, vector<probe*>&,
174 string const&, vector<functiondecl*>&);
175 void parse_global (vector<vardecl*>&, vector<probe*>&,
176 string const&);
177 void do_parse_global (vector<vardecl*>&, vector<probe*>&,
178 string const&, const token*, bool);
179 void parse_functiondecl (vector<functiondecl*>&, string const&);
180 void do_parse_functiondecl (vector<functiondecl*>&, const token*,
181 string const&, bool);
182 embeddedcode* parse_embeddedcode ();
183 vector<probe_point*> parse_probe_points ();
184 vector<probe_point*> parse_components ();
185 vector<probe_point*> parse_component ();
186 literal_string* consume_string_literals (const token*);
187 literal_string* parse_literal_string ();
188 literal* parse_literal ();
189 block* parse_stmt_block ();
190 try_block* parse_try_block ();
191 statement* parse_statement ();
192 if_statement* parse_if_statement ();
193 for_loop* parse_for_loop ();
194 for_loop* parse_while_loop ();
195 foreach_loop* parse_foreach_loop ();
196 expr_statement* parse_expr_statement ();
197 return_statement* parse_return_statement ();
198 delete_statement* parse_delete_statement ();
199 next_statement* parse_next_statement ();
200 break_statement* parse_break_statement ();
201 continue_statement* parse_continue_statement ();
202 indexable* parse_indexable ();
203 const token *parse_hist_op_or_bare_name (hist_op *&hop, interned_string &name);
204 target_symbol *parse_target_symbol ();
205 cast_op *parse_cast_op ();
206 atvar_op *parse_atvar_op ();
207 expression* parse_entry_op (const token* t);
208 expression* parse_defined_op (const token* t);
209 expression* parse_const_op (const token* t);
210 expression* parse_perf_op (const token* t);
211 expression* parse_target_register (const token* t);
212 expression* parse_target_deref (const token* t);
213 expression* parse_expression ();
214 expression* parse_assignment ();
215 expression* parse_ternary ();
216 expression* parse_logical_or ();
217 expression* parse_logical_and ();
218 expression* parse_boolean_or ();
219 expression* parse_boolean_xor ();
220 expression* parse_boolean_and ();
221 expression* parse_array_in ();
222 expression* parse_comparison_or_regex_query ();
223 expression* parse_shift ();
224 expression* parse_concatenation ();
225 expression* parse_additive ();
226 expression* parse_multiplicative ();
227 expression* parse_unary ();
228 expression* parse_crement ();
229 expression* parse_dwarf_value ();
230 expression* parse_value ();
231 expression* parse_symbol ();
232
233 bool peek_target_symbol_components ();
234 void parse_target_symbol_components (target_symbol* e);
235 };
236
237
238 // ------------------------------------------------------------------------
239
240 stapfile*
241 parse (systemtap_session& s, const string& n, istream& i, unsigned flags)
242 {
243 parser p (s, n, i, flags);
244 return p.parse ();
245 }
246
247 stapfile*
248 parse (systemtap_session& s, const string& name, unsigned flags)
249 {
250 ifstream i(name.c_str(), ios::in);
251 if (i.fail())
252 {
253 cerr << (file_exists(name)
254 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
255 : _F("Input file '%s' is missing.", name.c_str()))
256 << endl;
257 return 0;
258 }
259
260 parser p (s, name, i, flags);
261 return p.parse ();
262 }
263
264 stapfile*
265 parse_library_macros (systemtap_session& s, const string& name)
266 {
267 ifstream i(name.c_str(), ios::in);
268 if (i.fail())
269 {
270 cerr << (file_exists(name)
271 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
272 : _F("Input file '%s' is missing.", name.c_str()))
273 << endl;
274 return 0;
275 }
276
277 parser p (s, name, i);
278 return p.parse_library_macros ();
279 }
280
281 probe*
282 parse_synthetic_probe (systemtap_session &s, istream& i, const token* tok)
283 {
284 parser p (s, tok ? tok->location.file->name : "<synthetic>", i);
285 return p.parse_synthetic_probe (tok);
286 }
287
288 // ------------------------------------------------------------------------
289
290 parser::parser (systemtap_session& s, const string &n, istream& i, unsigned flags):
291 session (s), input_name (n), input (i, input_name, s, !(flags & pf_no_compatible)),
292 errs_as_warnings(flags & pf_squash_errors), privileged (flags & pf_guru),
293 user_file (flags & pf_user_file), auto_path (flags & pf_auto_path),
294 context(con_unknown), systemtap_v_seen(0), last_t (0), next_t (0), num_errors (0)
295 {
296 }
297
298 parser::~parser()
299 {
300 }
301
302 static string
303 tt2str(token_type tt)
304 {
305 switch (tt)
306 {
307 case tok_junk: return "junk";
308 case tok_identifier: return "identifier";
309 case tok_operator: return "operator";
310 case tok_string: return "string";
311 case tok_number: return "number";
312 case tok_embedded: return "embedded-code";
313 case tok_keyword: return "keyword";
314 }
315 return "unknown token";
316 }
317
318 ostream&
319 operator << (ostream& o, const source_loc& loc)
320 {
321 o << loc.file->name << ":"
322 << loc.line << ":"
323 << loc.column;
324
325 return o;
326 }
327
328 ostream&
329 operator << (ostream& o, const token& t)
330 {
331 o << tt2str(t.type);
332
333 if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
334 {
335 o << " '";
336 for (unsigned i=0; i<t.content.length(); i++)
337 {
338 char c = t.content[i];
339 o << (isprint (c) ? c : '?');
340 }
341 o << "'";
342 }
343
344 o << " at "
345 << t.location;
346
347 return o;
348 }
349
350
351 void
352 parser::print_error (const parse_error &pe, bool errs_as_warnings)
353 {
354 const token *tok = pe.tok ? pe.tok : last_t;
355 session.print_error(pe, tok, input_name, errs_as_warnings);
356 num_errors ++;
357 }
358
359
360
361
362 template <typename OPERAND>
363 bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
364 {
365 if (op->type == tok_operator && op->content == "<=")
366 { return lhs <= rhs; }
367 else if (op->type == tok_operator && op->content == ">=")
368 { return lhs >= rhs; }
369 else if (op->type == tok_operator && op->content == "<")
370 { return lhs < rhs; }
371 else if (op->type == tok_operator && op->content == ">")
372 { return lhs > rhs; }
373 else if (op->type == tok_operator && op->content == "==")
374 { return lhs == rhs; }
375 else if (op->type == tok_operator && op->content == "!=")
376 { return lhs != rhs; }
377 else
378 throw PARSE_ERROR (_("expected comparison operator"), op);
379 }
380
381
382 // Here, we perform on-the-fly preprocessing in two passes.
383
384 // First pass - macro declaration and expansion.
385 //
386 // The basic form of a declaration is @define SIGNATURE %( BODY %)
387 // where SIGNATURE is of the form macro_name (a, b, c, ...)
388 // and BODY can obtain the parameter contents as @a, @b, @c, ....
389 // Note that parameterless macros can also be declared.
390 //
391 // Macro definitions may not be nested.
392 // A macro is available textually after it has been defined.
393 //
394 // The basic form of a macro invocation
395 // for a parameterless macro is @macro_name,
396 // for a macro with parameters is @macro_name(param_1, param_2, ...).
397 //
398 // NB: this means that a parameterless macro @foo called as @foo(a, b, c)
399 // leaves its 'parameters' alone, rather than consuming them to result
400 // in a "too many parameters error". This may be useful in the unusual
401 // case of wanting @foo to expand to the name of a function.
402 //
403 // Invocations of unknown macros are left unexpanded, to allow
404 // the continued use of constructs such as @cast, @var, etc.
405
406 macrodecl::~macrodecl ()
407 {
408 delete tok;
409 for (vector<const token*>::iterator it = body.begin();
410 it != body.end(); it++)
411 delete *it;
412 }
413
414 parser::pp1_activation::~pp1_activation ()
415 {
416 delete tok;
417 if (curr_macro->is_closure()) return; // body is shared with an earlier declaration
418 for (map<string, pp_macrodecl*>::iterator it = params.begin();
419 it != params.end(); it++)
420 delete it->second;
421 }
422
423 // Grab a token from the current input source (main file or macro body):
424 const token*
425 parser::next_pp1 ()
426 {
427 if (pp1_state.empty())
428 return input.scan ();
429
430 // otherwise, we're inside a macro
431 pp1_activation* act = pp1_state.back();
432 unsigned& cursor = act->cursor;
433 if (cursor < act->curr_macro->body.size())
434 {
435 token* t = new token(*act->curr_macro->body[cursor]);
436 t->chain = new token(*act->tok); // mark chained token
437 cursor++;
438 return t;
439 }
440 else
441 return 0; // reached end of macro body
442 }
443
// First preprocessing pass.  Returns the next token after handling macro
// constructs, or 0 at the real end of input:
//  - "@define NAME [(args)] %( body %)" is recorded in pp1_namespace and
//    produces no token;
//  - "@name" that matches a parameter of the current activation, a macro
//    of this file, or a session library macro pushes a new activation on
//    pp1_state, whose body is then replayed by next_pp1();
//  - any other token, including unknown @operators, is returned as is.
// When ignore_macros is true a "@define" token is not treated as a
// declaration; it falls through to the invocation lookup instead.
// Malformed declarations and invocations are reported by throwing
// PARSE_ERROR(...).
444 const token*
445 parser::scan_pp1 (bool ignore_macros = false)
446 {
447 while (true)
448 {
449 const token* t = next_pp1 ();
450 if (t == 0) // EOF or end of macro body
451 {
452 if (pp1_state.empty()) // actual EOF
453 return 0;
454
455 // Exit macro and loop around to look for the next token.
456 pp1_activation* act = pp1_state.back();
457 pp1_state.pop_back(); delete act;
458 continue;
459 }
460
461 // macro definition
462 // PR18462 don't catalog preprocessor-disabled macros
463 if (t->type == tok_operator && t->content == "@define" && !ignore_macros)
464 {
465 if (!pp1_state.empty())
466 throw PARSE_ERROR (_("'@define' forbidden inside macro body"), t);
467 delete t;
468
469 // handle macro definition
470 // (1) consume macro signature
// NB: the declaration is read straight from the lexer (input.scan), not
// through next_pp1(); pp1_state is known to be empty here.
471 t = input.scan();
472 if (! (t && t->type == tok_identifier))
473 throw PARSE_ERROR (_("expected identifier"), t);
474 string name = t->content;
475
476 // check for redefinition of existing macro
477 if (pp1_namespace.find(name) != pp1_namespace.end())
478 {
479 parse_error er (ERR_SRC, _F("attempt to redefine macro '@%s' in the same file", name.c_str ()), t);
480
481 // Also point to pp1_namespace[name]->tok, the site of
482 // the original definition:
483 er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here",
484 name.c_str()), pp1_namespace[name]->tok);
485 throw er;
486 }
487
488 // XXX: the above restriction was mostly necessary due to
489 // wanting to leave open the possibility of
490 // statically-scoped semantics in the future.
491
492 // XXX: this cascades into further parse errors as the
493 // parser tries to parse the remaining definition... (e.g.
494 // it can't tell that the macro body isn't a conditional,
495 // that the uses of parameters aren't nonexistent
496 // macros.....)
497 if (name == "define")
498 throw PARSE_ERROR (_("attempt to redefine '@define'"), t);
499 if (input.atwords.count(name))
500 session.print_warning (_F("macro redefines built-in operator '@%s'", name.c_str()), t);
501
// The new declaration takes over the name token as its defining token.
502 macrodecl* decl = (pp1_namespace[name] = new macrodecl);
503 decl->tok = t;
504
505 // determine if the macro takes parameters
506 bool saw_params = false;
507 t = input.scan();
508 if (t && t->type == tok_operator && t->content == "(")
509 {
510 saw_params = true;
511 do
512 {
// t is the "(" on the first pass and a "," on later passes.
513 delete t;
514
515 t = input.scan ();
516 if (! (t && t->type == tok_identifier))
517 throw PARSE_ERROR(_("expected identifier"), t);
518 decl->formal_args.push_back(t->content);
519 delete t;
520
521 t = input.scan ();
522 if (t && t->type == tok_operator && t->content == ",")
523 {
524 continue;
525 }
526 else if (t && t->type == tok_operator && t->content == ")")
527 {
528 delete t;
529 t = input.scan();
530 break;
531 }
532 else
533 {
534 throw PARSE_ERROR (_("expected ',' or ')'"), t);
535 }
536 }
537 while (true);
538 }
539
540 // (2) identify & consume macro body
541 if (! (t && t->type == tok_operator && t->content == "%("))
542 {
543 if (saw_params)
544 throw PARSE_ERROR (_("expected '%('"), t);
545 else
546 throw PARSE_ERROR (_("expected '%(' or '('"), t);
547 }
548 delete t;
549
// slurp_pp1_body returns the closing "%)" or 0 if input ran out first.
550 t = slurp_pp1_body (decl->body);
551 if (!t)
552 throw PARSE_ERROR (_("incomplete macro definition - missing '%)'"), decl->tok);
553 delete t;
554
555 // Now loop around to look for a real token.
556 continue;
557 }
558
559 // (potential) macro invocation
560 if (t->type == tok_operator && t->content[0] == '@')
561 {
562 const string& name = t->content.substr(1); // strip initial '@'
563
564 // check if name refers to a real parameter or macro
// Lookup order: parameters of the current activation, then this file's
// macros (skipped while expanding a library macro), then library macros.
565 macrodecl* decl;
566 pp1_activation* act = pp1_state.empty() ? 0 : pp1_state.back();
567 if (act && act->params.find(name) != act->params.end())
568 decl = act->params[name];
569 else if (!(act && act->curr_macro->context == ctx_library)
570 && pp1_namespace.find(name) != pp1_namespace.end())
571 decl = pp1_namespace[name];
572 else if (session.library_macros.find(name)
573 != session.library_macros.end())
574 decl = session.library_macros[name];
575 else // this is an ordinary @operator
576 return t;
577
578 // handle macro invocation, taking ownership of t
579 pp1_activation *new_act = new pp1_activation(t, decl);
580 unsigned num_params = decl->formal_args.size();
581
582 // (1a) restore parameter invocation closure
583 if (num_params == 0 && decl->is_closure())
584 {
585 // NB: decl->parent_act is always safe since the
586 // parameter decl (if any) comes from an activation
587 // record which is deeper in the stack than new_act.
588
589 // decl is a macro parameter which must be evaluated in
590 // the context of the original point of invocation:
591 new_act->params = ((pp_macrodecl*)decl)->parent_act->params;
592 goto expand;
593 }
594
595 // (1b) consume macro parameters (if any)
596 if (num_params == 0)
597 goto expand;
598
599 // for simplicity, we do not allow macro constructs here
600 // -- if we did, we'd have to recursively call scan_pp1()
// From here on t no longer refers to the invocation token (new_act owns
// that); it tracks the most recent delimiter read.
601 t = next_pp1 ();
602 if (! (t && t->type == tok_operator && t->content == "("))
603 {
604 delete new_act;
605 throw PARSE_ERROR (_NF
606 ("expected '(' in invocation of macro '@%s'"
607 " taking %d parameter",
608 "expected '(' in invocation of macro '@%s'"
609 " taking %d parameters",
610 num_params, name.c_str(), num_params), t);
611 }
612
613 // XXX perhaps parse/count the full number of params,
614 // so we can say "expected x, found y params" on error?
615 for (unsigned i = 0; i < num_params; i++)
616 {
// Discard the previous delimiter: "(" first, then each ",".
617 delete t;
618
619 // create parameter closure
620 string param_name = decl->formal_args[i];
621 pp_macrodecl* p = (new_act->params[param_name]
622 = new pp_macrodecl);
623 p->tok = new token(*new_act->tok);
624 p->parent_act = act;
625 // NB: *new_act->tok points to invocation, act is NULL at top level
626
627 t = slurp_pp1_param (p->body);
628
629 // check correct usage of ',' or ')'
630 if (t == 0) // hit unexpected EOF or end of macro
631 {
632 // XXX could we pop the stack and continue parsing
633 // the invocation, allowing macros to construct new
634 // invocations in piecemeal fashion??
// Copy the invocation token first: deleting new_act deletes its tok.
635 const token* orig_t = new token(*new_act->tok);
636 delete new_act;
637 throw PARSE_ERROR (_("could not find end of macro invocation"), orig_t);
638 }
639 if (t->type == tok_operator && t->content == ",")
640 {
641 if (i + 1 == num_params)
642 {
643 delete new_act;
644 throw PARSE_ERROR (_F("too many parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
645 }
646 }
647 else if (t->type == tok_operator && t->content == ")")
648 {
649 if (i + 1 != num_params)
650 {
651 delete new_act;
652 throw PARSE_ERROR (_F("too few parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
653 }
654 }
655 else
656 {
657 // XXX this is, incidentally, impossible
658 delete new_act;
659 throw PARSE_ERROR(_("expected ',' or ')' after macro parameter"), t);
660 }
661 }
662
// The closing ")" of the invocation.
663 delete t;
664
665 // (2) set up macro expansion
666 expand:
667 pp1_state.push_back (new_act);
668
669 // Now loop around to look for a real token.
670 continue;
671 }
672
673 // Otherwise, we have an ordinary token.
674 return t;
675 }
676 }
677
678 // Consume a single macro invocation's parameters, heeding nesting
679 // brackets and stopping on an unbalanced ')' or an unbracketed ','
680 // (and returning the final separator token).
681 const token*
682 parser::slurp_pp1_param (vector<const token*>& param)
683 {
684 const token* t = 0;
685 unsigned nesting = 0;
686 do
687 {
688 t = next_pp1 ();
689
690 if (!t)
691 break;
692 // [ needed in case macro paramater is used as prefix for array-deref operation
693 if (t->type == tok_operator && (t->content == "(" || t->content == "["))
694 ++nesting;
695 else if (nesting && t->type == tok_operator && (t->content == ")" || t->content == "]"))
696 --nesting;
697 else if (!nesting && t->type == tok_operator
698 && (t->content == ")" || t->content == ","))
699 break;
700 param.push_back(t);
701 }
702 while (true);
703 return t; // report ")" or "," or NULL
704 }
705
706
707 // Consume a macro declaration's body, heeding nested %( %) brackets.
708 const token*
709 parser::slurp_pp1_body (vector<const token*>& body)
710 {
711 const token* t = 0;
712 unsigned nesting = 0;
713 do
714 {
715 t = next_pp1 ();
716
717 if (!t)
718 break;
719 if (t->type == tok_operator && t->content == "%(")
720 ++nesting;
721 else if (nesting && t->type == tok_operator && t->content == "%)")
722 --nesting;
723 else if (!nesting && t->type == tok_operator && t->content == "%)")
724 break;
725 body.push_back(t);
726 }
727 while (true);
728 return t; // report final "%)" or NULL
729 }
730
731 // Used for parsing .stpm files.
732 stapfile*
733 parser::parse_library_macros ()
734 {
735 stapfile* f = new stapfile;
736 f->privileged = this->privileged;
737 input.set_current_file (f);
738
739 try
740 {
741 const token* t = scan_pp ();
742
743 // Currently we only take objection to macro invocations if they
744 // produce a non-whitespace token after being expanded.
745
746 // XXX should we prevent macro invocations even if they expand to empty??
747
748 if (t != 0)
749 throw PARSE_ERROR (_F("unexpected token in library macro file '%s'", input_name.c_str()), t);
750
751 // We need to first check whether *any* of the macros are duplicates,
752 // then commit to including the entire file in the global namespace
753 // (or not). Yuck.
754 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
755 it != pp1_namespace.end(); it++)
756 {
757 string name = it->first;
758
759 if (session.library_macros.find(name) != session.library_macros.end())
760 {
761 parse_error er(ERR_SRC, _F("duplicate definition of library macro '@%s'", name.c_str()), it->second->tok);
762 er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here", name.c_str()), session.library_macros[name]->tok);
763 print_error (er);
764
765 delete er.chain;
766 delete f;
767 return 0;
768 }
769 }
770
771 }
772 catch (const parse_error& pe)
773 {
774 print_error (pe, errs_as_warnings);
775 delete f;
776 return 0;
777 }
778
779 // If no errors, include the entire file. Note how this is outside
780 // of the try-catch block -- no errors possible.
781 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
782 it != pp1_namespace.end(); it++)
783 {
784 string name = it->first;
785
786 session.library_macros[name] = it->second;
787 session.library_macros[name]->context = ctx_library;
788 }
789
790 return f;
791 }
792
793 // Second pass - preprocessor conditional expansion.
794 //
795 // The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
796 // where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
797 // or: arch COMPARISON-OP "arch-string"
798 // or: systemtap_v COMPARISON-OP "version-string"
799 // or: systemtap_privilege COMPARISON-OP "privilege-string"
800 // or: CONFIG_foo COMPARISON-OP "config-string"
801 // or: CONFIG_foo COMPARISON-OP number
802 // or: CONFIG_foo COMPARISON-OP CONFIG_bar
803 // or: "string1" COMPARISON-OP "string2"
804 // or: number1 COMPARISON-OP number2
805 // The %: ELSE-TOKENS part is optional.
806 //
807 // e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
808 // e.g. %( arch != "i?86" %? "foo" %: "baz" %)
809 // e.g. %( CONFIG_foo %? "foo" %: "baz" %)
810 //
811 // Up to an entire %( ... %) expression is processed by a single call
812 // to this function. Tokens included by any nested conditions are
813 // enqueued in a private vector.
814
815 bool eval_pp_conditional (systemtap_session& s,
816 const token* l, const token* op, const token* r)
817 {
818 if (l->type == tok_identifier && (l->content == "kernel_v" ||
819 l->content == "kernel_vr" ||
820 l->content == "systemtap_v"))
821 {
822 if (! (r->type == tok_string))
823 throw PARSE_ERROR (_("expected string literal"), r);
824
825 string target_kernel_vr = s.kernel_release;
826 string target_kernel_v = s.kernel_base_release;
827 string target;
828
829 if (l->content == "kernel_v") target = target_kernel_v;
830 else if (l->content == "kernel_vr") target = target_kernel_vr;
831 else if (l->content == "systemtap_v") target = s.compatible;
832 else assert (0);
833
834 string query = r->content;
835 bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
836
837 // collect acceptable strverscmp results.
838 int rvc_ok1, rvc_ok2;
839 bool wc_ok = false;
840 if (op->type == tok_operator && op->content == "<=")
841 { rvc_ok1 = -1; rvc_ok2 = 0; }
842 else if (op->type == tok_operator && op->content == ">=")
843 { rvc_ok1 = 1; rvc_ok2 = 0; }
844 else if (op->type == tok_operator && op->content == "<")
845 { rvc_ok1 = -1; rvc_ok2 = -1; }
846 else if (op->type == tok_operator && op->content == ">")
847 { rvc_ok1 = 1; rvc_ok2 = 1; }
848 else if (op->type == tok_operator && op->content == "==")
849 { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
850 else if (op->type == tok_operator && op->content == "!=")
851 { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
852 else
853 throw PARSE_ERROR (_("expected comparison operator"), op);
854
855 if ((!wc_ok) && rhs_wildcard)
856 throw PARSE_ERROR (_("wildcard not allowed with order comparison operators"), op);
857
858 if (rhs_wildcard)
859 {
860 int rvc_result = fnmatch (query.c_str(), target.c_str(),
861 FNM_NOESCAPE); // spooky
862 bool badness = (rvc_result == 0) ^ (op->content == "==");
863 return !badness;
864 }
865 else
866 {
867 int rvc_result = strverscmp (target.c_str(), query.c_str());
868 // normalize rvc_result
869 if (rvc_result < 0) rvc_result = -1;
870 if (rvc_result > 0) rvc_result = 1;
871 return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
872 }
873 }
874 else if (l->type == tok_identifier && l->content == "systemtap_privilege")
875 {
876 string target_privilege =
877 pr_contains(s.privilege, pr_stapdev) ? "stapdev"
878 : pr_contains(s.privilege, pr_stapsys) ? "stapsys"
879 : pr_contains(s.privilege, pr_stapusr) ? "stapusr"
880 : "none"; /* should be impossible -- s.privilege always one of above */
881 assert(target_privilege != "none");
882
883 if (! (r->type == tok_string))
884 throw PARSE_ERROR (_("expected string literal"), r);
885 string query_privilege = r->content;
886
887 bool nomatch = (target_privilege != query_privilege);
888
889 bool result;
890 if (op->type == tok_operator && op->content == "==")
891 result = !nomatch;
892 else if (op->type == tok_operator && op->content == "!=")
893 result = nomatch;
894 else
895 throw PARSE_ERROR (_("expected '==' or '!='"), op);
896 /* XXX perhaps allow <= >= and similar comparisons */
897
898 return result;
899 }
900 else if (l->type == tok_identifier && l->content == "guru_mode")
901 {
902 if (! (r->type == tok_number))
903 throw PARSE_ERROR (_("expected number"), r);
904 int64_t lhs = (int64_t) s.guru_mode;
905 int64_t rhs = lex_cast<int64_t>(r->content);
906 if (!((rhs == 0)||(rhs == 1)))
907 throw PARSE_ERROR (_("expected 0 or 1"), op);
908 if (!((op->type == tok_operator && op->content == "==") ||
909 (op->type == tok_operator && op->content == "!=")))
910 throw PARSE_ERROR (_("expected '==' or '!='"), op);
911
912 return eval_comparison (lhs, op, rhs);
913 }
914 else if (l->type == tok_identifier && l->content == "arch")
915 {
916 string target_architecture = s.architecture;
917 if (! (r->type == tok_string))
918 throw PARSE_ERROR (_("expected string literal"), r);
919 string query_architecture = r->content;
920
921 int nomatch = fnmatch (query_architecture.c_str(),
922 target_architecture.c_str(),
923 FNM_NOESCAPE); // still spooky
924
925 bool result;
926 if (op->type == tok_operator && op->content == "==")
927 result = !nomatch;
928 else if (op->type == tok_operator && op->content == "!=")
929 result = nomatch;
930 else
931 throw PARSE_ERROR (_("expected '==' or '!='"), op);
932
933 return result;
934 }
935 else if (l->type == tok_identifier && l->content == "runtime")
936 {
937 if (! (r->type == tok_string))
938 throw PARSE_ERROR (_("expected string literal"), r);
939
940 string query_runtime = r->content;
941 string target_runtime;
942
943 if (s.runtime_mode == systemtap_session::dyninst_runtime)
944 target_runtime = "dyninst";
945 else if (s.runtime_mode == systemtap_session::bpf_runtime)
946 target_runtime = "bpf";
947 else
948 target_runtime = "kernel";
949
950 int nomatch = fnmatch (query_runtime.c_str(),
951 target_runtime.c_str(),
952 FNM_NOESCAPE); // still spooky
953
954 bool result;
955 if (op->type == tok_operator && op->content == "==")
956 result = !nomatch;
957 else if (op->type == tok_operator && op->content == "!=")
958 result = nomatch;
959 else
960 throw PARSE_ERROR (_("expected '==' or '!='"), op);
961
962 return result;
963 }
964 else if (l->type == tok_identifier && l->content.starts_with("CONFIG_"))
965 {
966 if (r->type == tok_string)
967 {
968 string lhs = s.kernel_config[l->content]; // may be empty
969 string rhs = r->content;
970
971 int nomatch = fnmatch (rhs.c_str(), lhs.c_str(), FNM_NOESCAPE); // still spooky
972
973 bool result;
974 if (op->type == tok_operator && op->content == "==")
975 result = !nomatch;
976 else if (op->type == tok_operator && op->content == "!=")
977 result = nomatch;
978 else
979 throw PARSE_ERROR (_("expected '==' or '!='"), op);
980
981 return result;
982 }
983 else if (r->type == tok_number)
984 {
985 const string& lhs_string = s.kernel_config[l->content];
986 const char* startp = lhs_string.c_str ();
987 char* endp = (char*) startp;
988 errno = 0;
989 int64_t lhs = (int64_t) strtoll (startp, & endp, 0);
990 if (errno == ERANGE || errno == EINVAL || *endp != '\0')
991 throw PARSE_ERROR ("Config option value not a number", l);
992
993 int64_t rhs = lex_cast<int64_t>(r->content);
994 return eval_comparison (lhs, op, rhs);
995 }
996 else if (r->type == tok_identifier
997 && r->content.starts_with( "CONFIG_"))
998 {
999 // First try to convert both to numbers,
1000 // otherwise threat both as strings.
1001 const string& lhs_string = s.kernel_config[l->content];
1002 const string& rhs_string = s.kernel_config[r->content];
1003 const char* startp = lhs_string.c_str ();
1004 char* endp = (char*) startp;
1005 errno = 0;
1006 int64_t val = (int64_t) strtoll (startp, & endp, 0);
1007 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
1008 {
1009 int64_t lhs = val;
1010 startp = rhs_string.c_str ();
1011 endp = (char*) startp;
1012 errno = 0;
1013 int64_t rhs = (int64_t) strtoll (startp, & endp, 0);
1014 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
1015 return eval_comparison (lhs, op, rhs);
1016 }
1017
1018 return eval_comparison (lhs_string, op, rhs_string);
1019 }
1020 else
1021 throw PARSE_ERROR (_("expected string, number literal or other CONFIG_... as right side operand"), r);
1022 }
1023 else if (l->type == tok_string && r->type == tok_string)
1024 {
1025 string lhs = l->content;
1026 string rhs = r->content;
1027 return eval_comparison (lhs, op, rhs);
1028 // NB: no wildcarding option here
1029 }
1030 else if (l->type == tok_number && r->type == tok_number)
1031 {
1032 int64_t lhs = lex_cast<int64_t>(l->content);
1033 int64_t rhs = lex_cast<int64_t>(r->content);
1034 return eval_comparison (lhs, op, rhs);
1035 // NB: no wildcarding option here
1036 }
1037 else if (l->type == tok_string && r->type == tok_number
1038 && op->type == tok_operator)
1039 throw PARSE_ERROR (_("expected string literal as right value"), r);
1040 else if (l->type == tok_number && r->type == tok_string
1041 && op->type == tok_operator)
1042 throw PARSE_ERROR (_("expected number literal as right value"), r);
1043
1044 else
1045 throw PARSE_ERROR (_("expected 'arch', 'kernel_v', 'kernel_vr', 'systemtap_v',\n"
1046 " 'runtime', 'systemtap_privilege', 'CONFIG_...', or\n"
1047 " comparison between strings or integers"), l);
1048 }
1049
1050
1051 // Only tokens corresponding to the TRUE statement must be expanded
1052 const token*
1053 parser::scan_pp ()
1054 {
1055 while (true)
1056 {
1057 pp_state_t pp = PP_NONE;
1058 if (!pp_state.empty())
1059 pp = pp_state.back().second;
1060
1061 const token* t = 0;
1062 if (pp == PP_SKIP_THEN || pp == PP_SKIP_ELSE)
1063 t = skip_pp ();
1064 else
1065 t = scan_pp1 ();
1066
1067 if (t == 0) // EOF
1068 {
1069 if (pp != PP_NONE)
1070 {
1071 t = pp_state.back().first;
1072 pp_state.pop_back(); // so skip_some doesn't keep trying to close this
1073 //TRANSLATORS: 'conditional' meaning 'conditional preprocessing'
1074 throw PARSE_ERROR (_("incomplete conditional at end of file"), t);
1075 }
1076 return t;
1077 }
1078
1079 // misplaced preprocessor "then"
1080 if (t->type == tok_operator && t->content == "%?")
1081 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1082
1083 // preprocessor "else"
1084 if (t->type == tok_operator && t->content == "%:")
1085 {
1086 if (pp == PP_NONE)
1087 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1088 if (pp == PP_KEEP_ELSE || pp == PP_SKIP_ELSE)
1089 throw PARSE_ERROR (_("invalid conditional - duplicate '%:'"), t);
1090 // XXX: here and elsewhere, error cascades might be avoided
1091 // by dropping tokens until we reach the closing %)
1092
1093 pp_state.back().second = (pp == PP_KEEP_THEN) ?
1094 PP_SKIP_ELSE : PP_KEEP_ELSE;
1095 delete t;
1096 continue;
1097 }
1098
1099 // preprocessor close
1100 if (t->type == tok_operator && t->content == "%)")
1101 {
1102 if (pp == PP_NONE)
1103 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1104 delete pp_state.back().first;
1105 delete t; //this is the closing bracket
1106 pp_state.pop_back();
1107 continue;
1108 }
1109
1110 if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
1111 return t;
1112
1113 // We have a %( - it's time to throw a preprocessing party!
1114
1115 bool result = false;
1116 bool and_result = true;
1117 const token *n = NULL;
1118 do {
1119 const token *l, *op, *r;
1120 l = scan_pp1 ();
1121 op = scan_pp1 ();
1122 r = scan_pp1 ();
1123 if (l == 0 || op == 0 || r == 0)
1124 throw PARSE_ERROR (_("incomplete condition after '%('"), t);
1125 // NB: consider generalizing to consume all tokens until %?, and
1126 // passing that as a vector to an evaluator.
1127
1128 // Do not evaluate the condition if we haven't expanded everything.
1129 // This may occur when having several recursive conditionals.
1130 and_result &= eval_pp_conditional (session, l, op, r);
1131 if(l->content=="systemtap_v")
1132 systemtap_v_seen=r;
1133
1134 else
1135 delete r;
1136
1137 delete l;
1138 delete op;
1139 delete n;
1140
1141 n = scan_pp1 ();
1142 if (n && n->type == tok_operator && n->content == "&&")
1143 continue;
1144 result |= and_result;
1145 and_result = true;
1146 if (! (n && n->type == tok_operator && n->content == "||"))
1147 break;
1148 } while (true);
1149
1150 /*
1151 clog << "PP eval (" << *t << ") == " << result << endl;
1152 */
1153
1154 const token *m = n;
1155 if (! (m && m->type == tok_operator && m->content == "%?"))
1156 throw PARSE_ERROR (_("expected '%?' marker for conditional"), t);
1157 delete m; // "%?"
1158
1159 pp = result ? PP_KEEP_THEN : PP_SKIP_THEN;
1160 pp_state.push_back (make_pair (t, pp));
1161
1162 // Now loop around to look for a real token.
1163 }
1164 }
1165
1166
1167 // Skip over tokens and any errors, heeding
1168 // only nested preprocessor starts and ends.
1169 const token*
1170 parser::skip_pp ()
1171 {
1172 const token* t = 0;
1173 unsigned nesting = 0;
1174 do
1175 {
1176 try
1177 {
1178 t = scan_pp1 (true);
1179 }
1180 catch (const parse_error &e)
1181 {
1182 continue;
1183 }
1184 if (!t)
1185 break;
1186 if (t->type == tok_operator && t->content == "%(")
1187 ++nesting;
1188 else if (nesting && t->type == tok_operator && t->content == "%)")
1189 --nesting;
1190 else if (!nesting && t->type == tok_operator &&
1191 (t->content == "%:" || t->content == "%?" || t->content == "%)"))
1192 break;
1193 delete t;
1194 }
1195 while (true);
1196 return t;
1197 }
1198
1199
1200 const token*
1201 parser::next ()
1202 {
1203 if (! next_t)
1204 next_t = scan_pp ();
1205 if (! next_t)
1206 throw PARSE_ERROR (_("unexpected end-of-file"));
1207
1208 last_t = next_t;
1209 // advance by zeroing next_t
1210 next_t = 0;
1211 return last_t;
1212 }
1213
1214
1215 const token*
1216 parser::peek ()
1217 {
1218 if (! next_t)
1219 next_t = scan_pp ();
1220
1221 // don't advance by zeroing next_t
1222 last_t = next_t;
1223 return next_t;
1224 }
1225
1226
1227 void
1228 parser::swallow ()
1229 {
1230 // can only swallow something last peeked or nexted token.
1231 assert (last_t != 0);
1232 delete last_t;
1233 // advance by zeroing next_t
1234 last_t = next_t = 0;
1235 }
1236
1237
1238 static inline bool
1239 tok_is(token const * t, token_type tt, string const & expected)
1240 {
1241 return t && t->type == tt && t->content == expected;
1242 }
1243
1244
1245 void
1246 parser::expect_known (token_type tt, string const & expected)
1247 {
1248 const token *t = next();
1249 if (! (t && t->type == tt && t->content == expected))
1250 throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
1251 // NB: PR25174 may require consume_string_literals() someday
1252 swallow (); // We are done with it, content was copied.
1253 }
1254
1255
1256 void
1257 parser::expect_unknown (token_type tt, interned_string & target)
1258 {
1259 const token *t = next();
1260 if (!(t && t->type == tt))
1261 throw PARSE_ERROR (_("expected ") + tt2str(tt));
1262 if (t->type==tok_string)
1263 {
1264 literal_string *ls = consume_string_literals (t);
1265 target = ls->value;
1266 delete ls;
1267 }
1268 else
1269 {
1270 target = t->content;
1271 swallow (); // We are done with it, content was copied.
1272 }
1273 }
1274
1275
1276 void
1277 parser::expect_unknown2 (token_type tt1, token_type tt2, interned_string & target)
1278 {
1279 const token *t = next();
1280 if (!(t && (t->type == tt1 || t->type == tt2)))
1281 throw PARSE_ERROR (_F("expected %s or %s", tt2str(tt1).c_str(), tt2str(tt2).c_str()));
1282 // NB: PR25174 may require consume_string_literals() someday
1283 target = t->content;
1284 swallow (); // We are done with it, content was copied.
1285 }
1286
1287
1288 void
1289 parser::expect_op (string const & expected)
1290 {
1291 expect_known (tok_operator, expected);
1292 }
1293
1294 interned_string
1295 parser::expect_op_any (initializer_list<const char*> expected)
1296 {
1297 const token *t = next();
1298 if (t && t->type == tok_operator)
1299 for (auto it = expected.begin(); it != expected.end(); ++it)
1300 if (t->content == *it)
1301 {
1302 interned_string found = t->content;
1303 swallow (); // We are done with it, content was copied.
1304 return found;
1305 }
1306
1307 string msg;
1308 for (auto it = expected.begin(); it != expected.end(); ++it)
1309 {
1310 if (it != expected.begin())
1311 msg.append(" ");
1312 msg.append(*it);
1313 }
1314 throw PARSE_ERROR (_F("expected one of '%s'", msg.c_str()));
1315 }
1316
1317 void
1318 parser::expect_kw (string const & expected)
1319 {
1320 expect_known (tok_keyword, expected);
1321 }
1322
1323 const token*
1324 parser::expect_kw_token (string const & expected)
1325 {
1326 const token *t = next();
1327 if (! (t && t->type == tok_keyword && t->content == expected))
1328 throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
1329 return t;
1330 }
1331
1332 void
1333 parser::expect_number (int64_t & value)
1334 {
1335 bool neg = false;
1336 const token *t = next();
1337 if (t->type == tok_operator && t->content == "-")
1338 {
1339 neg = true;
1340 swallow ();
1341 t = next ();
1342 }
1343 if (!(t && t->type == tok_number))
1344 throw PARSE_ERROR (_("expected number"));
1345
1346 const string& s = t->content;
1347 const char* startp = s.c_str ();
1348 char* endp = (char*) startp;
1349
1350 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1351 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1352 // since the lexer only gives us positive digit strings, but we'll
1353 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1354 errno = 0;
1355 value = (int64_t) strtoull (startp, & endp, 0);
1356 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
1357 || (neg && (unsigned long long) value > 9223372036854775808ULL)
1358 || (unsigned long long) value > 18446744073709551615ULL
1359 || value < -9223372036854775807LL-1)
1360 throw PARSE_ERROR (_("number invalid or out of range"));
1361
1362 if (neg)
1363 value = -value;
1364
1365 swallow (); // We are done with it, content was parsed and copied into value.
1366 }
1367
1368
1369 const token*
1370 parser::expect_ident_or_atword (interned_string & target)
1371 {
1372 const token *t = next();
1373
1374 // accept identifiers and operators beginning in '@':
1375 if (!t || (t->type != tok_identifier
1376 && (t->type != tok_operator || t->content[0] != '@')))
1377 // XXX currently this is only called from parse_hist_op_or_bare_name(),
1378 // so the message is accurate, but keep an eye out in the future:
1379 throw PARSE_ERROR (_F("expected %s or statistical operation", tt2str(tok_identifier).c_str()));
1380
1381 target = t->content;
1382 return t;
1383 }
1384
1385
1386 void
1387 parser::expect_ident_or_keyword (interned_string & target)
1388 {
1389 expect_unknown2 (tok_identifier, tok_keyword, target);
1390 }
1391
1392
1393 bool
1394 parser::peek_op (string const & op)
1395 {
1396 return tok_is (peek(), tok_operator, op);
1397 }
1398
1399
1400 bool
1401 parser::peek_kw (string const & kw)
1402 {
1403 return tok_is (peek(), tok_identifier, kw);
1404 }
1405
1406
1407
1408 lexer::lexer (istream& input, const string& in, systemtap_session& s, bool cc):
1409 ate_comment(false), ate_whitespace(false), saw_tokens(false), check_compatible(cc),
1410 input_name (in), input_pointer (0), input_end (0), cursor_suspend_count(0),
1411 cursor_suspend_line (1), cursor_suspend_column (1), cursor_line (1),
1412 cursor_column (1), session(s), current_file (0), current_token_chain (0)
1413 {
1414 getline(input, input_contents, '\0');
1415
1416 input_pointer = input_contents.data();
1417 input_end = input_contents.data() + input_contents.size();
1418
1419 if (keywords.empty())
1420 {
1421 // NB: adding new keywords is highly disruptive to the language,
1422 // in particular to existing scripts that could be suddenly
1423 // broken. If done at all, it has to be s.compatible-sensitive,
1424 // and broadly advertised.
1425 keywords.insert("probe");
1426 keywords.insert("global");
1427 if (has_version("3.0"))
1428 keywords.insert("private");
1429 keywords.insert("function");
1430 keywords.insert("if");
1431 keywords.insert("else");
1432 keywords.insert("for");
1433 keywords.insert("foreach");
1434 keywords.insert("in");
1435 keywords.insert("limit");
1436 keywords.insert("return");
1437 keywords.insert("delete");
1438 keywords.insert("while");
1439 keywords.insert("break");
1440 keywords.insert("continue");
1441 keywords.insert("next");
1442 keywords.insert("string");
1443 keywords.insert("long");
1444 keywords.insert("try");
1445 keywords.insert("catch");
1446 }
1447
1448 if (atwords.empty())
1449 {
1450 // NB: adding new @words is mildly disruptive to existing
1451 // scripts that define macros with the same name, but not
1452 // really. The user will merely receive a warning that they are
1453 // redefining an existing operator.
1454
1455 // These are inserted without the actual '@', so we can directly check
1456 // proposed macro names without building a string with that prefix.
1457 atwords.insert("cast");
1458 atwords.insert("defined");
1459 atwords.insert("entry");
1460 atwords.insert("perf");
1461 atwords.insert("var");
1462 atwords.insert("avg");
1463 atwords.insert("count");
1464 atwords.insert("sum");
1465 atwords.insert("min");
1466 atwords.insert("max");
1467 atwords.insert("hist_linear");
1468 atwords.insert("hist_log");
1469 if (has_version("3.1"))
1470 {
1471 atwords.insert("const");
1472 atwords.insert("variance");
1473 }
1474 if (has_version("4.0"))
1475 {
1476 atwords.insert("kregister");
1477 atwords.insert("uregister");
1478 atwords.insert("kderef");
1479 atwords.insert("uderef");
1480 }
1481 }
1482 }
1483
1484 unordered_set<string> lexer::atwords;
1485
1486 void
1487 lexer::set_current_file (stapfile* f)
1488 {
1489 current_file = f;
1490 if (f)
1491 {
1492 f->file_contents = input_contents;
1493 f->name = input_name;
1494 }
1495 }
1496
1497 void
1498 lexer::set_current_token_chain (const token* tok)
1499 {
1500 current_token_chain = tok;
1501 }
1502
1503 int
1504 lexer::input_peek (unsigned n)
1505 {
1506 if (input_pointer + n >= input_end)
1507 return -1; // EOF
1508 return (unsigned char)*(input_pointer + n);
1509 }
1510
1511
1512 bool
1513 lexer::has_version (const char* v) const
1514 {
1515 return check_compatible
1516 ? strverscmp(session.compatible.c_str(), v) >= 0
1517 : true;
1518 }
1519
1520 int
1521 lexer::input_get ()
1522 {
1523 int c = input_peek();
1524 if (c < 0) return c; // EOF
1525
1526 ++input_pointer;
1527
1528 if (cursor_suspend_count)
1529 {
1530 // Track effect of input_put: preserve previous cursor/line_column
1531 // until all of its characters are consumed.
1532 if (--cursor_suspend_count == 0)
1533 {
1534 cursor_line = cursor_suspend_line;
1535 cursor_column = cursor_suspend_column;
1536 }
1537 }
1538 else
1539 {
1540 // update source cursor
1541 if (c == '\n')
1542 {
1543 cursor_line ++;
1544 cursor_column = 1;
1545 }
1546 else
1547 cursor_column ++;
1548 }
1549
1550 // clog << "[" << (char)c << "]";
1551 return c;
1552 }
1553
1554
1555 void
1556 lexer::input_put (const string& chars, const token* t)
1557 {
1558 size_t pos = input_pointer - input_contents.data();
1559 // clog << "[put:" << chars << " @" << pos << "]";
1560 input_contents.insert (pos, chars);
1561 cursor_suspend_count += chars.size();
1562 cursor_suspend_line = cursor_line;
1563 cursor_suspend_column = cursor_column;
1564 cursor_line = t->location.line;
1565 cursor_column = t->location.column;
1566 input_pointer = input_contents.data() + pos;
1567 input_end = input_contents.data() + input_contents.size();
1568 }
1569
1570
1571 token*
1572 lexer::scan ()
1573 {
1574 ate_comment = false; // reset for each new token
1575 ate_whitespace = false; // reset for each new token
1576
1577 // XXX be very sure to restore old_saw_tokens if we return without a token:
1578 bool old_saw_tokens = saw_tokens;
1579 saw_tokens = true;
1580
1581 token* n = new token;
1582 string token_str; // accumulate here instead of by incremental interning
1583 n->location.file = current_file;
1584 n->chain = current_token_chain;
1585
1586 skip:
1587 bool suspended = (cursor_suspend_count > 0);
1588 n->location.line = cursor_line;
1589 n->location.column = cursor_column;
1590
1591 int c = input_get();
1592 // clog << "{" << (char)c << (char)c2 << "}";
1593 if (c < 0)
1594 {
1595 delete n;
1596 saw_tokens = old_saw_tokens;
1597 return 0;
1598 }
1599
1600 if (isspace (c))
1601 {
1602 ate_whitespace = true;
1603 goto skip;
1604 }
1605
1606 int c2 = input_peek ();
1607
1608 // Paste command line arguments as character streams into
1609 // the beginning of a token. $1..$999 go through as raw
1610 // characters; @1..@999 are quoted/escaped as strings.
1611 // $# and @# expand to the number of arguments, similarly
1612 // raw or quoted.
1613 if ((c == '$' || c == '@') && (c2 == '#'))
1614 {
1615 token_str.push_back (c);
1616 token_str.push_back (c2);
1617 input_get(); // swallow '#'
1618
1619 if (suspended)
1620 {
1621 n->make_junk(tok_junk_nested_arg);
1622 return n;
1623 }
1624 size_t num_args = session.args.size ();
1625 input_put ((c == '$') ? lex_cast (num_args) : lex_cast_qstring (num_args), n);
1626 token_str.clear();
1627 goto skip;
1628 }
1629 else if ((c == '$' || c == '@') && (isdigit (c2)))
1630 {
1631 unsigned idx = 0;
1632 token_str.push_back (c);
1633 do
1634 {
1635 input_get ();
1636 token_str.push_back (c2);
1637 idx = (idx * 10) + (c2 - '0');
1638 c2 = input_peek ();
1639 } while (c2 > 0 &&
1640 isdigit (c2) &&
1641 idx <= session.args.size()); // prevent overflow
1642 if (suspended)
1643 {
1644 n->make_junk(tok_junk_nested_arg);
1645 return n;
1646 }
1647 if (idx == 0 ||
1648 idx-1 >= session.args.size())
1649 {
1650 n->make_junk(tok_junk_invalid_arg);
1651 return n;
1652 }
1653 session.used_args[idx-1] = true;
1654 const string& arg = session.args[idx-1];
1655 input_put ((c == '$') ? arg : lex_cast_qstring (arg), n);
1656 token_str.clear();
1657 goto skip;
1658 }
1659
1660 else if (isalpha (c) || c == '$' || c == '@' || c == '_')
1661 {
1662 token_str = (char) c;
1663 while (isalnum (c2) || c2 == '_' || c2 == '$')
1664 {
1665 input_get ();
1666 token_str.push_back (c2);
1667 c2 = input_peek ();
1668 }
1669 n->content = token_str;
1670
1671 if (n->content[0] == '@')
1672 // makes it easier to detect illegal use of @words:
1673 n->type = tok_operator;
1674 else if (keywords.count(n->content))
1675 n->type = tok_keyword;
1676 else
1677 n->type = tok_identifier;
1678
1679 return n;
1680 }
1681
1682 else if (isdigit (c)) // positive literal
1683 {
1684 n->type = tok_number;
1685 token_str = (char) c;
1686
1687 while (isalnum (c2))
1688 {
1689 // NB: isalnum is very permissive. We rely on strtol, called in
1690 // parser::parse_literal below, to confirm that the number string
1691 // is correctly formatted and in range.
1692
1693 input_get ();
1694 token_str.push_back (c2);
1695 c2 = input_peek ();
1696 }
1697
1698 n->content = token_str;
1699 return n;
1700 }
1701
1702 else if (c == '\"')
1703 {
1704 n->type = tok_string;
1705 while (1)
1706 {
1707 c = input_get ();
1708
1709 if (c < 0 || c == '\n')
1710 {
1711 n->make_junk(tok_junk_unclosed_quote);
1712 return n;
1713 }
1714 if (c == '\"') // closing double-quotes
1715 break;
1716 else if (c == '\\') // see also input_put
1717 {
1718 c = input_get();
1719 switch (c)
1720 {
1721 case 'x':
1722 if (!has_version("2.3"))
1723 goto the_default;
1724 /* FALLTHROUGH */
1725 case 'a':
1726 case 'b':
1727 case 't':
1728 case 'n':
1729 case 'v':
1730 case 'f':
1731 case 'r':
1732 case '0' ... '7': // NB: need only match the first digit
1733 case '\\':
1734 // Pass these escapes through to the string value
1735 // being parsed; it will be emitted into a C literal.
1736 // XXX: PR13371: perhaps we should evaluate them here
1737 // (and re-quote them during translate.cxx emission).
1738 token_str.push_back ('\\');
1739
1740 // fall through
1741 default: the_default:
1742 token_str.push_back (c);
1743 break;
1744 }
1745 }
1746 else
1747 token_str.push_back (c);
1748 }
1749 n->content = token_str;
1750 return n;
1751 }
1752
1753 else if (ispunct (c))
1754 {
1755 int c3 = input_peek (1);
1756
1757 // NB: if we were to recognize negative numeric literals here,
1758 // we'd introduce another grammar ambiguity:
1759 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
1760 // instead of tok_number(1) tok_operator('-') tok_number(1)
1761
1762 if (c == '#') // shell comment
1763 {
1764 unsigned this_line = cursor_line;
1765 do { c = input_get (); }
1766 while (c >= 0 && cursor_line == this_line);
1767 ate_comment = true;
1768 ate_whitespace = true;
1769 goto skip;
1770 }
1771 else if ((c == '/' && c2 == '/')) // C++ comment
1772 {
1773 unsigned this_line = cursor_line;
1774 do { c = input_get (); }
1775 while (c >= 0 && cursor_line == this_line);
1776 ate_comment = true;
1777 ate_whitespace = true;
1778 goto skip;
1779 }
1780 else if (c == '/' && c2 == '*') // C comment
1781 {
1782 (void) input_get (); // swallow '*' already in c2
1783 c = input_get ();
1784 c2 = input_get ();
1785 while (c2 >= 0)
1786 {
1787 if (c == '*' && c2 == '/')
1788 break;
1789 c = c2;
1790 c2 = input_get ();
1791 }
1792 ate_comment = true;
1793 ate_whitespace = true;
1794 goto skip;
1795 }
1796 else if (c == '%' && c2 == '{') // embedded code
1797 {
1798 n->type = tok_embedded;
1799 (void) input_get (); // swallow '{' already in c2
1800 c = input_get ();
1801 c2 = input_get ();
1802 while (c2 >= 0)
1803 {
1804 if (c == '%' && c2 == '}')
1805 {
1806 n->content = token_str;
1807 return n;
1808 }
1809 if (c == '}' && c2 == '%') // possible typo
1810 session.print_warning (_("possible erroneous closing '}%', use '%}'?"), n);
1811 token_str.push_back (c);
1812 c = c2;
1813 c2 = input_get();
1814 }
1815
1816 n->make_junk(tok_junk_unclosed_embedded);
1817 return n;
1818 }
1819
1820 // We're committed to recognizing at least the first character
1821 // as an operator.
1822 n->type = tok_operator;
1823 token_str = (char) c;
1824
1825 // match all valid operators, in decreasing size order
1826 if ((c == '<' && c2 == '<' && c3 == '<') ||
1827 (c == '>' && c2 == '>' && c3 == '>') ||
1828 (c == '<' && c2 == '<' && c3 == '=') ||
1829 (c == '>' && c2 == '>' && c3 == '='))
1830 {
1831 token_str.push_back (c2);
1832 token_str.push_back (c3);
1833 input_get (); // c2
1834 input_get (); // c3
1835 }
1836 else if ((c == '=' && c2 == '=') ||
1837 (c == '!' && c2 == '=') ||
1838 (c == '<' && c2 == '=') ||
1839 (c == '>' && c2 == '=') ||
1840 (c == '=' && c2 == '~') ||
1841 (c == '!' && c2 == '~') ||
1842 (c == '+' && c2 == '=') ||
1843 (c == '-' && c2 == '=') ||
1844 (c == '*' && c2 == '=') ||
1845 (c == '/' && c2 == '=') ||
1846 (c == '%' && c2 == '=') ||
1847 (c == '&' && c2 == '=') ||
1848 (c == '^' && c2 == '=') ||
1849 (c == '|' && c2 == '=') ||
1850 (c == '.' && c2 == '=') ||
1851 (c == '&' && c2 == '&') ||
1852 (c == '|' && c2 == '|') ||
1853 (c == '+' && c2 == '+') ||
1854 (c == '-' && c2 == '-') ||
1855 (c == '-' && c2 == '>') ||
1856 (c == '<' && c2 == '<') ||
1857 (c == '>' && c2 == '>') ||
1858 // preprocessor tokens
1859 (c == '%' && c2 == '(') ||
1860 (c == '%' && c2 == '?') ||
1861 (c == '%' && c2 == ':') ||
1862 (c == '%' && c2 == ')'))
1863 {
1864 token_str.push_back (c2);
1865 input_get (); // swallow other character
1866 }
1867
1868 n->content = token_str;
1869 return n;
1870 }
1871
1872 else
1873 {
1874 n->type = tok_junk;
1875 ostringstream s;
1876 s << "\\x" << hex << setw(2) << setfill('0') << c;
1877 n->content = s.str();
1878 // signal parser to emit "expected X, found junk" type error
1879 n->make_junk(tok_junk_unknown);
1880 return n;
1881 }
1882 }
1883
1884 // ------------------------------------------------------------------------
1885
1886 void
1887 token::make_junk (token_junk_type junk)
1888 {
1889 type = tok_junk;
1890 junk_type = junk;
1891 }
1892
1893 // ------------------------------------------------------------------------
1894
1895 string
1896 token::junk_message(systemtap_session& session) const
1897 {
1898 switch (junk_type)
1899 {
1900 case tok_junk_nested_arg:
1901 return _("invalid nested substitution of command line arguments");
1902
1903 case tok_junk_invalid_arg:
1904 return _F("command line argument out of range [1-%lu]",
1905 (unsigned long) session.args.size());
1906
1907 case tok_junk_unclosed_quote:
1908 return _("Could not find matching closing quote");
1909
1910 case tok_junk_unclosed_embedded:
1911 return _("Could not find matching '%}' to close embedded function block");
1912
1913 default:
1914 return _("unknown junk token");
1915 }
1916 }
1917
1918 // ------------------------------------------------------------------------
1919
1920 stapfile*
1921 parser::parse ()
1922 {
1923 stapfile* f = new stapfile;
1924 f->privileged = this->privileged;
1925 input.set_current_file (f);
1926
1927 bool empty = true;
1928
1929 while (1)
1930 {
1931 try
1932 {
1933 systemtap_v_seen = 0;
1934 const token* t = peek ();
1935 if (! t) // nice clean EOF, modulo any preprocessing that occurred
1936 break;
1937
1938 empty = false;
1939 if (t->type == tok_keyword && t->content == "probe")
1940 {
1941 context = con_probe;
1942 parse_probe (f->probes, f->aliases);
1943 }
1944 else if (t->type == tok_keyword && t->content == "private")
1945 {
1946 context = con_unknown;
1947 parse_private (f->globals, f->probes, f->name, f->functions);
1948 }
1949 else if (t->type == tok_keyword && t->content == "global")
1950 {
1951 context = con_global;
1952 parse_global (f->globals, f->probes, f->name);
1953 }
1954 else if (t->type == tok_keyword && t->content == "function")
1955 {
1956 context = con_function;
1957 parse_functiondecl (f->functions, f->name);
1958 }
1959 else if (t->type == tok_embedded)
1960 {
1961 context = con_embedded;
1962 f->embeds.push_back (parse_embeddedcode ());
1963 }
1964 else
1965 {
1966 context = con_unknown;
1967 throw PARSE_ERROR (_("expected 'probe', 'global', 'private', 'function', or '%{'"));
1968 }
1969 }
1970 catch (parse_error& pe)
1971 {
1972 print_error (pe, errs_as_warnings);
1973
1974 // XXX: do we want tok_junk to be able to force skip_some behaviour?
1975 if (pe.skip_some) // for recovery
1976 // Quietly swallow all tokens until the next keyword we can start parsing from.
1977 while (1)
1978 try
1979 {
1980 {
1981 const token* t = peek ();
1982 if (! t)
1983 break;
1984 if (t->type == tok_keyword && t->content == "probe") break;
1985 else if (t->type == tok_keyword && t->content == "private") break;
1986 else if (t->type == tok_keyword && t->content == "global") break;
1987 else if (t->type == tok_keyword && t->content == "function") break;
1988 else if (t->type == tok_embedded) break;
1989 swallow (); // swallow it
1990 }
1991 }
1992 catch (parse_error& pe2)
1993 {
1994 // parse error during recovery ... ugh
1995 print_error (pe2);
1996 }
1997 }
1998 }
1999
2000 if (empty && user_file)
2001 {
2002 // vary message depending on whether file was *actually* empty:
2003 cerr << (input.saw_tokens
2004 ? _F("Input file '%s' is empty after preprocessing.", input_name.c_str())
2005 : _F("Input file '%s' is empty.", input_name.c_str()))
2006 << endl;
2007 delete f;
2008 f = 0;
2009 }
2010 else if (num_errors > 0)
2011 {
2012 cerr << _NF("%d parse error.", "%d parse errors.", num_errors, num_errors) << endl;
2013 delete f;
2014 f = 0;
2015 }
2016
2017 input.set_current_file(0);
2018 return f;
2019 }
2020
2021
2022 probe*
2023 parser::parse_synthetic_probe (const token* chain)
2024 {
2025 probe* p = NULL;
2026 stapfile* f = new stapfile;
2027 f->privileged = this->privileged;
2028 f->synthetic = true;
2029 input.set_current_file (f);
2030 input.set_current_token_chain (chain);
2031
2032 try
2033 {
2034 context = con_probe;
2035 parse_probe (f->probes, f->aliases);
2036
2037 if (f->probes.size() != 1 || !f->aliases.empty())
2038 throw PARSE_ERROR (_("expected a single synthetic probe"));
2039 p = f->probes[0];
2040 }
2041 catch (parse_error& pe)
2042 {
2043 print_error (pe, errs_as_warnings);
2044 }
2045
2046 // TODO check for unparsed tokens?
2047
2048 input.set_current_file(0);
2049 input.set_current_token_chain(0);
2050 p->synthetic = true;
2051 return p;
2052 }
2053
2054
2055 void
2056 parser::parse_probe (vector<probe *> & probe_ret,
2057 vector<probe_alias *> & alias_ret)
2058 {
2059 const token* t0 = next ();
2060 if (! (t0->type == tok_keyword && t0->content == "probe"))
2061 throw PARSE_ERROR (_("expected 'probe'"));
2062
2063 vector<probe_point *> aliases;
2064 vector<probe_point *> locations;
2065
2066 int epilogue_alias = 0;
2067
2068 while (1)
2069 {
2070 vector<probe_point*> pps = parse_probe_points();
2071
2072 const token* t = peek ();
2073 if (pps.size() == 1 && t
2074 && t->type == tok_operator && t->content == "=")
2075 {
2076 if (pps[0]->optional || pps[0]->sufficient)
2077 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps[0]->components.front()->tok);
2078 aliases.push_back(pps[0]);
2079 swallow ();
2080 continue;
2081 }
2082 else if (pps.size() == 1 && t
2083 && t->type == tok_operator && t->content == "+=")
2084 {
2085 if (pps[0]->optional || pps[0]->sufficient)
2086 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps[0]->components.front()->tok);
2087 aliases.push_back(pps[0]);
2088 epilogue_alias = 1;
2089 swallow ();
2090 continue;
2091 }
2092 else if (t && t->type == tok_operator && t->content == "{")
2093 {
2094 locations.insert(locations.end(), pps.begin(), pps.end());
2095 break;
2096 }
2097 else
2098 throw PARSE_ERROR (_("expected probe point specifier"));
2099 }
2100
2101 if (aliases.empty())
2102 {
2103 probe* p = new probe;
2104 p->tok = t0;
2105 p->locations = locations;
2106 p->body = parse_stmt_block ();
2107 p->privileged = privileged;
2108 p->systemtap_v_conditional = systemtap_v_seen;
2109 probe_ret.push_back (p);
2110 }
2111 else
2112 {
2113 probe_alias* p = new probe_alias (aliases);
2114 if(epilogue_alias)
2115 p->epilogue_style = true;
2116 else
2117 p->epilogue_style = false;
2118 p->tok = t0;
2119 p->locations = locations;
2120 p->body = parse_stmt_block ();
2121 p->privileged = privileged;
2122 p->systemtap_v_conditional = systemtap_v_seen;
2123 alias_ret.push_back (p);
2124 }
2125 }
2126
2127
2128 embeddedcode*
2129 parser::parse_embeddedcode ()
2130 {
2131 embeddedcode* e = new embeddedcode;
2132 const token* t = next ();
2133 if (t->type != tok_embedded)
2134 throw PARSE_ERROR (_("expected '%{'"));
2135
2136 if (! privileged)
2137 throw PARSE_ERROR (_("embedded code in unprivileged script; need stap -g"),
2138 false /* don't skip tokens for parse resumption */);
2139
2140 e->tok = t;
2141 e->code = t->content;
2142 return e;
2143 }
2144
2145
2146 block*
2147 parser::parse_stmt_block ()
2148 {
2149 block* pb = new block;
2150
2151 const token* t = next ();
2152 if (! (t->type == tok_operator && t->content == "{"))
2153 throw PARSE_ERROR (_("expected '{'"));
2154
2155 pb->tok = t;
2156
2157 while (1)
2158 {
2159 t = peek ();
2160 if (t && t->type == tok_operator && t->content == "}")
2161 {
2162 swallow ();
2163 break;
2164 }
2165 pb->statements.push_back (parse_statement ());
2166 }
2167
2168 return pb;
2169 }
2170
2171
2172 try_block*
2173 parser::parse_try_block ()
2174 {
2175 try_block* pb = new try_block;
2176
2177 pb->tok = expect_kw_token ("try");
2178 pb->try_block = parse_stmt_block();
2179 expect_kw ("catch");
2180
2181 const token* t = peek ();
2182 if (t != NULL && t->type == tok_operator && t->content == "(")
2183 {
2184 swallow (); // swallow the '('
2185
2186 t = next();
2187 if (! (t->type == tok_identifier))
2188 throw PARSE_ERROR (_("expected identifier"));
2189 symbol* sym = new symbol;
2190 sym->tok = t;
2191 sym->name = t->content;
2192 pb->catch_error_var = sym;
2193
2194 expect_op (")");
2195 }
2196 else
2197 pb->catch_error_var = 0;
2198
2199 pb->catch_block = parse_stmt_block();
2200
2201 return pb;
2202 }
2203
2204
2205
2206 statement*
2207 parser::parse_statement ()
2208 {
2209 statement *ret;
2210 const token* t = peek ();
2211 if (t && t->type == tok_operator && t->content == ";")
2212 return new null_statement (next ());
2213 else if (t && t->type == tok_operator && t->content == "{")
2214 return parse_stmt_block (); // Don't squash semicolons.
2215 else if (t && t->type == tok_keyword && t->content == "try")
2216 return parse_try_block (); // Don't squash semicolons.
2217 else if (t && t->type == tok_keyword && t->content == "if")
2218 return parse_if_statement (); // Don't squash semicolons.
2219 else if (t && t->type == tok_keyword && t->content == "for")
2220 return parse_for_loop (); // Don't squash semicolons.
2221 else if (t && t->type == tok_keyword && t->content == "foreach")
2222 return parse_foreach_loop (); // Don't squash semicolons.
2223 else if (t && t->type == tok_keyword && t->content == "while")
2224 return parse_while_loop (); // Don't squash semicolons.
2225 else if (t && t->type == tok_keyword && t->content == "return")
2226 ret = parse_return_statement ();
2227 else if (t && t->type == tok_keyword && t->content == "delete")
2228 ret = parse_delete_statement ();
2229 else if (t && t->type == tok_keyword && t->content == "break")
2230 ret = parse_break_statement ();
2231 else if (t && t->type == tok_keyword && t->content == "continue")
2232 ret = parse_continue_statement ();
2233 else if (t && t->type == tok_keyword && t->content == "next")
2234 ret = parse_next_statement ();
2235 else if (t && (t->type == tok_operator || // expressions are flexible
2236 t->type == tok_identifier ||
2237 t->type == tok_number ||
2238 t->type == tok_string ||
2239 t->type == tok_embedded ))
2240 ret = parse_expr_statement ();
2241 // XXX: consider generally accepting tok_embedded here too
2242 else
2243 throw PARSE_ERROR (_("expected statement"));
2244
2245 // Squash "empty" trailing colons after any "non-block-like" statement.
2246 t = peek ();
2247 if (t && t->type == tok_operator && t->content == ";")
2248 {
2249 swallow (); // Silently eat trailing ; after statement
2250 }
2251
2252 return ret;
2253 }
2254
2255 void
2256 parser::parse_private (vector <vardecl*>& globals, vector<probe*>& probes,
2257 string const & fname, vector<functiondecl*>& functions)
2258 {
2259 const token* t = next ();
2260 if (! (t->type == tok_keyword && t->content == "private"))
2261 throw PARSE_ERROR (_("expected 'private'"));
2262 swallow ();
2263 t = next ();
2264 if (t->type == tok_keyword && t->content == "function")
2265 {
2266 swallow ();
2267 context = con_function;
2268 do_parse_functiondecl(functions, t, fname, true);
2269 }
2270 else if (t->type == tok_keyword && t->content == "global")
2271 {
2272 swallow ();
2273 context = con_global;
2274 t = next ();
2275 if (! (t->type == tok_identifier))
2276 throw PARSE_ERROR (_("expected identifier"));
2277 do_parse_global(globals, probes, fname, t, true);
2278 }
2279 // The `private <identifier>` is an acceptable shorthand
2280 // for `private global <identifier>` per above.
2281 else if (t->type == tok_identifier)
2282 {
2283 context = con_global;
2284 do_parse_global(globals, probes, fname, t, true);
2285 }
2286 else
2287 throw PARSE_ERROR (_("expected 'function' or identifier"));
2288 }
2289
2290 void
2291 parser::parse_global (vector <vardecl*>& globals, vector<probe*>& probes,
2292 string const & fname)
2293 {
2294 const token* t0 = next ();
2295 if (! (t0->type == tok_keyword && t0->content == "global"))
2296 throw PARSE_ERROR (_("expected 'global' or 'private'"));
2297 swallow ();
2298 do_parse_global(globals, probes, fname, 0, false);
2299 }
2300
2301 void
2302 parser::do_parse_global (vector <vardecl*>& globals, vector<probe*>&,
2303 string const & fname, const token* t0, bool priv)
2304 {
2305 bool iter0 = true;
2306 const token* t;
2307 while (1)
2308 {
2309 t = (iter0 && priv) ? t0 : next ();
2310 iter0 = false;
2311 if (! (t->type == tok_identifier))
2312 throw PARSE_ERROR (_("expected identifier"));
2313
2314 string gname = "__global_" + string(t->content);
2315 string pname = "__private_" + detox_path(fname) + string(t->content);
2316 string name = priv ? pname : gname;
2317
2318 for (unsigned i=0; i<globals.size(); i++)
2319 {
2320 if (globals[i]->name == name)
2321 throw PARSE_ERROR (_("duplicate global name"));
2322 if ((globals[i]->name == gname) || (globals[i]->name == pname))
2323 throw PARSE_ERROR (_("global versus private variable declaration conflict"));
2324 }
2325
2326 vardecl* d = new vardecl;
2327 d->unmangled_name = t->content;
2328 d->name = name;
2329 d->tok = t;
2330 d->systemtap_v_conditional = systemtap_v_seen;
2331 globals.push_back (d);
2332
2333 t = peek ();
2334
2335 if(t && t->type == tok_operator && t->content == "%") //wrapping
2336 {
2337 d->wrap = true;
2338 swallow ();
2339 t = peek();
2340 }
2341
2342 if (t && t->type == tok_operator && t->content == "[") // array size
2343 {
2344 int64_t size;
2345 swallow ();
2346 expect_number(size);
2347 if (size <= 0 || size > INT_MAX)
2348 throw PARSE_ERROR(_("array size out of range"));
2349 d->maxsize = (int)size;
2350 expect_known(tok_operator, "]");
2351 t = peek ();
2352 }
2353
2354 if (t && t->type == tok_operator && t->content == "=") // initialization
2355 {
2356 if (!d->compatible_arity(0))
2357 throw PARSE_ERROR(_("only scalar globals can be initialized"));
2358 d->set_arity(0, t);
2359 next (); // Don't swallow, set_arity() used the peeked token.
2360 d->init = parse_literal ();
2361 d->type = d->init->type;
2362 t = peek ();
2363 }
2364
2365 if (t && t->type == tok_operator && t->content == ";") // termination
2366 {
2367 swallow ();
2368 break;
2369 }
2370
2371 if (t && t->type == tok_operator && t->content == ",") // next global
2372 {
2373 swallow ();
2374 continue;
2375 }
2376 else
2377 break;
2378 }
2379 }
2380
2381 void
2382 parser::parse_functiondecl (vector<functiondecl*>& functions,
2383 string const & fname)
2384 {
2385 const token* t = next ();
2386 if (! (t->type == tok_keyword && t->content == "function"))
2387 throw PARSE_ERROR (_("expected 'function'"));
2388 swallow ();
2389 do_parse_functiondecl(functions, t, fname, false);
2390 }
2391
2392 void
2393 parser::do_parse_functiondecl (vector<functiondecl*>& functions, const token* t,
2394 string const & fname, bool priv)
2395 {
2396 t = next ();
2397 if (! (t->type == tok_identifier)
2398 && ! (t->type == tok_keyword
2399 && (t->content == "string" || t->content == "long")))
2400 throw PARSE_ERROR (_("expected identifier"));
2401
2402 string gname = "__global_" + string(t->content);
2403 string pname = "__private_" + detox_path(fname) + string(t->content);
2404 string name = priv ? pname : gname;
2405 name += "__overload_" + lex_cast(session.overload_count[t->content]++);
2406
2407 functiondecl *fd = new functiondecl ();
2408 fd->unmangled_name = t->content;
2409 fd->name = name;
2410 fd->tok = t;
2411
2412 t = next ();
2413 if (t->type == tok_operator && t->content == ":")
2414 {
2415 swallow ();
2416 t = next ();
2417 if (t->type == tok_keyword && t->content == "string")
2418 fd->type = pe_string;
2419 else if (t->type == tok_keyword && t->content == "long")
2420 fd->type = pe_long;
2421 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2422 swallow ();
2423
2424 t = next ();
2425 }
2426
2427 if (! (t->type == tok_operator && t->content == "("))
2428 throw PARSE_ERROR (_("expected '('"));
2429 swallow ();
2430
2431 while (1)
2432 {
2433 t = next ();
2434
2435 // permit zero-argument functions
2436 if (t->type == tok_operator && t->content == ")")
2437 {
2438 swallow ();
2439 break;
2440 }
2441 else if (! (t->type == tok_identifier))
2442 throw PARSE_ERROR (_("expected identifier"));
2443 vardecl* vd = new vardecl;
2444 vd->unmangled_name = vd->name = t->content;
2445
2446 for (auto it = fd->formal_args.begin() ; it != fd->formal_args.end(); ++it)
2447 {
2448 string param = vd->unmangled_name;
2449 if ((*it)->unmangled_name == param)
2450 throw PARSE_ERROR(_("duplicate parameter names"));
2451 }
2452
2453 vd->tok = t;
2454 fd->formal_args.push_back (vd);
2455 fd->systemtap_v_conditional = systemtap_v_seen;
2456
2457 t = next ();
2458
2459 if (t->type == tok_operator && t->content == ":")
2460 {
2461 swallow ();
2462 t = next ();
2463 if (t->type == tok_keyword && t->content == "string")
2464 vd->type = pe_string;
2465 else if (t->type == tok_keyword && t->content == "long")
2466 vd->type = pe_long;
2467 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2468 swallow ();
2469 t = next ();
2470 }
2471 if (t->type == tok_operator && t->content == ")")
2472 {
2473 swallow ();
2474 break;
2475 }
2476 if (t->type == tok_operator && t->content == ",")
2477 {
2478 swallow ();
2479 continue;
2480 }
2481 else
2482 throw PARSE_ERROR (_("expected ',' or ')'"));
2483 }
2484
2485 t = peek();
2486 if (t->type == tok_operator && t->content == ":")
2487 {
2488 swallow();
2489 literal* literal = parse_literal();
2490 literal_number* ln = dynamic_cast<literal_number*>(literal);
2491 if (ln == 0)
2492 throw PARSE_ERROR (_("expected literal number"));
2493 fd->priority = ln->value;
2494
2495 // reserve priority 0 for user script implementation
2496 if (fd->priority < 1)
2497 throw PARSE_ERROR (_("specified priority must be > 0"));
2498 delete literal;
2499 }
2500 else if (user_file)
2501 {
2502 // allow script file implementation override automatically when
2503 // priority not specified
2504 fd->priority = 0;
2505 }
2506
2507 t = peek ();
2508 if (t && t->type == tok_embedded)
2509 fd->body = parse_embeddedcode ();
2510 else
2511 fd->body = parse_stmt_block ();
2512
2513 functions.push_back (fd);
2514 }
2515
2516 vector<probe_point*>
2517 parser::parse_probe_points()
2518 {
2519 vector<probe_point*> pps;
2520 while (1)
2521 {
2522 vector<probe_point*> tail = parse_components();
2523 pps.insert(pps.end(), tail.begin(), tail.end());
2524
2525 const token* t = peek();
2526 if (t && t->type == tok_operator && t->content == ",")
2527 {
2528 swallow();
2529 continue;
2530 }
2531
2532 if (t && t->type == tok_operator
2533 && (t->content == "{" || t->content == "=" ||
2534 t->content == "+="|| t->content == "}"))
2535 break;
2536
2537 throw PARSE_ERROR (_("expected one of ', { } = +='"));
2538 }
2539 return pps;
2540 }
2541
2542 vector<probe_point*>
2543 parser::parse_components()
2544 {
2545 vector<probe_point*> pps;
2546 while (1)
2547 {
2548 vector<probe_point*> suffix = parse_component();
2549
2550 // Cartesian product of components
2551 if (pps.empty())
2552 pps = suffix;
2553 else
2554 {
2555 assert(!suffix.empty());
2556 vector<probe_point*> product;
2557 for (unsigned i = 0; i < pps.size(); i++)
2558 {
2559 if (pps[i]->optional || pps[i]->sufficient || pps[i]->condition)
2560 throw PARSE_ERROR (_("'?', '!' or condition must only be specified in suffix"),
2561 pps[i]->components[0]->tok);
2562 for (unsigned j = 0; j < suffix.size(); j++)
2563 {
2564 probe_point* pp = new probe_point;
2565 pp->components.insert(pp->components.end(),
2566 pps[i]->components.begin(),
2567 pps[i]->components.end());
2568 pp->components.insert(pp->components.end(),
2569 suffix[j]->components.begin(),
2570 suffix[j]->components.end());
2571 pp->optional = suffix[j]->optional;
2572 pp->sufficient = suffix[j]->sufficient;
2573 if (auto_path)
2574 pp->auto_path = suffix[j]->auto_path;
2575 pp->condition = suffix[j]->condition;
2576 product.push_back(pp);
2577 }
2578 }
2579 for (unsigned i = 0; i < pps.size(); i++) delete pps[i];
2580 for (unsigned i = 0; i < suffix.size(); i++) delete suffix[i];
2581 pps = product;
2582 }
2583
2584 const token* t = peek();
2585 if (t && t->type == tok_operator && t->content == ".")
2586 {
2587 swallow ();
2588 continue;
2589 }
2590
2591 // We only fall through here at the end of a probe point (past
2592 // all the dotted/parametrized components).
2593
2594 if (t && t->type == tok_operator &&
2595 (t->content == "?" || t->content == "!"))
2596 {
2597 for (unsigned i = 0; i < pps.size(); i++)
2598 {
2599 if (pps[i]->optional || pps[i]->sufficient)
2600 throw PARSE_ERROR (_("'?' or '!' respecified"));
2601 pps[i]->optional = true;
2602 if (t->content == "!") pps[i]->sufficient = true;
2603 }
2604 // NB: sufficient implies optional
2605 swallow ();
2606 t = peek ();
2607 // fall through
2608 }
2609
2610 if (t && t->type == tok_keyword && t->content == "if")
2611 {
2612 swallow ();
2613 t = peek ();
2614 if (!(t && t->type == tok_operator && t->content == "("))
2615 throw PARSE_ERROR (_("expected '('"));
2616 swallow ();
2617
2618 expression* e = parse_expression();
2619 for (unsigned i = 0; i < pps.size(); i++)
2620 {
2621 if (pps[i]->condition != 0)
2622 throw PARSE_ERROR (_("condition respecified"));
2623 pps[i]->condition = e;
2624 }
2625
2626 t = peek ();
2627 if (!(t && t->type == tok_operator && t->content == ")"))
2628 throw PARSE_ERROR (_("expected ')'"));
2629 swallow ();
2630 }
2631
2632 break;
2633 }
2634 return pps;
2635 }
2636
2637 vector<probe_point*>
2638 parser::parse_component()
2639 {
2640 const token* t = next ();
2641 if (! (t->type == tok_identifier
2642 // we must allow ".return" and ".function", which are keywords
2643 || t->type == tok_keyword
2644 // we must allow "*", due to being an operator
2645 || (t->type == tok_operator && (t->content == "*" || t->content == "{"))))
2646 throw PARSE_ERROR (_("expected identifier or '*' or '{'"));
2647
2648 if (t && t->type == tok_operator && t->content == "{")
2649 {
2650 swallow();
2651 vector<probe_point*> pps = parse_probe_points();
2652 t = peek();
2653 if (!(t && t->type == tok_operator && t->content == "}"))
2654 throw PARSE_ERROR (_("expected '}'"));
2655 swallow();
2656 return pps;
2657 }
2658 else
2659 {
2660 // loop which reconstitutes an identifier with wildcards
2661 string content = t->content;
2662 bool changed_p = false;
2663 while (1)
2664 {
2665 const token* u = peek();
2666 if (u == NULL)
2667 break;
2668 // ensure pieces of the identifier are adjacent:
2669 if (input.ate_whitespace)
2670 break;
2671 // ensure pieces of the identifier are valid:
2672 if (! (u->type == tok_identifier
2673 // we must allow arbitrary keywords with a wildcard
2674 || u->type == tok_keyword
2675 // we must allow "*", due to being an operator
2676 || (u->type == tok_operator && u->content == "*")))
2677 break;
2678
2679 // append u to t
2680 content = content + (string)u->content;
2681 changed_p = true;
2682
2683 // consume u
2684 swallow ();
2685 }
2686
2687 if (changed_p)
2688 {
2689 // We've already swallowed the first token and we're not
2690 // putting it back; no one else has a copy; so we can
2691 // safely overwrite its content and reuse it.
2692 const_cast<token*>(t)->content = content;
2693 }
2694
2695 probe_point::component* c = new probe_point::component;
2696 c->functor = t->content;
2697 c->tok = t;
2698 vector<probe_point*> pps;
2699 probe_point* pp = new probe_point;
2700 if (auto_path)
2701 pp->auto_path = input_name;
2702 pp->components.push_back(c);
2703 pps.push_back(pp);
2704 // NB we may add c->arg soon
2705
2706 t = peek ();
2707
2708 // consume optional parameter
2709 if (t && t->type == tok_operator && t->content == "(")
2710 {
2711 swallow (); // consume "("
2712 c->arg = parse_literal ();
2713
2714 t = next ();
2715 if (! (t->type == tok_operator && t->content == ")"))
2716 throw PARSE_ERROR (_("expected ')'"));
2717 swallow ();
2718 }
2719
2720 return pps;
2721 }
2722 }
2723
2724 literal_string*
2725 parser::consume_string_literals(const token *t)
2726 {
2727 literal_string *ls = new literal_string (t->content);
2728
2729 // PR11208: check if the next token is also a string literal;
2730 // auto-concatenate it. This is complicated to the extent that we
2731 // need to skip intermediate whitespace.
2732 //
2733 // NB for versions prior to 2.0: but don't skip over intervening comments
2734 string concat;
2735 bool p_concat = false;
2736 const token *n = peek();
2737 while (n != NULL && n->type == tok_string
2738 && ! (!input.has_version("2.0") && input.ate_comment))
2739 {
2740 if (!p_concat)
2741 {
2742 concat = t->content;
2743 p_concat = true;
2744 }
2745 concat.append(n->content.data(), n->content.size());
2746 next(); // consume the token
2747 n = peek();
2748 }
2749 if (p_concat)
2750 ls->value = concat;
2751 return ls;
2752 }
2753
2754
2755 // Parse a string literal and perform backslash escaping on the contents:
2756 literal_string*
2757 parser::parse_literal_string ()
2758 {
2759 const token* t = next ();
2760 literal_string* l;
2761 if (t->type == tok_string)
2762 l = consume_string_literals (t);
2763 else
2764 throw PARSE_ERROR (_("expected literal string"));
2765
2766 l->tok = t;
2767 return l;
2768 }
2769
2770
2771 literal*
2772 parser::parse_literal ()
2773 {
2774 const token* t = next ();
2775 literal* l;
2776 if (t->type == tok_string)
2777 {
2778 l = consume_string_literals (t);
2779 }
2780 else
2781 {
2782 bool neg = false;
2783 if (t->type == tok_operator && t->content == "-")
2784 {
2785 neg = true;
2786 swallow ();
2787 t = next ();
2788 }
2789
2790 if (t->type == tok_number)
2791 {
2792 const string& s = t->content;
2793 const char* startp = s.c_str ();
2794 char* endp = (char*) startp;
2795
2796 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
2797 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
2798 // since the lexer only gives us positive digit strings, but we'll
2799 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
2800 errno = 0;
2801 long long value = (long long) strtoull (startp, & endp, 0);
2802 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
2803 || (neg && (unsigned long long) value > 9223372036854775808ULL)
2804 || (unsigned long long) value > 18446744073709551615ULL
2805 || value < -9223372036854775807LL-1)
2806 throw PARSE_ERROR (_("number invalid or out of range"));
2807
2808 if (neg)
2809 value = -value;
2810
2811 l = new literal_number (value);
2812 }
2813 else
2814 throw PARSE_ERROR (_("expected literal string or number"));
2815 }
2816
2817 l->tok = t;
2818 return l;
2819 }
2820
2821
2822 if_statement*
2823 parser::parse_if_statement ()
2824 {
2825 const token* t = next ();
2826 if (! (t->type == tok_keyword && t->content == "if"))
2827 throw PARSE_ERROR (_("expected 'if'"));
2828 if_statement* s = new if_statement;
2829 s->tok = t;
2830
2831 t = next ();
2832 if (! (t->type == tok_operator && t->content == "("))
2833 throw PARSE_ERROR (_("expected '('"));
2834 swallow ();
2835
2836 s->condition = parse_expression ();
2837
2838 t = next ();
2839 if (! (t->type == tok_operator && t->content == ")"))
2840 throw PARSE_ERROR (_("expected ')'"));
2841 swallow ();
2842
2843 s->thenblock = parse_statement ();
2844
2845 t = peek ();
2846 if (t && t->type == tok_keyword && t->content == "else")
2847 {
2848 swallow ();
2849 s->elseblock = parse_statement ();
2850 }
2851 else
2852 s->elseblock = 0; // in case not otherwise initialized
2853
2854 return s;
2855 }
2856
2857
2858 expr_statement*
2859 parser::parse_expr_statement ()
2860 {
2861 expr_statement *es = new expr_statement;
2862 const token* t = peek ();
2863 if (t == NULL)
2864 throw PARSE_ERROR (_("expression statement expected"));
2865 // Copy, we only peeked, parse_expression might swallow.
2866 es->tok = new token (*t);
2867 es->value = parse_expression ();
2868 return es;
2869 }
2870
2871
2872 return_statement*
2873 parser::parse_return_statement ()
2874 {
2875 const token* t = next ();
2876 if (! (t->type == tok_keyword && t->content == "return"))
2877 throw PARSE_ERROR (_("expected 'return'"));
2878 if (context != con_function)
2879 throw PARSE_ERROR (_("found 'return' not in function context"));
2880 return_statement* s = new return_statement;
2881 s->tok = t;
2882
2883 t = peek ();
2884 if (t->type == tok_operator && (t->content == ";" || t->content == "}"))
2885 s->value = NULL; // no return value
2886 else
2887 s->value = parse_expression ();
2888 return s;
2889 }
2890
2891
2892 delete_statement*
2893 parser::parse_delete_statement ()
2894 {
2895 const token* t = next ();
2896 if (! (t->type == tok_keyword && t->content == "delete"))
2897 throw PARSE_ERROR (_("expected 'delete'"));
2898 delete_statement* s = new delete_statement;
2899 s->tok = t;
2900 s->value = parse_expression ();
2901 return s;
2902 }
2903
2904
2905 next_statement*
2906 parser::parse_next_statement ()
2907 {
2908 const token* t = next ();
2909 if (! (t->type == tok_keyword && t->content == "next"))
2910 throw PARSE_ERROR (_("expected 'next'"));
2911 next_statement* s = new next_statement;
2912 s->tok = t;
2913 return s;
2914 }
2915
2916
2917 break_statement*
2918 parser::parse_break_statement ()
2919 {
2920 const token* t = next ();
2921 if (! (t->type == tok_keyword && t->content == "break"))
2922 throw PARSE_ERROR (_("expected 'break'"));
2923 break_statement* s = new break_statement;
2924 s->tok = t;
2925 return s;
2926 }
2927
2928
2929 continue_statement*
2930 parser::parse_continue_statement ()
2931 {
2932 const token* t = next ();
2933 if (! (t->type == tok_keyword && t->content == "continue"))
2934 throw PARSE_ERROR (_("expected 'continue'"));
2935 continue_statement* s = new continue_statement;
2936 s->tok = t;
2937 return s;
2938 }
2939
2940
2941 for_loop*
2942 parser::parse_for_loop ()
2943 {
2944 const token* t = next ();
2945 if (! (t->type == tok_keyword && t->content == "for"))
2946 throw PARSE_ERROR (_("expected 'for'"));
2947 for_loop* s = new for_loop;
2948 s->tok = t;
2949
2950 t = next ();
2951 if (! (t->type == tok_operator && t->content == "("))
2952 throw PARSE_ERROR (_("expected '('"));
2953 swallow ();
2954
2955 // initializer + ";"
2956 t = peek ();
2957 if (t && t->type == tok_operator && t->content == ";")
2958 {
2959 s->init = 0;
2960 swallow ();
2961 }
2962 else
2963 {
2964 s->init = parse_expr_statement ();
2965 t = next ();
2966 if (! (t->type == tok_operator && t->content == ";"))
2967 throw PARSE_ERROR (_("expected ';'"));
2968 swallow ();
2969 }
2970
2971 // condition + ";"
2972 t = peek ();
2973 if (t && t->type == tok_operator && t->content == ";")
2974 {
2975 literal_number* l = new literal_number(1);
2976 s->cond = l;
2977 s->cond->tok = next ();
2978 }
2979 else
2980 {
2981 s->cond = parse_expression ();
2982 t = next ();
2983 if (! (t->type == tok_operator && t->content == ";"))
2984 throw PARSE_ERROR (_("expected ';'"));
2985 swallow ();
2986 }
2987
2988 // increment + ")"
2989 t = peek ();
2990 if (t && t->type == tok_operator && t->content == ")")
2991 {
2992 s->incr = 0;
2993 swallow ();
2994 }
2995 else
2996 {
2997 s->incr = parse_expr_statement ();
2998 t = next ();
2999 if (! (t->type == tok_operator && t->content == ")"))
3000 throw PARSE_ERROR (_("expected ')'"));
3001 swallow ();
3002 }
3003
3004 // block
3005 s->block = parse_statement ();
3006
3007 return s;
3008 }
3009
3010
3011 for_loop*
3012 parser::parse_while_loop ()
3013 {
3014 const token* t = next ();
3015 if (! (t->type == tok_keyword && t->content == "while"))
3016 throw PARSE_ERROR (_("expected 'while'"));
3017 for_loop* s = new for_loop;
3018 s->tok = t;
3019
3020 t = next ();
3021 if (! (t->type == tok_operator && t->content == "("))
3022 throw PARSE_ERROR (_("expected '('"));
3023 swallow ();
3024
3025 // dummy init and incr fields
3026 s->init = 0;
3027 s->incr = 0;
3028
3029 // condition
3030 s->cond = parse_expression ();
3031
3032 t = next ();
3033 if (! (t->type == tok_operator && t->content == ")"))
3034 throw PARSE_ERROR (_("expected ')'"));
3035 swallow ();
3036
3037 // block
3038 s->block = parse_statement ();
3039
3040 return s;
3041 }
3042
3043
3044 foreach_loop*
3045 parser::parse_foreach_loop ()
3046 {
3047 const token* t = next ();
3048 if (! (t->type == tok_keyword && t->content == "foreach"))
3049 throw PARSE_ERROR (_("expected 'foreach'"));
3050 foreach_loop* s = new foreach_loop;
3051 s->tok = t;
3052 s->sort_direction = 0;
3053 s->sort_aggr = sc_none;
3054 s->value = NULL;
3055 s->limit = NULL;
3056
3057 t = next ();
3058 if (! (t->type == tok_operator && t->content == "("))
3059 throw PARSE_ERROR (_("expected '('"));
3060 swallow ();
3061
3062 symbol* lookahead_sym = NULL;
3063 int lookahead_sort = 0;
3064
3065 t = peek ();
3066 if (t && t->type == tok_identifier)
3067 {
3068 next ();
3069 lookahead_sym = new symbol;
3070 lookahead_sym->tok = t;
3071 lookahead_sym->name = t->content;
3072
3073 t = peek ();
3074 if (t && t->type == tok_operator &&
3075 (t->content == "+" || t->content == "-"))
3076 {
3077 lookahead_sort = (t->content == "+") ? 1 : -1;
3078 swallow ();
3079 }
3080
3081 t = peek ();
3082 if (t && t->type == tok_operator && t->content == "=")
3083 {
3084 swallow ();
3085 s->value = lookahead_sym;
3086 if (lookahead_sort)
3087 {
3088 s->sort_direction = lookahead_sort;
3089 s->sort_column = 0;
3090 }
3091 lookahead_sym = NULL;
3092 }
3093 }
3094
3095 // see also parse_array_in
3096
3097 bool parenthesized = false;
3098 t = peek ();
3099 if (!lookahead_sym && t && t->type == tok_operator && t->content == "[")
3100 {
3101 swallow ();
3102 parenthesized = true;
3103 }
3104
3105 if (lookahead_sym)
3106 {
3107 s->indexes.push_back (lookahead_sym);
3108 if (lookahead_sort)
3109 {
3110 s->sort_direction = lookahead_sort;
3111 s->sort_column = 1;
3112 }
3113 lookahead_sym = NULL;
3114 }
3115 else while (1)
3116 {
3117 t = next ();
3118 if (! (t->type == tok_identifier))
3119 throw PARSE_ERROR (_("expected identifier"));
3120 symbol* sym = new symbol;
3121 sym->tok = t;
3122 sym->name = t->content;
3123 s->indexes.push_back (sym);
3124
3125 t = peek ();
3126 if (t && t->type == tok_operator &&
3127 (t->content == "+" || t->content == "-"))
3128 {
3129 if (s->sort_direction)
3130 throw PARSE_ERROR (_("multiple sort directives"));
3131 s->sort_direction = (t->content == "+") ? 1 : -1;
3132 s->sort_column = s->indexes.size();
3133 swallow ();
3134 }
3135
3136 if (parenthesized)
3137 {
3138 t = peek ();
3139 if (t && t->type == tok_operator && t->content == ",")
3140 {
3141 swallow ();
3142 continue;
3143 }
3144 else if (t && t->type == tok_operator && t->content == "]")
3145 {
3146 swallow ();
3147 break;
3148 }
3149 else
3150 throw PARSE_ERROR (_("expected ',' or ']'"));
3151 }
3152 else
3153 break; // expecting only one expression
3154 }
3155
3156 t = next ();
3157 if (! (t->type == tok_keyword && t->content == "in"))
3158 throw PARSE_ERROR (_("expected 'in'"));
3159 swallow ();
3160
3161 s->base = parse_indexable();
3162
3163 // check if there was an array slice that was specified
3164 t = peek();
3165 if (t && t->type == tok_operator && t->content == "[")
3166 {
3167 swallow();
3168 while (1)
3169 {
3170 t = peek();
3171 if (t && t->type == tok_operator && t->content == "*")
3172 {
3173 swallow();
3174 s->array_slice.push_back (NULL);
3175 }
3176 else
3177 s->array_slice.push_back (parse_expression());
3178
3179 t = peek ();
3180 if (t && t->type == tok_operator && t->content == ",")
3181 {
3182 swallow ();
3183 continue;
3184 }
3185 else if (t && t->type == tok_operator && t->content == "]")
3186 {
3187 swallow ();
3188 break;
3189 }
3190 else
3191 throw PARSE_ERROR (_("expected ',' or ']'"));
3192 }
3193 }
3194
3195
3196 // check for atword, see also expect_ident_or_atword,
3197 t = peek ();
3198 if (t && t->type == tok_operator && t->content[0] == '@')
3199 {
3200 if (t->content == "@avg") s->sort_aggr = sc_average;
3201 else if (t->content == "@min") s->sort_aggr = sc_min;
3202 else if (t->content == "@max") s->sort_aggr = sc_max;
3203 else if (t->content == "@count") s->sort_aggr = sc_count;
3204 else if (t->content == "@sum") s->sort_aggr = sc_sum;
3205 else if (t->content == "@variance") s->sort_aggr = sc_variance;
3206 else throw PARSE_ERROR(_("expected statistical operation"));
3207 swallow();
3208
3209 t = peek ();
3210 if (! (t && t->type == tok_operator && (t->content == "+" || t->content == "-")))
3211 throw PARSE_ERROR(_("expected sort directive"));
3212 }
3213
3214 t = peek ();
3215 if (t && t->type == tok_operator &&
3216 (t->content == "+" || t->content == "-"))
3217 {
3218 if (s->sort_direction)
3219 throw PARSE_ERROR (_("multiple sort directives"));
3220 s->sort_direction = (t->content == "+") ? 1 : -1;
3221 s->sort_column = 0;
3222 swallow ();
3223 }
3224
3225 t = peek ();
3226 if (tok_is(t, tok_keyword, "limit"))
3227 {
3228 swallow (); // get past the "limit"
3229 s->limit = parse_expression ();
3230 }
3231
3232 t = next ();
3233 if (! (t->type == tok_operator && t->content == ")"))
3234 throw PARSE_ERROR ("expected ')'");
3235 swallow ();
3236
3237 s->block = parse_statement ();
3238 return s;
3239 }
3240
3241
3242 expression*
3243 parser::parse_expression ()
3244 {
3245 return parse_assignment ();
3246 }
3247
3248
3249 expression*
3250 parser::parse_assignment ()
3251 {
3252 expression* op1 = parse_ternary ();
3253
3254 const token* t = peek ();
3255 // right-associative operators
3256 if (t && t->type == tok_operator
3257 && (t->content == "=" ||
3258 t->content == "<<<" ||
3259 t->content == "+=" ||
3260 t->content == "-=" ||
3261 t->content == "*=" ||
3262 t->content == "/=" ||
3263 t->content == "%=" ||
3264 t->content == "<<=" ||
3265 t->content == ">>=" ||
3266 t->content == "&=" ||
3267 t->content == "^=" ||
3268 t->content == "|=" ||
3269 t->content == ".=" ||
3270 false))
3271 {
3272 // NB: lvalueness is checked during elaboration / translation
3273 assignment* e = new assignment;
3274 e->left = op1;
3275 e->op = t->content;
3276 e->tok = t;
3277 next ();
3278 e->right = parse_expression ();
3279 op1 = e;
3280 }
3281
3282 return op1;
3283 }
3284
3285
3286 expression*
3287 parser::parse_ternary ()
3288 {
3289 expression* op1 = parse_logical_or ();
3290
3291 const token* t = peek ();
3292 if (t && t->type == tok_operator && t->content == "?")
3293 {
3294 ternary_expression* e = new ternary_expression;
3295 e->tok = t;
3296 e->cond = op1;
3297 next ();
3298 e->truevalue = parse_expression (); // XXX
3299
3300 t = next ();
3301 if (! (t->type == tok_operator && t->content == ":"))
3302 throw PARSE_ERROR (_("expected ':'"));
3303 swallow ();
3304
3305 if (input.has_version("4.0"))
3306 e->falsevalue = parse_ternary ();
3307 else
3308 e->falsevalue = parse_expression ();
3309 return e;
3310 }
3311 else
3312 return op1;
3313 }
3314
3315
3316 expression*
3317 parser::parse_logical_or ()
3318 {
3319 expression* op1 = parse_logical_and ();
3320
3321 const token* t = peek ();
3322 while (t && t->type == tok_operator && t->content == "||")
3323 {
3324 logical_or_expr* e = new logical_or_expr;
3325 e->tok = t;
3326 e->op = t->content;
3327 e->left = op1;
3328 next ();
3329 e->right = parse_logical_and ();
3330 op1 = e;
3331 t = peek ();
3332 }
3333
3334 return op1;
3335 }
3336
3337
3338 expression*
3339 parser::parse_logical_and ()
3340 {
3341 expression* op1 = parse_boolean_or ();
3342
3343 const token* t = peek ();
3344 while (t && t->type == tok_operator && t->content == "&&")
3345 {
3346 logical_and_expr *e = new logical_and_expr;
3347 e->left = op1;
3348 e->op = t->content;
3349 e->tok = t;
3350 next ();
3351 e->right = parse_boolean_or ();
3352 op1 = e;
3353 t = peek ();
3354 }
3355
3356 return op1;
3357 }
3358
3359
3360 expression*
3361 parser::parse_boolean_or ()
3362 {
3363 expression* op1 = parse_boolean_xor ();
3364
3365 const token* t = peek ();
3366 while (t && t->type == tok_operator && t->content == "|")
3367 {
3368 binary_expression* e = new binary_expression;
3369 e->left = op1;
3370 e->op = t->content;
3371 e->tok = t;
3372 next ();
3373 e->right = parse_boolean_xor ();
3374 op1 = e;
3375 t = peek ();
3376 }
3377
3378 return op1;
3379 }
3380
3381
3382 expression*
3383 parser::parse_boolean_xor ()
3384 {
3385 expression* op1 = parse_boolean_and ();
3386
3387 const token* t = peek ();
3388 while (t && t->type == tok_operator && t->content == "^")
3389 {
3390 binary_expression* e = new binary_expression;
3391 e->left = op1;
3392 e->op = t->content;
3393 e->tok = t;
3394 next ();
3395 e->right = parse_boolean_and ();
3396 op1 = e;
3397 t = peek ();
3398 }
3399
3400 return op1;
3401 }
3402
3403
3404 expression*
3405 parser::parse_boolean_and ()
3406 {
3407 expression* op1 = parse_array_in ();
3408
3409 const token* t = peek ();
3410 while (t && t->type == tok_operator && t->content == "&")
3411 {
3412 binary_expression* e = new binary_expression;
3413 e->left = op1;
3414 e->op = t->content;
3415 e->tok = t;
3416 next ();
3417 e->right = parse_array_in ();
3418 op1 = e;
3419 t = peek ();
3420 }
3421
3422 return op1;
3423 }
3424
3425
3426 expression*
3427 parser::parse_array_in ()
3428 {
3429 // This is a very tricky case. All these are legit expressions:
3430 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
3431 vector<expression*> indexes;
3432 bool parenthesized = false;
3433
3434 const token* t = peek ();
3435 if (t && t->type == tok_operator && t->content == "[")
3436 {
3437 swallow ();
3438 parenthesized = true;
3439 }
3440
3441 while (1)
3442 {
3443 t = peek();
3444 if (t && t->type == tok_operator && t->content == "*" && parenthesized)
3445 {
3446 swallow();
3447 indexes.push_back(NULL);
3448 }
3449 else
3450 {
3451 expression* op1 = parse_comparison_or_regex_query ();
3452 indexes.push_back (op1);
3453 }
3454
3455 if (parenthesized)
3456 {
3457 const token* t = peek ();
3458 if (t && t->type == tok_operator && t->content == ",")
3459 {
3460 swallow ();
3461 continue;
3462 }
3463 else if (t && t->type == tok_operator && t->content == "]")
3464 {
3465 swallow ();
3466 break;
3467 }
3468 else
3469 throw PARSE_ERROR (_("expected ',' or ']'"));
3470 }
3471 else
3472 break; // expecting only one expression
3473 }
3474
3475 t = peek ();
3476 if (t && t->type == tok_keyword && t->content == "in")
3477 {
3478 array_in *e = new array_in;
3479 e->tok = t;
3480 next ();
3481
3482 arrayindex* a = new arrayindex;
3483 a->indexes = indexes;
3484 a->base = parse_indexable();
3485 a->tok = a->base->tok;
3486 e->operand = a;
3487 return e;
3488 }
3489 else if (indexes.size() == 1) // no "in" - need one expression only
3490 return indexes[0];
3491 else
3492 throw PARSE_ERROR (_("unexpected comma-separated expression list"));
3493 }
3494
3495
3496 expression*
3497 parser::parse_comparison_or_regex_query ()
3498 {
3499 expression* op1 = parse_shift ();
3500
3501 // XXX precedence -- perhaps a =~ b == c =~ d --> (a =~ b) == (c =~ d) ?
3502 const token *t = peek();
3503 if (t && t->type == tok_operator
3504 && (t->content == "=~" ||
3505 t->content == "!~"))
3506 {
3507 regex_query* r = new regex_query;
3508 r->left = op1;
3509 r->op = t->content;
3510 r->tok = t;
3511 next ();
3512 r->right = parse_literal_string();
3513 op1 = r;
3514 t = peek ();
3515 }
3516 else while (t && t->type == tok_operator
3517 && (t->content == ">" ||
3518 t->content == "<" ||
3519 t->content == "==" ||
3520 t->content == "!=" ||
3521 t->content == "<=" ||
3522 t->content == ">="))
3523 {
3524 comparison* e = new comparison;
3525 e->left = op1;
3526 e->op = t->content;
3527 e->tok = t;
3528 next ();
3529 e->right = parse_shift ();
3530 op1 = e;
3531 t = peek ();
3532 }
3533
3534 return op1;
3535 }
3536
3537
3538 expression*
3539 parser::parse_shift ()
3540 {
3541 expression* op1 = parse_concatenation ();
3542
3543 const token* t = peek ();
3544 while (t && t->type == tok_operator &&
3545 (t->content == "<<" || t->content == ">>" || t->content == ">>>"))
3546 {
3547 binary_expression* e = new binary_expression;
3548 e->left = op1;
3549 e->op = t->content;
3550 e->tok = t;
3551 next ();
3552 e->right = parse_concatenation ();
3553 op1 = e;
3554 t = peek ();
3555 }
3556
3557 return op1;
3558 }
3559
3560
3561 expression*
3562 parser::parse_concatenation ()
3563 {
3564 expression* op1 = parse_additive ();
3565
3566 const token* t = peek ();
3567 // XXX: the actual awk string-concatenation operator is *whitespace*.
3568 // I don't know how to easily to model that here.
3569 while (t && t->type == tok_operator && t->content == ".")
3570 {
3571 concatenation* e = new concatenation;
3572 e->left = op1;
3573 e->op = t->content;
3574 e->tok = t;
3575 next ();
3576 e->right = parse_additive ();
3577 op1 = e;
3578 t = peek ();
3579 }
3580
3581 return op1;
3582 }
3583
3584
3585 expression*
3586 parser::parse_additive ()
3587 {
3588 expression* op1 = parse_multiplicative ();
3589
3590 const token* t = peek ();
3591 while (t && t->type == tok_operator
3592 && (t->content == "+" || t->content == "-"))
3593 {
3594 binary_expression* e = new binary_expression;
3595 e->op = t->content;
3596 e->left = op1;
3597 e->tok = t;
3598 next ();
3599 e->right = parse_multiplicative ();
3600 op1 = e;
3601 t = peek ();
3602 }
3603
3604 return op1;
3605 }
3606
3607
3608 expression*
3609 parser::parse_multiplicative ()
3610 {
3611 expression* op1 = parse_unary ();
3612
3613 const token* t = peek ();
3614 while (t && t->type == tok_operator
3615 && (t->content == "*" || t->content == "/" || t->content == "%"))
3616 {
3617 binary_expression* e = new binary_expression;
3618 e->op = t->content;
3619 e->left = op1;
3620 e->tok = t;
3621 next ();
3622 e->right = parse_unary ();
3623 op1 = e;
3624 t = peek ();
3625 }
3626
3627 return op1;
3628 }
3629
3630
3631 expression*
3632 parser::parse_unary ()
3633 {
3634 const token* t = peek ();
3635 if (t && t->type == tok_operator
3636 && (t->content == "+" ||
3637 t->content == "-" ||
3638 t->content == "!" ||
3639 t->content == "~" ||
3640 false))
3641 {
3642 unary_expression* e = new unary_expression;
3643 e->op = t->content;
3644 e->tok = t;
3645 next ();
3646 e->operand = parse_unary ();
3647 return e;
3648 }
3649 else
3650 return parse_crement ();
3651 }
3652
3653
3654 expression*
3655 parser::parse_crement () // as in "increment" / "decrement"
3656 {
3657 // NB: Ideally, we'd parse only a symbol as an operand to the
3658 // *crement operators, instead of a general expression value. We'd
3659 // need more complex lookahead code to tell apart the postfix cases.
3660 // So we just punt, and leave it to pass-3 to signal errors on
3661 // cases like "4++".
3662
3663 const token* t = peek ();
3664 if (t && t->type == tok_operator
3665 && (t->content == "++" || t->content == "--"))
3666 {
3667 pre_crement* e = new pre_crement;
3668 e->op = t->content;
3669 e->tok = t;
3670 next ();
3671 e->operand = parse_dwarf_value ();
3672 return e;
3673 }
3674
3675 // post-crement or non-crement
3676 expression *op1 = parse_dwarf_value ();
3677
3678 t = peek ();
3679 if (t && t->type == tok_operator
3680 && (t->content == "++" || t->content == "--"))
3681 {
3682 post_crement* e = new post_crement;
3683 e->op = t->content;
3684 e->tok = t;
3685 next ();
3686 e->operand = op1;
3687 return e;
3688 }
3689 else
3690 return op1;
3691 }
3692
3693
3694 expression*
3695 parser::parse_dwarf_value ()
3696 {
3697 expression* expr = NULL;
3698 target_symbol* tsym = NULL;
3699
3700 // With '&' we'll definitely be making a target symbol of some sort
3701 const token* addrtok = peek_op ("&") ? next () : NULL;
3702 bool addressof = (addrtok != NULL);
3703
3704 // First try target_symbol types: $var, @cast, and @var.
3705 const token* t = peek ();
3706 if (t && t->type == tok_identifier && t->content[0] == '$')
3707 expr = tsym = parse_target_symbol ();
3708 else if (tok_is (t, tok_operator, "@cast"))
3709 expr = tsym = parse_cast_op ();
3710 else if (tok_is (t, tok_operator, "@var"))
3711 expr = tsym = parse_atvar_op ();
3712 else if (addressof && !input.has_version("2.6"))
3713 // '&' on old version only allowed specific target_symbol types
3714 throw PARSE_ERROR (_("expected @cast, @var or $var"));
3715 else
3716 {
3717 // Otherwise just get a plain value of any sort.
3718 expr = parse_value ();
3719 if (addressof)
3720 {
3721 tsym = dynamic_cast<target_symbol*> (expr);
3722 if (tsym && tsym->addressof)
3723 throw PARSE_ERROR (_("cannot take address more than once"),
3724 addrtok);
3725 }
3726 }
3727
3728 // If we had '&' or see any target suffixes, that forces a target_symbol.
3729 // For compatibility, we only do this starting with 2.6.
3730 if (!tsym && (addressof || peek_target_symbol_components ())
3731 && input.has_version("2.6"))
3732 {
3733 autocast_op *cop = new autocast_op;
3734 cop->tok = addrtok ?: peek ();
3735 cop->operand = expr;
3736 expr = tsym = cop;
3737 }
3738
3739 if (tsym)
3740 {
3741 // Parse the rest of any kind of target symbol
3742 tsym->addressof = addressof;
3743 parse_target_symbol_components (tsym);
3744 }
3745
3746 return expr;
3747 }
3748
3749
3750 expression*
3751 parser::parse_value ()
3752 {
3753 const token* t = peek ();
3754 if (! t)
3755 throw PARSE_ERROR (_("expected value"));
3756
3757 if (t->type == tok_embedded)
3758 {
3759 if (! privileged)
3760 throw PARSE_ERROR (_("embedded expression code in unprivileged script; need stap -g"), false);
3761
3762 embedded_expr *e = new embedded_expr;
3763 e->tok = t;
3764 e->code = t->content;
3765 next ();
3766 return e;
3767 }
3768
3769 if (t->type == tok_operator && t->content == "(")
3770 {
3771 swallow ();
3772 expression* e = parse_expression ();
3773 t = next ();
3774 if (! (t->type == tok_operator && t->content == ")"))
3775 throw PARSE_ERROR (_("expected ')'"));
3776 swallow ();
3777 return e;
3778 }
3779 else if (t->type == tok_identifier
3780 || (t->type == tok_operator && t->content[0] == '@'))
3781 return parse_symbol ();
3782 else
3783 return parse_literal ();
3784 }
3785
3786
3787 const token *
3788 parser::parse_hist_op_or_bare_name (hist_op *&hop, interned_string &name)
3789 {
3790 hop = NULL;
3791 const token* t = expect_ident_or_atword (name);
3792 if (name == "@hist_linear" || name == "@hist_log")
3793 {
3794 hop = new hist_op;
3795 if (name == "@hist_linear")
3796 hop->htype = hist_linear;
3797 else if (name == "@hist_log")
3798 hop->htype = hist_log;
3799 hop->tok = t;
3800 expect_op("(");
3801 hop->stat = parse_expression ();
3802 int64_t tnum;
3803 if (hop->htype == hist_linear)
3804 {
3805 for (size_t i = 0; i < 3; ++i)
3806 {
3807 expect_op (",");
3808 expect_number (tnum);
3809 hop->params.push_back (tnum);
3810 }
3811 }
3812 expect_op(")");
3813 }
3814 return t;
3815 }
3816
3817
3818 indexable*
3819 parser::parse_indexable ()
3820 {
3821 hist_op *hop = NULL;
3822 interned_string name;
3823 const token *tok = parse_hist_op_or_bare_name(hop, name);
3824 if (hop)
3825 return hop;
3826 else
3827 {
3828 symbol* sym = new symbol;
3829 sym->name = name;
3830 sym->tok = tok;
3831 return sym;
3832 }
3833 }
3834
3835
3836 // var, indexable[index], func(parms), printf("...", ...),
3837 // @defined, @entry, @stat_op(stat)
3838 expression* parser::parse_symbol ()
3839 {
3840 hist_op *hop = NULL;
3841 symbol *sym = NULL;
3842 interned_string name;
3843 unsigned max_params = 0;
3844 const token *t = parse_hist_op_or_bare_name(hop, name);
3845
3846 if (!hop)
3847 {
3848 // If we didn't get a hist_op, then we did get an identifier. We can
3849 // now scrutinize this identifier for the various magic forms of identifier
3850 // (printf, @stat_op...)
3851
3852 // NB: PR11343: @defined() is not incompatible with earlier versions
3853 // of stap, so no need to check session.compatible for 1.2
3854 if (name == "@defined")
3855 return parse_defined_op (t);
3856
3857 if (name == "@const")
3858 return parse_const_op (t);
3859
3860 if (name == "@entry")
3861 return parse_entry_op (t);
3862
3863 if (name == "@perf")
3864 return parse_perf_op (t);
3865
3866 if (input.has_version("4.0"))
3867 {
3868 if (name == "@kregister" || name == "@uregister")
3869 return parse_target_register (t);
3870
3871 if (name == "@kderef" || name == "@uderef")
3872 return parse_target_deref (t);
3873 }
3874
3875 if (name.size() > 0 && name[0] == '@')
3876 {
3877 stat_op *sop = new stat_op;
3878 if (name == "@avg")
3879 sop->ctype = sc_average;
3880 else if (name == "@variance")
3881 sop->ctype = sc_variance, max_params = 1;
3882 else if (name == "@count")
3883 sop->ctype = sc_count;
3884 else if (name == "@sum")
3885 sop->ctype = sc_sum;
3886 else if (name == "@min")
3887 sop->ctype = sc_min;
3888 else if (name == "@max")
3889 sop->ctype = sc_max;
3890 else
3891 throw PARSE_ERROR(_F("unknown operator %s",
3892 name.to_string().c_str()));
3893 expect_op("(");
3894 sop->tok = t;
3895 sop->stat = parse_expression ();
3896
3897 while(1)
3898 {
3899 t = next ();
3900 if (t && t->type == tok_operator && t->content == ")")
3901 {
3902 swallow ();
3903 break;
3904 }
3905 else if (t && t->type == tok_operator && t->content == ",")
3906 {
3907 if (sop->params.size() >= max_params)
3908 throw PARSE_ERROR(_NF("not more than %d parameter allowed",
3909 "not more than %d parameters allowed",
3910 max_params+1, max_params+1), t);
3911
3912 swallow ();
3913 int64_t tnum;
3914 expect_number (tnum);
3915 sop->params.push_back (tnum);
3916 }
3917 }
3918 return sop;
3919 }
3920
3921 else if (print_format *fmt = print_format::create(t))
3922 {
3923 expect_op("(");
3924 if ((name == "print" || name == "println" ||
3925 name == "sprint" || name == "sprintln") &&
3926 (peek_op("@hist_linear") || peek_op("@hist_log")))
3927 {
3928 // We have a special case where we recognize
3929 // print(@hist_foo(bar)) as a magic print-the-histogram
3930 // construct. This is sort of gross but it avoids
3931 // promoting histogram references to typeful
3932 // expressions.
3933
3934 hop = NULL;
3935 t = parse_hist_op_or_bare_name(hop, name);
3936 assert(hop);
3937
3938 // It is, sadly, possible that even while parsing a
3939 // hist_op, we *mis-guessed* and the user wishes to
3940 // print(@hist_op(foo)[bucket]), a scalar. In that case
3941 // we must parse the arrayindex and print an expression.
3942 //
3943 // XXX: This still fails if the arrayindex is part of a
3944 // larger expression. To really handle everything, we'd
3945 // need to push back all the hist tokens start over.
3946
3947 if (!peek_op ("["))
3948 fmt->hist = hop;
3949 else
3950 {
3951 // This is simplified version of the
3952 // multi-array-index parser below, because we can
3953 // only ever have one index on a histogram anyways.
3954 expect_op("[");
3955 struct arrayindex* ai = new arrayindex;
3956 ai->tok = t;
3957 ai->base = hop;
3958 ai->indexes.push_back (parse_expression ());
3959 expect_op("]");
3960 fmt->args.push_back(ai);
3961
3962 // Consume any subsequent arguments.
3963 while (!peek_op(")"))
3964 {
3965 // ')' is not possible here but we want to output a nicer
3966 // parser error message.
3967 (void) expect_op_any ({",", ")"});
3968 expression *e = parse_expression ();
3969 fmt->args.push_back(e);
3970 }
3971 }
3972 }
3973 else
3974 {
3975 int min_args = 0;
3976 bool consumed_arg = false;
3977 if (fmt->print_with_format)
3978 {
3979 // Consume and convert a format string. Agreement between the
3980 // format string and the arguments is postponed to the
3981 // typechecking phase.
3982 literal_string* ls = parse_literal_string();
3983 fmt->raw_components = ls->value;
3984 delete ls;
3985 fmt->components = print_format::string_to_components (fmt->raw_components);
3986 consumed_arg = true;
3987 }
3988 else if (fmt->print_with_delim)
3989 {
3990 // Consume a delimiter to separate arguments.
3991 literal_string* ls = parse_literal_string();
3992 fmt->delimiter = ls->value;
3993 delete ls;
3994 consumed_arg = true;
3995 min_args = 2; // so that the delim is used at least once
3996 }
3997 else if (!fmt->print_with_newline)
3998 {
3999 // If we are not printing with a format string, nor with a
4000 // delim, nor with a newline, then it's either print() or
4001 // sprint(), both of which require at least one argument (of
4002 // any type).
4003 min_args = 1;
4004 }
4005
4006 // Consume any subsequent arguments.
4007 while (min_args || !peek_op (")"))
4008 {
4009 // ')' is not possible here but we want to output a nicer
4010 // parser error message.
4011 if (consumed_arg)
4012 (void) expect_op_any({",", ")"});
4013 expression *e = parse_expression ();
4014 fmt->args.push_back(e);
4015 consumed_arg = true;
4016 if (min_args)
4017 --min_args;
4018 }
4019 }
4020 expect_op(")");
4021 return fmt;
4022 }
4023
4024 else if (peek_op ("(")) // function call
4025 {
4026 swallow ();
4027 struct functioncall* f = new functioncall;
4028 f->tok = t;
4029 f->function = name;
4030 // Allow empty actual parameter list
4031 if (peek_op (")"))
4032 {
4033 swallow ();
4034 return f;
4035 }
4036 while (1)
4037 {
4038 f->args.push_back (parse_expression ());
4039 interned_string op = expect_op_any({")", ","});
4040 if (op == ")")
4041 break;
4042 else if (op == ",")
4043 continue;
4044 }
4045 return f;
4046 }
4047
4048 else
4049 {
4050 sym = new symbol;
4051 sym->name = name;
4052 sym->tok = t;
4053 }
4054 }
4055
4056 // By now, either we had a hist_op in the first place, or else
4057 // we had a plain word and it was converted to a symbol.
4058
4059 assert (!hop != !sym); // logical XOR
4060
4061 // All that remains is to check for array indexing
4062
4063 if (peek_op ("[")) // array
4064 {
4065 swallow ();
4066 struct arrayindex* ai = new arrayindex;
4067 ai->tok = t;
4068
4069 if (hop)
4070 ai->base = hop;
4071 else
4072 ai->base = sym;
4073
4074 while (1)
4075 {
4076 if (peek_op("*"))
4077 {
4078 swallow();
4079 ai->indexes.push_back (NULL);
4080 }
4081 else
4082 ai->indexes.push_back (parse_expression ());
4083 interned_string op = expect_op_any({"]", ","});
4084 if (op == "]")
4085 break;
4086 else if (op == ",")
4087 continue;
4088 }
4089
4090 return ai;
4091 }
4092
4093 // If we got to here, we *should* have a symbol; if we have
4094 // a hist_op on its own, it doesn't count as an expression,
4095 // so we throw a parse error.
4096
4097 if (hop)
4098 throw PARSE_ERROR(_("base histogram operator where expression expected"), t);
4099
4100 return sym;
4101 }
4102
4103 // Parse a $var.
4104 target_symbol* parser::parse_target_symbol ()
4105 {
4106 const token* t = next ();
4107 if (t->type == tok_identifier && t->content[0]=='$')
4108 {
4109 // target_symbol time
4110 target_symbol *tsym = new target_symbol;
4111 tsym->tok = t;
4112 tsym->name = t->content;
4113 return tsym;
4114 }
4115
4116 throw PARSE_ERROR (_("expected $var"));
4117 }
4118
4119
4120 // Parse a @cast.
4121 cast_op* parser::parse_cast_op ()
4122 {
4123 const token* t = next ();
4124 if (t->type == tok_operator && t->content == "@cast")
4125 {
4126 cast_op *cop = new cast_op;
4127 cop->tok = t;
4128 cop->name = t->content;
4129 expect_op("(");
4130 cop->operand = parse_expression ();
4131 expect_op(",");
4132 expect_unknown(tok_string, cop->type_name);
4133 if (cop->type_name.empty())
4134 throw PARSE_ERROR (_("expected non-empty string"));
4135 if (peek_op (","))
4136 {
4137 swallow ();
4138 expect_unknown(tok_string, cop->module);
4139 }
4140 expect_op(")");
4141 return cop;
4142 }
4143
4144 throw PARSE_ERROR (_("expected @cast"));
4145 }
4146
4147
4148 // Parse a @var.
4149 atvar_op* parser::parse_atvar_op ()
4150 {
4151 const token* t = next ();
4152 if (t->type == tok_operator && t->content == "@var")
4153 {
4154 atvar_op *aop = new atvar_op;
4155 aop->tok = t;
4156 aop->name = t->content;
4157 expect_op("(");
4158 expect_unknown(tok_string, aop->target_name);
4159 size_t found_at = aop->target_name.find("@");
4160 if (found_at != string::npos)
4161 aop->cu_name = aop->target_name.substr(found_at + 1);
4162 else
4163 aop->cu_name = "";
4164 if (peek_op (","))
4165 {
4166 swallow ();
4167 expect_unknown (tok_string, aop->module);
4168 }
4169 else
4170 aop->module = "";
4171 expect_op(")");
4172 return aop;
4173 }
4174
4175 throw PARSE_ERROR (_("expected @var"));
4176 }
4177
4178
4179 // Parse a @defined(). Given head token has already been consumed.
4180 expression* parser::parse_defined_op (const token* t)
4181 {
4182 defined_op* dop = new defined_op;
4183 dop->tok = t;
4184 expect_op("(");
4185 dop->operand = parse_expression ();
4186 expect_op(")");
4187 return dop;
4188 }
4189
4190
4191 // Parse a @const(). Given head token has already been consumed.
4192 expression* parser::parse_const_op (const token* t)
4193 {
4194 if (! privileged)
4195 throw PARSE_ERROR (_("using @const operator not permitted; need stap -g"),
4196 false /* don't skip tokens for parse resumption */);
4197
4198 interned_string cnst;
4199 embedded_expr *ee = new embedded_expr;
4200 ee->tok = t;
4201 expect_op("(");
4202 expect_unknown(tok_string, cnst);
4203 if(cnst.empty())
4204 throw PARSE_ERROR (_("expected non-empty string"));
4205 expect_op(")");
4206 ee->code = string("/* pure */ /* unprivileged */ /* stable */ ") + string(cnst);
4207 return ee;
4208 }
4209
4210
4211 // Parse a @entry(). Given head token has already been consumed.
4212 expression* parser::parse_entry_op (const token* t)
4213 {
4214 entry_op* eop = new entry_op;
4215 eop->tok = t;
4216 expect_op("(");
4217 eop->operand = parse_expression ();
4218 expect_op(")");
4219 return eop;
4220 }
4221
4222
4223 // Parse a @perf(). Given head token has already been consumed.
4224 expression* parser::parse_perf_op (const token* t)
4225 {
4226 perf_op* pop = new perf_op;
4227 pop->tok = t;
4228 expect_op("(");
4229 pop->operand = parse_literal_string ();
4230 if (pop->operand->value == "")
4231 throw PARSE_ERROR (_("expected non-empty string"));
4232 expect_op(")");
4233 return pop;
4234 }
4235
4236 // Parse a @kregister or @uregister. Given head token has already been consumed.
4237 expression* parser::parse_target_register (const token* t)
4238 {
4239 target_register *treg = new target_register;
4240 int64_t regno;
4241 treg->tok = t;
4242 treg->userspace_p = (t->content[1] == 'u');
4243 if (! treg->userspace_p && ! privileged)
4244 throw PARSE_ERROR (_("using @kregister operator not permitted; need stap -g"),
4245 false /* don't skip tokens for parse resumption */);
4246 expect_op("(");
4247 expect_number(regno);
4248 treg->regno = regno;
4249 expect_op(")");
4250 return treg;
4251 }
4252
4253 // Parse a @kderef or @uderef. Given head token has already been consumed.
4254 expression* parser::parse_target_deref (const token* t)
4255 {
4256 target_deref *tderef = new target_deref;
4257 int64_t size;
4258 tderef->tok = t;
4259 tderef->userspace_p = (t->content[1] == 'u');
4260 if (! tderef->userspace_p && ! privileged)
4261 throw PARSE_ERROR (_("using @kderef operator not permitted; need stap -g"),
4262 false /* don't skip tokens for parse resumption */);
4263 expect_op("(");
4264 expect_number(size);
4265 tderef->size = size;
4266 expect_op(",");
4267 tderef->addr = parse_expression();
4268 expect_op(")");
4269 return tderef;
4270 }
4271
4272 bool
4273 parser::peek_target_symbol_components ()
4274 {
4275 const token * t = peek ();
4276 return t &&
4277 ((t->type == tok_operator && (t->content == "->" || t->content == "["))
4278 || (t->type == tok_identifier &&
4279 t->content.find_first_not_of('$') == string::npos));
4280 }
4281
4282 void
4283 parser::parse_target_symbol_components (target_symbol* e)
4284 {
4285 bool pprint = false;
4286
4287 // check for pretty-print in the form $foo$
4288 string base = e->name;
4289 size_t pprint_pos = base.find_last_not_of('$');
4290 if (0 < pprint_pos && pprint_pos < base.length() - 1)
4291 {
4292 string pprint_val = base.substr(pprint_pos + 1);
4293 base.erase(pprint_pos + 1);
4294 e->name = base;
4295 e->components.push_back (target_symbol::component(e->tok, pprint_val, true));
4296 pprint = true;
4297 }
4298
4299 while (!pprint)
4300 {
4301 if (peek_op ("->"))
4302 {
4303 const token* t = next();
4304 interned_string member;
4305 expect_ident_or_keyword (member);
4306
4307 // check for pretty-print in the form $foo->$ or $foo->bar$
4308 pprint_pos = member.find_last_not_of('$');
4309 interned_string pprint_val;
4310 if (pprint_pos == string::npos || pprint_pos < member.length() - 1)
4311 {
4312 pprint_val = member.substr(pprint_pos + 1);
4313 member = member.substr(0, pprint_pos + 1);
4314 pprint = true;
4315 }
4316
4317 if (!member.empty())
4318 e->components.push_back (target_symbol::component(t, member));
4319 if (pprint)
4320 e->components.push_back (target_symbol::component(t, pprint_val, true));
4321 }
4322 else if (peek_op ("["))
4323 {
4324 const token* t = next();
4325 expression* index = parse_expression();
4326 literal_number* ln = dynamic_cast<literal_number*>(index);
4327 if (ln)
4328 e->components.push_back (target_symbol::component(t, ln->value));
4329 else
4330 e->components.push_back (target_symbol::component(t, index));
4331 expect_op ("]");
4332 }
4333 else
4334 break;
4335 }
4336
4337 if (!pprint)
4338 {
4339 // check for pretty-print in the form $foo $
4340 // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$
4341 const token* t = peek();
4342 if (t != NULL && t->type == tok_identifier &&
4343 t->content.find_first_not_of('$') == string::npos)
4344 {
4345 t = next();
4346 e->components.push_back (target_symbol::component(t, t->content, true));
4347 pprint = true;
4348 }
4349 }
4350
4351 if (pprint && (peek_op ("->") || peek_op("[")))
4352 throw PARSE_ERROR(_("-> and [ are not accepted for a pretty-printing variable"));
4353 }
4354
4355 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.242638 seconds and 5 git commands to generate.