]> sourceware.org Git - systemtap.git/blob - parse.cxx
PR18431: Function overloading
[systemtap.git] / parse.cxx
1 // recursive descent parser for systemtap scripts
2 // Copyright (C) 2005-2015 Red Hat Inc.
3 // Copyright (C) 2006 Intel Corporation.
4 // Copyright (C) 2007 Bull S.A.S
5 // Copyright (C) 2014 Peter Kjellstrom <cap@nsc.liu.se>
6 //
7 // This file is part of systemtap, and is free software. You can
8 // redistribute it and/or modify it under the terms of the GNU General
9 // Public License (GPL); either version 2, or (at your option) any
10 // later version.
11
12 #include "config.h"
13 #include "staptree.h"
14 #include "parse.h"
15 #include "session.h"
16 #include "util.h"
17 #include "stringtable.h"
18 #include "unordered.h"
19
20 #include <iostream>
21
22 #include <fstream>
23 #include <cctype>
24 #include <cstdlib>
25 #include <cassert>
26 #include <cerrno>
27 #include <climits>
28 #include <sstream>
29 #include <cstring>
30 #include <cctype>
31 #include <iterator>
32
33 extern "C" {
34 #include <fnmatch.h>
35 }
36
37 using namespace std;
38
39
// Tokenizer for a single systemtap input.  scan() hands back the next
// token; the parser treats a null result as end of input.
// NOTE(review): the member function bodies are not part of this section,
// so the remarks below restate only what the declarations and their
// original inline notes say.
class lexer
{
public:
  bool ate_comment; // current token follows a comment
  bool ate_whitespace; // the most recent token followed whitespace
  bool saw_tokens; // the lexer found tokens (before preprocessing occurred)
  bool check_compatible; // whether to gate features on session.compatible

  // Produce the next token from the input.
  token* scan ();
  // The parser constructs this as (stream, input name, session,
  // check_compatible flag).
  lexer (istream&, const string&, systemtap_session&, bool);
  void set_current_file (stapfile* f);
  void set_current_token_chain (const token* tok);
  // presumably a version check against session.compatible -- confirm in the definition
  inline bool has_version (const char* v) const;

  unordered_set<interned_string> keywords;
  // Names of built-in '@' operators; the macro preprocessor warns when a
  // user macro reuses one of these names.
  static unordered_set<string> atwords;
private:
  inline int input_get ();
  inline int input_peek (unsigned n=0);
  void input_put (const string&, const token*);
  string input_name;
  string input_contents; // NB: being a temporary, no need to interned_string optimize this object
  const char *input_pointer; // index into input_contents; NB: recompute if input_contents changed!
  const char *input_end;
  unsigned cursor_suspend_count;
  unsigned cursor_suspend_line;
  unsigned cursor_suspend_column;
  unsigned cursor_line;
  unsigned cursor_column;
  systemtap_session& session;
  stapfile* current_file;
  const token* current_token_chain;
};
73
74
// Recursive descent parser for one systemtap input.
//
// Tokens reach the grammar through a small pipeline declared below:
// the lexer feeds the first preprocessing pass (macro definition and
// expansion: next_pp1/scan_pp1), which feeds the second pass
// (%( ... %) conditionals: scan_pp/skip_pp), which feeds next()/peek().
class parser
{
public:
  // FLAGS is a bitmask of pf_* values (see the constructor definition).
  parser (systemtap_session& s, const string& n, istream& i, unsigned flags=0);
  ~parser ();

  stapfile* parse ();
  probe* parse_synthetic_probe (const token* chain);
  // Used for .stpm macro library files.
  stapfile* parse_library_macros ();

private:
  // State of one open %( ... %) conditional, as stored in pp_state.
  typedef enum {
      PP_NONE,
      PP_KEEP_THEN,
      PP_SKIP_THEN,
      PP_KEEP_ELSE,
      PP_SKIP_ELSE,
  } pp_state_t;

  struct pp1_activation;

  // A macro parameter bound at an invocation site.  parent_act records
  // the activation that was current when the invocation was scanned.
  struct pp_macrodecl : public macrodecl {
    pp1_activation* parent_act; // used for param bindings
    virtual bool is_closure() { return parent_act != 0; }
    pp_macrodecl () : macrodecl(), parent_act(0) { }
  };

  systemtap_session& session;
  string input_name;
  lexer input; // NB: constructed from input_name, so it must stay declared after it
  bool errs_as_warnings;
  bool privileged;
  bool user_file;
  parse_context context;

  // preprocessing subordinate, first pass (macros)
  // One entry of the macro expansion stack (pp1_state).
  struct pp1_activation {
    const token* tok; // the invocation token; deleted by the destructor
    unsigned cursor; // position within macro body
    map<string, pp_macrodecl*> params;

    macrodecl* curr_macro;

    pp1_activation (const token* tok, macrodecl* curr_macro)
      : tok(tok), cursor(0), curr_macro(curr_macro) { }
    ~pp1_activation ();
  };

  map<string, macrodecl*> pp1_namespace; // macros @define'd in this input
  vector<pp1_activation*> pp1_state; // innermost active expansion is at the back
  const token* next_pp1 ();
  const token* scan_pp1 (bool ignore_macros);
  const token* slurp_pp1_param (vector<const token*>& param);
  const token* slurp_pp1_body (vector<const token*>& body);

  // preprocessing subordinate, final pass (conditionals)
  vector<pair<const token*, pp_state_t> > pp_state;
  const token* scan_pp ();
  const token* skip_pp ();

  // scanning state
  const token* next ();
  const token* peek ();

  // Advance past and throw away current token after peek () or next ().
  void swallow ();

  const token* systemtap_v_seen;
  const token* last_t; // the last value returned by peek() or next()
  const token* next_t; // lookahead token

  // expectations, these swallow the token
  void expect_known (token_type tt, string const & expected);
  void expect_unknown (token_type tt, interned_string & target);
  void expect_unknown2 (token_type tt1, token_type tt2, interned_string & target);

  // convenience forms, these also swallow the token
  void expect_op (string const & expected);
  void expect_kw (string const & expected);
  void expect_number (int64_t & expected);
  void expect_ident_or_keyword (interned_string & target);

  // convenience forms, which return true or false, these don't swallow token
  bool peek_op (string const & op);
  bool peek_kw (string const & kw);

  // convenience forms, which return the token
  const token* expect_kw_token (string const & expected);
  const token* expect_ident_or_atword (interned_string & target);

  // Report PE through the session and bump num_errors.
  void print_error (const parse_error& pe, bool errs_as_warnings = false);
  unsigned num_errors;

private: // nonterminals
  void parse_probe (vector<probe*>&, vector<probe_alias*>&);
  void parse_private (vector<vardecl*>&, vector<probe*>&,
                      string const&, vector<functiondecl*>&);
  void parse_global (vector<vardecl*>&, vector<probe*>&,
                     string const&);
  void do_parse_global (vector<vardecl*>&, vector<probe*>&,
                        string const&, const token*, bool);
  void parse_functiondecl (vector<functiondecl*>&, string const&);
  void do_parse_functiondecl (vector<functiondecl*>&, const token*,
                              string const&, bool);
  embeddedcode* parse_embeddedcode ();
  vector<probe_point*> parse_probe_points ();
  vector<probe_point*> parse_components ();
  vector<probe_point*> parse_component ();
  literal_string* consume_string_literals (const token*);
  literal_string* parse_literal_string ();
  literal* parse_literal ();
  block* parse_stmt_block ();
  try_block* parse_try_block ();
  statement* parse_statement ();
  if_statement* parse_if_statement ();
  for_loop* parse_for_loop ();
  for_loop* parse_while_loop ();
  foreach_loop* parse_foreach_loop ();
  expr_statement* parse_expr_statement ();
  return_statement* parse_return_statement ();
  delete_statement* parse_delete_statement ();
  next_statement* parse_next_statement ();
  break_statement* parse_break_statement ();
  continue_statement* parse_continue_statement ();
  indexable* parse_indexable ();
  const token *parse_hist_op_or_bare_name (hist_op *&hop, interned_string &name);
  target_symbol *parse_target_symbol ();
  cast_op *parse_cast_op ();
  atvar_op *parse_atvar_op ();
  expression* parse_entry_op (const token* t);
  expression* parse_defined_op (const token* t);
  expression* parse_perf_op (const token* t);
  expression* parse_expression ();
  expression* parse_assignment ();
  expression* parse_ternary ();
  expression* parse_logical_or ();
  expression* parse_logical_and ();
  expression* parse_boolean_or ();
  expression* parse_boolean_xor ();
  expression* parse_boolean_and ();
  expression* parse_array_in ();
  expression* parse_comparison_or_regex_query ();
  expression* parse_shift ();
  expression* parse_concatenation ();
  expression* parse_additive ();
  expression* parse_multiplicative ();
  expression* parse_unary ();
  expression* parse_crement ();
  expression* parse_dwarf_value ();
  expression* parse_value ();
  expression* parse_symbol ();

  bool peek_target_symbol_components ();
  void parse_target_symbol_components (target_symbol* e);
};
230
231
232 // ------------------------------------------------------------------------
233
234 stapfile*
235 parse (systemtap_session& s, const string& n, istream& i, unsigned flags)
236 {
237 parser p (s, n, i, flags);
238 return p.parse ();
239 }
240
241 stapfile*
242 parse (systemtap_session& s, const string& name, unsigned flags)
243 {
244 ifstream i(name.c_str(), ios::in);
245 if (i.fail())
246 {
247 cerr << (file_exists(name)
248 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
249 : _F("Input file '%s' is missing.", name.c_str()))
250 << endl;
251 return 0;
252 }
253
254 parser p (s, name, i, flags);
255 return p.parse ();
256 }
257
258 stapfile*
259 parse_library_macros (systemtap_session& s, const string& name)
260 {
261 ifstream i(name.c_str(), ios::in);
262 if (i.fail())
263 {
264 cerr << (file_exists(name)
265 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
266 : _F("Input file '%s' is missing.", name.c_str()))
267 << endl;
268 return 0;
269 }
270
271 parser p (s, name, i);
272 return p.parse_library_macros ();
273 }
274
275 probe*
276 parse_synthetic_probe (systemtap_session &s, istream& i, const token* tok)
277 {
278 parser p (s, tok ? tok->location.file->name : "<synthetic>", i);
279 return p.parse_synthetic_probe (tok);
280 }
281
282 // ------------------------------------------------------------------------
283
284 parser::parser (systemtap_session& s, const string &n, istream& i, unsigned flags):
285 session (s), input_name (n), input (i, input_name, s, !(flags & pf_no_compatible)),
286 errs_as_warnings(flags & pf_squash_errors), privileged (flags & pf_guru),
287 user_file (flags & pf_user_file), context(con_unknown), systemtap_v_seen(0),
288 last_t (0), next_t (0), num_errors (0)
289 {
290 }
291
// Intentionally empty: this destructor performs no explicit cleanup.
parser::~parser()
{
}
295
296 static string
297 tt2str(token_type tt)
298 {
299 switch (tt)
300 {
301 case tok_junk: return "junk";
302 case tok_identifier: return "identifier";
303 case tok_operator: return "operator";
304 case tok_string: return "string";
305 case tok_number: return "number";
306 case tok_embedded: return "embedded-code";
307 case tok_keyword: return "keyword";
308 }
309 return "unknown token";
310 }
311
312 ostream&
313 operator << (ostream& o, const source_loc& loc)
314 {
315 o << loc.file->name << ":"
316 << loc.line << ":"
317 << loc.column;
318
319 return o;
320 }
321
322 ostream&
323 operator << (ostream& o, const token& t)
324 {
325 o << tt2str(t.type);
326
327 if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
328 {
329 o << " '";
330 for (unsigned i=0; i<t.content.length(); i++)
331 {
332 char c = t.content[i];
333 o << (isprint (c) ? c : '?');
334 }
335 o << "'";
336 }
337
338 o << " at "
339 << t.location;
340
341 return o;
342 }
343
344
345 void
346 parser::print_error (const parse_error &pe, bool errs_as_warnings)
347 {
348 const token *tok = pe.tok ? pe.tok : last_t;
349 session.print_error(pe, tok, input_name, errs_as_warnings);
350 num_errors ++;
351 }
352
353
354
355
356 template <typename OPERAND>
357 bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
358 {
359 if (op->type == tok_operator && op->content == "<=")
360 { return lhs <= rhs; }
361 else if (op->type == tok_operator && op->content == ">=")
362 { return lhs >= rhs; }
363 else if (op->type == tok_operator && op->content == "<")
364 { return lhs < rhs; }
365 else if (op->type == tok_operator && op->content == ">")
366 { return lhs > rhs; }
367 else if (op->type == tok_operator && op->content == "==")
368 { return lhs == rhs; }
369 else if (op->type == tok_operator && op->content == "!=")
370 { return lhs != rhs; }
371 else
372 throw PARSE_ERROR (_("expected comparison operator"), op);
373 }
374
375
376 // Here, we perform on-the-fly preprocessing in two passes.
377
378 // First pass - macro declaration and expansion.
379 //
380 // The basic form of a declaration is @define SIGNATURE %( BODY %)
381 // where SIGNATURE is of the form macro_name (a, b, c, ...)
382 // and BODY can obtain the parameter contents as @a, @b, @c, ....
383 // Note that parameterless macros can also be declared.
384 //
385 // Macro definitions may not be nested.
386 // A macro is available textually after it has been defined.
387 //
388 // The basic form of a macro invocation
389 // for a parameterless macro is @macro_name,
390 // for a macro with parameters is @macro_name(param_1, param_2, ...).
391 //
392 // NB: this means that a parameterless macro @foo called as @foo(a, b, c)
393 // leaves its 'parameters' alone, rather than consuming them to result
// in a "too many parameters" error. This may be useful in the unusual
395 // case of wanting @foo to expand to the name of a function.
396 //
397 // Invocations of unknown macros are left unexpanded, to allow
398 // the continued use of constructs such as @cast, @var, etc.
399
400 macrodecl::~macrodecl ()
401 {
402 delete tok;
403 for (vector<const token*>::iterator it = body.begin();
404 it != body.end(); it++)
405 delete *it;
406 }
407
408 parser::pp1_activation::~pp1_activation ()
409 {
410 delete tok;
411 if (curr_macro->is_closure()) return; // body is shared with an earlier declaration
412 for (map<string, pp_macrodecl*>::iterator it = params.begin();
413 it != params.end(); it++)
414 delete it->second;
415 }
416
417 // Grab a token from the current input source (main file or macro body):
418 const token*
419 parser::next_pp1 ()
420 {
421 if (pp1_state.empty())
422 return input.scan ();
423
424 // otherwise, we're inside a macro
425 pp1_activation* act = pp1_state.back();
426 unsigned& cursor = act->cursor;
427 if (cursor < act->curr_macro->body.size())
428 {
429 token* t = new token(*act->curr_macro->body[cursor]);
430 t->chain = new token(*act->tok); // mark chained token
431 cursor++;
432 return t;
433 }
434 else
435 return 0; // reached end of macro body
436 }
437
// First preprocessing pass: return the next token after macro
// definition and expansion, or 0 at true end of input.
//
// Raw tokens come from next_pp1().  Three things can happen to one:
//  - "@define" (unless IGNORE_MACROS is set) records a new macrodecl in
//    pp1_namespace and produces no token;
//  - "@name" that resolves to a parameter of the current activation, a
//    macro of this input, or a session library macro pushes a new
//    activation on pp1_state, so later tokens come from the macro body;
//  - anything else, including an unresolved "@name", is returned as is.
//
// Malformed definitions and invocations raise parse errors.
const token*
parser::scan_pp1 (bool ignore_macros = false)
{
  while (true)
    {
      const token* t = next_pp1 ();
      if (t == 0) // EOF or end of macro body
        {
          if (pp1_state.empty()) // actual EOF
            return 0;

          // Exit macro and loop around to look for the next token.
          pp1_activation* act = pp1_state.back();
          pp1_state.pop_back(); delete act;
          continue;
        }

      // macro definition
      // PR18462 don't catalog preprocessor-disabled macros
      if (t->type == tok_operator && t->content == "@define" && !ignore_macros)
        {
          if (!pp1_state.empty())
            throw PARSE_ERROR (_("'@define' forbidden inside macro body"), t);
          delete t;

          // handle macro definition
          // (1) consume macro signature
          // NB: the definition is read straight from the lexer, not via
          // next_pp1(); we are known to be outside any macro body here.
          t = input.scan();
          if (! (t && t->type == tok_identifier))
            throw PARSE_ERROR (_("expected identifier"), t);
          string name = t->content;

          // check for redefinition of existing macro
          if (pp1_namespace.find(name) != pp1_namespace.end())
            {
              parse_error er (ERR_SRC, _F("attempt to redefine macro '@%s' in the same file", name.c_str ()), t);

              // Also point to pp1_namespace[name]->tok, the site of
              // the original definition:
              er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here",
                                             name.c_str()), pp1_namespace[name]->tok);
              throw er;
            }

          // XXX: the above restriction was mostly necessary due to
          // wanting to leave open the possibility of
          // statically-scoped semantics in the future.

          // XXX: this cascades into further parse errors as the
          // parser tries to parse the remaining definition... (e.g.
          // it can't tell that the macro body isn't a conditional,
          // that the uses of parameters aren't nonexistent
          // macros.....)
          if (name == "define")
            throw PARSE_ERROR (_("attempt to redefine '@define'"), t);
          if (input.atwords.count(name))
            session.print_warning (_F("macro redefines built-in operator '@%s'", name.c_str()), t);

          // The declaration takes ownership of the name token.
          macrodecl* decl = (pp1_namespace[name] = new macrodecl);
          decl->tok = t;

          // determine if the macro takes parameters
          bool saw_params = false;
          t = input.scan();
          if (t && t->type == tok_operator && t->content == "(")
            {
              saw_params = true;
              do
                {
                  // discards the "(" on the first pass, a "," afterwards
                  delete t;

                  t = input.scan ();
                  if (! (t && t->type == tok_identifier))
                    throw PARSE_ERROR(_("expected identifier"), t);
                  decl->formal_args.push_back(t->content);
                  delete t;

                  t = input.scan ();
                  if (t && t->type == tok_operator && t->content == ",")
                    {
                      continue;
                    }
                  else if (t && t->type == tok_operator && t->content == ")")
                    {
                      delete t;
                      t = input.scan();
                      break;
                    }
                  else
                    {
                      throw PARSE_ERROR (_("expected ',' or ')'"), t);
                    }
                }
              while (true);
            }

          // (2) identify & consume macro body
          if (! (t && t->type == tok_operator && t->content == "%("))
            {
              if (saw_params)
                throw PARSE_ERROR (_("expected '%('"), t);
              else
                throw PARSE_ERROR (_("expected '%(' or '('"), t);
            }
          delete t;

          t = slurp_pp1_body (decl->body);
          if (!t)
            throw PARSE_ERROR (_("incomplete macro definition - missing '%)'"), decl->tok);
          delete t;

          // Now loop around to look for a real token.
          continue;
        }

      // (potential) macro invocation
      if (t->type == tok_operator && t->content[0] == '@')
        {
          const string& name = t->content.substr(1); // strip initial '@'

          // check if name refers to a real parameter or macro
          // Lookup order: parameters of the current activation, then
          // macros of this input (skipped while expanding a library
          // macro), then the session's library macros.
          macrodecl* decl;
          pp1_activation* act = pp1_state.empty() ? 0 : pp1_state.back();
          if (act && act->params.find(name) != act->params.end())
            decl = act->params[name];
          else if (!(act && act->curr_macro->context == ctx_library)
                   && pp1_namespace.find(name) != pp1_namespace.end())
            decl = pp1_namespace[name];
          else if (session.library_macros.find(name)
                   != session.library_macros.end())
            decl = session.library_macros[name];
          else // this is an ordinary @operator
            return t;

          // handle macro invocation, taking ownership of t
          pp1_activation *new_act = new pp1_activation(t, decl);
          unsigned num_params = decl->formal_args.size();

          // (1a) restore parameter invocation closure
          if (num_params == 0 && decl->is_closure())
            {
              // NB: decl->parent_act is always safe since the
              // parameter decl (if any) comes from an activation
              // record which is deeper in the stack than new_act.

              // decl is a macro parameter which must be evaluated in
              // the context of the original point of invocation:
              new_act->params = ((pp_macrodecl*)decl)->parent_act->params;
              goto expand;
            }

          // (1b) consume macro parameters (if any)
          if (num_params == 0)
            goto expand;

          // for simplicity, we do not allow macro constructs here
          // -- if we did, we'd have to recursively call scan_pp1()
          t = next_pp1 ();
          if (! (t && t->type == tok_operator && t->content == "("))
            {
              delete new_act;
              throw PARSE_ERROR (_NF
                                 ("expected '(' in invocation of macro '@%s'"
                                  " taking %d parameter",
                                  "expected '(' in invocation of macro '@%s'"
                                  " taking %d parameters",
                                  num_params, name.c_str(), num_params), t);
            }

          // XXX perhaps parse/count the full number of params,
          // so we can say "expected x, found y params" on error?
          for (unsigned i = 0; i < num_params; i++)
            {
              // discards the "(" on the first pass, a "," afterwards
              delete t;

              // create parameter closure
              string param_name = decl->formal_args[i];
              pp_macrodecl* p = (new_act->params[param_name]
                                 = new pp_macrodecl);
              p->tok = new token(*new_act->tok);
              p->parent_act = act;
              // NB: *new_act->tok points to invocation, act is NULL at top level

              t = slurp_pp1_param (p->body);

              // check correct usage of ',' or ')'
              if (t == 0) // hit unexpected EOF or end of macro
                {
                  // XXX could we pop the stack and continue parsing
                  // the invocation, allowing macros to construct new
                  // invocations in piecemeal fashion??
                  // Copy the invocation token first: deleting new_act
                  // deletes the token it holds.
                  const token* orig_t = new token(*new_act->tok);
                  delete new_act;
                  throw PARSE_ERROR (_("could not find end of macro invocation"), orig_t);
                }
              if (t->type == tok_operator && t->content == ",")
                {
                  if (i + 1 == num_params)
                    {
                      delete new_act;
                      throw PARSE_ERROR (_F("too many parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
                    }
                }
              else if (t->type == tok_operator && t->content == ")")
                {
                  if (i + 1 != num_params)
                    {
                      delete new_act;
                      throw PARSE_ERROR (_F("too few parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
                    }
                }
              else
                {
                  // XXX this is, incidentally, impossible
                  delete new_act;
                  throw PARSE_ERROR(_("expected ',' or ')' after macro parameter"), t);
                }
            }

          // discard the closing ")"
          delete t;

          // (2) set up macro expansion
        expand:
          pp1_state.push_back (new_act);

          // Now loop around to look for a real token.
          continue;
        }

      // Otherwise, we have an ordinary token.
      return t;
    }
}
671
672 // Consume a single macro invocation's parameters, heeding nesting
673 // brackets and stopping on an unbalanced ')' or an unbracketed ','
674 // (and returning the final separator token).
675 const token*
676 parser::slurp_pp1_param (vector<const token*>& param)
677 {
678 const token* t = 0;
679 unsigned nesting = 0;
680 do
681 {
682 t = next_pp1 ();
683
684 if (!t)
685 break;
686 // [ needed in case macro paramater is used as prefix for array-deref operation
687 if (t->type == tok_operator && (t->content == "(" || t->content == "["))
688 ++nesting;
689 else if (nesting && t->type == tok_operator && (t->content == ")" || t->content == "]"))
690 --nesting;
691 else if (!nesting && t->type == tok_operator
692 && (t->content == ")" || t->content == ","))
693 break;
694 param.push_back(t);
695 }
696 while (true);
697 return t; // report ")" or "," or NULL
698 }
699
700
701 // Consume a macro declaration's body, heeding nested %( %) brackets.
702 const token*
703 parser::slurp_pp1_body (vector<const token*>& body)
704 {
705 const token* t = 0;
706 unsigned nesting = 0;
707 do
708 {
709 t = next_pp1 ();
710
711 if (!t)
712 break;
713 if (t->type == tok_operator && t->content == "%(")
714 ++nesting;
715 else if (nesting && t->type == tok_operator && t->content == "%)")
716 --nesting;
717 else if (!nesting && t->type == tok_operator && t->content == "%)")
718 break;
719 body.push_back(t);
720 }
721 while (true);
722 return t; // report final "%)" or NULL
723 }
724
725 // Used for parsing .stpm files.
726 stapfile*
727 parser::parse_library_macros ()
728 {
729 stapfile* f = new stapfile;
730 input.set_current_file (f);
731
732 try
733 {
734 const token* t = scan_pp ();
735
736 // Currently we only take objection to macro invocations if they
737 // produce a non-whitespace token after being expanded.
738
739 // XXX should we prevent macro invocations even if they expand to empty??
740
741 if (t != 0)
742 throw PARSE_ERROR (_F("unexpected token in library macro file '%s'", input_name.c_str()), t);
743
744 // We need to first check whether *any* of the macros are duplicates,
745 // then commit to including the entire file in the global namespace
746 // (or not). Yuck.
747 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
748 it != pp1_namespace.end(); it++)
749 {
750 string name = it->first;
751
752 if (session.library_macros.find(name) != session.library_macros.end())
753 {
754 parse_error er(ERR_SRC, _F("duplicate definition of library macro '@%s'", name.c_str()), it->second->tok);
755 er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here", name.c_str()), session.library_macros[name]->tok);
756 print_error (er);
757
758 delete er.chain;
759 delete f;
760 return 0;
761 }
762 }
763
764 }
765 catch (const parse_error& pe)
766 {
767 print_error (pe, errs_as_warnings);
768 delete f;
769 return 0;
770 }
771
772 // If no errors, include the entire file. Note how this is outside
773 // of the try-catch block -- no errors possible.
774 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
775 it != pp1_namespace.end(); it++)
776 {
777 string name = it->first;
778
779 session.library_macros[name] = it->second;
780 session.library_macros[name]->context = ctx_library;
781 }
782
783 return f;
784 }
785
786 // Second pass - preprocessor conditional expansion.
787 //
788 // The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
789 // where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
790 // or: arch COMPARISON-OP "arch-string"
791 // or: systemtap_v COMPARISON-OP "version-string"
792 // or: systemtap_privilege COMPARISON-OP "privilege-string"
793 // or: CONFIG_foo COMPARISON-OP "config-string"
794 // or: CONFIG_foo COMPARISON-OP number
795 // or: CONFIG_foo COMPARISON-OP CONFIG_bar
796 // or: "string1" COMPARISON-OP "string2"
797 // or: number1 COMPARISON-OP number2
798 // The %: ELSE-TOKENS part is optional.
799 //
800 // e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
801 // e.g. %( arch != "i?86" %? "foo" %: "baz" %)
802 // e.g. %( CONFIG_foo %? "foo" %: "baz" %)
803 //
804 // Up to an entire %( ... %) expression is processed by a single call
805 // to this function. Tokens included by any nested conditions are
806 // enqueued in a private vector.
807
808 bool eval_pp_conditional (systemtap_session& s,
809 const token* l, const token* op, const token* r)
810 {
811 if (l->type == tok_identifier && (l->content == "kernel_v" ||
812 l->content == "kernel_vr" ||
813 l->content == "systemtap_v"))
814 {
815 if (! (r->type == tok_string))
816 throw PARSE_ERROR (_("expected string literal"), r);
817
818 string target_kernel_vr = s.kernel_release;
819 string target_kernel_v = s.kernel_base_release;
820 string target;
821
822 if (l->content == "kernel_v") target = target_kernel_v;
823 else if (l->content == "kernel_vr") target = target_kernel_vr;
824 else if (l->content == "systemtap_v") target = s.compatible;
825 else assert (0);
826
827 string query = r->content;
828 bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
829
830 // collect acceptable strverscmp results.
831 int rvc_ok1, rvc_ok2;
832 bool wc_ok = false;
833 if (op->type == tok_operator && op->content == "<=")
834 { rvc_ok1 = -1; rvc_ok2 = 0; }
835 else if (op->type == tok_operator && op->content == ">=")
836 { rvc_ok1 = 1; rvc_ok2 = 0; }
837 else if (op->type == tok_operator && op->content == "<")
838 { rvc_ok1 = -1; rvc_ok2 = -1; }
839 else if (op->type == tok_operator && op->content == ">")
840 { rvc_ok1 = 1; rvc_ok2 = 1; }
841 else if (op->type == tok_operator && op->content == "==")
842 { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
843 else if (op->type == tok_operator && op->content == "!=")
844 { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
845 else
846 throw PARSE_ERROR (_("expected comparison operator"), op);
847
848 if ((!wc_ok) && rhs_wildcard)
849 throw PARSE_ERROR (_("wildcard not allowed with order comparison operators"), op);
850
851 if (rhs_wildcard)
852 {
853 int rvc_result = fnmatch (query.c_str(), target.c_str(),
854 FNM_NOESCAPE); // spooky
855 bool badness = (rvc_result == 0) ^ (op->content == "==");
856 return !badness;
857 }
858 else
859 {
860 int rvc_result = strverscmp (target.c_str(), query.c_str());
861 // normalize rvc_result
862 if (rvc_result < 0) rvc_result = -1;
863 if (rvc_result > 0) rvc_result = 1;
864 return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
865 }
866 }
867 else if (l->type == tok_identifier && l->content == "systemtap_privilege")
868 {
869 string target_privilege =
870 pr_contains(s.privilege, pr_stapdev) ? "stapdev"
871 : pr_contains(s.privilege, pr_stapsys) ? "stapsys"
872 : pr_contains(s.privilege, pr_stapusr) ? "stapusr"
873 : "none"; /* should be impossible -- s.privilege always one of above */
874 assert(target_privilege != "none");
875
876 if (! (r->type == tok_string))
877 throw PARSE_ERROR (_("expected string literal"), r);
878 string query_privilege = r->content;
879
880 bool nomatch = (target_privilege != query_privilege);
881
882 bool result;
883 if (op->type == tok_operator && op->content == "==")
884 result = !nomatch;
885 else if (op->type == tok_operator && op->content == "!=")
886 result = nomatch;
887 else
888 throw PARSE_ERROR (_("expected '==' or '!='"), op);
889 /* XXX perhaps allow <= >= and similar comparisons */
890
891 return result;
892 }
893 else if (l->type == tok_identifier && l->content == "guru_mode")
894 {
895 if (! (r->type == tok_number))
896 throw PARSE_ERROR (_("expected number"), r);
897 int64_t lhs = (int64_t) s.guru_mode;
898 int64_t rhs = lex_cast<int64_t>(r->content);
899 if (!((rhs == 0)||(rhs == 1)))
900 throw PARSE_ERROR (_("expected 0 or 1"), op);
901 if (!((op->type == tok_operator && op->content == "==") ||
902 (op->type == tok_operator && op->content == "!=")))
903 throw PARSE_ERROR (_("expected '==' or '!='"), op);
904
905 return eval_comparison (lhs, op, rhs);
906 }
907 else if (l->type == tok_identifier && l->content == "arch")
908 {
909 string target_architecture = s.architecture;
910 if (! (r->type == tok_string))
911 throw PARSE_ERROR (_("expected string literal"), r);
912 string query_architecture = r->content;
913
914 int nomatch = fnmatch (query_architecture.c_str(),
915 target_architecture.c_str(),
916 FNM_NOESCAPE); // still spooky
917
918 bool result;
919 if (op->type == tok_operator && op->content == "==")
920 result = !nomatch;
921 else if (op->type == tok_operator && op->content == "!=")
922 result = nomatch;
923 else
924 throw PARSE_ERROR (_("expected '==' or '!='"), op);
925
926 return result;
927 }
928 else if (l->type == tok_identifier && l->content == "runtime")
929 {
930 if (! (r->type == tok_string))
931 throw PARSE_ERROR (_("expected string literal"), r);
932
933 string query_runtime = r->content;
934 string target_runtime;
935
936 target_runtime = (s.runtime_mode == systemtap_session::dyninst_runtime
937 ? "dyninst" : "kernel");
938 int nomatch = fnmatch (query_runtime.c_str(),
939 target_runtime.c_str(),
940 FNM_NOESCAPE); // still spooky
941
942 bool result;
943 if (op->type == tok_operator && op->content == "==")
944 result = !nomatch;
945 else if (op->type == tok_operator && op->content == "!=")
946 result = nomatch;
947 else
948 throw PARSE_ERROR (_("expected '==' or '!='"), op);
949
950 return result;
951 }
952 else if (l->type == tok_identifier && l->content.starts_with("CONFIG_"))
953 {
954 if (r->type == tok_string)
955 {
956 string lhs = s.kernel_config[l->content]; // may be empty
957 string rhs = r->content;
958
959 int nomatch = fnmatch (rhs.c_str(), lhs.c_str(), FNM_NOESCAPE); // still spooky
960
961 bool result;
962 if (op->type == tok_operator && op->content == "==")
963 result = !nomatch;
964 else if (op->type == tok_operator && op->content == "!=")
965 result = nomatch;
966 else
967 throw PARSE_ERROR (_("expected '==' or '!='"), op);
968
969 return result;
970 }
971 else if (r->type == tok_number)
972 {
973 const string& lhs_string = s.kernel_config[l->content];
974 const char* startp = lhs_string.c_str ();
975 char* endp = (char*) startp;
976 errno = 0;
977 int64_t lhs = (int64_t) strtoll (startp, & endp, 0);
978 if (errno == ERANGE || errno == EINVAL || *endp != '\0')
979 throw PARSE_ERROR ("Config option value not a number", l);
980
981 int64_t rhs = lex_cast<int64_t>(r->content);
982 return eval_comparison (lhs, op, rhs);
983 }
984 else if (r->type == tok_identifier
985 && r->content.starts_with( "CONFIG_"))
986 {
987 // First try to convert both to numbers,
988 // otherwise threat both as strings.
989 const string& lhs_string = s.kernel_config[l->content];
990 const string& rhs_string = s.kernel_config[r->content];
991 const char* startp = lhs_string.c_str ();
992 char* endp = (char*) startp;
993 errno = 0;
994 int64_t val = (int64_t) strtoll (startp, & endp, 0);
995 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
996 {
997 int64_t lhs = val;
998 startp = rhs_string.c_str ();
999 endp = (char*) startp;
1000 errno = 0;
1001 int64_t rhs = (int64_t) strtoll (startp, & endp, 0);
1002 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
1003 return eval_comparison (lhs, op, rhs);
1004 }
1005
1006 return eval_comparison (lhs_string, op, rhs_string);
1007 }
1008 else
1009 throw PARSE_ERROR (_("expected string, number literal or other CONFIG_... as right side operand"), r);
1010 }
1011 else if (l->type == tok_string && r->type == tok_string)
1012 {
1013 string lhs = l->content;
1014 string rhs = r->content;
1015 return eval_comparison (lhs, op, rhs);
1016 // NB: no wildcarding option here
1017 }
1018 else if (l->type == tok_number && r->type == tok_number)
1019 {
1020 int64_t lhs = lex_cast<int64_t>(l->content);
1021 int64_t rhs = lex_cast<int64_t>(r->content);
1022 return eval_comparison (lhs, op, rhs);
1023 // NB: no wildcarding option here
1024 }
1025 else if (l->type == tok_string && r->type == tok_number
1026 && op->type == tok_operator)
1027 throw PARSE_ERROR (_("expected string literal as right value"), r);
1028 else if (l->type == tok_number && r->type == tok_string
1029 && op->type == tok_operator)
1030 throw PARSE_ERROR (_("expected number literal as right value"), r);
1031
1032 else
1033 throw PARSE_ERROR (_("expected 'arch', 'kernel_v', 'kernel_vr', 'systemtap_v',\n"
1034 " 'runtime', 'systemtap_privilege', 'CONFIG_...', or\n"
1035 " comparison between strings or integers"), l);
1036 }
1037
1038
1039 // Only tokens corresponding to the TRUE statement must be expanded
1040 const token*
1041 parser::scan_pp ()
1042 {
1043 while (true)
1044 {
1045 pp_state_t pp = PP_NONE;
1046 if (!pp_state.empty())
1047 pp = pp_state.back().second;
1048
1049 const token* t = 0;
1050 if (pp == PP_SKIP_THEN || pp == PP_SKIP_ELSE)
1051 t = skip_pp ();
1052 else
1053 t = scan_pp1 ();
1054
1055 if (t == 0) // EOF
1056 {
1057 if (pp != PP_NONE)
1058 {
1059 t = pp_state.back().first;
1060 pp_state.pop_back(); // so skip_some doesn't keep trying to close this
1061 //TRANSLATORS: 'conditional' meaning 'conditional preprocessing'
1062 throw PARSE_ERROR (_("incomplete conditional at end of file"), t);
1063 }
1064 return t;
1065 }
1066
1067 // misplaced preprocessor "then"
1068 if (t->type == tok_operator && t->content == "%?")
1069 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1070
1071 // preprocessor "else"
1072 if (t->type == tok_operator && t->content == "%:")
1073 {
1074 if (pp == PP_NONE)
1075 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1076 if (pp == PP_KEEP_ELSE || pp == PP_SKIP_ELSE)
1077 throw PARSE_ERROR (_("invalid conditional - duplicate '%:'"), t);
1078 // XXX: here and elsewhere, error cascades might be avoided
1079 // by dropping tokens until we reach the closing %)
1080
1081 pp_state.back().second = (pp == PP_KEEP_THEN) ?
1082 PP_SKIP_ELSE : PP_KEEP_ELSE;
1083 delete t;
1084 continue;
1085 }
1086
1087 // preprocessor close
1088 if (t->type == tok_operator && t->content == "%)")
1089 {
1090 if (pp == PP_NONE)
1091 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1092 delete pp_state.back().first;
1093 delete t; //this is the closing bracket
1094 pp_state.pop_back();
1095 continue;
1096 }
1097
1098 if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
1099 return t;
1100
1101 // We have a %( - it's time to throw a preprocessing party!
1102
1103 bool result = false;
1104 bool and_result = true;
1105 const token *n = NULL;
1106 do {
1107 const token *l, *op, *r;
1108 l = scan_pp1 ();
1109 op = scan_pp1 ();
1110 r = scan_pp1 ();
1111 if (l == 0 || op == 0 || r == 0)
1112 throw PARSE_ERROR (_("incomplete condition after '%('"), t);
1113 // NB: consider generalizing to consume all tokens until %?, and
1114 // passing that as a vector to an evaluator.
1115
1116 // Do not evaluate the condition if we haven't expanded everything.
1117 // This may occur when having several recursive conditionals.
1118 and_result &= eval_pp_conditional (session, l, op, r);
1119 if(l->content=="systemtap_v")
1120 systemtap_v_seen=r;
1121
1122 else
1123 delete r;
1124
1125 delete l;
1126 delete op;
1127 delete n;
1128
1129 n = scan_pp1 ();
1130 if (n && n->type == tok_operator && n->content == "&&")
1131 continue;
1132 result |= and_result;
1133 and_result = true;
1134 if (! (n && n->type == tok_operator && n->content == "||"))
1135 break;
1136 } while (true);
1137
1138 /*
1139 clog << "PP eval (" << *t << ") == " << result << endl;
1140 */
1141
1142 const token *m = n;
1143 if (! (m && m->type == tok_operator && m->content == "%?"))
1144 throw PARSE_ERROR (_("expected '%?' marker for conditional"), t);
1145 delete m; // "%?"
1146
1147 pp = result ? PP_KEEP_THEN : PP_SKIP_THEN;
1148 pp_state.push_back (make_pair (t, pp));
1149
1150 // Now loop around to look for a real token.
1151 }
1152 }
1153
1154
1155 // Skip over tokens and any errors, heeding
1156 // only nested preprocessor starts and ends.
1157 const token*
1158 parser::skip_pp ()
1159 {
1160 const token* t = 0;
1161 unsigned nesting = 0;
1162 do
1163 {
1164 try
1165 {
1166 t = scan_pp1 (true);
1167 }
1168 catch (const parse_error &e)
1169 {
1170 continue;
1171 }
1172 if (!t)
1173 break;
1174 if (t->type == tok_operator && t->content == "%(")
1175 ++nesting;
1176 else if (nesting && t->type == tok_operator && t->content == "%)")
1177 --nesting;
1178 else if (!nesting && t->type == tok_operator &&
1179 (t->content == "%:" || t->content == "%?" || t->content == "%)"))
1180 break;
1181 delete t;
1182 }
1183 while (true);
1184 return t;
1185 }
1186
1187
1188 const token*
1189 parser::next ()
1190 {
1191 if (! next_t)
1192 next_t = scan_pp ();
1193 if (! next_t)
1194 throw PARSE_ERROR (_("unexpected end-of-file"));
1195
1196 last_t = next_t;
1197 // advance by zeroing next_t
1198 next_t = 0;
1199 return last_t;
1200 }
1201
1202
1203 const token*
1204 parser::peek ()
1205 {
1206 if (! next_t)
1207 next_t = scan_pp ();
1208
1209 // don't advance by zeroing next_t
1210 last_t = next_t;
1211 return next_t;
1212 }
1213
1214
1215 void
1216 parser::swallow ()
1217 {
1218 // can only swallow something last peeked or nexted token.
1219 assert (last_t != 0);
1220 delete last_t;
1221 // advance by zeroing next_t
1222 last_t = next_t = 0;
1223 }
1224
1225
1226 static inline bool
1227 tok_is(token const * t, token_type tt, string const & expected)
1228 {
1229 return t && t->type == tt && t->content == expected;
1230 }
1231
1232
1233 void
1234 parser::expect_known (token_type tt, string const & expected)
1235 {
1236 const token *t = next();
1237 if (! (t && t->type == tt && t->content == expected))
1238 throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
1239 swallow (); // We are done with it, content was copied.
1240 }
1241
1242
1243 void
1244 parser::expect_unknown (token_type tt, interned_string & target)
1245 {
1246 const token *t = next();
1247 if (!(t && t->type == tt))
1248 throw PARSE_ERROR (_("expected ") + tt2str(tt));
1249 target = t->content;
1250 swallow (); // We are done with it, content was copied.
1251 }
1252
1253
1254 void
1255 parser::expect_unknown2 (token_type tt1, token_type tt2, interned_string & target)
1256 {
1257 const token *t = next();
1258 if (!(t && (t->type == tt1 || t->type == tt2)))
1259 throw PARSE_ERROR (_F("expected %s or %s", tt2str(tt1).c_str(), tt2str(tt2).c_str()));
1260 target = t->content;
1261 swallow (); // We are done with it, content was copied.
1262 }
1263
1264
1265 void
1266 parser::expect_op (string const & expected)
1267 {
1268 expect_known (tok_operator, expected);
1269 }
1270
1271
1272 void
1273 parser::expect_kw (string const & expected)
1274 {
1275 expect_known (tok_keyword, expected);
1276 }
1277
1278 const token*
1279 parser::expect_kw_token (string const & expected)
1280 {
1281 const token *t = next();
1282 if (! (t && t->type == tok_keyword && t->content == expected))
1283 throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
1284 return t;
1285 }
1286
1287 void
1288 parser::expect_number (int64_t & value)
1289 {
1290 bool neg = false;
1291 const token *t = next();
1292 if (t->type == tok_operator && t->content == "-")
1293 {
1294 neg = true;
1295 swallow ();
1296 t = next ();
1297 }
1298 if (!(t && t->type == tok_number))
1299 throw PARSE_ERROR (_("expected number"));
1300
1301 const string& s = t->content;
1302 const char* startp = s.c_str ();
1303 char* endp = (char*) startp;
1304
1305 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1306 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1307 // since the lexer only gives us positive digit strings, but we'll
1308 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1309 errno = 0;
1310 value = (int64_t) strtoull (startp, & endp, 0);
1311 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
1312 || (neg && (unsigned long long) value > 9223372036854775808ULL)
1313 || (unsigned long long) value > 18446744073709551615ULL
1314 || value < -9223372036854775807LL-1)
1315 throw PARSE_ERROR (_("number invalid or out of range"));
1316
1317 if (neg)
1318 value = -value;
1319
1320 swallow (); // We are done with it, content was parsed and copied into value.
1321 }
1322
1323
1324 const token*
1325 parser::expect_ident_or_atword (interned_string & target)
1326 {
1327 const token *t = next();
1328
1329 // accept identifiers and operators beginning in '@':
1330 if (!t || (t->type != tok_identifier
1331 && (t->type != tok_operator || t->content[0] != '@')))
1332 // XXX currently this is only called from parse_hist_op_or_bare_name(),
1333 // so the message is accurate, but keep an eye out in the future:
1334 throw PARSE_ERROR (_F("expected %s or statistical operation", tt2str(tok_identifier).c_str()));
1335
1336 target = t->content;
1337 return t;
1338 }
1339
1340
1341 void
1342 parser::expect_ident_or_keyword (interned_string & target)
1343 {
1344 expect_unknown2 (tok_identifier, tok_keyword, target);
1345 }
1346
1347
1348 bool
1349 parser::peek_op (string const & op)
1350 {
1351 return tok_is (peek(), tok_operator, op);
1352 }
1353
1354
1355 bool
1356 parser::peek_kw (string const & kw)
1357 {
1358 return tok_is (peek(), tok_identifier, kw);
1359 }
1360
1361
1362
1363 lexer::lexer (istream& input, const string& in, systemtap_session& s, bool cc):
1364 ate_comment(false), ate_whitespace(false), saw_tokens(false), check_compatible(cc),
1365 input_name (in), input_pointer (0), input_end (0), cursor_suspend_count(0),
1366 cursor_suspend_line (1), cursor_suspend_column (1), cursor_line (1),
1367 cursor_column (1), session(s), current_file (0), current_token_chain (0)
1368 {
1369 getline(input, input_contents, '\0');
1370
1371 input_pointer = input_contents.data();
1372 input_end = input_contents.data() + input_contents.size();
1373
1374 if (keywords.empty())
1375 {
1376 // NB: adding new keywords is highly disruptive to the language,
1377 // in particular to existing scripts that could be suddenly
1378 // broken. If done at all, it has to be s.compatible-sensitive,
1379 // and broadly advertised.
1380 keywords.insert("probe");
1381 keywords.insert("global");
1382 if (has_version("3.0"))
1383 keywords.insert("private");
1384 keywords.insert("function");
1385 keywords.insert("if");
1386 keywords.insert("else");
1387 keywords.insert("for");
1388 keywords.insert("foreach");
1389 keywords.insert("in");
1390 keywords.insert("limit");
1391 keywords.insert("return");
1392 keywords.insert("delete");
1393 keywords.insert("while");
1394 keywords.insert("break");
1395 keywords.insert("continue");
1396 keywords.insert("next");
1397 keywords.insert("string");
1398 keywords.insert("long");
1399 keywords.insert("try");
1400 keywords.insert("catch");
1401 }
1402
1403 if (atwords.empty())
1404 {
1405 // NB: adding new @words is mildly disruptive to existing
1406 // scripts that define macros with the same name, but not
1407 // really. The user will merely receive a warning that they are
1408 // redefining an existing operator.
1409
1410 // These are inserted without the actual '@', so we can directly check
1411 // proposed macro names without building a string with that prefix.
1412 atwords.insert("cast");
1413 atwords.insert("defined");
1414 atwords.insert("entry");
1415 atwords.insert("perf");
1416 atwords.insert("var");
1417 atwords.insert("avg");
1418 atwords.insert("count");
1419 atwords.insert("sum");
1420 atwords.insert("min");
1421 atwords.insert("max");
1422 atwords.insert("hist_linear");
1423 atwords.insert("hist_log");
1424 }
1425 }
1426
1427 unordered_set<string> lexer::atwords;
1428
1429 void
1430 lexer::set_current_file (stapfile* f)
1431 {
1432 current_file = f;
1433 if (f)
1434 {
1435 f->file_contents = input_contents;
1436 f->name = input_name;
1437 }
1438 }
1439
1440 void
1441 lexer::set_current_token_chain (const token* tok)
1442 {
1443 current_token_chain = tok;
1444 }
1445
1446 int
1447 lexer::input_peek (unsigned n)
1448 {
1449 if (input_pointer + n >= input_end)
1450 return -1; // EOF
1451 return (unsigned char)*(input_pointer + n);
1452 }
1453
1454
1455 bool
1456 lexer::has_version (const char* v) const
1457 {
1458 return check_compatible
1459 ? strverscmp(session.compatible.c_str(), v) >= 0
1460 : true;
1461 }
1462
1463 int
1464 lexer::input_get ()
1465 {
1466 int c = input_peek();
1467 if (c < 0) return c; // EOF
1468
1469 ++input_pointer;
1470
1471 if (cursor_suspend_count)
1472 {
1473 // Track effect of input_put: preserve previous cursor/line_column
1474 // until all of its characters are consumed.
1475 if (--cursor_suspend_count == 0)
1476 {
1477 cursor_line = cursor_suspend_line;
1478 cursor_column = cursor_suspend_column;
1479 }
1480 }
1481 else
1482 {
1483 // update source cursor
1484 if (c == '\n')
1485 {
1486 cursor_line ++;
1487 cursor_column = 1;
1488 }
1489 else
1490 cursor_column ++;
1491 }
1492
1493 // clog << "[" << (char)c << "]";
1494 return c;
1495 }
1496
1497
1498 void
1499 lexer::input_put (const string& chars, const token* t)
1500 {
1501 size_t pos = input_pointer - input_contents.data();
1502 // clog << "[put:" << chars << " @" << pos << "]";
1503 input_contents.insert (pos, chars);
1504 cursor_suspend_count += chars.size();
1505 cursor_suspend_line = cursor_line;
1506 cursor_suspend_column = cursor_column;
1507 cursor_line = t->location.line;
1508 cursor_column = t->location.column;
1509 input_pointer = input_contents.data() + pos;
1510 input_end = input_contents.data() + input_contents.size();
1511 }
1512
1513
1514 token*
1515 lexer::scan ()
1516 {
1517 ate_comment = false; // reset for each new token
1518 ate_whitespace = false; // reset for each new token
1519
1520 // XXX be very sure to restore old_saw_tokens if we return without a token:
1521 bool old_saw_tokens = saw_tokens;
1522 saw_tokens = true;
1523
1524 token* n = new token;
1525 string token_str; // accumulate here instead of by incremental interning
1526 n->location.file = current_file;
1527 n->chain = current_token_chain;
1528
1529 skip:
1530 bool suspended = (cursor_suspend_count > 0);
1531 n->location.line = cursor_line;
1532 n->location.column = cursor_column;
1533
1534 int c = input_get();
1535 // clog << "{" << (char)c << (char)c2 << "}";
1536 if (c < 0)
1537 {
1538 delete n;
1539 saw_tokens = old_saw_tokens;
1540 return 0;
1541 }
1542
1543 if (isspace (c))
1544 {
1545 ate_whitespace = true;
1546 goto skip;
1547 }
1548
1549 int c2 = input_peek ();
1550
1551 // Paste command line arguments as character streams into
1552 // the beginning of a token. $1..$999 go through as raw
1553 // characters; @1..@999 are quoted/escaped as strings.
1554 // $# and @# expand to the number of arguments, similarly
1555 // raw or quoted.
1556 if ((c == '$' || c == '@') && (c2 == '#'))
1557 {
1558 token_str.push_back (c);
1559 token_str.push_back (c2);
1560 input_get(); // swallow '#'
1561
1562 if (suspended)
1563 {
1564 n->make_junk(tok_junk_nested_arg);
1565 return n;
1566 }
1567 size_t num_args = session.args.size ();
1568 input_put ((c == '$') ? lex_cast (num_args) : lex_cast_qstring (num_args), n);
1569 token_str.clear();
1570 goto skip;
1571 }
1572 else if ((c == '$' || c == '@') && (isdigit (c2)))
1573 {
1574 unsigned idx = 0;
1575 token_str.push_back (c);
1576 do
1577 {
1578 input_get ();
1579 token_str.push_back (c2);
1580 idx = (idx * 10) + (c2 - '0');
1581 c2 = input_peek ();
1582 } while (c2 > 0 &&
1583 isdigit (c2) &&
1584 idx <= session.args.size()); // prevent overflow
1585 if (suspended)
1586 {
1587 n->make_junk(tok_junk_nested_arg);
1588 return n;
1589 }
1590 if (idx == 0 ||
1591 idx-1 >= session.args.size())
1592 {
1593 n->make_junk(tok_junk_invalid_arg);
1594 return n;
1595 }
1596 const string& arg = session.args[idx-1];
1597 input_put ((c == '$') ? arg : lex_cast_qstring (arg), n);
1598 token_str.clear();
1599 goto skip;
1600 }
1601
1602 else if (isalpha (c) || c == '$' || c == '@' || c == '_')
1603 {
1604 token_str = (char) c;
1605 while (isalnum (c2) || c2 == '_' || c2 == '$')
1606 {
1607 input_get ();
1608 token_str.push_back (c2);
1609 c2 = input_peek ();
1610 }
1611 n->content = token_str;
1612
1613 if (n->content[0] == '@')
1614 // makes it easier to detect illegal use of @words:
1615 n->type = tok_operator;
1616 else if (keywords.count(n->content))
1617 n->type = tok_keyword;
1618 else
1619 n->type = tok_identifier;
1620
1621 return n;
1622 }
1623
1624 else if (isdigit (c)) // positive literal
1625 {
1626 n->type = tok_number;
1627 token_str = (char) c;
1628
1629 while (isalnum (c2))
1630 {
1631 // NB: isalnum is very permissive. We rely on strtol, called in
1632 // parser::parse_literal below, to confirm that the number string
1633 // is correctly formatted and in range.
1634
1635 input_get ();
1636 token_str.push_back (c2);
1637 c2 = input_peek ();
1638 }
1639
1640 n->content = token_str;
1641 return n;
1642 }
1643
1644 else if (c == '\"')
1645 {
1646 n->type = tok_string;
1647 while (1)
1648 {
1649 c = input_get ();
1650
1651 if (c < 0 || c == '\n')
1652 {
1653 n->make_junk(tok_junk_unclosed_quote);
1654 return n;
1655 }
1656 if (c == '\"') // closing double-quotes
1657 break;
1658 else if (c == '\\') // see also input_put
1659 {
1660 c = input_get();
1661 switch (c)
1662 {
1663 case 'x':
1664 if (!has_version("2.3"))
1665 goto the_default;
1666 /* FALLTHROUGH */
1667 case 'a':
1668 case 'b':
1669 case 't':
1670 case 'n':
1671 case 'v':
1672 case 'f':
1673 case 'r':
1674 case '0' ... '7': // NB: need only match the first digit
1675 case '\\':
1676 // Pass these escapes through to the string value
1677 // being parsed; it will be emitted into a C literal.
1678 // XXX: PR13371: perhaps we should evaluate them here
1679 // (and re-quote them during translate.cxx emission).
1680 token_str.push_back ('\\');
1681
1682 // fall through
1683 default: the_default:
1684 token_str.push_back (c);
1685 break;
1686 }
1687 }
1688 else
1689 token_str.push_back (c);
1690 }
1691 n->content = token_str;
1692 return n;
1693 }
1694
1695 else if (ispunct (c))
1696 {
1697 int c3 = input_peek (1);
1698
1699 // NB: if we were to recognize negative numeric literals here,
1700 // we'd introduce another grammar ambiguity:
1701 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
1702 // instead of tok_number(1) tok_operator('-') tok_number(1)
1703
1704 if (c == '#') // shell comment
1705 {
1706 unsigned this_line = cursor_line;
1707 do { c = input_get (); }
1708 while (c >= 0 && cursor_line == this_line);
1709 ate_comment = true;
1710 ate_whitespace = true;
1711 goto skip;
1712 }
1713 else if ((c == '/' && c2 == '/')) // C++ comment
1714 {
1715 unsigned this_line = cursor_line;
1716 do { c = input_get (); }
1717 while (c >= 0 && cursor_line == this_line);
1718 ate_comment = true;
1719 ate_whitespace = true;
1720 goto skip;
1721 }
1722 else if (c == '/' && c2 == '*') // C comment
1723 {
1724 (void) input_get (); // swallow '*' already in c2
1725 c = input_get ();
1726 c2 = input_get ();
1727 while (c2 >= 0)
1728 {
1729 if (c == '*' && c2 == '/')
1730 break;
1731 c = c2;
1732 c2 = input_get ();
1733 }
1734 ate_comment = true;
1735 ate_whitespace = true;
1736 goto skip;
1737 }
1738 else if (c == '%' && c2 == '{') // embedded code
1739 {
1740 n->type = tok_embedded;
1741 (void) input_get (); // swallow '{' already in c2
1742 c = input_get ();
1743 c2 = input_get ();
1744 while (c2 >= 0)
1745 {
1746 if (c == '%' && c2 == '}')
1747 {
1748 n->content = token_str;
1749 return n;
1750 }
1751 if (c == '}' && c2 == '%') // possible typo
1752 session.print_warning (_("possible erroneous closing '}%', use '%}'?"), n);
1753 token_str.push_back (c);
1754 c = c2;
1755 c2 = input_get();
1756 }
1757
1758 n->make_junk(tok_junk_unclosed_embedded);
1759 return n;
1760 }
1761
1762 // We're committed to recognizing at least the first character
1763 // as an operator.
1764 n->type = tok_operator;
1765 token_str = (char) c;
1766
1767 // match all valid operators, in decreasing size order
1768 if ((c == '<' && c2 == '<' && c3 == '<') ||
1769 (c == '<' && c2 == '<' && c3 == '=') ||
1770 (c == '>' && c2 == '>' && c3 == '='))
1771 {
1772 token_str.push_back (c2);
1773 token_str.push_back (c3);
1774 input_get (); // c2
1775 input_get (); // c3
1776 }
1777 else if ((c == '=' && c2 == '=') ||
1778 (c == '!' && c2 == '=') ||
1779 (c == '<' && c2 == '=') ||
1780 (c == '>' && c2 == '=') ||
1781 (c == '=' && c2 == '~') ||
1782 (c == '!' && c2 == '~') ||
1783 (c == '+' && c2 == '=') ||
1784 (c == '-' && c2 == '=') ||
1785 (c == '*' && c2 == '=') ||
1786 (c == '/' && c2 == '=') ||
1787 (c == '%' && c2 == '=') ||
1788 (c == '&' && c2 == '=') ||
1789 (c == '^' && c2 == '=') ||
1790 (c == '|' && c2 == '=') ||
1791 (c == '.' && c2 == '=') ||
1792 (c == '&' && c2 == '&') ||
1793 (c == '|' && c2 == '|') ||
1794 (c == '+' && c2 == '+') ||
1795 (c == '-' && c2 == '-') ||
1796 (c == '-' && c2 == '>') ||
1797 (c == '<' && c2 == '<') ||
1798 (c == '>' && c2 == '>') ||
1799 // preprocessor tokens
1800 (c == '%' && c2 == '(') ||
1801 (c == '%' && c2 == '?') ||
1802 (c == '%' && c2 == ':') ||
1803 (c == '%' && c2 == ')'))
1804 {
1805 token_str.push_back (c2);
1806 input_get (); // swallow other character
1807 }
1808
1809 n->content = token_str;
1810 return n;
1811 }
1812
1813 else
1814 {
1815 n->type = tok_junk;
1816 ostringstream s;
1817 s << "\\x" << hex << setw(2) << setfill('0') << c;
1818 n->content = s.str();
1819 // signal parser to emit "expected X, found junk" type error
1820 n->make_junk(tok_junk_unknown);
1821 return n;
1822 }
1823 }
1824
1825 // ------------------------------------------------------------------------
1826
1827 void
1828 token::make_junk (token_junk_type junk)
1829 {
1830 type = tok_junk;
1831 junk_type = junk;
1832 }
1833
1834 // ------------------------------------------------------------------------
1835
1836 string
1837 token::junk_message(systemtap_session& session) const
1838 {
1839 switch (junk_type)
1840 {
1841 case tok_junk_nested_arg:
1842 return _("invalid nested substitution of command line arguments");
1843
1844 case tok_junk_invalid_arg:
1845 return _F("command line argument out of range [1-%lu]",
1846 (unsigned long) session.args.size());
1847
1848 case tok_junk_unclosed_quote:
1849 return _("Could not find matching closing quote");
1850
1851 case tok_junk_unclosed_embedded:
1852 return _("Could not find matching '%}' to close embedded function block");
1853
1854 default:
1855 return _("unknown junk token");
1856 }
1857 }
1858
1859 // ------------------------------------------------------------------------
1860
1861 stapfile*
1862 parser::parse ()
1863 {
1864 stapfile* f = new stapfile;
1865 input.set_current_file (f);
1866
1867 bool empty = true;
1868
1869 while (1)
1870 {
1871 try
1872 {
1873 systemtap_v_seen = 0;
1874 const token* t = peek ();
1875 if (! t) // nice clean EOF, modulo any preprocessing that occurred
1876 break;
1877
1878 empty = false;
1879 if (t->type == tok_keyword && t->content == "probe")
1880 {
1881 context = con_probe;
1882 parse_probe (f->probes, f->aliases);
1883 }
1884 else if (t->type == tok_keyword && t->content == "private")
1885 {
1886 context = con_unknown;
1887 parse_private (f->globals, f->probes, f->name, f->functions);
1888 }
1889 else if (t->type == tok_keyword && t->content == "global")
1890 {
1891 context = con_global;
1892 parse_global (f->globals, f->probes, f->name);
1893 }
1894 else if (t->type == tok_keyword && t->content == "function")
1895 {
1896 context = con_function;
1897 parse_functiondecl (f->functions, f->name);
1898 }
1899 else if (t->type == tok_embedded)
1900 {
1901 context = con_embedded;
1902 f->embeds.push_back (parse_embeddedcode ());
1903 }
1904 else
1905 {
1906 context = con_unknown;
1907 throw PARSE_ERROR (_("expected 'probe', 'global', 'private', 'function', or '%{'"));
1908 }
1909 }
1910 catch (parse_error& pe)
1911 {
1912 print_error (pe, errs_as_warnings);
1913
1914 // XXX: do we want tok_junk to be able to force skip_some behaviour?
1915 if (pe.skip_some) // for recovery
1916 // Quietly swallow all tokens until the next keyword we can start parsing from.
1917 while (1)
1918 try
1919 {
1920 {
1921 const token* t = peek ();
1922 if (! t)
1923 break;
1924 if (t->type == tok_keyword && t->content == "probe") break;
1925 else if (t->type == tok_keyword && t->content == "private") break;
1926 else if (t->type == tok_keyword && t->content == "global") break;
1927 else if (t->type == tok_keyword && t->content == "function") break;
1928 else if (t->type == tok_embedded) break;
1929 swallow (); // swallow it
1930 }
1931 }
1932 catch (parse_error& pe2)
1933 {
1934 // parse error during recovery ... ugh
1935 print_error (pe2);
1936 }
1937 }
1938 }
1939
1940 if (empty)
1941 {
1942 // vary message depending on whether file was *actually* empty:
1943 cerr << (input.saw_tokens
1944 ? _F("Input file '%s' is empty after preprocessing.", input_name.c_str())
1945 : _F("Input file '%s' is empty.", input_name.c_str()))
1946 << endl;
1947 delete f;
1948 f = 0;
1949 }
1950 else if (num_errors > 0)
1951 {
1952 cerr << _NF("%d parse error.", "%d parse errors.", num_errors, num_errors) << endl;
1953 delete f;
1954 f = 0;
1955 }
1956
1957 input.set_current_file(0);
1958 return f;
1959 }
1960
1961
1962 probe*
1963 parser::parse_synthetic_probe (const token* chain)
1964 {
1965 probe* p = NULL;
1966 stapfile* f = new stapfile;
1967 f->synthetic = true;
1968 input.set_current_file (f);
1969 input.set_current_token_chain (chain);
1970
1971 try
1972 {
1973 context = con_probe;
1974 parse_probe (f->probes, f->aliases);
1975
1976 if (f->probes.size() != 1 || !f->aliases.empty())
1977 throw PARSE_ERROR (_("expected a single synthetic probe"));
1978 p = f->probes[0];
1979 }
1980 catch (parse_error& pe)
1981 {
1982 print_error (pe, errs_as_warnings);
1983 }
1984
1985 // TODO check for unparsed tokens?
1986
1987 input.set_current_file(0);
1988 input.set_current_token_chain(0);
1989 return p;
1990 }
1991
1992
1993 void
1994 parser::parse_probe (vector<probe *> & probe_ret,
1995 vector<probe_alias *> & alias_ret)
1996 {
1997 const token* t0 = next ();
1998 if (! (t0->type == tok_keyword && t0->content == "probe"))
1999 throw PARSE_ERROR (_("expected 'probe'"));
2000
2001 vector<probe_point *> aliases;
2002 vector<probe_point *> locations;
2003
2004 int epilogue_alias = 0;
2005
2006 while (1)
2007 {
2008 vector<probe_point*> pps = parse_probe_points();
2009
2010 const token* t = peek ();
2011 if (pps.size() == 1 && t
2012 && t->type == tok_operator && t->content == "=")
2013 {
2014 if (pps[0]->optional || pps[0]->sufficient)
2015 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps[0]->components.front()->tok);
2016 aliases.push_back(pps[0]);
2017 swallow ();
2018 continue;
2019 }
2020 else if (pps.size() == 1 && t
2021 && t->type == tok_operator && t->content == "+=")
2022 {
2023 if (pps[0]->optional || pps[0]->sufficient)
2024 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps[0]->components.front()->tok);
2025 aliases.push_back(pps[0]);
2026 epilogue_alias = 1;
2027 swallow ();
2028 continue;
2029 }
2030 else if (t && t->type == tok_operator && t->content == "{")
2031 {
2032 locations.insert(locations.end(), pps.begin(), pps.end());
2033 break;
2034 }
2035 else
2036 throw PARSE_ERROR (_("expected probe point specifier"));
2037 }
2038
2039 if (aliases.empty())
2040 {
2041 probe* p = new probe;
2042 p->tok = t0;
2043 p->locations = locations;
2044 p->body = parse_stmt_block ();
2045 p->privileged = privileged;
2046 p->systemtap_v_conditional = systemtap_v_seen;
2047 probe_ret.push_back (p);
2048 }
2049 else
2050 {
2051 probe_alias* p = new probe_alias (aliases);
2052 if(epilogue_alias)
2053 p->epilogue_style = true;
2054 else
2055 p->epilogue_style = false;
2056 p->tok = t0;
2057 p->locations = locations;
2058 p->body = parse_stmt_block ();
2059 p->privileged = privileged;
2060 p->systemtap_v_conditional = systemtap_v_seen;
2061 alias_ret.push_back (p);
2062 }
2063 }
2064
2065
2066 embeddedcode*
2067 parser::parse_embeddedcode ()
2068 {
2069 embeddedcode* e = new embeddedcode;
2070 const token* t = next ();
2071 if (t->type != tok_embedded)
2072 throw PARSE_ERROR (_("expected '%{'"));
2073
2074 if (! privileged)
2075 throw PARSE_ERROR (_("embedded code in unprivileged script; need stap -g"),
2076 false /* don't skip tokens for parse resumption */);
2077
2078 e->tok = t;
2079 e->code = t->content;
2080 return e;
2081 }
2082
2083
2084 block*
2085 parser::parse_stmt_block ()
2086 {
2087 block* pb = new block;
2088
2089 const token* t = next ();
2090 if (! (t->type == tok_operator && t->content == "{"))
2091 throw PARSE_ERROR (_("expected '{'"));
2092
2093 pb->tok = t;
2094
2095 while (1)
2096 {
2097 t = peek ();
2098 if (t && t->type == tok_operator && t->content == "}")
2099 {
2100 swallow ();
2101 break;
2102 }
2103 pb->statements.push_back (parse_statement ());
2104 }
2105
2106 return pb;
2107 }
2108
2109
2110 try_block*
2111 parser::parse_try_block ()
2112 {
2113 try_block* pb = new try_block;
2114
2115 pb->tok = expect_kw_token ("try");
2116 pb->try_block = parse_stmt_block();
2117 expect_kw ("catch");
2118
2119 const token* t = peek ();
2120 if (t != NULL && t->type == tok_operator && t->content == "(")
2121 {
2122 swallow (); // swallow the '('
2123
2124 t = next();
2125 if (! (t->type == tok_identifier))
2126 throw PARSE_ERROR (_("expected identifier"));
2127 symbol* sym = new symbol;
2128 sym->tok = t;
2129 sym->name = t->content;
2130 pb->catch_error_var = sym;
2131
2132 expect_op (")");
2133 }
2134 else
2135 pb->catch_error_var = 0;
2136
2137 pb->catch_block = parse_stmt_block();
2138
2139 return pb;
2140 }
2141
2142
2143
2144 statement*
2145 parser::parse_statement ()
2146 {
2147 statement *ret;
2148 const token* t = peek ();
2149 if (t && t->type == tok_operator && t->content == ";")
2150 return new null_statement (next ());
2151 else if (t && t->type == tok_operator && t->content == "{")
2152 return parse_stmt_block (); // Don't squash semicolons.
2153 else if (t && t->type == tok_keyword && t->content == "try")
2154 return parse_try_block (); // Don't squash semicolons.
2155 else if (t && t->type == tok_keyword && t->content == "if")
2156 return parse_if_statement (); // Don't squash semicolons.
2157 else if (t && t->type == tok_keyword && t->content == "for")
2158 return parse_for_loop (); // Don't squash semicolons.
2159 else if (t && t->type == tok_keyword && t->content == "foreach")
2160 return parse_foreach_loop (); // Don't squash semicolons.
2161 else if (t && t->type == tok_keyword && t->content == "while")
2162 return parse_while_loop (); // Don't squash semicolons.
2163 else if (t && t->type == tok_keyword && t->content == "return")
2164 ret = parse_return_statement ();
2165 else if (t && t->type == tok_keyword && t->content == "delete")
2166 ret = parse_delete_statement ();
2167 else if (t && t->type == tok_keyword && t->content == "break")
2168 ret = parse_break_statement ();
2169 else if (t && t->type == tok_keyword && t->content == "continue")
2170 ret = parse_continue_statement ();
2171 else if (t && t->type == tok_keyword && t->content == "next")
2172 ret = parse_next_statement ();
2173 else if (t && (t->type == tok_operator || // expressions are flexible
2174 t->type == tok_identifier ||
2175 t->type == tok_number ||
2176 t->type == tok_string ||
2177 t->type == tok_embedded ))
2178 ret = parse_expr_statement ();
2179 // XXX: consider generally accepting tok_embedded here too
2180 else
2181 throw PARSE_ERROR (_("expected statement"));
2182
2183 // Squash "empty" trailing colons after any "non-block-like" statement.
2184 t = peek ();
2185 if (t && t->type == tok_operator && t->content == ";")
2186 {
2187 swallow (); // Silently eat trailing ; after statement
2188 }
2189
2190 return ret;
2191 }
2192
2193 void
2194 parser::parse_private (vector <vardecl*>& globals, vector<probe*>& probes,
2195 string const & fname, vector<functiondecl*>& functions)
2196 {
2197 const token* t = next ();
2198 if (! (t->type == tok_keyword && t->content == "private"))
2199 throw PARSE_ERROR (_("expected 'private'"));
2200 swallow ();
2201 t = next ();
2202 if (t->type == tok_keyword && t->content == "function")
2203 {
2204 swallow ();
2205 context = con_function;
2206 do_parse_functiondecl(functions, t, fname, true);
2207 }
2208 else if (t->type == tok_keyword && t->content == "global")
2209 {
2210 swallow ();
2211 context = con_global;
2212 t = next ();
2213 if (! (t->type == tok_identifier))
2214 throw PARSE_ERROR (_("expected identifier"));
2215 do_parse_global(globals, probes, fname, t, true);
2216 }
2217 // The `private <identifier>` is an acceptable shorthand
2218 // for `private global <identifier>` per above.
2219 else if (t->type == tok_identifier)
2220 {
2221 context = con_global;
2222 do_parse_global(globals, probes, fname, t, true);
2223 }
2224 else
2225 throw PARSE_ERROR (_("expected 'function' or identifier"));
2226 }
2227
2228 void
2229 parser::parse_global (vector <vardecl*>& globals, vector<probe*>& probes,
2230 string const & fname)
2231 {
2232 const token* t0 = next ();
2233 if (! (t0->type == tok_keyword && t0->content == "global"))
2234 throw PARSE_ERROR (_("expected 'global' or 'private'"));
2235 swallow ();
2236 do_parse_global(globals, probes, fname, 0, false);
2237 }
2238
2239 void
2240 parser::do_parse_global (vector <vardecl*>& globals, vector<probe*>&,
2241 string const & fname, const token* t0, bool priv)
2242 {
2243 bool iter0 = true;
2244 const token* t;
2245 while (1)
2246 {
2247 t = (iter0 && priv) ? t0 : next ();
2248 iter0 = false;
2249 if (! (t->type == tok_identifier))
2250 throw PARSE_ERROR (_("expected identifier"));
2251
2252 string gname = "__global_" + string(t->content);
2253 string pname = "__private_" + detox_path(fname) + string(t->content);
2254 string name = priv ? pname : gname;
2255
2256 for (unsigned i=0; i<globals.size(); i++)
2257 {
2258 if (globals[i]->name == name)
2259 throw PARSE_ERROR (_("duplicate global name"));
2260 if ((globals[i]->name == gname) || (globals[i]->name == pname))
2261 throw PARSE_ERROR (_("global versus private variable declaration conflict"));
2262 }
2263
2264 vardecl* d = new vardecl;
2265 d->unmangled_name = t->content;
2266 d->name = name;
2267 d->tok = t;
2268 d->systemtap_v_conditional = systemtap_v_seen;
2269 globals.push_back (d);
2270
2271 t = peek ();
2272
2273 if(t && t->type == tok_operator && t->content == "%") //wrapping
2274 {
2275 d->wrap = true;
2276 swallow ();
2277 t = peek();
2278 }
2279
2280 if (t && t->type == tok_operator && t->content == "[") // array size
2281 {
2282 int64_t size;
2283 swallow ();
2284 expect_number(size);
2285 if (size <= 0 || size > 1000000) // arbitrary max
2286 throw PARSE_ERROR(_("array size out of range"));
2287 d->maxsize = (int)size;
2288 expect_known(tok_operator, "]");
2289 t = peek ();
2290 }
2291
2292 if (t && t->type == tok_operator && t->content == "=") // initialization
2293 {
2294 if (!d->compatible_arity(0))
2295 throw PARSE_ERROR(_("only scalar globals can be initialized"));
2296 d->set_arity(0, t);
2297 next (); // Don't swallow, set_arity() used the peeked token.
2298 d->init = parse_literal ();
2299 d->type = d->init->type;
2300 t = peek ();
2301 }
2302
2303 if (t && t->type == tok_operator && t->content == ";") // termination
2304 {
2305 swallow ();
2306 break;
2307 }
2308
2309 if (t && t->type == tok_operator && t->content == ",") // next global
2310 {
2311 swallow ();
2312 continue;
2313 }
2314 else
2315 break;
2316 }
2317 }
2318
2319 void
2320 parser::parse_functiondecl (vector<functiondecl*>& functions,
2321 string const & fname)
2322 {
2323 const token* t = next ();
2324 if (! (t->type == tok_keyword && t->content == "function"))
2325 throw PARSE_ERROR (_("expected 'function'"));
2326 swallow ();
2327 do_parse_functiondecl(functions, t, fname, false);
2328 }
2329
2330 void
2331 parser::do_parse_functiondecl (vector<functiondecl*>& functions, const token* t,
2332 string const & fname, bool priv)
2333 {
2334 t = next ();
2335 if (! (t->type == tok_identifier)
2336 && ! (t->type == tok_keyword
2337 && (t->content == "string" || t->content == "long")))
2338 throw PARSE_ERROR (_("expected identifier"));
2339
2340 string gname = "__global_" + string(t->content);
2341 string pname = "__private_" + detox_path(fname) + string(t->content);
2342 string name = priv ? pname : gname;
2343 name += "__overload_" + lex_cast(session.overload_count[t->content]++);
2344
2345 functiondecl *fd = new functiondecl ();
2346 fd->unmangled_name = t->content;
2347 fd->name = name;
2348 fd->tok = t;
2349
2350 t = next ();
2351 if (t->type == tok_operator && t->content == ":")
2352 {
2353 swallow ();
2354 t = next ();
2355 if (t->type == tok_keyword && t->content == "string")
2356 fd->type = pe_string;
2357 else if (t->type == tok_keyword && t->content == "long")
2358 fd->type = pe_long;
2359 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2360 swallow ();
2361
2362 t = next ();
2363 }
2364
2365 if (! (t->type == tok_operator && t->content == "("))
2366 throw PARSE_ERROR (_("expected '('"));
2367 swallow ();
2368
2369 while (1)
2370 {
2371 t = next ();
2372
2373 // permit zero-argument functions
2374 if (t->type == tok_operator && t->content == ")")
2375 {
2376 swallow ();
2377 break;
2378 }
2379 else if (! (t->type == tok_identifier))
2380 throw PARSE_ERROR (_("expected identifier"));
2381 vardecl* vd = new vardecl;
2382 vd->unmangled_name = vd->name = t->content;
2383 vd->tok = t;
2384 fd->formal_args.push_back (vd);
2385 fd->systemtap_v_conditional = systemtap_v_seen;
2386
2387 t = next ();
2388 if (t->type == tok_operator && t->content == ":")
2389 {
2390 swallow ();
2391 t = next ();
2392 if (t->type == tok_keyword && t->content == "string")
2393 vd->type = pe_string;
2394 else if (t->type == tok_keyword && t->content == "long")
2395 vd->type = pe_long;
2396 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2397 swallow ();
2398 t = next ();
2399 }
2400 if (t->type == tok_operator && t->content == ")")
2401 {
2402 swallow ();
2403 break;
2404 }
2405 if (t->type == tok_operator && t->content == ",")
2406 {
2407 swallow ();
2408 continue;
2409 }
2410 else
2411 throw PARSE_ERROR (_("expected ',' or ')'"));
2412 }
2413
2414 t = peek();
2415 if (t->type == tok_operator && t->content == ":")
2416 {
2417 int64_t priority;
2418 swallow();
2419 expect_number(priority);
2420 fd->priority = priority;
2421 // reserve priority 0 for user script implementation
2422 if (priority < 1)
2423 throw PARSE_ERROR (_("specified priority must be > 0"));
2424 }
2425 else if (user_file)
2426 {
2427 // allow script file implementation override automatically when
2428 // priority not specified
2429 fd->priority = 0;
2430 }
2431
2432 t = peek ();
2433 if (t && t->type == tok_embedded)
2434 fd->body = parse_embeddedcode ();
2435 else
2436 fd->body = parse_stmt_block ();
2437
2438 functions.push_back (fd);
2439 }
2440
2441 vector<probe_point*>
2442 parser::parse_probe_points()
2443 {
2444 vector<probe_point*> pps;
2445 while (1)
2446 {
2447 vector<probe_point*> tail = parse_components();
2448 pps.insert(pps.end(), tail.begin(), tail.end());
2449
2450 const token* t = peek();
2451 if (t && t->type == tok_operator && t->content == ",")
2452 {
2453 swallow();
2454 continue;
2455 }
2456
2457 if (t && t->type == tok_operator
2458 && (t->content == "{" || t->content == "=" ||
2459 t->content == "+="|| t->content == "}"))
2460 break;
2461
2462 throw PARSE_ERROR (_("expected one of ', { } = +='"));
2463 }
2464 return pps;
2465 }
2466
2467 vector<probe_point*>
2468 parser::parse_components()
2469 {
2470 vector<probe_point*> pps;
2471 while (1)
2472 {
2473 vector<probe_point*> suffix = parse_component();
2474
2475 // Cartesian product of components
2476 if (pps.empty())
2477 pps = suffix;
2478 else
2479 {
2480 assert(!suffix.empty());
2481 vector<probe_point*> product;
2482 for (unsigned i = 0; i < pps.size(); i++)
2483 {
2484 if (pps[i]->optional || pps[i]->sufficient || pps[i]->condition)
2485 throw PARSE_ERROR (_("'?', '!' or condition must only be specified in suffix"),
2486 pps[i]->components[0]->tok);
2487 for (unsigned j = 0; j < suffix.size(); j++)
2488 {
2489 probe_point* pp = new probe_point;
2490 pp->components.insert(pp->components.end(),
2491 pps[i]->components.begin(), pps[i]->components.end());
2492 pp->components.insert(pp->components.end(),
2493 suffix[j]->components.begin(), suffix[j]->components.end());
2494 pp->optional = suffix[j]->optional;
2495 pp->sufficient = suffix[j]->sufficient;
2496 pp->condition = suffix[j]->condition;
2497 product.push_back(pp);
2498 }
2499 }
2500 for (unsigned i = 0; i < pps.size(); i++) delete pps[i];
2501 for (unsigned i = 0; i < suffix.size(); i++) delete suffix[i];
2502 pps = product;
2503 }
2504
2505 const token* t = peek();
2506 if (t && t->type == tok_operator && t->content == ".")
2507 {
2508 swallow ();
2509 continue;
2510 }
2511
2512 // We only fall through here at the end of a probe point (past
2513 // all the dotted/parametrized components).
2514
2515 if (t && t->type == tok_operator &&
2516 (t->content == "?" || t->content == "!"))
2517 {
2518 for (unsigned i = 0; i < pps.size(); i++)
2519 {
2520 if (pps[i]->optional || pps[i]->sufficient)
2521 throw PARSE_ERROR (_("'?' or '!' respecified"));
2522 pps[i]->optional = true;
2523 if (t->content == "!") pps[i]->sufficient = true;
2524 }
2525 // NB: sufficient implies optional
2526 swallow ();
2527 t = peek ();
2528 // fall through
2529 }
2530
2531 if (t && t->type == tok_keyword && t->content == "if")
2532 {
2533 swallow ();
2534 t = peek ();
2535 if (!(t && t->type == tok_operator && t->content == "("))
2536 throw PARSE_ERROR (_("expected '('"));
2537 swallow ();
2538
2539 expression* e = parse_expression();
2540 for (unsigned i = 0; i < pps.size(); i++)
2541 {
2542 if (pps[i]->condition != 0)
2543 throw PARSE_ERROR (_("condition respecified"));
2544 pps[i]->condition = e;
2545 }
2546
2547 t = peek ();
2548 if (!(t && t->type == tok_operator && t->content == ")"))
2549 throw PARSE_ERROR (_("expected ')'"));
2550 swallow ();
2551 }
2552
2553 break;
2554 }
2555 return pps;
2556 }
2557
2558 vector<probe_point*>
2559 parser::parse_component()
2560 {
2561 const token* t = next ();
2562 if (! (t->type == tok_identifier
2563 // we must allow ".return" and ".function", which are keywords
2564 || t->type == tok_keyword
2565 // we must allow "*", due to being an operator
2566 || (t->type == tok_operator && (t->content == "*" || t->content == "{"))))
2567 throw PARSE_ERROR (_("expected identifier or '*' or '{'"));
2568
2569 if (t && t->type == tok_operator && t->content == "{")
2570 {
2571 swallow();
2572 vector<probe_point*> pps = parse_probe_points();
2573 t = peek();
2574 if (!(t && t->type == tok_operator && t->content == "}"))
2575 throw PARSE_ERROR (_("expected '}'"));
2576 swallow();
2577 return pps;
2578 }
2579 else
2580 {
2581 // loop which reconstitutes an identifier with wildcards
2582 string content = t->content;
2583 bool changed_p = false;
2584 while (1)
2585 {
2586 const token* u = peek();
2587 if (u == NULL)
2588 break;
2589 // ensure pieces of the identifier are adjacent:
2590 if (input.ate_whitespace)
2591 break;
2592 // ensure pieces of the identifier are valid:
2593 if (! (u->type == tok_identifier
2594 // we must allow arbitrary keywords with a wildcard
2595 || u->type == tok_keyword
2596 // we must allow "*", due to being an operator
2597 || (u->type == tok_operator && u->content == "*")))
2598 break;
2599
2600 // append u to t
2601 content = content + (string)u->content;
2602 changed_p = true;
2603
2604 // consume u
2605 swallow ();
2606 }
2607
2608 if (changed_p)
2609 {
2610 // We've already swallowed the first token and we're not
2611 // putting it back; no one else has a copy; so we can
2612 // safely overwrite its content and reuse it.
2613 const_cast<token*>(t)->content = content;
2614 }
2615
2616 probe_point::component* c = new probe_point::component;
2617 c->functor = t->content;
2618 c->tok = t;
2619 vector<probe_point*> pps;
2620 probe_point* pp = new probe_point;
2621 pp->components.push_back(c);
2622 pps.push_back(pp);
2623 // NB we may add c->arg soon
2624
2625 t = peek ();
2626
2627 // consume optional parameter
2628 if (t && t->type == tok_operator && t->content == "(")
2629 {
2630 swallow (); // consume "("
2631 c->arg = parse_literal ();
2632
2633 t = next ();
2634 if (! (t->type == tok_operator && t->content == ")"))
2635 throw PARSE_ERROR (_("expected ')'"));
2636 swallow ();
2637 }
2638 return pps;
2639 }
2640 }
2641
2642 literal_string*
2643 parser::consume_string_literals(const token *t)
2644 {
2645 literal_string *ls = new literal_string (t->content);
2646
2647 // PR11208: check if the next token is also a string literal;
2648 // auto-concatenate it. This is complicated to the extent that we
2649 // need to skip intermediate whitespace.
2650 //
2651 // NB for versions prior to 2.0: but don't skip over intervening comments
2652 string concat;
2653 bool p_concat = false;
2654 const token *n = peek();
2655 while (n != NULL && n->type == tok_string
2656 && ! (!input.has_version("2.0") && input.ate_comment))
2657 {
2658 if (!p_concat)
2659 {
2660 concat = t->content;
2661 p_concat = true;
2662 }
2663 concat.append(n->content.data(), n->content.size());
2664 next(); // consume the token
2665 n = peek();
2666 }
2667 if (p_concat)
2668 ls->value = concat;
2669 return ls;
2670 }
2671
2672
2673 // Parse a string literal and perform backslash escaping on the contents:
2674 literal_string*
2675 parser::parse_literal_string ()
2676 {
2677 const token* t = next ();
2678 literal_string* l;
2679 if (t->type == tok_string)
2680 l = consume_string_literals (t);
2681 else
2682 throw PARSE_ERROR (_("expected literal string"));
2683
2684 l->tok = t;
2685 return l;
2686 }
2687
2688
2689 literal*
2690 parser::parse_literal ()
2691 {
2692 const token* t = next ();
2693 literal* l;
2694 if (t->type == tok_string)
2695 {
2696 l = consume_string_literals (t);
2697 }
2698 else
2699 {
2700 bool neg = false;
2701 if (t->type == tok_operator && t->content == "-")
2702 {
2703 neg = true;
2704 swallow ();
2705 t = next ();
2706 }
2707
2708 if (t->type == tok_number)
2709 {
2710 const string& s = t->content;
2711 const char* startp = s.c_str ();
2712 char* endp = (char*) startp;
2713
2714 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
2715 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
2716 // since the lexer only gives us positive digit strings, but we'll
2717 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
2718 errno = 0;
2719 long long value = (long long) strtoull (startp, & endp, 0);
2720 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
2721 || (neg && (unsigned long long) value > 9223372036854775808ULL)
2722 || (unsigned long long) value > 18446744073709551615ULL
2723 || value < -9223372036854775807LL-1)
2724 throw PARSE_ERROR (_("number invalid or out of range"));
2725
2726 if (neg)
2727 value = -value;
2728
2729 l = new literal_number (value);
2730 }
2731 else
2732 throw PARSE_ERROR (_("expected literal string or number"));
2733 }
2734
2735 l->tok = t;
2736 return l;
2737 }
2738
2739
2740 if_statement*
2741 parser::parse_if_statement ()
2742 {
2743 const token* t = next ();
2744 if (! (t->type == tok_keyword && t->content == "if"))
2745 throw PARSE_ERROR (_("expected 'if'"));
2746 if_statement* s = new if_statement;
2747 s->tok = t;
2748
2749 t = next ();
2750 if (! (t->type == tok_operator && t->content == "("))
2751 throw PARSE_ERROR (_("expected '('"));
2752 swallow ();
2753
2754 s->condition = parse_expression ();
2755
2756 t = next ();
2757 if (! (t->type == tok_operator && t->content == ")"))
2758 throw PARSE_ERROR (_("expected ')'"));
2759 swallow ();
2760
2761 s->thenblock = parse_statement ();
2762
2763 t = peek ();
2764 if (t && t->type == tok_keyword && t->content == "else")
2765 {
2766 swallow ();
2767 s->elseblock = parse_statement ();
2768 }
2769 else
2770 s->elseblock = 0; // in case not otherwise initialized
2771
2772 return s;
2773 }
2774
2775
2776 expr_statement*
2777 parser::parse_expr_statement ()
2778 {
2779 expr_statement *es = new expr_statement;
2780 const token* t = peek ();
2781 if (t == NULL)
2782 throw PARSE_ERROR (_("expression statement expected"));
2783 // Copy, we only peeked, parse_expression might swallow.
2784 es->tok = new token (*t);
2785 es->value = parse_expression ();
2786 return es;
2787 }
2788
2789
2790 return_statement*
2791 parser::parse_return_statement ()
2792 {
2793 const token* t = next ();
2794 if (! (t->type == tok_keyword && t->content == "return"))
2795 throw PARSE_ERROR (_("expected 'return'"));
2796 if (context != con_function)
2797 throw PARSE_ERROR (_("found 'return' not in function context"));
2798 return_statement* s = new return_statement;
2799 s->tok = t;
2800 s->value = parse_expression ();
2801 return s;
2802 }
2803
2804
2805 delete_statement*
2806 parser::parse_delete_statement ()
2807 {
2808 const token* t = next ();
2809 if (! (t->type == tok_keyword && t->content == "delete"))
2810 throw PARSE_ERROR (_("expected 'delete'"));
2811 delete_statement* s = new delete_statement;
2812 s->tok = t;
2813 s->value = parse_expression ();
2814 return s;
2815 }
2816
2817
2818 next_statement*
2819 parser::parse_next_statement ()
2820 {
2821 const token* t = next ();
2822 if (! (t->type == tok_keyword && t->content == "next"))
2823 throw PARSE_ERROR (_("expected 'next'"));
2824 next_statement* s = new next_statement;
2825 s->tok = t;
2826 return s;
2827 }
2828
2829
2830 break_statement*
2831 parser::parse_break_statement ()
2832 {
2833 const token* t = next ();
2834 if (! (t->type == tok_keyword && t->content == "break"))
2835 throw PARSE_ERROR (_("expected 'break'"));
2836 break_statement* s = new break_statement;
2837 s->tok = t;
2838 return s;
2839 }
2840
2841
2842 continue_statement*
2843 parser::parse_continue_statement ()
2844 {
2845 const token* t = next ();
2846 if (! (t->type == tok_keyword && t->content == "continue"))
2847 throw PARSE_ERROR (_("expected 'continue'"));
2848 continue_statement* s = new continue_statement;
2849 s->tok = t;
2850 return s;
2851 }
2852
2853
2854 for_loop*
2855 parser::parse_for_loop ()
2856 {
2857 const token* t = next ();
2858 if (! (t->type == tok_keyword && t->content == "for"))
2859 throw PARSE_ERROR (_("expected 'for'"));
2860 for_loop* s = new for_loop;
2861 s->tok = t;
2862
2863 t = next ();
2864 if (! (t->type == tok_operator && t->content == "("))
2865 throw PARSE_ERROR (_("expected '('"));
2866 swallow ();
2867
2868 // initializer + ";"
2869 t = peek ();
2870 if (t && t->type == tok_operator && t->content == ";")
2871 {
2872 s->init = 0;
2873 swallow ();
2874 }
2875 else
2876 {
2877 s->init = parse_expr_statement ();
2878 t = next ();
2879 if (! (t->type == tok_operator && t->content == ";"))
2880 throw PARSE_ERROR (_("expected ';'"));
2881 swallow ();
2882 }
2883
2884 // condition + ";"
2885 t = peek ();
2886 if (t && t->type == tok_operator && t->content == ";")
2887 {
2888 literal_number* l = new literal_number(1);
2889 s->cond = l;
2890 s->cond->tok = next ();
2891 }
2892 else
2893 {
2894 s->cond = parse_expression ();
2895 t = next ();
2896 if (! (t->type == tok_operator && t->content == ";"))
2897 throw PARSE_ERROR (_("expected ';'"));
2898 swallow ();
2899 }
2900
2901 // increment + ")"
2902 t = peek ();
2903 if (t && t->type == tok_operator && t->content == ")")
2904 {
2905 s->incr = 0;
2906 swallow ();
2907 }
2908 else
2909 {
2910 s->incr = parse_expr_statement ();
2911 t = next ();
2912 if (! (t->type == tok_operator && t->content == ")"))
2913 throw PARSE_ERROR (_("expected ')'"));
2914 swallow ();
2915 }
2916
2917 // block
2918 s->block = parse_statement ();
2919
2920 return s;
2921 }
2922
2923
2924 for_loop*
2925 parser::parse_while_loop ()
2926 {
2927 const token* t = next ();
2928 if (! (t->type == tok_keyword && t->content == "while"))
2929 throw PARSE_ERROR (_("expected 'while'"));
2930 for_loop* s = new for_loop;
2931 s->tok = t;
2932
2933 t = next ();
2934 if (! (t->type == tok_operator && t->content == "("))
2935 throw PARSE_ERROR (_("expected '('"));
2936 swallow ();
2937
2938 // dummy init and incr fields
2939 s->init = 0;
2940 s->incr = 0;
2941
2942 // condition
2943 s->cond = parse_expression ();
2944
2945 t = next ();
2946 if (! (t->type == tok_operator && t->content == ")"))
2947 throw PARSE_ERROR (_("expected ')'"));
2948 swallow ();
2949
2950 // block
2951 s->block = parse_statement ();
2952
2953 return s;
2954 }
2955
2956
2957 foreach_loop*
2958 parser::parse_foreach_loop ()
2959 {
2960 const token* t = next ();
2961 if (! (t->type == tok_keyword && t->content == "foreach"))
2962 throw PARSE_ERROR (_("expected 'foreach'"));
2963 foreach_loop* s = new foreach_loop;
2964 s->tok = t;
2965 s->sort_direction = 0;
2966 s->sort_aggr = sc_none;
2967 s->value = NULL;
2968 s->limit = NULL;
2969
2970 t = next ();
2971 if (! (t->type == tok_operator && t->content == "("))
2972 throw PARSE_ERROR (_("expected '('"));
2973 swallow ();
2974
2975 symbol* lookahead_sym = NULL;
2976 int lookahead_sort = 0;
2977
2978 t = peek ();
2979 if (t && t->type == tok_identifier)
2980 {
2981 next ();
2982 lookahead_sym = new symbol;
2983 lookahead_sym->tok = t;
2984 lookahead_sym->name = t->content;
2985
2986 t = peek ();
2987 if (t && t->type == tok_operator &&
2988 (t->content == "+" || t->content == "-"))
2989 {
2990 lookahead_sort = (t->content == "+") ? 1 : -1;
2991 swallow ();
2992 }
2993
2994 t = peek ();
2995 if (t && t->type == tok_operator && t->content == "=")
2996 {
2997 swallow ();
2998 s->value = lookahead_sym;
2999 if (lookahead_sort)
3000 {
3001 s->sort_direction = lookahead_sort;
3002 s->sort_column = 0;
3003 }
3004 lookahead_sym = NULL;
3005 }
3006 }
3007
3008 // see also parse_array_in
3009
3010 bool parenthesized = false;
3011 t = peek ();
3012 if (!lookahead_sym && t && t->type == tok_operator && t->content == "[")
3013 {
3014 swallow ();
3015 parenthesized = true;
3016 }
3017
3018 if (lookahead_sym)
3019 {
3020 s->indexes.push_back (lookahead_sym);
3021 if (lookahead_sort)
3022 {
3023 s->sort_direction = lookahead_sort;
3024 s->sort_column = 1;
3025 }
3026 lookahead_sym = NULL;
3027 }
3028 else while (1)
3029 {
3030 t = next ();
3031 if (! (t->type == tok_identifier))
3032 throw PARSE_ERROR (_("expected identifier"));
3033 symbol* sym = new symbol;
3034 sym->tok = t;
3035 sym->name = t->content;
3036 s->indexes.push_back (sym);
3037
3038 t = peek ();
3039 if (t && t->type == tok_operator &&
3040 (t->content == "+" || t->content == "-"))
3041 {
3042 if (s->sort_direction)
3043 throw PARSE_ERROR (_("multiple sort directives"));
3044 s->sort_direction = (t->content == "+") ? 1 : -1;
3045 s->sort_column = s->indexes.size();
3046 swallow ();
3047 }
3048
3049 if (parenthesized)
3050 {
3051 t = peek ();
3052 if (t && t->type == tok_operator && t->content == ",")
3053 {
3054 swallow ();
3055 continue;
3056 }
3057 else if (t && t->type == tok_operator && t->content == "]")
3058 {
3059 swallow ();
3060 break;
3061 }
3062 else
3063 throw PARSE_ERROR (_("expected ',' or ']'"));
3064 }
3065 else
3066 break; // expecting only one expression
3067 }
3068
3069 t = next ();
3070 if (! (t->type == tok_keyword && t->content == "in"))
3071 throw PARSE_ERROR (_("expected 'in'"));
3072 swallow ();
3073
3074 s->base = parse_indexable();
3075
3076 // check if there was an array slice that was specified
3077 t = peek();
3078 if (t && t->type == tok_operator && t->content == "[")
3079 {
3080 swallow();
3081 while (1)
3082 {
3083 t = peek();
3084 if (t && t->type == tok_operator && t->content == "*")
3085 {
3086 swallow();
3087 s->array_slice.push_back (NULL);
3088 }
3089 else
3090 s->array_slice.push_back (parse_expression());
3091
3092 t = peek ();
3093 if (t && t->type == tok_operator && t->content == ",")
3094 {
3095 swallow ();
3096 continue;
3097 }
3098 else if (t && t->type == tok_operator && t->content == "]")
3099 {
3100 swallow ();
3101 break;
3102 }
3103 else
3104 throw PARSE_ERROR (_("expected ',' or ']'"));
3105 }
3106 }
3107
3108
3109 // check for atword, see also expect_ident_or_atword,
3110 t = peek ();
3111 if (t && t->type == tok_operator && t->content[0] == '@')
3112 {
3113 if (t->content == "@avg") s->sort_aggr = sc_average;
3114 else if (t->content == "@min") s->sort_aggr = sc_min;
3115 else if (t->content == "@max") s->sort_aggr = sc_max;
3116 else if (t->content == "@count") s->sort_aggr = sc_count;
3117 else if (t->content == "@sum") s->sort_aggr = sc_sum;
3118 else throw PARSE_ERROR(_("expected statistical operation"));
3119 swallow();
3120
3121 t = peek ();
3122 if (! (t && t->type == tok_operator && (t->content == "+" || t->content == "-")))
3123 throw PARSE_ERROR(_("expected sort directive"));
3124 }
3125
3126 t = peek ();
3127 if (t && t->type == tok_operator &&
3128 (t->content == "+" || t->content == "-"))
3129 {
3130 if (s->sort_direction)
3131 throw PARSE_ERROR (_("multiple sort directives"));
3132 s->sort_direction = (t->content == "+") ? 1 : -1;
3133 s->sort_column = 0;
3134 swallow ();
3135 }
3136
3137 t = peek ();
3138 if (tok_is(t, tok_keyword, "limit"))
3139 {
3140 swallow (); // get past the "limit"
3141 s->limit = parse_expression ();
3142 }
3143
3144 t = next ();
3145 if (! (t->type == tok_operator && t->content == ")"))
3146 throw PARSE_ERROR ("expected ')'");
3147 swallow ();
3148
3149 s->block = parse_statement ();
3150 return s;
3151 }
3152
3153
3154 expression*
3155 parser::parse_expression ()
3156 {
3157 return parse_assignment ();
3158 }
3159
3160
3161 expression*
3162 parser::parse_assignment ()
3163 {
3164 expression* op1 = parse_ternary ();
3165
3166 const token* t = peek ();
3167 // right-associative operators
3168 if (t && t->type == tok_operator
3169 && (t->content == "=" ||
3170 t->content == "<<<" ||
3171 t->content == "+=" ||
3172 t->content == "-=" ||
3173 t->content == "*=" ||
3174 t->content == "/=" ||
3175 t->content == "%=" ||
3176 t->content == "<<=" ||
3177 t->content == ">>=" ||
3178 t->content == "&=" ||
3179 t->content == "^=" ||
3180 t->content == "|=" ||
3181 t->content == ".=" ||
3182 false))
3183 {
3184 // NB: lvalueness is checked during elaboration / translation
3185 assignment* e = new assignment;
3186 e->left = op1;
3187 e->op = t->content;
3188 e->tok = t;
3189 next ();
3190 e->right = parse_expression ();
3191 op1 = e;
3192 }
3193
3194 return op1;
3195 }
3196
3197
3198 expression*
3199 parser::parse_ternary ()
3200 {
3201 expression* op1 = parse_logical_or ();
3202
3203 const token* t = peek ();
3204 if (t && t->type == tok_operator && t->content == "?")
3205 {
3206 ternary_expression* e = new ternary_expression;
3207 e->tok = t;
3208 e->cond = op1;
3209 next ();
3210 e->truevalue = parse_expression (); // XXX
3211
3212 t = next ();
3213 if (! (t->type == tok_operator && t->content == ":"))
3214 throw PARSE_ERROR (_("expected ':'"));
3215 swallow ();
3216
3217 e->falsevalue = parse_expression (); // XXX
3218 return e;
3219 }
3220 else
3221 return op1;
3222 }
3223
3224
3225 expression*
3226 parser::parse_logical_or ()
3227 {
3228 expression* op1 = parse_logical_and ();
3229
3230 const token* t = peek ();
3231 while (t && t->type == tok_operator && t->content == "||")
3232 {
3233 logical_or_expr* e = new logical_or_expr;
3234 e->tok = t;
3235 e->op = t->content;
3236 e->left = op1;
3237 next ();
3238 e->right = parse_logical_and ();
3239 op1 = e;
3240 t = peek ();
3241 }
3242
3243 return op1;
3244 }
3245
3246
3247 expression*
3248 parser::parse_logical_and ()
3249 {
3250 expression* op1 = parse_boolean_or ();
3251
3252 const token* t = peek ();
3253 while (t && t->type == tok_operator && t->content == "&&")
3254 {
3255 logical_and_expr *e = new logical_and_expr;
3256 e->left = op1;
3257 e->op = t->content;
3258 e->tok = t;
3259 next ();
3260 e->right = parse_boolean_or ();
3261 op1 = e;
3262 t = peek ();
3263 }
3264
3265 return op1;
3266 }
3267
3268
3269 expression*
3270 parser::parse_boolean_or ()
3271 {
3272 expression* op1 = parse_boolean_xor ();
3273
3274 const token* t = peek ();
3275 while (t && t->type == tok_operator && t->content == "|")
3276 {
3277 binary_expression* e = new binary_expression;
3278 e->left = op1;
3279 e->op = t->content;
3280 e->tok = t;
3281 next ();
3282 e->right = parse_boolean_xor ();
3283 op1 = e;
3284 t = peek ();
3285 }
3286
3287 return op1;
3288 }
3289
3290
3291 expression*
3292 parser::parse_boolean_xor ()
3293 {
3294 expression* op1 = parse_boolean_and ();
3295
3296 const token* t = peek ();
3297 while (t && t->type == tok_operator && t->content == "^")
3298 {
3299 binary_expression* e = new binary_expression;
3300 e->left = op1;
3301 e->op = t->content;
3302 e->tok = t;
3303 next ();
3304 e->right = parse_boolean_and ();
3305 op1 = e;
3306 t = peek ();
3307 }
3308
3309 return op1;
3310 }
3311
3312
3313 expression*
3314 parser::parse_boolean_and ()
3315 {
3316 expression* op1 = parse_array_in ();
3317
3318 const token* t = peek ();
3319 while (t && t->type == tok_operator && t->content == "&")
3320 {
3321 binary_expression* e = new binary_expression;
3322 e->left = op1;
3323 e->op = t->content;
3324 e->tok = t;
3325 next ();
3326 e->right = parse_array_in ();
3327 op1 = e;
3328 t = peek ();
3329 }
3330
3331 return op1;
3332 }
3333
3334
3335 expression*
3336 parser::parse_array_in ()
3337 {
3338 // This is a very tricky case. All these are legit expressions:
3339 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
3340 vector<expression*> indexes;
3341 bool parenthesized = false;
3342
3343 const token* t = peek ();
3344 if (t && t->type == tok_operator && t->content == "[")
3345 {
3346 swallow ();
3347 parenthesized = true;
3348 }
3349
3350 while (1)
3351 {
3352 t = peek();
3353 if (t && t->type == tok_operator && t->content == "*" && parenthesized)
3354 {
3355 swallow();
3356 indexes.push_back(NULL);
3357 }
3358 else
3359 {
3360 expression* op1 = parse_comparison_or_regex_query ();
3361 indexes.push_back (op1);
3362 }
3363
3364 if (parenthesized)
3365 {
3366 const token* t = peek ();
3367 if (t && t->type == tok_operator && t->content == ",")
3368 {
3369 swallow ();
3370 continue;
3371 }
3372 else if (t && t->type == tok_operator && t->content == "]")
3373 {
3374 swallow ();
3375 break;
3376 }
3377 else
3378 throw PARSE_ERROR (_("expected ',' or ']'"));
3379 }
3380 else
3381 break; // expecting only one expression
3382 }
3383
3384 t = peek ();
3385 if (t && t->type == tok_keyword && t->content == "in")
3386 {
3387 array_in *e = new array_in;
3388 e->tok = t;
3389 next ();
3390
3391 arrayindex* a = new arrayindex;
3392 a->indexes = indexes;
3393 a->base = parse_indexable();
3394 a->tok = a->base->tok;
3395 e->operand = a;
3396 return e;
3397 }
3398 else if (indexes.size() == 1) // no "in" - need one expression only
3399 return indexes[0];
3400 else
3401 throw PARSE_ERROR (_("unexpected comma-separated expression list"));
3402 }
3403
3404
3405 expression*
3406 parser::parse_comparison_or_regex_query ()
3407 {
3408 expression* op1 = parse_shift ();
3409
3410 // XXX precedence -- perhaps a =~ b == c =~ d --> (a =~ b) == (c =~ d) ?
3411 const token *t = peek();
3412 if (t && t->type == tok_operator
3413 && (t->content == "=~" ||
3414 t->content == "!~"))
3415 {
3416 regex_query* r = new regex_query;
3417 r->left = op1;
3418 r->op = t->content;
3419 r->tok = t;
3420 next ();
3421 r->right = parse_literal_string();
3422 op1 = r;
3423 t = peek ();
3424 }
3425 else while (t && t->type == tok_operator
3426 && (t->content == ">" ||
3427 t->content == "<" ||
3428 t->content == "==" ||
3429 t->content == "!=" ||
3430 t->content == "<=" ||
3431 t->content == ">="))
3432 {
3433 comparison* e = new comparison;
3434 e->left = op1;
3435 e->op = t->content;
3436 e->tok = t;
3437 next ();
3438 e->right = parse_shift ();
3439 op1 = e;
3440 t = peek ();
3441 }
3442
3443 return op1;
3444 }
3445
3446
3447 expression*
3448 parser::parse_shift ()
3449 {
3450 expression* op1 = parse_concatenation ();
3451
3452 const token* t = peek ();
3453 while (t && t->type == tok_operator &&
3454 (t->content == "<<" || t->content == ">>"))
3455 {
3456 binary_expression* e = new binary_expression;
3457 e->left = op1;
3458 e->op = t->content;
3459 e->tok = t;
3460 next ();
3461 e->right = parse_concatenation ();
3462 op1 = e;
3463 t = peek ();
3464 }
3465
3466 return op1;
3467 }
3468
3469
3470 expression*
3471 parser::parse_concatenation ()
3472 {
3473 expression* op1 = parse_additive ();
3474
3475 const token* t = peek ();
3476 // XXX: the actual awk string-concatenation operator is *whitespace*.
3477 // I don't know how to easily to model that here.
3478 while (t && t->type == tok_operator && t->content == ".")
3479 {
3480 concatenation* e = new concatenation;
3481 e->left = op1;
3482 e->op = t->content;
3483 e->tok = t;
3484 next ();
3485 e->right = parse_additive ();
3486 op1 = e;
3487 t = peek ();
3488 }
3489
3490 return op1;
3491 }
3492
3493
3494 expression*
3495 parser::parse_additive ()
3496 {
3497 expression* op1 = parse_multiplicative ();
3498
3499 const token* t = peek ();
3500 while (t && t->type == tok_operator
3501 && (t->content == "+" || t->content == "-"))
3502 {
3503 binary_expression* e = new binary_expression;
3504 e->op = t->content;
3505 e->left = op1;
3506 e->tok = t;
3507 next ();
3508 e->right = parse_multiplicative ();
3509 op1 = e;
3510 t = peek ();
3511 }
3512
3513 return op1;
3514 }
3515
3516
3517 expression*
3518 parser::parse_multiplicative ()
3519 {
3520 expression* op1 = parse_unary ();
3521
3522 const token* t = peek ();
3523 while (t && t->type == tok_operator
3524 && (t->content == "*" || t->content == "/" || t->content == "%"))
3525 {
3526 binary_expression* e = new binary_expression;
3527 e->op = t->content;
3528 e->left = op1;
3529 e->tok = t;
3530 next ();
3531 e->right = parse_unary ();
3532 op1 = e;
3533 t = peek ();
3534 }
3535
3536 return op1;
3537 }
3538
3539
3540 expression*
3541 parser::parse_unary ()
3542 {
3543 const token* t = peek ();
3544 if (t && t->type == tok_operator
3545 && (t->content == "+" ||
3546 t->content == "-" ||
3547 t->content == "!" ||
3548 t->content == "~" ||
3549 false))
3550 {
3551 unary_expression* e = new unary_expression;
3552 e->op = t->content;
3553 e->tok = t;
3554 next ();
3555 e->operand = parse_unary ();
3556 return e;
3557 }
3558 else
3559 return parse_crement ();
3560 }
3561
3562
3563 expression*
3564 parser::parse_crement () // as in "increment" / "decrement"
3565 {
3566 // NB: Ideally, we'd parse only a symbol as an operand to the
3567 // *crement operators, instead of a general expression value. We'd
3568 // need more complex lookahead code to tell apart the postfix cases.
3569 // So we just punt, and leave it to pass-3 to signal errors on
3570 // cases like "4++".
3571
3572 const token* t = peek ();
3573 if (t && t->type == tok_operator
3574 && (t->content == "++" || t->content == "--"))
3575 {
3576 pre_crement* e = new pre_crement;
3577 e->op = t->content;
3578 e->tok = t;
3579 next ();
3580 e->operand = parse_dwarf_value ();
3581 return e;
3582 }
3583
3584 // post-crement or non-crement
3585 expression *op1 = parse_dwarf_value ();
3586
3587 t = peek ();
3588 if (t && t->type == tok_operator
3589 && (t->content == "++" || t->content == "--"))
3590 {
3591 post_crement* e = new post_crement;
3592 e->op = t->content;
3593 e->tok = t;
3594 next ();
3595 e->operand = op1;
3596 return e;
3597 }
3598 else
3599 return op1;
3600 }
3601
3602
3603 expression*
3604 parser::parse_dwarf_value ()
3605 {
3606 expression* expr = NULL;
3607 target_symbol* tsym = NULL;
3608
3609 // With '&' we'll definitely be making a target symbol of some sort
3610 const token* addrtok = peek_op ("&") ? next () : NULL;
3611 bool addressof = (addrtok != NULL);
3612
3613 // First try target_symbol types: $var, @cast, and @var.
3614 const token* t = peek ();
3615 if (t && t->type == tok_identifier && t->content[0] == '$')
3616 expr = tsym = parse_target_symbol ();
3617 else if (tok_is (t, tok_operator, "@cast"))
3618 expr = tsym = parse_cast_op ();
3619 else if (tok_is (t, tok_operator, "@var"))
3620 expr = tsym = parse_atvar_op ();
3621 else if (addressof && !input.has_version("2.6"))
3622 // '&' on old version only allowed specific target_symbol types
3623 throw PARSE_ERROR (_("expected @cast, @var or $var"));
3624 else
3625 // Otherwise just get a plain value of any sort.
3626 expr = parse_value ();
3627
3628 // If we had '&' or see any target suffixes, that forces a target_symbol.
3629 // For compatibility, we only do this starting with 2.6.
3630 if (!tsym && (addressof || peek_target_symbol_components ())
3631 && input.has_version("2.6"))
3632 {
3633 autocast_op *cop = new autocast_op;
3634 cop->tok = addrtok ?: peek ();
3635 cop->operand = expr;
3636 expr = tsym = cop;
3637 }
3638
3639 if (tsym)
3640 {
3641 // Parse the rest of any kind of target symbol
3642 tsym->addressof = addressof;
3643 parse_target_symbol_components (tsym);
3644 }
3645
3646 return expr;
3647 }
3648
3649
3650 expression*
3651 parser::parse_value ()
3652 {
3653 const token* t = peek ();
3654 if (! t)
3655 throw PARSE_ERROR (_("expected value"));
3656
3657 if (t->type == tok_embedded)
3658 {
3659 if (! privileged)
3660 throw PARSE_ERROR (_("embedded expression code in unprivileged script; need stap -g"), false);
3661
3662 embedded_expr *e = new embedded_expr;
3663 e->tok = t;
3664 e->code = t->content;
3665 next ();
3666 return e;
3667 }
3668
3669 if (t->type == tok_operator && t->content == "(")
3670 {
3671 swallow ();
3672 expression* e = parse_expression ();
3673 t = next ();
3674 if (! (t->type == tok_operator && t->content == ")"))
3675 throw PARSE_ERROR (_("expected ')'"));
3676 swallow ();
3677 return e;
3678 }
3679 else if (t->type == tok_identifier
3680 || (t->type == tok_operator && t->content[0] == '@'))
3681 return parse_symbol ();
3682 else
3683 return parse_literal ();
3684 }
3685
3686
3687 const token *
3688 parser::parse_hist_op_or_bare_name (hist_op *&hop, interned_string &name)
3689 {
3690 hop = NULL;
3691 const token* t = expect_ident_or_atword (name);
3692 if (name == "@hist_linear" || name == "@hist_log")
3693 {
3694 hop = new hist_op;
3695 if (name == "@hist_linear")
3696 hop->htype = hist_linear;
3697 else if (name == "@hist_log")
3698 hop->htype = hist_log;
3699 hop->tok = t;
3700 expect_op("(");
3701 hop->stat = parse_expression ();
3702 int64_t tnum;
3703 if (hop->htype == hist_linear)
3704 {
3705 for (size_t i = 0; i < 3; ++i)
3706 {
3707 expect_op (",");
3708 expect_number (tnum);
3709 hop->params.push_back (tnum);
3710 }
3711 }
3712 expect_op(")");
3713 }
3714 return t;
3715 }
3716
3717
3718 indexable*
3719 parser::parse_indexable ()
3720 {
3721 hist_op *hop = NULL;
3722 interned_string name;
3723 const token *tok = parse_hist_op_or_bare_name(hop, name);
3724 if (hop)
3725 return hop;
3726 else
3727 {
3728 symbol* sym = new symbol;
3729 sym->name = name;
3730 sym->tok = tok;
3731 return sym;
3732 }
3733 }
3734
3735
3736 // var, indexable[index], func(parms), printf("...", ...),
3737 // @defined, @entry, @stat_op(stat)
3738 expression* parser::parse_symbol ()
3739 {
3740 hist_op *hop = NULL;
3741 symbol *sym = NULL;
3742 interned_string name;
3743 const token *t = parse_hist_op_or_bare_name(hop, name);
3744
3745 if (!hop)
3746 {
3747 // If we didn't get a hist_op, then we did get an identifier. We can
3748 // now scrutinize this identifier for the various magic forms of identifier
3749 // (printf, @stat_op...)
3750
3751 // NB: PR11343: @defined() is not incompatible with earlier versions
3752 // of stap, so no need to check session.compatible for 1.2
3753 if (name == "@defined")
3754 return parse_defined_op (t);
3755
3756 if (name == "@entry")
3757 return parse_entry_op (t);
3758
3759 if (name == "@perf")
3760 return parse_perf_op (t);
3761
3762 if (name.size() > 0 && name[0] == '@')
3763 {
3764 stat_op *sop = new stat_op;
3765 if (name == "@avg")
3766 sop->ctype = sc_average;
3767 else if (name == "@count")
3768 sop->ctype = sc_count;
3769 else if (name == "@sum")
3770 sop->ctype = sc_sum;
3771 else if (name == "@min")
3772 sop->ctype = sc_min;
3773 else if (name == "@max")
3774 sop->ctype = sc_max;
3775 else
3776 throw PARSE_ERROR(_F("unknown operator %s",
3777 name.to_string().c_str()));
3778 expect_op("(");
3779 sop->tok = t;
3780 sop->stat = parse_expression ();
3781 expect_op(")");
3782 return sop;
3783 }
3784
3785 else if (print_format *fmt = print_format::create(t))
3786 {
3787 expect_op("(");
3788 if ((name == "print" || name == "println" ||
3789 name == "sprint" || name == "sprintln") &&
3790 (peek_op("@hist_linear") || peek_op("@hist_log")))
3791 {
3792 // We have a special case where we recognize
3793 // print(@hist_foo(bar)) as a magic print-the-histogram
3794 // construct. This is sort of gross but it avoids
3795 // promoting histogram references to typeful
3796 // expressions.
3797
3798 hop = NULL;
3799 t = parse_hist_op_or_bare_name(hop, name);
3800 assert(hop);
3801
3802 // It is, sadly, possible that even while parsing a
3803 // hist_op, we *mis-guessed* and the user wishes to
3804 // print(@hist_op(foo)[bucket]), a scalar. In that case
3805 // we must parse the arrayindex and print an expression.
3806 //
3807 // XXX: This still fails if the arrayindex is part of a
3808 // larger expression. To really handle everything, we'd
3809 // need to push back all the hist tokens start over.
3810
3811 if (!peek_op ("["))
3812 fmt->hist = hop;
3813 else
3814 {
3815 // This is simplified version of the
3816 // multi-array-index parser below, because we can
3817 // only ever have one index on a histogram anyways.
3818 expect_op("[");
3819 struct arrayindex* ai = new arrayindex;
3820 ai->tok = t;
3821 ai->base = hop;
3822 ai->indexes.push_back (parse_expression ());
3823 expect_op("]");
3824 fmt->args.push_back(ai);
3825
3826 // Consume any subsequent arguments.
3827 while (!peek_op (")"))
3828 {
3829 expect_op(",");
3830 expression *e = parse_expression ();
3831 fmt->args.push_back(e);
3832 }
3833 }
3834 }
3835 else
3836 {
3837 int min_args = 0;
3838 bool consumed_arg = false;
3839 if (fmt->print_with_format)
3840 {
3841 // Consume and convert a format string. Agreement between the
3842 // format string and the arguments is postponed to the
3843 // typechecking phase.
3844 literal_string* ls = parse_literal_string();
3845 fmt->raw_components = ls->value;
3846 delete ls;
3847 fmt->components = print_format::string_to_components (fmt->raw_components);
3848 consumed_arg = true;
3849 }
3850 else if (fmt->print_with_delim)
3851 {
3852 // Consume a delimiter to separate arguments.
3853 literal_string* ls = parse_literal_string();
3854 fmt->delimiter = ls->value;
3855 delete ls;
3856 consumed_arg = true;
3857 min_args = 2; // so that the delim is used at least once
3858 }
3859 else if (!fmt->print_with_newline)
3860 {
3861 // If we are not printing with a format string, nor with a
3862 // delim, nor with a newline, then it's either print() or
3863 // sprint(), both of which require at least one argument (of
3864 // any type).
3865 min_args = 1;
3866 }
3867
3868 // Consume any subsequent arguments.
3869 while (min_args || !peek_op (")"))
3870 {
3871 if (consumed_arg)
3872 expect_op(",");
3873 expression *e = parse_expression ();
3874 fmt->args.push_back(e);
3875 consumed_arg = true;
3876 if (min_args)
3877 --min_args;
3878 }
3879 }
3880 expect_op(")");
3881 return fmt;
3882 }
3883
3884 else if (peek_op ("(")) // function call
3885 {
3886 swallow ();
3887 struct functioncall* f = new functioncall;
3888 f->tok = t;
3889 f->function = name;
3890 // Allow empty actual parameter list
3891 if (peek_op (")"))
3892 {
3893 swallow ();
3894 return f;
3895 }
3896 while (1)
3897 {
3898 f->args.push_back (parse_expression ());
3899 if (peek_op (")"))
3900 {
3901 swallow ();
3902 break;
3903 }
3904 else if (peek_op (","))
3905 {
3906 swallow ();
3907 continue;
3908 }
3909 else
3910 throw PARSE_ERROR (_("expected ',' or ')'"));
3911 }
3912 return f;
3913 }
3914
3915 else
3916 {
3917 sym = new symbol;
3918 sym->name = name;
3919 sym->tok = t;
3920 }
3921 }
3922
3923 // By now, either we had a hist_op in the first place, or else
3924 // we had a plain word and it was converted to a symbol.
3925
3926 assert (!hop != !sym); // logical XOR
3927
3928 // All that remains is to check for array indexing
3929
3930 if (peek_op ("[")) // array
3931 {
3932 swallow ();
3933 struct arrayindex* ai = new arrayindex;
3934 ai->tok = t;
3935
3936 if (hop)
3937 ai->base = hop;
3938 else
3939 ai->base = sym;
3940
3941 while (1)
3942 {
3943 if (peek_op("*"))
3944 {
3945 swallow();
3946 ai->indexes.push_back (NULL);
3947 }
3948 else
3949 ai->indexes.push_back (parse_expression ());
3950 if (peek_op ("]"))
3951 {
3952 swallow ();
3953 break;
3954 }
3955 else if (peek_op (","))
3956 {
3957 swallow ();
3958 continue;
3959 }
3960 else
3961 throw PARSE_ERROR (_("expected ',' or ']'"));
3962 }
3963
3964 return ai;
3965 }
3966
3967 // If we got to here, we *should* have a symbol; if we have
3968 // a hist_op on its own, it doesn't count as an expression,
3969 // so we throw a parse error.
3970
3971 if (hop)
3972 throw PARSE_ERROR(_("base histogram operator where expression expected"), t);
3973
3974 return sym;
3975 }
3976
3977 // Parse a $var.
3978 target_symbol* parser::parse_target_symbol ()
3979 {
3980 const token* t = next ();
3981 if (t->type == tok_identifier && t->content[0]=='$')
3982 {
3983 // target_symbol time
3984 target_symbol *tsym = new target_symbol;
3985 tsym->tok = t;
3986 tsym->name = t->content;
3987 return tsym;
3988 }
3989
3990 throw PARSE_ERROR (_("expected $var"));
3991 }
3992
3993
3994 // Parse a @cast.
3995 cast_op* parser::parse_cast_op ()
3996 {
3997 const token* t = next ();
3998 if (t->type == tok_operator && t->content == "@cast")
3999 {
4000 cast_op *cop = new cast_op;
4001 cop->tok = t;
4002 cop->name = t->content;
4003 expect_op("(");
4004 cop->operand = parse_expression ();
4005 expect_op(",");
4006 expect_unknown(tok_string, cop->type_name);
4007 if (cop->type_name.empty())
4008 throw PARSE_ERROR (_("expected non-empty string"));
4009 if (peek_op (","))
4010 {
4011 swallow ();
4012 expect_unknown(tok_string, cop->module);
4013 }
4014 expect_op(")");
4015 return cop;
4016 }
4017
4018 throw PARSE_ERROR (_("expected @cast"));
4019 }
4020
4021
4022 // Parse a @var.
4023 atvar_op* parser::parse_atvar_op ()
4024 {
4025 const token* t = next ();
4026 if (t->type == tok_operator && t->content == "@var")
4027 {
4028 atvar_op *aop = new atvar_op;
4029 aop->tok = t;
4030 aop->name = t->content;
4031 expect_op("(");
4032 expect_unknown(tok_string, aop->target_name);
4033 size_t found_at = aop->target_name.find("@");
4034 if (found_at != string::npos)
4035 aop->cu_name = aop->target_name.substr(found_at + 1);
4036 else
4037 aop->cu_name = "";
4038 if (peek_op (","))
4039 {
4040 swallow ();
4041 expect_unknown (tok_string, aop->module);
4042 }
4043 else
4044 aop->module = "";
4045 expect_op(")");
4046 return aop;
4047 }
4048
4049 throw PARSE_ERROR (_("expected @var"));
4050 }
4051
4052
4053 // Parse a @defined(). Given head token has already been consumed.
4054 expression* parser::parse_defined_op (const token* t)
4055 {
4056 defined_op* dop = new defined_op;
4057 dop->tok = t;
4058 expect_op("(");
4059 dop->operand = parse_expression ();
4060 expect_op(")");
4061 return dop;
4062 }
4063
4064
4065 // Parse a @entry(). Given head token has already been consumed.
4066 expression* parser::parse_entry_op (const token* t)
4067 {
4068 entry_op* eop = new entry_op;
4069 eop->tok = t;
4070 expect_op("(");
4071 eop->operand = parse_expression ();
4072 expect_op(")");
4073 return eop;
4074 }
4075
4076
4077 // Parse a @perf(). Given head token has already been consumed.
4078 expression* parser::parse_perf_op (const token* t)
4079 {
4080 perf_op* pop = new perf_op;
4081 pop->tok = t;
4082 expect_op("(");
4083 pop->operand = parse_literal_string ();
4084 if (pop->operand->value == "")
4085 throw PARSE_ERROR (_("expected non-empty string"));
4086 expect_op(")");
4087 return pop;
4088 }
4089
4090
4091 bool
4092 parser::peek_target_symbol_components ()
4093 {
4094 const token * t = peek ();
4095 return t &&
4096 ((t->type == tok_operator && (t->content == "->" || t->content == "["))
4097 || (t->type == tok_identifier &&
4098 t->content.find_first_not_of('$') == string::npos));
4099 }
4100
4101 void
4102 parser::parse_target_symbol_components (target_symbol* e)
4103 {
4104 bool pprint = false;
4105
4106 // check for pretty-print in the form $foo$
4107 string base = e->name;
4108 size_t pprint_pos = base.find_last_not_of('$');
4109 if (0 < pprint_pos && pprint_pos < base.length() - 1)
4110 {
4111 string pprint_val = base.substr(pprint_pos + 1);
4112 base.erase(pprint_pos + 1);
4113 e->name = base;
4114 e->components.push_back (target_symbol::component(e->tok, pprint_val, true));
4115 pprint = true;
4116 }
4117
4118 while (!pprint)
4119 {
4120 if (peek_op ("->"))
4121 {
4122 const token* t = next();
4123 interned_string member;
4124 expect_ident_or_keyword (member);
4125
4126 // check for pretty-print in the form $foo->$ or $foo->bar$
4127 pprint_pos = member.find_last_not_of('$');
4128 interned_string pprint_val;
4129 if (pprint_pos == string::npos || pprint_pos < member.length() - 1)
4130 {
4131 pprint_val = member.substr(pprint_pos + 1);
4132 member = member.substr(0, pprint_pos + 1);
4133 pprint = true;
4134 }
4135
4136 if (!member.empty())
4137 e->components.push_back (target_symbol::component(t, member));
4138 if (pprint)
4139 e->components.push_back (target_symbol::component(t, pprint_val, true));
4140 }
4141 else if (peek_op ("["))
4142 {
4143 const token* t = next();
4144 expression* index = parse_expression();
4145 literal_number* ln = dynamic_cast<literal_number*>(index);
4146 if (ln)
4147 e->components.push_back (target_symbol::component(t, ln->value));
4148 else
4149 e->components.push_back (target_symbol::component(t, index));
4150 expect_op ("]");
4151 }
4152 else
4153 break;
4154 }
4155
4156 if (!pprint)
4157 {
4158 // check for pretty-print in the form $foo $
4159 // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$
4160 const token* t = peek();
4161 if (t != NULL && t->type == tok_identifier &&
4162 t->content.find_first_not_of('$') == string::npos)
4163 {
4164 t = next();
4165 e->components.push_back (target_symbol::component(t, t->content, true));
4166 pprint = true;
4167 }
4168 }
4169
4170 if (pprint && (peek_op ("->") || peek_op("[")))
4171 throw PARSE_ERROR(_("-> and [ are not accepted for a pretty-printing variable"));
4172 }
4173
4174 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.227152 seconds and 5 git commands to generate.