]> sourceware.org Git - systemtap.git/blob - parse.cxx
update copyrights
[systemtap.git] / parse.cxx
1 // recursive descent parser for systemtap scripts
2 // Copyright (C) 2005-2014 Red Hat Inc.
3 // Copyright (C) 2006 Intel Corporation.
4 // Copyright (C) 2007 Bull S.A.S
5 // Copyright (C) 2014 Peter Kjellstrom <cap@nsc.liu.se>
6 //
7 // This file is part of systemtap, and is free software. You can
8 // redistribute it and/or modify it under the terms of the GNU General
9 // Public License (GPL); either version 2, or (at your option) any
10 // later version.
11
12 #include "config.h"
13 #include "staptree.h"
14 #include "parse.h"
15 #include "session.h"
16 #include "util.h"
17
18 #include <iostream>
19
20 #include <fstream>
21 #include <cctype>
22 #include <cstdlib>
23 #include <cassert>
24 #include <cerrno>
25 #include <climits>
26 #include <sstream>
27 #include <cstring>
28 #include <cctype>
29 #include <iterator>
30
31 extern "C" {
32 #include <fnmatch.h>
33 }
34
35 using namespace std;
36
37
// Lexer for one input source: scan() hands back tokens one at a time, and
// the public flags record what preceded the most recent token.
// NOTE(review): the member functions are defined outside this excerpt, so
// the notes on private members below are limited to what their names and
// in-file uses show -- confirm against the definitions.
38 class lexer
39 {
40 public:
41 bool ate_comment; // current token follows a comment
42 bool ate_whitespace; // the most recent token followed whitespace
43 bool saw_tokens; // the lexer found tokens (before preprocessing occurred)
44
45 token* scan ();
46 lexer (istream&, const string&, systemtap_session&);
47 void set_current_file (stapfile* f);
48 void set_current_token_chain (const token* tok);
49
// Word sets shared by every lexer instance.  The parser looks up
// "@" + name in atwords to warn when a user macro reuses the name of a
// built-in @-operator.
50 static set<string> keywords;
51 static set<string> atwords;
52 private:
// Character-level access to the buffered input.
53 inline int input_get ();
54 inline int input_peek (unsigned n=0);
55 void input_put (const string&, const token*);
56 string input_name;
57 string input_contents;
58 const char *input_pointer; // index into input_contents
59 const char *input_end;
// presumably bookkeeping for the line/column reported in token
// locations -- TODO confirm against the lexer implementation
60 unsigned cursor_suspend_count;
61 unsigned cursor_suspend_line;
62 unsigned cursor_suspend_column;
63 unsigned cursor_line;
64 unsigned cursor_column;
65 systemtap_session& session;
66 stapfile* current_file;
67 const token* current_token_chain;
68 };
69
70
// Recursive descent parser for one systemtap input.  Tokens reach the
// grammar routines through a pipeline declared below: the lexer feeds the
// macro pass (next_pp1 / scan_pp1), which feeds the conditional pass
// (scan_pp / skip_pp), which feeds next () / peek ().
// NB: the order of the data members matters -- the constructor's
// initializer list relies on input_name being declared before input.
71 class parser
72 {
73 public:
74 parser (systemtap_session& s, const string& n, istream& i, bool p);
75 ~parser ();
76
// Entry points; each takes a flag selecting whether parse errors are
// reported as warnings.
77 stapfile* parse (bool errs_as_warnings);
78 probe* parse_synthetic_probe (const token* chain, bool errs_as_warnings);
79 stapfile* parse_library_macros (bool errs_as_warnings);
80
81 private:
// State of one open %( ... %) conditional: which arm (THEN / ELSE) is
// current and whether its tokens are kept or skipped.
82 typedef enum {
83 PP_NONE,
84 PP_KEEP_THEN,
85 PP_SKIP_THEN,
86 PP_KEEP_ELSE,
87 PP_SKIP_ELSE,
88 } pp_state_t;
89
90 struct pp1_activation;
91
// A macro parameter captured at an invocation site; parent_act is the
// activation that was current when the invocation was scanned (0 at top
// level).
92 struct pp_macrodecl : public macrodecl {
93 pp1_activation* parent_act; // used for param bindings
94 virtual bool is_closure() { return parent_act != 0; }
95 pp_macrodecl () : macrodecl(), parent_act(0) { }
96 };
97
98 systemtap_session& session;
99 string input_name;
100 lexer input;
101 bool privileged;
102 parse_context context;
103
104 // preprocessing subordinate, first pass (macros)
// One macro expansion in progress.  tok is the invocation token (owned by
// the activation), curr_macro the declaration being expanded.
105 struct pp1_activation {
106 const token* tok;
107 unsigned cursor; // position within macro body
108 map<string, pp_macrodecl*> params;
109
110 macrodecl* curr_macro;
111
112 pp1_activation (const token* tok, macrodecl* curr_macro)
113 : tok(tok), cursor(0), curr_macro(curr_macro) { }
114 ~pp1_activation ();
115 };
116
// Macros declared with @define in this input, keyed by name without '@'.
117 map<string, macrodecl*> pp1_namespace;
// Stack of expansions in progress; back() is the innermost one.
118 vector<pp1_activation*> pp1_state;
119 const token* next_pp1 ();
120 const token* scan_pp1 ();
121 const token* slurp_pp1_param (vector<const token*>& param);
122 const token* slurp_pp1_body (vector<const token*>& body);
123
124 // preprocessing subordinate, final pass (conditionals)
// Each entry pairs the opening token of a conditional with its state.
125 vector<pair<const token*, pp_state_t> > pp_state;
126 const token* scan_pp ();
127 const token* skip_pp ();
128
129 // scanning state
130 const token* next ();
131 const token* peek ();
132
133 // Advance past and throw away current token after peek () or next ().
134 void swallow ();
135
136 const token* systemtap_v_seen;
137 const token* last_t; // the last value returned by peek() or next()
138 const token* next_t; // lookahead token
139
140 // expectations, these swallow the token
141 void expect_known (token_type tt, string const & expected);
142 void expect_unknown (token_type tt, string & target);
143 void expect_unknown2 (token_type tt1, token_type tt2, string & target);
144
145 // convenience forms, these also swallow the token
146 void expect_op (string const & expected);
147 void expect_kw (string const & expected);
148 void expect_number (int64_t & expected);
149 void expect_ident_or_keyword (string & target);
150
151 // convenience forms, which return true or false, these don't swallow token
152 bool peek_op (string const & op);
153 bool peek_kw (string const & kw);
154
155 // convenience forms, which return the token
156 const token* expect_kw_token (string const & expected);
157 const token* expect_ident_or_atword (string & target);
158
// Report PE through the session (falling back to last_t when PE carries
// no token) and count it in num_errors.
159 void print_error (const parse_error& pe, bool errs_as_warnings = false);
160 unsigned num_errors;
161
162 private: // nonterminals
163 void parse_probe (vector<probe*>&, vector<probe_alias*>&);
164 void parse_global (vector<vardecl*>&, vector<probe*>&);
165 void parse_functiondecl (vector<functiondecl*>&);
166 embeddedcode* parse_embeddedcode ();
167 probe_point* parse_probe_point ();
168 literal_string* consume_string_literals (const token*);
169 literal_string* parse_literal_string ();
170 literal* parse_literal ();
171 block* parse_stmt_block ();
172 try_block* parse_try_block ();
173 statement* parse_statement ();
174 if_statement* parse_if_statement ();
175 for_loop* parse_for_loop ();
176 for_loop* parse_while_loop ();
177 foreach_loop* parse_foreach_loop ();
178 expr_statement* parse_expr_statement ();
179 return_statement* parse_return_statement ();
180 delete_statement* parse_delete_statement ();
181 next_statement* parse_next_statement ();
182 break_statement* parse_break_statement ();
183 continue_statement* parse_continue_statement ();
184 indexable* parse_indexable ();
185 const token *parse_hist_op_or_bare_name (hist_op *&hop, string &name);
186 target_symbol *parse_target_symbol (const token* t);
187 expression* parse_entry_op (const token* t);
188 expression* parse_defined_op (const token* t);
189 expression* parse_perf_op (const token* t);
// Expression grammar, one routine per level.
// NOTE(review): the listing order looks like loosest-to-tightest
// binding -- confirm against the definitions.
190 expression* parse_expression ();
191 expression* parse_assignment ();
192 expression* parse_ternary ();
193 expression* parse_logical_or ();
194 expression* parse_logical_and ();
195 expression* parse_boolean_or ();
196 expression* parse_boolean_xor ();
197 expression* parse_boolean_and ();
198 expression* parse_array_in ();
199 expression* parse_comparison_or_regex_query ();
200 expression* parse_shift ();
201 expression* parse_concatenation ();
202 expression* parse_additive ();
203 expression* parse_multiplicative ();
204 expression* parse_unary ();
205 expression* parse_crement ();
206 expression* parse_value ();
207 expression* parse_symbol ();
208
209 void parse_target_symbol_components (target_symbol* e);
210 };
211
212
213 // ------------------------------------------------------------------------
214
215 stapfile*
216 parse (systemtap_session& s, istream& i, bool pr, bool errs_as_warnings)
217 {
218 parser p (s, "<input>", i, pr);
219 return p.parse (errs_as_warnings);
220 }
221
222
223 stapfile*
224 parse (systemtap_session& s, const string& name, bool pr, bool errs_as_warnings)
225 {
226 ifstream i(name.c_str(), ios::in);
227 if (i.fail())
228 {
229 cerr << (file_exists(name)
230 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
231 : _F("Input file '%s' is missing.", name.c_str()))
232 << endl;
233 return 0;
234 }
235
236 parser p (s, name, i, pr);
237 return p.parse (errs_as_warnings);
238 }
239
240 stapfile*
241 parse_library_macros (systemtap_session& s, const string& name, bool errs_as_warnings)
242 {
243 ifstream i(name.c_str(), ios::in);
244 if (i.fail())
245 {
246 cerr << (file_exists(name)
247 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
248 : _F("Input file '%s' is missing.", name.c_str()))
249 << endl;
250 return 0;
251 }
252
253 parser p (s, name, i, false); // TODOXX pr is ...? should path be full??
254 return p.parse_library_macros (errs_as_warnings);
255 }
256
257 probe*
258 parse_synthetic_probe (systemtap_session &s, std::istream& i, const token* tok)
259 {
260 parser p (s, "<synthetic>", i, false);
261 return p.parse_synthetic_probe (tok, false);
262 }
263
264 // ------------------------------------------------------------------------
265
266
267 parser::parser (systemtap_session& s, const string &n, istream& i, bool p):
268 session (s), input_name (n), input (i, input_name, s), privileged (p),
269 context(con_unknown), systemtap_v_seen(0), last_t (0), next_t (0), num_errors (0)
270 {
271 }
272
273 parser::~parser()
274 {
275 }
276
277 static string
278 tt2str(token_type tt)
279 {
280 switch (tt)
281 {
282 case tok_junk: return "junk";
283 case tok_identifier: return "identifier";
284 case tok_operator: return "operator";
285 case tok_string: return "string";
286 case tok_number: return "number";
287 case tok_embedded: return "embedded-code";
288 case tok_keyword: return "keyword";
289 }
290 return "unknown token";
291 }
292
293 ostream&
294 operator << (ostream& o, const source_loc& loc)
295 {
296 o << loc.file->name << ":"
297 << loc.line << ":"
298 << loc.column;
299
300 return o;
301 }
302
303 ostream&
304 operator << (ostream& o, const token& t)
305 {
306 o << tt2str(t.type);
307
308 if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
309 {
310 o << " '";
311 for (unsigned i=0; i<t.content.length(); i++)
312 {
313 char c = t.content[i];
314 o << (isprint (c) ? c : '?');
315 }
316 o << "'";
317 }
318
319 o << " at "
320 << t.location;
321
322 return o;
323 }
324
325
326 void
327 parser::print_error (const parse_error &pe, bool errs_as_warnings)
328 {
329 const token *tok = pe.tok ? pe.tok : last_t;
330 session.print_error(pe, tok, input_name, errs_as_warnings);
331 num_errors ++;
332 }
333
334
335
336
337 template <typename OPERAND>
338 bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
339 {
340 if (op->type == tok_operator && op->content == "<=")
341 { return lhs <= rhs; }
342 else if (op->type == tok_operator && op->content == ">=")
343 { return lhs >= rhs; }
344 else if (op->type == tok_operator && op->content == "<")
345 { return lhs < rhs; }
346 else if (op->type == tok_operator && op->content == ">")
347 { return lhs > rhs; }
348 else if (op->type == tok_operator && op->content == "==")
349 { return lhs == rhs; }
350 else if (op->type == tok_operator && op->content == "!=")
351 { return lhs != rhs; }
352 else
353 throw PARSE_ERROR (_("expected comparison operator"), op);
354 }
355
356
357 // Here, we perform on-the-fly preprocessing in two passes.
358
359 // First pass - macro declaration and expansion.
360 //
361 // The basic form of a declaration is @define SIGNATURE %( BODY %)
362 // where SIGNATURE is of the form macro_name (a, b, c, ...)
363 // and BODY can obtain the parameter contents as @a, @b, @c, ....
364 // Note that parameterless macros can also be declared.
365 //
366 // Macro definitions may not be nested.
367 // A macro is available textually after it has been defined.
368 //
369 // The basic form of a macro invocation
370 // for a parameterless macro is @macro_name,
371 // for a macro with parameters is @macro_name(param_1, param_2, ...).
372 //
373 // NB: this means that a parameterless macro @foo called as @foo(a, b, c)
374 // leaves its 'parameters' alone, rather than consuming them to result
375 // in a "too many parameters" error. This may be useful in the unusual
376 // case of wanting @foo to expand to the name of a function.
377 //
378 // Invocations of unknown macros are left unexpanded, to allow
379 // the continued use of constructs such as @cast, @var, etc.
380
381 macrodecl::~macrodecl ()
382 {
383 delete tok;
384 for (vector<const token*>::iterator it = body.begin();
385 it != body.end(); it++)
386 delete *it;
387 }
388
389 parser::pp1_activation::~pp1_activation ()
390 {
391 delete tok;
392 if (curr_macro->is_closure()) return; // body is shared with an earlier declaration
393 for (map<string, pp_macrodecl*>::iterator it = params.begin();
394 it != params.end(); it++)
395 delete it->second;
396 }
397
398 // Grab a token from the current input source (main file or macro body):
399 const token*
400 parser::next_pp1 ()
401 {
402 if (pp1_state.empty())
403 return input.scan ();
404
405 // otherwise, we're inside a macro
406 pp1_activation* act = pp1_state.back();
407 unsigned& cursor = act->cursor;
408 if (cursor < act->curr_macro->body.size())
409 {
410 token* t = new token(*act->curr_macro->body[cursor]);
411 t->chain = new token(*act->tok); // mark chained token
412 cursor++;
413 return t;
414 }
415 else
416 return 0; // reached end of macro body
417 }
418
// First preprocessing pass: return the next token after macro handling.
//  - "@define" records a new macrodecl in pp1_namespace and produces no
//    token of its own;
//  - "@name" matching a parameter of the active macro, an entry of
//    pp1_namespace, or an entry of session.library_macros pushes a new
//    pp1_activation, so following tokens are read from that macro's body;
//  - every other token (including @operators that match no macro) is
//    returned unchanged.
// Returns 0 at the real end of input.  Malformed definitions and
// invocations are reported by throwing a parse error.
419 const token*
420 parser::scan_pp1 ()
421 {
422 while (true)
423 {
424 const token* t = next_pp1 ();
425 if (t == 0) // EOF or end of macro body
426 {
427 if (pp1_state.empty()) // actual EOF
428 return 0;
429
430 // Exit macro and loop around to look for the next token.
431 pp1_activation* act = pp1_state.back();
432 pp1_state.pop_back(); delete act;
433 continue;
434 }
435
436 // macro definition
437 if (t->type == tok_operator && t->content == "@define")
438 {
439 if (!pp1_state.empty())
440 throw PARSE_ERROR (_("'@define' forbidden inside macro body"), t);
441 delete t;
442
443 // handle macro definition
444 // (1) consume macro signature
// The definition is read straight from the lexer: pp1_state is known to
// be empty here, so no macro body can be the current source.
445 t = input.scan();
446 if (! (t && t->type == tok_identifier))
447 throw PARSE_ERROR (_("expected identifier"), t);
448 string name = t->content;
449
450 // check for redefinition of existing macro
451 if (pp1_namespace.find(name) != pp1_namespace.end())
452 {
453 parse_error er (ERR_SRC, _F("attempt to redefine macro '@%s' in the same file", name.c_str ()), t);
454
455 // Also point to pp1_namespace[name]->tok, the site of
456 // the original definition:
457 er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here",
458 name.c_str()), pp1_namespace[name]->tok);
459 throw er;
460 }
461
462 // XXX: the above restriction was mostly necessary due to
463 // wanting to leave open the possibility of
464 // statically-scoped semantics in the future.
465
466 // XXX: this cascades into further parse errors as the
467 // parser tries to parse the remaining definition... (e.g.
468 // it can't tell that the macro body isn't a conditional,
469 // that the uses of parameters aren't nonexistent
470 // macros.....)
471 if (name == "define")
472 throw PARSE_ERROR (_("attempt to redefine '@define'"), t);
473 if (input.atwords.count("@" + name))
474 session.print_warning (_F("macro redefines built-in operator '@%s'", name.c_str()), t);
475
// The declaration is registered before its parameters and body are read;
// the name token is kept as decl->tok.
476 macrodecl* decl = (pp1_namespace[name] = new macrodecl);
477 decl->tok = t;
478
479 // determine if the macro takes parameters
480 bool saw_params = false;
481 t = input.scan();
482 if (t && t->type == tok_operator && t->content == "(")
483 {
484 saw_params = true;
485 do
486 {
// On the first pass this frees the "(" token, on later passes the ","
// that led back here.
487 delete t;
488
489 t = input.scan ();
490 if (! (t && t->type == tok_identifier))
491 throw PARSE_ERROR(_("expected identifier"), t);
492 decl->formal_args.push_back(t->content);
493 delete t;
494
495 t = input.scan ();
496 if (t && t->type == tok_operator && t->content == ",")
497 {
498 continue;
499 }
500 else if (t && t->type == tok_operator && t->content == ")")
501 {
502 delete t;
503 t = input.scan();
504 break;
505 }
506 else
507 {
508 throw PARSE_ERROR (_("expected ',' or ')'"), t);
509 }
510 }
511 while (true);
512 }
513
514 // (2) identify & consume macro body
515 if (! (t && t->type == tok_operator && t->content == "%("))
516 {
517 if (saw_params)
518 throw PARSE_ERROR (_("expected '%('"), t);
519 else
520 throw PARSE_ERROR (_("expected '%(' or '('"), t);
521 }
522 delete t;
523
524 t = slurp_pp1_body (decl->body);
525 if (!t)
526 throw PARSE_ERROR (_("incomplete macro definition - missing '%)'"), decl->tok);
527 delete t;
528
529 // Now loop around to look for a real token.
530 continue;
531 }
532
533 // (potential) macro invocation
534 if (t->type == tok_operator && t->content[0] == '@')
535 {
536 string name = t->content.substr(1); // strip initial '@'
537
538 // check if name refers to a real parameter or macro
// Lookup order: parameters of the active macro, then this file's
// macros (skipped while expanding a library macro), then library macros.
539 macrodecl* decl;
540 pp1_activation* act = pp1_state.empty() ? 0 : pp1_state.back();
541 if (act && act->params.find(name) != act->params.end())
542 decl = act->params[name];
543 else if (!(act && act->curr_macro->context == ctx_library)
544 && pp1_namespace.find(name) != pp1_namespace.end())
545 decl = pp1_namespace[name];
546 else if (session.library_macros.find(name)
547 != session.library_macros.end())
548 decl = session.library_macros[name];
549 else // this is an ordinary @operator
550 return t;
551
552 // handle macro invocation, taking ownership of t
553 pp1_activation *new_act = new pp1_activation(t, decl);
554 unsigned num_params = decl->formal_args.size();
555
556 // (1a) restore parameter invocation closure
557 if (num_params == 0 && decl->is_closure())
558 {
559 // NB: decl->parent_act is always safe since the
560 // parameter decl (if any) comes from an activation
561 // record which is deeper in the stack than new_act.
562
563 // decl is a macro parameter which must be evaluated in
564 // the context of the original point of invocation:
565 new_act->params = ((pp_macrodecl*)decl)->parent_act->params;
566 goto expand;
567 }
568
569 // (1b) consume macro parameters (if any)
570 if (num_params == 0)
571 goto expand;
572
573 // for simplicity, we do not allow macro constructs here
574 // -- if we did, we'd have to recursively call scan_pp1()
575 t = next_pp1 ();
576 if (! (t && t->type == tok_operator && t->content == "("))
577 {
578 delete new_act;
579 throw PARSE_ERROR (_NF
580 ("expected '(' in invocation of macro '@%s'"
581 " taking %d parameter",
582 "expected '(' in invocation of macro '@%s'"
583 " taking %d parameters",
584 num_params, name.c_str(), num_params), t);
585 }
586
587 // XXX perhaps parse/count the full number of params,
588 // so we can say "expected x, found y params" on error?
589 for (unsigned i = 0; i < num_params; i++)
590 {
// Frees the "(" on the first iteration and the separating "," afterwards.
591 delete t;
592
593 // create parameter closure
594 string param_name = decl->formal_args[i];
595 pp_macrodecl* p = (new_act->params[param_name]
596 = new pp_macrodecl);
597 p->tok = new token(*new_act->tok);
598 p->parent_act = act;
599 // NB: *new_act->tok points to invocation, act is NULL at top level
600
601 t = slurp_pp1_param (p->body);
602
603 // check correct usage of ',' or ')'
604 if (t == 0) // hit unexpected EOF or end of macro
605 {
606 // XXX could we pop the stack and continue parsing
607 // the invocation, allowing macros to construct new
608 // invocations in piecemeal fashion??
// The invocation token is copied first because deleting new_act also
// deletes the token it owns.
609 const token* orig_t = new token(*new_act->tok);
610 delete new_act;
611 throw PARSE_ERROR (_("could not find end of macro invocation"), orig_t);
612 }
613 if (t->type == tok_operator && t->content == ",")
614 {
615 if (i + 1 == num_params)
616 {
617 delete new_act;
618 throw PARSE_ERROR (_F("too many parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
619 }
620 }
621 else if (t->type == tok_operator && t->content == ")")
622 {
623 if (i + 1 != num_params)
624 {
625 delete new_act;
626 throw PARSE_ERROR (_F("too few parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
627 }
628 }
629 else
630 {
631 // XXX this is, incidentally, impossible
632 delete new_act;
633 throw PARSE_ERROR(_("expected ',' or ')' after macro parameter"), t);
634 }
635 }
636
// Frees the closing ")" of the invocation.
637 delete t;
638
639 // (2) set up macro expansion
640 expand:
641 pp1_state.push_back (new_act);
642
643 // Now loop around to look for a real token.
644 continue;
645 }
646
647 // Otherwise, we have an ordinary token.
648 return t;
649 }
650 }
651
652 // Consume a single macro invocation's parameters, heeding nested ( )
653 // brackets and stopping on an unbalanced ')' or an unbracketed ','
654 // (and returning the final separator token).
655 const token*
656 parser::slurp_pp1_param (vector<const token*>& param)
657 {
658 const token* t = 0;
659 unsigned nesting = 0;
660 do
661 {
662 t = next_pp1 ();
663
664 if (!t)
665 break;
666 if (t->type == tok_operator && t->content == "(")
667 ++nesting;
668 else if (nesting && t->type == tok_operator && t->content == ")")
669 --nesting;
670 else if (!nesting && t->type == tok_operator
671 && (t->content == ")" || t->content == ","))
672 break;
673 param.push_back(t);
674 }
675 while (true);
676 return t; // report ")" or "," or NULL
677 }
678
679
680 // Consume a macro declaration's body, heeding nested %( %) brackets.
681 const token*
682 parser::slurp_pp1_body (vector<const token*>& body)
683 {
684 const token* t = 0;
685 unsigned nesting = 0;
686 do
687 {
688 t = next_pp1 ();
689
690 if (!t)
691 break;
692 if (t->type == tok_operator && t->content == "%(")
693 ++nesting;
694 else if (nesting && t->type == tok_operator && t->content == "%)")
695 --nesting;
696 else if (!nesting && t->type == tok_operator && t->content == "%)")
697 break;
698 body.push_back(t);
699 }
700 while (true);
701 return t; // report final "%)" or NULL
702 }
703
704 // Used for parsing .stpm files.
705 stapfile*
706 parser::parse_library_macros (bool errs_as_warnings)
707 {
708 stapfile* f = new stapfile;
709 input.set_current_file (f);
710
711 try
712 {
713 const token* t = scan_pp1 ();
714
715 // Currently we only take objection to macro invocations if they
716 // produce a non-whitespace token after being expanded.
717
718 // XXX should we prevent macro invocations even if they expand to empty??
719
720 if (t != 0)
721 throw PARSE_ERROR (_F("library macro file '%s' contains non-@define construct", input_name.c_str()), t);
722
723 // We need to first check whether *any* of the macros are duplicates,
724 // then commit to including the entire file in the global namespace
725 // (or not). Yuck.
726 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
727 it != pp1_namespace.end(); it++)
728 {
729 string name = it->first;
730
731 if (session.library_macros.find(name) != session.library_macros.end())
732 {
733 parse_error er(ERR_SRC, _F("duplicate definition of library macro '@%s'", name.c_str()), it->second->tok);
734 er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here", name.c_str()), session.library_macros[name]->tok);
735 print_error (er);
736
737 delete er.chain;
738 delete f;
739 return 0;
740 }
741 }
742
743 }
744 catch (const parse_error& pe)
745 {
746 print_error (pe, errs_as_warnings);
747 delete f;
748 return 0;
749 }
750
751 // If no errors, include the entire file. Note how this is outside
752 // of the try-catch block -- no errors possible.
753 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
754 it != pp1_namespace.end(); it++)
755 {
756 string name = it->first;
757
758 session.library_macros[name] = it->second;
759 session.library_macros[name]->context = ctx_library;
760 }
761
762 return f;
763 }
764
765 // Second pass - preprocessor conditional expansion.
766 //
767 // The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
768 // where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
769 // or: arch COMPARISON-OP "arch-string"
770 // or: systemtap_v COMPARISON-OP "version-string"
771 // or: systemtap_privilege COMPARISON-OP "privilege-string"
772 // or: CONFIG_foo COMPARISON-OP "config-string"
773 // or: CONFIG_foo COMPARISON-OP number
774 // or: CONFIG_foo COMPARISON-OP CONFIG_bar
775 // or: "string1" COMPARISON-OP "string2"
776 // or: number1 COMPARISON-OP number2
777 // The %: ELSE-TOKENS part is optional.
778 //
779 // e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
780 // e.g. %( arch != "i?86" %? "foo" %: "baz" %)
781 // e.g. %( CONFIG_foo %? "foo" %: "baz" %)
782 //
783 // Up to an entire %( ... %) expression is processed by a single call
784 // to this function. Tokens included by any nested conditions are
785 // enqueued in a private vector.
786
787 bool eval_pp_conditional (systemtap_session& s,
788 const token* l, const token* op, const token* r)
789 {
790 if (l->type == tok_identifier && (l->content == "kernel_v" ||
791 l->content == "kernel_vr" ||
792 l->content == "systemtap_v"))
793 {
794 if (! (r->type == tok_string))
795 throw PARSE_ERROR (_("expected string literal"), r);
796
797 string target_kernel_vr = s.kernel_release;
798 string target_kernel_v = s.kernel_base_release;
799 string target;
800
801 if (l->content == "kernel_v") target = target_kernel_v;
802 else if (l->content == "kernel_vr") target = target_kernel_vr;
803 else if (l->content == "systemtap_v") target = s.compatible;
804 else assert (0);
805
806 string query = r->content;
807 bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
808
809 // collect acceptable strverscmp results.
810 int rvc_ok1, rvc_ok2;
811 bool wc_ok = false;
812 if (op->type == tok_operator && op->content == "<=")
813 { rvc_ok1 = -1; rvc_ok2 = 0; }
814 else if (op->type == tok_operator && op->content == ">=")
815 { rvc_ok1 = 1; rvc_ok2 = 0; }
816 else if (op->type == tok_operator && op->content == "<")
817 { rvc_ok1 = -1; rvc_ok2 = -1; }
818 else if (op->type == tok_operator && op->content == ">")
819 { rvc_ok1 = 1; rvc_ok2 = 1; }
820 else if (op->type == tok_operator && op->content == "==")
821 { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
822 else if (op->type == tok_operator && op->content == "!=")
823 { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
824 else
825 throw PARSE_ERROR (_("expected comparison operator"), op);
826
827 if ((!wc_ok) && rhs_wildcard)
828 throw PARSE_ERROR (_("wildcard not allowed with order comparison operators"), op);
829
830 if (rhs_wildcard)
831 {
832 int rvc_result = fnmatch (query.c_str(), target.c_str(),
833 FNM_NOESCAPE); // spooky
834 bool badness = (rvc_result == 0) ^ (op->content == "==");
835 return !badness;
836 }
837 else
838 {
839 int rvc_result = strverscmp (target.c_str(), query.c_str());
840 // normalize rvc_result
841 if (rvc_result < 0) rvc_result = -1;
842 if (rvc_result > 0) rvc_result = 1;
843 return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
844 }
845 }
846 else if (l->type == tok_identifier && l->content == "systemtap_privilege")
847 {
848 string target_privilege =
849 pr_contains(s.privilege, pr_stapdev) ? "stapdev"
850 : pr_contains(s.privilege, pr_stapsys) ? "stapsys"
851 : pr_contains(s.privilege, pr_stapusr) ? "stapusr"
852 : "none"; /* should be impossible -- s.privilege always one of above */
853 assert(target_privilege != "none");
854
855 if (! (r->type == tok_string))
856 throw PARSE_ERROR (_("expected string literal"), r);
857 string query_privilege = r->content;
858
859 bool nomatch = (target_privilege != query_privilege);
860
861 bool result;
862 if (op->type == tok_operator && op->content == "==")
863 result = !nomatch;
864 else if (op->type == tok_operator && op->content == "!=")
865 result = nomatch;
866 else
867 throw PARSE_ERROR (_("expected '==' or '!='"), op);
868 /* XXX perhaps allow <= >= and similar comparisons */
869
870 return result;
871 }
872 else if (l->type == tok_identifier && l->content == "guru_mode")
873 {
874 if (! (r->type == tok_number))
875 throw PARSE_ERROR (_("expected number"), r);
876 int64_t lhs = (int64_t) s.guru_mode;
877 int64_t rhs = lex_cast<int64_t>(r->content);
878 if (!((rhs == 0)||(rhs == 1)))
879 throw PARSE_ERROR (_("expected 0 or 1"), op);
880 if (!((op->type == tok_operator && op->content == "==") ||
881 (op->type == tok_operator && op->content == "!=")))
882 throw PARSE_ERROR (_("expected '==' or '!='"), op);
883
884 return eval_comparison (lhs, op, rhs);
885 }
886 else if (l->type == tok_identifier && l->content == "arch")
887 {
888 string target_architecture = s.architecture;
889 if (! (r->type == tok_string))
890 throw PARSE_ERROR (_("expected string literal"), r);
891 string query_architecture = r->content;
892
893 int nomatch = fnmatch (query_architecture.c_str(),
894 target_architecture.c_str(),
895 FNM_NOESCAPE); // still spooky
896
897 bool result;
898 if (op->type == tok_operator && op->content == "==")
899 result = !nomatch;
900 else if (op->type == tok_operator && op->content == "!=")
901 result = nomatch;
902 else
903 throw PARSE_ERROR (_("expected '==' or '!='"), op);
904
905 return result;
906 }
907 else if (l->type == tok_identifier && l->content == "runtime")
908 {
909 if (! (r->type == tok_string))
910 throw PARSE_ERROR (_("expected string literal"), r);
911
912 string query_runtime = r->content;
913 string target_runtime;
914
915 target_runtime = (s.runtime_mode == systemtap_session::dyninst_runtime
916 ? "dyninst" : "kernel");
917 int nomatch = fnmatch (query_runtime.c_str(),
918 target_runtime.c_str(),
919 FNM_NOESCAPE); // still spooky
920
921 bool result;
922 if (op->type == tok_operator && op->content == "==")
923 result = !nomatch;
924 else if (op->type == tok_operator && op->content == "!=")
925 result = nomatch;
926 else
927 throw PARSE_ERROR (_("expected '==' or '!='"), op);
928
929 return result;
930 }
931 else if (l->type == tok_identifier && startswith(l->content, "CONFIG_"))
932 {
933 if (r->type == tok_string)
934 {
935 string lhs = s.kernel_config[l->content]; // may be empty
936 string rhs = r->content;
937
938 int nomatch = fnmatch (rhs.c_str(), lhs.c_str(), FNM_NOESCAPE); // still spooky
939
940 bool result;
941 if (op->type == tok_operator && op->content == "==")
942 result = !nomatch;
943 else if (op->type == tok_operator && op->content == "!=")
944 result = nomatch;
945 else
946 throw PARSE_ERROR (_("expected '==' or '!='"), op);
947
948 return result;
949 }
950 else if (r->type == tok_number)
951 {
952 const char* startp = s.kernel_config[l->content].c_str ();
953 char* endp = (char*) startp;
954 errno = 0;
955 int64_t lhs = (int64_t) strtoll (startp, & endp, 0);
956 if (errno == ERANGE || errno == EINVAL || *endp != '\0')
957 throw PARSE_ERROR ("Config option value not a number", l);
958
959 int64_t rhs = lex_cast<int64_t>(r->content);
960 return eval_comparison (lhs, op, rhs);
961 }
962 else if (r->type == tok_identifier
963 && startswith(r->content, "CONFIG_"))
964 {
965 // First try to convert both to numbers,
966 // otherwise threat both as strings.
967 const char* startp = s.kernel_config[l->content].c_str ();
968 char* endp = (char*) startp;
969 errno = 0;
970 int64_t val = (int64_t) strtoll (startp, & endp, 0);
971 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
972 {
973 int64_t lhs = val;
974 startp = s.kernel_config[r->content].c_str ();
975 endp = (char*) startp;
976 errno = 0;
977 int64_t rhs = (int64_t) strtoll (startp, & endp, 0);
978 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
979 return eval_comparison (lhs, op, rhs);
980 }
981
982 string lhs = s.kernel_config[l->content];
983 string rhs = s.kernel_config[r->content];
984 return eval_comparison (lhs, op, rhs);
985 }
986 else
987 throw PARSE_ERROR (_("expected string, number literal or other CONFIG_... as right side operand"), r);
988 }
989 else if (l->type == tok_string && r->type == tok_string)
990 {
991 string lhs = l->content;
992 string rhs = r->content;
993 return eval_comparison (lhs, op, rhs);
994 // NB: no wildcarding option here
995 }
996 else if (l->type == tok_number && r->type == tok_number)
997 {
998 int64_t lhs = lex_cast<int64_t>(l->content);
999 int64_t rhs = lex_cast<int64_t>(r->content);
1000 return eval_comparison (lhs, op, rhs);
1001 // NB: no wildcarding option here
1002 }
1003 else if (l->type == tok_string && r->type == tok_number
1004 && op->type == tok_operator)
1005 throw PARSE_ERROR (_("expected string literal as right value"), r);
1006 else if (l->type == tok_number && r->type == tok_string
1007 && op->type == tok_operator)
1008 throw PARSE_ERROR (_("expected number literal as right value"), r);
1009
1010 else
1011 throw PARSE_ERROR (_("expected 'arch', 'kernel_v', 'kernel_vr', 'systemtap_v',\n"
1012 " 'runtime', 'systemtap_privilege', 'CONFIG_...', or\n"
1013 " comparison between strings or integers"), l);
1014 }
1015
1016
1017 // Only tokens corresponding to the TRUE statement must be expanded
1018 const token*
1019 parser::scan_pp ()
1020 {
1021 while (true)
1022 {
1023 pp_state_t pp = PP_NONE;
1024 if (!pp_state.empty())
1025 pp = pp_state.back().second;
1026
1027 const token* t = 0;
1028 if (pp == PP_SKIP_THEN || pp == PP_SKIP_ELSE)
1029 t = skip_pp ();
1030 else
1031 t = scan_pp1 ();
1032
1033 if (t == 0) // EOF
1034 {
1035 if (pp != PP_NONE)
1036 {
1037 t = pp_state.back().first;
1038 pp_state.pop_back(); // so skip_some doesn't keep trying to close this
1039 //TRANSLATORS: 'conditional' meaning 'conditional preprocessing'
1040 throw PARSE_ERROR (_("incomplete conditional at end of file"), t);
1041 }
1042 return t;
1043 }
1044
1045 // misplaced preprocessor "then"
1046 if (t->type == tok_operator && t->content == "%?")
1047 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1048
1049 // preprocessor "else"
1050 if (t->type == tok_operator && t->content == "%:")
1051 {
1052 if (pp == PP_NONE)
1053 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1054 if (pp == PP_KEEP_ELSE || pp == PP_SKIP_ELSE)
1055 throw PARSE_ERROR (_("invalid conditional - duplicate '%:'"), t);
1056 // XXX: here and elsewhere, error cascades might be avoided
1057 // by dropping tokens until we reach the closing %)
1058
1059 pp_state.back().second = (pp == PP_KEEP_THEN) ?
1060 PP_SKIP_ELSE : PP_KEEP_ELSE;
1061 delete t;
1062 continue;
1063 }
1064
1065 // preprocessor close
1066 if (t->type == tok_operator && t->content == "%)")
1067 {
1068 if (pp == PP_NONE)
1069 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
1070 delete pp_state.back().first;
1071 delete t; //this is the closing bracket
1072 pp_state.pop_back();
1073 continue;
1074 }
1075
1076 if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
1077 return t;
1078
1079 // We have a %( - it's time to throw a preprocessing party!
1080
1081 bool result = false;
1082 bool and_result = true;
1083 const token *n = NULL;
1084 do {
1085 const token *l, *op, *r;
1086 l = scan_pp1 ();
1087 op = scan_pp1 ();
1088 r = scan_pp1 ();
1089 if (l == 0 || op == 0 || r == 0)
1090 throw PARSE_ERROR (_("incomplete condition after '%('"), t);
1091 // NB: consider generalizing to consume all tokens until %?, and
1092 // passing that as a vector to an evaluator.
1093
1094 // Do not evaluate the condition if we haven't expanded everything.
1095 // This may occur when having several recursive conditionals.
1096 and_result &= eval_pp_conditional (session, l, op, r);
1097 if(l->content=="systemtap_v")
1098 systemtap_v_seen=r;
1099
1100 else
1101 delete r;
1102
1103 delete l;
1104 delete op;
1105 delete n;
1106
1107 n = scan_pp1 ();
1108 if (n && n->type == tok_operator && n->content == "&&")
1109 continue;
1110 result |= and_result;
1111 and_result = true;
1112 if (! (n && n->type == tok_operator && n->content == "||"))
1113 break;
1114 } while (true);
1115
1116 /*
1117 clog << "PP eval (" << *t << ") == " << result << endl;
1118 */
1119
1120 const token *m = n;
1121 if (! (m && m->type == tok_operator && m->content == "%?"))
1122 throw PARSE_ERROR (_("expected '%?' marker for conditional"), t);
1123 delete m; // "%?"
1124
1125 pp = result ? PP_KEEP_THEN : PP_SKIP_THEN;
1126 pp_state.push_back (make_pair (t, pp));
1127
1128 // Now loop around to look for a real token.
1129 }
1130 }
1131
1132
1133 // Skip over tokens and any errors, heeding
1134 // only nested preprocessor starts and ends.
1135 const token*
1136 parser::skip_pp ()
1137 {
1138 const token* t = 0;
1139 unsigned nesting = 0;
1140 do
1141 {
1142 try
1143 {
1144 t = scan_pp1 ();
1145 }
1146 catch (const parse_error &e)
1147 {
1148 continue;
1149 }
1150 if (!t)
1151 break;
1152 if (t->type == tok_operator && t->content == "%(")
1153 ++nesting;
1154 else if (nesting && t->type == tok_operator && t->content == "%)")
1155 --nesting;
1156 else if (!nesting && t->type == tok_operator &&
1157 (t->content == "%:" || t->content == "%?" || t->content == "%)"))
1158 break;
1159 delete t;
1160 }
1161 while (true);
1162 return t;
1163 }
1164
1165
1166 const token*
1167 parser::next ()
1168 {
1169 if (! next_t)
1170 next_t = scan_pp ();
1171 if (! next_t)
1172 throw PARSE_ERROR (_("unexpected end-of-file"));
1173
1174 last_t = next_t;
1175 // advance by zeroing next_t
1176 next_t = 0;
1177 return last_t;
1178 }
1179
1180
1181 const token*
1182 parser::peek ()
1183 {
1184 if (! next_t)
1185 next_t = scan_pp ();
1186
1187 // don't advance by zeroing next_t
1188 last_t = next_t;
1189 return next_t;
1190 }
1191
1192
1193 void
1194 parser::swallow ()
1195 {
1196 // can only swallow something last peeked or nexted token.
1197 assert (last_t != 0);
1198 delete last_t;
1199 // advance by zeroing next_t
1200 last_t = next_t = 0;
1201 }
1202
1203
1204 static inline bool
1205 tok_is(token const * t, token_type tt, string const & expected)
1206 {
1207 return t && t->type == tt && t->content == expected;
1208 }
1209
1210
1211 void
1212 parser::expect_known (token_type tt, string const & expected)
1213 {
1214 const token *t = next();
1215 if (! (t && t->type == tt && t->content == expected))
1216 throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
1217 swallow (); // We are done with it, content was copied.
1218 }
1219
1220
1221 void
1222 parser::expect_unknown (token_type tt, string & target)
1223 {
1224 const token *t = next();
1225 if (!(t && t->type == tt))
1226 throw PARSE_ERROR (_("expected ") + tt2str(tt));
1227 target = t->content;
1228 swallow (); // We are done with it, content was copied.
1229 }
1230
1231
1232 void
1233 parser::expect_unknown2 (token_type tt1, token_type tt2, string & target)
1234 {
1235 const token *t = next();
1236 if (!(t && (t->type == tt1 || t->type == tt2)))
1237 throw PARSE_ERROR (_F("expected %s or %s", tt2str(tt1).c_str(), tt2str(tt2).c_str()));
1238 target = t->content;
1239 swallow (); // We are done with it, content was copied.
1240 }
1241
1242
1243 void
1244 parser::expect_op (std::string const & expected)
1245 {
1246 expect_known (tok_operator, expected);
1247 }
1248
1249
1250 void
1251 parser::expect_kw (std::string const & expected)
1252 {
1253 expect_known (tok_keyword, expected);
1254 }
1255
1256 const token*
1257 parser::expect_kw_token (std::string const & expected)
1258 {
1259 const token *t = next();
1260 if (! (t && t->type == tok_keyword && t->content == expected))
1261 throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
1262 return t;
1263 }
1264
1265 void
1266 parser::expect_number (int64_t & value)
1267 {
1268 bool neg = false;
1269 const token *t = next();
1270 if (t->type == tok_operator && t->content == "-")
1271 {
1272 neg = true;
1273 swallow ();
1274 t = next ();
1275 }
1276 if (!(t && t->type == tok_number))
1277 throw PARSE_ERROR (_("expected number"));
1278
1279 const char* startp = t->content.c_str ();
1280 char* endp = (char*) startp;
1281
1282 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1283 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1284 // since the lexer only gives us positive digit strings, but we'll
1285 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1286 errno = 0;
1287 value = (int64_t) strtoull (startp, & endp, 0);
1288 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
1289 || (neg && (unsigned long long) value > 9223372036854775808ULL)
1290 || (unsigned long long) value > 18446744073709551615ULL
1291 || value < -9223372036854775807LL-1)
1292 throw PARSE_ERROR (_("number invalid or out of range"));
1293
1294 if (neg)
1295 value = -value;
1296
1297 swallow (); // We are done with it, content was parsed and copied into value.
1298 }
1299
1300
1301 const token*
1302 parser::expect_ident_or_atword (std::string & target)
1303 {
1304 const token *t = next();
1305
1306 // accept identifiers and operators beginning in '@':
1307 if (!t || (t->type != tok_identifier
1308 && (t->type != tok_operator || t->content[0] != '@')))
1309 // XXX currently this is only called from parse_hist_op_or_bare_name(),
1310 // so the message is accurate, but keep an eye out in the future:
1311 throw PARSE_ERROR (_F("expected %s or statistical operation", tt2str(tok_identifier).c_str()));
1312
1313 target = t->content;
1314 return t;
1315 }
1316
1317
1318 void
1319 parser::expect_ident_or_keyword (std::string & target)
1320 {
1321 expect_unknown2 (tok_identifier, tok_keyword, target);
1322 }
1323
1324
1325 bool
1326 parser::peek_op (std::string const & op)
1327 {
1328 return tok_is (peek(), tok_operator, op);
1329 }
1330
1331
1332 bool
1333 parser::peek_kw (std::string const & kw)
1334 {
1335 return tok_is (peek(), tok_identifier, kw);
1336 }
1337
1338
1339
1340 lexer::lexer (istream& input, const string& in, systemtap_session& s):
1341 ate_comment(false), ate_whitespace(false), saw_tokens(false),
1342 input_name (in), input_pointer (0), input_end (0), cursor_suspend_count(0),
1343 cursor_suspend_line (1), cursor_suspend_column (1), cursor_line (1),
1344 cursor_column (1), session(s), current_file (0), current_token_chain (0)
1345 {
1346 getline(input, input_contents, '\0');
1347
1348 input_pointer = input_contents.data();
1349 input_end = input_contents.data() + input_contents.size();
1350
1351 if (keywords.empty())
1352 {
1353 // NB: adding new keywords is highly disruptive to the language,
1354 // in particular to existing scripts that could be suddenly
1355 // broken. If done at all, it has to be s.compatible-sensitive,
1356 // and broadly advertised.
1357 keywords.insert("probe");
1358 keywords.insert("global");
1359 keywords.insert("function");
1360 keywords.insert("if");
1361 keywords.insert("else");
1362 keywords.insert("for");
1363 keywords.insert("foreach");
1364 keywords.insert("in");
1365 keywords.insert("limit");
1366 keywords.insert("return");
1367 keywords.insert("delete");
1368 keywords.insert("while");
1369 keywords.insert("break");
1370 keywords.insert("continue");
1371 keywords.insert("next");
1372 keywords.insert("string");
1373 keywords.insert("long");
1374 keywords.insert("try");
1375 keywords.insert("catch");
1376 }
1377
1378 if (atwords.empty())
1379 {
1380 // NB: adding new @words is mildly disruptive to existing
1381 // scripts that define macros with the same name, but not
1382 // really. The user will merely receive a warning that they are
1383 // redefining an existing operator.
1384 atwords.insert("@cast");
1385 atwords.insert("@defined");
1386 atwords.insert("@entry");
1387 atwords.insert("@perf");
1388 atwords.insert("@var");
1389 atwords.insert("@avg");
1390 atwords.insert("@count");
1391 atwords.insert("@sum");
1392 atwords.insert("@min");
1393 atwords.insert("@max");
1394 atwords.insert("@hist_linear");
1395 atwords.insert("@hist_log");
1396 }
1397 }
1398
1399 set<string> lexer::keywords;
1400 set<string> lexer::atwords;
1401
1402 void
1403 lexer::set_current_file (stapfile* f)
1404 {
1405 current_file = f;
1406 if (f)
1407 {
1408 f->file_contents = input_contents;
1409 f->name = input_name;
1410 }
1411 }
1412
1413 void
1414 lexer::set_current_token_chain (const token* tok)
1415 {
1416 current_token_chain = tok;
1417 }
1418
1419 int
1420 lexer::input_peek (unsigned n)
1421 {
1422 if (input_pointer + n >= input_end)
1423 return -1; // EOF
1424 return (unsigned char)*(input_pointer + n);
1425 }
1426
1427
1428 int
1429 lexer::input_get ()
1430 {
1431 int c = input_peek();
1432 if (c < 0) return c; // EOF
1433
1434 ++input_pointer;
1435
1436 if (cursor_suspend_count)
1437 {
1438 // Track effect of input_put: preserve previous cursor/line_column
1439 // until all of its characters are consumed.
1440 if (--cursor_suspend_count == 0)
1441 {
1442 cursor_line = cursor_suspend_line;
1443 cursor_column = cursor_suspend_column;
1444 }
1445 }
1446 else
1447 {
1448 // update source cursor
1449 if (c == '\n')
1450 {
1451 cursor_line ++;
1452 cursor_column = 1;
1453 }
1454 else
1455 cursor_column ++;
1456 }
1457
1458 // clog << "[" << (char)c << "]";
1459 return c;
1460 }
1461
1462
1463 void
1464 lexer::input_put (const string& chars, const token* t)
1465 {
1466 size_t pos = input_pointer - input_contents.data();
1467 // clog << "[put:" << chars << " @" << pos << "]";
1468 input_contents.insert (pos, chars);
1469 cursor_suspend_count += chars.size();
1470 cursor_suspend_line = cursor_line;
1471 cursor_suspend_column = cursor_column;
1472 cursor_line = t->location.line;
1473 cursor_column = t->location.column;
1474 input_pointer = input_contents.data() + pos;
1475 input_end = input_contents.data() + input_contents.size();
1476 }
1477
1478
// Scan and return the next raw token from the input buffer, or 0 at
// end of input.  Whitespace and comments are skipped; ate_whitespace
// and ate_comment record that this happened.  References to command
// line arguments ($N, @N, $#, @#) are expanded by inserting their text
// into the input with input_put() and rescanning.  Malformed input is
// reported by returning a token turned into tok_junk (see make_junk)
// rather than by throwing.
1479 token*
1480 lexer::scan ()
1481 {
1482 ate_comment = false; // reset for each new token
1483 ate_whitespace = false; // reset for each new token
1484
1485 // XXX be very sure to restore old_saw_tokens if we return without a token:
1486 bool old_saw_tokens = saw_tokens;
1487 saw_tokens = true;
1488
1489 token* n = new token;
1490 n->location.file = current_file;
1491 n->chain = current_token_chain;
1492
// Rescan point: jumped back to after whitespace, comments and
// command line argument substitution.
1493 skip:
// True while the characters being read were inserted by input_put().
1494 bool suspended = (cursor_suspend_count > 0);
1495 n->location.line = cursor_line;
1496 n->location.column = cursor_column;
1497
1498 int c = input_get();
1499 // clog << "{" << (char)c << (char)c2 << "}";
1500 if (c < 0)
1501 {
// EOF: no token was produced, so free it and undo the saw_tokens update.
1502 delete n;
1503 saw_tokens = old_saw_tokens;
1504 return 0;
1505 }
1506
1507 if (isspace (c))
1508 {
1509 ate_whitespace = true;
1510 goto skip;
1511 }
1512
1513 int c2 = input_peek ();
1514
1515 // Paste command line arguments as character streams into
1516 // the beginning of a token. $1..$999 go through as raw
1517 // characters; @1..@999 are quoted/escaped as strings.
1518 // $# and @# expand to the number of arguments, similarly
1519 // raw or quoted.
1520 if ((c == '$' || c == '@') && (c2 == '#'))
1521 {
1522 n->content.push_back (c);
1523 n->content.push_back (c2);
1524 input_get(); // swallow '#'
1525 if (suspended)
1526 {
1527 n->make_junk(_("invalid nested substitution of command line arguments"));
1528 return n;
1529 }
1530 size_t num_args = session.args.size ();
1531 input_put ((c == '$') ? lex_cast (num_args) : lex_cast_qstring (num_args), n);
// The reference itself is not a token; rescan the inserted text.
1532 n->content.clear();
1533 goto skip;
1534 }
1535 else if ((c == '$' || c == '@') && (isdigit (c2)))
1536 {
1537 n->content.push_back (c);
1538 unsigned idx = 0;
// Accumulate the decimal argument index, one digit per iteration.
1539 do
1540 {
1541 input_get ();
1542 idx = (idx * 10) + (c2 - '0');
1543 n->content.push_back (c2);
1544 c2 = input_peek ();
1545 } while (c2 > 0 &&
1546 isdigit (c2) &&
1547 idx <= session.args.size()); // prevent overflow
1548 if (suspended)
1549 {
1550 n->make_junk(_("invalid nested substitution of command line arguments"));
1551 return n;
1552 }
1553 if (idx == 0 ||
1554 idx-1 >= session.args.size())
1555 {
1556 n->make_junk(_F("command line argument index %lu out of range [1-%lu]",
1557 (unsigned long) idx, (unsigned long) session.args.size()));
1558 return n;
1559 }
1560 const string& arg = session.args[idx-1];
1561 input_put ((c == '$') ? arg : lex_cast_qstring (arg), n);
1562 n->content.clear();
1563 goto skip;
1564 }
1565
// Identifiers, keywords and @words.
1566 else if (isalpha (c) || c == '$' || c == '@' || c == '_')
1567 {
1568 n->type = tok_identifier;
1569 n->content = (char) c;
1570 while (isalnum (c2) || c2 == '_' || c2 == '$')
1571 {
1572 input_get ();
1573 n->content.push_back (c2);
1574 c2 = input_peek ();
1575 }
1576
1577 if (keywords.count(n->content))
1578 n->type = tok_keyword;
1579 else if (n->content[0] == '@')
1580 // makes it easier to detect illegal use of @words:
1581 n->type = tok_operator;
1582
1583 return n;
1584 }
1585
1586 else if (isdigit (c)) // positive literal
1587 {
1588 n->type = tok_number;
1589 n->content = (char) c;
1590
1591 while (isalnum (c2))
1592 {
1593 // NB: isalnum is very permissive. We rely on strtol, called in
1594 // parser::parse_literal below, to confirm that the number string
1595 // is correctly formatted and in range.
1596
1597 input_get ();
1598 n->content.push_back (c2);
1599 c2 = input_peek ();
1600 }
1601 return n;
1602 }
1603
// String literal: collect characters up to the closing quote.
1604 else if (c == '\"')
1605 {
1606 n->type = tok_string;
1607 while (1)
1608 {
1609 c = input_get ();
1610
1611 if (c < 0 || c == '\n')
1612 {
1613 n->make_junk(_("Could not find matching closing quote"));
1614 return n;
1615 }
1616 if (c == '\"') // closing double-quotes
1617 break;
1618 else if (c == '\\') // see also input_put
1619 {
1620 c = input_get ();
1621 switch (c)
1622 {
1623 case 'x':
// \x is kept as an escape only for compatibility level 2.3 and up;
// for older levels the backslash is dropped (default case).
1624 if (strverscmp(session.compatible.c_str(), "2.3") < 0)
1625 goto the_default;
1626 case 'a':
1627 case 'b':
1628 case 't':
1629 case 'n':
1630 case 'v':
1631 case 'f':
1632 case 'r':
1633 case '0' ... '7': // NB: need only match the first digit
1634 case '\\':
1635 // Pass these escapes through to the string value
1636 // being parsed; it will be emitted into a C literal.
1637 // XXX: PR13371: perhaps we should evaluate them here
1638 // (and re-quote them during translate.cxx emission).
1639 n->content.push_back('\\');
1640
1641 // fall through
1642 default: the_default:
1643 n->content.push_back(c);
1644 break;
1645 }
1646 }
1647 else
1648 n->content.push_back(c);
1649 }
1650 return n;
1651 }
1652
1653 else if (ispunct (c))
1654 {
1655 int c3 = input_peek (1);
1656
1657 // NB: if we were to recognize negative numeric literals here,
1658 // we'd introduce another grammar ambiguity:
1659 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
1660 // instead of tok_number(1) tok_operator('-') tok_number(1)
1661
1662 if (c == '#') // shell comment
1663 {
// Consume characters until the cursor moves to another line or EOF.
1664 unsigned this_line = cursor_line;
1665 do { c = input_get (); }
1666 while (c >= 0 && cursor_line == this_line);
1667 ate_comment = true;
1668 ate_whitespace = true;
1669 goto skip;
1670 }
1671 else if ((c == '/' && c2 == '/')) // C++ comment
1672 {
1673 unsigned this_line = cursor_line;
1674 do { c = input_get (); }
1675 while (c >= 0 && cursor_line == this_line);
1676 ate_comment = true;
1677 ate_whitespace = true;
1678 goto skip;
1679 }
1680 else if (c == '/' && c2 == '*') // C comment
1681 {
1682 (void) input_get (); // swallow '*' already in c2
1683 c = input_get ();
1684 c2 = input_get ();
// Slide a two-character window over the input until "*/" or EOF; an
// unterminated comment runs to EOF, which the rescan then reports.
1685 while (c2 >= 0)
1686 {
1687 if (c == '*' && c2 == '/')
1688 break;
1689 c = c2;
1690 c2 = input_get ();
1691 }
1692 ate_comment = true;
1693 ate_whitespace = true;
1694 goto skip;
1695 }
1696 else if (c == '%' && c2 == '{') // embedded code
1697 {
1698 n->type = tok_embedded;
1699 (void) input_get (); // swallow '{' already in c2
1700 c = input_get ();
1701 c2 = input_get ();
// Same two-character window, this time accumulating the text
// between "%{" and "%}" into the token content.
1702 while (c2 >= 0)
1703 {
1704 if (c == '%' && c2 == '}')
1705 return n;
1706 if (c == '}' && c2 == '%') // possible typo
1707 session.print_warning (_("possible erroneous closing '}%', use '%}'?"), n);
1708 n->content += c;
1709 c = c2;
1710 c2 = input_get ();
1711 }
1712
1713 n->make_junk(_("Could not find matching '%}' to close embedded function block"));
1714 return n;
1715 }
1716
1717 // We're committed to recognizing at least the first character
1718 // as an operator.
1719 n->type = tok_operator;
1720 n->content = c;
1721
1722 // match all valid operators, in decreasing size order
1723 if ((c == '<' && c2 == '<' && c3 == '<') ||
1724 (c == '<' && c2 == '<' && c3 == '=') ||
1725 (c == '>' && c2 == '>' && c3 == '='))
1726 {
1727 n->content += c2;
1728 n->content += c3;
1729 input_get (); input_get (); // swallow other two characters
1730 }
1731 else if ((c == '=' && c2 == '=') ||
1732 (c == '!' && c2 == '=') ||
1733 (c == '<' && c2 == '=') ||
1734 (c == '>' && c2 == '=') ||
1735 (c == '=' && c2 == '~') ||
1736 (c == '!' && c2 == '~') ||
1737 (c == '+' && c2 == '=') ||
1738 (c == '-' && c2 == '=') ||
1739 (c == '*' && c2 == '=') ||
1740 (c == '/' && c2 == '=') ||
1741 (c == '%' && c2 == '=') ||
1742 (c == '&' && c2 == '=') ||
1743 (c == '^' && c2 == '=') ||
1744 (c == '|' && c2 == '=') ||
1745 (c == '.' && c2 == '=') ||
1746 (c == '&' && c2 == '&') ||
1747 (c == '|' && c2 == '|') ||
1748 (c == '+' && c2 == '+') ||
1749 (c == '-' && c2 == '-') ||
1750 (c == '-' && c2 == '>') ||
1751 (c == '<' && c2 == '<') ||
1752 (c == '>' && c2 == '>') ||
1753 // preprocessor tokens
1754 (c == '%' && c2 == '(') ||
1755 (c == '%' && c2 == '?') ||
1756 (c == '%' && c2 == ':') ||
1757 (c == '%' && c2 == ')'))
1758 {
1759 n->content += c2;
1760 input_get (); // swallow other character
1761 }
1762
1763 return n;
1764 }
1765
1766 else
1767 {
// Any other character becomes a junk token whose content is the
// character's value written as \x followed by hex digits.
1768 n->type = tok_junk;
1769 ostringstream s;
1770 s << "\\x" << hex << setw(2) << setfill('0') << c;
1771 n->content = s.str();
1772 n->msg = ""; // signal parser to emit "expected X, found junk" type error
1773 return n;
1774 }
1775 }
1776
1777 // ------------------------------------------------------------------------
1778
1779 void
1780 token::make_junk (const string new_msg)
1781 {
1782 type = tok_junk;
1783 msg = new_msg;
1784 }
1785
1786 // ------------------------------------------------------------------------
1787
1788 stapfile*
1789 parser::parse (bool errs_as_warnings)
1790 {
1791 stapfile* f = new stapfile;
1792 input.set_current_file (f);
1793
1794 bool empty = true;
1795
1796 while (1)
1797 {
1798 try
1799 {
1800 systemtap_v_seen = 0;
1801 const token* t = peek ();
1802 if (! t) // nice clean EOF, modulo any preprocessing that occurred
1803 break;
1804
1805 empty = false;
1806 if (t->type == tok_keyword && t->content == "probe")
1807 {
1808 context = con_probe;
1809 parse_probe (f->probes, f->aliases);
1810 }
1811 else if (t->type == tok_keyword && t->content == "global")
1812 {
1813 context = con_global;
1814 parse_global (f->globals, f->probes);
1815 }
1816 else if (t->type == tok_keyword && t->content == "function")
1817 {
1818 context = con_function;
1819 parse_functiondecl (f->functions);
1820 }
1821 else if (t->type == tok_embedded)
1822 {
1823 context = con_embedded;
1824 f->embeds.push_back (parse_embeddedcode ());
1825 }
1826 else
1827 {
1828 context = con_unknown;
1829 throw PARSE_ERROR (_("expected 'probe', 'global', 'function', or '%{'"));
1830 }
1831 }
1832 catch (parse_error& pe)
1833 {
1834 print_error (pe, errs_as_warnings);
1835
1836 // XXX: do we want tok_junk to be able to force skip_some behaviour?
1837 if (pe.skip_some) // for recovery
1838 // Quietly swallow all tokens until the next keyword we can start parsing from.
1839 while (1)
1840 try
1841 {
1842 {
1843 const token* t = peek ();
1844 if (! t)
1845 break;
1846 if (t->type == tok_keyword && t->content == "probe") break;
1847 else if (t->type == tok_keyword && t->content == "global") break;
1848 else if (t->type == tok_keyword && t->content == "function") break;
1849 else if (t->type == tok_embedded) break;
1850 swallow (); // swallow it
1851 }
1852 }
1853 catch (parse_error& pe2)
1854 {
1855 // parse error during recovery ... ugh
1856 print_error (pe2);
1857 }
1858 }
1859 }
1860
1861 if (empty)
1862 {
1863 // vary message depending on whether file was *actually* empty:
1864 cerr << (input.saw_tokens
1865 ? _F("Input file '%s' is empty after preprocessing.", input_name.c_str())
1866 : _F("Input file '%s' is empty.", input_name.c_str()))
1867 << endl;
1868 delete f;
1869 f = 0;
1870 }
1871 else if (num_errors > 0)
1872 {
1873 cerr << _NF("%d parse error.", "%d parse errors.", num_errors, num_errors) << endl;
1874 delete f;
1875 f = 0;
1876 }
1877
1878 input.set_current_file(0);
1879 return f;
1880 }
1881
1882
1883 probe*
1884 parser::parse_synthetic_probe (const token* chain, bool errs_as_warnings)
1885 {
1886 probe* p = NULL;
1887 stapfile* f = new stapfile;
1888 f->synthetic = true;
1889 input.set_current_file (f);
1890 input.set_current_token_chain (chain);
1891
1892 try
1893 {
1894 context = con_probe;
1895 parse_probe (f->probes, f->aliases);
1896
1897 if (f->probes.size() != 1 || !f->aliases.empty())
1898 throw PARSE_ERROR (_("expected a single synthetic probe"));
1899 p = f->probes[0];
1900 }
1901 catch (parse_error& pe)
1902 {
1903 print_error (pe, errs_as_warnings);
1904 }
1905
1906 // TODO check for unparsed tokens?
1907
1908 input.set_current_file(0);
1909 input.set_current_token_chain(0);
1910 return p;
1911 }
1912
1913
1914 void
1915 parser::parse_probe (std::vector<probe *> & probe_ret,
1916 std::vector<probe_alias *> & alias_ret)
1917 {
1918 const token* t0 = next ();
1919 if (! (t0->type == tok_keyword && t0->content == "probe"))
1920 throw PARSE_ERROR (_("expected 'probe'"));
1921
1922 vector<probe_point *> aliases;
1923 vector<probe_point *> locations;
1924
1925 bool equals_ok = true;
1926
1927 int epilogue_alias = 0;
1928
1929 while (1)
1930 {
1931 probe_point * pp = parse_probe_point ();
1932
1933 const token* t = peek ();
1934 if (equals_ok && t
1935 && t->type == tok_operator && t->content == "=")
1936 {
1937 if (pp->optional || pp->sufficient)
1938 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pp->components.front()->tok);
1939 aliases.push_back(pp);
1940 swallow ();
1941 continue;
1942 }
1943 else if (equals_ok && t
1944 && t->type == tok_operator && t->content == "+=")
1945 {
1946 if (pp->optional || pp->sufficient)
1947 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pp->components.front()->tok);
1948 aliases.push_back(pp);
1949 epilogue_alias = 1;
1950 swallow ();
1951 continue;
1952 }
1953 else if (t && t->type == tok_operator && t->content == ",")
1954 {
1955 locations.push_back(pp);
1956 equals_ok = false;
1957 swallow ();
1958 continue;
1959 }
1960 else if (t && t->type == tok_operator && t->content == "{")
1961 {
1962 locations.push_back(pp);
1963 break;
1964 }
1965 else
1966 throw PARSE_ERROR (_("expected probe point specifier"));
1967 }
1968
1969 if (aliases.empty())
1970 {
1971 probe* p = new probe;
1972 p->tok = t0;
1973 p->locations = locations;
1974 p->body = parse_stmt_block ();
1975 p->privileged = privileged;
1976 p->systemtap_v_conditional = systemtap_v_seen;
1977 probe_ret.push_back (p);
1978 }
1979 else
1980 {
1981 probe_alias* p = new probe_alias (aliases);
1982 if(epilogue_alias)
1983 p->epilogue_style = true;
1984 else
1985 p->epilogue_style = false;
1986 p->tok = t0;
1987 p->locations = locations;
1988 p->body = parse_stmt_block ();
1989 p->privileged = privileged;
1990 p->systemtap_v_conditional = systemtap_v_seen;
1991 alias_ret.push_back (p);
1992 }
1993 }
1994
1995
1996 embeddedcode*
1997 parser::parse_embeddedcode ()
1998 {
1999 embeddedcode* e = new embeddedcode;
2000 const token* t = next ();
2001 if (t->type != tok_embedded)
2002 throw PARSE_ERROR (_("expected '%{'"));
2003
2004 if (! privileged)
2005 throw PARSE_ERROR (_("embedded code in unprivileged script; need stap -g"),
2006 false /* don't skip tokens for parse resumption */);
2007
2008 e->tok = t;
2009 e->code = t->content;
2010 return e;
2011 }
2012
2013
2014 block*
2015 parser::parse_stmt_block ()
2016 {
2017 block* pb = new block;
2018
2019 const token* t = next ();
2020 if (! (t->type == tok_operator && t->content == "{"))
2021 throw PARSE_ERROR (_("expected '{'"));
2022
2023 pb->tok = t;
2024
2025 while (1)
2026 {
2027 t = peek ();
2028 if (t && t->type == tok_operator && t->content == "}")
2029 {
2030 swallow ();
2031 break;
2032 }
2033 pb->statements.push_back (parse_statement ());
2034 }
2035
2036 return pb;
2037 }
2038
2039
2040 try_block*
2041 parser::parse_try_block ()
2042 {
2043 try_block* pb = new try_block;
2044
2045 pb->tok = expect_kw_token ("try");
2046 pb->try_block = parse_stmt_block();
2047 expect_kw ("catch");
2048
2049 const token* t = peek ();
2050 if (t != NULL && t->type == tok_operator && t->content == "(")
2051 {
2052 swallow (); // swallow the '('
2053
2054 t = next();
2055 if (! (t->type == tok_identifier))
2056 throw PARSE_ERROR (_("expected identifier"));
2057 symbol* sym = new symbol;
2058 sym->tok = t;
2059 sym->name = t->content;
2060 pb->catch_error_var = sym;
2061
2062 expect_op (")");
2063 }
2064 else
2065 pb->catch_error_var = 0;
2066
2067 pb->catch_block = parse_stmt_block();
2068
2069 return pb;
2070 }
2071
2072
2073
2074 statement*
2075 parser::parse_statement ()
2076 {
2077 statement *ret;
2078 const token* t = peek ();
2079 if (t && t->type == tok_operator && t->content == ";")
2080 return new null_statement (next ());
2081 else if (t && t->type == tok_operator && t->content == "{")
2082 return parse_stmt_block (); // Don't squash semicolons.
2083 else if (t && t->type == tok_keyword && t->content == "try")
2084 return parse_try_block (); // Don't squash semicolons.
2085 else if (t && t->type == tok_keyword && t->content == "if")
2086 return parse_if_statement (); // Don't squash semicolons.
2087 else if (t && t->type == tok_keyword && t->content == "for")
2088 return parse_for_loop (); // Don't squash semicolons.
2089 else if (t && t->type == tok_keyword && t->content == "foreach")
2090 return parse_foreach_loop (); // Don't squash semicolons.
2091 else if (t && t->type == tok_keyword && t->content == "while")
2092 return parse_while_loop (); // Don't squash semicolons.
2093 else if (t && t->type == tok_keyword && t->content == "return")
2094 ret = parse_return_statement ();
2095 else if (t && t->type == tok_keyword && t->content == "delete")
2096 ret = parse_delete_statement ();
2097 else if (t && t->type == tok_keyword && t->content == "break")
2098 ret = parse_break_statement ();
2099 else if (t && t->type == tok_keyword && t->content == "continue")
2100 ret = parse_continue_statement ();
2101 else if (t && t->type == tok_keyword && t->content == "next")
2102 ret = parse_next_statement ();
2103 else if (t && (t->type == tok_operator || // expressions are flexible
2104 t->type == tok_identifier ||
2105 t->type == tok_number ||
2106 t->type == tok_string ||
2107 t->type == tok_embedded ))
2108 ret = parse_expr_statement ();
2109 // XXX: consider generally accepting tok_embedded here too
2110 else
2111 throw PARSE_ERROR (_("expected statement"));
2112
2113 // Squash "empty" trailing colons after any "non-block-like" statement.
2114 t = peek ();
2115 if (t && t->type == tok_operator && t->content == ";")
2116 {
2117 swallow (); // Silently eat trailing ; after statement
2118 }
2119
2120 return ret;
2121 }
2122
2123
2124 void
2125 parser::parse_global (vector <vardecl*>& globals, vector<probe*>&)
2126 {
2127 const token* t0 = next ();
2128 if (! (t0->type == tok_keyword && t0->content == "global"))
2129 throw PARSE_ERROR (_("expected 'global'"));
2130 swallow ();
2131
2132 while (1)
2133 {
2134 const token* t = next ();
2135 if (! (t->type == tok_identifier))
2136 throw PARSE_ERROR (_("expected identifier"));
2137
2138 for (unsigned i=0; i<globals.size(); i++)
2139 if (globals[i]->name == t->content)
2140 throw PARSE_ERROR (_("duplicate global name"));
2141
2142 vardecl* d = new vardecl;
2143 d->name = t->content;
2144 d->tok = t;
2145 d->systemtap_v_conditional = systemtap_v_seen;
2146 globals.push_back (d);
2147
2148 t = peek ();
2149
2150 if(t && t->type == tok_operator && t->content == "%") //wrapping
2151 {
2152 d->wrap = true;
2153 swallow ();
2154 t = peek();
2155 }
2156
2157 if (t && t->type == tok_operator && t->content == "[") // array size
2158 {
2159 int64_t size;
2160 swallow ();
2161 expect_number(size);
2162 if (size <= 0 || size > 1000000) // arbitrary max
2163 throw PARSE_ERROR(_("array size out of range"));
2164 d->maxsize = (int)size;
2165 expect_known(tok_operator, "]");
2166 t = peek ();
2167 }
2168
2169 if (t && t->type == tok_operator && t->content == "=") // initialization
2170 {
2171 if (!d->compatible_arity(0))
2172 throw PARSE_ERROR(_("only scalar globals can be initialized"));
2173 d->set_arity(0, t);
2174 next (); // Don't swallow, set_arity() used the peeked token.
2175 d->init = parse_literal ();
2176 d->type = d->init->type;
2177 t = peek ();
2178 }
2179
2180 if (t && t->type == tok_operator && t->content == ";") // termination
2181 {
2182 swallow ();
2183 break;
2184 }
2185
2186 if (t && t->type == tok_operator && t->content == ",") // next global
2187 {
2188 swallow ();
2189 continue;
2190 }
2191 else
2192 break;
2193 }
2194 }
2195
2196
2197 void
2198 parser::parse_functiondecl (std::vector<functiondecl*>& functions)
2199 {
2200 const token* t = next ();
2201 if (! (t->type == tok_keyword && t->content == "function"))
2202 throw PARSE_ERROR (_("expected 'function'"));
2203 swallow ();
2204
2205 t = next ();
2206 if (! (t->type == tok_identifier)
2207 && ! (t->type == tok_keyword
2208 && (t->content == "string" || t->content == "long")))
2209 throw PARSE_ERROR (_("expected identifier"));
2210
2211 for (unsigned i=0; i<functions.size(); i++)
2212 if (functions[i]->name == t->content)
2213 throw PARSE_ERROR (_("duplicate function name"));
2214
2215 functiondecl *fd = new functiondecl ();
2216 fd->name = t->content;
2217 fd->tok = t;
2218
2219 t = next ();
2220 if (t->type == tok_operator && t->content == ":")
2221 {
2222 swallow ();
2223 t = next ();
2224 if (t->type == tok_keyword && t->content == "string")
2225 fd->type = pe_string;
2226 else if (t->type == tok_keyword && t->content == "long")
2227 fd->type = pe_long;
2228 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2229 swallow ();
2230
2231 t = next ();
2232 }
2233
2234 if (! (t->type == tok_operator && t->content == "("))
2235 throw PARSE_ERROR (_("expected '('"));
2236 swallow ();
2237
2238 while (1)
2239 {
2240 t = next ();
2241
2242 // permit zero-argument functions
2243 if (t->type == tok_operator && t->content == ")")
2244 {
2245 swallow ();
2246 break;
2247 }
2248 else if (! (t->type == tok_identifier))
2249 throw PARSE_ERROR (_("expected identifier"));
2250 vardecl* vd = new vardecl;
2251 vd->name = t->content;
2252 vd->tok = t;
2253 fd->formal_args.push_back (vd);
2254 fd->systemtap_v_conditional = systemtap_v_seen;
2255
2256 t = next ();
2257 if (t->type == tok_operator && t->content == ":")
2258 {
2259 swallow ();
2260 t = next ();
2261 if (t->type == tok_keyword && t->content == "string")
2262 vd->type = pe_string;
2263 else if (t->type == tok_keyword && t->content == "long")
2264 vd->type = pe_long;
2265 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2266 swallow ();
2267 t = next ();
2268 }
2269 if (t->type == tok_operator && t->content == ")")
2270 {
2271 swallow ();
2272 break;
2273 }
2274 if (t->type == tok_operator && t->content == ",")
2275 {
2276 swallow ();
2277 continue;
2278 }
2279 else
2280 throw PARSE_ERROR (_("expected ',' or ')'"));
2281 }
2282
2283 t = peek ();
2284 if (t && t->type == tok_embedded)
2285 fd->body = parse_embeddedcode ();
2286 else
2287 fd->body = parse_stmt_block ();
2288
2289 functions.push_back (fd);
2290 }
2291
2292
2293 probe_point*
2294 parser::parse_probe_point ()
2295 {
2296 probe_point* pl = new probe_point;
2297
2298 while (1)
2299 {
2300 const token* t = next ();
2301 if (! (t->type == tok_identifier
2302 // we must allow ".return" and ".function", which are keywords
2303 || t->type == tok_keyword
2304 // we must allow "*", due to being an operator
2305 || (t->type == tok_operator && t->content == "*")))
2306 throw PARSE_ERROR (_("expected identifier or '*'"));
2307
2308 // loop which reconstitutes an identifier with wildcards
2309 string content = t->content;
2310 while (1)
2311 {
2312 const token* u = peek();
2313 if (u == NULL)
2314 break;
2315 // ensure pieces of the identifier are adjacent:
2316 if (input.ate_whitespace)
2317 break;
2318 // ensure pieces of the identifier are valid:
2319 if (! (u->type == tok_identifier
2320 // we must allow arbitrary keywords with a wildcard
2321 || u->type == tok_keyword
2322 // we must allow "*", due to being an operator
2323 || (u->type == tok_operator && u->content == "*")))
2324 break;
2325
2326 // append u to t
2327 content = content + u->content;
2328
2329 // consume u
2330 swallow ();
2331 }
2332 // get around const-ness of t:
2333 token* new_t = new token(*t);
2334 new_t->content = content;
2335 delete t; t = new_t;
2336
2337 probe_point::component* c = new probe_point::component;
2338 c->functor = t->content;
2339 c->tok = t;
2340 pl->components.push_back (c);
2341 // NB we may add c->arg soon
2342
2343 t = peek ();
2344
2345 // consume optional parameter
2346 if (t && t->type == tok_operator && t->content == "(")
2347 {
2348 swallow (); // consume "("
2349 c->arg = parse_literal ();
2350
2351 t = next ();
2352 if (! (t->type == tok_operator && t->content == ")"))
2353 throw PARSE_ERROR (_("expected ')'"));
2354 swallow ();
2355
2356 t = peek ();
2357 }
2358
2359 if (t && t->type == tok_operator && t->content == ".")
2360 {
2361 swallow ();
2362 continue;
2363 }
2364
2365 // We only fall through here at the end of a probe point (past
2366 // all the dotted/parametrized components).
2367
2368 if (t && t->type == tok_operator &&
2369 (t->content == "?" || t->content == "!"))
2370 {
2371 pl->optional = true;
2372 if (t->content == "!") pl->sufficient = true;
2373 // NB: sufficient implies optional
2374 swallow ();
2375 t = peek ();
2376 // fall through
2377 }
2378
2379 if (t && t->type == tok_keyword && t->content == "if")
2380 {
2381 swallow ();
2382 t = peek ();
2383 if (!(t && t->type == tok_operator && t->content == "("))
2384 throw PARSE_ERROR (_("expected '('"));
2385 swallow ();
2386
2387 pl->condition = parse_expression ();
2388
2389 t = peek ();
2390 if (!(t && t->type == tok_operator && t->content == ")"))
2391 throw PARSE_ERROR (_("expected ')'"));
2392 swallow ();
2393 t = peek ();
2394 // fall through
2395 }
2396
2397 if (t && t->type == tok_operator
2398 && (t->content == "{" || t->content == "," ||
2399 t->content == "=" || t->content == "+=" ))
2400 break;
2401
2402 throw PARSE_ERROR (_("expected one of '. , ( ? ! { = +='"));
2403 }
2404
2405 return pl;
2406 }
2407
2408
2409 literal_string*
2410 parser::consume_string_literals(const token *t)
2411 {
2412 literal_string *ls = new literal_string (t->content);
2413
2414 // PR11208: check if the next token is also a string literal;
2415 // auto-concatenate it. This is complicated to the extent that we
2416 // need to skip intermediate whitespace.
2417 //
2418 // NB for versions prior to 2.0: but don't skip over intervening comments
2419 const token *n = peek();
2420 while (n != NULL && n->type == tok_string
2421 && ! (strverscmp(session.compatible.c_str(), "2.0") < 0
2422 && input.ate_comment))
2423 {
2424 ls->value.append(next()->content); // consume and append the token
2425 n = peek();
2426 }
2427 return ls;
2428 }
2429
2430
2431 // Parse a string literal and perform backslash escaping on the contents:
2432 literal_string*
2433 parser::parse_literal_string ()
2434 {
2435 const token* t = next ();
2436 literal_string* l;
2437 if (t->type == tok_string)
2438 l = consume_string_literals (t);
2439 else
2440 throw PARSE_ERROR (_("expected literal string"));
2441
2442 l->tok = t;
2443 return l;
2444 }
2445
2446
2447 literal*
2448 parser::parse_literal ()
2449 {
2450 const token* t = next ();
2451 literal* l;
2452 if (t->type == tok_string)
2453 {
2454 l = consume_string_literals (t);
2455 }
2456 else
2457 {
2458 bool neg = false;
2459 if (t->type == tok_operator && t->content == "-")
2460 {
2461 neg = true;
2462 swallow ();
2463 t = next ();
2464 }
2465
2466 if (t->type == tok_number)
2467 {
2468 const char* startp = t->content.c_str ();
2469 char* endp = (char*) startp;
2470
2471 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
2472 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
2473 // since the lexer only gives us positive digit strings, but we'll
2474 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
2475 errno = 0;
2476 long long value = (long long) strtoull (startp, & endp, 0);
2477 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
2478 || (neg && (unsigned long long) value > 9223372036854775808ULL)
2479 || (unsigned long long) value > 18446744073709551615ULL
2480 || value < -9223372036854775807LL-1)
2481 throw PARSE_ERROR (_("number invalid or out of range"));
2482
2483 if (neg)
2484 value = -value;
2485
2486 l = new literal_number (value);
2487 }
2488 else
2489 throw PARSE_ERROR (_("expected literal string or number"));
2490 }
2491
2492 l->tok = t;
2493 return l;
2494 }
2495
2496
2497 if_statement*
2498 parser::parse_if_statement ()
2499 {
2500 const token* t = next ();
2501 if (! (t->type == tok_keyword && t->content == "if"))
2502 throw PARSE_ERROR (_("expected 'if'"));
2503 if_statement* s = new if_statement;
2504 s->tok = t;
2505
2506 t = next ();
2507 if (! (t->type == tok_operator && t->content == "("))
2508 throw PARSE_ERROR (_("expected '('"));
2509 swallow ();
2510
2511 s->condition = parse_expression ();
2512
2513 t = next ();
2514 if (! (t->type == tok_operator && t->content == ")"))
2515 throw PARSE_ERROR (_("expected ')'"));
2516 swallow ();
2517
2518 s->thenblock = parse_statement ();
2519
2520 t = peek ();
2521 if (t && t->type == tok_keyword && t->content == "else")
2522 {
2523 swallow ();
2524 s->elseblock = parse_statement ();
2525 }
2526 else
2527 s->elseblock = 0; // in case not otherwise initialized
2528
2529 return s;
2530 }
2531
2532
2533 expr_statement*
2534 parser::parse_expr_statement ()
2535 {
2536 expr_statement *es = new expr_statement;
2537 const token* t = peek ();
2538 if (t == NULL)
2539 throw PARSE_ERROR (_("expression statement expected"));
2540 // Copy, we only peeked, parse_expression might swallow.
2541 es->tok = new token (*t);
2542 es->value = parse_expression ();
2543 return es;
2544 }
2545
2546
2547 return_statement*
2548 parser::parse_return_statement ()
2549 {
2550 const token* t = next ();
2551 if (! (t->type == tok_keyword && t->content == "return"))
2552 throw PARSE_ERROR (_("expected 'return'"));
2553 if (context != con_function)
2554 throw PARSE_ERROR (_("found 'return' not in function context"));
2555 return_statement* s = new return_statement;
2556 s->tok = t;
2557 s->value = parse_expression ();
2558 return s;
2559 }
2560
2561
2562 delete_statement*
2563 parser::parse_delete_statement ()
2564 {
2565 const token* t = next ();
2566 if (! (t->type == tok_keyword && t->content == "delete"))
2567 throw PARSE_ERROR (_("expected 'delete'"));
2568 delete_statement* s = new delete_statement;
2569 s->tok = t;
2570 s->value = parse_expression ();
2571 return s;
2572 }
2573
2574
2575 next_statement*
2576 parser::parse_next_statement ()
2577 {
2578 const token* t = next ();
2579 if (! (t->type == tok_keyword && t->content == "next"))
2580 throw PARSE_ERROR (_("expected 'next'"));
2581 if (context != con_probe)
2582 throw PARSE_ERROR (_("found 'next' not in probe context"));
2583 next_statement* s = new next_statement;
2584 s->tok = t;
2585 return s;
2586 }
2587
2588
2589 break_statement*
2590 parser::parse_break_statement ()
2591 {
2592 const token* t = next ();
2593 if (! (t->type == tok_keyword && t->content == "break"))
2594 throw PARSE_ERROR (_("expected 'break'"));
2595 break_statement* s = new break_statement;
2596 s->tok = t;
2597 return s;
2598 }
2599
2600
2601 continue_statement*
2602 parser::parse_continue_statement ()
2603 {
2604 const token* t = next ();
2605 if (! (t->type == tok_keyword && t->content == "continue"))
2606 throw PARSE_ERROR (_("expected 'continue'"));
2607 continue_statement* s = new continue_statement;
2608 s->tok = t;
2609 return s;
2610 }
2611
2612
2613 for_loop*
2614 parser::parse_for_loop ()
2615 {
2616 const token* t = next ();
2617 if (! (t->type == tok_keyword && t->content == "for"))
2618 throw PARSE_ERROR (_("expected 'for'"));
2619 for_loop* s = new for_loop;
2620 s->tok = t;
2621
2622 t = next ();
2623 if (! (t->type == tok_operator && t->content == "("))
2624 throw PARSE_ERROR (_("expected '('"));
2625 swallow ();
2626
2627 // initializer + ";"
2628 t = peek ();
2629 if (t && t->type == tok_operator && t->content == ";")
2630 {
2631 s->init = 0;
2632 swallow ();
2633 }
2634 else
2635 {
2636 s->init = parse_expr_statement ();
2637 t = next ();
2638 if (! (t->type == tok_operator && t->content == ";"))
2639 throw PARSE_ERROR (_("expected ';'"));
2640 swallow ();
2641 }
2642
2643 // condition + ";"
2644 t = peek ();
2645 if (t && t->type == tok_operator && t->content == ";")
2646 {
2647 literal_number* l = new literal_number(1);
2648 s->cond = l;
2649 s->cond->tok = next ();
2650 }
2651 else
2652 {
2653 s->cond = parse_expression ();
2654 t = next ();
2655 if (! (t->type == tok_operator && t->content == ";"))
2656 throw PARSE_ERROR (_("expected ';'"));
2657 swallow ();
2658 }
2659
2660 // increment + ")"
2661 t = peek ();
2662 if (t && t->type == tok_operator && t->content == ")")
2663 {
2664 s->incr = 0;
2665 swallow ();
2666 }
2667 else
2668 {
2669 s->incr = parse_expr_statement ();
2670 t = next ();
2671 if (! (t->type == tok_operator && t->content == ")"))
2672 throw PARSE_ERROR (_("expected ')'"));
2673 swallow ();
2674 }
2675
2676 // block
2677 s->block = parse_statement ();
2678
2679 return s;
2680 }
2681
2682
2683 for_loop*
2684 parser::parse_while_loop ()
2685 {
2686 const token* t = next ();
2687 if (! (t->type == tok_keyword && t->content == "while"))
2688 throw PARSE_ERROR (_("expected 'while'"));
2689 for_loop* s = new for_loop;
2690 s->tok = t;
2691
2692 t = next ();
2693 if (! (t->type == tok_operator && t->content == "("))
2694 throw PARSE_ERROR (_("expected '('"));
2695 swallow ();
2696
2697 // dummy init and incr fields
2698 s->init = 0;
2699 s->incr = 0;
2700
2701 // condition
2702 s->cond = parse_expression ();
2703
2704 t = next ();
2705 if (! (t->type == tok_operator && t->content == ")"))
2706 throw PARSE_ERROR (_("expected ')'"));
2707 swallow ();
2708
2709 // block
2710 s->block = parse_statement ();
2711
2712 return s;
2713 }
2714
2715
2716 foreach_loop*
2717 parser::parse_foreach_loop ()
2718 {
2719 const token* t = next ();
2720 if (! (t->type == tok_keyword && t->content == "foreach"))
2721 throw PARSE_ERROR (_("expected 'foreach'"));
2722 foreach_loop* s = new foreach_loop;
2723 s->tok = t;
2724 s->sort_direction = 0;
2725 s->sort_aggr = sc_none;
2726 s->value = NULL;
2727 s->limit = NULL;
2728
2729 t = next ();
2730 if (! (t->type == tok_operator && t->content == "("))
2731 throw PARSE_ERROR (_("expected '('"));
2732 swallow ();
2733
2734 symbol* lookahead_sym = NULL;
2735 int lookahead_sort = 0;
2736
2737 t = peek ();
2738 if (t && t->type == tok_identifier)
2739 {
2740 next ();
2741 lookahead_sym = new symbol;
2742 lookahead_sym->tok = t;
2743 lookahead_sym->name = t->content;
2744
2745 t = peek ();
2746 if (t && t->type == tok_operator &&
2747 (t->content == "+" || t->content == "-"))
2748 {
2749 lookahead_sort = (t->content == "+") ? 1 : -1;
2750 swallow ();
2751 }
2752
2753 t = peek ();
2754 if (t && t->type == tok_operator && t->content == "=")
2755 {
2756 swallow ();
2757 s->value = lookahead_sym;
2758 if (lookahead_sort)
2759 {
2760 s->sort_direction = lookahead_sort;
2761 s->sort_column = 0;
2762 }
2763 lookahead_sym = NULL;
2764 }
2765 }
2766
2767 // see also parse_array_in
2768
2769 bool parenthesized = false;
2770 t = peek ();
2771 if (!lookahead_sym && t && t->type == tok_operator && t->content == "[")
2772 {
2773 swallow ();
2774 parenthesized = true;
2775 }
2776
2777 if (lookahead_sym)
2778 {
2779 s->indexes.push_back (lookahead_sym);
2780 if (lookahead_sort)
2781 {
2782 s->sort_direction = lookahead_sort;
2783 s->sort_column = 1;
2784 }
2785 lookahead_sym = NULL;
2786 }
2787 else while (1)
2788 {
2789 t = next ();
2790 if (! (t->type == tok_identifier))
2791 throw PARSE_ERROR (_("expected identifier"));
2792 symbol* sym = new symbol;
2793 sym->tok = t;
2794 sym->name = t->content;
2795 s->indexes.push_back (sym);
2796
2797 t = peek ();
2798 if (t && t->type == tok_operator &&
2799 (t->content == "+" || t->content == "-"))
2800 {
2801 if (s->sort_direction)
2802 throw PARSE_ERROR (_("multiple sort directives"));
2803 s->sort_direction = (t->content == "+") ? 1 : -1;
2804 s->sort_column = s->indexes.size();
2805 swallow ();
2806 }
2807
2808 if (parenthesized)
2809 {
2810 t = peek ();
2811 if (t && t->type == tok_operator && t->content == ",")
2812 {
2813 swallow ();
2814 continue;
2815 }
2816 else if (t && t->type == tok_operator && t->content == "]")
2817 {
2818 swallow ();
2819 break;
2820 }
2821 else
2822 throw PARSE_ERROR (_("expected ',' or ']'"));
2823 }
2824 else
2825 break; // expecting only one expression
2826 }
2827
2828 t = next ();
2829 if (! (t->type == tok_keyword && t->content == "in"))
2830 throw PARSE_ERROR (_("expected 'in'"));
2831 swallow ();
2832
2833 s->base = parse_indexable();
2834
2835 // check for atword, see also expect_ident_or_atword,
2836 t = peek ();
2837 if (t && t->type == tok_operator && t->content[0] == '@')
2838 {
2839 if (t->content == "@avg") s->sort_aggr = sc_average;
2840 else if (t->content == "@min") s->sort_aggr = sc_min;
2841 else if (t->content == "@max") s->sort_aggr = sc_max;
2842 else if (t->content == "@count") s->sort_aggr = sc_count;
2843 else if (t->content == "@sum") s->sort_aggr = sc_sum;
2844 else throw PARSE_ERROR(_("expected statistical operation"));
2845 swallow();
2846
2847 t = peek ();
2848 if (! (t && t->type == tok_operator && (t->content == "+" || t->content == "-")))
2849 throw PARSE_ERROR(_("expected sort directive"));
2850 }
2851
2852 t = peek ();
2853 if (t && t->type == tok_operator &&
2854 (t->content == "+" || t->content == "-"))
2855 {
2856 if (s->sort_direction)
2857 throw PARSE_ERROR (_("multiple sort directives"));
2858 s->sort_direction = (t->content == "+") ? 1 : -1;
2859 s->sort_column = 0;
2860 swallow ();
2861 }
2862
2863 t = peek ();
2864 if (tok_is(t, tok_keyword, "limit"))
2865 {
2866 swallow (); // get past the "limit"
2867 s->limit = parse_expression ();
2868 }
2869
2870 t = next ();
2871 if (! (t->type == tok_operator && t->content == ")"))
2872 throw PARSE_ERROR ("expected ')'");
2873 swallow ();
2874
2875 s->block = parse_statement ();
2876 return s;
2877 }
2878
2879
2880 expression*
2881 parser::parse_expression ()
2882 {
2883 return parse_assignment ();
2884 }
2885
2886
2887 expression*
2888 parser::parse_assignment ()
2889 {
2890 expression* op1 = parse_ternary ();
2891
2892 const token* t = peek ();
2893 // right-associative operators
2894 if (t && t->type == tok_operator
2895 && (t->content == "=" ||
2896 t->content == "<<<" ||
2897 t->content == "+=" ||
2898 t->content == "-=" ||
2899 t->content == "*=" ||
2900 t->content == "/=" ||
2901 t->content == "%=" ||
2902 t->content == "<<=" ||
2903 t->content == ">>=" ||
2904 t->content == "&=" ||
2905 t->content == "^=" ||
2906 t->content == "|=" ||
2907 t->content == ".=" ||
2908 false))
2909 {
2910 // NB: lvalueness is checked during elaboration / translation
2911 assignment* e = new assignment;
2912 e->left = op1;
2913 e->op = t->content;
2914 e->tok = t;
2915 next ();
2916 e->right = parse_expression ();
2917 op1 = e;
2918 }
2919
2920 return op1;
2921 }
2922
2923
2924 expression*
2925 parser::parse_ternary ()
2926 {
2927 expression* op1 = parse_logical_or ();
2928
2929 const token* t = peek ();
2930 if (t && t->type == tok_operator && t->content == "?")
2931 {
2932 ternary_expression* e = new ternary_expression;
2933 e->tok = t;
2934 e->cond = op1;
2935 next ();
2936 e->truevalue = parse_expression (); // XXX
2937
2938 t = next ();
2939 if (! (t->type == tok_operator && t->content == ":"))
2940 throw PARSE_ERROR (_("expected ':'"));
2941 swallow ();
2942
2943 e->falsevalue = parse_expression (); // XXX
2944 return e;
2945 }
2946 else
2947 return op1;
2948 }
2949
2950
2951 expression*
2952 parser::parse_logical_or ()
2953 {
2954 expression* op1 = parse_logical_and ();
2955
2956 const token* t = peek ();
2957 while (t && t->type == tok_operator && t->content == "||")
2958 {
2959 logical_or_expr* e = new logical_or_expr;
2960 e->tok = t;
2961 e->op = t->content;
2962 e->left = op1;
2963 next ();
2964 e->right = parse_logical_and ();
2965 op1 = e;
2966 t = peek ();
2967 }
2968
2969 return op1;
2970 }
2971
2972
2973 expression*
2974 parser::parse_logical_and ()
2975 {
2976 expression* op1 = parse_boolean_or ();
2977
2978 const token* t = peek ();
2979 while (t && t->type == tok_operator && t->content == "&&")
2980 {
2981 logical_and_expr *e = new logical_and_expr;
2982 e->left = op1;
2983 e->op = t->content;
2984 e->tok = t;
2985 next ();
2986 e->right = parse_boolean_or ();
2987 op1 = e;
2988 t = peek ();
2989 }
2990
2991 return op1;
2992 }
2993
2994
2995 expression*
2996 parser::parse_boolean_or ()
2997 {
2998 expression* op1 = parse_boolean_xor ();
2999
3000 const token* t = peek ();
3001 while (t && t->type == tok_operator && t->content == "|")
3002 {
3003 binary_expression* e = new binary_expression;
3004 e->left = op1;
3005 e->op = t->content;
3006 e->tok = t;
3007 next ();
3008 e->right = parse_boolean_xor ();
3009 op1 = e;
3010 t = peek ();
3011 }
3012
3013 return op1;
3014 }
3015
3016
3017 expression*
3018 parser::parse_boolean_xor ()
3019 {
3020 expression* op1 = parse_boolean_and ();
3021
3022 const token* t = peek ();
3023 while (t && t->type == tok_operator && t->content == "^")
3024 {
3025 binary_expression* e = new binary_expression;
3026 e->left = op1;
3027 e->op = t->content;
3028 e->tok = t;
3029 next ();
3030 e->right = parse_boolean_and ();
3031 op1 = e;
3032 t = peek ();
3033 }
3034
3035 return op1;
3036 }
3037
3038
3039 expression*
3040 parser::parse_boolean_and ()
3041 {
3042 expression* op1 = parse_array_in ();
3043
3044 const token* t = peek ();
3045 while (t && t->type == tok_operator && t->content == "&")
3046 {
3047 binary_expression* e = new binary_expression;
3048 e->left = op1;
3049 e->op = t->content;
3050 e->tok = t;
3051 next ();
3052 e->right = parse_array_in ();
3053 op1 = e;
3054 t = peek ();
3055 }
3056
3057 return op1;
3058 }
3059
3060
3061 expression*
3062 parser::parse_array_in ()
3063 {
3064 // This is a very tricky case. All these are legit expressions:
3065 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
3066 vector<expression*> indexes;
3067 bool parenthesized = false;
3068
3069 const token* t = peek ();
3070 if (t && t->type == tok_operator && t->content == "[")
3071 {
3072 swallow ();
3073 parenthesized = true;
3074 }
3075
3076 while (1)
3077 {
3078 expression* op1 = parse_comparison_or_regex_query ();
3079 indexes.push_back (op1);
3080
3081 if (parenthesized)
3082 {
3083 const token* t = peek ();
3084 if (t && t->type == tok_operator && t->content == ",")
3085 {
3086 swallow ();
3087 continue;
3088 }
3089 else if (t && t->type == tok_operator && t->content == "]")
3090 {
3091 swallow ();
3092 break;
3093 }
3094 else
3095 throw PARSE_ERROR (_("expected ',' or ']'"));
3096 }
3097 else
3098 break; // expecting only one expression
3099 }
3100
3101 t = peek ();
3102 if (t && t->type == tok_keyword && t->content == "in")
3103 {
3104 array_in *e = new array_in;
3105 e->tok = t;
3106 next ();
3107
3108 arrayindex* a = new arrayindex;
3109 a->indexes = indexes;
3110 a->base = parse_indexable();
3111 a->tok = a->base->tok;
3112 e->operand = a;
3113 return e;
3114 }
3115 else if (indexes.size() == 1) // no "in" - need one expression only
3116 return indexes[0];
3117 else
3118 throw PARSE_ERROR (_("unexpected comma-separated expression list"));
3119 }
3120
3121
3122 expression*
3123 parser::parse_comparison_or_regex_query ()
3124 {
3125 expression* op1 = parse_shift ();
3126
3127 // XXX precedence -- perhaps a =~ b == c =~ d --> (a =~ b) == (c =~ d) ?
3128 const token *t = peek();
3129 if (t && t->type == tok_operator
3130 && (t->content == "=~" ||
3131 t->content == "!~"))
3132 {
3133 regex_query* r = new regex_query;
3134 r->left = op1;
3135 r->op = t->content;
3136 r->tok = t;
3137 next ();
3138 r->right = parse_literal_string();
3139 op1 = r;
3140 t = peek ();
3141 }
3142 else while (t && t->type == tok_operator
3143 && (t->content == ">" ||
3144 t->content == "<" ||
3145 t->content == "==" ||
3146 t->content == "!=" ||
3147 t->content == "<=" ||
3148 t->content == ">="))
3149 {
3150 comparison* e = new comparison;
3151 e->left = op1;
3152 e->op = t->content;
3153 e->tok = t;
3154 next ();
3155 e->right = parse_shift ();
3156 op1 = e;
3157 t = peek ();
3158 }
3159
3160 return op1;
3161 }
3162
3163
3164 expression*
3165 parser::parse_shift ()
3166 {
3167 expression* op1 = parse_concatenation ();
3168
3169 const token* t = peek ();
3170 while (t && t->type == tok_operator &&
3171 (t->content == "<<" || t->content == ">>"))
3172 {
3173 binary_expression* e = new binary_expression;
3174 e->left = op1;
3175 e->op = t->content;
3176 e->tok = t;
3177 next ();
3178 e->right = parse_concatenation ();
3179 op1 = e;
3180 t = peek ();
3181 }
3182
3183 return op1;
3184 }
3185
3186
3187 expression*
3188 parser::parse_concatenation ()
3189 {
3190 expression* op1 = parse_additive ();
3191
3192 const token* t = peek ();
3193 // XXX: the actual awk string-concatenation operator is *whitespace*.
3194 // I don't know how to easily to model that here.
3195 while (t && t->type == tok_operator && t->content == ".")
3196 {
3197 concatenation* e = new concatenation;
3198 e->left = op1;
3199 e->op = t->content;
3200 e->tok = t;
3201 next ();
3202 e->right = parse_additive ();
3203 op1 = e;
3204 t = peek ();
3205 }
3206
3207 return op1;
3208 }
3209
3210
3211 expression*
3212 parser::parse_additive ()
3213 {
3214 expression* op1 = parse_multiplicative ();
3215
3216 const token* t = peek ();
3217 while (t && t->type == tok_operator
3218 && (t->content == "+" || t->content == "-"))
3219 {
3220 binary_expression* e = new binary_expression;
3221 e->op = t->content;
3222 e->left = op1;
3223 e->tok = t;
3224 next ();
3225 e->right = parse_multiplicative ();
3226 op1 = e;
3227 t = peek ();
3228 }
3229
3230 return op1;
3231 }
3232
3233
3234 expression*
3235 parser::parse_multiplicative ()
3236 {
3237 expression* op1 = parse_unary ();
3238
3239 const token* t = peek ();
3240 while (t && t->type == tok_operator
3241 && (t->content == "*" || t->content == "/" || t->content == "%"))
3242 {
3243 binary_expression* e = new binary_expression;
3244 e->op = t->content;
3245 e->left = op1;
3246 e->tok = t;
3247 next ();
3248 e->right = parse_unary ();
3249 op1 = e;
3250 t = peek ();
3251 }
3252
3253 return op1;
3254 }
3255
3256
3257 expression*
3258 parser::parse_unary ()
3259 {
3260 const token* t = peek ();
3261 if (t && t->type == tok_operator
3262 && (t->content == "+" ||
3263 t->content == "-" ||
3264 t->content == "!" ||
3265 t->content == "~" ||
3266 false))
3267 {
3268 unary_expression* e = new unary_expression;
3269 e->op = t->content;
3270 e->tok = t;
3271 next ();
3272 e->operand = parse_unary ();
3273 return e;
3274 }
3275 else
3276 return parse_crement ();
3277 }
3278
3279
3280 expression*
3281 parser::parse_crement () // as in "increment" / "decrement"
3282 {
3283 // NB: Ideally, we'd parse only a symbol as an operand to the
3284 // *crement operators, instead of a general expression value. We'd
3285 // need more complex lookahead code to tell apart the postfix cases.
3286 // So we just punt, and leave it to pass-3 to signal errors on
3287 // cases like "4++".
3288
3289 const token* t = peek ();
3290 if (t && t->type == tok_operator
3291 && (t->content == "++" || t->content == "--"))
3292 {
3293 pre_crement* e = new pre_crement;
3294 e->op = t->content;
3295 e->tok = t;
3296 next ();
3297 e->operand = parse_value ();
3298 return e;
3299 }
3300
3301 // post-crement or non-crement
3302 expression *op1 = parse_value ();
3303
3304 t = peek ();
3305 if (t && t->type == tok_operator
3306 && (t->content == "++" || t->content == "--"))
3307 {
3308 post_crement* e = new post_crement;
3309 e->op = t->content;
3310 e->tok = t;
3311 next ();
3312 e->operand = op1;
3313 return e;
3314 }
3315 else
3316 return op1;
3317 }
3318
3319
3320 expression*
3321 parser::parse_value ()
3322 {
3323 const token* t = peek ();
3324 if (! t)
3325 throw PARSE_ERROR (_("expected value"));
3326
3327 if (t->type == tok_embedded)
3328 {
3329 if (! privileged)
3330 throw PARSE_ERROR (_("embedded expression code in unprivileged script; need stap -g"), false);
3331
3332 embedded_expr *e = new embedded_expr;
3333 e->tok = t;
3334 e->code = t->content;
3335 next ();
3336 return e;
3337 }
3338
3339 if (t->type == tok_operator && t->content == "(")
3340 {
3341 swallow ();
3342 expression* e = parse_expression ();
3343 t = next ();
3344 if (! (t->type == tok_operator && t->content == ")"))
3345 throw PARSE_ERROR (_("expected ')'"));
3346 swallow ();
3347 return e;
3348 }
3349 else if (t->type == tok_operator && t->content == "&")
3350 {
3351 next (); // Cannot swallow, passing token on...
3352 return parse_target_symbol (t);
3353 }
3354 else if (t->type == tok_identifier
3355 || (t->type == tok_operator && t->content[0] == '@'))
3356 return parse_symbol ();
3357 else
3358 return parse_literal ();
3359 }
3360
3361
3362 const token *
3363 parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name)
3364 {
3365 hop = NULL;
3366 const token* t = expect_ident_or_atword (name);
3367 if (name == "@hist_linear" || name == "@hist_log")
3368 {
3369 hop = new hist_op;
3370 if (name == "@hist_linear")
3371 hop->htype = hist_linear;
3372 else if (name == "@hist_log")
3373 hop->htype = hist_log;
3374 hop->tok = t;
3375 expect_op("(");
3376 hop->stat = parse_expression ();
3377 int64_t tnum;
3378 if (hop->htype == hist_linear)
3379 {
3380 for (size_t i = 0; i < 3; ++i)
3381 {
3382 expect_op (",");
3383 expect_number (tnum);
3384 hop->params.push_back (tnum);
3385 }
3386 }
3387 expect_op(")");
3388 }
3389 return t;
3390 }
3391
3392
3393 indexable*
3394 parser::parse_indexable ()
3395 {
3396 hist_op *hop = NULL;
3397 string name;
3398 const token *tok = parse_hist_op_or_bare_name(hop, name);
3399 if (hop)
3400 return hop;
3401 else
3402 {
3403 symbol* sym = new symbol;
3404 sym->name = name;
3405 sym->tok = tok;
3406 return sym;
3407 }
3408 }
3409
3410
3411 // var, indexable[index], func(parms), printf("...", ...), $var,
3412 // @cast, @defined, @entry, @var, $var->member, @stat_op(stat)
3413 expression* parser::parse_symbol ()
3414 {
3415 hist_op *hop = NULL;
3416 symbol *sym = NULL;
3417 string name;
3418 const token *t = parse_hist_op_or_bare_name(hop, name);
3419
3420 if (!hop)
3421 {
3422 // If we didn't get a hist_op, then we did get an identifier. We can
3423 // now scrutinize this identifier for the various magic forms of identifier
3424 // (printf, @stat_op, and $var...)
3425
3426 if (name == "@cast"
3427 || name == "@var"
3428 || (name.size() > 0 && name[0] == '$'))
3429 return parse_target_symbol (t);
3430
3431 // NB: PR11343: @defined() is not incompatible with earlier versions
3432 // of stap, so no need to check session.compatible for 1.2
3433 if (name == "@defined")
3434 return parse_defined_op (t);
3435
3436 if (name == "@entry")
3437 return parse_entry_op (t);
3438
3439 if (name == "@perf")
3440 return parse_perf_op (t);
3441
3442 if (name.size() > 0 && name[0] == '@')
3443 {
3444 stat_op *sop = new stat_op;
3445 if (name == "@avg")
3446 sop->ctype = sc_average;
3447 else if (name == "@count")
3448 sop->ctype = sc_count;
3449 else if (name == "@sum")
3450 sop->ctype = sc_sum;
3451 else if (name == "@min")
3452 sop->ctype = sc_min;
3453 else if (name == "@max")
3454 sop->ctype = sc_max;
3455 else
3456 throw PARSE_ERROR(_("unknown operator ") + name);
3457 expect_op("(");
3458 sop->tok = t;
3459 sop->stat = parse_expression ();
3460 expect_op(")");
3461 return sop;
3462 }
3463
3464 else if (print_format *fmt = print_format::create(t))
3465 {
3466 expect_op("(");
3467 if ((name == "print" || name == "println" ||
3468 name == "sprint" || name == "sprintln") &&
3469 (peek_op("@hist_linear") || peek_op("@hist_log")))
3470 {
3471 // We have a special case where we recognize
3472 // print(@hist_foo(bar)) as a magic print-the-histogram
3473 // construct. This is sort of gross but it avoids
3474 // promoting histogram references to typeful
3475 // expressions.
3476
3477 hop = NULL;
3478 t = parse_hist_op_or_bare_name(hop, name);
3479 assert(hop);
3480
3481 // It is, sadly, possible that even while parsing a
3482 // hist_op, we *mis-guessed* and the user wishes to
3483 // print(@hist_op(foo)[bucket]), a scalar. In that case
3484 // we must parse the arrayindex and print an expression.
3485 //
3486 // XXX: This still fails if the arrayindex is part of a
3487 // larger expression. To really handle everything, we'd
3488 // need to push back all the hist tokens and start over.
3489
3490 if (!peek_op ("["))
3491 fmt->hist = hop;
3492 else
3493 {
3494 // This is a simplified version of the
3495 // multi-array-index parser below, because we can
3496 // only ever have one index on a histogram anyways.
3497 expect_op("[");
3498 struct arrayindex* ai = new arrayindex;
3499 ai->tok = t;
3500 ai->base = hop;
3501 ai->indexes.push_back (parse_expression ());
3502 expect_op("]");
3503 fmt->args.push_back(ai);
3504
3505 // Consume any subsequent arguments.
3506 while (!peek_op (")"))
3507 {
3508 expect_op(",");
3509 expression *e = parse_expression ();
3510 fmt->args.push_back(e);
3511 }
3512 }
3513 }
3514 else
3515 {
3516 int min_args = 0;
3517 bool consumed_arg = false;
3518 if (fmt->print_with_format)
3519 {
3520 // Consume and convert a format string. Agreement between the
3521 // format string and the arguments is postponed to the
3522 // typechecking phase.
3523 string tmp;
3524 expect_unknown (tok_string, tmp);
3525 fmt->raw_components = tmp;
3526 fmt->components = print_format::string_to_components (tmp);
3527 consumed_arg = true;
3528 }
3529 else if (fmt->print_with_delim)
3530 {
3531 // Consume a delimiter to separate arguments.
3532 fmt->delimiter.clear();
3533 fmt->delimiter.type = print_format::conv_literal;
3534 expect_unknown (tok_string, fmt->delimiter.literal_string);
3535 consumed_arg = true;
3536 min_args = 2; // so that the delim is used at least once
3537 }
3538 else if (!fmt->print_with_newline)
3539 {
3540 // If we are not printing with a format string, nor with a
3541 // delim, nor with a newline, then it's either print() or
3542 // sprint(), both of which require at least one argument (of
3543 // any type).
3544 min_args = 1;
3545 }
3546
3547 // Consume any subsequent arguments.
3548 while (min_args || !peek_op (")"))
3549 {
3550 if (consumed_arg)
3551 expect_op(",");
3552 expression *e = parse_expression ();
3553 fmt->args.push_back(e);
3554 consumed_arg = true;
3555 if (min_args)
3556 --min_args;
3557 }
3558 }
3559 expect_op(")");
3560 return fmt;
3561 }
3562
3563 else if (peek_op ("(")) // function call
3564 {
3565 swallow ();
3566 struct functioncall* f = new functioncall;
3567 f->tok = t;
3568 f->function = name;
3569 // Allow empty actual parameter list
3570 if (peek_op (")"))
3571 {
3572 swallow ();
3573 return f;
3574 }
3575 while (1)
3576 {
3577 f->args.push_back (parse_expression ());
3578 if (peek_op (")"))
3579 {
3580 swallow ();
3581 break;
3582 }
3583 else if (peek_op (","))
3584 {
3585 swallow ();
3586 continue;
3587 }
3588 else
3589 throw PARSE_ERROR (_("expected ',' or ')'"));
3590 }
3591 return f;
3592 }
3593
3594 else
3595 {
3596 sym = new symbol;
3597 sym->name = name;
3598 sym->tok = t;
3599 }
3600 }
3601
3602 // By now, either we had a hist_op in the first place, or else
3603 // we had a plain word and it was converted to a symbol.
3604
3605 assert (!hop != !sym); // logical XOR
3606
3607 // All that remains is to check for array indexing
3608
3609 if (peek_op ("[")) // array
3610 {
3611 swallow ();
3612 struct arrayindex* ai = new arrayindex;
3613 ai->tok = t;
3614
3615 if (hop)
3616 ai->base = hop;
3617 else
3618 ai->base = sym;
3619
3620 while (1)
3621 {
3622 ai->indexes.push_back (parse_expression ());
3623 if (peek_op ("]"))
3624 {
3625 swallow ();
3626 break;
3627 }
3628 else if (peek_op (","))
3629 {
3630 swallow ();
3631 continue;
3632 }
3633 else
3634 throw PARSE_ERROR (_("expected ',' or ']'"));
3635 }
3636 return ai;
3637 }
3638
3639 // If we got to here, we *should* have a symbol; if we have
3640 // a hist_op on its own, it doesn't count as an expression,
3641 // so we throw a parse error.
3642
3643 if (hop)
3644 throw PARSE_ERROR(_("base histogram operator where expression expected"), t);
3645
3646 return sym;
3647 }
3648
3649 // Parse a @cast or $var. Given head token has already been consumed.
3650 target_symbol* parser::parse_target_symbol (const token* t)
3651 {
3652 bool addressof = false;
3653 if (t->type == tok_operator && t->content == "&")
3654 {
3655 addressof = true;
3656 // Don't delete t before trying next token.
3657 // We might need it in the error message when there is no next token.
3658 const token *next_t = next ();
3659 delete t;
3660 t = next_t;
3661 }
3662
3663 if (t->type == tok_operator && t->content == "@cast")
3664 {
3665 cast_op *cop = new cast_op;
3666 cop->tok = t;
3667 cop->name = t->content;
3668 expect_op("(");
3669 cop->operand = parse_expression ();
3670 expect_op(",");
3671 expect_unknown(tok_string, cop->type_name);
3672 if (peek_op (","))
3673 {
3674 swallow ();
3675 expect_unknown(tok_string, cop->module);
3676 }
3677 expect_op(")");
3678 parse_target_symbol_components(cop);
3679 cop->addressof = addressof;
3680 return cop;
3681 }
3682
3683 if (t->type == tok_identifier && t->content[0]=='$')
3684 {
3685 // target_symbol time
3686 target_symbol *tsym = new target_symbol;
3687 tsym->tok = t;
3688 tsym->name = t->content;
3689 parse_target_symbol_components(tsym);
3690 tsym->addressof = addressof;
3691 return tsym;
3692 }
3693
3694 if (t->type == tok_operator && t->content == "@var")
3695 {
3696 atvar_op *aop = new atvar_op;
3697 aop->tok = t;
3698 aop->name = t->content;
3699 expect_op("(");
3700 expect_unknown(tok_string, aop->target_name);
3701 size_t found_at = aop->target_name.find("@");
3702 if (found_at != string::npos)
3703 aop->cu_name = aop->target_name.substr(found_at + 1);
3704 else
3705 aop->cu_name = "";
3706 if (peek_op (","))
3707 {
3708 swallow ();
3709 expect_unknown (tok_string, aop->module);
3710 }
3711 else
3712 aop->module = "";
3713 expect_op(")");
3714 parse_target_symbol_components(aop);
3715 aop->addressof = addressof;
3716 return aop;
3717 }
3718
3719 throw PARSE_ERROR (_("expected @cast, @var or $var"));
3720 }
3721
3722
3723 // Parse a @defined(). Given head token has already been consumed.
3724 expression* parser::parse_defined_op (const token* t)
3725 {
3726 defined_op* dop = new defined_op;
3727 dop->tok = t;
3728 expect_op("(");
3729 // no need for parse_hist_op... etc., as @defined takes only target_symbols as its operand.
3730 const token* tt = next ();
3731 dop->operand = parse_target_symbol (tt);
3732 expect_op(")");
3733 return dop;
3734 }
3735
3736
3737 // Parse a @entry(). Given head token has already been consumed.
3738 expression* parser::parse_entry_op (const token* t)
3739 {
3740 entry_op* eop = new entry_op;
3741 eop->tok = t;
3742 expect_op("(");
3743 eop->operand = parse_expression ();
3744 expect_op(")");
3745 return eop;
3746 }
3747
3748
3749 // Parse a @perf(). Given head token has already been consumed.
3750 expression* parser::parse_perf_op (const token* t)
3751 {
3752 perf_op* pop = new perf_op;
3753 pop->tok = t;
3754 expect_op("(");
3755 pop->operand = parse_literal_string ();
3756 if (pop->operand->value == "")
3757 throw PARSE_ERROR (_("expected non-empty string"));
3758 expect_op(")");
3759 return pop;
3760 }
3761
3762
3763
3764 void
3765 parser::parse_target_symbol_components (target_symbol* e)
3766 {
3767 bool pprint = false;
3768
3769 // check for pretty-print in the form $foo$
3770 string &base = e->name;
3771 size_t pprint_pos = base.find_last_not_of('$');
3772 if (0 < pprint_pos && pprint_pos < base.length() - 1)
3773 {
3774 string pprint_val = base.substr(pprint_pos + 1);
3775 base.erase(pprint_pos + 1);
3776 e->components.push_back (target_symbol::component(e->tok, pprint_val, true));
3777 pprint = true;
3778 }
3779
3780 while (!pprint)
3781 {
3782 if (peek_op ("->"))
3783 {
3784 const token* t = next();
3785 string member;
3786 expect_ident_or_keyword (member);
3787
3788 // check for pretty-print in the form $foo->$ or $foo->bar$
3789 pprint_pos = member.find_last_not_of('$');
3790 string pprint_val;
3791 if (pprint_pos == string::npos || pprint_pos < member.length() - 1)
3792 {
3793 pprint_val = member.substr(pprint_pos + 1);
3794 member.erase(pprint_pos + 1);
3795 pprint = true;
3796 }
3797
3798 if (!member.empty())
3799 e->components.push_back (target_symbol::component(t, member));
3800 if (pprint)
3801 e->components.push_back (target_symbol::component(t, pprint_val, true));
3802 }
3803 else if (peek_op ("["))
3804 {
3805 const token* t = next();
3806 expression* index = parse_expression();
3807 literal_number* ln = dynamic_cast<literal_number*>(index);
3808 if (ln)
3809 e->components.push_back (target_symbol::component(t, ln->value));
3810 else
3811 e->components.push_back (target_symbol::component(t, index));
3812 expect_op ("]");
3813 }
3814 else
3815 break;
3816 }
3817
3818 if (!pprint)
3819 {
3820 // check for pretty-print in the form $foo $
3821 // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$
3822 const token* t = peek();
3823 if (t != NULL && t->type == tok_identifier &&
3824 t->content.find_first_not_of('$') == string::npos)
3825 {
3826 t = next();
3827 e->components.push_back (target_symbol::component(t, t->content, true));
3828 pprint = true;
3829 }
3830 }
3831
3832 if (pprint && (peek_op ("->") || peek_op("[")))
3833 throw PARSE_ERROR(_("-> and [ are not accepted for a pretty-printing variable"));
3834 }
3835
3836 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.217598 seconds and 5 git commands to generate.