1 // recursive descent parser for systemtap scripts
2 // Copyright (C) 2005-2014 Red Hat Inc.
3 // Copyright (C) 2006 Intel Corporation.
4 // Copyright (C) 2007 Bull S.A.S
5 // Copyright (C) 2014 Peter Kjellstrom <cap@nsc.liu.se>
7 // This file is part of systemtap, and is free software. You can
8 // redistribute it and/or modify it under the terms of the GNU General
9 // Public License (GPL); either version 2, or (at your option) any
41 bool ate_comment
; // current token follows a comment
42 bool ate_whitespace
; // the most recent token followed whitespace
43 bool saw_tokens
; // the lexer found tokens (before preprocessing occurred)
46 lexer (istream
&, const string
&, systemtap_session
&);
47 void set_current_file (stapfile
* f
);
48 void set_current_token_chain (const token
* tok
);
50 static set
<string
> keywords
;
51 static set
<string
> atwords
;
53 inline int input_get ();
54 inline int input_peek (unsigned n
=0);
55 void input_put (const string
&, const token
*);
57 string input_contents
;
58 const char *input_pointer
; // index into input_contents
59 const char *input_end
;
60 unsigned cursor_suspend_count
;
61 unsigned cursor_suspend_line
;
62 unsigned cursor_suspend_column
;
64 unsigned cursor_column
;
65 systemtap_session
& session
;
66 stapfile
* current_file
;
67 const token
* current_token_chain
;
74 parser (systemtap_session
& s
, const string
& n
, istream
& i
, bool p
);
77 stapfile
* parse (bool errs_as_warnings
);
78 probe
* parse_synthetic_probe (const token
* chain
, bool errs_as_warnings
);
79 stapfile
* parse_library_macros (bool errs_as_warnings
);
90 struct pp1_activation
;
92 struct pp_macrodecl
: public macrodecl
{
93 pp1_activation
* parent_act
; // used for param bindings
94 virtual bool is_closure() { return parent_act
!= 0; }
95 pp_macrodecl () : macrodecl(), parent_act(0) { }
98 systemtap_session
& session
;
102 parse_context context
;
104 // preprocessing subordinate, first pass (macros)
105 struct pp1_activation
{
107 unsigned cursor
; // position within macro body
108 map
<string
, pp_macrodecl
*> params
;
110 macrodecl
* curr_macro
;
112 pp1_activation (const token
* tok
, macrodecl
* curr_macro
)
113 : tok(tok
), cursor(0), curr_macro(curr_macro
) { }
117 map
<string
, macrodecl
*> pp1_namespace
;
118 vector
<pp1_activation
*> pp1_state
;
119 const token
* next_pp1 ();
120 const token
* scan_pp1 ();
121 const token
* slurp_pp1_param (vector
<const token
*>& param
);
122 const token
* slurp_pp1_body (vector
<const token
*>& body
);
124 // preprocessing subordinate, final pass (conditionals)
125 vector
<pair
<const token
*, pp_state_t
> > pp_state
;
126 const token
* scan_pp ();
127 const token
* skip_pp ();
130 const token
* next ();
131 const token
* peek ();
133 // Advance past and throw away current token after peek () or next ().
136 const token
* systemtap_v_seen
;
137 const token
* last_t
; // the last value returned by peek() or next()
138 const token
* next_t
; // lookahead token
140 // expectations, these swallow the token
141 void expect_known (token_type tt
, string
const & expected
);
142 void expect_unknown (token_type tt
, string
& target
);
143 void expect_unknown2 (token_type tt1
, token_type tt2
, string
& target
);
145 // convenience forms, these also swallow the token
146 void expect_op (string
const & expected
);
147 void expect_kw (string
const & expected
);
148 void expect_number (int64_t & expected
);
149 void expect_ident_or_keyword (string
& target
);
151 // convenience forms, which return true or false, these don't swallow token
152 bool peek_op (string
const & op
);
153 bool peek_kw (string
const & kw
);
155 // convenience forms, which return the token
156 const token
* expect_kw_token (string
const & expected
);
157 const token
* expect_ident_or_atword (string
& target
);
159 void print_error (const parse_error
& pe
, bool errs_as_warnings
= false);
162 private: // nonterminals
163 void parse_probe (vector
<probe
*>&, vector
<probe_alias
*>&);
164 void parse_global (vector
<vardecl
*>&, vector
<probe
*>&);
165 void parse_functiondecl (vector
<functiondecl
*>&);
166 embeddedcode
* parse_embeddedcode ();
167 probe_point
* parse_probe_point ();
168 literal_string
* consume_string_literals (const token
*);
169 literal_string
* parse_literal_string ();
170 literal
* parse_literal ();
171 block
* parse_stmt_block ();
172 try_block
* parse_try_block ();
173 statement
* parse_statement ();
174 if_statement
* parse_if_statement ();
175 for_loop
* parse_for_loop ();
176 for_loop
* parse_while_loop ();
177 foreach_loop
* parse_foreach_loop ();
178 expr_statement
* parse_expr_statement ();
179 return_statement
* parse_return_statement ();
180 delete_statement
* parse_delete_statement ();
181 next_statement
* parse_next_statement ();
182 break_statement
* parse_break_statement ();
183 continue_statement
* parse_continue_statement ();
184 indexable
* parse_indexable ();
185 const token
*parse_hist_op_or_bare_name (hist_op
*&hop
, string
&name
);
186 target_symbol
*parse_target_symbol (const token
* t
);
187 expression
* parse_entry_op (const token
* t
);
188 expression
* parse_defined_op (const token
* t
);
189 expression
* parse_perf_op (const token
* t
);
190 expression
* parse_expression ();
191 expression
* parse_assignment ();
192 expression
* parse_ternary ();
193 expression
* parse_logical_or ();
194 expression
* parse_logical_and ();
195 expression
* parse_boolean_or ();
196 expression
* parse_boolean_xor ();
197 expression
* parse_boolean_and ();
198 expression
* parse_array_in ();
199 expression
* parse_comparison_or_regex_query ();
200 expression
* parse_shift ();
201 expression
* parse_concatenation ();
202 expression
* parse_additive ();
203 expression
* parse_multiplicative ();
204 expression
* parse_unary ();
205 expression
* parse_crement ();
206 expression
* parse_value ();
207 expression
* parse_symbol ();
209 void parse_target_symbol_components (target_symbol
* e
);
213 // ------------------------------------------------------------------------
216 parse (systemtap_session
& s
, istream
& i
, bool pr
, bool errs_as_warnings
)
218 parser
p (s
, "<input>", i
, pr
);
219 return p
.parse (errs_as_warnings
);
// Parse the script stored in the file called `name`: the file is opened
// for reading and a parser named after it is run over the stream.
// `pr` is handed to the parser constructor and `errs_as_warnings` to
// parser::parse.
224 parse (systemtap_session
& s
, const string
& name
, bool pr
, bool errs_as_warnings
)
226 ifstream
i(name
.c_str(), ios::in
);
// Diagnostic written to cerr; the wording depends on whether
// file_exists(name) holds (unreadable vs. missing).
// NOTE(review): the condition guarding this message and the end of the
// statement are not visible in this view -- presumably it only runs when
// the open failed; confirm.
229 cerr
<< (file_exists(name
)
230 ? _F("Input file '%s' can't be opened for reading.", name
.c_str())
231 : _F("Input file '%s' is missing.", name
.c_str()))
236 parser
p (s
, name
, i
, pr
);
237 return p
.parse (errs_as_warnings
);
// Open the file called `name` for reading and run
// parser::parse_library_macros over it, forwarding `errs_as_warnings`.
241 parse_library_macros (systemtap_session
& s
, const string
& name
, bool errs_as_warnings
)
243 ifstream
i(name
.c_str(), ios::in
);
// Diagnostic written to cerr; the wording depends on whether
// file_exists(name) holds (unreadable vs. missing).
// NOTE(review): the guarding condition and the end of the statement are
// not visible in this view -- presumably this only runs when the open
// failed; confirm.
246 cerr
<< (file_exists(name
)
247 ? _F("Input file '%s' can't be opened for reading.", name
.c_str())
248 : _F("Input file '%s' is missing.", name
.c_str()))
// The parser's last constructor argument (its "privileged" flag) is
// hard-wired to false here.
253 parser
p (s
, name
, i
, false); // TODOXX pr is ...? should path be full??
254 return p
.parse_library_macros (errs_as_warnings
);
258 parse_synthetic_probe (systemtap_session
&s
, std::istream
& i
, const token
* tok
)
260 parser
p (s
, "<synthetic>", i
, false);
261 return p
.parse_synthetic_probe (tok
, false);
264 // ------------------------------------------------------------------------
// Construct a parser for session `s` reading stream `i`; `n` becomes
// input_name and `p` is stored as the privileged flag.
// NOTE(review): the constructor body is not visible in this view.
267 parser::parser (systemtap_session
& s
, const string
&n
, istream
& i
, bool p
):
// The lexer member `input` is built from the stream, input_name and the
// session.
268 session (s
), input_name (n
), input (i
, input_name
, s
), privileged (p
),
// Context starts as con_unknown; the token pointers and the error count
// start at zero.
269 context(con_unknown
), systemtap_v_seen(0), last_t (0), next_t (0), num_errors (0)
278 tt2str(token_type tt
)
282 case tok_junk
: return "junk";
283 case tok_identifier
: return "identifier";
284 case tok_operator
: return "operator";
285 case tok_string
: return "string";
286 case tok_number
: return "number";
287 case tok_embedded
: return "embedded-code";
288 case tok_keyword
: return "keyword";
290 return "unknown token";
// Stream a source_loc to `o`.  The visible part writes the name of the
// location's file followed by ':'.
// NOTE(review): the rest of the statement and the return are not visible
// in this view.
294 operator << (ostream
& o
, const source_loc
& loc
)
296 o
<< loc
.file
->name
<< ":"
// Stream a token to `o`.
// NOTE(review): only fragments of the body are visible in this view; the
// statements controlled by the `if` and the `for` below, and the return,
// cannot be seen.
304 operator << (ostream
& o
, const token
& t
)
// Embedded-code and keyword tokens are excluded by this test.
308 if (t
.type
!= tok_embedded
&& t
.type
!= tok_keyword
) // XXX: other types?
// Walk the token content one character at a time.
311 for (unsigned i
=0; i
<t
.content
.length(); i
++)
313 char c
= t
.content
[i
];
// Printable characters are written unchanged; anything else is shown
// as '?'.
314 o
<< (isprint (c
) ? c
: '?');
// Report parse error `pe` through the session's print_error, passing
// along this parser's input_name and the errs_as_warnings flag.
// NOTE(review): any statements after the call are not visible in this
// view.
327 parser::print_error (const parse_error
&pe
, bool errs_as_warnings
)
// Use the token carried by the error itself; when it has none, fall back
// to last_t.
329 const token
*tok
= pe
.tok
? pe
.tok
: last_t
;
330 session
.print_error(pe
, tok
, input_name
, errs_as_warnings
);
337 template <typename OPERAND
>
338 bool eval_comparison (const OPERAND
& lhs
, const token
* op
, const OPERAND
& rhs
)
340 if (op
->type
== tok_operator
&& op
->content
== "<=")
341 { return lhs
<= rhs
; }
342 else if (op
->type
== tok_operator
&& op
->content
== ">=")
343 { return lhs
>= rhs
; }
344 else if (op
->type
== tok_operator
&& op
->content
== "<")
345 { return lhs
< rhs
; }
346 else if (op
->type
== tok_operator
&& op
->content
== ">")
347 { return lhs
> rhs
; }
348 else if (op
->type
== tok_operator
&& op
->content
== "==")
349 { return lhs
== rhs
; }
350 else if (op
->type
== tok_operator
&& op
->content
== "!=")
351 { return lhs
!= rhs
; }
353 throw PARSE_ERROR (_("expected comparison operator"), op
);
357 // Here, we perform on-the-fly preprocessing in two passes.
359 // First pass - macro declaration and expansion.
361 // The basic form of a declaration is @define SIGNATURE %( BODY %)
362 // where SIGNATURE is of the form macro_name (a, b, c, ...)
363 // and BODY can obtain the parameter contents as @a, @b, @c, ....
364 // Note that parameterless macros can also be declared.
366 // Macro definitions may not be nested.
367 // A macro is available textually after it has been defined.
369 // The basic form of a macro invocation
370 // for a parameterless macro is @macro_name,
371 // for a macro with parameters is @macro_name(param_1, param_2, ...).
373 // NB: this means that a parameterless macro @foo called as @foo(a, b, c)
374 // leaves its 'parameters' alone, rather than consuming them to result
375 // in a "too many parameters" error. This may be useful in the unusual
376 // case of wanting @foo to expand to the name of a function.
378 // Invocations of unknown macros are left unexpanded, to allow
379 // the continued use of constructs such as @cast, @var, etc.
// Destructor: iterates over every token pointer stored in `body`.
// NOTE(review): the per-element action is not visible in this view --
// presumably each token is released; confirm.
381 macrodecl::~macrodecl ()
384 for (vector
<const token
*>::iterator it
= body
.begin();
385 it
!= body
.end(); it
++)
// Destructor for one macro activation record.
// NOTE(review): statements before the closure test and the action taken
// on each parameter entry are not visible in this view.
389 parser::pp1_activation::~pp1_activation ()
// When the macro being expanded is a closure, stop here so that the
// parameter map below is left untouched.
392 if (curr_macro
->is_closure()) return; // body is shared with an earlier declaration
// Otherwise walk every (name, pp_macrodecl*) entry in params.
393 for (map
<string
, pp_macrodecl
*>::iterator it
= params
.begin();
394 it
!= params
.end(); it
++)
398 // Grab a token from the current input source (main file or macro body):
402 if (pp1_state
.empty())
403 return input
.scan ();
405 // otherwise, we're inside a macro
406 pp1_activation
* act
= pp1_state
.back();
407 unsigned& cursor
= act
->cursor
;
408 if (cursor
< act
->curr_macro
->body
.size())
410 token
* t
= new token(*act
->curr_macro
->body
[cursor
]);
411 t
->chain
= new token(*act
->tok
); // mark chained token
416 return 0; // reached end of macro body
424 const token
* t
= next_pp1 ();
425 if (t
== 0) // EOF or end of macro body
427 if (pp1_state
.empty()) // actual EOF
430 // Exit macro and loop around to look for the next token.
431 pp1_activation
* act
= pp1_state
.back();
432 pp1_state
.pop_back(); delete act
;
437 if (t
->type
== tok_operator
&& t
->content
== "@define")
439 if (!pp1_state
.empty())
440 throw PARSE_ERROR (_("'@define' forbidden inside macro body"), t
);
443 // handle macro definition
444 // (1) consume macro signature
446 if (! (t
&& t
->type
== tok_identifier
))
447 throw PARSE_ERROR (_("expected identifier"), t
);
448 string name
= t
->content
;
450 // check for redefinition of existing macro
451 if (pp1_namespace
.find(name
) != pp1_namespace
.end())
453 parse_error
er (ERR_SRC
, _F("attempt to redefine macro '@%s' in the same file", name
.c_str ()), t
);
455 // Also point to pp1_namespace[name]->tok, the site of
456 // the original definition:
457 er
.chain
= new PARSE_ERROR (_F("macro '@%s' first defined here",
458 name
.c_str()), pp1_namespace
[name
]->tok
);
462 // XXX: the above restriction was mostly necessary due to
463 // wanting to leave open the possibility of
464 // statically-scoped semantics in the future.
466 // XXX: this cascades into further parse errors as the
467 // parser tries to parse the remaining definition... (e.g.
468 // it can't tell that the macro body isn't a conditional,
469 // that the uses of parameters aren't nonexistent
471 if (name
== "define")
472 throw PARSE_ERROR (_("attempt to redefine '@define'"), t
);
473 if (input
.atwords
.count("@" + name
))
474 session
.print_warning (_F("macro redefines built-in operator '@%s'", name
.c_str()), t
);
476 macrodecl
* decl
= (pp1_namespace
[name
] = new macrodecl
);
479 // determine if the macro takes parameters
480 bool saw_params
= false;
482 if (t
&& t
->type
== tok_operator
&& t
->content
== "(")
490 if (! (t
&& t
->type
== tok_identifier
))
491 throw PARSE_ERROR(_("expected identifier"), t
);
492 decl
->formal_args
.push_back(t
->content
);
496 if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
500 else if (t
&& t
->type
== tok_operator
&& t
->content
== ")")
508 throw PARSE_ERROR (_("expected ',' or ')'"), t
);
514 // (2) identify & consume macro body
515 if (! (t
&& t
->type
== tok_operator
&& t
->content
== "%("))
518 throw PARSE_ERROR (_("expected '%('"), t
);
520 throw PARSE_ERROR (_("expected '%(' or '('"), t
);
524 t
= slurp_pp1_body (decl
->body
);
526 throw PARSE_ERROR (_("incomplete macro definition - missing '%)'"), decl
->tok
);
529 // Now loop around to look for a real token.
533 // (potential) macro invocation
534 if (t
->type
== tok_operator
&& t
->content
[0] == '@')
536 string name
= t
->content
.substr(1); // strip initial '@'
538 // check if name refers to a real parameter or macro
540 pp1_activation
* act
= pp1_state
.empty() ? 0 : pp1_state
.back();
541 if (act
&& act
->params
.find(name
) != act
->params
.end())
542 decl
= act
->params
[name
];
543 else if (!(act
&& act
->curr_macro
->context
== ctx_library
)
544 && pp1_namespace
.find(name
) != pp1_namespace
.end())
545 decl
= pp1_namespace
[name
];
546 else if (session
.library_macros
.find(name
)
547 != session
.library_macros
.end())
548 decl
= session
.library_macros
[name
];
549 else // this is an ordinary @operator
552 // handle macro invocation, taking ownership of t
553 pp1_activation
*new_act
= new pp1_activation(t
, decl
);
554 unsigned num_params
= decl
->formal_args
.size();
556 // (1a) restore parameter invocation closure
557 if (num_params
== 0 && decl
->is_closure())
559 // NB: decl->parent_act is always safe since the
560 // parameter decl (if any) comes from an activation
561 // record which is deeper in the stack than new_act.
563 // decl is a macro parameter which must be evaluated in
564 // the context of the original point of invocation:
565 new_act
->params
= ((pp_macrodecl
*)decl
)->parent_act
->params
;
569 // (1b) consume macro parameters (if any)
573 // for simplicity, we do not allow macro constructs here
574 // -- if we did, we'd have to recursively call scan_pp1()
576 if (! (t
&& t
->type
== tok_operator
&& t
->content
== "("))
579 throw PARSE_ERROR (_NF
580 ("expected '(' in invocation of macro '@%s'"
581 " taking %d parameter",
582 "expected '(' in invocation of macro '@%s'"
583 " taking %d parameters",
584 num_params
, name
.c_str(), num_params
), t
);
587 // XXX perhaps parse/count the full number of params,
588 // so we can say "expected x, found y params" on error?
589 for (unsigned i
= 0; i
< num_params
; i
++)
593 // create parameter closure
594 string param_name
= decl
->formal_args
[i
];
595 pp_macrodecl
* p
= (new_act
->params
[param_name
]
597 p
->tok
= new token(*new_act
->tok
);
599 // NB: *new_act->tok points to invocation, act is NULL at top level
601 t
= slurp_pp1_param (p
->body
);
603 // check correct usage of ',' or ')'
604 if (t
== 0) // hit unexpected EOF or end of macro
606 // XXX could we pop the stack and continue parsing
607 // the invocation, allowing macros to construct new
608 // invocations in piecemeal fashion??
609 const token
* orig_t
= new token(*new_act
->tok
);
611 throw PARSE_ERROR (_("could not find end of macro invocation"), orig_t
);
613 if (t
->type
== tok_operator
&& t
->content
== ",")
615 if (i
+ 1 == num_params
)
618 throw PARSE_ERROR (_F("too many parameters for macro '@%s' (expected %d)", name
.c_str(), num_params
), t
);
621 else if (t
->type
== tok_operator
&& t
->content
== ")")
623 if (i
+ 1 != num_params
)
626 throw PARSE_ERROR (_F("too few parameters for macro '@%s' (expected %d)", name
.c_str(), num_params
), t
);
631 // XXX this is, incidentally, impossible
633 throw PARSE_ERROR(_("expected ',' or ')' after macro parameter"), t
);
639 // (2) set up macro expansion
641 pp1_state
.push_back (new_act
);
643 // Now loop around to look for a real token.
647 // Otherwise, we have an ordinary token.
652 // Consume a single macro invocation's parameters, heeding nested ( )
653 // brackets and stopping on an unbalanced ')' or an unbracketed ','
654 // (and returning the final separator token).
// NOTE(review): the loop that fetches each token `t`, and the action
// taken in each branch below, are not visible in this view; the branch
// comments describe only the conditions.
656 parser::slurp_pp1_param (vector
<const token
*>& param
)
// Tracks '(' brackets opened inside this parameter (starts at zero).
659 unsigned nesting
= 0;
// Case 1: an opening bracket.
666 if (t
->type
== tok_operator
&& t
->content
== "(")
// Case 2: a closing bracket while at least one bracket is still open.
668 else if (nesting
&& t
->type
== tok_operator
&& t
->content
== ")")
// Case 3: a ')' or ',' with no bracket open -- the separator that the
// caller is waiting for.
670 else if (!nesting
&& t
->type
== tok_operator
671 && (t
->content
== ")" || t
->content
== ","))
676 return t
; // report ")" or "," or NULL
680 // Consume a macro declaration's body, heeding nested %( %) brackets.
// NOTE(review): the loop that fetches each token `t`, and the action
// taken in each branch below, are not visible in this view; the branch
// comments describe only the conditions.
682 parser::slurp_pp1_body (vector
<const token
*>& body
)
// Tracks "%(" brackets opened inside the body (starts at zero).
685 unsigned nesting
= 0;
// Case 1: a nested "%(" opener.
692 if (t
->type
== tok_operator
&& t
->content
== "%(")
// Case 2: a "%)" while at least one nested "%(" is still open.
694 else if (nesting
&& t
->type
== tok_operator
&& t
->content
== "%)")
// Case 3: a "%)" with nothing open -- the terminator of the body itself.
696 else if (!nesting
&& t
->type
== tok_operator
&& t
->content
== "%)")
701 return t
; // report final "%)" or NULL
704 // Used for parsing .stpm files.
// Read a macro library through the first preprocessing pass and publish
// the macros it defines into session.library_macros.
// NOTE(review): the try block opener, several conditions, the throw of
// `er` and the return statements are not visible in this view.
706 parser::parse_library_macros (bool errs_as_warnings
)
// Fresh stapfile, registered with the lexer as the current file.
708 stapfile
* f
= new stapfile
;
709 input
.set_current_file (f
);
// Pull a token through the macro pass; @define handling happens inside
// scan_pp1 and fills pp1_namespace.
713 const token
* t
= scan_pp1 ();
715 // Currently we only take objection to macro invocations if they
716 // produce a non-whitespace token after being expanded.
718 // XXX should we prevent macro invocations even if they expand to empty??
721 throw PARSE_ERROR (_F("library macro file '%s' contains non-@define construct", input_name
.c_str()), t
);
723 // We need to first check whether *any* of the macros are duplicates,
724 // then commit to including the entire file in the global namespace
726 for (map
<string
, macrodecl
*>::iterator it
= pp1_namespace
.begin();
727 it
!= pp1_namespace
.end(); it
++)
729 string name
= it
->first
;
// A name already present in the session-wide library table is a
// duplicate.
731 if (session
.library_macros
.find(name
) != session
.library_macros
.end())
// Build the error at the new definition and chain a second error that
// points at the token of the earlier definition.
733 parse_error
er(ERR_SRC
, _F("duplicate definition of library macro '@%s'", name
.c_str()), it
->second
->tok
);
734 er
.chain
= new PARSE_ERROR (_F("macro '@%s' first defined here", name
.c_str()), session
.library_macros
[name
]->tok
);
744 catch (const parse_error
& pe
)
746 print_error (pe
, errs_as_warnings
);
751 // If no errors, include the entire file. Note how this is outside
752 // of the try-catch block -- no errors possible.
753 for (map
<string
, macrodecl
*>::iterator it
= pp1_namespace
.begin();
754 it
!= pp1_namespace
.end(); it
++)
756 string name
= it
->first
;
// Publish the declaration and tag it as coming from the library.
758 session
.library_macros
[name
] = it
->second
;
759 session
.library_macros
[name
]->context
= ctx_library
;
765 // Second pass - preprocessor conditional expansion.
767 // The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
768 // where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
769 // or: arch COMPARISON-OP "arch-string"
770 // or: systemtap_v COMPARISON-OP "version-string"
771 // or: systemtap_privilege COMPARISON-OP "privilege-string"
772 // or: CONFIG_foo COMPARISON-OP "config-string"
773 // or: CONFIG_foo COMPARISON-OP number
774 // or: CONFIG_foo COMPARISON-OP CONFIG_bar
775 // or: "string1" COMPARISON-OP "string2"
776 // or: number1 COMPARISON-OP number2
777 // The %: ELSE-TOKENS part is optional.
779 // e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
780 // e.g. %( arch != "i?86" %? "foo" %: "baz" %)
781 // e.g. %( CONFIG_foo %? "foo" %: "baz" %)
783 // Up to an entire %( ... %) expression is processed by a single call
784 // to this function. Tokens included by any nested conditions are
785 // enqueued in a private vector.
787 bool eval_pp_conditional (systemtap_session
& s
,
788 const token
* l
, const token
* op
, const token
* r
)
790 if (l
->type
== tok_identifier
&& (l
->content
== "kernel_v" ||
791 l
->content
== "kernel_vr" ||
792 l
->content
== "systemtap_v"))
794 if (! (r
->type
== tok_string
))
795 throw PARSE_ERROR (_("expected string literal"), r
);
797 string target_kernel_vr
= s
.kernel_release
;
798 string target_kernel_v
= s
.kernel_base_release
;
801 if (l
->content
== "kernel_v") target
= target_kernel_v
;
802 else if (l
->content
== "kernel_vr") target
= target_kernel_vr
;
803 else if (l
->content
== "systemtap_v") target
= s
.compatible
;
806 string query
= r
->content
;
807 bool rhs_wildcard
= (strpbrk (query
.c_str(), "*?[") != 0);
809 // collect acceptable strverscmp results.
810 int rvc_ok1
, rvc_ok2
;
812 if (op
->type
== tok_operator
&& op
->content
== "<=")
813 { rvc_ok1
= -1; rvc_ok2
= 0; }
814 else if (op
->type
== tok_operator
&& op
->content
== ">=")
815 { rvc_ok1
= 1; rvc_ok2
= 0; }
816 else if (op
->type
== tok_operator
&& op
->content
== "<")
817 { rvc_ok1
= -1; rvc_ok2
= -1; }
818 else if (op
->type
== tok_operator
&& op
->content
== ">")
819 { rvc_ok1
= 1; rvc_ok2
= 1; }
820 else if (op
->type
== tok_operator
&& op
->content
== "==")
821 { rvc_ok1
= 0; rvc_ok2
= 0; wc_ok
= true; }
822 else if (op
->type
== tok_operator
&& op
->content
== "!=")
823 { rvc_ok1
= -1; rvc_ok2
= 1; wc_ok
= true; }
825 throw PARSE_ERROR (_("expected comparison operator"), op
);
827 if ((!wc_ok
) && rhs_wildcard
)
828 throw PARSE_ERROR (_("wildcard not allowed with order comparison operators"), op
);
832 int rvc_result
= fnmatch (query
.c_str(), target
.c_str(),
833 FNM_NOESCAPE
); // spooky
834 bool badness
= (rvc_result
== 0) ^ (op
->content
== "==");
839 int rvc_result
= strverscmp (target
.c_str(), query
.c_str());
840 // normalize rvc_result
841 if (rvc_result
< 0) rvc_result
= -1;
842 if (rvc_result
> 0) rvc_result
= 1;
843 return (rvc_result
== rvc_ok1
|| rvc_result
== rvc_ok2
);
846 else if (l
->type
== tok_identifier
&& l
->content
== "systemtap_privilege")
848 string target_privilege
=
849 pr_contains(s
.privilege
, pr_stapdev
) ? "stapdev"
850 : pr_contains(s
.privilege
, pr_stapsys
) ? "stapsys"
851 : pr_contains(s
.privilege
, pr_stapusr
) ? "stapusr"
852 : "none"; /* should be impossible -- s.privilege always one of above */
853 assert(target_privilege
!= "none");
855 if (! (r
->type
== tok_string
))
856 throw PARSE_ERROR (_("expected string literal"), r
);
857 string query_privilege
= r
->content
;
859 bool nomatch
= (target_privilege
!= query_privilege
);
862 if (op
->type
== tok_operator
&& op
->content
== "==")
864 else if (op
->type
== tok_operator
&& op
->content
== "!=")
867 throw PARSE_ERROR (_("expected '==' or '!='"), op
);
868 /* XXX perhaps allow <= >= and similar comparisons */
872 else if (l
->type
== tok_identifier
&& l
->content
== "guru_mode")
874 if (! (r
->type
== tok_number
))
875 throw PARSE_ERROR (_("expected number"), r
);
876 int64_t lhs
= (int64_t) s
.guru_mode
;
877 int64_t rhs
= lex_cast
<int64_t>(r
->content
);
878 if (!((rhs
== 0)||(rhs
== 1)))
879 throw PARSE_ERROR (_("expected 0 or 1"), op
);
880 if (!((op
->type
== tok_operator
&& op
->content
== "==") ||
881 (op
->type
== tok_operator
&& op
->content
== "!=")))
882 throw PARSE_ERROR (_("expected '==' or '!='"), op
);
884 return eval_comparison (lhs
, op
, rhs
);
886 else if (l
->type
== tok_identifier
&& l
->content
== "arch")
888 string target_architecture
= s
.architecture
;
889 if (! (r
->type
== tok_string
))
890 throw PARSE_ERROR (_("expected string literal"), r
);
891 string query_architecture
= r
->content
;
893 int nomatch
= fnmatch (query_architecture
.c_str(),
894 target_architecture
.c_str(),
895 FNM_NOESCAPE
); // still spooky
898 if (op
->type
== tok_operator
&& op
->content
== "==")
900 else if (op
->type
== tok_operator
&& op
->content
== "!=")
903 throw PARSE_ERROR (_("expected '==' or '!='"), op
);
907 else if (l
->type
== tok_identifier
&& l
->content
== "runtime")
909 if (! (r
->type
== tok_string
))
910 throw PARSE_ERROR (_("expected string literal"), r
);
912 string query_runtime
= r
->content
;
913 string target_runtime
;
915 target_runtime
= (s
.runtime_mode
== systemtap_session::dyninst_runtime
916 ? "dyninst" : "kernel");
917 int nomatch
= fnmatch (query_runtime
.c_str(),
918 target_runtime
.c_str(),
919 FNM_NOESCAPE
); // still spooky
922 if (op
->type
== tok_operator
&& op
->content
== "==")
924 else if (op
->type
== tok_operator
&& op
->content
== "!=")
927 throw PARSE_ERROR (_("expected '==' or '!='"), op
);
931 else if (l
->type
== tok_identifier
&& startswith(l
->content
, "CONFIG_"))
933 if (r
->type
== tok_string
)
935 string lhs
= s
.kernel_config
[l
->content
]; // may be empty
936 string rhs
= r
->content
;
938 int nomatch
= fnmatch (rhs
.c_str(), lhs
.c_str(), FNM_NOESCAPE
); // still spooky
941 if (op
->type
== tok_operator
&& op
->content
== "==")
943 else if (op
->type
== tok_operator
&& op
->content
== "!=")
946 throw PARSE_ERROR (_("expected '==' or '!='"), op
);
950 else if (r
->type
== tok_number
)
952 const char* startp
= s
.kernel_config
[l
->content
].c_str ();
953 char* endp
= (char*) startp
;
955 int64_t lhs
= (int64_t) strtoll (startp
, & endp
, 0);
956 if (errno
== ERANGE
|| errno
== EINVAL
|| *endp
!= '\0')
957 throw PARSE_ERROR ("Config option value not a number", l
);
959 int64_t rhs
= lex_cast
<int64_t>(r
->content
);
960 return eval_comparison (lhs
, op
, rhs
);
962 else if (r
->type
== tok_identifier
963 && startswith(r
->content
, "CONFIG_"))
965 // First try to convert both to numbers,
966 // otherwise treat both as strings.
967 const char* startp
= s
.kernel_config
[l
->content
].c_str ();
968 char* endp
= (char*) startp
;
970 int64_t val
= (int64_t) strtoll (startp
, & endp
, 0);
971 if (errno
!= ERANGE
&& errno
!= EINVAL
&& *endp
== '\0')
974 startp
= s
.kernel_config
[r
->content
].c_str ();
975 endp
= (char*) startp
;
977 int64_t rhs
= (int64_t) strtoll (startp
, & endp
, 0);
978 if (errno
!= ERANGE
&& errno
!= EINVAL
&& *endp
== '\0')
979 return eval_comparison (lhs
, op
, rhs
);
982 string lhs
= s
.kernel_config
[l
->content
];
983 string rhs
= s
.kernel_config
[r
->content
];
984 return eval_comparison (lhs
, op
, rhs
);
987 throw PARSE_ERROR (_("expected string, number literal or other CONFIG_... as right side operand"), r
);
989 else if (l
->type
== tok_string
&& r
->type
== tok_string
)
991 string lhs
= l
->content
;
992 string rhs
= r
->content
;
993 return eval_comparison (lhs
, op
, rhs
);
994 // NB: no wildcarding option here
996 else if (l
->type
== tok_number
&& r
->type
== tok_number
)
998 int64_t lhs
= lex_cast
<int64_t>(l
->content
);
999 int64_t rhs
= lex_cast
<int64_t>(r
->content
);
1000 return eval_comparison (lhs
, op
, rhs
);
1001 // NB: no wildcarding option here
1003 else if (l
->type
== tok_string
&& r
->type
== tok_number
1004 && op
->type
== tok_operator
)
1005 throw PARSE_ERROR (_("expected string literal as right value"), r
);
1006 else if (l
->type
== tok_number
&& r
->type
== tok_string
1007 && op
->type
== tok_operator
)
1008 throw PARSE_ERROR (_("expected number literal as right value"), r
);
1011 throw PARSE_ERROR (_("expected 'arch', 'kernel_v', 'kernel_vr', 'systemtap_v',\n"
1012 " 'runtime', 'systemtap_privilege', 'CONFIG_...', or\n"
1013 " comparison between strings or integers"), l
);
1017 // Only tokens corresponding to the TRUE statement must be expanded
1023 pp_state_t pp
= PP_NONE
;
1024 if (!pp_state
.empty())
1025 pp
= pp_state
.back().second
;
1028 if (pp
== PP_SKIP_THEN
|| pp
== PP_SKIP_ELSE
)
1037 t
= pp_state
.back().first
;
1038 pp_state
.pop_back(); // so skip_some doesn't keep trying to close this
1039 //TRANSLATORS: 'conditional' meaning 'conditional preprocessing'
1040 throw PARSE_ERROR (_("incomplete conditional at end of file"), t
);
1045 // misplaced preprocessor "then"
1046 if (t
->type
== tok_operator
&& t
->content
== "%?")
1047 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t
);
1049 // preprocessor "else"
1050 if (t
->type
== tok_operator
&& t
->content
== "%:")
1053 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t
);
1054 if (pp
== PP_KEEP_ELSE
|| pp
== PP_SKIP_ELSE
)
1055 throw PARSE_ERROR (_("invalid conditional - duplicate '%:'"), t
);
1056 // XXX: here and elsewhere, error cascades might be avoided
1057 // by dropping tokens until we reach the closing %)
1059 pp_state
.back().second
= (pp
== PP_KEEP_THEN
) ?
1060 PP_SKIP_ELSE
: PP_KEEP_ELSE
;
1065 // preprocessor close
1066 if (t
->type
== tok_operator
&& t
->content
== "%)")
1069 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t
);
1070 delete pp_state
.back().first
;
1071 delete t
; //this is the closing bracket
1072 pp_state
.pop_back();
1076 if (! (t
->type
== tok_operator
&& t
->content
== "%(")) // ordinary token
1079 // We have a %( - it's time to throw a preprocessing party!
1081 bool result
= false;
1082 bool and_result
= true;
1083 const token
*n
= NULL
;
1085 const token
*l
, *op
, *r
;
1089 if (l
== 0 || op
== 0 || r
== 0)
1090 throw PARSE_ERROR (_("incomplete condition after '%('"), t
);
1091 // NB: consider generalizing to consume all tokens until %?, and
1092 // passing that as a vector to an evaluator.
1094 // Do not evaluate the condition if we haven't expanded everything.
1095 // This may occur when having several recursive conditionals.
1096 and_result
&= eval_pp_conditional (session
, l
, op
, r
);
1097 if(l
->content
=="systemtap_v")
1108 if (n
&& n
->type
== tok_operator
&& n
->content
== "&&")
1110 result
|= and_result
;
1112 if (! (n
&& n
->type
== tok_operator
&& n
->content
== "||"))
1117 clog << "PP eval (" << *t << ") == " << result << endl;
1121 if (! (m
&& m
->type
== tok_operator
&& m
->content
== "%?"))
1122 throw PARSE_ERROR (_("expected '%?' marker for conditional"), t
);
1125 pp
= result
? PP_KEEP_THEN
: PP_SKIP_THEN
;
1126 pp_state
.push_back (make_pair (t
, pp
));
1128 // Now loop around to look for a real token.
1133 // Skip over tokens and any errors, heeding
1134 // only nested preprocessor starts and ends.
1139 unsigned nesting
= 0;
1146 catch (const parse_error
&e
)
1152 if (t
->type
== tok_operator
&& t
->content
== "%(")
1154 else if (nesting
&& t
->type
== tok_operator
&& t
->content
== "%)")
1156 else if (!nesting
&& t
->type
== tok_operator
&&
1157 (t
->content
== "%:" || t
->content
== "%?" || t
->content
== "%)"))
1170 next_t
= scan_pp ();
1172 throw PARSE_ERROR (_("unexpected end-of-file"));
1175 // advance by zeroing next_t
1185 next_t
= scan_pp ();
1187 // don't advance by zeroing next_t
1196 // can only swallow the token last returned by peek() or next().
1197 assert (last_t
!= 0);
1199 // advance by zeroing next_t
1200 last_t
= next_t
= 0;
1205 tok_is(token
const * t
, token_type tt
, string
const & expected
)
1207 return t
&& t
->type
== tt
&& t
->content
== expected
;
1212 parser::expect_known (token_type tt
, string
const & expected
)
1214 const token
*t
= next();
1215 if (! (t
&& t
->type
== tt
&& t
->content
== expected
))
1216 throw PARSE_ERROR (_F("expected '%s'", expected
.c_str()));
1217 swallow (); // We are done with it, content was copied.
1222 parser::expect_unknown (token_type tt
, string
& target
)
1224 const token
*t
= next();
1225 if (!(t
&& t
->type
== tt
))
1226 throw PARSE_ERROR (_("expected ") + tt2str(tt
));
1227 target
= t
->content
;
1228 swallow (); // We are done with it, content was copied.
1233 parser::expect_unknown2 (token_type tt1
, token_type tt2
, string
& target
)
1235 const token
*t
= next();
1236 if (!(t
&& (t
->type
== tt1
|| t
->type
== tt2
)))
1237 throw PARSE_ERROR (_F("expected %s or %s", tt2str(tt1
).c_str(), tt2str(tt2
).c_str()));
1238 target
= t
->content
;
1239 swallow (); // We are done with it, content was copied.
1244 parser::expect_op (std::string
const & expected
)
1246 expect_known (tok_operator
, expected
);
1251 parser::expect_kw (std::string
const & expected
)
1253 expect_known (tok_keyword
, expected
);
1257 parser::expect_kw_token (std::string
const & expected
)
1259 const token
*t
= next();
1260 if (! (t
&& t
->type
== tok_keyword
&& t
->content
== expected
))
1261 throw PARSE_ERROR (_F("expected '%s'", expected
.c_str()));
1266 parser::expect_number (int64_t & value
)
1269 const token
*t
= next();
1270 if (t
->type
== tok_operator
&& t
->content
== "-")
1276 if (!(t
&& t
->type
== tok_number
))
1277 throw PARSE_ERROR (_("expected number"));
1279 const char* startp
= t
->content
.c_str ();
1280 char* endp
= (char*) startp
;
1282 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1283 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1284 // since the lexer only gives us positive digit strings, but we'll
1285 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1287 value
= (int64_t) strtoull (startp
, & endp
, 0);
1288 if (errno
== ERANGE
|| errno
== EINVAL
|| *endp
!= '\0'
1289 || (neg
&& (unsigned long long) value
> 9223372036854775808ULL)
1290 || (unsigned long long) value
> 18446744073709551615ULL
1291 || value
< -9223372036854775807LL-1)
1292 throw PARSE_ERROR (_("number invalid or out of range"));
1297 swallow (); // We are done with it, content was parsed and copied into value.
// Take the next token and require it to be either an identifier or an
// operator whose text begins with '@' (an @word); its content is copied
// into TARGET.  Any other token raises a parse error.
1302 parser::expect_ident_or_atword (std::string
& target
)
1304 const token
*t
= next();
1306 // accept identifiers and operators beginning in '@':
1307 if (!t
|| (t
->type
!= tok_identifier
1308 && (t
->type
!= tok_operator
|| t
->content
[0] != '@')))
1309 // XXX currently this is only called from parse_hist_op_or_bare_name(),
1310 // so the message is accurate, but keep an eye out in the future:
1311 throw PARSE_ERROR (_F("expected %s or statistical operation", tt2str(tok_identifier
).c_str()));
1313 target
= t
->content
;
1319 parser::expect_ident_or_keyword (std::string
& target
)
1321 expect_unknown2 (tok_identifier
, tok_keyword
, target
);
1326 parser::peek_op (std::string
const & op
)
1328 return tok_is (peek(), tok_operator
, op
);
1333 parser::peek_kw (std::string
const & kw
)
1335 return tok_is (peek(), tok_identifier
, kw
);
1340 lexer::lexer (istream
& input
, const string
& in
, systemtap_session
& s
):
1341 ate_comment(false), ate_whitespace(false), saw_tokens(false),
1342 input_name (in
), input_pointer (0), input_end (0), cursor_suspend_count(0),
1343 cursor_suspend_line (1), cursor_suspend_column (1), cursor_line (1),
1344 cursor_column (1), session(s
), current_file (0), current_token_chain (0)
1346 getline(input
, input_contents
, '\0');
1348 input_pointer
= input_contents
.data();
1349 input_end
= input_contents
.data() + input_contents
.size();
1351 if (keywords
.empty())
1353 // NB: adding new keywords is highly disruptive to the language,
1354 // in particular to existing scripts that could be suddenly
1355 // broken. If done at all, it has to be s.compatible-sensitive,
1356 // and broadly advertised.
1357 keywords
.insert("probe");
1358 keywords
.insert("global");
1359 keywords
.insert("function");
1360 keywords
.insert("if");
1361 keywords
.insert("else");
1362 keywords
.insert("for");
1363 keywords
.insert("foreach");
1364 keywords
.insert("in");
1365 keywords
.insert("limit");
1366 keywords
.insert("return");
1367 keywords
.insert("delete");
1368 keywords
.insert("while");
1369 keywords
.insert("break");
1370 keywords
.insert("continue");
1371 keywords
.insert("next");
1372 keywords
.insert("string");
1373 keywords
.insert("long");
1374 keywords
.insert("try");
1375 keywords
.insert("catch");
1378 if (atwords
.empty())
1380 // NB: adding new @words is mildly disruptive to existing
1381 // scripts that define macros with the same name, but not
1382 // really. The user will merely receive a warning that they are
1383 // redefining an existing operator.
1384 atwords
.insert("@cast");
1385 atwords
.insert("@defined");
1386 atwords
.insert("@entry");
1387 atwords
.insert("@perf");
1388 atwords
.insert("@var");
1389 atwords
.insert("@avg");
1390 atwords
.insert("@count");
1391 atwords
.insert("@sum");
1392 atwords
.insert("@min");
1393 atwords
.insert("@max");
1394 atwords
.insert("@hist_linear");
1395 atwords
.insert("@hist_log");
// Storage for the lexer's static word tables.  Both start empty and are
// populated by the lexer constructor the first time it finds them empty.
1399 set
<string
> lexer::keywords
;
1400 set
<string
> lexer::atwords
;
1403 lexer::set_current_file (stapfile
* f
)
1408 f
->file_contents
= input_contents
;
1409 f
->name
= input_name
;
1414 lexer::set_current_token_chain (const token
* tok
)
1416 current_token_chain
= tok
;
1420 lexer::input_peek (unsigned n
)
1422 if (input_pointer
+ n
>= input_end
)
1424 return (unsigned char)*(input_pointer
+ n
);
1431 int c
= input_peek();
1432 if (c
< 0) return c
; // EOF
1436 if (cursor_suspend_count
)
1438 // Track effect of input_put: preserve previous cursor/line_column
1439 // until all of its characters are consumed.
1440 if (--cursor_suspend_count
== 0)
1442 cursor_line
= cursor_suspend_line
;
1443 cursor_column
= cursor_suspend_column
;
1448 // update source cursor
1458 // clog << "[" << (char)c << "]";
1464 lexer::input_put (const string
& chars
, const token
* t
)
1466 size_t pos
= input_pointer
- input_contents
.data();
1467 // clog << "[put:" << chars << " @" << pos << "]";
1468 input_contents
.insert (pos
, chars
);
1469 cursor_suspend_count
+= chars
.size();
1470 cursor_suspend_line
= cursor_line
;
1471 cursor_suspend_column
= cursor_column
;
1472 cursor_line
= t
->location
.line
;
1473 cursor_column
= t
->location
.column
;
1474 input_pointer
= input_contents
.data() + pos
;
1475 input_end
= input_contents
.data() + input_contents
.size();
1482 ate_comment
= false; // reset for each new token
1483 ate_whitespace
= false; // reset for each new token
1485 // XXX be very sure to restore old_saw_tokens if we return without a token:
1486 bool old_saw_tokens
= saw_tokens
;
1489 token
* n
= new token
;
1490 n
->location
.file
= current_file
;
1491 n
->chain
= current_token_chain
;
1494 bool suspended
= (cursor_suspend_count
> 0);
1495 n
->location
.line
= cursor_line
;
1496 n
->location
.column
= cursor_column
;
1498 int c
= input_get();
1499 // clog << "{" << (char)c << (char)c2 << "}";
1503 saw_tokens
= old_saw_tokens
;
1509 ate_whitespace
= true;
1513 int c2
= input_peek ();
1515 // Paste command line arguments as character streams into
1516 // the beginning of a token. $1..$999 go through as raw
1517 // characters; @1..@999 are quoted/escaped as strings.
1518 // $# and @# expand to the number of arguments, similarly
1520 if ((c
== '$' || c
== '@') && (c2
== '#'))
1522 n
->content
.push_back (c
);
1523 n
->content
.push_back (c2
);
1524 input_get(); // swallow '#'
1527 n
->make_junk(_("invalid nested substitution of command line arguments"));
1530 size_t num_args
= session
.args
.size ();
1531 input_put ((c
== '$') ? lex_cast (num_args
) : lex_cast_qstring (num_args
), n
);
1535 else if ((c
== '$' || c
== '@') && (isdigit (c2
)))
1537 n
->content
.push_back (c
);
1542 idx
= (idx
* 10) + (c2
- '0');
1543 n
->content
.push_back (c2
);
1547 idx
<= session
.args
.size()); // prevent overflow
1550 n
->make_junk(_("invalid nested substitution of command line arguments"));
1554 idx
-1 >= session
.args
.size())
1556 n
->make_junk(_F("command line argument index %lu out of range [1-%lu]",
1557 (unsigned long) idx
, (unsigned long) session
.args
.size()));
1560 const string
& arg
= session
.args
[idx
-1];
1561 input_put ((c
== '$') ? arg
: lex_cast_qstring (arg
), n
);
1566 else if (isalpha (c
) || c
== '$' || c
== '@' || c
== '_')
1568 n
->type
= tok_identifier
;
1569 n
->content
= (char) c
;
1570 while (isalnum (c2
) || c2
== '_' || c2
== '$')
1573 n
->content
.push_back (c2
);
1577 if (keywords
.count(n
->content
))
1578 n
->type
= tok_keyword
;
1579 else if (n
->content
[0] == '@')
1580 // makes it easier to detect illegal use of @words:
1581 n
->type
= tok_operator
;
1586 else if (isdigit (c
)) // positive literal
1588 n
->type
= tok_number
;
1589 n
->content
= (char) c
;
1591 while (isalnum (c2
))
1593 // NB: isalnum is very permissive. We rely on strtol, called in
1594 // parser::parse_literal below, to confirm that the number string
1595 // is correctly formatted and in range.
1598 n
->content
.push_back (c2
);
1606 n
->type
= tok_string
;
1611 if (c
< 0 || c
== '\n')
1613 n
->make_junk(_("Could not find matching closing quote"));
1616 if (c
== '\"') // closing double-quotes
1618 else if (c
== '\\') // see also input_put
1624 if (strverscmp(session
.compatible
.c_str(), "2.3") < 0)
1633 case '0' ... '7': // NB: need only match the first digit
1635 // Pass these escapes through to the string value
1636 // being parsed; it will be emitted into a C literal.
1637 // XXX: PR13371: perhaps we should evaluate them here
1638 // (and re-quote them during translate.cxx emission).
1639 n
->content
.push_back('\\');
1642 default: the_default
:
1643 n
->content
.push_back(c
);
1648 n
->content
.push_back(c
);
1653 else if (ispunct (c
))
1655 int c3
= input_peek (1);
1657 // NB: if we were to recognize negative numeric literals here,
1658 // we'd introduce another grammar ambiguity:
1659 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
1660 // instead of tok_number(1) tok_operator('-') tok_number(1)
1662 if (c
== '#') // shell comment
1664 unsigned this_line
= cursor_line
;
1665 do { c
= input_get (); }
1666 while (c
>= 0 && cursor_line
== this_line
);
1668 ate_whitespace
= true;
1671 else if ((c
== '/' && c2
== '/')) // C++ comment
1673 unsigned this_line
= cursor_line
;
1674 do { c
= input_get (); }
1675 while (c
>= 0 && cursor_line
== this_line
);
1677 ate_whitespace
= true;
1680 else if (c
== '/' && c2
== '*') // C comment
1682 (void) input_get (); // swallow '*' already in c2
1687 if (c
== '*' && c2
== '/')
1693 ate_whitespace
= true;
1696 else if (c
== '%' && c2
== '{') // embedded code
1698 n
->type
= tok_embedded
;
1699 (void) input_get (); // swallow '{' already in c2
1704 if (c
== '%' && c2
== '}')
1706 if (c
== '}' && c2
== '%') // possible typo
1707 session
.print_warning (_("possible erroneous closing '}%', use '%}'?"), n
);
1713 n
->make_junk(_("Could not find matching '%}' to close embedded function block"));
1717 // We're committed to recognizing at least the first character
1719 n
->type
= tok_operator
;
1722 // match all valid operators, in decreasing size order
1723 if ((c
== '<' && c2
== '<' && c3
== '<') ||
1724 (c
== '<' && c2
== '<' && c3
== '=') ||
1725 (c
== '>' && c2
== '>' && c3
== '='))
1729 input_get (); input_get (); // swallow other two characters
1731 else if ((c
== '=' && c2
== '=') ||
1732 (c
== '!' && c2
== '=') ||
1733 (c
== '<' && c2
== '=') ||
1734 (c
== '>' && c2
== '=') ||
1735 (c
== '=' && c2
== '~') ||
1736 (c
== '!' && c2
== '~') ||
1737 (c
== '+' && c2
== '=') ||
1738 (c
== '-' && c2
== '=') ||
1739 (c
== '*' && c2
== '=') ||
1740 (c
== '/' && c2
== '=') ||
1741 (c
== '%' && c2
== '=') ||
1742 (c
== '&' && c2
== '=') ||
1743 (c
== '^' && c2
== '=') ||
1744 (c
== '|' && c2
== '=') ||
1745 (c
== '.' && c2
== '=') ||
1746 (c
== '&' && c2
== '&') ||
1747 (c
== '|' && c2
== '|') ||
1748 (c
== '+' && c2
== '+') ||
1749 (c
== '-' && c2
== '-') ||
1750 (c
== '-' && c2
== '>') ||
1751 (c
== '<' && c2
== '<') ||
1752 (c
== '>' && c2
== '>') ||
1753 // preprocessor tokens
1754 (c
== '%' && c2
== '(') ||
1755 (c
== '%' && c2
== '?') ||
1756 (c
== '%' && c2
== ':') ||
1757 (c
== '%' && c2
== ')'))
1760 input_get (); // swallow other character
1770 s
<< "\\x" << hex
<< setw(2) << setfill('0') << c
;
1771 n
->content
= s
.str();
1772 n
->msg
= ""; // signal parser to emit "expected X, found junk" type error
1777 // ------------------------------------------------------------------------
1780 token::make_junk (const string new_msg
)
1786 // ------------------------------------------------------------------------
1789 parser::parse (bool errs_as_warnings
)
1791 stapfile
* f
= new stapfile
;
1792 input
.set_current_file (f
);
1800 systemtap_v_seen
= 0;
1801 const token
* t
= peek ();
1802 if (! t
) // nice clean EOF, modulo any preprocessing that occurred
1806 if (t
->type
== tok_keyword
&& t
->content
== "probe")
1808 context
= con_probe
;
1809 parse_probe (f
->probes
, f
->aliases
);
1811 else if (t
->type
== tok_keyword
&& t
->content
== "global")
1813 context
= con_global
;
1814 parse_global (f
->globals
, f
->probes
);
1816 else if (t
->type
== tok_keyword
&& t
->content
== "function")
1818 context
= con_function
;
1819 parse_functiondecl (f
->functions
);
1821 else if (t
->type
== tok_embedded
)
1823 context
= con_embedded
;
1824 f
->embeds
.push_back (parse_embeddedcode ());
1828 context
= con_unknown
;
1829 throw PARSE_ERROR (_("expected 'probe', 'global', 'function', or '%{'"));
1832 catch (parse_error
& pe
)
1834 print_error (pe
, errs_as_warnings
);
1836 // XXX: do we want tok_junk to be able to force skip_some behaviour?
1837 if (pe
.skip_some
) // for recovery
1838 // Quietly swallow all tokens until the next keyword we can start parsing from.
1843 const token
* t
= peek ();
1846 if (t
->type
== tok_keyword
&& t
->content
== "probe") break;
1847 else if (t
->type
== tok_keyword
&& t
->content
== "global") break;
1848 else if (t
->type
== tok_keyword
&& t
->content
== "function") break;
1849 else if (t
->type
== tok_embedded
) break;
1850 swallow (); // swallow it
1853 catch (parse_error
& pe2
)
1855 // parse error during recovery ... ugh
1863 // vary message depending on whether file was *actually* empty:
1864 cerr
<< (input
.saw_tokens
1865 ? _F("Input file '%s' is empty after preprocessing.", input_name
.c_str())
1866 : _F("Input file '%s' is empty.", input_name
.c_str()))
1871 else if (num_errors
> 0)
1873 cerr
<< _NF("%d parse error.", "%d parse errors.", num_errors
, num_errors
) << endl
;
1878 input
.set_current_file(0);
1884 parser::parse_synthetic_probe (const token
* chain
, bool errs_as_warnings
)
1887 stapfile
* f
= new stapfile
;
1888 f
->synthetic
= true;
1889 input
.set_current_file (f
);
1890 input
.set_current_token_chain (chain
);
1894 context
= con_probe
;
1895 parse_probe (f
->probes
, f
->aliases
);
1897 if (f
->probes
.size() != 1 || !f
->aliases
.empty())
1898 throw PARSE_ERROR (_("expected a single synthetic probe"));
1901 catch (parse_error
& pe
)
1903 print_error (pe
, errs_as_warnings
);
1906 // TODO check for unparsed tokens?
1908 input
.set_current_file(0);
1909 input
.set_current_token_chain(0);
1915 parser::parse_probe (std::vector
<probe
*> & probe_ret
,
1916 std::vector
<probe_alias
*> & alias_ret
)
1918 const token
* t0
= next ();
1919 if (! (t0
->type
== tok_keyword
&& t0
->content
== "probe"))
1920 throw PARSE_ERROR (_("expected 'probe'"));
1922 vector
<probe_point
*> aliases
;
1923 vector
<probe_point
*> locations
;
1925 bool equals_ok
= true;
1927 int epilogue_alias
= 0;
1931 probe_point
* pp
= parse_probe_point ();
1933 const token
* t
= peek ();
1935 && t
->type
== tok_operator
&& t
->content
== "=")
1937 if (pp
->optional
|| pp
->sufficient
)
1938 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pp
->components
.front()->tok
);
1939 aliases
.push_back(pp
);
1943 else if (equals_ok
&& t
1944 && t
->type
== tok_operator
&& t
->content
== "+=")
1946 if (pp
->optional
|| pp
->sufficient
)
1947 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pp
->components
.front()->tok
);
1948 aliases
.push_back(pp
);
1953 else if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
1955 locations
.push_back(pp
);
1960 else if (t
&& t
->type
== tok_operator
&& t
->content
== "{")
1962 locations
.push_back(pp
);
1966 throw PARSE_ERROR (_("expected probe point specifier"));
1969 if (aliases
.empty())
1971 probe
* p
= new probe
;
1973 p
->locations
= locations
;
1974 p
->body
= parse_stmt_block ();
1975 p
->privileged
= privileged
;
1976 p
->systemtap_v_conditional
= systemtap_v_seen
;
1977 probe_ret
.push_back (p
);
1981 probe_alias
* p
= new probe_alias (aliases
);
1983 p
->epilogue_style
= true;
1985 p
->epilogue_style
= false;
1987 p
->locations
= locations
;
1988 p
->body
= parse_stmt_block ();
1989 p
->privileged
= privileged
;
1990 p
->systemtap_v_conditional
= systemtap_v_seen
;
1991 alias_ret
.push_back (p
);
1997 parser::parse_embeddedcode ()
1999 embeddedcode
* e
= new embeddedcode
;
2000 const token
* t
= next ();
2001 if (t
->type
!= tok_embedded
)
2002 throw PARSE_ERROR (_("expected '%{'"));
2005 throw PARSE_ERROR (_("embedded code in unprivileged script; need stap -g"),
2006 false /* don't skip tokens for parse resumption */);
2009 e
->code
= t
->content
;
2015 parser::parse_stmt_block ()
2017 block
* pb
= new block
;
2019 const token
* t
= next ();
2020 if (! (t
->type
== tok_operator
&& t
->content
== "{"))
2021 throw PARSE_ERROR (_("expected '{'"));
2028 if (t
&& t
->type
== tok_operator
&& t
->content
== "}")
2033 pb
->statements
.push_back (parse_statement ());
2041 parser::parse_try_block ()
2043 try_block
* pb
= new try_block
;
2045 pb
->tok
= expect_kw_token ("try");
2046 pb
->try_block
= parse_stmt_block();
2047 expect_kw ("catch");
2049 const token
* t
= peek ();
2050 if (t
!= NULL
&& t
->type
== tok_operator
&& t
->content
== "(")
2052 swallow (); // swallow the '('
2055 if (! (t
->type
== tok_identifier
))
2056 throw PARSE_ERROR (_("expected identifier"));
2057 symbol
* sym
= new symbol
;
2059 sym
->name
= t
->content
;
2060 pb
->catch_error_var
= sym
;
2065 pb
->catch_error_var
= 0;
2067 pb
->catch_block
= parse_stmt_block();
2075 parser::parse_statement ()
2078 const token
* t
= peek ();
2079 if (t
&& t
->type
== tok_operator
&& t
->content
== ";")
2080 return new null_statement (next ());
2081 else if (t
&& t
->type
== tok_operator
&& t
->content
== "{")
2082 return parse_stmt_block (); // Don't squash semicolons.
2083 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "try")
2084 return parse_try_block (); // Don't squash semicolons.
2085 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "if")
2086 return parse_if_statement (); // Don't squash semicolons.
2087 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "for")
2088 return parse_for_loop (); // Don't squash semicolons.
2089 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "foreach")
2090 return parse_foreach_loop (); // Don't squash semicolons.
2091 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "while")
2092 return parse_while_loop (); // Don't squash semicolons.
2093 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "return")
2094 ret
= parse_return_statement ();
2095 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "delete")
2096 ret
= parse_delete_statement ();
2097 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "break")
2098 ret
= parse_break_statement ();
2099 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "continue")
2100 ret
= parse_continue_statement ();
2101 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "next")
2102 ret
= parse_next_statement ();
2103 else if (t
&& (t
->type
== tok_operator
|| // expressions are flexible
2104 t
->type
== tok_identifier
||
2105 t
->type
== tok_number
||
2106 t
->type
== tok_string
||
2107 t
->type
== tok_embedded
))
2108 ret
= parse_expr_statement ();
2109 // XXX: consider generally accepting tok_embedded here too
2111 throw PARSE_ERROR (_("expected statement"));
2113 // Squash "empty" trailing colons after any "non-block-like" statement.
2115 if (t
&& t
->type
== tok_operator
&& t
->content
== ";")
2117 swallow (); // Silently eat trailing ; after statement
2125 parser::parse_global (vector
<vardecl
*>& globals
, vector
<probe
*>&)
2127 const token
* t0
= next ();
2128 if (! (t0
->type
== tok_keyword
&& t0
->content
== "global"))
2129 throw PARSE_ERROR (_("expected 'global'"));
2134 const token
* t
= next ();
2135 if (! (t
->type
== tok_identifier
))
2136 throw PARSE_ERROR (_("expected identifier"));
2138 for (unsigned i
=0; i
<globals
.size(); i
++)
2139 if (globals
[i
]->name
== t
->content
)
2140 throw PARSE_ERROR (_("duplicate global name"));
2142 vardecl
* d
= new vardecl
;
2143 d
->name
= t
->content
;
2145 d
->systemtap_v_conditional
= systemtap_v_seen
;
2146 globals
.push_back (d
);
2150 if(t
&& t
->type
== tok_operator
&& t
->content
== "%") //wrapping
2157 if (t
&& t
->type
== tok_operator
&& t
->content
== "[") // array size
2161 expect_number(size
);
2162 if (size
<= 0 || size
> 1000000) // arbitrary max
2163 throw PARSE_ERROR(_("array size out of range"));
2164 d
->maxsize
= (int)size
;
2165 expect_known(tok_operator
, "]");
2169 if (t
&& t
->type
== tok_operator
&& t
->content
== "=") // initialization
2171 if (!d
->compatible_arity(0))
2172 throw PARSE_ERROR(_("only scalar globals can be initialized"));
2174 next (); // Don't swallow, set_arity() used the peeked token.
2175 d
->init
= parse_literal ();
2176 d
->type
= d
->init
->type
;
2180 if (t
&& t
->type
== tok_operator
&& t
->content
== ";") // termination
2186 if (t
&& t
->type
== tok_operator
&& t
->content
== ",") // next global
2198 parser::parse_functiondecl (std::vector
<functiondecl
*>& functions
)
2200 const token
* t
= next ();
2201 if (! (t
->type
== tok_keyword
&& t
->content
== "function"))
2202 throw PARSE_ERROR (_("expected 'function'"));
2206 if (! (t
->type
== tok_identifier
)
2207 && ! (t
->type
== tok_keyword
2208 && (t
->content
== "string" || t
->content
== "long")))
2209 throw PARSE_ERROR (_("expected identifier"));
2211 for (unsigned i
=0; i
<functions
.size(); i
++)
2212 if (functions
[i
]->name
== t
->content
)
2213 throw PARSE_ERROR (_("duplicate function name"));
2215 functiondecl
*fd
= new functiondecl ();
2216 fd
->name
= t
->content
;
2220 if (t
->type
== tok_operator
&& t
->content
== ":")
2224 if (t
->type
== tok_keyword
&& t
->content
== "string")
2225 fd
->type
= pe_string
;
2226 else if (t
->type
== tok_keyword
&& t
->content
== "long")
2228 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2234 if (! (t
->type
== tok_operator
&& t
->content
== "("))
2235 throw PARSE_ERROR (_("expected '('"));
2242 // permit zero-argument functions
2243 if (t
->type
== tok_operator
&& t
->content
== ")")
2248 else if (! (t
->type
== tok_identifier
))
2249 throw PARSE_ERROR (_("expected identifier"));
2250 vardecl
* vd
= new vardecl
;
2251 vd
->name
= t
->content
;
2253 fd
->formal_args
.push_back (vd
);
2254 fd
->systemtap_v_conditional
= systemtap_v_seen
;
2257 if (t
->type
== tok_operator
&& t
->content
== ":")
2261 if (t
->type
== tok_keyword
&& t
->content
== "string")
2262 vd
->type
= pe_string
;
2263 else if (t
->type
== tok_keyword
&& t
->content
== "long")
2265 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2269 if (t
->type
== tok_operator
&& t
->content
== ")")
2274 if (t
->type
== tok_operator
&& t
->content
== ",")
2280 throw PARSE_ERROR (_("expected ',' or ')'"));
2284 if (t
&& t
->type
== tok_embedded
)
2285 fd
->body
= parse_embeddedcode ();
2287 fd
->body
= parse_stmt_block ();
2289 functions
.push_back (fd
);
2294 parser::parse_probe_point ()
2296 probe_point
* pl
= new probe_point
;
2300 const token
* t
= next ();
2301 if (! (t
->type
== tok_identifier
2302 // we must allow ".return" and ".function", which are keywords
2303 || t
->type
== tok_keyword
2304 // we must allow "*", due to being an operator
2305 || (t
->type
== tok_operator
&& t
->content
== "*")))
2306 throw PARSE_ERROR (_("expected identifier or '*'"));
2308 // loop which reconstitutes an identifier with wildcards
2309 string content
= t
->content
;
2312 const token
* u
= peek();
2315 // ensure pieces of the identifier are adjacent:
2316 if (input
.ate_whitespace
)
2318 // ensure pieces of the identifier are valid:
2319 if (! (u
->type
== tok_identifier
2320 // we must allow arbitrary keywords with a wildcard
2321 || u
->type
== tok_keyword
2322 // we must allow "*", due to being an operator
2323 || (u
->type
== tok_operator
&& u
->content
== "*")))
2327 content
= content
+ u
->content
;
2332 // get around const-ness of t:
2333 token
* new_t
= new token(*t
);
2334 new_t
->content
= content
;
2335 delete t
; t
= new_t
;
2337 probe_point::component
* c
= new probe_point::component
;
2338 c
->functor
= t
->content
;
2340 pl
->components
.push_back (c
);
2341 // NB we may add c->arg soon
2345 // consume optional parameter
2346 if (t
&& t
->type
== tok_operator
&& t
->content
== "(")
2348 swallow (); // consume "("
2349 c
->arg
= parse_literal ();
2352 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
2353 throw PARSE_ERROR (_("expected ')'"));
2359 if (t
&& t
->type
== tok_operator
&& t
->content
== ".")
2365 // We only fall through here at the end of a probe point (past
2366 // all the dotted/parametrized components).
2368 if (t
&& t
->type
== tok_operator
&&
2369 (t
->content
== "?" || t
->content
== "!"))
2371 pl
->optional
= true;
2372 if (t
->content
== "!") pl
->sufficient
= true;
2373 // NB: sufficient implies optional
2379 if (t
&& t
->type
== tok_keyword
&& t
->content
== "if")
2383 if (!(t
&& t
->type
== tok_operator
&& t
->content
== "("))
2384 throw PARSE_ERROR (_("expected '('"));
2387 pl
->condition
= parse_expression ();
2390 if (!(t
&& t
->type
== tok_operator
&& t
->content
== ")"))
2391 throw PARSE_ERROR (_("expected ')'"));
2397 if (t
&& t
->type
== tok_operator
2398 && (t
->content
== "{" || t
->content
== "," ||
2399 t
->content
== "=" || t
->content
== "+=" ))
2402 throw PARSE_ERROR (_("expected one of '. , ( ? ! { = +='"));
2410 parser::consume_string_literals(const token
*t
)
2412 literal_string
*ls
= new literal_string (t
->content
);
2414 // PR11208: check if the next token is also a string literal;
2415 // auto-concatenate it. This is complicated to the extent that we
2416 // need to skip intermediate whitespace.
2418 // NB for versions prior to 2.0: but don't skip over intervening comments
2419 const token
*n
= peek();
2420 while (n
!= NULL
&& n
->type
== tok_string
2421 && ! (strverscmp(session
.compatible
.c_str(), "2.0") < 0
2422 && input
.ate_comment
))
2424 ls
->value
.append(next()->content
); // consume and append the token
2431 // Parse a string literal and perform backslash escaping on the contents:
2433 parser::parse_literal_string ()
2435 const token
* t
= next ();
2437 if (t
->type
== tok_string
)
2438 l
= consume_string_literals (t
);
2440 throw PARSE_ERROR (_("expected literal string"));
2448 parser::parse_literal ()
2450 const token
* t
= next ();
2452 if (t
->type
== tok_string
)
2454 l
= consume_string_literals (t
);
2459 if (t
->type
== tok_operator
&& t
->content
== "-")
2466 if (t
->type
== tok_number
)
2468 const char* startp
= t
->content
.c_str ();
2469 char* endp
= (char*) startp
;
2471 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
2472 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
2473 // since the lexer only gives us positive digit strings, but we'll
2474 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
2476 long long value
= (long long) strtoull (startp
, & endp
, 0);
2477 if (errno
== ERANGE
|| errno
== EINVAL
|| *endp
!= '\0'
2478 || (neg
&& (unsigned long long) value
> 9223372036854775808ULL)
2479 || (unsigned long long) value
> 18446744073709551615ULL
2480 || value
< -9223372036854775807LL-1)
2481 throw PARSE_ERROR (_("number invalid or out of range"));
2486 l
= new literal_number (value
);
2489 throw PARSE_ERROR (_("expected literal string or number"));
2498 parser::parse_if_statement ()
2500 const token
* t
= next ();
2501 if (! (t
->type
== tok_keyword
&& t
->content
== "if"))
2502 throw PARSE_ERROR (_("expected 'if'"));
2503 if_statement
* s
= new if_statement
;
2507 if (! (t
->type
== tok_operator
&& t
->content
== "("))
2508 throw PARSE_ERROR (_("expected '('"));
2511 s
->condition
= parse_expression ();
2514 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
2515 throw PARSE_ERROR (_("expected ')'"));
2518 s
->thenblock
= parse_statement ();
2521 if (t
&& t
->type
== tok_keyword
&& t
->content
== "else")
2524 s
->elseblock
= parse_statement ();
2527 s
->elseblock
= 0; // in case not otherwise initialized
2534 parser::parse_expr_statement ()
2536 expr_statement
*es
= new expr_statement
;
2537 const token
* t
= peek ();
2539 throw PARSE_ERROR (_("expression statement expected"));
2540 // Copy, we only peeked, parse_expression might swallow.
2541 es
->tok
= new token (*t
);
2542 es
->value
= parse_expression ();
2548 parser::parse_return_statement ()
2550 const token
* t
= next ();
2551 if (! (t
->type
== tok_keyword
&& t
->content
== "return"))
2552 throw PARSE_ERROR (_("expected 'return'"));
2553 if (context
!= con_function
)
2554 throw PARSE_ERROR (_("found 'return' not in function context"));
2555 return_statement
* s
= new return_statement
;
2557 s
->value
= parse_expression ();
2563 parser::parse_delete_statement ()
2565 const token
* t
= next ();
2566 if (! (t
->type
== tok_keyword
&& t
->content
== "delete"))
2567 throw PARSE_ERROR (_("expected 'delete'"));
2568 delete_statement
* s
= new delete_statement
;
2570 s
->value
= parse_expression ();
2576 parser::parse_next_statement ()
2578 const token
* t
= next ();
2579 if (! (t
->type
== tok_keyword
&& t
->content
== "next"))
2580 throw PARSE_ERROR (_("expected 'next'"));
2581 if (context
!= con_probe
)
2582 throw PARSE_ERROR (_("found 'next' not in probe context"));
2583 next_statement
* s
= new next_statement
;
2590 parser::parse_break_statement ()
2592 const token
* t
= next ();
2593 if (! (t
->type
== tok_keyword
&& t
->content
== "break"))
2594 throw PARSE_ERROR (_("expected 'break'"));
2595 break_statement
* s
= new break_statement
;
2602 parser::parse_continue_statement ()
2604 const token
* t
= next ();
2605 if (! (t
->type
== tok_keyword
&& t
->content
== "continue"))
2606 throw PARSE_ERROR (_("expected 'continue'"));
2607 continue_statement
* s
= new continue_statement
;
2614 parser::parse_for_loop ()
2616 const token
* t
= next ();
2617 if (! (t
->type
== tok_keyword
&& t
->content
== "for"))
2618 throw PARSE_ERROR (_("expected 'for'"));
2619 for_loop
* s
= new for_loop
;
2623 if (! (t
->type
== tok_operator
&& t
->content
== "("))
2624 throw PARSE_ERROR (_("expected '('"));
2627 // initializer + ";"
2629 if (t
&& t
->type
== tok_operator
&& t
->content
== ";")
2636 s
->init
= parse_expr_statement ();
2638 if (! (t
->type
== tok_operator
&& t
->content
== ";"))
2639 throw PARSE_ERROR (_("expected ';'"));
2645 if (t
&& t
->type
== tok_operator
&& t
->content
== ";")
2647 literal_number
* l
= new literal_number(1);
2649 s
->cond
->tok
= next ();
2653 s
->cond
= parse_expression ();
2655 if (! (t
->type
== tok_operator
&& t
->content
== ";"))
2656 throw PARSE_ERROR (_("expected ';'"));
2662 if (t
&& t
->type
== tok_operator
&& t
->content
== ")")
2669 s
->incr
= parse_expr_statement ();
2671 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
2672 throw PARSE_ERROR (_("expected ')'"));
2677 s
->block
= parse_statement ();
2684 parser::parse_while_loop ()
2686 const token
* t
= next ();
2687 if (! (t
->type
== tok_keyword
&& t
->content
== "while"))
2688 throw PARSE_ERROR (_("expected 'while'"));
2689 for_loop
* s
= new for_loop
;
2693 if (! (t
->type
== tok_operator
&& t
->content
== "("))
2694 throw PARSE_ERROR (_("expected '('"));
2697 // dummy init and incr fields
2702 s
->cond
= parse_expression ();
2705 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
2706 throw PARSE_ERROR (_("expected ')'"));
2710 s
->block
= parse_statement ();
// Parse a foreach loop into a foreach_loop node.
// Pieces handled here, in order:
//   - keyword 'foreach' and '('
//   - an optional leading identifier, optionally followed by '+' or '-';
//     when an '=' follows, that identifier becomes s->value
//   - the index list: either a single identifier or a '[' ... ']' list,
//     where an index may carry a '+'/'-' sort directive
//   - keyword 'in' and the base, parsed by parse_indexable()
//   - an optional @avg/@min/@max/@count/@sum aggregate, which must be
//     followed by a '+'/'-' sort directive
//   - an optional '+'/'-' sort directive on the base
//   - an optional 'limit' expression, the ')' and the body statement
// Throws PARSE_ERROR on malformed input, including when more than one sort
// directive is given.
2717 parser::parse_foreach_loop ()
2719 const token
* t
= next ();
2720 if (! (t
->type
== tok_keyword
&& t
->content
== "foreach"))
2721 throw PARSE_ERROR (_("expected 'foreach'"));
2722 foreach_loop
* s
= new foreach_loop
;
// Defaults: no sort direction and no sort aggregate.
2724 s
->sort_direction
= 0;
2725 s
->sort_aggr
= sc_none
;
2730 if (! (t
->type
== tok_operator
&& t
->content
== "("))
2731 throw PARSE_ERROR (_("expected '('"));
// The first identifier may turn out to be either the value variable
// (when followed by '=') or the first index, so it is held in
// lookahead_sym / lookahead_sort until that is known.
2734 symbol
* lookahead_sym
= NULL
;
2735 int lookahead_sort
= 0;
2738 if (t
&& t
->type
== tok_identifier
)
2741 lookahead_sym
= new symbol
;
2742 lookahead_sym
->tok
= t
;
2743 lookahead_sym
->name
= t
->content
;
2746 if (t
&& t
->type
== tok_operator
&&
2747 (t
->content
== "+" || t
->content
== "-"))
// '+' maps to 1, '-' maps to -1.
2749 lookahead_sort
= (t
->content
== "+") ? 1 : -1;
2754 if (t
&& t
->type
== tok_operator
&& t
->content
== "=")
2757 s
->value
= lookahead_sym
;
2760 s
->sort_direction
= lookahead_sort
;
2763 lookahead_sym
= NULL
;
2767 // see also parse_array_in
2769 bool parenthesized
= false;
2771 if (!lookahead_sym
&& t
&& t
->type
== tok_operator
&& t
->content
== "[")
2774 parenthesized
= true;
// The held-back identifier is used as an index here.
2779 s
->indexes
.push_back (lookahead_sym
);
2782 s
->sort_direction
= lookahead_sort
;
2785 lookahead_sym
= NULL
;
2790 if (! (t
->type
== tok_identifier
))
2791 throw PARSE_ERROR (_("expected identifier"));
2792 symbol
* sym
= new symbol
;
2794 sym
->name
= t
->content
;
2795 s
->indexes
.push_back (sym
);
2798 if (t
&& t
->type
== tok_operator
&&
2799 (t
->content
== "+" || t
->content
== "-"))
2801 if (s
->sort_direction
)
2802 throw PARSE_ERROR (_("multiple sort directives"));
2803 s
->sort_direction
= (t
->content
== "+") ? 1 : -1;
// sort_column records how many indexes have been collected so far,
// i.e. the 1-based position of the index that was just pushed.
2804 s
->sort_column
= s
->indexes
.size();
2811 if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
2816 else if (t
&& t
->type
== tok_operator
&& t
->content
== "]")
2822 throw PARSE_ERROR (_("expected ',' or ']'"));
2825 break; // expecting only one expression
2829 if (! (t
->type
== tok_keyword
&& t
->content
== "in"))
2830 throw PARSE_ERROR (_("expected 'in'"));
2833 s
->base
= parse_indexable();
2835 // check for atword, see also expect_ident_or_atword,
2837 if (t
&& t
->type
== tok_operator
&& t
->content
[0] == '@')
// Map the @word onto the matching sort aggregate.
2839 if (t
->content
== "@avg") s
->sort_aggr
= sc_average
;
2840 else if (t
->content
== "@min") s
->sort_aggr
= sc_min
;
2841 else if (t
->content
== "@max") s
->sort_aggr
= sc_max
;
2842 else if (t
->content
== "@count") s
->sort_aggr
= sc_count
;
2843 else if (t
->content
== "@sum") s
->sort_aggr
= sc_sum
;
2844 else throw PARSE_ERROR(_("expected statistical operation"));
// An aggregate must be followed by a '+' or '-' sort directive.
2848 if (! (t
&& t
->type
== tok_operator
&& (t
->content
== "+" || t
->content
== "-")))
2849 throw PARSE_ERROR(_("expected sort directive"));
2853 if (t
&& t
->type
== tok_operator
&&
2854 (t
->content
== "+" || t
->content
== "-"))
2856 if (s
->sort_direction
)
2857 throw PARSE_ERROR (_("multiple sort directives"));
2858 s
->sort_direction
= (t
->content
== "+") ? 1 : -1;
2864 if (tok_is(t
, tok_keyword
, "limit"))
2866 swallow (); // get past the "limit"
2867 s
->limit
= parse_expression ();
2871 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
// NOTE(review): unlike the other PARSE_ERROR messages in this function,
// this one is not wrapped in _() -- confirm whether that is intended.
2872 throw PARSE_ERROR ("expected ')'");
2875 s
->block
= parse_statement ();
// Entry point for parsing an expression: delegates to parse_assignment().
2881 parser::parse_expression ()
2883 return parse_assignment ();
// Parse an assignment-level expression.
// The left operand comes from parse_ternary().  When the next token is one
// of the assignment operators tested below, an assignment node is created
// and its right side is parsed with a recursive parse_expression() call.
2888 parser::parse_assignment ()
2890 expression
* op1
= parse_ternary ();
2892 const token
* t
= peek ();
2893 // right-associative operators
2894 if (t
&& t
->type
== tok_operator
2895 && (t
->content
== "=" ||
2896 t
->content
== "<<<" ||
2897 t
->content
== "+=" ||
2898 t
->content
== "-=" ||
2899 t
->content
== "*=" ||
2900 t
->content
== "/=" ||
2901 t
->content
== "%=" ||
2902 t
->content
== "<<=" ||
2903 t
->content
== ">>=" ||
2904 t
->content
== "&=" ||
2905 t
->content
== "^=" ||
2906 t
->content
== "|=" ||
2907 t
->content
== ".=" ||
2910 // NB: lvalueness is checked during elaboration / translation
2911 assignment
* e
= new assignment
;
// Recursing into parse_expression() lets another assignment appear on the
// right-hand side.
2916 e
->right
= parse_expression ();
// Parse a conditional expression "COND ? A : B".
// The first operand comes from parse_logical_or().  If a '?' follows, a
// ternary_expression is created; both arms are parsed with
// parse_expression().  Throws PARSE_ERROR when the ':' is missing.
2925 parser::parse_ternary ()
2927 expression
* op1
= parse_logical_or ();
2929 const token
* t
= peek ();
2930 if (t
&& t
->type
== tok_operator
&& t
->content
== "?")
2932 ternary_expression
* e
= new ternary_expression
;
2936 e
->truevalue
= parse_expression (); // XXX
2939 if (! (t
->type
== tok_operator
&& t
->content
== ":"))
2940 throw PARSE_ERROR (_("expected ':'"));
2943 e
->falsevalue
= parse_expression (); // XXX
// Parse a logical-or expression.
// The first operand comes from parse_logical_and(); while the next token is
// the "||" operator, a logical_or_expr is created whose right operand is
// parsed with parse_logical_and().
2952 parser::parse_logical_or ()
2954 expression
* op1
= parse_logical_and ();
2956 const token
* t
= peek ();
2957 while (t
&& t
->type
== tok_operator
&& t
->content
== "||")
2959 logical_or_expr
* e
= new logical_or_expr
;
2964 e
->right
= parse_logical_and ();
// Parse a logical-and expression.
// The first operand comes from parse_boolean_or(); while the next token is
// the "&&" operator, a logical_and_expr is created whose right operand is
// parsed with parse_boolean_or().
2974 parser::parse_logical_and ()
2976 expression
* op1
= parse_boolean_or ();
2978 const token
* t
= peek ();
2979 while (t
&& t
->type
== tok_operator
&& t
->content
== "&&")
2981 logical_and_expr
*e
= new logical_and_expr
;
2986 e
->right
= parse_boolean_or ();
// Parse a "|" expression.
// The first operand comes from parse_boolean_xor(); while the next token is
// the "|" operator, a binary_expression is created whose right operand is
// parsed with parse_boolean_xor().
2996 parser::parse_boolean_or ()
2998 expression
* op1
= parse_boolean_xor ();
3000 const token
* t
= peek ();
3001 while (t
&& t
->type
== tok_operator
&& t
->content
== "|")
3003 binary_expression
* e
= new binary_expression
;
3008 e
->right
= parse_boolean_xor ();
// Parse a "^" expression.
// The first operand comes from parse_boolean_and(); while the next token is
// the "^" operator, a binary_expression is created whose right operand is
// parsed with parse_boolean_and().
3018 parser::parse_boolean_xor ()
3020 expression
* op1
= parse_boolean_and ();
3022 const token
* t
= peek ();
3023 while (t
&& t
->type
== tok_operator
&& t
->content
== "^")
3025 binary_expression
* e
= new binary_expression
;
3030 e
->right
= parse_boolean_and ();
// Parse a "&" expression.
// The first operand comes from parse_array_in(); while the next token is
// the "&" operator, a binary_expression is created whose right operand is
// parsed with parse_array_in().
3040 parser::parse_boolean_and ()
3042 expression
* op1
= parse_array_in ();
3044 const token
* t
= peek ();
3045 while (t
&& t
->type
== tok_operator
&& t
->content
== "&")
3047 binary_expression
* e
= new binary_expression
;
3052 e
->right
= parse_array_in ();
// Parse an array-membership test ("EXPR in ARRAY" or "[E1, E2] in ARRAY"),
// or a plain comparison-level expression when no 'in' follows.
// Expressions are collected into `indexes` via
// parse_comparison_or_regex_query().  When the keyword 'in' follows, an
// array_in node is built around an arrayindex whose base is parsed by
// parse_indexable().  Without 'in', exactly one collected expression is
// accepted; otherwise PARSE_ERROR is thrown.
3062 parser::parse_array_in ()
3064 // This is a very tricky case. All these are legit expressions:
3065 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
3066 vector
<expression
*> indexes
;
3067 bool parenthesized
= false;
3069 const token
* t
= peek ();
3070 if (t
&& t
->type
== tok_operator
&& t
->content
== "[")
3073 parenthesized
= true;
3078 expression
* op1
= parse_comparison_or_regex_query ();
3079 indexes
.push_back (op1
);
3083 const token
* t
= peek ();
3084 if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
3089 else if (t
&& t
->type
== tok_operator
&& t
->content
== "]")
3095 throw PARSE_ERROR (_("expected ',' or ']'"));
3098 break; // expecting only one expression
3102 if (t
&& t
->type
== tok_keyword
&& t
->content
== "in")
3104 array_in
*e
= new array_in
;
3108 arrayindex
* a
= new arrayindex
;
3109 a
->indexes
= indexes
;
3110 a
->base
= parse_indexable();
// The arrayindex takes its token from the base that was just parsed.
3111 a
->tok
= a
->base
->tok
;
3115 else if (indexes
.size() == 1) // no "in" - need one expression only
3118 throw PARSE_ERROR (_("unexpected comma-separated expression list"));
// Parse a comparison or a regex match.
// The first operand comes from parse_shift().  If the next token is "=~" or
// "!~", a regex_query is created whose right side is parsed with
// parse_literal_string().  Otherwise, while the next token is one of
// > < == != <= >=, a comparison is created whose right operand is parsed
// with parse_shift().
3123 parser::parse_comparison_or_regex_query ()
3125 expression
* op1
= parse_shift ();
3127 // XXX precedence -- perhaps a =~ b == c =~ d --> (a =~ b) == (c =~ d) ?
3128 const token
*t
= peek();
3129 if (t
&& t
->type
== tok_operator
3130 && (t
->content
== "=~" ||
3131 t
->content
== "!~"))
3133 regex_query
* r
= new regex_query
;
3138 r
->right
= parse_literal_string();
3142 else while (t
&& t
->type
== tok_operator
3143 && (t
->content
== ">" ||
3144 t
->content
== "<" ||
3145 t
->content
== "==" ||
3146 t
->content
== "!=" ||
3147 t
->content
== "<=" ||
3148 t
->content
== ">="))
3150 comparison
* e
= new comparison
;
3155 e
->right
= parse_shift ();
// Parse a shift expression.
// The first operand comes from parse_concatenation(); while the next token
// is "<<" or ">>", a binary_expression is created whose right operand is
// parsed with parse_concatenation().
3165 parser::parse_shift ()
3167 expression
* op1
= parse_concatenation ();
3169 const token
* t
= peek ();
3170 while (t
&& t
->type
== tok_operator
&&
3171 (t
->content
== "<<" || t
->content
== ">>"))
3173 binary_expression
* e
= new binary_expression
;
3178 e
->right
= parse_concatenation ();
// Parse a string concatenation expression.
// The first operand comes from parse_additive(); while the next token is the
// "." operator, a concatenation node is created whose right operand is
// parsed with parse_additive().
3188 parser::parse_concatenation ()
3190 expression
* op1
= parse_additive ();
3192 const token
* t
= peek ();
3193 // XXX: the actual awk string-concatenation operator is *whitespace*.
3194 // I don't know how to easily model that here.
3195 while (t
&& t
->type
== tok_operator
&& t
->content
== ".")
3197 concatenation
* e
= new concatenation
;
3202 e
->right
= parse_additive ();
// Parse an additive expression.
// The first operand comes from parse_multiplicative(); while the next token
// is "+" or "-", a binary_expression is created whose right operand is
// parsed with parse_multiplicative().
3212 parser::parse_additive ()
3214 expression
* op1
= parse_multiplicative ();
3216 const token
* t
= peek ();
3217 while (t
&& t
->type
== tok_operator
3218 && (t
->content
== "+" || t
->content
== "-"))
3220 binary_expression
* e
= new binary_expression
;
3225 e
->right
= parse_multiplicative ();
// Parse a multiplicative expression.
// The first operand comes from parse_unary(); while the next token is "*",
// "/" or "%", a binary_expression is created whose right operand is parsed
// with parse_unary().
3235 parser::parse_multiplicative ()
3237 expression
* op1
= parse_unary ();
3239 const token
* t
= peek ();
3240 while (t
&& t
->type
== tok_operator
3241 && (t
->content
== "*" || t
->content
== "/" || t
->content
== "%"))
3243 binary_expression
* e
= new binary_expression
;
3248 e
->right
= parse_unary ();
// Parse a prefix unary expression.
// When the next token is one of the unary operators tested below, a
// unary_expression is created whose operand is parsed with a recursive
// parse_unary() call, so prefix operators can be stacked.  Otherwise the
// work is handed to parse_crement().
3258 parser::parse_unary ()
3260 const token
* t
= peek ();
3261 if (t
&& t
->type
== tok_operator
3262 && (t
->content
== "+" ||
3263 t
->content
== "-" ||
3264 t
->content
== "!" ||
3265 t
->content
== "~" ||
3268 unary_expression
* e
= new unary_expression
;
3272 e
->operand
= parse_unary ();
3276 return parse_crement ();
// Parse a pre- or post-increment/decrement, or a plain value.
// A leading "++" or "--" produces a pre_crement whose operand comes from
// parse_value().  Otherwise the value is parsed first, and a "++" or "--"
// after it produces a post_crement.
3281 parser::parse_crement () // as in "increment" / "decrement"
3283 // NB: Ideally, we'd parse only a symbol as an operand to the
3284 // *crement operators, instead of a general expression value. We'd
3285 // need more complex lookahead code to tell apart the postfix cases.
3286 // So we just punt, and leave it to pass-3 to signal errors on
3287 // cases like "4++".
3289 const token
* t
= peek ();
3290 if (t
&& t
->type
== tok_operator
3291 && (t
->content
== "++" || t
->content
== "--"))
3293 pre_crement
* e
= new pre_crement
;
3297 e
->operand
= parse_value ();
3301 // post-crement or non-crement
3302 expression
*op1
= parse_value ();
3305 if (t
&& t
->type
== tok_operator
3306 && (t
->content
== "++" || t
->content
== "--"))
3308 post_crement
* e
= new post_crement
;
// Parse a primary value, dispatching on the next token:
//   - tok_embedded      -> embedded_expr carrying the token's content
//   - operator "("      -> parenthesized expression; requires ")"
//   - operator "&"      -> the token is consumed and passed to
//                          parse_target_symbol()
//   - identifier, or an operator whose text starts with '@'
//                       -> parse_symbol()
//   - anything else     -> parse_literal()
// Throws PARSE_ERROR for a missing value, for embedded code in an
// unprivileged script, and for a missing ")".
3321 parser::parse_value ()
3323 const token
* t
= peek ();
3325 throw PARSE_ERROR (_("expected value"));
3327 if (t
->type
== tok_embedded
)
3330 throw PARSE_ERROR (_("embedded expression code in unprivileged script; need stap -g"), false);
3332 embedded_expr
*e
= new embedded_expr
;
3334 e
->code
= t
->content
;
3339 if (t
->type
== tok_operator
&& t
->content
== "(")
3342 expression
* e
= parse_expression ();
3344 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
3345 throw PARSE_ERROR (_("expected ')'"));
3349 else if (t
->type
== tok_operator
&& t
->content
== "&")
3351 next (); // Cannot swallow, passing token on...
3352 return parse_target_symbol (t
);
3354 else if (t
->type
== tok_identifier
3355 || (t
->type
== tok_operator
&& t
->content
[0] == '@'))
3356 return parse_symbol ();
3358 return parse_literal ();
// Read an identifier or @word into `name` (via expect_ident_or_atword) and,
// when it is "@hist_linear" or "@hist_log", fill in the histogram operator
// `hop`:
//   - hop->htype is set to hist_linear or hist_log to match the name
//   - hop->stat is parsed with parse_expression()
//   - for hist_linear, three numbers are read with expect_number() and
//     appended to hop->params
// Both `hop` and `name` are output parameters (passed by reference).
3363 parser::parse_hist_op_or_bare_name (hist_op
*&hop
, string
&name
)
3366 const token
* t
= expect_ident_or_atword (name
);
3367 if (name
== "@hist_linear" || name
== "@hist_log")
3370 if (name
== "@hist_linear")
3371 hop
->htype
= hist_linear
;
3372 else if (name
== "@hist_log")
3373 hop
->htype
= hist_log
;
3376 hop
->stat
= parse_expression ();
3378 if (hop
->htype
== hist_linear
)
// A linear histogram takes three numeric parameters.
3380 for (size_t i
= 0; i
< 3; ++i
)
3383 expect_number (tnum
);
3384 hop
->params
.push_back (tnum
);
// Parse something that can be indexed: the name or histogram operator is
// read with parse_hist_op_or_bare_name(), and a symbol node is created here.
// NOTE(review): presumably the symbol is only built when no hist_op was
// returned -- confirm against the full function body.
3394 parser::parse_indexable ()
3396 hist_op
*hop
= NULL
;
3398 const token
*tok
= parse_hist_op_or_bare_name(hop
, name
);
3403 symbol
* sym
= new symbol
;
3411 // var, indexable[index], func(parms), printf("...", ...), $var,r
3412 // @cast, @defined, @entry, @var, $var->member, @stat_op(stat)
// Parse a symbol-like expression.  After reading the name (or histogram
// operator) with parse_hist_op_or_bare_name(), the name is dispatched:
//   - names starting with '$' (among others) -> parse_target_symbol()
//   - "@defined" / "@entry" / "@perf"  -> the matching parse_*_op()
//   - any other name starting with '@' -> stat_op (@count, @sum, @min,
//     @max, ...); unknown names throw PARSE_ERROR
//   - names accepted by print_format::create() -> print/printf family,
//     including the special print(@hist_...) form
//   - a name followed by "("            -> functioncall
// Afterwards an optional "[" ... "]" index list is parsed.  A histogram
// operator left on its own is rejected with PARSE_ERROR.
3413 expression
* parser::parse_symbol ()
3415 hist_op
*hop
= NULL
;
3418 const token
*t
= parse_hist_op_or_bare_name(hop
, name
);
3422 // If we didn't get a hist_op, then we did get an identifier. We can
3423 // now scrutinize this identifier for the various magic forms of identifier
3424 // (printf, @stat_op, and $var...)
3428 || (name
.size() > 0 && name
[0] == '$'))
3429 return parse_target_symbol (t
);
3431 // NB: PR11343: @defined() is not incompatible with earlier versions
3432 // of stap, so no need to check session.compatible for 1.2
3433 if (name
== "@defined")
3434 return parse_defined_op (t
);
3436 if (name
== "@entry")
3437 return parse_entry_op (t
);
3439 if (name
== "@perf")
3440 return parse_perf_op (t
);
3442 if (name
.size() > 0 && name
[0] == '@')
3444 stat_op
*sop
= new stat_op
;
// Select the statistic kind from the @word.
3446 sop
->ctype
= sc_average
;
3447 else if (name
== "@count")
3448 sop
->ctype
= sc_count
;
3449 else if (name
== "@sum")
3450 sop
->ctype
= sc_sum
;
3451 else if (name
== "@min")
3452 sop
->ctype
= sc_min
;
3453 else if (name
== "@max")
3454 sop
->ctype
= sc_max
;
3456 throw PARSE_ERROR(_("unknown operator ") + name
);
3459 sop
->stat
= parse_expression ();
3464 else if (print_format
*fmt
= print_format::create(t
))
3467 if ((name
== "print" || name
== "println" ||
3468 name
== "sprint" || name
== "sprintln") &&
3469 (peek_op("@hist_linear") || peek_op("@hist_log")))
3471 // We have a special case where we recognize
3472 // print(@hist_foo(bar)) as a magic print-the-histogram
3473 // construct. This is sort of gross but it avoids
3474 // promoting histogram references to typeful
3478 t
= parse_hist_op_or_bare_name(hop
, name
);
3481 // It is, sadly, possible that even while parsing a
3482 // hist_op, we *mis-guessed* and the user wishes to
3483 // print(@hist_op(foo)[bucket]), a scalar. In that case
3484 // we must parse the arrayindex and print an expression.
3486 // XXX: This still fails if the arrayindex is part of a
3487 // larger expression. To really handle everything, we'd
3488 // need to push back all the hist tokens and start over.
3494 // This is simplified version of the
3495 // multi-array-index parser below, because we can
3496 // only ever have one index on a histogram anyways.
3498 struct arrayindex
* ai
= new arrayindex
;
3501 ai
->indexes
.push_back (parse_expression ());
3503 fmt
->args
.push_back(ai
);
3505 // Consume any subsequent arguments.
3506 while (!peek_op (")"))
3509 expression
*e
= parse_expression ();
3510 fmt
->args
.push_back(e
);
3517 bool consumed_arg
= false;
3518 if (fmt
->print_with_format
)
3520 // Consume and convert a format string. Agreement between the
3521 // format string and the arguments is postponed to the
3522 // typechecking phase.
3524 expect_unknown (tok_string
, tmp
);
// Keep both the raw format text and its parsed components.
3525 fmt
->raw_components
= tmp
;
3526 fmt
->components
= print_format::string_to_components (tmp
);
3527 consumed_arg
= true;
3529 else if (fmt
->print_with_delim
)
3531 // Consume a delimiter to separate arguments.
3532 fmt
->delimiter
.clear();
3533 fmt
->delimiter
.type
= print_format::conv_literal
;
3534 expect_unknown (tok_string
, fmt
->delimiter
.literal_string
);
3535 consumed_arg
= true;
3536 min_args
= 2; // so that the delim is used at least once
3538 else if (!fmt
->print_with_newline
)
3540 // If we are not printing with a format string, nor with a
3541 // delim, nor with a newline, then it's either print() or
3542 // sprint(), both of which require at least one argument (of
3547 // Consume any subsequent arguments.
3548 while (min_args
|| !peek_op (")"))
3552 expression
*e
= parse_expression ();
3553 fmt
->args
.push_back(e
);
3554 consumed_arg
= true;
3563 else if (peek_op ("(")) // function call
3566 struct functioncall
* f
= new functioncall
;
3569 // Allow empty actual parameter list
3577 f
->args
.push_back (parse_expression ());
3583 else if (peek_op (","))
3589 throw PARSE_ERROR (_("expected ',' or ')'"));
3602 // By now, either we had a hist_op in the first place, or else
3603 // we had a plain word and it was converted to a symbol.
3605 assert (!hop
!= !sym
); // logical XOR
3607 // All that remains is to check for array indexing
3609 if (peek_op ("[")) // array
3612 struct arrayindex
* ai
= new arrayindex
;
3622 ai
->indexes
.push_back (parse_expression ());
3628 else if (peek_op (","))
3634 throw PARSE_ERROR (_("expected ',' or ']'"));
3639 // If we got to here, we *should* have a symbol; if we have
3640 // a hist_op on its own, it doesn't count as an expression,
3641 // so we throw a parse error.
3644 throw PARSE_ERROR(_("base histogram operator where expression expected"), t
);
3649 // Parse a @cast or $var. Given head token has already been consumed.
// A leading "&" operator token is recognized first and the next token is
// fetched.  Then, by head token:
//   - operator "@cast"            -> cast_op: operand expression, type_name
//                                    string and a module string
//   - identifier starting with '$' -> target_symbol
//   - operator "@var"             -> atvar_op: target_name string (the text
//                                    after '@' in it becomes cu_name) and a
//                                    module string
// Each node then gets its trailing components parsed with
// parse_target_symbol_components() and its addressof flag set.
// Any other head token throws PARSE_ERROR.
3650 target_symbol
* parser::parse_target_symbol (const token
* t
)
3652 bool addressof
= false;
3653 if (t
->type
== tok_operator
&& t
->content
== "&")
3656 // Don't delete t before trying next token.
3657 // We might need it in the error message when there is no next token.
3658 const token
*next_t
= next ();
3663 if (t
->type
== tok_operator
&& t
->content
== "@cast")
3665 cast_op
*cop
= new cast_op
;
3667 cop
->name
= t
->content
;
3669 cop
->operand
= parse_expression ();
3671 expect_unknown(tok_string
, cop
->type_name
);
3675 expect_unknown(tok_string
, cop
->module
);
3678 parse_target_symbol_components(cop
);
3679 cop
->addressof
= addressof
;
3683 if (t
->type
== tok_identifier
&& t
->content
[0]=='$')
3685 // target_symbol time
3686 target_symbol
*tsym
= new target_symbol
;
3688 tsym
->name
= t
->content
;
3689 parse_target_symbol_components(tsym
);
3690 tsym
->addressof
= addressof
;
3694 if (t
->type
== tok_operator
&& t
->content
== "@var")
3696 atvar_op
*aop
= new atvar_op
;
3698 aop
->name
= t
->content
;
3700 expect_unknown(tok_string
, aop
->target_name
);
// If the target name contains '@', the text after it is stored as cu_name.
3701 size_t found_at
= aop
->target_name
.find("@");
3702 if (found_at
!= string::npos
)
3703 aop
->cu_name
= aop
->target_name
.substr(found_at
+ 1);
3709 expect_unknown (tok_string
, aop
->module
);
3714 parse_target_symbol_components(aop
);
3715 aop
->addressof
= addressof
;
3719 throw PARSE_ERROR (_("expected @cast, @var or $var"));
3723 // Parse a @defined(). Given head token has already been consumed.
// Creates a defined_op whose operand is the target symbol parsed from the
// next token by parse_target_symbol().
3724 expression
* parser::parse_defined_op (const token
* t
)
3726 defined_op
* dop
= new defined_op
;
3729 // no need for parse_hist_op... etc., as @defined takes only target_symbols as its operand.
3730 const token
* tt
= next ();
3731 dop
->operand
= parse_target_symbol (tt
);
3737 // Parse a @entry(). Given head token has already been consumed.
// Creates an entry_op whose operand is a general expression parsed with
// parse_expression().
3738 expression
* parser::parse_entry_op (const token
* t
)
3740 entry_op
* eop
= new entry_op
;
3743 eop
->operand
= parse_expression ();
3749 // Parse a @perf(). Given head token has already been consumed.
// Creates a perf_op whose operand is a string literal parsed with
// parse_literal_string().  Throws PARSE_ERROR when that string is empty.
3750 expression
* parser::parse_perf_op (const token
* t
)
3752 perf_op
* pop
= new perf_op
;
3755 pop
->operand
= parse_literal_string ();
3756 if (pop
->operand
->value
== "")
3757 throw PARSE_ERROR (_("expected non-empty string"));
// Parse the components that follow a target symbol `e` and append them to
// e->components:
//   - a pretty-print suffix of '$' characters at the end of e->name, which
//     is split off the name and stored as a pretty-print component
//   - member accesses, read with expect_ident_or_keyword(), which may
//     themselves end in a pretty-print suffix
//   - "[" index accesses, stored either as a literal number value or as a
//     general expression
//   - a separate trailing token made only of '$' characters
// Throws PARSE_ERROR when "->" or "[" follows a pretty-printing variable.
3765 parser::parse_target_symbol_components (target_symbol
* e
)
3767 bool pprint
= false;
3769 // check for pretty-print in the form $foo$
3770 string
&base
= e
->name
;
// pprint_pos is the position of the last character that is not '$';
// anything after it is the pretty-print suffix.
3771 size_t pprint_pos
= base
.find_last_not_of('$');
3772 if (0 < pprint_pos
&& pprint_pos
< base
.length() - 1)
3774 string pprint_val
= base
.substr(pprint_pos
+ 1);
3775 base
.erase(pprint_pos
+ 1);
3776 e
->components
.push_back (target_symbol::component(e
->tok
, pprint_val
, true));
3784 const token
* t
= next();
3786 expect_ident_or_keyword (member
);
3788 // check for pretty-print in the form $foo->$ or $foo->bar$
3789 pprint_pos
= member
.find_last_not_of('$');
// When member consists only of '$', pprint_pos is string::npos and
// pprint_pos + 1 wraps around to 0, so the whole member becomes the
// pretty-print value and member is left empty.
3791 if (pprint_pos
== string::npos
|| pprint_pos
< member
.length() - 1)
3793 pprint_val
= member
.substr(pprint_pos
+ 1);
3794 member
.erase(pprint_pos
+ 1);
3798 if (!member
.empty())
3799 e
->components
.push_back (target_symbol::component(t
, member
));
3801 e
->components
.push_back (target_symbol::component(t
, pprint_val
, true));
3803 else if (peek_op ("["))
3805 const token
* t
= next();
3806 expression
* index
= parse_expression();
// A literal-number index is stored by value; any other index is stored
// as an expression.
3807 literal_number
* ln
= dynamic_cast<literal_number
*>(index
);
3809 e
->components
.push_back (target_symbol::component(t
, ln
->value
));
3811 e
->components
.push_back (target_symbol::component(t
, index
));
3820 // check for pretty-print in the form $foo $
3821 // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$
3822 const token
* t
= peek();
3823 if (t
!= NULL
&& t
->type
== tok_identifier
&&
3824 t
->content
.find_first_not_of('$') == string::npos
)
3827 e
->components
.push_back (target_symbol::component(t
, t
->content
, true));
3832 if (pprint
&& (peek_op ("->") || peek_op("[")))
3833 throw PARSE_ERROR(_("-> and [ are not accepted for a pretty-printing variable"));
3836 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */